| ; This tests the NaCl intrinsics not related to atomic operations. | 
 |  | 
 | ; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \ | 
 | ; RUN:   --target x8632 --sandbox -i %s --args -O2 \ | 
 | ; RUN:   -allow-externally-defined-symbols \ | 
 | ; RUN:   | %if --need=target_X8632 --command FileCheck %s | 
 | ; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \ | 
 | ; RUN:   --target x8632 --sandbox -i %s --args -Om1 \ | 
 | ; RUN:   -allow-externally-defined-symbols \ | 
 | ; RUN:   | %if --need=target_X8632 --command FileCheck %s | 
 |  | 
 | ; Do another run w/ O2 and a different check-prefix (otherwise O2 and Om1 | 
 | ; share the same "CHECK" prefix). This separate run helps check that | 
 | ; some code is optimized out. | 
 | ; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \ | 
 | ; RUN:   --target x8632 --sandbox -i %s --args -O2 \ | 
 | ; RUN:   -allow-externally-defined-symbols \ | 
 | ; RUN:   | %if --need=target_X8632 \ | 
 | ; RUN:   --command FileCheck --check-prefix=CHECKO2REM %s | 
 |  | 
 | ; Do O2 runs without -sandbox to make sure llvm.nacl.read.tp gets | 
 | ; lowered to __nacl_read_tp instead of gs:0x0. | 
 | ; We also know that because it's O2, it'll have the O2REM optimizations. | 
 | ; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \ | 
 | ; RUN:   --target x8632 -i %s --args -O2 \ | 
 | ; RUN:   -allow-externally-defined-symbols \ | 
 | ; RUN:   | %if --need=target_X8632 \ | 
 | ; RUN:   --command FileCheck --check-prefix=CHECKO2UNSANDBOXEDREM %s | 
 |  | 
 | ; RUN: %if --need=target_ARM32 \ | 
 | ; RUN:   --command %p2i --filetype=obj --disassemble --target arm32 \ | 
 | ; RUN:   -i %s --args -O2 \ | 
 | ; RUN:   -allow-externally-defined-symbols \ | 
 | ; RUN:   | %if --need=target_ARM32 \ | 
 | ; RUN:   --command FileCheck --check-prefix ARM32 %s | 
 |  | 
 | ; RUN: %if --need=target_MIPS32 --need=allow_dump \ | 
 | ; RUN:   --command %p2i --filetype=asm --assemble --disassemble --target mips32\ | 
 | ; RUN:   -i %s --args -Om1 --skip-unimplemented \ | 
 | ; RUN:   -allow-externally-defined-symbols \ | 
 | ; RUN:   | %if --need=target_MIPS32 --need=allow_dump \ | 
 | ; RUN:   --command FileCheck --check-prefix MIPS32 %s | 
 |  | 
 | ; Declarations of every intrinsic exercised by the tests in this file: | 
 | ; NaCl thread-pointer read and setjmp/longjmp, sqrt, fabs, trap, bswap, | 
 | ; ctlz/cttz/ctpop, and stacksave/stackrestore. | 
 | declare i8* @llvm.nacl.read.tp() | 
 | declare void @llvm.nacl.longjmp(i8*, i32) | 
 | declare i32 @llvm.nacl.setjmp(i8*) | 
 | declare float @llvm.sqrt.f32(float) | 
 | declare double @llvm.sqrt.f64(double) | 
 | declare float @llvm.fabs.f32(float) | 
 | declare double @llvm.fabs.f64(double) | 
 | declare <4 x float> @llvm.fabs.v4f32(<4 x float>) | 
 | declare void @llvm.trap() | 
 | declare i16 @llvm.bswap.i16(i16) | 
 | declare i32 @llvm.bswap.i32(i32) | 
 | declare i64 @llvm.bswap.i64(i64) | 
 | declare i32 @llvm.ctlz.i32(i32, i1) | 
 | declare i64 @llvm.ctlz.i64(i64, i1) | 
 | declare i32 @llvm.cttz.i32(i32, i1) | 
 | declare i64 @llvm.cttz.i64(i64, i1) | 
 | declare i32 @llvm.ctpop.i32(i32) | 
 | declare i64 @llvm.ctpop.i64(i64) | 
 | declare i8* @llvm.stacksave() | 
 | declare void @llvm.stackrestore(i8*) | 
 |  | 
 | ; Reads the thread pointer once via llvm.nacl.read.tp and returns it as an | 
 | ; i32. The expectations that follow want a gs:0x0 load when sandboxed and a | 
 | ; call to __nacl_read_tp when not sandboxed. | 
 | define internal i32 @test_nacl_read_tp() { | 
 | entry: | 
 |   %ptr = call i8* @llvm.nacl.read.tp() | 
 |   %__1 = ptrtoint i8* %ptr to i32 | 
 |   ret i32 %__1 | 
 | } | 
 | ; CHECK-LABEL: test_nacl_read_tp | 
 | ; CHECK: mov e{{.*}},{{(DWORD PTR )?}}gs:0x0 | 
 | ; CHECKO2REM-LABEL: test_nacl_read_tp | 
 | ; CHECKO2REM: mov e{{.*}},{{(DWORD PTR )?}}gs:0x0 | 
 | ; CHECKO2UNSANDBOXEDREM-LABEL: test_nacl_read_tp | 
 | ; CHECKO2UNSANDBOXEDREM: call {{.*}} R_{{.*}} __nacl_read_tp | 
 | ; MIPS32-LABEL: test_nacl_read_tp | 
 | ; MIPS32: jal {{.*}} __nacl_read_tp | 
 |  | 
 | ; Reads the thread pointer twice and uses each result in address arithmetic: | 
 | ; the first is doubled and used as a load address, the second is offset by 4 | 
 | ; and used as a store address. Returns the loaded value. | 
 | define internal i32 @test_nacl_read_tp_more_addressing() { | 
 | entry: | 
 |   ; First read: load from (tp + tp). | 
 |   %ptr = call i8* @llvm.nacl.read.tp() | 
 |   %__1 = ptrtoint i8* %ptr to i32 | 
 |   %x = add i32 %__1, %__1 | 
 |   %__3 = inttoptr i32 %x to i32* | 
 |   %v = load i32, i32* %__3, align 1 | 
 |   ; %v_add has no users in this function. | 
 |   %v_add = add i32 %v, 1 | 
 |  | 
 |   ; Second read: store (%v + 4) to (tp + 4). | 
 |   %ptr2 = call i8* @llvm.nacl.read.tp() | 
 |   %__6 = ptrtoint i8* %ptr2 to i32 | 
 |   %y = add i32 %__6, 4 | 
 |   %__8 = inttoptr i32 %y to i32* | 
 |   %v_add2 = add i32 %v, 4 | 
 |   store i32 %v_add2, i32* %__8, align 1 | 
 |   ret i32 %v | 
 | } | 
 | ; CHECK-LABEL: test_nacl_read_tp_more_addressing | 
 | ; CHECK: mov e{{.*}},{{(DWORD PTR )?}}gs:0x0 | 
 | ; CHECK: mov e{{.*}},{{(DWORD PTR )?}}gs:0x0 | 
 | ; CHECKO2REM-LABEL: test_nacl_read_tp_more_addressing | 
 | ; CHECKO2REM: mov e{{.*}},{{(DWORD PTR )?}}gs:0x0 | 
 | ; CHECKO2REM: mov e{{.*}},{{(DWORD PTR )?}}gs:0x0 | 
 | ; CHECKO2UNSANDBOXEDREM-LABEL: test_nacl_read_tp_more_addressing | 
 | ; CHECKO2UNSANDBOXEDREM: call {{.*}} R_{{.*}} __nacl_read_tp | 
 | ; CHECKO2UNSANDBOXEDREM: call {{.*}} R_{{.*}} __nacl_read_tp | 
 | ; MIPS32-LABEL: test_nacl_read_tp_more_addressing | 
 | ; MIPS32: jal {{.*}} __nacl_read_tp | 
 |  | 
 | ; Calls llvm.nacl.read.tp but never uses the result; the function simply | 
 | ; returns its argument %a. Used to verify that the unused read is removed | 
 | ; at O2. | 
 | define internal i32 @test_nacl_read_tp_dead(i32 %a) { | 
 | entry: | 
 |   %ptr = call i8* @llvm.nacl.read.tp() | 
 |   ; Not actually using the result of nacl read tp call. | 
 |   ; In O2 mode this should be DCE'ed. | 
 |   ret i32 %a | 
 | } | 
 | ; Consider nacl.read.tp side-effect free, so it can be eliminated. | 
 | ; The negative pattern is spelled exactly like the positive gs:0x0 patterns | 
 | ; used by the other read.tp tests (no space after the comma, optional | 
 | ; DWORD PTR); with a space after the comma it could never match the | 
 | ; disassembly and the check would pass vacuously. | 
 | ; CHECKO2REM-LABEL: test_nacl_read_tp_dead | 
 | ; CHECKO2REM-NOT: mov e{{.*}},{{(DWORD PTR )?}}gs:0x0 | 
 | ; CHECKO2UNSANDBOXEDREM-LABEL: test_nacl_read_tp_dead | 
 | ; CHECKO2UNSANDBOXEDREM-NOT: call {{.*}} R_{{.*}} __nacl_read_tp | 
 | ; MIPS32-LABEL: test_nacl_read_tp_dead | 
 | ; MIPS32: jal {{.*}} __nacl_read_tp | 
 |  | 
 | ; Calls llvm.nacl.setjmp on the buffer addressed by %iptr_env. If setjmp | 
 | ; returns 0, calls llvm.nacl.longjmp on the same buffer with value 1 (the | 
 | ; following ret i32 0 is only there to terminate the block); otherwise | 
 | ; returns 1. | 
 | define internal i32 @test_setjmplongjmp(i32 %iptr_env) { | 
 | entry: | 
 |   %env = inttoptr i32 %iptr_env to i8* | 
 |   %i = call i32 @llvm.nacl.setjmp(i8* %env) | 
 |   %r1 = icmp eq i32 %i, 0 | 
 |   br i1 %r1, label %Zero, label %NonZero | 
 | Zero: | 
 |   ; Redundant inttoptr, to make --pnacl cast-eliding/re-insertion happy. | 
 |   %env2 = inttoptr i32 %iptr_env to i8* | 
 |   call void @llvm.nacl.longjmp(i8* %env2, i32 1) | 
 |   ret i32 0 | 
 | NonZero: | 
 |   ret i32 1 | 
 | } | 
 | ; CHECK-LABEL: test_setjmplongjmp | 
 | ; CHECK: call {{.*}} R_{{.*}} setjmp | 
 | ; CHECK: call {{.*}} R_{{.*}} longjmp | 
 | ; CHECKO2REM-LABEL: test_setjmplongjmp | 
 | ; CHECKO2REM: call {{.*}} R_{{.*}} setjmp | 
 | ; CHECKO2REM: call {{.*}} R_{{.*}} longjmp | 
 | ; ARM32-LABEL: test_setjmplongjmp | 
 | ; ARM32: bl {{.*}} setjmp | 
 | ; ARM32: bl {{.*}} longjmp | 
 | ; MIPS32-LABEL: test_setjmplongjmp | 
 | ; MIPS32: jal {{.*}} setjmp | 
 | ; MIPS32: jal {{.*}} longjmp | 
 |  | 
 | ; Calls llvm.nacl.setjmp and discards its result (%i has no users); returns | 
 | ; the unrelated argument %i_other. | 
 | define internal i32 @test_setjmp_unused(i32 %iptr_env, i32 %i_other) { | 
 | entry: | 
 |   %env = inttoptr i32 %iptr_env to i8* | 
 |   %i = call i32 @llvm.nacl.setjmp(i8* %env) | 
 |   ret i32 %i_other | 
 | } | 
 | ; Don't consider setjmp side-effect free, so it's not eliminated if | 
 | ; result unused. | 
 | ; CHECKO2REM-LABEL: test_setjmp_unused | 
 | ; CHECKO2REM: call {{.*}} R_{{.*}} setjmp | 
 | ; MIPS32-LABEL: test_setjmp_unused | 
 | ; MIPS32: jal {{.*}} setjmp | 
 |  | 
 | ; Three f32 sqrt calls: on the argument %x, on the previous result, and on | 
 | ; the constant -0.0. Returns the sum of the last two results. %iptr is | 
 | ; unused. | 
 | define internal float @test_sqrt_float(float %x, i32 %iptr) { | 
 | entry: | 
 |   %r = call float @llvm.sqrt.f32(float %x) | 
 |   %r2 = call float @llvm.sqrt.f32(float %r) | 
 |   %r3 = call float @llvm.sqrt.f32(float -0.0) | 
 |   %r4 = fadd float %r2, %r3 | 
 |   ret float %r4 | 
 | } | 
 | ; CHECK-LABEL: test_sqrt_float | 
 | ; CHECK: sqrtss xmm{{.*}} | 
 | ; CHECK: sqrtss xmm{{.*}} | 
 | ; CHECK: sqrtss xmm{{.*}},DWORD PTR | 
 | ; ARM32-LABEL: test_sqrt_float | 
 | ; ARM32: vsqrt.f32 | 
 | ; ARM32: vsqrt.f32 | 
 | ; ARM32: vsqrt.f32 | 
 | ; ARM32: vadd.f32 | 
 | ; MIPS32-LABEL: test_sqrt_float | 
 | ; MIPS32: sqrt.s | 
 | ; MIPS32: sqrt.s | 
 | ; MIPS32: sqrt.s | 
 | ; MIPS32: add.s | 
 |  | 
 | ; Loads a float from the address in %iptr, takes its sqrt, and returns | 
 | ; %x plus that result. The load feeds the sqrt directly. | 
 | define internal float @test_sqrt_float_mergeable_load(float %x, i32 %iptr) { | 
 | entry: | 
 |   %__2 = inttoptr i32 %iptr to float* | 
 |   %y = load float, float* %__2, align 4 | 
 |   %r5 = call float @llvm.sqrt.f32(float %y) | 
 |   %r6 = fadd float %x, %r5 | 
 |   ret float %r6 | 
 | } | 
 | ; CHECK-LABEL: test_sqrt_float_mergeable_load | 
 | ; We could fold the load and the sqrt into one operation, but the | 
 | ; current folding only handles load + arithmetic op. The sqrt inst | 
 | ; is considered an intrinsic call and not an arithmetic op. | 
 | ; CHECK: sqrtss xmm{{.*}} | 
 | ; ARM32-LABEL: test_sqrt_float_mergeable_load | 
 | ; ARM32: vldr s{{.*}} | 
 | ; ARM32: vsqrt.f32 | 
 |  | 
 | ; f64 counterpart of test_sqrt_float: sqrt of %x, sqrt of that result, and | 
 | ; sqrt of the constant -0.0; returns the sum of the last two. %iptr is | 
 | ; unused. | 
 | define internal double @test_sqrt_double(double %x, i32 %iptr) { | 
 | entry: | 
 |   %r = call double @llvm.sqrt.f64(double %x) | 
 |   %r2 = call double @llvm.sqrt.f64(double %r) | 
 |   %r3 = call double @llvm.sqrt.f64(double -0.0) | 
 |   %r4 = fadd double %r2, %r3 | 
 |   ret double %r4 | 
 | } | 
 | ; CHECK-LABEL: test_sqrt_double | 
 | ; CHECK: sqrtsd xmm{{.*}} | 
 | ; CHECK: sqrtsd xmm{{.*}} | 
 | ; CHECK: sqrtsd xmm{{.*}},QWORD PTR | 
 | ; ARM32-LABEL: test_sqrt_double | 
 | ; ARM32: vsqrt.f64 | 
 | ; ARM32: vsqrt.f64 | 
 | ; ARM32: vsqrt.f64 | 
 | ; ARM32: vadd.f64 | 
 | ; MIPS32-LABEL: test_sqrt_double | 
 | ; MIPS32: sqrt.d | 
 | ; MIPS32: sqrt.d | 
 | ; MIPS32: sqrt.d | 
 | ; MIPS32: add.d | 
 |  | 
 | ; Loads a double from the address in %iptr, takes its sqrt, and returns | 
 | ; %x plus that result. The load feeds the sqrt directly. | 
 | define internal double @test_sqrt_double_mergeable_load(double %x, i32 %iptr) { | 
 | entry: | 
 |   %__2 = inttoptr i32 %iptr to double* | 
 |   %y = load double, double* %__2, align 8 | 
 |   %r5 = call double @llvm.sqrt.f64(double %y) | 
 |   %r6 = fadd double %x, %r5 | 
 |   ret double %r6 | 
 | } | 
 | ; CHECK-LABEL: test_sqrt_double_mergeable_load | 
 | ; CHECK: sqrtsd xmm{{.*}} | 
 | ; ARM32-LABEL: test_sqrt_double_mergeable_load | 
 | ; ARM32: vldr d{{.*}} | 
 | ; ARM32: vsqrt.f64 | 
 |  | 
 | ; Calls the f32 and f64 sqrt intrinsics but uses neither result; always | 
 | ; returns 0.0. | 
 | define internal float @test_sqrt_ignored(float %x, double %y) { | 
 | entry: | 
 |   %ignored1 = call float @llvm.sqrt.f32(float %x) | 
 |   %ignored2 = call double @llvm.sqrt.f64(double %y) | 
 |   ret float 0.0 | 
 | } | 
 | ; CHECKO2REM-LABEL: test_sqrt_ignored | 
 | ; CHECKO2REM-NOT: sqrtss | 
 | ; CHECKO2REM-NOT: sqrtsd | 
 | ; MIPS32-LABEL: test_sqrt_ignored | 
 | ; MIPS32: sqrt.s | 
 | ; MIPS32: sqrt.d | 
 |  | 
 | ; Three f32 fabs calls: on the argument %x, on the previous result, and on | 
 | ; the constant -0.0. Returns the sum of the last two results. | 
 | define internal float @test_fabs_float(float %x) { | 
 | entry: | 
 |   %r = call float @llvm.fabs.f32(float %x) | 
 |   %r2 = call float @llvm.fabs.f32(float %r) | 
 |   %r3 = call float @llvm.fabs.f32(float -0.0) | 
 |   %r4 = fadd float %r2, %r3 | 
 |   ret float %r4 | 
 | } | 
 | ;;; Specially check that the pand instruction doesn't try to operate on a 32-bit | 
 | ;;; (f32) memory operand, and instead uses two xmm registers. | 
 | ; CHECK-LABEL: test_fabs_float | 
 | ; CHECK: pcmpeqd | 
 | ; CHECK: psrld | 
 | ; CHECK: pand {{.*}}xmm{{.*}}xmm | 
 | ; CHECK: pcmpeqd | 
 | ; CHECK: psrld | 
 | ; CHECK: pand {{.*}}xmm{{.*}}xmm | 
 | ; CHECK: pcmpeqd | 
 | ; CHECK: psrld | 
 | ; CHECK: pand {{.*}}xmm{{.*}}xmm | 
 | ; MIPS32-LABEL: test_fabs_float | 
 | ; MIPS32: abs.s | 
 | ; MIPS32: abs.s | 
 | ; MIPS32: abs.s | 
 | ; MIPS32: add.s | 
 |  | 
 | ; f64 counterpart of test_fabs_float: fabs of %x, fabs of that result, and | 
 | ; fabs of the constant -0.0; returns the sum of the last two. | 
 | define internal double @test_fabs_double(double %x) { | 
 | entry: | 
 |   %r = call double @llvm.fabs.f64(double %x) | 
 |   %r2 = call double @llvm.fabs.f64(double %r) | 
 |   %r3 = call double @llvm.fabs.f64(double -0.0) | 
 |   %r4 = fadd double %r2, %r3 | 
 |   ret double %r4 | 
 | } | 
 | ;;; Specially check that the pand instruction doesn't try to operate on a 64-bit | 
 | ;;; (f64) memory operand, and instead uses two xmm registers. | 
 | ; CHECK-LABEL: test_fabs_double | 
 | ; CHECK: pcmpeqd | 
 | ; CHECK: psrlq | 
 | ; CHECK: pand {{.*}}xmm{{.*}}xmm | 
 | ; CHECK: pcmpeqd | 
 | ; CHECK: psrlq | 
 | ; CHECK: pand {{.*}}xmm{{.*}}xmm | 
 | ; CHECK: pcmpeqd | 
 | ; CHECK: psrlq | 
 | ; CHECK: pand {{.*}}xmm{{.*}}xmm | 
 | ; MIPS32-LABEL: test_fabs_double | 
 | ; MIPS32: abs.d | 
 | ; MIPS32: abs.d | 
 | ; MIPS32: abs.d | 
 | ; MIPS32: add.d | 
 |  | 
 | ; Vector (<4 x float>) fabs: on the argument %x, on the previous result, | 
 | ; and on an undef operand. Returns the element-wise sum of the last two. | 
 | define internal <4 x float> @test_fabs_v4f32(<4 x float> %x) { | 
 | entry: | 
 |   %r = call <4 x float> @llvm.fabs.v4f32(<4 x float> %x) | 
 |   %r2 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %r) | 
 |   %r3 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) | 
 |   %r4 = fadd <4 x float> %r2, %r3 | 
 |   ret <4 x float> %r4 | 
 | } | 
 | ; CHECK-LABEL: test_fabs_v4f32 | 
 | ; CHECK: pcmpeqd | 
 | ; CHECK: psrld | 
 | ; CHECK: pand | 
 | ; CHECK: pcmpeqd | 
 | ; CHECK: psrld | 
 | ; CHECK: pand | 
 | ; CHECK: pcmpeqd | 
 | ; CHECK: psrld | 
 | ; CHECK: pand | 
 |  | 
 | ; Branches on %br == 0: the Zero block calls llvm.trap and is terminated | 
 | ; by unreachable; the NonZero block returns 1. | 
 | define internal i32 @test_trap(i32 %br) { | 
 | entry: | 
 |   %r1 = icmp eq i32 %br, 0 | 
 |   br i1 %r1, label %Zero, label %NonZero | 
 | Zero: | 
 |   call void @llvm.trap() | 
 |   unreachable | 
 | NonZero: | 
 |   ret i32 1 | 
 | } | 
 | ; CHECK-LABEL: test_trap | 
 | ; CHECK: ud2 | 
 | ; ARM32-LABEL: test_trap | 
 | ; ARM32: udf | 
 | ; MIPS32-LABEL: test_trap | 
 | ; MIPS32: teq zero,zero | 
 |  | 
 | ; Truncates %x to i16, byte-swaps it with llvm.bswap.i16, and returns the | 
 | ; result zero-extended back to i32. | 
 | define internal i32 @test_bswap_16(i32 %x) { | 
 | entry: | 
 |   %x_trunc = trunc i32 %x to i16 | 
 |   %r = call i16 @llvm.bswap.i16(i16 %x_trunc) | 
 |   %r_zext = zext i16 %r to i32 | 
 |   ret i32 %r_zext | 
 | } | 
 | ; CHECK-LABEL: test_bswap_16 | 
 | ; Make sure this is the right operand size so that the most significant bit | 
 | ; to least significant bit rotation happens at the right boundary. | 
 | ; CHECK: rol {{[abcd]x|si|di|bp|word ptr}},0x8 | 
 | ; ARM32-LABEL: test_bswap_16 | 
 | ; ARM32: rev | 
 | ; ARM32: lsr {{.*}} #16 | 
 | ; MIPS32-LABEL: test_bswap_16 | 
 | ; MIPS32: sll {{.*}},0x8 | 
 | ; MIPS32: lui {{.*}},0xff | 
 | ; MIPS32: and | 
 | ; MIPS32: sll {{.*}},0x18 | 
 | ; MIPS32: or | 
 | ; MIPS32: srl {{.*}},0x10 | 
 | ; MIPS32: andi {{.*}},0xffff | 
 |  | 
 | ; Returns llvm.bswap.i32 applied to %x. | 
 | define internal i32 @test_bswap_32(i32 %x) { | 
 | entry: | 
 |   %r = call i32 @llvm.bswap.i32(i32 %x) | 
 |   ret i32 %r | 
 | } | 
 | ; CHECK-LABEL: test_bswap_32 | 
 | ; CHECK: bswap e{{.*}} | 
 | ; ARM32-LABEL: test_bswap_32 | 
 | ; ARM32: rev | 
 | ; MIPS32-LABEL: test_bswap_32 | 
 | ; MIPS32: srl {{.*}},0x18 | 
 | ; MIPS32: srl {{.*}},0x8 | 
 | ; MIPS32: andi {{.*}},0xff00 | 
 | ; MIPS32: or | 
 | ; MIPS32: sll {{.*}},0x8 | 
 | ; MIPS32: lui {{.*}},0xff | 
 | ; MIPS32: and | 
 | ; MIPS32: sll {{.*}},0x18 | 
 | ; MIPS32: or | 
 | ; MIPS32: or | 
 |  | 
 | ; Returns llvm.bswap.i64 applied to %x. The x86-32 and ARM expectations | 
 | ; that follow each look for two 32-bit swap instructions. | 
 | define internal i64 @test_bswap_64(i64 %x) { | 
 | entry: | 
 |   %r = call i64 @llvm.bswap.i64(i64 %x) | 
 |   ret i64 %r | 
 | } | 
 | ; CHECK-LABEL: test_bswap_64 | 
 | ; CHECK: bswap e{{.*}} | 
 | ; CHECK: bswap e{{.*}} | 
 | ; ARM32-LABEL: test_bswap_64 | 
 | ; ARM32: rev | 
 | ; ARM32: rev | 
 | ; MIPS32-LABEL: test_bswap_64 | 
 | ; MIPS32: sll {{.*}},0x8 | 
 | ; MIPS32: srl {{.*}},0x18 | 
 | ; MIPS32: srl {{.*}},0x8 | 
 | ; MIPS32: andi {{.*}},0xff00 | 
 | ; MIPS32: lui {{.*}},0xff | 
 | ; MIPS32: or | 
 | ; MIPS32: and | 
 | ; MIPS32: sll {{.*}},0x18 | 
 | ; MIPS32: or | 
 | ; MIPS32: srl {{.*}},0x18 | 
 | ; MIPS32: srl {{.*}},0x8 | 
 | ; MIPS32: andi {{.*}},0xff00 | 
 | ; MIPS32: or | 
 | ; MIPS32: or | 
 | ; MIPS32: sll {{.*}},0x8 | 
 | ; MIPS32: and | 
 | ; MIPS32: sll {{.*}},0x18 | 
 | ; MIPS32: or | 
 | ; MIPS32: or | 
 |  | 
 | ; Same as test_bswap_64 but the operand of llvm.bswap.i64 is undef rather | 
 | ; than a function argument. | 
 | define internal i64 @test_bswap_64_undef() { | 
 | entry: | 
 |   %r = call i64 @llvm.bswap.i64(i64 undef) | 
 |   ret i64 %r | 
 | } | 
 | ; CHECK-LABEL: test_bswap_64_undef | 
 | ; CHECK: bswap e{{.*}} | 
 | ; CHECK: bswap e{{.*}} | 
 | ; The ARM label names this function in full, like the x86-32 and MIPS labels | 
 | ; do, instead of relying on a substring match of the test_bswap_64 name. | 
 | ; ARM32-LABEL: test_bswap_64_undef | 
 | ; ARM32: rev | 
 | ; ARM32: rev | 
 | ; MIPS32-LABEL: test_bswap_64_undef | 
 | ; MIPS32: sll {{.*}},0x8 | 
 | ; MIPS32: srl {{.*}},0x18 | 
 | ; MIPS32: srl {{.*}},0x8 | 
 | ; MIPS32: andi {{.*}},0xff00 | 
 | ; MIPS32: lui {{.*}},0xff | 
 | ; MIPS32: or | 
 | ; MIPS32: and | 
 | ; MIPS32: sll {{.*}},0x18 | 
 | ; MIPS32: or | 
 | ; MIPS32: srl {{.*}},0x18 | 
 | ; MIPS32: srl {{.*}},0x8 | 
 | ; MIPS32: andi {{.*}},0xff00 | 
 | ; MIPS32: or | 
 | ; MIPS32: or | 
 | ; MIPS32: sll {{.*}},0x8 | 
 | ; MIPS32: and | 
 | ; MIPS32: sll {{.*}},0x18 | 
 | ; MIPS32: or | 
 | ; MIPS32: or | 
 |  | 
 | ; Returns llvm.ctlz.i32 of %x. The i1 false flag is the intrinsic's second | 
 | ; operand; the TODO that follows notes the result must stay well-defined | 
 | ; for a zero input. | 
 | define internal i32 @test_ctlz_32(i32 %x) { | 
 | entry: | 
 |   %r = call i32 @llvm.ctlz.i32(i32 %x, i1 false) | 
 |   ret i32 %r | 
 | } | 
 | ; CHECK-LABEL: test_ctlz_32 | 
 | ; TODO(jvoung): If we detect that LZCNT is supported, then use that | 
 | ; and avoid the need to do the cmovne and xor stuff to guarantee that | 
 | ; the result is well-defined w/ input == 0. | 
 | ; CHECK: bsr [[REG_TMP:e.*]],{{.*}} | 
 | ; CHECK: mov [[REG_RES:e.*]],0x3f | 
 | ; CHECK: cmovne [[REG_RES]],[[REG_TMP]] | 
 | ; CHECK: xor [[REG_RES]],0x1f | 
 | ; ARM32-LABEL: test_ctlz_32 | 
 | ; ARM32: clz | 
 | ; MIPS32-LABEL: test_ctlz_32 | 
 | ; MIPS32: clz | 
 |  | 
 | ; Returns llvm.ctlz.i32 of the constant 123456 (no function arguments). | 
 | define internal i32 @test_ctlz_32_const() { | 
 | entry: | 
 |   %r = call i32 @llvm.ctlz.i32(i32 123456, i1 false) | 
 |   ret i32 %r | 
 | } | 
 | ; Could potentially constant fold this, but the front-end should have done that. | 
 | ; The dest operand must be a register and the source operand must be a register | 
 | ; or memory. | 
 | ; CHECK-LABEL: test_ctlz_32_const | 
 | ; CHECK: bsr e{{.*}},{{.*}}e{{.*}} | 
 | ; ARM32-LABEL: test_ctlz_32_const | 
 | ; ARM32: clz | 
 | ; MIPS32-LABEL: test_ctlz_32_const | 
 | ; MIPS32: clz | 
 |  | 
 | ; Calls llvm.ctlz.i32 on %x but never uses the result; always returns 1. | 
 | define internal i32 @test_ctlz_32_ignored(i32 %x) { | 
 | entry: | 
 |   %ignored = call i32 @llvm.ctlz.i32(i32 %x, i1 false) | 
 |   ret i32 1 | 
 | } | 
 | ; CHECKO2REM-LABEL: test_ctlz_32_ignored | 
 | ; CHECKO2REM-NOT: bsr | 
 |  | 
 | ; Returns llvm.ctlz.i64 of %x. The 32-bit target expectations that follow | 
 | ; look for a count on each 32-bit half combined with a conditional select. | 
 | define internal i64 @test_ctlz_64(i64 %x) { | 
 | entry: | 
 |   %r = call i64 @llvm.ctlz.i64(i64 %x, i1 false) | 
 |   ret i64 %r | 
 | } | 
 | ; CHECKO2REM-LABEL: test_ctlz_64 | 
 | ; CHECK-LABEL: test_ctlz_64 | 
 | ; CHECK: bsr [[REG_TMP1:e.*]],{{.*}} | 
 | ; CHECK: mov [[REG_RES1:e.*]],0x3f | 
 | ; CHECK: cmovne [[REG_RES1]],[[REG_TMP1]] | 
 | ; CHECK: xor [[REG_RES1]],0x1f | 
 | ; CHECK: add [[REG_RES1]],0x20 | 
 | ; CHECK: bsr [[REG_RES2:e.*]],{{.*}} | 
 | ; CHECK: xor [[REG_RES2]],0x1f | 
 | ; CHECK: test [[REG_UPPER:.*]],[[REG_UPPER]] | 
 | ; CHECK: cmove [[REG_RES2]],[[REG_RES1]] | 
 | ; CHECK: mov {{.*}},0x0 | 
 | ; ARM32-LABEL: test_ctlz_64 | 
 | ; ARM32: clz | 
 | ; ARM32: cmp {{.*}}, #0 | 
 | ; ARM32: add {{.*}}, #32 | 
 | ; ARM32: clzne | 
 | ; ARM32: mov {{.*}}, #0 | 
 | ; MIPS32-LABEL: test_ctlz_64 | 
 | ; MIPS32: clz | 
 | ; MIPS32: clz | 
 | ; MIPS32: addiu | 
 | ; MIPS32: movn | 
 | ; MIPS32: addiu | 
 |  | 
 | ; Computes llvm.ctlz.i64 of the constant 123456789012 and returns the | 
 | ; result truncated to i32. The argument %x is unused. | 
 | define internal i32 @test_ctlz_64_const(i64 %x) { | 
 | entry: | 
 |   %r = call i64 @llvm.ctlz.i64(i64 123456789012, i1 false) | 
 |   %r2 = trunc i64 %r to i32 | 
 |   ret i32 %r2 | 
 | } | 
 | ; CHECK-LABEL: test_ctlz_64_const | 
 | ; CHECK: bsr e{{.*}},{{.*}}e{{.*}} | 
 | ; CHECK: bsr e{{.*}},{{.*}}e{{.*}} | 
 | ; The ARM label names this function in full, like the x86-32 and MIPS labels | 
 | ; do, instead of relying on a substring match of the test_ctlz_64 name. | 
 | ; ARM32-LABEL: test_ctlz_64_const | 
 | ; ARM32: clz | 
 | ; ARM32: clzne | 
 | ; MIPS32-LABEL: test_ctlz_64_const | 
 | ; MIPS32: clz | 
 | ; MIPS32: clz | 
 | ; MIPS32: addiu | 
 | ; MIPS32: movn | 
 | ; MIPS32: addiu | 
 |  | 
 | ; Calls llvm.ctlz.i64 on the constant 1234567890 but never uses the | 
 | ; result; always returns 2. The argument %x is unused. | 
 | define internal i32 @test_ctlz_64_ignored(i64 %x) { | 
 | entry: | 
 |   %ignored = call i64 @llvm.ctlz.i64(i64 1234567890, i1 false) | 
 |   ret i32 2 | 
 | } | 
 | ; CHECKO2REM-LABEL: test_ctlz_64_ignored | 
 | ; CHECKO2REM-NOT: bsr | 
 |  | 
 | ; Returns llvm.cttz.i32 of %x, called with the i1 false flag. | 
 | define internal i32 @test_cttz_32(i32 %x) { | 
 | entry: | 
 |   %r = call i32 @llvm.cttz.i32(i32 %x, i1 false) | 
 |   ret i32 %r | 
 | } | 
 | ; CHECK-LABEL: test_cttz_32 | 
 | ; CHECK: bsf [[REG_IF_NOTZERO:e.*]],{{.*}} | 
 | ; CHECK: mov [[REG_IF_ZERO:e.*]],0x20 | 
 | ; CHECK: cmovne [[REG_IF_ZERO]],[[REG_IF_NOTZERO]] | 
 | ; ARM32-LABEL: test_cttz_32 | 
 | ; ARM32: rbit | 
 | ; ARM32: clz | 
 | ; MIPS32-LABEL: test_cttz_32 | 
 | ; MIPS32: addiu | 
 | ; MIPS32: nor | 
 | ; MIPS32: and | 
 | ; MIPS32: clz | 
 | ; MIPS32: li | 
 | ; MIPS32: subu | 
 |  | 
 | ; Returns llvm.cttz.i64 of %x, called with the i1 false flag. | 
 | define internal i64 @test_cttz_64(i64 %x) { | 
 | entry: | 
 |   %r = call i64 @llvm.cttz.i64(i64 %x, i1 false) | 
 |   ret i64 %r | 
 | } | 
 | ; CHECK-LABEL: test_cttz_64 | 
 | ; CHECK: bsf [[REG_IF_NOTZERO:e.*]],{{.*}} | 
 | ; CHECK: mov [[REG_RES1:e.*]],0x20 | 
 | ; CHECK: cmovne [[REG_RES1]],[[REG_IF_NOTZERO]] | 
 | ; CHECK: add [[REG_RES1]],0x20 | 
 | ; CHECK: bsf [[REG_RES2:e.*]],[[REG_LOWER:.*]] | 
 | ; CHECK: test [[REG_LOWER]],[[REG_LOWER]] | 
 | ; CHECK: cmove [[REG_RES2]],[[REG_RES1]] | 
 | ; CHECK: mov {{.*}},0x0 | 
 | ; ARM32-LABEL: test_cttz_64 | 
 | ; ARM32: rbit | 
 | ; ARM32: rbit | 
 | ; ARM32: clz | 
 | ; ARM32: cmp {{.*}}, #0 | 
 | ; ARM32: add {{.*}}, #32 | 
 | ; ARM32: clzne | 
 | ; ARM32: mov {{.*}}, #0 | 
 | ; MIPS32-LABEL: test_cttz_64 | 
 | ; MIPS32: addiu | 
 | ; MIPS32: nor | 
 | ; MIPS32: and | 
 | ; MIPS32: clz | 
 | ; MIPS32: li | 
 | ; MIPS32: subu | 
 | ; MIPS32: addiu | 
 | ; MIPS32: nor | 
 | ; MIPS32: and | 
 | ; MIPS32: clz | 
 | ; MIPS32: li | 
 | ; MIPS32: subu | 
 |  | 
 | ; Returns llvm.ctpop.i32 of %x. All three targets' expectations that | 
 | ; follow look for a call to the __popcountsi2 helper. | 
 | define internal i32 @test_popcount_32(i32 %x) { | 
 | entry: | 
 |   %r = call i32 @llvm.ctpop.i32(i32 %x) | 
 |   ret i32 %r | 
 | } | 
 | ; CHECK-LABEL: test_popcount_32 | 
 | ; CHECK: call {{.*}} R_{{.*}} __popcountsi2 | 
 | ; ARM32-LABEL: test_popcount_32 | 
 | ; ARM32: bl {{.*}} __popcountsi2 | 
 | ; MIPS32-LABEL: test_popcount_32 | 
 | ; MIPS32: jal {{.*}} __popcountsi2 | 
 |  | 
 | ; Returns llvm.ctpop.i64 of %x as a full i64. The expectations that follow | 
 | ; look for a call to the __popcountdi2 helper. | 
 | define internal i64 @test_popcount_64(i64 %x) { | 
 | entry: | 
 |   %r = call i64 @llvm.ctpop.i64(i64 %x) | 
 |   ret i64 %r | 
 | } | 
 | ; CHECK-LABEL: test_popcount_64 | 
 | ; CHECK: call {{.*}} R_{{.*}} __popcountdi2 | 
 | ; __popcountdi2 only returns a 32-bit result, so clear the upper bits of | 
 | ; the return value just in case. | 
 | ; CHECK: mov {{.*}},0x0 | 
 | ; ARM32-LABEL: test_popcount_64 | 
 | ; ARM32: bl {{.*}} __popcountdi2 | 
 | ; ARM32: mov {{.*}}, #0 | 
 | ; MIPS32-LABEL: test_popcount_64 | 
 | ; MIPS32: jal {{.*}} __popcountdi2 | 
 |  | 
 | ; Computes llvm.ctpop.i64 of %x and returns only the low 32 bits (trunc), | 
 | ; so the upper half of the 64-bit result has no users. | 
 | ; NOTE(review): the negative O2 pattern after this function is written with | 
 | ; a space after the comma, unlike the positive x86-32 mov patterns in this | 
 | ; file (for example the one in test_popcount_64); it may never match the | 
 | ; disassembly -- confirm before relying on it. | 
 | define internal i32 @test_popcount_64_ret_i32(i64 %x) { | 
 | entry: | 
 |   %r_i64 = call i64 @llvm.ctpop.i64(i64 %x) | 
 |   %r = trunc i64 %r_i64 to i32 | 
 |   ret i32 %r | 
 | } | 
 | ; If there is a trunc, then the mov {{.*}}, 0 is dead and gets optimized out. | 
 | ; CHECKO2REM-LABEL: test_popcount_64_ret_i32 | 
 | ; CHECKO2REM: call {{.*}} R_{{.*}} __popcountdi2 | 
 | ; CHECKO2REM-NOT: mov {{.*}}, 0 | 
 | ; MIPS32-LABEL: test_popcount_64_ret_i32 | 
 | ; MIPS32: jal {{.*}} __popcountdi2 | 
 | ; MIPS32: sw v0,{{.*}} | 
 | ; MIPS32: sw v1,{{.*}} | 
 | ; MIPS32: lw v0,{{.*}} | 
 | ; MIPS32: lw ra,{{.*}} | 
 |  | 
 | ; Saves the stack pointer with llvm.stacksave and immediately restores it | 
 | ; with llvm.stackrestore; there is no alloca in between. | 
 | define internal void @test_stacksave_noalloca() { | 
 | entry: | 
 |   %sp = call i8* @llvm.stacksave() | 
 |   call void @llvm.stackrestore(i8* %sp) | 
 |   ret void | 
 | } | 
 | ; CHECK-LABEL: test_stacksave_noalloca | 
 | ; CHECK: mov {{.*}},esp | 
 | ; CHECK: mov esp,{{.*}} | 
 | ; ARM32-LABEL: test_stacksave_noalloca | 
 | ; ARM32: mov {{.*}}, sp | 
 | ; ARM32: mov sp, {{.*}} | 
 | ; MIPS32-LABEL: test_stacksave_noalloca | 
 | ; MIPS32: 	sw	sp,{{.*}} | 
 | ; MIPS32: 	lw	[[REG:.*]],0(sp) | 
 | ; MIPS32: 	move	sp,[[REG]] | 
 |  | 
 | ; Declared-only function (no body in this file) called by | 
 | ; test_stacksave_multiple. | 
 | declare i32 @foo(i32 %x) | 
 |  | 
 | ; Performs three stacksave calls, each followed by a variable-sized alloca | 
 | ; of 4 * %x bytes, with a call to @foo before the third pair. Stores into | 
 | ; all three allocas, then restores the stack to the first saved pointer. | 
 | ; %sp2 and %sp3 have no users. | 
 | define internal void @test_stacksave_multiple(i32 %x) { | 
 | entry: | 
 |   %x_4 = mul i32 %x, 4 | 
 |   %sp1 = call i8* @llvm.stacksave() | 
 |   %tmp1 = alloca i8, i32 %x_4, align 4 | 
 |  | 
 |   %sp2 = call i8* @llvm.stacksave() | 
 |   %tmp2 = alloca i8, i32 %x_4, align 4 | 
 |  | 
 |   %y = call i32 @foo(i32 %x) | 
 |  | 
 |   %sp3 = call i8* @llvm.stacksave() | 
 |   %tmp3 = alloca i8, i32 %x_4, align 4 | 
 |  | 
 |   ; Write to each alloca so that all three are used. | 
 |   %__9 = bitcast i8* %tmp1 to i32* | 
 |   store i32 %y, i32* %__9, align 1 | 
 |  | 
 |   %__10 = bitcast i8* %tmp2 to i32* | 
 |   store i32 %x, i32* %__10, align 1 | 
 |  | 
 |   %__11 = bitcast i8* %tmp3 to i32* | 
 |   store i32 %x, i32* %__11, align 1 | 
 |  | 
 |   ; Only the first saved stack pointer is restored. | 
 |   call void @llvm.stackrestore(i8* %sp1) | 
 |   ret void | 
 | } | 
 | ; CHECK-LABEL: test_stacksave_multiple | 
 | ; lea is used to copy from esp for the allocas. | 
 | ; Otherwise, only one stacksave is live. | 
 | ; CHECK: mov ebp,esp | 
 | ; CHECK: mov {{.*}},esp | 
 | ; CHECK: lea {{.*}},[esp+0x10] | 
 | ; CHECK: lea {{.*}},[esp+0x10] | 
 | ; CHECK: call | 
 | ; CHECK: mov esp,{{.*}} | 
 | ; CHECK: mov esp,ebp | 
 | ; ARM32-LABEL: test_stacksave_multiple | 
 | ; ARM32: mov {{.*}}, sp | 
 | ; ARM32: mov {{.*}}, sp | 
 | ; ARM32: mov {{.*}}, sp | 
 | ; ARM32: mov sp, {{.*}} | 
 | ; MIPS32-LABEL: test_stacksave_multiple | 
 | ; MIPS32: 	sw	sp,[[MEMLOC:.*]] | 
 | ; MIPS32: 	sw	sp,{{.*}} | 
 | ; MIPS32: 	sw	sp,{{.*}} | 
 | ; MIPS32: 	lw	[[REG:.*]],[[MEMLOC]] | 
 | ; MIPS32: 	move	sp,[[REG]] |