| ; This tests the NaCl intrinsics not related to atomic operations. |
| |
| ; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s |
| ; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck %s |
| |
| ; Do another run with O2 and a different check-prefix (otherwise the O2 and |
| ; Om1 runs would share the same "CHECK" prefix). This separate run helps |
| ; verify that some code is optimized out. |
| ; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s --check-prefix=CHECKO2REM |
| |
| ; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s |
| ; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s |
| ; RUN: %llvm2iceinsts --pnacl %s | %szdiff %s \ |
| ; RUN: | FileCheck --check-prefix=DUMP %s |
| |
| declare i8* @llvm.nacl.read.tp() |
| declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) |
| declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) |
| declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1) |
| declare void @llvm.nacl.longjmp(i8*, i32) |
| declare i32 @llvm.nacl.setjmp(i8*) |
| declare float @llvm.sqrt.f32(float) |
| declare double @llvm.sqrt.f64(double) |
| declare void @llvm.trap() |
| declare i16 @llvm.bswap.i16(i16) |
| declare i32 @llvm.bswap.i32(i32) |
| declare i64 @llvm.bswap.i64(i64) |
| declare i32 @llvm.ctlz.i32(i32, i1) |
| declare i64 @llvm.ctlz.i64(i64, i1) |
| declare i32 @llvm.cttz.i32(i32, i1) |
| declare i64 @llvm.cttz.i64(i64, i1) |
| declare i32 @llvm.ctpop.i32(i32) |
| declare i64 @llvm.ctpop.i64(i64) |
| declare i8* @llvm.stacksave() |
| declare void @llvm.stackrestore(i8*) |
| |
| define i32 @test_nacl_read_tp() { |
| entry: |
| %ptr = call i8* @llvm.nacl.read.tp() |
| %__1 = ptrtoint i8* %ptr to i32 |
| ret i32 %__1 |
| } |
| ; CHECK-LABEL: test_nacl_read_tp |
| ; CHECK: mov e{{.*}}, dword ptr gs:[0] |
| ; CHECKO2REM-LABEL: test_nacl_read_tp |
| ; CHECKO2REM: mov e{{.*}}, dword ptr gs:[0] |
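| ; On x86-32 NaCl the thread pointer is kept at gs:[0], which is why the |
| ; intrinsic lowers to a single segment-relative load. |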
| |
| define i32 @test_nacl_read_tp_more_addressing() { |
| entry: |
| %ptr = call i8* @llvm.nacl.read.tp() |
| %__1 = ptrtoint i8* %ptr to i32 |
| %x = add i32 %__1, %__1 |
| %__3 = inttoptr i32 %x to i32* |
| %v = load i32* %__3, align 1 |
| %ptr2 = call i8* @llvm.nacl.read.tp() |
| %__6 = ptrtoint i8* %ptr2 to i32 |
| %y = add i32 %__6, 4 |
| %__8 = inttoptr i32 %y to i32* |
| store i32 %v, i32* %__8, align 1 |
| ret i32 %v |
| } |
| ; CHECK-LABEL: test_nacl_read_tp_more_addressing |
| ; CHECK: mov e{{.*}}, dword ptr gs:[0] |
| ; CHECK: mov e{{.*}}, dword ptr gs:[0] |
| ; CHECKO2REM-LABEL: test_nacl_read_tp_more_addressing |
| ; CHECKO2REM: mov e{{.*}}, dword ptr gs:[0] |
| ; CHECKO2REM: mov e{{.*}}, dword ptr gs:[0] |
| |
| define i32 @test_nacl_read_tp_dead(i32 %a) { |
| entry: |
| %ptr = call i8* @llvm.nacl.read.tp() |
| ; The result of the nacl.read.tp call is not actually used. |
| ; In O2 mode the dead call should be DCE'd. |
| ret i32 %a |
| } |
| ; nacl.read.tp is considered side-effect free, so the dead call can be eliminated. |
| ; CHECKO2REM-LABEL: test_nacl_read_tp_dead |
| ; CHECKO2REM-NOT: mov e{{.*}}, dword ptr gs:[0] |
| |
| define void @test_memcpy(i32 %iptr_dst, i32 %iptr_src, i32 %len) { |
| entry: |
| %dst = inttoptr i32 %iptr_dst to i8* |
| %src = inttoptr i32 %iptr_src to i8* |
| call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, |
| i32 %len, i32 1, i1 0) |
| ret void |
| } |
| ; CHECK-LABEL: test_memcpy |
| ; CHECK: call memcpy |
| |
| ; TODO(jvoung) -- if we want to be clever, we can lower this (and the memmove |
| ; and memset cases) without a function call; see the sketch after the test below. |
| define void @test_memcpy_const_len_align(i32 %iptr_dst, i32 %iptr_src) { |
| entry: |
| %dst = inttoptr i32 %iptr_dst to i8* |
| %src = inttoptr i32 %iptr_src to i8* |
| call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, |
| i32 8, i32 1, i1 0) |
| ret void |
| } |
| ; CHECK-LABEL: test_memcpy_const_len_align |
| ; CHECK: call memcpy |
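| ; For reference, a minimal sketch of what an inlined 8-byte copy could look |
| ; like (a hypothetical lowering with illustrative registers; Subzero |
| ; currently emits the call checked above): |
| ;   mov eax, dword ptr [src] |
| ;   mov dword ptr [dst], eax |
| ;   mov eax, dword ptr [src+4] |
| ;   mov dword ptr [dst+4], eax |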
| |
| define void @test_memmove(i32 %iptr_dst, i32 %iptr_src, i32 %len) { |
| entry: |
| %dst = inttoptr i32 %iptr_dst to i8* |
| %src = inttoptr i32 %iptr_src to i8* |
| call void @llvm.memmove.p0i8.p0i8.i32(i8* %dst, i8* %src, |
| i32 %len, i32 1, i1 0) |
| ret void |
| } |
| ; CHECK-LABEL: test_memmove |
| ; CHECK: call memmove |
| |
| define void @test_memmove_const_len_align(i32 %iptr_dst, i32 %iptr_src) { |
| entry: |
| %dst = inttoptr i32 %iptr_dst to i8* |
| %src = inttoptr i32 %iptr_src to i8* |
| call void @llvm.memmove.p0i8.p0i8.i32(i8* %dst, i8* %src, |
| i32 8, i32 1, i1 0) |
| ret void |
| } |
| ; CHECK-LABEL: test_memmove_const_len_align |
| ; CHECK: call memmove |
| |
| define void @test_memset(i32 %iptr_dst, i32 %wide_val, i32 %len) { |
| entry: |
| %val = trunc i32 %wide_val to i8 |
| %dst = inttoptr i32 %iptr_dst to i8* |
| call void @llvm.memset.p0i8.i32(i8* %dst, i8 %val, |
| i32 %len, i32 1, i1 0) |
| ret void |
| } |
| ; CHECK-LABEL: test_memset |
| ; CHECK: movzx |
| ; CHECK: call memset |
| |
| define void @test_memset_const_len_align(i32 %iptr_dst, i32 %wide_val) { |
| entry: |
| %val = trunc i32 %wide_val to i8 |
| %dst = inttoptr i32 %iptr_dst to i8* |
| call void @llvm.memset.p0i8.i32(i8* %dst, i8 %val, |
| i32 8, i32 1, i1 0) |
| ret void |
| } |
| ; CHECK-LABEL: test_memset_const_len_align |
| ; CHECK: movzx |
| ; CHECK: call memset |
| |
| define void @test_memset_const_val(i32 %iptr_dst, i32 %len) { |
| entry: |
| %dst = inttoptr i32 %iptr_dst to i8* |
| call void @llvm.memset.p0i8.i32(i8* %dst, i8 0, i32 %len, i32 1, i1 0) |
| ret void |
| } |
| ; CHECK-LABEL: test_memset_const_val |
| ; Make sure the value argument is legalized first (movzx cannot encode an |
| ; immediate, so "movzx reg, 0" is invalid). |
| ; CHECK: movzx {{.*}}, {{[^0]}} |
| ; CHECK: call memset |
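| ; A sketch of the expected legalization, assuming the constant is first |
| ; materialized into a byte register (register choice illustrative): |
| ;   mov cl, 0 |
| ;   movzx ecx, cl |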
| |
| define i32 @test_setjmplongjmp(i32 %iptr_env) { |
| entry: |
| %env = inttoptr i32 %iptr_env to i8* |
| %i = call i32 @llvm.nacl.setjmp(i8* %env) |
| %r1 = icmp eq i32 %i, 0 |
| br i1 %r1, label %Zero, label %NonZero |
| Zero: |
| ; Redundant inttoptr, to make --pnacl cast-eliding/re-insertion happy. |
| %env2 = inttoptr i32 %iptr_env to i8* |
| call void @llvm.nacl.longjmp(i8* %env2, i32 1) |
| ret i32 0 |
| NonZero: |
| ret i32 1 |
| } |
| ; CHECK-LABEL: test_setjmplongjmp |
| ; CHECK: call setjmp |
| ; CHECK: call longjmp |
| ; CHECKO2REM-LABEL: test_setjmplongjmp |
| ; CHECKO2REM: call setjmp |
| ; CHECKO2REM: call longjmp |
| |
| define i32 @test_setjmp_unused(i32 %iptr_env, i32 %i_other) { |
| entry: |
| %env = inttoptr i32 %iptr_env to i8* |
| %i = call i32 @llvm.nacl.setjmp(i8* %env) |
| ret i32 %i_other |
| } |
| ; setjmp is not considered side-effect free, so the call is not eliminated |
| ; even when its result is unused. |
| ; CHECKO2REM-LABEL: test_setjmp_unused |
| ; CHECKO2REM: call setjmp |
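| ; (setjmp can return a second time via a later longjmp, so removing the |
| ; call would change control flow even though %i itself is unused.) |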
| |
| define float @test_sqrt_float(float %x, i32 %iptr) { |
| entry: |
| %r = call float @llvm.sqrt.f32(float %x) |
| %r2 = call float @llvm.sqrt.f32(float %r) |
| %r3 = call float @llvm.sqrt.f32(float -0.0) |
| %r4 = fadd float %r2, %r3 |
| br label %next |
| |
| next: |
| %__6 = inttoptr i32 %iptr to float* |
| %y = load float* %__6, align 4 |
| %r5 = call float @llvm.sqrt.f32(float %y) |
| %r6 = fadd float %r4, %r5 |
| ret float %r6 |
| } |
| ; CHECK-LABEL: test_sqrt_float |
| ; CHECK: sqrtss xmm{{.*}} |
| ; CHECK: sqrtss xmm{{.*}} |
| ; CHECK: sqrtss xmm{{.*}}, dword ptr |
| ; CHECK-LABEL: .L{{.*}}next |
| ; We could fold the load and the sqrt into one operation, but the current |
| ; folding only handles load + arithmetic op. The sqrt instruction is |
| ; considered an intrinsic call, not an arithmetic op. |
| ; CHECK: sqrtss xmm{{.*}} |
| |
| define double @test_sqrt_double(double %x, i32 %iptr) { |
| entry: |
| %r = call double @llvm.sqrt.f64(double %x) |
| %r2 = call double @llvm.sqrt.f64(double %r) |
| %r3 = call double @llvm.sqrt.f64(double -0.0) |
| %r4 = fadd double %r2, %r3 |
| br label %next |
| |
| next: |
| %__6 = inttoptr i32 %iptr to double* |
| %y = load double* %__6, align 8 |
| %r5 = call double @llvm.sqrt.f64(double %y) |
| %r6 = fadd double %r4, %r5 |
| ret double %r6 |
| } |
| ; CHECK-LABEL: test_sqrt_double |
| ; CHECK: sqrtsd xmm{{.*}} |
| ; CHECK: sqrtsd xmm{{.*}} |
| ; CHECK: sqrtsd xmm{{.*}}, qword ptr |
| ; CHECK-LABEL: .L{{.*}}next |
| ; CHECK: sqrtsd xmm{{.*}} |
| |
| define float @test_sqrt_ignored(float %x, double %y) { |
| entry: |
| %ignored1 = call float @llvm.sqrt.f32(float %x) |
| %ignored2 = call double @llvm.sqrt.f64(double %y) |
| ret float 0.0 |
| } |
| ; CHECKO2REM-LABEL: test_sqrt_ignored |
| ; CHECKO2REM-NOT: sqrtss |
| ; CHECKO2REM-NOT: sqrtsd |
| |
| define i32 @test_trap(i32 %br) { |
| entry: |
| %r1 = icmp eq i32 %br, 0 |
| br i1 %r1, label %Zero, label %NonZero |
| Zero: |
| call void @llvm.trap() |
| unreachable |
| NonZero: |
| ret i32 1 |
| } |
| ; CHECK-LABEL: test_trap |
| ; CHECK: ud2 |
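| ; ud2 raises an invalid-opcode exception and is the conventional x86 |
| ; encoding of an unconditional trap. |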
| |
| define i32 @test_bswap_16(i32 %x) { |
| entry: |
| %x_trunc = trunc i32 %x to i16 |
| %r = call i16 @llvm.bswap.i16(i16 %x_trunc) |
| %r_zext = zext i16 %r to i32 |
| ret i32 %r_zext |
| } |
| ; CHECK-LABEL: test_bswap_16 |
| ; CHECK: rol {{.*}}, 8 |
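| ; Rotating a 16-bit register by 8 swaps its two bytes (e.g. 0x1234 becomes |
| ; 0x3412), which is exactly bswap on an i16. |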
| |
| define i32 @test_bswap_32(i32 %x) { |
| entry: |
| %r = call i32 @llvm.bswap.i32(i32 %x) |
| ret i32 %r |
| } |
| ; CHECK-LABEL: test_bswap_32 |
| ; CHECK: bswap e{{.*}} |
| |
| define i64 @test_bswap_64(i64 %x) { |
| entry: |
| %r = call i64 @llvm.bswap.i64(i64 %x) |
| ret i64 %r |
| } |
| ; CHECK-LABEL: test_bswap_64 |
| ; CHECK: bswap e{{.*}} |
| ; CHECK: bswap e{{.*}} |
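| ; On x86-32 an i64 bswap is two 32-bit bswaps with the halves exchanged. |
| ; A minimal sketch (register roles illustrative): |
| ;   bswap eax   ; byte-swap the old low half |
| ;   bswap edx   ; byte-swap the old high half |
| ;   ; ... then return with the eax/edx roles swapped |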
| |
| define i32 @test_ctlz_32(i32 %x) { |
| entry: |
| %r = call i32 @llvm.ctlz.i32(i32 %x, i1 0) |
| ret i32 %r |
| } |
| ; CHECK-LABEL: test_ctlz_32 |
| ; TODO(jvoung): If we detect that LZCNT is supported, use it directly and |
| ; avoid the cmovne/xor sequence needed to guarantee a well-defined result |
| ; when the input is 0. |
| ; CHECK: bsr [[REG_TMP:e.*]], {{.*}} |
| ; CHECK: mov [[REG_RES:e.*]], 63 |
| ; CHECK: cmovne [[REG_RES]], [[REG_TMP]] |
| ; CHECK: xor [[REG_RES]], 31 |
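| ; This computes ctlz from bsr: for x != 0, bsr gives the index of the |
| ; highest set bit, so ctlz(x) == 31 - bsr(x) == bsr(x) ^ 31. For x == 0, |
| ; bsr leaves its destination undefined and sets ZF, so the preloaded 63 |
| ; survives the cmovne, and 63 ^ 31 == 32, the defined result for zero. |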
| |
| define i32 @test_ctlz_32_const() { |
| entry: |
| %r = call i32 @llvm.ctlz.i32(i32 123456, i1 0) |
| ret i32 %r |
| } |
| ; We could potentially constant-fold this, but the front-end should have done that already. |
| ; CHECK-LABEL: test_ctlz_32_const |
| ; CHECK: bsr |
| |
| define i32 @test_ctlz_32_ignored(i32 %x) { |
| entry: |
| %ignored = call i32 @llvm.ctlz.i32(i32 %x, i1 0) |
| ret i32 1 |
| } |
| ; CHECKO2REM-LABEL: test_ctlz_32_ignored |
| ; CHECKO2REM-NOT: bsr |
| |
| define i64 @test_ctlz_64(i64 %x) { |
| entry: |
| %r = call i64 @llvm.ctlz.i64(i64 %x, i1 0) |
| ret i64 %r |
| } |
| ; CHECKO2REM-LABEL: test_ctlz_64 |
| ; CHECK-LABEL: test_ctlz_64 |
| ; CHECK: bsr [[REG_TMP1:e.*]], {{.*}} |
| ; CHECK: mov [[REG_RES1:e.*]], 63 |
| ; CHECK: cmovne [[REG_RES1]], [[REG_TMP1]] |
| ; CHECK: xor [[REG_RES1]], 31 |
| ; CHECK: add [[REG_RES1]], 32 |
| ; CHECK: bsr [[REG_RES2:e.*]], {{.*}} |
| ; CHECK: xor [[REG_RES2]], 31 |
| ; CHECK: test [[REG_UPPER:.*]], [[REG_UPPER]] |
| ; CHECK: cmove [[REG_RES2]], [[REG_RES1]] |
| ; CHECK: mov {{.*}}, 0 |
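| ; The i64 version computes ctlz of the low half plus 32 (REG_RES1) and |
| ; ctlz of the high half (REG_RES2), selecting the former only when the |
| ; high half tests as zero; the final mov of 0 clears the upper 32 bits |
| ; of the i64 result. |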
| |
| define i32 @test_ctlz_64_const(i64 %x) { |
| entry: |
| %r = call i64 @llvm.ctlz.i64(i64 123456789012, i1 0) |
| %r2 = trunc i64 %r to i32 |
| ret i32 %r2 |
| } |
| ; CHECK-LABEL: test_ctlz_64_const |
| ; CHECK: bsr |
| ; CHECK: bsr |
| |
| define i32 @test_ctlz_64_ignored(i64 %x) { |
| entry: |
| %ignored = call i64 @llvm.ctlz.i64(i64 1234567890, i1 0) |
| ret i32 2 |
| } |
| ; CHECKO2REM-LABEL: test_ctlz_64_ignored |
| ; CHECKO2REM-NOT: bsr |
| |
| define i32 @test_cttz_32(i32 %x) { |
| entry: |
| %r = call i32 @llvm.cttz.i32(i32 %x, i1 0) |
| ret i32 %r |
| } |
| ; CHECK-LABEL: test_cttz_32 |
| ; CHECK: bsf [[REG_IF_NOTZERO:e.*]], {{.*}} |
| ; CHECK: mov [[REG_IF_ZERO:e.*]], 32 |
| ; CHECK: cmovne [[REG_IF_ZERO]], [[REG_IF_NOTZERO]] |
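| ; bsf mirrors bsr: it yields the index of the lowest set bit (cttz |
| ; directly) and is likewise undefined, with ZF set, for an input of 0, |
| ; so the preloaded 32 survives the cmovne in that case. |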
| |
| define i64 @test_cttz_64(i64 %x) { |
| entry: |
| %r = call i64 @llvm.cttz.i64(i64 %x, i1 0) |
| ret i64 %r |
| } |
| ; CHECK-LABEL: test_cttz_64 |
| ; CHECK: bsf [[REG_IF_NOTZERO:e.*]], {{.*}} |
| ; CHECK: mov [[REG_RES1:e.*]], 32 |
| ; CHECK: cmovne [[REG_RES1]], [[REG_IF_NOTZERO]] |
| ; CHECK: add [[REG_RES1]], 32 |
| ; CHECK: bsf [[REG_RES2:e.*]], [[REG_LOWER:.*]] |
| ; CHECK: test [[REG_LOWER]], [[REG_LOWER]] |
| ; CHECK: cmove [[REG_RES2]], [[REG_RES1]] |
| ; CHECK: mov {{.*}}, 0 |
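| ; Symmetric to ctlz_64: cttz of the high half plus 32 lands in REG_RES1, |
| ; bsf of the low half in REG_RES2, and REG_RES1 is selected only when the |
| ; low half is zero; the trailing mov of 0 zeroes the upper 32 bits. |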
| |
| define i32 @test_popcount_32(i32 %x) { |
| entry: |
| %r = call i32 @llvm.ctpop.i32(i32 %x) |
| ret i32 %r |
| } |
| ; CHECK-LABEL: test_popcount_32 |
| ; CHECK: call __popcountsi2 |
| |
| define i64 @test_popcount_64(i64 %x) { |
| entry: |
| %r = call i64 @llvm.ctpop.i64(i64 %x) |
| ret i64 %r |
| } |
| ; CHECK-LABEL: test_popcount_64 |
| ; CHECK: call __popcountdi2 |
| ; __popcountdi2 only returns a 32-bit result, so clear the upper bits of |
| ; the return value just in case. |
| ; CHECK: mov {{.*}}, 0 |
| |
| define i32 @test_popcount_64_ret_i32(i64 %x) { |
| entry: |
| %r_i64 = call i64 @llvm.ctpop.i64(i64 %x) |
| %r = trunc i64 %r_i64 to i32 |
| ret i32 %r |
| } |
| ; If there is a trunc, then the mov {{.*}}, 0 is dead and gets optimized out. |
| ; CHECKO2REM-LABEL: test_popcount_64_ret_i32 |
| ; CHECKO2REM: call __popcountdi2 |
| ; CHECKO2REM-NOT: mov {{.*}}, 0 |
| |
| define void @test_stacksave_noalloca() { |
| entry: |
| %sp = call i8* @llvm.stacksave() |
| call void @llvm.stackrestore(i8* %sp) |
| ret void |
| } |
| ; CHECK-LABEL: test_stacksave_noalloca |
| ; CHECK: mov {{.*}}, esp |
| ; CHECK: mov esp, {{.*}} |
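| ; llvm.stacksave lowers to a plain copy out of esp and llvm.stackrestore to |
| ; a copy back into esp; with no intervening alloca, nothing else is needed. |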
| |
| declare i32 @foo(i32 %x) |
| |
| define void @test_stacksave_multiple(i32 %x) { |
| entry: |
| %x_4 = mul i32 %x, 4 |
| %sp1 = call i8* @llvm.stacksave() |
| %tmp1 = alloca i8, i32 %x_4, align 4 |
| |
| %sp2 = call i8* @llvm.stacksave() |
| %tmp2 = alloca i8, i32 %x_4, align 4 |
| |
| %y = call i32 @foo(i32 %x) |
| |
| %sp3 = call i8* @llvm.stacksave() |
| %tmp3 = alloca i8, i32 %x_4, align 4 |
| |
| %__9 = bitcast i8* %tmp1 to i32* |
| store i32 %y, i32* %__9, align 1 |
| |
| %__10 = bitcast i8* %tmp2 to i32* |
| store i32 %x, i32* %__10, align 1 |
| |
| %__11 = bitcast i8* %tmp3 to i32* |
| store i32 %x, i32* %__11, align 1 |
| |
| call void @llvm.stackrestore(i8* %sp1) |
| ret void |
| } |
| ; CHECK-LABEL: test_stacksave_multiple |
| ; At least three copies of esp, and likely more, since the allocas also |
| ; manipulate esp. |
| ; CHECK: mov {{.*}}, esp |
| ; CHECK: mov {{.*}}, esp |
| ; CHECK: mov {{.*}}, esp |
| ; CHECK: mov esp, {{.*}} |
| |
| ; ERRORS-NOT: ICE translation error |
| ; DUMP-NOT: SZ |