| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: opt < %s -instcombine -S | FileCheck %s |
| |
| target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" |
| |
| ; |
| ; DemandedBits - MOVMSK zeros the upper bits of the result. |
| ; |
| |
| define i32 @test_upper_x86_mmx_pmovmskb(x86_mmx %a0) { |
| ; CHECK-LABEL: @test_upper_x86_mmx_pmovmskb( |
| ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %a0) |
| ; CHECK-NEXT: ret i32 [[TMP1]] |
| ; |
| %1 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %a0) |
| %2 = and i32 %1, 255 |
| ret i32 %2 |
| } |
| |
| define i32 @test_upper_x86_sse_movmsk_ps(<4 x float> %a0) { |
| ; CHECK-LABEL: @test_upper_x86_sse_movmsk_ps( |
| ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) |
| ; CHECK-NEXT: ret i32 [[TMP1]] |
| ; |
| %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) |
| %2 = and i32 %1, 15 |
| ret i32 %2 |
| } |
| |
| define i32 @test_upper_x86_sse2_movmsk_pd(<2 x double> %a0) { |
| ; CHECK-LABEL: @test_upper_x86_sse2_movmsk_pd( |
| ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) |
| ; CHECK-NEXT: ret i32 [[TMP1]] |
| ; |
| %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) |
| %2 = and i32 %1, 3 |
| ret i32 %2 |
| } |
| |
| define i32 @test_upper_x86_sse2_pmovmskb_128(<16 x i8> %a0) { |
| ; CHECK-LABEL: @test_upper_x86_sse2_pmovmskb_128( |
| ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) |
| ; CHECK-NEXT: ret i32 [[TMP1]] |
| ; |
| %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) |
| %2 = and i32 %1, 65535 |
| ret i32 %2 |
| } |
| |
| define i32 @test_upper_x86_avx_movmsk_ps_256(<8 x float> %a0) { |
| ; CHECK-LABEL: @test_upper_x86_avx_movmsk_ps_256( |
| ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) |
| ; CHECK-NEXT: ret i32 [[TMP1]] |
| ; |
| %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) |
| %2 = and i32 %1, 255 |
| ret i32 %2 |
| } |
| |
| define i32 @test_upper_x86_avx_movmsk_pd_256(<4 x double> %a0) { |
| ; CHECK-LABEL: @test_upper_x86_avx_movmsk_pd_256( |
| ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) |
| ; CHECK-NEXT: ret i32 [[TMP1]] |
| ; |
| %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) |
| %2 = and i32 %1, 15 |
| ret i32 %2 |
| } |
| |
; llvm.x86.avx2.pmovmskb may set all 32 bits of the result, so the upper-bit fold does not apply to it.
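
; An illustrative negative test (hand-written, not autogenerated by update_test_checks.py):
; masking avx2.pmovmskb down to its low 16 bits is not redundant, and the CHECK lines
; below assume instcombine leaves both instructions in place.
define i32 @test_upper_x86_avx2_pmovmskb(<32 x i8> %a0) {
; CHECK-LABEL: @test_upper_x86_avx2_pmovmskb(
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %a0)
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 65535
; CHECK-NEXT: ret i32 [[TMP2]]
;
  %1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %a0)
  %2 = and i32 %1, 65535
  ret i32 %2
}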
| |
| ; |
; DemandedBits - If only the (known zero) upper bits are demanded, the whole expression folds to zero.
| ; |
| |
| define i32 @test_lower_x86_mmx_pmovmskb(x86_mmx %a0) { |
| ; CHECK-LABEL: @test_lower_x86_mmx_pmovmskb( |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| %1 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %a0) |
| %2 = and i32 %1, -256 |
| ret i32 %2 |
| } |
| |
| define i32 @test_lower_x86_sse_movmsk_ps(<4 x float> %a0) { |
| ; CHECK-LABEL: @test_lower_x86_sse_movmsk_ps( |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) |
| %2 = and i32 %1, -16 |
| ret i32 %2 |
| } |
| |
| define i32 @test_lower_x86_sse2_movmsk_pd(<2 x double> %a0) { |
| ; CHECK-LABEL: @test_lower_x86_sse2_movmsk_pd( |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) |
| %2 = and i32 %1, -4 |
| ret i32 %2 |
| } |
| |
| define i32 @test_lower_x86_sse2_pmovmskb_128(<16 x i8> %a0) { |
| ; CHECK-LABEL: @test_lower_x86_sse2_pmovmskb_128( |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) |
| %2 = and i32 %1, -65536 |
| ret i32 %2 |
| } |
| |
| define i32 @test_lower_x86_avx_movmsk_ps_256(<8 x float> %a0) { |
| ; CHECK-LABEL: @test_lower_x86_avx_movmsk_ps_256( |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) |
| %2 = and i32 %1, -256 |
| ret i32 %2 |
| } |
| |
| define i32 @test_lower_x86_avx_movmsk_pd_256(<4 x double> %a0) { |
| ; CHECK-LABEL: @test_lower_x86_avx_movmsk_pd_256( |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) |
| %2 = and i32 %1, -16 |
| ret i32 %2 |
| } |
| |
; llvm.x86.avx2.pmovmskb may set all 32 bits of the result, so the lower-bit fold does not apply to it.
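
; The lower-bit counterpart (hand-written, not autogenerated): clearing the low 16 bits
; cannot fold to zero because the high 16 result bits may be set; the CHECK lines assume
; instcombine leaves the sequence untouched.
define i32 @test_lower_x86_avx2_pmovmskb(<32 x i8> %a0) {
; CHECK-LABEL: @test_lower_x86_avx2_pmovmskb(
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %a0)
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], -65536
; CHECK-NEXT: ret i32 [[TMP2]]
;
  %1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %a0)
  %2 = and i32 %1, -65536
  ret i32 %2
}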
| |
| ; |
| ; Constant Folding (UNDEF -> ZERO) |
| ; |
| |
| define i32 @undef_x86_mmx_pmovmskb() { |
| ; CHECK-LABEL: @undef_x86_mmx_pmovmskb( |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| %1 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx undef) |
| ret i32 %1 |
| } |
| |
| define i32 @undef_x86_sse_movmsk_ps() { |
| ; CHECK-LABEL: @undef_x86_sse_movmsk_ps( |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> undef) |
| ret i32 %1 |
| } |
| |
| define i32 @undef_x86_sse2_movmsk_pd() { |
| ; CHECK-LABEL: @undef_x86_sse2_movmsk_pd( |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> undef) |
| ret i32 %1 |
| } |
| |
| define i32 @undef_x86_sse2_pmovmskb_128() { |
| ; CHECK-LABEL: @undef_x86_sse2_pmovmskb_128( |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> undef) |
| ret i32 %1 |
| } |
| |
| define i32 @undef_x86_avx_movmsk_ps_256() { |
| ; CHECK-LABEL: @undef_x86_avx_movmsk_ps_256( |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> undef) |
| ret i32 %1 |
| } |
| |
| define i32 @undef_x86_avx_movmsk_pd_256() { |
| ; CHECK-LABEL: @undef_x86_avx_movmsk_pd_256( |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> undef) |
| ret i32 %1 |
| } |
| |
| define i32 @undef_x86_avx2_pmovmskb() { |
| ; CHECK-LABEL: @undef_x86_avx2_pmovmskb( |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| %1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> undef) |
| ret i32 %1 |
| } |
| |
| ; |
| ; Constant Folding (ZERO -> ZERO) |
| ; |
| |
| define i32 @zero_x86_mmx_pmovmskb() { |
| ; CHECK-LABEL: @zero_x86_mmx_pmovmskb( |
| ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx bitcast (<1 x i64> zeroinitializer to x86_mmx)) |
| ; CHECK-NEXT: ret i32 [[TMP1]] |
| ; |
| %1 = bitcast <1 x i64> zeroinitializer to x86_mmx |
| %2 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %1) |
| ret i32 %2 |
| } |
| |
| define i32 @zero_x86_sse_movmsk_ps() { |
| ; CHECK-LABEL: @zero_x86_sse_movmsk_ps( |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> zeroinitializer) |
| ret i32 %1 |
| } |
| |
| define i32 @zero_x86_sse2_movmsk_pd() { |
| ; CHECK-LABEL: @zero_x86_sse2_movmsk_pd( |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> zeroinitializer) |
| ret i32 %1 |
| } |
| |
| define i32 @zero_x86_sse2_pmovmskb_128() { |
| ; CHECK-LABEL: @zero_x86_sse2_pmovmskb_128( |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> zeroinitializer) |
| ret i32 %1 |
| } |
| |
| define i32 @zero_x86_avx_movmsk_ps_256() { |
| ; CHECK-LABEL: @zero_x86_avx_movmsk_ps_256( |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> zeroinitializer) |
| ret i32 %1 |
| } |
| |
| define i32 @zero_x86_avx_movmsk_pd_256() { |
| ; CHECK-LABEL: @zero_x86_avx_movmsk_pd_256( |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> zeroinitializer) |
| ret i32 %1 |
| } |
| |
| define i32 @zero_x86_avx2_pmovmskb() { |
| ; CHECK-LABEL: @zero_x86_avx2_pmovmskb( |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| %1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> zeroinitializer) |
| ret i32 %1 |
| } |
| |
| ; |
| ; Constant Folding |
| ; |
| |
| define i32 @fold_x86_mmx_pmovmskb() { |
| ; CHECK-LABEL: @fold_x86_mmx_pmovmskb( |
| ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx bitcast (<8 x i8> <i8 0, i8 -1, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 0> to x86_mmx)) |
| ; CHECK-NEXT: ret i32 [[TMP1]] |
| ; |
| %1 = bitcast <8 x i8> <i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256> to x86_mmx |
| %2 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %1) |
| ret i32 %2 |
| } |
| |
| define i32 @fold_x86_sse_movmsk_ps() { |
| ; CHECK-LABEL: @fold_x86_sse_movmsk_ps( |
| ; CHECK-NEXT: ret i32 10 |
| ; |
| %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> <float 1.0, float -1.0, float 100.0, float -200.0>) |
| ret i32 %1 |
| } |
| |
| define i32 @fold_x86_sse2_movmsk_pd() { |
| ; CHECK-LABEL: @fold_x86_sse2_movmsk_pd( |
| ; CHECK-NEXT: ret i32 2 |
| ; |
| %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> <double 1.0, double -1.0>) |
| ret i32 %1 |
| } |
| |
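; Sign bits of <0, -1, -1, 127, -127, 63, 64, 0 (wrapped from 256)> give 0b00010110 = 0x16
; per 8-byte group, so the two groups fold to 0x1616 = 5654.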
| define i32 @fold_x86_sse2_pmovmskb_128() { |
| ; CHECK-LABEL: @fold_x86_sse2_pmovmskb_128( |
| ; CHECK-NEXT: ret i32 5654 |
| ; |
| %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> <i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256, i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256>) |
| ret i32 %1 |
| } |
| |
| define i32 @fold_x86_avx_movmsk_ps_256() { |
| ; CHECK-LABEL: @fold_x86_avx_movmsk_ps_256( |
| ; CHECK-NEXT: ret i32 170 |
| ; |
| %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> <float 1.0, float -1.0, float 100.0, float -200.0, float +0.0, float -0.0, float 100000.0, float -5000000.0>) |
| ret i32 %1 |
| } |
| |
| define i32 @fold_x86_avx_movmsk_pd_256() { |
| ; CHECK-LABEL: @fold_x86_avx_movmsk_pd_256( |
| ; CHECK-NEXT: ret i32 10 |
| ; |
| %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> <double 1.0, double -1.0, double 100.0, double -200.0>) |
| ret i32 %1 |
| } |
| |
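; The first 8 bytes (a zero plus undefs, which fold to clear mask bits here) contribute
; nothing; the remaining three 8-byte groups each contribute 0x16, so the result is
; 0x16161600 = 370546176.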
| define i32 @fold_x86_avx2_pmovmskb() { |
| ; CHECK-LABEL: @fold_x86_avx2_pmovmskb( |
| ; CHECK-NEXT: ret i32 370546176 |
| ; |
| %1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256, i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256, i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256>) |
| ret i32 %1 |
| } |
| |
| declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) |
| |
| declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) |
| declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) |
| declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) |
| |
| declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) |
| declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) |
| declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>) |