| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq < %s | FileCheck %s --check-prefix=X86-64 |
| ; RUN: llc -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq < %s | FileCheck %s --check-prefix=X86-32 |
| |
| ; Branches on %cond; each arm performs an ordered-equal float compare on its |
| ; own pair of arguments (%f3/%f4 in %if, %f5/%f6 in %else). The two i1 results |
| ; are merged by a phi that selects between %f1 and %f2, and the selected float |
| ; is stored through %fptr. |
| ; Expected output on both targets: vcmpeqss writes its result into %k1 in each |
| ; arm, and the select is a vmovss masked by %k1 in the exit block. |
| define void @test_fcmp_storefloat(i1 %cond, float* %fptr, float %f1, float %f2, float %f3, float %f4, float %f5, float %f6) { |
| ; X86-64-LABEL: test_fcmp_storefloat: |
| ; X86-64: # %bb.0: # %entry |
| ; X86-64-NEXT: testb $1, %dil |
| ; X86-64-NEXT: je .LBB0_2 |
| ; X86-64-NEXT: # %bb.1: # %if |
| ; X86-64-NEXT: vcmpeqss %xmm3, %xmm2, %k1 |
| ; X86-64-NEXT: jmp .LBB0_3 |
| ; X86-64-NEXT: .LBB0_2: # %else |
| ; X86-64-NEXT: vcmpeqss %xmm5, %xmm4, %k1 |
| ; X86-64-NEXT: .LBB0_3: # %exit |
| ; X86-64-NEXT: vmovss %xmm0, %xmm0, %xmm1 {%k1} |
| ; X86-64-NEXT: vmovss %xmm1, (%rsi) |
| ; X86-64-NEXT: retq |
| ; |
| ; X86-32-LABEL: test_fcmp_storefloat: |
| ; X86-32: # %bb.0: # %entry |
| ; X86-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero |
| ; X86-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero |
| ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-32-NEXT: testb $1, {{[0-9]+}}(%esp) |
| ; X86-32-NEXT: je .LBB0_2 |
| ; X86-32-NEXT: # %bb.1: # %if |
| ; X86-32-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero |
| ; X86-32-NEXT: vcmpeqss {{[0-9]+}}(%esp), %xmm2, %k1 |
| ; X86-32-NEXT: jmp .LBB0_3 |
| ; X86-32-NEXT: .LBB0_2: # %else |
| ; X86-32-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero |
| ; X86-32-NEXT: vcmpeqss {{[0-9]+}}(%esp), %xmm2, %k1 |
| ; X86-32-NEXT: .LBB0_3: # %exit |
| ; X86-32-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} |
| ; X86-32-NEXT: vmovss %xmm0, (%eax) |
| ; X86-32-NEXT: retl |
| entry: |
| br i1 %cond, label %if, label %else |
| |
| if: |
| %cmp1 = fcmp oeq float %f3, %f4 |
| br label %exit |
| |
| else: |
| %cmp2 = fcmp oeq float %f5, %f6 |
| br label %exit |
| |
| exit: |
| ; The merged compare result is the condition of the select below. |
| %val = phi i1 [%cmp1, %if], [%cmp2, %else] |
| %selected = select i1 %val, float %f1, float %f2 |
| store float %selected, float* %fptr |
| ret void |
| } |
| |
| ; Branches on %cond; each arm performs an ordered-equal float compare |
| ; (%f1/%f2 in %if, %f3/%f4 in %else). The phi of the two i1 results is stored |
| ; directly through %iptr. %fptr is not referenced by the body. |
| ; Expected output on both targets: each arm ends with its own vcmpeqss into |
| ; %k0, a kmovb store of %k0 to memory, and a return; no separate exit block |
| ; label appears. |
| define void @test_fcmp_storei1(i1 %cond, float* %fptr, i1* %iptr, float %f1, float %f2, float %f3, float %f4) { |
| ; X86-64-LABEL: test_fcmp_storei1: |
| ; X86-64: # %bb.0: # %entry |
| ; X86-64-NEXT: testb $1, %dil |
| ; X86-64-NEXT: je .LBB1_2 |
| ; X86-64-NEXT: # %bb.1: # %if |
| ; X86-64-NEXT: vcmpeqss %xmm1, %xmm0, %k0 |
| ; X86-64-NEXT: kmovb %k0, (%rdx) |
| ; X86-64-NEXT: retq |
| ; X86-64-NEXT: .LBB1_2: # %else |
| ; X86-64-NEXT: vcmpeqss %xmm3, %xmm2, %k0 |
| ; X86-64-NEXT: kmovb %k0, (%rdx) |
| ; X86-64-NEXT: retq |
| ; |
| ; X86-32-LABEL: test_fcmp_storei1: |
| ; X86-32: # %bb.0: # %entry |
| ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-32-NEXT: testb $1, {{[0-9]+}}(%esp) |
| ; X86-32-NEXT: je .LBB1_2 |
| ; X86-32-NEXT: # %bb.1: # %if |
| ; X86-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero |
| ; X86-32-NEXT: vcmpeqss {{[0-9]+}}(%esp), %xmm0, %k0 |
| ; X86-32-NEXT: kmovb %k0, (%eax) |
| ; X86-32-NEXT: retl |
| ; X86-32-NEXT: .LBB1_2: # %else |
| ; X86-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero |
| ; X86-32-NEXT: vcmpeqss {{[0-9]+}}(%esp), %xmm0, %k0 |
| ; X86-32-NEXT: kmovb %k0, (%eax) |
| ; X86-32-NEXT: retl |
| entry: |
| br i1 %cond, label %if, label %else |
| |
| if: |
| %cmp1 = fcmp oeq float %f1, %f2 |
| br label %exit |
| |
| else: |
| %cmp2 = fcmp oeq float %f3, %f4 |
| br label %exit |
| |
| exit: |
| ; The merged compare result is stored as an i1 rather than used as a mask. |
| %val = phi i1 [%cmp1, %if], [%cmp2, %else] |
| store i1 %val, i1* %iptr |
| ret void |
| } |
| |
| ; Branches on %cond. The %if arm loads an i1 from each of %iptr1 and %iptr2 and |
| ; adds them; the %else arm only loads from %iptr2. The phi of the two i1 values |
| ; selects between %f1 and %f2, and the selected float is stored through %fptr. |
| ; Expected output on both targets: the loads and the add use a byte-sized |
| ; general-purpose register (movb/addb), and the value is moved into %k1 with |
| ; kmovd in the exit block before the masked vmovss. |
| define void @test_load_add(i1 %cond, float* %fptr, i1* %iptr1, i1* %iptr2, float %f1, float %f2) { |
| ; X86-64-LABEL: test_load_add: |
| ; X86-64: # %bb.0: # %entry |
| ; X86-64-NEXT: testb $1, %dil |
| ; X86-64-NEXT: je .LBB2_2 |
| ; X86-64-NEXT: # %bb.1: # %if |
| ; X86-64-NEXT: movb (%rdx), %al |
| ; X86-64-NEXT: addb (%rcx), %al |
| ; X86-64-NEXT: jmp .LBB2_3 |
| ; X86-64-NEXT: .LBB2_2: # %else |
| ; X86-64-NEXT: movb (%rcx), %al |
| ; X86-64-NEXT: .LBB2_3: # %exit |
| ; X86-64-NEXT: kmovd %eax, %k1 |
| ; X86-64-NEXT: vmovss %xmm0, %xmm0, %xmm1 {%k1} |
| ; X86-64-NEXT: vmovss %xmm1, (%rsi) |
| ; X86-64-NEXT: retq |
| ; |
| ; X86-32-LABEL: test_load_add: |
| ; X86-32: # %bb.0: # %entry |
| ; X86-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero |
| ; X86-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero |
| ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-32-NEXT: testb $1, {{[0-9]+}}(%esp) |
| ; X86-32-NEXT: je .LBB2_2 |
| ; X86-32-NEXT: # %bb.1: # %if |
| ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-32-NEXT: movb (%edx), %dl |
| ; X86-32-NEXT: addb (%ecx), %dl |
| ; X86-32-NEXT: jmp .LBB2_3 |
| ; X86-32-NEXT: .LBB2_2: # %else |
| ; X86-32-NEXT: movb (%ecx), %dl |
| ; X86-32-NEXT: .LBB2_3: # %exit |
| ; X86-32-NEXT: kmovd %edx, %k1 |
| ; X86-32-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} |
| ; X86-32-NEXT: vmovss %xmm0, (%eax) |
| ; X86-32-NEXT: retl |
| entry: |
| br i1 %cond, label %if, label %else |
| |
| if: |
| %loaded1 = load i1, i1* %iptr1 |
| %loaded2if = load i1, i1* %iptr2 |
| %added = add i1 %loaded1, %loaded2if |
| br label %exit |
| |
| else: |
| %loaded2else = load i1, i1* %iptr2 |
| br label %exit |
| |
| exit: |
| ; One incoming value is an arithmetic result, the other a plain load. |
| %val = phi i1 [%added, %if], [%loaded2else, %else] |
| %selected = select i1 %val, float %f1, float %f2 |
| store float %selected, float* %fptr |
| ret void |
| } |
| |
| ; Branches on %cond; each arm loads an i1 (%iptr1 in %if, %iptr2 in %else). |
| ; The phi of the loaded values selects between %f1 and %f2, and the selected |
| ; float is stored through %fptr. |
| ; Expected output, 64-bit target: each arm loads straight from memory into %k1 |
| ; with kmovb. |
| ; Expected output, 32-bit target: the arms only pick the pointer (into %ecx) |
| ; and a single kmovb load into %k1 is done in the exit block. |
| define void @test_load_i1(i1 %cond, float* %fptr, i1* %iptr1, i1* %iptr2, float %f1, float %f2) { |
| ; X86-64-LABEL: test_load_i1: |
| ; X86-64: # %bb.0: # %entry |
| ; X86-64-NEXT: testb $1, %dil |
| ; X86-64-NEXT: je .LBB3_2 |
| ; X86-64-NEXT: # %bb.1: # %if |
| ; X86-64-NEXT: kmovb (%rdx), %k1 |
| ; X86-64-NEXT: jmp .LBB3_3 |
| ; X86-64-NEXT: .LBB3_2: # %else |
| ; X86-64-NEXT: kmovb (%rcx), %k1 |
| ; X86-64-NEXT: .LBB3_3: # %exit |
| ; X86-64-NEXT: vmovss %xmm0, %xmm0, %xmm1 {%k1} |
| ; X86-64-NEXT: vmovss %xmm1, (%rsi) |
| ; X86-64-NEXT: retq |
| ; |
| ; X86-32-LABEL: test_load_i1: |
| ; X86-32: # %bb.0: # %entry |
| ; X86-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero |
| ; X86-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero |
| ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-32-NEXT: testb $1, {{[0-9]+}}(%esp) |
| ; X86-32-NEXT: je .LBB3_2 |
| ; X86-32-NEXT: # %bb.1: # %if |
| ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-32-NEXT: jmp .LBB3_3 |
| ; X86-32-NEXT: .LBB3_2: # %else |
| ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-32-NEXT: .LBB3_3: # %exit |
| ; X86-32-NEXT: kmovb (%ecx), %k1 |
| ; X86-32-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} |
| ; X86-32-NEXT: vmovss %xmm0, (%eax) |
| ; X86-32-NEXT: retl |
| entry: |
| br i1 %cond, label %if, label %else |
| |
| if: |
| %loaded1 = load i1, i1* %iptr1 |
| br label %exit |
| |
| else: |
| %loaded2 = load i1, i1* %iptr2 |
| br label %exit |
| |
| exit: |
| ; Both incoming values are plain i1 loads; the phi is the select condition. |
| %val = phi i1 [%loaded1, %if], [%loaded2, %else] |
| %selected = select i1 %val, float %f1, float %f2 |
| store float %selected, float* %fptr |
| ret void |
| } |
| |
| ; Branches on %cond; each arm loads an i1 (%iptr1 in %if, %iptr2 in %else) and |
| ; the phi of the two values is stored through %iptr3. No floating-point or |
| ; vector values are involved. |
| ; Expected output on both targets: only general-purpose register instructions |
| ; (movb load, andb $1, movb store); no mask-register instructions appear. |
| ; On the 32-bit target the arms pick the pointer and the load is done once in |
| ; the exit block. |
| define void @test_loadi1_storei1(i1 %cond, i1* %iptr1, i1* %iptr2, i1* %iptr3) { |
| ; X86-64-LABEL: test_loadi1_storei1: |
| ; X86-64: # %bb.0: # %entry |
| ; X86-64-NEXT: testb $1, %dil |
| ; X86-64-NEXT: je .LBB4_2 |
| ; X86-64-NEXT: # %bb.1: # %if |
| ; X86-64-NEXT: movb (%rsi), %al |
| ; X86-64-NEXT: jmp .LBB4_3 |
| ; X86-64-NEXT: .LBB4_2: # %else |
| ; X86-64-NEXT: movb (%rdx), %al |
| ; X86-64-NEXT: .LBB4_3: # %exit |
| ; X86-64-NEXT: andb $1, %al |
| ; X86-64-NEXT: movb %al, (%rcx) |
| ; X86-64-NEXT: retq |
| ; |
| ; X86-32-LABEL: test_loadi1_storei1: |
| ; X86-32: # %bb.0: # %entry |
| ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-32-NEXT: testb $1, {{[0-9]+}}(%esp) |
| ; X86-32-NEXT: je .LBB4_2 |
| ; X86-32-NEXT: # %bb.1: # %if |
| ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-32-NEXT: jmp .LBB4_3 |
| ; X86-32-NEXT: .LBB4_2: # %else |
| ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-32-NEXT: .LBB4_3: # %exit |
| ; X86-32-NEXT: movb (%ecx), %cl |
| ; X86-32-NEXT: andb $1, %cl |
| ; X86-32-NEXT: movb %cl, (%eax) |
| ; X86-32-NEXT: retl |
| entry: |
| br i1 %cond, label %if, label %else |
| |
| if: |
| %loaded1 = load i1, i1* %iptr1 |
| br label %exit |
| |
| else: |
| %loaded2 = load i1, i1* %iptr2 |
| br label %exit |
| |
| exit: |
| ; The merged i1 is only stored back to memory, never used as a mask. |
| %val = phi i1 [%loaded1, %if], [%loaded2, %else] |
| store i1 %val, i1* %iptr3 |
| ret void |
| } |
| |
| ; Branches on %cond. The %if arm loads an i8 from %ptr1 and shifts it left by |
| ; one; the %else arm loads an i8 from %ptr2. The phi of the two i8 values is |
| ; bitcast to <8 x i1> and used as the mask of a vector select between %fvec1 |
| ; and %fvec2; the result is stored through %fptrvec. |
| ; Expected output on both targets: the byte is loaded into a mask register with |
| ; kmovb, and the shift by one appears as kaddb of %k0 with itself. The select |
| ; is a masked vmovaps on zmm registers, and only the ymm part is stored. |
| define void @test_shl1(i1 %cond, i8* %ptr1, i8* %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, <8 x float>* %fptrvec) { |
| ; X86-64-LABEL: test_shl1: |
| ; X86-64: # %bb.0: # %entry |
| ; X86-64-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 |
| ; X86-64-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 |
| ; X86-64-NEXT: testb $1, %dil |
| ; X86-64-NEXT: je .LBB5_2 |
| ; X86-64-NEXT: # %bb.1: # %if |
| ; X86-64-NEXT: kmovb (%rsi), %k0 |
| ; X86-64-NEXT: kaddb %k0, %k0, %k1 |
| ; X86-64-NEXT: jmp .LBB5_3 |
| ; X86-64-NEXT: .LBB5_2: # %else |
| ; X86-64-NEXT: kmovb (%rdx), %k1 |
| ; X86-64-NEXT: .LBB5_3: # %exit |
| ; X86-64-NEXT: vmovaps %zmm0, %zmm1 {%k1} |
| ; X86-64-NEXT: vmovaps %ymm1, (%rcx) |
| ; X86-64-NEXT: vzeroupper |
| ; X86-64-NEXT: retq |
| ; |
| ; X86-32-LABEL: test_shl1: |
| ; X86-32: # %bb.0: # %entry |
| ; X86-32-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 |
| ; X86-32-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 |
| ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-32-NEXT: testb $1, {{[0-9]+}}(%esp) |
| ; X86-32-NEXT: je .LBB5_2 |
| ; X86-32-NEXT: # %bb.1: # %if |
| ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-32-NEXT: kmovb (%ecx), %k0 |
| ; X86-32-NEXT: kaddb %k0, %k0, %k1 |
| ; X86-32-NEXT: jmp .LBB5_3 |
| ; X86-32-NEXT: .LBB5_2: # %else |
| ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-32-NEXT: kmovb (%ecx), %k1 |
| ; X86-32-NEXT: .LBB5_3: # %exit |
| ; X86-32-NEXT: vmovaps %zmm0, %zmm1 {%k1} |
| ; X86-32-NEXT: vmovaps %ymm1, (%eax) |
| ; X86-32-NEXT: vzeroupper |
| ; X86-32-NEXT: retl |
| entry: |
| br i1 %cond, label %if, label %else |
| |
| if: |
| %loaded1 = load i8, i8* %ptr1 |
| %shifted = shl i8 %loaded1, 1 |
| br label %exit |
| |
| else: |
| %loaded2 = load i8, i8* %ptr2 |
| br label %exit |
| |
| exit: |
| ; The i8 is reinterpreted as eight 1-bit lanes to form the select mask. |
| %val = phi i8 [%shifted, %if], [%loaded2, %else] |
| %mask = bitcast i8 %val to <8 x i1> |
| %selected = select <8 x i1> %mask, <8 x float> %fvec1, <8 x float> %fvec2 |
| store <8 x float> %selected, <8 x float>* %fptrvec |
| ret void |
| } |
| |
| ; Branches on %cond. The %if arm loads an i8 from %ptr1 and logically shifts it |
| ; right by one; the %else arm loads an i8 from %ptr2. The phi of the two i8 |
| ; values is bitcast to <8 x i1> and used as the mask of a vector select between |
| ; %fvec1 and %fvec2; the result is stored through %fptrvec. |
| ; Expected output on both targets: the load and the shift use a byte-sized |
| ; general-purpose register (movb/shrb), and the value is moved into %k1 with |
| ; kmovd in the exit block before the masked vmovaps. |
| define void @test_shr1(i1 %cond, i8* %ptr1, i8* %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, <8 x float>* %fptrvec) { |
| ; X86-64-LABEL: test_shr1: |
| ; X86-64: # %bb.0: # %entry |
| ; X86-64-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 |
| ; X86-64-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 |
| ; X86-64-NEXT: testb $1, %dil |
| ; X86-64-NEXT: je .LBB6_2 |
| ; X86-64-NEXT: # %bb.1: # %if |
| ; X86-64-NEXT: movb (%rsi), %al |
| ; X86-64-NEXT: shrb %al |
| ; X86-64-NEXT: jmp .LBB6_3 |
| ; X86-64-NEXT: .LBB6_2: # %else |
| ; X86-64-NEXT: movb (%rdx), %al |
| ; X86-64-NEXT: .LBB6_3: # %exit |
| ; X86-64-NEXT: kmovd %eax, %k1 |
| ; X86-64-NEXT: vmovaps %zmm0, %zmm1 {%k1} |
| ; X86-64-NEXT: vmovaps %ymm1, (%rcx) |
| ; X86-64-NEXT: vzeroupper |
| ; X86-64-NEXT: retq |
| ; |
| ; X86-32-LABEL: test_shr1: |
| ; X86-32: # %bb.0: # %entry |
| ; X86-32-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 |
| ; X86-32-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 |
| ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-32-NEXT: testb $1, {{[0-9]+}}(%esp) |
| ; X86-32-NEXT: je .LBB6_2 |
| ; X86-32-NEXT: # %bb.1: # %if |
| ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-32-NEXT: movb (%ecx), %cl |
| ; X86-32-NEXT: shrb %cl |
| ; X86-32-NEXT: jmp .LBB6_3 |
| ; X86-32-NEXT: .LBB6_2: # %else |
| ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-32-NEXT: movb (%ecx), %cl |
| ; X86-32-NEXT: .LBB6_3: # %exit |
| ; X86-32-NEXT: kmovd %ecx, %k1 |
| ; X86-32-NEXT: vmovaps %zmm0, %zmm1 {%k1} |
| ; X86-32-NEXT: vmovaps %ymm1, (%eax) |
| ; X86-32-NEXT: vzeroupper |
| ; X86-32-NEXT: retl |
| entry: |
| br i1 %cond, label %if, label %else |
| |
| if: |
| %loaded1 = load i8, i8* %ptr1 |
| %shifted = lshr i8 %loaded1, 1 |
| br label %exit |
| |
| else: |
| %loaded2 = load i8, i8* %ptr2 |
| br label %exit |
| |
| exit: |
| ; The i8 is reinterpreted as eight 1-bit lanes to form the select mask. |
| %val = phi i8 [%shifted, %if], [%loaded2, %else] |
| %mask = bitcast i8 %val to <8 x i1> |
| %selected = select <8 x i1> %mask, <8 x float> %fvec1, <8 x float> %fvec2 |
| store <8 x float> %selected, <8 x float>* %fptrvec |
| ret void |
| } |
| |
| ; Branches on %cond. The %if arm loads an i8 from %ptr1 and logically shifts it |
| ; right by two; the %else arm loads an i8 from %ptr2. The phi of the two i8 |
| ; values is bitcast to <8 x i1> and used as the mask of a vector select between |
| ; %fvec1 and %fvec2; the result is stored through %fptrvec. |
| ; Expected output on both targets: the byte is loaded into a mask register with |
| ; kmovb and the shift is done there with kshiftrb $2, so no general-purpose |
| ; register holds the mask value. |
| define void @test_shr2(i1 %cond, i8* %ptr1, i8* %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, <8 x float>* %fptrvec) { |
| ; X86-64-LABEL: test_shr2: |
| ; X86-64: # %bb.0: # %entry |
| ; X86-64-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 |
| ; X86-64-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 |
| ; X86-64-NEXT: testb $1, %dil |
| ; X86-64-NEXT: je .LBB7_2 |
| ; X86-64-NEXT: # %bb.1: # %if |
| ; X86-64-NEXT: kmovb (%rsi), %k0 |
| ; X86-64-NEXT: kshiftrb $2, %k0, %k1 |
| ; X86-64-NEXT: jmp .LBB7_3 |
| ; X86-64-NEXT: .LBB7_2: # %else |
| ; X86-64-NEXT: kmovb (%rdx), %k1 |
| ; X86-64-NEXT: .LBB7_3: # %exit |
| ; X86-64-NEXT: vmovaps %zmm0, %zmm1 {%k1} |
| ; X86-64-NEXT: vmovaps %ymm1, (%rcx) |
| ; X86-64-NEXT: vzeroupper |
| ; X86-64-NEXT: retq |
| ; |
| ; X86-32-LABEL: test_shr2: |
| ; X86-32: # %bb.0: # %entry |
| ; X86-32-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 |
| ; X86-32-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 |
| ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-32-NEXT: testb $1, {{[0-9]+}}(%esp) |
| ; X86-32-NEXT: je .LBB7_2 |
| ; X86-32-NEXT: # %bb.1: # %if |
| ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-32-NEXT: kmovb (%ecx), %k0 |
| ; X86-32-NEXT: kshiftrb $2, %k0, %k1 |
| ; X86-32-NEXT: jmp .LBB7_3 |
| ; X86-32-NEXT: .LBB7_2: # %else |
| ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-32-NEXT: kmovb (%ecx), %k1 |
| ; X86-32-NEXT: .LBB7_3: # %exit |
| ; X86-32-NEXT: vmovaps %zmm0, %zmm1 {%k1} |
| ; X86-32-NEXT: vmovaps %ymm1, (%eax) |
| ; X86-32-NEXT: vzeroupper |
| ; X86-32-NEXT: retl |
| entry: |
| br i1 %cond, label %if, label %else |
| |
| if: |
| %loaded1 = load i8, i8* %ptr1 |
| %shifted = lshr i8 %loaded1, 2 |
| br label %exit |
| |
| else: |
| %loaded2 = load i8, i8* %ptr2 |
| br label %exit |
| |
| exit: |
| ; The i8 is reinterpreted as eight 1-bit lanes to form the select mask. |
| %val = phi i8 [%shifted, %if], [%loaded2, %else] |
| %mask = bitcast i8 %val to <8 x i1> |
| %selected = select <8 x i1> %mask, <8 x float> %fvec1, <8 x float> %fvec2 |
| store <8 x float> %selected, <8 x float>* %fptrvec |
| ret void |
| } |
| |
| ; Branches on %cond. The %if arm loads an i8 from %ptr1 and shifts it left by |
| ; six; the %else arm loads an i8 from %ptr2. The phi of the two i8 values is |
| ; bitcast to <8 x i1> and used as the mask of a vector select between %fvec1 |
| ; and %fvec2; the result is stored through %fptrvec. |
| ; Expected output on both targets: the byte is loaded into a mask register with |
| ; kmovb and the shift is done there with kshiftlb $6, so no general-purpose |
| ; register holds the mask value. |
| define void @test_shl(i1 %cond, i8* %ptr1, i8* %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, <8 x float>* %fptrvec) { |
| ; X86-64-LABEL: test_shl: |
| ; X86-64: # %bb.0: # %entry |
| ; X86-64-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 |
| ; X86-64-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 |
| ; X86-64-NEXT: testb $1, %dil |
| ; X86-64-NEXT: je .LBB8_2 |
| ; X86-64-NEXT: # %bb.1: # %if |
| ; X86-64-NEXT: kmovb (%rsi), %k0 |
| ; X86-64-NEXT: kshiftlb $6, %k0, %k1 |
| ; X86-64-NEXT: jmp .LBB8_3 |
| ; X86-64-NEXT: .LBB8_2: # %else |
| ; X86-64-NEXT: kmovb (%rdx), %k1 |
| ; X86-64-NEXT: .LBB8_3: # %exit |
| ; X86-64-NEXT: vmovaps %zmm0, %zmm1 {%k1} |
| ; X86-64-NEXT: vmovaps %ymm1, (%rcx) |
| ; X86-64-NEXT: vzeroupper |
| ; X86-64-NEXT: retq |
| ; |
| ; X86-32-LABEL: test_shl: |
| ; X86-32: # %bb.0: # %entry |
| ; X86-32-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 |
| ; X86-32-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 |
| ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-32-NEXT: testb $1, {{[0-9]+}}(%esp) |
| ; X86-32-NEXT: je .LBB8_2 |
| ; X86-32-NEXT: # %bb.1: # %if |
| ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-32-NEXT: kmovb (%ecx), %k0 |
| ; X86-32-NEXT: kshiftlb $6, %k0, %k1 |
| ; X86-32-NEXT: jmp .LBB8_3 |
| ; X86-32-NEXT: .LBB8_2: # %else |
| ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-32-NEXT: kmovb (%ecx), %k1 |
| ; X86-32-NEXT: .LBB8_3: # %exit |
| ; X86-32-NEXT: vmovaps %zmm0, %zmm1 {%k1} |
| ; X86-32-NEXT: vmovaps %ymm1, (%eax) |
| ; X86-32-NEXT: vzeroupper |
| ; X86-32-NEXT: retl |
| entry: |
| br i1 %cond, label %if, label %else |
| |
| if: |
| %loaded1 = load i8, i8* %ptr1 |
| %shifted = shl i8 %loaded1, 6 |
| br label %exit |
| |
| else: |
| %loaded2 = load i8, i8* %ptr2 |
| br label %exit |
| |
| exit: |
| ; The i8 is reinterpreted as eight 1-bit lanes to form the select mask. |
| %val = phi i8 [%shifted, %if], [%loaded2, %else] |
| %mask = bitcast i8 %val to <8 x i1> |
| %selected = select <8 x i1> %mask, <8 x float> %fvec1, <8 x float> %fvec2 |
| store <8 x float> %selected, <8 x float>* %fptrvec |
| ret void |
| } |
| |
| ; Loads an i8 from each of %ptr1 and %ptr2 in the entry block, then branches on |
| ; %cond. The %if arm combines the two bytes with a bitwise and; the %else arm |
| ; combines them with an add (so, despite the function name, only one arm is an |
| ; addition). The phi of the two i8 results is bitcast to <8 x i1> and used as |
| ; the mask of a vector select between %fvec1 and %fvec2; the result is stored |
| ; through %fptrvec. |
| ; Expected output on both targets: both bytes are loaded into mask registers |
| ; with kmovb before the branch, and the arms use kandb and kaddb respectively. |
| define void @test_add(i1 %cond, i8* %ptr1, i8* %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, <8 x float>* %fptrvec) { |
| ; X86-64-LABEL: test_add: |
| ; X86-64: # %bb.0: # %entry |
| ; X86-64-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 |
| ; X86-64-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 |
| ; X86-64-NEXT: kmovb (%rsi), %k0 |
| ; X86-64-NEXT: kmovb (%rdx), %k1 |
| ; X86-64-NEXT: testb $1, %dil |
| ; X86-64-NEXT: je .LBB9_2 |
| ; X86-64-NEXT: # %bb.1: # %if |
| ; X86-64-NEXT: kandb %k1, %k0, %k1 |
| ; X86-64-NEXT: jmp .LBB9_3 |
| ; X86-64-NEXT: .LBB9_2: # %else |
| ; X86-64-NEXT: kaddb %k1, %k0, %k1 |
| ; X86-64-NEXT: .LBB9_3: # %exit |
| ; X86-64-NEXT: vmovaps %zmm0, %zmm1 {%k1} |
| ; X86-64-NEXT: vmovaps %ymm1, (%rcx) |
| ; X86-64-NEXT: vzeroupper |
| ; X86-64-NEXT: retq |
| ; |
| ; X86-32-LABEL: test_add: |
| ; X86-32: # %bb.0: # %entry |
| ; X86-32-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 |
| ; X86-32-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 |
| ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-32-NEXT: kmovb (%edx), %k0 |
| ; X86-32-NEXT: kmovb (%ecx), %k1 |
| ; X86-32-NEXT: testb $1, {{[0-9]+}}(%esp) |
| ; X86-32-NEXT: je .LBB9_2 |
| ; X86-32-NEXT: # %bb.1: # %if |
| ; X86-32-NEXT: kandb %k1, %k0, %k1 |
| ; X86-32-NEXT: jmp .LBB9_3 |
| ; X86-32-NEXT: .LBB9_2: # %else |
| ; X86-32-NEXT: kaddb %k1, %k0, %k1 |
| ; X86-32-NEXT: .LBB9_3: # %exit |
| ; X86-32-NEXT: vmovaps %zmm0, %zmm1 {%k1} |
| ; X86-32-NEXT: vmovaps %ymm1, (%eax) |
| ; X86-32-NEXT: vzeroupper |
| ; X86-32-NEXT: retl |
| entry: |
| ; Unlike the shift tests in this file, both loads happen before the branch. |
| %loaded1 = load i8, i8* %ptr1 |
| %loaded2 = load i8, i8* %ptr2 |
| br i1 %cond, label %if, label %else |
| |
| if: |
| %and = and i8 %loaded1, %loaded2 |
| br label %exit |
| |
| else: |
| %add = add i8 %loaded1, %loaded2 |
| br label %exit |
| |
| exit: |
| ; The i8 is reinterpreted as eight 1-bit lanes to form the select mask. |
| %val = phi i8 [%and, %if], [%add, %else] |
| %mask = bitcast i8 %val to <8 x i1> |
| %selected = select <8 x i1> %mask, <8 x float> %fvec1, <8 x float> %fvec2 |
| store <8 x float> %selected, <8 x float>* %fptrvec |
| ret void |
| } |