| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx512f,+avx512dq,+avx512bw,+avx512vl | FileCheck %s --check-prefix=GENERIC |
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=SKX |
| |
| ; This test is an assembly of avx512 instructions to check their scheduling |
| |
| define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) { |
| ; GENERIC-LABEL: addpd512: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vaddpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: addpd512: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vaddpd %zmm0, %zmm1, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %add.i = fadd <8 x double> %x, %y |
| ret <8 x double> %add.i |
| } |
| |
| define <8 x double> @addpd512fold(<8 x double> %y) { |
| ; GENERIC-LABEL: addpd512fold: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [10:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: addpd512fold: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %add.i = fadd <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00> |
| ret <8 x double> %add.i |
| } |
| |
| define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) { |
| ; GENERIC-LABEL: addps512: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vaddps %zmm0, %zmm1, %zmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: addps512: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vaddps %zmm0, %zmm1, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %add.i = fadd <16 x float> %x, %y |
| ret <16 x float> %add.i |
| } |
| |
| define <16 x float> @addps512fold(<16 x float> %y) { |
| ; GENERIC-LABEL: addps512fold: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0 # sched: [10:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: addps512fold: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %add.i = fadd <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000> |
| ret <16 x float> %add.i |
| } |
| |
| define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) { |
| ; GENERIC-LABEL: subpd512: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vsubpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: subpd512: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vsubpd %zmm0, %zmm1, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %sub.i = fsub <8 x double> %x, %y |
| ret <8 x double> %sub.i |
| } |
| |
| define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) { |
| ; GENERIC-LABEL: subpd512fold: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vsubpd (%rdi), %zmm0, %zmm0 # sched: [10:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: subpd512fold: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vsubpd (%rdi), %zmm0, %zmm0 # sched: [11:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %tmp2 = load <8 x double>, <8 x double>* %x, align 8 |
| %sub.i = fsub <8 x double> %y, %tmp2 |
| ret <8 x double> %sub.i |
| } |
| |
| define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) { |
| ; GENERIC-LABEL: subps512: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vsubps %zmm0, %zmm1, %zmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: subps512: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vsubps %zmm0, %zmm1, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %sub.i = fsub <16 x float> %x, %y |
| ret <16 x float> %sub.i |
| } |
| |
| define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) { |
| ; GENERIC-LABEL: subps512fold: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vsubps (%rdi), %zmm0, %zmm0 # sched: [10:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: subps512fold: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vsubps (%rdi), %zmm0, %zmm0 # sched: [11:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %tmp2 = load <16 x float>, <16 x float>* %x, align 4 |
| %sub.i = fsub <16 x float> %y, %tmp2 |
| ret <16 x float> %sub.i |
| } |
| |
| define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) { |
| ; GENERIC-LABEL: imulq512: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmullq %zmm0, %zmm1, %zmm0 # sched: [5:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: imulq512: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmullq %zmm0, %zmm1, %zmm0 # sched: [12:1.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %z = mul <8 x i64>%x, %y |
| ret <8 x i64>%z |
| } |
| |
| define <4 x i64> @imulq256(<4 x i64> %y, <4 x i64> %x) { |
| ; GENERIC-LABEL: imulq256: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmullq %ymm0, %ymm1, %ymm0 # sched: [5:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: imulq256: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmullq %ymm0, %ymm1, %ymm0 # sched: [12:1.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %z = mul <4 x i64>%x, %y |
| ret <4 x i64>%z |
| } |
| |
| define <2 x i64> @imulq128(<2 x i64> %y, <2 x i64> %x) { |
| ; GENERIC-LABEL: imulq128: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmullq %xmm0, %xmm1, %xmm0 # sched: [5:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: imulq128: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmullq %xmm0, %xmm1, %xmm0 # sched: [12:1.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %z = mul <2 x i64>%x, %y |
| ret <2 x i64>%z |
| } |
| |
| define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) { |
| ; GENERIC-LABEL: mulpd512: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vmulpd %zmm0, %zmm1, %zmm0 # sched: [5:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mulpd512: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vmulpd %zmm0, %zmm1, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %mul.i = fmul <8 x double> %x, %y |
| ret <8 x double> %mul.i |
| } |
| |
| define <8 x double> @mulpd512fold(<8 x double> %y) { |
| ; GENERIC-LABEL: mulpd512fold: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vmulpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [12:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mulpd512fold: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vmulpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %mul.i = fmul <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00> |
| ret <8 x double> %mul.i |
| } |
| |
| define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) { |
| ; GENERIC-LABEL: mulps512: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [5:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mulps512: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %mul.i = fmul <16 x float> %x, %y |
| ret <16 x float> %mul.i |
| } |
| |
| define <16 x float> @mulps512fold(<16 x float> %y) { |
| ; GENERIC-LABEL: mulps512fold: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [12:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mulps512fold: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %mul.i = fmul <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000> |
| ret <16 x float> %mul.i |
| } |
| |
| define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) { |
| ; GENERIC-LABEL: divpd512: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vdivpd %zmm0, %zmm1, %zmm0 # sched: [45:44.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: divpd512: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vdivpd %zmm0, %zmm1, %zmm0 # sched: [23:16.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %div.i = fdiv <8 x double> %x, %y |
| ret <8 x double> %div.i |
| } |
| |
| define <8 x double> @divpd512fold(<8 x double> %y) { |
| ; GENERIC-LABEL: divpd512fold: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vdivpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [52:44.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: divpd512fold: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vdivpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [30:16.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %div.i = fdiv <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00> |
| ret <8 x double> %div.i |
| } |
| |
| define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) { |
| ; GENERIC-LABEL: divps512: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vdivps %zmm0, %zmm1, %zmm0 # sched: [29:28.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: divps512: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vdivps %zmm0, %zmm1, %zmm0 # sched: [18:10.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %div.i = fdiv <16 x float> %x, %y |
| ret <16 x float> %div.i |
| } |
| |
| define <16 x float> @divps512fold(<16 x float> %y) { |
| ; GENERIC-LABEL: divps512fold: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vdivps {{.*}}(%rip), %zmm0, %zmm0 # sched: [36:28.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: divps512fold: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vdivps {{.*}}(%rip), %zmm0, %zmm0 # sched: [25:10.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %div.i = fdiv <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000> |
| ret <16 x float> %div.i |
| } |
| |
| define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone { |
| ; GENERIC-LABEL: vpaddq_test: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpaddq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: vpaddq_test: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpaddq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = add <8 x i64> %i, %j |
| ret <8 x i64> %x |
| } |
| |
| define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind { |
| ; GENERIC-LABEL: vpaddq_fold_test: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpaddq (%rdi), %zmm0, %zmm0 # sched: [8:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: vpaddq_fold_test: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpaddq (%rdi), %zmm0, %zmm0 # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %tmp = load <8 x i64>, <8 x i64>* %j, align 4 |
| %x = add <8 x i64> %i, %tmp |
| ret <8 x i64> %x |
| } |
| |
| define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind { |
| ; GENERIC-LABEL: vpaddq_broadcast_test: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: vpaddq_broadcast_test: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = add <8 x i64> %i, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2> |
| ret <8 x i64> %x |
| } |
| |
| define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind { |
| ; GENERIC-LABEL: vpaddq_broadcast2_test: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: vpaddq_broadcast2_test: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %tmp = load i64, i64* %j |
| %j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0 |
| %j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1 |
| %j.2 = insertelement <8 x i64> %j.1, i64 %tmp, i32 2 |
| %j.3 = insertelement <8 x i64> %j.2, i64 %tmp, i32 3 |
| %j.4 = insertelement <8 x i64> %j.3, i64 %tmp, i32 4 |
| %j.5 = insertelement <8 x i64> %j.4, i64 %tmp, i32 5 |
| %j.6 = insertelement <8 x i64> %j.5, i64 %tmp, i32 6 |
| %j.7 = insertelement <8 x i64> %j.6, i64 %tmp, i32 7 |
| %x = add <8 x i64> %i, %j.7 |
| ret <8 x i64> %x |
| } |
| |
| define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone { |
| ; GENERIC-LABEL: vpaddd_test: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 # sched: [1:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: vpaddd_test: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpaddd %zmm1, %zmm0, %zmm0 # sched: [1:0.33] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = add <16 x i32> %i, %j |
| ret <16 x i32> %x |
| } |
| |
| define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind { |
| ; GENERIC-LABEL: vpaddd_fold_test: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 # sched: [8:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: vpaddd_fold_test: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpaddd (%rdi), %zmm0, %zmm0 # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %tmp = load <16 x i32>, <16 x i32>* %j, align 4 |
| %x = add <16 x i32> %i, %tmp |
| ret <16 x i32> %x |
| } |
| |
| define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind { |
| ; GENERIC-LABEL: vpaddd_broadcast_test: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: vpaddd_broadcast_test: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = add <16 x i32> %i, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> |
| ret <16 x i32> %x |
| } |
| |
| define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone { |
| ; GENERIC-LABEL: vpaddd_mask_test: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} # sched: [1:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: vpaddd_mask_test: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} # sched: [1:0.33] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp ne <16 x i32> %mask1, zeroinitializer |
| %x = add <16 x i32> %i, %j |
| %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i |
| ret <16 x i32> %r |
| } |
| |
| define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone { |
| ; GENERIC-LABEL: vpaddd_maskz_test: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: vpaddd_maskz_test: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp ne <16 x i32> %mask1, zeroinitializer |
| %x = add <16 x i32> %i, %j |
| %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer |
| ret <16 x i32> %r |
| } |
| |
| define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone { |
| ; GENERIC-LABEL: vpaddd_mask_fold_test: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} # sched: [8:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: vpaddd_mask_fold_test: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp ne <16 x i32> %mask1, zeroinitializer |
| %j = load <16 x i32>, <16 x i32>* %j.ptr |
| %x = add <16 x i32> %i, %j |
| %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i |
| ret <16 x i32> %r |
| } |
| |
| define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone { |
| ; GENERIC-LABEL: vpaddd_mask_broadcast_test: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} # sched: [8:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: vpaddd_mask_broadcast_test: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp ne <16 x i32> %mask1, zeroinitializer |
| %x = add <16 x i32> %i, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> |
| %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i |
| ret <16 x i32> %r |
| } |
| |
| define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone { |
| ; GENERIC-LABEL: vpaddd_maskz_fold_test: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: vpaddd_maskz_fold_test: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp ne <16 x i32> %mask1, zeroinitializer |
| %j = load <16 x i32>, <16 x i32>* %j.ptr |
| %x = add <16 x i32> %i, %j |
| %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer |
| ret <16 x i32> %r |
| } |
| |
| define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone { |
| ; GENERIC-LABEL: vpaddd_maskz_broadcast_test: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: vpaddd_maskz_broadcast_test: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp ne <16 x i32> %mask1, zeroinitializer |
| %x = add <16 x i32> %i, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> |
| %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer |
| ret <16 x i32> %r |
| } |
| |
| define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone { |
| ; GENERIC-LABEL: vpsubq_test: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsubq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: vpsubq_test: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsubq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = sub <8 x i64> %i, %j |
| ret <8 x i64> %x |
| } |
| |
| define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone { |
| ; GENERIC-LABEL: vpsubd_test: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsubd %zmm1, %zmm0, %zmm0 # sched: [1:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: vpsubd_test: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsubd %zmm1, %zmm0, %zmm0 # sched: [1:0.33] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = sub <16 x i32> %i, %j |
| ret <16 x i32> %x |
| } |
| |
| define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) { |
| ; GENERIC-LABEL: vpmulld_test: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmulld %zmm1, %zmm0, %zmm0 # sched: [5:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: vpmulld_test: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmulld %zmm1, %zmm0, %zmm0 # sched: [10:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = mul <16 x i32> %i, %j |
| ret <16 x i32> %x |
| } |
| |
| declare float @sqrtf(float) readnone |
| define float @sqrtA(float %a) nounwind uwtable readnone ssp { |
| ; GENERIC-LABEL: sqrtA: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:14.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sqrtA: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:3.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %conv1 = tail call float @sqrtf(float %a) nounwind readnone |
| ret float %conv1 |
| } |
| |
| declare double @sqrt(double) readnone |
| define double @sqrtB(double %a) nounwind uwtable readnone ssp { |
| ; GENERIC-LABEL: sqrtB: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:21.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sqrtB: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:6.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %call = tail call double @sqrt(double %a) nounwind readnone |
| ret double %call |
| } |
| |
| declare float @llvm.sqrt.f32(float) |
| define float @sqrtC(float %a) nounwind { |
| ; GENERIC-LABEL: sqrtC: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:14.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sqrtC: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:3.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = call float @llvm.sqrt.f32(float %a) |
| ret float %b |
| } |
| |
| declare <16 x float> @llvm.sqrt.v16f32(<16 x float>) |
| define <16 x float> @sqrtD(<16 x float> %a) nounwind { |
| ; GENERIC-LABEL: sqrtD: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vsqrtps %zmm0, %zmm0 # sched: [29:28.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sqrtD: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vsqrtps %zmm0, %zmm0 # sched: [20:12.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a) |
| ret <16 x float> %b |
| } |
| |
| declare <8 x double> @llvm.sqrt.v8f64(<8 x double>) |
| define <8 x double> @sqrtE(<8 x double> %a) nounwind { |
| ; GENERIC-LABEL: sqrtE: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vsqrtpd %zmm0, %zmm0 # sched: [45:44.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sqrtE: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vsqrtpd %zmm0, %zmm0 # sched: [32:24.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a) |
| ret <8 x double> %b |
| } |
| |
| define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind { |
| ; GENERIC-LABEL: fadd_broadcast: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [10:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: fadd_broadcast: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [11:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000> |
| ret <16 x float> %b |
| } |
| |
| define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind { |
| ; GENERIC-LABEL: addq_broadcast: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: addq_broadcast: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2> |
| ret <8 x i64> %b |
| } |
| |
| define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind { |
| ; GENERIC-LABEL: orq_broadcast: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: orq_broadcast: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2> |
| ret <8 x i64> %b |
| } |
| |
| define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) { |
| ; GENERIC-LABEL: andd512fold: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vandps (%rdi), %zmm0, %zmm0 # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: andd512fold: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vandps (%rdi), %zmm0, %zmm0 # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %a = load <16 x i32>, <16 x i32>* %x, align 4 |
| %b = and <16 x i32> %y, %a |
| ret <16 x i32> %b |
| } |
| |
| define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) { |
| ; GENERIC-LABEL: andqbrst: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: andqbrst: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %a = load i64, i64* %ap, align 8 |
| %b = insertelement <8 x i64> undef, i64 %a, i32 0 |
| %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer |
| %d = and <8 x i64> %p1, %c |
| ret <8 x i64>%d |
| } |
| |
| define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i, |
| ; GENERIC-LABEL: test_mask_vaddps: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_mask_vaddps: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| <16 x float> %j, <16 x i32> %mask1) |
| nounwind readnone { |
| %mask = icmp ne <16 x i32> %mask1, zeroinitializer |
| %x = fadd <16 x float> %i, %j |
| %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst |
| ret <16 x float> %r |
| } |
| |
| define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone { |
| ; GENERIC-LABEL: test_mask_vmulps: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1} # sched: [5:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_mask_vmulps: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp ne <16 x i32> %mask1, zeroinitializer |
| %x = fmul <16 x float> %i, %j |
| %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst |
| ret <16 x float> %r |
| } |
| |
| define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone { |
| ; GENERIC-LABEL: test_mask_vminps: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_mask_vminps: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp ne <16 x i32> %mask1, zeroinitializer |
| %cmp_res = fcmp olt <16 x float> %i, %j |
| %min = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j |
| %r = select <16 x i1> %mask, <16 x float> %min, <16 x float> %dst |
| ret <16 x float> %r |
| } |
| |
| define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i, <8 x double> %j, <8 x i32> %mask1) nounwind readnone { |
| ; GENERIC-LABEL: test_mask_vminpd: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vptestmd %ymm3, %ymm3, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_mask_vminpd: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vptestmd %ymm3, %ymm3, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp ne <8 x i32> %mask1, zeroinitializer |
| %cmp_res = fcmp olt <8 x double> %i, %j |
| %min = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j |
| %r = select <8 x i1> %mask, <8 x double> %min, <8 x double> %dst |
| ret <8 x double> %r |
| } |
| |
| define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone { |
| ; GENERIC-LABEL: test_mask_vmaxps: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_mask_vmaxps: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp ne <16 x i32> %mask1, zeroinitializer |
| %cmp_res = fcmp ogt <16 x float> %i, %j |
| %max = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j |
| %r = select <16 x i1> %mask, <16 x float> %max, <16 x float> %dst |
| ret <16 x float> %r |
| } |
| |
| define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i, <8 x double> %j, <8 x i32> %mask1) nounwind readnone { |
| ; GENERIC-LABEL: test_mask_vmaxpd: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vptestmd %ymm3, %ymm3, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_mask_vmaxpd: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vptestmd %ymm3, %ymm3, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp ne <8 x i32> %mask1, zeroinitializer |
| %cmp_res = fcmp ogt <8 x double> %i, %j |
| %max = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j |
| %r = select <8 x i1> %mask, <8 x double> %max, <8 x double> %dst |
| ret <8 x double> %r |
| } |
| |
| define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone { |
| ; GENERIC-LABEL: test_mask_vsubps: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_mask_vsubps: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp ne <16 x i32> %mask1, zeroinitializer |
| %x = fsub <16 x float> %i, %j |
| %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst |
| ret <16 x float> %r |
| } |
| |
| define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone { |
| ; GENERIC-LABEL: test_mask_vdivps: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vdivps %zmm2, %zmm1, %zmm0 {%k1} # sched: [29:28.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_mask_vdivps: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vdivps %zmm2, %zmm1, %zmm0 {%k1} # sched: [18:10.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp ne <16 x i32> %mask1, zeroinitializer |
| %x = fdiv <16 x float> %i, %j |
| %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst |
| ret <16 x float> %r |
| } |
| |
| define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i, <8 x double> %j, <8 x i64> %mask1) nounwind readnone { |
| ; GENERIC-LABEL: test_mask_vaddpd: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vptestmq %zmm3, %zmm3, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_mask_vaddpd: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vptestmq %zmm3, %zmm3, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp ne <8 x i64> %mask1, zeroinitializer |
| %x = fadd <8 x double> %i, %j |
| %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst |
| ret <8 x double> %r |
| } |
| |
| define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j, <8 x i64> %mask1) nounwind readnone { |
| ; GENERIC-LABEL: test_maskz_vaddpd: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_maskz_vaddpd: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp ne <8 x i64> %mask1, zeroinitializer |
| %x = fadd <8 x double> %i, %j |
| %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer |
| ret <8 x double> %r |
| } |
| |
| define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i, <8 x double>* %j, <8 x i64> %mask1) nounwind { |
| ; GENERIC-LABEL: test_mask_fold_vaddpd: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vaddpd (%rdi), %zmm1, %zmm0 {%k1} # sched: [10:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_mask_fold_vaddpd: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vaddpd (%rdi), %zmm1, %zmm0 {%k1} # sched: [11:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp ne <8 x i64> %mask1, zeroinitializer |
| %tmp = load <8 x double>, <8 x double>* %j, align 8 |
| %x = fadd <8 x double> %i, %tmp |
| %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst |
| ret <8 x double> %r |
| } |
| |
| define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j, <8 x i64> %mask1) nounwind { |
| ; GENERIC-LABEL: test_maskz_fold_vaddpd: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [10:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_maskz_fold_vaddpd: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [11:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp ne <8 x i64> %mask1, zeroinitializer |
| %tmp = load <8 x double>, <8 x double>* %j, align 8 |
| %x = fadd <8 x double> %i, %tmp |
| %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer |
| ret <8 x double> %r |
| } |
| |
| define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind { |
| ; GENERIC-LABEL: test_broadcast_vaddpd: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [10:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_broadcast_vaddpd: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [11:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %tmp = load double, double* %j |
| %b = insertelement <8 x double> undef, double %tmp, i32 0 |
| %c = shufflevector <8 x double> %b, <8 x double> undef, |
| <8 x i32> zeroinitializer |
| %x = fadd <8 x double> %c, %i |
| ret <8 x double> %x |
| } |
| |
| define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i, double* %j, <8 x i64> %mask1) nounwind { |
| ; GENERIC-LABEL: test_mask_broadcast_vaddpd: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} # sched: [10:1.00] |
| ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_mask_broadcast_vaddpd: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} # sched: [11:0.50] |
| ; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp ne <8 x i64> %mask1, zeroinitializer |
| %tmp = load double, double* %j |
| %b = insertelement <8 x double> undef, double %tmp, i32 0 |
| %c = shufflevector <8 x double> %b, <8 x double> undef, |
| <8 x i32> zeroinitializer |
| %x = fadd <8 x double> %c, %i |
| %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %i |
| ret <8 x double> %r |
| } |
| |
| define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j, |
| ; GENERIC-LABEL: test_maskz_broadcast_vaddpd: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} # sched: [10:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_maskz_broadcast_vaddpd: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} # sched: [11:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| <8 x i64> %mask1) nounwind { |
| %mask = icmp ne <8 x i64> %mask1, zeroinitializer |
| %tmp = load double, double* %j |
| %b = insertelement <8 x double> undef, double %tmp, i32 0 |
| %c = shufflevector <8 x double> %b, <8 x double> undef, |
| <8 x i32> zeroinitializer |
| %x = fadd <8 x double> %c, %i |
| %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer |
| ret <8 x double> %r |
| } |
| |
| define <16 x float> @test_fxor(<16 x float> %a) { |
| ; GENERIC-LABEL: test_fxor: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_fxor: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| |
| %res = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a |
| ret <16 x float>%res |
| } |
| |
| define <8 x float> @test_fxor_8f32(<8 x float> %a) { |
| ; GENERIC-LABEL: test_fxor_8f32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_fxor_8f32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %res = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a |
| ret <8 x float>%res |
| } |
| |
| define <8 x double> @fabs_v8f64(<8 x double> %p) |
| ; GENERIC-LABEL: fabs_v8f64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: fabs_v8f64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| { |
| %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p) |
| ret <8 x double> %t |
| } |
| declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p) |
| |
| define <16 x float> @fabs_v16f32(<16 x float> %p) |
| ; GENERIC-LABEL: fabs_v16f32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: fabs_v16f32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| { |
| %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p) |
| ret <16 x float> %t |
| } |
| declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p) |
| |
| define double @test1(double %a, double %b) nounwind { |
| ; GENERIC-LABEL: test1: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00] |
| ; GENERIC-NEXT: jne .LBB64_1 # sched: [1:1.00] |
| ; GENERIC-NEXT: jnp .LBB64_2 # sched: [1:1.00] |
| ; GENERIC-NEXT: .LBB64_1: # %l1 |
| ; GENERIC-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; GENERIC-NEXT: .LBB64_2: # %l2 |
| ; GENERIC-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test1: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00] |
| ; SKX-NEXT: jne .LBB64_1 # sched: [1:0.50] |
| ; SKX-NEXT: jnp .LBB64_2 # sched: [1:0.50] |
| ; SKX-NEXT: .LBB64_1: # %l1 |
| ; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| ; SKX-NEXT: .LBB64_2: # %l2 |
| ; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %tobool = fcmp une double %a, %b |
| br i1 %tobool, label %l1, label %l2 |
| |
| l1: |
| %c = fsub double %a, %b |
| ret double %c |
| l2: |
| %c1 = fadd double %a, %b |
| ret double %c1 |
| } |
| |
| define float @test2(float %a, float %b) nounwind { |
| ; GENERIC-LABEL: test2: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vucomiss %xmm0, %xmm1 # sched: [2:1.00] |
| ; GENERIC-NEXT: jbe .LBB65_2 # sched: [1:1.00] |
| ; GENERIC-NEXT: # %bb.1: # %l1 |
| ; GENERIC-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; GENERIC-NEXT: .LBB65_2: # %l2 |
| ; GENERIC-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test2: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vucomiss %xmm0, %xmm1 # sched: [2:1.00] |
| ; SKX-NEXT: jbe .LBB65_2 # sched: [1:0.50] |
| ; SKX-NEXT: # %bb.1: # %l1 |
| ; SKX-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| ; SKX-NEXT: .LBB65_2: # %l2 |
| ; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %tobool = fcmp olt float %a, %b |
| br i1 %tobool, label %l1, label %l2 |
| |
| l1: |
| %c = fsub float %a, %b |
| ret float %c |
| l2: |
| %c1 = fadd float %a, %b |
| ret float %c1 |
| } |
| |
| define i32 @test3(float %a, float %b) { |
| ; GENERIC-LABEL: test3: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcmpeqss %xmm1, %xmm0, %k0 # sched: [3:1.00] |
| ; GENERIC-NEXT: kmovw %k0, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test3: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcmpeqss %xmm1, %xmm0, %k0 # sched: [3:1.00] |
| ; SKX-NEXT: kmovw %k0, %eax # sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| |
| %cmp10.i = fcmp oeq float %a, %b |
| %conv11.i = zext i1 %cmp10.i to i32 |
| ret i32 %conv11.i |
| } |
| |
| define float @test5(float %p) #0 { |
| ; GENERIC-LABEL: test5: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00] |
| ; GENERIC-NEXT: vucomiss %xmm1, %xmm0 # sched: [2:1.00] |
| ; GENERIC-NEXT: jne .LBB67_1 # sched: [1:1.00] |
| ; GENERIC-NEXT: jp .LBB67_1 # sched: [1:1.00] |
| ; GENERIC-NEXT: # %bb.2: # %return |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; GENERIC-NEXT: .LBB67_1: # %if.end |
| ; GENERIC-NEXT: seta %al # sched: [2:1.00] |
| ; GENERIC-NEXT: movzbl %al, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test5: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33] |
| ; SKX-NEXT: vucomiss %xmm1, %xmm0 # sched: [2:1.00] |
| ; SKX-NEXT: jne .LBB67_1 # sched: [1:0.50] |
| ; SKX-NEXT: jp .LBB67_1 # sched: [1:0.50] |
| ; SKX-NEXT: # %bb.2: # %return |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| ; SKX-NEXT: .LBB67_1: # %if.end |
| ; SKX-NEXT: seta %al # sched: [2:1.00] |
| ; SKX-NEXT: movzbl %al, %eax # sched: [1:0.25] |
| ; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %cmp = fcmp oeq float %p, 0.000000e+00 |
| br i1 %cmp, label %return, label %if.end |
| |
| if.end: ; preds = %entry |
| %cmp1 = fcmp ogt float %p, 0.000000e+00 |
| %cond = select i1 %cmp1, float 1.000000e+00, float -1.000000e+00 |
| br label %return |
| |
| return: ; preds = %if.end, %entry |
| %retval.0 = phi float [ %cond, %if.end ], [ %p, %entry ] |
| ret float %retval.0 |
| } |
| |
| define i32 @test6(i32 %a, i32 %b) { |
| ; GENERIC-LABEL: test6: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: xorl %eax, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33] |
| ; GENERIC-NEXT: sete %al # sched: [1:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test6: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25] |
| ; SKX-NEXT: cmpl %esi, %edi # sched: [1:0.25] |
| ; SKX-NEXT: sete %al # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %cmp = icmp eq i32 %a, %b |
| %res = zext i1 %cmp to i32 |
| ret i32 %res |
| } |
| |
| define i32 @test7(double %x, double %y) #2 { |
| ; GENERIC-LABEL: test7: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: xorl %eax, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00] |
| ; GENERIC-NEXT: setne %al # sched: [1:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test7: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25] |
| ; SKX-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00] |
| ; SKX-NEXT: setne %al # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %0 = fcmp one double %x, %y |
| %or = zext i1 %0 to i32 |
| ret i32 %or |
| } |
| |
| define i32 @test8(i32 %a1, i32 %a2, i32 %a3) { |
| ; GENERIC-LABEL: test8: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: xorl $-2147483648, %esi # imm = 0x80000000 |
| ; GENERIC-NEXT: # sched: [1:0.33] |
| ; GENERIC-NEXT: testl %edx, %edx # sched: [1:0.33] |
| ; GENERIC-NEXT: movl $1, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: cmovel %eax, %edx # sched: [2:0.67] |
| ; GENERIC-NEXT: notl %edi # sched: [1:0.33] |
| ; GENERIC-NEXT: orl %edi, %esi # sched: [1:0.33] |
| ; GENERIC-NEXT: cmovnel %edx, %eax # sched: [2:0.67] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test8: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: notl %edi # sched: [1:0.25] |
| ; SKX-NEXT: xorl $-2147483648, %esi # imm = 0x80000000 |
| ; SKX-NEXT: # sched: [1:0.25] |
| ; SKX-NEXT: testl %edx, %edx # sched: [1:0.25] |
| ; SKX-NEXT: movl $1, %eax # sched: [1:0.25] |
| ; SKX-NEXT: cmovel %eax, %edx # sched: [1:0.50] |
| ; SKX-NEXT: orl %edi, %esi # sched: [1:0.25] |
| ; SKX-NEXT: cmovnel %edx, %eax # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %tmp1 = icmp eq i32 %a1, -1 |
| %tmp2 = icmp eq i32 %a2, -2147483648 |
| %tmp3 = and i1 %tmp1, %tmp2 |
| %tmp4 = icmp eq i32 %a3, 0 |
| %tmp5 = or i1 %tmp3, %tmp4 |
| %res = select i1 %tmp5, i32 1, i32 %a3 |
| ret i32 %res |
| } |
| |
| define i32 @test9(i64 %a) { |
| ; GENERIC-LABEL: test9: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: testb $1, %dil # sched: [1:0.33] |
| ; GENERIC-NEXT: jne .LBB71_2 # sched: [1:1.00] |
| ; GENERIC-NEXT: # %bb.1: # %A |
| ; GENERIC-NEXT: movl $6, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; GENERIC-NEXT: .LBB71_2: # %B |
| ; GENERIC-NEXT: movl $7, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test9: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: testb $1, %dil # sched: [1:0.25] |
| ; SKX-NEXT: jne .LBB71_2 # sched: [1:0.50] |
| ; SKX-NEXT: # %bb.1: # %A |
| ; SKX-NEXT: movl $6, %eax # sched: [1:0.25] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| ; SKX-NEXT: .LBB71_2: # %B |
| ; SKX-NEXT: movl $7, %eax # sched: [1:0.25] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = and i64 %a, 1 |
| %cmp10.i = icmp eq i64 %b, 0 |
| br i1 %cmp10.i, label %A, label %B |
| A: |
| ret i32 6 |
| B: |
| ret i32 7 |
| } |
| |
| define i32 @test10(i64 %b, i64 %c, i1 %d) { |
| ; GENERIC-LABEL: test10: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: movl %edx, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: andb $1, %al # sched: [1:0.33] |
| ; GENERIC-NEXT: cmpq %rsi, %rdi # sched: [1:0.33] |
| ; GENERIC-NEXT: sete %cl # sched: [1:0.50] |
| ; GENERIC-NEXT: orb %dl, %cl # sched: [1:0.33] |
| ; GENERIC-NEXT: andb $1, %cl # sched: [1:0.33] |
| ; GENERIC-NEXT: cmpb %cl, %al # sched: [1:0.33] |
| ; GENERIC-NEXT: je .LBB72_1 # sched: [1:1.00] |
| ; GENERIC-NEXT: # %bb.2: # %if.end.i |
| ; GENERIC-NEXT: movl $6, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; GENERIC-NEXT: .LBB72_1: # %if.then.i |
| ; GENERIC-NEXT: movl $5, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test10: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: movl %edx, %eax # sched: [1:0.25] |
| ; SKX-NEXT: andb $1, %al # sched: [1:0.25] |
| ; SKX-NEXT: cmpq %rsi, %rdi # sched: [1:0.25] |
| ; SKX-NEXT: sete %cl # sched: [1:0.50] |
| ; SKX-NEXT: orb %dl, %cl # sched: [1:0.25] |
| ; SKX-NEXT: andb $1, %cl # sched: [1:0.25] |
| ; SKX-NEXT: cmpb %cl, %al # sched: [1:0.25] |
| ; SKX-NEXT: je .LBB72_1 # sched: [1:0.50] |
| ; SKX-NEXT: # %bb.2: # %if.end.i |
| ; SKX-NEXT: movl $6, %eax # sched: [1:0.25] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| ; SKX-NEXT: .LBB72_1: # %if.then.i |
| ; SKX-NEXT: movl $5, %eax # sched: [1:0.25] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| |
| %cmp8.i = icmp eq i64 %b, %c |
| %or1 = or i1 %d, %cmp8.i |
| %xor1 = xor i1 %d, %or1 |
| br i1 %xor1, label %if.end.i, label %if.then.i |
| |
| if.then.i: |
| ret i32 5 |
| |
| if.end.i: |
| ret i32 6 |
| } |
| |
| define <16 x float> @sitof32(<16 x i32> %a) nounwind { |
| ; GENERIC-LABEL: sitof32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sitof32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = sitofp <16 x i32> %a to <16 x float> |
| ret <16 x float> %b |
| } |
| |
| define <8 x double> @sltof864(<8 x i64> %a) { |
| ; GENERIC-LABEL: sltof864: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sltof864: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = sitofp <8 x i64> %a to <8 x double> |
| ret <8 x double> %b |
| } |
| |
| define <4 x double> @slto4f64(<4 x i64> %a) { |
| ; GENERIC-LABEL: slto4f64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvtqq2pd %ymm0, %ymm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: slto4f64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvtqq2pd %ymm0, %ymm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = sitofp <4 x i64> %a to <4 x double> |
| ret <4 x double> %b |
| } |
| |
| define <2 x double> @slto2f64(<2 x i64> %a) { |
| ; GENERIC-LABEL: slto2f64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvtqq2pd %xmm0, %xmm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: slto2f64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvtqq2pd %xmm0, %xmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = sitofp <2 x i64> %a to <2 x double> |
| ret <2 x double> %b |
| } |
| |
| define <2 x float> @sltof2f32(<2 x i64> %a) { |
| ; GENERIC-LABEL: sltof2f32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvtqq2ps %xmm0, %xmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sltof2f32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvtqq2ps %xmm0, %xmm0 # sched: [5:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = sitofp <2 x i64> %a to <2 x float> |
| ret <2 x float>%b |
| } |
| |
| define <4 x float> @slto4f32_mem(<4 x i64>* %a) { |
| ; GENERIC-LABEL: slto4f32_mem: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvtqq2psy (%rdi), %xmm0 # sched: [10:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: slto4f32_mem: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvtqq2psy (%rdi), %xmm0 # sched: [11:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a1 = load <4 x i64>, <4 x i64>* %a, align 8 |
| %b = sitofp <4 x i64> %a1 to <4 x float> |
| ret <4 x float>%b |
| } |
| |
| define <4 x i64> @f64to4sl(<4 x double> %a) { |
| ; GENERIC-LABEL: f64to4sl: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvttpd2qq %ymm0, %ymm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: f64to4sl: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvttpd2qq %ymm0, %ymm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = fptosi <4 x double> %a to <4 x i64> |
| ret <4 x i64> %b |
| } |
| |
| define <4 x i64> @f32to4sl(<4 x float> %a) { |
| ; GENERIC-LABEL: f32to4sl: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvttps2qq %xmm0, %ymm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: f32to4sl: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvttps2qq %xmm0, %ymm0 # sched: [7:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = fptosi <4 x float> %a to <4 x i64> |
| ret <4 x i64> %b |
| } |
| |
| define <4 x float> @slto4f32(<4 x i64> %a) { |
| ; GENERIC-LABEL: slto4f32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvtqq2ps %ymm0, %xmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: slto4f32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvtqq2ps %ymm0, %xmm0 # sched: [7:1.00] |
| ; SKX-NEXT: vzeroupper # sched: [4:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = sitofp <4 x i64> %a to <4 x float> |
| ret <4 x float> %b |
| } |
| |
| define <4 x float> @ulto4f32(<4 x i64> %a) { |
| ; GENERIC-LABEL: ulto4f32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvtuqq2ps %ymm0, %xmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: ulto4f32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvtuqq2ps %ymm0, %xmm0 # sched: [7:1.00] |
| ; SKX-NEXT: vzeroupper # sched: [4:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = uitofp <4 x i64> %a to <4 x float> |
| ret <4 x float> %b |
| } |
| |
| define <8 x double> @ulto8f64(<8 x i64> %a) { |
| ; GENERIC-LABEL: ulto8f64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: ulto8f64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = uitofp <8 x i64> %a to <8 x double> |
| ret <8 x double> %b |
| } |
| |
| define <16 x double> @ulto16f64(<16 x i64> %a) { |
| ; GENERIC-LABEL: ulto16f64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: vcvtuqq2pd %zmm1, %zmm1 # sched: [4:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: ulto16f64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: vcvtuqq2pd %zmm1, %zmm1 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = uitofp <16 x i64> %a to <16 x double> |
| ret <16 x double> %b |
| } |
| |
| define <16 x i32> @f64to16si(<16 x float> %a) nounwind { |
| ; GENERIC-LABEL: f64to16si: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: f64to16si: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = fptosi <16 x float> %a to <16 x i32> |
| ret <16 x i32> %b |
| } |
| |
| define <16 x i32> @f32to16ui(<16 x float> %a) nounwind { |
| ; GENERIC-LABEL: f32to16ui: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvttps2udq %zmm0, %zmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: f32to16ui: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvttps2udq %zmm0, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = fptoui <16 x float> %a to <16 x i32> |
| ret <16 x i32> %b |
| } |
| |
| define <16 x i8> @f32to16uc(<16 x float> %f) { |
| ; GENERIC-LABEL: f32to16uc: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: vpmovdb %zmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: f32to16uc: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: vpmovdb %zmm0, %xmm0 # sched: [4:2.00] |
| ; SKX-NEXT: vzeroupper # sched: [4:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %res = fptoui <16 x float> %f to <16 x i8> |
| ret <16 x i8> %res |
| } |
| |
| define <16 x i16> @f32to16us(<16 x float> %f) { |
| ; GENERIC-LABEL: f32to16us: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: vpmovdw %zmm0, %ymm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: f32to16us: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: vpmovdw %zmm0, %ymm0 # sched: [4:2.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %res = fptoui <16 x float> %f to <16 x i16> |
| ret <16 x i16> %res |
| } |
| |
| define <8 x i32> @f32to8ui(<8 x float> %a) nounwind { |
| ; GENERIC-LABEL: f32to8ui: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvttps2udq %ymm0, %ymm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: f32to8ui: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvttps2udq %ymm0, %ymm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = fptoui <8 x float> %a to <8 x i32> |
| ret <8 x i32> %b |
| } |
| |
| define <4 x i32> @f32to4ui(<4 x float> %a) nounwind { |
| ; GENERIC-LABEL: f32to4ui: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvttps2udq %xmm0, %xmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: f32to4ui: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvttps2udq %xmm0, %xmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = fptoui <4 x float> %a to <4 x i32> |
| ret <4 x i32> %b |
| } |
| |
| define <8 x i32> @f64to8ui(<8 x double> %a) nounwind { |
| ; GENERIC-LABEL: f64to8ui: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvttpd2udq %zmm0, %ymm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: f64to8ui: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvttpd2udq %zmm0, %ymm0 # sched: [7:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = fptoui <8 x double> %a to <8 x i32> |
| ret <8 x i32> %b |
| } |
| |
| define <8 x i16> @f64to8us(<8 x double> %f) { |
| ; GENERIC-LABEL: f64to8us: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: vpmovdw %ymm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: f64to8us: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [7:1.00] |
| ; SKX-NEXT: vpmovdw %ymm0, %xmm0 # sched: [4:2.00] |
| ; SKX-NEXT: vzeroupper # sched: [4:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %res = fptoui <8 x double> %f to <8 x i16> |
| ret <8 x i16> %res |
| } |
| |
| define <8 x i8> @f64to8uc(<8 x double> %f) { |
| ; GENERIC-LABEL: f64to8uc: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: vpmovdw %ymm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: f64to8uc: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [7:1.00] |
| ; SKX-NEXT: vpmovdw %ymm0, %xmm0 # sched: [4:2.00] |
| ; SKX-NEXT: vzeroupper # sched: [4:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %res = fptoui <8 x double> %f to <8 x i8> |
| ret <8 x i8> %res |
| } |
| |
| define <4 x i32> @f64to4ui(<4 x double> %a) nounwind { |
| ; GENERIC-LABEL: f64to4ui: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvttpd2udq %ymm0, %xmm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: f64to4ui: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvttpd2udq %ymm0, %xmm0 # sched: [7:1.00] |
| ; SKX-NEXT: vzeroupper # sched: [4:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = fptoui <4 x double> %a to <4 x i32> |
| ret <4 x i32> %b |
| } |
| |
| define <8 x double> @sito8f64(<8 x i32> %a) { |
| ; GENERIC-LABEL: sito8f64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sito8f64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = sitofp <8 x i32> %a to <8 x double> |
| ret <8 x double> %b |
| } |
| define <8 x double> @i32to8f64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind { |
| ; GENERIC-LABEL: i32to8f64_mask: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1} # sched: [4:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: i32to8f64_mask: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1} # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| ; VLNOBW-LABEL: i32to8f64_mask: |
| ; VLNOBW: # %bb.0: |
| ; VLNOBW-NEXT: kmovw %edi, %k1 |
| ; VLNOBW-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1} |
| ; VLNOBW-NEXT: ret{{[l|q]}} |
| %1 = bitcast i8 %c to <8 x i1> |
| %2 = sitofp <8 x i32> %b to <8 x double> |
| %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> %a |
| ret <8 x double> %3 |
| } |
| define <8 x double> @sito8f64_maskz(<8 x i32> %a, i8 %b) nounwind { |
| ; GENERIC-LABEL: sito8f64_maskz: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sito8f64_maskz: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| ; VLNOBW-LABEL: sito8f64_maskz: |
| ; VLNOBW: # %bb.0: |
| ; VLNOBW-NEXT: kmovw %edi, %k1 |
| ; VLNOBW-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z} |
| ; VLNOBW-NEXT: ret{{[l|q]}} |
| %1 = bitcast i8 %b to <8 x i1> |
| %2 = sitofp <8 x i32> %a to <8 x double> |
| %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> zeroinitializer |
| ret <8 x double> %3 |
| } |
| |
| define <8 x i32> @f64to8si(<8 x double> %a) { |
| ; GENERIC-LABEL: f64to8si: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: f64to8si: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [7:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = fptosi <8 x double> %a to <8 x i32> |
| ret <8 x i32> %b |
| } |
| |
| define <4 x i32> @f64to4si(<4 x double> %a) { |
| ; GENERIC-LABEL: f64to4si: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: f64to4si: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [7:1.00] |
| ; SKX-NEXT: vzeroupper # sched: [4:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = fptosi <4 x double> %a to <4 x i32> |
| ret <4 x i32> %b |
| } |
| |
| define <16 x float> @f64to16f32(<16 x double> %b) nounwind { |
| ; GENERIC-LABEL: f64to16f32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: vcvtpd2ps %zmm1, %ymm1 # sched: [4:1.00] |
| ; GENERIC-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: f64to16f32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [7:1.00] |
| ; SKX-NEXT: vcvtpd2ps %zmm1, %ymm1 # sched: [7:1.00] |
| ; SKX-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = fptrunc <16 x double> %b to <16 x float> |
| ret <16 x float> %a |
| } |
| |
| define <4 x float> @f64to4f32(<4 x double> %b) { |
| ; GENERIC-LABEL: f64to4f32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: f64to4f32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [7:1.00] |
| ; SKX-NEXT: vzeroupper # sched: [4:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = fptrunc <4 x double> %b to <4 x float> |
| ret <4 x float> %a |
| } |
| |
| define <4 x float> @f64to4f32_mask(<4 x double> %b, <4 x i1> %mask) { |
| ; GENERIC-LABEL: f64to4f32_mask: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovd2m %xmm1, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z} # sched: [4:1.00] |
| ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: f64to4f32_mask: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovd2m %xmm1, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z} # sched: [7:1.00] |
| ; SKX-NEXT: vzeroupper # sched: [4:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = fptrunc <4 x double> %b to <4 x float> |
| %c = select <4 x i1>%mask, <4 x float>%a, <4 x float> zeroinitializer |
| ret <4 x float> %c |
| } |
| |
| define <4 x float> @f64tof32_inreg(<2 x double> %a0, <4 x float> %a1) nounwind { |
| ; GENERIC-LABEL: f64tof32_inreg: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvtsd2ss %xmm0, %xmm1, %xmm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: f64tof32_inreg: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvtsd2ss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %ext = extractelement <2 x double> %a0, i32 0 |
| %cvt = fptrunc double %ext to float |
| %res = insertelement <4 x float> %a1, float %cvt, i32 0 |
| ret <4 x float> %res |
| } |
| |
| define <8 x double> @f32to8f64(<8 x float> %b) nounwind { |
| ; GENERIC-LABEL: f32to8f64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [2:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: f32to8f64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [7:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = fpext <8 x float> %b to <8 x double> |
| ret <8 x double> %a |
| } |
| |
| define <4 x double> @f32to4f64_mask(<4 x float> %b, <4 x double> %b1, <4 x double> %a1) { |
| ; GENERIC-LABEL: f32to4f64_mask: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcmpltpd %ymm2, %ymm1, %k1 # sched: [3:1.00] |
| ; GENERIC-NEXT: vcvtps2pd %xmm0, %ymm0 {%k1} {z} # sched: [2:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: f32to4f64_mask: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcmpltpd %ymm2, %ymm1, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vcvtps2pd %xmm0, %ymm0 {%k1} {z} # sched: [7:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = fpext <4 x float> %b to <4 x double> |
| %mask = fcmp ogt <4 x double> %a1, %b1 |
| %c = select <4 x i1> %mask, <4 x double> %a, <4 x double> zeroinitializer |
| ret <4 x double> %c |
| } |
| |
| define <2 x double> @f32tof64_inreg(<2 x double> %a0, <4 x float> %a1) nounwind { |
| ; GENERIC-LABEL: f32tof64_inreg: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: f32tof64_inreg: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %ext = extractelement <4 x float> %a1, i32 0 |
| %cvt = fpext float %ext to double |
| %res = insertelement <2 x double> %a0, double %cvt, i32 0 |
| ret <2 x double> %res |
| } |
| |
| define double @sltof64_load(i64* nocapture %e) { |
| ; GENERIC-LABEL: sltof64_load: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0 # sched: [9:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sltof64_load: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0 # sched: [9:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %tmp1 = load i64, i64* %e, align 8 |
| %conv = sitofp i64 %tmp1 to double |
| ret double %conv |
| } |
| |
| define double @sitof64_load(i32* %e) { |
| ; GENERIC-LABEL: sitof64_load: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0 # sched: [9:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sitof64_load: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0 # sched: [9:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %tmp1 = load i32, i32* %e, align 4 |
| %conv = sitofp i32 %tmp1 to double |
| ret double %conv |
| } |
| |
| define float @sitof32_load(i32* %e) { |
| ; GENERIC-LABEL: sitof32_load: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vcvtsi2ssl (%rdi), %xmm0, %xmm0 # sched: [10:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sitof32_load: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vcvtsi2ssl (%rdi), %xmm0, %xmm0 # sched: [9:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %tmp1 = load i32, i32* %e, align 4 |
| %conv = sitofp i32 %tmp1 to float |
| ret float %conv |
| } |
| |
| define float @sltof32_load(i64* %e) { |
| ; GENERIC-LABEL: sltof32_load: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0 # sched: [10:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sltof32_load: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0 # sched: [9:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %tmp1 = load i64, i64* %e, align 8 |
| %conv = sitofp i64 %tmp1 to float |
| ret float %conv |
| } |
| |
| define void @f32tof64_loadstore() { |
| ; GENERIC-LABEL: f32tof64_loadstore: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] |
| ; GENERIC-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: f32tof64_loadstore: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] |
| ; SKX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00] |
| ; SKX-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %f = alloca float, align 4 |
| %d = alloca double, align 8 |
| %tmp = load float, float* %f, align 4 |
| %conv = fpext float %tmp to double |
| store double %conv, double* %d, align 8 |
| ret void |
| } |
| |
| define void @f64tof32_loadstore() nounwind uwtable { |
| ; GENERIC-LABEL: f64tof32_loadstore: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50] |
| ; GENERIC-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: f64tof32_loadstore: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] |
| ; SKX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] |
| ; SKX-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %f = alloca float, align 4 |
| %d = alloca double, align 8 |
| %tmp = load double, double* %d, align 8 |
| %conv = fptrunc double %tmp to float |
| store float %conv, float* %f, align 4 |
| ret void |
| } |
| |
| define double @long_to_double(i64 %x) { |
| ; GENERIC-LABEL: long_to_double: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovq %rdi, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: long_to_double: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovq %rdi, %xmm0 # sched: [1:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %res = bitcast i64 %x to double |
| ret double %res |
| } |
| |
| define i64 @double_to_long(double %x) { |
| ; GENERIC-LABEL: double_to_long: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: double_to_long: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %res = bitcast double %x to i64 |
| ret i64 %res |
| } |
| |
| define float @int_to_float(i32 %x) { |
| ; GENERIC-LABEL: int_to_float: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: int_to_float: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %res = bitcast i32 %x to float |
| ret float %res |
| } |
| |
| define i32 @float_to_int(float %x) { |
| ; GENERIC-LABEL: float_to_int: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: float_to_int: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %res = bitcast float %x to i32 |
| ret i32 %res |
| } |
| |
| define <16 x double> @uito16f64(<16 x i32> %a) nounwind { |
| ; GENERIC-LABEL: uito16f64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvtudq2pd %ymm0, %zmm2 # sched: [4:1.00] |
| ; GENERIC-NEXT: vextractf64x4 $1, %zmm0, %ymm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vcvtudq2pd %ymm0, %zmm1 # sched: [4:1.00] |
| ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: uito16f64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvtudq2pd %ymm0, %zmm2 # sched: [7:1.00] |
| ; SKX-NEXT: vextractf64x4 $1, %zmm0, %ymm0 # sched: [3:1.00] |
| ; SKX-NEXT: vcvtudq2pd %ymm0, %zmm1 # sched: [7:1.00] |
| ; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = uitofp <16 x i32> %a to <16 x double> |
| ret <16 x double> %b |
| } |
| |
| define <8 x float> @slto8f32(<8 x i64> %a) { |
| ; GENERIC-LABEL: slto8f32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvtqq2ps %zmm0, %ymm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: slto8f32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvtqq2ps %zmm0, %ymm0 # sched: [7:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = sitofp <8 x i64> %a to <8 x float> |
| ret <8 x float> %b |
| } |
| |
| define <16 x float> @slto16f32(<16 x i64> %a) { |
| ; GENERIC-LABEL: slto16f32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvtqq2ps %zmm0, %ymm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: vcvtqq2ps %zmm1, %ymm1 # sched: [3:1.00] |
| ; GENERIC-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: slto16f32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvtqq2ps %zmm0, %ymm0 # sched: [7:1.00] |
| ; SKX-NEXT: vcvtqq2ps %zmm1, %ymm1 # sched: [7:1.00] |
| ; SKX-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = sitofp <16 x i64> %a to <16 x float> |
| ret <16 x float> %b |
| } |
| |
| define <8 x double> @slto8f64(<8 x i64> %a) { |
| ; GENERIC-LABEL: slto8f64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: slto8f64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = sitofp <8 x i64> %a to <8 x double> |
| ret <8 x double> %b |
| } |
| |
| define <16 x double> @slto16f64(<16 x i64> %a) { |
| ; GENERIC-LABEL: slto16f64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: vcvtqq2pd %zmm1, %zmm1 # sched: [4:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: slto16f64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: vcvtqq2pd %zmm1, %zmm1 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = sitofp <16 x i64> %a to <16 x double> |
| ret <16 x double> %b |
| } |
| |
| define <8 x float> @ulto8f32(<8 x i64> %a) { |
| ; GENERIC-LABEL: ulto8f32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvtuqq2ps %zmm0, %ymm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: ulto8f32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvtuqq2ps %zmm0, %ymm0 # sched: [7:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = uitofp <8 x i64> %a to <8 x float> |
| ret <8 x float> %b |
| } |
| |
| define <16 x float> @ulto16f32(<16 x i64> %a) { |
| ; GENERIC-LABEL: ulto16f32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvtuqq2ps %zmm0, %ymm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: vcvtuqq2ps %zmm1, %ymm1 # sched: [3:1.00] |
| ; GENERIC-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: ulto16f32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvtuqq2ps %zmm0, %ymm0 # sched: [7:1.00] |
| ; SKX-NEXT: vcvtuqq2ps %zmm1, %ymm1 # sched: [7:1.00] |
| ; SKX-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = uitofp <16 x i64> %a to <16 x float> |
| ret <16 x float> %b |
| } |
| |
| define <8 x double> @uito8f64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind { |
| ; GENERIC-LABEL: uito8f64_mask: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} # sched: [4:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: uito8f64_mask: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| ; VLNOBW-LABEL: uito8f64_mask: |
| ; VLNOBW: # %bb.0: |
| ; VLNOBW-NEXT: kmovw %edi, %k1 |
| ; VLNOBW-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} |
| ; VLNOBW-NEXT: ret{{[l|q]}} |
| %1 = bitcast i8 %c to <8 x i1> |
| %2 = uitofp <8 x i32> %b to <8 x double> |
| %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> %a |
| ret <8 x double> %3 |
| } |
| define <8 x double> @uito8f64_maskz(<8 x i32> %a, i8 %b) nounwind { |
| ; GENERIC-LABEL: uito8f64_maskz: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: uito8f64_maskz: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %1 = bitcast i8 %b to <8 x i1> |
| %2 = uitofp <8 x i32> %a to <8 x double> |
| %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> zeroinitializer |
| ret <8 x double> %3 |
| } |
| |
| define <4 x double> @uito4f64(<4 x i32> %a) nounwind { |
| ; GENERIC-LABEL: uito4f64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvtudq2pd %xmm0, %ymm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: uito4f64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvtudq2pd %xmm0, %ymm0 # sched: [7:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = uitofp <4 x i32> %a to <4 x double> |
| ret <4 x double> %b |
| } |
| |
| define <16 x float> @uito16f32(<16 x i32> %a) nounwind { |
| ; GENERIC-LABEL: uito16f32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvtudq2ps %zmm0, %zmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: uito16f32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvtudq2ps %zmm0, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = uitofp <16 x i32> %a to <16 x float> |
| ret <16 x float> %b |
| } |
| |
| define <8 x double> @uito8f64(<8 x i32> %a) { |
| ; GENERIC-LABEL: uito8f64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvtudq2pd %ymm0, %zmm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: uito8f64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvtudq2pd %ymm0, %zmm0 # sched: [7:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = uitofp <8 x i32> %a to <8 x double> |
| ret <8 x double> %b |
| } |
| |
| define <8 x float> @uito8f32(<8 x i32> %a) nounwind { |
| ; GENERIC-LABEL: uito8f32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvtudq2ps %ymm0, %ymm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: uito8f32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvtudq2ps %ymm0, %ymm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = uitofp <8 x i32> %a to <8 x float> |
| ret <8 x float> %b |
| } |
| |
| define <4 x float> @uito4f32(<4 x i32> %a) nounwind { |
| ; GENERIC-LABEL: uito4f32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvtudq2ps %xmm0, %xmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: uito4f32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvtudq2ps %xmm0, %xmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = uitofp <4 x i32> %a to <4 x float> |
| ret <4 x float> %b |
| } |
| |
| define i32 @fptosi(float %a) nounwind { |
| ; GENERIC-LABEL: fptosi: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvttss2si %xmm0, %eax # sched: [5:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: fptosi: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvttss2si %xmm0, %eax # sched: [6:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = fptosi float %a to i32 |
| ret i32 %b |
| } |
| |
| define i32 @fptoui(float %a) nounwind { |
| ; GENERIC-LABEL: fptoui: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvttss2usi %xmm0, %eax # sched: [5:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: fptoui: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvttss2usi %xmm0, %eax # sched: [6:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = fptoui float %a to i32 |
| ret i32 %b |
| } |
| |
| define float @uitof32(i32 %a) nounwind { |
| ; GENERIC-LABEL: uitof32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0 # sched: [5:2.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: uitof32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = uitofp i32 %a to float |
| ret float %b |
| } |
| |
| define double @uitof64(i32 %a) nounwind { |
| ; GENERIC-LABEL: uitof64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: uitof64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = uitofp i32 %a to double |
| ret double %b |
| } |
| |
| define <16 x float> @sbto16f32(<16 x i32> %a) { |
| ; GENERIC-LABEL: sbto16f32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sbto16f32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25] |
| ; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp slt <16 x i32> %a, zeroinitializer |
| %1 = sitofp <16 x i1> %mask to <16 x float> |
| ret <16 x float> %1 |
| } |
| |
| define <16 x float> @scto16f32(<16 x i8> %a) { |
| ; GENERIC-LABEL: scto16f32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: scto16f32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovsxbd %xmm0, %zmm0 # sched: [3:1.00] |
| ; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %1 = sitofp <16 x i8> %a to <16 x float> |
| ret <16 x float> %1 |
| } |
| |
| define <16 x float> @ssto16f32(<16 x i16> %a) { |
| ; GENERIC-LABEL: ssto16f32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovsxwd %ymm0, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: ssto16f32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovsxwd %ymm0, %zmm0 # sched: [3:1.00] |
| ; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %1 = sitofp <16 x i16> %a to <16 x float> |
| ret <16 x float> %1 |
| } |
| |
| define <8 x double> @ssto16f64(<8 x i16> %a) { |
| ; GENERIC-LABEL: ssto16f64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: ssto16f64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00] |
| ; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %1 = sitofp <8 x i16> %a to <8 x double> |
| ret <8 x double> %1 |
| } |
| |
| define <8 x double> @scto8f64(<8 x i8> %a) { |
| ; GENERIC-LABEL: scto8f64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] |
| ; GENERIC-NEXT: vpslld $24, %ymm0, %ymm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpsrad $24, %ymm0, %ymm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: scto8f64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] |
| ; SKX-NEXT: vpslld $24, %ymm0, %ymm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpsrad $24, %ymm0, %ymm0 # sched: [1:0.50] |
| ; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %1 = sitofp <8 x i8> %a to <8 x double> |
| ret <8 x double> %1 |
| } |
| |
| define <16 x double> @scto16f64(<16 x i8> %a) { |
| ; GENERIC-LABEL: scto16f64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm1 # sched: [1:1.00] |
| ; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00] |
| ; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: scto16f64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovsxbd %xmm0, %zmm1 # sched: [3:1.00] |
| ; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00] |
| ; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00] |
| ; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = sitofp <16 x i8> %a to <16 x double> |
| ret <16 x double> %b |
| } |
| |
| define <16 x double> @sbto16f64(<16 x double> %a) { |
| ; GENERIC-LABEL: sbto16f64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:1.00] |
| ; GENERIC-NEXT: vcmpltpd %zmm0, %zmm2, %k0 # sched: [3:1.00] |
| ; GENERIC-NEXT: vcmpltpd %zmm1, %zmm2, %k1 # sched: [3:1.00] |
| ; GENERIC-NEXT: kunpckbw %k0, %k1, %k0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovm2d %k0, %zmm1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00] |
| ; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sbto16f64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:0.33] |
| ; SKX-NEXT: vcmpltpd %zmm0, %zmm2, %k0 # sched: [3:1.00] |
| ; SKX-NEXT: vcmpltpd %zmm1, %zmm2, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: kunpckbw %k0, %k1, %k0 # sched: [3:1.00] |
| ; SKX-NEXT: vpmovm2d %k0, %zmm1 # sched: [1:0.25] |
| ; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00] |
| ; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00] |
| ; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %cmpres = fcmp ogt <16 x double> %a, zeroinitializer |
| %1 = sitofp <16 x i1> %cmpres to <16 x double> |
| ret <16 x double> %1 |
| } |
| |
| define <8 x double> @sbto8f64(<8 x double> %a) { |
| ; GENERIC-LABEL: sbto8f64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00] |
| ; GENERIC-NEXT: vcmpltpd %zmm0, %zmm1, %k0 # sched: [3:1.00] |
| ; GENERIC-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sbto8f64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33] |
| ; SKX-NEXT: vcmpltpd %zmm0, %zmm1, %k0 # sched: [3:1.00] |
| ; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25] |
| ; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %cmpres = fcmp ogt <8 x double> %a, zeroinitializer |
| %1 = sitofp <8 x i1> %cmpres to <8 x double> |
| ret <8 x double> %1 |
| } |
| |
| define <8 x float> @sbto8f32(<8 x float> %a) { |
| ; GENERIC-LABEL: sbto8f32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00] |
| ; GENERIC-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sbto8f32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33] |
| ; SKX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] |
| ; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %cmpres = fcmp ogt <8 x float> %a, zeroinitializer |
| %1 = sitofp <8 x i1> %cmpres to <8 x float> |
| ret <8 x float> %1 |
| } |
| |
| define <4 x float> @sbto4f32(<4 x float> %a) { |
| ; GENERIC-LABEL: sbto4f32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00] |
| ; GENERIC-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sbto4f32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33] |
| ; SKX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # sched: [4:0.50] |
| ; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %cmpres = fcmp ogt <4 x float> %a, zeroinitializer |
| %1 = sitofp <4 x i1> %cmpres to <4 x float> |
| ret <4 x float> %1 |
| } |
| |
| define <4 x double> @sbto4f64(<4 x double> %a) { |
| ; GENERIC-LABEL: sbto4f64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00] |
| ; GENERIC-NEXT: vcmpltpd %ymm0, %ymm1, %k0 # sched: [3:1.00] |
| ; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sbto4f64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33] |
| ; SKX-NEXT: vcmpltpd %ymm0, %ymm1, %k0 # sched: [3:1.00] |
| ; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25] |
| ; SKX-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %cmpres = fcmp ogt <4 x double> %a, zeroinitializer |
| %1 = sitofp <4 x i1> %cmpres to <4 x double> |
| ret <4 x double> %1 |
| } |
| |
| define <2 x float> @sbto2f32(<2 x float> %a) { |
| ; GENERIC-LABEL: sbto2f32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00] |
| ; GENERIC-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sbto2f32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33] |
| ; SKX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # sched: [4:0.50] |
| ; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %cmpres = fcmp ogt <2 x float> %a, zeroinitializer |
| %1 = sitofp <2 x i1> %cmpres to <2 x float> |
| ret <2 x float> %1 |
| } |
| |
| define <2 x double> @sbto2f64(<2 x double> %a) { |
| ; GENERIC-LABEL: sbto2f64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00] |
| ; GENERIC-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00] |
| ; GENERIC-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sbto2f64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33] |
| ; SKX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] |
| ; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00] |
| ; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %cmpres = fcmp ogt <2 x double> %a, zeroinitializer |
| %1 = sitofp <2 x i1> %cmpres to <2 x double> |
| ret <2 x double> %1 |
| } |
| |
| define <16 x float> @ucto16f32(<16 x i8> %a) { |
| ; GENERIC-LABEL: ucto16f32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00] |
| ; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: ucto16f32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00] |
| ; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = uitofp <16 x i8> %a to <16 x float> |
| ret <16 x float>%b |
| } |
| |
| define <8 x double> @ucto8f64(<8 x i8> %a) { |
| ; GENERIC-LABEL: ucto8f64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50] |
| ; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] |
| ; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: ucto8f64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50] |
| ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] |
| ; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = uitofp <8 x i8> %a to <8 x double> |
| ret <8 x double> %b |
| } |
| |
| define <16 x float> @swto16f32(<16 x i16> %a) { |
| ; GENERIC-LABEL: swto16f32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovsxwd %ymm0, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: swto16f32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovsxwd %ymm0, %zmm0 # sched: [3:1.00] |
| ; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = sitofp <16 x i16> %a to <16 x float> |
| ret <16 x float> %b |
| } |
| |
| define <8 x double> @swto8f64(<8 x i16> %a) { |
| ; GENERIC-LABEL: swto8f64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: swto8f64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00] |
| ; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = sitofp <8 x i16> %a to <8 x double> |
| ret <8 x double> %b |
| } |
| |
| define <16 x double> @swto16f64(<16 x i16> %a) { |
| ; GENERIC-LABEL: swto16f64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovsxwd %ymm0, %zmm1 # sched: [1:1.00] |
| ; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00] |
| ; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: swto16f64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovsxwd %ymm0, %zmm1 # sched: [3:1.00] |
| ; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00] |
| ; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00] |
| ; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = sitofp <16 x i16> %a to <16 x double> |
| ret <16 x double> %b |
| } |
| |
| define <16 x double> @ucto16f64(<16 x i8> %a) { |
| ; GENERIC-LABEL: ucto16f64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00] |
| ; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00] |
| ; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: ucto16f64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00] |
| ; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00] |
| ; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00] |
| ; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = uitofp <16 x i8> %a to <16 x double> |
| ret <16 x double> %b |
| } |
| |
| define <16 x float> @uwto16f32(<16 x i16> %a) { |
| ; GENERIC-LABEL: uwto16f32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00] |
| ; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: uwto16f32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00] |
| ; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = uitofp <16 x i16> %a to <16 x float> |
| ret <16 x float> %b |
| } |
| |
| define <8 x double> @uwto8f64(<8 x i16> %a) { |
| ; GENERIC-LABEL: uwto8f64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] |
| ; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: uwto8f64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] |
| ; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = uitofp <8 x i16> %a to <8 x double> |
| ret <8 x double> %b |
| } |
| |
| define <16 x double> @uwto16f64(<16 x i16> %a) { |
| ; GENERIC-LABEL: uwto16f64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00] |
| ; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00] |
| ; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: uwto16f64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00] |
| ; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00] |
| ; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00] |
| ; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = uitofp <16 x i16> %a to <16 x double> |
| ret <16 x double> %b |
| } |
| |
| define <16 x float> @sito16f32(<16 x i32> %a) { |
| ; GENERIC-LABEL: sito16f32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sito16f32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = sitofp <16 x i32> %a to <16 x float> |
| ret <16 x float> %b |
| } |
| |
| define <16 x double> @sito16f64(<16 x i32> %a) { |
| ; GENERIC-LABEL: sito16f64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm2 # sched: [4:1.00] |
| ; GENERIC-NEXT: vextractf64x4 $1, %zmm0, %ymm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm1 # sched: [4:1.00] |
| ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sito16f64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvtdq2pd %ymm0, %zmm2 # sched: [7:1.00] |
| ; SKX-NEXT: vextractf64x4 $1, %zmm0, %ymm0 # sched: [3:1.00] |
| ; SKX-NEXT: vcvtdq2pd %ymm0, %zmm1 # sched: [7:1.00] |
| ; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = sitofp <16 x i32> %a to <16 x double> |
| ret <16 x double> %b |
| } |
| |
| define <16 x float> @usto16f32(<16 x i16> %a) { |
| ; GENERIC-LABEL: usto16f32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00] |
| ; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: usto16f32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00] |
| ; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = uitofp <16 x i16> %a to <16 x float> |
| ret <16 x float> %b |
| } |
| |
| define <16 x float> @ubto16f32(<16 x i32> %a) { |
| ; GENERIC-LABEL: ubto16f32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: ubto16f32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25] |
| ; SKX-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:1.00] |
| ; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp slt <16 x i32> %a, zeroinitializer |
| %1 = uitofp <16 x i1> %mask to <16 x float> |
| ret <16 x float> %1 |
| } |
| |
| define <16 x double> @ubto16f64(<16 x i32> %a) { |
| ; GENERIC-LABEL: ubto16f64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpsrld $31, %zmm0, %zmm1 # sched: [1:1.00] |
| ; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00] |
| ; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: ubto16f64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25] |
| ; SKX-NEXT: vpsrld $31, %zmm0, %zmm1 # sched: [1:1.00] |
| ; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00] |
| ; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00] |
| ; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp slt <16 x i32> %a, zeroinitializer |
| %1 = uitofp <16 x i1> %mask to <16 x double> |
| ret <16 x double> %1 |
| } |
| |
| define <8 x float> @ubto8f32(<8 x i32> %a) { |
| ; GENERIC-LABEL: ubto8f32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] |
| ; GENERIC-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: ubto8f32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] |
| ; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp slt <8 x i32> %a, zeroinitializer |
| %1 = uitofp <8 x i1> %mask to <8 x float> |
| ret <8 x float> %1 |
| } |
| |
| define <8 x double> @ubto8f64(<8 x i32> %a) { |
| ; GENERIC-LABEL: ubto8f64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] |
| ; GENERIC-NEXT: vpsrld $31, %ymm0, %ymm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: ubto8f64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] |
| ; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpsrld $31, %ymm0, %ymm0 # sched: [1:0.50] |
| ; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp slt <8 x i32> %a, zeroinitializer |
| %1 = uitofp <8 x i1> %mask to <8 x double> |
| ret <8 x double> %1 |
| } |
| |
| define <4 x float> @ubto4f32(<4 x i32> %a) { |
| ; GENERIC-LABEL: ubto4f32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 # sched: [1:0.50] |
| ; GENERIC-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0 # sched: [7:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: ubto4f32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] |
| ; SKX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0 # sched: [7:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp slt <4 x i32> %a, zeroinitializer |
| %1 = uitofp <4 x i1> %mask to <4 x float> |
| ret <4 x float> %1 |
| } |
| |
| define <4 x double> @ubto4f64(<4 x i32> %a) { |
| ; GENERIC-LABEL: ubto4f64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 # sched: [1:0.50] |
| ; GENERIC-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: ubto4f64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] |
| ; SKX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp slt <4 x i32> %a, zeroinitializer |
| %1 = uitofp <4 x i1> %mask to <4 x double> |
| ret <4 x double> %1 |
| } |
| |
| define <2 x float> @ubto2f32(<2 x i32> %a) { |
| ; GENERIC-LABEL: ubto2f32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.50] |
| ; GENERIC-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] |
| ; GENERIC-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50] |
| ; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: ubto2f32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] |
| ; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.33] |
| ; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50] |
| ; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp ne <2 x i32> %a, zeroinitializer |
| %1 = uitofp <2 x i1> %mask to <2 x float> |
| ret <2 x float> %1 |
| } |
| |
| define <2 x double> @ubto2f64(<2 x i32> %a) { |
| ; GENERIC-LABEL: ubto2f64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.50] |
| ; GENERIC-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] |
| ; GENERIC-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50] |
| ; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:0.50] |
| ; GENERIC-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: ubto2f64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] |
| ; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.33] |
| ; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50] |
| ; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00] |
| ; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp ne <2 x i32> %a, zeroinitializer |
| %1 = uitofp <2 x i1> %mask to <2 x double> |
| ret <2 x double> %1 |
| } |
| |
| define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: zext_8x8mem_to_8x16: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_8x8mem_to_8x16: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <8 x i8>,<8 x i8> *%i,align 1 |
| %x = zext <8 x i8> %a to <8 x i16> |
| %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer |
| ret <8 x i16> %ret |
| } |
| |
| define <8 x i16> @sext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: sext_8x8mem_to_8x16: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_8x8mem_to_8x16: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <8 x i8>,<8 x i8> *%i,align 1 |
| %x = sext <8 x i8> %a to <8 x i16> |
| %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer |
| ret <8 x i16> %ret |
| } |
| |
| |
| define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: zext_16x8mem_to_16x16: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_16x8mem_to_16x16: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <16 x i8>,<16 x i8> *%i,align 1 |
| %x = zext <16 x i8> %a to <16 x i16> |
| %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer |
| ret <16 x i16> %ret |
| } |
| |
| define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: sext_16x8mem_to_16x16: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_16x8mem_to_16x16: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <16 x i8>,<16 x i8> *%i,align 1 |
| %x = sext <16 x i8> %a to <16 x i16> |
| %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer |
| ret <16 x i16> %ret |
| } |
| |
| define <16 x i16> @zext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone { |
| ; GENERIC-LABEL: zext_16x8_to_16x16: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_16x8_to_16x16: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = zext <16 x i8> %a to <16 x i16> |
| ret <16 x i16> %x |
| } |
| |
| define <16 x i16> @zext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: zext_16x8_to_16x16_mask: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_16x8_to_16x16_mask: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = zext <16 x i8> %a to <16 x i16> |
| %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer |
| ret <16 x i16> %ret |
| } |
| |
| define <16 x i16> @sext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone { |
| ; GENERIC-LABEL: sext_16x8_to_16x16: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_16x8_to_16x16: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = sext <16 x i8> %a to <16 x i16> |
| ret <16 x i16> %x |
| } |
| |
| define <16 x i16> @sext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: sext_16x8_to_16x16_mask: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z} # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_16x8_to_16x16_mask: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z} # sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = sext <16 x i8> %a to <16 x i16> |
| %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer |
| ret <16 x i16> %ret |
| } |
| |
| define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: zext_32x8mem_to_32x16: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_32x8mem_to_32x16: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero sched: [10:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <32 x i8>,<32 x i8> *%i,align 1 |
| %x = zext <32 x i8> %a to <32 x i16> |
| %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer |
| ret <32 x i16> %ret |
| } |
| |
| define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: sext_32x8mem_to_32x16: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_32x8mem_to_32x16: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <32 x i8>,<32 x i8> *%i,align 1 |
| %x = sext <32 x i8> %a to <32 x i16> |
| %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer |
| ret <32 x i16> %ret |
| } |
| |
| define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone { |
| ; GENERIC-LABEL: zext_32x8_to_32x16: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_32x8_to_32x16: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = zext <32 x i8> %a to <32 x i16> |
| ret <32 x i16> %x |
| } |
| |
| define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: zext_32x8_to_32x16_mask: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_32x8_to_32x16_mask: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = zext <32 x i8> %a to <32 x i16> |
| %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer |
| ret <32 x i16> %ret |
| } |
| |
| define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone { |
| ; GENERIC-LABEL: sext_32x8_to_32x16: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovsxbw %ymm0, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_32x8_to_32x16: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovsxbw %ymm0, %zmm0 # sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = sext <32 x i8> %a to <32 x i16> |
| ret <32 x i16> %x |
| } |
| |
| define <32 x i16> @sext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: sext_32x8_to_32x16_mask: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z} # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_32x8_to_32x16_mask: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z} # sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = sext <32 x i8> %a to <32 x i16> |
| %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer |
| ret <32 x i16> %ret |
| } |
| |
| define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: zext_4x8mem_to_4x32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_4x8mem_to_4x32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [9:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <4 x i8>,<4 x i8> *%i,align 1 |
| %x = zext <4 x i8> %a to <4 x i32> |
| %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer |
| ret <4 x i32> %ret |
| } |
| |
| define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: sext_4x8mem_to_4x32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_4x8mem_to_4x32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <4 x i8>,<4 x i8> *%i,align 1 |
| %x = sext <4 x i8> %a to <4 x i32> |
| %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer |
| ret <4 x i32> %ret |
| } |
| |
| define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: zext_8x8mem_to_8x32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_8x8mem_to_8x32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <8 x i8>,<8 x i8> *%i,align 1 |
| %x = zext <8 x i8> %a to <8 x i32> |
| %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer |
| ret <8 x i32> %ret |
| } |
| |
| define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: sext_8x8mem_to_8x32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_8x8mem_to_8x32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <8 x i8>,<8 x i8> *%i,align 1 |
| %x = sext <8 x i8> %a to <8 x i32> |
| %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer |
| ret <8 x i32> %ret |
| } |
| |
| define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: zext_16x8mem_to_16x32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_16x8mem_to_16x32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero sched: [10:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <16 x i8>,<16 x i8> *%i,align 1 |
| %x = zext <16 x i8> %a to <16 x i32> |
| %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer |
| ret <16 x i32> %ret |
| } |
| |
| define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: sext_16x8mem_to_16x32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_16x8mem_to_16x32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <16 x i8>,<16 x i8> *%i,align 1 |
| %x = sext <16 x i8> %a to <16 x i32> |
| %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer |
| ret <16 x i32> %ret |
| } |
| |
| define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: zext_16x8_to_16x32_mask: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_16x8_to_16x32_mask: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = zext <16 x i8> %a to <16 x i32> |
| %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer |
| ret <16 x i32> %ret |
| } |
| |
| define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: sext_16x8_to_16x32_mask: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z} # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_16x8_to_16x32_mask: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z} # sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = sext <16 x i8> %a to <16 x i32> |
| %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer |
| ret <16 x i32> %ret |
| } |
| |
| define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone { |
| ; GENERIC-LABEL: zext_16x8_to_16x32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_16x8_to_16x32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = zext <16 x i8> %i to <16 x i32> |
| ret <16 x i32> %x |
| } |
| |
| define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone { |
| ; GENERIC-LABEL: sext_16x8_to_16x32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_16x8_to_16x32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovsxbd %xmm0, %zmm0 # sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = sext <16 x i8> %i to <16 x i32> |
| ret <16 x i32> %x |
| } |
| |
| define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: zext_2x8mem_to_2x64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_2x8mem_to_2x64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [9:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <2 x i8>,<2 x i8> *%i,align 1 |
| %x = zext <2 x i8> %a to <2 x i64> |
| %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer |
| ret <2 x i64> %ret |
| } |
| define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: sext_2x8mem_to_2x64mask: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_2x8mem_to_2x64mask: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <2 x i8>,<2 x i8> *%i,align 1 |
| %x = sext <2 x i8> %a to <2 x i64> |
| %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer |
| ret <2 x i64> %ret |
| } |
| define <2 x i64> @sext_2x8mem_to_2x64(<2 x i8> *%i) nounwind readnone { |
| ; GENERIC-LABEL: sext_2x8mem_to_2x64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovsxbq (%rdi), %xmm0 # sched: [7:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_2x8mem_to_2x64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovsxbq (%rdi), %xmm0 # sched: [6:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <2 x i8>,<2 x i8> *%i,align 1 |
| %x = sext <2 x i8> %a to <2 x i64> |
| ret <2 x i64> %x |
| } |
| |
| define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: zext_4x8mem_to_4x64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_4x8mem_to_4x64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <4 x i8>,<4 x i8> *%i,align 1 |
| %x = zext <4 x i8> %a to <4 x i64> |
| %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer |
| ret <4 x i64> %ret |
| } |
| |
| define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: sext_4x8mem_to_4x64mask: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_4x8mem_to_4x64mask: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <4 x i8>,<4 x i8> *%i,align 1 |
| %x = sext <4 x i8> %a to <4 x i64> |
| %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer |
| ret <4 x i64> %ret |
| } |
| |
| define <4 x i64> @sext_4x8mem_to_4x64(<4 x i8> *%i) nounwind readnone { |
| ; GENERIC-LABEL: sext_4x8mem_to_4x64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm0 # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_4x8mem_to_4x64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovsxbq (%rdi), %ymm0 # sched: [8:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <4 x i8>,<4 x i8> *%i,align 1 |
| %x = sext <4 x i8> %a to <4 x i64> |
| ret <4 x i64> %x |
| } |
| |
| define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: zext_8x8mem_to_8x64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_8x8mem_to_8x64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <8 x i8>,<8 x i8> *%i,align 1 |
| %x = zext <8 x i8> %a to <8 x i64> |
| %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer |
| ret <8 x i64> %ret |
| } |
| |
| define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: sext_8x8mem_to_8x64mask: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_8x8mem_to_8x64mask: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <8 x i8>,<8 x i8> *%i,align 1 |
| %x = sext <8 x i8> %a to <8 x i64> |
| %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer |
| ret <8 x i64> %ret |
| } |
| |
| define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8> *%i) nounwind readnone { |
| ; GENERIC-LABEL: sext_8x8mem_to_8x64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_8x8mem_to_8x64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [10:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <8 x i8>,<8 x i8> *%i,align 1 |
| %x = sext <8 x i8> %a to <8 x i64> |
| ret <8 x i64> %x |
| } |
| |
| define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: zext_4x16mem_to_4x32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_4x16mem_to_4x32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [9:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <4 x i16>,<4 x i16> *%i,align 1 |
| %x = zext <4 x i16> %a to <4 x i32> |
| %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer |
| ret <4 x i32> %ret |
| } |
| |
| define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: sext_4x16mem_to_4x32mask: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_4x16mem_to_4x32mask: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <4 x i16>,<4 x i16> *%i,align 1 |
| %x = sext <4 x i16> %a to <4 x i32> |
| %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer |
| ret <4 x i32> %ret |
| } |
| |
| define <4 x i32> @sext_4x16mem_to_4x32(<4 x i16> *%i) nounwind readnone { |
| ; GENERIC-LABEL: sext_4x16mem_to_4x32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovsxwd (%rdi), %xmm0 # sched: [7:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_4x16mem_to_4x32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovsxwd (%rdi), %xmm0 # sched: [6:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <4 x i16>,<4 x i16> *%i,align 1 |
| %x = sext <4 x i16> %a to <4 x i32> |
| ret <4 x i32> %x |
| } |
| |
| |
| define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: zext_8x16mem_to_8x32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_8x16mem_to_8x32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [10:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <8 x i16>,<8 x i16> *%i,align 1 |
| %x = zext <8 x i16> %a to <8 x i32> |
| %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer |
| ret <8 x i32> %ret |
| } |
| |
| define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: sext_8x16mem_to_8x32mask: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_8x16mem_to_8x32mask: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <8 x i16>,<8 x i16> *%i,align 1 |
| %x = sext <8 x i16> %a to <8 x i32> |
| %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer |
| ret <8 x i32> %ret |
| } |
| |
| define <8 x i32> @sext_8x16mem_to_8x32(<8 x i16> *%i) nounwind readnone { |
| ; GENERIC-LABEL: sext_8x16mem_to_8x32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm0 # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_8x16mem_to_8x32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovsxwd (%rdi), %ymm0 # sched: [9:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <8 x i16>,<8 x i16> *%i,align 1 |
| %x = sext <8 x i16> %a to <8 x i32> |
| ret <8 x i32> %x |
| } |
| |
| define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: zext_8x16_to_8x32mask: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_8x16_to_8x32mask: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = zext <8 x i16> %a to <8 x i32> |
| %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer |
| ret <8 x i32> %ret |
| } |
| |
| define <8 x i32> @zext_8x16_to_8x32(<8 x i16> %a ) nounwind readnone { |
| ; GENERIC-LABEL: zext_8x16_to_8x32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_8x16_to_8x32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = zext <8 x i16> %a to <8 x i32> |
| ret <8 x i32> %x |
| } |
| |
| define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: zext_16x16mem_to_16x32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_16x16mem_to_16x32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <16 x i16>,<16 x i16> *%i,align 1 |
| %x = zext <16 x i16> %a to <16 x i32> |
| %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer |
| ret <16 x i32> %ret |
| } |
| |
| define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: sext_16x16mem_to_16x32mask: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_16x16mem_to_16x32mask: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <16 x i16>,<16 x i16> *%i,align 1 |
| %x = sext <16 x i16> %a to <16 x i32> |
| %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer |
| ret <16 x i32> %ret |
| } |
| |
| define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16> *%i) nounwind readnone { |
| ; GENERIC-LABEL: sext_16x16mem_to_16x32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_16x16mem_to_16x32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovsxwd (%rdi), %zmm0 # sched: [10:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <16 x i16>,<16 x i16> *%i,align 1 |
| %x = sext <16 x i16> %a to <16 x i32> |
| ret <16 x i32> %x |
| } |
| define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a , <16 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: zext_16x16_to_16x32mask: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_16x16_to_16x32mask: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = zext <16 x i16> %a to <16 x i32> |
| %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer |
| ret <16 x i32> %ret |
| } |
| |
| define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %a ) nounwind readnone { |
| ; GENERIC-LABEL: zext_16x16_to_16x32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_16x16_to_16x32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = zext <16 x i16> %a to <16 x i32> |
| ret <16 x i32> %x |
| } |
| |
| define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: zext_2x16mem_to_2x64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_2x16mem_to_2x64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [9:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <2 x i16>,<2 x i16> *%i,align 1 |
| %x = zext <2 x i16> %a to <2 x i64> |
| %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer |
| ret <2 x i64> %ret |
| } |
| |
| define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: sext_2x16mem_to_2x64mask: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_2x16mem_to_2x64mask: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <2 x i16>,<2 x i16> *%i,align 1 |
| %x = sext <2 x i16> %a to <2 x i64> |
| %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer |
| ret <2 x i64> %ret |
| } |
| |
| define <2 x i64> @sext_2x16mem_to_2x64(<2 x i16> *%i) nounwind readnone { |
| ; GENERIC-LABEL: sext_2x16mem_to_2x64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovsxwq (%rdi), %xmm0 # sched: [7:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_2x16mem_to_2x64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovsxwq (%rdi), %xmm0 # sched: [6:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <2 x i16>,<2 x i16> *%i,align 1 |
| %x = sext <2 x i16> %a to <2 x i64> |
| ret <2 x i64> %x |
| } |
| |
| define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: zext_4x16mem_to_4x64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_4x16mem_to_4x64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <4 x i16>,<4 x i16> *%i,align 1 |
| %x = zext <4 x i16> %a to <4 x i64> |
| %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer |
| ret <4 x i64> %ret |
| } |
| |
| define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: sext_4x16mem_to_4x64mask: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_4x16mem_to_4x64mask: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <4 x i16>,<4 x i16> *%i,align 1 |
| %x = sext <4 x i16> %a to <4 x i64> |
| %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer |
| ret <4 x i64> %ret |
| } |
| |
| define <4 x i64> @sext_4x16mem_to_4x64(<4 x i16> *%i) nounwind readnone { |
| ; GENERIC-LABEL: sext_4x16mem_to_4x64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm0 # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_4x16mem_to_4x64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovsxwq (%rdi), %ymm0 # sched: [8:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <4 x i16>,<4 x i16> *%i,align 1 |
| %x = sext <4 x i16> %a to <4 x i64> |
| ret <4 x i64> %x |
| } |
| |
| define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: zext_8x16mem_to_8x64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_8x16mem_to_8x64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <8 x i16>,<8 x i16> *%i,align 1 |
| %x = zext <8 x i16> %a to <8 x i64> |
| %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer |
| ret <8 x i64> %ret |
| } |
| |
| define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: sext_8x16mem_to_8x64mask: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_8x16mem_to_8x64mask: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <8 x i16>,<8 x i16> *%i,align 1 |
| %x = sext <8 x i16> %a to <8 x i64> |
| %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer |
| ret <8 x i64> %ret |
| } |
| |
| define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16> *%i) nounwind readnone { |
| ; GENERIC-LABEL: sext_8x16mem_to_8x64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_8x16mem_to_8x64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovsxwq (%rdi), %zmm0 # sched: [10:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <8 x i16>,<8 x i16> *%i,align 1 |
| %x = sext <8 x i16> %a to <8 x i64> |
| ret <8 x i64> %x |
| } |
| |
| define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: zext_8x16_to_8x64mask: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_8x16_to_8x64mask: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = zext <8 x i16> %a to <8 x i64> |
| %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer |
| ret <8 x i64> %ret |
| } |
| |
| define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %a) nounwind readnone { |
| ; GENERIC-LABEL: zext_8x16_to_8x64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_8x16_to_8x64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %ret = zext <8 x i16> %a to <8 x i64> |
| ret <8 x i64> %ret |
| } |
| |
| define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: zext_2x32mem_to_2x64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_2x32mem_to_2x64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero sched: [9:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <2 x i32>,<2 x i32> *%i,align 1 |
| %x = zext <2 x i32> %a to <2 x i64> |
| %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer |
| ret <2 x i64> %ret |
| } |
| |
| define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: sext_2x32mem_to_2x64mask: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_2x32mem_to_2x64mask: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <2 x i32>,<2 x i32> *%i,align 1 |
| %x = sext <2 x i32> %a to <2 x i64> |
| %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer |
| ret <2 x i64> %ret |
| } |
| |
| define <2 x i64> @sext_2x32mem_to_2x64(<2 x i32> *%i) nounwind readnone { |
| ; GENERIC-LABEL: sext_2x32mem_to_2x64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovsxdq (%rdi), %xmm0 # sched: [7:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_2x32mem_to_2x64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovsxdq (%rdi), %xmm0 # sched: [6:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <2 x i32>,<2 x i32> *%i,align 1 |
| %x = sext <2 x i32> %a to <2 x i64> |
| ret <2 x i64> %x |
| } |
| |
| define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: zext_4x32mem_to_4x64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_4x32mem_to_4x64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <4 x i32>,<4 x i32> *%i,align 1 |
| %x = zext <4 x i32> %a to <4 x i64> |
| %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer |
| ret <4 x i64> %ret |
| } |
| |
| define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: sext_4x32mem_to_4x64mask: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_4x32mem_to_4x64mask: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <4 x i32>,<4 x i32> *%i,align 1 |
| %x = sext <4 x i32> %a to <4 x i64> |
| %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer |
| ret <4 x i64> %ret |
| } |
| |
| define <4 x i64> @sext_4x32mem_to_4x64(<4 x i32> *%i) nounwind readnone { |
| ; GENERIC-LABEL: sext_4x32mem_to_4x64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm0 # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_4x32mem_to_4x64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovsxdq (%rdi), %ymm0 # sched: [9:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <4 x i32>,<4 x i32> *%i,align 1 |
| %x = sext <4 x i32> %a to <4 x i64> |
| ret <4 x i64> %x |
| } |
| |
| define <4 x i64> @sext_4x32_to_4x64(<4 x i32> %a) nounwind readnone { |
| ; GENERIC-LABEL: sext_4x32_to_4x64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_4x32_to_4x64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = sext <4 x i32> %a to <4 x i64> |
| ret <4 x i64> %x |
| } |
| |
| define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: zext_4x32_to_4x64mask: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovd2m %xmm1, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_4x32_to_4x64mask: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovd2m %xmm1, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = zext <4 x i32> %a to <4 x i64> |
| %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer |
| ret <4 x i64> %ret |
| } |
| |
| define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: zext_8x32mem_to_8x64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_8x32mem_to_8x64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [10:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <8 x i32>,<8 x i32> *%i,align 1 |
| %x = zext <8 x i32> %a to <8 x i64> |
| %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer |
| ret <8 x i64> %ret |
| } |
| |
| define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: sext_8x32mem_to_8x64mask: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_8x32mem_to_8x64mask: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <8 x i32>,<8 x i32> *%i,align 1 |
| %x = sext <8 x i32> %a to <8 x i64> |
| %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer |
| ret <8 x i64> %ret |
| } |
| |
| define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32> *%i) nounwind readnone { |
| ; GENERIC-LABEL: sext_8x32mem_to_8x64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_8x32mem_to_8x64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovsxdq (%rdi), %zmm0 # sched: [10:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load <8 x i32>,<8 x i32> *%i,align 1 |
| %x = sext <8 x i32> %a to <8 x i64> |
| ret <8 x i64> %x |
| } |
| |
| define <8 x i64> @sext_8x32_to_8x64(<8 x i32> %a) nounwind readnone { |
| ; GENERIC-LABEL: sext_8x32_to_8x64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovsxdq %ymm0, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_8x32_to_8x64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovsxdq %ymm0, %zmm0 # sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = sext <8 x i32> %a to <8 x i64> |
| ret <8 x i64> %x |
| } |
| |
| define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a , <8 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: zext_8x32_to_8x64mask: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_8x32_to_8x64mask: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = zext <8 x i32> %a to <8 x i64> |
| %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer |
| ret <8 x i64> %ret |
| } |
| define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone { |
| ; GENERIC-LABEL: fptrunc_test: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [4:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: fptrunc_test: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [7:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = fptrunc <8 x double> %a to <8 x float> |
| ret <8 x float> %b |
| } |
| |
| define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone { |
| ; GENERIC-LABEL: fpext_test: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [2:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: fpext_test: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [7:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = fpext <8 x float> %a to <8 x double> |
| ret <8 x double> %b |
| } |
| |
| define <16 x i32> @zext_16i1_to_16xi32(i16 %b) { |
| ; GENERIC-LABEL: zext_16i1_to_16xi32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_16i1_to_16xi32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25] |
| ; SKX-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = bitcast i16 %b to <16 x i1> |
| %c = zext <16 x i1> %a to <16 x i32> |
| ret <16 x i32> %c |
| } |
| |
| define <8 x i64> @zext_8i1_to_8xi64(i8 %b) { |
| ; GENERIC-LABEL: zext_8i1_to_8xi64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpsrlq $63, %zmm0, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_8i1_to_8xi64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.25] |
| ; SKX-NEXT: vpsrlq $63, %zmm0, %zmm0 # sched: [1:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = bitcast i8 %b to <8 x i1> |
| %c = zext <8 x i1> %a to <8 x i64> |
| ret <8 x i64> %c |
| } |
| |
| define i16 @trunc_16i8_to_16i1(<16 x i8> %a) { |
| ; GENERIC-LABEL: trunc_16i8_to_16i1: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: trunc_16i8_to_16i1: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] |
| ; SKX-NEXT: # kill: def $ax killed $ax killed $eax |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask_b = trunc <16 x i8>%a to <16 x i1> |
| %mask = bitcast <16 x i1> %mask_b to i16 |
| ret i16 %mask |
| } |
| |
| define i16 @trunc_16i32_to_16i1(<16 x i32> %a) { |
| ; GENERIC-LABEL: trunc_16i32_to_16i1: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpslld $31, %zmm0, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax |
| ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: trunc_16i32_to_16i1: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpslld $31, %zmm0, %zmm0 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] |
| ; SKX-NEXT: # kill: def $ax killed $ax killed $eax |
| ; SKX-NEXT: vzeroupper # sched: [4:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask_b = trunc <16 x i32>%a to <16 x i1> |
| %mask = bitcast <16 x i1> %mask_b to i16 |
| ret i16 %mask |
| } |
| |
| define <4 x i32> @trunc_4i32_to_4i1(<4 x i32> %a, <4 x i32> %b) { |
| ; GENERIC-LABEL: trunc_4i32_to_4i1: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpsrad $31, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: trunc_4i32_to_4i1: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] |
| ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpsrad $31, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask_a = trunc <4 x i32>%a to <4 x i1> |
| %mask_b = trunc <4 x i32>%b to <4 x i1> |
| %a_and_b = and <4 x i1>%mask_a, %mask_b |
| %res = sext <4 x i1>%a_and_b to <4 x i32> |
| ret <4 x i32>%res |
| } |
| |
| |
| define i8 @trunc_8i16_to_8i1(<8 x i16> %a) { |
| ; GENERIC-LABEL: trunc_8i16_to_8i1: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: # kill: def $al killed $al killed $eax |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: trunc_8i16_to_8i1: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] |
| ; SKX-NEXT: # kill: def $al killed $al killed $eax |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask_b = trunc <8 x i16>%a to <8 x i1> |
| %mask = bitcast <8 x i1> %mask_b to i8 |
| ret i8 %mask |
| } |
| |
| define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind { |
| ; GENERIC-LABEL: sext_8i1_8i32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] |
| ; GENERIC-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 # sched: [1:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_8i1_8i32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 # sched: [1:0.33] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = icmp slt <8 x i32> %a1, %a2 |
| %x1 = xor <8 x i1>%x, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true> |
| %y = sext <8 x i1> %x1 to <8 x i32> |
| ret <8 x i32> %y |
| } |
| |
| |
| define i16 @trunc_i32_to_i1(i32 %a) { |
| ; GENERIC-LABEL: trunc_i32_to_i1: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: movw $-4, %ax # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovd %eax, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: kshiftrw $1, %k0, %k0 # sched: [1:1.00] |
| ; GENERIC-NEXT: kshiftlw $1, %k0, %k0 # sched: [1:1.00] |
| ; GENERIC-NEXT: andl $1, %edi # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovw %edi, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: korw %k1, %k0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: trunc_i32_to_i1: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: movw $-4, %ax # sched: [1:0.25] |
| ; SKX-NEXT: kmovd %eax, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: kshiftrw $1, %k0, %k0 # sched: [3:1.00] |
| ; SKX-NEXT: kshiftlw $1, %k0, %k0 # sched: [3:1.00] |
| ; SKX-NEXT: andl $1, %edi # sched: [1:0.25] |
| ; SKX-NEXT: kmovw %edi, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] |
| ; SKX-NEXT: # kill: def $ax killed $ax killed $eax |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a_i = trunc i32 %a to i1 |
| %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0 |
| %res = bitcast <16 x i1> %maskv to i16 |
| ret i16 %res |
| } |
| |
| define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind { |
| ; GENERIC-LABEL: sext_8i1_8i16: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [1:0.50] |
| ; GENERIC-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_8i1_8i16: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [3:1.00] |
| ; SKX-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.25] |
| ; SKX-NEXT: vzeroupper # sched: [4:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = icmp slt <8 x i32> %a1, %a2 |
| %y = sext <8 x i1> %x to <8 x i16> |
| ret <8 x i16> %y |
| } |
| |
| define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind { |
| ; GENERIC-LABEL: sext_16i1_16i32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 # sched: [1:0.50] |
| ; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_16i1_16i32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 # sched: [3:1.00] |
| ; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = icmp slt <16 x i32> %a1, %a2 |
| %y = sext <16 x i1> %x to <16 x i32> |
| ret <16 x i32> %y |
| } |
| |
| define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind { |
| ; GENERIC-LABEL: sext_8i1_8i64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [1:0.50] |
| ; GENERIC-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: sext_8i1_8i64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [3:1.00] |
| ; SKX-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.25] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = icmp slt <8 x i32> %a1, %a2 |
| %y = sext <8 x i1> %x to <8 x i64> |
| ret <8 x i64> %y |
| } |
| |
| define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) { |
| ; GENERIC-LABEL: extload_v8i64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [8:1.00] |
| ; GENERIC-NEXT: vmovdqa64 %zmm0, (%rsi) # sched: [1:1.00] |
| ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: extload_v8i64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [10:1.00] |
| ; SKX-NEXT: vmovdqa64 %zmm0, (%rsi) # sched: [1:1.00] |
| ; SKX-NEXT: vzeroupper # sched: [4:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %sign_load = load <8 x i8>, <8 x i8>* %a |
| %c = sext <8 x i8> %sign_load to <8 x i64> |
| store <8 x i64> %c, <8 x i64>* %res |
| ret void |
| } |
| |
| define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: test21: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $7, %zmm2, %zmm2 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovb2m %zmm2, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] |
| ; GENERIC-NEXT: kshiftrq $32, %k1, %k1 # sched: [1:1.00] |
| ; GENERIC-NEXT: vmovdqu16 %zmm1, %zmm1 {%k1} {z} # sched: [1:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test21: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $7, %zmm2, %zmm2 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovb2m %zmm2, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33] |
| ; SKX-NEXT: kshiftrq $32, %k1, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vmovdqu16 %zmm1, %zmm1 {%k1} {z} # sched: [1:0.33] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %ret = select <64 x i1> %mask, <64 x i16> %x, <64 x i16> zeroinitializer |
| ret <64 x i16> %ret |
| } |
| |
| define <16 x i16> @shuffle_zext_16x8_to_16x16(<16 x i8> %a) nounwind readnone { |
| ; GENERIC-LABEL: shuffle_zext_16x8_to_16x16: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: shuffle_zext_16x8_to_16x16: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %1 = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16> |
| %2 = bitcast <32 x i8> %1 to <16 x i16> |
| ret <16 x i16> %2 |
| } |
| |
| define <16 x i16> @shuffle_zext_16x8_to_16x16_mask(<16 x i8> %a, <16 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: shuffle_zext_16x8_to_16x16_mask: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: shuffle_zext_16x8_to_16x16_mask: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16> |
| %bc = bitcast <32 x i8> %x to <16 x i16> |
| %ret = select <16 x i1> %mask, <16 x i16> %bc, <16 x i16> zeroinitializer |
| ret <16 x i16> %ret |
| } |
| |
| define <16 x i16> @zext_32x8_to_16x16(<32 x i8> %a) { |
| ; GENERIC-LABEL: zext_32x8_to_16x16: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_32x8_to_16x16: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 1, i32 32, i32 2, i32 32, i32 3, i32 32, i32 4, i32 32, i32 5, i32 32, i32 6, i32 32, i32 7, i32 32, i32 8, i32 32, i32 9, i32 32, i32 10, i32 32, i32 11, i32 32, i32 12, i32 32, i32 13, i32 32, i32 14, i32 32, i32 15, i32 32> |
| %2 = bitcast <32 x i8> %1 to <16 x i16> |
| ret <16 x i16> %2 |
| } |
| |
| define <8 x i32> @zext_32x8_to_8x32(<32 x i8> %a) { |
| ; GENERIC-LABEL: zext_32x8_to_8x32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_32x8_to_8x32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 4, i32 32, i32 32, i32 32, i32 5, i32 32, i32 32, i32 32, i32 6, i32 32, i32 32, i32 32, i32 7, i32 32, i32 32, i32 32> |
| %2 = bitcast <32 x i8> %1 to <8 x i32> |
| ret <8 x i32> %2 |
| } |
| |
| define <4 x i64> @zext_32x8_to_4x64(<32 x i8> %a) { |
| ; GENERIC-LABEL: zext_32x8_to_4x64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_32x8_to_4x64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32> |
| %2 = bitcast <32 x i8> %1 to <4 x i64> |
| ret <4 x i64> %2 |
| } |
| |
| define <8 x i32> @zext_16x16_to_8x32(<16 x i16> %a) { |
| ; GENERIC-LABEL: zext_16x16_to_8x32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_16x16_to_8x32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16> |
| %2 = bitcast <16 x i16> %1 to <8 x i32> |
| ret <8 x i32> %2 |
| } |
| |
| define <4 x i64> @zext_16x16_to_4x64(<16 x i16> %a) { |
| ; GENERIC-LABEL: zext_16x16_to_4x64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_16x16_to_4x64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 2, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16> |
| %2 = bitcast <16 x i16> %1 to <4 x i64> |
| ret <4 x i64> %2 |
| } |
| |
| define <4 x i64> @zext_8x32_to_4x64(<8 x i32> %a) { |
| ; GENERIC-LABEL: zext_8x32_to_4x64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_8x32_to_4x64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %1 = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8> |
| %2 = bitcast <8 x i32> %1 to <4 x i64> |
| ret <4 x i64> %2 |
| } |
| |
| define <64 x i8> @zext_64xi1_to_64xi8(<64 x i8> %x, <64 x i8> %y) #0 { |
| ; GENERIC-LABEL: zext_64xi1_to_64xi8: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpcmpeqb %zmm1, %zmm0, %k1 # sched: [1:0.50] |
| ; GENERIC-NEXT: vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [7:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_64xi1_to_64xi8: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpcmpeqb %zmm1, %zmm0, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp eq <64 x i8> %x, %y |
| %1 = zext <64 x i1> %mask to <64 x i8> |
| ret <64 x i8> %1 |
| } |
| |
| define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 { |
| ; GENERIC-LABEL: zext_32xi1_to_32xi16: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # sched: [1:0.50] |
| ; GENERIC-NEXT: vpmovm2w %k0, %zmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpsrlw $15, %zmm0, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_32xi1_to_32xi16: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # sched: [3:1.00] |
| ; SKX-NEXT: vpmovm2w %k0, %zmm0 # sched: [1:0.25] |
| ; SKX-NEXT: vpsrlw $15, %zmm0, %zmm0 # sched: [1:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp eq <32 x i16> %x, %y |
| %1 = zext <32 x i1> %mask to <32 x i16> |
| ret <32 x i16> %1 |
| } |
| |
| define <16 x i16> @zext_16xi1_to_16xi16(<16 x i16> %x, <16 x i16> %y) #0 { |
| ; GENERIC-LABEL: zext_16xi1_to_16xi16: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] |
| ; GENERIC-NEXT: vpsrlw $15, %ymm0, %ymm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_16xi1_to_16xi16: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpsrlw $15, %ymm0, %ymm0 # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp eq <16 x i16> %x, %y |
| %1 = zext <16 x i1> %mask to <16 x i16> |
| ret <16 x i16> %1 |
| } |
| |
| |
| define <32 x i8> @zext_32xi1_to_32xi8(<32 x i16> %x, <32 x i16> %y) #0 { |
| ; GENERIC-LABEL: zext_32xi1_to_32xi8: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 # sched: [1:0.50] |
| ; GENERIC-NEXT: vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [7:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_32xi1_to_32xi8: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp eq <32 x i16> %x, %y |
| %1 = zext <32 x i1> %mask to <32 x i8> |
| ret <32 x i8> %1 |
| } |
| |
| define <4 x i32> @zext_4xi1_to_4x32(<4 x i8> %x, <4 x i8> %y) #0 { |
| ; GENERIC-LABEL: zext_4xi1_to_4x32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] sched: [6:0.50] |
| ; GENERIC-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] |
| ; GENERIC-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_4xi1_to_4x32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] sched: [6:0.50] |
| ; SKX-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33] |
| ; SKX-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33] |
| ; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp eq <4 x i8> %x, %y |
| %1 = zext <4 x i1> %mask to <4 x i32> |
| ret <4 x i32> %1 |
| } |
| |
| define <2 x i64> @zext_2xi1_to_2xi64(<2 x i8> %x, <2 x i8> %y) #0 { |
| ; GENERIC-LABEL: zext_2xi1_to_2xi64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] sched: [6:0.50] |
| ; GENERIC-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] |
| ; GENERIC-NEXT: vpsrlq $63, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_2xi1_to_2xi64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] sched: [6:0.50] |
| ; SKX-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33] |
| ; SKX-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33] |
| ; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpsrlq $63, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp eq <2 x i8> %x, %y |
| %1 = zext <2 x i1> %mask to <2 x i64> |
| ret <2 x i64> %1 |
| } |
| |
| define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { |
| ; GENERIC-LABEL: test_x86_fmadd_ps_z: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00] |
| ; GENERIC-NEXT: vaddps %zmm2, %zmm0, %zmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_x86_fmadd_ps_z: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: vaddps %zmm2, %zmm0, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = fmul <16 x float> %a0, %a1 |
| %res = fadd <16 x float> %x, %a2 |
| ret <16 x float> %res |
| } |
| |
| define <16 x float> @test_x86_fmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { |
| ; GENERIC-LABEL: test_x86_fmsub_ps_z: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00] |
| ; GENERIC-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_x86_fmsub_ps_z: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = fmul <16 x float> %a0, %a1 |
| %res = fsub <16 x float> %x, %a2 |
| ret <16 x float> %res |
| } |
| |
| define <16 x float> @test_x86_fnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { |
| ; GENERIC-LABEL: test_x86_fnmadd_ps_z: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00] |
| ; GENERIC-NEXT: vsubps %zmm0, %zmm2, %zmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_x86_fnmadd_ps_z: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: vsubps %zmm0, %zmm2, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = fmul <16 x float> %a0, %a1 |
| %res = fsub <16 x float> %a2, %x |
| ret <16 x float> %res |
| } |
| |
| define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { |
| ; GENERIC-LABEL: test_x86_fnmsub_ps_z: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00] |
| ; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00] |
| ; GENERIC-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_x86_fnmsub_ps_z: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] |
| ; SKX-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = fmul <16 x float> %a0, %a1 |
| %y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, |
| float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, |
| float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, |
| float -0.000000e+00>, %x |
| %res = fsub <16 x float> %y, %a2 |
| ret <16 x float> %res |
| } |
| |
| define <8 x double> @test_x86_fmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { |
| ; GENERIC-LABEL: test_x86_fmadd_pd_z: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [5:1.00] |
| ; GENERIC-NEXT: vaddpd %zmm2, %zmm0, %zmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_x86_fmadd_pd_z: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: vaddpd %zmm2, %zmm0, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = fmul <8 x double> %a0, %a1 |
| %res = fadd <8 x double> %x, %a2 |
| ret <8 x double> %res |
| } |
| |
| define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { |
| ; GENERIC-LABEL: test_x86_fmsub_pd_z: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [5:1.00] |
| ; GENERIC-NEXT: vsubpd %zmm2, %zmm0, %zmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_x86_fmsub_pd_z: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: vsubpd %zmm2, %zmm0, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = fmul <8 x double> %a0, %a1 |
| %res = fsub <8 x double> %x, %a2 |
| ret <8 x double> %res |
| } |
| |
| define double @test_x86_fmsub_213(double %a0, double %a1, double %a2) { |
| ; GENERIC-LABEL: test_x86_fmsub_213: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] |
| ; GENERIC-NEXT: vsubsd %xmm2, %xmm0, %xmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_x86_fmsub_213: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] |
| ; SKX-NEXT: vsubsd %xmm2, %xmm0, %xmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = fmul double %a0, %a1 |
| %res = fsub double %x, %a2 |
| ret double %res |
| } |
| |
| define double @test_x86_fmsub_213_m(double %a0, double %a1, double * %a2_ptr) { |
| ; GENERIC-LABEL: test_x86_fmsub_213_m: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] |
| ; GENERIC-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_x86_fmsub_213_m: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] |
| ; SKX-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a2 = load double , double *%a2_ptr |
| %x = fmul double %a0, %a1 |
| %res = fsub double %x, %a2 |
| ret double %res |
| } |
| |
| define double @test_x86_fmsub_231_m(double %a0, double %a1, double * %a2_ptr) { |
| ; GENERIC-LABEL: test_x86_fmsub_231_m: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [11:1.00] |
| ; GENERIC-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_x86_fmsub_231_m: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] |
| ; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a2 = load double , double *%a2_ptr |
| %x = fmul double %a0, %a2 |
| %res = fsub double %x, %a1 |
| ret double %res |
| } |
| |
| define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind { |
| ; GENERIC-LABEL: test231_br: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmulps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [12:1.00] |
| ; GENERIC-NEXT: vaddps %zmm1, %zmm0, %zmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test231_br: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmulps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [11:0.50] |
| ; SKX-NEXT: vaddps %zmm1, %zmm0, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b1 = fmul <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000> |
| %b2 = fadd <16 x float> %b1, %a2 |
| ret <16 x float> %b2 |
| } |
| |
| define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind { |
| ; GENERIC-LABEL: test213_br: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00] |
| ; GENERIC-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [10:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test213_br: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [11:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b1 = fmul <16 x float> %a1, %a2 |
| %b2 = fadd <16 x float> %b1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000> |
| ret <16 x float> %b2 |
| } |
| |
| ;mask (a*c+b , a) |
| define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) { |
| ; GENERIC-LABEL: test_x86_fmadd132_ps: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vmulps (%rdi), %zmm0, %zmm2 # sched: [12:1.00] |
| ; GENERIC-NEXT: vaddps %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_x86_fmadd132_ps: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vmulps (%rdi), %zmm0, %zmm2 # sched: [11:0.50] |
| ; SKX-NEXT: vaddps %zmm1, %zmm2, %zmm0 {%k1} # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1 |
| %x = fmul <16 x float> %a0, %a2 |
| %y = fadd <16 x float> %x, %a1 |
| %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a0 |
| ret <16 x float> %res |
| } |
| |
| ;mask (a*c+b , b) |
| define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) { |
| ; GENERIC-LABEL: test_x86_fmadd231_ps: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vmulps (%rdi), %zmm0, %zmm0 # sched: [12:1.00] |
| ; GENERIC-NEXT: vaddps %zmm1, %zmm0, %zmm1 {%k1} # sched: [3:1.00] |
| ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_x86_fmadd231_ps: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vmulps (%rdi), %zmm0, %zmm0 # sched: [11:0.50] |
| ; SKX-NEXT: vaddps %zmm1, %zmm0, %zmm1 {%k1} # sched: [4:0.50] |
| ; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1 |
| %x = fmul <16 x float> %a0, %a2 |
| %y = fadd <16 x float> %x, %a1 |
| %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1 |
| ret <16 x float> %res |
| } |
| |
| ;mask (b*a+c , b) |
| define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) { |
| ; GENERIC-LABEL: test_x86_fmadd213_ps: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [5:1.00] |
| ; GENERIC-NEXT: vaddps (%rdi), %zmm0, %zmm1 {%k1} # sched: [10:1.00] |
| ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_x86_fmadd213_ps: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: vaddps (%rdi), %zmm0, %zmm1 {%k1} # sched: [11:0.50] |
| ; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1 |
| %x = fmul <16 x float> %a1, %a0 |
| %y = fadd <16 x float> %x, %a2 |
| %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1 |
| ret <16 x float> %res |
| } |
| |
| define <16 x i32> @vpandd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp { |
| ; GENERIC-LABEL: vpandd: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] |
| ; GENERIC-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: vpandd: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] |
| ; SKX-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| ; Force the execution domain with an add. |
| %a2 = add <16 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, |
| i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> |
| %x = and <16 x i32> %a2, %b |
| ret <16 x i32> %x |
| } |
| |
| define <16 x i32> @vpandnd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp { |
| ; GENERIC-LABEL: vpandnd: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] |
| ; GENERIC-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: vpandnd: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] |
| ; SKX-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| ; Force the execution domain with an add. |
| %a2 = add <16 x i32> %a, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, |
| i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> |
| %b2 = xor <16 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, |
| i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> |
| %x = and <16 x i32> %a2, %b2 |
| ret <16 x i32> %x |
| } |
| |
| define <16 x i32> @vpord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp { |
| ; GENERIC-LABEL: vpord: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] |
| ; GENERIC-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: vpord: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] |
| ; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| ; Force the execution domain with an add. |
| %a2 = add <16 x i32> %a, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, |
| i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> |
| %x = or <16 x i32> %a2, %b |
| ret <16 x i32> %x |
| } |
| |
| define <16 x i32> @vpxord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp { |
| ; GENERIC-LABEL: vpxord: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] |
| ; GENERIC-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: vpxord: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] |
| ; SKX-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| ; Force the execution domain with an add. |
| %a2 = add <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, |
| i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> |
| %x = xor <16 x i32> %a2, %b |
| ret <16 x i32> %x |
| } |
| |
| define <8 x i64> @vpandq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp { |
| ; GENERIC-LABEL: vpandq: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] |
| ; GENERIC-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: vpandq: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] |
| ; SKX-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| ; Force the execution domain with an add. |
| %a2 = add <8 x i64> %a, <i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6> |
| %x = and <8 x i64> %a2, %b |
| ret <8 x i64> %x |
| } |
| |
| define <8 x i64> @vpandnq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp { |
| ; GENERIC-LABEL: vpandnq: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] |
| ; GENERIC-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: vpandnq: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] |
| ; SKX-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| ; Force the execution domain with an add. |
| %a2 = add <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7> |
| %b2 = xor <8 x i64> %b, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1> |
| %x = and <8 x i64> %a2, %b2 |
| ret <8 x i64> %x |
| } |
| |
| define <8 x i64> @vporq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp { |
| ; GENERIC-LABEL: vporq: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] |
| ; GENERIC-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: vporq: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] |
| ; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| ; Force the execution domain with an add. |
| %a2 = add <8 x i64> %a, <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8> |
| %x = or <8 x i64> %a2, %b |
| ret <8 x i64> %x |
| } |
| |
| define <8 x i64> @vpxorq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp { |
| ; GENERIC-LABEL: vpxorq: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] |
| ; GENERIC-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: vpxorq: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] |
| ; SKX-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| ; Force the execution domain with an add. |
| %a2 = add <8 x i64> %a, <i64 9, i64 9, i64 9, i64 9, i64 9, i64 9, i64 9, i64 9> |
| %x = xor <8 x i64> %a2, %b |
| ret <8 x i64> %x |
| } |
| |
| define <64 x i8> @and_v64i8(<64 x i8> %a, <64 x i8> %b) { |
| ; GENERIC-LABEL: and_v64i8: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: and_v64i8: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %res = and <64 x i8> %a, %b |
| ret <64 x i8> %res |
| } |
| |
| define <64 x i8> @andn_v64i8(<64 x i8> %a, <64 x i8> %b) { |
| ; GENERIC-LABEL: andn_v64i8: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: andn_v64i8: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b2 = xor <64 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, |
| i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, |
| i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, |
| i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> |
| %res = and <64 x i8> %a, %b2 |
| ret <64 x i8> %res |
| } |
| |
| define <64 x i8> @or_v64i8(<64 x i8> %a, <64 x i8> %b) { |
| ; GENERIC-LABEL: or_v64i8: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: or_v64i8: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %res = or <64 x i8> %a, %b |
| ret <64 x i8> %res |
| } |
| |
| define <64 x i8> @xor_v64i8(<64 x i8> %a, <64 x i8> %b) { |
| ; GENERIC-LABEL: xor_v64i8: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: xor_v64i8: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %res = xor <64 x i8> %a, %b |
| ret <64 x i8> %res |
| } |
| |
| define <32 x i16> @and_v32i16(<32 x i16> %a, <32 x i16> %b) { |
| ; GENERIC-LABEL: and_v32i16: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: and_v32i16: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %res = and <32 x i16> %a, %b |
| ret <32 x i16> %res |
| } |
| |
| define <32 x i16> @andn_v32i16(<32 x i16> %a, <32 x i16> %b) { |
| ; GENERIC-LABEL: andn_v32i16: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: andn_v32i16: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b2 = xor <32 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, |
| i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> |
| %res = and <32 x i16> %a, %b2 |
| ret <32 x i16> %res |
| } |
| |
| define <32 x i16> @or_v32i16(<32 x i16> %a, <32 x i16> %b) { |
| ; GENERIC-LABEL: or_v32i16: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: or_v32i16: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %res = or <32 x i16> %a, %b |
| ret <32 x i16> %res |
| } |
| |
| define <32 x i16> @xor_v32i16(<32 x i16> %a, <32 x i16> %b) { |
| ; GENERIC-LABEL: xor_v32i16: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: xor_v32i16: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %res = xor <32 x i16> %a, %b |
| ret <32 x i16> %res |
| } |
| |
| define <16 x float> @masked_and_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) { |
| ; GENERIC-LABEL: masked_and_v16f32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00] |
| ; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: masked_and_v16f32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50] |
| ; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a1 = bitcast <16 x float> %a to <16 x i32> |
| %b1 = bitcast <16 x float> %b to <16 x i32> |
| %passThru1 = bitcast <16 x float> %passThru to <16 x i32> |
| %mask1 = bitcast i16 %mask to <16 x i1> |
| %op = and <16 x i32> %a1, %b1 |
| %select = select <16 x i1> %mask1, <16 x i32> %op, <16 x i32> %passThru1 |
| %cast = bitcast <16 x i32> %select to <16 x float> |
| %add = fadd <16 x float> %c, %cast |
| ret <16 x float> %add |
| } |
| |
| define <16 x float> @masked_or_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) { |
| ; GENERIC-LABEL: masked_or_v16f32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00] |
| ; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: masked_or_v16f32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50] |
| ; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a1 = bitcast <16 x float> %a to <16 x i32> |
| %b1 = bitcast <16 x float> %b to <16 x i32> |
| %passThru1 = bitcast <16 x float> %passThru to <16 x i32> |
| %mask1 = bitcast i16 %mask to <16 x i1> |
| %op = and <16 x i32> %a1, %b1 |
| %select = select <16 x i1> %mask1, <16 x i32> %op, <16 x i32> %passThru1 |
| %cast = bitcast <16 x i32> %select to <16 x float> |
| %add = fadd <16 x float> %c, %cast |
| ret <16 x float> %add |
| } |
| |
| define <16 x float> @masked_xor_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) { |
| ; GENERIC-LABEL: masked_xor_v16f32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00] |
| ; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: masked_xor_v16f32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50] |
| ; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a1 = bitcast <16 x float> %a to <16 x i32> |
| %b1 = bitcast <16 x float> %b to <16 x i32> |
| %passThru1 = bitcast <16 x float> %passThru to <16 x i32> |
| %mask1 = bitcast i16 %mask to <16 x i1> |
| %op = and <16 x i32> %a1, %b1 |
| %select = select <16 x i1> %mask1, <16 x i32> %op, <16 x i32> %passThru1 |
| %cast = bitcast <16 x i32> %select to <16 x float> |
| %add = fadd <16 x float> %c, %cast |
| ret <16 x float> %add |
| } |
| |
| define <8 x double> @masked_and_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) { |
| ; GENERIC-LABEL: masked_and_v8f64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00] |
| ; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: masked_and_v8f64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50] |
| ; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a1 = bitcast <8 x double> %a to <8 x i64> |
| %b1 = bitcast <8 x double> %b to <8 x i64> |
| %passThru1 = bitcast <8 x double> %passThru to <8 x i64> |
| %mask1 = bitcast i8 %mask to <8 x i1> |
| %op = and <8 x i64> %a1, %b1 |
| %select = select <8 x i1> %mask1, <8 x i64> %op, <8 x i64> %passThru1 |
| %cast = bitcast <8 x i64> %select to <8 x double> |
| %add = fadd <8 x double> %c, %cast |
| ret <8 x double> %add |
| } |
| |
| define <8 x double> @masked_or_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) { |
| ; GENERIC-LABEL: masked_or_v8f64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00] |
| ; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: masked_or_v8f64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50] |
| ; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a1 = bitcast <8 x double> %a to <8 x i64> |
| %b1 = bitcast <8 x double> %b to <8 x i64> |
| %passThru1 = bitcast <8 x double> %passThru to <8 x i64> |
| %mask1 = bitcast i8 %mask to <8 x i1> |
| %op = and <8 x i64> %a1, %b1 |
| %select = select <8 x i1> %mask1, <8 x i64> %op, <8 x i64> %passThru1 |
| %cast = bitcast <8 x i64> %select to <8 x double> |
| %add = fadd <8 x double> %c, %cast |
| ret <8 x double> %add |
| } |
| |
| define <8 x double> @masked_xor_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) { |
| ; GENERIC-LABEL: masked_xor_v8f64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00] |
| ; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: masked_xor_v8f64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50] |
| ; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a1 = bitcast <8 x double> %a to <8 x i64> |
| %b1 = bitcast <8 x double> %b to <8 x i64> |
| %passThru1 = bitcast <8 x double> %passThru to <8 x i64> |
| %mask1 = bitcast i8 %mask to <8 x i1> |
| %op = and <8 x i64> %a1, %b1 |
| %select = select <8 x i1> %mask1, <8 x i64> %op, <8 x i64> %passThru1 |
| %cast = bitcast <8 x i64> %select to <8 x double> |
| %add = fadd <8 x double> %c, %cast |
| ret <8 x double> %add |
| } |
| |
| define <8 x i64> @test_mm512_mask_and_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) { |
| ; GENERIC-LABEL: test_mm512_mask_and_epi32: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vandps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_mm512_mask_and_epi32: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vandps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %and1.i.i = and <8 x i64> %__a, %__b |
| %0 = bitcast <8 x i64> %and1.i.i to <16 x i32> |
| %1 = bitcast <8 x i64> %__src to <16 x i32> |
| %2 = bitcast i16 %__k to <16 x i1> |
| %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 |
| %4 = bitcast <16 x i32> %3 to <8 x i64> |
| ret <8 x i64> %4 |
| } |
| |
| define <8 x i64> @test_mm512_mask_or_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) { |
| ; GENERIC-LABEL: test_mm512_mask_or_epi32: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_mm512_mask_or_epi32: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %or1.i.i = or <8 x i64> %__a, %__b |
| %0 = bitcast <8 x i64> %or1.i.i to <16 x i32> |
| %1 = bitcast <8 x i64> %__src to <16 x i32> |
| %2 = bitcast i16 %__k to <16 x i1> |
| %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 |
| %4 = bitcast <16 x i32> %3 to <8 x i64> |
| ret <8 x i64> %4 |
| } |
| |
| define <8 x i64> @test_mm512_mask_xor_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) { |
| ; GENERIC-LABEL: test_mm512_mask_xor_epi32: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_mm512_mask_xor_epi32: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %xor1.i.i = xor <8 x i64> %__a, %__b |
| %0 = bitcast <8 x i64> %xor1.i.i to <16 x i32> |
| %1 = bitcast <8 x i64> %__src to <16 x i32> |
| %2 = bitcast i16 %__k to <16 x i1> |
| %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 |
| %4 = bitcast <16 x i32> %3 to <8 x i64> |
| ret <8 x i64> %4 |
| } |
| |
| define <8 x double> @test_mm512_mask_xor_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { |
| ; GENERIC-LABEL: test_mm512_mask_xor_pd: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vxorpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_mm512_mask_xor_pd: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vxorpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %0 = bitcast <8 x double> %__A to <8 x i64> |
| %1 = bitcast <8 x double> %__B to <8 x i64> |
| %xor.i.i = xor <8 x i64> %0, %1 |
| %2 = bitcast <8 x i64> %xor.i.i to <8 x double> |
| %3 = bitcast i8 %__U to <8 x i1> |
| %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W |
| ret <8 x double> %4 |
| } |
| |
| define <8 x double> @test_mm512_maskz_xor_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { |
| ; GENERIC-LABEL: test_mm512_maskz_xor_pd: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_mm512_maskz_xor_pd: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %0 = bitcast <8 x double> %__A to <8 x i64> |
| %1 = bitcast <8 x double> %__B to <8 x i64> |
| %xor.i.i = xor <8 x i64> %0, %1 |
| %2 = bitcast <8 x i64> %xor.i.i to <8 x double> |
| %3 = bitcast i8 %__U to <8 x i1> |
| %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer |
| ret <8 x double> %4 |
| } |
| |
| define <16 x float> @test_mm512_mask_xor_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { |
| ; GENERIC-LABEL: test_mm512_mask_xor_ps: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_mm512_mask_xor_ps: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %0 = bitcast <16 x float> %__A to <16 x i32> |
| %1 = bitcast <16 x float> %__B to <16 x i32> |
| %xor.i.i = xor <16 x i32> %0, %1 |
| %2 = bitcast <16 x i32> %xor.i.i to <16 x float> |
| %3 = bitcast i16 %__U to <16 x i1> |
| %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W |
| ret <16 x float> %4 |
| } |
| |
| define <16 x float> @test_mm512_maskz_xor_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { |
| ; GENERIC-LABEL: test_mm512_maskz_xor_ps: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_mm512_maskz_xor_ps: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %0 = bitcast <16 x float> %__A to <16 x i32> |
| %1 = bitcast <16 x float> %__B to <16 x i32> |
| %xor.i.i = xor <16 x i32> %0, %1 |
| %2 = bitcast <16 x i32> %xor.i.i to <16 x float> |
| %3 = bitcast i16 %__U to <16 x i1> |
| %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer |
| ret <16 x float> %4 |
| } |
| |
| define <8 x double> @test_mm512_mask_or_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { |
| ; GENERIC-LABEL: test_mm512_mask_or_pd: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vorpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_mm512_mask_or_pd: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vorpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %0 = bitcast <8 x double> %__A to <8 x i64> |
| %1 = bitcast <8 x double> %__B to <8 x i64> |
| %or.i.i = or <8 x i64> %1, %0 |
| %2 = bitcast <8 x i64> %or.i.i to <8 x double> |
| %3 = bitcast i8 %__U to <8 x i1> |
| %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W |
| ret <8 x double> %4 |
| } |
| |
| define <8 x double> @test_mm512_maskz_or_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { |
| ; GENERIC-LABEL: test_mm512_maskz_or_pd: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vorpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_mm512_maskz_or_pd: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vorpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %0 = bitcast <8 x double> %__A to <8 x i64> |
| %1 = bitcast <8 x double> %__B to <8 x i64> |
| %or.i.i = or <8 x i64> %1, %0 |
| %2 = bitcast <8 x i64> %or.i.i to <8 x double> |
| %3 = bitcast i8 %__U to <8 x i1> |
| %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer |
| ret <8 x double> %4 |
| } |
| |
| define <16 x float> @test_mm512_mask_or_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { |
| ; GENERIC-LABEL: test_mm512_mask_or_ps: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vorps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_mm512_mask_or_ps: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vorps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %0 = bitcast <16 x float> %__A to <16 x i32> |
| %1 = bitcast <16 x float> %__B to <16 x i32> |
| %or.i.i = or <16 x i32> %1, %0 |
| %2 = bitcast <16 x i32> %or.i.i to <16 x float> |
| %3 = bitcast i16 %__U to <16 x i1> |
| %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W |
| ret <16 x float> %4 |
| } |
| |
| define <16 x float> @test_mm512_maskz_or_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { |
| ; GENERIC-LABEL: test_mm512_maskz_or_ps: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vorps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_mm512_maskz_or_ps: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vorps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %0 = bitcast <16 x float> %__A to <16 x i32> |
| %1 = bitcast <16 x float> %__B to <16 x i32> |
| %or.i.i = or <16 x i32> %1, %0 |
| %2 = bitcast <16 x i32> %or.i.i to <16 x float> |
| %3 = bitcast i16 %__U to <16 x i1> |
| %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer |
| ret <16 x float> %4 |
| } |
| |
| define <8 x double> @test_mm512_mask_and_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { |
| ; GENERIC-LABEL: test_mm512_mask_and_pd: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vandpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_mm512_mask_and_pd: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vandpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %0 = bitcast <8 x double> %__A to <8 x i64> |
| %1 = bitcast <8 x double> %__B to <8 x i64> |
| %and.i.i = and <8 x i64> %1, %0 |
| %2 = bitcast <8 x i64> %and.i.i to <8 x double> |
| %3 = bitcast i8 %__U to <8 x i1> |
| %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W |
| ret <8 x double> %4 |
| } |
| |
| define <8 x double> @test_mm512_maskz_and_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { |
| ; GENERIC-LABEL: test_mm512_maskz_and_pd: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vandpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_mm512_maskz_and_pd: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vandpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %0 = bitcast <8 x double> %__A to <8 x i64> |
| %1 = bitcast <8 x double> %__B to <8 x i64> |
| %and.i.i = and <8 x i64> %1, %0 |
| %2 = bitcast <8 x i64> %and.i.i to <8 x double> |
| %3 = bitcast i8 %__U to <8 x i1> |
| %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer |
| ret <8 x double> %4 |
| } |
| |
| define <16 x float> @test_mm512_mask_and_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { |
| ; GENERIC-LABEL: test_mm512_mask_and_ps: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vandps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_mm512_mask_and_ps: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vandps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %0 = bitcast <16 x float> %__A to <16 x i32> |
| %1 = bitcast <16 x float> %__B to <16 x i32> |
| %and.i.i = and <16 x i32> %1, %0 |
| %2 = bitcast <16 x i32> %and.i.i to <16 x float> |
| %3 = bitcast i16 %__U to <16 x i1> |
| %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W |
| ret <16 x float> %4 |
| } |
| |
| define <16 x float> @test_mm512_maskz_and_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { |
| ; GENERIC-LABEL: test_mm512_maskz_and_ps: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vandps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_mm512_maskz_and_ps: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vandps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %0 = bitcast <16 x float> %__A to <16 x i32> |
| %1 = bitcast <16 x float> %__B to <16 x i32> |
| %and.i.i = and <16 x i32> %1, %0 |
| %2 = bitcast <16 x i32> %and.i.i to <16 x float> |
| %3 = bitcast i16 %__U to <16 x i1> |
| %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer |
| ret <16 x float> %4 |
| } |
| |
| define <8 x double> @test_mm512_mask_andnot_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { |
| ; GENERIC-LABEL: test_mm512_mask_andnot_pd: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vandnpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_mm512_mask_andnot_pd: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vandnpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %0 = bitcast <8 x double> %__A to <8 x i64> |
| %neg.i.i = xor <8 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1> |
| %1 = bitcast <8 x double> %__B to <8 x i64> |
| %and.i.i = and <8 x i64> %1, %neg.i.i |
| %2 = bitcast <8 x i64> %and.i.i to <8 x double> |
| %3 = bitcast i8 %__U to <8 x i1> |
| %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W |
| ret <8 x double> %4 |
| } |
| |
| define <8 x double> @test_mm512_maskz_andnot_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { |
| ; GENERIC-LABEL: test_mm512_maskz_andnot_pd: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_mm512_maskz_andnot_pd: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %0 = bitcast <8 x double> %__A to <8 x i64> |
| %neg.i.i = xor <8 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1> |
| %1 = bitcast <8 x double> %__B to <8 x i64> |
| %and.i.i = and <8 x i64> %1, %neg.i.i |
| %2 = bitcast <8 x i64> %and.i.i to <8 x double> |
| %3 = bitcast i8 %__U to <8 x i1> |
| %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer |
| ret <8 x double> %4 |
| } |
| |
| define <16 x float> @test_mm512_mask_andnot_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { |
| ; GENERIC-LABEL: test_mm512_mask_andnot_ps: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vandnps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_mm512_mask_andnot_ps: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vandnps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %0 = bitcast <16 x float> %__A to <16 x i32> |
| %neg.i.i = xor <16 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> |
| %1 = bitcast <16 x float> %__B to <16 x i32> |
| %and.i.i = and <16 x i32> %1, %neg.i.i |
| %2 = bitcast <16 x i32> %and.i.i to <16 x float> |
| %3 = bitcast i16 %__U to <16 x i1> |
| %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W |
| ret <16 x float> %4 |
| } |
| |
| define <16 x float> @test_mm512_maskz_andnot_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { |
| ; GENERIC-LABEL: test_mm512_maskz_andnot_ps: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_mm512_maskz_andnot_ps: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %0 = bitcast <16 x float> %__A to <16 x i32> |
| %neg.i.i = xor <16 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> |
| %1 = bitcast <16 x float> %__B to <16 x i32> |
| %and.i.i = and <16 x i32> %1, %neg.i.i |
| %2 = bitcast <16 x i32> %and.i.i to <16 x float> |
| %3 = bitcast i16 %__U to <16 x i1> |
| %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer |
| ret <16 x float> %4 |
| } |
| |
| define i32 @mov_test1(float %x) { |
| ; GENERIC-LABEL: mov_test1: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test1: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %res = bitcast float %x to i32 |
| ret i32 %res |
| } |
| |
| define <4 x i32> @mov_test2(i32 %x) { |
| ; GENERIC-LABEL: mov_test2: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test2: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %res = insertelement <4 x i32>undef, i32 %x, i32 0 |
| ret <4 x i32>%res |
| } |
| |
| define <2 x i64> @mov_test3(i64 %x) { |
| ; GENERIC-LABEL: mov_test3: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovq %rdi, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test3: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovq %rdi, %xmm0 # sched: [1:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %res = insertelement <2 x i64>undef, i64 %x, i32 0 |
| ret <2 x i64>%res |
| } |
| |
| define <4 x i32> @mov_test4(i32* %x) { |
| ; GENERIC-LABEL: mov_test4: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test4: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %y = load i32, i32* %x |
| %res = insertelement <4 x i32>undef, i32 %y, i32 0 |
| ret <4 x i32>%res |
| } |
| |
| define void @mov_test5(float %x, float* %y) { |
| ; GENERIC-LABEL: mov_test5: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovss %xmm0, (%rdi) # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test5: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovss %xmm0, (%rdi) # sched: [1:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| store float %x, float* %y, align 4 |
| ret void |
| } |
| |
| define void @mov_test6(double %x, double* %y) { |
| ; GENERIC-LABEL: mov_test6: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovsd %xmm0, (%rdi) # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test6: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovsd %xmm0, (%rdi) # sched: [1:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| store double %x, double* %y, align 8 |
| ret void |
| } |
| |
| define float @mov_test7(i32* %x) { |
| ; GENERIC-LABEL: mov_test7: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test7: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %y = load i32, i32* %x |
| %res = bitcast i32 %y to float |
| ret float %res |
| } |
| |
| define i32 @mov_test8(<4 x i32> %x) { |
| ; GENERIC-LABEL: mov_test8: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test8: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %res = extractelement <4 x i32> %x, i32 0 |
| ret i32 %res |
| } |
| |
| define i64 @mov_test9(<2 x i64> %x) { |
| ; GENERIC-LABEL: mov_test9: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test9: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %res = extractelement <2 x i64> %x, i32 0 |
| ret i64 %res |
| } |
| |
| define <4 x i32> @mov_test10(i32* %x) { |
| ; GENERIC-LABEL: mov_test10: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test10: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %y = load i32, i32* %x, align 4 |
| %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0 |
| ret <4 x i32>%res |
| } |
| |
| define <4 x float> @mov_test11(float* %x) { |
| ; GENERIC-LABEL: mov_test11: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test11: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %y = load float, float* %x, align 4 |
| %res = insertelement <4 x float>zeroinitializer, float %y, i32 0 |
| ret <4 x float>%res |
| } |
| |
| define <2 x double> @mov_test12(double* %x) { |
| ; GENERIC-LABEL: mov_test12: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test12: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %y = load double, double* %x, align 8 |
| %res = insertelement <2 x double>zeroinitializer, double %y, i32 0 |
| ret <2 x double>%res |
| } |
| |
| define <2 x i64> @mov_test13(i64 %x) { |
| ; GENERIC-LABEL: mov_test13: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovq %rdi, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test13: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovq %rdi, %xmm0 # sched: [1:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %res = insertelement <2 x i64>zeroinitializer, i64 %x, i32 0 |
| ret <2 x i64>%res |
| } |
| |
| define <4 x i32> @mov_test14(i32 %x) { |
| ; GENERIC-LABEL: mov_test14: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test14: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %res = insertelement <4 x i32>zeroinitializer, i32 %x, i32 0 |
| ret <4 x i32>%res |
| } |
| |
| define <4 x i32> @mov_test15(i32* %x) { |
| ; GENERIC-LABEL: mov_test15: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test15: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %y = load i32, i32* %x, align 4 |
| %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0 |
| ret <4 x i32>%res |
| } |
| |
| define <16 x i32> @mov_test16(i8 * %addr) { |
| ; GENERIC-LABEL: mov_test16: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovups (%rdi), %zmm0 # sched: [7:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test16: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovups (%rdi), %zmm0 # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %vaddr = bitcast i8* %addr to <16 x i32>* |
| %res = load <16 x i32>, <16 x i32>* %vaddr, align 1 |
| ret <16 x i32>%res |
| } |
| |
| define <16 x i32> @mov_test17(i8 * %addr) { |
| ; GENERIC-LABEL: mov_test17: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovaps (%rdi), %zmm0 # sched: [7:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test17: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovaps (%rdi), %zmm0 # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %vaddr = bitcast i8* %addr to <16 x i32>* |
| %res = load <16 x i32>, <16 x i32>* %vaddr, align 64 |
| ret <16 x i32>%res |
| } |
| |
| define void @mov_test18(i8 * %addr, <8 x i64> %data) { |
| ; GENERIC-LABEL: mov_test18: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] |
| ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test18: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] |
| ; SKX-NEXT: vzeroupper # sched: [4:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %vaddr = bitcast i8* %addr to <8 x i64>* |
| store <8 x i64>%data, <8 x i64>* %vaddr, align 64 |
| ret void |
| } |
| |
| define void @mov_test19(i8 * %addr, <16 x i32> %data) { |
| ; GENERIC-LABEL: mov_test19: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] |
| ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test19: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] |
| ; SKX-NEXT: vzeroupper # sched: [4:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %vaddr = bitcast i8* %addr to <16 x i32>* |
| store <16 x i32>%data, <16 x i32>* %vaddr, align 1 |
| ret void |
| } |
| |
| define void @mov_test20(i8 * %addr, <16 x i32> %data) { |
| ; GENERIC-LABEL: mov_test20: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] |
| ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test20: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] |
| ; SKX-NEXT: vzeroupper # sched: [4:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %vaddr = bitcast i8* %addr to <16 x i32>* |
| store <16 x i32>%data, <16 x i32>* %vaddr, align 64 |
| ret void |
| } |
| |
| define <8 x i64> @mov_test21(i8 * %addr) { |
| ; GENERIC-LABEL: mov_test21: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovaps (%rdi), %zmm0 # sched: [7:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test21: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovaps (%rdi), %zmm0 # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %vaddr = bitcast i8* %addr to <8 x i64>* |
| %res = load <8 x i64>, <8 x i64>* %vaddr, align 64 |
| ret <8 x i64>%res |
| } |
| |
| define void @mov_test22(i8 * %addr, <8 x i64> %data) { |
| ; GENERIC-LABEL: mov_test22: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] |
| ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test22: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] |
| ; SKX-NEXT: vzeroupper # sched: [4:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %vaddr = bitcast i8* %addr to <8 x i64>* |
| store <8 x i64>%data, <8 x i64>* %vaddr, align 1 |
| ret void |
| } |
| |
| define <8 x i64> @mov_test23(i8 * %addr) { |
| ; GENERIC-LABEL: mov_test23: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovups (%rdi), %zmm0 # sched: [7:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test23: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovups (%rdi), %zmm0 # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %vaddr = bitcast i8* %addr to <8 x i64>* |
| %res = load <8 x i64>, <8 x i64>* %vaddr, align 1 |
| ret <8 x i64>%res |
| } |
| |
| define void @mov_test24(i8 * %addr, <8 x double> %data) { |
| ; GENERIC-LABEL: mov_test24: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] |
| ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test24: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] |
| ; SKX-NEXT: vzeroupper # sched: [4:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %vaddr = bitcast i8* %addr to <8 x double>* |
| store <8 x double>%data, <8 x double>* %vaddr, align 64 |
| ret void |
| } |
| |
| define <8 x double> @mov_test25(i8 * %addr) { |
| ; GENERIC-LABEL: mov_test25: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovaps (%rdi), %zmm0 # sched: [7:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test25: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovaps (%rdi), %zmm0 # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %vaddr = bitcast i8* %addr to <8 x double>* |
| %res = load <8 x double>, <8 x double>* %vaddr, align 64 |
| ret <8 x double>%res |
| } |
| |
| define void @mov_test26(i8 * %addr, <16 x float> %data) { |
| ; GENERIC-LABEL: mov_test26: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] |
| ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test26: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] |
| ; SKX-NEXT: vzeroupper # sched: [4:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %vaddr = bitcast i8* %addr to <16 x float>* |
| store <16 x float>%data, <16 x float>* %vaddr, align 64 |
| ret void |
| } |
| |
| define <16 x float> @mov_test27(i8 * %addr) { |
| ; GENERIC-LABEL: mov_test27: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovaps (%rdi), %zmm0 # sched: [7:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test27: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovaps (%rdi), %zmm0 # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %vaddr = bitcast i8* %addr to <16 x float>* |
| %res = load <16 x float>, <16 x float>* %vaddr, align 64 |
| ret <16 x float>%res |
| } |
| |
| define void @mov_test28(i8 * %addr, <8 x double> %data) { |
| ; GENERIC-LABEL: mov_test28: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] |
| ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test28: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] |
| ; SKX-NEXT: vzeroupper # sched: [4:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %vaddr = bitcast i8* %addr to <8 x double>* |
| store <8 x double>%data, <8 x double>* %vaddr, align 1 |
| ret void |
| } |
| |
| define <8 x double> @mov_test29(i8 * %addr) { |
| ; GENERIC-LABEL: mov_test29: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovups (%rdi), %zmm0 # sched: [7:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test29: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovups (%rdi), %zmm0 # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %vaddr = bitcast i8* %addr to <8 x double>* |
| %res = load <8 x double>, <8 x double>* %vaddr, align 1 |
| ret <8 x double>%res |
| } |
| |
| define void @mov_test30(i8 * %addr, <16 x float> %data) { |
| ; GENERIC-LABEL: mov_test30: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] |
| ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test30: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] |
| ; SKX-NEXT: vzeroupper # sched: [4:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %vaddr = bitcast i8* %addr to <16 x float>* |
| store <16 x float>%data, <16 x float>* %vaddr, align 1 |
| ret void |
| } |
| |
| define <16 x float> @mov_test31(i8 * %addr) { |
| ; GENERIC-LABEL: mov_test31: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovups (%rdi), %zmm0 # sched: [7:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test31: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovups (%rdi), %zmm0 # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %vaddr = bitcast i8* %addr to <16 x float>* |
| %res = load <16 x float>, <16 x float>* %vaddr, align 1 |
| ret <16 x float>%res |
| } |
| |
| define <16 x i32> @mov_test32(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) { |
| ; GENERIC-LABEL: mov_test32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} # sched: [7:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp ne <16 x i32> %mask1, zeroinitializer |
| %vaddr = bitcast i8* %addr to <16 x i32>* |
| %r = load <16 x i32>, <16 x i32>* %vaddr, align 64 |
| %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> %old |
| ret <16 x i32>%res |
| } |
| |
| define <16 x i32> @mov_test33(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) { |
| ; GENERIC-LABEL: mov_test33: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} # sched: [7:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test33: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp ne <16 x i32> %mask1, zeroinitializer |
| %vaddr = bitcast i8* %addr to <16 x i32>* |
| %r = load <16 x i32>, <16 x i32>* %vaddr, align 1 |
| %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> %old |
| ret <16 x i32>%res |
| } |
| |
| define <16 x i32> @mov_test34(i8 * %addr, <16 x i32> %mask1) { |
| ; GENERIC-LABEL: mov_test34: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test34: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp ne <16 x i32> %mask1, zeroinitializer |
| %vaddr = bitcast i8* %addr to <16 x i32>* |
| %r = load <16 x i32>, <16 x i32>* %vaddr, align 64 |
| %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> zeroinitializer |
| ret <16 x i32>%res |
| } |
| |
| define <16 x i32> @mov_test35(i8 * %addr, <16 x i32> %mask1) { |
| ; GENERIC-LABEL: mov_test35: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test35: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp ne <16 x i32> %mask1, zeroinitializer |
| %vaddr = bitcast i8* %addr to <16 x i32>* |
| %r = load <16 x i32>, <16 x i32>* %vaddr, align 1 |
| %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> zeroinitializer |
| ret <16 x i32>%res |
| } |
| |
| define <8 x i64> @mov_test36(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) { |
| ; GENERIC-LABEL: mov_test36: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} # sched: [7:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test36: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp ne <8 x i64> %mask1, zeroinitializer |
| %vaddr = bitcast i8* %addr to <8 x i64>* |
| %r = load <8 x i64>, <8 x i64>* %vaddr, align 64 |
| %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> %old |
| ret <8 x i64>%res |
| } |
| |
| define <8 x i64> @mov_test37(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) { |
| ; GENERIC-LABEL: mov_test37: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} # sched: [7:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test37: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp ne <8 x i64> %mask1, zeroinitializer |
| %vaddr = bitcast i8* %addr to <8 x i64>* |
| %r = load <8 x i64>, <8 x i64>* %vaddr, align 1 |
| %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> %old |
| ret <8 x i64>%res |
| } |
| |
| define <8 x i64> @mov_test38(i8 * %addr, <8 x i64> %mask1) { |
| ; GENERIC-LABEL: mov_test38: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vptestmq %zmm0, %zmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test38: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vptestmq %zmm0, %zmm0, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp ne <8 x i64> %mask1, zeroinitializer |
| %vaddr = bitcast i8* %addr to <8 x i64>* |
| %r = load <8 x i64>, <8 x i64>* %vaddr, align 64 |
| %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> zeroinitializer |
| ret <8 x i64>%res |
| } |
| |
| define <8 x i64> @mov_test39(i8 * %addr, <8 x i64> %mask1) { |
| ; GENERIC-LABEL: mov_test39: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vptestmq %zmm0, %zmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test39: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vptestmq %zmm0, %zmm0, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp ne <8 x i64> %mask1, zeroinitializer |
| %vaddr = bitcast i8* %addr to <8 x i64>* |
| %r = load <8 x i64>, <8 x i64>* %vaddr, align 1 |
| %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> zeroinitializer |
| ret <8 x i64>%res |
| } |
| |
| define <16 x float> @mov_test40(i8 * %addr, <16 x float> %old, <16 x float> %mask1) { |
| ; GENERIC-LABEL: mov_test40: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:1.00] |
| ; GENERIC-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00] |
| ; GENERIC-NEXT: vmovaps (%rdi), %zmm0 {%k1} # sched: [7:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test40: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.33] |
| ; SKX-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vmovaps (%rdi), %zmm0 {%k1} # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = fcmp one <16 x float> %mask1, zeroinitializer |
| %vaddr = bitcast i8* %addr to <16 x float>* |
| %r = load <16 x float>, <16 x float>* %vaddr, align 64 |
| %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> %old |
| ret <16 x float>%res |
| } |
| |
| define <16 x float> @mov_test41(i8 * %addr, <16 x float> %old, <16 x float> %mask1) { |
| ; GENERIC-LABEL: mov_test41: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:1.00] |
| ; GENERIC-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00] |
| ; GENERIC-NEXT: vmovups (%rdi), %zmm0 {%k1} # sched: [7:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test41: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.33] |
| ; SKX-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vmovups (%rdi), %zmm0 {%k1} # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = fcmp one <16 x float> %mask1, zeroinitializer |
| %vaddr = bitcast i8* %addr to <16 x float>* |
| %r = load <16 x float>, <16 x float>* %vaddr, align 1 |
| %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> %old |
| ret <16 x float>%res |
| } |
| |
| define <16 x float> @mov_test42(i8 * %addr, <16 x float> %mask1) { |
| ; GENERIC-LABEL: mov_test42: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00] |
| ; GENERIC-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00] |
| ; GENERIC-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test42: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33] |
| ; SKX-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = fcmp one <16 x float> %mask1, zeroinitializer |
| %vaddr = bitcast i8* %addr to <16 x float>* |
| %r = load <16 x float>, <16 x float>* %vaddr, align 64 |
| %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> zeroinitializer |
| ret <16 x float>%res |
| } |
| |
| define <16 x float> @mov_test43(i8 * %addr, <16 x float> %mask1) { |
| ; GENERIC-LABEL: mov_test43: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00] |
| ; GENERIC-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00] |
| ; GENERIC-NEXT: vmovups (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test43: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33] |
| ; SKX-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vmovups (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = fcmp one <16 x float> %mask1, zeroinitializer |
| %vaddr = bitcast i8* %addr to <16 x float>* |
| %r = load <16 x float>, <16 x float>* %vaddr, align 1 |
| %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> zeroinitializer |
| ret <16 x float>%res |
| } |
| |
| define <8 x double> @mov_test44(i8 * %addr, <8 x double> %old, <8 x double> %mask1) { |
| ; GENERIC-LABEL: mov_test44: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:1.00] |
| ; GENERIC-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00] |
| ; GENERIC-NEXT: vmovapd (%rdi), %zmm0 {%k1} # sched: [7:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test44: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:0.33] |
| ; SKX-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vmovapd (%rdi), %zmm0 {%k1} # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = fcmp one <8 x double> %mask1, zeroinitializer |
| %vaddr = bitcast i8* %addr to <8 x double>* |
| %r = load <8 x double>, <8 x double>* %vaddr, align 64 |
| %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> %old |
| ret <8 x double>%res |
| } |
| |
| define <8 x double> @mov_test45(i8 * %addr, <8 x double> %old, <8 x double> %mask1) { |
| ; GENERIC-LABEL: mov_test45: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:1.00] |
| ; GENERIC-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00] |
| ; GENERIC-NEXT: vmovupd (%rdi), %zmm0 {%k1} # sched: [7:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test45: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:0.33] |
| ; SKX-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vmovupd (%rdi), %zmm0 {%k1} # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = fcmp one <8 x double> %mask1, zeroinitializer |
| %vaddr = bitcast i8* %addr to <8 x double>* |
| %r = load <8 x double>, <8 x double>* %vaddr, align 1 |
| %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> %old |
| ret <8 x double>%res |
| } |
| |
| define <8 x double> @mov_test46(i8 * %addr, <8 x double> %mask1) { |
| ; GENERIC-LABEL: mov_test46: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00] |
| ; GENERIC-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00] |
| ; GENERIC-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test46: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33] |
| ; SKX-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = fcmp one <8 x double> %mask1, zeroinitializer |
| %vaddr = bitcast i8* %addr to <8 x double>* |
| %r = load <8 x double>, <8 x double>* %vaddr, align 64 |
| %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> zeroinitializer |
| ret <8 x double>%res |
| } |
| |
| define <8 x double> @mov_test47(i8 * %addr, <8 x double> %mask1) { |
| ; GENERIC-LABEL: mov_test47: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00] |
| ; GENERIC-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00] |
| ; GENERIC-NEXT: vmovupd (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mov_test47: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33] |
| ; SKX-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vmovupd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = fcmp one <8 x double> %mask1, zeroinitializer |
| %vaddr = bitcast i8* %addr to <8 x double>* |
| %r = load <8 x double>, <8 x double>* %vaddr, align 1 |
| %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> zeroinitializer |
| ret <8 x double>%res |
| } |
| |
| define i16 @mask16(i16 %x) { |
| ; GENERIC-LABEL: mask16: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: notl %edi # sched: [1:0.33] |
| ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mask16: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: notl %edi # sched: [1:0.25] |
| ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %m0 = bitcast i16 %x to <16 x i1> |
| %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1> |
| %ret = bitcast <16 x i1> %m1 to i16 |
| ret i16 %ret |
| } |
| |
| define i32 @mask16_zext(i16 %x) { |
| ; GENERIC-LABEL: mask16_zext: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: notl %edi # sched: [1:0.33] |
| ; GENERIC-NEXT: movzwl %di, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mask16_zext: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: notl %edi # sched: [1:0.25] |
| ; SKX-NEXT: movzwl %di, %eax # sched: [1:0.25] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %m0 = bitcast i16 %x to <16 x i1> |
| %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1> |
| %m2 = bitcast <16 x i1> %m1 to i16 |
| %ret = zext i16 %m2 to i32 |
| ret i32 %ret |
| } |
| |
| define i8 @mask8(i8 %x) { |
| ; GENERIC-LABEL: mask8: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: notb %dil # sched: [1:0.33] |
| ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mask8: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: notb %dil # sched: [1:0.25] |
| ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %m0 = bitcast i8 %x to <8 x i1> |
| %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1> |
| %ret = bitcast <8 x i1> %m1 to i8 |
| ret i8 %ret |
| } |
| |
| define i32 @mask8_zext(i8 %x) { |
| ; GENERIC-LABEL: mask8_zext: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: notb %dil # sched: [1:0.33] |
| ; GENERIC-NEXT: movzbl %dil, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mask8_zext: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: notb %dil # sched: [1:0.25] |
| ; SKX-NEXT: movzbl %dil, %eax # sched: [1:0.25] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %m0 = bitcast i8 %x to <8 x i1> |
| %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1> |
| %m2 = bitcast <8 x i1> %m1 to i8 |
| %ret = zext i8 %m2 to i32 |
| ret i32 %ret |
| } |
| |
| define void @mask16_mem(i16* %ptr) { |
| ; GENERIC-LABEL: mask16_mem: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: kmovw (%rdi), %k0 # sched: [5:0.50] |
| ; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mask16_mem: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: kmovw (%rdi), %k0 # sched: [7:1.00] |
| ; SKX-NEXT: knotw %k0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = load i16, i16* %ptr, align 4 |
| %m0 = bitcast i16 %x to <16 x i1> |
| %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1> |
| %ret = bitcast <16 x i1> %m1 to i16 |
| store i16 %ret, i16* %ptr, align 4 |
| ret void |
| } |
| |
| define void @mask8_mem(i8* %ptr) { |
| ; GENERIC-LABEL: mask8_mem: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: kmovb (%rdi), %k0 # sched: [5:0.50] |
| ; GENERIC-NEXT: knotb %k0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mask8_mem: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: kmovb (%rdi), %k0 # sched: [7:1.00] |
| ; SKX-NEXT: knotb %k0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = load i8, i8* %ptr, align 4 |
| %m0 = bitcast i8 %x to <8 x i1> |
| %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1> |
| %ret = bitcast <8 x i1> %m1 to i8 |
| store i8 %ret, i8* %ptr, align 4 |
| ret void |
| } |
| |
| define i16 @mand16(i16 %x, i16 %y) { |
| ; GENERIC-LABEL: mand16: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: xorl %esi, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: andl %esi, %edi # sched: [1:0.33] |
| ; GENERIC-NEXT: orl %eax, %edi # sched: [1:0.33] |
| ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mand16: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] |
| ; SKX-NEXT: xorl %esi, %eax # sched: [1:0.25] |
| ; SKX-NEXT: andl %esi, %edi # sched: [1:0.25] |
| ; SKX-NEXT: orl %eax, %edi # sched: [1:0.25] |
| ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %ma = bitcast i16 %x to <16 x i1> |
| %mb = bitcast i16 %y to <16 x i1> |
| %mc = and <16 x i1> %ma, %mb |
| %md = xor <16 x i1> %ma, %mb |
| %me = or <16 x i1> %mc, %md |
| %ret = bitcast <16 x i1> %me to i16 |
| ret i16 %ret |
| } |
| |
| define i16 @mand16_mem(<16 x i1>* %x, <16 x i1>* %y) { |
| ; GENERIC-LABEL: mand16_mem: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: kmovw (%rdi), %k0 # sched: [5:0.50] |
| ; GENERIC-NEXT: kmovw (%rsi), %k1 # sched: [5:0.50] |
| ; GENERIC-NEXT: kandw %k1, %k0, %k2 # sched: [1:0.33] |
| ; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: korw %k0, %k2, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: mand16_mem: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: kmovw (%rdi), %k0 # sched: [7:1.00] |
| ; SKX-NEXT: kmovw (%rsi), %k1 # sched: [7:1.00] |
| ; SKX-NEXT: kandw %k1, %k0, %k2 # sched: [1:1.00] |
| ; SKX-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: korw %k0, %k2, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] |
| ; SKX-NEXT: # kill: def $ax killed $ax killed $eax |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %ma = load <16 x i1>, <16 x i1>* %x |
| %mb = load <16 x i1>, <16 x i1>* %y |
| %mc = and <16 x i1> %ma, %mb |
| %md = xor <16 x i1> %ma, %mb |
| %me = or <16 x i1> %mc, %md |
| %ret = bitcast <16 x i1> %me to i16 |
| ret i16 %ret |
| } |
| |
| define i8 @shuf_test1(i16 %v) nounwind { |
| ; GENERIC-LABEL: shuf_test1: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: kshiftrw $8, %k0, %k0 # sched: [1:1.00] |
| ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: # kill: def $al killed $al killed $eax |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: shuf_test1: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: kshiftrw $8, %k0, %k0 # sched: [3:1.00] |
| ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] |
| ; SKX-NEXT: # kill: def $al killed $al killed $eax |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %v1 = bitcast i16 %v to <16 x i1> |
| %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> |
| %mask1 = bitcast <8 x i1> %mask to i8 |
| ret i8 %mask1 |
| } |
| |
| define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) { |
| ; GENERIC-LABEL: zext_test1: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [1:0.50] |
| ; GENERIC-NEXT: kshiftrw $5, %k0, %k0 # sched: [1:1.00] |
| ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: andl $1, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_test1: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00] |
| ; SKX-NEXT: kshiftrw $5, %k0, %k0 # sched: [3:1.00] |
| ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] |
| ; SKX-NEXT: andl $1, %eax # sched: [1:0.25] |
| ; SKX-NEXT: vzeroupper # sched: [4:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %cmp_res = icmp ugt <16 x i32> %a, %b |
| %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5 |
| %res = zext i1 %cmp_res.i1 to i32 |
| ret i32 %res |
| } |
| |
| define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) { |
| ; GENERIC-LABEL: zext_test2: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [1:0.50] |
| ; GENERIC-NEXT: kshiftrw $5, %k0, %k0 # sched: [1:1.00] |
| ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: andl $1, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax |
| ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_test2: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00] |
| ; SKX-NEXT: kshiftrw $5, %k0, %k0 # sched: [3:1.00] |
| ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] |
| ; SKX-NEXT: andl $1, %eax # sched: [1:0.25] |
| ; SKX-NEXT: # kill: def $ax killed $ax killed $eax |
| ; SKX-NEXT: vzeroupper # sched: [4:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %cmp_res = icmp ugt <16 x i32> %a, %b |
| %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5 |
| %res = zext i1 %cmp_res.i1 to i16 |
| ret i16 %res |
| } |
| |
| define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) { |
| ; GENERIC-LABEL: zext_test3: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [1:0.50] |
| ; GENERIC-NEXT: kshiftrw $5, %k0, %k0 # sched: [1:1.00] |
| ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: andb $1, %al # sched: [1:0.33] |
| ; GENERIC-NEXT: # kill: def $al killed $al killed $eax |
| ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: zext_test3: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00] |
| ; SKX-NEXT: kshiftrw $5, %k0, %k0 # sched: [3:1.00] |
| ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] |
| ; SKX-NEXT: andb $1, %al # sched: [1:0.25] |
| ; SKX-NEXT: # kill: def $al killed $al killed $eax |
| ; SKX-NEXT: vzeroupper # sched: [4:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %cmp_res = icmp ugt <16 x i32> %a, %b |
| %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5 |
| %res = zext i1 %cmp_res.i1 to i8 |
| ret i8 %res |
| } |
| |
| define i8 @conv1(<8 x i1>* %R) { |
| ; GENERIC-LABEL: conv1: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: movb $-1, (%rdi) # sched: [1:1.00] |
| ; GENERIC-NEXT: movb $-2, -{{[0-9]+}}(%rsp) # sched: [1:1.00] |
| ; GENERIC-NEXT: movb $-2, %al # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: conv1: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: movb $-1, (%rdi) # sched: [1:1.00] |
| ; SKX-NEXT: movb $-2, -{{[0-9]+}}(%rsp) # sched: [1:1.00] |
| ; SKX-NEXT: movb $-2, %al # sched: [1:0.25] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| store <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %R |
| |
| %maskPtr = alloca <8 x i1> |
| store <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %maskPtr |
| %mask = load <8 x i1>, <8 x i1>* %maskPtr |
| %mask_convert = bitcast <8 x i1> %mask to i8 |
| ret i8 %mask_convert |
| } |
| |
| define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) { |
| ; GENERIC-LABEL: test4: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpcmpleq %ymm1, %ymm0, %k1 # sched: [1:0.50] |
| ; GENERIC-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1} # sched: [1:0.50] |
| ; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test4: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpcmpleq %ymm1, %ymm0, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1} # sched: [3:1.00] |
| ; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25] |
| ; SKX-NEXT: vzeroupper # sched: [4:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x_gt_y = icmp sgt <4 x i64> %x, %y |
| %x1_gt_y1 = icmp sgt <4 x i64> %x1, %y1 |
| %res = icmp sgt <4 x i1>%x_gt_y, %x1_gt_y1 |
| %resse = sext <4 x i1>%res to <4 x i32> |
| ret <4 x i32> %resse |
| } |
| |
| define <2 x i64> @vcmp_test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) { |
| ; GENERIC-LABEL: vcmp_test5: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpcmpleq %xmm3, %xmm2, %k1 # sched: [1:0.50] |
| ; GENERIC-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 {%k1} # sched: [1:0.50] |
| ; GENERIC-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: vcmp_test5: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpcmpleq %xmm3, %xmm2, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 {%k1} # sched: [3:1.00] |
| ; SKX-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.25] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x_gt_y = icmp slt <2 x i64> %x, %y |
| %x1_gt_y1 = icmp sgt <2 x i64> %x1, %y1 |
| %res = icmp slt <2 x i1>%x_gt_y, %x1_gt_y1 |
| %resse = sext <2 x i1>%res to <2 x i64> |
| ret <2 x i64> %resse |
| }define void @vcmp_test6(<16 x i1> %mask) { |
| allocas: |
| %a= and <16 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false> |
| %b = bitcast <16 x i1> %a to i16 |
| %c = icmp eq i16 %b, 0 |
| br i1 %c, label %true, label %false |
| |
| true: |
| ret void |
| |
| false: |
| ret void |
| } |
| define void @vcmp_test7(<8 x i1> %mask) { |
| ; GENERIC-LABEL: vcmp_test7: |
| ; GENERIC: # %bb.0: # %allocas |
| ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: orb $85, %al # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: vcmp_test7: |
| ; SKX: # %bb.0: # %allocas |
| ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] |
| ; SKX-NEXT: orb $85, %al # sched: [1:0.25] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| allocas: |
| %a= or <8 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false> |
| %b = bitcast <8 x i1> %a to i8 |
| %c = icmp eq i8 %b, 0 |
| br i1 %c, label %true, label %false |
| |
| true: |
| ret void |
| |
| false: |
| ret void |
| } |
| define <16 x i8> @vcmp_test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) { |
| ; GENERIC-LABEL: vcmp_test8: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33] |
| ; GENERIC-NEXT: jg .LBB386_1 # sched: [1:1.00] |
| ; GENERIC-NEXT: # %bb.2: |
| ; GENERIC-NEXT: kxorw %k0, %k0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; GENERIC-NEXT: .LBB386_1: |
| ; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 # sched: [1:0.50] |
| ; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: vcmp_test8: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: cmpl %esi, %edi # sched: [1:0.25] |
| ; SKX-NEXT: jg .LBB386_1 # sched: [1:0.50] |
| ; SKX-NEXT: # %bb.2: |
| ; SKX-NEXT: kxorw %k0, %k0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25] |
| ; SKX-NEXT: vzeroupper # sched: [4:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| ; SKX-NEXT: .LBB386_1: |
| ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] |
| ; SKX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 # sched: [3:1.00] |
| ; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25] |
| ; SKX-NEXT: vzeroupper # sched: [4:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %cond = icmp sgt i32 %a1, %b1 |
| %cmp1 = icmp sgt <16 x i32> %a, zeroinitializer |
| %cmp2 = icmp ult <16 x i32> %b, zeroinitializer |
| %mix = select i1 %cond, <16 x i1> %cmp1, <16 x i1> %cmp2 |
| %res = sext <16 x i1> %mix to <16 x i8> |
| ret <16 x i8> %res |
| } |
| define <16 x i1> @vpmov_test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) { |
| ; GENERIC-LABEL: vpmov_test9: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33] |
| ; GENERIC-NEXT: jg .LBB387_1 # sched: [1:1.00] |
| ; GENERIC-NEXT: # %bb.2: |
| ; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: jmp .LBB387_3 # sched: [1:1.00] |
| ; GENERIC-NEXT: .LBB387_1: |
| ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: .LBB387_3: |
| ; GENERIC-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: vpmov_test9: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: cmpl %esi, %edi # sched: [1:0.25] |
| ; SKX-NEXT: jg .LBB387_1 # sched: [1:0.50] |
| ; SKX-NEXT: # %bb.2: |
| ; SKX-NEXT: vpsllw $7, %xmm1, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: jmp .LBB387_3 # sched: [1:0.50] |
| ; SKX-NEXT: .LBB387_1: |
| ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: .LBB387_3: |
| ; SKX-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp sgt i32 %a1, %b1 |
| %c = select i1 %mask, <16 x i1>%a, <16 x i1>%b |
| ret <16 x i1>%c |
| }define <8 x i1> @vpmov_test10(<8 x i1>%a, <8 x i1>%b, i32 %a1, i32 %b1) { |
| %mask = icmp sgt i32 %a1, %b1 |
| %c = select i1 %mask, <8 x i1>%a, <8 x i1>%b |
| ret <8 x i1>%c |
| } |
| |
| define <4 x i1> @vmov_test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) { |
| ; GENERIC-LABEL: vmov_test11: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33] |
| ; GENERIC-NEXT: jg .LBB389_1 # sched: [1:1.00] |
| ; GENERIC-NEXT: # %bb.2: |
| ; GENERIC-NEXT: vpslld $31, %xmm1, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: jmp .LBB389_3 # sched: [1:1.00] |
| ; GENERIC-NEXT: .LBB389_1: |
| ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: .LBB389_3: |
| ; GENERIC-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: vmov_test11: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: cmpl %esi, %edi # sched: [1:0.25] |
| ; SKX-NEXT: jg .LBB389_1 # sched: [1:0.50] |
| ; SKX-NEXT: # %bb.2: |
| ; SKX-NEXT: vpslld $31, %xmm1, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: jmp .LBB389_3 # sched: [1:0.50] |
| ; SKX-NEXT: .LBB389_1: |
| ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: .LBB389_3: |
| ; SKX-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp sgt i32 %a1, %b1 |
| %c = select i1 %mask, <4 x i1>%a, <4 x i1>%b |
| ret <4 x i1>%c |
| } |
| |
| define i32 @vmov_test12(i32 %x, i32 %y) { |
| ; GENERIC-LABEL: vmov_test12: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: vmov_test12: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = bitcast i16 21845 to <16 x i1> |
| %b = extractelement <16 x i1> %a, i32 0 |
| %c = select i1 %b, i32 %x, i32 %y |
| ret i32 %c |
| } |
| |
| define i32 @vmov_test13(i32 %x, i32 %y) { |
| ; GENERIC-LABEL: vmov_test13: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: movl %esi, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: vmov_test13: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: movl %esi, %eax # sched: [1:0.25] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = bitcast i16 21845 to <16 x i1> |
| %b = extractelement <16 x i1> %a, i32 3 |
| %c = select i1 %b, i32 %x, i32 %y |
| ret i32 %c |
| }define <4 x i1> @vmov_test14() { |
| %a = bitcast i16 21845 to <16 x i1> |
| %b = extractelement <16 x i1> %a, i32 2 |
| %c = insertelement <4 x i1> <i1 true, i1 false, i1 false, i1 true>, i1 %b, i32 1 |
| ret <4 x i1> %c |
| } |
| |
| define <16 x i1> @vmov_test15(i32 %x, i32 %y) { |
| ; GENERIC-LABEL: vmov_test15: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33] |
| ; GENERIC-NEXT: movl $21845, %eax # imm = 0x5555 |
| ; GENERIC-NEXT: # sched: [1:0.33] |
| ; GENERIC-NEXT: movl $1, %ecx # sched: [1:0.33] |
| ; GENERIC-NEXT: cmovgl %eax, %ecx # sched: [2:0.67] |
| ; GENERIC-NEXT: kmovd %ecx, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: vmov_test15: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: cmpl %esi, %edi # sched: [1:0.25] |
| ; SKX-NEXT: movl $21845, %eax # imm = 0x5555 |
| ; SKX-NEXT: # sched: [1:0.25] |
| ; SKX-NEXT: movl $1, %ecx # sched: [1:0.25] |
| ; SKX-NEXT: cmovgl %eax, %ecx # sched: [1:0.50] |
| ; SKX-NEXT: kmovd %ecx, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = bitcast i16 21845 to <16 x i1> |
| %b = bitcast i16 1 to <16 x i1> |
| %mask = icmp sgt i32 %x, %y |
| %c = select i1 %mask, <16 x i1> %a, <16 x i1> %b |
| ret <16 x i1> %c |
| } |
| |
| define <64 x i8> @vmov_test16(i64 %x) { |
| ; |
| ; GENERIC-LABEL: vmov_test16: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: kmovq %rdi, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: movb $1, %al # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: kshiftrq $5, %k0, %k2 # sched: [1:1.00] |
| ; GENERIC-NEXT: kxorq %k1, %k2, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: kshiftlq $63, %k1, %k1 # sched: [1:1.00] |
| ; GENERIC-NEXT: kshiftrq $58, %k1, %k1 # sched: [1:1.00] |
| ; GENERIC-NEXT: kxorq %k1, %k0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: vmov_test16: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: kmovq %rdi, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: movb $1, %al # sched: [1:0.25] |
| ; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: kshiftrq $5, %k0, %k2 # sched: [3:1.00] |
| ; SKX-NEXT: kxorq %k1, %k2, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: kshiftlq $63, %k1, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: kshiftrq $58, %k1, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: kxorq %k1, %k0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.25] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = bitcast i64 %x to <64 x i1> |
| %b = insertelement <64 x i1>%a, i1 true, i32 5 |
| %c = sext <64 x i1>%b to <64 x i8> |
| ret <64 x i8>%c |
| } |
| |
| define <64 x i8> @vmov_test17(i64 %x, i32 %y, i32 %z) { |
| ; |
| ; GENERIC-LABEL: vmov_test17: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: kmovq %rdi, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: cmpl %edx, %esi # sched: [1:0.33] |
| ; GENERIC-NEXT: setg %al # sched: [1:0.50] |
| ; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: kshiftrq $5, %k0, %k2 # sched: [1:1.00] |
| ; GENERIC-NEXT: kxorq %k1, %k2, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: kshiftlq $63, %k1, %k1 # sched: [1:1.00] |
| ; GENERIC-NEXT: kshiftrq $58, %k1, %k1 # sched: [1:1.00] |
| ; GENERIC-NEXT: kxorq %k1, %k0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: vmov_test17: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: kmovq %rdi, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: cmpl %edx, %esi # sched: [1:0.25] |
| ; SKX-NEXT: setg %al # sched: [1:0.50] |
| ; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: kshiftrq $5, %k0, %k2 # sched: [3:1.00] |
| ; SKX-NEXT: kxorq %k1, %k2, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: kshiftlq $63, %k1, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: kshiftrq $58, %k1, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: kxorq %k1, %k0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.25] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = bitcast i64 %x to <64 x i1> |
| %b = icmp sgt i32 %y, %z |
| %c = insertelement <64 x i1>%a, i1 %b, i32 5 |
| %d = sext <64 x i1>%c to <64 x i8> |
| ret <64 x i8>%d |
| } |
| |
| define <8 x i1> @vmov_test18(i8 %a, i16 %y) { |
| ; GENERIC-LABEL: vmov_test18: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovd %esi, %k2 # sched: [1:0.33] |
| ; GENERIC-NEXT: kshiftrw $8, %k2, %k0 # sched: [1:1.00] |
| ; GENERIC-NEXT: kshiftrw $9, %k2, %k2 # sched: [1:1.00] |
| ; GENERIC-NEXT: kshiftrb $6, %k1, %k3 # sched: [1:1.00] |
| ; GENERIC-NEXT: kxorb %k2, %k3, %k2 # sched: [1:0.33] |
| ; GENERIC-NEXT: kshiftlb $7, %k2, %k2 # sched: [1:1.00] |
| ; GENERIC-NEXT: kshiftrb $1, %k2, %k2 # sched: [1:1.00] |
| ; GENERIC-NEXT: kxorb %k2, %k1, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: kshiftlb $1, %k1, %k1 # sched: [1:1.00] |
| ; GENERIC-NEXT: kshiftrb $1, %k1, %k1 # sched: [1:1.00] |
| ; GENERIC-NEXT: kshiftlb $7, %k0, %k0 # sched: [1:1.00] |
| ; GENERIC-NEXT: korb %k0, %k1, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: vmov_test18: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: kmovd %esi, %k2 # sched: [1:1.00] |
| ; SKX-NEXT: kshiftrw $8, %k2, %k0 # sched: [3:1.00] |
| ; SKX-NEXT: kshiftrw $9, %k2, %k2 # sched: [3:1.00] |
| ; SKX-NEXT: kshiftrb $6, %k1, %k3 # sched: [3:1.00] |
| ; SKX-NEXT: kxorb %k2, %k3, %k2 # sched: [1:1.00] |
| ; SKX-NEXT: kshiftlb $7, %k2, %k2 # sched: [3:1.00] |
| ; SKX-NEXT: kshiftrb $1, %k2, %k2 # sched: [3:1.00] |
| ; SKX-NEXT: kxorb %k2, %k1, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: kshiftlb $1, %k1, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: kshiftrb $1, %k1, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: kshiftlb $7, %k0, %k0 # sched: [3:1.00] |
| ; SKX-NEXT: korb %k0, %k1, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.25] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = bitcast i8 %a to <8 x i1> |
| %b1 = bitcast i16 %y to <16 x i1> |
| %el1 = extractelement <16 x i1>%b1, i32 8 |
| %el2 = extractelement <16 x i1>%b1, i32 9 |
| %c = insertelement <8 x i1>%b, i1 %el1, i32 7 |
| %d = insertelement <8 x i1>%c, i1 %el2, i32 6 |
| ret <8 x i1>%d |
| } |
| define <32 x i16> @vmov_test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone { |
| ; GENERIC-LABEL: vmov_test21: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: vmov_test21: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer |
| ret <32 x i16> %ret |
| } |
| |
| define void @vmov_test22(<4 x i1> %a, <4 x i1>* %addr) { |
| ; GENERIC-LABEL: vmov_test22: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: vmov_test22: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| store <4 x i1> %a, <4 x i1>* %addr |
| ret void |
| } |
| |
| define void @vmov_test23(<2 x i1> %a, <2 x i1>* %addr) { |
| ; GENERIC-LABEL: vmov_test23: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovq2m %xmm0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: vmov_test23: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovq2m %xmm0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| store <2 x i1> %a, <2 x i1>* %addr |
| ret void |
| } |
| |
| define void @store_v1i1(<1 x i1> %c , <1 x i1>* %ptr) { |
| ; GENERIC-LABEL: store_v1i1: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: kxnorw %k0, %k0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovb %k0, (%rsi) # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: store_v1i1: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: kxnorw %k0, %k0, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: kmovb %k0, (%rsi) # sched: [1:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = xor <1 x i1> %c, <i1 1> |
| store <1 x i1> %x, <1 x i1>* %ptr, align 4 |
| ret void |
| } |
| |
| define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) { |
| ; GENERIC-LABEL: store_v2i1: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovq2m %xmm0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: store_v2i1: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovq2m %xmm0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: knotw %k0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = xor <2 x i1> %c, <i1 1, i1 1> |
| store <2 x i1> %x, <2 x i1>* %ptr, align 4 |
| ret void |
| } |
| |
| define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) { |
| ; GENERIC-LABEL: store_v4i1: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: store_v4i1: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: knotw %k0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = xor <4 x i1> %c, <i1 1, i1 1, i1 1, i1 1> |
| store <4 x i1> %x, <4 x i1>* %ptr, align 4 |
| ret void |
| } |
| |
| define void @store_v8i1(<8 x i1> %c , <8 x i1>* %ptr) { |
| ; GENERIC-LABEL: store_v8i1: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: knotb %k0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: store_v8i1: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: knotb %k0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = xor <8 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1> |
| store <8 x i1> %x, <8 x i1>* %ptr, align 4 |
| ret void |
| } |
| |
| define void @store_v16i1(<16 x i1> %c , <16 x i1>* %ptr) { |
| ; GENERIC-LABEL: store_v16i1: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: store_v16i1: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: knotw %k0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %x = xor <16 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1> |
| store <16 x i1> %x, <16 x i1>* %ptr, align 4 |
| ret void |
| } |
| |
| ;void f2(int); |
| ;void f1(int c) |
| ;{ |
| ; static int v = 0; |
| ; if (v == 0) |
| ; v = 1; |
| ; else |
| ; v = 0; |
| ; f2(v); |
| ;} |
| |
| @f1.v = internal unnamed_addr global i1 false, align 4 |
| |
| define void @f1(i32 %c) { |
| ; GENERIC-LABEL: f1: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: movzbl {{.*}}(%rip), %edi # sched: [5:0.50] |
| ; GENERIC-NEXT: xorl $1, %edi # sched: [1:0.33] |
| ; GENERIC-NEXT: movb %dil, {{.*}}(%rip) # sched: [1:1.00] |
| ; GENERIC-NEXT: jmp f2 # TAILCALL |
| ; |
| ; SKX-LABEL: f1: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: movzbl {{.*}}(%rip), %edi # sched: [5:0.50] |
| ; SKX-NEXT: xorl $1, %edi # sched: [1:0.25] |
| ; SKX-NEXT: movb %dil, {{.*}}(%rip) # sched: [1:1.00] |
| ; SKX-NEXT: jmp f2 # TAILCALL |
| entry: |
| %.b1 = load i1, i1* @f1.v, align 4 |
| %not..b1 = xor i1 %.b1, true |
| store i1 %not..b1, i1* @f1.v, align 4 |
| %0 = zext i1 %not..b1 to i32 |
| tail call void @f2(i32 %0) #2 |
| ret void |
| } |
| |
| declare void @f2(i32) #1 |
| |
| define void @store_i16_i1(i16 %x, i1 *%y) { |
| ; GENERIC-LABEL: store_i16_i1: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: andl $1, %edi # sched: [1:0.33] |
| ; GENERIC-NEXT: movb %dil, (%rsi) # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: store_i16_i1: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: andl $1, %edi # sched: [1:0.25] |
| ; SKX-NEXT: movb %dil, (%rsi) # sched: [1:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %c = trunc i16 %x to i1 |
| store i1 %c, i1* %y |
| ret void |
| } |
| |
| define void @store_i8_i1(i8 %x, i1 *%y) { |
| ; GENERIC-LABEL: store_i8_i1: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: andl $1, %edi # sched: [1:0.33] |
| ; GENERIC-NEXT: movb %dil, (%rsi) # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: store_i8_i1: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: andl $1, %edi # sched: [1:0.25] |
| ; SKX-NEXT: movb %dil, (%rsi) # sched: [1:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %c = trunc i8 %x to i1 |
| store i1 %c, i1* %y |
| ret void |
| } |
| |
| define <32 x i16> @test_build_vec_v32i1(<32 x i16> %x) { |
| ; GENERIC-LABEL: test_build_vec_v32i1: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: movl $1497715861, %eax # imm = 0x59455495 |
| ; GENERIC-NEXT: # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_build_vec_v32i1: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: movl $1497715861, %eax # imm = 0x59455495 |
| ; SKX-NEXT: # sched: [1:0.25] |
| ; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %ret = select <32 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <32 x i16> %x, <32 x i16> zeroinitializer |
| ret <32 x i16> %ret |
| } |
| |
| define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) { |
| ; GENERIC-LABEL: test_build_vec_v64i1: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero sched: [8:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_build_vec_v64i1: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero sched: [8:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %ret = select <64 x i1> <i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <64 x i8> %x, <64 x i8> zeroinitializer |
| ret <64 x i8> %ret |
| } |
| |
| define void @ktest_1(<8 x double> %in, double * %base) { |
| ; GENERIC-LABEL: ktest_1: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovupd (%rdi), %zmm1 # sched: [7:0.50] |
| ; GENERIC-NEXT: vcmpltpd %zmm0, %zmm1, %k1 # sched: [3:1.00] |
| ; GENERIC-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} # sched: [7:0.50] |
| ; GENERIC-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} # sched: [3:1.00] |
| ; GENERIC-NEXT: kortestb %k0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: je .LBB410_2 # sched: [1:1.00] |
| ; GENERIC-NEXT: # %bb.1: # %L1 |
| ; GENERIC-NEXT: vmovapd %zmm0, (%rdi) # sched: [1:1.00] |
| ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; GENERIC-NEXT: .LBB410_2: # %L2 |
| ; GENERIC-NEXT: vmovapd %zmm0, 8(%rdi) # sched: [1:1.00] |
| ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: ktest_1: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovupd (%rdi), %zmm1 # sched: [8:0.50] |
| ; SKX-NEXT: vcmpltpd %zmm0, %zmm1, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} # sched: [8:0.50] |
| ; SKX-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} # sched: [3:1.00] |
| ; SKX-NEXT: kortestb %k0, %k0 # sched: [3:1.00] |
| ; SKX-NEXT: je .LBB410_2 # sched: [1:0.50] |
| ; SKX-NEXT: # %bb.1: # %L1 |
| ; SKX-NEXT: vmovapd %zmm0, (%rdi) # sched: [1:1.00] |
| ; SKX-NEXT: vzeroupper # sched: [4:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| ; SKX-NEXT: .LBB410_2: # %L2 |
| ; SKX-NEXT: vmovapd %zmm0, 8(%rdi) # sched: [1:1.00] |
| ; SKX-NEXT: vzeroupper # sched: [4:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %addr1 = getelementptr double, double * %base, i64 0 |
| %addr2 = getelementptr double, double * %base, i64 1 |
| |
| %vaddr1 = bitcast double* %addr1 to <8 x double>* |
| %vaddr2 = bitcast double* %addr2 to <8 x double>* |
| |
| %val1 = load <8 x double>, <8 x double> *%vaddr1, align 1 |
| %val2 = load <8 x double>, <8 x double> *%vaddr2, align 1 |
| |
| %sel1 = fcmp ogt <8 x double>%in, %val1 |
| %val3 = select <8 x i1> %sel1, <8 x double> %val2, <8 x double> zeroinitializer |
| %sel2 = fcmp olt <8 x double> %in, %val3 |
| %sel3 = and <8 x i1> %sel1, %sel2 |
| |
| %int_sel3 = bitcast <8 x i1> %sel3 to i8 |
| %res = icmp eq i8 %int_sel3, zeroinitializer |
| br i1 %res, label %L2, label %L1 |
| L1: |
| store <8 x double> %in, <8 x double>* %vaddr1 |
| br label %End |
| L2: |
| store <8 x double> %in, <8 x double>* %vaddr2 |
| br label %End |
| End: |
| ret void |
| } |
| |
| define void @ktest_2(<32 x float> %in, float * %base) { |
| ; |
| ; GENERIC-LABEL: ktest_2: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vmovups (%rdi), %zmm2 # sched: [7:0.50] |
| ; GENERIC-NEXT: vmovups 64(%rdi), %zmm3 # sched: [7:0.50] |
| ; GENERIC-NEXT: vcmpltps %zmm0, %zmm2, %k1 # sched: [3:1.00] |
| ; GENERIC-NEXT: vcmpltps %zmm1, %zmm3, %k2 # sched: [3:1.00] |
| ; GENERIC-NEXT: kunpckwd %k1, %k2, %k0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z} # sched: [7:0.50] |
| ; GENERIC-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z} # sched: [7:0.50] |
| ; GENERIC-NEXT: vcmpltps %zmm3, %zmm0, %k1 # sched: [3:1.00] |
| ; GENERIC-NEXT: vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00] |
| ; GENERIC-NEXT: kunpckwd %k1, %k2, %k1 # sched: [1:1.00] |
| ; GENERIC-NEXT: kortestd %k1, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: je .LBB411_2 # sched: [1:1.00] |
| ; GENERIC-NEXT: # %bb.1: # %L1 |
| ; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] |
| ; GENERIC-NEXT: vmovaps %zmm1, 64(%rdi) # sched: [1:1.00] |
| ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; GENERIC-NEXT: .LBB411_2: # %L2 |
| ; GENERIC-NEXT: vmovaps %zmm0, 4(%rdi) # sched: [1:1.00] |
| ; GENERIC-NEXT: vmovaps %zmm1, 68(%rdi) # sched: [1:1.00] |
| ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: ktest_2: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vmovups (%rdi), %zmm2 # sched: [8:0.50] |
| ; SKX-NEXT: vmovups 64(%rdi), %zmm3 # sched: [8:0.50] |
| ; SKX-NEXT: vcmpltps %zmm0, %zmm2, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vcmpltps %zmm1, %zmm3, %k2 # sched: [3:1.00] |
| ; SKX-NEXT: kunpckwd %k1, %k2, %k0 # sched: [3:1.00] |
| ; SKX-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z} # sched: [8:0.50] |
| ; SKX-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z} # sched: [8:0.50] |
| ; SKX-NEXT: vcmpltps %zmm3, %zmm0, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00] |
| ; SKX-NEXT: kunpckwd %k1, %k2, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: kortestd %k1, %k0 # sched: [3:1.00] |
| ; SKX-NEXT: je .LBB411_2 # sched: [1:0.50] |
| ; SKX-NEXT: # %bb.1: # %L1 |
| ; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] |
| ; SKX-NEXT: vmovaps %zmm1, 64(%rdi) # sched: [1:1.00] |
| ; SKX-NEXT: vzeroupper # sched: [4:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| ; SKX-NEXT: .LBB411_2: # %L2 |
| ; SKX-NEXT: vmovaps %zmm0, 4(%rdi) # sched: [1:1.00] |
| ; SKX-NEXT: vmovaps %zmm1, 68(%rdi) # sched: [1:1.00] |
| ; SKX-NEXT: vzeroupper # sched: [4:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %addr1 = getelementptr float, float * %base, i64 0 |
| %addr2 = getelementptr float, float * %base, i64 1 |
| |
| %vaddr1 = bitcast float* %addr1 to <32 x float>* |
| %vaddr2 = bitcast float* %addr2 to <32 x float>* |
| |
| %val1 = load <32 x float>, <32 x float> *%vaddr1, align 1 |
| %val2 = load <32 x float>, <32 x float> *%vaddr2, align 1 |
| |
| %sel1 = fcmp ogt <32 x float>%in, %val1 |
| %val3 = select <32 x i1> %sel1, <32 x float> %val2, <32 x float> zeroinitializer |
| %sel2 = fcmp olt <32 x float> %in, %val3 |
| %sel3 = or <32 x i1> %sel1, %sel2 |
| |
| %int_sel3 = bitcast <32 x i1> %sel3 to i32 |
| %res = icmp eq i32 %int_sel3, zeroinitializer |
| br i1 %res, label %L2, label %L1 |
| L1: |
| store <32 x float> %in, <32 x float>* %vaddr1 |
| br label %End |
| L2: |
| store <32 x float> %in, <32 x float>* %vaddr2 |
| br label %End |
| End: |
| ret void |
| } |
| |
| define <8 x i64> @load_8i1(<8 x i1>* %a) { |
| ; GENERIC-LABEL: load_8i1: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: kmovb (%rdi), %k0 # sched: [5:0.50] |
| ; GENERIC-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: load_8i1: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: kmovb (%rdi), %k0 # sched: [7:1.00] |
| ; SKX-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.25] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = load <8 x i1>, <8 x i1>* %a |
| %c = sext <8 x i1> %b to <8 x i64> |
| ret <8 x i64> %c |
| } |
| |
| define <16 x i32> @load_16i1(<16 x i1>* %a) { |
| ; GENERIC-LABEL: load_16i1: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: kmovw (%rdi), %k0 # sched: [5:0.50] |
| ; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: load_16i1: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: kmovw (%rdi), %k0 # sched: [7:1.00] |
| ; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = load <16 x i1>, <16 x i1>* %a |
| %c = sext <16 x i1> %b to <16 x i32> |
| ret <16 x i32> %c |
| } |
| |
| define <2 x i16> @load_2i1(<2 x i1>* %a) { |
| ; GENERIC-LABEL: load_2i1: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: kmovb (%rdi), %k0 # sched: [5:0.50] |
| ; GENERIC-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: load_2i1: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: kmovb (%rdi), %k0 # sched: [7:1.00] |
| ; SKX-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.25] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = load <2 x i1>, <2 x i1>* %a |
| %c = sext <2 x i1> %b to <2 x i16> |
| ret <2 x i16> %c |
| } |
| |
| define <4 x i16> @load_4i1(<4 x i1>* %a) { |
| ; GENERIC-LABEL: load_4i1: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: kmovb (%rdi), %k0 # sched: [5:0.50] |
| ; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: load_4i1: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: kmovb (%rdi), %k0 # sched: [7:1.00] |
| ; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = load <4 x i1>, <4 x i1>* %a |
| %c = sext <4 x i1> %b to <4 x i16> |
| ret <4 x i16> %c |
| } |
| |
| define <32 x i16> @load_32i1(<32 x i1>* %a) { |
| ; GENERIC-LABEL: load_32i1: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: kmovd (%rdi), %k0 # sched: [5:0.50] |
| ; GENERIC-NEXT: vpmovm2w %k0, %zmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: load_32i1: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: kmovd (%rdi), %k0 # sched: [7:1.00] |
| ; SKX-NEXT: vpmovm2w %k0, %zmm0 # sched: [1:0.25] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = load <32 x i1>, <32 x i1>* %a |
| %c = sext <32 x i1> %b to <32 x i16> |
| ret <32 x i16> %c |
| } |
| |
| define <64 x i8> @load_64i1(<64 x i1>* %a) { |
| ; GENERIC-LABEL: load_64i1: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: kmovq (%rdi), %k0 # sched: [5:0.50] |
| ; GENERIC-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: load_64i1: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: kmovq (%rdi), %k0 # sched: [7:1.00] |
| ; SKX-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.25] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = load <64 x i1>, <64 x i1>* %a |
| %c = sext <64 x i1> %b to <64 x i8> |
| ret <64 x i8> %c |
| } |
| |
| define void @store_8i1(<8 x i1>* %a, <8 x i1> %v) { |
| ; GENERIC-LABEL: store_8i1: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: store_8i1: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| store <8 x i1> %v, <8 x i1>* %a |
| ret void |
| } |
| |
| define void @store_8i1_1(<8 x i1>* %a, <8 x i16> %v) { |
| ; GENERIC-LABEL: store_8i1_1: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: store_8i1_1: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %v1 = trunc <8 x i16> %v to <8 x i1> |
| store <8 x i1> %v1, <8 x i1>* %a |
| ret void |
| } |
| |
| define void @store_16i1(<16 x i1>* %a, <16 x i1> %v) { |
| ; GENERIC-LABEL: store_16i1: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: store_16i1: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| store <16 x i1> %v, <16 x i1>* %a |
| ret void |
| } |
| |
| define void @store_32i1(<32 x i1>* %a, <32 x i1> %v) { |
| ; GENERIC-LABEL: store_32i1: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovb2m %ymm0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovd %k0, (%rdi) # sched: [1:1.00] |
| ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: store_32i1: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpmovb2m %ymm0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: kmovd %k0, (%rdi) # sched: [1:1.00] |
| ; SKX-NEXT: vzeroupper # sched: [4:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| store <32 x i1> %v, <32 x i1>* %a |
| ret void |
| } |
| |
| define void @store_32i1_1(<32 x i1>* %a, <32 x i16> %v) { |
| ; GENERIC-LABEL: store_32i1_1: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $15, %zmm0, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovw2m %zmm0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovd %k0, (%rdi) # sched: [1:1.00] |
| ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: store_32i1_1: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $15, %zmm0, %zmm0 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovw2m %zmm0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: kmovd %k0, (%rdi) # sched: [1:1.00] |
| ; SKX-NEXT: vzeroupper # sched: [4:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %v1 = trunc <32 x i16> %v to <32 x i1> |
| store <32 x i1> %v1, <32 x i1>* %a |
| ret void |
| } |
| |
| |
| define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) { |
| ; |
| ; GENERIC-LABEL: store_64i1: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpsllw $7, %zmm0, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovq %k0, (%rdi) # sched: [1:1.00] |
| ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: store_64i1: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsllw $7, %zmm0, %zmm0 # sched: [1:1.00] |
| ; SKX-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: kmovq %k0, (%rdi) # sched: [1:1.00] |
| ; SKX-NEXT: vzeroupper # sched: [4:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| store <64 x i1> %v, <64 x i1>* %a |
| ret void |
| } |
| |
| define i32 @test_bitcast_v8i1_zext(<16 x i32> %a) { |
| ; GENERIC-LABEL: test_bitcast_v8i1_zext: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovb %k0, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: addl %eax, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_bitcast_v8i1_zext: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0 # sched: [3:1.00] |
| ; SKX-NEXT: kmovb %k0, %eax # sched: [3:1.00] |
| ; SKX-NEXT: addl %eax, %eax # sched: [1:0.25] |
| ; SKX-NEXT: vzeroupper # sched: [4:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %v1 = icmp eq <16 x i32> %a, zeroinitializer |
| %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| %mask1 = bitcast <8 x i1> %mask to i8 |
| %val = zext i8 %mask1 to i32 |
| %val1 = add i32 %val, %val |
| ret i32 %val1 |
| } |
| |
| define i32 @test_bitcast_v16i1_zext(<16 x i32> %a) { |
| ; GENERIC-LABEL: test_bitcast_v16i1_zext: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovw %k0, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: addl %eax, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_bitcast_v16i1_zext: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0 # sched: [3:1.00] |
| ; SKX-NEXT: kmovw %k0, %eax # sched: [3:1.00] |
| ; SKX-NEXT: addl %eax, %eax # sched: [1:0.25] |
| ; SKX-NEXT: vzeroupper # sched: [4:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %v1 = icmp eq <16 x i32> %a, zeroinitializer |
| %mask1 = bitcast <16 x i1> %v1 to i16 |
| %val = zext i16 %mask1 to i32 |
| %val1 = add i32 %val, %val |
| ret i32 %val1 |
| } |
| |
| define i16 @test_v16i1_add(i16 %x, i16 %y) { |
| ; GENERIC-LABEL: test_v16i1_add: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_v16i1_add: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] |
| ; SKX-NEXT: # kill: def $ax killed $ax killed $eax |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %m0 = bitcast i16 %x to <16 x i1> |
| %m1 = bitcast i16 %y to <16 x i1> |
| %m2 = add <16 x i1> %m0, %m1 |
| %ret = bitcast <16 x i1> %m2 to i16 |
| ret i16 %ret |
| } |
| |
| define i16 @test_v16i1_sub(i16 %x, i16 %y) { |
| ; GENERIC-LABEL: test_v16i1_sub: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_v16i1_sub: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] |
| ; SKX-NEXT: # kill: def $ax killed $ax killed $eax |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %m0 = bitcast i16 %x to <16 x i1> |
| %m1 = bitcast i16 %y to <16 x i1> |
| %m2 = sub <16 x i1> %m0, %m1 |
| %ret = bitcast <16 x i1> %m2 to i16 |
| ret i16 %ret |
| } |
| |
| define i16 @test_v16i1_mul(i16 %x, i16 %y) { |
| ; GENERIC-LABEL: test_v16i1_mul: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: kandw %k1, %k0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_v16i1_mul: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: kandw %k1, %k0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] |
| ; SKX-NEXT: # kill: def $ax killed $ax killed $eax |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %m0 = bitcast i16 %x to <16 x i1> |
| %m1 = bitcast i16 %y to <16 x i1> |
| %m2 = mul <16 x i1> %m0, %m1 |
| %ret = bitcast <16 x i1> %m2 to i16 |
| ret i16 %ret |
| } |
| |
| define i8 @test_v8i1_add(i8 %x, i8 %y) { |
| ; GENERIC-LABEL: test_v8i1_add: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: kxorb %k1, %k0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: # kill: def $al killed $al killed $eax |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_v8i1_add: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: kxorb %k1, %k0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] |
| ; SKX-NEXT: # kill: def $al killed $al killed $eax |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %m0 = bitcast i8 %x to <8 x i1> |
| %m1 = bitcast i8 %y to <8 x i1> |
| %m2 = add <8 x i1> %m0, %m1 |
| %ret = bitcast <8 x i1> %m2 to i8 |
| ret i8 %ret |
| } |
| |
| define i8 @test_v8i1_sub(i8 %x, i8 %y) { |
| ; GENERIC-LABEL: test_v8i1_sub: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: kxorb %k1, %k0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: # kill: def $al killed $al killed $eax |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_v8i1_sub: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: kxorb %k1, %k0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] |
| ; SKX-NEXT: # kill: def $al killed $al killed $eax |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %m0 = bitcast i8 %x to <8 x i1> |
| %m1 = bitcast i8 %y to <8 x i1> |
| %m2 = sub <8 x i1> %m0, %m1 |
| %ret = bitcast <8 x i1> %m2 to i8 |
| ret i8 %ret |
| } |
| |
| define i8 @test_v8i1_mul(i8 %x, i8 %y) { |
| ; GENERIC-LABEL: test_v8i1_mul: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: kandb %k1, %k0, %k0 # sched: [1:0.33] |
| ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] |
| ; GENERIC-NEXT: # kill: def $al killed $al killed $eax |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_v8i1_mul: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: kandb %k1, %k0, %k0 # sched: [1:1.00] |
| ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] |
| ; SKX-NEXT: # kill: def $al killed $al killed $eax |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %m0 = bitcast i8 %x to <8 x i1> |
| %m1 = bitcast i8 %y to <8 x i1> |
| %m2 = mul <8 x i1> %m0, %m1 |
| %ret = bitcast <8 x i1> %m2 to i8 |
| ret i8 %ret |
| } |
| |
| define <16 x i32> @_inreg16xi32(i32 %a) { |
| ; GENERIC-LABEL: _inreg16xi32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpbroadcastd %edi, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: _inreg16xi32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpbroadcastd %edi, %zmm0 # sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = insertelement <16 x i32> undef, i32 %a, i32 0 |
| %c = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer |
| ret <16 x i32> %c |
| } |
| |
| define <8 x i64> @_inreg8xi64(i64 %a) { |
| ; GENERIC-LABEL: _inreg8xi64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpbroadcastq %rdi, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: _inreg8xi64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpbroadcastq %rdi, %zmm0 # sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = insertelement <8 x i64> undef, i64 %a, i32 0 |
| %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer |
| ret <8 x i64> %c |
| } |
| |
| define <16 x float> @_ss16xfloat_v4(<4 x float> %a) { |
| ; GENERIC-LABEL: _ss16xfloat_v4: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: _ss16xfloat_v4: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = shufflevector <4 x float> %a, <4 x float> undef, <16 x i32> zeroinitializer |
| ret <16 x float> %b |
| } |
| |
| define <16 x float> @_inreg16xfloat(float %a) { |
| ; GENERIC-LABEL: _inreg16xfloat: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: _inreg16xfloat: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = insertelement <16 x float> undef, float %a, i32 0 |
| %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer |
| ret <16 x float> %c |
| } |
| |
| define <16 x float> @_ss16xfloat_mask(float %a, <16 x float> %i, <16 x i32> %mask1) { |
| ; GENERIC-LABEL: _ss16xfloat_mask: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vbroadcastss %xmm0, %zmm1 {%k1} # sched: [1:1.00] |
| ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: _ss16xfloat_mask: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vbroadcastss %xmm0, %zmm1 {%k1} # sched: [3:1.00] |
| ; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp ne <16 x i32> %mask1, zeroinitializer |
| %b = insertelement <16 x float> undef, float %a, i32 0 |
| %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer |
| %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> %i |
| ret <16 x float> %r |
| } |
| |
| define <16 x float> @_ss16xfloat_maskz(float %a, <16 x i32> %mask1) { |
| ; GENERIC-LABEL: _ss16xfloat_maskz: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z} # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: _ss16xfloat_maskz: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z} # sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp ne <16 x i32> %mask1, zeroinitializer |
| %b = insertelement <16 x float> undef, float %a, i32 0 |
| %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer |
| %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> zeroinitializer |
| ret <16 x float> %r |
| } |
| |
| define <16 x float> @_ss16xfloat_load(float* %a.ptr) { |
| ; GENERIC-LABEL: _ss16xfloat_load: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: _ss16xfloat_load: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vbroadcastss (%rdi), %zmm0 # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load float, float* %a.ptr |
| %b = insertelement <16 x float> undef, float %a, i32 0 |
| %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer |
| ret <16 x float> %c |
| } |
| |
| define <16 x float> @_ss16xfloat_mask_load(float* %a.ptr, <16 x float> %i, <16 x i32> %mask1) { |
| ; GENERIC-LABEL: _ss16xfloat_mask_load: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: _ss16xfloat_mask_load: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load float, float* %a.ptr |
| %mask = icmp ne <16 x i32> %mask1, zeroinitializer |
| %b = insertelement <16 x float> undef, float %a, i32 0 |
| %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer |
| %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> %i |
| ret <16 x float> %r |
| } |
| |
| define <16 x float> @_ss16xfloat_maskz_load(float* %a.ptr, <16 x i32> %mask1) { |
| ; GENERIC-LABEL: _ss16xfloat_maskz_load: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: _ss16xfloat_maskz_load: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load float, float* %a.ptr |
| %mask = icmp ne <16 x i32> %mask1, zeroinitializer |
| %b = insertelement <16 x float> undef, float %a, i32 0 |
| %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer |
| %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> zeroinitializer |
| ret <16 x float> %r |
| } |
| |
| define <8 x double> @_inreg8xdouble(double %a) { |
| ; GENERIC-LABEL: _inreg8xdouble: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: _inreg8xdouble: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = insertelement <8 x double> undef, double %a, i32 0 |
| %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer |
| ret <8 x double> %c |
| } |
| |
| define <8 x double> @_sd8xdouble_mask(double %a, <8 x double> %i, <8 x i32> %mask1) { |
| ; GENERIC-LABEL: _sd8xdouble_mask: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vptestmd %ymm2, %ymm2, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1} # sched: [1:1.00] |
| ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: _sd8xdouble_mask: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vptestmd %ymm2, %ymm2, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1} # sched: [3:1.00] |
| ; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp ne <8 x i32> %mask1, zeroinitializer |
| %b = insertelement <8 x double> undef, double %a, i32 0 |
| %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer |
| %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> %i |
| ret <8 x double> %r |
| } |
| |
| define <8 x double> @_sd8xdouble_maskz(double %a, <8 x i32> %mask1) { |
| ; GENERIC-LABEL: _sd8xdouble_maskz: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vptestmd %ymm1, %ymm1, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z} # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: _sd8xdouble_maskz: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vptestmd %ymm1, %ymm1, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z} # sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %mask = icmp ne <8 x i32> %mask1, zeroinitializer |
| %b = insertelement <8 x double> undef, double %a, i32 0 |
| %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer |
| %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> zeroinitializer |
| ret <8 x double> %r |
| } |
| |
| define <8 x double> @_sd8xdouble_load(double* %a.ptr) { |
| ; GENERIC-LABEL: _sd8xdouble_load: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: _sd8xdouble_load: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vbroadcastsd (%rdi), %zmm0 # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load double, double* %a.ptr |
| %b = insertelement <8 x double> undef, double %a, i32 0 |
| %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer |
| ret <8 x double> %c |
| } |
| |
| define <8 x double> @_sd8xdouble_mask_load(double* %a.ptr, <8 x double> %i, <8 x i32> %mask1) { |
| ; GENERIC-LABEL: _sd8xdouble_mask_load: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vptestmd %ymm1, %ymm1, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: _sd8xdouble_mask_load: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vptestmd %ymm1, %ymm1, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load double, double* %a.ptr |
| %mask = icmp ne <8 x i32> %mask1, zeroinitializer |
| %b = insertelement <8 x double> undef, double %a, i32 0 |
| %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer |
| %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> %i |
| ret <8 x double> %r |
| } |
| |
| define <8 x double> @_sd8xdouble_maskz_load(double* %a.ptr, <8 x i32> %mask1) { |
| ; GENERIC-LABEL: _sd8xdouble_maskz_load: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vptestmd %ymm0, %ymm0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: _sd8xdouble_maskz_load: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vptestmd %ymm0, %ymm0, %k1 # sched: [3:1.00] |
| ; SKX-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = load double, double* %a.ptr |
| %mask = icmp ne <8 x i32> %mask1, zeroinitializer |
| %b = insertelement <8 x double> undef, double %a, i32 0 |
| %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer |
| %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> zeroinitializer |
| ret <8 x double> %r |
| } |
| |
| define <16 x i32> @_xmm16xi32(<16 x i32> %a) { |
| ; GENERIC-LABEL: _xmm16xi32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: _xmm16xi32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> zeroinitializer |
| ret <16 x i32> %b |
| } |
| |
| define <16 x float> @_xmm16xfloat(<16 x float> %a) { |
| ; GENERIC-LABEL: _xmm16xfloat: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: _xmm16xfloat: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %b = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> zeroinitializer |
| ret <16 x float> %b |
| } |
| |
| define <16 x i32> @test_vbroadcast() { |
| ; GENERIC-LABEL: test_vbroadcast: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: vcmpunordps %zmm0, %zmm0, %k0 # sched: [3:1.00] |
| ; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: knotw %k0, %k1 # sched: [1:0.33] |
| ; GENERIC-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_vbroadcast: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.33] |
| ; SKX-NEXT: vcmpunordps %zmm0, %zmm0, %k0 # sched: [3:1.00] |
| ; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25] |
| ; SKX-NEXT: knotw %k0, %k1 # sched: [1:1.00] |
| ; SKX-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %0 = sext <16 x i1> zeroinitializer to <16 x i32> |
| %1 = fcmp uno <16 x float> undef, zeroinitializer |
| %2 = sext <16 x i1> %1 to <16 x i32> |
| %3 = select <16 x i1> %1, <16 x i32> %0, <16 x i32> %2 |
| ret <16 x i32> %3 |
| } |
| |
| ; We implement the set1 intrinsics with vector initializers. Verify that the |
| ; IR generated will produce broadcasts at the end. |
| define <8 x double> @test_set1_pd(double %d) #2 { |
| ; GENERIC-LABEL: test_set1_pd: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_set1_pd: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %vecinit.i = insertelement <8 x double> undef, double %d, i32 0 |
| %vecinit1.i = insertelement <8 x double> %vecinit.i, double %d, i32 1 |
| %vecinit2.i = insertelement <8 x double> %vecinit1.i, double %d, i32 2 |
| %vecinit3.i = insertelement <8 x double> %vecinit2.i, double %d, i32 3 |
| %vecinit4.i = insertelement <8 x double> %vecinit3.i, double %d, i32 4 |
| %vecinit5.i = insertelement <8 x double> %vecinit4.i, double %d, i32 5 |
| %vecinit6.i = insertelement <8 x double> %vecinit5.i, double %d, i32 6 |
| %vecinit7.i = insertelement <8 x double> %vecinit6.i, double %d, i32 7 |
| ret <8 x double> %vecinit7.i |
| } |
| |
| define <8 x i64> @test_set1_epi64(i64 %d) #2 { |
| ; GENERIC-LABEL: test_set1_epi64: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vpbroadcastq %rdi, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_set1_epi64: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vpbroadcastq %rdi, %zmm0 # sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %vecinit.i = insertelement <8 x i64> undef, i64 %d, i32 0 |
| %vecinit1.i = insertelement <8 x i64> %vecinit.i, i64 %d, i32 1 |
| %vecinit2.i = insertelement <8 x i64> %vecinit1.i, i64 %d, i32 2 |
| %vecinit3.i = insertelement <8 x i64> %vecinit2.i, i64 %d, i32 3 |
| %vecinit4.i = insertelement <8 x i64> %vecinit3.i, i64 %d, i32 4 |
| %vecinit5.i = insertelement <8 x i64> %vecinit4.i, i64 %d, i32 5 |
| %vecinit6.i = insertelement <8 x i64> %vecinit5.i, i64 %d, i32 6 |
| %vecinit7.i = insertelement <8 x i64> %vecinit6.i, i64 %d, i32 7 |
| ret <8 x i64> %vecinit7.i |
| } |
| |
| define <16 x float> @test_set1_ps(float %f) #2 { |
| ; GENERIC-LABEL: test_set1_ps: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_set1_ps: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %vecinit.i = insertelement <16 x float> undef, float %f, i32 0 |
| %vecinit1.i = insertelement <16 x float> %vecinit.i, float %f, i32 1 |
| %vecinit2.i = insertelement <16 x float> %vecinit1.i, float %f, i32 2 |
| %vecinit3.i = insertelement <16 x float> %vecinit2.i, float %f, i32 3 |
| %vecinit4.i = insertelement <16 x float> %vecinit3.i, float %f, i32 4 |
| %vecinit5.i = insertelement <16 x float> %vecinit4.i, float %f, i32 5 |
| %vecinit6.i = insertelement <16 x float> %vecinit5.i, float %f, i32 6 |
| %vecinit7.i = insertelement <16 x float> %vecinit6.i, float %f, i32 7 |
| %vecinit8.i = insertelement <16 x float> %vecinit7.i, float %f, i32 8 |
| %vecinit9.i = insertelement <16 x float> %vecinit8.i, float %f, i32 9 |
| %vecinit10.i = insertelement <16 x float> %vecinit9.i, float %f, i32 10 |
| %vecinit11.i = insertelement <16 x float> %vecinit10.i, float %f, i32 11 |
| %vecinit12.i = insertelement <16 x float> %vecinit11.i, float %f, i32 12 |
| %vecinit13.i = insertelement <16 x float> %vecinit12.i, float %f, i32 13 |
| %vecinit14.i = insertelement <16 x float> %vecinit13.i, float %f, i32 14 |
| %vecinit15.i = insertelement <16 x float> %vecinit14.i, float %f, i32 15 |
| ret <16 x float> %vecinit15.i |
| } |
| |
| define <16 x i32> @test_set1_epi32(i32 %f) #2 { |
| ; GENERIC-LABEL: test_set1_epi32: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vpbroadcastd %edi, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_set1_epi32: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vpbroadcastd %edi, %zmm0 # sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %vecinit.i = insertelement <16 x i32> undef, i32 %f, i32 0 |
| %vecinit1.i = insertelement <16 x i32> %vecinit.i, i32 %f, i32 1 |
| %vecinit2.i = insertelement <16 x i32> %vecinit1.i, i32 %f, i32 2 |
| %vecinit3.i = insertelement <16 x i32> %vecinit2.i, i32 %f, i32 3 |
| %vecinit4.i = insertelement <16 x i32> %vecinit3.i, i32 %f, i32 4 |
| %vecinit5.i = insertelement <16 x i32> %vecinit4.i, i32 %f, i32 5 |
| %vecinit6.i = insertelement <16 x i32> %vecinit5.i, i32 %f, i32 6 |
| %vecinit7.i = insertelement <16 x i32> %vecinit6.i, i32 %f, i32 7 |
| %vecinit8.i = insertelement <16 x i32> %vecinit7.i, i32 %f, i32 8 |
| %vecinit9.i = insertelement <16 x i32> %vecinit8.i, i32 %f, i32 9 |
| %vecinit10.i = insertelement <16 x i32> %vecinit9.i, i32 %f, i32 10 |
| %vecinit11.i = insertelement <16 x i32> %vecinit10.i, i32 %f, i32 11 |
| %vecinit12.i = insertelement <16 x i32> %vecinit11.i, i32 %f, i32 12 |
| %vecinit13.i = insertelement <16 x i32> %vecinit12.i, i32 %f, i32 13 |
| %vecinit14.i = insertelement <16 x i32> %vecinit13.i, i32 %f, i32 14 |
| %vecinit15.i = insertelement <16 x i32> %vecinit14.i, i32 %f, i32 15 |
| ret <16 x i32> %vecinit15.i |
| } |
| |
| ; We implement the scalar broadcast intrinsics with vector initializers. |
| ; Verify that the IR generated will produce the broadcast at the end. |
| define <8 x double> @test_mm512_broadcastsd_pd(<2 x double> %a) { |
| ; GENERIC-LABEL: test_mm512_broadcastsd_pd: |
| ; GENERIC: # %bb.0: # %entry |
| ; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: test_mm512_broadcastsd_pd: |
| ; SKX: # %bb.0: # %entry |
| ; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| entry: |
| %0 = extractelement <2 x double> %a, i32 0 |
| %vecinit.i = insertelement <8 x double> undef, double %0, i32 0 |
| %vecinit1.i = insertelement <8 x double> %vecinit.i, double %0, i32 1 |
| %vecinit2.i = insertelement <8 x double> %vecinit1.i, double %0, i32 2 |
| %vecinit3.i = insertelement <8 x double> %vecinit2.i, double %0, i32 3 |
| %vecinit4.i = insertelement <8 x double> %vecinit3.i, double %0, i32 4 |
| %vecinit5.i = insertelement <8 x double> %vecinit4.i, double %0, i32 5 |
| %vecinit6.i = insertelement <8 x double> %vecinit5.i, double %0, i32 6 |
| %vecinit7.i = insertelement <8 x double> %vecinit6.i, double %0, i32 7 |
| ret <8 x double> %vecinit7.i |
| } |
| |
| define <16 x float> @suff_test1(<8 x float>%a) { |
| ; GENERIC-LABEL: suff_test1: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: suff_test1: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %res = shufflevector <8 x float> %a, <8 x float> undef, <16 x i32> zeroinitializer |
| ret <16 x float>%res |
| } |
| |
| define <8 x double> @suff_test2(<4 x double>%a) { |
| ; GENERIC-LABEL: suff_test2: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: suff_test2: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %res = shufflevector <4 x double> %a, <4 x double> undef, <8 x i32> zeroinitializer |
| ret <8 x double>%res |
| } |
| |
| define <64 x i8> @_invec32xi8(<32 x i8>%a) { |
| ; GENERIC-LABEL: _invec32xi8: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpbroadcastb %xmm0, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: _invec32xi8: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpbroadcastb %xmm0, %zmm0 # sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %res = shufflevector <32 x i8> %a, <32 x i8> undef, <64 x i32> zeroinitializer |
| ret <64 x i8>%res |
| } |
| |
| define <32 x i16> @_invec16xi16(<16 x i16>%a) { |
| ; GENERIC-LABEL: _invec16xi16: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vpbroadcastw %xmm0, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: _invec16xi16: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpbroadcastw %xmm0, %zmm0 # sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %res = shufflevector <16 x i16> %a, <16 x i16> undef, <32 x i32> zeroinitializer |
| ret <32 x i16>%res |
| } |
| |
| define <16 x i32> @_invec8xi32(<8 x i32>%a) { |
| ; GENERIC-LABEL: _invec8xi32: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: _invec8xi32: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %res = shufflevector <8 x i32> %a, <8 x i32> undef, <16 x i32> zeroinitializer |
| ret <16 x i32>%res |
| } |
| |
| define <8 x i64> @_invec4xi64(<4 x i64>%a) { |
| ; GENERIC-LABEL: _invec4xi64: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: _invec4xi64: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %res = shufflevector <4 x i64> %a, <4 x i64> undef, <8 x i32> zeroinitializer |
| ret <8 x i64>%res |
| } |
| |
| declare void @func_f32(float) |
| define <16 x float> @broadcast_ss_spill(float %x) { |
| ; GENERIC-LABEL: broadcast_ss_spill: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: subq $24, %rsp # sched: [1:0.33] |
| ; GENERIC-NEXT: .cfi_def_cfa_offset 32 |
| ; GENERIC-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00] |
| ; GENERIC-NEXT: callq func_f32 |
| ; GENERIC-NEXT: vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:1.00] |
| ; GENERIC-NEXT: addq $24, %rsp # sched: [1:0.33] |
| ; GENERIC-NEXT: .cfi_def_cfa_offset 8 |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: broadcast_ss_spill: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: subq $24, %rsp # sched: [1:0.25] |
| ; SKX-NEXT: .cfi_def_cfa_offset 32 |
| ; SKX-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.50] |
| ; SKX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00] |
| ; SKX-NEXT: callq func_f32 |
| ; SKX-NEXT: vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:0.50] |
| ; SKX-NEXT: addq $24, %rsp # sched: [1:0.25] |
| ; SKX-NEXT: .cfi_def_cfa_offset 8 |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = fadd float %x, %x |
| call void @func_f32(float %a) |
| %b = insertelement <16 x float> undef, float %a, i32 0 |
| %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer |
| ret <16 x float> %c |
| } |
| |
| declare void @func_f64(double) |
| define <8 x double> @broadcast_sd_spill(double %x) { |
| ; GENERIC-LABEL: broadcast_sd_spill: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: subq $24, %rsp # sched: [1:0.33] |
| ; GENERIC-NEXT: .cfi_def_cfa_offset 32 |
| ; GENERIC-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] |
| ; GENERIC-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00] |
| ; GENERIC-NEXT: callq func_f64 |
| ; GENERIC-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:1.00] |
| ; GENERIC-NEXT: addq $24, %rsp # sched: [1:0.33] |
| ; GENERIC-NEXT: .cfi_def_cfa_offset 8 |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SKX-LABEL: broadcast_sd_spill: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: subq $24, %rsp # sched: [1:0.25] |
| ; SKX-NEXT: .cfi_def_cfa_offset 32 |
| ; SKX-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] |
| ; SKX-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00] |
| ; SKX-NEXT: callq func_f64 |
| ; SKX-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:0.50] |
| ; SKX-NEXT: addq $24, %rsp # sched: [1:0.25] |
| ; SKX-NEXT: .cfi_def_cfa_offset 8 |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| %a = fadd double %x, %x |
| call void @func_f64(double %a) |
| %b = insertelement <8 x double> undef, double %a, i32 0 |
| %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer |
| ret <8 x double> %c |
| } |