| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86 |
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64 |
| |
; Unmasked register-register form: the packssdw.512 intrinsic is called on two
; <16 x i32> arguments and is expected to become a single zmm vpackssdw on
; both triples.
| define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) { |
| ; CHECK-LABEL: test_mask_packs_epi32_rr_512: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0xc1] |
| ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] |
| %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b) |
| ret <32 x i16> %1 |
| } |
| |
; Merge-masked register-register form: %mask is bitcast to <32 x i1> and
; selects per lane between the packssdw.512 result and %passThru.
; Expected code: a {%k1}-masked vpackssdw into zmm2, then vmovdqa64 into zmm0.
; The mask is read from the stack on i686 and from %edi on x86_64.
| define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) { |
| ; X86-LABEL: test_mask_packs_epi32_rrk_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpackssdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0xd1] |
| ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_packs_epi32_rrk_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpackssdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0xd1] |
| ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b) |
| %2 = bitcast i32 %mask to <32 x i1> |
| %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru |
| ret <32 x i16> %3 |
| } |
| |
; Zero-masked register-register form: lanes whose %mask bit is clear are taken
; from zeroinitializer instead of the packssdw.512 result.
; Expected code: one vpackssdw with {%k1} {z}, with no extra vector move.
| define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) { |
| ; X86-LABEL: test_mask_packs_epi32_rrkz_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0xc1] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_packs_epi32_rrkz_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0xc1] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b) |
| %2 = bitcast i32 %mask to <32 x i1> |
| %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer |
| ret <32 x i16> %3 |
| } |
| |
; Unmasked register-memory form: the second operand is a full <16 x i32> load
; from %ptr_b. The expected code uses that load directly as the memory operand
; of vpackssdw (the i686 variant first fetches the pointer from the stack).
| define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) { |
| ; X86-LABEL: test_mask_packs_epi32_rm_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] |
| ; X86-NEXT: vpackssdw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0x00] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_packs_epi32_rm_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: vpackssdw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0x07] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %b = load <16 x i32>, <16 x i32>* %ptr_b |
| %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b) |
| ret <32 x i16> %1 |
| } |
| |
; Merge-masked register-memory form: the second operand is loaded from %ptr_b
; and the result is blended with %passThru under %mask.
; Expected code: a {%k1}-masked vpackssdw with a memory source into zmm1,
; then vmovdqa64 into zmm0.
| define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) { |
| ; X86-LABEL: test_mask_packs_epi32_rmk_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] |
| ; X86-NEXT: vpackssdw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0x08] |
| ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_packs_epi32_rmk_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] |
| ; X64-NEXT: vpackssdw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0x0f] |
| ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %b = load <16 x i32>, <16 x i32>* %ptr_b |
| %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b) |
| %2 = bitcast i32 %mask to <32 x i1> |
| %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru |
| ret <32 x i16> %3 |
| } |
| |
; Zero-masked register-memory form: the second operand is loaded from %ptr_b
; and masked-off lanes are taken from zeroinitializer.
; Expected code: one vpackssdw with a memory source and {%k1} {z}.
| define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) { |
| ; X86-LABEL: test_mask_packs_epi32_rmkz_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] |
| ; X86-NEXT: vpackssdw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0x00] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_packs_epi32_rmkz_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] |
| ; X64-NEXT: vpackssdw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0x07] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %b = load <16 x i32>, <16 x i32>* %ptr_b |
| %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b) |
| %2 = bitcast i32 %mask to <32 x i1> |
| %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer |
| ret <32 x i16> %3 |
| } |
| |
; Unmasked broadcast form: a single i32 loaded from %ptr_b is splatted to all
; 16 lanes (insertelement into lane 0 + all-zero shufflevector mask).
; Expected code: vpackssdw with a {1to16} memory operand and no separate
; broadcast instruction.
| define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) { |
| ; X86-LABEL: test_mask_packs_epi32_rmb_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] |
| ; X86-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x58,0x6b,0x00] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_packs_epi32_rmb_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x58,0x6b,0x07] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %q = load i32, i32* %ptr_b |
| %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 |
| %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer |
| %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b) |
| ret <32 x i16> %1 |
| } |
| |
; Merge-masked broadcast form: the second operand is an i32 from %ptr_b
; splatted to 16 lanes, and the result is blended with %passThru under %mask.
; Expected code: a {%k1}-masked vpackssdw with a {1to16} source into zmm1,
; then vmovdqa64 into zmm0.
| define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) { |
| ; X86-LABEL: test_mask_packs_epi32_rmbk_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] |
| ; X86-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x59,0x6b,0x08] |
| ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_packs_epi32_rmbk_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] |
| ; X64-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x59,0x6b,0x0f] |
| ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %q = load i32, i32* %ptr_b |
| %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 |
| %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer |
| %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b) |
| %2 = bitcast i32 %mask to <32 x i1> |
| %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru |
| ret <32 x i16> %3 |
| } |
| |
; Zero-masked broadcast form: the second operand is an i32 from %ptr_b
; splatted to 16 lanes, and masked-off lanes are taken from zeroinitializer.
; Expected code: one vpackssdw with a {1to16} source and {%k1} {z}.
| define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) { |
| ; X86-LABEL: test_mask_packs_epi32_rmbkz_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] |
| ; X86-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xd9,0x6b,0x00] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_packs_epi32_rmbkz_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] |
| ; X64-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xd9,0x6b,0x07] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %q = load i32, i32* %ptr_b |
| %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 |
| %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer |
| %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b) |
| %2 = bitcast i32 %mask to <32 x i1> |
| %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer |
| ret <32 x i16> %3 |
| } |
| |
; Intrinsic exercised by the test_mask_packs_epi32_* functions: takes two
; <16 x i32> vectors and returns <32 x i16>.
| declare <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32>, <16 x i32>) |
| |
; Unmasked register-register form: the packsswb.512 intrinsic is called on two
; <32 x i16> arguments and is expected to become a single zmm vpacksswb on
; both triples.
| define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { |
| ; CHECK-LABEL: test_mask_packs_epi16_rr_512: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0xc1] |
| ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] |
| %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b) |
| ret <64 x i8> %1 |
| } |
| |
; Merge-masked register-register form with a 64-bit mask: %mask is bitcast to
; <64 x i1> and selects between the packsswb.512 result and %passThru.
; Expected code: kmovq into %k1 (from the stack on i686, from %rdi on x86_64),
; a {%k1}-masked vpacksswb into zmm2, then vmovdqa64 into zmm0.
| define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) { |
| ; X86-LABEL: test_mask_packs_epi16_rrk_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpacksswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0xd1] |
| ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_packs_epi16_rrk_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpacksswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0xd1] |
| ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b) |
| %2 = bitcast i64 %mask to <64 x i1> |
| %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru |
| ret <64 x i8> %3 |
| } |
| |
; Zero-masked register-register form with a 64-bit mask: lanes whose %mask bit
; is clear are taken from zeroinitializer.
; Expected code: kmovq into %k1, then one vpacksswb with {%k1} {z}.
| define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) { |
| ; X86-LABEL: test_mask_packs_epi16_rrkz_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0xc1] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_packs_epi16_rrkz_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0xc1] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b) |
| %2 = bitcast i64 %mask to <64 x i1> |
| %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer |
| ret <64 x i8> %3 |
| } |
| |
; Unmasked register-memory form: the second operand is a full <32 x i16> load
; from %ptr_b. The expected code uses that load directly as the memory operand
; of vpacksswb.
| define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { |
| ; X86-LABEL: test_mask_packs_epi16_rm_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] |
| ; X86-NEXT: vpacksswb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0x00] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_packs_epi16_rm_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: vpacksswb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0x07] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %b = load <32 x i16>, <32 x i16>* %ptr_b |
| %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b) |
| ret <64 x i8> %1 |
| } |
| |
; Merge-masked register-memory form with a 64-bit mask: the second operand is
; loaded from %ptr_b and the result is blended with %passThru under %mask.
; Expected code: a {%k1}-masked vpacksswb with a memory source into zmm1,
; then vmovdqa64 into zmm0.
| define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) { |
| ; X86-LABEL: test_mask_packs_epi16_rmk_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] |
| ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08] |
| ; X86-NEXT: vpacksswb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0x08] |
| ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_packs_epi16_rmk_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce] |
| ; X64-NEXT: vpacksswb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0x0f] |
| ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %b = load <32 x i16>, <32 x i16>* %ptr_b |
| %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b) |
| %2 = bitcast i64 %mask to <64 x i1> |
| %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru |
| ret <64 x i8> %3 |
| } |
| |
; Zero-masked register-memory form with a 64-bit mask: the second operand is
; loaded from %ptr_b and masked-off lanes are taken from zeroinitializer.
; Expected code: one vpacksswb with a memory source and {%k1} {z}.
| define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) { |
| ; X86-LABEL: test_mask_packs_epi16_rmkz_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] |
| ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08] |
| ; X86-NEXT: vpacksswb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0x00] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_packs_epi16_rmkz_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce] |
| ; X64-NEXT: vpacksswb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0x07] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %b = load <32 x i16>, <32 x i16>* %ptr_b |
| %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b) |
| %2 = bitcast i64 %mask to <64 x i1> |
| %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer |
| ret <64 x i8> %3 |
| } |
| |
; Intrinsic exercised by the test_mask_packs_epi16_* functions: takes two
; <32 x i16> vectors and returns <64 x i8>.
| declare <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16>, <32 x i16>) |
| |
| |
; Unmasked register-register form: the packusdw.512 intrinsic is called on two
; <16 x i32> arguments and is expected to become a single zmm vpackusdw on
; both triples.
| define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) { |
| ; CHECK-LABEL: test_mask_packus_epi32_rr_512: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0xc1] |
| ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] |
| %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b) |
| ret <32 x i16> %1 |
| } |
| |
; Merge-masked register-register form: %mask is bitcast to <32 x i1> and
; selects per lane between the packusdw.512 result and %passThru.
; Expected code: a {%k1}-masked vpackusdw into zmm2, then vmovdqa64 into zmm0.
| define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) { |
| ; X86-LABEL: test_mask_packus_epi32_rrk_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpackusdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0xd1] |
| ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_packus_epi32_rrk_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpackusdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0xd1] |
| ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b) |
| %2 = bitcast i32 %mask to <32 x i1> |
| %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru |
| ret <32 x i16> %3 |
| } |
| |
; Zero-masked register-register form: lanes whose %mask bit is clear are taken
; from zeroinitializer instead of the packusdw.512 result.
; Expected code: one vpackusdw with {%k1} {z}.
| define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) { |
| ; X86-LABEL: test_mask_packus_epi32_rrkz_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0xc1] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_packus_epi32_rrkz_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0xc1] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b) |
| %2 = bitcast i32 %mask to <32 x i1> |
| %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer |
| ret <32 x i16> %3 |
| } |
| |
; Unmasked register-memory form: the second operand is a full <16 x i32> load
; from %ptr_b. The expected code uses that load directly as the memory operand
; of vpackusdw.
| define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) { |
| ; X86-LABEL: test_mask_packus_epi32_rm_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] |
| ; X86-NEXT: vpackusdw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0x00] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_packus_epi32_rm_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: vpackusdw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0x07] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %b = load <16 x i32>, <16 x i32>* %ptr_b |
| %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b) |
| ret <32 x i16> %1 |
| } |
| |
; Merge-masked register-memory form: the second operand is loaded from %ptr_b
; and the result is blended with %passThru under %mask.
; Expected code: a {%k1}-masked vpackusdw with a memory source into zmm1,
; then vmovdqa64 into zmm0.
| define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) { |
| ; X86-LABEL: test_mask_packus_epi32_rmk_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] |
| ; X86-NEXT: vpackusdw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0x08] |
| ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_packus_epi32_rmk_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] |
| ; X64-NEXT: vpackusdw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0x0f] |
| ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %b = load <16 x i32>, <16 x i32>* %ptr_b |
| %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b) |
| %2 = bitcast i32 %mask to <32 x i1> |
| %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru |
| ret <32 x i16> %3 |
| } |
| |
; Zero-masked register-memory form: the second operand is loaded from %ptr_b
; and masked-off lanes are taken from zeroinitializer.
; Expected code: one vpackusdw with a memory source and {%k1} {z}.
| define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) { |
| ; X86-LABEL: test_mask_packus_epi32_rmkz_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] |
| ; X86-NEXT: vpackusdw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0x00] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_packus_epi32_rmkz_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] |
| ; X64-NEXT: vpackusdw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0x07] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %b = load <16 x i32>, <16 x i32>* %ptr_b |
| %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b) |
| %2 = bitcast i32 %mask to <32 x i1> |
| %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer |
| ret <32 x i16> %3 |
| } |
| |
; Unmasked broadcast form: a single i32 loaded from %ptr_b is splatted to all
; 16 lanes (insertelement into lane 0 + all-zero shufflevector mask).
; Expected code: vpackusdw with a {1to16} memory operand and no separate
; broadcast instruction.
| define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) { |
| ; X86-LABEL: test_mask_packus_epi32_rmb_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] |
| ; X86-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x58,0x2b,0x00] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_packus_epi32_rmb_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x58,0x2b,0x07] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %q = load i32, i32* %ptr_b |
| %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 |
| %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer |
| %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b) |
| ret <32 x i16> %1 |
| } |
| |
; Merge-masked broadcast form: the second operand is an i32 from %ptr_b
; splatted to 16 lanes, and the result is blended with %passThru under %mask.
; Expected code: a {%k1}-masked vpackusdw with a {1to16} source into zmm1,
; then vmovdqa64 into zmm0.
| define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) { |
| ; X86-LABEL: test_mask_packus_epi32_rmbk_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] |
| ; X86-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x59,0x2b,0x08] |
| ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_packus_epi32_rmbk_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] |
| ; X64-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x59,0x2b,0x0f] |
| ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %q = load i32, i32* %ptr_b |
| %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 |
| %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer |
| %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b) |
| %2 = bitcast i32 %mask to <32 x i1> |
| %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru |
| ret <32 x i16> %3 |
| } |
| |
; Zero-masked broadcast form: the second operand is an i32 from %ptr_b
; splatted to 16 lanes, and masked-off lanes are taken from zeroinitializer.
; Expected code: one vpackusdw with a {1to16} source and {%k1} {z}.
| define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) { |
| ; X86-LABEL: test_mask_packus_epi32_rmbkz_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] |
| ; X86-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xd9,0x2b,0x00] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_packus_epi32_rmbkz_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] |
| ; X64-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xd9,0x2b,0x07] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %q = load i32, i32* %ptr_b |
| %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 |
| %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer |
| %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b) |
| %2 = bitcast i32 %mask to <32 x i1> |
| %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer |
| ret <32 x i16> %3 |
| } |
| |
; Intrinsic exercised by the test_mask_packus_epi32_* functions: takes two
; <16 x i32> vectors and returns <32 x i16>.
| declare <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32>, <16 x i32>) |
| |
; Unmasked register-register form: the packuswb.512 intrinsic is called on two
; <32 x i16> arguments and is expected to become a single zmm vpackuswb on
; both triples.
| define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { |
| ; CHECK-LABEL: test_mask_packus_epi16_rr_512: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0xc1] |
| ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] |
| %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b) |
| ret <64 x i8> %1 |
| } |
| |
; Merge-masked register-register form with a 64-bit mask: %mask is bitcast to
; <64 x i1> and selects between the packuswb.512 result and %passThru.
; Expected code: kmovq into %k1, a {%k1}-masked vpackuswb into zmm2, then
; vmovdqa64 into zmm0.
| define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) { |
| ; X86-LABEL: test_mask_packus_epi16_rrk_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpackuswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0xd1] |
| ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_packus_epi16_rrk_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpackuswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0xd1] |
| ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b) |
| %2 = bitcast i64 %mask to <64 x i1> |
| %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru |
| ret <64 x i8> %3 |
| } |
| |
; Zero-masked register-register form with a 64-bit mask: lanes whose %mask bit
; is clear are taken from zeroinitializer.
; Expected code: kmovq into %k1, then one vpackuswb with {%k1} {z}.
| define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) { |
| ; X86-LABEL: test_mask_packus_epi16_rrkz_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0xc1] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_packus_epi16_rrkz_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0xc1] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b) |
| %2 = bitcast i64 %mask to <64 x i1> |
| %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer |
| ret <64 x i8> %3 |
| } |
| |
; Unmasked register-memory form: the second operand is a full <32 x i16> load
; from %ptr_b. The expected code uses that load directly as the memory operand
; of vpackuswb.
| define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { |
| ; X86-LABEL: test_mask_packus_epi16_rm_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] |
| ; X86-NEXT: vpackuswb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0x00] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_packus_epi16_rm_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: vpackuswb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0x07] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %b = load <32 x i16>, <32 x i16>* %ptr_b |
| %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b) |
| ret <64 x i8> %1 |
| } |
| |
; Merge-masked register-memory form with a 64-bit mask: the second operand is
; loaded from %ptr_b and the result is blended with %passThru under %mask.
; Expected code: a {%k1}-masked vpackuswb with a memory source into zmm1,
; then vmovdqa64 into zmm0.
| define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) { |
| ; X86-LABEL: test_mask_packus_epi16_rmk_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] |
| ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08] |
| ; X86-NEXT: vpackuswb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0x08] |
| ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_packus_epi16_rmk_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce] |
| ; X64-NEXT: vpackuswb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0x0f] |
| ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %b = load <32 x i16>, <32 x i16>* %ptr_b |
| %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b) |
| %2 = bitcast i64 %mask to <64 x i1> |
| %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru |
| ret <64 x i8> %3 |
| } |
| |
; Zero-masked register-memory form with a 64-bit mask: the second operand is
; loaded from %ptr_b and masked-off lanes are taken from zeroinitializer.
; Expected code: one vpackuswb with a memory source and {%k1} {z}.
| define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) { |
| ; X86-LABEL: test_mask_packus_epi16_rmkz_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] |
| ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08] |
| ; X86-NEXT: vpackuswb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0x00] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_packus_epi16_rmkz_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce] |
| ; X64-NEXT: vpackuswb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0x07] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %b = load <32 x i16>, <32 x i16>* %ptr_b |
| %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b) |
| %2 = bitcast i64 %mask to <64 x i1> |
| %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer |
| ret <64 x i8> %3 |
| } |
| |
; Intrinsic exercised by the test_mask_packus_epi16_* functions: takes two
; <32 x i16> vectors and returns <64 x i8>.
| declare <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16>, <32 x i16>) |
| |
; Register-register form with an all-ones mask: mask.padds.w.512 is called
; with a zeroinitializer pass-through and i32 -1 as the mask.
; Expected code: a plain (unmasked) zmm vpaddsw on both triples.
| define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { |
| ; CHECK-LABEL: test_mask_adds_epi16_rr_512: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0xc1] |
| ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] |
| %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) |
| ret <32 x i16> %res |
| } |
| |
; Merge-masked register-register form: %passThru and %mask are handed directly
; to mask.padds.w.512 (no separate bitcast/select in the IR).
; Expected code: a {%k1}-masked vpaddsw into zmm2, then vmovdqa64 into zmm0.
| define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { |
| ; X86-LABEL: test_mask_adds_epi16_rrk_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0xd1] |
| ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_adds_epi16_rrk_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0xd1] |
| ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) |
| ret <32 x i16> %res |
| } |
| |
; Zero-masked register-register form: mask.padds.w.512 is called with a
; zeroinitializer pass-through and a variable %mask.
; Expected code: one vpaddsw with {%k1} {z}.
| define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { |
| ; X86-LABEL: test_mask_adds_epi16_rrkz_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0xc1] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_adds_epi16_rrkz_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0xc1] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) |
| ret <32 x i16> %res |
| } |
| |
; Register-memory form with an all-ones mask: the second operand is loaded
; from %ptr_b and mask.padds.w.512 is called with i32 -1 as the mask.
; Expected code: an unmasked vpaddsw that takes the load as its memory operand.
| define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { |
| ; X86-LABEL: test_mask_adds_epi16_rm_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] |
| ; X86-NEXT: vpaddsw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0x00] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_adds_epi16_rm_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0x07] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %b = load <32 x i16>, <32 x i16>* %ptr_b |
| %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) |
| ret <32 x i16> %res |
| } |
| |
| ; Merge-masking, memory operand: %b is loaded from %ptr_b; %passThru and %mask are forwarded. |
| ; The expected code folds the load into a {%k1}-masked vpaddsw targeting zmm1, then copies |
| ; zmm1 to zmm0. |
| define <32 x i16> @test_mask_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { |
| ; X86-LABEL: test_mask_adds_epi16_rmk_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] |
| ; X86-NEXT: vpaddsw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0x08] |
| ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_adds_epi16_rmk_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] |
| ; X64-NEXT: vpaddsw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0x0f] |
| ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %b = load <32 x i16>, <32 x i16>* %ptr_b |
| %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) |
| ret <32 x i16> %res |
| } |
| |
| ; Zero-masking, memory operand: %b is loaded from %ptr_b and the pass-through is zeroinitializer. |
| ; The expected code folds the load into a single vpaddsw carrying {%k1} {z}. |
| define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { |
| ; X86-LABEL: test_mask_adds_epi16_rmkz_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] |
| ; X86-NEXT: vpaddsw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0x00] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_adds_epi16_rmkz_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] |
| ; X64-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0x07] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %b = load <32 x i16>, <32 x i16>* %ptr_b |
| %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) |
| ret <32 x i16> %res |
| } |
| |
| ; Intrinsic exercised by the test_mask_adds_epi16_* tests; callers in this file pass |
| ; (a, b, pass-through vector, i32 mask). |
| declare <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) |
| |
| ; Unmasked, register/register operands: the pass-through is zeroinitializer and the mask is |
| ; i32 -1, so a plain vpsubsw with no mask register is expected. One set of expectations |
| ; covers both targets. |
| define <32 x i16> @test_mask_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { |
| ; CHECK-LABEL: test_mask_subs_epi16_rr_512: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0xc1] |
| ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] |
| %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) |
| ret <32 x i16> %res |
| } |
| |
| ; Merge-masking, register/register operands: %passThru and %mask are forwarded to the intrinsic. |
| ; The expected code writes the {%k1}-masked vpsubsw into zmm2 and then copies zmm2 to zmm0. |
| define <32 x i16> @test_mask_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { |
| ; X86-LABEL: test_mask_subs_epi16_rrk_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0xd1] |
| ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_subs_epi16_rrk_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0xd1] |
| ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) |
| ret <32 x i16> %res |
| } |
| |
| ; Zero-masking, register/register operands: the intrinsic is called with a zeroinitializer |
| ; pass-through and %mask. A single vpsubsw carrying {%k1} {z} is expected on both targets. |
| define <32 x i16> @test_mask_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { |
| ; X86-LABEL: test_mask_subs_epi16_rrkz_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0xc1] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_subs_epi16_rrkz_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0xc1] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) |
| ret <32 x i16> %res |
| } |
| |
| ; Unmasked, memory operand: %b is loaded from %ptr_b and the mask is i32 -1. |
| ; The load is expected to fold into vpsubsw as a memory operand, with no mask register. |
| define <32 x i16> @test_mask_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { |
| ; X86-LABEL: test_mask_subs_epi16_rm_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] |
| ; X86-NEXT: vpsubsw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0x00] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_subs_epi16_rm_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0x07] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %b = load <32 x i16>, <32 x i16>* %ptr_b |
| %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) |
| ret <32 x i16> %res |
| } |
| |
| ; Merge-masking, memory operand: %b is loaded from %ptr_b; %passThru and %mask are forwarded. |
| ; The expected code folds the load into a {%k1}-masked vpsubsw targeting zmm1, then copies |
| ; zmm1 to zmm0. |
| define <32 x i16> @test_mask_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { |
| ; X86-LABEL: test_mask_subs_epi16_rmk_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] |
| ; X86-NEXT: vpsubsw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0x08] |
| ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_subs_epi16_rmk_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] |
| ; X64-NEXT: vpsubsw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0x0f] |
| ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %b = load <32 x i16>, <32 x i16>* %ptr_b |
| %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) |
| ret <32 x i16> %res |
| } |
| |
| ; Zero-masking, memory operand: %b is loaded from %ptr_b and the pass-through is zeroinitializer. |
| ; The expected code folds the load into a single vpsubsw carrying {%k1} {z}. |
| define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { |
| ; X86-LABEL: test_mask_subs_epi16_rmkz_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] |
| ; X86-NEXT: vpsubsw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0x00] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_subs_epi16_rmkz_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] |
| ; X64-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0x07] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %b = load <32 x i16>, <32 x i16>* %ptr_b |
| %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) |
| ret <32 x i16> %res |
| } |
| |
| ; Intrinsic exercised by the test_mask_subs_epi16_* tests; callers in this file pass |
| ; (a, b, pass-through vector, i32 mask). |
| declare <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) |
| |
| ; Unmasked, register/register operands: the pass-through is zeroinitializer and the mask is |
| ; i32 -1, so a plain vpaddusw with no mask register is expected. One set of expectations |
| ; covers both targets. |
| define <32 x i16> @test_mask_adds_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) { |
| ; CHECK-LABEL: test_mask_adds_epu16_rr_512: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xdd,0xc1] |
| ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] |
| %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) |
| ret <32 x i16> %res |
| } |
| |
| ; Merge-masking, register/register operands: %passThru and %mask are forwarded to the intrinsic. |
| ; The expected code writes the {%k1}-masked vpaddusw into zmm2 and then copies zmm2 to zmm0. |
| define <32 x i16> @test_mask_adds_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { |
| ; X86-LABEL: test_mask_adds_epu16_rrk_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpaddusw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdd,0xd1] |
| ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_adds_epu16_rrk_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpaddusw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdd,0xd1] |
| ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) |
| ret <32 x i16> %res |
| } |
| |
| ; Zero-masking, register/register operands: the intrinsic is called with a zeroinitializer |
| ; pass-through and %mask. A single vpaddusw carrying {%k1} {z} is expected on both targets. |
| define <32 x i16> @test_mask_adds_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { |
| ; X86-LABEL: test_mask_adds_epu16_rrkz_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdd,0xc1] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_adds_epu16_rrkz_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdd,0xc1] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) |
| ret <32 x i16> %res |
| } |
| |
| ; Unmasked, memory operand: %b is loaded from %ptr_b and the mask is i32 -1. |
| ; The load is expected to fold into vpaddusw as a memory operand, with no mask register. |
| define <32 x i16> @test_mask_adds_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { |
| ; X86-LABEL: test_mask_adds_epu16_rm_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] |
| ; X86-NEXT: vpaddusw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xdd,0x00] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_adds_epu16_rm_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: vpaddusw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xdd,0x07] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %b = load <32 x i16>, <32 x i16>* %ptr_b |
| %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) |
| ret <32 x i16> %res |
| } |
| |
| ; Merge-masking, memory operand: %b is loaded from %ptr_b; %passThru and %mask are forwarded. |
| ; The expected code folds the load into a {%k1}-masked vpaddusw targeting zmm1, then copies |
| ; zmm1 to zmm0. |
| define <32 x i16> @test_mask_adds_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { |
| ; X86-LABEL: test_mask_adds_epu16_rmk_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] |
| ; X86-NEXT: vpaddusw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdd,0x08] |
| ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_adds_epu16_rmk_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] |
| ; X64-NEXT: vpaddusw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdd,0x0f] |
| ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %b = load <32 x i16>, <32 x i16>* %ptr_b |
| %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) |
| ret <32 x i16> %res |
| } |
| |
| ; Zero-masking, memory operand: %b is loaded from %ptr_b and the pass-through is zeroinitializer. |
| ; The expected code folds the load into a single vpaddusw carrying {%k1} {z}. |
| define <32 x i16> @test_mask_adds_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { |
| ; X86-LABEL: test_mask_adds_epu16_rmkz_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] |
| ; X86-NEXT: vpaddusw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdd,0x00] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_adds_epu16_rmkz_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] |
| ; X64-NEXT: vpaddusw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdd,0x07] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %b = load <32 x i16>, <32 x i16>* %ptr_b |
| %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) |
| ret <32 x i16> %res |
| } |
| |
| ; Intrinsic exercised by the test_mask_adds_epu16_* tests; callers in this file pass |
| ; (a, b, pass-through vector, i32 mask). |
| declare <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) |
| |
| ; Unmasked, register/register operands: the pass-through is zeroinitializer and the mask is |
| ; i32 -1, so a plain vpsubusw with no mask register is expected. One set of expectations |
| ; covers both targets. |
| define <32 x i16> @test_mask_subs_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) { |
| ; CHECK-LABEL: test_mask_subs_epu16_rr_512: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd9,0xc1] |
| ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] |
| %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) |
| ret <32 x i16> %res |
| } |
| |
| ; Merge-masking, register/register operands: %passThru and %mask are forwarded to the intrinsic. |
| ; The expected code writes the {%k1}-masked vpsubusw into zmm2 and then copies zmm2 to zmm0. |
| define <32 x i16> @test_mask_subs_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { |
| ; X86-LABEL: test_mask_subs_epu16_rrk_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpsubusw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd9,0xd1] |
| ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_subs_epu16_rrk_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpsubusw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd9,0xd1] |
| ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) |
| ret <32 x i16> %res |
| } |
| |
| ; Zero-masking, register/register operands: the intrinsic is called with a zeroinitializer |
| ; pass-through and %mask. A single vpsubusw carrying {%k1} {z} is expected on both targets. |
| define <32 x i16> @test_mask_subs_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { |
| ; X86-LABEL: test_mask_subs_epu16_rrkz_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd9,0xc1] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_subs_epu16_rrkz_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd9,0xc1] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) |
| ret <32 x i16> %res |
| } |
| |
| ; Unmasked, memory operand: %b is loaded from %ptr_b and the mask is i32 -1. |
| ; The load is expected to fold into vpsubusw as a memory operand, with no mask register. |
| define <32 x i16> @test_mask_subs_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { |
| ; X86-LABEL: test_mask_subs_epu16_rm_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] |
| ; X86-NEXT: vpsubusw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd9,0x00] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_subs_epu16_rm_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: vpsubusw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd9,0x07] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %b = load <32 x i16>, <32 x i16>* %ptr_b |
| %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) |
| ret <32 x i16> %res |
| } |
| |
| ; Merge-masking, memory operand: %b is loaded from %ptr_b; %passThru and %mask are forwarded. |
| ; The expected code folds the load into a {%k1}-masked vpsubusw targeting zmm1, then copies |
| ; zmm1 to zmm0. |
| define <32 x i16> @test_mask_subs_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { |
| ; X86-LABEL: test_mask_subs_epu16_rmk_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] |
| ; X86-NEXT: vpsubusw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd9,0x08] |
| ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_subs_epu16_rmk_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] |
| ; X64-NEXT: vpsubusw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd9,0x0f] |
| ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %b = load <32 x i16>, <32 x i16>* %ptr_b |
| %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) |
| ret <32 x i16> %res |
| } |
| |
| ; Zero-masking, memory operand: %b is loaded from %ptr_b and the pass-through is zeroinitializer. |
| ; The expected code folds the load into a single vpsubusw carrying {%k1} {z}. |
| define <32 x i16> @test_mask_subs_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { |
| ; X86-LABEL: test_mask_subs_epu16_rmkz_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] |
| ; X86-NEXT: vpsubusw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd9,0x00] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_mask_subs_epu16_rmkz_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] |
| ; X64-NEXT: vpsubusw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd9,0x07] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %b = load <32 x i16>, <32 x i16>* %ptr_b |
| %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) |
| ret <32 x i16> %res |
| } |
| |
| ; Intrinsic exercised by the test_mask_subs_epu16_* tests; callers in this file pass |
| ; (a, b, pass-through vector, i32 mask). |
| declare <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) |
| |
| ; Calls the vpermi2var.hi.512 intrinsic twice with the same operands (%x1, %x0, %x2). |
| ; The first result is selected against %x1 under the <32 x i1> bitcast of %x3, the second is |
| ; used as-is, and the two are added. The expected code is an unmasked vpermt2w on a copy of |
| ; zmm1, a {%k1}-masked vpermt2w on zmm1 itself, and a vpaddw of the two results. |
| define <32 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { |
| ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9] |
| ; X86-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x7d,0xda] |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x7d,0xca] |
| ; X86-NEXT: vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9] |
| ; X64-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x7d,0xda] |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x7d,0xca] |
| ; X64-NEXT: vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2) |
| %2 = bitcast i32 %x3 to <32 x i1> |
| %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x1 |
| %4 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2) |
| %res2 = add <32 x i16> %3, %4 |
| ret <32 x i16> %res2 |
| } |
| |
| ; Zero-masking variant: the vpermi2var.hi.512 intrinsic is called twice with operands |
| ; (%x1, %x0, %x2). The first result is selected against zeroinitializer under the <32 x i1> |
| ; bitcast of %x3, and is then added to the second, unmasked result. The expected code is an |
| ; unmasked vpermt2w, a vpermt2w carrying {%k1} {z}, and a vpaddw. |
| define <32 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { |
| ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9] |
| ; X86-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x7d,0xda] |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x7d,0xca] |
| ; X86-NEXT: vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9] |
| ; X64-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x7d,0xda] |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x7d,0xca] |
| ; X64-NEXT: vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2) |
| %2 = bitcast i32 %x3 to <32 x i1> |
| %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer |
| %4 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2) |
| %res2 = add <32 x i16> %3, %4 |
| ret <32 x i16> %res2 |
| } |
| |
| ; Unmasked three-operand intrinsic shared by the vpermt2var and vpermi2var tests in this file; |
| ; those tests apply masking with a separate select. |
| declare <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>) |
| |
| ; Calls the vpermi2var.hi.512 intrinsic twice with operands (%x0, %x1, %x2). The first |
| ; result is selected against %x1 (the second intrinsic operand) under the <32 x i1> bitcast |
| ; of %x3, and is then added to the second, unmasked result. The expected code is an unmasked |
| ; vpermt2w on a copy of zmm0, a {%k1}-masked vpermi2w writing zmm1, and a vpaddw. |
| define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { |
| ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] |
| ; X86-NEXT: vpermt2w %zmm2, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0x7d,0xda] |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpermi2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x75,0xca] |
| ; X86-NEXT: vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] |
| ; X64-NEXT: vpermt2w %zmm2, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0x7d,0xda] |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpermi2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x75,0xca] |
| ; X64-NEXT: vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) |
| %2 = bitcast i32 %x3 to <32 x i1> |
| %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x1 |
| %4 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) |
| %res2 = add <32 x i16> %3, %4 |
| ret <32 x i16> %res2 |
| } |
| |
| ; Unmasked two-operand byte-shuffle intrinsic used by the three pshuf_b_512 tests that follow. |
| declare <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8>, <64 x i8>) |
| |
| ; Unmasked case: a direct call to the pshuf.b.512 intrinsic is expected to become a single |
| ; vpshufb on zmm registers. One set of expectations covers both targets. |
| define <64 x i8>@test_int_x86_avx512_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1) { |
| ; CHECK-LABEL: test_int_x86_avx512_pshuf_b_512: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vpshufb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x00,0xc1] |
| ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] |
| %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1) |
| ret <64 x i8> %res |
| } |
| |
| ; Merge-masking case: the intrinsic result is selected against %x2 using the i64 %mask bitcast |
| ; to <64 x i1>. The expected code loads the mask with kmovq, performs a {%k1}-masked vpshufb |
| ; into zmm2, and copies zmm2 to zmm0. |
| define <64 x i8>@test_int_x86_avx512_pshuf_b_512_mask(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %mask) { |
| ; X86-LABEL: test_int_x86_avx512_pshuf_b_512_mask: |
| ; X86: # %bb.0: |
| ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x00,0xd1] |
| ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_int_x86_avx512_pshuf_b_512_mask: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x00,0xd1] |
| ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1) |
| %mask.cast = bitcast i64 %mask to <64 x i1> |
| %res2 = select <64 x i1> %mask.cast, <64 x i8> %res, <64 x i8> %x2 |
| ret <64 x i8> %res2 |
| } |
| |
| ; Zero-masking case: the intrinsic result is selected against zeroinitializer using the i64 |
| ; %mask bitcast to <64 x i1>. The expected code loads the mask with kmovq and emits a single |
| ; vpshufb carrying {%k1} {z}. |
| define <64 x i8>@test_int_x86_avx512_pshuf_b_512_maskz(<64 x i8> %x0, <64 x i8> %x1, i64 %mask) { |
| ; X86-LABEL: test_int_x86_avx512_pshuf_b_512_maskz: |
| ; X86: # %bb.0: |
| ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpshufb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x00,0xc1] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_int_x86_avx512_pshuf_b_512_maskz: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpshufb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x00,0xc1] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1) |
| %mask.cast = bitcast i64 %mask to <64 x i1> |
| %res2 = select <64 x i1> %mask.cast, <64 x i8> %res, <64 x i8> zeroinitializer |
| ret <64 x i8> %res2 |
| } |
| |
| ; Unmasked two-operand intrinsic used by test_int_x86_avx512_mask_pmulhu_w_512, which |
| ; expects it to lower to vpmulhuw. |
| declare <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16>, <32 x i16>) |
| |
| ; Calls the pmulhu.w.512 intrinsic twice on (%x0, %x1). The first result is selected against |
| ; %x2 under the <32 x i1> bitcast of %x3, and is then added to the second, unmasked result. |
| ; The expected code is an unmasked vpmulhuw into zmm3, a {%k1}-masked vpmulhuw into zmm2, |
| ; and a vpaddw of the two. |
| define <32 x i16> @test_int_x86_avx512_mask_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { |
| ; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: vpmulhuw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe4,0xd9] |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpmulhuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe4,0xd1] |
| ; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: vpmulhuw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe4,0xd9] |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpmulhuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe4,0xd1] |
| ; X64-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %1 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1) |
| %2 = bitcast i32 %x3 to <32 x i1> |
| %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2 |
| %4 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1) |
| %res2 = add <32 x i16> %3, %4 |
| ret <32 x i16> %res2 |
| } |
| |
| ; Unmasked two-operand intrinsic used by test_int_x86_avx512_mask_pmulh_w_512, which |
| ; expects it to lower to vpmulhw. |
| declare <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16>, <32 x i16>) |
| |
| ; Calls the pmulh.w.512 intrinsic twice on (%x0, %x1). The first result is selected against |
| ; %x2 under the <32 x i1> bitcast of %x3, and is then added to the second, unmasked result. |
| ; The expected code is an unmasked vpmulhw into zmm3, a {%k1}-masked vpmulhw into zmm2, |
| ; and a vpaddw of the two. |
| define <32 x i16> @test_int_x86_avx512_mask_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { |
| ; X86-LABEL: test_int_x86_avx512_mask_pmulh_w_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: vpmulhw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe5,0xd9] |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpmulhw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe5,0xd1] |
| ; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: vpmulhw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe5,0xd9] |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpmulhw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe5,0xd1] |
| ; X64-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %1 = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1) |
| %2 = bitcast i32 %x3 to <32 x i1> |
| %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2 |
| %4 = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1) |
| %res2 = add <32 x i16> %3, %4 |
| ret <32 x i16> %res2 |
| } |
| |
| ; Unmasked two-operand intrinsic used by test_int_x86_avx512_mask_pmulhr_sw_512, which |
| ; expects it to lower to vpmulhrsw. |
| declare <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16>, <32 x i16>) |
| |
| ; Calls the pmul.hr.sw.512 intrinsic twice on (%x0, %x1). The first result is selected against |
| ; %x2 under the <32 x i1> bitcast of %x3, and is then added to the second, unmasked result. |
| ; The expected code is an unmasked vpmulhrsw into zmm3, a {%k1}-masked vpmulhrsw into zmm2, |
| ; and a vpaddw of the two. |
| define <32 x i16> @test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { |
| ; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x0b,0xd9] |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x0b,0xd1] |
| ; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x0b,0xd9] |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x0b,0xd1] |
| ; X64-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %1 = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1) |
| %2 = bitcast i32 %x3 to <32 x i1> |
| %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2 |
| %4 = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1) |
| %res2 = add <32 x i16> %3, %4 |
| ret <32 x i16> %res2 |
| } |
| |
| ; Masked <32 x i16> to <32 x i8> intrinsic; the test that follows passes |
| ; (source, pass-through vector, i32 mask) and expects vpmovwb. |
| declare <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16>, <32 x i8>, i32) |
| |
| ; Exercises three uses of the mask.pmov.wb.512 intrinsic on %x0 and sums the results: |
| ; %res0 uses mask i32 -1 with %x1, expected as an unmasked vpmovwb; |
| ; %res1 uses mask %x2 with %x1 as pass-through, expected as a {%k1}-masked vpmovwb into ymm1; |
| ; %res2 uses mask %x2 with a zeroinitializer pass-through, expected with {%k1} {z}. |
| ; The two adds are expected as vpaddb on ymm registers. |
| define <32 x i8>@test_int_x86_avx512_mask_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) { |
| ; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpmovwb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0xc1] |
| ; X86-NEXT: vpmovwb %zmm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x30,0xc2] |
| ; X86-NEXT: vpaddb %ymm2, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0xfc,0xca] |
| ; X86-NEXT: vpmovwb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x30,0xc0] |
| ; X86-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfc,0xc1] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpmovwb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0xc1] |
| ; X64-NEXT: vpmovwb %zmm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x30,0xc2] |
| ; X64-NEXT: vpaddb %ymm2, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0xfc,0xca] |
| ; X64-NEXT: vpmovwb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x30,0xc0] |
| ; X64-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfc,0xc1] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1) |
| %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) |
| %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2) |
| %res3 = add <32 x i8> %res0, %res1 |
| %res4 = add <32 x i8> %res3, %res2 |
| ret <32 x i8> %res4 |
| } |
| |
| ; Store form of the masked pmov.wb intrinsic: takes a destination pointer, the <32 x i16> |
| ; source and an i32 mask, and returns void. |
| declare void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16>, i32) |
| |
| ; Calls llvm.x86.avx512.mask.pmov.wb.mem.512 twice with the same %ptr and %x1: |
| ; first with mask -1, then with mask %x2. The expected assembly is a plain vpmovwb |
| ; store followed by a {%k1} store to the same address, then vzeroupper. |
| define void @test_int_x86_avx512_mask_pmov_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) { |
| ; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] |
| ; X86-NEXT: vpmovwb %zmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x48,0x30,0x00] |
| ; X86-NEXT: vpmovwb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0x00] |
| ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] |
| ; X64-NEXT: vpmovwb %zmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x48,0x30,0x07] |
| ; X64-NEXT: vpmovwb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0x07] |
| ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1) |
| call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2) |
| ret void |
| } |
| |
| declare <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16>, <32 x i8>, i32) |
| |
| ; Calls llvm.x86.avx512.mask.pmovs.wb.512 on %x0 three times: with mask -1 and %x1, |
| ; with mask %x2 and %x1, and with mask %x2 and zeroinitializer. The three <32 x i8> |
| ; results are added together. The expected assembly contains vpmovswb in its plain, |
| ; {%k1} and {%k1} {z} forms, combined with two vpaddb instructions. |
| define <32 x i8>@test_int_x86_avx512_mask_pmovs_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) { |
| ; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpmovswb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x20,0xc1] |
| ; X86-NEXT: vpmovswb %zmm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x20,0xc2] |
| ; X86-NEXT: vpaddb %ymm2, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0xfc,0xca] |
| ; X86-NEXT: vpmovswb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x20,0xc0] |
| ; X86-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfc,0xc1] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpmovswb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x20,0xc1] |
| ; X64-NEXT: vpmovswb %zmm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x20,0xc2] |
| ; X64-NEXT: vpaddb %ymm2, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0xfc,0xca] |
| ; X64-NEXT: vpmovswb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x20,0xc0] |
| ; X64-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfc,0xc1] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1) |
| %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) |
| %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2) |
| %res3 = add <32 x i8> %res0, %res1 |
| %res4 = add <32 x i8> %res3, %res2 |
| ret <32 x i8> %res4 |
| } |
| |
| declare void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16>, i32) |
| |
| ; Calls llvm.x86.avx512.mask.pmovs.wb.mem.512 twice with the same %ptr and %x1: |
| ; first with mask -1, then with mask %x2. The expected assembly is a plain vpmovswb |
| ; store followed by a {%k1} store to the same address, then vzeroupper. |
| define void @test_int_x86_avx512_mask_pmovs_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) { |
| ; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] |
| ; X86-NEXT: vpmovswb %zmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x48,0x20,0x00] |
| ; X86-NEXT: vpmovswb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x20,0x00] |
| ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] |
| ; X64-NEXT: vpmovswb %zmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x48,0x20,0x07] |
| ; X64-NEXT: vpmovswb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x20,0x07] |
| ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1) |
| call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2) |
| ret void |
| } |
| |
| declare <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16>, <32 x i8>, i32) |
| |
| ; Calls llvm.x86.avx512.mask.pmovus.wb.512 on %x0 three times: with mask -1 and %x1, |
| ; with mask %x2 and %x1, and with mask %x2 and zeroinitializer. The three <32 x i8> |
| ; results are added together. The expected assembly contains vpmovuswb in its plain, |
| ; {%k1} and {%k1} {z} forms, combined with two vpaddb instructions. |
| define <32 x i8>@test_int_x86_avx512_mask_pmovus_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) { |
| ; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpmovuswb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x10,0xc1] |
| ; X86-NEXT: vpmovuswb %zmm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x10,0xc2] |
| ; X86-NEXT: vpaddb %ymm2, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0xfc,0xca] |
| ; X86-NEXT: vpmovuswb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x10,0xc0] |
| ; X86-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfc,0xc1] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpmovuswb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x10,0xc1] |
| ; X64-NEXT: vpmovuswb %zmm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x10,0xc2] |
| ; X64-NEXT: vpaddb %ymm2, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0xfc,0xca] |
| ; X64-NEXT: vpmovuswb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x10,0xc0] |
| ; X64-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfc,0xc1] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1) |
| %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) |
| %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2) |
| %res3 = add <32 x i8> %res0, %res1 |
| %res4 = add <32 x i8> %res3, %res2 |
| ret <32 x i8> %res4 |
| } |
| |
| declare void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16>, i32) |
| |
| ; Calls llvm.x86.avx512.mask.pmovus.wb.mem.512 twice with the same %ptr and %x1: |
| ; first with mask -1, then with mask %x2. The expected assembly is a plain vpmovuswb |
| ; store followed by a {%k1} store to the same address, then vzeroupper. |
| define void @test_int_x86_avx512_mask_pmovus_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) { |
| ; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] |
| ; X86-NEXT: vpmovuswb %zmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x48,0x10,0x00] |
| ; X86-NEXT: vpmovuswb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x10,0x00] |
| ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] |
| ; X64-NEXT: vpmovuswb %zmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x48,0x10,0x07] |
| ; X64-NEXT: vpmovuswb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x10,0x07] |
| ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1) |
| call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2) |
| ret void |
| } |
| |
| declare <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8>, <64 x i8>) |
| |
| ; Calls llvm.x86.avx512.pmaddubs.w.512 on (%x0, %x1) twice. The first result is |
| ; selected against %x2 using %x3 bitcast to <32 x i1>; the second is used as is. |
| ; The two values are added. The expected assembly is one plain vpmaddubsw, one |
| ; {%k1} vpmaddubsw writing zmm2, and a vpaddw. |
| define <32 x i16> @test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) { |
| ; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x04,0xd9] |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x04,0xd1] |
| ; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x04,0xd9] |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x04,0xd1] |
| ; X64-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %1 = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1) |
| %2 = bitcast i32 %x3 to <32 x i1> |
| %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2 |
| %4 = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1) |
| %res2 = add <32 x i16> %3, %4 |
| ret <32 x i16> %res2 |
| } |
| |
| declare <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16>, <32 x i16>) |
| |
| ; Calls llvm.x86.avx512.pmaddw.d.512 on (%x0, %x1) twice. The first result is |
| ; selected against %x2 using the i16 %x3 bitcast to <16 x i1>; the second is used |
| ; as is. The two values are added. The expected assembly is one plain vpmaddwd, one |
| ; {%k1} vpmaddwd writing zmm2, and a vpaddd. |
| define <16 x i32> @test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) { |
| ; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: vpmaddwd %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xf5,0xd9] |
| ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf5,0xd1] |
| ; X86-NEXT: vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: vpmaddwd %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xf5,0xd9] |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf5,0xd1] |
| ; X64-NEXT: vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %1 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1) |
| %2 = bitcast i16 %x3 to <16 x i1> |
| %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2 |
| %4 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1) |
| %res2 = add <16 x i32> %3, %4 |
| ret <16 x i32> %res2 |
| } |
| |
| declare <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8>, <64 x i8>, i32) |
| |
| ; Calls llvm.x86.avx512.dbpsadbw.512 on (%x0, %x1) with immediate 2 three times. |
| ; Using %x4 bitcast to <32 x i1>, the first result is selected against %x3 and the |
| ; second against zeroinitializer; the third is used as is. All three are added. |
| ; The expected assembly has vdbpsadbw $2 in plain, {%k1} and {%k1} {z} forms. |
| define <32 x i16>@test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) { |
| ; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xd9,0x02] |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02] |
| ; X86-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xc1,0x02] |
| ; X86-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3] |
| ; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xd9,0x02] |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02] |
| ; X64-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xc1,0x02] |
| ; X64-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3] |
| ; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %1 = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2) |
| %2 = bitcast i32 %x4 to <32 x i1> |
| %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x3 |
| %4 = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2) |
| %5 = bitcast i32 %x4 to <32 x i1> |
| %6 = select <32 x i1> %5, <32 x i16> %4, <32 x i16> zeroinitializer |
| %7 = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2) |
| %res3 = add <32 x i16> %3, %6 |
| %res4 = add <32 x i16> %res3, %7 |
| ret <32 x i16> %res4 |
| } |
| |
| declare <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8>, <64 x i8>) |
| |
| ; Calls llvm.x86.avx512.psad.bw.512 on (%x0, %x1) and on (%x0, %x2) and adds the |
| ; two <8 x i64> results. Despite "mask" in the name, this function takes no mask |
| ; operand and performs no select. Both targets share one set of expected output: |
| ; two vpsadbw instructions and a vpaddq. |
| define <8 x i64>@test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2){ |
| ; CHECK-LABEL: test_int_x86_avx512_mask_psadb_w_512: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vpsadbw %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf1,0x7d,0x48,0xf6,0xc9] |
| ; CHECK-NEXT: vpsadbw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf6,0xc2] |
| ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0] |
| ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] |
| %res = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x1) |
| %res1 = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x2) |
| %res2 = add <8 x i64> %res, %res1 |
| ret <8 x i64> %res2 |
| } |
| |
| declare <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32) |
| |
| ; Calls llvm.x86.avx512.mask.psrlv32hi on (%x0, %x1) three times: with %x2 and |
| ; mask %x3, with zeroinitializer and mask %x3, and with %x2 and mask -1. The three |
| ; results are added. The expected assembly has vpsrlvw in plain, {%k1} and |
| ; {%k1} {z} forms, followed by two vpaddw instructions. |
| define <32 x i16>@test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { |
| ; X86-LABEL: test_int_x86_avx512_mask_psrlv32hi: |
| ; X86: # %bb.0: |
| ; X86-NEXT: vpsrlvw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x10,0xd9] |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x10,0xd1] |
| ; X86-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x10,0xc1] |
| ; X86-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3] |
| ; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_int_x86_avx512_mask_psrlv32hi: |
| ; X64: # %bb.0: |
| ; X64-NEXT: vpsrlvw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x10,0xd9] |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x10,0xd1] |
| ; X64-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x10,0xc1] |
| ; X64-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3] |
| ; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %res = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) |
| %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) |
| %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) |
| %res3 = add <32 x i16> %res, %res1 |
| %res4 = add <32 x i16> %res3, %res2 |
| ret <32 x i16> %res4 |
| } |
| |
| declare <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16>, <32 x i16>, <32 x i16>, i32) |
| |
| ; Calls llvm.x86.avx512.mask.psrav32.hi on (%x0, %x1) three times: with %x2 and |
| ; mask %x3, with zeroinitializer and mask %x3, and with %x2 and mask -1. The three |
| ; results are added. The expected assembly has vpsravw in plain, {%k1} and |
| ; {%k1} {z} forms, followed by two vpaddw instructions. |
| define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { |
| ; X86-LABEL: test_int_x86_avx512_mask_psrav32_hi: |
| ; X86: # %bb.0: |
| ; X86-NEXT: vpsravw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x11,0xd9] |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpsravw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x11,0xd1] |
| ; X86-NEXT: vpsravw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x11,0xc1] |
| ; X86-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3] |
| ; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_int_x86_avx512_mask_psrav32_hi: |
| ; X64: # %bb.0: |
| ; X64-NEXT: vpsravw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x11,0xd9] |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpsravw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x11,0xd1] |
| ; X64-NEXT: vpsravw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x11,0xc1] |
| ; X64-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3] |
| ; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) |
| %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) |
| %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) |
| %res3 = add <32 x i16> %res, %res1 |
| %res4 = add <32 x i16> %res3, %res2 |
| ret <32 x i16> %res4 |
| } |
| |
| ; Calls llvm.x86.avx512.mask.psrav32.hi once with two constant <32 x i16> vectors, |
| ; zeroinitializer and mask -1. The parameters %x0..%x3 are not referenced in the |
| ; body. The expected assembly loads the first constant with vmovdqa64 and still |
| ; issues a vpsravw whose shift-count operand is read from the constant pool. |
| define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi_const(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { |
| ; X86-LABEL: test_int_x86_avx512_mask_psrav32_hi_const: |
| ; X86: # %bb.0: |
| ; X86-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51] |
| ; X86-NEXT: # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A] |
| ; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4 |
| ; X86-NEXT: vpsravw {{\.LCPI.*}}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x11,0x05,A,A,A,A] |
| ; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4 |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_int_x86_avx512_mask_psrav32_hi_const: |
| ; X64: # %bb.0: |
| ; X64-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51] |
| ; X64-NEXT: # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A] |
| ; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte |
| ; X64-NEXT: vpsravw {{.*}}(%rip), %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x11,0x05,A,A,A,A] |
| ; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> <i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51>, |
| <32 x i16> <i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49, i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49, i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49, i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49>, |
| <32 x i16> zeroinitializer, i32 -1) |
| ret <32 x i16> %res |
| } |
| |
| declare <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32) |
| |
| ; Calls llvm.x86.avx512.mask.psllv32hi on (%x0, %x1) three times: with %x2 and |
| ; mask %x3, with zeroinitializer and mask %x3, and with %x2 and mask -1. The three |
| ; results are added. The expected assembly has vpsllvw in plain, {%k1} and |
| ; {%k1} {z} forms, followed by two vpaddw instructions. |
| define <32 x i16>@test_int_x86_avx512_mask_psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { |
| ; X86-LABEL: test_int_x86_avx512_mask_psllv32hi: |
| ; X86: # %bb.0: |
| ; X86-NEXT: vpsllvw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x12,0xd9] |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x12,0xd1] |
| ; X86-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x12,0xc1] |
| ; X86-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3] |
| ; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_int_x86_avx512_mask_psllv32hi: |
| ; X64: # %bb.0: |
| ; X64-NEXT: vpsllvw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x12,0xd9] |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x12,0xd1] |
| ; X64-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x12,0xc1] |
| ; X64-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3] |
| ; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %res = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) |
| %res1 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) |
| %res2 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) |
| %res3 = add <32 x i16> %res, %res1 |
| %res4 = add <32 x i16> %res3, %res2 |
| ret <32 x i16> %res4 |
| } |
| |
| declare <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16>, <32 x i16>) |
| |
| ; Calls llvm.x86.avx512.permvar.hi.512 on (%x0, %x1) three times. Using %x3 |
| ; bitcast to <32 x i1>, the first result is selected against %x2 and the second |
| ; against zeroinitializer; the third is used as is. All three are added. The |
| ; expected assembly has vpermw in plain, {%k1} and {%k1} {z} forms. |
| define <32 x i16>@test_int_x86_avx512_mask_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { |
| ; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: vpermw %zmm0, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0x8d,0xd8] |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpermw %zmm0, %zmm1, %zmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x8d,0xd0] |
| ; X86-NEXT: vpermw %zmm0, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x8d,0xc0] |
| ; X86-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3] |
| ; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: vpermw %zmm0, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0x8d,0xd8] |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpermw %zmm0, %zmm1, %zmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x8d,0xd0] |
| ; X64-NEXT: vpermw %zmm0, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x8d,0xc0] |
| ; X64-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3] |
| ; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1) |
| %2 = bitcast i32 %x3 to <32 x i1> |
| %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2 |
| %4 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1) |
| %5 = bitcast i32 %x3 to <32 x i1> |
| %6 = select <32 x i1> %5, <32 x i16> %4, <32 x i16> zeroinitializer |
| %7 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1) |
| %res3 = add <32 x i16> %3, %6 |
| %res4 = add <32 x i16> %res3, %7 |
| ret <32 x i16> %res4 |
| } |
| |
| ; Calls llvm.x86.avx512.psll.w.512 on %a0 with the <8 x i16> count %a1 and returns |
| ; the result directly. Both targets expect a single vpsllw with an xmm count operand. |
| define <32 x i16> @test_x86_avx512_psll_w_512(<32 x i16> %a0, <8 x i16> %a1) { |
| ; CHECK-LABEL: test_x86_avx512_psll_w_512: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vpsllw %xmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf1,0xc1] |
| ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] |
| %res = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1] |
| ret <32 x i16> %res |
| } |
| ; Calls llvm.x86.avx512.psll.w.512 on (%a0, %a1), then selects between the result |
| ; and %passthru using %mask bitcast to <32 x i1>. The expected assembly is a {%k1} |
| ; vpsllw writing zmm2, followed by a vmovdqa64 copy of zmm2 into zmm0. |
| define <32 x i16> @test_x86_avx512_mask_psll_w_512(<32 x i16> %a0, <8 x i16> %a1, <32 x i16> %passthru, i32 %mask) { |
| ; X86-LABEL: test_x86_avx512_mask_psll_w_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpsllw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf1,0xd1] |
| ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_x86_avx512_mask_psll_w_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpsllw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf1,0xd1] |
| ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %res = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1] |
| %mask.cast = bitcast i32 %mask to <32 x i1> |
| %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru |
| ret <32 x i16> %res2 |
| } |
| ; Calls llvm.x86.avx512.psll.w.512 on (%a0, %a1), then selects between the result |
| ; and zeroinitializer using %mask bitcast to <32 x i1>. The expected assembly is a |
| ; single vpsllw in its {%k1} {z} form. |
| define <32 x i16> @test_x86_avx512_maskz_psll_w_512(<32 x i16> %a0, <8 x i16> %a1, i32 %mask) { |
| ; X86-LABEL: test_x86_avx512_maskz_psll_w_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpsllw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf1,0xc1] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_x86_avx512_maskz_psll_w_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpsllw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf1,0xc1] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %res = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1] |
| %mask.cast = bitcast i32 %mask to <32 x i1> |
| %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer |
| ret <32 x i16> %res2 |
| } |
| declare <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16>, <8 x i16>) nounwind readnone |
| |
| |
| ; Calls llvm.x86.avx512.pslli.w.512 on %a0 with the constant count 7 and returns the |
| ; result directly. Both targets expect a single vpsllw with the immediate $7. |
| define <32 x i16> @test_x86_avx512_pslli_w_512(<32 x i16> %a0) { |
| ; CHECK-LABEL: test_x86_avx512_pslli_w_512: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vpsllw $7, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xf0,0x07] |
| ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] |
| %res = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1] |
| ret <32 x i16> %res |
| } |
| ; Calls llvm.x86.avx512.pslli.w.512 on %a0 with count 7, then selects between the |
| ; result and %passthru using %mask bitcast to <32 x i1>. The expected assembly is a |
| ; {%k1} vpsllw $7 writing zmm1, followed by a vmovdqa64 copy of zmm1 into zmm0. |
| define <32 x i16> @test_x86_avx512_mask_pslli_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) { |
| ; X86-LABEL: test_x86_avx512_mask_pslli_w_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpsllw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xf0,0x07] |
| ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_x86_avx512_mask_pslli_w_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpsllw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xf0,0x07] |
| ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %res = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1] |
| %mask.cast = bitcast i32 %mask to <32 x i1> |
| %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru |
| ret <32 x i16> %res2 |
| } |
| ; Calls llvm.x86.avx512.pslli.w.512 on %a0 with count 7, then selects between the |
| ; result and zeroinitializer using %mask bitcast to <32 x i1>. The expected assembly |
| ; is a single vpsllw $7 in its {%k1} {z} form. |
| define <32 x i16> @test_x86_avx512_maskz_pslli_w_512(<32 x i16> %a0, i32 %mask) { |
| ; X86-LABEL: test_x86_avx512_maskz_pslli_w_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpsllw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xf0,0x07] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_x86_avx512_maskz_pslli_w_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpsllw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xf0,0x07] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %res = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1] |
| %mask.cast = bitcast i32 %mask to <32 x i1> |
| %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer |
| ret <32 x i16> %res2 |
| } |
| declare <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16>, i32) nounwind readnone |
| |
| |
| ; Calls llvm.x86.avx512.psra.w.512 on %a0 with the <8 x i16> count %a1 and returns |
| ; the result directly. Both targets expect a single vpsraw with an xmm count operand. |
| define <32 x i16> @test_x86_avx512_psra_w_512(<32 x i16> %a0, <8 x i16> %a1) { |
| ; CHECK-LABEL: test_x86_avx512_psra_w_512: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vpsraw %xmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe1,0xc1] |
| ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] |
| %res = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1] |
| ret <32 x i16> %res |
| } |
| ; Calls llvm.x86.avx512.psra.w.512 on (%a0, %a1), then selects between the result |
| ; and %passthru using %mask bitcast to <32 x i1>. The expected assembly is a {%k1} |
| ; vpsraw writing zmm2, followed by a vmovdqa64 copy of zmm2 into zmm0. |
| define <32 x i16> @test_x86_avx512_mask_psra_w_512(<32 x i16> %a0, <8 x i16> %a1, <32 x i16> %passthru, i32 %mask) { |
| ; X86-LABEL: test_x86_avx512_mask_psra_w_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpsraw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe1,0xd1] |
| ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_x86_avx512_mask_psra_w_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpsraw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe1,0xd1] |
| ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %res = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1] |
| %mask.cast = bitcast i32 %mask to <32 x i1> |
| %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru |
| ret <32 x i16> %res2 |
| } |
| ; Calls llvm.x86.avx512.psra.w.512 on (%a0, %a1), then selects between the result |
| ; and zeroinitializer using %mask bitcast to <32 x i1>. The expected assembly is a |
| ; single vpsraw in its {%k1} {z} form. |
| define <32 x i16> @test_x86_avx512_maskz_psra_w_512(<32 x i16> %a0, <8 x i16> %a1, i32 %mask) { |
| ; X86-LABEL: test_x86_avx512_maskz_psra_w_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpsraw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe1,0xc1] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_x86_avx512_maskz_psra_w_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpsraw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe1,0xc1] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %res = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1] |
| %mask.cast = bitcast i32 %mask to <32 x i1> |
| %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer |
| ret <32 x i16> %res2 |
| } |
| declare <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16>, <8 x i16>) nounwind readnone |
| |
| |
| ; Calls llvm.x86.avx512.psrai.w.512 on %a0 with the constant count 7 and returns the |
| ; result directly. Both targets expect a single vpsraw with the immediate $7. |
| define <32 x i16> @test_x86_avx512_psrai_w_512(<32 x i16> %a0) { |
| ; CHECK-LABEL: test_x86_avx512_psrai_w_512: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vpsraw $7, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xe0,0x07] |
| ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] |
| %res = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1] |
| ret <32 x i16> %res |
| } |
| ; Calls llvm.x86.avx512.psrai.w.512 on %a0 with count 7, then selects between the |
| ; result and %passthru using %mask bitcast to <32 x i1>. The expected assembly is a |
| ; {%k1} vpsraw $7 writing zmm1, followed by a vmovdqa64 copy of zmm1 into zmm0. |
| define <32 x i16> @test_x86_avx512_mask_psrai_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) { |
| ; X86-LABEL: test_x86_avx512_mask_psrai_w_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpsraw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xe0,0x07] |
| ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_x86_avx512_mask_psrai_w_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpsraw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xe0,0x07] |
| ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %res = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1] |
| %mask.cast = bitcast i32 %mask to <32 x i1> |
| %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru |
| ret <32 x i16> %res2 |
| } |
| ; Zero-masking form of the psrai.w test: the intrinsic result is selected |
| ; against zeroinitializer, and the assertions expect a single vpsraw $7 with |
| ; {%k1} {z}. |
| ; NOTE(review): %passthru is declared but never referenced in the body; it is |
| ; kept only so the function signature stays as it was. |
| define <32 x i16> @test_x86_avx512_maskz_psrai_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) { |
| ; X86-LABEL: test_x86_avx512_maskz_psrai_w_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpsraw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xe0,0x07] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_x86_avx512_maskz_psrai_w_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpsraw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xe0,0x07] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %shifted = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %a0, i32 7) |
| %lanes = bitcast i32 %mask to <32 x i1> |
| %zeroed = select <32 x i1> %lanes, <32 x i16> %shifted, <32 x i16> zeroinitializer |
| ret <32 x i16> %zeroed |
| } |
| ; Intrinsic called by the psrai.w tests above: takes a <32 x i16> source and an |
| ; i32 second operand (the tests pass the constant 7), and returns <32 x i16>. |
| declare <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16>, i32) nounwind readnone |
| |
| |
| ; Unmasked psrl.w test: %a0 and %a1 are passed straight to the intrinsic. The |
| ; assertions expect a single vpsrlw taking %xmm1 and %zmm0 and writing %zmm0. |
| define <32 x i16> @test_x86_avx512_psrl_w_512(<32 x i16> %a0, <8 x i16> %a1) { |
| ; CHECK-LABEL: test_x86_avx512_psrl_w_512: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd1,0xc1] |
| ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] |
| %shifted = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) |
| ret <32 x i16> %shifted |
| } |
| ; Merge-masking form of the psrl.w test: lanes whose mask bit is clear keep the |
| ; corresponding %passthru lane. The assertions expect the masked vpsrlw to |
| ; write into the passthru register (%zmm2), followed by a copy to %zmm0. |
| define <32 x i16> @test_x86_avx512_mask_psrl_w_512(<32 x i16> %a0, <8 x i16> %a1, <32 x i16> %passthru, i32 %mask) { |
| ; X86-LABEL: test_x86_avx512_mask_psrl_w_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpsrlw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd1,0xd1] |
| ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_x86_avx512_mask_psrl_w_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpsrlw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd1,0xd1] |
| ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %shifted = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) |
| %lanes = bitcast i32 %mask to <32 x i1> |
| %merged = select <32 x i1> %lanes, <32 x i16> %shifted, <32 x i16> %passthru |
| ret <32 x i16> %merged |
| } |
| ; Zero-masking form of the psrl.w test: the intrinsic result is selected |
| ; against zeroinitializer under a 32-bit lane mask. The assertions expect the |
| ; select to fold into a single vpsrlw carrying {%k1} {z} on both targets. |
| define <32 x i16> @test_x86_avx512_maskz_psrl_w_512(<32 x i16> %a0, <8 x i16> %a1, i32 %mask) { |
| ; X86-LABEL: test_x86_avx512_maskz_psrl_w_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd1,0xc1] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_x86_avx512_maskz_psrl_w_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd1,0xc1] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %shifted = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) |
| %lanes = bitcast i32 %mask to <32 x i1> |
| %zeroed = select <32 x i1> %lanes, <32 x i16> %shifted, <32 x i16> zeroinitializer |
| ret <32 x i16> %zeroed |
| } |
| ; Intrinsic called by the psrl.w tests above: takes a <32 x i16> source and an |
| ; <8 x i16> second operand, and returns <32 x i16>. |
| declare <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16>, <8 x i16>) nounwind readnone |
| |
| |
| ; Unmasked psrli.w test with a constant i32 operand of 7. The assertions expect |
| ; one vpsrlw with the immediate $7 operating in place on %zmm0. |
| define <32 x i16> @test_x86_avx512_psrli_w_512(<32 x i16> %a0) { |
| ; CHECK-LABEL: test_x86_avx512_psrli_w_512: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vpsrlw $7, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xd0,0x07] |
| ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] |
| %shifted = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %a0, i32 7) |
| ret <32 x i16> %shifted |
| } |
| ; Merge-masking form of the psrli.w test: lanes whose mask bit is clear keep |
| ; the corresponding %passthru lane. The assertions expect the masked vpsrlw to |
| ; write into the passthru register (%zmm1), followed by a copy to %zmm0. |
| define <32 x i16> @test_x86_avx512_mask_psrli_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) { |
| ; X86-LABEL: test_x86_avx512_mask_psrli_w_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpsrlw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xd0,0x07] |
| ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_x86_avx512_mask_psrli_w_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpsrlw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xd0,0x07] |
| ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %shifted = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %a0, i32 7) |
| %lanes = bitcast i32 %mask to <32 x i1> |
| %merged = select <32 x i1> %lanes, <32 x i16> %shifted, <32 x i16> %passthru |
| ret <32 x i16> %merged |
| } |
| ; Zero-masking form of the psrli.w test: the intrinsic result is selected |
| ; against zeroinitializer under a 32-bit lane mask. The assertions expect a |
| ; single vpsrlw $7 carrying {%k1} {z} on both targets. |
| define <32 x i16> @test_x86_avx512_maskz_psrli_w_512(<32 x i16> %a0, i32 %mask) { |
| ; X86-LABEL: test_x86_avx512_maskz_psrli_w_512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] |
| ; X86-NEXT: vpsrlw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xd0,0x07] |
| ; X86-NEXT: retl # encoding: [0xc3] |
| ; |
| ; X64-LABEL: test_x86_avx512_maskz_psrli_w_512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] |
| ; X64-NEXT: vpsrlw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xd0,0x07] |
| ; X64-NEXT: retq # encoding: [0xc3] |
| %shifted = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %a0, i32 7) |
| %lanes = bitcast i32 %mask to <32 x i1> |
| %zeroed = select <32 x i1> %lanes, <32 x i16> %shifted, <32 x i16> zeroinitializer |
| ret <32 x i16> %zeroed |
| } |
| ; Intrinsic called by the psrli.w tests above: takes a <32 x i16> source and an |
| ; i32 second operand (the tests pass the constant 7), and returns <32 x i16>. |
| declare <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16>, i32) nounwind readnone |