| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | 
 | ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86 | 
 | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64 | 
 |  | 
; ---------------------------------------------------------------------------
; llvm.x86.avx512.packssdw.512 (vpackssdw): signed-saturating pack of two
; <16 x i32> operands into <32 x i16>. Suffix key used throughout this file:
;   rr = reg/reg, rm = reg/mem, rmb = reg/mem-broadcast,
;   k  = merge-masked (select vs. %passThru), kz = zero-masked.
; ---------------------------------------------------------------------------

; Unmasked reg/reg form.
define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_mask_packs_epi32_rr_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpackssdw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
  ret <32 x i16> %1
}

; Merge-masked reg/reg: i32 mask bitcast to <32 x i1> selects result vs. %passThru.
define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packs_epi32_rrk_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpackssdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rrk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpackssdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
  ret <32 x i16> %3
}

; Zero-masked reg/reg: masked-off lanes come from zeroinitializer ({z} form).
define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
; X86-LABEL: test_mask_packs_epi32_rrkz_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rrkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}

; Unmasked reg/mem: second operand folded from a full-vector load.
define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
; X86-LABEL: test_mask_packs_epi32_rm_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpackssdw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rm_512:
; X64:       # %bb.0:
; X64-NEXT:    vpackssdw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
  ret <32 x i16> %1
}

; Merge-masked reg/mem.
define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmk_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpackssdw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0x08]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpackssdw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0x0f]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
  ret <32 x i16> %3
}

; Zero-masked reg/mem.
define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmkz_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpackssdw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpackssdw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}

; Unmasked broadcast: scalar i32 load splatted via insertelement+shufflevector
; must fold into the embedded-broadcast {1to16} memory form.
define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_packs_epi32_rmb_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpackssdw (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x58,0x6b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmb_512:
; X64:       # %bb.0:
; X64-NEXT:    vpackssdw (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x58,0x6b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
  ret <32 x i16> %1
}

; Merge-masked broadcast form.
define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmbk_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpackssdw (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x59,0x6b,0x08]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmbk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpackssdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x59,0x6b,0x0f]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
  ret <32 x i16> %3
}

; Zero-masked broadcast form.
define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmbkz_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpackssdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xd9,0x6b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmbkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpackssdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xd9,0x6b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}

declare <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32>, <16 x i32>)
 |  | 
; ---------------------------------------------------------------------------
; llvm.x86.avx512.packsswb.512 (vpacksswb): signed-saturating pack of two
; <32 x i16> operands into <64 x i8>. Masks here are i64 (64 byte lanes).
; No broadcast variants: embedded broadcast needs 32/64-bit elements.
; ---------------------------------------------------------------------------

; Unmasked reg/reg form.
define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; CHECK-LABEL: test_mask_packs_epi16_rr_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpacksswb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
  ret <64 x i8> %1
}

; Merge-masked reg/reg: i64 mask bitcast to <64 x i1> selects vs. %passThru.
define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
; X86-LABEL: test_mask_packs_epi16_rrk_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpacksswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rrk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpacksswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i64 %mask to <64 x i1>
  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru
  ret <64 x i8> %3
}

; Zero-masked reg/reg.
define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
; X86-LABEL: test_mask_packs_epi16_rrkz_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rrkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i64 %mask to <64 x i1>
  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer
  ret <64 x i8> %3
}

; Unmasked reg/mem: second operand folded from a full-vector load.
define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
; X86-LABEL: test_mask_packs_epi16_rm_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpacksswb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rm_512:
; X64:       # %bb.0:
; X64-NEXT:    vpacksswb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
  ret <64 x i8> %1
}

; Merge-masked reg/mem.
define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
; X86-LABEL: test_mask_packs_epi16_rmk_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpacksswb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0x08]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rmk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT:    vpacksswb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0x0f]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i64 %mask to <64 x i1>
  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru
  ret <64 x i8> %3
}

; Zero-masked reg/mem.
define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
; X86-LABEL: test_mask_packs_epi16_rmkz_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpacksswb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rmkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT:    vpacksswb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i64 %mask to <64 x i1>
  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer
  ret <64 x i8> %3
}

declare <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16>, <32 x i16>)
 |  | 
 |  | 
; ---------------------------------------------------------------------------
; llvm.x86.avx512.packusdw.512 (vpackusdw): unsigned-saturating pack of two
; <16 x i32> operands into <32 x i16>. Same variant matrix as the packssdw
; tests above (rr/rm/rmb x unmasked/merge-masked/zero-masked).
; ---------------------------------------------------------------------------

; Unmasked reg/reg form.
define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_mask_packus_epi32_rr_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpackusdw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
  ret <32 x i16> %1
}

; Merge-masked reg/reg.
define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packus_epi32_rrk_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpackusdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rrk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpackusdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
  ret <32 x i16> %3
}

; Zero-masked reg/reg.
define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
; X86-LABEL: test_mask_packus_epi32_rrkz_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rrkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}

; Unmasked reg/mem.
define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
; X86-LABEL: test_mask_packus_epi32_rm_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpackusdw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rm_512:
; X64:       # %bb.0:
; X64-NEXT:    vpackusdw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
  ret <32 x i16> %1
}

; Merge-masked reg/mem.
define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmk_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpackusdw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0x08]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpackusdw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0x0f]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
  ret <32 x i16> %3
}

; Zero-masked reg/mem.
define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmkz_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpackusdw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpackusdw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}

; Unmasked broadcast: splatted scalar load folds to the {1to16} memory form.
define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_packus_epi32_rmb_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpackusdw (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x58,0x2b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmb_512:
; X64:       # %bb.0:
; X64-NEXT:    vpackusdw (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x58,0x2b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
  ret <32 x i16> %1
}

; Merge-masked broadcast form.
define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmbk_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpackusdw (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x59,0x2b,0x08]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmbk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpackusdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x59,0x2b,0x0f]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
  ret <32 x i16> %3
}

; Zero-masked broadcast form.
define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmbkz_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpackusdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xd9,0x2b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmbkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpackusdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xd9,0x2b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}

declare <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32>, <16 x i32>)
 |  | 
; ---------------------------------------------------------------------------
; llvm.x86.avx512.packuswb.512 (vpackuswb): unsigned-saturating pack of two
; <32 x i16> operands into <64 x i8>. i64 masks; no broadcast variants
; (word elements do not support embedded broadcast).
; ---------------------------------------------------------------------------

; Unmasked reg/reg form.
define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; CHECK-LABEL: test_mask_packus_epi16_rr_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
  ret <64 x i8> %1
}

; Merge-masked reg/reg.
define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
; X86-LABEL: test_mask_packus_epi16_rrk_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpackuswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rrk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpackuswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i64 %mask to <64 x i1>
  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru
  ret <64 x i8> %3
}

; Zero-masked reg/reg.
define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
; X86-LABEL: test_mask_packus_epi16_rrkz_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rrkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i64 %mask to <64 x i1>
  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer
  ret <64 x i8> %3
}

; Unmasked reg/mem.
define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
; X86-LABEL: test_mask_packus_epi16_rm_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpackuswb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rm_512:
; X64:       # %bb.0:
; X64-NEXT:    vpackuswb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
  ret <64 x i8> %1
}

; Merge-masked reg/mem.
define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
; X86-LABEL: test_mask_packus_epi16_rmk_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpackuswb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0x08]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rmk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT:    vpackuswb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0x0f]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i64 %mask to <64 x i1>
  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru
  ret <64 x i8> %3
}

; Zero-masked reg/mem.
define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
; X86-LABEL: test_mask_packus_epi16_rmkz_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpackuswb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rmkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT:    vpackuswb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i64 %mask to <64 x i1>
  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer
  ret <64 x i8> %3
}

declare <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16>, <32 x i16>)
 |  | 
; ---------------------------------------------------------------------------
; llvm.x86.avx512.mask.padds.w.512 (vpaddsw): signed saturating word add.
; Unlike the pack tests above, this legacy intrinsic carries the passthru
; vector and the i32 mask as explicit call operands (-1 = all lanes active).
; ---------------------------------------------------------------------------

; Unmasked reg/reg form (mask = -1, passthru ignored).
define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; CHECK-LABEL: test_mask_adds_epi16_rr_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
  ret <32 x i16> %res
}

; Merge-masked reg/reg: masked-off lanes taken from %passThru.
define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_adds_epi16_rrk_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpaddsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epi16_rrk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpaddsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
  ret <32 x i16> %res
}

; Zero-masked reg/reg: zeroinitializer passthru selects the {z} form.
define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; X86-LABEL: test_mask_adds_epi16_rrkz_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epi16_rrkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %res
}
 |  | 
 | define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { | 
 | ; X86-LABEL: test_mask_adds_epi16_rm_512: | 
 | ; X86:       # %bb.0: | 
 | ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
 | ; X86-NEXT:    vpaddsw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0x00] | 
 | ; X86-NEXT:    retl # encoding: [0xc3] | 
 | ; | 
 | ; X64-LABEL: test_mask_adds_epi16_rm_512: | 
 | ; X64:       # %bb.0: | 
 | ; X64-NEXT:    vpaddsw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0x07] | 
 | ; X64-NEXT:    retq # encoding: [0xc3] | 
 |   %b = load <32 x i16>, <32 x i16>* %ptr_b | 
 |   %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) | 
 |   ret <32 x i16> %res | 
 | } | 
 |  | 
 | define <32 x i16> @test_mask_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { | 
 | ; X86-LABEL: test_mask_adds_epi16_rmk_512: | 
 | ; X86:       # %bb.0: | 
 | ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
 | ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] | 
 | ; X86-NEXT:    vpaddsw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0x08] | 
 | ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] | 
 | ; X86-NEXT:    retl # encoding: [0xc3] | 
 | ; | 
 | ; X64-LABEL: test_mask_adds_epi16_rmk_512: | 
 | ; X64:       # %bb.0: | 
 | ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] | 
 | ; X64-NEXT:    vpaddsw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0x0f] | 
 | ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] | 
 | ; X64-NEXT:    retq # encoding: [0xc3] | 
 |   %b = load <32 x i16>, <32 x i16>* %ptr_b | 
 |   %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) | 
 |   ret <32 x i16> %res | 
 | } | 
 |  | 
 | define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { | 
 | ; X86-LABEL: test_mask_adds_epi16_rmkz_512: | 
 | ; X86:       # %bb.0: | 
 | ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
 | ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] | 
 | ; X86-NEXT:    vpaddsw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0x00] | 
 | ; X86-NEXT:    retl # encoding: [0xc3] | 
 | ; | 
 | ; X64-LABEL: test_mask_adds_epi16_rmkz_512: | 
 | ; X64:       # %bb.0: | 
 | ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] | 
 | ; X64-NEXT:    vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0x07] | 
 | ; X64-NEXT:    retq # encoding: [0xc3] | 
 |   %b = load <32 x i16>, <32 x i16>* %ptr_b | 
 |   %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) | 
 |   ret <32 x i16> %res | 
 | } | 
 |  | 
 | declare <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) | 
 |  | 
; ---------------------------------------------------------------------------
; llvm.x86.avx512.mask.psubs.w.512 (vpsubsw): saturating signed i16 subtract.
; Same rr/rrk/rrkz/rm/rmk/rmkz matrix as the padds.w tests above.
; ---------------------------------------------------------------------------

; Unmasked reg-reg form (mask = -1 selects the plain instruction).
define <32 x i16> @test_mask_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; CHECK-LABEL: test_mask_subs_epi16_rr_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
  ret <32 x i16> %res
}

; Merge-masked reg-reg form merging into the pass-through register.
define <32 x i16> @test_mask_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_subs_epi16_rrk_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epi16_rrk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
  ret <32 x i16> %res
}

; Zeroing-masked reg-reg form ({z}).
define <32 x i16> @test_mask_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; X86-LABEL: test_mask_subs_epi16_rrkz_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epi16_rrkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %res
}

; Unmasked form with folded load.
define <32 x i16> @test_mask_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
; X86-LABEL: test_mask_subs_epi16_rm_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpsubsw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epi16_rm_512:
; X64:       # %bb.0:
; X64-NEXT:    vpsubsw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
  ret <32 x i16> %res
}

; Merge-masked form with folded load.
define <32 x i16> @test_mask_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_subs_epi16_rmk_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpsubsw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0x08]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epi16_rmk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpsubsw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0x0f]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
  ret <32 x i16> %res
}

; Zeroing-masked form with folded load.
define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
; X86-LABEL: test_mask_subs_epi16_rmkz_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpsubsw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epi16_rmkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %res
}

declare <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
 |  | 
; ---------------------------------------------------------------------------
; llvm.x86.avx512.mask.paddus.w.512 (vpaddusw): saturating unsigned i16 add.
; Same rr/rrk/rrkz/rm/rmk/rmkz matrix as the signed tests above.
; ---------------------------------------------------------------------------

; Unmasked reg-reg form (mask = -1 selects the plain instruction).
define <32 x i16> @test_mask_adds_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; CHECK-LABEL: test_mask_adds_epu16_rr_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddusw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xdd,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
  ret <32 x i16> %res
}

; Merge-masked reg-reg form merging into the pass-through register.
define <32 x i16> @test_mask_adds_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_adds_epu16_rrk_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpaddusw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdd,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epu16_rrk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpaddusw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdd,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
  ret <32 x i16> %res
}

; Zeroing-masked reg-reg form ({z}).
define <32 x i16> @test_mask_adds_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; X86-LABEL: test_mask_adds_epu16_rrkz_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdd,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epu16_rrkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdd,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %res
}

; Unmasked form with folded load.
define <32 x i16> @test_mask_adds_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
; X86-LABEL: test_mask_adds_epu16_rm_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpaddusw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xdd,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epu16_rm_512:
; X64:       # %bb.0:
; X64-NEXT:    vpaddusw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xdd,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
  ret <32 x i16> %res
}

; Merge-masked form with folded load.
define <32 x i16> @test_mask_adds_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_adds_epu16_rmk_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpaddusw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdd,0x08]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epu16_rmk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpaddusw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdd,0x0f]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
  ret <32 x i16> %res
}

; Zeroing-masked form with folded load.
define <32 x i16> @test_mask_adds_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
; X86-LABEL: test_mask_adds_epu16_rmkz_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpaddusw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdd,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epu16_rmkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpaddusw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdd,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %res
}

declare <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
 |  | 
; ---------------------------------------------------------------------------
; llvm.x86.avx512.mask.psubus.w.512 (vpsubusw): saturating unsigned i16
; subtract.  Same rr/rrk/rrkz/rm/rmk/rmkz matrix as the other saturating
; arithmetic tests above.
; ---------------------------------------------------------------------------

; Unmasked reg-reg form (mask = -1 selects the plain instruction).
define <32 x i16> @test_mask_subs_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; CHECK-LABEL: test_mask_subs_epu16_rr_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubusw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd9,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
  ret <32 x i16> %res
}

; Merge-masked reg-reg form merging into the pass-through register.
define <32 x i16> @test_mask_subs_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_subs_epu16_rrk_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsubusw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd9,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epu16_rrk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsubusw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd9,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
  ret <32 x i16> %res
}

; Zeroing-masked reg-reg form ({z}).
define <32 x i16> @test_mask_subs_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; X86-LABEL: test_mask_subs_epu16_rrkz_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd9,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epu16_rrkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd9,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %res
}

; Unmasked form with folded load.
define <32 x i16> @test_mask_subs_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
; X86-LABEL: test_mask_subs_epu16_rm_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpsubusw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd9,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epu16_rm_512:
; X64:       # %bb.0:
; X64-NEXT:    vpsubusw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd9,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
  ret <32 x i16> %res
}

; Merge-masked form with folded load.
define <32 x i16> @test_mask_subs_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_subs_epu16_rmk_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpsubusw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd9,0x08]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epu16_rmk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpsubusw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd9,0x0f]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
  ret <32 x i16> %res
}

; Zeroing-masked form with folded load.
define <32 x i16> @test_mask_subs_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
; X86-LABEL: test_mask_subs_epu16_rmkz_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpsubusw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd9,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epu16_rmkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpsubusw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd9,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %res
}

declare <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
 |  | 
; vpermt2var (vpermt2w) merge-masked test.  The intrinsic is called twice with
; identical operands: once masked (merging into %x1) and once unmasked; adding
; the two results forces both the {%k1} and plain lowerings to appear in the
; output, so codegen for each form is checked in a single function.
define <32 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
; X86-NEXT:    vpermt2w %zmm2, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x7d,0xda]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermt2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x7d,0xca]
; X86-NEXT:    vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
; X64-NEXT:    vpermt2w %zmm2, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x7d,0xda]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermt2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x7d,0xca]
; X64-NEXT:    vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x1
  %4 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2)
  %res2 = add <32 x i16> %3, %4
  ret <32 x i16> %res2
}

; Zeroing-masked variant of the test above: the select against
; zeroinitializer maps to the vpermt2w {%k1} {z} form.
define <32 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
; X86-NEXT:    vpermt2w %zmm2, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x7d,0xda]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermt2w %zmm2, %zmm0, %zmm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x7d,0xca]
; X86-NEXT:    vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
; X64-NEXT:    vpermt2w %zmm2, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x7d,0xda]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermt2w %zmm2, %zmm0, %zmm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x7d,0xca]
; X64-NEXT:    vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  %4 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2)
  %res2 = add <32 x i16> %3, %4
  ret <32 x i16> %res2
}

declare <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>)
 |  | 
; vpermi2var merge-masked test.  With the merge destination being %x1 (the
; index operand), the masked call lowers to vpermi2w, while the unmasked call
; lowers to vpermt2w on a copy; the add keeps both lowerings live.
define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    vpermt2w %zmm2, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0x7d,0xda]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermi2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x75,0xca]
; X86-NEXT:    vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    vpermt2w %zmm2, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0x7d,0xda]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermi2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x75,0xca]
; X64-NEXT:    vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x1
  %4 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2)
  %res2 = add <32 x i16> %3, %4
  ret <32 x i16> %res2
}
 |  | 
; ---------------------------------------------------------------------------
; llvm.x86.avx512.pshuf.b.512 (vpshufb): byte shuffle, exercised unmasked,
; merge-masked (select against %x2) and zeroing-masked (select against zero).
; The i64 mask covers all 64 byte lanes, so X86 uses kmovq from the stack.
; ---------------------------------------------------------------------------
declare <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8>, <64 x i8>)

; Unmasked vpshufb.
define <64 x i8>@test_int_x86_avx512_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pshuf_b_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x00,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1)
  ret <64 x i8> %res
}

; Merge-masked vpshufb: result merges into %x2 (%zmm2) under %k1.
define <64 x i8>@test_int_x86_avx512_pshuf_b_512_mask(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %mask) {
; X86-LABEL: test_int_x86_avx512_pshuf_b_512_mask:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshufb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x00,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pshuf_b_512_mask:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpshufb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x00,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1)
  %mask.cast = bitcast i64 %mask to <64 x i1>
  %res2 = select <64 x i1> %mask.cast, <64 x i8> %res, <64 x i8> %x2
  ret <64 x i8> %res2
}

; Zeroing-masked vpshufb ({z}).
define <64 x i8>@test_int_x86_avx512_pshuf_b_512_maskz(<64 x i8> %x0, <64 x i8> %x1, i64 %mask) {
; X86-LABEL: test_int_x86_avx512_pshuf_b_512_maskz:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshufb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x00,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pshuf_b_512_maskz:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpshufb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x00,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1)
  %mask.cast = bitcast i64 %mask to <64 x i1>
  %res2 = select <64 x i1> %mask.cast, <64 x i8> %res, <64 x i8> zeroinitializer
  ret <64 x i8> %res2
}
 |  | 
; llvm.x86.avx512.pmulhu.w.512 (vpmulhuw): high 16 bits of unsigned i16
; multiply.  The intrinsic is called twice with the same operands — once
; masked (merging into %x2) and once unmasked — and the results are added so
; both lowerings appear in the checked output.
declare <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16>, <32 x i16>)

define <32 x i16> @test_int_x86_avx512_mask_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_512:
; X86:       # %bb.0:
; X86-NEXT:    vpmulhuw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe4,0xd9]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmulhuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe4,0xd1]
; X86-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_512:
; X64:       # %bb.0:
; X64-NEXT:    vpmulhuw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe4,0xd9]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpmulhuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe4,0xd1]
; X64-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
  %4 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1)
  %res2 = add <32 x i16> %3, %4
  ret <32 x i16> %res2
}
 |  | 
; llvm.x86.avx512.pmulh.w.512 (vpmulhw): high 16 bits of signed i16 multiply.
; Same masked+unmasked double-call pattern as the pmulhu.w test above.
declare <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16>, <32 x i16>)

define <32 x i16> @test_int_x86_avx512_mask_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmulh_w_512:
; X86:       # %bb.0:
; X86-NEXT:    vpmulhw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe5,0xd9]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmulhw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe5,0xd1]
; X86-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_512:
; X64:       # %bb.0:
; X64-NEXT:    vpmulhw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe5,0xd9]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpmulhw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe5,0xd1]
; X64-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
  %4 = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1)
  %res2 = add <32 x i16> %3, %4
  ret <32 x i16> %res2
}
 |  | 
 | declare <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16>, <32 x i16>) | 
 |  | 
; Checks llvm.x86.avx512.pmul.hr.sw.512 (VPMULHRSW, rounded-scaled high
; multiply): merge-masked and unmasked calls, summed to keep both live.
define <32 x i16> @test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512:
; X86:       # %bb.0:
; X86-NEXT:    vpmulhrsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x0b,0xd9]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x0b,0xd1]
; X86-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512:
; X64:       # %bb.0:
; X64-NEXT:    vpmulhrsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x0b,0xd9]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x0b,0xd1]
; X64-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
  %4 = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1)
  %res2 = add <32 x i16> %3, %4
  ret <32 x i16> %res2
}
 |  | 
 | declare <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16>, <32 x i8>, i32) | 
 |  | 
; Covers the legacy masked-intrinsic form of VPMOVWB (word->byte truncate):
; unmasked (mask = -1), merge-masked into %x1, and zero-masked (zero passthru).
; The three results are summed so all three variants appear in the output.
define <32 x i8>@test_int_x86_avx512_mask_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmovwb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0xc1]
; X86-NEXT:    vpmovwb %zmm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x30,0xc2]
; X86-NEXT:    vpaddb %ymm2, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0xfc,0xca]
; X86-NEXT:    vpmovwb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x30,0xc0]
; X86-NEXT:    vpaddb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfc,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpmovwb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0xc1]
; X64-NEXT:    vpmovwb %zmm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x30,0xc2]
; X64-NEXT:    vpaddb %ymm2, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0xfc,0xca]
; X64-NEXT:    vpmovwb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x30,0xc0]
; X64-NEXT:    vpaddb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfc,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
    %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
    %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
    %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
    %res3 = add <32 x i8> %res0, %res1
    %res4 = add <32 x i8> %res3, %res2
    ret <32 x i8> %res4
}
 |  | 
 | declare void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16>, i32) | 
 |  | 
; Memory-destination form of VPMOVWB: stores the truncated vector to %ptr
; unmasked (mask = -1) and then masked (%x2), checking both store encodings
; and the trailing vzeroupper.
define void @test_int_x86_avx512_mask_pmov_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmovwb %zmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x48,0x30,0x00]
; X86-NEXT:    vpmovwb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpmovwb %zmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x48,0x30,0x07]
; X64-NEXT:    vpmovwb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
    call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
    call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
    ret void
}
 |  | 
 | declare <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16>, <32 x i8>, i32) | 
 |  | 
; Signed-saturating truncate VPMOVSWB: unmasked (-1), merge-masked into %x1,
; and zero-masked variants of the legacy intrinsic, summed to keep all live.
define <32 x i8>@test_int_x86_avx512_mask_pmovs_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmovswb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x20,0xc1]
; X86-NEXT:    vpmovswb %zmm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x20,0xc2]
; X86-NEXT:    vpaddb %ymm2, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0xfc,0xca]
; X86-NEXT:    vpmovswb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x20,0xc0]
; X86-NEXT:    vpaddb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfc,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpmovswb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x20,0xc1]
; X64-NEXT:    vpmovswb %zmm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x20,0xc2]
; X64-NEXT:    vpaddb %ymm2, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0xfc,0xca]
; X64-NEXT:    vpmovswb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x20,0xc0]
; X64-NEXT:    vpaddb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfc,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
    %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
    %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
    %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
    %res3 = add <32 x i8> %res0, %res1
    %res4 = add <32 x i8> %res3, %res2
    ret <32 x i8> %res4
}
 |  | 
 | declare void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16>, i32) | 
 |  | 
; Memory-destination VPMOVSWB: unmasked then masked truncating store to %ptr.
define void @test_int_x86_avx512_mask_pmovs_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmovswb %zmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x48,0x20,0x00]
; X86-NEXT:    vpmovswb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x20,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpmovswb %zmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x48,0x20,0x07]
; X64-NEXT:    vpmovswb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x20,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
    call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
    call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
    ret void
}
 |  | 
 | declare <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16>, <32 x i8>, i32) | 
 |  | 
; Unsigned-saturating truncate VPMOVUSWB: unmasked (-1), merge-masked into
; %x1, and zero-masked variants, summed to keep all three live.
define <32 x i8>@test_int_x86_avx512_mask_pmovus_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmovuswb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x10,0xc1]
; X86-NEXT:    vpmovuswb %zmm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x10,0xc2]
; X86-NEXT:    vpaddb %ymm2, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0xfc,0xca]
; X86-NEXT:    vpmovuswb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x10,0xc0]
; X86-NEXT:    vpaddb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfc,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpmovuswb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x10,0xc1]
; X64-NEXT:    vpmovuswb %zmm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x10,0xc2]
; X64-NEXT:    vpaddb %ymm2, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0xfc,0xca]
; X64-NEXT:    vpmovuswb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x10,0xc0]
; X64-NEXT:    vpaddb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfc,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
    %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
    %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
    %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
    %res3 = add <32 x i8> %res0, %res1
    %res4 = add <32 x i8> %res3, %res2
    ret <32 x i8> %res4
}
 |  | 
 | declare void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16>, i32) | 
 |  | 
; Memory-destination VPMOVUSWB: unmasked then masked truncating store to %ptr.
define void @test_int_x86_avx512_mask_pmovus_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmovuswb %zmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x48,0x10,0x00]
; X86-NEXT:    vpmovuswb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x10,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpmovuswb %zmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x48,0x10,0x07]
; X64-NEXT:    vpmovuswb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x10,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
    call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
    call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
    ret void
}
 |  | 
 | declare <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8>, <64 x i8>) | 
 |  | 
; Checks llvm.x86.avx512.pmaddubs.w.512 (VPMADDUBSW): merge-masked (select on
; the i32 mask, passthru %x2) plus unmasked call, results summed.
define <32 x i16> @test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
; X86:       # %bb.0:
; X86-NEXT:    vpmaddubsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x04,0xd9]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x04,0xd1]
; X86-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
; X64:       # %bb.0:
; X64-NEXT:    vpmaddubsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x04,0xd9]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x04,0xd1]
; X64-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
  %4 = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1)
  %res2 = add <32 x i16> %3, %4
  ret <32 x i16> %res2
}
 |  | 
 | declare <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16>, <32 x i16>) | 
 |  | 
; Checks llvm.x86.avx512.pmaddw.d.512 (VPMADDWD, 16 x i32 result): note the
; mask here is i16 (16 output dwords), so X86 uses kmovw from the stack.
define <16 x i32> @test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
; X86:       # %bb.0:
; X86-NEXT:    vpmaddwd %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xf5,0xd9]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf5,0xd1]
; X86-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
; X64:       # %bb.0:
; X64-NEXT:    vpmaddwd %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xf5,0xd9]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf5,0xd1]
; X64-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
  %4 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1)
  %res2 = add <16 x i32> %3, %4
  ret <16 x i32> %res2
}
 |  | 
 | declare <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8>, <64 x i8>, i32) | 
 |  | 
; Checks llvm.x86.avx512.dbpsadbw.512 (VDBPSADBW, immediate = 2) in three
; forms: merge-masked into %x3, zero-masked, and unmasked; all summed.
define <32 x i16>@test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
; X86:       # %bb.0:
; X86-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xd9,0x02]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02]
; X86-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xc1,0x02]
; X86-NEXT:    vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
; X86-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
; X64:       # %bb.0:
; X64-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xd9,0x02]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02]
; X64-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xc1,0x02]
; X64-NEXT:    vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
; X64-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2)
  %2 = bitcast i32 %x4 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x3
  %4 = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2)
  %5 = bitcast i32 %x4 to <32 x i1>
  %6 = select <32 x i1> %5, <32 x i16> %4, <32 x i16> zeroinitializer
  %7 = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2)
  %res3 = add <32 x i16> %3, %6
  %res4 = add <32 x i16> %res3, %7
  ret <32 x i16> %res4
}
 |  | 
 | declare  <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8>, <64 x i8>) | 
 |  | 
; Checks llvm.x86.avx512.psad.bw.512 (VPSADBW): two unmasked calls with
; different second operands, summed so both instructions are emitted.
define  <8 x i64>@test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2){
; CHECK-LABEL: test_int_x86_avx512_mask_psadb_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsadbw %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf1,0x7d,0x48,0xf6,0xc9]
; CHECK-NEXT:    vpsadbw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf6,0xc2]
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call  <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x1)
  %res1 = call  <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x2)
  %res2 = add  <8 x i64> %res, %res1
  ret  <8 x i64> %res2
}
 |  | 
 | declare <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32) | 
 |  | 
; Exercises the legacy masked intrinsic llvm.x86.avx512.mask.psrlv32hi
; (VPSRLVW): merge-masked into %x2, zero-masked (zeroinitializer passthru),
; and unmasked (mask = -1); results summed to keep all three forms live.
define <32 x i16>@test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrlv32hi:
; X86:       # %bb.0:
; X86-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x10,0xd9]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x10,0xd1]
; X86-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x10,0xc1]
; X86-NEXT:    vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
; X86-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrlv32hi:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x10,0xd9]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x10,0xd1]
; X64-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x10,0xc1]
; X64-NEXT:    vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
; X64-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
  %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %res3 = add <32 x i16> %res, %res1
  %res4 = add <32 x i16> %res3, %res2
  ret <32 x i16> %res4
}
 |  | 
 | declare <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16>, <32 x i16>, <32 x i16>, i32) | 
 |  | 
; Same three-variant pattern for the arithmetic variable shift intrinsic
; llvm.x86.avx512.mask.psrav32.hi (VPSRAVW): merge, zero, and no mask.
define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrav32_hi:
; X86:       # %bb.0:
; X86-NEXT:    vpsravw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x11,0xd9]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsravw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x11,0xd1]
; X86-NEXT:    vpsravw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x11,0xc1]
; X86-NEXT:    vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
; X86-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav32_hi:
; X64:       # %bb.0:
; X64-NEXT:    vpsravw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x11,0xd9]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsravw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x11,0xd1]
; X64-NEXT:    vpsravw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x11,0xc1]
; X64-NEXT:    vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
; X64-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
  %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %res3 = add <32 x i16> %res, %res1
  %res4 = add <32 x i16> %res3, %res2
  ret <32 x i16> %res4
}
 |  | 
; VPSRAVW with constant vector operands: checks that the shift folds to a
; constant-pool load plus a memory-operand vpsravw (the mask args are unused
; here since the call passes mask = -1).
define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi_const(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrav32_hi_const:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51]
; X86-NEXT:    # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT:    vpsravw {{\.LCPI.*}}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x11,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav32_hi_const:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51]
; X64-NEXT:    # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    vpsravw {{.*}}(%rip), %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x11,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> <i16 2, i16 9,  i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9,  i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9,  i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9,  i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51>,
                                                          <32 x i16> <i16 1, i16 10, i16 35,  i16 52, i16 69,  i16 9,  i16 16,  i16 49, i16 1, i16 10, i16 35,  i16 52, i16 69,  i16 9,  i16 16,  i16 49, i16 1, i16 10, i16 35,  i16 52, i16 69,  i16 9,  i16 16,  i16 49, i16 1, i16 10, i16 35,  i16 52, i16 69,  i16 9,  i16 16,  i16 49>,
                                                          <32 x i16> zeroinitializer, i32 -1)
  ret <32 x i16> %res
}
 |  | 
 | declare <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32) | 
 |  | 
; Three-variant check for llvm.x86.avx512.mask.psllv32hi (VPSLLVW, variable
; logical left shift): merge-masked, zero-masked, and unmasked; summed.
define <32 x i16>@test_int_x86_avx512_mask_psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psllv32hi:
; X86:       # %bb.0:
; X86-NEXT:    vpsllvw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x12,0xd9]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsllvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x12,0xd1]
; X86-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x12,0xc1]
; X86-NEXT:    vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
; X86-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psllv32hi:
; X64:       # %bb.0:
; X64-NEXT:    vpsllvw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x12,0xd9]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsllvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x12,0xd1]
; X64-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x12,0xc1]
; X64-NEXT:    vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
; X64-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %res1 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
  %res2 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %res3 = add <32 x i16> %res, %res1
  %res4 = add <32 x i16> %res3, %res2
  ret <32 x i16> %res4
}
 |  | 
 | declare <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16>, <32 x i16>) | 
 |  | 
; Checks llvm.x86.avx512.permvar.hi.512 (VPERMW — note the operand order:
; indices come from %x1, data from %x0): merge-masked, zero-masked, and
; unmasked via select on the i32 mask; results summed.
define <32 x i16>@test_int_x86_avx512_mask_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
; X86:       # %bb.0:
; X86-NEXT:    vpermw %zmm0, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0x8d,0xd8]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermw %zmm0, %zmm1, %zmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x8d,0xd0]
; X86-NEXT:    vpermw %zmm0, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x8d,0xc0]
; X86-NEXT:    vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
; X86-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
; X64:       # %bb.0:
; X64-NEXT:    vpermw %zmm0, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0x8d,0xd8]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermw %zmm0, %zmm1, %zmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x8d,0xd0]
; X64-NEXT:    vpermw %zmm0, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x8d,0xc0]
; X64-NEXT:    vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
; X64-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
  %4 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1)
  %5 = bitcast i32 %x3 to <32 x i1>
  %6 = select <32 x i1> %5, <32 x i16> %4, <32 x i16> zeroinitializer
  %7 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1)
  %res3 = add <32 x i16> %3, %6
  %res4 = add <32 x i16> %res3, %7
  ret <32 x i16> %res4
}
 |  | 
; Unmasked llvm.x86.avx512.psll.w.512 (VPSLLW with an xmm shift-count).
define <32 x i16> @test_x86_avx512_psll_w_512(<32 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_avx512_psll_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllw %xmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf1,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  ret <32 x i16> %res
}
; Merge-masked VPSLLW: the select on the bitcast mask should fold into the
; {%k1} form of the instruction, with %passthru preserved in the lanes where
; the mask is zero.
define <32 x i16> @test_x86_avx512_mask_psll_w_512(<32 x i16> %a0, <8 x i16> %a1, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_x86_avx512_mask_psll_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsllw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf1,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psll_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsllw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf1,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
  ret <32 x i16> %res2
}
; Zero-masked variable shift: the select against zeroinitializer must fold
; into vpsllw's {%k1} {z} zero-masking, writing the result in place (no
; passthru copy needed).
define <32 x i16> @test_x86_avx512_maskz_psll_w_512(<32 x i16> %a0, <8 x i16> %a1, i32 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psll_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsllw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf1,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psll_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsllw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf1,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
  ret <32 x i16> %res2
}
; Intrinsic under test above; declared nounwind readnone (no side effects).
declare <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16>, <8 x i16>) nounwind readnone
 |  | 
 |  | 
; Unmasked immediate shift: llvm.x86.avx512.pslli.w.512 with a constant count
; must lower to the immediate form, vpsllw $7.
define <32 x i16> @test_x86_avx512_pslli_w_512(<32 x i16> %a0) {
; CHECK-LABEL: test_x86_avx512_pslli_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllw $7, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xf0,0x07]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  ret <32 x i16> %res
}
; Merge-masked immediate shift: the select folds into vpsllw $7's {%k1}
; writemask with %passthru merged in zmm1, then moved to the return register.
define <32 x i16> @test_x86_avx512_mask_pslli_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_x86_avx512_mask_pslli_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsllw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xf0,0x07]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_pslli_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsllw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xf0,0x07]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
  ret <32 x i16> %res2
}
; Zero-masked immediate shift: select against zeroinitializer folds into
; vpsllw $7's {%k1} {z} zero-masking, in place.
define <32 x i16> @test_x86_avx512_maskz_pslli_w_512(<32 x i16> %a0, i32 %mask) {
; X86-LABEL: test_x86_avx512_maskz_pslli_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsllw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xf0,0x07]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_pslli_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsllw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xf0,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
  ret <32 x i16> %res2
}
; Intrinsic under test above; declared nounwind readnone (no side effects).
declare <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16>, i32) nounwind readnone
 |  | 
 |  | 
; Unmasked variable arithmetic shift right: llvm.x86.avx512.psra.w.512 must
; lower to a single EVEX vpsraw (zmm source, xmm count register).
define <32 x i16> @test_x86_avx512_psra_w_512(<32 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_avx512_psra_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsraw %xmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe1,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  ret <32 x i16> %res
}
; Merge-masked variable arithmetic shift right: the select folds into
; vpsraw's {%k1} writemask (passthru merged in zmm2, then moved to zmm0).
define <32 x i16> @test_x86_avx512_mask_psra_w_512(<32 x i16> %a0, <8 x i16> %a1, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_x86_avx512_mask_psra_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsraw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe1,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psra_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsraw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe1,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
  ret <32 x i16> %res2
}
; Zero-masked variable arithmetic shift right: select against zeroinitializer
; folds into vpsraw's {%k1} {z} zero-masking, in place.
define <32 x i16> @test_x86_avx512_maskz_psra_w_512(<32 x i16> %a0, <8 x i16> %a1, i32 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psra_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsraw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe1,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psra_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsraw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe1,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
  ret <32 x i16> %res2
}
; Intrinsic under test above; declared nounwind readnone (no side effects).
declare <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16>, <8 x i16>) nounwind readnone
 |  | 
 |  | 
; Unmasked immediate arithmetic shift right: llvm.x86.avx512.psrai.w.512 with
; a constant count must lower to the immediate form, vpsraw $7.
define <32 x i16> @test_x86_avx512_psrai_w_512(<32 x i16> %a0) {
; CHECK-LABEL: test_x86_avx512_psrai_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsraw $7, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xe0,0x07]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  ret <32 x i16> %res
}
; Merge-masked immediate arithmetic shift right: the select folds into
; vpsraw $7's {%k1} writemask (passthru merged in zmm1, then moved to zmm0).
define <32 x i16> @test_x86_avx512_mask_psrai_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrai_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsraw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xe0,0x07]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrai_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsraw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xe0,0x07]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
  ret <32 x i16> %res2
}
; Zero-masked immediate arithmetic shift right: select against zeroinitializer
; folds into vpsraw $7's {%k1} {z} zero-masking, in place.
; NOTE(review): unlike the sibling maskz tests (maskz_pslli/maskz_psrli), this
; one declares an unused %passthru parameter.  It does not affect the checked
; codegen (mask still arrives at esp+4 / in %edi), but looks like a leftover
; from the mask variant — confirm intent before changing the signature.
define <32 x i16> @test_x86_avx512_maskz_psrai_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrai_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsraw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xe0,0x07]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrai_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsraw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xe0,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
  ret <32 x i16> %res2
}
; Intrinsic under test above; declared nounwind readnone (no side effects).
declare <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16>, i32) nounwind readnone
 |  | 
 |  | 
; Unmasked variable logical shift right: llvm.x86.avx512.psrl.w.512 must
; lower to a single EVEX vpsrlw (zmm source, xmm count register).
define <32 x i16> @test_x86_avx512_psrl_w_512(<32 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_avx512_psrl_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd1,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  ret <32 x i16> %res
}
; Merge-masked variable logical shift right: the select folds into vpsrlw's
; {%k1} writemask (passthru merged in zmm2, then moved to zmm0).
define <32 x i16> @test_x86_avx512_mask_psrl_w_512(<32 x i16> %a0, <8 x i16> %a1, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrl_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsrlw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd1,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrl_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsrlw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd1,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
  ret <32 x i16> %res2
}
; Zero-masked variable logical shift right: select against zeroinitializer
; folds into vpsrlw's {%k1} {z} zero-masking, in place.
define <32 x i16> @test_x86_avx512_maskz_psrl_w_512(<32 x i16> %a0, <8 x i16> %a1, i32 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrl_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd1,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrl_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd1,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
  ret <32 x i16> %res2
}
; Intrinsic under test above; declared nounwind readnone (no side effects).
declare <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16>, <8 x i16>) nounwind readnone
 |  | 
 |  | 
; Unmasked immediate logical shift right: llvm.x86.avx512.psrli.w.512 with a
; constant count must lower to the immediate form, vpsrlw $7.
define <32 x i16> @test_x86_avx512_psrli_w_512(<32 x i16> %a0) {
; CHECK-LABEL: test_x86_avx512_psrli_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlw $7, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xd0,0x07]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  ret <32 x i16> %res
}
; Merge-masked immediate logical shift right: the select folds into
; vpsrlw $7's {%k1} writemask (passthru merged in zmm1, then moved to zmm0).
define <32 x i16> @test_x86_avx512_mask_psrli_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrli_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsrlw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xd0,0x07]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrli_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsrlw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xd0,0x07]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
  ret <32 x i16> %res2
}
; Zero-masked immediate logical shift right: select against zeroinitializer
; folds into vpsrlw $7's {%k1} {z} zero-masking, in place.
define <32 x i16> @test_x86_avx512_maskz_psrli_w_512(<32 x i16> %a0, i32 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrli_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsrlw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xd0,0x07]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrli_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsrlw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xd0,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
  ret <32 x i16> %res2
}
; Intrinsic under test above; declared nounwind readnone (no side effects).
declare <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16>, i32) nounwind readnone