| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | 
 | ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512ifma,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86 | 
 | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512ifma,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64 | 
 |  | 
 | ; Declaration of the 128-bit vpmadd52h.uq intrinsic: three <2 x i64> operands, | 
 | ; <2 x i64> result. The expected assembly below maps it to the vpmadd52huq | 
 | ; instruction; the arithmetic it performs is not visible in this file. | 
 | declare <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64>, <2 x i64>, <2 x i64>) | 
 |  | 
 | ; Merge-masking test for the 128-bit vpmadd52h.uq intrinsic. | 
 | ; Returns the sum of four intrinsic results: two blended with %x0 under the mask | 
 | ; in %x3, one blended with zero under the same mask, and one unmasked. | 
 | ; The expected assembly for both RUN lines shows the corresponding {%k1}, | 
 | ; {%k1} {z} and unmasked instruction forms on %xmm registers. | 
 | define <2 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { | 
 | ; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_128: | 
 | ; X86:       # %bb.0: | 
 | ; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] | 
 | ; X86-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0xb5,0xda] | 
 | ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] | 
 | ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] | 
 | ; X86-NEXT:    vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0] | 
 | ; X86-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm4 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xb5,0xe2] | 
 | ; X86-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] | 
 | ; X86-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xb5,0xc2] | 
 | ; X86-NEXT:    vpaddq %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xd4,0xc0] | 
 | ; X86-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb5,0xd2] | 
 | ; X86-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] | 
 | ; X86-NEXT:    vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0] | 
 | ; X86-NEXT:    retl # encoding: [0xc3] | 
 | ; | 
 | ; X64-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_128: | 
 | ; X64:       # %bb.0: | 
 | ; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] | 
 | ; X64-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0xb5,0xda] | 
 | ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] | 
 | ; X64-NEXT:    vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0] | 
 | ; X64-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm4 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xb5,0xe2] | 
 | ; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] | 
 | ; X64-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xb5,0xc2] | 
 | ; X64-NEXT:    vpaddq %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xd4,0xc0] | 
 | ; X64-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb5,0xd2] | 
 | ; X64-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] | 
 | ; X64-NEXT:    vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0] | 
 | ; X64-NEXT:    retq # encoding: [0xc3] | 
 |  | 
 |   ; Merge-masked call on (%x0, %x1, %x2): lanes whose mask element is false keep %x0. | 
 |   %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) | 
 |   %2 = bitcast i8 %x3 to <8 x i1> | 
 |   ; Only elements 0 and 1 of the 8-element mask are used, one per <2 x i64> lane. | 
 |   %extract2 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1> | 
 |   %3 = select <2 x i1> %extract2, <2 x i64> %1, <2 x i64> %x0 | 
 |   ; Same merge-masked pattern with the third operand replaced by zero. | 
 |   %4 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer) | 
 |   %5 = bitcast i8 %x3 to <8 x i1> | 
 |   %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1> | 
 |   %6 = select <2 x i1> %extract1, <2 x i64> %4, <2 x i64> %x0 | 
 |   ; First and third operands are zero; lanes whose mask element is false become zero. | 
 |   %7 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> zeroinitializer, <2 x i64> %x1, <2 x i64> zeroinitializer) | 
 |   %8 = bitcast i8 %x3 to <8 x i1> | 
 |   %extract = shufflevector <8 x i1> %8, <8 x i1> %8, <2 x i32> <i32 0, i32 1> | 
 |   %9 = select <2 x i1> %extract, <2 x i64> %7, <2 x i64> zeroinitializer | 
 |   ; Unmasked call with the same operands as %1. | 
 |   %10 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) | 
 |   ; Add all four results together to form the return value. | 
 |   %res4 = add <2 x i64> %3, %6 | 
 |   %res5 = add <2 x i64> %10, %9 | 
 |   %res6 = add <2 x i64> %res5, %res4 | 
 |   ret <2 x i64> %res6 | 
 | } | 
 |  | 
 | ; Declaration of the 256-bit vpmadd52h.uq intrinsic: three <4 x i64> operands, | 
 | ; <4 x i64> result. The expected assembly below maps it to the vpmadd52huq | 
 | ; instruction on %ymm registers; its arithmetic is not visible in this file. | 
 | declare <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64>, <4 x i64>, <4 x i64>) | 
 |  | 
 | ; Merge-masking test for the 256-bit vpmadd52h.uq intrinsic. | 
 | ; Returns the sum of four intrinsic results: two blended with %x0 under the mask | 
 | ; in %x3, one blended with zero under the same mask, and one unmasked. | 
 | ; The expected assembly for both RUN lines shows the corresponding {%k1}, | 
 | ; {%k1} {z} and unmasked instruction forms. | 
 | define <4 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { | 
 | ; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_256: | 
 | ; X86:       # %bb.0: | 
 | ; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] | 
 | ; X86-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0xb5,0xda] | 
 | ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] | 
 | ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] | 
 | ; X86-NEXT:    vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0] | 
 | ; X86-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm4 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xb5,0xe2] | 
 | ; X86-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] | 
 | ; X86-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xb5,0xc2] | 
 | ; X86-NEXT:    vpaddq %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xd4,0xc0] | 
 | ; X86-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb5,0xd2] | 
 | ; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] | 
 | ; X86-NEXT:    vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0] | 
 | ; X86-NEXT:    retl # encoding: [0xc3] | 
 | ; | 
 | ; X64-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_256: | 
 | ; X64:       # %bb.0: | 
 | ; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] | 
 | ; X64-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0xb5,0xda] | 
 | ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] | 
 | ; X64-NEXT:    vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0] | 
 | ; X64-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm4 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xb5,0xe2] | 
 | ; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] | 
 | ; X64-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xb5,0xc2] | 
 | ; X64-NEXT:    vpaddq %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xd4,0xc0] | 
 | ; X64-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb5,0xd2] | 
 | ; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] | 
 | ; X64-NEXT:    vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0] | 
 | ; X64-NEXT:    retq # encoding: [0xc3] | 
 |  | 
 |   ; Merge-masked call on (%x0, %x1, %x2): lanes whose mask element is false keep %x0. | 
 |   %1 = call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) | 
 |   %2 = bitcast i8 %x3 to <8 x i1> | 
 |   ; Only elements 0-3 of the 8-element mask are used, one per <4 x i64> lane. | 
 |   %extract2 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> | 
 |   %3 = select <4 x i1> %extract2, <4 x i64> %1, <4 x i64> %x0 | 
 |   ; Same merge-masked pattern with the third operand replaced by zero. | 
 |   %4 = call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer) | 
 |   %5 = bitcast i8 %x3 to <8 x i1> | 
 |   %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3> | 
 |   %6 = select <4 x i1> %extract1, <4 x i64> %4, <4 x i64> %x0 | 
 |   ; First and third operands are zero; lanes whose mask element is false become zero. | 
 |   %7 = call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> zeroinitializer, <4 x i64> %x1, <4 x i64> zeroinitializer) | 
 |   %8 = bitcast i8 %x3 to <8 x i1> | 
 |   %extract = shufflevector <8 x i1> %8, <8 x i1> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 3> | 
 |   %9 = select <4 x i1> %extract, <4 x i64> %7, <4 x i64> zeroinitializer | 
 |   ; Unmasked call with the same operands as %1. | 
 |   %10 = call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) | 
 |   ; Add all four results together to form the return value. | 
 |   %res4 = add <4 x i64> %3, %6 | 
 |   %res5 = add <4 x i64> %10, %9 | 
 |   %res6 = add <4 x i64> %res5, %res4 | 
 |   ret <4 x i64> %res6 | 
 | } | 
 |  | 
 | ; Zero-masking test for the 128-bit vpmadd52h.uq intrinsic. | 
 | ; Returns the sum of four intrinsic results: three blended with zero under the | 
 | ; mask in %x3, and one unmasked. The expected assembly for both RUN lines shows | 
 | ; the three masked instructions in {%k1} {z} form on %xmm registers. | 
 | define <2 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { | 
 | ; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_128: | 
 | ; X86:       # %bb.0: | 
 | ; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] | 
 | ; X86-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0xb5,0xda] | 
 | ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] | 
 | ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] | 
 | ; X86-NEXT:    vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0] | 
 | ; X86-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb5,0xe2] | 
 | ; X86-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] | 
 | ; X86-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb5,0xc2] | 
 | ; X86-NEXT:    vpaddq %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xd4,0xc0] | 
 | ; X86-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb5,0xd2] | 
 | ; X86-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] | 
 | ; X86-NEXT:    vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0] | 
 | ; X86-NEXT:    retl # encoding: [0xc3] | 
 | ; | 
 | ; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_128: | 
 | ; X64:       # %bb.0: | 
 | ; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] | 
 | ; X64-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0xb5,0xda] | 
 | ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] | 
 | ; X64-NEXT:    vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0] | 
 | ; X64-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb5,0xe2] | 
 | ; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] | 
 | ; X64-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb5,0xc2] | 
 | ; X64-NEXT:    vpaddq %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xd4,0xc0] | 
 | ; X64-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb5,0xd2] | 
 | ; X64-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] | 
 | ; X64-NEXT:    vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0] | 
 | ; X64-NEXT:    retq # encoding: [0xc3] | 
 |  | 
 |   ; Zero-masked call on (%x0, %x1, %x2): lanes whose mask element is false become zero. | 
 |   %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) | 
 |   %2 = bitcast i8 %x3 to <8 x i1> | 
 |   ; Only elements 0 and 1 of the 8-element mask are used, one per <2 x i64> lane. | 
 |   %extract2 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1> | 
 |   %3 = select <2 x i1> %extract2, <2 x i64> %1, <2 x i64> zeroinitializer | 
 |   ; Same zero-masked pattern with the third operand replaced by zero. | 
 |   %4 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer) | 
 |   %5 = bitcast i8 %x3 to <8 x i1> | 
 |   %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1> | 
 |   %6 = select <2 x i1> %extract1, <2 x i64> %4, <2 x i64> zeroinitializer | 
 |   ; Same zero-masked pattern with both the first and third operands zero. | 
 |   %7 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> zeroinitializer, <2 x i64> %x1, <2 x i64> zeroinitializer) | 
 |   %8 = bitcast i8 %x3 to <8 x i1> | 
 |   %extract = shufflevector <8 x i1> %8, <8 x i1> %8, <2 x i32> <i32 0, i32 1> | 
 |   %9 = select <2 x i1> %extract, <2 x i64> %7, <2 x i64> zeroinitializer | 
 |   ; Unmasked call with the same operands as %1. | 
 |   %10 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) | 
 |   ; Add all four results together to form the return value. | 
 |   %res4 = add <2 x i64> %3, %6 | 
 |   %res5 = add <2 x i64> %10, %9 | 
 |   %res6 = add <2 x i64> %res5, %res4 | 
 |   ret <2 x i64> %res6 | 
 | } | 
 |  | 
 | ; Zero-masking test for the 256-bit vpmadd52h.uq intrinsic. | 
 | ; Returns the sum of four intrinsic results: three blended with zero under the | 
 | ; mask in %x3, and one unmasked. The expected assembly for both RUN lines shows | 
 | ; the three masked instructions in {%k1} {z} form on %ymm registers. | 
 | define <4 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { | 
 | ; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_256: | 
 | ; X86:       # %bb.0: | 
 | ; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] | 
 | ; X86-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0xb5,0xda] | 
 | ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] | 
 | ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] | 
 | ; X86-NEXT:    vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0] | 
 | ; X86-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb5,0xe2] | 
 | ; X86-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] | 
 | ; X86-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb5,0xc2] | 
 | ; X86-NEXT:    vpaddq %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xd4,0xc0] | 
 | ; X86-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb5,0xd2] | 
 | ; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] | 
 | ; X86-NEXT:    vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0] | 
 | ; X86-NEXT:    retl # encoding: [0xc3] | 
 | ; | 
 | ; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_256: | 
 | ; X64:       # %bb.0: | 
 | ; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] | 
 | ; X64-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0xb5,0xda] | 
 | ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] | 
 | ; X64-NEXT:    vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0] | 
 | ; X64-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb5,0xe2] | 
 | ; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] | 
 | ; X64-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb5,0xc2] | 
 | ; X64-NEXT:    vpaddq %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xd4,0xc0] | 
 | ; X64-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb5,0xd2] | 
 | ; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] | 
 | ; X64-NEXT:    vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0] | 
 | ; X64-NEXT:    retq # encoding: [0xc3] | 
 |  | 
 |   ; Zero-masked call on (%x0, %x1, %x2): lanes whose mask element is false become zero. | 
 |   %1 = call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) | 
 |   %2 = bitcast i8 %x3 to <8 x i1> | 
 |   ; Only elements 0-3 of the 8-element mask are used, one per <4 x i64> lane. | 
 |   %extract2 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> | 
 |   %3 = select <4 x i1> %extract2, <4 x i64> %1, <4 x i64> zeroinitializer | 
 |   ; Same zero-masked pattern with the third operand replaced by zero. | 
 |   %4 = call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer) | 
 |   %5 = bitcast i8 %x3 to <8 x i1> | 
 |   %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3> | 
 |   %6 = select <4 x i1> %extract1, <4 x i64> %4, <4 x i64> zeroinitializer | 
 |   ; Same zero-masked pattern with both the first and third operands zero. | 
 |   %7 = call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> zeroinitializer, <4 x i64> %x1, <4 x i64> zeroinitializer) | 
 |   %8 = bitcast i8 %x3 to <8 x i1> | 
 |   %extract = shufflevector <8 x i1> %8, <8 x i1> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 3> | 
 |   %9 = select <4 x i1> %extract, <4 x i64> %7, <4 x i64> zeroinitializer | 
 |   ; Unmasked call with the same operands as %1. | 
 |   %10 = call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) | 
 |   ; Add all four results together to form the return value. | 
 |   %res4 = add <4 x i64> %3, %6 | 
 |   %res5 = add <4 x i64> %10, %9 | 
 |   %res6 = add <4 x i64> %res5, %res4 | 
 |   ret <4 x i64> %res6 | 
 | } | 
 |  | 
 | ; Declaration of the 128-bit vpmadd52l.uq intrinsic: three <2 x i64> operands, | 
 | ; <2 x i64> result. The expected assembly below maps it to the vpmadd52luq | 
 | ; instruction; the arithmetic it performs is not visible in this file. | 
 | declare <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64>, <2 x i64>, <2 x i64>) | 
 |  | 
 | ; Merge-masking test for the 128-bit vpmadd52l.uq intrinsic. | 
 | ; Returns the sum of four intrinsic results: two blended with %x0 under the mask | 
 | ; in %x3, one blended with zero under the same mask, and one unmasked. | 
 | ; The expected assembly for both RUN lines shows the corresponding {%k1}, | 
 | ; {%k1} {z} and unmasked instruction forms on %xmm registers. | 
 | define <2 x i64>@test_int_x86_avx512_mask_vpmadd52l_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { | 
 | ; X86-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_128: | 
 | ; X86:       # %bb.0: | 
 | ; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] | 
 | ; X86-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0xb4,0xda] | 
 | ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] | 
 | ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] | 
 | ; X86-NEXT:    vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0] | 
 | ; X86-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm4 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xb4,0xe2] | 
 | ; X86-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] | 
 | ; X86-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xb4,0xc2] | 
 | ; X86-NEXT:    vpaddq %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xd4,0xc0] | 
 | ; X86-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb4,0xd2] | 
 | ; X86-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] | 
 | ; X86-NEXT:    vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0] | 
 | ; X86-NEXT:    retl # encoding: [0xc3] | 
 | ; | 
 | ; X64-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_128: | 
 | ; X64:       # %bb.0: | 
 | ; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] | 
 | ; X64-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0xb4,0xda] | 
 | ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] | 
 | ; X64-NEXT:    vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0] | 
 | ; X64-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm4 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xb4,0xe2] | 
 | ; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] | 
 | ; X64-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xb4,0xc2] | 
 | ; X64-NEXT:    vpaddq %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xd4,0xc0] | 
 | ; X64-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb4,0xd2] | 
 | ; X64-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] | 
 | ; X64-NEXT:    vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0] | 
 | ; X64-NEXT:    retq # encoding: [0xc3] | 
 |  | 
 |   ; Merge-masked call on (%x0, %x1, %x2): lanes whose mask element is false keep %x0. | 
 |   %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) | 
 |   %2 = bitcast i8 %x3 to <8 x i1> | 
 |   ; Only elements 0 and 1 of the 8-element mask are used, one per <2 x i64> lane. | 
 |   %extract2 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1> | 
 |   %3 = select <2 x i1> %extract2, <2 x i64> %1, <2 x i64> %x0 | 
 |   ; Same merge-masked pattern with the third operand replaced by zero. | 
 |   %4 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer) | 
 |   %5 = bitcast i8 %x3 to <8 x i1> | 
 |   %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1> | 
 |   %6 = select <2 x i1> %extract1, <2 x i64> %4, <2 x i64> %x0 | 
 |   ; First and third operands are zero; lanes whose mask element is false become zero. | 
 |   %7 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> zeroinitializer, <2 x i64> %x1, <2 x i64> zeroinitializer) | 
 |   %8 = bitcast i8 %x3 to <8 x i1> | 
 |   %extract = shufflevector <8 x i1> %8, <8 x i1> %8, <2 x i32> <i32 0, i32 1> | 
 |   %9 = select <2 x i1> %extract, <2 x i64> %7, <2 x i64> zeroinitializer | 
 |   ; Unmasked call with the same operands as %1. | 
 |   %10 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) | 
 |   ; Add all four results together to form the return value. | 
 |   %res4 = add <2 x i64> %3, %6 | 
 |   %res5 = add <2 x i64> %10, %9 | 
 |   %res6 = add <2 x i64> %res5, %res4 | 
 |   ret <2 x i64> %res6 | 
 | } | 
 |  | 
 | ; Declaration of the 256-bit vpmadd52l.uq intrinsic: three <4 x i64> operands, | 
 | ; <4 x i64> result. The expected assembly below maps it to the vpmadd52luq | 
 | ; instruction on %ymm registers; its arithmetic is not visible in this file. | 
 | declare <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64>, <4 x i64>, <4 x i64>) | 
 |  | 
 | ; Merge-masking test for the 256-bit vpmadd52l.uq intrinsic. | 
 | ; Returns the sum of four intrinsic results: two blended with %x0 under the mask | 
 | ; in %x3, one blended with zero under the same mask, and one unmasked. | 
 | ; The expected assembly for both RUN lines shows the corresponding {%k1}, | 
 | ; {%k1} {z} and unmasked instruction forms. | 
 | define <4 x i64>@test_int_x86_avx512_mask_vpmadd52l_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { | 
 | ; X86-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_256: | 
 | ; X86:       # %bb.0: | 
 | ; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] | 
 | ; X86-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0xb4,0xda] | 
 | ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] | 
 | ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] | 
 | ; X86-NEXT:    vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0] | 
 | ; X86-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm4 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xb4,0xe2] | 
 | ; X86-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] | 
 | ; X86-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xb4,0xc2] | 
 | ; X86-NEXT:    vpaddq %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xd4,0xc0] | 
 | ; X86-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb4,0xd2] | 
 | ; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] | 
 | ; X86-NEXT:    vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0] | 
 | ; X86-NEXT:    retl # encoding: [0xc3] | 
 | ; | 
 | ; X64-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_256: | 
 | ; X64:       # %bb.0: | 
 | ; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] | 
 | ; X64-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0xb4,0xda] | 
 | ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] | 
 | ; X64-NEXT:    vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0] | 
 | ; X64-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm4 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xb4,0xe2] | 
 | ; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] | 
 | ; X64-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xb4,0xc2] | 
 | ; X64-NEXT:    vpaddq %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xd4,0xc0] | 
 | ; X64-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb4,0xd2] | 
 | ; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] | 
 | ; X64-NEXT:    vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0] | 
 | ; X64-NEXT:    retq # encoding: [0xc3] | 
 |  | 
 |   ; Merge-masked call on (%x0, %x1, %x2): lanes whose mask element is false keep %x0. | 
 |   %1 = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) | 
 |   %2 = bitcast i8 %x3 to <8 x i1> | 
 |   ; Only elements 0-3 of the 8-element mask are used, one per <4 x i64> lane. | 
 |   %extract2 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> | 
 |   %3 = select <4 x i1> %extract2, <4 x i64> %1, <4 x i64> %x0 | 
 |   ; Same merge-masked pattern with the third operand replaced by zero. | 
 |   %4 = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer) | 
 |   %5 = bitcast i8 %x3 to <8 x i1> | 
 |   %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3> | 
 |   %6 = select <4 x i1> %extract1, <4 x i64> %4, <4 x i64> %x0 | 
 |   ; First and third operands are zero; lanes whose mask element is false become zero. | 
 |   %7 = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> zeroinitializer, <4 x i64> %x1, <4 x i64> zeroinitializer) | 
 |   %8 = bitcast i8 %x3 to <8 x i1> | 
 |   %extract = shufflevector <8 x i1> %8, <8 x i1> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 3> | 
 |   %9 = select <4 x i1> %extract, <4 x i64> %7, <4 x i64> zeroinitializer | 
 |   ; Unmasked call with the same operands as %1. | 
 |   %10 = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) | 
 |   ; Add all four results together to form the return value. | 
 |   %res4 = add <4 x i64> %3, %6 | 
 |   %res5 = add <4 x i64> %10, %9 | 
 |   %res6 = add <4 x i64> %res5, %res4 | 
 |   ret <4 x i64> %res6 | 
 | } | 
 |  | 
 | ; Zero-masking test for the 128-bit vpmadd52l.uq intrinsic. | 
 | ; Returns the sum of four intrinsic results: three blended with zero under the | 
 | ; mask in %x3, and one unmasked. The expected assembly for both RUN lines shows | 
 | ; the three masked instructions in {%k1} {z} form on %xmm registers. | 
 | define <2 x i64>@test_int_x86_avx512_maskz_vpmadd52l_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { | 
 | ; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_128: | 
 | ; X86:       # %bb.0: | 
 | ; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] | 
 | ; X86-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0xb4,0xda] | 
 | ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] | 
 | ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] | 
 | ; X86-NEXT:    vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0] | 
 | ; X86-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb4,0xe2] | 
 | ; X86-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] | 
 | ; X86-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb4,0xc2] | 
 | ; X86-NEXT:    vpaddq %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xd4,0xc0] | 
 | ; X86-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb4,0xd2] | 
 | ; X86-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] | 
 | ; X86-NEXT:    vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0] | 
 | ; X86-NEXT:    retl # encoding: [0xc3] | 
 | ; | 
 | ; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_128: | 
 | ; X64:       # %bb.0: | 
 | ; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] | 
 | ; X64-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0xb4,0xda] | 
 | ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] | 
 | ; X64-NEXT:    vmovdqa %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe0] | 
 | ; X64-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb4,0xe2] | 
 | ; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] | 
 | ; X64-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb4,0xc2] | 
 | ; X64-NEXT:    vpaddq %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xd4,0xc0] | 
 | ; X64-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xb4,0xd2] | 
 | ; X64-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] | 
 | ; X64-NEXT:    vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0] | 
 | ; X64-NEXT:    retq # encoding: [0xc3] | 
 |  | 
 |   ; Zero-masked call on (%x0, %x1, %x2): lanes whose mask element is false become zero. | 
 |   %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) | 
 |   %2 = bitcast i8 %x3 to <8 x i1> | 
 |   ; Only elements 0 and 1 of the 8-element mask are used, one per <2 x i64> lane. | 
 |   %extract2 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1> | 
 |   %3 = select <2 x i1> %extract2, <2 x i64> %1, <2 x i64> zeroinitializer | 
 |   ; Same zero-masked pattern with the third operand replaced by zero. | 
 |   %4 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer) | 
 |   %5 = bitcast i8 %x3 to <8 x i1> | 
 |   %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1> | 
 |   %6 = select <2 x i1> %extract1, <2 x i64> %4, <2 x i64> zeroinitializer | 
 |   ; Same zero-masked pattern with both the first and third operands zero. | 
 |   %7 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> zeroinitializer, <2 x i64> %x1, <2 x i64> zeroinitializer) | 
 |   %8 = bitcast i8 %x3 to <8 x i1> | 
 |   %extract = shufflevector <8 x i1> %8, <8 x i1> %8, <2 x i32> <i32 0, i32 1> | 
 |   %9 = select <2 x i1> %extract, <2 x i64> %7, <2 x i64> zeroinitializer | 
 |   ; Unmasked call with the same operands as %1. | 
 |   %10 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) | 
 |   ; Add all four results together to form the return value. | 
 |   %res4 = add <2 x i64> %3, %6 | 
 |   %res5 = add <2 x i64> %10, %9 | 
 |   %res6 = add <2 x i64> %res5, %res4 | 
 |   ret <2 x i64> %res6 | 
 | } | 
 |  | 
 | ; Zero-masking test for the 256-bit vpmadd52l.uq intrinsic. | 
 | ; Returns the sum of four intrinsic results: three blended with zero under the | 
 | ; mask in %x3, and one unmasked. The expected assembly for both RUN lines shows | 
 | ; the three masked instructions in {%k1} {z} form on %ymm registers. | 
 | define <4 x i64>@test_int_x86_avx512_maskz_vpmadd52l_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { | 
 | ; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_256: | 
 | ; X86:       # %bb.0: | 
 | ; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] | 
 | ; X86-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0xb4,0xda] | 
 | ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] | 
 | ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] | 
 | ; X86-NEXT:    vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0] | 
 | ; X86-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb4,0xe2] | 
 | ; X86-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] | 
 | ; X86-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb4,0xc2] | 
 | ; X86-NEXT:    vpaddq %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xd4,0xc0] | 
 | ; X86-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb4,0xd2] | 
 | ; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] | 
 | ; X86-NEXT:    vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0] | 
 | ; X86-NEXT:    retl # encoding: [0xc3] | 
 | ; | 
 | ; X64-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_256: | 
 | ; X64:       # %bb.0: | 
 | ; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] | 
 | ; X64-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0xb4,0xda] | 
 | ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] | 
 | ; X64-NEXT:    vmovdqa %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe0] | 
 | ; X64-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb4,0xe2] | 
 | ; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] | 
 | ; X64-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb4,0xc2] | 
 | ; X64-NEXT:    vpaddq %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xd4,0xc0] | 
 | ; X64-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xb4,0xd2] | 
 | ; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] | 
 | ; X64-NEXT:    vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0] | 
 | ; X64-NEXT:    retq # encoding: [0xc3] | 
 |  | 
 |   ; Zero-masked call on (%x0, %x1, %x2): lanes whose mask element is false become zero. | 
 |   %1 = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) | 
 |   %2 = bitcast i8 %x3 to <8 x i1> | 
 |   ; Only elements 0-3 of the 8-element mask are used, one per <4 x i64> lane. | 
 |   %extract2 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> | 
 |   %3 = select <4 x i1> %extract2, <4 x i64> %1, <4 x i64> zeroinitializer | 
 |   ; Same zero-masked pattern with the third operand replaced by zero. | 
 |   %4 = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer) | 
 |   %5 = bitcast i8 %x3 to <8 x i1> | 
 |   %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3> | 
 |   %6 = select <4 x i1> %extract1, <4 x i64> %4, <4 x i64> zeroinitializer | 
 |   ; Same zero-masked pattern with both the first and third operands zero. | 
 |   %7 = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> zeroinitializer, <4 x i64> %x1, <4 x i64> zeroinitializer) | 
 |   %8 = bitcast i8 %x3 to <8 x i1> | 
 |   %extract = shufflevector <8 x i1> %8, <8 x i1> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 3> | 
 |   %9 = select <4 x i1> %extract, <4 x i64> %7, <4 x i64> zeroinitializer | 
 |   ; Unmasked call with the same operands as %1. | 
 |   %10 = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) | 
 |   ; Add all four results together to form the return value. | 
 |   %res4 = add <4 x i64> %3, %6 | 
 |   %res5 = add <4 x i64> %10, %9 | 
 |   %res6 = add <4 x i64> %res5, %res4 | 
 |   ret <4 x i64> %res6 | 
 | } | 