|  | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | 
|  | ; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE | 
|  | ; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1 | 
|  | ; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512 | 
|  | ; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse,-sse2 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE | 
|  | ; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1 | 
|  | ; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512 | 
|  |  | 
|  | ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse-builtins.c | 
|  |  | 
|  | ; Full-width fadd of the two <4 x float> arguments. | 
|  | ; Every run configuration is expected to emit a single packed add (addps / vaddps). | 
|  | define <4 x float> @test_mm_add_ps(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_add_ps: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    addps %xmm1, %xmm0 # encoding: [0x0f,0x58,0xc1] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_add_ps: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x58,0xc1] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_add_ps: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = fadd <4 x float> %a0, %a1 | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; Scalar add of lane 0 only, written as extractelement / fadd / insertelement back into %a0. | 
|  | ; The three-instruction IR pattern is expected to fold into one scalar add (addss / vaddss). | 
|  | define <4 x float> @test_mm_add_ss(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_add_ss: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    addss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x58,0xc1] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_add_ss: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x58,0xc1] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_add_ss: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x58,0xc1] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %ext0 = extractelement <4 x float> %a0, i32 0 | 
|  | %ext1 = extractelement <4 x float> %a1, i32 0 | 
|  | %fadd = fadd float %ext0, %ext1 | 
|  | %res = insertelement <4 x float> %a0, float %fadd, i32 0 | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; Bitwise AND performed on <4 x i32> bitcasts of the two float vectors, then cast back. | 
|  | ; Expected to select the float-domain logic op (andps / vandps) with no extra moves. | 
|  | define <4 x float> @test_mm_and_ps(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_and_ps: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    andps %xmm1, %xmm0 # encoding: [0x0f,0x54,0xc1] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_and_ps: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vandps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0xc1] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_and_ps: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vandps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0xc1] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %arg0 = bitcast <4 x float> %a0 to <4 x i32> | 
|  | %arg1 = bitcast <4 x float> %a1 to <4 x i32> | 
|  | %res = and <4 x i32> %arg0, %arg1 | 
|  | %bc = bitcast <4 x i32> %res to <4 x float> | 
|  | ret <4 x float> %bc | 
|  | } | 
|  |  | 
|  | ; Computes (~%a0) & %a1 on <4 x i32> bitcasts; the NOT is spelled as xor with all-ones. | 
|  | ; The xor+and pair is expected to fold into a single and-not (andnps / vandnps). | 
|  | define <4 x float> @test_mm_andnot_ps(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_andnot_ps: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    andnps %xmm1, %xmm0 # encoding: [0x0f,0x55,0xc1] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_andnot_ps: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0xc1] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_andnot_ps: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x55,0xc1] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %arg0 = bitcast <4 x float> %a0 to <4 x i32> | 
|  | %arg1 = bitcast <4 x float> %a1 to <4 x i32> | 
|  | %not = xor <4 x i32> %arg0, <i32 -1, i32 -1, i32 -1, i32 -1> | 
|  | %res = and <4 x i32> %not, %arg1 | 
|  | %bc = bitcast <4 x i32> %res to <4 x float> | 
|  | ret <4 x float> %bc | 
|  | } | 
|  |  | 
|  | ; fcmp oeq whose <4 x i1> result is sign-extended into a <4 x i32> lane mask and returned as floats. | 
|  | ; The non-AVX512 runs expect one packed compare; the AVX512 run compares into mask register %k0 | 
|  | ; and then expands the mask back to a vector with vpmovm2d. | 
|  | define <4 x float> @test_mm_cmpeq_ps(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_cmpeq_ps: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    cmpeqps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x00] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_cmpeq_ps: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x00] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_cmpeq_ps: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vcmpeqps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x00] | 
|  | ; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %cmp = fcmp oeq <4 x float> %a0, %a1 | 
|  | %sext = sext <4 x i1> %cmp to <4 x i32> | 
|  | %res = bitcast <4 x i32> %sext to <4 x float> | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; Scalar compare through the llvm.x86.sse.cmp.ss intrinsic with predicate immediate 0, | 
|  | ; which the checks show as the "eq" form (cmpeqss / vcmpeqss). | 
|  | ; Both AVX-capable run configurations share one block of checks for this test. | 
|  | define <4 x float> @test_mm_cmpeq_ss(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_cmpeq_ss: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    cmpeqss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x00] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX-LABEL: test_mm_cmpeq_ss: | 
|  | ; AVX:       # %bb.0: | 
|  | ; AVX-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x00] | 
|  | ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 0) | 
|  | ret <4 x float> %res | 
|  | } | 
|  | ; Scalar compare intrinsic used by the *_ss compare tests in this file; the trailing i8 is the | 
|  | ; predicate immediate that ends up as the last encoding byte of the emitted compare. | 
|  | declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone | 
|  |  | 
|  | ; "a0 >= a1" is written as fcmp ole with the operands swapped (%a1 <= %a0), then sign-extended to a mask. | 
|  | ; The checks therefore expect the "le" compare with swapped registers; the non-VEX run needs an | 
|  | ; extra movaps because the two-operand compare leaves its result in %xmm1 rather than %xmm0. | 
|  | define <4 x float> @test_mm_cmpge_ps(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_cmpge_ps: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    cmpleps %xmm0, %xmm1 # encoding: [0x0f,0xc2,0xc8,0x02] | 
|  | ; SSE-NEXT:    movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_cmpge_ps: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vcmpleps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0xc2,0xc0,0x02] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_cmpge_ps: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vcmpleps %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x74,0x08,0xc2,0xc0,0x02] | 
|  | ; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %cmp = fcmp ole <4 x float> %a1, %a0 | 
|  | %sext = sext <4 x i1> %cmp to <4 x i32> | 
|  | %res = bitcast <4 x i32> %sext to <4 x float> | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; Scalar "ge" built from cmp.ss with swapped operands and predicate immediate 2 ("le" in the checks). | 
|  | ; The shufflevector takes lane 0 from the compare result (index 4) and lanes 1-3 from %a0, | 
|  | ; which the checks show as a movss (non-VEX) or vblendps $1 (VEX) merge. | 
|  | define <4 x float> @test_mm_cmpge_ss(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_cmpge_ss: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    cmpless %xmm0, %xmm1 # encoding: [0xf3,0x0f,0xc2,0xc8,0x02] | 
|  | ; SSE-NEXT:    movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1] | 
|  | ; SSE-NEXT:    # xmm0 = xmm1[0],xmm0[1,2,3] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX-LABEL: test_mm_cmpge_ss: | 
|  | ; AVX:       # %bb.0: | 
|  | ; AVX-NEXT:    vcmpless %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf2,0xc2,0xc8,0x02] | 
|  | ; AVX-NEXT:    vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01] | 
|  | ; AVX-NEXT:    # xmm0 = xmm1[0],xmm0[1,2,3] | 
|  | ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 2) | 
|  | %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3> | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; "a0 > a1" is written as fcmp olt with the operands swapped (%a1 < %a0), then sign-extended to a mask. | 
|  | ; The checks expect the "lt" compare with swapped registers, plus a movaps in the non-VEX run | 
|  | ; to bring the result from %xmm1 into %xmm0. | 
|  | define <4 x float> @test_mm_cmpgt_ps(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_cmpgt_ps: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    cmpltps %xmm0, %xmm1 # encoding: [0x0f,0xc2,0xc8,0x01] | 
|  | ; SSE-NEXT:    movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_cmpgt_ps: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0xc2,0xc0,0x01] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_cmpgt_ps: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vcmpltps %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x74,0x08,0xc2,0xc0,0x01] | 
|  | ; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %cmp = fcmp olt <4 x float> %a1, %a0 | 
|  | %sext = sext <4 x i1> %cmp to <4 x i32> | 
|  | %res = bitcast <4 x i32> %sext to <4 x float> | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; Scalar "gt" built from cmp.ss with swapped operands and predicate immediate 1 ("lt" in the checks). | 
|  | ; The shufflevector keeps lanes 1-3 of %a0 and takes lane 0 from the compare result (index 4). | 
|  | define <4 x float> @test_mm_cmpgt_ss(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_cmpgt_ss: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    cmpltss %xmm0, %xmm1 # encoding: [0xf3,0x0f,0xc2,0xc8,0x01] | 
|  | ; SSE-NEXT:    movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1] | 
|  | ; SSE-NEXT:    # xmm0 = xmm1[0],xmm0[1,2,3] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX-LABEL: test_mm_cmpgt_ss: | 
|  | ; AVX:       # %bb.0: | 
|  | ; AVX-NEXT:    vcmpltss %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf2,0xc2,0xc8,0x01] | 
|  | ; AVX-NEXT:    vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01] | 
|  | ; AVX-NEXT:    # xmm0 = xmm1[0],xmm0[1,2,3] | 
|  | ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 1) | 
|  | %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3> | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; fcmp ole in natural operand order, sign-extended to a <4 x i32> lane mask. | 
|  | ; Expected to select the "le" packed compare directly, with no register shuffling. | 
|  | define <4 x float> @test_mm_cmple_ps(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_cmple_ps: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    cmpleps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x02] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_cmple_ps: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vcmpleps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x02] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_cmple_ps: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vcmpleps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x02] | 
|  | ; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %cmp = fcmp ole <4 x float> %a0, %a1 | 
|  | %sext = sext <4 x i1> %cmp to <4 x i32> | 
|  | %res = bitcast <4 x i32> %sext to <4 x float> | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; Scalar compare via cmp.ss with predicate immediate 2; the checks show the "le" form (cmpless / vcmpless). | 
|  | define <4 x float> @test_mm_cmple_ss(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_cmple_ss: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    cmpless %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x02] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX-LABEL: test_mm_cmple_ss: | 
|  | ; AVX:       # %bb.0: | 
|  | ; AVX-NEXT:    vcmpless %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x02] | 
|  | ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 2) | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; fcmp olt in natural operand order, sign-extended to a <4 x i32> lane mask. | 
|  | ; Expected to select the "lt" packed compare directly. | 
|  | define <4 x float> @test_mm_cmplt_ps(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_cmplt_ps: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    cmpltps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x01] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_cmplt_ps: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vcmpltps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x01] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_cmplt_ps: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vcmpltps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x01] | 
|  | ; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %cmp = fcmp olt <4 x float> %a0, %a1 | 
|  | %sext = sext <4 x i1> %cmp to <4 x i32> | 
|  | %res = bitcast <4 x i32> %sext to <4 x float> | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; Scalar compare via cmp.ss with predicate immediate 1; the checks show the "lt" form (cmpltss / vcmpltss). | 
|  | define <4 x float> @test_mm_cmplt_ss(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_cmplt_ss: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    cmpltss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x01] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX-LABEL: test_mm_cmplt_ss: | 
|  | ; AVX:       # %bb.0: | 
|  | ; AVX-NEXT:    vcmpltss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x01] | 
|  | ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 1) | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; fcmp une (unordered-or-not-equal), sign-extended to a <4 x i32> lane mask. | 
|  | ; Expected to select the "neq" packed compare (predicate byte 0x04 in the encodings). | 
|  | define <4 x float> @test_mm_cmpneq_ps(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_cmpneq_ps: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    cmpneqps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x04] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_cmpneq_ps: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vcmpneqps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x04] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_cmpneq_ps: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vcmpneqps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x04] | 
|  | ; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %cmp = fcmp une <4 x float> %a0, %a1 | 
|  | %sext = sext <4 x i1> %cmp to <4 x i32> | 
|  | %res = bitcast <4 x i32> %sext to <4 x float> | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; Scalar compare via cmp.ss with predicate immediate 4; the checks show the "neq" form (cmpneqss / vcmpneqss). | 
|  | define <4 x float> @test_mm_cmpneq_ss(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_cmpneq_ss: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    cmpneqss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x04] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX-LABEL: test_mm_cmpneq_ss: | 
|  | ; AVX:       # %bb.0: | 
|  | ; AVX-NEXT:    vcmpneqss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x04] | 
|  | ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 4) | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; "not (a0 >= a1)" is written as fcmp ugt with the operands swapped (%a1 ugt %a0), then sign-extended. | 
|  | ; The checks expect the "nle" compare with swapped registers, plus a movaps in the non-VEX run | 
|  | ; to bring the result from %xmm1 into %xmm0. | 
|  | define <4 x float> @test_mm_cmpnge_ps(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_cmpnge_ps: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    cmpnleps %xmm0, %xmm1 # encoding: [0x0f,0xc2,0xc8,0x06] | 
|  | ; SSE-NEXT:    movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_cmpnge_ps: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vcmpnleps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0xc2,0xc0,0x06] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_cmpnge_ps: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vcmpnleps %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x74,0x08,0xc2,0xc0,0x06] | 
|  | ; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %cmp = fcmp ugt <4 x float> %a1, %a0 | 
|  | %sext = sext <4 x i1> %cmp to <4 x i32> | 
|  | %res = bitcast <4 x i32> %sext to <4 x float> | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; Scalar "nge" built from cmp.ss with swapped operands and predicate immediate 6 ("nle" in the checks). | 
|  | ; The shufflevector keeps lanes 1-3 of %a0 and takes lane 0 from the compare result (index 4). | 
|  | define <4 x float> @test_mm_cmpnge_ss(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_cmpnge_ss: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    cmpnless %xmm0, %xmm1 # encoding: [0xf3,0x0f,0xc2,0xc8,0x06] | 
|  | ; SSE-NEXT:    movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1] | 
|  | ; SSE-NEXT:    # xmm0 = xmm1[0],xmm0[1,2,3] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX-LABEL: test_mm_cmpnge_ss: | 
|  | ; AVX:       # %bb.0: | 
|  | ; AVX-NEXT:    vcmpnless %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf2,0xc2,0xc8,0x06] | 
|  | ; AVX-NEXT:    vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01] | 
|  | ; AVX-NEXT:    # xmm0 = xmm1[0],xmm0[1,2,3] | 
|  | ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 6) | 
|  | %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3> | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; "not (a0 > a1)" is written as fcmp uge with the operands swapped (%a1 uge %a0), then sign-extended. | 
|  | ; The checks expect the "nlt" compare with swapped registers, plus a movaps in the non-VEX run | 
|  | ; to bring the result from %xmm1 into %xmm0. | 
|  | define <4 x float> @test_mm_cmpngt_ps(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_cmpngt_ps: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    cmpnltps %xmm0, %xmm1 # encoding: [0x0f,0xc2,0xc8,0x05] | 
|  | ; SSE-NEXT:    movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_cmpngt_ps: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vcmpnltps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0xc2,0xc0,0x05] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_cmpngt_ps: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vcmpnltps %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x74,0x08,0xc2,0xc0,0x05] | 
|  | ; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %cmp = fcmp uge <4 x float> %a1, %a0 | 
|  | %sext = sext <4 x i1> %cmp to <4 x i32> | 
|  | %res = bitcast <4 x i32> %sext to <4 x float> | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; Scalar "ngt" built from cmp.ss with swapped operands and predicate immediate 5 ("nlt" in the checks). | 
|  | ; The shufflevector keeps lanes 1-3 of %a0 and takes lane 0 from the compare result (index 4). | 
|  | define <4 x float> @test_mm_cmpngt_ss(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_cmpngt_ss: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    cmpnltss %xmm0, %xmm1 # encoding: [0xf3,0x0f,0xc2,0xc8,0x05] | 
|  | ; SSE-NEXT:    movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1] | 
|  | ; SSE-NEXT:    # xmm0 = xmm1[0],xmm0[1,2,3] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX-LABEL: test_mm_cmpngt_ss: | 
|  | ; AVX:       # %bb.0: | 
|  | ; AVX-NEXT:    vcmpnltss %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf2,0xc2,0xc8,0x05] | 
|  | ; AVX-NEXT:    vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01] | 
|  | ; AVX-NEXT:    # xmm0 = xmm1[0],xmm0[1,2,3] | 
|  | ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 5) | 
|  | %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3> | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; fcmp ugt in natural operand order, sign-extended to a <4 x i32> lane mask. | 
|  | ; Expected to select the "nle" packed compare (predicate byte 0x06 in the encodings). | 
|  | define <4 x float> @test_mm_cmpnle_ps(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_cmpnle_ps: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    cmpnleps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x06] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_cmpnle_ps: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vcmpnleps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x06] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_cmpnle_ps: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vcmpnleps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x06] | 
|  | ; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %cmp = fcmp ugt <4 x float> %a0, %a1 | 
|  | %sext = sext <4 x i1> %cmp to <4 x i32> | 
|  | %res = bitcast <4 x i32> %sext to <4 x float> | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; Scalar compare via cmp.ss with predicate immediate 6; the checks show the "nle" form (cmpnless / vcmpnless). | 
|  | define <4 x float> @test_mm_cmpnle_ss(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_cmpnle_ss: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    cmpnless %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x06] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX-LABEL: test_mm_cmpnle_ss: | 
|  | ; AVX:       # %bb.0: | 
|  | ; AVX-NEXT:    vcmpnless %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x06] | 
|  | ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 6) | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; fcmp uge in natural operand order, sign-extended to a <4 x i32> lane mask. | 
|  | ; Expected to select the "nlt" packed compare (predicate byte 0x05 in the encodings). | 
|  | define <4 x float> @test_mm_cmpnlt_ps(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_cmpnlt_ps: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    cmpnltps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x05] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_cmpnlt_ps: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vcmpnltps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x05] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_cmpnlt_ps: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vcmpnltps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x05] | 
|  | ; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %cmp = fcmp uge <4 x float> %a0, %a1 | 
|  | %sext = sext <4 x i1> %cmp to <4 x i32> | 
|  | %res = bitcast <4 x i32> %sext to <4 x float> | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; Scalar compare via cmp.ss with predicate immediate 5; the checks show the "nlt" form (cmpnltss / vcmpnltss). | 
|  | define <4 x float> @test_mm_cmpnlt_ss(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_cmpnlt_ss: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    cmpnltss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x05] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX-LABEL: test_mm_cmpnlt_ss: | 
|  | ; AVX:       # %bb.0: | 
|  | ; AVX-NEXT:    vcmpnltss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x05] | 
|  | ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 5) | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; fcmp ord (both lanes ordered), sign-extended to a <4 x i32> lane mask. | 
|  | ; Expected to select the "ord" packed compare (predicate byte 0x07 in the encodings). | 
|  | define <4 x float> @test_mm_cmpord_ps(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_cmpord_ps: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    cmpordps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x07] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_cmpord_ps: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vcmpordps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x07] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_cmpord_ps: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vcmpordps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x07] | 
|  | ; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %cmp = fcmp ord <4 x float> %a0, %a1 | 
|  | %sext = sext <4 x i1> %cmp to <4 x i32> | 
|  | %res = bitcast <4 x i32> %sext to <4 x float> | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; Scalar compare via cmp.ss with predicate immediate 7; the checks show the "ord" form (cmpordss / vcmpordss). | 
|  | define <4 x float> @test_mm_cmpord_ss(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_cmpord_ss: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    cmpordss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x07] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX-LABEL: test_mm_cmpord_ss: | 
|  | ; AVX:       # %bb.0: | 
|  | ; AVX-NEXT:    vcmpordss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x07] | 
|  | ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7) | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; fcmp uno (either lane unordered), sign-extended to a <4 x i32> lane mask. | 
|  | ; Expected to select the "unord" packed compare (predicate byte 0x03 in the encodings). | 
|  | define <4 x float> @test_mm_cmpunord_ps(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_cmpunord_ps: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    cmpunordps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x03] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_cmpunord_ps: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vcmpunordps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x03] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_cmpunord_ps: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vcmpunordps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x03] | 
|  | ; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %cmp = fcmp uno <4 x float> %a0, %a1 | 
|  | %sext = sext <4 x i1> %cmp to <4 x i32> | 
|  | %res = bitcast <4 x i32> %sext to <4 x float> | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; Scalar compare via cmp.ss with predicate immediate 3; the checks show the "unord" form (cmpunordss / vcmpunordss). | 
|  | define <4 x float> @test_mm_cmpunord_ss(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_cmpunord_ss: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    cmpunordss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x03] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX-LABEL: test_mm_cmpunord_ss: | 
|  | ; AVX:       # %bb.0: | 
|  | ; AVX-NEXT:    vcmpunordss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x03] | 
|  | ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 3) | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; Calls the llvm.x86.sse.comieq.ss intrinsic, which returns an i32. | 
|  | ; The checks expect (v)comiss followed by setnp and sete, combined with andb and | 
|  | ; zero-extended into %eax, so the result requires both "parity clear" and "equal" flags. | 
|  | define i32 @test_mm_comieq_ss(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_comieq_ss: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    comiss %xmm1, %xmm0 # encoding: [0x0f,0x2f,0xc1] | 
|  | ; SSE-NEXT:    setnp %al # encoding: [0x0f,0x9b,0xc0] | 
|  | ; SSE-NEXT:    sete %cl # encoding: [0x0f,0x94,0xc1] | 
|  | ; SSE-NEXT:    andb %al, %cl # encoding: [0x20,0xc1] | 
|  | ; SSE-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_comieq_ss: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vcomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2f,0xc1] | 
|  | ; AVX1-NEXT:    setnp %al # encoding: [0x0f,0x9b,0xc0] | 
|  | ; AVX1-NEXT:    sete %cl # encoding: [0x0f,0x94,0xc1] | 
|  | ; AVX1-NEXT:    andb %al, %cl # encoding: [0x20,0xc1] | 
|  | ; AVX1-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_comieq_ss: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vcomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1] | 
|  | ; AVX512-NEXT:    setnp %al # encoding: [0x0f,0x9b,0xc0] | 
|  | ; AVX512-NEXT:    sete %cl # encoding: [0x0f,0x94,0xc1] | 
|  | ; AVX512-NEXT:    andb %al, %cl # encoding: [0x20,0xc1] | 
|  | ; AVX512-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) | 
|  | ret i32 %res | 
|  | } | 
|  | declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone | 
|  |  | 
|  | ; Calls the llvm.x86.sse.comige.ss intrinsic, which returns an i32. | 
|  | ; The checks expect %eax to be zeroed first (xorl), then (v)comiss and a single setae into %al. | 
|  | define i32 @test_mm_comige_ss(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_comige_ss: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0] | 
|  | ; SSE-NEXT:    comiss %xmm1, %xmm0 # encoding: [0x0f,0x2f,0xc1] | 
|  | ; SSE-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_comige_ss: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0] | 
|  | ; AVX1-NEXT:    vcomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2f,0xc1] | 
|  | ; AVX1-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_comige_ss: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0] | 
|  | ; AVX512-NEXT:    vcomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1] | 
|  | ; AVX512-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) | 
|  | ret i32 %res | 
|  | } | 
|  | declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone | 
|  |  | 
|  | ; Calls the llvm.x86.sse.comigt.ss intrinsic, which returns an i32. | 
|  | ; The checks expect %eax to be zeroed first (xorl), then (v)comiss and a single seta into %al. | 
|  | define i32 @test_mm_comigt_ss(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_comigt_ss: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0] | 
|  | ; SSE-NEXT:    comiss %xmm1, %xmm0 # encoding: [0x0f,0x2f,0xc1] | 
|  | ; SSE-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_comigt_ss: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0] | 
|  | ; AVX1-NEXT:    vcomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2f,0xc1] | 
|  | ; AVX1-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_comigt_ss: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0] | 
|  | ; AVX512-NEXT:    vcomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1] | 
|  | ; AVX512-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) | 
|  | ret i32 %res | 
|  | } | 
|  | declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone | 
|  |  | 
|  | ; Calls the llvm.x86.sse.comile.ss intrinsic on the two vector arguments and returns its i32 result. | 
|  | ; The expected code compares with the operands swapped, (v)comiss %xmm0, %xmm1, and then uses setae. | 
|  | define i32 @test_mm_comile_ss(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_comile_ss: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0] | 
|  | ; SSE-NEXT:    comiss %xmm0, %xmm1 # encoding: [0x0f,0x2f,0xc8] | 
|  | ; SSE-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_comile_ss: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0] | 
|  | ; AVX1-NEXT:    vcomiss %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x2f,0xc8] | 
|  | ; AVX1-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_comile_ss: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0] | 
|  | ; AVX512-NEXT:    vcomiss %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc8] | 
|  | ; AVX512-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) | 
|  | ret i32 %res | 
|  | } | 
|  | declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone | 
|  |  | 
|  | ; Calls the llvm.x86.sse.comilt.ss intrinsic on the two vector arguments and returns its i32 result. | 
|  | ; The expected code compares with the operands swapped, (v)comiss %xmm0, %xmm1, and then uses seta. | 
|  | define i32 @test_mm_comilt_ss(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_comilt_ss: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0] | 
|  | ; SSE-NEXT:    comiss %xmm0, %xmm1 # encoding: [0x0f,0x2f,0xc8] | 
|  | ; SSE-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_comilt_ss: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0] | 
|  | ; AVX1-NEXT:    vcomiss %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x2f,0xc8] | 
|  | ; AVX1-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_comilt_ss: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0] | 
|  | ; AVX512-NEXT:    vcomiss %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc8] | 
|  | ; AVX512-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) | 
|  | ret i32 %res | 
|  | } | 
|  | declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone | 
|  |  | 
|  | ; Calls the llvm.x86.sse.comineq.ss intrinsic on the two vector arguments and returns its i32 result. | 
|  | ; Unlike the single-setcc comparisons in this file, the expected code ORs two conditions after (v)comiss | 
|  | ; (setp into %al, setne into %cl) and zero-extends the combined byte into %eax. | 
|  | define i32 @test_mm_comineq_ss(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_comineq_ss: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    comiss %xmm1, %xmm0 # encoding: [0x0f,0x2f,0xc1] | 
|  | ; SSE-NEXT:    setp %al # encoding: [0x0f,0x9a,0xc0] | 
|  | ; SSE-NEXT:    setne %cl # encoding: [0x0f,0x95,0xc1] | 
|  | ; SSE-NEXT:    orb %al, %cl # encoding: [0x08,0xc1] | 
|  | ; SSE-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_comineq_ss: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vcomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2f,0xc1] | 
|  | ; AVX1-NEXT:    setp %al # encoding: [0x0f,0x9a,0xc0] | 
|  | ; AVX1-NEXT:    setne %cl # encoding: [0x0f,0x95,0xc1] | 
|  | ; AVX1-NEXT:    orb %al, %cl # encoding: [0x08,0xc1] | 
|  | ; AVX1-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_comineq_ss: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vcomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1] | 
|  | ; AVX512-NEXT:    setp %al # encoding: [0x0f,0x9a,0xc0] | 
|  | ; AVX512-NEXT:    setne %cl # encoding: [0x0f,0x95,0xc1] | 
|  | ; AVX512-NEXT:    orb %al, %cl # encoding: [0x08,0xc1] | 
|  | ; AVX512-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) | 
|  | ret i32 %res | 
|  | } | 
|  | declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone | 
|  |  | 
|  | ; Calls the llvm.x86.sse.cvtss2si intrinsic on the vector argument and returns its i32 result. | 
|  | ; Every run is expected to emit a single (v)cvtss2si %xmm0, %eax before the return. | 
|  | define i32 @test_mm_cvt_ss2si(<4 x float> %a0) nounwind { | 
|  | ; SSE-LABEL: test_mm_cvt_ss2si: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    cvtss2si %xmm0, %eax # encoding: [0xf3,0x0f,0x2d,0xc0] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_cvt_ss2si: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vcvtss2si %xmm0, %eax # encoding: [0xc5,0xfa,0x2d,0xc0] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_cvt_ss2si: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vcvtss2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2d,0xc0] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) | 
|  | ret i32 %res | 
|  | } | 
|  | declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone | 
|  |  | 
|  | ; Calls the llvm.x86.sse.cvtsi2ss intrinsic with the vector %a0 and the integer %a1 and returns the vector result. | 
|  | ; Expectations are split per target because the integer operand comes from a stack slot on i386 | 
|  | ; and from %edi on x86-64. | 
|  | define <4 x float> @test_mm_cvtsi32_ss(<4 x float> %a0, i32 %a1) nounwind { | 
|  | ; X86-SSE-LABEL: test_mm_cvtsi32_ss: | 
|  | ; X86-SSE:       # %bb.0: | 
|  | ; X86-SSE-NEXT:    cvtsi2ssl {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x2a,0x44,0x24,0x04] | 
|  | ; X86-SSE-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX1-LABEL: test_mm_cvtsi32_ss: | 
|  | ; X86-AVX1:       # %bb.0: | 
|  | ; X86-AVX1-NEXT:    vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x2a,0x44,0x24,0x04] | 
|  | ; X86-AVX1-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX512-LABEL: test_mm_cvtsi32_ss: | 
|  | ; X86-AVX512:       # %bb.0: | 
|  | ; X86-AVX512-NEXT:    vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2a,0x44,0x24,0x04] | 
|  | ; X86-AVX512-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-SSE-LABEL: test_mm_cvtsi32_ss: | 
|  | ; X64-SSE:       # %bb.0: | 
|  | ; X64-SSE-NEXT:    cvtsi2ssl %edi, %xmm0 # encoding: [0xf3,0x0f,0x2a,0xc7] | 
|  | ; X64-SSE-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX1-LABEL: test_mm_cvtsi32_ss: | 
|  | ; X64-AVX1:       # %bb.0: | 
|  | ; X64-AVX1-NEXT:    vcvtsi2ssl %edi, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x2a,0xc7] | 
|  | ; X64-AVX1-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX512-LABEL: test_mm_cvtsi32_ss: | 
|  | ; X64-AVX512:       # %bb.0: | 
|  | ; X64-AVX512-NEXT:    vcvtsi2ssl %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2a,0xc7] | 
|  | ; X64-AVX512-NEXT:    retq # encoding: [0xc3] | 
|  | %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 %a1) | 
|  | ret <4 x float> %res | 
|  | } | 
|  | declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone | 
|  |  | 
|  | ; Returns element 0 of the vector argument as a scalar float (plain extractelement, no intrinsic). | 
|  | ; On i386 the expected code spills the low lane to the stack and reloads it with flds; | 
|  | ; on x86-64 the expected code is just the return. | 
|  | define float @test_mm_cvtss_f32(<4 x float> %a0) nounwind { | 
|  | ; X86-SSE-LABEL: test_mm_cvtss_f32: | 
|  | ; X86-SSE:       # %bb.0: | 
|  | ; X86-SSE-NEXT:    pushl %eax # encoding: [0x50] | 
|  | ; X86-SSE-NEXT:    movss %xmm0, (%esp) # encoding: [0xf3,0x0f,0x11,0x04,0x24] | 
|  | ; X86-SSE-NEXT:    flds (%esp) # encoding: [0xd9,0x04,0x24] | 
|  | ; X86-SSE-NEXT:    popl %eax # encoding: [0x58] | 
|  | ; X86-SSE-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX1-LABEL: test_mm_cvtss_f32: | 
|  | ; X86-AVX1:       # %bb.0: | 
|  | ; X86-AVX1-NEXT:    pushl %eax # encoding: [0x50] | 
|  | ; X86-AVX1-NEXT:    vmovss %xmm0, (%esp) # encoding: [0xc5,0xfa,0x11,0x04,0x24] | 
|  | ; X86-AVX1-NEXT:    flds (%esp) # encoding: [0xd9,0x04,0x24] | 
|  | ; X86-AVX1-NEXT:    popl %eax # encoding: [0x58] | 
|  | ; X86-AVX1-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX512-LABEL: test_mm_cvtss_f32: | 
|  | ; X86-AVX512:       # %bb.0: | 
|  | ; X86-AVX512-NEXT:    pushl %eax # encoding: [0x50] | 
|  | ; X86-AVX512-NEXT:    vmovss %xmm0, (%esp) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x04,0x24] | 
|  | ; X86-AVX512-NEXT:    flds (%esp) # encoding: [0xd9,0x04,0x24] | 
|  | ; X86-AVX512-NEXT:    popl %eax # encoding: [0x58] | 
|  | ; X86-AVX512-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-LABEL: test_mm_cvtss_f32: | 
|  | ; X64:       # %bb.0: | 
|  | ; X64-NEXT:    retq # encoding: [0xc3] | 
|  | %res = extractelement <4 x float> %a0, i32 0 | 
|  | ret float %res | 
|  | } | 
|  |  | 
|  | ; Calls the llvm.x86.sse.cvtss2si intrinsic (declared earlier in this file) and returns its i32 result. | 
|  | ; The body and the expected (v)cvtss2si %xmm0, %eax sequence are the same as for test_mm_cvt_ss2si. | 
|  | define i32 @test_mm_cvtss_si32(<4 x float> %a0) nounwind { | 
|  | ; SSE-LABEL: test_mm_cvtss_si32: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    cvtss2si %xmm0, %eax # encoding: [0xf3,0x0f,0x2d,0xc0] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_cvtss_si32: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vcvtss2si %xmm0, %eax # encoding: [0xc5,0xfa,0x2d,0xc0] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_cvtss_si32: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vcvtss2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2d,0xc0] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) | 
|  | ret i32 %res | 
|  | } | 
|  |  | 
|  | ; Calls the llvm.x86.sse.cvttss2si intrinsic on the vector argument and returns its i32 result. | 
|  | ; Every run is expected to emit a single (v)cvttss2si %xmm0, %eax before the return. | 
|  | define i32 @test_mm_cvttss_si(<4 x float> %a0) nounwind { | 
|  | ; SSE-LABEL: test_mm_cvttss_si: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    cvttss2si %xmm0, %eax # encoding: [0xf3,0x0f,0x2c,0xc0] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_cvttss_si: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vcvttss2si %xmm0, %eax # encoding: [0xc5,0xfa,0x2c,0xc0] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_cvttss_si: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vcvttss2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2c,0xc0] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) | 
|  | ret i32 %res | 
|  | } | 
|  | declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone | 
|  |  | 
|  | ; Calls the llvm.x86.sse.cvttss2si intrinsic (declared earlier in this file) and returns its i32 result. | 
|  | ; The body and the expected (v)cvttss2si %xmm0, %eax sequence are the same as for test_mm_cvttss_si. | 
|  | define i32 @test_mm_cvttss_si32(<4 x float> %a0) nounwind { | 
|  | ; SSE-LABEL: test_mm_cvttss_si32: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    cvttss2si %xmm0, %eax # encoding: [0xf3,0x0f,0x2c,0xc0] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_cvttss_si32: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vcvttss2si %xmm0, %eax # encoding: [0xc5,0xfa,0x2c,0xc0] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_cvttss_si32: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vcvttss2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2c,0xc0] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) | 
|  | ret i32 %res | 
|  | } | 
|  |  | 
|  | ; Divides the two <4 x float> arguments element-wise with a plain IR fdiv (no intrinsic). | 
|  | ; Every run is expected to emit a single (v)divps before the return. | 
|  | define <4 x float> @test_mm_div_ps(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_div_ps: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    divps %xmm1, %xmm0 # encoding: [0x0f,0x5e,0xc1] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_div_ps: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vdivps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5e,0xc1] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_div_ps: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vdivps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5e,0xc1] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = fdiv <4 x float> %a0, %a1 | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; Divides element 0 of %a0 by element 0 of %a1 and writes the quotient back into element 0 of %a0; | 
|  | ; the remaining elements of %a0 are returned unchanged. | 
|  | ; The extract / scalar fdiv / insert sequence is expected to become a single (v)divss. | 
|  | define <4 x float> @test_mm_div_ss(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_div_ss: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    divss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5e,0xc1] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_div_ss: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vdivss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5e,0xc1] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_div_ss: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vdivss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5e,0xc1] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %ext0 = extractelement <4 x float> %a0, i32 0 | 
|  | %ext1 = extractelement <4 x float> %a1, i32 0 | 
|  | %fdiv = fdiv float %ext0, %ext1 | 
|  | %res = insertelement <4 x float> %a0, float %fdiv, i32 0 | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; Stores the control/status word through llvm.x86.sse.stmxcsr into a stack slot, reloads it, | 
|  | ; and returns it masked with 8064 (0x1F80). | 
|  | ; NOTE(review): 0x1F80 is presumably the MXCSR exception-mask field - confirm against the ISA manual. | 
|  | ; The AVX and AVX512 runs share one set of expectations per target (vstmxcsr instead of stmxcsr). | 
|  | define i32 @test_MM_GET_EXCEPTION_MASK() nounwind { | 
|  | ; X86-SSE-LABEL: test_MM_GET_EXCEPTION_MASK: | 
|  | ; X86-SSE:       # %bb.0: | 
|  | ; X86-SSE-NEXT:    pushl %eax # encoding: [0x50] | 
|  | ; X86-SSE-NEXT:    movl %esp, %eax # encoding: [0x89,0xe0] | 
|  | ; X86-SSE-NEXT:    stmxcsr (%eax) # encoding: [0x0f,0xae,0x18] | 
|  | ; X86-SSE-NEXT:    movl (%esp), %eax # encoding: [0x8b,0x04,0x24] | 
|  | ; X86-SSE-NEXT:    andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00] | 
|  | ; X86-SSE-NEXT:    # imm = 0x1F80 | 
|  | ; X86-SSE-NEXT:    popl %ecx # encoding: [0x59] | 
|  | ; X86-SSE-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX-LABEL: test_MM_GET_EXCEPTION_MASK: | 
|  | ; X86-AVX:       # %bb.0: | 
|  | ; X86-AVX-NEXT:    pushl %eax # encoding: [0x50] | 
|  | ; X86-AVX-NEXT:    movl %esp, %eax # encoding: [0x89,0xe0] | 
|  | ; X86-AVX-NEXT:    vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18] | 
|  | ; X86-AVX-NEXT:    movl (%esp), %eax # encoding: [0x8b,0x04,0x24] | 
|  | ; X86-AVX-NEXT:    andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00] | 
|  | ; X86-AVX-NEXT:    # imm = 0x1F80 | 
|  | ; X86-AVX-NEXT:    popl %ecx # encoding: [0x59] | 
|  | ; X86-AVX-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-SSE-LABEL: test_MM_GET_EXCEPTION_MASK: | 
|  | ; X64-SSE:       # %bb.0: | 
|  | ; X64-SSE-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] | 
|  | ; X64-SSE-NEXT:    stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] | 
|  | ; X64-SSE-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] | 
|  | ; X64-SSE-NEXT:    andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00] | 
|  | ; X64-SSE-NEXT:    # imm = 0x1F80 | 
|  | ; X64-SSE-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX-LABEL: test_MM_GET_EXCEPTION_MASK: | 
|  | ; X64-AVX:       # %bb.0: | 
|  | ; X64-AVX-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] | 
|  | ; X64-AVX-NEXT:    vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] | 
|  | ; X64-AVX-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] | 
|  | ; X64-AVX-NEXT:    andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00] | 
|  | ; X64-AVX-NEXT:    # imm = 0x1F80 | 
|  | ; X64-AVX-NEXT:    retq # encoding: [0xc3] | 
|  | %1 = alloca i32, align 4 | 
|  | %2 = bitcast i32* %1 to i8* | 
|  | call void @llvm.x86.sse.stmxcsr(i8* %2) | 
|  | %3 = load i32, i32* %1, align 4 | 
|  | %4 = and i32 %3, 8064 | 
|  | ret i32 %4 | 
|  | } | 
|  | ; The intrinsic stores through its pointer argument (callers reload the value from the same slot), | 
|  | ; so the declaration must not be marked readnone. | 
|  | declare void @llvm.x86.sse.stmxcsr(i8*) nounwind | 
|  |  | 
|  | ; Stores the control/status word through llvm.x86.sse.stmxcsr into a stack slot, reloads it, | 
|  | ; and returns it masked with 63 (0x3F). | 
|  | ; NOTE(review): 0x3F is presumably the MXCSR exception-flag field - confirm against the ISA manual. | 
|  | ; The mask fits in a sign-extended 8-bit immediate, so the expected andl uses the short 0x83 encoding. | 
|  | define i32 @test_MM_GET_EXCEPTION_STATE() nounwind { | 
|  | ; X86-SSE-LABEL: test_MM_GET_EXCEPTION_STATE: | 
|  | ; X86-SSE:       # %bb.0: | 
|  | ; X86-SSE-NEXT:    pushl %eax # encoding: [0x50] | 
|  | ; X86-SSE-NEXT:    movl %esp, %eax # encoding: [0x89,0xe0] | 
|  | ; X86-SSE-NEXT:    stmxcsr (%eax) # encoding: [0x0f,0xae,0x18] | 
|  | ; X86-SSE-NEXT:    movl (%esp), %eax # encoding: [0x8b,0x04,0x24] | 
|  | ; X86-SSE-NEXT:    andl $63, %eax # encoding: [0x83,0xe0,0x3f] | 
|  | ; X86-SSE-NEXT:    popl %ecx # encoding: [0x59] | 
|  | ; X86-SSE-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX-LABEL: test_MM_GET_EXCEPTION_STATE: | 
|  | ; X86-AVX:       # %bb.0: | 
|  | ; X86-AVX-NEXT:    pushl %eax # encoding: [0x50] | 
|  | ; X86-AVX-NEXT:    movl %esp, %eax # encoding: [0x89,0xe0] | 
|  | ; X86-AVX-NEXT:    vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18] | 
|  | ; X86-AVX-NEXT:    movl (%esp), %eax # encoding: [0x8b,0x04,0x24] | 
|  | ; X86-AVX-NEXT:    andl $63, %eax # encoding: [0x83,0xe0,0x3f] | 
|  | ; X86-AVX-NEXT:    popl %ecx # encoding: [0x59] | 
|  | ; X86-AVX-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-SSE-LABEL: test_MM_GET_EXCEPTION_STATE: | 
|  | ; X64-SSE:       # %bb.0: | 
|  | ; X64-SSE-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] | 
|  | ; X64-SSE-NEXT:    stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] | 
|  | ; X64-SSE-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] | 
|  | ; X64-SSE-NEXT:    andl $63, %eax # encoding: [0x83,0xe0,0x3f] | 
|  | ; X64-SSE-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX-LABEL: test_MM_GET_EXCEPTION_STATE: | 
|  | ; X64-AVX:       # %bb.0: | 
|  | ; X64-AVX-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] | 
|  | ; X64-AVX-NEXT:    vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] | 
|  | ; X64-AVX-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] | 
|  | ; X64-AVX-NEXT:    andl $63, %eax # encoding: [0x83,0xe0,0x3f] | 
|  | ; X64-AVX-NEXT:    retq # encoding: [0xc3] | 
|  | %1 = alloca i32, align 4 | 
|  | %2 = bitcast i32* %1 to i8* | 
|  | call void @llvm.x86.sse.stmxcsr(i8* %2) | 
|  | %3 = load i32, i32* %1, align 4 | 
|  | %4 = and i32 %3, 63 | 
|  | ret i32 %4 | 
|  | } | 
|  |  | 
|  | ; Stores the control/status word through llvm.x86.sse.stmxcsr into a stack slot, reloads it, | 
|  | ; and returns it masked with 32768 (0x8000). | 
|  | ; NOTE(review): 0x8000 is presumably the MXCSR flush-to-zero bit - confirm against the ISA manual. | 
|  | define i32 @test_MM_GET_FLUSH_ZERO_MODE() nounwind { | 
|  | ; X86-SSE-LABEL: test_MM_GET_FLUSH_ZERO_MODE: | 
|  | ; X86-SSE:       # %bb.0: | 
|  | ; X86-SSE-NEXT:    pushl %eax # encoding: [0x50] | 
|  | ; X86-SSE-NEXT:    movl %esp, %eax # encoding: [0x89,0xe0] | 
|  | ; X86-SSE-NEXT:    stmxcsr (%eax) # encoding: [0x0f,0xae,0x18] | 
|  | ; X86-SSE-NEXT:    movl (%esp), %eax # encoding: [0x8b,0x04,0x24] | 
|  | ; X86-SSE-NEXT:    andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00] | 
|  | ; X86-SSE-NEXT:    # imm = 0x8000 | 
|  | ; X86-SSE-NEXT:    popl %ecx # encoding: [0x59] | 
|  | ; X86-SSE-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX-LABEL: test_MM_GET_FLUSH_ZERO_MODE: | 
|  | ; X86-AVX:       # %bb.0: | 
|  | ; X86-AVX-NEXT:    pushl %eax # encoding: [0x50] | 
|  | ; X86-AVX-NEXT:    movl %esp, %eax # encoding: [0x89,0xe0] | 
|  | ; X86-AVX-NEXT:    vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18] | 
|  | ; X86-AVX-NEXT:    movl (%esp), %eax # encoding: [0x8b,0x04,0x24] | 
|  | ; X86-AVX-NEXT:    andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00] | 
|  | ; X86-AVX-NEXT:    # imm = 0x8000 | 
|  | ; X86-AVX-NEXT:    popl %ecx # encoding: [0x59] | 
|  | ; X86-AVX-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-SSE-LABEL: test_MM_GET_FLUSH_ZERO_MODE: | 
|  | ; X64-SSE:       # %bb.0: | 
|  | ; X64-SSE-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] | 
|  | ; X64-SSE-NEXT:    stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] | 
|  | ; X64-SSE-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] | 
|  | ; X64-SSE-NEXT:    andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00] | 
|  | ; X64-SSE-NEXT:    # imm = 0x8000 | 
|  | ; X64-SSE-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX-LABEL: test_MM_GET_FLUSH_ZERO_MODE: | 
|  | ; X64-AVX:       # %bb.0: | 
|  | ; X64-AVX-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] | 
|  | ; X64-AVX-NEXT:    vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] | 
|  | ; X64-AVX-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] | 
|  | ; X64-AVX-NEXT:    andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00] | 
|  | ; X64-AVX-NEXT:    # imm = 0x8000 | 
|  | ; X64-AVX-NEXT:    retq # encoding: [0xc3] | 
|  | %1 = alloca i32, align 4 | 
|  | %2 = bitcast i32* %1 to i8* | 
|  | call void @llvm.x86.sse.stmxcsr(i8* %2) | 
|  | %3 = load i32, i32* %1, align 4 | 
|  | %4 = and i32 %3, 32768 | 
|  | ret i32 %4 | 
|  | } | 
|  |  | 
|  | ; Stores the control/status word through llvm.x86.sse.stmxcsr into a stack slot, reloads it, | 
|  | ; and returns it masked with 24576 (0x6000). | 
|  | ; NOTE(review): 0x6000 is presumably the MXCSR rounding-control field - confirm against the ISA manual. | 
|  | define i32 @test_MM_GET_ROUNDING_MODE() nounwind { | 
|  | ; X86-SSE-LABEL: test_MM_GET_ROUNDING_MODE: | 
|  | ; X86-SSE:       # %bb.0: | 
|  | ; X86-SSE-NEXT:    pushl %eax # encoding: [0x50] | 
|  | ; X86-SSE-NEXT:    movl %esp, %eax # encoding: [0x89,0xe0] | 
|  | ; X86-SSE-NEXT:    stmxcsr (%eax) # encoding: [0x0f,0xae,0x18] | 
|  | ; X86-SSE-NEXT:    movl (%esp), %eax # encoding: [0x8b,0x04,0x24] | 
|  | ; X86-SSE-NEXT:    andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00] | 
|  | ; X86-SSE-NEXT:    # imm = 0x6000 | 
|  | ; X86-SSE-NEXT:    popl %ecx # encoding: [0x59] | 
|  | ; X86-SSE-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX-LABEL: test_MM_GET_ROUNDING_MODE: | 
|  | ; X86-AVX:       # %bb.0: | 
|  | ; X86-AVX-NEXT:    pushl %eax # encoding: [0x50] | 
|  | ; X86-AVX-NEXT:    movl %esp, %eax # encoding: [0x89,0xe0] | 
|  | ; X86-AVX-NEXT:    vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18] | 
|  | ; X86-AVX-NEXT:    movl (%esp), %eax # encoding: [0x8b,0x04,0x24] | 
|  | ; X86-AVX-NEXT:    andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00] | 
|  | ; X86-AVX-NEXT:    # imm = 0x6000 | 
|  | ; X86-AVX-NEXT:    popl %ecx # encoding: [0x59] | 
|  | ; X86-AVX-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-SSE-LABEL: test_MM_GET_ROUNDING_MODE: | 
|  | ; X64-SSE:       # %bb.0: | 
|  | ; X64-SSE-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] | 
|  | ; X64-SSE-NEXT:    stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] | 
|  | ; X64-SSE-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] | 
|  | ; X64-SSE-NEXT:    andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00] | 
|  | ; X64-SSE-NEXT:    # imm = 0x6000 | 
|  | ; X64-SSE-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX-LABEL: test_MM_GET_ROUNDING_MODE: | 
|  | ; X64-AVX:       # %bb.0: | 
|  | ; X64-AVX-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] | 
|  | ; X64-AVX-NEXT:    vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] | 
|  | ; X64-AVX-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] | 
|  | ; X64-AVX-NEXT:    andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00] | 
|  | ; X64-AVX-NEXT:    # imm = 0x6000 | 
|  | ; X64-AVX-NEXT:    retq # encoding: [0xc3] | 
|  | %1 = alloca i32, align 4 | 
|  | %2 = bitcast i32* %1 to i8* | 
|  | call void @llvm.x86.sse.stmxcsr(i8* %2) | 
|  | %3 = load i32, i32* %1, align 4 | 
|  | %4 = and i32 %3, 24576 | 
|  | ret i32 %4 | 
|  | } | 
|  |  | 
|  | ; Stores the control/status word through llvm.x86.sse.stmxcsr into a stack slot, reloads it, | 
|  | ; and returns the full 32-bit value with no masking. | 
|  | define i32 @test_mm_getcsr() nounwind { | 
|  | ; X86-SSE-LABEL: test_mm_getcsr: | 
|  | ; X86-SSE:       # %bb.0: | 
|  | ; X86-SSE-NEXT:    pushl %eax # encoding: [0x50] | 
|  | ; X86-SSE-NEXT:    movl %esp, %eax # encoding: [0x89,0xe0] | 
|  | ; X86-SSE-NEXT:    stmxcsr (%eax) # encoding: [0x0f,0xae,0x18] | 
|  | ; X86-SSE-NEXT:    movl (%esp), %eax # encoding: [0x8b,0x04,0x24] | 
|  | ; X86-SSE-NEXT:    popl %ecx # encoding: [0x59] | 
|  | ; X86-SSE-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX-LABEL: test_mm_getcsr: | 
|  | ; X86-AVX:       # %bb.0: | 
|  | ; X86-AVX-NEXT:    pushl %eax # encoding: [0x50] | 
|  | ; X86-AVX-NEXT:    movl %esp, %eax # encoding: [0x89,0xe0] | 
|  | ; X86-AVX-NEXT:    vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18] | 
|  | ; X86-AVX-NEXT:    movl (%esp), %eax # encoding: [0x8b,0x04,0x24] | 
|  | ; X86-AVX-NEXT:    popl %ecx # encoding: [0x59] | 
|  | ; X86-AVX-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-SSE-LABEL: test_mm_getcsr: | 
|  | ; X64-SSE:       # %bb.0: | 
|  | ; X64-SSE-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] | 
|  | ; X64-SSE-NEXT:    stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] | 
|  | ; X64-SSE-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] | 
|  | ; X64-SSE-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX-LABEL: test_mm_getcsr: | 
|  | ; X64-AVX:       # %bb.0: | 
|  | ; X64-AVX-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] | 
|  | ; X64-AVX-NEXT:    vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] | 
|  | ; X64-AVX-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] | 
|  | ; X64-AVX-NEXT:    retq # encoding: [0xc3] | 
|  | %1 = alloca i32, align 4 | 
|  | %2 = bitcast i32* %1 to i8* | 
|  | call void @llvm.x86.sse.stmxcsr(i8* %2) | 
|  | %3 = load i32, i32* %1, align 4 | 
|  | ret i32 %3 | 
|  | } | 
|  |  | 
|  | ; Reinterprets the float pointer as a pointer to <4 x float> and loads the vector with 16-byte alignment. | 
|  | ; The aligned load is expected to select (v)movaps; i386 first fetches the pointer argument from the stack. | 
|  | define <4 x float> @test_mm_load_ps(float* %a0) nounwind { | 
|  | ; X86-SSE-LABEL: test_mm_load_ps: | 
|  | ; X86-SSE:       # %bb.0: | 
|  | ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-SSE-NEXT:    movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00] | 
|  | ; X86-SSE-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX1-LABEL: test_mm_load_ps: | 
|  | ; X86-AVX1:       # %bb.0: | 
|  | ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-AVX1-NEXT:    vmovaps (%eax), %xmm0 # encoding: [0xc5,0xf8,0x28,0x00] | 
|  | ; X86-AVX1-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX512-LABEL: test_mm_load_ps: | 
|  | ; X86-AVX512:       # %bb.0: | 
|  | ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-AVX512-NEXT:    vmovaps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x00] | 
|  | ; X86-AVX512-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-SSE-LABEL: test_mm_load_ps: | 
|  | ; X64-SSE:       # %bb.0: | 
|  | ; X64-SSE-NEXT:    movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07] | 
|  | ; X64-SSE-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX1-LABEL: test_mm_load_ps: | 
|  | ; X64-AVX1:       # %bb.0: | 
|  | ; X64-AVX1-NEXT:    vmovaps (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x07] | 
|  | ; X64-AVX1-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX512-LABEL: test_mm_load_ps: | 
|  | ; X64-AVX512:       # %bb.0: | 
|  | ; X64-AVX512-NEXT:    vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07] | 
|  | ; X64-AVX512-NEXT:    retq # encoding: [0xc3] | 
|  | %arg0 = bitcast float* %a0 to <4 x float>* | 
|  | %res = load <4 x float>, <4 x float>* %arg0, align 16 | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; Loads one float (align 4) and inserts it into all four elements of the result vector. | 
|  | ; The SSE runs are expected to load with movss and splat with shufps $0; | 
|  | ; the AVX runs are expected to fold the load and splat into a single vbroadcastss from memory. | 
|  | define <4 x float> @test_mm_load_ps1(float* %a0) nounwind { | 
|  | ; X86-SSE-LABEL: test_mm_load_ps1: | 
|  | ; X86-SSE:       # %bb.0: | 
|  | ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-SSE-NEXT:    movss (%eax), %xmm0 # encoding: [0xf3,0x0f,0x10,0x00] | 
|  | ; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero | 
|  | ; X86-SSE-NEXT:    shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] | 
|  | ; X86-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0] | 
|  | ; X86-SSE-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX1-LABEL: test_mm_load_ps1: | 
|  | ; X86-AVX1:       # %bb.0: | 
|  | ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-AVX1-NEXT:    vbroadcastss (%eax), %xmm0 # encoding: [0xc4,0xe2,0x79,0x18,0x00] | 
|  | ; X86-AVX1-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX512-LABEL: test_mm_load_ps1: | 
|  | ; X86-AVX512:       # %bb.0: | 
|  | ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-AVX512-NEXT:    vbroadcastss (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x00] | 
|  | ; X86-AVX512-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-SSE-LABEL: test_mm_load_ps1: | 
|  | ; X64-SSE:       # %bb.0: | 
|  | ; X64-SSE-NEXT:    movss (%rdi), %xmm0 # encoding: [0xf3,0x0f,0x10,0x07] | 
|  | ; X64-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero | 
|  | ; X64-SSE-NEXT:    shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] | 
|  | ; X64-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0] | 
|  | ; X64-SSE-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX1-LABEL: test_mm_load_ps1: | 
|  | ; X64-AVX1:       # %bb.0: | 
|  | ; X64-AVX1-NEXT:    vbroadcastss (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x79,0x18,0x07] | 
|  | ; X64-AVX1-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX512-LABEL: test_mm_load_ps1: | 
|  | ; X64-AVX512:       # %bb.0: | 
|  | ; X64-AVX512-NEXT:    vbroadcastss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x07] | 
|  | ; X64-AVX512-NEXT:    retq # encoding: [0xc3] | 
|  | %ld = load float, float* %a0, align 4 | 
|  | %res0 = insertelement <4 x float> undef, float %ld, i32 0 | 
|  | %res1 = insertelement <4 x float> %res0, float %ld, i32 1 | 
|  | %res2 = insertelement <4 x float> %res1, float %ld, i32 2 | 
|  | %res3 = insertelement <4 x float> %res2, float %ld, i32 3 | 
|  | ret <4 x float> %res3 | 
|  | } | 
|  |  | 
|  | ; Loads one float with alignment 1, places it in element 0 and fills elements 1-3 with 0.0. | 
|  | ; The whole sequence is expected to become a single (v)movss load, whose printed shuffle comment | 
|  | ; (mem[0],zero,zero,zero) shows the upper elements being zeroed by the load itself. | 
|  | define <4 x float> @test_mm_load_ss(float* %a0) nounwind { | 
|  | ; X86-SSE-LABEL: test_mm_load_ss: | 
|  | ; X86-SSE:       # %bb.0: | 
|  | ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-SSE-NEXT:    movss (%eax), %xmm0 # encoding: [0xf3,0x0f,0x10,0x00] | 
|  | ; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero | 
|  | ; X86-SSE-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX1-LABEL: test_mm_load_ss: | 
|  | ; X86-AVX1:       # %bb.0: | 
|  | ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-AVX1-NEXT:    vmovss (%eax), %xmm0 # encoding: [0xc5,0xfa,0x10,0x00] | 
|  | ; X86-AVX1-NEXT:    # xmm0 = mem[0],zero,zero,zero | 
|  | ; X86-AVX1-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX512-LABEL: test_mm_load_ss: | 
|  | ; X86-AVX512:       # %bb.0: | 
|  | ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-AVX512-NEXT:    vmovss (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x00] | 
|  | ; X86-AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero | 
|  | ; X86-AVX512-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-SSE-LABEL: test_mm_load_ss: | 
|  | ; X64-SSE:       # %bb.0: | 
|  | ; X64-SSE-NEXT:    movss (%rdi), %xmm0 # encoding: [0xf3,0x0f,0x10,0x07] | 
|  | ; X64-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero | 
|  | ; X64-SSE-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX1-LABEL: test_mm_load_ss: | 
|  | ; X64-AVX1:       # %bb.0: | 
|  | ; X64-AVX1-NEXT:    vmovss (%rdi), %xmm0 # encoding: [0xc5,0xfa,0x10,0x07] | 
|  | ; X64-AVX1-NEXT:    # xmm0 = mem[0],zero,zero,zero | 
|  | ; X64-AVX1-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX512-LABEL: test_mm_load_ss: | 
|  | ; X64-AVX512:       # %bb.0: | 
|  | ; X64-AVX512-NEXT:    vmovss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07] | 
|  | ; X64-AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero | 
|  | ; X64-AVX512-NEXT:    retq # encoding: [0xc3] | 
|  | %ld = load float, float* %a0, align 1 | 
|  | %res0 = insertelement <4 x float> undef, float %ld, i32 0 | 
|  | %res1 = insertelement <4 x float> %res0, float 0.0, i32 1 | 
|  | %res2 = insertelement <4 x float> %res1, float 0.0, i32 2 | 
|  | %res3 = insertelement <4 x float> %res2, float 0.0, i32 3 | 
|  | ret <4 x float> %res3 | 
|  | } | 
|  |  | 
|  | ; _mm_load1_ps pattern - one scalar float is loaded from %a0 and written into | 
|  | ; every lane of the result. The checks expect a splat, i.e. movss followed by | 
|  | ; shufps $0 on the plain SSE runs and a single vbroadcastss from memory on the | 
|  | ; AVX1 and AVX512 runs. The i386 runs first fetch the pointer from the stack. | 
|  | define <4 x float> @test_mm_load1_ps(float* %a0) nounwind { | 
|  | ; X86-SSE-LABEL: test_mm_load1_ps: | 
|  | ; X86-SSE:       # %bb.0: | 
|  | ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-SSE-NEXT:    movss (%eax), %xmm0 # encoding: [0xf3,0x0f,0x10,0x00] | 
|  | ; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero | 
|  | ; X86-SSE-NEXT:    shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] | 
|  | ; X86-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0] | 
|  | ; X86-SSE-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX1-LABEL: test_mm_load1_ps: | 
|  | ; X86-AVX1:       # %bb.0: | 
|  | ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-AVX1-NEXT:    vbroadcastss (%eax), %xmm0 # encoding: [0xc4,0xe2,0x79,0x18,0x00] | 
|  | ; X86-AVX1-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX512-LABEL: test_mm_load1_ps: | 
|  | ; X86-AVX512:       # %bb.0: | 
|  | ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-AVX512-NEXT:    vbroadcastss (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x00] | 
|  | ; X86-AVX512-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-SSE-LABEL: test_mm_load1_ps: | 
|  | ; X64-SSE:       # %bb.0: | 
|  | ; X64-SSE-NEXT:    movss (%rdi), %xmm0 # encoding: [0xf3,0x0f,0x10,0x07] | 
|  | ; X64-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero | 
|  | ; X64-SSE-NEXT:    shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] | 
|  | ; X64-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0] | 
|  | ; X64-SSE-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX1-LABEL: test_mm_load1_ps: | 
|  | ; X64-AVX1:       # %bb.0: | 
|  | ; X64-AVX1-NEXT:    vbroadcastss (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x79,0x18,0x07] | 
|  | ; X64-AVX1-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX512-LABEL: test_mm_load1_ps: | 
|  | ; X64-AVX512:       # %bb.0: | 
|  | ; X64-AVX512-NEXT:    vbroadcastss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x07] | 
|  | ; X64-AVX512-NEXT:    retq # encoding: [0xc3] | 
|  | ; Single 4-byte-aligned scalar load, then the same value inserted into lanes 0-3. | 
|  | %ld = load float, float* %a0, align 4 | 
|  | %res0 = insertelement <4 x float> undef, float %ld, i32 0 | 
|  | %res1 = insertelement <4 x float> %res0, float %ld, i32 1 | 
|  | %res2 = insertelement <4 x float> %res1, float %ld, i32 2 | 
|  | %res3 = insertelement <4 x float> %res2, float %ld, i32 3 | 
|  | ret <4 x float> %res3 | 
|  | } | 
|  |  | 
|  | ; _mm_loadh_pi pattern - a pair of floats is loaded through %a1 and placed in | 
|  | ; the upper two lanes of the result while lanes 0-1 come from %a0. | 
|  | ; Expected lowering differs a lot per run line. | 
|  | ;  - x86-64 with AVX folds everything into one vmovhpd with a memory operand. | 
|  | ;  - i386 with AVX loads the pair with vmovsd and merges it with vmovlhps. | 
|  | ;  - the plain SSE runs have no 64-bit vector load available here, so the pair | 
|  | ;    is assembled from two movss loads plus unpcklps before the movlhps. The | 
|  | ;    x86-64 SSE run (built with -sse2) additionally goes through a 64-bit | 
|  | ;    integer load and two stack stores first. | 
|  | define <4 x float> @test_mm_loadh_pi(<4 x float> %a0, x86_mmx* %a1) { | 
|  | ; X86-SSE-LABEL: test_mm_loadh_pi: | 
|  | ; X86-SSE:       # %bb.0: | 
|  | ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-SSE-NEXT:    movss (%eax), %xmm1 # encoding: [0xf3,0x0f,0x10,0x08] | 
|  | ; X86-SSE-NEXT:    # xmm1 = mem[0],zero,zero,zero | 
|  | ; X86-SSE-NEXT:    movss 4(%eax), %xmm2 # encoding: [0xf3,0x0f,0x10,0x50,0x04] | 
|  | ; X86-SSE-NEXT:    # xmm2 = mem[0],zero,zero,zero | 
|  | ; X86-SSE-NEXT:    unpcklps %xmm2, %xmm1 # encoding: [0x0f,0x14,0xca] | 
|  | ; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] | 
|  | ; X86-SSE-NEXT:    movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] | 
|  | ; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0] | 
|  | ; X86-SSE-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX1-LABEL: test_mm_loadh_pi: | 
|  | ; X86-AVX1:       # %bb.0: | 
|  | ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-AVX1-NEXT:    vmovsd (%eax), %xmm1 # encoding: [0xc5,0xfb,0x10,0x08] | 
|  | ; X86-AVX1-NEXT:    # xmm1 = mem[0],zero | 
|  | ; X86-AVX1-NEXT:    vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1] | 
|  | ; X86-AVX1-NEXT:    # xmm0 = xmm0[0],xmm1[0] | 
|  | ; X86-AVX1-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX512-LABEL: test_mm_loadh_pi: | 
|  | ; X86-AVX512:       # %bb.0: | 
|  | ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-AVX512-NEXT:    vmovsd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x08] | 
|  | ; X86-AVX512-NEXT:    # xmm1 = mem[0],zero | 
|  | ; X86-AVX512-NEXT:    vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1] | 
|  | ; X86-AVX512-NEXT:    # xmm0 = xmm0[0],xmm1[0] | 
|  | ; X86-AVX512-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-SSE-LABEL: test_mm_loadh_pi: | 
|  | ; X64-SSE:       # %bb.0: | 
|  | ; X64-SSE-NEXT:    movq (%rdi), %rax # encoding: [0x48,0x8b,0x07] | 
|  | ; X64-SSE-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xf8] | 
|  | ; X64-SSE-NEXT:    shrq $32, %rax # encoding: [0x48,0xc1,0xe8,0x20] | 
|  | ; X64-SSE-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xfc] | 
|  | ; X64-SSE-NEXT:    movss -{{[0-9]+}}(%rsp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0xf8] | 
|  | ; X64-SSE-NEXT:    # xmm1 = mem[0],zero,zero,zero | 
|  | ; X64-SSE-NEXT:    movss -{{[0-9]+}}(%rsp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0xfc] | 
|  | ; X64-SSE-NEXT:    # xmm2 = mem[0],zero,zero,zero | 
|  | ; X64-SSE-NEXT:    unpcklps %xmm2, %xmm1 # encoding: [0x0f,0x14,0xca] | 
|  | ; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] | 
|  | ; X64-SSE-NEXT:    movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] | 
|  | ; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0] | 
|  | ; X64-SSE-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX1-LABEL: test_mm_loadh_pi: | 
|  | ; X64-AVX1:       # %bb.0: | 
|  | ; X64-AVX1-NEXT:    vmovhpd (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x16,0x07] | 
|  | ; X64-AVX1-NEXT:    # xmm0 = xmm0[0],mem[0] | 
|  | ; X64-AVX1-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX512-LABEL: test_mm_loadh_pi: | 
|  | ; X64-AVX512:       # %bb.0: | 
|  | ; X64-AVX512-NEXT:    vmovhpd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x16,0x07] | 
|  | ; X64-AVX512-NEXT:    # xmm0 = xmm0[0],mem[0] | 
|  | ; X64-AVX512-NEXT:    retq # encoding: [0xc3] | 
|  | ; Reinterpret the pointer as <2 x float>*, load the pair, widen it to four | 
|  | ; lanes (upper two undef), then select a0[0], a0[1], ld[0], ld[1]. | 
|  | ; NOTE(review) - the load has no explicit alignment, unlike the loads in the | 
|  | ; neighbouring tests; confirm this still matches what clang emits. | 
|  | %ptr = bitcast x86_mmx* %a1 to <2 x float>* | 
|  | %ld  = load <2 x float>, <2 x float>* %ptr | 
|  | %ext = shufflevector <2 x float> %ld, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> | 
|  | %res = shufflevector <4 x float> %a0, <4 x float> %ext, <4 x i32> <i32 0, i32 1, i32 4, i32 5> | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; _mm_loadl_pi pattern - a pair of floats is loaded through %a1 and placed in | 
|  | ; the lower two lanes of the result while lanes 2-3 come from %a0. | 
|  | ; Expected lowering per run line. | 
|  | ;  - x86-64 with AVX folds everything into one vmovlpd with a memory operand. | 
|  | ;  - i386 with AVX loads the pair with vmovsd and merges it with vblendps $3. | 
|  | ;  - the plain SSE runs assemble the pair from two movss loads plus unpcklps, | 
|  | ;    merge with shufps $228 into xmm1 and copy back to xmm0 with movaps. The | 
|  | ;    x86-64 SSE run (built with -sse2) goes through a 64-bit integer load and | 
|  | ;    two stack stores first. | 
|  | define <4 x float> @test_mm_loadl_pi(<4 x float> %a0, x86_mmx* %a1) { | 
|  | ; X86-SSE-LABEL: test_mm_loadl_pi: | 
|  | ; X86-SSE:       # %bb.0: | 
|  | ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-SSE-NEXT:    movss (%eax), %xmm1 # encoding: [0xf3,0x0f,0x10,0x08] | 
|  | ; X86-SSE-NEXT:    # xmm1 = mem[0],zero,zero,zero | 
|  | ; X86-SSE-NEXT:    movss 4(%eax), %xmm2 # encoding: [0xf3,0x0f,0x10,0x50,0x04] | 
|  | ; X86-SSE-NEXT:    # xmm2 = mem[0],zero,zero,zero | 
|  | ; X86-SSE-NEXT:    unpcklps %xmm2, %xmm1 # encoding: [0x0f,0x14,0xca] | 
|  | ; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] | 
|  | ; X86-SSE-NEXT:    shufps $228, %xmm0, %xmm1 # encoding: [0x0f,0xc6,0xc8,0xe4] | 
|  | ; X86-SSE-NEXT:    # xmm1 = xmm1[0,1],xmm0[2,3] | 
|  | ; X86-SSE-NEXT:    movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1] | 
|  | ; X86-SSE-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX1-LABEL: test_mm_loadl_pi: | 
|  | ; X86-AVX1:       # %bb.0: | 
|  | ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-AVX1-NEXT:    vmovsd (%eax), %xmm1 # encoding: [0xc5,0xfb,0x10,0x08] | 
|  | ; X86-AVX1-NEXT:    # xmm1 = mem[0],zero | 
|  | ; X86-AVX1-NEXT:    vblendps $3, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03] | 
|  | ; X86-AVX1-NEXT:    # xmm0 = xmm1[0,1],xmm0[2,3] | 
|  | ; X86-AVX1-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX512-LABEL: test_mm_loadl_pi: | 
|  | ; X86-AVX512:       # %bb.0: | 
|  | ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-AVX512-NEXT:    vmovsd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x08] | 
|  | ; X86-AVX512-NEXT:    # xmm1 = mem[0],zero | 
|  | ; X86-AVX512-NEXT:    vblendps $3, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03] | 
|  | ; X86-AVX512-NEXT:    # xmm0 = xmm1[0,1],xmm0[2,3] | 
|  | ; X86-AVX512-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-SSE-LABEL: test_mm_loadl_pi: | 
|  | ; X64-SSE:       # %bb.0: | 
|  | ; X64-SSE-NEXT:    movq (%rdi), %rax # encoding: [0x48,0x8b,0x07] | 
|  | ; X64-SSE-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xf8] | 
|  | ; X64-SSE-NEXT:    shrq $32, %rax # encoding: [0x48,0xc1,0xe8,0x20] | 
|  | ; X64-SSE-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xfc] | 
|  | ; X64-SSE-NEXT:    movss -{{[0-9]+}}(%rsp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0xf8] | 
|  | ; X64-SSE-NEXT:    # xmm1 = mem[0],zero,zero,zero | 
|  | ; X64-SSE-NEXT:    movss -{{[0-9]+}}(%rsp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0xfc] | 
|  | ; X64-SSE-NEXT:    # xmm2 = mem[0],zero,zero,zero | 
|  | ; X64-SSE-NEXT:    unpcklps %xmm2, %xmm1 # encoding: [0x0f,0x14,0xca] | 
|  | ; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] | 
|  | ; X64-SSE-NEXT:    shufps $228, %xmm0, %xmm1 # encoding: [0x0f,0xc6,0xc8,0xe4] | 
|  | ; X64-SSE-NEXT:    # xmm1 = xmm1[0,1],xmm0[2,3] | 
|  | ; X64-SSE-NEXT:    movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1] | 
|  | ; X64-SSE-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX1-LABEL: test_mm_loadl_pi: | 
|  | ; X64-AVX1:       # %bb.0: | 
|  | ; X64-AVX1-NEXT:    vmovlpd (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x12,0x07] | 
|  | ; X64-AVX1-NEXT:    # xmm0 = mem[0],xmm0[1] | 
|  | ; X64-AVX1-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX512-LABEL: test_mm_loadl_pi: | 
|  | ; X64-AVX512:       # %bb.0: | 
|  | ; X64-AVX512-NEXT:    vmovlpd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x12,0x07] | 
|  | ; X64-AVX512-NEXT:    # xmm0 = mem[0],xmm0[1] | 
|  | ; X64-AVX512-NEXT:    retq # encoding: [0xc3] | 
|  | ; Reinterpret the pointer as <2 x float>*, load the pair, widen it to four | 
|  | ; lanes (upper two undef), then select ld[0], ld[1], a0[2], a0[3]. | 
|  | ; NOTE(review) - the load has no explicit alignment, unlike the loads in the | 
|  | ; neighbouring tests; confirm this still matches what clang emits. | 
|  | %ptr = bitcast x86_mmx* %a1 to <2 x float>* | 
|  | %ld  = load <2 x float>, <2 x float>* %ptr | 
|  | %ext = shufflevector <2 x float> %ld, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> | 
|  | %res = shufflevector <4 x float> %a0, <4 x float> %ext, <4 x i32> <i32 4, i32 5, i32 2, i32 3> | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; _mm_loadr_ps pattern - a 16-byte-aligned vector load followed by a full lane | 
|  | ; reversal. The plain SSE runs are expected to emit movaps and then shufps $27; | 
|  | ; the AVX1 and AVX512 runs fold the load into a single vpermilps $27 with a | 
|  | ; memory operand. | 
|  | define <4 x float> @test_mm_loadr_ps(float* %a0) nounwind { | 
|  | ; X86-SSE-LABEL: test_mm_loadr_ps: | 
|  | ; X86-SSE:       # %bb.0: | 
|  | ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-SSE-NEXT:    movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00] | 
|  | ; X86-SSE-NEXT:    shufps $27, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x1b] | 
|  | ; X86-SSE-NEXT:    # xmm0 = xmm0[3,2,1,0] | 
|  | ; X86-SSE-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX1-LABEL: test_mm_loadr_ps: | 
|  | ; X86-AVX1:       # %bb.0: | 
|  | ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-AVX1-NEXT:    vpermilps $27, (%eax), %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0x00,0x1b] | 
|  | ; X86-AVX1-NEXT:    # xmm0 = mem[3,2,1,0] | 
|  | ; X86-AVX1-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX512-LABEL: test_mm_loadr_ps: | 
|  | ; X86-AVX512:       # %bb.0: | 
|  | ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-AVX512-NEXT:    vpermilps $27, (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0x00,0x1b] | 
|  | ; X86-AVX512-NEXT:    # xmm0 = mem[3,2,1,0] | 
|  | ; X86-AVX512-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-SSE-LABEL: test_mm_loadr_ps: | 
|  | ; X64-SSE:       # %bb.0: | 
|  | ; X64-SSE-NEXT:    movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07] | 
|  | ; X64-SSE-NEXT:    shufps $27, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x1b] | 
|  | ; X64-SSE-NEXT:    # xmm0 = xmm0[3,2,1,0] | 
|  | ; X64-SSE-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX1-LABEL: test_mm_loadr_ps: | 
|  | ; X64-AVX1:       # %bb.0: | 
|  | ; X64-AVX1-NEXT:    vpermilps $27, (%rdi), %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0x07,0x1b] | 
|  | ; X64-AVX1-NEXT:    # xmm0 = mem[3,2,1,0] | 
|  | ; X64-AVX1-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX512-LABEL: test_mm_loadr_ps: | 
|  | ; X64-AVX512:       # %bb.0: | 
|  | ; X64-AVX512-NEXT:    vpermilps $27, (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0x07,0x1b] | 
|  | ; X64-AVX512-NEXT:    # xmm0 = mem[3,2,1,0] | 
|  | ; X64-AVX512-NEXT:    retq # encoding: [0xc3] | 
|  | ; The align 16 on the load is what permits the aligned movaps / folded operand; | 
|  | ; the shuffle mask <3,2,1,0> is the reversal (immediate 27 in the checks). | 
|  | %arg0 = bitcast float* %a0 to <4 x float>* | 
|  | %ld = load <4 x float>, <4 x float>* %arg0, align 16 | 
|  | %res = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; _mm_loadu_ps pattern - a full vector load that only promises 1-byte | 
|  | ; alignment, so every run line must select the unaligned move (movups without | 
|  | ; AVX, vmovups with AVX1 or AVX512). | 
|  | define <4 x float> @test_mm_loadu_ps(float* %a0) nounwind { | 
|  | ; X86-SSE-LABEL: test_mm_loadu_ps: | 
|  | ; X86-SSE:       # %bb.0: | 
|  | ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-SSE-NEXT:    movups (%eax), %xmm0 # encoding: [0x0f,0x10,0x00] | 
|  | ; X86-SSE-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX1-LABEL: test_mm_loadu_ps: | 
|  | ; X86-AVX1:       # %bb.0: | 
|  | ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-AVX1-NEXT:    vmovups (%eax), %xmm0 # encoding: [0xc5,0xf8,0x10,0x00] | 
|  | ; X86-AVX1-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX512-LABEL: test_mm_loadu_ps: | 
|  | ; X86-AVX512:       # %bb.0: | 
|  | ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-AVX512-NEXT:    vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00] | 
|  | ; X86-AVX512-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-SSE-LABEL: test_mm_loadu_ps: | 
|  | ; X64-SSE:       # %bb.0: | 
|  | ; X64-SSE-NEXT:    movups (%rdi), %xmm0 # encoding: [0x0f,0x10,0x07] | 
|  | ; X64-SSE-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX1-LABEL: test_mm_loadu_ps: | 
|  | ; X64-AVX1:       # %bb.0: | 
|  | ; X64-AVX1-NEXT:    vmovups (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x10,0x07] | 
|  | ; X64-AVX1-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX512-LABEL: test_mm_loadu_ps: | 
|  | ; X64-AVX512:       # %bb.0: | 
|  | ; X64-AVX512-NEXT:    vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07] | 
|  | ; X64-AVX512-NEXT:    retq # encoding: [0xc3] | 
|  | ; align 1 is the property under test; do not raise it. | 
|  | %arg0 = bitcast float* %a0 to <4 x float>* | 
|  | %res = load <4 x float>, <4 x float>* %arg0, align 1 | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; _mm_max_ps pattern - a direct call to the llvm.x86.sse.max.ps intrinsic. | 
|  | ; Each run line is expected to emit exactly one packed max (maxps, or vmaxps | 
|  | ; with AVX) followed by the return. The AVX512 run must compress to the same | 
|  | ; VEX encoding as the AVX1 run. | 
|  | define <4 x float> @test_mm_max_ps(<4 x float> %a0, <4 x float> %a1) { | 
|  | ; SSE-LABEL: test_mm_max_ps: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    maxps %xmm1, %xmm0 # encoding: [0x0f,0x5f,0xc1] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_max_ps: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vmaxps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5f,0xc1] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_max_ps: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vmaxps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5f,0xc1] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) | 
|  | ret <4 x float> %res | 
|  | } | 
|  | ; Intrinsic called by test_mm_max_ps. | 
|  | declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone | 
|  |  | 
|  | ; _mm_max_ss pattern - a direct call to the llvm.x86.sse.max.ss intrinsic. | 
|  | ; Each run line is expected to emit exactly one scalar max (maxss, or vmaxss | 
|  | ; with AVX) followed by the return. The AVX512 run must compress to the same | 
|  | ; VEX encoding as the AVX1 run. | 
|  | define <4 x float> @test_mm_max_ss(<4 x float> %a0, <4 x float> %a1) { | 
|  | ; SSE-LABEL: test_mm_max_ss: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    maxss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5f,0xc1] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_max_ss: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vmaxss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5f,0xc1] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_max_ss: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vmaxss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5f,0xc1] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) | 
|  | ret <4 x float> %res | 
|  | } | 
|  | ; Intrinsic called by test_mm_max_ss. | 
|  | declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone | 
|  |  | 
|  | ; _mm_min_ps pattern - a direct call to the llvm.x86.sse.min.ps intrinsic. | 
|  | ; Each run line is expected to emit exactly one packed min (minps, or vminps | 
|  | ; with AVX) followed by the return. The AVX512 run must compress to the same | 
|  | ; VEX encoding as the AVX1 run. | 
|  | define <4 x float> @test_mm_min_ps(<4 x float> %a0, <4 x float> %a1) { | 
|  | ; SSE-LABEL: test_mm_min_ps: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    minps %xmm1, %xmm0 # encoding: [0x0f,0x5d,0xc1] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_min_ps: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vminps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5d,0xc1] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_min_ps: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vminps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5d,0xc1] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) | 
|  | ret <4 x float> %res | 
|  | } | 
|  | ; Intrinsic called by test_mm_min_ps. | 
|  | declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone | 
|  |  | 
|  | ; _mm_min_ss pattern - a direct call to the llvm.x86.sse.min.ss intrinsic. | 
|  | ; Each run line is expected to emit exactly one scalar min (minss, or vminss | 
|  | ; with AVX) followed by the return. The AVX512 run must compress to the same | 
|  | ; VEX encoding as the AVX1 run. | 
|  | define <4 x float> @test_mm_min_ss(<4 x float> %a0, <4 x float> %a1) { | 
|  | ; SSE-LABEL: test_mm_min_ss: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    minss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5d,0xc1] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_min_ss: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vminss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5d,0xc1] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_min_ss: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vminss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5d,0xc1] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) | 
|  | ret <4 x float> %res | 
|  | } | 
|  | ; Intrinsic called by test_mm_min_ss. | 
|  | declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone | 
|  |  | 
|  | ; _mm_move_ss pattern - lane 0 is taken from %a1 and lanes 1-3 from %a0. | 
|  | ; Without AVX this is expected to be a register-to-register movss; with AVX | 
|  | ; (both AVX1 and AVX512 share one set of checks) it becomes vblendps $1. | 
|  | define <4 x float> @test_mm_move_ss(<4 x float> %a0, <4 x float> %a1) { | 
|  | ; SSE-LABEL: test_mm_move_ss: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1] | 
|  | ; SSE-NEXT:    # xmm0 = xmm1[0],xmm0[1,2,3] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX-LABEL: test_mm_move_ss: | 
|  | ; AVX:       # %bb.0: | 
|  | ; AVX-NEXT:    vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01] | 
|  | ; AVX-NEXT:    # xmm0 = xmm1[0],xmm0[1,2,3] | 
|  | ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; Mask index 4 selects lane 0 of the second operand (%a1). | 
|  | %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 4, i32 1, i32 2, i32 3> | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; _mm_movehl_ps pattern - the result's low half is the high half of %a1 and | 
|  | ; its high half is the high half of %a0. Without AVX the checks expect | 
|  | ; movhlps; with AVX they expect vunpckhpd with the two sources swapped, which | 
|  | ; produces the same lane layout (see the xmm0 = xmm1[1],xmm0[1] annotations). | 
|  | define <4 x float> @test_mm_movehl_ps(<4 x float> %a0, <4 x float> %a1) { | 
|  | ; SSE-LABEL: test_mm_movehl_ps: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    movhlps %xmm1, %xmm0 # encoding: [0x0f,0x12,0xc1] | 
|  | ; SSE-NEXT:    # xmm0 = xmm1[1],xmm0[1] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_movehl_ps: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vunpckhpd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x15,0xc0] | 
|  | ; AVX1-NEXT:    # xmm0 = xmm1[1],xmm0[1] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_movehl_ps: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vunpckhpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x15,0xc0] | 
|  | ; AVX512-NEXT:    # xmm0 = xmm1[1],xmm0[1] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; Mask <6,7,2,3> selects a1[2], a1[3], a0[2], a0[3]. | 
|  | %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 6, i32 7, i32 2, i32 3> | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; _mm_movelh_ps pattern - the result's low half is the low half of %a0 and its | 
|  | ; high half is the low half of %a1. Expected to select movlhps (vmovlhps with | 
|  | ; AVX); the AVX512 run must compress to the same VEX encoding as the AVX1 run. | 
|  | define <4 x float> @test_mm_movelh_ps(<4 x float> %a0, <4 x float> %a1) { | 
|  | ; SSE-LABEL: test_mm_movelh_ps: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] | 
|  | ; SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_movelh_ps: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1] | 
|  | ; AVX1-NEXT:    # xmm0 = xmm0[0],xmm1[0] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_movelh_ps: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1] | 
|  | ; AVX512-NEXT:    # xmm0 = xmm0[0],xmm1[0] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; Mask <0,1,4,5> selects a0[0], a0[1], a1[0], a1[1]. | 
|  | %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5> | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; _mm_movemask_ps pattern - a direct call to llvm.x86.sse.movmsk.ps returning | 
|  | ; an i32. Expected to select movmskps into eax (vmovmskps with AVX). The AVX1 | 
|  | ; and AVX512 runs share one set of checks because the output is identical. | 
|  | define i32 @test_mm_movemask_ps(<4 x float> %a0) nounwind { | 
|  | ; SSE-LABEL: test_mm_movemask_ps: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    movmskps %xmm0, %eax # encoding: [0x0f,0x50,0xc0] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX-LABEL: test_mm_movemask_ps: | 
|  | ; AVX:       # %bb.0: | 
|  | ; AVX-NEXT:    vmovmskps %xmm0, %eax # encoding: [0xc5,0xf8,0x50,0xc0] | 
|  | ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) | 
|  | ret i32 %res | 
|  | } | 
|  | ; Intrinsic called by test_mm_movemask_ps. | 
|  | declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone | 
|  |  | 
|  | ; _mm_mul_ps pattern - a plain vector fmul, no intrinsic involved. Expected to | 
|  | ; select one mulps (vmulps with AVX); the AVX512 run must compress to the same | 
|  | ; VEX encoding as the AVX1 run. | 
|  | define <4 x float> @test_mm_mul_ps(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_mul_ps: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    mulps %xmm1, %xmm0 # encoding: [0x0f,0x59,0xc1] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_mul_ps: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vmulps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x59,0xc1] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_mul_ps: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vmulps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x59,0xc1] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = fmul <4 x float> %a0, %a1 | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; _mm_mul_ss pattern - only lane 0 is multiplied; lanes 1-3 of %a0 pass | 
|  | ; through. The extract / scalar fmul / insert sequence below is expected to | 
|  | ; collapse into a single mulss (vmulss with AVX) with no extra shuffles. | 
|  | define <4 x float> @test_mm_mul_ss(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_mul_ss: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    mulss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x59,0xc1] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_mul_ss: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vmulss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x59,0xc1] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_mul_ss: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vmulss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x59,0xc1] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; Pull lane 0 out of both inputs, multiply as scalars, put the product back | 
|  | ; into lane 0 of %a0. | 
|  | %ext0 = extractelement <4 x float> %a0, i32 0 | 
|  | %ext1 = extractelement <4 x float> %a1, i32 0 | 
|  | %fmul = fmul float %ext0, %ext1 | 
|  | %res = insertelement <4 x float> %a0, float %fmul, i32 0 | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; _mm_or_ps pattern - a bitwise OR of two float vectors, expressed in IR as an | 
|  | ; integer or between bitcasts. The checks expect the floating-point domain | 
|  | ; instruction orps (vorps with AVX) and nothing else; the AVX512 run must | 
|  | ; compress to the same VEX encoding as the AVX1 run. | 
|  | define <4 x float> @test_mm_or_ps(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_or_ps: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    orps %xmm1, %xmm0 # encoding: [0x0f,0x56,0xc1] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_or_ps: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0xc1] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_or_ps: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0xc1] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; The bitcasts only change the type seen by the or; they must not produce | 
|  | ; any instructions of their own. | 
|  | %arg0 = bitcast <4 x float> %a0 to <4 x i32> | 
|  | %arg1 = bitcast <4 x float> %a1 to <4 x i32> | 
|  | %res = or <4 x i32> %arg0, %arg1 | 
|  | %bc = bitcast <4 x i32> %res to <4 x float> | 
|  | ret <4 x float> %bc | 
|  | } | 
|  |  | 
|  | ; _mm_prefetch pattern - a call to the generic llvm.prefetch intrinsic. With | 
|  | ; the argument triple (0, 0, 1) used below the checks expect prefetchnta on | 
|  | ; every run line. The checks are split only by pointer width (i386 loads the | 
|  | ; pointer from the stack first), not by vector feature level. | 
|  | define void @test_mm_prefetch(i8* %a0) { | 
|  | ; X86-LABEL: test_mm_prefetch: | 
|  | ; X86:       # %bb.0: | 
|  | ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-NEXT:    prefetchnta (%eax) # encoding: [0x0f,0x18,0x00] | 
|  | ; X86-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-LABEL: test_mm_prefetch: | 
|  | ; X64:       # %bb.0: | 
|  | ; X64-NEXT:    prefetchnta (%rdi) # encoding: [0x0f,0x18,0x07] | 
|  | ; X64-NEXT:    retq # encoding: [0xc3] | 
|  | ; Per the LLVM LangRef the three i32 operands are rw (0 = read), locality | 
|  | ; (0 = none) and cache type (1 = data). | 
|  | call void @llvm.prefetch(i8* %a0, i32 0, i32 0, i32 1) | 
|  | ret void | 
|  | } | 
|  | ; Intrinsic called by test_mm_prefetch. | 
|  | declare void @llvm.prefetch(i8* nocapture, i32, i32, i32) nounwind readnone | 
|  |  | 
|  | ; _mm_rcp_ps pattern - a direct call to llvm.x86.sse.rcp.ps. Expected to | 
|  | ; select rcpps (vrcpps with AVX). The AVX1 and AVX512 runs share one set of | 
|  | ; checks because the output is identical for both. | 
|  | define <4 x float> @test_mm_rcp_ps(<4 x float> %a0) { | 
|  | ; SSE-LABEL: test_mm_rcp_ps: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    rcpps %xmm0, %xmm0 # encoding: [0x0f,0x53,0xc0] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX-LABEL: test_mm_rcp_ps: | 
|  | ; AVX:       # %bb.0: | 
|  | ; AVX-NEXT:    vrcpps %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x53,0xc0] | 
|  | ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) | 
|  | ret <4 x float> %res | 
|  | } | 
|  | ; Intrinsic called by test_mm_rcp_ps. | 
|  | declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone | 
|  |  | 
|  | ; _mm_rcp_ss pattern - a direct call to llvm.x86.sse.rcp.ss. Expected to | 
|  | ; select rcpss (the three-operand vrcpss with AVX). The AVX1 and AVX512 runs | 
|  | ; share one set of checks because the output is identical for both. | 
|  | define <4 x float> @test_mm_rcp_ss(<4 x float> %a0) { | 
|  | ; SSE-LABEL: test_mm_rcp_ss: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    rcpss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x53,0xc0] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX-LABEL: test_mm_rcp_ss: | 
|  | ; AVX:       # %bb.0: | 
|  | ; AVX-NEXT:    vrcpss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x53,0xc0] | 
|  | ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %rcp = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) | 
|  | ret <4 x float> %rcp | 
|  | } | 
|  | ; Intrinsic called by test_mm_rcp_ss. | 
|  | declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone | 
|  |  | 
|  | ; _mm_rsqrt_ps pattern - a direct call to llvm.x86.sse.rsqrt.ps. Expected to | 
|  | ; select rsqrtps (vrsqrtps with AVX). The AVX1 and AVX512 runs share one set | 
|  | ; of checks because the output is identical for both. | 
|  | define <4 x float> @test_mm_rsqrt_ps(<4 x float> %a0) { | 
|  | ; SSE-LABEL: test_mm_rsqrt_ps: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    rsqrtps %xmm0, %xmm0 # encoding: [0x0f,0x52,0xc0] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX-LABEL: test_mm_rsqrt_ps: | 
|  | ; AVX:       # %bb.0: | 
|  | ; AVX-NEXT:    vrsqrtps %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x52,0xc0] | 
|  | ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) | 
|  | ret <4 x float> %res | 
|  | } | 
|  | ; Intrinsic called by test_mm_rsqrt_ps. | 
|  | declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone | 
|  |  | 
|  | ; _mm_rsqrt_ss pattern - a direct call to llvm.x86.sse.rsqrt.ss. Expected to | 
|  | ; select rsqrtss (the three-operand vrsqrtss with AVX). The AVX1 and AVX512 | 
|  | ; runs share one set of checks because the output is identical for both. | 
|  | define <4 x float> @test_mm_rsqrt_ss(<4 x float> %a0) { | 
|  | ; SSE-LABEL: test_mm_rsqrt_ss: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    rsqrtss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x52,0xc0] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX-LABEL: test_mm_rsqrt_ss: | 
|  | ; AVX:       # %bb.0: | 
|  | ; AVX-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x52,0xc0] | 
|  | ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %rsqrt = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) | 
|  | ret <4 x float> %rsqrt | 
|  | } | 
|  | ; Intrinsic called by test_mm_rsqrt_ss. | 
|  | declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone | 
|  |  | 
|  | define void @test_MM_SET_EXCEPTION_MASK(i32 %a0) nounwind { | 
|  | ; X86-SSE-LABEL: test_MM_SET_EXCEPTION_MASK: | 
|  | ; X86-SSE:       # %bb.0: | 
|  | ; X86-SSE-NEXT:    pushl %eax # encoding: [0x50] | 
|  | ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] | 
|  | ; X86-SSE-NEXT:    movl %esp, %ecx # encoding: [0x89,0xe1] | 
|  | ; X86-SSE-NEXT:    stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19] | 
|  | ; X86-SSE-NEXT:    movl (%esp), %edx # encoding: [0x8b,0x14,0x24] | 
|  | ; X86-SSE-NEXT:    andl $-8065, %edx # encoding: [0x81,0xe2,0x7f,0xe0,0xff,0xff] | 
|  | ; X86-SSE-NEXT:    # imm = 0xE07F | 
|  | ; X86-SSE-NEXT:    orl %eax, %edx # encoding: [0x09,0xc2] | 
|  | ; X86-SSE-NEXT:    movl %edx, (%esp) # encoding: [0x89,0x14,0x24] | 
|  | ; X86-SSE-NEXT:    ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11] | 
|  | ; X86-SSE-NEXT:    popl %eax # encoding: [0x58] | 
|  | ; X86-SSE-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX-LABEL: test_MM_SET_EXCEPTION_MASK: | 
|  | ; X86-AVX:       # %bb.0: | 
|  | ; X86-AVX-NEXT:    pushl %eax # encoding: [0x50] | 
|  | ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] | 
|  | ; X86-AVX-NEXT:    movl %esp, %ecx # encoding: [0x89,0xe1] | 
|  | ; X86-AVX-NEXT:    vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19] | 
|  | ; X86-AVX-NEXT:    movl (%esp), %edx # encoding: [0x8b,0x14,0x24] | 
|  | ; X86-AVX-NEXT:    andl $-8065, %edx # encoding: [0x81,0xe2,0x7f,0xe0,0xff,0xff] | 
|  | ; X86-AVX-NEXT:    # imm = 0xE07F | 
|  | ; X86-AVX-NEXT:    orl %eax, %edx # encoding: [0x09,0xc2] | 
|  | ; X86-AVX-NEXT:    movl %edx, (%esp) # encoding: [0x89,0x14,0x24] | 
|  | ; X86-AVX-NEXT:    vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11] | 
|  | ; X86-AVX-NEXT:    popl %eax # encoding: [0x58] | 
|  | ; X86-AVX-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-SSE-LABEL: test_MM_SET_EXCEPTION_MASK: | 
|  | ; X64-SSE:       # %bb.0: | 
|  | ; X64-SSE-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] | 
|  | ; X64-SSE-NEXT:    stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] | 
|  | ; X64-SSE-NEXT:    movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] | 
|  | ; X64-SSE-NEXT:    andl $-8065, %ecx # encoding: [0x81,0xe1,0x7f,0xe0,0xff,0xff] | 
|  | ; X64-SSE-NEXT:    # imm = 0xE07F | 
|  | ; X64-SSE-NEXT:    orl %edi, %ecx # encoding: [0x09,0xf9] | 
|  | ; X64-SSE-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] | 
|  | ; X64-SSE-NEXT:    ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10] | 
|  | ; X64-SSE-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX-LABEL: test_MM_SET_EXCEPTION_MASK: | 
|  | ; X64-AVX:       # %bb.0: | 
|  | ; X64-AVX-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] | 
|  | ; X64-AVX-NEXT:    vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] | 
|  | ; X64-AVX-NEXT:    movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] | 
|  | ; X64-AVX-NEXT:    andl $-8065, %ecx # encoding: [0x81,0xe1,0x7f,0xe0,0xff,0xff] | 
|  | ; X64-AVX-NEXT:    # imm = 0xE07F | 
|  | ; X64-AVX-NEXT:    orl %edi, %ecx # encoding: [0x09,0xf9] | 
|  | ; X64-AVX-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] | 
|  | ; X64-AVX-NEXT:    vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10] | 
|  | ; X64-AVX-NEXT:    retq # encoding: [0xc3] | 
|  | %1 = alloca i32, align 4 | 
|  | %2 = bitcast i32* %1 to i8* | 
|  | call void @llvm.x86.sse.stmxcsr(i8* %2) | 
|  | %3 = load i32, i32* %1 | 
|  | %4 = and i32 %3, -8065 | 
|  | %5 = or i32 %4, %a0 | 
|  | store i32 %5, i32* %1 | 
|  | call void @llvm.x86.sse.ldmxcsr(i8* %2) | 
|  | ret void | 
|  | } | 
|  | ; Declaration of the ldmxcsr intrinsic called by the MXCSR-writing tests in this file. | 
|  | ; NOTE(review): declared readnone even though callers store a value and pass its | 
|  | ; address to this intrinsic - confirm the attribute is intentional. | 
|  | declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind readnone | 
|  |  | 
|  | ; Read-modify-write of the MXCSR value through an i32 stack slot, using the | 
|  | ; stmxcsr intrinsic to read it and the ldmxcsr intrinsic to write it back. | 
|  | ; The low six bits are cleared (mask -64 == ~0x3F) and %a0 is ORed in. | 
|  | ; Presumably mirrors the clang macro of the same name - see the file header note. | 
|  | define void @test_MM_SET_EXCEPTION_STATE(i32 %a0) nounwind { | 
|  | ; X86-SSE-LABEL: test_MM_SET_EXCEPTION_STATE: | 
|  | ; X86-SSE:       # %bb.0: | 
|  | ; X86-SSE-NEXT:    pushl %eax # encoding: [0x50] | 
|  | ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] | 
|  | ; X86-SSE-NEXT:    movl %esp, %ecx # encoding: [0x89,0xe1] | 
|  | ; X86-SSE-NEXT:    stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19] | 
|  | ; X86-SSE-NEXT:    movl (%esp), %edx # encoding: [0x8b,0x14,0x24] | 
|  | ; X86-SSE-NEXT:    andl $-64, %edx # encoding: [0x83,0xe2,0xc0] | 
|  | ; X86-SSE-NEXT:    orl %eax, %edx # encoding: [0x09,0xc2] | 
|  | ; X86-SSE-NEXT:    movl %edx, (%esp) # encoding: [0x89,0x14,0x24] | 
|  | ; X86-SSE-NEXT:    ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11] | 
|  | ; X86-SSE-NEXT:    popl %eax # encoding: [0x58] | 
|  | ; X86-SSE-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX-LABEL: test_MM_SET_EXCEPTION_STATE: | 
|  | ; X86-AVX:       # %bb.0: | 
|  | ; X86-AVX-NEXT:    pushl %eax # encoding: [0x50] | 
|  | ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] | 
|  | ; X86-AVX-NEXT:    movl %esp, %ecx # encoding: [0x89,0xe1] | 
|  | ; X86-AVX-NEXT:    vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19] | 
|  | ; X86-AVX-NEXT:    movl (%esp), %edx # encoding: [0x8b,0x14,0x24] | 
|  | ; X86-AVX-NEXT:    andl $-64, %edx # encoding: [0x83,0xe2,0xc0] | 
|  | ; X86-AVX-NEXT:    orl %eax, %edx # encoding: [0x09,0xc2] | 
|  | ; X86-AVX-NEXT:    movl %edx, (%esp) # encoding: [0x89,0x14,0x24] | 
|  | ; X86-AVX-NEXT:    vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11] | 
|  | ; X86-AVX-NEXT:    popl %eax # encoding: [0x58] | 
|  | ; X86-AVX-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-SSE-LABEL: test_MM_SET_EXCEPTION_STATE: | 
|  | ; X64-SSE:       # %bb.0: | 
|  | ; X64-SSE-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] | 
|  | ; X64-SSE-NEXT:    stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] | 
|  | ; X64-SSE-NEXT:    movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] | 
|  | ; X64-SSE-NEXT:    andl $-64, %ecx # encoding: [0x83,0xe1,0xc0] | 
|  | ; X64-SSE-NEXT:    orl %edi, %ecx # encoding: [0x09,0xf9] | 
|  | ; X64-SSE-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] | 
|  | ; X64-SSE-NEXT:    ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10] | 
|  | ; X64-SSE-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX-LABEL: test_MM_SET_EXCEPTION_STATE: | 
|  | ; X64-AVX:       # %bb.0: | 
|  | ; X64-AVX-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] | 
|  | ; X64-AVX-NEXT:    vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] | 
|  | ; X64-AVX-NEXT:    movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] | 
|  | ; X64-AVX-NEXT:    andl $-64, %ecx # encoding: [0x83,0xe1,0xc0] | 
|  | ; X64-AVX-NEXT:    orl %edi, %ecx # encoding: [0x09,0xf9] | 
|  | ; X64-AVX-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] | 
|  | ; X64-AVX-NEXT:    vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10] | 
|  | ; X64-AVX-NEXT:    retq # encoding: [0xc3] | 
|  | %1 = alloca i32, align 4 | 
|  | %2 = bitcast i32* %1 to i8* | 
|  | call void @llvm.x86.sse.stmxcsr(i8* %2) | 
|  | %3 = load i32, i32* %1 | 
|  | ; Clear the low six bits of the loaded value, then merge in the caller's bits. | 
|  | %4 = and i32 %3, -64 | 
|  | %5 = or i32 %4, %a0 | 
|  | store i32 %5, i32* %1 | 
|  | call void @llvm.x86.sse.ldmxcsr(i8* %2) | 
|  | ret void | 
|  | } | 
|  |  | 
|  | ; Read-modify-write of the MXCSR value through an i32 stack slot, using the | 
|  | ; stmxcsr intrinsic to read it and the ldmxcsr intrinsic to write it back. | 
|  | ; Bit 15 is cleared (mask -32769 == ~0x8000) and %a0 is ORed in. | 
|  | ; Presumably mirrors the clang macro of the same name - see the file header note. | 
|  | define void @test_MM_SET_FLUSH_ZERO_MODE(i32 %a0) nounwind { | 
|  | ; X86-SSE-LABEL: test_MM_SET_FLUSH_ZERO_MODE: | 
|  | ; X86-SSE:       # %bb.0: | 
|  | ; X86-SSE-NEXT:    pushl %eax # encoding: [0x50] | 
|  | ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] | 
|  | ; X86-SSE-NEXT:    movl %esp, %ecx # encoding: [0x89,0xe1] | 
|  | ; X86-SSE-NEXT:    stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19] | 
|  | ; X86-SSE-NEXT:    movl (%esp), %edx # encoding: [0x8b,0x14,0x24] | 
|  | ; X86-SSE-NEXT:    andl $-32769, %edx # encoding: [0x81,0xe2,0xff,0x7f,0xff,0xff] | 
|  | ; X86-SSE-NEXT:    # imm = 0xFFFF7FFF | 
|  | ; X86-SSE-NEXT:    orl %eax, %edx # encoding: [0x09,0xc2] | 
|  | ; X86-SSE-NEXT:    movl %edx, (%esp) # encoding: [0x89,0x14,0x24] | 
|  | ; X86-SSE-NEXT:    ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11] | 
|  | ; X86-SSE-NEXT:    popl %eax # encoding: [0x58] | 
|  | ; X86-SSE-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX-LABEL: test_MM_SET_FLUSH_ZERO_MODE: | 
|  | ; X86-AVX:       # %bb.0: | 
|  | ; X86-AVX-NEXT:    pushl %eax # encoding: [0x50] | 
|  | ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] | 
|  | ; X86-AVX-NEXT:    movl %esp, %ecx # encoding: [0x89,0xe1] | 
|  | ; X86-AVX-NEXT:    vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19] | 
|  | ; X86-AVX-NEXT:    movl (%esp), %edx # encoding: [0x8b,0x14,0x24] | 
|  | ; X86-AVX-NEXT:    andl $-32769, %edx # encoding: [0x81,0xe2,0xff,0x7f,0xff,0xff] | 
|  | ; X86-AVX-NEXT:    # imm = 0xFFFF7FFF | 
|  | ; X86-AVX-NEXT:    orl %eax, %edx # encoding: [0x09,0xc2] | 
|  | ; X86-AVX-NEXT:    movl %edx, (%esp) # encoding: [0x89,0x14,0x24] | 
|  | ; X86-AVX-NEXT:    vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11] | 
|  | ; X86-AVX-NEXT:    popl %eax # encoding: [0x58] | 
|  | ; X86-AVX-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-SSE-LABEL: test_MM_SET_FLUSH_ZERO_MODE: | 
|  | ; X64-SSE:       # %bb.0: | 
|  | ; X64-SSE-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] | 
|  | ; X64-SSE-NEXT:    stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] | 
|  | ; X64-SSE-NEXT:    movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] | 
|  | ; X64-SSE-NEXT:    andl $-32769, %ecx # encoding: [0x81,0xe1,0xff,0x7f,0xff,0xff] | 
|  | ; X64-SSE-NEXT:    # imm = 0xFFFF7FFF | 
|  | ; X64-SSE-NEXT:    orl %edi, %ecx # encoding: [0x09,0xf9] | 
|  | ; X64-SSE-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] | 
|  | ; X64-SSE-NEXT:    ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10] | 
|  | ; X64-SSE-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX-LABEL: test_MM_SET_FLUSH_ZERO_MODE: | 
|  | ; X64-AVX:       # %bb.0: | 
|  | ; X64-AVX-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] | 
|  | ; X64-AVX-NEXT:    vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] | 
|  | ; X64-AVX-NEXT:    movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] | 
|  | ; X64-AVX-NEXT:    andl $-32769, %ecx # encoding: [0x81,0xe1,0xff,0x7f,0xff,0xff] | 
|  | ; X64-AVX-NEXT:    # imm = 0xFFFF7FFF | 
|  | ; X64-AVX-NEXT:    orl %edi, %ecx # encoding: [0x09,0xf9] | 
|  | ; X64-AVX-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] | 
|  | ; X64-AVX-NEXT:    vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10] | 
|  | ; X64-AVX-NEXT:    retq # encoding: [0xc3] | 
|  | %1 = alloca i32, align 4 | 
|  | %2 = bitcast i32* %1 to i8* | 
|  | call void @llvm.x86.sse.stmxcsr(i8* %2) | 
|  | %3 = load i32, i32* %1 | 
|  | ; Clear bit 15 of the loaded value, then merge in the caller's bits. | 
|  | %4 = and i32 %3, -32769 | 
|  | %5 = or i32 %4, %a0 | 
|  | store i32 %5, i32* %1 | 
|  | call void @llvm.x86.sse.ldmxcsr(i8* %2) | 
|  | ret void | 
|  | } | 
|  |  | 
|  | ; Builds a <4 x float> from four scalar arguments in reverse argument order. | 
|  | ; %a3 goes to lane 0, %a2 to lane 1, %a1 to lane 2 and %a0 to lane 3. | 
|  | define <4 x float> @test_mm_set_ps(float %a0, float %a1, float %a2, float %a3) nounwind { | 
|  | ; X86-SSE-LABEL: test_mm_set_ps: | 
|  | ; X86-SSE:       # %bb.0: | 
|  | ; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10] | 
|  | ; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero | 
|  | ; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x0c] | 
|  | ; X86-SSE-NEXT:    # xmm1 = mem[0],zero,zero,zero | 
|  | ; X86-SSE-NEXT:    unpcklps %xmm1, %xmm0 # encoding: [0x0f,0x14,0xc1] | 
|  | ; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] | 
|  | ; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x08] | 
|  | ; X86-SSE-NEXT:    # xmm1 = mem[0],zero,zero,zero | 
|  | ; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x04] | 
|  | ; X86-SSE-NEXT:    # xmm2 = mem[0],zero,zero,zero | 
|  | ; X86-SSE-NEXT:    unpcklps %xmm2, %xmm1 # encoding: [0x0f,0x14,0xca] | 
|  | ; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] | 
|  | ; X86-SSE-NEXT:    movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] | 
|  | ; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0] | 
|  | ; X86-SSE-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX1-LABEL: test_mm_set_ps: | 
|  | ; X86-AVX1:       # %bb.0: | 
|  | ; X86-AVX1-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x10] | 
|  | ; X86-AVX1-NEXT:    # xmm0 = mem[0],zero,zero,zero | 
|  | ; X86-AVX1-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c] | 
|  | ; X86-AVX1-NEXT:    # xmm1 = mem[0],zero,zero,zero | 
|  | ; X86-AVX1-NEXT:    vinsertps $16, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x10] | 
|  | ; X86-AVX1-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[2,3] | 
|  | ; X86-AVX1-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x08] | 
|  | ; X86-AVX1-NEXT:    # xmm1 = mem[0],zero,zero,zero | 
|  | ; X86-AVX1-NEXT:    vinsertps $32, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x20] | 
|  | ; X86-AVX1-NEXT:    # xmm0 = xmm0[0,1],xmm1[0],xmm0[3] | 
|  | ; X86-AVX1-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x04] | 
|  | ; X86-AVX1-NEXT:    # xmm1 = mem[0],zero,zero,zero | 
|  | ; X86-AVX1-NEXT:    vinsertps $48, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x30] | 
|  | ; X86-AVX1-NEXT:    # xmm0 = xmm0[0,1,2],xmm1[0] | 
|  | ; X86-AVX1-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX512-LABEL: test_mm_set_ps: | 
|  | ; X86-AVX512:       # %bb.0: | 
|  | ; X86-AVX512-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x10] | 
|  | ; X86-AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero | 
|  | ; X86-AVX512-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c] | 
|  | ; X86-AVX512-NEXT:    # xmm1 = mem[0],zero,zero,zero | 
|  | ; X86-AVX512-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x54,0x24,0x08] | 
|  | ; X86-AVX512-NEXT:    # xmm2 = mem[0],zero,zero,zero | 
|  | ; X86-AVX512-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x5c,0x24,0x04] | 
|  | ; X86-AVX512-NEXT:    # xmm3 = mem[0],zero,zero,zero | 
|  | ; X86-AVX512-NEXT:    vinsertps $16, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x10] | 
|  | ; X86-AVX512-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[2,3] | 
|  | ; X86-AVX512-NEXT:    vinsertps $32, %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc2,0x20] | 
|  | ; X86-AVX512-NEXT:    # xmm0 = xmm0[0,1],xmm2[0],xmm0[3] | 
|  | ; X86-AVX512-NEXT:    vinsertps $48, %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc3,0x30] | 
|  | ; X86-AVX512-NEXT:    # xmm0 = xmm0[0,1,2],xmm3[0] | 
|  | ; X86-AVX512-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-SSE-LABEL: test_mm_set_ps: | 
|  | ; X64-SSE:       # %bb.0: | 
|  | ; X64-SSE-NEXT:    unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8] | 
|  | ; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] | 
|  | ; X64-SSE-NEXT:    unpcklps %xmm2, %xmm3 # encoding: [0x0f,0x14,0xda] | 
|  | ; X64-SSE-NEXT:    # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] | 
|  | ; X64-SSE-NEXT:    movlhps %xmm1, %xmm3 # encoding: [0x0f,0x16,0xd9] | 
|  | ; X64-SSE-NEXT:    # xmm3 = xmm3[0],xmm1[0] | 
|  | ; X64-SSE-NEXT:    movaps %xmm3, %xmm0 # encoding: [0x0f,0x28,0xc3] | 
|  | ; X64-SSE-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX1-LABEL: test_mm_set_ps: | 
|  | ; X64-AVX1:       # %bb.0: | 
|  | ; X64-AVX1-NEXT:    vinsertps $16, %xmm2, %xmm3, %xmm2 # encoding: [0xc4,0xe3,0x61,0x21,0xd2,0x10] | 
|  | ; X64-AVX1-NEXT:    # xmm2 = xmm3[0],xmm2[0],xmm3[2,3] | 
|  | ; X64-AVX1-NEXT:    vinsertps $32, %xmm1, %xmm2, %xmm1 # encoding: [0xc4,0xe3,0x69,0x21,0xc9,0x20] | 
|  | ; X64-AVX1-NEXT:    # xmm1 = xmm2[0,1],xmm1[0],xmm2[3] | 
|  | ; X64-AVX1-NEXT:    vinsertps $48, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x21,0xc0,0x30] | 
|  | ; X64-AVX1-NEXT:    # xmm0 = xmm1[0,1,2],xmm0[0] | 
|  | ; X64-AVX1-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX512-LABEL: test_mm_set_ps: | 
|  | ; X64-AVX512:       # %bb.0: | 
|  | ; X64-AVX512-NEXT:    vinsertps $16, %xmm2, %xmm3, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x61,0x21,0xd2,0x10] | 
|  | ; X64-AVX512-NEXT:    # xmm2 = xmm3[0],xmm2[0],xmm3[2,3] | 
|  | ; X64-AVX512-NEXT:    vinsertps $32, %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x69,0x21,0xc9,0x20] | 
|  | ; X64-AVX512-NEXT:    # xmm1 = xmm2[0,1],xmm1[0],xmm2[3] | 
|  | ; X64-AVX512-NEXT:    vinsertps $48, %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x71,0x21,0xc0,0x30] | 
|  | ; X64-AVX512-NEXT:    # xmm0 = xmm1[0,1,2],xmm0[0] | 
|  | ; X64-AVX512-NEXT:    retq # encoding: [0xc3] | 
|  | ; Lane index rises while the argument index falls, so the last argument lands in lane 0. | 
|  | %res0  = insertelement <4 x float> undef, float %a3, i32 0 | 
|  | %res1  = insertelement <4 x float> %res0, float %a2, i32 1 | 
|  | %res2  = insertelement <4 x float> %res1, float %a1, i32 2 | 
|  | %res3  = insertelement <4 x float> %res2, float %a0, i32 3 | 
|  | ret <4 x float> %res3 | 
|  | } | 
|  |  | 
|  | ; Splat test. Inserts the single scalar %a0 into all four lanes of a <4 x float>. | 
|  | ; The IR body is the same insertelement chain as in test_mm_set1_ps. | 
|  | define <4 x float> @test_mm_set_ps1(float %a0) nounwind { | 
|  | ; X86-SSE-LABEL: test_mm_set_ps1: | 
|  | ; X86-SSE:       # %bb.0: | 
|  | ; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04] | 
|  | ; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero | 
|  | ; X86-SSE-NEXT:    shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] | 
|  | ; X86-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0] | 
|  | ; X86-SSE-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX1-LABEL: test_mm_set_ps1: | 
|  | ; X86-AVX1:       # %bb.0: | 
|  | ; X86-AVX1-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04] | 
|  | ; X86-AVX1-NEXT:    # xmm0 = mem[0],zero,zero,zero | 
|  | ; X86-AVX1-NEXT:    vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] | 
|  | ; X86-AVX1-NEXT:    # xmm0 = xmm0[0,0,0,0] | 
|  | ; X86-AVX1-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX512-LABEL: test_mm_set_ps1: | 
|  | ; X86-AVX512:       # %bb.0: | 
|  | ; X86-AVX512-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04] | 
|  | ; X86-AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero | 
|  | ; X86-AVX512-NEXT:    vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] | 
|  | ; X86-AVX512-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-SSE-LABEL: test_mm_set_ps1: | 
|  | ; X64-SSE:       # %bb.0: | 
|  | ; X64-SSE-NEXT:    shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] | 
|  | ; X64-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0] | 
|  | ; X64-SSE-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX1-LABEL: test_mm_set_ps1: | 
|  | ; X64-AVX1:       # %bb.0: | 
|  | ; X64-AVX1-NEXT:    vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] | 
|  | ; X64-AVX1-NEXT:    # xmm0 = xmm0[0,0,0,0] | 
|  | ; X64-AVX1-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX512-LABEL: test_mm_set_ps1: | 
|  | ; X64-AVX512:       # %bb.0: | 
|  | ; X64-AVX512-NEXT:    vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] | 
|  | ; X64-AVX512-NEXT:    retq # encoding: [0xc3] | 
|  | %res0  = insertelement <4 x float> undef, float %a0, i32 0 | 
|  | %res1  = insertelement <4 x float> %res0, float %a0, i32 1 | 
|  | %res2  = insertelement <4 x float> %res1, float %a0, i32 2 | 
|  | %res3  = insertelement <4 x float> %res2, float %a0, i32 3 | 
|  | ret <4 x float> %res3 | 
|  | } | 
|  |  | 
|  | ; Read-modify-write of the MXCSR value through an i32 stack slot, using the | 
|  | ; stmxcsr intrinsic to read it and the ldmxcsr intrinsic to write it back. | 
|  | ; Bits 13 and 14 are cleared (mask -24577 == ~0x6000) and %a0 is ORed in. | 
|  | ; Presumably mirrors the clang macro of the same name - see the file header note. | 
|  | define void @test_MM_SET_ROUNDING_MODE(i32 %a0) nounwind { | 
|  | ; X86-SSE-LABEL: test_MM_SET_ROUNDING_MODE: | 
|  | ; X86-SSE:       # %bb.0: | 
|  | ; X86-SSE-NEXT:    pushl %eax # encoding: [0x50] | 
|  | ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] | 
|  | ; X86-SSE-NEXT:    movl %esp, %ecx # encoding: [0x89,0xe1] | 
|  | ; X86-SSE-NEXT:    stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19] | 
|  | ; X86-SSE-NEXT:    movl (%esp), %edx # encoding: [0x8b,0x14,0x24] | 
|  | ; X86-SSE-NEXT:    andl $-24577, %edx # encoding: [0x81,0xe2,0xff,0x9f,0xff,0xff] | 
|  | ; X86-SSE-NEXT:    # imm = 0x9FFF | 
|  | ; X86-SSE-NEXT:    orl %eax, %edx # encoding: [0x09,0xc2] | 
|  | ; X86-SSE-NEXT:    movl %edx, (%esp) # encoding: [0x89,0x14,0x24] | 
|  | ; X86-SSE-NEXT:    ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11] | 
|  | ; X86-SSE-NEXT:    popl %eax # encoding: [0x58] | 
|  | ; X86-SSE-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX-LABEL: test_MM_SET_ROUNDING_MODE: | 
|  | ; X86-AVX:       # %bb.0: | 
|  | ; X86-AVX-NEXT:    pushl %eax # encoding: [0x50] | 
|  | ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] | 
|  | ; X86-AVX-NEXT:    movl %esp, %ecx # encoding: [0x89,0xe1] | 
|  | ; X86-AVX-NEXT:    vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19] | 
|  | ; X86-AVX-NEXT:    movl (%esp), %edx # encoding: [0x8b,0x14,0x24] | 
|  | ; X86-AVX-NEXT:    andl $-24577, %edx # encoding: [0x81,0xe2,0xff,0x9f,0xff,0xff] | 
|  | ; X86-AVX-NEXT:    # imm = 0x9FFF | 
|  | ; X86-AVX-NEXT:    orl %eax, %edx # encoding: [0x09,0xc2] | 
|  | ; X86-AVX-NEXT:    movl %edx, (%esp) # encoding: [0x89,0x14,0x24] | 
|  | ; X86-AVX-NEXT:    vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11] | 
|  | ; X86-AVX-NEXT:    popl %eax # encoding: [0x58] | 
|  | ; X86-AVX-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-SSE-LABEL: test_MM_SET_ROUNDING_MODE: | 
|  | ; X64-SSE:       # %bb.0: | 
|  | ; X64-SSE-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] | 
|  | ; X64-SSE-NEXT:    stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] | 
|  | ; X64-SSE-NEXT:    movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] | 
|  | ; X64-SSE-NEXT:    andl $-24577, %ecx # encoding: [0x81,0xe1,0xff,0x9f,0xff,0xff] | 
|  | ; X64-SSE-NEXT:    # imm = 0x9FFF | 
|  | ; X64-SSE-NEXT:    orl %edi, %ecx # encoding: [0x09,0xf9] | 
|  | ; X64-SSE-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] | 
|  | ; X64-SSE-NEXT:    ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10] | 
|  | ; X64-SSE-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX-LABEL: test_MM_SET_ROUNDING_MODE: | 
|  | ; X64-AVX:       # %bb.0: | 
|  | ; X64-AVX-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] | 
|  | ; X64-AVX-NEXT:    vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] | 
|  | ; X64-AVX-NEXT:    movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] | 
|  | ; X64-AVX-NEXT:    andl $-24577, %ecx # encoding: [0x81,0xe1,0xff,0x9f,0xff,0xff] | 
|  | ; X64-AVX-NEXT:    # imm = 0x9FFF | 
|  | ; X64-AVX-NEXT:    orl %edi, %ecx # encoding: [0x09,0xf9] | 
|  | ; X64-AVX-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] | 
|  | ; X64-AVX-NEXT:    vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10] | 
|  | ; X64-AVX-NEXT:    retq # encoding: [0xc3] | 
|  | %1 = alloca i32, align 4 | 
|  | %2 = bitcast i32* %1 to i8* | 
|  | call void @llvm.x86.sse.stmxcsr(i8* %2) | 
|  | %3 = load i32, i32* %1 | 
|  | ; Clear bits 13 and 14 of the loaded value, then merge in the caller's bits. | 
|  | %4 = and i32 %3, -24577 | 
|  | %5 = or i32 %4, %a0 | 
|  | store i32 %5, i32* %1 | 
|  | call void @llvm.x86.sse.ldmxcsr(i8* %2) | 
|  | ret void | 
|  | } | 
|  |  | 
|  | ; Builds a <4 x float> with %a0 in lane 0 and the constant 0.0 in lanes 1 to 3. | 
|  | define <4 x float> @test_mm_set_ss(float %a0) nounwind { | 
|  | ; X86-SSE-LABEL: test_mm_set_ss: | 
|  | ; X86-SSE:       # %bb.0: | 
|  | ; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x04] | 
|  | ; X86-SSE-NEXT:    # xmm1 = mem[0],zero,zero,zero | 
|  | ; X86-SSE-NEXT:    xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0] | 
|  | ; X86-SSE-NEXT:    movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1] | 
|  | ; X86-SSE-NEXT:    # xmm0 = xmm1[0],xmm0[1,2,3] | 
|  | ; X86-SSE-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX1-LABEL: test_mm_set_ss: | 
|  | ; X86-AVX1:       # %bb.0: | 
|  | ; X86-AVX1-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04] | 
|  | ; X86-AVX1-NEXT:    # xmm0 = mem[0],zero,zero,zero | 
|  | ; X86-AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf0,0x57,0xc9] | 
|  | ; X86-AVX1-NEXT:    vblendps $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x01] | 
|  | ; X86-AVX1-NEXT:    # xmm0 = xmm0[0],xmm1[1,2,3] | 
|  | ; X86-AVX1-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX512-LABEL: test_mm_set_ss: | 
|  | ; X86-AVX512:       # %bb.0: | 
|  | ; X86-AVX512-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04] | 
|  | ; X86-AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero | 
|  | ; X86-AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf0,0x57,0xc9] | 
|  | ; X86-AVX512-NEXT:    vblendps $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x01] | 
|  | ; X86-AVX512-NEXT:    # xmm0 = xmm0[0],xmm1[1,2,3] | 
|  | ; X86-AVX512-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-SSE-LABEL: test_mm_set_ss: | 
|  | ; X64-SSE:       # %bb.0: | 
|  | ; X64-SSE-NEXT:    xorps %xmm1, %xmm1 # encoding: [0x0f,0x57,0xc9] | 
|  | ; X64-SSE-NEXT:    movss %xmm0, %xmm1 # encoding: [0xf3,0x0f,0x10,0xc8] | 
|  | ; X64-SSE-NEXT:    # xmm1 = xmm0[0],xmm1[1,2,3] | 
|  | ; X64-SSE-NEXT:    movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1] | 
|  | ; X64-SSE-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX-LABEL: test_mm_set_ss: | 
|  | ; X64-AVX:       # %bb.0: | 
|  | ; X64-AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf0,0x57,0xc9] | 
|  | ; X64-AVX-NEXT:    vblendps $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x01] | 
|  | ; X64-AVX-NEXT:    # xmm0 = xmm0[0],xmm1[1,2,3] | 
|  | ; X64-AVX-NEXT:    retq # encoding: [0xc3] | 
|  | ; Only lane 0 takes the argument. The remaining lanes are explicitly zeroed. | 
|  | %res0  = insertelement <4 x float> undef, float %a0, i32 0 | 
|  | %res1  = insertelement <4 x float> %res0, float 0.0, i32 1 | 
|  | %res2  = insertelement <4 x float> %res1, float 0.0, i32 2 | 
|  | %res3  = insertelement <4 x float> %res2, float 0.0, i32 3 | 
|  | ret <4 x float> %res3 | 
|  | } | 
|  |  | 
|  | ; Splat test. Inserts the single scalar %a0 into all four lanes of a <4 x float>. | 
|  | ; The IR body is the same insertelement chain as in test_mm_set_ps1. | 
|  | define <4 x float> @test_mm_set1_ps(float %a0) nounwind { | 
|  | ; X86-SSE-LABEL: test_mm_set1_ps: | 
|  | ; X86-SSE:       # %bb.0: | 
|  | ; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04] | 
|  | ; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero | 
|  | ; X86-SSE-NEXT:    shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] | 
|  | ; X86-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0] | 
|  | ; X86-SSE-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX1-LABEL: test_mm_set1_ps: | 
|  | ; X86-AVX1:       # %bb.0: | 
|  | ; X86-AVX1-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04] | 
|  | ; X86-AVX1-NEXT:    # xmm0 = mem[0],zero,zero,zero | 
|  | ; X86-AVX1-NEXT:    vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] | 
|  | ; X86-AVX1-NEXT:    # xmm0 = xmm0[0,0,0,0] | 
|  | ; X86-AVX1-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX512-LABEL: test_mm_set1_ps: | 
|  | ; X86-AVX512:       # %bb.0: | 
|  | ; X86-AVX512-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04] | 
|  | ; X86-AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero | 
|  | ; X86-AVX512-NEXT:    vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] | 
|  | ; X86-AVX512-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-SSE-LABEL: test_mm_set1_ps: | 
|  | ; X64-SSE:       # %bb.0: | 
|  | ; X64-SSE-NEXT:    shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] | 
|  | ; X64-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0] | 
|  | ; X64-SSE-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX1-LABEL: test_mm_set1_ps: | 
|  | ; X64-AVX1:       # %bb.0: | 
|  | ; X64-AVX1-NEXT:    vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] | 
|  | ; X64-AVX1-NEXT:    # xmm0 = xmm0[0,0,0,0] | 
|  | ; X64-AVX1-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX512-LABEL: test_mm_set1_ps: | 
|  | ; X64-AVX512:       # %bb.0: | 
|  | ; X64-AVX512-NEXT:    vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] | 
|  | ; X64-AVX512-NEXT:    retq # encoding: [0xc3] | 
|  | %res0  = insertelement <4 x float> undef, float %a0, i32 0 | 
|  | %res1  = insertelement <4 x float> %res0, float %a0, i32 1 | 
|  | %res2  = insertelement <4 x float> %res1, float %a0, i32 2 | 
|  | %res3  = insertelement <4 x float> %res2, float %a0, i32 3 | 
|  | ret <4 x float> %res3 | 
|  | } | 
|  |  | 
|  | ; Writes %a0 to MXCSR. The value is spilled to an i32 stack slot and the slot's | 
|  | ; address is passed to the ldmxcsr intrinsic, which takes a pointer operand. | 
|  | define void @test_mm_setcsr(i32 %a0) nounwind { | 
|  | ; X86-SSE-LABEL: test_mm_setcsr: | 
|  | ; X86-SSE:       # %bb.0: | 
|  | ; X86-SSE-NEXT:    leal {{[0-9]+}}(%esp), %eax # encoding: [0x8d,0x44,0x24,0x04] | 
|  | ; X86-SSE-NEXT:    ldmxcsr (%eax) # encoding: [0x0f,0xae,0x10] | 
|  | ; X86-SSE-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX-LABEL: test_mm_setcsr: | 
|  | ; X86-AVX:       # %bb.0: | 
|  | ; X86-AVX-NEXT:    leal {{[0-9]+}}(%esp), %eax # encoding: [0x8d,0x44,0x24,0x04] | 
|  | ; X86-AVX-NEXT:    vldmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x10] | 
|  | ; X86-AVX-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-SSE-LABEL: test_mm_setcsr: | 
|  | ; X64-SSE:       # %bb.0: | 
|  | ; X64-SSE-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x7c,0x24,0xfc] | 
|  | ; X64-SSE-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] | 
|  | ; X64-SSE-NEXT:    ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10] | 
|  | ; X64-SSE-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX-LABEL: test_mm_setcsr: | 
|  | ; X64-AVX:       # %bb.0: | 
|  | ; X64-AVX-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x7c,0x24,0xfc] | 
|  | ; X64-AVX-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] | 
|  | ; X64-AVX-NEXT:    vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10] | 
|  | ; X64-AVX-NEXT:    retq # encoding: [0xc3] | 
|  | %st = alloca i32, align 4 | 
|  | store i32 %a0, i32* %st, align 4 | 
|  | ; The intrinsic is declared with an i8* parameter, hence the bitcast of the slot address. | 
|  | %bc = bitcast i32* %st to i8* | 
|  | call void @llvm.x86.sse.ldmxcsr(i8* %bc) | 
|  | ret void | 
|  | } | 
|  |  | 
|  | define <4 x float> @test_mm_setr_ps(float %a0, float %a1, float %a2, float %a3) nounwind { | 
|  | ; X86-SSE-LABEL: test_mm_setr_ps: | 
|  | ; X86-SSE:       # %bb.0: | 
|  | ; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10] | 
|  | ; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero | 
|  | ; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x0c] | 
|  | ; X86-SSE-NEXT:    # xmm1 = mem[0],zero,zero,zero | 
|  | ; X86-SSE-NEXT:    unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8] | 
|  | ; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] | 
|  | ; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x08] | 
|  | ; X86-SSE-NEXT:    # xmm2 = mem[0],zero,zero,zero | 
|  | ; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04] | 
|  | ; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero | 
|  | ; X86-SSE-NEXT:    unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2] | 
|  | ; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] | 
|  | ; X86-SSE-NEXT:    movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] | 
|  | ; X86-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0] | 
|  | ; X86-SSE-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX1-LABEL: test_mm_setr_ps: | 
|  | ; X86-AVX1:       # %bb.0: | 
|  | ; X86-AVX1-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x10] | 
|  | ; X86-AVX1-NEXT:    # xmm0 = mem[0],zero,zero,zero | 
|  | ; X86-AVX1-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c] | 
|  | ; X86-AVX1-NEXT:    # xmm1 = mem[0],zero,zero,zero | 
|  | ; X86-AVX1-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xc5,0xfa,0x10,0x54,0x24,0x08] | 
|  | ; X86-AVX1-NEXT:    # xmm2 = mem[0],zero,zero,zero | 
|  | ; X86-AVX1-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm3 # encoding: [0xc5,0xfa,0x10,0x5c,0x24,0x04] | 
|  | ; X86-AVX1-NEXT:    # xmm3 = mem[0],zero,zero,zero | 
|  | ; X86-AVX1-NEXT:    vinsertps $16, %xmm2, %xmm3, %xmm2 # encoding: [0xc4,0xe3,0x61,0x21,0xd2,0x10] | 
|  | ; X86-AVX1-NEXT:    # xmm2 = xmm3[0],xmm2[0],xmm3[2,3] | 
|  | ; X86-AVX1-NEXT:    vinsertps $32, %xmm1, %xmm2, %xmm1 # encoding: [0xc4,0xe3,0x69,0x21,0xc9,0x20] | 
|  | ; X86-AVX1-NEXT:    # xmm1 = xmm2[0,1],xmm1[0],xmm2[3] | 
|  | ; X86-AVX1-NEXT:    vinsertps $48, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x21,0xc0,0x30] | 
|  | ; X86-AVX1-NEXT:    # xmm0 = xmm1[0,1,2],xmm0[0] | 
|  | ; X86-AVX1-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX512-LABEL: test_mm_setr_ps: | 
|  | ; X86-AVX512:       # %bb.0: | 
|  | ; X86-AVX512-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x10] | 
|  | ; X86-AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero | 
|  | ; X86-AVX512-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c] | 
|  | ; X86-AVX512-NEXT:    # xmm1 = mem[0],zero,zero,zero | 
|  | ; X86-AVX512-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x54,0x24,0x08] | 
|  | ; X86-AVX512-NEXT:    # xmm2 = mem[0],zero,zero,zero | 
|  | ; X86-AVX512-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x5c,0x24,0x04] | 
|  | ; X86-AVX512-NEXT:    # xmm3 = mem[0],zero,zero,zero | 
|  | ; X86-AVX512-NEXT:    vinsertps $16, %xmm2, %xmm3, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x61,0x21,0xd2,0x10] | 
|  | ; X86-AVX512-NEXT:    # xmm2 = xmm3[0],xmm2[0],xmm3[2,3] | 
|  | ; X86-AVX512-NEXT:    vinsertps $32, %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x69,0x21,0xc9,0x20] | 
|  | ; X86-AVX512-NEXT:    # xmm1 = xmm2[0,1],xmm1[0],xmm2[3] | 
|  | ; X86-AVX512-NEXT:    vinsertps $48, %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x71,0x21,0xc0,0x30] | 
|  | ; X86-AVX512-NEXT:    # xmm0 = xmm1[0,1,2],xmm0[0] | 
|  | ; X86-AVX512-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-SSE-LABEL: test_mm_setr_ps: | 
|  | ; X64-SSE:       # %bb.0: | 
|  | ; X64-SSE-NEXT:    unpcklps %xmm3, %xmm2 # encoding: [0x0f,0x14,0xd3] | 
|  | ; X64-SSE-NEXT:    # xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] | 
|  | ; X64-SSE-NEXT:    unpcklps %xmm1, %xmm0 # encoding: [0x0f,0x14,0xc1] | 
|  | ; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] | 
|  | ; X64-SSE-NEXT:    movlhps %xmm2, %xmm0 # encoding: [0x0f,0x16,0xc2] | 
|  | ; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm2[0] | 
|  | ; X64-SSE-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX1-LABEL: test_mm_setr_ps: | 
|  | ; X64-AVX1:       # %bb.0: | 
|  | ; X64-AVX1-NEXT:    vinsertps $16, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x10] | 
|  | ; X64-AVX1-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[2,3] | 
|  | ; X64-AVX1-NEXT:    vinsertps $32, %xmm2, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc2,0x20] | 
|  | ; X64-AVX1-NEXT:    # xmm0 = xmm0[0,1],xmm2[0],xmm0[3] | 
|  | ; X64-AVX1-NEXT:    vinsertps $48, %xmm3, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc3,0x30] | 
|  | ; X64-AVX1-NEXT:    # xmm0 = xmm0[0,1,2],xmm3[0] | 
|  | ; X64-AVX1-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX512-LABEL: test_mm_setr_ps: | 
|  | ; X64-AVX512:       # %bb.0: | 
|  | ; X64-AVX512-NEXT:    vinsertps $16, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x10] | 
|  | ; X64-AVX512-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[2,3] | 
|  | ; X64-AVX512-NEXT:    vinsertps $32, %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc2,0x20] | 
|  | ; X64-AVX512-NEXT:    # xmm0 = xmm0[0,1],xmm2[0],xmm0[3] | 
|  | ; X64-AVX512-NEXT:    vinsertps $48, %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc3,0x30] | 
|  | ; X64-AVX512-NEXT:    # xmm0 = xmm0[0,1,2],xmm3[0] | 
|  | ; X64-AVX512-NEXT:    retq # encoding: [0xc3] | 
|  | %res0  = insertelement <4 x float> undef, float %a0, i32 0 | 
|  | %res1  = insertelement <4 x float> %res0, float %a1, i32 1 | 
|  | %res2  = insertelement <4 x float> %res1, float %a2, i32 2 | 
|  | %res3  = insertelement <4 x float> %res2, float %a3, i32 3 | 
|  | ret <4 x float> %res3 | 
|  | } | 
|  |  | 
|  | ; Test for the _mm_setzero_ps pattern: the function takes no arguments and | 
|  | ; returns an all-zero <4 x float> constant. Every configuration is expected to | 
|  | ; materialize the zero vector by XOR-ing xmm0 with itself (xorps / vxorps). | 
|  | define <4 x float> @test_mm_setzero_ps() { | 
|  | ; SSE-LABEL: test_mm_setzero_ps: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_setzero_ps: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_setzero_ps: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc0] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ret <4 x float> zeroinitializer | 
|  | } | 
|  |  | 
|  | ; Test for the _mm_sfence pattern: a single call to the llvm.x86.sse.sfence | 
|  | ; intrinsic. The expectation uses the common check prefix shared by every run | 
|  | ; line, so all configurations must emit the same legacy-encoded sfence. | 
|  | define void @test_mm_sfence() nounwind { | 
|  | ; CHECK-LABEL: test_mm_sfence: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    sfence # encoding: [0x0f,0xae,0xf8] | 
|  | ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | call void @llvm.x86.sse.sfence() | 
|  | ret void | 
|  | } | 
|  | ; NOTE(review): the declaration below marks a fence intrinsic as readnone; | 
|  | ; presumably the intrinsic's built-in attributes take precedence - confirm. | 
|  | declare void @llvm.x86.sse.sfence() nounwind readnone | 
|  |  | 
|  | ; Test for the _mm_shuffle_ps pattern with an immediate of 0. | 
|  | ; The shufflevector mask <0, 0, 4, 4> takes element 0 of %a0 for the two low | 
|  | ; lanes and element 0 of %a1 (index 4 in the concatenated operands) for the two | 
|  | ; high lanes. All configurations are expected to select shufps / vshufps $0. | 
|  | define <4 x float> @test_mm_shuffle_ps(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_shuffle_ps: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    shufps $0, %xmm1, %xmm0 # encoding: [0x0f,0xc6,0xc1,0x00] | 
|  | ; SSE-NEXT:    # xmm0 = xmm0[0,0],xmm1[0,0] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_shuffle_ps: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vshufps $0, %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc6,0xc1,0x00] | 
|  | ; AVX1-NEXT:    # xmm0 = xmm0[0,0],xmm1[0,0] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_shuffle_ps: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vshufps $0, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0xc6,0xc1,0x00] | 
|  | ; AVX512-NEXT:    # xmm0 = xmm0[0,0],xmm1[0,0] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 0, i32 4, i32 4> | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; Test for the _mm_sqrt_ps pattern: a packed square root expressed through the | 
|  | ; generic llvm.sqrt.v4f32 intrinsic. The expected lowering is a single | 
|  | ; sqrtps / vsqrtps on xmm0. | 
|  | define <4 x float> @test_mm_sqrt_ps(<4 x float> %a0) { | 
|  | ; SSE-LABEL: test_mm_sqrt_ps: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    sqrtps %xmm0, %xmm0 # encoding: [0x0f,0x51,0xc0] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_sqrt_ps: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vsqrtps %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x51,0xc0] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_sqrt_ps: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vsqrtps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x51,0xc0] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a0) | 
|  | ret <4 x float> %res | 
|  | } | 
|  | ; Generic vector square-root intrinsic used by the test above. | 
|  | declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) nounwind readnone | 
|  |  | 
|  | ; Test for the _mm_sqrt_ss pattern: element 0 of %a0 is extracted, passed to | 
|  | ; the scalar llvm.sqrt.f32 intrinsic, and inserted back into element 0 of %a0, | 
|  | ; leaving the other three elements untouched. The extract/sqrt/insert sequence | 
|  | ; is expected to fold into one sqrtss / vsqrtss operating on xmm0 in place. | 
|  | define <4 x float> @test_mm_sqrt_ss(<4 x float> %a0) { | 
|  | ; SSE-LABEL: test_mm_sqrt_ss: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    sqrtss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x51,0xc0] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_sqrt_ss: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x51,0xc0] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_sqrt_ss: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x51,0xc0] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %ext = extractelement <4 x float> %a0, i32 0 | 
|  | %sqrt = call float @llvm.sqrt.f32(float %ext) | 
|  | %ins = insertelement <4 x float> %a0, float %sqrt, i32 0 | 
|  | ret <4 x float> %ins | 
|  | } | 
|  | ; Generic scalar square-root intrinsic called by the test above. | 
|  | declare float @llvm.sqrt.f32(float) nounwind readnone | 
|  |  | 
|  | ; Scalar variant of the sqrt_ss test: takes and returns a plain float and calls | 
|  | ; llvm.sqrt.f32 directly, with no vector extract/insert around it. | 
|  | ; | 
|  | ; On the i386 configurations the expectations show the argument being loaded | 
|  | ; from the stack, the result being spilled to a scratch slot created with | 
|  | ; pushl %eax, and then reloaded with flds so it is returned on the x87 stack. | 
|  | ; On the x86_64 configurations only the sqrtss / vsqrtss itself is expected. | 
|  | ; | 
|  | ; NOTE(review): the i386 expectations contain .cfi_def_cfa_offset directives, | 
|  | ; presumably because this function is not marked nounwind - confirm that is | 
|  | ; intentional before adding the attribute and regenerating the checks. | 
|  | define float @test_mm_sqrt_ss_scalar(float %a0) { | 
|  | ; X86-SSE-LABEL: test_mm_sqrt_ss_scalar: | 
|  | ; X86-SSE:       # %bb.0: | 
|  | ; X86-SSE-NEXT:    pushl %eax # encoding: [0x50] | 
|  | ; X86-SSE-NEXT:    .cfi_def_cfa_offset 8 | 
|  | ; X86-SSE-NEXT:    movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x08] | 
|  | ; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero | 
|  | ; X86-SSE-NEXT:    sqrtss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x51,0xc0] | 
|  | ; X86-SSE-NEXT:    movss %xmm0, (%esp) # encoding: [0xf3,0x0f,0x11,0x04,0x24] | 
|  | ; X86-SSE-NEXT:    flds (%esp) # encoding: [0xd9,0x04,0x24] | 
|  | ; X86-SSE-NEXT:    popl %eax # encoding: [0x58] | 
|  | ; X86-SSE-NEXT:    .cfi_def_cfa_offset 4 | 
|  | ; X86-SSE-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX1-LABEL: test_mm_sqrt_ss_scalar: | 
|  | ; X86-AVX1:       # %bb.0: | 
|  | ; X86-AVX1-NEXT:    pushl %eax # encoding: [0x50] | 
|  | ; X86-AVX1-NEXT:    .cfi_def_cfa_offset 8 | 
|  | ; X86-AVX1-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x08] | 
|  | ; X86-AVX1-NEXT:    # xmm0 = mem[0],zero,zero,zero | 
|  | ; X86-AVX1-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x51,0xc0] | 
|  | ; X86-AVX1-NEXT:    vmovss %xmm0, (%esp) # encoding: [0xc5,0xfa,0x11,0x04,0x24] | 
|  | ; X86-AVX1-NEXT:    flds (%esp) # encoding: [0xd9,0x04,0x24] | 
|  | ; X86-AVX1-NEXT:    popl %eax # encoding: [0x58] | 
|  | ; X86-AVX1-NEXT:    .cfi_def_cfa_offset 4 | 
|  | ; X86-AVX1-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX512-LABEL: test_mm_sqrt_ss_scalar: | 
|  | ; X86-AVX512:       # %bb.0: | 
|  | ; X86-AVX512-NEXT:    pushl %eax # encoding: [0x50] | 
|  | ; X86-AVX512-NEXT:    .cfi_def_cfa_offset 8 | 
|  | ; X86-AVX512-NEXT:    vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x08] | 
|  | ; X86-AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero | 
|  | ; X86-AVX512-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x51,0xc0] | 
|  | ; X86-AVX512-NEXT:    vmovss %xmm0, (%esp) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x04,0x24] | 
|  | ; X86-AVX512-NEXT:    flds (%esp) # encoding: [0xd9,0x04,0x24] | 
|  | ; X86-AVX512-NEXT:    popl %eax # encoding: [0x58] | 
|  | ; X86-AVX512-NEXT:    .cfi_def_cfa_offset 4 | 
|  | ; X86-AVX512-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-SSE-LABEL: test_mm_sqrt_ss_scalar: | 
|  | ; X64-SSE:       # %bb.0: | 
|  | ; X64-SSE-NEXT:    sqrtss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x51,0xc0] | 
|  | ; X64-SSE-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX1-LABEL: test_mm_sqrt_ss_scalar: | 
|  | ; X64-AVX1:       # %bb.0: | 
|  | ; X64-AVX1-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x51,0xc0] | 
|  | ; X64-AVX1-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX512-LABEL: test_mm_sqrt_ss_scalar: | 
|  | ; X64-AVX512:       # %bb.0: | 
|  | ; X64-AVX512-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x51,0xc0] | 
|  | ; X64-AVX512-NEXT:    retq # encoding: [0xc3] | 
|  | %sqrt = call float @llvm.sqrt.f32(float %a0) | 
|  | ret float %sqrt | 
|  | } | 
|  |  | 
|  | ; Test for the _mm_store_ps pattern: the float pointer is reinterpreted as a | 
|  | ; pointer to <4 x float> and the whole vector is stored with 16-byte alignment. | 
|  | ; The aligned store is expected to select movaps / vmovaps. On i386 the pointer | 
|  | ; is first loaded from the stack into eax; on x86_64 it arrives in rdi. | 
|  | define void @test_mm_store_ps(float *%a0, <4 x float> %a1) { | 
|  | ; X86-SSE-LABEL: test_mm_store_ps: | 
|  | ; X86-SSE:       # %bb.0: | 
|  | ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-SSE-NEXT:    movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00] | 
|  | ; X86-SSE-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX1-LABEL: test_mm_store_ps: | 
|  | ; X86-AVX1:       # %bb.0: | 
|  | ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-AVX1-NEXT:    vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00] | 
|  | ; X86-AVX1-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX512-LABEL: test_mm_store_ps: | 
|  | ; X86-AVX512:       # %bb.0: | 
|  | ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-AVX512-NEXT:    vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00] | 
|  | ; X86-AVX512-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-SSE-LABEL: test_mm_store_ps: | 
|  | ; X64-SSE:       # %bb.0: | 
|  | ; X64-SSE-NEXT:    movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] | 
|  | ; X64-SSE-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX1-LABEL: test_mm_store_ps: | 
|  | ; X64-AVX1:       # %bb.0: | 
|  | ; X64-AVX1-NEXT:    vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07] | 
|  | ; X64-AVX1-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX512-LABEL: test_mm_store_ps: | 
|  | ; X64-AVX512:       # %bb.0: | 
|  | ; X64-AVX512-NEXT:    vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] | 
|  | ; X64-AVX512-NEXT:    retq # encoding: [0xc3] | 
|  | %arg0 = bitcast float* %a0 to <4 x float>* | 
|  | store <4 x float> %a1, <4 x float>* %arg0, align 16 | 
|  | ret void | 
|  | } | 
|  |  | 
|  | ; Test for the _mm_store_ps1 pattern: element 0 of %a1 is broadcast to all four | 
|  | ; lanes (shufflevector with an all-zero mask) and the result is stored with | 
|  | ; 16-byte alignment. | 
|  | ; | 
|  | ; The splat is expected to lower differently per feature set: shufps $0 with | 
|  | ; plain SSE, vpermilps $0 with AVX, and vbroadcastss in the AVX512 runs. The | 
|  | ; store itself is movaps / vmovaps in every case. | 
|  | define void @test_mm_store_ps1(float *%a0, <4 x float> %a1) { | 
|  | ; X86-SSE-LABEL: test_mm_store_ps1: | 
|  | ; X86-SSE:       # %bb.0: | 
|  | ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-SSE-NEXT:    shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] | 
|  | ; X86-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0] | 
|  | ; X86-SSE-NEXT:    movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00] | 
|  | ; X86-SSE-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX1-LABEL: test_mm_store_ps1: | 
|  | ; X86-AVX1:       # %bb.0: | 
|  | ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-AVX1-NEXT:    vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] | 
|  | ; X86-AVX1-NEXT:    # xmm0 = xmm0[0,0,0,0] | 
|  | ; X86-AVX1-NEXT:    vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00] | 
|  | ; X86-AVX1-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX512-LABEL: test_mm_store_ps1: | 
|  | ; X86-AVX512:       # %bb.0: | 
|  | ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-AVX512-NEXT:    vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] | 
|  | ; X86-AVX512-NEXT:    vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00] | 
|  | ; X86-AVX512-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-SSE-LABEL: test_mm_store_ps1: | 
|  | ; X64-SSE:       # %bb.0: | 
|  | ; X64-SSE-NEXT:    shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] | 
|  | ; X64-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0] | 
|  | ; X64-SSE-NEXT:    movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] | 
|  | ; X64-SSE-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX1-LABEL: test_mm_store_ps1: | 
|  | ; X64-AVX1:       # %bb.0: | 
|  | ; X64-AVX1-NEXT:    vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] | 
|  | ; X64-AVX1-NEXT:    # xmm0 = xmm0[0,0,0,0] | 
|  | ; X64-AVX1-NEXT:    vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07] | 
|  | ; X64-AVX1-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX512-LABEL: test_mm_store_ps1: | 
|  | ; X64-AVX512:       # %bb.0: | 
|  | ; X64-AVX512-NEXT:    vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] | 
|  | ; X64-AVX512-NEXT:    vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] | 
|  | ; X64-AVX512-NEXT:    retq # encoding: [0xc3] | 
|  | %arg0 = bitcast float* %a0 to <4 x float>* | 
|  | %shuf = shufflevector <4 x float> %a1, <4 x float> undef, <4 x i32> zeroinitializer | 
|  | store <4 x float> %shuf, <4 x float>* %arg0, align 16 | 
|  | ret void | 
|  | } | 
|  |  | 
|  | ; Test for the _mm_store_ss pattern: element 0 of %a1 is extracted and stored | 
|  | ; through the float pointer with alignment 1 (i.e. no alignment assumption). | 
|  | ; The extract + store pair is expected to become one movss / vmovss from xmm0 | 
|  | ; straight to memory. | 
|  | define void @test_mm_store_ss(float *%a0, <4 x float> %a1) { | 
|  | ; X86-SSE-LABEL: test_mm_store_ss: | 
|  | ; X86-SSE:       # %bb.0: | 
|  | ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-SSE-NEXT:    movss %xmm0, (%eax) # encoding: [0xf3,0x0f,0x11,0x00] | 
|  | ; X86-SSE-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX1-LABEL: test_mm_store_ss: | 
|  | ; X86-AVX1:       # %bb.0: | 
|  | ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-AVX1-NEXT:    vmovss %xmm0, (%eax) # encoding: [0xc5,0xfa,0x11,0x00] | 
|  | ; X86-AVX1-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX512-LABEL: test_mm_store_ss: | 
|  | ; X86-AVX512:       # %bb.0: | 
|  | ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-AVX512-NEXT:    vmovss %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x00] | 
|  | ; X86-AVX512-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-SSE-LABEL: test_mm_store_ss: | 
|  | ; X64-SSE:       # %bb.0: | 
|  | ; X64-SSE-NEXT:    movss %xmm0, (%rdi) # encoding: [0xf3,0x0f,0x11,0x07] | 
|  | ; X64-SSE-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX1-LABEL: test_mm_store_ss: | 
|  | ; X64-AVX1:       # %bb.0: | 
|  | ; X64-AVX1-NEXT:    vmovss %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x11,0x07] | 
|  | ; X64-AVX1-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX512-LABEL: test_mm_store_ss: | 
|  | ; X64-AVX512:       # %bb.0: | 
|  | ; X64-AVX512-NEXT:    vmovss %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07] | 
|  | ; X64-AVX512-NEXT:    retq # encoding: [0xc3] | 
|  | %ext = extractelement <4 x float> %a1, i32 0 | 
|  | store float %ext, float* %a0, align 1 | 
|  | ret void | 
|  | } | 
|  |  | 
|  | ; Test for the _mm_store1_ps pattern. The IR body is the same as in | 
|  | ; test_mm_store_ps1: splat element 0 of %a1 across all four lanes, then do a | 
|  | ; 16-byte-aligned vector store. | 
|  | ; | 
|  | ; Expected splat lowering per feature set: shufps $0 (SSE), vpermilps $0 (AVX), | 
|  | ; vbroadcastss (AVX512 runs), each followed by movaps / vmovaps. | 
|  | define void @test_mm_store1_ps(float *%a0, <4 x float> %a1) { | 
|  | ; X86-SSE-LABEL: test_mm_store1_ps: | 
|  | ; X86-SSE:       # %bb.0: | 
|  | ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-SSE-NEXT:    shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] | 
|  | ; X86-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0] | 
|  | ; X86-SSE-NEXT:    movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00] | 
|  | ; X86-SSE-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX1-LABEL: test_mm_store1_ps: | 
|  | ; X86-AVX1:       # %bb.0: | 
|  | ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-AVX1-NEXT:    vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] | 
|  | ; X86-AVX1-NEXT:    # xmm0 = xmm0[0,0,0,0] | 
|  | ; X86-AVX1-NEXT:    vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00] | 
|  | ; X86-AVX1-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX512-LABEL: test_mm_store1_ps: | 
|  | ; X86-AVX512:       # %bb.0: | 
|  | ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-AVX512-NEXT:    vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] | 
|  | ; X86-AVX512-NEXT:    vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00] | 
|  | ; X86-AVX512-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-SSE-LABEL: test_mm_store1_ps: | 
|  | ; X64-SSE:       # %bb.0: | 
|  | ; X64-SSE-NEXT:    shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] | 
|  | ; X64-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0] | 
|  | ; X64-SSE-NEXT:    movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] | 
|  | ; X64-SSE-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX1-LABEL: test_mm_store1_ps: | 
|  | ; X64-AVX1:       # %bb.0: | 
|  | ; X64-AVX1-NEXT:    vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] | 
|  | ; X64-AVX1-NEXT:    # xmm0 = xmm0[0,0,0,0] | 
|  | ; X64-AVX1-NEXT:    vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07] | 
|  | ; X64-AVX1-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX512-LABEL: test_mm_store1_ps: | 
|  | ; X64-AVX512:       # %bb.0: | 
|  | ; X64-AVX512-NEXT:    vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] | 
|  | ; X64-AVX512-NEXT:    vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] | 
|  | ; X64-AVX512-NEXT:    retq # encoding: [0xc3] | 
|  | %arg0 = bitcast float* %a0 to <4 x float>* | 
|  | %shuf = shufflevector <4 x float> %a1, <4 x float> undef, <4 x i32> zeroinitializer | 
|  | store <4 x float> %shuf, <4 x float>* %arg0, align 16 | 
|  | ret void | 
|  | } | 
|  |  | 
|  | ; Test for the _mm_storeh_pi-style pattern: the vector is reinterpreted as | 
|  | ; <2 x i64>, its upper element (index 1) is extracted, and that 64-bit value is | 
|  | ; stored through the x86_mmx pointer cast to i64*. The store carries no | 
|  | ; explicit alignment. | 
|  | ; | 
|  | ; Expected lowering differs widely by configuration: | 
|  | ;  - i386 with SSE only: the vector is spilled to a 16-byte-aligned stack slot | 
|  | ;    (hence the ebp frame and the andl $-16) and the upper 8 bytes are copied | 
|  | ;    with two 32-bit integer moves. | 
|  | ;  - i386 with AVX / AVX512: a single vmovhpd to memory. | 
|  | ;  - x86_64 with SSE only: spill with movaps, reload the upper half with movq. | 
|  | ;  - x86_64 with AVX / AVX512: vpextrq $1 into rax, then an integer movq store. | 
|  | define void @test_mm_storeh_ps(x86_mmx *%a0, <4 x float> %a1) nounwind { | 
|  | ; X86-SSE-LABEL: test_mm_storeh_ps: | 
|  | ; X86-SSE:       # %bb.0: | 
|  | ; X86-SSE-NEXT:    pushl %ebp # encoding: [0x55] | 
|  | ; X86-SSE-NEXT:    movl %esp, %ebp # encoding: [0x89,0xe5] | 
|  | ; X86-SSE-NEXT:    andl $-16, %esp # encoding: [0x83,0xe4,0xf0] | 
|  | ; X86-SSE-NEXT:    subl $32, %esp # encoding: [0x83,0xec,0x20] | 
|  | ; X86-SSE-NEXT:    movl 8(%ebp), %eax # encoding: [0x8b,0x45,0x08] | 
|  | ; X86-SSE-NEXT:    movaps %xmm0, (%esp) # encoding: [0x0f,0x29,0x04,0x24] | 
|  | ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08] | 
|  | ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c] | 
|  | ; X86-SSE-NEXT:    movl %edx, 4(%eax) # encoding: [0x89,0x50,0x04] | 
|  | ; X86-SSE-NEXT:    movl %ecx, (%eax) # encoding: [0x89,0x08] | 
|  | ; X86-SSE-NEXT:    movl %ebp, %esp # encoding: [0x89,0xec] | 
|  | ; X86-SSE-NEXT:    popl %ebp # encoding: [0x5d] | 
|  | ; X86-SSE-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX1-LABEL: test_mm_storeh_ps: | 
|  | ; X86-AVX1:       # %bb.0: | 
|  | ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-AVX1-NEXT:    vmovhpd %xmm0, (%eax) # encoding: [0xc5,0xf9,0x17,0x00] | 
|  | ; X86-AVX1-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX512-LABEL: test_mm_storeh_ps: | 
|  | ; X86-AVX512:       # %bb.0: | 
|  | ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-AVX512-NEXT:    vmovhpd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x17,0x00] | 
|  | ; X86-AVX512-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-SSE-LABEL: test_mm_storeh_ps: | 
|  | ; X64-SSE:       # %bb.0: | 
|  | ; X64-SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x29,0x44,0x24,0xe8] | 
|  | ; X64-SSE-NEXT:    movq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8b,0x44,0x24,0xf0] | 
|  | ; X64-SSE-NEXT:    movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] | 
|  | ; X64-SSE-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX1-LABEL: test_mm_storeh_ps: | 
|  | ; X64-AVX1:       # %bb.0: | 
|  | ; X64-AVX1-NEXT:    vpextrq $1, %xmm0, %rax # encoding: [0xc4,0xe3,0xf9,0x16,0xc0,0x01] | 
|  | ; X64-AVX1-NEXT:    movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] | 
|  | ; X64-AVX1-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX512-LABEL: test_mm_storeh_ps: | 
|  | ; X64-AVX512:       # %bb.0: | 
|  | ; X64-AVX512-NEXT:    vpextrq $1, %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0x16,0xc0,0x01] | 
|  | ; X64-AVX512-NEXT:    movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] | 
|  | ; X64-AVX512-NEXT:    retq # encoding: [0xc3] | 
|  | %ptr = bitcast x86_mmx* %a0 to i64* | 
|  | %bc  = bitcast <4 x float> %a1 to <2 x i64> | 
|  | %ext = extractelement <2 x i64> %bc, i32 1 | 
|  | store i64 %ext, i64* %ptr | 
|  | ret void | 
|  | } | 
|  |  | 
|  | ; Test for the _mm_storel_pi-style pattern: the vector is reinterpreted as | 
|  | ; <2 x i64>, its lower element (index 0) is extracted, and that 64-bit value is | 
|  | ; stored through the x86_mmx pointer cast to i64*. The store carries no | 
|  | ; explicit alignment. | 
|  | ; | 
|  | ; Expected lowering by configuration: | 
|  | ;  - i386 with SSE only: spill the vector to a 16-byte-aligned stack slot and | 
|  | ;    copy the low 8 bytes with two 32-bit integer moves. | 
|  | ;  - i386 with AVX / AVX512: a single vmovlps to memory. | 
|  | ;  - x86_64 with SSE only: spill with movaps, reload the low half with movq | 
|  | ;    from the same stack offset that was written. | 
|  | ;  - x86_64 with AVX / AVX512: vmovq into rax, then an integer movq store. | 
|  | define void @test_mm_storel_ps(x86_mmx *%a0, <4 x float> %a1) nounwind { | 
|  | ; X86-SSE-LABEL: test_mm_storel_ps: | 
|  | ; X86-SSE:       # %bb.0: | 
|  | ; X86-SSE-NEXT:    pushl %ebp # encoding: [0x55] | 
|  | ; X86-SSE-NEXT:    movl %esp, %ebp # encoding: [0x89,0xe5] | 
|  | ; X86-SSE-NEXT:    andl $-16, %esp # encoding: [0x83,0xe4,0xf0] | 
|  | ; X86-SSE-NEXT:    subl $32, %esp # encoding: [0x83,0xec,0x20] | 
|  | ; X86-SSE-NEXT:    movl 8(%ebp), %eax # encoding: [0x8b,0x45,0x08] | 
|  | ; X86-SSE-NEXT:    movaps %xmm0, (%esp) # encoding: [0x0f,0x29,0x04,0x24] | 
|  | ; X86-SSE-NEXT:    movl (%esp), %ecx # encoding: [0x8b,0x0c,0x24] | 
|  | ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x04] | 
|  | ; X86-SSE-NEXT:    movl %edx, 4(%eax) # encoding: [0x89,0x50,0x04] | 
|  | ; X86-SSE-NEXT:    movl %ecx, (%eax) # encoding: [0x89,0x08] | 
|  | ; X86-SSE-NEXT:    movl %ebp, %esp # encoding: [0x89,0xec] | 
|  | ; X86-SSE-NEXT:    popl %ebp # encoding: [0x5d] | 
|  | ; X86-SSE-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX1-LABEL: test_mm_storel_ps: | 
|  | ; X86-AVX1:       # %bb.0: | 
|  | ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-AVX1-NEXT:    vmovlps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x13,0x00] | 
|  | ; X86-AVX1-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX512-LABEL: test_mm_storel_ps: | 
|  | ; X86-AVX512:       # %bb.0: | 
|  | ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-AVX512-NEXT:    vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00] | 
|  | ; X86-AVX512-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-SSE-LABEL: test_mm_storel_ps: | 
|  | ; X64-SSE:       # %bb.0: | 
|  | ; X64-SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x29,0x44,0x24,0xe8] | 
|  | ; X64-SSE-NEXT:    movq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8b,0x44,0x24,0xe8] | 
|  | ; X64-SSE-NEXT:    movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] | 
|  | ; X64-SSE-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX1-LABEL: test_mm_storel_ps: | 
|  | ; X64-AVX1:       # %bb.0: | 
|  | ; X64-AVX1-NEXT:    vmovq %xmm0, %rax # encoding: [0xc4,0xe1,0xf9,0x7e,0xc0] | 
|  | ; X64-AVX1-NEXT:    movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] | 
|  | ; X64-AVX1-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX512-LABEL: test_mm_storel_ps: | 
|  | ; X64-AVX512:       # %bb.0: | 
|  | ; X64-AVX512-NEXT:    vmovq %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x7e,0xc0] | 
|  | ; X64-AVX512-NEXT:    movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] | 
|  | ; X64-AVX512-NEXT:    retq # encoding: [0xc3] | 
|  | %ptr = bitcast x86_mmx* %a0 to i64* | 
|  | %bc  = bitcast <4 x float> %a1 to <2 x i64> | 
|  | %ext = extractelement <2 x i64> %bc, i32 0 | 
|  | store i64 %ext, i64* %ptr | 
|  | ret void | 
|  | } | 
|  |  | 
|  | ; Test for the _mm_storer_ps pattern: the four lanes of %a1 are reversed with | 
|  | ; the shuffle mask <3, 2, 1, 0> and the result is stored with 16-byte | 
|  | ; alignment. | 
|  | ; | 
|  | ; The reversal is expected as shufps $27 with plain SSE and vpermilps $27 with | 
|  | ; AVX / AVX512 (27 = 0x1b, the immediate encoding lanes [3,2,1,0]), followed by | 
|  | ; an aligned movaps / vmovaps store. | 
|  | define void @test_mm_storer_ps(float *%a0, <4 x float> %a1) { | 
|  | ; X86-SSE-LABEL: test_mm_storer_ps: | 
|  | ; X86-SSE:       # %bb.0: | 
|  | ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-SSE-NEXT:    shufps $27, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x1b] | 
|  | ; X86-SSE-NEXT:    # xmm0 = xmm0[3,2,1,0] | 
|  | ; X86-SSE-NEXT:    movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00] | 
|  | ; X86-SSE-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX1-LABEL: test_mm_storer_ps: | 
|  | ; X86-AVX1:       # %bb.0: | 
|  | ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-AVX1-NEXT:    vpermilps $27, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b] | 
|  | ; X86-AVX1-NEXT:    # xmm0 = xmm0[3,2,1,0] | 
|  | ; X86-AVX1-NEXT:    vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00] | 
|  | ; X86-AVX1-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX512-LABEL: test_mm_storer_ps: | 
|  | ; X86-AVX512:       # %bb.0: | 
|  | ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-AVX512-NEXT:    vpermilps $27, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b] | 
|  | ; X86-AVX512-NEXT:    # xmm0 = xmm0[3,2,1,0] | 
|  | ; X86-AVX512-NEXT:    vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00] | 
|  | ; X86-AVX512-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-SSE-LABEL: test_mm_storer_ps: | 
|  | ; X64-SSE:       # %bb.0: | 
|  | ; X64-SSE-NEXT:    shufps $27, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x1b] | 
|  | ; X64-SSE-NEXT:    # xmm0 = xmm0[3,2,1,0] | 
|  | ; X64-SSE-NEXT:    movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] | 
|  | ; X64-SSE-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX1-LABEL: test_mm_storer_ps: | 
|  | ; X64-AVX1:       # %bb.0: | 
|  | ; X64-AVX1-NEXT:    vpermilps $27, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b] | 
|  | ; X64-AVX1-NEXT:    # xmm0 = xmm0[3,2,1,0] | 
|  | ; X64-AVX1-NEXT:    vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07] | 
|  | ; X64-AVX1-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX512-LABEL: test_mm_storer_ps: | 
|  | ; X64-AVX512:       # %bb.0: | 
|  | ; X64-AVX512-NEXT:    vpermilps $27, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b] | 
|  | ; X64-AVX512-NEXT:    # xmm0 = xmm0[3,2,1,0] | 
|  | ; X64-AVX512-NEXT:    vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] | 
|  | ; X64-AVX512-NEXT:    retq # encoding: [0xc3] | 
|  | %arg0 = bitcast float* %a0 to <4 x float>* | 
|  | %shuf = shufflevector <4 x float> %a1, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> | 
|  | store <4 x float> %shuf, <4 x float>* %arg0, align 16 | 
|  | ret void | 
|  | } | 
|  |  | 
|  | ; Test for the _mm_storeu_ps pattern: same cast-and-store shape as the aligned | 
|  | ; vector store test, but the store is marked align 1, so the unaligned | 
|  | ; movups / vmovups form is expected instead of movaps. | 
|  | define void @test_mm_storeu_ps(float *%a0, <4 x float> %a1) { | 
|  | ; X86-SSE-LABEL: test_mm_storeu_ps: | 
|  | ; X86-SSE:       # %bb.0: | 
|  | ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-SSE-NEXT:    movups %xmm0, (%eax) # encoding: [0x0f,0x11,0x00] | 
|  | ; X86-SSE-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX1-LABEL: test_mm_storeu_ps: | 
|  | ; X86-AVX1:       # %bb.0: | 
|  | ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-AVX1-NEXT:    vmovups %xmm0, (%eax) # encoding: [0xc5,0xf8,0x11,0x00] | 
|  | ; X86-AVX1-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX512-LABEL: test_mm_storeu_ps: | 
|  | ; X86-AVX512:       # %bb.0: | 
|  | ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-AVX512-NEXT:    vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00] | 
|  | ; X86-AVX512-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-SSE-LABEL: test_mm_storeu_ps: | 
|  | ; X64-SSE:       # %bb.0: | 
|  | ; X64-SSE-NEXT:    movups %xmm0, (%rdi) # encoding: [0x0f,0x11,0x07] | 
|  | ; X64-SSE-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX1-LABEL: test_mm_storeu_ps: | 
|  | ; X64-AVX1:       # %bb.0: | 
|  | ; X64-AVX1-NEXT:    vmovups %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x11,0x07] | 
|  | ; X64-AVX1-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX512-LABEL: test_mm_storeu_ps: | 
|  | ; X64-AVX512:       # %bb.0: | 
|  | ; X64-AVX512-NEXT:    vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07] | 
|  | ; X64-AVX512-NEXT:    retq # encoding: [0xc3] | 
|  | %arg0 = bitcast float* %a0 to <4 x float>* | 
|  | store <4 x float> %a1, <4 x float>* %arg0, align 1 | 
|  | ret void | 
|  | } | 
|  |  | 
|  | ; Test for the _mm_stream_ps pattern: a 16-byte-aligned vector store tagged | 
|  | ; with !nontemporal metadata. The tag is what turns the expected instruction | 
|  | ; into the non-temporal movntps / vmovntps rather than an ordinary aligned | 
|  | ; store. | 
|  | ; NOTE(review): metadata node !0 is defined outside this function; presumably | 
|  | ; it is the usual !{i32 1} node - confirm at the end of the file. | 
|  | define void @test_mm_stream_ps(float *%a0, <4 x float> %a1) { | 
|  | ; X86-SSE-LABEL: test_mm_stream_ps: | 
|  | ; X86-SSE:       # %bb.0: | 
|  | ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-SSE-NEXT:    movntps %xmm0, (%eax) # encoding: [0x0f,0x2b,0x00] | 
|  | ; X86-SSE-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX1-LABEL: test_mm_stream_ps: | 
|  | ; X86-AVX1:       # %bb.0: | 
|  | ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-AVX1-NEXT:    vmovntps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x2b,0x00] | 
|  | ; X86-AVX1-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX512-LABEL: test_mm_stream_ps: | 
|  | ; X86-AVX512:       # %bb.0: | 
|  | ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] | 
|  | ; X86-AVX512-NEXT:    vmovntps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x00] | 
|  | ; X86-AVX512-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-SSE-LABEL: test_mm_stream_ps: | 
|  | ; X64-SSE:       # %bb.0: | 
|  | ; X64-SSE-NEXT:    movntps %xmm0, (%rdi) # encoding: [0x0f,0x2b,0x07] | 
|  | ; X64-SSE-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX1-LABEL: test_mm_stream_ps: | 
|  | ; X64-AVX1:       # %bb.0: | 
|  | ; X64-AVX1-NEXT:    vmovntps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x2b,0x07] | 
|  | ; X64-AVX1-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX512-LABEL: test_mm_stream_ps: | 
|  | ; X64-AVX512:       # %bb.0: | 
|  | ; X64-AVX512-NEXT:    vmovntps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x07] | 
|  | ; X64-AVX512-NEXT:    retq # encoding: [0xc3] | 
|  | %arg0 = bitcast float* %a0 to <4 x float>* | 
|  | store <4 x float> %a1, <4 x float>* %arg0, align 16, !nontemporal !0 | 
|  | ret void | 
|  | } | 
|  |  | 
|  | ; Test for the _mm_sub_ps pattern: a plain element-wise fsub of two | 
|  | ; <4 x float> values (%a0 - %a1). The expected lowering is a single | 
|  | ; subps / vsubps with xmm1 as the subtrahend and the result in xmm0. | 
|  | define <4 x float> @test_mm_sub_ps(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_sub_ps: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    subps %xmm1, %xmm0 # encoding: [0x0f,0x5c,0xc1] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_sub_ps: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vsubps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5c,0xc1] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_sub_ps: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vsubps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5c,0xc1] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = fsub <4 x float> %a0, %a1 | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; Scalar subtract on lane 0 only. Element 0 is extracted from each operand, | 
|  | ; subtracted as a float, and inserted back into lane 0 of %a0, so the other | 
|  | ; three lanes of the result come from %a0 unchanged. | 
|  | ; The extract/fsub/insert sequence is expected to fold into a single | 
|  | ; subss (or vsubss with AVX) instruction. | 
|  | define <4 x float> @test_mm_sub_ss(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_sub_ss: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    subss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5c,0xc1] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_sub_ss: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vsubss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5c,0xc1] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_sub_ss: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vsubss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5c,0xc1] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %ext0 = extractelement <4 x float> %a0, i32 0 | 
|  | %ext1 = extractelement <4 x float> %a1, i32 0 | 
|  | %fsub = fsub float %ext0, %ext1 | 
|  | %res = insertelement <4 x float> %a0, float %fsub, i32 0 | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; In-place 4x4 float transpose over four row pointers. | 
|  | ; The four rows are loaded from %a0..%a3 (16-byte aligned), interleaved pairwise | 
|  | ; into %tmp0..%tmp3 (low halves of rows 0/1 and 2/3, then high halves), and the | 
|  | ; 64-bit halves of those temporaries are recombined into %res0..%res3, so that | 
|  | ; %resN holds element N of every input row. The results are stored back | 
|  | ; through the same four pointers. | 
|  | ; Assertions are split per target and feature set because the i386 runs load | 
|  | ; the pointer arguments from the stack (and save/restore %esi), while the | 
|  | ; x86_64 runs receive them in %rdi, %rsi, %rdx and %rcx. | 
|  | define void @test_MM_TRANSPOSE4_PS(<4 x float>* %a0, <4 x float>* %a1, <4 x float>* %a2, <4 x float>* %a3) nounwind { | 
|  | ; X86-SSE-LABEL: test_MM_TRANSPOSE4_PS: | 
|  | ; X86-SSE:       # %bb.0: | 
|  | ; X86-SSE-NEXT:    pushl %esi # encoding: [0x56] | 
|  | ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14] | 
|  | ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10] | 
|  | ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c] | 
|  | ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08] | 
|  | ; X86-SSE-NEXT:    movaps (%esi), %xmm0 # encoding: [0x0f,0x28,0x06] | 
|  | ; X86-SSE-NEXT:    movaps (%edx), %xmm1 # encoding: [0x0f,0x28,0x0a] | 
|  | ; X86-SSE-NEXT:    movaps (%ecx), %xmm2 # encoding: [0x0f,0x28,0x11] | 
|  | ; X86-SSE-NEXT:    movaps (%eax), %xmm3 # encoding: [0x0f,0x28,0x18] | 
|  | ; X86-SSE-NEXT:    movaps %xmm0, %xmm4 # encoding: [0x0f,0x28,0xe0] | 
|  | ; X86-SSE-NEXT:    unpcklps %xmm1, %xmm4 # encoding: [0x0f,0x14,0xe1] | 
|  | ; X86-SSE-NEXT:    # xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] | 
|  | ; X86-SSE-NEXT:    movaps %xmm2, %xmm5 # encoding: [0x0f,0x28,0xea] | 
|  | ; X86-SSE-NEXT:    unpcklps %xmm3, %xmm5 # encoding: [0x0f,0x14,0xeb] | 
|  | ; X86-SSE-NEXT:    # xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1] | 
|  | ; X86-SSE-NEXT:    unpckhps %xmm1, %xmm0 # encoding: [0x0f,0x15,0xc1] | 
|  | ; X86-SSE-NEXT:    # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] | 
|  | ; X86-SSE-NEXT:    unpckhps %xmm3, %xmm2 # encoding: [0x0f,0x15,0xd3] | 
|  | ; X86-SSE-NEXT:    # xmm2 = xmm2[2],xmm3[2],xmm2[3],xmm3[3] | 
|  | ; X86-SSE-NEXT:    movaps %xmm4, %xmm1 # encoding: [0x0f,0x28,0xcc] | 
|  | ; X86-SSE-NEXT:    movlhps %xmm5, %xmm1 # encoding: [0x0f,0x16,0xcd] | 
|  | ; X86-SSE-NEXT:    # xmm1 = xmm1[0],xmm5[0] | 
|  | ; X86-SSE-NEXT:    movhlps %xmm4, %xmm5 # encoding: [0x0f,0x12,0xec] | 
|  | ; X86-SSE-NEXT:    # xmm5 = xmm4[1],xmm5[1] | 
|  | ; X86-SSE-NEXT:    movaps %xmm0, %xmm3 # encoding: [0x0f,0x28,0xd8] | 
|  | ; X86-SSE-NEXT:    movlhps %xmm2, %xmm3 # encoding: [0x0f,0x16,0xda] | 
|  | ; X86-SSE-NEXT:    # xmm3 = xmm3[0],xmm2[0] | 
|  | ; X86-SSE-NEXT:    movhlps %xmm0, %xmm2 # encoding: [0x0f,0x12,0xd0] | 
|  | ; X86-SSE-NEXT:    # xmm2 = xmm0[1],xmm2[1] | 
|  | ; X86-SSE-NEXT:    movaps %xmm1, (%esi) # encoding: [0x0f,0x29,0x0e] | 
|  | ; X86-SSE-NEXT:    movaps %xmm5, (%edx) # encoding: [0x0f,0x29,0x2a] | 
|  | ; X86-SSE-NEXT:    movaps %xmm3, (%ecx) # encoding: [0x0f,0x29,0x19] | 
|  | ; X86-SSE-NEXT:    movaps %xmm2, (%eax) # encoding: [0x0f,0x29,0x10] | 
|  | ; X86-SSE-NEXT:    popl %esi # encoding: [0x5e] | 
|  | ; X86-SSE-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX1-LABEL: test_MM_TRANSPOSE4_PS: | 
|  | ; X86-AVX1:       # %bb.0: | 
|  | ; X86-AVX1-NEXT:    pushl %esi # encoding: [0x56] | 
|  | ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14] | 
|  | ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10] | 
|  | ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c] | 
|  | ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08] | 
|  | ; X86-AVX1-NEXT:    vmovaps (%esi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x06] | 
|  | ; X86-AVX1-NEXT:    vmovaps (%edx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x0a] | 
|  | ; X86-AVX1-NEXT:    vmovaps (%ecx), %xmm2 # encoding: [0xc5,0xf8,0x28,0x11] | 
|  | ; X86-AVX1-NEXT:    vmovaps (%eax), %xmm3 # encoding: [0xc5,0xf8,0x28,0x18] | 
|  | ; X86-AVX1-NEXT:    vunpcklps %xmm1, %xmm0, %xmm4 # encoding: [0xc5,0xf8,0x14,0xe1] | 
|  | ; X86-AVX1-NEXT:    # xmm4 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] | 
|  | ; X86-AVX1-NEXT:    vunpcklps %xmm3, %xmm2, %xmm5 # encoding: [0xc5,0xe8,0x14,0xeb] | 
|  | ; X86-AVX1-NEXT:    # xmm5 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] | 
|  | ; X86-AVX1-NEXT:    vunpckhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x15,0xc1] | 
|  | ; X86-AVX1-NEXT:    # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] | 
|  | ; X86-AVX1-NEXT:    vunpckhps %xmm3, %xmm2, %xmm1 # encoding: [0xc5,0xe8,0x15,0xcb] | 
|  | ; X86-AVX1-NEXT:    # xmm1 = xmm2[2],xmm3[2],xmm2[3],xmm3[3] | 
|  | ; X86-AVX1-NEXT:    vmovlhps %xmm5, %xmm4, %xmm2 # encoding: [0xc5,0xd8,0x16,0xd5] | 
|  | ; X86-AVX1-NEXT:    # xmm2 = xmm4[0],xmm5[0] | 
|  | ; X86-AVX1-NEXT:    vunpckhpd %xmm5, %xmm4, %xmm3 # encoding: [0xc5,0xd9,0x15,0xdd] | 
|  | ; X86-AVX1-NEXT:    # xmm3 = xmm4[1],xmm5[1] | 
|  | ; X86-AVX1-NEXT:    vmovlhps %xmm1, %xmm0, %xmm4 # encoding: [0xc5,0xf8,0x16,0xe1] | 
|  | ; X86-AVX1-NEXT:    # xmm4 = xmm0[0],xmm1[0] | 
|  | ; X86-AVX1-NEXT:    vunpckhpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x15,0xc1] | 
|  | ; X86-AVX1-NEXT:    # xmm0 = xmm0[1],xmm1[1] | 
|  | ; X86-AVX1-NEXT:    vmovaps %xmm2, (%esi) # encoding: [0xc5,0xf8,0x29,0x16] | 
|  | ; X86-AVX1-NEXT:    vmovaps %xmm3, (%edx) # encoding: [0xc5,0xf8,0x29,0x1a] | 
|  | ; X86-AVX1-NEXT:    vmovaps %xmm4, (%ecx) # encoding: [0xc5,0xf8,0x29,0x21] | 
|  | ; X86-AVX1-NEXT:    vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00] | 
|  | ; X86-AVX1-NEXT:    popl %esi # encoding: [0x5e] | 
|  | ; X86-AVX1-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X86-AVX512-LABEL: test_MM_TRANSPOSE4_PS: | 
|  | ; X86-AVX512:       # %bb.0: | 
|  | ; X86-AVX512-NEXT:    pushl %esi # encoding: [0x56] | 
|  | ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14] | 
|  | ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10] | 
|  | ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c] | 
|  | ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08] | 
|  | ; X86-AVX512-NEXT:    vmovaps (%esi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x06] | 
|  | ; X86-AVX512-NEXT:    vmovaps (%edx), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x0a] | 
|  | ; X86-AVX512-NEXT:    vmovaps (%ecx), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x11] | 
|  | ; X86-AVX512-NEXT:    vmovaps (%eax), %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x18] | 
|  | ; X86-AVX512-NEXT:    vunpcklps %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xe1] | 
|  | ; X86-AVX512-NEXT:    # xmm4 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] | 
|  | ; X86-AVX512-NEXT:    vunpcklps %xmm3, %xmm2, %xmm5 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x14,0xeb] | 
|  | ; X86-AVX512-NEXT:    # xmm5 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] | 
|  | ; X86-AVX512-NEXT:    vunpckhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xc1] | 
|  | ; X86-AVX512-NEXT:    # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] | 
|  | ; X86-AVX512-NEXT:    vunpckhps %xmm3, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x15,0xcb] | 
|  | ; X86-AVX512-NEXT:    # xmm1 = xmm2[2],xmm3[2],xmm2[3],xmm3[3] | 
|  | ; X86-AVX512-NEXT:    vmovlhps %xmm5, %xmm4, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xd8,0x16,0xd5] | 
|  | ; X86-AVX512-NEXT:    # xmm2 = xmm4[0],xmm5[0] | 
|  | ; X86-AVX512-NEXT:    vunpckhpd %xmm5, %xmm4, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0x15,0xdd] | 
|  | ; X86-AVX512-NEXT:    # xmm3 = xmm4[1],xmm5[1] | 
|  | ; X86-AVX512-NEXT:    vmovlhps %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xe1] | 
|  | ; X86-AVX512-NEXT:    # xmm4 = xmm0[0],xmm1[0] | 
|  | ; X86-AVX512-NEXT:    vunpckhpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xc1] | 
|  | ; X86-AVX512-NEXT:    # xmm0 = xmm0[1],xmm1[1] | 
|  | ; X86-AVX512-NEXT:    vmovaps %xmm2, (%esi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x16] | 
|  | ; X86-AVX512-NEXT:    vmovaps %xmm3, (%edx) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x1a] | 
|  | ; X86-AVX512-NEXT:    vmovaps %xmm4, (%ecx) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x21] | 
|  | ; X86-AVX512-NEXT:    vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00] | 
|  | ; X86-AVX512-NEXT:    popl %esi # encoding: [0x5e] | 
|  | ; X86-AVX512-NEXT:    retl # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-SSE-LABEL: test_MM_TRANSPOSE4_PS: | 
|  | ; X64-SSE:       # %bb.0: | 
|  | ; X64-SSE-NEXT:    movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07] | 
|  | ; X64-SSE-NEXT:    movaps (%rsi), %xmm1 # encoding: [0x0f,0x28,0x0e] | 
|  | ; X64-SSE-NEXT:    movaps (%rdx), %xmm2 # encoding: [0x0f,0x28,0x12] | 
|  | ; X64-SSE-NEXT:    movaps (%rcx), %xmm3 # encoding: [0x0f,0x28,0x19] | 
|  | ; X64-SSE-NEXT:    movaps %xmm0, %xmm4 # encoding: [0x0f,0x28,0xe0] | 
|  | ; X64-SSE-NEXT:    unpcklps %xmm1, %xmm4 # encoding: [0x0f,0x14,0xe1] | 
|  | ; X64-SSE-NEXT:    # xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] | 
|  | ; X64-SSE-NEXT:    movaps %xmm2, %xmm5 # encoding: [0x0f,0x28,0xea] | 
|  | ; X64-SSE-NEXT:    unpcklps %xmm3, %xmm5 # encoding: [0x0f,0x14,0xeb] | 
|  | ; X64-SSE-NEXT:    # xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1] | 
|  | ; X64-SSE-NEXT:    unpckhps %xmm1, %xmm0 # encoding: [0x0f,0x15,0xc1] | 
|  | ; X64-SSE-NEXT:    # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] | 
|  | ; X64-SSE-NEXT:    unpckhps %xmm3, %xmm2 # encoding: [0x0f,0x15,0xd3] | 
|  | ; X64-SSE-NEXT:    # xmm2 = xmm2[2],xmm3[2],xmm2[3],xmm3[3] | 
|  | ; X64-SSE-NEXT:    movaps %xmm4, %xmm1 # encoding: [0x0f,0x28,0xcc] | 
|  | ; X64-SSE-NEXT:    movlhps %xmm5, %xmm1 # encoding: [0x0f,0x16,0xcd] | 
|  | ; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm5[0] | 
|  | ; X64-SSE-NEXT:    movhlps %xmm4, %xmm5 # encoding: [0x0f,0x12,0xec] | 
|  | ; X64-SSE-NEXT:    # xmm5 = xmm4[1],xmm5[1] | 
|  | ; X64-SSE-NEXT:    movaps %xmm0, %xmm3 # encoding: [0x0f,0x28,0xd8] | 
|  | ; X64-SSE-NEXT:    movlhps %xmm2, %xmm3 # encoding: [0x0f,0x16,0xda] | 
|  | ; X64-SSE-NEXT:    # xmm3 = xmm3[0],xmm2[0] | 
|  | ; X64-SSE-NEXT:    movhlps %xmm0, %xmm2 # encoding: [0x0f,0x12,0xd0] | 
|  | ; X64-SSE-NEXT:    # xmm2 = xmm0[1],xmm2[1] | 
|  | ; X64-SSE-NEXT:    movaps %xmm1, (%rdi) # encoding: [0x0f,0x29,0x0f] | 
|  | ; X64-SSE-NEXT:    movaps %xmm5, (%rsi) # encoding: [0x0f,0x29,0x2e] | 
|  | ; X64-SSE-NEXT:    movaps %xmm3, (%rdx) # encoding: [0x0f,0x29,0x1a] | 
|  | ; X64-SSE-NEXT:    movaps %xmm2, (%rcx) # encoding: [0x0f,0x29,0x11] | 
|  | ; X64-SSE-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX1-LABEL: test_MM_TRANSPOSE4_PS: | 
|  | ; X64-AVX1:       # %bb.0: | 
|  | ; X64-AVX1-NEXT:    vmovaps (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x07] | 
|  | ; X64-AVX1-NEXT:    vmovaps (%rsi), %xmm1 # encoding: [0xc5,0xf8,0x28,0x0e] | 
|  | ; X64-AVX1-NEXT:    vmovaps (%rdx), %xmm2 # encoding: [0xc5,0xf8,0x28,0x12] | 
|  | ; X64-AVX1-NEXT:    vmovaps (%rcx), %xmm3 # encoding: [0xc5,0xf8,0x28,0x19] | 
|  | ; X64-AVX1-NEXT:    vunpcklps %xmm1, %xmm0, %xmm4 # encoding: [0xc5,0xf8,0x14,0xe1] | 
|  | ; X64-AVX1-NEXT:    # xmm4 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] | 
|  | ; X64-AVX1-NEXT:    vunpcklps %xmm3, %xmm2, %xmm5 # encoding: [0xc5,0xe8,0x14,0xeb] | 
|  | ; X64-AVX1-NEXT:    # xmm5 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] | 
|  | ; X64-AVX1-NEXT:    vunpckhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x15,0xc1] | 
|  | ; X64-AVX1-NEXT:    # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] | 
|  | ; X64-AVX1-NEXT:    vunpckhps %xmm3, %xmm2, %xmm1 # encoding: [0xc5,0xe8,0x15,0xcb] | 
|  | ; X64-AVX1-NEXT:    # xmm1 = xmm2[2],xmm3[2],xmm2[3],xmm3[3] | 
|  | ; X64-AVX1-NEXT:    vmovlhps %xmm5, %xmm4, %xmm2 # encoding: [0xc5,0xd8,0x16,0xd5] | 
|  | ; X64-AVX1-NEXT:    # xmm2 = xmm4[0],xmm5[0] | 
|  | ; X64-AVX1-NEXT:    vunpckhpd %xmm5, %xmm4, %xmm3 # encoding: [0xc5,0xd9,0x15,0xdd] | 
|  | ; X64-AVX1-NEXT:    # xmm3 = xmm4[1],xmm5[1] | 
|  | ; X64-AVX1-NEXT:    vmovlhps %xmm1, %xmm0, %xmm4 # encoding: [0xc5,0xf8,0x16,0xe1] | 
|  | ; X64-AVX1-NEXT:    # xmm4 = xmm0[0],xmm1[0] | 
|  | ; X64-AVX1-NEXT:    vunpckhpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x15,0xc1] | 
|  | ; X64-AVX1-NEXT:    # xmm0 = xmm0[1],xmm1[1] | 
|  | ; X64-AVX1-NEXT:    vmovaps %xmm2, (%rdi) # encoding: [0xc5,0xf8,0x29,0x17] | 
|  | ; X64-AVX1-NEXT:    vmovaps %xmm3, (%rsi) # encoding: [0xc5,0xf8,0x29,0x1e] | 
|  | ; X64-AVX1-NEXT:    vmovaps %xmm4, (%rdx) # encoding: [0xc5,0xf8,0x29,0x22] | 
|  | ; X64-AVX1-NEXT:    vmovaps %xmm0, (%rcx) # encoding: [0xc5,0xf8,0x29,0x01] | 
|  | ; X64-AVX1-NEXT:    retq # encoding: [0xc3] | 
|  | ; | 
|  | ; X64-AVX512-LABEL: test_MM_TRANSPOSE4_PS: | 
|  | ; X64-AVX512:       # %bb.0: | 
|  | ; X64-AVX512-NEXT:    vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07] | 
|  | ; X64-AVX512-NEXT:    vmovaps (%rsi), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x0e] | 
|  | ; X64-AVX512-NEXT:    vmovaps (%rdx), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x12] | 
|  | ; X64-AVX512-NEXT:    vmovaps (%rcx), %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x19] | 
|  | ; X64-AVX512-NEXT:    vunpcklps %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xe1] | 
|  | ; X64-AVX512-NEXT:    # xmm4 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] | 
|  | ; X64-AVX512-NEXT:    vunpcklps %xmm3, %xmm2, %xmm5 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x14,0xeb] | 
|  | ; X64-AVX512-NEXT:    # xmm5 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] | 
|  | ; X64-AVX512-NEXT:    vunpckhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xc1] | 
|  | ; X64-AVX512-NEXT:    # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] | 
|  | ; X64-AVX512-NEXT:    vunpckhps %xmm3, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x15,0xcb] | 
|  | ; X64-AVX512-NEXT:    # xmm1 = xmm2[2],xmm3[2],xmm2[3],xmm3[3] | 
|  | ; X64-AVX512-NEXT:    vmovlhps %xmm5, %xmm4, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xd8,0x16,0xd5] | 
|  | ; X64-AVX512-NEXT:    # xmm2 = xmm4[0],xmm5[0] | 
|  | ; X64-AVX512-NEXT:    vunpckhpd %xmm5, %xmm4, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0x15,0xdd] | 
|  | ; X64-AVX512-NEXT:    # xmm3 = xmm4[1],xmm5[1] | 
|  | ; X64-AVX512-NEXT:    vmovlhps %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xe1] | 
|  | ; X64-AVX512-NEXT:    # xmm4 = xmm0[0],xmm1[0] | 
|  | ; X64-AVX512-NEXT:    vunpckhpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xc1] | 
|  | ; X64-AVX512-NEXT:    # xmm0 = xmm0[1],xmm1[1] | 
|  | ; X64-AVX512-NEXT:    vmovaps %xmm2, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x17] | 
|  | ; X64-AVX512-NEXT:    vmovaps %xmm3, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x1e] | 
|  | ; X64-AVX512-NEXT:    vmovaps %xmm4, (%rdx) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x22] | 
|  | ; X64-AVX512-NEXT:    vmovaps %xmm0, (%rcx) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x01] | 
|  | ; X64-AVX512-NEXT:    retq # encoding: [0xc3] | 
|  | %row0 = load <4 x float>, <4 x float>* %a0, align 16 | 
|  | %row1 = load <4 x float>, <4 x float>* %a1, align 16 | 
|  | %row2 = load <4 x float>, <4 x float>* %a2, align 16 | 
|  | %row3 = load <4 x float>, <4 x float>* %a3, align 16 | 
|  | %tmp0 = shufflevector <4 x float> %row0, <4 x float> %row1, <4 x i32> <i32 0, i32 4, i32 1, i32 5> | 
|  | %tmp2 = shufflevector <4 x float> %row2, <4 x float> %row3, <4 x i32> <i32 0, i32 4, i32 1, i32 5> | 
|  | %tmp1 = shufflevector <4 x float> %row0, <4 x float> %row1, <4 x i32> <i32 2, i32 6, i32 3, i32 7> | 
|  | %tmp3 = shufflevector <4 x float> %row2, <4 x float> %row3, <4 x i32> <i32 2, i32 6, i32 3, i32 7> | 
|  | %res0 = shufflevector <4 x float> %tmp0, <4 x float> %tmp2, <4 x i32> <i32 0, i32 1, i32 4, i32 5> | 
|  | %res1 = shufflevector <4 x float> %tmp2, <4 x float> %tmp0, <4 x i32> <i32 6, i32 7, i32 2, i32 3> | 
|  | %res2 = shufflevector <4 x float> %tmp1, <4 x float> %tmp3, <4 x i32> <i32 0, i32 1, i32 4, i32 5> | 
|  | %res3 = shufflevector <4 x float> %tmp3, <4 x float> %tmp1, <4 x i32> <i32 6, i32 7, i32 2, i32 3> | 
|  | store <4 x float> %res0, <4 x float>* %a0, align 16 | 
|  | store <4 x float> %res1, <4 x float>* %a1, align 16 | 
|  | store <4 x float> %res2, <4 x float>* %a2, align 16 | 
|  | store <4 x float> %res3, <4 x float>* %a3, align 16 | 
|  | ret void | 
|  | } | 
|  |  | 
|  | ; Calls the llvm.x86.sse.ucomieq.ss intrinsic and returns its i32 result. | 
|  | ; The expected code is a (v)ucomiss of %xmm1 against %xmm0 whose result is | 
|  | ; built from two flag reads, setnp and sete, combined with andb and then | 
|  | ; zero-extended into %eax. | 
|  | define i32 @test_mm_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_ucomieq_ss: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    ucomiss %xmm1, %xmm0 # encoding: [0x0f,0x2e,0xc1] | 
|  | ; SSE-NEXT:    setnp %al # encoding: [0x0f,0x9b,0xc0] | 
|  | ; SSE-NEXT:    sete %cl # encoding: [0x0f,0x94,0xc1] | 
|  | ; SSE-NEXT:    andb %al, %cl # encoding: [0x20,0xc1] | 
|  | ; SSE-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_ucomieq_ss: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vucomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2e,0xc1] | 
|  | ; AVX1-NEXT:    setnp %al # encoding: [0x0f,0x9b,0xc0] | 
|  | ; AVX1-NEXT:    sete %cl # encoding: [0x0f,0x94,0xc1] | 
|  | ; AVX1-NEXT:    andb %al, %cl # encoding: [0x20,0xc1] | 
|  | ; AVX1-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_ucomieq_ss: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vucomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1] | 
|  | ; AVX512-NEXT:    setnp %al # encoding: [0x0f,0x9b,0xc0] | 
|  | ; AVX512-NEXT:    sete %cl # encoding: [0x0f,0x94,0xc1] | 
|  | ; AVX512-NEXT:    andb %al, %cl # encoding: [0x20,0xc1] | 
|  | ; AVX512-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) | 
|  | ret i32 %res | 
|  | } | 
|  | declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone | 
|  |  | 
|  | ; Calls the llvm.x86.sse.ucomige.ss intrinsic and returns its i32 result. | 
|  | ; The expected code zeroes %eax first, compares with (v)ucomiss using the | 
|  | ; operands in their original order (%xmm1 against %xmm0), and materializes | 
|  | ; the result with a single setae. | 
|  | define i32 @test_mm_ucomige_ss(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_ucomige_ss: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0] | 
|  | ; SSE-NEXT:    ucomiss %xmm1, %xmm0 # encoding: [0x0f,0x2e,0xc1] | 
|  | ; SSE-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_ucomige_ss: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0] | 
|  | ; AVX1-NEXT:    vucomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2e,0xc1] | 
|  | ; AVX1-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_ucomige_ss: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0] | 
|  | ; AVX512-NEXT:    vucomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1] | 
|  | ; AVX512-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) | 
|  | ret i32 %res | 
|  | } | 
|  | declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone | 
|  |  | 
|  | ; Calls the llvm.x86.sse.ucomigt.ss intrinsic and returns its i32 result. | 
|  | ; The expected code zeroes %eax first, compares with (v)ucomiss using the | 
|  | ; operands in their original order (%xmm1 against %xmm0), and materializes | 
|  | ; the result with a single seta. | 
|  | define i32 @test_mm_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_ucomigt_ss: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0] | 
|  | ; SSE-NEXT:    ucomiss %xmm1, %xmm0 # encoding: [0x0f,0x2e,0xc1] | 
|  | ; SSE-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_ucomigt_ss: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0] | 
|  | ; AVX1-NEXT:    vucomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2e,0xc1] | 
|  | ; AVX1-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_ucomigt_ss: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0] | 
|  | ; AVX512-NEXT:    vucomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1] | 
|  | ; AVX512-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) | 
|  | ret i32 %res | 
|  | } | 
|  | declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone | 
|  |  | 
|  | ; Calls the llvm.x86.sse.ucomile.ss intrinsic and returns its i32 result. | 
|  | ; The expected code reuses the setae form seen in test_mm_ucomige_ss but with | 
|  | ; the (v)ucomiss operands swapped (%xmm0 against %xmm1), so no separate | 
|  | ; "below or equal" flag sequence is emitted. | 
|  | define i32 @test_mm_ucomile_ss(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_ucomile_ss: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0] | 
|  | ; SSE-NEXT:    ucomiss %xmm0, %xmm1 # encoding: [0x0f,0x2e,0xc8] | 
|  | ; SSE-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_ucomile_ss: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0] | 
|  | ; AVX1-NEXT:    vucomiss %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x2e,0xc8] | 
|  | ; AVX1-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_ucomile_ss: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0] | 
|  | ; AVX512-NEXT:    vucomiss %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc8] | 
|  | ; AVX512-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) | 
|  | ret i32 %res | 
|  | } | 
|  | declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone | 
|  |  | 
|  | ; Calls the llvm.x86.sse.ucomilt.ss intrinsic and returns its i32 result. | 
|  | ; The expected code reuses the seta form seen in test_mm_ucomigt_ss but with | 
|  | ; the (v)ucomiss operands swapped (%xmm0 against %xmm1), so no separate | 
|  | ; "below" flag sequence is emitted. | 
|  | define i32 @test_mm_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_ucomilt_ss: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0] | 
|  | ; SSE-NEXT:    ucomiss %xmm0, %xmm1 # encoding: [0x0f,0x2e,0xc8] | 
|  | ; SSE-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_ucomilt_ss: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0] | 
|  | ; AVX1-NEXT:    vucomiss %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x2e,0xc8] | 
|  | ; AVX1-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_ucomilt_ss: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0] | 
|  | ; AVX512-NEXT:    vucomiss %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc8] | 
|  | ; AVX512-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) | 
|  | ret i32 %res | 
|  | } | 
|  | declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone | 
|  |  | 
|  | ; Calls the llvm.x86.sse.ucomineq.ss intrinsic and returns its i32 result. | 
|  | ; The expected code mirrors test_mm_ucomieq_ss with the flag logic inverted, | 
|  | ; reading setp and setne after the (v)ucomiss and combining them with orb | 
|  | ; before zero-extending into %eax. | 
|  | define i32 @test_mm_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_ucomineq_ss: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    ucomiss %xmm1, %xmm0 # encoding: [0x0f,0x2e,0xc1] | 
|  | ; SSE-NEXT:    setp %al # encoding: [0x0f,0x9a,0xc0] | 
|  | ; SSE-NEXT:    setne %cl # encoding: [0x0f,0x95,0xc1] | 
|  | ; SSE-NEXT:    orb %al, %cl # encoding: [0x08,0xc1] | 
|  | ; SSE-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_ucomineq_ss: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vucomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2e,0xc1] | 
|  | ; AVX1-NEXT:    setp %al # encoding: [0x0f,0x9a,0xc0] | 
|  | ; AVX1-NEXT:    setne %cl # encoding: [0x0f,0x95,0xc1] | 
|  | ; AVX1-NEXT:    orb %al, %cl # encoding: [0x08,0xc1] | 
|  | ; AVX1-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_ucomineq_ss: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vucomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1] | 
|  | ; AVX512-NEXT:    setp %al # encoding: [0x0f,0x9a,0xc0] | 
|  | ; AVX512-NEXT:    setne %cl # encoding: [0x0f,0x95,0xc1] | 
|  | ; AVX512-NEXT:    orb %al, %cl # encoding: [0x08,0xc1] | 
|  | ; AVX512-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) | 
|  | ret i32 %res | 
|  | } | 
|  | declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone | 
|  |  | 
|  | ; Returns an undef <4 x float>. No instruction is needed to produce the value, | 
|  | ; so the expected output is a bare return, which is identical for every run | 
|  | ; line and is therefore matched with the shared prefix rather than per-target ones. | 
|  | define <4 x float> @test_mm_undefined_ps() { | 
|  | ; CHECK-LABEL: test_mm_undefined_ps: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ret <4 x float> undef | 
|  | } | 
|  |  | 
|  | ; Interleaves the upper two lanes of %a0 and %a1 with a shufflevector whose | 
|  | ; mask <2, 6, 3, 7> yields a0[2], a1[2], a0[3], a1[3]. | 
|  | ; The shuffle is expected to be matched to a single (v)unpckhps. | 
|  | define <4 x float> @test_mm_unpackhi_ps(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_unpackhi_ps: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    unpckhps %xmm1, %xmm0 # encoding: [0x0f,0x15,0xc1] | 
|  | ; SSE-NEXT:    # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_unpackhi_ps: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vunpckhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x15,0xc1] | 
|  | ; AVX1-NEXT:    # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_unpackhi_ps: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vunpckhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xc1] | 
|  | ; AVX512-NEXT:    # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7> | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; Interleaves the lower two lanes of %a0 and %a1 with a shufflevector whose | 
|  | ; mask <0, 4, 1, 5> yields a0[0], a1[0], a0[1], a1[1]. | 
|  | ; The shuffle is expected to be matched to a single (v)unpcklps. | 
|  | define <4 x float> @test_mm_unpacklo_ps(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_unpacklo_ps: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    unpcklps %xmm1, %xmm0 # encoding: [0x0f,0x14,0xc1] | 
|  | ; SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_unpacklo_ps: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vunpcklps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x14,0xc1] | 
|  | ; AVX1-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_unpacklo_ps: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vunpcklps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xc1] | 
|  | ; AVX512-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5> | 
|  | ret <4 x float> %res | 
|  | } | 
|  |  | 
|  | ; Bitwise XOR of two <4 x float> vectors. IR has no xor on floating-point | 
|  | ; types, so both operands are bitcast to <4 x i32>, xor'ed, and the result is | 
|  | ; bitcast back to <4 x float>. | 
|  | ; The bitcasts are expected to vanish, leaving a single (v)xorps. | 
|  | define <4 x float> @test_mm_xor_ps(<4 x float> %a0, <4 x float> %a1) nounwind { | 
|  | ; SSE-LABEL: test_mm_xor_ps: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    xorps %xmm1, %xmm0 # encoding: [0x0f,0x57,0xc1] | 
|  | ; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX1-LABEL: test_mm_xor_ps: | 
|  | ; AVX1:       # %bb.0: | 
|  | ; AVX1-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc1] | 
|  | ; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | ; | 
|  | ; AVX512-LABEL: test_mm_xor_ps: | 
|  | ; AVX512:       # %bb.0: | 
|  | ; AVX512-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc1] | 
|  | ; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3] | 
|  | %arg0 = bitcast <4 x float> %a0 to <4 x i32> | 
|  | %arg1 = bitcast <4 x float> %a1 to <4 x i32> | 
|  | %res = xor <4 x i32> %arg0, %arg1 | 
|  | %bc = bitcast <4 x i32> %res to <4 x float> | 
|  | ret <4 x float> %bc | 
|  | } | 
|  |  | 
|  | ; Metadata node attached as !nontemporal to the store in test_mm_stream_ps. | 
|  | !0 = !{i32 1} | 