; third_party/llvm-7.0/llvm/test/CodeGen/X86/x86-64-psub.ll - SwiftShader - Git at Google

 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=x86_64-pc-linux -mattr=mmx < %s | FileCheck %s

 ; MMX packed sub opcodes were wrongly marked as commutative.
 ; This test checks that the operands of packed sub instructions are
 ; never interchanged by the "Two-Address instruction pass".

 ; External helpers that supply the two subtraction operands. Each returns a
 ; { i64, double } aggregate; the tests in this file read only field 0 (the i64).
 declare { i64, double } @getFirstParam()
 declare { i64, double } @getSecondParam()

 ; psubb operand-order test (llvm.x86.mmx.psub.b on <8 x i8> views).
 ; The expected assembly keeps the getFirstParam value in %mm0 (destination)
 ; and the getSecondParam value in %mm1 (source); they must not be swapped.
 define i64 @test_psubb() {
 ; CHECK-LABEL: test_psubb:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset %rbx, -16
 ; CHECK-NEXT:    callq getFirstParam
 ; CHECK-NEXT:    movq %rax, %rbx
 ; CHECK-NEXT:    callq getSecondParam
 ; CHECK-NEXT:    movq %rbx, %mm0
 ; CHECK-NEXT:    movq %rax, %mm1
 ; CHECK-NEXT:    psubb %mm1, %mm0
 ; CHECK-NEXT:    movq %mm0, %rax
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 entry:
   ; Fetch both operands; only the i64 member (field 0) of each result is used.
   %first.ret = tail call { i64, double } @getFirstParam()
   %first.i64 = extractvalue { i64, double } %first.ret, 0
   %second.ret = tail call { i64, double } @getSecondParam()
   %second.i64 = extractvalue { i64, double } %second.ret, 0
   ; Wrap each scalar in a <1 x i64>, then view it as <8 x i8> and as x86_mmx.
   %lhs.v1i64 = insertelement <1 x i64> undef, i64 %first.i64, i32 0
   %rhs.v1i64 = insertelement <1 x i64> undef, i64 %second.i64, i32 0
   %lhs.vec = bitcast <1 x i64> %lhs.v1i64 to <8 x i8>
   %lhs.mmx = bitcast <8 x i8> %lhs.vec to x86_mmx
   %rhs.vec = bitcast <1 x i64> %rhs.v1i64 to <8 x i8>
   %rhs.mmx = bitcast <8 x i8> %rhs.vec to x86_mmx
   ; First-call value is operand 0, second-call value is operand 1.
   %diff.mmx = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind
   ; Convert the result back to a plain i64 for the return.
   %diff.vec = bitcast x86_mmx %diff.mmx to <8 x i8>
   %diff.v1i64 = bitcast <8 x i8> %diff.vec to <1 x i64>
   %diff.i64 = extractelement <1 x i64> %diff.v1i64, i32 0
   ret i64 %diff.i64
 }

 ; psubw operand-order test (llvm.x86.mmx.psub.w on <4 x i16> views).
 ; The expected assembly keeps the getFirstParam value in %mm0 (destination)
 ; and the getSecondParam value in %mm1 (source); they must not be swapped.
 define i64 @test_psubw() {
 ; CHECK-LABEL: test_psubw:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset %rbx, -16
 ; CHECK-NEXT:    callq getFirstParam
 ; CHECK-NEXT:    movq %rax, %rbx
 ; CHECK-NEXT:    callq getSecondParam
 ; CHECK-NEXT:    movq %rbx, %mm0
 ; CHECK-NEXT:    movq %rax, %mm1
 ; CHECK-NEXT:    psubw %mm1, %mm0
 ; CHECK-NEXT:    movq %mm0, %rax
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 entry:
   ; Fetch both operands; only the i64 member (field 0) of each result is used.
   %first.ret = tail call { i64, double } @getFirstParam()
   %first.i64 = extractvalue { i64, double } %first.ret, 0
   %second.ret = tail call { i64, double } @getSecondParam()
   %second.i64 = extractvalue { i64, double } %second.ret, 0
   ; Wrap each scalar in a <1 x i64>, then view it as <4 x i16> and as x86_mmx.
   %lhs.v1i64 = insertelement <1 x i64> undef, i64 %first.i64, i32 0
   %rhs.v1i64 = insertelement <1 x i64> undef, i64 %second.i64, i32 0
   %lhs.vec = bitcast <1 x i64> %lhs.v1i64 to <4 x i16>
   %lhs.mmx = bitcast <4 x i16> %lhs.vec to x86_mmx
   %rhs.vec = bitcast <1 x i64> %rhs.v1i64 to <4 x i16>
   %rhs.mmx = bitcast <4 x i16> %rhs.vec to x86_mmx
   ; First-call value is operand 0, second-call value is operand 1.
   %diff.mmx = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind
   ; Convert the result back to a plain i64 for the return.
   %diff.vec = bitcast x86_mmx %diff.mmx to <4 x i16>
   %diff.v1i64 = bitcast <4 x i16> %diff.vec to <1 x i64>
   %diff.i64 = extractelement <1 x i64> %diff.v1i64, i32 0
   ret i64 %diff.i64
 }

 ; psubd operand-order test (llvm.x86.mmx.psub.d on <2 x i32> views).
 ; The expected assembly keeps the getFirstParam value in %mm0 (destination)
 ; and the getSecondParam value in %mm1 (source); they must not be swapped.
 define i64 @test_psubd() {
 ; CHECK-LABEL: test_psubd:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset %rbx, -16
 ; CHECK-NEXT:    callq getFirstParam
 ; CHECK-NEXT:    movq %rax, %rbx
 ; CHECK-NEXT:    callq getSecondParam
 ; CHECK-NEXT:    movq %rbx, %mm0
 ; CHECK-NEXT:    movq %rax, %mm1
 ; CHECK-NEXT:    psubd %mm1, %mm0
 ; CHECK-NEXT:    movq %mm0, %rax
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 entry:
   ; Fetch both operands; only the i64 member (field 0) of each result is used.
   %first.ret = tail call { i64, double } @getFirstParam()
   %first.i64 = extractvalue { i64, double } %first.ret, 0
   %second.ret = tail call { i64, double } @getSecondParam()
   %second.i64 = extractvalue { i64, double } %second.ret, 0
   ; Wrap each scalar in a <1 x i64>, then view it as <2 x i32> and as x86_mmx.
   %lhs.v1i64 = insertelement <1 x i64> undef, i64 %first.i64, i32 0
   %rhs.v1i64 = insertelement <1 x i64> undef, i64 %second.i64, i32 0
   %lhs.vec = bitcast <1 x i64> %lhs.v1i64 to <2 x i32>
   %lhs.mmx = bitcast <2 x i32> %lhs.vec to x86_mmx
   %rhs.vec = bitcast <1 x i64> %rhs.v1i64 to <2 x i32>
   %rhs.mmx = bitcast <2 x i32> %rhs.vec to x86_mmx
   ; First-call value is operand 0, second-call value is operand 1.
   %diff.mmx = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind
   ; Convert the result back to a plain i64 for the return.
   %diff.vec = bitcast x86_mmx %diff.mmx to <2 x i32>
   %diff.v1i64 = bitcast <2 x i32> %diff.vec to <1 x i64>
   %diff.i64 = extractelement <1 x i64> %diff.v1i64, i32 0
   ret i64 %diff.i64
 }

 ; psubsb operand-order test (llvm.x86.mmx.psubs.b on <8 x i8> views).
 ; The expected assembly keeps the getFirstParam value in %mm0 (destination)
 ; and the getSecondParam value in %mm1 (source); they must not be swapped.
 define i64 @test_psubsb() {
 ; CHECK-LABEL: test_psubsb:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset %rbx, -16
 ; CHECK-NEXT:    callq getFirstParam
 ; CHECK-NEXT:    movq %rax, %rbx
 ; CHECK-NEXT:    callq getSecondParam
 ; CHECK-NEXT:    movq %rbx, %mm0
 ; CHECK-NEXT:    movq %rax, %mm1
 ; CHECK-NEXT:    psubsb %mm1, %mm0
 ; CHECK-NEXT:    movq %mm0, %rax
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 entry:
   ; Fetch both operands; only the i64 member (field 0) of each result is used.
   %first.ret = tail call { i64, double } @getFirstParam()
   %first.i64 = extractvalue { i64, double } %first.ret, 0
   %second.ret = tail call { i64, double } @getSecondParam()
   %second.i64 = extractvalue { i64, double } %second.ret, 0
   ; Wrap each scalar in a <1 x i64>, then view it as <8 x i8> and as x86_mmx.
   %lhs.v1i64 = insertelement <1 x i64> undef, i64 %first.i64, i32 0
   %rhs.v1i64 = insertelement <1 x i64> undef, i64 %second.i64, i32 0
   %lhs.vec = bitcast <1 x i64> %lhs.v1i64 to <8 x i8>
   %lhs.mmx = bitcast <8 x i8> %lhs.vec to x86_mmx
   %rhs.vec = bitcast <1 x i64> %rhs.v1i64 to <8 x i8>
   %rhs.mmx = bitcast <8 x i8> %rhs.vec to x86_mmx
   ; First-call value is operand 0, second-call value is operand 1.
   %diff.mmx = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind
   ; Convert the result back to a plain i64 for the return.
   %diff.vec = bitcast x86_mmx %diff.mmx to <8 x i8>
   %diff.v1i64 = bitcast <8 x i8> %diff.vec to <1 x i64>
   %diff.i64 = extractelement <1 x i64> %diff.v1i64, i32 0
   ret i64 %diff.i64
 }

 ; psubsw operand-order test (llvm.x86.mmx.psubs.w on <4 x i16> views).
 ; The expected assembly keeps the getFirstParam value in %mm0 (destination)
 ; and the getSecondParam value in %mm1 (source); they must not be swapped.
 define i64 @test_psubswv() {
 ; CHECK-LABEL: test_psubswv:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset %rbx, -16
 ; CHECK-NEXT:    callq getFirstParam
 ; CHECK-NEXT:    movq %rax, %rbx
 ; CHECK-NEXT:    callq getSecondParam
 ; CHECK-NEXT:    movq %rbx, %mm0
 ; CHECK-NEXT:    movq %rax, %mm1
 ; CHECK-NEXT:    psubsw %mm1, %mm0
 ; CHECK-NEXT:    movq %mm0, %rax
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 entry:
   ; Fetch both operands; only the i64 member (field 0) of each result is used.
   %first.ret = tail call { i64, double } @getFirstParam()
   %first.i64 = extractvalue { i64, double } %first.ret, 0
   %second.ret = tail call { i64, double } @getSecondParam()
   %second.i64 = extractvalue { i64, double } %second.ret, 0
   ; Wrap each scalar in a <1 x i64>, then view it as <4 x i16> and as x86_mmx.
   %lhs.v1i64 = insertelement <1 x i64> undef, i64 %first.i64, i32 0
   %rhs.v1i64 = insertelement <1 x i64> undef, i64 %second.i64, i32 0
   %lhs.vec = bitcast <1 x i64> %lhs.v1i64 to <4 x i16>
   %lhs.mmx = bitcast <4 x i16> %lhs.vec to x86_mmx
   %rhs.vec = bitcast <1 x i64> %rhs.v1i64 to <4 x i16>
   %rhs.mmx = bitcast <4 x i16> %rhs.vec to x86_mmx
   ; First-call value is operand 0, second-call value is operand 1.
   %diff.mmx = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind
   ; Convert the result back to a plain i64 for the return.
   %diff.vec = bitcast x86_mmx %diff.mmx to <4 x i16>
   %diff.v1i64 = bitcast <4 x i16> %diff.vec to <1 x i64>
   %diff.i64 = extractelement <1 x i64> %diff.v1i64, i32 0
   ret i64 %diff.i64
 }

 ; psubusb operand-order test (llvm.x86.mmx.psubus.b on <8 x i8> views).
 ; The expected assembly keeps the getFirstParam value in %mm0 (destination)
 ; and the getSecondParam value in %mm1 (source); they must not be swapped.
 define i64 @test_psubusbv() {
 ; CHECK-LABEL: test_psubusbv:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset %rbx, -16
 ; CHECK-NEXT:    callq getFirstParam
 ; CHECK-NEXT:    movq %rax, %rbx
 ; CHECK-NEXT:    callq getSecondParam
 ; CHECK-NEXT:    movq %rbx, %mm0
 ; CHECK-NEXT:    movq %rax, %mm1
 ; CHECK-NEXT:    psubusb %mm1, %mm0
 ; CHECK-NEXT:    movq %mm0, %rax
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 entry:
   ; Fetch both operands; only the i64 member (field 0) of each result is used.
   %first.ret = tail call { i64, double } @getFirstParam()
   %first.i64 = extractvalue { i64, double } %first.ret, 0
   %second.ret = tail call { i64, double } @getSecondParam()
   %second.i64 = extractvalue { i64, double } %second.ret, 0
   ; Wrap each scalar in a <1 x i64>, then view it as <8 x i8> and as x86_mmx.
   %lhs.v1i64 = insertelement <1 x i64> undef, i64 %first.i64, i32 0
   %rhs.v1i64 = insertelement <1 x i64> undef, i64 %second.i64, i32 0
   %lhs.vec = bitcast <1 x i64> %lhs.v1i64 to <8 x i8>
   %lhs.mmx = bitcast <8 x i8> %lhs.vec to x86_mmx
   %rhs.vec = bitcast <1 x i64> %rhs.v1i64 to <8 x i8>
   %rhs.mmx = bitcast <8 x i8> %rhs.vec to x86_mmx
   ; First-call value is operand 0, second-call value is operand 1.
   %diff.mmx = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind
   ; Convert the result back to a plain i64 for the return.
   %diff.vec = bitcast x86_mmx %diff.mmx to <8 x i8>
   %diff.v1i64 = bitcast <8 x i8> %diff.vec to <1 x i64>
   %diff.i64 = extractelement <1 x i64> %diff.v1i64, i32 0
   ret i64 %diff.i64
 }

 ; psubusw operand-order test (llvm.x86.mmx.psubus.w on <4 x i16> views).
 ; The expected assembly keeps the getFirstParam value in %mm0 (destination)
 ; and the getSecondParam value in %mm1 (source); they must not be swapped.
 define i64 @test_psubuswv() {
 ; CHECK-LABEL: test_psubuswv:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset %rbx, -16
 ; CHECK-NEXT:    callq getFirstParam
 ; CHECK-NEXT:    movq %rax, %rbx
 ; CHECK-NEXT:    callq getSecondParam
 ; CHECK-NEXT:    movq %rbx, %mm0
 ; CHECK-NEXT:    movq %rax, %mm1
 ; CHECK-NEXT:    psubusw %mm1, %mm0
 ; CHECK-NEXT:    movq %mm0, %rax
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 entry:
   ; Fetch both operands; only the i64 member (field 0) of each result is used.
   %first.ret = tail call { i64, double } @getFirstParam()
   %first.i64 = extractvalue { i64, double } %first.ret, 0
   %second.ret = tail call { i64, double } @getSecondParam()
   %second.i64 = extractvalue { i64, double } %second.ret, 0
   ; Wrap each scalar in a <1 x i64>, then view it as <4 x i16> and as x86_mmx.
   %lhs.v1i64 = insertelement <1 x i64> undef, i64 %first.i64, i32 0
   %rhs.v1i64 = insertelement <1 x i64> undef, i64 %second.i64, i32 0
   %lhs.vec = bitcast <1 x i64> %lhs.v1i64 to <4 x i16>
   %lhs.mmx = bitcast <4 x i16> %lhs.vec to x86_mmx
   %rhs.vec = bitcast <1 x i64> %rhs.v1i64 to <4 x i16>
   %rhs.mmx = bitcast <4 x i16> %rhs.vec to x86_mmx
   ; First-call value is operand 0, second-call value is operand 1.
   %diff.mmx = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind
   ; Convert the result back to a plain i64 for the return.
   %diff.vec = bitcast x86_mmx %diff.mmx to <4 x i16>
   %diff.v1i64 = bitcast <4 x i16> %diff.vec to <1 x i64>
   %diff.i64 = extractelement <1 x i64> %diff.v1i64, i32 0
   ret i64 %diff.i64
 }

 ; Declarations of the x86 MMX packed-subtract intrinsics called by the test
 ; functions in this file. Each takes two x86_mmx operands and returns x86_mmx.
 declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone

 declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone

 declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone

 declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone

 declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone

 declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone

 declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone
	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc -mtriple=x86_64-pc-linux -mattr=mmx < %s | FileCheck %s

	; MMX packed sub opcodes were wrongly marked as commutative.
	; This test checks that the operands of packed sub instructions are
	; never interchanged by the "Two-Address instruction pass".

	; External helpers that supply the two subtraction operands. Each returns a
	; { i64, double } aggregate; the tests in this file read only field 0 (the i64).
	declare { i64, double } @getFirstParam()
	declare { i64, double } @getSecondParam()

	; psubb operand-order test (llvm.x86.mmx.psub.b on <8 x i8> views).
	; The expected assembly keeps the getFirstParam value in %mm0 (destination)
	; and the getSecondParam value in %mm1 (source); they must not be swapped.
	define i64 @test_psubb() {
	; CHECK-LABEL: test_psubb:
	; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: pushq %rbx
	; CHECK-NEXT: .cfi_def_cfa_offset 16
	; CHECK-NEXT: .cfi_offset %rbx, -16
	; CHECK-NEXT: callq getFirstParam
	; CHECK-NEXT: movq %rax, %rbx
	; CHECK-NEXT: callq getSecondParam
	; CHECK-NEXT: movq %rbx, %mm0
	; CHECK-NEXT: movq %rax, %mm1
	; CHECK-NEXT: psubb %mm1, %mm0
	; CHECK-NEXT: movq %mm0, %rax
	; CHECK-NEXT: popq %rbx
	; CHECK-NEXT: .cfi_def_cfa_offset 8
	; CHECK-NEXT: retq
	entry:
	; Fetch both operands; only the i64 member (field 0) of each result is used.
	%first.ret = tail call { i64, double } @getFirstParam()
	%first.i64 = extractvalue { i64, double } %first.ret, 0
	%second.ret = tail call { i64, double } @getSecondParam()
	%second.i64 = extractvalue { i64, double } %second.ret, 0
	; Wrap each scalar in a <1 x i64>, then view it as <8 x i8> and as x86_mmx.
	%lhs.v1i64 = insertelement <1 x i64> undef, i64 %first.i64, i32 0
	%rhs.v1i64 = insertelement <1 x i64> undef, i64 %second.i64, i32 0
	%lhs.vec = bitcast <1 x i64> %lhs.v1i64 to <8 x i8>
	%lhs.mmx = bitcast <8 x i8> %lhs.vec to x86_mmx
	%rhs.vec = bitcast <1 x i64> %rhs.v1i64 to <8 x i8>
	%rhs.mmx = bitcast <8 x i8> %rhs.vec to x86_mmx
	; First-call value is operand 0, second-call value is operand 1.
	%diff.mmx = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind
	; Convert the result back to a plain i64 for the return.
	%diff.vec = bitcast x86_mmx %diff.mmx to <8 x i8>
	%diff.v1i64 = bitcast <8 x i8> %diff.vec to <1 x i64>
	%diff.i64 = extractelement <1 x i64> %diff.v1i64, i32 0
	ret i64 %diff.i64
	}

	; psubw operand-order test (llvm.x86.mmx.psub.w on <4 x i16> views).
	; The expected assembly keeps the getFirstParam value in %mm0 (destination)
	; and the getSecondParam value in %mm1 (source); they must not be swapped.
	define i64 @test_psubw() {
	; CHECK-LABEL: test_psubw:
	; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: pushq %rbx
	; CHECK-NEXT: .cfi_def_cfa_offset 16
	; CHECK-NEXT: .cfi_offset %rbx, -16
	; CHECK-NEXT: callq getFirstParam
	; CHECK-NEXT: movq %rax, %rbx
	; CHECK-NEXT: callq getSecondParam
	; CHECK-NEXT: movq %rbx, %mm0
	; CHECK-NEXT: movq %rax, %mm1
	; CHECK-NEXT: psubw %mm1, %mm0
	; CHECK-NEXT: movq %mm0, %rax
	; CHECK-NEXT: popq %rbx
	; CHECK-NEXT: .cfi_def_cfa_offset 8
	; CHECK-NEXT: retq
	entry:
	; Fetch both operands; only the i64 member (field 0) of each result is used.
	%first.ret = tail call { i64, double } @getFirstParam()
	%first.i64 = extractvalue { i64, double } %first.ret, 0
	%second.ret = tail call { i64, double } @getSecondParam()
	%second.i64 = extractvalue { i64, double } %second.ret, 0
	; Wrap each scalar in a <1 x i64>, then view it as <4 x i16> and as x86_mmx.
	%lhs.v1i64 = insertelement <1 x i64> undef, i64 %first.i64, i32 0
	%rhs.v1i64 = insertelement <1 x i64> undef, i64 %second.i64, i32 0
	%lhs.vec = bitcast <1 x i64> %lhs.v1i64 to <4 x i16>
	%lhs.mmx = bitcast <4 x i16> %lhs.vec to x86_mmx
	%rhs.vec = bitcast <1 x i64> %rhs.v1i64 to <4 x i16>
	%rhs.mmx = bitcast <4 x i16> %rhs.vec to x86_mmx
	; First-call value is operand 0, second-call value is operand 1.
	%diff.mmx = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind
	; Convert the result back to a plain i64 for the return.
	%diff.vec = bitcast x86_mmx %diff.mmx to <4 x i16>
	%diff.v1i64 = bitcast <4 x i16> %diff.vec to <1 x i64>
	%diff.i64 = extractelement <1 x i64> %diff.v1i64, i32 0
	ret i64 %diff.i64
	}

	; psubd operand-order test (llvm.x86.mmx.psub.d on <2 x i32> views).
	; The expected assembly keeps the getFirstParam value in %mm0 (destination)
	; and the getSecondParam value in %mm1 (source); they must not be swapped.
	define i64 @test_psubd() {
	; CHECK-LABEL: test_psubd:
	; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: pushq %rbx
	; CHECK-NEXT: .cfi_def_cfa_offset 16
	; CHECK-NEXT: .cfi_offset %rbx, -16
	; CHECK-NEXT: callq getFirstParam
	; CHECK-NEXT: movq %rax, %rbx
	; CHECK-NEXT: callq getSecondParam
	; CHECK-NEXT: movq %rbx, %mm0
	; CHECK-NEXT: movq %rax, %mm1
	; CHECK-NEXT: psubd %mm1, %mm0
	; CHECK-NEXT: movq %mm0, %rax
	; CHECK-NEXT: popq %rbx
	; CHECK-NEXT: .cfi_def_cfa_offset 8
	; CHECK-NEXT: retq
	entry:
	; Fetch both operands; only the i64 member (field 0) of each result is used.
	%first.ret = tail call { i64, double } @getFirstParam()
	%first.i64 = extractvalue { i64, double } %first.ret, 0
	%second.ret = tail call { i64, double } @getSecondParam()
	%second.i64 = extractvalue { i64, double } %second.ret, 0
	; Wrap each scalar in a <1 x i64>, then view it as <2 x i32> and as x86_mmx.
	%lhs.v1i64 = insertelement <1 x i64> undef, i64 %first.i64, i32 0
	%rhs.v1i64 = insertelement <1 x i64> undef, i64 %second.i64, i32 0
	%lhs.vec = bitcast <1 x i64> %lhs.v1i64 to <2 x i32>
	%lhs.mmx = bitcast <2 x i32> %lhs.vec to x86_mmx
	%rhs.vec = bitcast <1 x i64> %rhs.v1i64 to <2 x i32>
	%rhs.mmx = bitcast <2 x i32> %rhs.vec to x86_mmx
	; First-call value is operand 0, second-call value is operand 1.
	%diff.mmx = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind
	; Convert the result back to a plain i64 for the return.
	%diff.vec = bitcast x86_mmx %diff.mmx to <2 x i32>
	%diff.v1i64 = bitcast <2 x i32> %diff.vec to <1 x i64>
	%diff.i64 = extractelement <1 x i64> %diff.v1i64, i32 0
	ret i64 %diff.i64
	}

	; psubsb operand-order test (llvm.x86.mmx.psubs.b on <8 x i8> views).
	; The expected assembly keeps the getFirstParam value in %mm0 (destination)
	; and the getSecondParam value in %mm1 (source); they must not be swapped.
	define i64 @test_psubsb() {
	; CHECK-LABEL: test_psubsb:
	; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: pushq %rbx
	; CHECK-NEXT: .cfi_def_cfa_offset 16
	; CHECK-NEXT: .cfi_offset %rbx, -16
	; CHECK-NEXT: callq getFirstParam
	; CHECK-NEXT: movq %rax, %rbx
	; CHECK-NEXT: callq getSecondParam
	; CHECK-NEXT: movq %rbx, %mm0
	; CHECK-NEXT: movq %rax, %mm1
	; CHECK-NEXT: psubsb %mm1, %mm0
	; CHECK-NEXT: movq %mm0, %rax
	; CHECK-NEXT: popq %rbx
	; CHECK-NEXT: .cfi_def_cfa_offset 8
	; CHECK-NEXT: retq
	entry:
	; Fetch both operands; only the i64 member (field 0) of each result is used.
	%first.ret = tail call { i64, double } @getFirstParam()
	%first.i64 = extractvalue { i64, double } %first.ret, 0
	%second.ret = tail call { i64, double } @getSecondParam()
	%second.i64 = extractvalue { i64, double } %second.ret, 0
	; Wrap each scalar in a <1 x i64>, then view it as <8 x i8> and as x86_mmx.
	%lhs.v1i64 = insertelement <1 x i64> undef, i64 %first.i64, i32 0
	%rhs.v1i64 = insertelement <1 x i64> undef, i64 %second.i64, i32 0
	%lhs.vec = bitcast <1 x i64> %lhs.v1i64 to <8 x i8>
	%lhs.mmx = bitcast <8 x i8> %lhs.vec to x86_mmx
	%rhs.vec = bitcast <1 x i64> %rhs.v1i64 to <8 x i8>
	%rhs.mmx = bitcast <8 x i8> %rhs.vec to x86_mmx
	; First-call value is operand 0, second-call value is operand 1.
	%diff.mmx = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind
	; Convert the result back to a plain i64 for the return.
	%diff.vec = bitcast x86_mmx %diff.mmx to <8 x i8>
	%diff.v1i64 = bitcast <8 x i8> %diff.vec to <1 x i64>
	%diff.i64 = extractelement <1 x i64> %diff.v1i64, i32 0
	ret i64 %diff.i64
	}

	; psubsw operand-order test (llvm.x86.mmx.psubs.w on <4 x i16> views).
	; The expected assembly keeps the getFirstParam value in %mm0 (destination)
	; and the getSecondParam value in %mm1 (source); they must not be swapped.
	define i64 @test_psubswv() {
	; CHECK-LABEL: test_psubswv:
	; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: pushq %rbx
	; CHECK-NEXT: .cfi_def_cfa_offset 16
	; CHECK-NEXT: .cfi_offset %rbx, -16
	; CHECK-NEXT: callq getFirstParam
	; CHECK-NEXT: movq %rax, %rbx
	; CHECK-NEXT: callq getSecondParam
	; CHECK-NEXT: movq %rbx, %mm0
	; CHECK-NEXT: movq %rax, %mm1
	; CHECK-NEXT: psubsw %mm1, %mm0
	; CHECK-NEXT: movq %mm0, %rax
	; CHECK-NEXT: popq %rbx
	; CHECK-NEXT: .cfi_def_cfa_offset 8
	; CHECK-NEXT: retq
	entry:
	; Fetch both operands; only the i64 member (field 0) of each result is used.
	%first.ret = tail call { i64, double } @getFirstParam()
	%first.i64 = extractvalue { i64, double } %first.ret, 0
	%second.ret = tail call { i64, double } @getSecondParam()
	%second.i64 = extractvalue { i64, double } %second.ret, 0
	; Wrap each scalar in a <1 x i64>, then view it as <4 x i16> and as x86_mmx.
	%lhs.v1i64 = insertelement <1 x i64> undef, i64 %first.i64, i32 0
	%rhs.v1i64 = insertelement <1 x i64> undef, i64 %second.i64, i32 0
	%lhs.vec = bitcast <1 x i64> %lhs.v1i64 to <4 x i16>
	%lhs.mmx = bitcast <4 x i16> %lhs.vec to x86_mmx
	%rhs.vec = bitcast <1 x i64> %rhs.v1i64 to <4 x i16>
	%rhs.mmx = bitcast <4 x i16> %rhs.vec to x86_mmx
	; First-call value is operand 0, second-call value is operand 1.
	%diff.mmx = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind
	; Convert the result back to a plain i64 for the return.
	%diff.vec = bitcast x86_mmx %diff.mmx to <4 x i16>
	%diff.v1i64 = bitcast <4 x i16> %diff.vec to <1 x i64>
	%diff.i64 = extractelement <1 x i64> %diff.v1i64, i32 0
	ret i64 %diff.i64
	}

	; psubusb operand-order test (llvm.x86.mmx.psubus.b on <8 x i8> views).
	; The expected assembly keeps the getFirstParam value in %mm0 (destination)
	; and the getSecondParam value in %mm1 (source); they must not be swapped.
	define i64 @test_psubusbv() {
	; CHECK-LABEL: test_psubusbv:
	; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: pushq %rbx
	; CHECK-NEXT: .cfi_def_cfa_offset 16
	; CHECK-NEXT: .cfi_offset %rbx, -16
	; CHECK-NEXT: callq getFirstParam
	; CHECK-NEXT: movq %rax, %rbx
	; CHECK-NEXT: callq getSecondParam
	; CHECK-NEXT: movq %rbx, %mm0
	; CHECK-NEXT: movq %rax, %mm1
	; CHECK-NEXT: psubusb %mm1, %mm0
	; CHECK-NEXT: movq %mm0, %rax
	; CHECK-NEXT: popq %rbx
	; CHECK-NEXT: .cfi_def_cfa_offset 8
	; CHECK-NEXT: retq
	entry:
	; Fetch both operands; only the i64 member (field 0) of each result is used.
	%first.ret = tail call { i64, double } @getFirstParam()
	%first.i64 = extractvalue { i64, double } %first.ret, 0
	%second.ret = tail call { i64, double } @getSecondParam()
	%second.i64 = extractvalue { i64, double } %second.ret, 0
	; Wrap each scalar in a <1 x i64>, then view it as <8 x i8> and as x86_mmx.
	%lhs.v1i64 = insertelement <1 x i64> undef, i64 %first.i64, i32 0
	%rhs.v1i64 = insertelement <1 x i64> undef, i64 %second.i64, i32 0
	%lhs.vec = bitcast <1 x i64> %lhs.v1i64 to <8 x i8>
	%lhs.mmx = bitcast <8 x i8> %lhs.vec to x86_mmx
	%rhs.vec = bitcast <1 x i64> %rhs.v1i64 to <8 x i8>
	%rhs.mmx = bitcast <8 x i8> %rhs.vec to x86_mmx
	; First-call value is operand 0, second-call value is operand 1.
	%diff.mmx = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind
	; Convert the result back to a plain i64 for the return.
	%diff.vec = bitcast x86_mmx %diff.mmx to <8 x i8>
	%diff.v1i64 = bitcast <8 x i8> %diff.vec to <1 x i64>
	%diff.i64 = extractelement <1 x i64> %diff.v1i64, i32 0
	ret i64 %diff.i64
	}

	; psubusw operand-order test (llvm.x86.mmx.psubus.w on <4 x i16> views).
	; The expected assembly keeps the getFirstParam value in %mm0 (destination)
	; and the getSecondParam value in %mm1 (source); they must not be swapped.
	define i64 @test_psubuswv() {
	; CHECK-LABEL: test_psubuswv:
	; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: pushq %rbx
	; CHECK-NEXT: .cfi_def_cfa_offset 16
	; CHECK-NEXT: .cfi_offset %rbx, -16
	; CHECK-NEXT: callq getFirstParam
	; CHECK-NEXT: movq %rax, %rbx
	; CHECK-NEXT: callq getSecondParam
	; CHECK-NEXT: movq %rbx, %mm0
	; CHECK-NEXT: movq %rax, %mm1
	; CHECK-NEXT: psubusw %mm1, %mm0
	; CHECK-NEXT: movq %mm0, %rax
	; CHECK-NEXT: popq %rbx
	; CHECK-NEXT: .cfi_def_cfa_offset 8
	; CHECK-NEXT: retq
	entry:
	; Fetch both operands; only the i64 member (field 0) of each result is used.
	%first.ret = tail call { i64, double } @getFirstParam()
	%first.i64 = extractvalue { i64, double } %first.ret, 0
	%second.ret = tail call { i64, double } @getSecondParam()
	%second.i64 = extractvalue { i64, double } %second.ret, 0
	; Wrap each scalar in a <1 x i64>, then view it as <4 x i16> and as x86_mmx.
	%lhs.v1i64 = insertelement <1 x i64> undef, i64 %first.i64, i32 0
	%rhs.v1i64 = insertelement <1 x i64> undef, i64 %second.i64, i32 0
	%lhs.vec = bitcast <1 x i64> %lhs.v1i64 to <4 x i16>
	%lhs.mmx = bitcast <4 x i16> %lhs.vec to x86_mmx
	%rhs.vec = bitcast <1 x i64> %rhs.v1i64 to <4 x i16>
	%rhs.mmx = bitcast <4 x i16> %rhs.vec to x86_mmx
	; First-call value is operand 0, second-call value is operand 1.
	%diff.mmx = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind
	; Convert the result back to a plain i64 for the return.
	%diff.vec = bitcast x86_mmx %diff.mmx to <4 x i16>
	%diff.v1i64 = bitcast <4 x i16> %diff.vec to <1 x i64>
	%diff.i64 = extractelement <1 x i64> %diff.v1i64, i32 0
	ret i64 %diff.i64
	}

	; Declarations of the x86 MMX packed-subtract intrinsics called by the test
	; functions in this file. Each takes two x86_mmx operands and returns x86_mmx.
	declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone

	declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone

	declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone

	declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone

	declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone

	declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone

	declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone