; blob: 18c8e9fb4017a83938840a7012ef85debc801c94 [file] [log] [blame]
; This is a smoke test of nop insertion.
; Don't use integrated-as, because the checks below currently depend on the
; "# variant" assembler comment that follows each inserted nop.
; The three invocations below share the same RNG seed (1), presumably so that
; the nop placement is reproducible; they differ only in the insertion
; percentage and in the maximum number of nops per instruction. Each one is
; verified against its own FileCheck prefix.
;
; Configuration 1: 50% insertion percentage, at most one nop per instruction.
; RUN: %p2i -i %s -a -rng-seed=1 -nop-insertion -nop-insertion-percentage=50 \
; RUN: -max-nops-per-instruction=1 -integrated-as=false \
; RUN: | FileCheck %s --check-prefix=PROB50
; Configuration 2: 90% insertion percentage, at most one nop per instruction.
; RUN: %p2i -i %s -a -rng-seed=1 -nop-insertion -nop-insertion-percentage=90 \
; RUN: -max-nops-per-instruction=1 -integrated-as=false \
; RUN: | FileCheck %s --check-prefix=PROB90
; Configuration 3: 50% insertion percentage, at most two nops per instruction.
; RUN: %p2i -i %s -a -rng-seed=1 -nop-insertion -nop-insertion-percentage=50 \
; RUN: -max-nops-per-instruction=2 -integrated-as=false \
; RUN: | FileCheck %s --check-prefix=MAXNOPS2
; Test input: multiplies two <4 x i32> vectors element-wise and returns the
; product. The check lines inside the body list, for each of the three
; configurations, the instructions expected in the generated assembly together
; with the nops inserted between them.
; NOTE(review): the number after "variant =" presumably identifies which nop
; encoding was picked -- confirm against the nop-insertion implementation.
define <4 x i32> @mul_v4i32(<4 x i32> %a, <4 x i32> %b) {
entry:
%res = mul <4 x i32> %a, %b
ret <4 x i32> %res
; Expected output for the 50% / max-one-nop configuration: a nop precedes only
; some of the instructions.
; PROB50-LABEL: mul_v4i32:
; PROB50: nop # variant = 3
; PROB50: sub esp, 60
; PROB50: nop # variant = 4
; PROB50: movups xmmword ptr [esp+32], xmm0
; PROB50: movups xmmword ptr [esp+16], xmm1
; PROB50: nop # variant = 0
; PROB50: movups xmm0, xmmword ptr [esp+32]
; PROB50: nop # variant = 4
; PROB50: pshufd xmm1, xmmword ptr [esp+32], 49
; PROB50: pshufd xmm2, xmmword ptr [esp+16], 49
; PROB50: pmuludq xmm0, xmmword ptr [esp+16]
; PROB50: pmuludq xmm1, xmm2
; PROB50: nop # variant = 0
; PROB50: shufps xmm0, xmm1, 136
; PROB50: pshufd xmm1, xmm0, 216
; PROB50: nop # variant = 2
; PROB50: movups xmmword ptr [esp], xmm1
; PROB50: movups xmm0, xmmword ptr [esp]
; PROB50: add esp, 60
; PROB50: nop # variant = 0
; PROB50: ret
; Expected output for the 90% / max-one-nop configuration: with this seed,
; every instruction listed below is preceded by exactly one nop.
; PROB90-LABEL: mul_v4i32:
; PROB90: nop # variant = 3
; PROB90: sub esp, 60
; PROB90: nop # variant = 4
; PROB90: movups xmmword ptr [esp+32], xmm0
; PROB90: nop # variant = 3
; PROB90: movups xmmword ptr [esp+16], xmm1
; PROB90: nop # variant = 2
; PROB90: movups xmm0, xmmword ptr [esp+32]
; PROB90: nop # variant = 3
; PROB90: pshufd xmm1, xmmword ptr [esp+32], 49
; PROB90: nop # variant = 4
; PROB90: pshufd xmm2, xmmword ptr [esp+16], 49
; PROB90: nop # variant = 0
; PROB90: pmuludq xmm0, xmmword ptr [esp+16]
; PROB90: nop # variant = 2
; PROB90: pmuludq xmm1, xmm2
; PROB90: nop # variant = 3
; PROB90: shufps xmm0, xmm1, 136
; PROB90: nop # variant = 4
; PROB90: pshufd xmm1, xmm0, 216
; PROB90: nop # variant = 2
; PROB90: movups xmmword ptr [esp], xmm1
; PROB90: nop # variant = 4
; PROB90: movups xmm0, xmmword ptr [esp]
; PROB90: nop # variant = 2
; PROB90: add esp, 60
; PROB90: nop # variant = 3
; PROB90: ret
; Expected output for the 50% / max-two-nops configuration: some instructions
; are preceded by two consecutive nops, others by one or none.
; MAXNOPS2-LABEL: mul_v4i32:
; MAXNOPS2: sub esp, 60
; MAXNOPS2: nop # variant = 4
; MAXNOPS2: movups xmmword ptr [esp+32], xmm0
; MAXNOPS2: nop # variant = 0
; MAXNOPS2: nop # variant = 4
; MAXNOPS2: movups xmmword ptr [esp+16], xmm1
; MAXNOPS2: movups xmm0, xmmword ptr [esp+32]
; MAXNOPS2: nop # variant = 0
; MAXNOPS2: pshufd xmm1, xmmword ptr [esp+32], 49
; MAXNOPS2: nop # variant = 2
; MAXNOPS2: pshufd xmm2, xmmword ptr [esp+16], 49
; MAXNOPS2: pmuludq xmm0, xmmword ptr [esp+16]
; MAXNOPS2: nop # variant = 0
; MAXNOPS2: nop # variant = 3
; MAXNOPS2: pmuludq xmm1, xmm2
; MAXNOPS2: shufps xmm0, xmm1, 136
; MAXNOPS2: pshufd xmm1, xmm0, 216
; MAXNOPS2: nop # variant = 3
; MAXNOPS2: movups xmmword ptr [esp], xmm1
; MAXNOPS2: nop # variant = 0
; MAXNOPS2: movups xmm0, xmmword ptr [esp]
; MAXNOPS2: nop # variant = 2
; MAXNOPS2: add esp, 60
; MAXNOPS2: nop # variant = 4
; MAXNOPS2: ret
}