| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,GENERIC |
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefixes=CHECK,ATOM |
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SLM |
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE |
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY |
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE |
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY |
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE |
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL |
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE |
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL |
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE |
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE |
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SKX-SSE |
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX |
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE |
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2 |
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE |
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1 |
| |
| ; PABSB scheduling test. Calls llvm.x86.ssse3.pabs.b.128 once on the register |
| ; argument %a0 and once on a vector loaded from %a1, then ORs the two results |
| ; into the return value. The autogenerated lines below record the instruction |
| ; sequence and the "sched" annotation printed by llc -print-schedule for each |
| ; configuration listed in the RUN lines at the top of the file. |
| define <16 x i8> @test_pabsb(<16 x i8> %a0, <16 x i8> *%a1) { |
| ; GENERIC-LABEL: test_pabsb: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50] |
| ; GENERIC-NEXT: pabsb (%rdi), %xmm0 # sched: [7:0.50] |
| ; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; ATOM-LABEL: test_pabsb: |
| ; ATOM: # %bb.0: |
| ; ATOM-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50] |
| ; ATOM-NEXT: pabsb (%rdi), %xmm0 # sched: [1:1.00] |
| ; ATOM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] |
| ; ATOM-NEXT: nop # sched: [1:0.50] |
| ; ATOM-NEXT: nop # sched: [1:0.50] |
| ; ATOM-NEXT: retq # sched: [79:39.50] |
| ; |
| ; SLM-LABEL: test_pabsb: |
| ; SLM: # %bb.0: |
| ; SLM-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50] |
| ; SLM-NEXT: pabsb (%rdi), %xmm0 # sched: [4:1.00] |
| ; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] |
| ; SLM-NEXT: retq # sched: [4:1.00] |
| ; |
| ; SANDY-SSE-LABEL: test_pabsb: |
| ; SANDY-SSE: # %bb.0: |
| ; SANDY-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50] |
| ; SANDY-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [7:0.50] |
| ; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] |
| ; SANDY-SSE-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SANDY-LABEL: test_pabsb: |
| ; SANDY: # %bb.0: |
| ; SANDY-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50] |
| ; SANDY-NEXT: vpabsb (%rdi), %xmm1 # sched: [7:0.50] |
| ; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] |
| ; SANDY-NEXT: retq # sched: [1:1.00] |
| ; |
| ; HASWELL-SSE-LABEL: test_pabsb: |
| ; HASWELL-SSE: # %bb.0: |
| ; HASWELL-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50] |
| ; HASWELL-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [7:0.50] |
| ; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] |
| ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; HASWELL-LABEL: test_pabsb: |
| ; HASWELL: # %bb.0: |
| ; HASWELL-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50] |
| ; HASWELL-NEXT: vpabsb (%rdi), %xmm1 # sched: [7:0.50] |
| ; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] |
| ; HASWELL-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BROADWELL-SSE-LABEL: test_pabsb: |
| ; BROADWELL-SSE: # %bb.0: |
| ; BROADWELL-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50] |
| ; BROADWELL-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [6:0.50] |
| ; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] |
| ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BROADWELL-LABEL: test_pabsb: |
| ; BROADWELL: # %bb.0: |
| ; BROADWELL-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50] |
| ; BROADWELL-NEXT: vpabsb (%rdi), %xmm1 # sched: [6:0.50] |
| ; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] |
| ; BROADWELL-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKYLAKE-SSE-LABEL: test_pabsb: |
| ; SKYLAKE-SSE: # %bb.0: |
| ; SKYLAKE-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50] |
| ; SKYLAKE-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [7:0.50] |
| ; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] |
| ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKYLAKE-LABEL: test_pabsb: |
| ; SKYLAKE: # %bb.0: |
| ; SKYLAKE-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKYLAKE-NEXT: vpabsb (%rdi), %xmm1 # sched: [7:0.50] |
| ; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] |
| ; SKYLAKE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKX-SSE-LABEL: test_pabsb: |
| ; SKX-SSE: # %bb.0: |
| ; SKX-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50] |
| ; SKX-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [7:0.50] |
| ; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] |
| ; SKX-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKX-LABEL: test_pabsb: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpabsb (%rdi), %xmm1 # sched: [7:0.50] |
| ; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BTVER2-SSE-LABEL: test_pabsb: |
| ; BTVER2-SSE: # %bb.0: |
| ; BTVER2-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50] |
| ; BTVER2-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [6:1.00] |
| ; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] |
| ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] |
| ; |
| ; BTVER2-LABEL: test_pabsb: |
| ; BTVER2: # %bb.0: |
| ; BTVER2-NEXT: vpabsb (%rdi), %xmm1 # sched: [6:1.00] |
| ; BTVER2-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50] |
| ; BTVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] |
| ; BTVER2-NEXT: retq # sched: [4:1.00] |
| ; |
| ; ZNVER1-SSE-LABEL: test_pabsb: |
| ; ZNVER1-SSE: # %bb.0: |
| ; ZNVER1-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.25] |
| ; ZNVER1-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [8:0.50] |
| ; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25] |
| ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] |
| ; |
| ; ZNVER1-LABEL: test_pabsb: |
| ; ZNVER1: # %bb.0: |
| ; ZNVER1-NEXT: vpabsb (%rdi), %xmm1 # sched: [8:0.50] |
| ; ZNVER1-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.25] |
| ; ZNVER1-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25] |
| ; ZNVER1-NEXT: retq # sched: [1:0.50] |
| ; First call takes its operand directly from the argument %a0. |
| %1 = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0) |
| ; Second call takes its operand from a 16-byte-aligned load of %a1. |
| %2 = load <16 x i8>, <16 x i8> *%a1, align 16 |
| %3 = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %2) |
| ; Both results are combined into the returned value. |
| %4 = or <16 x i8> %1, %3 |
| ret <16 x i8> %4 |
| } |
| ; Intrinsic called by test_pabsb. |
| declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone |
| |
| ; PABSD scheduling test. Calls llvm.x86.ssse3.pabs.d.128 once on the register |
| ; argument %a0 and once on a vector loaded from %a1, then ORs the two results |
| ; into the return value. The autogenerated lines below record the instruction |
| ; sequence and the "sched" annotation printed by llc -print-schedule for each |
| ; configuration listed in the RUN lines at the top of the file. |
| define <4 x i32> @test_pabsd(<4 x i32> %a0, <4 x i32> *%a1) { |
| ; GENERIC-LABEL: test_pabsd: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50] |
| ; GENERIC-NEXT: pabsd (%rdi), %xmm0 # sched: [7:0.50] |
| ; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; ATOM-LABEL: test_pabsd: |
| ; ATOM: # %bb.0: |
| ; ATOM-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50] |
| ; ATOM-NEXT: pabsd (%rdi), %xmm0 # sched: [1:1.00] |
| ; ATOM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] |
| ; ATOM-NEXT: nop # sched: [1:0.50] |
| ; ATOM-NEXT: nop # sched: [1:0.50] |
| ; ATOM-NEXT: retq # sched: [79:39.50] |
| ; |
| ; SLM-LABEL: test_pabsd: |
| ; SLM: # %bb.0: |
| ; SLM-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50] |
| ; SLM-NEXT: pabsd (%rdi), %xmm0 # sched: [4:1.00] |
| ; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] |
| ; SLM-NEXT: retq # sched: [4:1.00] |
| ; |
| ; SANDY-SSE-LABEL: test_pabsd: |
| ; SANDY-SSE: # %bb.0: |
| ; SANDY-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50] |
| ; SANDY-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [7:0.50] |
| ; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] |
| ; SANDY-SSE-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SANDY-LABEL: test_pabsd: |
| ; SANDY: # %bb.0: |
| ; SANDY-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50] |
| ; SANDY-NEXT: vpabsd (%rdi), %xmm1 # sched: [7:0.50] |
| ; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] |
| ; SANDY-NEXT: retq # sched: [1:1.00] |
| ; |
| ; HASWELL-SSE-LABEL: test_pabsd: |
| ; HASWELL-SSE: # %bb.0: |
| ; HASWELL-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50] |
| ; HASWELL-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [7:0.50] |
| ; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] |
| ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; HASWELL-LABEL: test_pabsd: |
| ; HASWELL: # %bb.0: |
| ; HASWELL-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50] |
| ; HASWELL-NEXT: vpabsd (%rdi), %xmm1 # sched: [7:0.50] |
| ; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] |
| ; HASWELL-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BROADWELL-SSE-LABEL: test_pabsd: |
| ; BROADWELL-SSE: # %bb.0: |
| ; BROADWELL-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50] |
| ; BROADWELL-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [6:0.50] |
| ; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] |
| ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BROADWELL-LABEL: test_pabsd: |
| ; BROADWELL: # %bb.0: |
| ; BROADWELL-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50] |
| ; BROADWELL-NEXT: vpabsd (%rdi), %xmm1 # sched: [6:0.50] |
| ; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] |
| ; BROADWELL-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKYLAKE-SSE-LABEL: test_pabsd: |
| ; SKYLAKE-SSE: # %bb.0: |
| ; SKYLAKE-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50] |
| ; SKYLAKE-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [7:0.50] |
| ; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] |
| ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKYLAKE-LABEL: test_pabsd: |
| ; SKYLAKE: # %bb.0: |
| ; SKYLAKE-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKYLAKE-NEXT: vpabsd (%rdi), %xmm1 # sched: [7:0.50] |
| ; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] |
| ; SKYLAKE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKX-SSE-LABEL: test_pabsd: |
| ; SKX-SSE: # %bb.0: |
| ; SKX-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50] |
| ; SKX-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [7:0.50] |
| ; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] |
| ; SKX-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKX-LABEL: test_pabsd: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpabsd (%rdi), %xmm1 # sched: [7:0.50] |
| ; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BTVER2-SSE-LABEL: test_pabsd: |
| ; BTVER2-SSE: # %bb.0: |
| ; BTVER2-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50] |
| ; BTVER2-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [6:1.00] |
| ; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] |
| ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] |
| ; |
| ; BTVER2-LABEL: test_pabsd: |
| ; BTVER2: # %bb.0: |
| ; BTVER2-NEXT: vpabsd (%rdi), %xmm1 # sched: [6:1.00] |
| ; BTVER2-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50] |
| ; BTVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] |
| ; BTVER2-NEXT: retq # sched: [4:1.00] |
| ; |
| ; ZNVER1-SSE-LABEL: test_pabsd: |
| ; ZNVER1-SSE: # %bb.0: |
| ; ZNVER1-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.25] |
| ; ZNVER1-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [8:0.50] |
| ; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25] |
| ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] |
| ; |
| ; ZNVER1-LABEL: test_pabsd: |
| ; ZNVER1: # %bb.0: |
| ; ZNVER1-NEXT: vpabsd (%rdi), %xmm1 # sched: [8:0.50] |
| ; ZNVER1-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.25] |
| ; ZNVER1-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25] |
| ; ZNVER1-NEXT: retq # sched: [1:0.50] |
| ; First call takes its operand directly from the argument %a0. |
| %1 = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0) |
| ; Second call takes its operand from a 16-byte-aligned load of %a1. |
| %2 = load <4 x i32>, <4 x i32> *%a1, align 16 |
| %3 = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %2) |
| ; Both results are combined into the returned value. |
| %4 = or <4 x i32> %1, %3 |
| ret <4 x i32> %4 |
| } |
| ; Intrinsic called by test_pabsd. |
| declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone |
| |
| ; PABSW scheduling test. Calls llvm.x86.ssse3.pabs.w.128 once on the register |
| ; argument %a0 and once on a vector loaded from %a1, then ORs the two results |
| ; into the return value. The autogenerated lines below record the instruction |
| ; sequence and the "sched" annotation printed by llc -print-schedule for each |
| ; configuration listed in the RUN lines at the top of the file. |
| define <8 x i16> @test_pabsw(<8 x i16> %a0, <8 x i16> *%a1) { |
| ; GENERIC-LABEL: test_pabsw: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50] |
| ; GENERIC-NEXT: pabsw (%rdi), %xmm0 # sched: [7:0.50] |
| ; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; ATOM-LABEL: test_pabsw: |
| ; ATOM: # %bb.0: |
| ; ATOM-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50] |
| ; ATOM-NEXT: pabsw (%rdi), %xmm0 # sched: [1:1.00] |
| ; ATOM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] |
| ; ATOM-NEXT: nop # sched: [1:0.50] |
| ; ATOM-NEXT: nop # sched: [1:0.50] |
| ; ATOM-NEXT: retq # sched: [79:39.50] |
| ; |
| ; SLM-LABEL: test_pabsw: |
| ; SLM: # %bb.0: |
| ; SLM-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50] |
| ; SLM-NEXT: pabsw (%rdi), %xmm0 # sched: [4:1.00] |
| ; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] |
| ; SLM-NEXT: retq # sched: [4:1.00] |
| ; |
| ; SANDY-SSE-LABEL: test_pabsw: |
| ; SANDY-SSE: # %bb.0: |
| ; SANDY-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50] |
| ; SANDY-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [7:0.50] |
| ; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] |
| ; SANDY-SSE-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SANDY-LABEL: test_pabsw: |
| ; SANDY: # %bb.0: |
| ; SANDY-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50] |
| ; SANDY-NEXT: vpabsw (%rdi), %xmm1 # sched: [7:0.50] |
| ; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] |
| ; SANDY-NEXT: retq # sched: [1:1.00] |
| ; |
| ; HASWELL-SSE-LABEL: test_pabsw: |
| ; HASWELL-SSE: # %bb.0: |
| ; HASWELL-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50] |
| ; HASWELL-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [7:0.50] |
| ; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] |
| ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; HASWELL-LABEL: test_pabsw: |
| ; HASWELL: # %bb.0: |
| ; HASWELL-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50] |
| ; HASWELL-NEXT: vpabsw (%rdi), %xmm1 # sched: [7:0.50] |
| ; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] |
| ; HASWELL-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BROADWELL-SSE-LABEL: test_pabsw: |
| ; BROADWELL-SSE: # %bb.0: |
| ; BROADWELL-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50] |
| ; BROADWELL-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [6:0.50] |
| ; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] |
| ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BROADWELL-LABEL: test_pabsw: |
| ; BROADWELL: # %bb.0: |
| ; BROADWELL-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50] |
| ; BROADWELL-NEXT: vpabsw (%rdi), %xmm1 # sched: [6:0.50] |
| ; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] |
| ; BROADWELL-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKYLAKE-SSE-LABEL: test_pabsw: |
| ; SKYLAKE-SSE: # %bb.0: |
| ; SKYLAKE-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50] |
| ; SKYLAKE-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [7:0.50] |
| ; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] |
| ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKYLAKE-LABEL: test_pabsw: |
| ; SKYLAKE: # %bb.0: |
| ; SKYLAKE-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKYLAKE-NEXT: vpabsw (%rdi), %xmm1 # sched: [7:0.50] |
| ; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] |
| ; SKYLAKE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKX-SSE-LABEL: test_pabsw: |
| ; SKX-SSE: # %bb.0: |
| ; SKX-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50] |
| ; SKX-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [7:0.50] |
| ; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] |
| ; SKX-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKX-LABEL: test_pabsw: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpabsw (%rdi), %xmm1 # sched: [7:0.50] |
| ; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BTVER2-SSE-LABEL: test_pabsw: |
| ; BTVER2-SSE: # %bb.0: |
| ; BTVER2-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50] |
| ; BTVER2-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [6:1.00] |
| ; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] |
| ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] |
| ; |
| ; BTVER2-LABEL: test_pabsw: |
| ; BTVER2: # %bb.0: |
| ; BTVER2-NEXT: vpabsw (%rdi), %xmm1 # sched: [6:1.00] |
| ; BTVER2-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50] |
| ; BTVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] |
| ; BTVER2-NEXT: retq # sched: [4:1.00] |
| ; |
| ; ZNVER1-SSE-LABEL: test_pabsw: |
| ; ZNVER1-SSE: # %bb.0: |
| ; ZNVER1-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.25] |
| ; ZNVER1-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [8:0.50] |
| ; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25] |
| ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] |
| ; |
| ; ZNVER1-LABEL: test_pabsw: |
| ; ZNVER1: # %bb.0: |
| ; ZNVER1-NEXT: vpabsw (%rdi), %xmm1 # sched: [8:0.50] |
| ; ZNVER1-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.25] |
| ; ZNVER1-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25] |
| ; ZNVER1-NEXT: retq # sched: [1:0.50] |
| ; First call takes its operand directly from the argument %a0. |
| %1 = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0) |
| ; Second call takes its operand from a 16-byte-aligned load of %a1. |
| %2 = load <8 x i16>, <8 x i16> *%a1, align 16 |
| %3 = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %2) |
| ; Both results are combined into the returned value. |
| %4 = or <8 x i16> %1, %3 |
| ret <8 x i16> %4 |
| } |
| ; Intrinsic called by test_pabsw. |
| declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone |
| |
| ; PALIGNR scheduling test. No intrinsic is used; the function is written as two |
| ; shufflevector instructions whose masks select a contiguous run of elements |
| ; from the concatenation of their two inputs. The autogenerated lines below |
| ; show both shuffles emitted as palignr/vpalignr, the second one with a memory |
| ; source, together with the "sched" annotation printed by llc -print-schedule |
| ; for each configuration listed in the RUN lines at the top of the file. |
| define <8 x i16> @test_palignr(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { |
| ; GENERIC-LABEL: test_palignr: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50] |
| ; GENERIC-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50] |
| ; GENERIC-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; ATOM-LABEL: test_palignr: |
| ; ATOM: # %bb.0: |
| ; ATOM-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] |
| ; ATOM-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [1:1.00] |
| ; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] |
| ; ATOM-NEXT: nop # sched: [1:0.50] |
| ; ATOM-NEXT: nop # sched: [1:0.50] |
| ; ATOM-NEXT: retq # sched: [79:39.50] |
| ; |
| ; SLM-LABEL: test_palignr: |
| ; SLM: # %bb.0: |
| ; SLM-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] |
| ; SLM-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [4:1.00] |
| ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] |
| ; SLM-NEXT: retq # sched: [4:1.00] |
| ; |
| ; SANDY-SSE-LABEL: test_palignr: |
| ; SANDY-SSE: # %bb.0: |
| ; SANDY-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50] |
| ; SANDY-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50] |
| ; SANDY-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] |
| ; SANDY-SSE-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SANDY-LABEL: test_palignr: |
| ; SANDY: # %bb.0: |
| ; SANDY-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50] |
| ; SANDY-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50] |
| ; SANDY-NEXT: retq # sched: [1:1.00] |
| ; |
| ; HASWELL-SSE-LABEL: test_palignr: |
| ; HASWELL-SSE: # %bb.0: |
| ; HASWELL-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] |
| ; HASWELL-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00] |
| ; HASWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] |
| ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; HASWELL-LABEL: test_palignr: |
| ; HASWELL: # %bb.0: |
| ; HASWELL-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] |
| ; HASWELL-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00] |
| ; HASWELL-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BROADWELL-SSE-LABEL: test_palignr: |
| ; BROADWELL-SSE: # %bb.0: |
| ; BROADWELL-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] |
| ; BROADWELL-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [6:1.00] |
| ; BROADWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] |
| ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BROADWELL-LABEL: test_palignr: |
| ; BROADWELL: # %bb.0: |
| ; BROADWELL-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] |
| ; BROADWELL-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [6:1.00] |
| ; BROADWELL-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKYLAKE-SSE-LABEL: test_palignr: |
| ; SKYLAKE-SSE: # %bb.0: |
| ; SKYLAKE-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] |
| ; SKYLAKE-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00] |
| ; SKYLAKE-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] |
| ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKYLAKE-LABEL: test_palignr: |
| ; SKYLAKE: # %bb.0: |
| ; SKYLAKE-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] |
| ; SKYLAKE-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00] |
| ; SKYLAKE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKX-SSE-LABEL: test_palignr: |
| ; SKX-SSE: # %bb.0: |
| ; SKX-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] |
| ; SKX-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00] |
| ; SKX-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] |
| ; SKX-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKX-LABEL: test_palignr: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] |
| ; SKX-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BTVER2-SSE-LABEL: test_palignr: |
| ; BTVER2-SSE: # %bb.0: |
| ; BTVER2-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50] |
| ; BTVER2-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [6:1.00] |
| ; BTVER2-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] |
| ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] |
| ; |
| ; BTVER2-LABEL: test_palignr: |
| ; BTVER2: # %bb.0: |
| ; BTVER2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50] |
| ; BTVER2-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [6:1.00] |
| ; BTVER2-NEXT: retq # sched: [4:1.00] |
| ; |
| ; ZNVER1-SSE-LABEL: test_palignr: |
| ; ZNVER1-SSE: # %bb.0: |
| ; ZNVER1-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.25] |
| ; ZNVER1-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [8:0.50] |
| ; ZNVER1-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.25] |
| ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] |
| ; |
| ; ZNVER1-LABEL: test_palignr: |
| ; ZNVER1: # %bb.0: |
| ; ZNVER1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.25] |
| ; ZNVER1-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [8:0.50] |
| ; ZNVER1-NEXT: retq # sched: [1:0.50] |
| ; Elements 3..10 of the 16-element concatenation %a0:%a1, i.e. the top five |
| ; i16 lanes of %a0 followed by the bottom three of %a1 (bytes 6-15 and 0-5 in |
| ; the check lines above). |
| %1 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10> |
| ; Second shuffle takes its first input from a 16-byte-aligned load of %a2. |
| %2 = load <8 x i16>, <8 x i16> *%a2, align 16 |
| ; Elements 7..14 of the concatenation %2:%1, i.e. the top i16 lane of the |
| ; loaded vector followed by the bottom seven of %1. |
| %3 = shufflevector <8 x i16> %2, <8 x i16> %1, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14> |
| ret <8 x i16> %3 |
| } |
| |
| ; PHADDD scheduling test. Calls llvm.x86.ssse3.phadd.d.128 on the two register |
| ; arguments, then calls it again with the first result and a vector loaded |
| ; from %a2, returning the second result. The autogenerated lines below record |
| ; the instruction sequence and the "sched" annotation printed by |
| ; llc -print-schedule for each configuration listed in the RUN lines at the top |
| ; of the file. |
| define <4 x i32> @test_phaddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { |
| ; GENERIC-LABEL: test_phaddd: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: phaddd %xmm1, %xmm0 # sched: [3:1.50] |
| ; GENERIC-NEXT: phaddd (%rdi), %xmm0 # sched: [9:1.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; ATOM-LABEL: test_phaddd: |
| ; ATOM: # %bb.0: |
| ; ATOM-NEXT: phaddd %xmm1, %xmm0 # sched: [3:1.50] |
| ; ATOM-NEXT: phaddd (%rdi), %xmm0 # sched: [4:2.00] |
| ; ATOM-NEXT: retq # sched: [79:39.50] |
| ; |
| ; SLM-LABEL: test_phaddd: |
| ; SLM: # %bb.0: |
| ; SLM-NEXT: phaddd %xmm1, %xmm0 # sched: [1:0.50] |
| ; SLM-NEXT: phaddd (%rdi), %xmm0 # sched: [4:1.00] |
| ; SLM-NEXT: retq # sched: [4:1.00] |
| ; |
| ; SANDY-SSE-LABEL: test_phaddd: |
| ; SANDY-SSE: # %bb.0: |
| ; SANDY-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [3:1.50] |
| ; SANDY-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [9:1.50] |
| ; SANDY-SSE-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SANDY-LABEL: test_phaddd: |
| ; SANDY: # %bb.0: |
| ; SANDY-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:1.50] |
| ; SANDY-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:1.50] |
| ; SANDY-NEXT: retq # sched: [1:1.00] |
| ; |
| ; HASWELL-SSE-LABEL: test_phaddd: |
| ; HASWELL-SSE: # %bb.0: |
| ; HASWELL-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [3:2.00] |
| ; HASWELL-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [9:2.00] |
| ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; HASWELL-LABEL: test_phaddd: |
| ; HASWELL: # %bb.0: |
| ; HASWELL-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] |
| ; HASWELL-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] |
| ; HASWELL-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BROADWELL-SSE-LABEL: test_phaddd: |
| ; BROADWELL-SSE: # %bb.0: |
| ; BROADWELL-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [3:2.00] |
| ; BROADWELL-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [8:2.00] |
| ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BROADWELL-LABEL: test_phaddd: |
| ; BROADWELL: # %bb.0: |
| ; BROADWELL-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] |
| ; BROADWELL-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [8:2.00] |
| ; BROADWELL-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKYLAKE-SSE-LABEL: test_phaddd: |
| ; SKYLAKE-SSE: # %bb.0: |
| ; SKYLAKE-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [3:2.00] |
| ; SKYLAKE-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [9:2.00] |
| ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKYLAKE-LABEL: test_phaddd: |
| ; SKYLAKE: # %bb.0: |
| ; SKYLAKE-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] |
| ; SKYLAKE-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] |
| ; SKYLAKE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKX-SSE-LABEL: test_phaddd: |
| ; SKX-SSE: # %bb.0: |
| ; SKX-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [3:2.00] |
| ; SKX-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [9:2.00] |
| ; SKX-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKX-LABEL: test_phaddd: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] |
| ; SKX-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BTVER2-SSE-LABEL: test_phaddd: |
| ; BTVER2-SSE: # %bb.0: |
| ; BTVER2-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [1:0.50] |
| ; BTVER2-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [6:1.00] |
| ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] |
| ; |
| ; BTVER2-LABEL: test_phaddd: |
| ; BTVER2: # %bb.0: |
| ; BTVER2-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] |
| ; BTVER2-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] |
| ; BTVER2-NEXT: retq # sched: [4:1.00] |
| ; |
| ; ZNVER1-SSE-LABEL: test_phaddd: |
| ; ZNVER1-SSE: # %bb.0: |
| ; ZNVER1-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [100:0.25] |
| ; ZNVER1-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [100:0.25] |
| ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] |
| ; |
| ; ZNVER1-LABEL: test_phaddd: |
| ; ZNVER1: # %bb.0: |
| ; ZNVER1-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [100:0.25] |
| ; ZNVER1-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [100:0.25] |
| ; ZNVER1-NEXT: retq # sched: [1:0.50] |
| ; First call takes both operands from the register arguments. |
| %1 = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1) |
| ; Second call chains on %1 and takes its other operand from a |
| ; 16-byte-aligned load of %a2. |
| %2 = load <4 x i32>, <4 x i32> *%a2, align 16 |
| %3 = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %1, <4 x i32> %2) |
| ret <4 x i32> %3 |
| } |
| ; Intrinsic called by test_phaddd. |
| declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone |
| |
| define <8 x i16> @test_phaddsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { |
| ; GENERIC-LABEL: test_phaddsw: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: phaddsw %xmm1, %xmm0 # sched: [3:1.50] |
| ; GENERIC-NEXT: phaddsw (%rdi), %xmm0 # sched: [9:1.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; ATOM-LABEL: test_phaddsw: |
| ; ATOM: # %bb.0: |
| ; ATOM-NEXT: phaddsw %xmm1, %xmm0 # sched: [7:3.50] |
| ; ATOM-NEXT: phaddsw (%rdi), %xmm0 # sched: [8:4.00] |
| ; ATOM-NEXT: retq # sched: [79:39.50] |
| ; |
| ; SLM-LABEL: test_phaddsw: |
| ; SLM: # %bb.0: |
| ; SLM-NEXT: phaddsw %xmm1, %xmm0 # sched: [1:0.50] |
| ; SLM-NEXT: phaddsw (%rdi), %xmm0 # sched: [4:1.00] |
| ; SLM-NEXT: retq # sched: [4:1.00] |
| ; |
| ; SANDY-SSE-LABEL: test_phaddsw: |
| ; SANDY-SSE: # %bb.0: |
| ; SANDY-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [3:1.50] |
| ; SANDY-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [9:1.50] |
| ; SANDY-SSE-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SANDY-LABEL: test_phaddsw: |
| ; SANDY: # %bb.0: |
| ; SANDY-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50] |
| ; SANDY-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50] |
| ; SANDY-NEXT: retq # sched: [1:1.00] |
| ; |
| ; HASWELL-SSE-LABEL: test_phaddsw: |
| ; HASWELL-SSE: # %bb.0: |
| ; HASWELL-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [3:2.00] |
| ; HASWELL-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [9:2.00] |
| ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; HASWELL-LABEL: test_phaddsw: |
| ; HASWELL: # %bb.0: |
| ; HASWELL-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] |
| ; HASWELL-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] |
| ; HASWELL-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BROADWELL-SSE-LABEL: test_phaddsw: |
| ; BROADWELL-SSE: # %bb.0: |
| ; BROADWELL-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [3:2.00] |
| ; BROADWELL-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [8:2.00] |
| ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BROADWELL-LABEL: test_phaddsw: |
| ; BROADWELL: # %bb.0: |
| ; BROADWELL-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] |
| ; BROADWELL-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [8:2.00] |
| ; BROADWELL-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKYLAKE-SSE-LABEL: test_phaddsw: |
| ; SKYLAKE-SSE: # %bb.0: |
| ; SKYLAKE-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [3:2.00] |
| ; SKYLAKE-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [9:2.00] |
| ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKYLAKE-LABEL: test_phaddsw: |
| ; SKYLAKE: # %bb.0: |
| ; SKYLAKE-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] |
| ; SKYLAKE-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] |
| ; SKYLAKE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKX-SSE-LABEL: test_phaddsw: |
| ; SKX-SSE: # %bb.0: |
| ; SKX-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [3:2.00] |
| ; SKX-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [9:2.00] |
| ; SKX-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKX-LABEL: test_phaddsw: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] |
| ; SKX-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BTVER2-SSE-LABEL: test_phaddsw: |
| ; BTVER2-SSE: # %bb.0: |
| ; BTVER2-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [1:0.50] |
| ; BTVER2-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [6:1.00] |
| ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] |
| ; |
| ; BTVER2-LABEL: test_phaddsw: |
| ; BTVER2: # %bb.0: |
| ; BTVER2-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] |
| ; BTVER2-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] |
| ; BTVER2-NEXT: retq # sched: [4:1.00] |
| ; |
| ; ZNVER1-SSE-LABEL: test_phaddsw: |
| ; ZNVER1-SSE: # %bb.0: |
| ; ZNVER1-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [100:0.25] |
| ; ZNVER1-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [100:0.25] |
| ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] |
| ; |
| ; ZNVER1-LABEL: test_phaddsw: |
| ; ZNVER1: # %bb.0: |
| ; ZNVER1-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [100:0.25] |
| ; ZNVER1-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [100:0.25] |
| ; ZNVER1-NEXT: retq # sched: [1:0.50] |
| %1 = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1) |
| %2 = load <8 x i16>, <8 x i16> *%a2, align 16 |
| %3 = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %1, <8 x i16> %2) |
| ret <8 x i16> %3 |
| } |
| ; Declaration of the intrinsic that test_phaddsw calls twice in its body. |
| declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone |
| |
| ; test_phaddw: calls llvm.x86.ssse3.phadd.w.128 twice, first on the two vector |
| ; arguments and then on that result plus a 16-byte-aligned load from %a2, so the |
| ; expected output for each CPU has one reg-reg and one reg-mem (v)phaddw. |
| ; The per-CPU assertion blocks are autogenerated (utils/update_llc_test_checks.py); |
| ; regenerate them with that script instead of editing them by hand. |
| define <8 x i16> @test_phaddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { |
| ; GENERIC-LABEL: test_phaddw: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: phaddw %xmm1, %xmm0 # sched: [3:1.50] |
| ; GENERIC-NEXT: phaddw (%rdi), %xmm0 # sched: [9:1.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; ATOM-LABEL: test_phaddw: |
| ; ATOM: # %bb.0: |
| ; ATOM-NEXT: phaddw %xmm1, %xmm0 # sched: [7:3.50] |
| ; ATOM-NEXT: phaddw (%rdi), %xmm0 # sched: [8:4.00] |
| ; ATOM-NEXT: retq # sched: [79:39.50] |
| ; |
| ; SLM-LABEL: test_phaddw: |
| ; SLM: # %bb.0: |
| ; SLM-NEXT: phaddw %xmm1, %xmm0 # sched: [1:0.50] |
| ; SLM-NEXT: phaddw (%rdi), %xmm0 # sched: [4:1.00] |
| ; SLM-NEXT: retq # sched: [4:1.00] |
| ; |
| ; SANDY-SSE-LABEL: test_phaddw: |
| ; SANDY-SSE: # %bb.0: |
| ; SANDY-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [3:1.50] |
| ; SANDY-SSE-NEXT: phaddw (%rdi), %xmm0 # sched: [9:1.50] |
| ; SANDY-SSE-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SANDY-LABEL: test_phaddw: |
| ; SANDY: # %bb.0: |
| ; SANDY-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.50] |
| ; SANDY-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:1.50] |
| ; SANDY-NEXT: retq # sched: [1:1.00] |
| ; |
| ; HASWELL-SSE-LABEL: test_phaddw: |
| ; HASWELL-SSE: # %bb.0: |
| ; HASWELL-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [3:2.00] |
| ; HASWELL-SSE-NEXT: phaddw (%rdi), %xmm0 # sched: [9:2.00] |
| ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; HASWELL-LABEL: test_phaddw: |
| ; HASWELL: # %bb.0: |
| ; HASWELL-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] |
| ; HASWELL-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] |
| ; HASWELL-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BROADWELL-SSE-LABEL: test_phaddw: |
| ; BROADWELL-SSE: # %bb.0: |
| ; BROADWELL-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [3:2.00] |
| ; BROADWELL-SSE-NEXT: phaddw (%rdi), %xmm0 # sched: [8:2.00] |
| ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BROADWELL-LABEL: test_phaddw: |
| ; BROADWELL: # %bb.0: |
| ; BROADWELL-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] |
| ; BROADWELL-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [8:2.00] |
| ; BROADWELL-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKYLAKE-SSE-LABEL: test_phaddw: |
| ; SKYLAKE-SSE: # %bb.0: |
| ; SKYLAKE-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [3:2.00] |
| ; SKYLAKE-SSE-NEXT: phaddw (%rdi), %xmm0 # sched: [9:2.00] |
| ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKYLAKE-LABEL: test_phaddw: |
| ; SKYLAKE: # %bb.0: |
| ; SKYLAKE-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] |
| ; SKYLAKE-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] |
| ; SKYLAKE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKX-SSE-LABEL: test_phaddw: |
| ; SKX-SSE: # %bb.0: |
| ; SKX-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [3:2.00] |
| ; SKX-SSE-NEXT: phaddw (%rdi), %xmm0 # sched: [9:2.00] |
| ; SKX-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKX-LABEL: test_phaddw: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] |
| ; SKX-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BTVER2-SSE-LABEL: test_phaddw: |
| ; BTVER2-SSE: # %bb.0: |
| ; BTVER2-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [1:0.50] |
| ; BTVER2-SSE-NEXT: phaddw (%rdi), %xmm0 # sched: [6:1.00] |
| ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] |
| ; |
| ; BTVER2-LABEL: test_phaddw: |
| ; BTVER2: # %bb.0: |
| ; BTVER2-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] |
| ; BTVER2-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] |
| ; BTVER2-NEXT: retq # sched: [4:1.00] |
| ; |
| ; ZNVER1-SSE-LABEL: test_phaddw: |
| ; ZNVER1-SSE: # %bb.0: |
| ; ZNVER1-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [100:0.25] |
| ; ZNVER1-SSE-NEXT: phaddw (%rdi), %xmm0 # sched: [100:0.25] |
| ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] |
| ; |
| ; ZNVER1-LABEL: test_phaddw: |
| ; ZNVER1: # %bb.0: |
| ; ZNVER1-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [100:0.25] |
| ; ZNVER1-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [100:0.25] |
| ; ZNVER1-NEXT: retq # sched: [1:0.50] |
| %1 = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1) |
| %2 = load <8 x i16>, <8 x i16> *%a2, align 16 |
| %3 = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %1, <8 x i16> %2) |
| ret <8 x i16> %3 |
| } |
| declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone |
| |
| ; test_phsubd: calls llvm.x86.ssse3.phsub.d.128 twice, first on the two vector |
| ; arguments and then on that result plus a 16-byte-aligned load from %a2, so the |
| ; expected output for each CPU has one reg-reg and one reg-mem (v)phsubd. |
| ; The per-CPU assertion blocks are autogenerated (utils/update_llc_test_checks.py); |
| ; regenerate them with that script instead of editing them by hand. |
| define <4 x i32> @test_phsubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { |
| ; GENERIC-LABEL: test_phsubd: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: phsubd %xmm1, %xmm0 # sched: [3:1.50] |
| ; GENERIC-NEXT: phsubd (%rdi), %xmm0 # sched: [9:1.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; ATOM-LABEL: test_phsubd: |
| ; ATOM: # %bb.0: |
| ; ATOM-NEXT: phsubd %xmm1, %xmm0 # sched: [3:1.50] |
| ; ATOM-NEXT: phsubd (%rdi), %xmm0 # sched: [4:2.00] |
| ; ATOM-NEXT: retq # sched: [79:39.50] |
| ; |
| ; SLM-LABEL: test_phsubd: |
| ; SLM: # %bb.0: |
| ; SLM-NEXT: phsubd %xmm1, %xmm0 # sched: [1:0.50] |
| ; SLM-NEXT: phsubd (%rdi), %xmm0 # sched: [4:1.00] |
| ; SLM-NEXT: retq # sched: [4:1.00] |
| ; |
| ; SANDY-SSE-LABEL: test_phsubd: |
| ; SANDY-SSE: # %bb.0: |
| ; SANDY-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [3:1.50] |
| ; SANDY-SSE-NEXT: phsubd (%rdi), %xmm0 # sched: [9:1.50] |
| ; SANDY-SSE-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SANDY-LABEL: test_phsubd: |
| ; SANDY: # %bb.0: |
| ; SANDY-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:1.50] |
| ; SANDY-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:1.50] |
| ; SANDY-NEXT: retq # sched: [1:1.00] |
| ; |
| ; HASWELL-SSE-LABEL: test_phsubd: |
| ; HASWELL-SSE: # %bb.0: |
| ; HASWELL-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [3:2.00] |
| ; HASWELL-SSE-NEXT: phsubd (%rdi), %xmm0 # sched: [9:2.00] |
| ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; HASWELL-LABEL: test_phsubd: |
| ; HASWELL: # %bb.0: |
| ; HASWELL-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] |
| ; HASWELL-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] |
| ; HASWELL-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BROADWELL-SSE-LABEL: test_phsubd: |
| ; BROADWELL-SSE: # %bb.0: |
| ; BROADWELL-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [3:2.00] |
| ; BROADWELL-SSE-NEXT: phsubd (%rdi), %xmm0 # sched: [8:2.00] |
| ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BROADWELL-LABEL: test_phsubd: |
| ; BROADWELL: # %bb.0: |
| ; BROADWELL-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] |
| ; BROADWELL-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [8:2.00] |
| ; BROADWELL-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKYLAKE-SSE-LABEL: test_phsubd: |
| ; SKYLAKE-SSE: # %bb.0: |
| ; SKYLAKE-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [3:2.00] |
| ; SKYLAKE-SSE-NEXT: phsubd (%rdi), %xmm0 # sched: [9:2.00] |
| ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKYLAKE-LABEL: test_phsubd: |
| ; SKYLAKE: # %bb.0: |
| ; SKYLAKE-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] |
| ; SKYLAKE-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] |
| ; SKYLAKE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKX-SSE-LABEL: test_phsubd: |
| ; SKX-SSE: # %bb.0: |
| ; SKX-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [3:2.00] |
| ; SKX-SSE-NEXT: phsubd (%rdi), %xmm0 # sched: [9:2.00] |
| ; SKX-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKX-LABEL: test_phsubd: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] |
| ; SKX-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BTVER2-SSE-LABEL: test_phsubd: |
| ; BTVER2-SSE: # %bb.0: |
| ; BTVER2-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [1:0.50] |
| ; BTVER2-SSE-NEXT: phsubd (%rdi), %xmm0 # sched: [6:1.00] |
| ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] |
| ; |
| ; BTVER2-LABEL: test_phsubd: |
| ; BTVER2: # %bb.0: |
| ; BTVER2-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] |
| ; BTVER2-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] |
| ; BTVER2-NEXT: retq # sched: [4:1.00] |
| ; |
| ; ZNVER1-SSE-LABEL: test_phsubd: |
| ; ZNVER1-SSE: # %bb.0: |
| ; ZNVER1-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [100:0.25] |
| ; ZNVER1-SSE-NEXT: phsubd (%rdi), %xmm0 # sched: [100:0.25] |
| ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] |
| ; |
| ; ZNVER1-LABEL: test_phsubd: |
| ; ZNVER1: # %bb.0: |
| ; ZNVER1-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [100:0.25] |
| ; ZNVER1-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [100:0.25] |
| ; ZNVER1-NEXT: retq # sched: [1:0.50] |
| %1 = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1) |
| %2 = load <4 x i32>, <4 x i32> *%a2, align 16 |
| %3 = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %1, <4 x i32> %2) |
| ret <4 x i32> %3 |
| } |
| declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone |
| |
| ; test_phsubsw: calls llvm.x86.ssse3.phsub.sw.128 twice, first on the two vector |
| ; arguments and then on that result plus a 16-byte-aligned load from %a2, so the |
| ; expected output for each CPU has one reg-reg and one reg-mem (v)phsubsw. |
| ; The per-CPU assertion blocks are autogenerated (utils/update_llc_test_checks.py); |
| ; regenerate them with that script instead of editing them by hand. |
| define <8 x i16> @test_phsubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { |
| ; GENERIC-LABEL: test_phsubsw: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: phsubsw %xmm1, %xmm0 # sched: [3:1.50] |
| ; GENERIC-NEXT: phsubsw (%rdi), %xmm0 # sched: [9:1.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; ATOM-LABEL: test_phsubsw: |
| ; ATOM: # %bb.0: |
| ; ATOM-NEXT: phsubsw %xmm1, %xmm0 # sched: [7:3.50] |
| ; ATOM-NEXT: phsubsw (%rdi), %xmm0 # sched: [8:4.00] |
| ; ATOM-NEXT: retq # sched: [79:39.50] |
| ; |
| ; SLM-LABEL: test_phsubsw: |
| ; SLM: # %bb.0: |
| ; SLM-NEXT: phsubsw %xmm1, %xmm0 # sched: [1:0.50] |
| ; SLM-NEXT: phsubsw (%rdi), %xmm0 # sched: [4:1.00] |
| ; SLM-NEXT: retq # sched: [4:1.00] |
| ; |
| ; SANDY-SSE-LABEL: test_phsubsw: |
| ; SANDY-SSE: # %bb.0: |
| ; SANDY-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [3:1.50] |
| ; SANDY-SSE-NEXT: phsubsw (%rdi), %xmm0 # sched: [9:1.50] |
| ; SANDY-SSE-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SANDY-LABEL: test_phsubsw: |
| ; SANDY: # %bb.0: |
| ; SANDY-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50] |
| ; SANDY-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50] |
| ; SANDY-NEXT: retq # sched: [1:1.00] |
| ; |
| ; HASWELL-SSE-LABEL: test_phsubsw: |
| ; HASWELL-SSE: # %bb.0: |
| ; HASWELL-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [3:2.00] |
| ; HASWELL-SSE-NEXT: phsubsw (%rdi), %xmm0 # sched: [9:2.00] |
| ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; HASWELL-LABEL: test_phsubsw: |
| ; HASWELL: # %bb.0: |
| ; HASWELL-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] |
| ; HASWELL-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] |
| ; HASWELL-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BROADWELL-SSE-LABEL: test_phsubsw: |
| ; BROADWELL-SSE: # %bb.0: |
| ; BROADWELL-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [3:2.00] |
| ; BROADWELL-SSE-NEXT: phsubsw (%rdi), %xmm0 # sched: [8:2.00] |
| ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BROADWELL-LABEL: test_phsubsw: |
| ; BROADWELL: # %bb.0: |
| ; BROADWELL-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] |
| ; BROADWELL-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [8:2.00] |
| ; BROADWELL-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKYLAKE-SSE-LABEL: test_phsubsw: |
| ; SKYLAKE-SSE: # %bb.0: |
| ; SKYLAKE-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [3:2.00] |
| ; SKYLAKE-SSE-NEXT: phsubsw (%rdi), %xmm0 # sched: [9:2.00] |
| ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKYLAKE-LABEL: test_phsubsw: |
| ; SKYLAKE: # %bb.0: |
| ; SKYLAKE-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] |
| ; SKYLAKE-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] |
| ; SKYLAKE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKX-SSE-LABEL: test_phsubsw: |
| ; SKX-SSE: # %bb.0: |
| ; SKX-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [3:2.00] |
| ; SKX-SSE-NEXT: phsubsw (%rdi), %xmm0 # sched: [9:2.00] |
| ; SKX-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKX-LABEL: test_phsubsw: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] |
| ; SKX-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BTVER2-SSE-LABEL: test_phsubsw: |
| ; BTVER2-SSE: # %bb.0: |
| ; BTVER2-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [1:0.50] |
| ; BTVER2-SSE-NEXT: phsubsw (%rdi), %xmm0 # sched: [6:1.00] |
| ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] |
| ; |
| ; BTVER2-LABEL: test_phsubsw: |
| ; BTVER2: # %bb.0: |
| ; BTVER2-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] |
| ; BTVER2-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] |
| ; BTVER2-NEXT: retq # sched: [4:1.00] |
| ; |
| ; ZNVER1-SSE-LABEL: test_phsubsw: |
| ; ZNVER1-SSE: # %bb.0: |
| ; ZNVER1-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [100:0.25] |
| ; ZNVER1-SSE-NEXT: phsubsw (%rdi), %xmm0 # sched: [100:0.25] |
| ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] |
| ; |
| ; ZNVER1-LABEL: test_phsubsw: |
| ; ZNVER1: # %bb.0: |
| ; ZNVER1-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [100:0.25] |
| ; ZNVER1-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [100:0.25] |
| ; ZNVER1-NEXT: retq # sched: [1:0.50] |
| %1 = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1) |
| %2 = load <8 x i16>, <8 x i16> *%a2, align 16 |
| %3 = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %1, <8 x i16> %2) |
| ret <8 x i16> %3 |
| } |
| declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone |
| |
| ; test_phsubw: calls llvm.x86.ssse3.phsub.w.128 twice, first on the two vector |
| ; arguments and then on that result plus a 16-byte-aligned load from %a2, so the |
| ; expected output for each CPU has one reg-reg and one reg-mem (v)phsubw. |
| ; The per-CPU assertion blocks are autogenerated (utils/update_llc_test_checks.py); |
| ; regenerate them with that script instead of editing them by hand. |
| define <8 x i16> @test_phsubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { |
| ; GENERIC-LABEL: test_phsubw: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: phsubw %xmm1, %xmm0 # sched: [3:1.50] |
| ; GENERIC-NEXT: phsubw (%rdi), %xmm0 # sched: [9:1.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; ATOM-LABEL: test_phsubw: |
| ; ATOM: # %bb.0: |
| ; ATOM-NEXT: phsubw %xmm1, %xmm0 # sched: [7:3.50] |
| ; ATOM-NEXT: phsubw (%rdi), %xmm0 # sched: [8:4.00] |
| ; ATOM-NEXT: retq # sched: [79:39.50] |
| ; |
| ; SLM-LABEL: test_phsubw: |
| ; SLM: # %bb.0: |
| ; SLM-NEXT: phsubw %xmm1, %xmm0 # sched: [1:0.50] |
| ; SLM-NEXT: phsubw (%rdi), %xmm0 # sched: [4:1.00] |
| ; SLM-NEXT: retq # sched: [4:1.00] |
| ; |
| ; SANDY-SSE-LABEL: test_phsubw: |
| ; SANDY-SSE: # %bb.0: |
| ; SANDY-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [3:1.50] |
| ; SANDY-SSE-NEXT: phsubw (%rdi), %xmm0 # sched: [9:1.50] |
| ; SANDY-SSE-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SANDY-LABEL: test_phsubw: |
| ; SANDY: # %bb.0: |
| ; SANDY-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:1.50] |
| ; SANDY-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:1.50] |
| ; SANDY-NEXT: retq # sched: [1:1.00] |
| ; |
| ; HASWELL-SSE-LABEL: test_phsubw: |
| ; HASWELL-SSE: # %bb.0: |
| ; HASWELL-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [3:2.00] |
| ; HASWELL-SSE-NEXT: phsubw (%rdi), %xmm0 # sched: [9:2.00] |
| ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; HASWELL-LABEL: test_phsubw: |
| ; HASWELL: # %bb.0: |
| ; HASWELL-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] |
| ; HASWELL-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] |
| ; HASWELL-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BROADWELL-SSE-LABEL: test_phsubw: |
| ; BROADWELL-SSE: # %bb.0: |
| ; BROADWELL-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [3:2.00] |
| ; BROADWELL-SSE-NEXT: phsubw (%rdi), %xmm0 # sched: [8:2.00] |
| ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BROADWELL-LABEL: test_phsubw: |
| ; BROADWELL: # %bb.0: |
| ; BROADWELL-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] |
| ; BROADWELL-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [8:2.00] |
| ; BROADWELL-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKYLAKE-SSE-LABEL: test_phsubw: |
| ; SKYLAKE-SSE: # %bb.0: |
| ; SKYLAKE-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [3:2.00] |
| ; SKYLAKE-SSE-NEXT: phsubw (%rdi), %xmm0 # sched: [9:2.00] |
| ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKYLAKE-LABEL: test_phsubw: |
| ; SKYLAKE: # %bb.0: |
| ; SKYLAKE-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] |
| ; SKYLAKE-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] |
| ; SKYLAKE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKX-SSE-LABEL: test_phsubw: |
| ; SKX-SSE: # %bb.0: |
| ; SKX-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [3:2.00] |
| ; SKX-SSE-NEXT: phsubw (%rdi), %xmm0 # sched: [9:2.00] |
| ; SKX-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKX-LABEL: test_phsubw: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] |
| ; SKX-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BTVER2-SSE-LABEL: test_phsubw: |
| ; BTVER2-SSE: # %bb.0: |
| ; BTVER2-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [1:0.50] |
| ; BTVER2-SSE-NEXT: phsubw (%rdi), %xmm0 # sched: [6:1.00] |
| ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] |
| ; |
| ; BTVER2-LABEL: test_phsubw: |
| ; BTVER2: # %bb.0: |
| ; BTVER2-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] |
| ; BTVER2-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] |
| ; BTVER2-NEXT: retq # sched: [4:1.00] |
| ; |
| ; ZNVER1-SSE-LABEL: test_phsubw: |
| ; ZNVER1-SSE: # %bb.0: |
| ; ZNVER1-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [100:0.25] |
| ; ZNVER1-SSE-NEXT: phsubw (%rdi), %xmm0 # sched: [100:0.25] |
| ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] |
| ; |
| ; ZNVER1-LABEL: test_phsubw: |
| ; ZNVER1: # %bb.0: |
| ; ZNVER1-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [100:0.25] |
| ; ZNVER1-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [100:0.25] |
| ; ZNVER1-NEXT: retq # sched: [1:0.50] |
| %1 = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1) |
| %2 = load <8 x i16>, <8 x i16> *%a2, align 16 |
| %3 = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %1, <8 x i16> %2) |
| ret <8 x i16> %3 |
| } |
| declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone |
| |
| ; test_pmaddubsw: calls llvm.x86.ssse3.pmadd.ub.sw.128 twice, first on the two |
| ; vector arguments and then on that result plus a 16-byte-aligned load from %a2, |
| ; so the expected output for each CPU has one reg-reg and one reg-mem (v)pmaddubsw. |
| ; The per-CPU assertion blocks are autogenerated (utils/update_llc_test_checks.py); |
| ; regenerate them with that script instead of editing them by hand. |
| define <8 x i16> @test_pmaddubsw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { |
| ; GENERIC-LABEL: test_pmaddubsw: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [5:1.00] |
| ; GENERIC-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [11:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; ATOM-LABEL: test_pmaddubsw: |
| ; ATOM: # %bb.0: |
| ; ATOM-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [5:5.00] |
| ; ATOM-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [5:5.00] |
| ; ATOM-NEXT: retq # sched: [79:39.50] |
| ; |
| ; SLM-LABEL: test_pmaddubsw: |
| ; SLM: # %bb.0: |
| ; SLM-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [4:1.00] |
| ; SLM-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [7:1.00] |
| ; SLM-NEXT: retq # sched: [4:1.00] |
| ; |
| ; SANDY-SSE-LABEL: test_pmaddubsw: |
| ; SANDY-SSE: # %bb.0: |
| ; SANDY-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [5:1.00] |
| ; SANDY-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [11:1.00] |
| ; SANDY-SSE-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SANDY-LABEL: test_pmaddubsw: |
| ; SANDY: # %bb.0: |
| ; SANDY-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] |
| ; SANDY-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] |
| ; SANDY-NEXT: retq # sched: [1:1.00] |
| ; |
| ; HASWELL-SSE-LABEL: test_pmaddubsw: |
| ; HASWELL-SSE: # %bb.0: |
| ; HASWELL-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [5:1.00] |
| ; HASWELL-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [11:1.00] |
| ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; HASWELL-LABEL: test_pmaddubsw: |
| ; HASWELL: # %bb.0: |
| ; HASWELL-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] |
| ; HASWELL-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] |
| ; HASWELL-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BROADWELL-SSE-LABEL: test_pmaddubsw: |
| ; BROADWELL-SSE: # %bb.0: |
| ; BROADWELL-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [5:1.00] |
| ; BROADWELL-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [10:1.00] |
| ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BROADWELL-LABEL: test_pmaddubsw: |
| ; BROADWELL: # %bb.0: |
| ; BROADWELL-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] |
| ; BROADWELL-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [10:1.00] |
| ; BROADWELL-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKYLAKE-SSE-LABEL: test_pmaddubsw: |
| ; SKYLAKE-SSE: # %bb.0: |
| ; SKYLAKE-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [4:0.50] |
| ; SKYLAKE-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [10:0.50] |
| ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKYLAKE-LABEL: test_pmaddubsw: |
| ; SKYLAKE: # %bb.0: |
| ; SKYLAKE-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] |
| ; SKYLAKE-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] |
| ; SKYLAKE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKX-SSE-LABEL: test_pmaddubsw: |
| ; SKX-SSE: # %bb.0: |
| ; SKX-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [4:0.50] |
| ; SKX-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [10:0.50] |
| ; SKX-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKX-LABEL: test_pmaddubsw: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] |
| ; SKX-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BTVER2-SSE-LABEL: test_pmaddubsw: |
| ; BTVER2-SSE: # %bb.0: |
| ; BTVER2-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [2:1.00] |
| ; BTVER2-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [7:1.00] |
| ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] |
| ; |
| ; BTVER2-LABEL: test_pmaddubsw: |
| ; BTVER2: # %bb.0: |
| ; BTVER2-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] |
| ; BTVER2-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] |
| ; BTVER2-NEXT: retq # sched: [4:1.00] |
| ; |
| ; ZNVER1-SSE-LABEL: test_pmaddubsw: |
| ; ZNVER1-SSE: # %bb.0: |
| ; ZNVER1-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [4:1.00] |
| ; ZNVER1-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [11:1.00] |
| ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] |
| ; |
| ; ZNVER1-LABEL: test_pmaddubsw: |
| ; ZNVER1: # %bb.0: |
| ; ZNVER1-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:1.00] |
| ; ZNVER1-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] |
| ; ZNVER1-NEXT: retq # sched: [1:0.50] |
| %1 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1) |
| %2 = load <16 x i8>, <16 x i8> *%a2, align 16 |
| ; The intrinsic returns <8 x i16> but takes <16 x i8> operands, so the first |
| ; result has to be reinterpreted before it can be fed back in as an operand. |
| %3 = bitcast <8 x i16> %1 to <16 x i8> |
| %4 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %3, <16 x i8> %2) |
| ret <8 x i16> %4 |
| } |
| declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone |
| |
| ; test_pmulhrsw: calls llvm.x86.ssse3.pmul.hr.sw.128 twice, first on the two |
| ; vector arguments and then on that result plus a 16-byte-aligned load from %a2, |
| ; so the expected output for each CPU has one reg-reg and one reg-mem (v)pmulhrsw. |
| ; The per-CPU assertion blocks are autogenerated (utils/update_llc_test_checks.py); |
| ; regenerate them with that script instead of editing them by hand. |
| define <8 x i16> @test_pmulhrsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { |
| ; GENERIC-LABEL: test_pmulhrsw: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [5:1.00] |
| ; GENERIC-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [11:1.00] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; ATOM-LABEL: test_pmulhrsw: |
| ; ATOM: # %bb.0: |
| ; ATOM-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [5:5.00] |
| ; ATOM-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [5:5.00] |
| ; ATOM-NEXT: retq # sched: [79:39.50] |
| ; |
| ; SLM-LABEL: test_pmulhrsw: |
| ; SLM: # %bb.0: |
| ; SLM-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [4:1.00] |
| ; SLM-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [7:1.00] |
| ; SLM-NEXT: retq # sched: [4:1.00] |
| ; |
| ; SANDY-SSE-LABEL: test_pmulhrsw: |
| ; SANDY-SSE: # %bb.0: |
| ; SANDY-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [5:1.00] |
| ; SANDY-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [11:1.00] |
| ; SANDY-SSE-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SANDY-LABEL: test_pmulhrsw: |
| ; SANDY: # %bb.0: |
| ; SANDY-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] |
| ; SANDY-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] |
| ; SANDY-NEXT: retq # sched: [1:1.00] |
| ; |
| ; HASWELL-SSE-LABEL: test_pmulhrsw: |
| ; HASWELL-SSE: # %bb.0: |
| ; HASWELL-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [5:1.00] |
| ; HASWELL-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [11:1.00] |
| ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; HASWELL-LABEL: test_pmulhrsw: |
| ; HASWELL: # %bb.0: |
| ; HASWELL-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] |
| ; HASWELL-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] |
| ; HASWELL-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BROADWELL-SSE-LABEL: test_pmulhrsw: |
| ; BROADWELL-SSE: # %bb.0: |
| ; BROADWELL-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [5:1.00] |
| ; BROADWELL-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [10:1.00] |
| ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BROADWELL-LABEL: test_pmulhrsw: |
| ; BROADWELL: # %bb.0: |
| ; BROADWELL-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] |
| ; BROADWELL-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [10:1.00] |
| ; BROADWELL-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKYLAKE-SSE-LABEL: test_pmulhrsw: |
| ; SKYLAKE-SSE: # %bb.0: |
| ; SKYLAKE-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [4:0.50] |
| ; SKYLAKE-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [10:0.50] |
| ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKYLAKE-LABEL: test_pmulhrsw: |
| ; SKYLAKE: # %bb.0: |
| ; SKYLAKE-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] |
| ; SKYLAKE-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] |
| ; SKYLAKE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKX-SSE-LABEL: test_pmulhrsw: |
| ; SKX-SSE: # %bb.0: |
| ; SKX-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [4:0.50] |
| ; SKX-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [10:0.50] |
| ; SKX-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKX-LABEL: test_pmulhrsw: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] |
| ; SKX-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BTVER2-SSE-LABEL: test_pmulhrsw: |
| ; BTVER2-SSE: # %bb.0: |
| ; BTVER2-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [2:1.00] |
| ; BTVER2-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [7:1.00] |
| ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] |
| ; |
| ; BTVER2-LABEL: test_pmulhrsw: |
| ; BTVER2: # %bb.0: |
| ; BTVER2-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] |
| ; BTVER2-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] |
| ; BTVER2-NEXT: retq # sched: [4:1.00] |
| ; |
| ; ZNVER1-SSE-LABEL: test_pmulhrsw: |
| ; ZNVER1-SSE: # %bb.0: |
| ; ZNVER1-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [4:1.00] |
| ; ZNVER1-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [11:1.00] |
| ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] |
| ; |
| ; ZNVER1-LABEL: test_pmulhrsw: |
| ; ZNVER1: # %bb.0: |
| ; ZNVER1-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:1.00] |
| ; ZNVER1-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] |
| ; ZNVER1-NEXT: retq # sched: [1:0.50] |
| %1 = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1) |
| %2 = load <8 x i16>, <8 x i16> *%a2, align 16 |
| %3 = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %1, <8 x i16> %2) |
| ret <8 x i16> %3 |
| } |
| declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone |
| |
| define <16 x i8> @test_pshufb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { |
| ; GENERIC-LABEL: test_pshufb: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: pshufb %xmm1, %xmm0 # sched: [1:0.50] |
| ; GENERIC-NEXT: pshufb (%rdi), %xmm0 # sched: [7:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; ATOM-LABEL: test_pshufb: |
| ; ATOM: # %bb.0: |
| ; ATOM-NEXT: pshufb %xmm1, %xmm0 # sched: [4:2.00] |
| ; ATOM-NEXT: pshufb (%rdi), %xmm0 # sched: [5:2.50] |
| ; ATOM-NEXT: retq # sched: [79:39.50] |
| ; |
| ; SLM-LABEL: test_pshufb: |
| ; SLM: # %bb.0: |
| ; SLM-NEXT: pshufb %xmm1, %xmm0 # sched: [1:1.00] |
| ; SLM-NEXT: pshufb (%rdi), %xmm0 # sched: [4:1.00] |
| ; SLM-NEXT: retq # sched: [4:1.00] |
| ; |
| ; SANDY-SSE-LABEL: test_pshufb: |
| ; SANDY-SSE: # %bb.0: |
| ; SANDY-SSE-NEXT: pshufb %xmm1, %xmm0 # sched: [1:0.50] |
| ; SANDY-SSE-NEXT: pshufb (%rdi), %xmm0 # sched: [7:0.50] |
| ; SANDY-SSE-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SANDY-LABEL: test_pshufb: |
| ; SANDY: # %bb.0: |
| ; SANDY-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SANDY-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] |
| ; SANDY-NEXT: retq # sched: [1:1.00] |
| ; |
| ; HASWELL-SSE-LABEL: test_pshufb: |
| ; HASWELL-SSE: # %bb.0: |
| ; HASWELL-SSE-NEXT: pshufb %xmm1, %xmm0 # sched: [1:1.00] |
| ; HASWELL-SSE-NEXT: pshufb (%rdi), %xmm0 # sched: [7:1.00] |
| ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; HASWELL-LABEL: test_pshufb: |
| ; HASWELL: # %bb.0: |
| ; HASWELL-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] |
| ; HASWELL-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] |
| ; HASWELL-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BROADWELL-SSE-LABEL: test_pshufb: |
| ; BROADWELL-SSE: # %bb.0: |
| ; BROADWELL-SSE-NEXT: pshufb %xmm1, %xmm0 # sched: [1:1.00] |
| ; BROADWELL-SSE-NEXT: pshufb (%rdi), %xmm0 # sched: [6:1.00] |
| ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BROADWELL-LABEL: test_pshufb: |
| ; BROADWELL: # %bb.0: |
| ; BROADWELL-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] |
| ; BROADWELL-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] |
| ; BROADWELL-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKYLAKE-SSE-LABEL: test_pshufb: |
| ; SKYLAKE-SSE: # %bb.0: |
| ; SKYLAKE-SSE-NEXT: pshufb %xmm1, %xmm0 # sched: [1:1.00] |
| ; SKYLAKE-SSE-NEXT: pshufb (%rdi), %xmm0 # sched: [7:1.00] |
| ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKYLAKE-LABEL: test_pshufb: |
| ; SKYLAKE: # %bb.0: |
| ; SKYLAKE-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] |
| ; SKYLAKE-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] |
| ; SKYLAKE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKX-SSE-LABEL: test_pshufb: |
| ; SKX-SSE: # %bb.0: |
| ; SKX-SSE-NEXT: pshufb %xmm1, %xmm0 # sched: [1:1.00] |
| ; SKX-SSE-NEXT: pshufb (%rdi), %xmm0 # sched: [7:1.00] |
| ; SKX-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKX-LABEL: test_pshufb: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] |
| ; SKX-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BTVER2-SSE-LABEL: test_pshufb: |
| ; BTVER2-SSE: # %bb.0: |
| ; BTVER2-SSE-NEXT: pshufb %xmm1, %xmm0 # sched: [2:2.00] |
| ; BTVER2-SSE-NEXT: pshufb (%rdi), %xmm0 # sched: [7:2.00] |
| ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] |
| ; |
| ; BTVER2-LABEL: test_pshufb: |
| ; BTVER2: # %bb.0: |
| ; BTVER2-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [2:2.00] |
| ; BTVER2-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:2.00] |
| ; BTVER2-NEXT: retq # sched: [4:1.00] |
| ; |
| ; ZNVER1-SSE-LABEL: test_pshufb: |
| ; ZNVER1-SSE: # %bb.0: |
| ; ZNVER1-SSE-NEXT: pshufb %xmm1, %xmm0 # sched: [1:0.25] |
| ; ZNVER1-SSE-NEXT: pshufb (%rdi), %xmm0 # sched: [8:0.50] |
| ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] |
| ; |
| ; ZNVER1-LABEL: test_pshufb: |
| ; ZNVER1: # %bb.0: |
| ; ZNVER1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] |
| ; ZNVER1-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] |
| ; ZNVER1-NEXT: retq # sched: [1:0.50] |
| %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1) |
| %2 = load <16 x i8>, <16 x i8> *%a2, align 16 |
| %3 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> %2) |
| ret <16 x i8> %3 |
| } |
| ; Declaration of the SSSE3 pshuf.b.128 intrinsic called twice in the function body above. |
| declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone |
| |
| ; test_psignb - scheduling test for the SSSE3 psign.b.128 intrinsic (<16 x i8>). |
| ; The intrinsic is called twice, first with both operands taken from the |
| ; arguments and then with the second operand loaded from %a2, so the expected |
| ; output for every check prefix is a register-register psignb followed by a |
| ; memory-operand psignb, each carrying a "# sched" annotation. |
| ; Prefixes ending in -SSE, together with GENERIC, ATOM and SLM, expect the |
| ; legacy two-operand mnemonic; the remaining prefixes expect the v-prefixed |
| ; three-operand mnemonic. ATOM additionally expects four nops before retq. |
| ; NOTE(review) - the [a:b] pairs are presumably latency and reciprocal |
| ; throughput as printed by -print-schedule; confirm against the llc docs. |
| define <16 x i8> @test_psignb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { |
| ; GENERIC-LABEL: test_psignb: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50] |
| ; GENERIC-NEXT: psignb (%rdi), %xmm0 # sched: [7:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; ATOM-LABEL: test_psignb: |
| ; ATOM: # %bb.0: |
| ; ATOM-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50] |
| ; ATOM-NEXT: psignb (%rdi), %xmm0 # sched: [1:1.00] |
| ; ATOM-NEXT: nop # sched: [1:0.50] |
| ; ATOM-NEXT: nop # sched: [1:0.50] |
| ; ATOM-NEXT: nop # sched: [1:0.50] |
| ; ATOM-NEXT: nop # sched: [1:0.50] |
| ; ATOM-NEXT: retq # sched: [79:39.50] |
| ; |
| ; SLM-LABEL: test_psignb: |
| ; SLM: # %bb.0: |
| ; SLM-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50] |
| ; SLM-NEXT: psignb (%rdi), %xmm0 # sched: [4:1.00] |
| ; SLM-NEXT: retq # sched: [4:1.00] |
| ; |
| ; SANDY-SSE-LABEL: test_psignb: |
| ; SANDY-SSE: # %bb.0: |
| ; SANDY-SSE-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50] |
| ; SANDY-SSE-NEXT: psignb (%rdi), %xmm0 # sched: [7:0.50] |
| ; SANDY-SSE-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SANDY-LABEL: test_psignb: |
| ; SANDY: # %bb.0: |
| ; SANDY-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SANDY-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] |
| ; SANDY-NEXT: retq # sched: [1:1.00] |
| ; |
| ; HASWELL-SSE-LABEL: test_psignb: |
| ; HASWELL-SSE: # %bb.0: |
| ; HASWELL-SSE-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50] |
| ; HASWELL-SSE-NEXT: psignb (%rdi), %xmm0 # sched: [7:0.50] |
| ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; HASWELL-LABEL: test_psignb: |
| ; HASWELL: # %bb.0: |
| ; HASWELL-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] |
| ; HASWELL-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] |
| ; HASWELL-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BROADWELL-SSE-LABEL: test_psignb: |
| ; BROADWELL-SSE: # %bb.0: |
| ; BROADWELL-SSE-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50] |
| ; BROADWELL-SSE-NEXT: psignb (%rdi), %xmm0 # sched: [6:0.50] |
| ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BROADWELL-LABEL: test_psignb: |
| ; BROADWELL: # %bb.0: |
| ; BROADWELL-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] |
| ; BROADWELL-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] |
| ; BROADWELL-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKYLAKE-SSE-LABEL: test_psignb: |
| ; SKYLAKE-SSE: # %bb.0: |
| ; SKYLAKE-SSE-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50] |
| ; SKYLAKE-SSE-NEXT: psignb (%rdi), %xmm0 # sched: [7:0.50] |
| ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKYLAKE-LABEL: test_psignb: |
| ; SKYLAKE: # %bb.0: |
| ; SKYLAKE-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKYLAKE-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] |
| ; SKYLAKE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKX-SSE-LABEL: test_psignb: |
| ; SKX-SSE: # %bb.0: |
| ; SKX-SSE-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50] |
| ; SKX-SSE-NEXT: psignb (%rdi), %xmm0 # sched: [7:0.50] |
| ; SKX-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKX-LABEL: test_psignb: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BTVER2-SSE-LABEL: test_psignb: |
| ; BTVER2-SSE: # %bb.0: |
| ; BTVER2-SSE-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50] |
| ; BTVER2-SSE-NEXT: psignb (%rdi), %xmm0 # sched: [6:1.00] |
| ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] |
| ; |
| ; BTVER2-LABEL: test_psignb: |
| ; BTVER2: # %bb.0: |
| ; BTVER2-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] |
| ; BTVER2-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] |
| ; BTVER2-NEXT: retq # sched: [4:1.00] |
| ; |
| ; ZNVER1-SSE-LABEL: test_psignb: |
| ; ZNVER1-SSE: # %bb.0: |
| ; ZNVER1-SSE-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.25] |
| ; ZNVER1-SSE-NEXT: psignb (%rdi), %xmm0 # sched: [8:0.50] |
| ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] |
| ; |
| ; ZNVER1-LABEL: test_psignb: |
| ; ZNVER1: # %bb.0: |
| ; ZNVER1-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] |
| ; ZNVER1-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] |
| ; ZNVER1-NEXT: retq # sched: [1:0.50] |
| ; Register form - both operands come straight from the arguments. |
| %1 = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1) |
| ; Memory form - the second operand is loaded from %a2 with 16-byte alignment. |
| %2 = load <16 x i8>, <16 x i8> *%a2, align 16 |
| %3 = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %1, <16 x i8> %2) |
| ret <16 x i8> %3 |
| } |
| ; Declaration of the intrinsic exercised by test_psignb. |
| declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone |
| |
| ; test_psignd - scheduling test for the SSSE3 psign.d.128 intrinsic (<4 x i32>). |
| ; The intrinsic is called twice, first with both operands taken from the |
| ; arguments and then with the second operand loaded from %a2, so the expected |
| ; output for every check prefix is a register-register psignd followed by a |
| ; memory-operand psignd, each carrying a "# sched" annotation. |
| ; Prefixes ending in -SSE, together with GENERIC, ATOM and SLM, expect the |
| ; legacy two-operand mnemonic; the remaining prefixes expect the v-prefixed |
| ; three-operand mnemonic. ATOM additionally expects four nops before retq. |
| ; NOTE(review) - the [a:b] pairs are presumably latency and reciprocal |
| ; throughput as printed by -print-schedule; confirm against the llc docs. |
| define <4 x i32> @test_psignd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { |
| ; GENERIC-LABEL: test_psignd: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50] |
| ; GENERIC-NEXT: psignd (%rdi), %xmm0 # sched: [7:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; ATOM-LABEL: test_psignd: |
| ; ATOM: # %bb.0: |
| ; ATOM-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50] |
| ; ATOM-NEXT: psignd (%rdi), %xmm0 # sched: [1:1.00] |
| ; ATOM-NEXT: nop # sched: [1:0.50] |
| ; ATOM-NEXT: nop # sched: [1:0.50] |
| ; ATOM-NEXT: nop # sched: [1:0.50] |
| ; ATOM-NEXT: nop # sched: [1:0.50] |
| ; ATOM-NEXT: retq # sched: [79:39.50] |
| ; |
| ; SLM-LABEL: test_psignd: |
| ; SLM: # %bb.0: |
| ; SLM-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50] |
| ; SLM-NEXT: psignd (%rdi), %xmm0 # sched: [4:1.00] |
| ; SLM-NEXT: retq # sched: [4:1.00] |
| ; |
| ; SANDY-SSE-LABEL: test_psignd: |
| ; SANDY-SSE: # %bb.0: |
| ; SANDY-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50] |
| ; SANDY-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [7:0.50] |
| ; SANDY-SSE-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SANDY-LABEL: test_psignd: |
| ; SANDY: # %bb.0: |
| ; SANDY-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SANDY-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] |
| ; SANDY-NEXT: retq # sched: [1:1.00] |
| ; |
| ; HASWELL-SSE-LABEL: test_psignd: |
| ; HASWELL-SSE: # %bb.0: |
| ; HASWELL-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50] |
| ; HASWELL-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [7:0.50] |
| ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; HASWELL-LABEL: test_psignd: |
| ; HASWELL: # %bb.0: |
| ; HASWELL-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] |
| ; HASWELL-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] |
| ; HASWELL-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BROADWELL-SSE-LABEL: test_psignd: |
| ; BROADWELL-SSE: # %bb.0: |
| ; BROADWELL-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50] |
| ; BROADWELL-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [6:0.50] |
| ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BROADWELL-LABEL: test_psignd: |
| ; BROADWELL: # %bb.0: |
| ; BROADWELL-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] |
| ; BROADWELL-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [6:0.50] |
| ; BROADWELL-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKYLAKE-SSE-LABEL: test_psignd: |
| ; SKYLAKE-SSE: # %bb.0: |
| ; SKYLAKE-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50] |
| ; SKYLAKE-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [7:0.50] |
| ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKYLAKE-LABEL: test_psignd: |
| ; SKYLAKE: # %bb.0: |
| ; SKYLAKE-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKYLAKE-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] |
| ; SKYLAKE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKX-SSE-LABEL: test_psignd: |
| ; SKX-SSE: # %bb.0: |
| ; SKX-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50] |
| ; SKX-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [7:0.50] |
| ; SKX-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKX-LABEL: test_psignd: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BTVER2-SSE-LABEL: test_psignd: |
| ; BTVER2-SSE: # %bb.0: |
| ; BTVER2-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50] |
| ; BTVER2-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [6:1.00] |
| ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] |
| ; |
| ; BTVER2-LABEL: test_psignd: |
| ; BTVER2: # %bb.0: |
| ; BTVER2-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] |
| ; BTVER2-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] |
| ; BTVER2-NEXT: retq # sched: [4:1.00] |
| ; |
| ; ZNVER1-SSE-LABEL: test_psignd: |
| ; ZNVER1-SSE: # %bb.0: |
| ; ZNVER1-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.25] |
| ; ZNVER1-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [8:0.50] |
| ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] |
| ; |
| ; ZNVER1-LABEL: test_psignd: |
| ; ZNVER1: # %bb.0: |
| ; ZNVER1-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] |
| ; ZNVER1-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] |
| ; ZNVER1-NEXT: retq # sched: [1:0.50] |
| ; Register form - both operands come straight from the arguments. |
| %1 = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1) |
| ; Memory form - the second operand is loaded from %a2 with 16-byte alignment. |
| %2 = load <4 x i32>, <4 x i32> *%a2, align 16 |
| %3 = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %1, <4 x i32> %2) |
| ret <4 x i32> %3 |
| } |
| ; Declaration of the intrinsic exercised by test_psignd. |
| declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone |
| |
| ; test_psignw - scheduling test for the SSSE3 psign.w.128 intrinsic (<8 x i16>). |
| ; The intrinsic is called twice, first with both operands taken from the |
| ; arguments and then with the second operand loaded from %a2, so the expected |
| ; output for every check prefix is a register-register psignw followed by a |
| ; memory-operand psignw, each carrying a "# sched" annotation. |
| ; Prefixes ending in -SSE, together with GENERIC, ATOM and SLM, expect the |
| ; legacy two-operand mnemonic; the remaining prefixes expect the v-prefixed |
| ; three-operand mnemonic. ATOM additionally expects four nops before retq. |
| ; NOTE(review) - the [a:b] pairs are presumably latency and reciprocal |
| ; throughput as printed by -print-schedule; confirm against the llc docs. |
| define <8 x i16> @test_psignw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { |
| ; GENERIC-LABEL: test_psignw: |
| ; GENERIC: # %bb.0: |
| ; GENERIC-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50] |
| ; GENERIC-NEXT: psignw (%rdi), %xmm0 # sched: [7:0.50] |
| ; GENERIC-NEXT: retq # sched: [1:1.00] |
| ; |
| ; ATOM-LABEL: test_psignw: |
| ; ATOM: # %bb.0: |
| ; ATOM-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50] |
| ; ATOM-NEXT: psignw (%rdi), %xmm0 # sched: [1:1.00] |
| ; ATOM-NEXT: nop # sched: [1:0.50] |
| ; ATOM-NEXT: nop # sched: [1:0.50] |
| ; ATOM-NEXT: nop # sched: [1:0.50] |
| ; ATOM-NEXT: nop # sched: [1:0.50] |
| ; ATOM-NEXT: retq # sched: [79:39.50] |
| ; |
| ; SLM-LABEL: test_psignw: |
| ; SLM: # %bb.0: |
| ; SLM-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50] |
| ; SLM-NEXT: psignw (%rdi), %xmm0 # sched: [4:1.00] |
| ; SLM-NEXT: retq # sched: [4:1.00] |
| ; |
| ; SANDY-SSE-LABEL: test_psignw: |
| ; SANDY-SSE: # %bb.0: |
| ; SANDY-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50] |
| ; SANDY-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [7:0.50] |
| ; SANDY-SSE-NEXT: retq # sched: [1:1.00] |
| ; |
| ; SANDY-LABEL: test_psignw: |
| ; SANDY: # %bb.0: |
| ; SANDY-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SANDY-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] |
| ; SANDY-NEXT: retq # sched: [1:1.00] |
| ; |
| ; HASWELL-SSE-LABEL: test_psignw: |
| ; HASWELL-SSE: # %bb.0: |
| ; HASWELL-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50] |
| ; HASWELL-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [7:0.50] |
| ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; HASWELL-LABEL: test_psignw: |
| ; HASWELL: # %bb.0: |
| ; HASWELL-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] |
| ; HASWELL-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] |
| ; HASWELL-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BROADWELL-SSE-LABEL: test_psignw: |
| ; BROADWELL-SSE: # %bb.0: |
| ; BROADWELL-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50] |
| ; BROADWELL-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [6:0.50] |
| ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BROADWELL-LABEL: test_psignw: |
| ; BROADWELL: # %bb.0: |
| ; BROADWELL-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] |
| ; BROADWELL-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] |
| ; BROADWELL-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKYLAKE-SSE-LABEL: test_psignw: |
| ; SKYLAKE-SSE: # %bb.0: |
| ; SKYLAKE-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50] |
| ; SKYLAKE-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [7:0.50] |
| ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKYLAKE-LABEL: test_psignw: |
| ; SKYLAKE: # %bb.0: |
| ; SKYLAKE-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKYLAKE-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] |
| ; SKYLAKE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKX-SSE-LABEL: test_psignw: |
| ; SKX-SSE: # %bb.0: |
| ; SKX-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50] |
| ; SKX-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [7:0.50] |
| ; SKX-SSE-NEXT: retq # sched: [7:1.00] |
| ; |
| ; SKX-LABEL: test_psignw: |
| ; SKX: # %bb.0: |
| ; SKX-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] |
| ; SKX-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] |
| ; SKX-NEXT: retq # sched: [7:1.00] |
| ; |
| ; BTVER2-SSE-LABEL: test_psignw: |
| ; BTVER2-SSE: # %bb.0: |
| ; BTVER2-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50] |
| ; BTVER2-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [6:1.00] |
| ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] |
| ; |
| ; BTVER2-LABEL: test_psignw: |
| ; BTVER2: # %bb.0: |
| ; BTVER2-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] |
| ; BTVER2-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] |
| ; BTVER2-NEXT: retq # sched: [4:1.00] |
| ; |
| ; ZNVER1-SSE-LABEL: test_psignw: |
| ; ZNVER1-SSE: # %bb.0: |
| ; ZNVER1-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.25] |
| ; ZNVER1-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [8:0.50] |
| ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] |
| ; |
| ; ZNVER1-LABEL: test_psignw: |
| ; ZNVER1: # %bb.0: |
| ; ZNVER1-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] |
| ; ZNVER1-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] |
| ; ZNVER1-NEXT: retq # sched: [1:0.50] |
| ; Register form - both operands come straight from the arguments. |
| %1 = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1) |
| ; Memory form - the second operand is loaded from %a2 with 16-byte alignment. |
| %2 = load <8 x i16>, <8 x i16> *%a2, align 16 |
| %3 = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %1, <8 x i16> %2) |
| ret <8 x i16> %3 |
| } |
| ; Declaration of the intrinsic exercised by test_psignw. |
| declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone |