| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE |
| ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX1 |
| ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx512f < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 |
| |
| ; Verify that we're folding the load into the math instruction. |
| ; This pattern is generated out of the simplest intrinsics usage: |
| ; _mm_add_ss(a, _mm_load_ss(b)); |
| |
| ; Scalar single-precision add with the right-hand operand taken from memory. |
| ; Lane 0 of %va is added to the float loaded from %pb and the sum is written |
| ; back into lane 0 of %va; lanes 1-3 pass through unchanged. The checks below |
| ; expect the load to be folded into addss / vaddss as a memory operand. |
| define <4 x float> @addss(<4 x float> %va, float* %pb) { |
| ; SSE-LABEL: addss: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: addss (%rdi), %xmm0 |
| ; SSE-NEXT: retq |
| ; |
| ; AVX-LABEL: addss: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: vaddss (%rdi), %xmm0, %xmm0 |
| ; AVX-NEXT: retq |
| %lane0 = extractelement <4 x float> %va, i32 0 |
| %mem = load float, float* %pb |
| %sum = fadd float %lane0, %mem |
| %merged = insertelement <4 x float> %va, float %sum, i32 0 |
| ret <4 x float> %merged |
| } |
| |
| ; Double-precision variant of the scalar add pattern. |
| ; Lane 0 of %va plus the double loaded from %pb replaces lane 0 of %va; |
| ; lane 1 passes through unchanged. The checks below expect a single |
| ; addsd / vaddsd that reads its second operand directly from memory. |
| define <2 x double> @addsd(<2 x double> %va, double* %pb) { |
| ; SSE-LABEL: addsd: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: addsd (%rdi), %xmm0 |
| ; SSE-NEXT: retq |
| ; |
| ; AVX-LABEL: addsd: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: vaddsd (%rdi), %xmm0, %xmm0 |
| ; AVX-NEXT: retq |
| %lane0 = extractelement <2 x double> %va, i32 0 |
| %mem = load double, double* %pb |
| %sum = fadd double %lane0, %mem |
| %merged = insertelement <2 x double> %va, double %sum, i32 0 |
| ret <2 x double> %merged |
| } |
| |
| ; Scalar single-precision subtract with the subtrahend taken from memory. |
| ; Computes (lane 0 of %va) - (float loaded from %pb) and writes the |
| ; difference back into lane 0 of %va; lanes 1-3 pass through unchanged. |
| ; The checks below expect the load to be folded into subss / vsubss. |
| define <4 x float> @subss(<4 x float> %va, float* %pb) { |
| ; SSE-LABEL: subss: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: subss (%rdi), %xmm0 |
| ; SSE-NEXT: retq |
| ; |
| ; AVX-LABEL: subss: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: vsubss (%rdi), %xmm0, %xmm0 |
| ; AVX-NEXT: retq |
| %lane0 = extractelement <4 x float> %va, i32 0 |
| %mem = load float, float* %pb |
| %diff = fsub float %lane0, %mem |
| %merged = insertelement <4 x float> %va, float %diff, i32 0 |
| ret <4 x float> %merged |
| } |
| |
| ; Double-precision variant of the scalar subtract pattern. |
| ; Computes (lane 0 of %va) - (double loaded from %pb) and writes the |
| ; difference back into lane 0 of %va; lane 1 passes through unchanged. |
| ; The checks below expect the load to be folded into subsd / vsubsd. |
| define <2 x double> @subsd(<2 x double> %va, double* %pb) { |
| ; SSE-LABEL: subsd: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: subsd (%rdi), %xmm0 |
| ; SSE-NEXT: retq |
| ; |
| ; AVX-LABEL: subsd: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: vsubsd (%rdi), %xmm0, %xmm0 |
| ; AVX-NEXT: retq |
| %lane0 = extractelement <2 x double> %va, i32 0 |
| %mem = load double, double* %pb |
| %diff = fsub double %lane0, %mem |
| %merged = insertelement <2 x double> %va, double %diff, i32 0 |
| ret <2 x double> %merged |
| } |
| |
| ; Scalar single-precision multiply with one factor taken from memory. |
| ; Lane 0 of %va is multiplied by the float loaded from %pb and the product |
| ; is written back into lane 0 of %va; lanes 1-3 pass through unchanged. |
| ; The checks below expect the load to be folded into mulss / vmulss. |
| define <4 x float> @mulss(<4 x float> %va, float* %pb) { |
| ; SSE-LABEL: mulss: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: mulss (%rdi), %xmm0 |
| ; SSE-NEXT: retq |
| ; |
| ; AVX-LABEL: mulss: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: vmulss (%rdi), %xmm0, %xmm0 |
| ; AVX-NEXT: retq |
| %lane0 = extractelement <4 x float> %va, i32 0 |
| %mem = load float, float* %pb |
| %prod = fmul float %lane0, %mem |
| %merged = insertelement <4 x float> %va, float %prod, i32 0 |
| ret <4 x float> %merged |
| } |
| |
| ; Double-precision variant of the scalar multiply pattern. |
| ; Lane 0 of %va is multiplied by the double loaded from %pb and the product |
| ; is written back into lane 0 of %va; lane 1 passes through unchanged. |
| ; The checks below expect the load to be folded into mulsd / vmulsd. |
| define <2 x double> @mulsd(<2 x double> %va, double* %pb) { |
| ; SSE-LABEL: mulsd: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: mulsd (%rdi), %xmm0 |
| ; SSE-NEXT: retq |
| ; |
| ; AVX-LABEL: mulsd: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: vmulsd (%rdi), %xmm0, %xmm0 |
| ; AVX-NEXT: retq |
| %lane0 = extractelement <2 x double> %va, i32 0 |
| %mem = load double, double* %pb |
| %prod = fmul double %lane0, %mem |
| %merged = insertelement <2 x double> %va, double %prod, i32 0 |
| ret <2 x double> %merged |
| } |
| |
| ; Scalar single-precision divide with the divisor taken from memory. |
| ; Computes (lane 0 of %va) / (float loaded from %pb) and writes the |
| ; quotient back into lane 0 of %va; lanes 1-3 pass through unchanged. |
| ; The checks below expect the load to be folded into divss / vdivss. |
| define <4 x float> @divss(<4 x float> %va, float* %pb) { |
| ; SSE-LABEL: divss: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: divss (%rdi), %xmm0 |
| ; SSE-NEXT: retq |
| ; |
| ; AVX-LABEL: divss: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: vdivss (%rdi), %xmm0, %xmm0 |
| ; AVX-NEXT: retq |
| %lane0 = extractelement <4 x float> %va, i32 0 |
| %mem = load float, float* %pb |
| %quot = fdiv float %lane0, %mem |
| %merged = insertelement <4 x float> %va, float %quot, i32 0 |
| ret <4 x float> %merged |
| } |
| |
| ; Double-precision variant of the scalar divide pattern. |
| ; Computes (lane 0 of %va) / (double loaded from %pb) and writes the |
| ; quotient back into lane 0 of %va; lane 1 passes through unchanged. |
| ; The checks below expect the load to be folded into divsd / vdivsd. |
| define <2 x double> @divsd(<2 x double> %va, double* %pb) { |
| ; SSE-LABEL: divsd: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: divsd (%rdi), %xmm0 |
| ; SSE-NEXT: retq |
| ; |
| ; AVX-LABEL: divsd: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: vdivsd (%rdi), %xmm0, %xmm0 |
| ; AVX-NEXT: retq |
| %lane0 = extractelement <2 x double> %va, i32 0 |
| %mem = load double, double* %pb |
| %quot = fdiv double %lane0, %mem |
| %merged = insertelement <2 x double> %va, double %quot, i32 0 |
| ret <2 x double> %merged |
| } |