; This test checks that when SSE instructions access memory and require full
; alignment, memory operands are limited to properly aligned stack operands.
; This would only happen when we fuse a load instruction with another
; instruction, which currently only happens with non-scalarized Arithmetic
; instructions.

; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 | FileCheck %s
; RUN: %p2i -i %s --filetype=obj --disassemble --args -Om1 | FileCheck %s

; test_add: integer vector add whose second operand comes from a load that is
; only 4-byte aligned (the vector itself is 16 bytes wide). The load must not
; be folded into paddd as a memory operand addressed through a general-purpose
; register.
define <4 x i32> @test_add(i32 %addr_i, <4 x i32> %addend) {
entry:
  %addr = inttoptr i32 %addr_i to <4 x i32>*
  %loaded = load <4 x i32>, <4 x i32>* %addr, align 4
  %result = add <4 x i32> %addend, %loaded
  ret <4 x i32> %result
}
; CHECK-LABEL: test_add
; Negative pattern: rejects a register-addressed memory operand on any paddd
; that appears before the accepted one.
; CHECK-NOT: paddd xmm{{.}},XMMWORD PTR [e{{ax|cx|dx|di|si|bx|bp}}
; Positive pattern: pins the source operand itself. A negative pattern is only
; applied to the text before the next positive match, so an unconstrained
; positive pattern would accept the very instruction this test forbids.
; CHECK: paddd xmm{{.}},{{xmm.|XMMWORD PTR \[esp}}

; test_and: integer vector bitwise-and whose second operand comes from a load
; that is only 4-byte aligned (the vector itself is 16 bytes wide). The load
; must not be folded into pand as a memory operand addressed through a
; general-purpose register.
define <4 x i32> @test_and(i32 %addr_i, <4 x i32> %addend) {
entry:
  %addr = inttoptr i32 %addr_i to <4 x i32>*
  %loaded = load <4 x i32>, <4 x i32>* %addr, align 4
  %result = and <4 x i32> %addend, %loaded
  ret <4 x i32> %result
}
; CHECK-LABEL: test_and
; Negative pattern: rejects a register-addressed memory operand on any pand
; that appears before the accepted one.
; CHECK-NOT: pand xmm{{.}},XMMWORD PTR [e{{ax|cx|dx|di|si|bx|bp}}
; Positive pattern: pins the source operand itself. A negative pattern is only
; applied to the text before the next positive match, so an unconstrained
; positive pattern would accept the very instruction this test forbids.
; CHECK: pand xmm{{.}},{{xmm.|XMMWORD PTR \[esp}}

; test_or: integer vector bitwise-or whose second operand comes from a load
; that is only 4-byte aligned (the vector itself is 16 bytes wide). The load
; must not be folded into por as a memory operand addressed through a
; general-purpose register.
define <4 x i32> @test_or(i32 %addr_i, <4 x i32> %addend) {
entry:
  %addr = inttoptr i32 %addr_i to <4 x i32>*
  %loaded = load <4 x i32>, <4 x i32>* %addr, align 4
  %result = or <4 x i32> %addend, %loaded
  ret <4 x i32> %result
}
; CHECK-LABEL: test_or
; Negative pattern: rejects a register-addressed memory operand on any por
; that appears before the accepted one.
; CHECK-NOT: por xmm{{.}},XMMWORD PTR [e{{ax|cx|dx|di|si|bx|bp}}
; Positive pattern: pins the source operand itself. A negative pattern is only
; applied to the text before the next positive match, so an unconstrained
; positive pattern would accept the very instruction this test forbids.
; CHECK: por xmm{{.}},{{xmm.|XMMWORD PTR \[esp}}

; test_xor: integer vector bitwise-xor whose second operand comes from a load
; that is only 4-byte aligned (the vector itself is 16 bytes wide). The load
; must not be folded into pxor as a memory operand addressed through a
; general-purpose register.
define <4 x i32> @test_xor(i32 %addr_i, <4 x i32> %addend) {
entry:
  %addr = inttoptr i32 %addr_i to <4 x i32>*
  %loaded = load <4 x i32>, <4 x i32>* %addr, align 4
  %result = xor <4 x i32> %addend, %loaded
  ret <4 x i32> %result
}
; CHECK-LABEL: test_xor
; Negative pattern: rejects a register-addressed memory operand on any pxor
; that appears before the accepted one.
; CHECK-NOT: pxor xmm{{.}},XMMWORD PTR [e{{ax|cx|dx|di|si|bx|bp}}
; Positive pattern: pins the source operand itself. A negative pattern is only
; applied to the text before the next positive match, so an unconstrained
; positive pattern would accept the very instruction this test forbids.
; CHECK: pxor xmm{{.}},{{xmm.|XMMWORD PTR \[esp}}

; test_sub: integer vector subtract whose second operand comes from a load
; that is only 4-byte aligned (the vector itself is 16 bytes wide). The load
; must not be folded into psubd as a memory operand addressed through a
; general-purpose register.
define <4 x i32> @test_sub(i32 %addr_i, <4 x i32> %addend) {
entry:
  %addr = inttoptr i32 %addr_i to <4 x i32>*
  %loaded = load <4 x i32>, <4 x i32>* %addr, align 4
  %result = sub <4 x i32> %addend, %loaded
  ret <4 x i32> %result
}
; CHECK-LABEL: test_sub
; Negative pattern: rejects a register-addressed memory operand on any psubd
; that appears before the accepted one.
; CHECK-NOT: psubd xmm{{.}},XMMWORD PTR [e{{ax|cx|dx|di|si|bx|bp}}
; Positive pattern: pins the source operand itself. A negative pattern is only
; applied to the text before the next positive match, so an unconstrained
; positive pattern would accept the very instruction this test forbids.
; CHECK: psubd xmm{{.}},{{xmm.|XMMWORD PTR \[esp}}

; test_fadd: float vector add whose second operand comes from a load that is
; only 4-byte aligned (the vector itself is 16 bytes wide). The load must not
; be folded into addps as a memory operand addressed through a general-purpose
; register.
define <4 x float> @test_fadd(i32 %addr_i, <4 x float> %addend) {
entry:
  %addr = inttoptr i32 %addr_i to <4 x float>*
  %loaded = load <4 x float>, <4 x float>* %addr, align 4
  %result = fadd <4 x float> %addend, %loaded
  ret <4 x float> %result
}
; CHECK-LABEL: test_fadd
; Negative pattern: rejects a register-addressed memory operand on any addps
; that appears before the accepted one.
; CHECK-NOT: addps xmm{{.}},XMMWORD PTR [e{{ax|cx|dx|di|si|bx|bp}}
; Positive pattern: pins the source operand itself. A negative pattern is only
; applied to the text before the next positive match, so an unconstrained
; positive pattern would accept the very instruction this test forbids.
; CHECK: addps xmm{{.}},{{xmm.|XMMWORD PTR \[esp}}

; test_fsub: float vector subtract whose second operand comes from a load that
; is only 4-byte aligned (the vector itself is 16 bytes wide). The load must
; not be folded into subps as a memory operand addressed through a
; general-purpose register.
define <4 x float> @test_fsub(i32 %addr_i, <4 x float> %addend) {
entry:
  %addr = inttoptr i32 %addr_i to <4 x float>*
  %loaded = load <4 x float>, <4 x float>* %addr, align 4
  %result = fsub <4 x float> %addend, %loaded
  ret <4 x float> %result
}
; CHECK-LABEL: test_fsub
; Negative pattern: rejects a register-addressed memory operand on any subps
; that appears before the accepted one.
; CHECK-NOT: subps xmm{{.}},XMMWORD PTR [e{{ax|cx|dx|di|si|bx|bp}}
; Positive pattern: pins the source operand itself. A negative pattern is only
; applied to the text before the next positive match, so an unconstrained
; positive pattern would accept the very instruction this test forbids.
; CHECK: subps xmm{{.}},{{xmm.|XMMWORD PTR \[esp}}