|  | ; This checks to ensure that Subzero aligns spill slots. | 
|  |  | 
|  | ; RUN: %p2i --filetype=obj --disassemble -i %s --args -Om1 \ | 
|  | ; RUN:   -allow-externally-defined-symbols | FileCheck %s | 
|  | ; RUN: %p2i --filetype=obj --disassemble -i %s --args -O2 \ | 
|  | ; RUN:   -allow-externally-defined-symbols | FileCheck %s | 
|  |  | 
|  | ; The location of the stack slot for a variable is inferred from the | 
|  | ; return sequence. | 
|  |  | 
|  | ; In this file, "global" refers to a variable with a live range across | 
|  | ; multiple basic blocks (not an LLVM global variable) and "local" | 
|  | ; refers to a variable that is live in only a single basic block. | 
|  |  | 
|  | ; Spill-slot alignment test for a vector that is "global" in this file's | 
|  | ; sense: %vec.global is defined in entry and returned from block, so it is | 
|  | ; live across the branch and across the call to @ForceXmmSpills. | 
|  | define internal <4 x i32> @align_global_vector(i32 %arg) { | 
|  | entry: | 
|  | %vec.global = insertelement <4 x i32> undef, i32 %arg, i32 0 | 
|  | br label %block | 
|  | block: | 
|  | call void @ForceXmmSpills() | 
|  | ret <4 x i32> %vec.global | 
|  | ; Expected return sequence: the vector is reloaded into xmm0 from [esp], | 
|  | ; and the very next instructions release 0x1c bytes of stack and return. | 
|  | ; NOTE(review): presumably 0x1c plus the 4-byte return address is what | 
|  | ; makes [esp] a 16-byte aligned slot -- confirm against the target ABI. | 
|  | ; CHECK-LABEL: align_global_vector | 
|  | ; CHECK: movups xmm0,XMMWORD PTR [esp] | 
|  | ; CHECK-NEXT: add esp,0x1c | 
|  | ; CHECK-NEXT: ret | 
|  | } | 
|  |  | 
|  | ; Spill-slot alignment test for a vector that is "local" in this file's | 
|  | ; sense: %vec.local is both defined and returned inside block, with the | 
|  | ; call to @ForceXmmSpills in between. The entry block only branches. | 
|  | define internal <4 x i32> @align_local_vector(i32 %arg) { | 
|  | entry: | 
|  | br label %block | 
|  | block: | 
|  | %vec.local = insertelement <4 x i32> undef, i32 %arg, i32 0 | 
|  | call void @ForceXmmSpills() | 
|  | ret <4 x i32> %vec.local | 
|  | ; Same expected return sequence as the multi-block variant: reload from | 
|  | ; [esp], then release 0x1c bytes of stack immediately before returning. | 
|  | ; CHECK-LABEL: align_local_vector | 
|  | ; CHECK: movups xmm0,XMMWORD PTR [esp] | 
|  | ; CHECK-NEXT: add esp,0x1c | 
|  | ; CHECK-NEXT: ret | 
|  | } | 
|  |  | 
|  | ; Declared here without a body; the run lines at the top of the file pass | 
|  | ; -allow-externally-defined-symbols. | 
|  | ; NOTE(review): judging by the name, calling this is what makes the live | 
|  | ; vector value get spilled to the stack -- confirm. | 
|  | declare void @ForceXmmSpills() | 
|  |  | 
|  | ; Multi-block ("global") vector case for a frame addressed through ebp. | 
|  | ; The alloca sits in eblock rather than in entry (see the comment on the | 
|  | ; first branch), and its address is passed to the external call so it is | 
|  | ; used. %vec.global is defined in eblock and returned from block. | 
|  | define internal <4 x i32> @align_global_vector_ebp_based(i32 %arg) { | 
|  | entry: | 
|  | br label %eblock  ; Disable alloca optimization | 
|  | eblock: | 
|  | %alloc = alloca i8, i32 1, align 1 | 
|  | %vec.global = insertelement <4 x i32> undef, i32 %arg, i32 0 | 
|  | br label %block | 
|  | block: | 
|  | call void @ForceXmmSpillsAndUseAlloca(i8* %alloc) | 
|  | ret <4 x i32> %vec.global | 
|  | ; Expected return sequence: the vector is reloaded from [ebp-0x18], then | 
|  | ; esp is restored from ebp and ebp is popped. The final ret is matched | 
|  | ; loosely, so other instructions may appear between the pop and the ret. | 
|  | ; CHECK-LABEL: align_global_vector_ebp_based | 
|  | ; CHECK: movups xmm0,XMMWORD PTR [ebp-0x18] | 
|  | ; CHECK-NEXT: mov esp,ebp | 
|  | ; CHECK-NEXT: pop ebp | 
|  | ; CHECK: ret | 
|  | } | 
|  |  | 
|  | ; Single-block ("local") vector case for a frame addressed through ebp. | 
|  | ; The alloca, the definition of %vec.local, the external call and the | 
|  | ; return all live in eblock; entry only branches to it. | 
|  | define internal <4 x i32> @align_local_vector_ebp_based(i32 %arg) { | 
|  | entry: | 
|  | br label %eblock  ; Disable alloca optimization | 
|  | eblock: | 
|  | %alloc = alloca i8, i32 1, align 1 | 
|  | %vec.local = insertelement <4 x i32> undef, i32 %arg, i32 0 | 
|  | call void @ForceXmmSpillsAndUseAlloca(i8* %alloc) | 
|  | ret <4 x i32> %vec.local | 
|  | ; Expected return sequence matches the multi-block ebp variant: reload | 
|  | ; from [ebp-0x18], restore esp from ebp, pop ebp, and later a ret. | 
|  | ; CHECK-LABEL: align_local_vector_ebp_based | 
|  | ; CHECK: movups xmm0,XMMWORD PTR [ebp-0x18] | 
|  | ; CHECK-NEXT: mov esp,ebp | 
|  | ; CHECK-NEXT: pop ebp | 
|  | ; CHECK: ret | 
|  | } | 
|  |  | 
|  | ; Mixes a multi-block float with a single-block vector in one frame. | 
|  | ; %float.global is computed in entry and passed to the external call in | 
|  | ; both entry and block; %vec.local is defined and returned in block only. | 
|  | ; The vector is built purely from undef operands and does not use %arg. | 
|  | define internal <4 x i32> @align_local_vector_and_global_float(i32 %arg) { | 
|  | entry: | 
|  | %float.global = sitofp i32 %arg to float | 
|  | call void @ForceXmmSpillsAndUseFloat(float %float.global) | 
|  | br label %block | 
|  | block: | 
|  | %vec.local = insertelement <4 x i32> undef, i32 undef, i32 0 | 
|  | call void @ForceXmmSpillsAndUseFloat(float %float.global) | 
|  | ret <4 x i32> %vec.local | 
|  | ; Expected code: the converted float is stored right after the cvtsi2ss | 
|  | ; at esp+0x1c or esp+0x2c; the vector is later reloaded from esp+0x10 or | 
|  | ; esp+0x20, followed directly by releasing 0x3c bytes and returning. | 
|  | ; NOTE(review): the two accepted offsets per access presumably correspond | 
|  | ; to the two optimization levels in the run lines -- confirm. | 
|  | ; CHECK-LABEL: align_local_vector_and_global_float | 
|  | ; CHECK: cvtsi2ss xmm0,eax | 
|  | ; CHECK-NEXT: movss DWORD PTR [esp+{{0x1c|0x2c}}],xmm0 | 
|  | ; CHECK: movups xmm0,XMMWORD PTR [{{esp\+0x10|esp\+0x20}}] | 
|  | ; CHECK-NEXT: add esp,0x3c | 
|  | ; CHECK-NEXT: ret | 
|  | } | 
|  |  | 
|  | ; Declared here without bodies; the run lines at the top of the file pass | 
|  | ; -allow-externally-defined-symbols. The first takes the alloca address | 
|  | ; from the ebp-based tests, the second takes the multi-block float. | 
|  | ; NOTE(review): judging by the names, the calls also make live xmm values | 
|  | ; get spilled to the stack -- confirm. | 
|  | declare void @ForceXmmSpillsAndUseAlloca(i8*) | 
|  | declare void @ForceXmmSpillsAndUseFloat(float) | 