; Tests basics and corner cases of x86-32 sandboxing (with additional
; x86-64 checks), using -Om1 in the hope that the output will remain
; stable. When packing bundles, we try to limit ourselves to a few
; instructions with well-known sizes and minimal use of registers and
; stack slots in the lowering sequence.
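;
; Sandboxed code is laid out in 32-byte bundles: no instruction may straddle
; a bundle boundary, and indirect branch targets are masked to 32-byte
; alignment. This is why the checks below look for 0xffffffe0 masks and for
; instruction offsets that are multiples of 0x20.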
| |
| ; XFAIL: filtype=asm |
| ; RUN: %p2i -i %s --sandbox --filetype=obj --disassemble --args -Om1 \ |
| ; RUN: -allow-externally-defined-symbols \ |
| ; RUN: -ffunction-sections | FileCheck %s |
| |
| ; RUN: %p2i -i %s --sandbox --filetype=obj --disassemble --target=x8664 \ |
| ; RUN: --args -Om1 -allow-externally-defined-symbols \ |
| ; RUN: -ffunction-sections | FileCheck %s --check-prefix X8664 |
| |
| declare void @call_target() |
| @global_byte = internal global [1 x i8] zeroinitializer |
| @global_short = internal global [2 x i8] zeroinitializer |
| @global_int = internal global [4 x i8] zeroinitializer |
| |
; A direct call sequence gets the right nop padding and bundle placement.
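; The call target is fixed, so no masking is needed; the call only has to be
; positioned to end exactly at a bundle boundary (0x20 below) so that the
; return address is bundle-aligned, hence the nops emitted before it. On
; x86-64 the call is lowered to a push of the bundle-aligned return address
; followed by a jmp to the target.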
| define internal void @test_direct_call() { |
| entry: |
| call void @call_target() |
| ret void |
| } |
| ; CHECK-LABEL: test_direct_call |
| ; CHECK: nop |
| ; CHECK: 1b: {{.*}} call 1c |
| ; CHECK-NEXT: 20: |
| ; X8664-LABEL: test_direct_call |
| ; X8664: push {{.*}} R_X86_64_32S test_direct_call+{{.*}}20 |
| ; X8664: jmp {{.*}} call_target |
| |
| ; An indirect call sequence uses the right mask and register-call sequence. |
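; The target register is masked with 0xffffffe0, clearing the low 5 bits so
; that the destination is 32-byte aligned; on x86-64 the masked 32-bit
; register is additionally added to r15 (the sandbox base) before the jump.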
| define internal void @test_indirect_call(i32 %target) { |
| entry: |
| %__1 = inttoptr i32 %target to void ()* |
| call void %__1() |
| ret void |
| } |
| ; CHECK-LABEL: test_indirect_call |
| ; CHECK: mov [[REG:.*]],DWORD PTR [esp |
| ; CHECK-NEXT: nop |
| ; CHECK: 1b: {{.*}} and [[REG]],0xffffffe0 |
| ; CHECK-NEXT: call [[REG]] |
; CHECK-NEXT: 20:
| ; X8664-LABEL: test_indirect_call |
| ; X8664: push {{.*}} R_X86_64_32S test_indirect_call+{{.*}}20 |
| ; X8664: {{.*}} and e[[REG:..]],0xffffffe0 |
| ; X8664: add r[[REG]],r15 |
| ; X8664: jmp r[[REG]] |
| |
| ; A return sequence uses the right pop / mask / jmp sequence. |
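; Instead of a plain ret, the return address is popped into ecx, masked to
; bundle alignment, and jumped to; on x86-64 the masked value is also rebased
; by adding r15 before the jmp.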
| define internal void @test_ret() { |
| entry: |
| ret void |
| } |
| ; CHECK-LABEL: test_ret |
| ; CHECK: pop ecx |
| ; CHECK-NEXT: and ecx,0xffffffe0 |
| ; CHECK-NEXT: jmp ecx |
| ; X8664-LABEL: test_ret |
| ; X8664: pop rcx |
| ; X8664: and ecx,0xffffffe0 |
| ; X8664: add rcx,r15 |
| ; X8664: jmp rcx |
| |
| ; A perfectly packed bundle should not have nops at the end. |
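; The call ends at 0x20, and the next four stores (7 + 9 + 7 + 9 = 32 bytes)
; exactly fill the bundle ending at 0x40, so the remaining two stores start a
; fresh bundle with no intervening nops.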
| define internal void @packed_bundle() { |
| entry: |
| call void @call_target() |
| ; bundle boundary |
| %addr_byte = bitcast [1 x i8]* @global_byte to i8* |
| %addr_short = bitcast [2 x i8]* @global_short to i16* |
| store i8 0, i8* %addr_byte, align 1 ; 7-byte instruction |
| store i16 0, i16* %addr_short, align 1 ; 9-byte instruction |
| store i8 0, i8* %addr_byte, align 1 ; 7-byte instruction |
| store i16 0, i16* %addr_short, align 1 ; 9-byte instruction |
| ; bundle boundary |
| store i8 0, i8* %addr_byte, align 1 ; 7-byte instruction |
| store i16 0, i16* %addr_short, align 1 ; 9-byte instruction |
| ret void |
| } |
| ; CHECK-LABEL: packed_bundle |
| ; CHECK: call |
| ; CHECK-NEXT: 20: {{.*}} mov BYTE PTR |
| ; CHECK-NEXT: 27: {{.*}} mov WORD PTR |
| ; CHECK-NEXT: 30: {{.*}} mov BYTE PTR |
| ; CHECK-NEXT: 37: {{.*}} mov WORD PTR |
| ; CHECK-NEXT: 40: {{.*}} mov BYTE PTR |
| ; CHECK-NEXT: 47: {{.*}} mov WORD PTR |
| |
| ; An imperfectly packed bundle should have one or more nops at the end. |
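; The call ends at 0x20; three 9-byte stores use 27 bytes, leaving only 5
; bytes in the bundle, so the fourth store cannot fit and nops pad from 0x3b
; up to the boundary at 0x40.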
| define internal void @nonpacked_bundle() { |
| entry: |
| call void @call_target() |
| ; bundle boundary |
| %addr_short = bitcast [2 x i8]* @global_short to i16* |
| store i16 0, i16* %addr_short, align 1 ; 9-byte instruction |
| store i16 0, i16* %addr_short, align 1 ; 9-byte instruction |
| store i16 0, i16* %addr_short, align 1 ; 9-byte instruction |
| ; nop padding |
| ; bundle boundary |
| store i16 0, i16* %addr_short, align 1 ; 9-byte instruction |
| ret void |
| } |
| ; CHECK-LABEL: nonpacked_bundle |
| ; CHECK: call |
| ; CHECK-NEXT: 20: {{.*}} mov WORD PTR |
| ; CHECK-NEXT: 29: {{.*}} mov WORD PTR |
| ; CHECK-NEXT: 32: {{.*}} mov WORD PTR |
| ; CHECK-NEXT: 3b: {{.*}} nop |
| ; CHECK: 40: {{.*}} mov WORD PTR |
| |
| ; A zero-byte instruction (e.g. local label definition) at a bundle |
| ; boundary should not trigger nop padding. |
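; Here the 10-byte store plus the 22-byte select lowering total exactly 32
; bytes, so the select's internal label lands on the boundary at 0x40; being
; zero bytes wide, it must not cause any nop padding there.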
| define internal void @label_at_boundary(i32 %arg, float %farg1, float %farg2) { |
| entry: |
| %argi8 = trunc i32 %arg to i8 |
| call void @call_target() |
| ; bundle boundary |
| %addr_short = bitcast [2 x i8]* @global_short to i16* |
| %addr_int = bitcast [4 x i8]* @global_int to i32* |
| store i32 0, i32* %addr_int, align 1 ; 10-byte instruction |
| %blah = select i1 true, i8 %argi8, i8 %argi8 ; 22-byte lowering sequence |
| ; label is here |
| store i16 0, i16* %addr_short, align 1 ; 9-byte instruction |
| ret void |
| } |
| ; CHECK-LABEL: label_at_boundary |
| ; CHECK: call |
| ; We rely on a particular 7-instruction 22-byte Om1 lowering sequence |
| ; for select. |
| ; CHECK-NEXT: 20: {{.*}} mov DWORD PTR |
| ; CHECK-NEXT: 2a: {{.*}} mov {{.*}},0x1 |
| ; CHECK-NEXT: 2c: {{.*}} cmp {{.*}},0x0 |
| ; CHECK-NEXT: 2e: {{.*}} mov {{.*}},BYTE PTR |
| ; CHECK-NEXT: 32: {{.*}} mov BYTE PTR |
| ; CHECK-NEXT: 36: {{.*}} jne 40 |
| ; CHECK-NEXT: 38: {{.*}} mov {{.*}},BYTE PTR |
| ; CHECK-NEXT: 3c: {{.*}} mov BYTE PTR |
| ; CHECK-NEXT: 40: {{.*}} mov WORD PTR |
| |
| ; Bundle lock without padding. |
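; The locked pop/mask/jmp return sequence fits inside the current bundle
; without crossing a boundary, so no nop padding is emitted before it.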
| define internal void @bundle_lock_without_padding() { |
| entry: |
| %addr_short = bitcast [2 x i8]* @global_short to i16* |
| store i16 0, i16* %addr_short, align 1 ; 9-byte instruction |
| ret void |
| } |
| ; CHECK-LABEL: bundle_lock_without_padding |
| ; CHECK: mov WORD PTR |
| ; CHECK-NEXT: pop ecx |
| ; CHECK-NEXT: and ecx,0xffffffe0 |
| ; CHECK-NEXT: jmp ecx |
| |
| ; Bundle lock with padding. |
| define internal void @bundle_lock_with_padding() { |
| entry: |
| call void @call_target() |
| ; bundle boundary |
| %addr_byte = bitcast [1 x i8]* @global_byte to i8* |
| %addr_short = bitcast [2 x i8]* @global_short to i16* |
| store i8 0, i8* %addr_byte, align 1 ; 7-byte instruction |
| store i16 0, i16* %addr_short, align 1 ; 9-byte instruction |
| store i16 0, i16* %addr_short, align 1 ; 9-byte instruction |
| ret void |
| ; 3 bytes to restore stack pointer |
| ; 1 byte to pop ecx |
| ; bundle_lock |
| ; 3 bytes to mask ecx |
| ; This is now 32 bytes from the beginning of the bundle, so |
| ; a 3-byte nop will need to be emitted before the bundle_lock. |
| ; 2 bytes to jump to ecx |
| ; bundle_unlock |
| } |
| ; CHECK-LABEL: bundle_lock_with_padding |
| ; CHECK: call |
| ; CHECK-NEXT: 20: {{.*}} mov BYTE PTR |
| ; CHECK-NEXT: 27: {{.*}} mov WORD PTR |
| ; CHECK-NEXT: 30: {{.*}} mov WORD PTR |
| ; CHECK-NEXT: 39: {{.*}} add esp, |
| ; CHECK-NEXT: 3c: {{.*}} pop ecx |
| ; CHECK-NEXT: 3d: {{.*}} nop |
| ; CHECK-NEXT: 40: {{.*}} and ecx,0xffffffe0 |
| ; CHECK-NEXT: 43: {{.*}} jmp ecx |
| |
| ; Bundle lock align_to_end without any padding. |
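; Three 9-byte stores (27 bytes) plus the 5-byte locked call add up to exactly
; 32 bytes, so the call already ends at the bundle boundary (0x40) and no
; padding is inserted.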
| define internal void @bundle_lock_align_to_end_padding_0() { |
| entry: |
| call void @call_target() |
| ; bundle boundary |
| %addr_short = bitcast [2 x i8]* @global_short to i16* |
| store i16 0, i16* %addr_short, align 1 ; 9-byte instruction |
| store i16 0, i16* %addr_short, align 1 ; 9-byte instruction |
| store i16 0, i16* %addr_short, align 1 ; 9-byte instruction |
| call void @call_target() ; 5-byte instruction |
| ret void |
| } |
| ; CHECK-LABEL: bundle_lock_align_to_end_padding_0 |
| ; CHECK: call |
| ; CHECK-NEXT: 20: {{.*}} mov WORD PTR |
| ; CHECK-NEXT: 29: {{.*}} mov WORD PTR |
| ; CHECK-NEXT: 32: {{.*}} mov WORD PTR |
| ; CHECK-NEXT: 3b: {{.*}} call |
| |
; Bundle lock align_to_end with one run of nop padding.
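; Three 7-byte stores end at 0x35, so 6 bytes of nop are inserted and the
; 5-byte locked call is emitted at 0x3b, ending at the boundary (0x40).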
| define internal void @bundle_lock_align_to_end_padding_1() { |
| entry: |
| call void @call_target() |
| ; bundle boundary |
| %addr_byte = bitcast [1 x i8]* @global_byte to i8* |
| store i8 0, i8* %addr_byte, align 1 ; 7-byte instruction |
| store i8 0, i8* %addr_byte, align 1 ; 7-byte instruction |
| store i8 0, i8* %addr_byte, align 1 ; 7-byte instruction |
| call void @call_target() ; 5-byte instruction |
| ret void |
| } |
| ; CHECK-LABEL: bundle_lock_align_to_end_padding_1 |
| ; CHECK: call |
| ; CHECK-NEXT: 20: {{.*}} mov BYTE PTR |
| ; CHECK-NEXT: 27: {{.*}} mov BYTE PTR |
| ; CHECK-NEXT: 2e: {{.*}} mov BYTE PTR |
| ; CHECK-NEXT: 35: {{.*}} nop |
| ; CHECK: 3b: {{.*}} call |
| |
; Bundle lock align_to_end with two runs of nop padding.
| define internal void @bundle_lock_align_to_end_padding_2(i32 %target) { |
| entry: |
| call void @call_target() |
| ; bundle boundary |
| %addr_byte = bitcast [1 x i8]* @global_byte to i8* |
| %addr_short = bitcast [2 x i8]* @global_short to i16* |
| %__1 = inttoptr i32 %target to void ()* |
| store i8 0, i8* %addr_byte, align 1 ; 7-byte instruction |
| store i16 0, i16* %addr_short, align 1 ; 9-byte instruction |
| store i16 0, i16* %addr_short, align 1 ; 9-byte instruction |
| call void %__1() |
| ; 4 bytes to load %target into a register |
| ; bundle_lock align_to_end |
| ; 3 bytes to mask the register |
| ; This is now 32 bytes from the beginning of the bundle, so |
| ; a 3-byte nop will need to be emitted before the bundle_lock, |
| ; followed by a 27-byte nop before the mask/jump. |
| ; 2 bytes to jump to the register |
| ; bundle_unlock |
| ret void |
| } |
| ; CHECK-LABEL: bundle_lock_align_to_end_padding_2 |
| ; CHECK: call |
| ; CHECK-NEXT: 20: {{.*}} mov BYTE PTR |
| ; CHECK-NEXT: 27: {{.*}} mov WORD PTR |
| ; CHECK-NEXT: 30: {{.*}} mov WORD PTR |
| ; CHECK-NEXT: 39: {{.*}} mov [[REG:.*]],DWORD PTR [esp |
| ; CHECK-NEXT: 3d: {{.*}} nop |
| ; CHECK: 40: {{.*}} nop |
| ; CHECK: 5b: {{.*}} and [[REG]],0xffffffe0 |
| ; CHECK-NEXT: 5e: {{.*}} call [[REG]] |
| |
| ; Tests the pad_to_end bundle alignment with no padding bytes needed. |
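; On x86-64 a sandboxed call is lowered to a push of the return address (an
; R_X86_64_32S relocation against the next bundle boundary) followed by a jmp
; to the target; with pad_to_end, any nop padding goes after the jmp so that
; execution resumes exactly at that boundary on return.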
| define internal void @bundle_lock_pad_to_end_padding_0(i32 %arg0, i32 %arg1, |
| i32 %arg3, i32 %arg4, |
| i32 %arg5, i32 %arg6) { |
| call void @call_target() |
| ; bundle boundary |
| %x = add i32 %arg5, %arg6 ; 12 bytes |
| %y = trunc i32 %x to i16 ; 10 bytes |
| call void @call_target() ; 10 bytes |
| ; bundle boundary |
| ret void |
| } |
| ; X8664: 56: {{.*}} push {{.*}} R_X86_64_32S bundle_lock_pad_to_end_padding_0+{{.*}}60 |
| ; X8664: 5b: {{.*}} jmp {{.*}} call_target |
| ; X8664: 60: {{.*}} add |
| |
| ; Tests the pad_to_end bundle alignment with 11 padding bytes needed, and some |
| ; instructions before the call. |
| define internal void @bundle_lock_pad_to_end_padding_11(i32 %arg0, i32 %arg1, |
| i32 %arg3, i32 %arg4, |
| i32 %arg5, i32 %arg6) { |
| call void @call_target() |
| ; bundle boundary |
| %x = add i32 %arg5, %arg6 ; 11 bytes |
| call void @call_target() ; 10 bytes |
| ; 11 bytes of nop |
| ; bundle boundary |
| ret void |
| } |
| ; X8664: 4b: {{.*}} push {{.*}} R_X86_64_32S bundle_lock_pad_to_end_padding_11+{{.*}}60 |
| ; X8664: 50: {{.*}} jmp {{.*}} call_target |
| ; X8664: 55: {{.*}} nop |
| ; X8664: 5d: {{.*}} nop |
| ; X8664: 60: {{.*}} add |
| |
| ; Tests the pad_to_end bundle alignment with 22 padding bytes needed, and no |
| ; instructions before the call. |
| define internal void @bundle_lock_pad_to_end_padding_22(i32 %arg0, i32 %arg1, |
| i32 %arg3, i32 %arg4, |
| i32 %arg5, i32 %arg6) { |
| call void @call_target() |
| ; bundle boundary |
| call void @call_target() ; 10 bytes |
| ; 22 bytes of nop |
| ; bundle boundary |
| ret void |
| } |
| ; X8664: 40: {{.*}} push {{.*}} R_X86_64_32S bundle_lock_pad_to_end_padding_22+{{.*}}60 |
| ; X8664: 45: {{.*}} jmp {{.*}} call_target |
| ; X8664: 4a: {{.*}} nop |
| ; X8664: 52: {{.*}} nop |
| ; X8664: 5a: {{.*}} nop |
| ; X8664: 60: {{.*}} add |
| |
; Stack adjustment state during an argument push sequence gets properly
; checkpointed and restored across the two passes, as observed via the esp
; offsets used to access stack-allocated variables.
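; A stale (unrestored) adjustment would show up as a larger esp offset when
; %arg is reloaded to be passed to the recursive call; the checks pin it to
; [esp+0x20] rather than [esp+0x30].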
| define internal void @checkpoint_restore_stack_adjustment(i32 %arg) { |
| entry: |
| call void @call_target() |
| ; bundle boundary |
| call void @checkpoint_restore_stack_adjustment(i32 %arg) |
| ret void |
| } |
| ; CHECK-LABEL: checkpoint_restore_stack_adjustment |
| ; CHECK: sub esp,0x1c |
| ; CHECK: call |
| ; The address of %arg should be [esp+0x20], not [esp+0x30]. |
| ; CHECK-NEXT: mov [[REG:.*]],DWORD PTR [esp+0x20] |
| ; CHECK-NEXT: mov DWORD PTR [esp],[[REG]] |
| ; CHECK: call |
| ; CHECK: add esp,0x1c |