| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=32-ALL,32-GOOD-RA |
| ; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=32-ALL,32-FAST-RA |
| |
| ; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA |
| ; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=64-ALL,64-FAST-RA |
| ; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mattr=+sahf %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA-SAHF |
| ; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mattr=+sahf -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=64-ALL,64-FAST-RA-SAHF |
| ; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mcpu=corei7 %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA-SAHF |
| |
| declare i32 @foo() |
| declare i32 @bar(i64) |
| |
| ; In the following case when using fast scheduling we get a long chain of |
| ; EFLAGS save/restore due to a sequence of: |
| ; cmpxchg8b (implicit-def eflags) |
| ; eax = copy eflags |
| ; adjcallstackdown32 |
| ; ... |
| ; use of eax |
| ; During PEI the adjcallstackdown32 is replaced with the subl which |
| ; clobbers eflags, effectively interfering in the liveness interval. However, |
| ; we then promote these copies into independent conditions in GPRs that avoids |
| ; repeated saving and restoring logic and can be trivially managed by the |
| ; register allocator. |
| define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) nounwind { |
| ; 32-GOOD-RA-LABEL: test_intervening_call: |
| ; 32-GOOD-RA: # %bb.0: # %entry |
| ; 32-GOOD-RA-NEXT: pushl %ebx |
| ; 32-GOOD-RA-NEXT: pushl %esi |
| ; 32-GOOD-RA-NEXT: pushl %eax |
| ; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ebx |
| ; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %esi |
| ; 32-GOOD-RA-NEXT: lock cmpxchg8b (%esi) |
| ; 32-GOOD-RA-NEXT: setne %bl |
| ; 32-GOOD-RA-NEXT: subl $8, %esp |
| ; 32-GOOD-RA-NEXT: pushl %edx |
| ; 32-GOOD-RA-NEXT: pushl %eax |
| ; 32-GOOD-RA-NEXT: calll bar |
| ; 32-GOOD-RA-NEXT: addl $16, %esp |
| ; 32-GOOD-RA-NEXT: testb %bl, %bl |
| ; 32-GOOD-RA-NEXT: jne .LBB0_3 |
| ; 32-GOOD-RA-NEXT: # %bb.1: # %t |
| ; 32-GOOD-RA-NEXT: movl $42, %eax |
| ; 32-GOOD-RA-NEXT: jmp .LBB0_2 |
| ; 32-GOOD-RA-NEXT: .LBB0_3: # %f |
| ; 32-GOOD-RA-NEXT: xorl %eax, %eax |
| ; 32-GOOD-RA-NEXT: .LBB0_2: # %t |
| ; 32-GOOD-RA-NEXT: xorl %edx, %edx |
| ; 32-GOOD-RA-NEXT: addl $4, %esp |
| ; 32-GOOD-RA-NEXT: popl %esi |
| ; 32-GOOD-RA-NEXT: popl %ebx |
| ; 32-GOOD-RA-NEXT: retl |
| ; |
| ; 32-FAST-RA-LABEL: test_intervening_call: |
| ; 32-FAST-RA: # %bb.0: # %entry |
| ; 32-FAST-RA-NEXT: pushl %ebx |
| ; 32-FAST-RA-NEXT: pushl %esi |
| ; 32-FAST-RA-NEXT: pushl %eax |
| ; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %esi |
| ; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ebx |
| ; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; 32-FAST-RA-NEXT: lock cmpxchg8b (%esi) |
| ; 32-FAST-RA-NEXT: setne %bl |
| ; 32-FAST-RA-NEXT: subl $8, %esp |
| ; 32-FAST-RA-NEXT: pushl %edx |
| ; 32-FAST-RA-NEXT: pushl %eax |
| ; 32-FAST-RA-NEXT: calll bar |
| ; 32-FAST-RA-NEXT: addl $16, %esp |
| ; 32-FAST-RA-NEXT: testb %bl, %bl |
| ; 32-FAST-RA-NEXT: jne .LBB0_3 |
| ; 32-FAST-RA-NEXT: # %bb.1: # %t |
| ; 32-FAST-RA-NEXT: movl $42, %eax |
| ; 32-FAST-RA-NEXT: jmp .LBB0_2 |
| ; 32-FAST-RA-NEXT: .LBB0_3: # %f |
| ; 32-FAST-RA-NEXT: xorl %eax, %eax |
| ; 32-FAST-RA-NEXT: .LBB0_2: # %t |
| ; 32-FAST-RA-NEXT: xorl %edx, %edx |
| ; 32-FAST-RA-NEXT: addl $4, %esp |
| ; 32-FAST-RA-NEXT: popl %esi |
| ; 32-FAST-RA-NEXT: popl %ebx |
| ; 32-FAST-RA-NEXT: retl |
| ; |
| ; 64-ALL-LABEL: test_intervening_call: |
| ; 64-ALL: # %bb.0: # %entry |
| ; 64-ALL-NEXT: pushq %rbx |
| ; 64-ALL-NEXT: movq %rsi, %rax |
| ; 64-ALL-NEXT: lock cmpxchgq %rdx, (%rdi) |
| ; 64-ALL-NEXT: setne %bl |
| ; 64-ALL-NEXT: movq %rax, %rdi |
| ; 64-ALL-NEXT: callq bar |
| ; 64-ALL-NEXT: testb %bl, %bl |
| ; 64-ALL-NEXT: jne .LBB0_2 |
| ; 64-ALL-NEXT: # %bb.1: # %t |
| ; 64-ALL-NEXT: movl $42, %eax |
| ; 64-ALL-NEXT: popq %rbx |
| ; 64-ALL-NEXT: retq |
| ; 64-ALL-NEXT: .LBB0_2: # %f |
| ; 64-ALL-NEXT: xorl %eax, %eax |
| ; 64-ALL-NEXT: popq %rbx |
| ; 64-ALL-NEXT: retq |
| entry: |
| %cx = cmpxchg i64* %foo, i64 %bar, i64 %baz seq_cst seq_cst |
| %v = extractvalue { i64, i1 } %cx, 0 |
| %p = extractvalue { i64, i1 } %cx, 1 |
| call i32 @bar(i64 %v) |
| br i1 %p, label %t, label %f |
| |
| t: |
| ret i64 42 |
| |
| f: |
| ret i64 0 |
| } |
| |
| ; Interesting in producing a clobber without any function calls. |
| define i32 @test_control_flow(i32* %p, i32 %i, i32 %j) nounwind { |
| ; 32-ALL-LABEL: test_control_flow: |
| ; 32-ALL: # %bb.0: # %entry |
| ; 32-ALL-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; 32-ALL-NEXT: cmpl {{[0-9]+}}(%esp), %eax |
| ; 32-ALL-NEXT: jle .LBB1_6 |
| ; 32-ALL-NEXT: # %bb.1: # %loop_start |
| ; 32-ALL-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; 32-ALL-NEXT: .p2align 4, 0x90 |
| ; 32-ALL-NEXT: .LBB1_2: # %while.condthread-pre-split.i |
| ; 32-ALL-NEXT: # =>This Loop Header: Depth=1 |
| ; 32-ALL-NEXT: # Child Loop BB1_3 Depth 2 |
| ; 32-ALL-NEXT: movl (%ecx), %edx |
| ; 32-ALL-NEXT: .p2align 4, 0x90 |
| ; 32-ALL-NEXT: .LBB1_3: # %while.cond.i |
| ; 32-ALL-NEXT: # Parent Loop BB1_2 Depth=1 |
| ; 32-ALL-NEXT: # => This Inner Loop Header: Depth=2 |
| ; 32-ALL-NEXT: movl %edx, %eax |
| ; 32-ALL-NEXT: xorl %edx, %edx |
| ; 32-ALL-NEXT: testl %eax, %eax |
| ; 32-ALL-NEXT: je .LBB1_3 |
| ; 32-ALL-NEXT: # %bb.4: # %while.body.i |
| ; 32-ALL-NEXT: # in Loop: Header=BB1_2 Depth=1 |
| ; 32-ALL-NEXT: lock cmpxchgl %eax, (%ecx) |
| ; 32-ALL-NEXT: jne .LBB1_2 |
| ; 32-ALL-NEXT: # %bb.5: |
| ; 32-ALL-NEXT: xorl %eax, %eax |
| ; 32-ALL-NEXT: .LBB1_6: # %cond.end |
| ; 32-ALL-NEXT: retl |
| ; |
| ; 64-ALL-LABEL: test_control_flow: |
| ; 64-ALL: # %bb.0: # %entry |
| ; 64-ALL-NEXT: cmpl %edx, %esi |
| ; 64-ALL-NEXT: jle .LBB1_5 |
| ; 64-ALL-NEXT: .p2align 4, 0x90 |
| ; 64-ALL-NEXT: .LBB1_1: # %while.condthread-pre-split.i |
| ; 64-ALL-NEXT: # =>This Loop Header: Depth=1 |
| ; 64-ALL-NEXT: # Child Loop BB1_2 Depth 2 |
| ; 64-ALL-NEXT: movl (%rdi), %ecx |
| ; 64-ALL-NEXT: .p2align 4, 0x90 |
| ; 64-ALL-NEXT: .LBB1_2: # %while.cond.i |
| ; 64-ALL-NEXT: # Parent Loop BB1_1 Depth=1 |
| ; 64-ALL-NEXT: # => This Inner Loop Header: Depth=2 |
| ; 64-ALL-NEXT: movl %ecx, %eax |
| ; 64-ALL-NEXT: xorl %ecx, %ecx |
| ; 64-ALL-NEXT: testl %eax, %eax |
| ; 64-ALL-NEXT: je .LBB1_2 |
| ; 64-ALL-NEXT: # %bb.3: # %while.body.i |
| ; 64-ALL-NEXT: # in Loop: Header=BB1_1 Depth=1 |
| ; 64-ALL-NEXT: lock cmpxchgl %eax, (%rdi) |
| ; 64-ALL-NEXT: jne .LBB1_1 |
| ; 64-ALL-NEXT: # %bb.4: |
| ; 64-ALL-NEXT: xorl %esi, %esi |
| ; 64-ALL-NEXT: .LBB1_5: # %cond.end |
| ; 64-ALL-NEXT: movl %esi, %eax |
| ; 64-ALL-NEXT: retq |
| entry: |
| %cmp = icmp sgt i32 %i, %j |
| br i1 %cmp, label %loop_start, label %cond.end |
| |
| loop_start: |
| br label %while.condthread-pre-split.i |
| |
| while.condthread-pre-split.i: |
| %.pr.i = load i32, i32* %p, align 4 |
| br label %while.cond.i |
| |
| while.cond.i: |
| %0 = phi i32 [ %.pr.i, %while.condthread-pre-split.i ], [ 0, %while.cond.i ] |
| %tobool.i = icmp eq i32 %0, 0 |
| br i1 %tobool.i, label %while.cond.i, label %while.body.i |
| |
| while.body.i: |
| %.lcssa = phi i32 [ %0, %while.cond.i ] |
| %1 = cmpxchg i32* %p, i32 %.lcssa, i32 %.lcssa seq_cst seq_cst |
| %2 = extractvalue { i32, i1 } %1, 1 |
| br i1 %2, label %cond.end.loopexit, label %while.condthread-pre-split.i |
| |
| cond.end.loopexit: |
| br label %cond.end |
| |
| cond.end: |
| %cond = phi i32 [ %i, %entry ], [ 0, %cond.end.loopexit ] |
| ret i32 %cond |
| } |
| |
| ; This one is an interesting case because CMOV doesn't have a chain |
| ; operand. Naive attempts to limit cmpxchg EFLAGS use are likely to fail here. |
| define i32 @test_feed_cmov(i32* %addr, i32 %desired, i32 %new) nounwind { |
| ; 32-GOOD-RA-LABEL: test_feed_cmov: |
| ; 32-GOOD-RA: # %bb.0: # %entry |
| ; 32-GOOD-RA-NEXT: pushl %ebx |
| ; 32-GOOD-RA-NEXT: pushl %esi |
| ; 32-GOOD-RA-NEXT: pushl %eax |
| ; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %esi |
| ; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; 32-GOOD-RA-NEXT: lock cmpxchgl %esi, (%ecx) |
| ; 32-GOOD-RA-NEXT: sete %bl |
| ; 32-GOOD-RA-NEXT: calll foo |
| ; 32-GOOD-RA-NEXT: testb %bl, %bl |
| ; 32-GOOD-RA-NEXT: jne .LBB2_2 |
| ; 32-GOOD-RA-NEXT: # %bb.1: # %entry |
| ; 32-GOOD-RA-NEXT: movl %eax, %esi |
| ; 32-GOOD-RA-NEXT: .LBB2_2: # %entry |
| ; 32-GOOD-RA-NEXT: movl %esi, %eax |
| ; 32-GOOD-RA-NEXT: addl $4, %esp |
| ; 32-GOOD-RA-NEXT: popl %esi |
| ; 32-GOOD-RA-NEXT: popl %ebx |
| ; 32-GOOD-RA-NEXT: retl |
| ; |
| ; 32-FAST-RA-LABEL: test_feed_cmov: |
| ; 32-FAST-RA: # %bb.0: # %entry |
| ; 32-FAST-RA-NEXT: pushl %ebx |
| ; 32-FAST-RA-NEXT: pushl %esi |
| ; 32-FAST-RA-NEXT: pushl %eax |
| ; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %esi |
| ; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; 32-FAST-RA-NEXT: lock cmpxchgl %esi, (%ecx) |
| ; 32-FAST-RA-NEXT: sete %bl |
| ; 32-FAST-RA-NEXT: calll foo |
| ; 32-FAST-RA-NEXT: testb %bl, %bl |
| ; 32-FAST-RA-NEXT: jne .LBB2_2 |
| ; 32-FAST-RA-NEXT: # %bb.1: # %entry |
| ; 32-FAST-RA-NEXT: movl %eax, %esi |
| ; 32-FAST-RA-NEXT: .LBB2_2: # %entry |
| ; 32-FAST-RA-NEXT: movl %esi, %eax |
| ; 32-FAST-RA-NEXT: addl $4, %esp |
| ; 32-FAST-RA-NEXT: popl %esi |
| ; 32-FAST-RA-NEXT: popl %ebx |
| ; 32-FAST-RA-NEXT: retl |
| ; |
| ; 64-ALL-LABEL: test_feed_cmov: |
| ; 64-ALL: # %bb.0: # %entry |
| ; 64-ALL-NEXT: pushq %rbp |
| ; 64-ALL-NEXT: pushq %rbx |
| ; 64-ALL-NEXT: pushq %rax |
| ; 64-ALL-NEXT: movl %edx, %ebx |
| ; 64-ALL-NEXT: movl %esi, %eax |
| ; 64-ALL-NEXT: lock cmpxchgl %edx, (%rdi) |
| ; 64-ALL-NEXT: sete %bpl |
| ; 64-ALL-NEXT: callq foo |
| ; 64-ALL-NEXT: testb %bpl, %bpl |
| ; 64-ALL-NEXT: cmovnel %ebx, %eax |
| ; 64-ALL-NEXT: addq $8, %rsp |
| ; 64-ALL-NEXT: popq %rbx |
| ; 64-ALL-NEXT: popq %rbp |
| ; 64-ALL-NEXT: retq |
| entry: |
| %res = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst |
| %success = extractvalue { i32, i1 } %res, 1 |
| |
| %rhs = call i32 @foo() |
| |
| %ret = select i1 %success, i32 %new, i32 %rhs |
| ret i32 %ret |
| } |