# RUN: llc -march=amdgcn -mcpu=gfx803 -run-pass si-memory-legalizer %s -o - | FileCheck %s

# Verifies that si-memory-legalizer places the post-atomic synchronization
# (a wait followed by an L1 invalidate) after a seq_cst buffer atomic even
# when that atomic is the final instruction of its basic block, i.e. when
# there is no following instruction or terminator to insert before.

--- |
  ; ModuleID = '<stdin>'
  source_filename = "<stdin>"
  target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"

  ; Function Attrs: nounwind readnone
  declare i32 @llvm.amdgcn.workitem.id.x() #0

  ; Kernel whose machine code is given in the MIR document below. Only the
  ; value and block names (%tid.gep, %gep, %atomic, %exit) are referenced from
  ; the MIR; the IR itself is not recompiled by -run-pass.
  ; Function Attrs: nounwind
  define amdgpu_kernel void @atomic_max_i32_noret(
      i32 addrspace(1)* %out,
      i32 addrspace(1)* addrspace(1)* %in,
      i32 addrspace(1)* %x,
      i32 %y) #1 {
    %tid = call i32 @llvm.amdgcn.workitem.id.x()
    %idxprom = sext i32 %tid to i64
    %tid.gep = getelementptr i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %in, i64 %idxprom
    %ptr = load volatile i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %tid.gep
    %xor = xor i32 %tid, 1
    %cmp = icmp ne i32 %xor, 0
    %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %cmp)
    %2 = extractvalue { i1, i64 } %1, 0
    %3 = extractvalue { i1, i64 } %1, 1
    br i1 %2, label %atomic, label %exit

  atomic:                                           ; preds = %0
    %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 100
    %ret = atomicrmw max i32 addrspace(1)* %gep, i32 %y seq_cst
    br label %exit

  exit:                                             ; preds = %atomic, %0
    call void @llvm.amdgcn.end.cf(i64 %3)
    ret void
  }

  declare { i1, i64 } @llvm.amdgcn.if(i1)

  declare void @llvm.amdgcn.end.cf(i64)

  ; Function Attrs: nounwind
  declare void @llvm.stackprotector(i8*, i8**) #3

  attributes #0 = { nounwind readnone "target-cpu"="tahiti" }
  attributes #1 = { nounwind "target-cpu"="tahiti" }
  attributes #2 = { readnone }
  attributes #3 = { nounwind }

...
---

# Expected output: directly after the atomic in bb.1.atomic the pass must have
# added a wait with immediate 3952 (0xF70; vmcnt(0) in the gfx8 encoding) and
# then a volatile L1 write-back/invalidate.

# CHECK-LABEL: name: atomic_max_i32_noret

# CHECK-LABEL: bb.1.atomic:
# CHECK:      BUFFER_ATOMIC_SMAX_ADDR64
# CHECK-NEXT: S_WAITCNT 3952
# CHECK-NEXT: BUFFER_WBINVL1_VOL

name:            atomic_max_i32_noret
alignment:       0
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
liveins:
  - { reg: '$sgpr0_sgpr1' }
  - { reg: '$vgpr0' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0 (%ir-block.0):
    successors: %bb.1.atomic(0x40000000), %bb.2.exit(0x40000000)
    liveins: $vgpr0, $sgpr0_sgpr1

    ; Entry block: volatile load of the per-lane pointer (%ir.tid.gep), then
    ; the compare and exec-mask save that guard the atomic block.
    $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
    $vgpr1 = V_ASHRREV_I32_e32 31, $vgpr0, implicit $exec
    $vgpr1_vgpr2 = V_LSHL_B64 $vgpr0_vgpr1, 3, implicit $exec
    $sgpr7 = S_MOV_B32 61440
    $sgpr6 = S_MOV_B32 0
    S_WAITCNT 127
    $vgpr1_vgpr2 = BUFFER_LOAD_DWORDX2_ADDR64 killed $vgpr1_vgpr2, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 8 from %ir.tid.gep)
    $vgpr0 = V_XOR_B32_e32 1, killed $vgpr0, implicit $exec
    V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
    $sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 killed $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
    $sgpr2_sgpr3 = S_XOR_B64 $exec, killed $sgpr2_sgpr3, implicit-def dead $scc
    SI_MASK_BRANCH %bb.2.exit, implicit $exec

  bb.1.atomic:
    successors: %bb.2.exit(0x80000000)
    liveins: $sgpr4_sgpr5_sgpr6_sgpr7:0x0000000C, $sgpr0_sgpr1, $sgpr2_sgpr3, $vgpr1_vgpr2_vgpr3_vgpr4:0x00000003

    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 15, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(2)* undef`)
    dead $vgpr0 = V_MOV_B32_e32 -1, implicit $exec
    dead $vgpr0 = V_MOV_B32_e32 61440, implicit $exec
    $sgpr4_sgpr5 = S_MOV_B64 0
    S_WAITCNT 127
    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
    S_WAITCNT 3952
    ; The seq_cst atomic under test. Its immediate offset 400 matches the IR
    ; GEP of 100 i32 elements. It is deliberately the last instruction in this
    ; block: nothing follows it and the block falls through to bb.2.exit.
    BUFFER_ATOMIC_SMAX_ADDR64 killed $vgpr0, killed $vgpr1_vgpr2, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 400, 0, implicit $exec :: (volatile load seq_cst 4 from %ir.gep)

  bb.2.exit:
    liveins: $sgpr2_sgpr3

    ; Restore the exec mask saved in bb.0 and end the program.
    $exec = S_OR_B64 $exec, killed $sgpr2_sgpr3, implicit-def $scc
    S_ENDPGM

...