| ; RUN: llc -O0 -march=amdgcn -mcpu=fiji -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=VGPR %s |
| ; RUN: llc -O0 -march=amdgcn -mcpu=fiji -amdgpu-spill-sgpr-to-smem=1 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=SMEM %s |
| ; RUN: llc -O0 -march=amdgcn -mcpu=fiji -amdgpu-spill-sgpr-to-smem=0 -amdgpu-spill-sgpr-to-vgpr=0 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=VMEM %s |
| |
| ; ALL-LABEL: {{^}}spill_sgpr_x2: |
| ; SMEM: s_add_u32 m0, s3, 0x100{{$}} |
| ; SMEM: s_buffer_store_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[8:11], m0 ; 8-byte Folded Spill |
| ; SMEM: s_cbranch_scc1 |
| |
| ; SMEM: s_add_u32 m0, s3, 0x100{{$}} |
| ; SMEM: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[8:11], m0 ; 8-byte Folded Reload |
| |
| ; SMEM: s_dcache_wb |
| ; SMEM: s_endpgm |
| |
| ; FIXME: Should only need 4 bytes |
| ; SMEM: ScratchSize: 12 |
| |
| |
| ; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0 |
| ; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1 |
| ; VGPR: s_cbranch_scc1 |
| |
| ; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0 |
| ; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1 |
| |
| ; VMEM: buffer_store_dword |
| ; VMEM: buffer_store_dword |
| ; VMEM: s_cbranch_scc1 |
| |
| ; VMEM: buffer_load_dword |
| ; VMEM: buffer_load_dword |
| ; Keeps a <2 x i32> inline-asm result live across a conditional branch. The |
| ; check lines above expect the value to be saved before the branch and |
| ; restored after it. |
| define amdgpu_kernel void @spill_sgpr_x2(i32 addrspace(1)* %out, i32 %in) #0 { |
| ; Define the two-dword value through an "=s"-constrained asm output. |
| %sgpr.pair = call <2 x i32> asm sideeffect "; def $0", "=s" () #0 |
| %in.is.zero = icmp eq i32 %in, 0 |
| br i1 %in.is.zero, label %use.pair, label %exit |
| |
| use.pair: |
| ; Only this path consumes the value, again through an "s" constraint. |
| call void asm sideeffect "; use $0", "s"(<2 x i32> %sgpr.pair) #0 |
| br label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; ALL-LABEL: {{^}}spill_sgpr_x4: |
| ; SMEM: s_add_u32 m0, s3, 0x100{{$}} |
| ; SMEM: s_buffer_store_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[12:15], m0 ; 16-byte Folded Spill |
| ; SMEM: s_cbranch_scc1 |
| |
| ; SMEM: s_add_u32 m0, s3, 0x100{{$}} |
| ; SMEM: s_buffer_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[12:15], m0 ; 16-byte Folded Reload |
| ; SMEM: s_dcache_wb |
| ; SMEM: s_endpgm |
| |
| ; FIXME: Should only need 4 bytes |
| ; SMEM: ScratchSize: 20 |
| |
| ; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0 |
| ; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1 |
| ; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2 |
| ; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3 |
| ; VGPR: s_cbranch_scc1 |
| |
| ; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0 |
| ; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1 |
| ; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2 |
| ; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3 |
| |
| |
| ; VMEM: buffer_store_dword |
| ; VMEM: buffer_store_dword |
| ; VMEM: buffer_store_dword |
| ; VMEM: buffer_store_dword |
| ; VMEM: s_cbranch_scc1 |
| |
| ; VMEM: buffer_load_dword |
| ; VMEM: buffer_load_dword |
| ; VMEM: buffer_load_dword |
| ; VMEM: buffer_load_dword |
| ; Keeps a <4 x i32> inline-asm result live across a conditional branch. The |
| ; check lines above expect the value to be saved before the branch and |
| ; restored after it. |
| define amdgpu_kernel void @spill_sgpr_x4(i32 addrspace(1)* %out, i32 %in) #0 { |
| ; Define the four-dword value through an "=s"-constrained asm output. |
| %sgpr.quad = call <4 x i32> asm sideeffect "; def $0", "=s" () #0 |
| %in.is.zero = icmp eq i32 %in, 0 |
| br i1 %in.is.zero, label %use.quad, label %exit |
| |
| use.quad: |
| ; Only this path consumes the value, again through an "s" constraint. |
| call void asm sideeffect "; use $0", "s"(<4 x i32> %sgpr.quad) #0 |
| br label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; ALL-LABEL: {{^}}spill_sgpr_x8: |
| |
| ; SMEM: s_add_u32 m0, s3, 0x100{{$}} |
| ; SMEM: s_buffer_store_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[16:19], m0 ; 16-byte Folded Spill |
| ; SMEM: s_add_u32 m0, s3, 0x110{{$}} |
| ; SMEM: s_buffer_store_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[16:19], m0 ; 16-byte Folded Spill |
| ; SMEM: s_cbranch_scc1 |
| |
| ; SMEM: s_add_u32 m0, s3, 0x100{{$}} |
| ; SMEM: s_buffer_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[16:19], m0 ; 16-byte Folded Reload |
| ; SMEM: s_add_u32 m0, s3, 0x110{{$}} |
| ; SMEM: s_buffer_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[16:19], m0 ; 16-byte Folded Reload |
| |
| ; SMEM: s_dcache_wb |
| ; SMEM: s_endpgm |
| |
| ; SMEM: ScratchSize: 36 |
| |
| ; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0 |
| ; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1 |
| ; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2 |
| ; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3 |
| ; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 4 |
| ; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 5 |
| ; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 6 |
| ; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 7 |
| ; VGPR: s_cbranch_scc1 |
| |
| ; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0 |
| ; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1 |
| ; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2 |
| ; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3 |
| ; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 4 |
| ; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 5 |
| ; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 6 |
| ; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 7 |
| |
| ; VMEM: buffer_store_dword |
| ; VMEM: buffer_store_dword |
| ; VMEM: buffer_store_dword |
| ; VMEM: buffer_store_dword |
| ; VMEM: buffer_store_dword |
| ; VMEM: buffer_store_dword |
| ; VMEM: buffer_store_dword |
| ; VMEM: buffer_store_dword |
| ; VMEM: s_cbranch_scc1 |
| |
| ; VMEM: buffer_load_dword |
| ; VMEM: buffer_load_dword |
| ; VMEM: buffer_load_dword |
| ; VMEM: buffer_load_dword |
| ; VMEM: buffer_load_dword |
| ; VMEM: buffer_load_dword |
| ; VMEM: buffer_load_dword |
| ; VMEM: buffer_load_dword |
| ; Keeps an <8 x i32> inline-asm result live across a conditional branch. The |
| ; check lines above expect the value to be saved before the branch and |
| ; restored after it. |
| define amdgpu_kernel void @spill_sgpr_x8(i32 addrspace(1)* %out, i32 %in) #0 { |
| ; Define the eight-dword value through an "=s"-constrained asm output. |
| %sgpr.octet = call <8 x i32> asm sideeffect "; def $0", "=s" () #0 |
| %in.is.zero = icmp eq i32 %in, 0 |
| br i1 %in.is.zero, label %use.octet, label %exit |
| |
| use.octet: |
| ; Only this path consumes the value, again through an "s" constraint. |
| call void asm sideeffect "; use $0", "s"(<8 x i32> %sgpr.octet) #0 |
| br label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; FIXME: x16 inline asm seems broken, so the <16 x i32> variant below is left commented out. |
| ; define amdgpu_kernel void @spill_sgpr_x16(i32 addrspace(1)* %out, i32 %in) #0 { |
| ; %wide.sgpr = call <16 x i32> asm sideeffect "; def $0", "=s" () #0 |
| ; %cmp = icmp eq i32 %in, 0 |
| ; br i1 %cmp, label %bb0, label %ret |
| |
| ; bb0: |
| ; call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr) #0 |
| ; br label %ret |
| |
| ; ret: |
| ; ret void |
| ; } |
| |
| ; Attribute group #0 is referenced by the kernels above and by their |
| ; inline-asm call sites; it marks them nounwind. |
| attributes #0 = { nounwind } |