| ; RUN: opt -S -mtriple=amdgcn-- -codegenprepare < %s | FileCheck -check-prefix=OPT %s |
| ; RUN: opt -S -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -codegenprepare < %s | FileCheck -check-prefix=OPT %s |
| ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s |
| ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s |
| |
| ; This particular case will actually be worse in terms of code size |
| ; from sinking into both. |
| |
| ; OPT-LABEL: @sink_ubfe_i32( |
| ; OPT: entry: |
| ; OPT-NEXT: br i1 |
| |
| ; OPT: bb0: |
| ; OPT: %0 = lshr i32 %arg1, 8 |
| ; OPT-NEXT: %val0 = and i32 %0, 255 |
| ; OPT: br label |
| |
| ; OPT: bb1: |
| ; OPT: %1 = lshr i32 %arg1, 8 |
| ; OPT-NEXT: %val1 = and i32 %1, 127 |
| ; OPT: br label |
| |
| ; OPT: ret: |
| ; OPT: store |
| ; OPT: ret |
| |
| |
| ; GCN-LABEL: {{^}}sink_ubfe_i32: |
| ; GCN-NOT: lshr |
| ; GCN: s_cbranch_scc1 |
| |
| ; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008 |
| ; GCN: BB0_2: |
| ; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70008 |
| |
| ; GCN: BB0_3: |
| ; GCN: buffer_store_dword |
| ; GCN: s_endpgm |
| define amdgpu_kernel void @sink_ubfe_i32(i32 addrspace(1)* %out, i32 %arg1) #0 { |
| entry: |
| %shr = lshr i32 %arg1, 8 |
| br i1 undef, label %bb0, label %bb1 |
| |
| bb0: |
| %val0 = and i32 %shr, 255 |
| store volatile i32 0, i32 addrspace(1)* undef |
| br label %ret |
| |
| bb1: |
| %val1 = and i32 %shr, 127 |
| store volatile i32 0, i32 addrspace(1)* undef |
| br label %ret |
| |
| ret: |
| %phi = phi i32 [ %val0, %bb0 ], [ %val1, %bb1 ] |
| store i32 %phi, i32 addrspace(1)* %out |
| ret void |
| } |
| |
| ; OPT-LABEL: @sink_sbfe_i32( |
| ; OPT: entry: |
| ; OPT-NEXT: br i1 |
| |
| ; OPT: bb0: |
| ; OPT: %0 = ashr i32 %arg1, 8 |
| ; OPT-NEXT: %val0 = and i32 %0, 255 |
| ; OPT: br label |
| |
| ; OPT: bb1: |
| ; OPT: %1 = ashr i32 %arg1, 8 |
| ; OPT-NEXT: %val1 = and i32 %1, 127 |
| ; OPT: br label |
| |
| ; OPT: ret: |
| ; OPT: store |
| ; OPT: ret |
| |
| ; GCN-LABEL: {{^}}sink_sbfe_i32: |
| define amdgpu_kernel void @sink_sbfe_i32(i32 addrspace(1)* %out, i32 %arg1) #0 { |
| entry: |
| %shr = ashr i32 %arg1, 8 |
| br i1 undef, label %bb0, label %bb1 |
| |
| bb0: |
| %val0 = and i32 %shr, 255 |
| store volatile i32 0, i32 addrspace(1)* undef |
| br label %ret |
| |
| bb1: |
| %val1 = and i32 %shr, 127 |
| store volatile i32 0, i32 addrspace(1)* undef |
| br label %ret |
| |
| ret: |
| %phi = phi i32 [ %val0, %bb0 ], [ %val1, %bb1 ] |
| store i32 %phi, i32 addrspace(1)* %out |
| ret void |
| } |
| |
| |
| ; OPT-LABEL: @sink_ubfe_i16( |
| ; OPT: entry: |
| ; OPT-NEXT: br i1 |
| |
| ; OPT: bb0: |
| ; OPT: %0 = lshr i16 %arg1, 4 |
| ; OPT-NEXT: %val0 = and i16 %0, 255 |
| ; OPT: br label |
| |
| ; OPT: bb1: |
| ; OPT: %1 = lshr i16 %arg1, 4 |
| ; OPT-NEXT: %val1 = and i16 %1, 127 |
| ; OPT: br label |
| |
| ; OPT: ret: |
| ; OPT: store |
| ; OPT: ret |
| |
| ; For GFX8: since i16 is legal type, we cannot sink lshr into BBs. |
| |
| ; GCN-LABEL: {{^}}sink_ubfe_i16: |
| ; GCN-NOT: lshr |
| ; VI: s_load_dword [[ARG:s[0-9]+]], s[0:1], 0x2c |
| ; VI: s_bfe_u32 [[BFE:s[0-9]+]], [[ARG]], 0xc0004 |
| ; GCN: s_cbranch_scc1 |
| |
| ; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80004 |
| ; VI: s_and_b32 s{{[0-9]+}}, [[BFE]], 0xff |
| |
| ; GCN: BB2_2: |
| ; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70004 |
| ; VI: s_and_b32 s{{[0-9]+}}, [[BFE]], 0x7f |
| |
| ; GCN: BB2_3: |
| ; GCN: buffer_store_short |
| ; GCN: s_endpgm |
| define amdgpu_kernel void @sink_ubfe_i16(i16 addrspace(1)* %out, i16 %arg1) #0 { |
| entry: |
| %shr = lshr i16 %arg1, 4 |
| br i1 undef, label %bb0, label %bb1 |
| |
| bb0: |
| %val0 = and i16 %shr, 255 |
| store volatile i16 0, i16 addrspace(1)* undef |
| br label %ret |
| |
| bb1: |
| %val1 = and i16 %shr, 127 |
| store volatile i16 0, i16 addrspace(1)* undef |
| br label %ret |
| |
| ret: |
| %phi = phi i16 [ %val0, %bb0 ], [ %val1, %bb1 ] |
| store i16 %phi, i16 addrspace(1)* %out |
| ret void |
| } |
| |
| ; We don't really want to sink this one since it isn't reducible to a |
| ; 32-bit BFE on one half of the integer. |
| |
| ; OPT-LABEL: @sink_ubfe_i64_span_midpoint( |
| ; OPT: entry: |
| ; OPT-NOT: lshr |
| ; OPT: br i1 |
| |
| ; OPT: bb0: |
| ; OPT: %0 = lshr i64 %arg1, 30 |
| ; OPT-NEXT: %val0 = and i64 %0, 255 |
| |
| ; OPT: bb1: |
| ; OPT: %1 = lshr i64 %arg1, 30 |
| ; OPT-NEXT: %val1 = and i64 %1, 127 |
| |
| ; OPT: ret: |
| ; OPT: store |
| ; OPT: ret |
| |
| ; GCN-LABEL: {{^}}sink_ubfe_i64_span_midpoint: |
| |
| ; GCN: v_alignbit_b32 v[[LO:[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}, 30 |
| ; GCN: s_cbranch_scc1 BB3_2 |
| ; GCN: v_and_b32_e32 v{{[0-9]+}}, 0xff, v[[LO]] |
| |
| ; GCN: BB3_2: |
| ; GCN: v_and_b32_e32 v{{[0-9]+}}, 0x7f, v[[LO]] |
| |
| ; GCN: BB3_3: |
| ; GCN: buffer_store_dwordx2 |
| define amdgpu_kernel void @sink_ubfe_i64_span_midpoint(i64 addrspace(1)* %out, i64 %arg1) #0 { |
| entry: |
| %shr = lshr i64 %arg1, 30 |
| br i1 undef, label %bb0, label %bb1 |
| |
| bb0: |
| %val0 = and i64 %shr, 255 |
| store volatile i32 0, i32 addrspace(1)* undef |
| br label %ret |
| |
| bb1: |
| %val1 = and i64 %shr, 127 |
| store volatile i32 0, i32 addrspace(1)* undef |
| br label %ret |
| |
| ret: |
| %phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ] |
| store i64 %phi, i64 addrspace(1)* %out |
| ret void |
| } |
| |
| ; OPT-LABEL: @sink_ubfe_i64_low32( |
| ; OPT: entry: |
| ; OPT-NOT: lshr |
| ; OPT: br i1 |
| |
| ; OPT: bb0: |
| ; OPT: %0 = lshr i64 %arg1, 15 |
| ; OPT-NEXT: %val0 = and i64 %0, 255 |
| |
| ; OPT: bb1: |
| ; OPT: %1 = lshr i64 %arg1, 15 |
| ; OPT-NEXT: %val1 = and i64 %1, 127 |
| |
| ; OPT: ret: |
| ; OPT: store |
| ; OPT: ret |
| |
| ; GCN-LABEL: {{^}}sink_ubfe_i64_low32: |
| |
| ; GCN: s_cbranch_scc1 BB4_2 |
| |
| ; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x8000f |
| |
| ; GCN: BB4_2: |
| ; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x7000f |
| |
| ; GCN: BB4_3: |
| ; GCN: buffer_store_dwordx2 |
| define amdgpu_kernel void @sink_ubfe_i64_low32(i64 addrspace(1)* %out, i64 %arg1) #0 { |
| entry: |
| %shr = lshr i64 %arg1, 15 |
| br i1 undef, label %bb0, label %bb1 |
| |
| bb0: |
| %val0 = and i64 %shr, 255 |
| store volatile i32 0, i32 addrspace(1)* undef |
| br label %ret |
| |
| bb1: |
| %val1 = and i64 %shr, 127 |
| store volatile i32 0, i32 addrspace(1)* undef |
| br label %ret |
| |
| ret: |
| %phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ] |
| store i64 %phi, i64 addrspace(1)* %out |
| ret void |
| } |
| |
| ; OPT-LABEL: @sink_ubfe_i64_high32( |
| ; OPT: entry: |
| ; OPT-NOT: lshr |
| ; OPT: br i1 |
| |
| ; OPT: bb0: |
| ; OPT: %0 = lshr i64 %arg1, 35 |
| ; OPT-NEXT: %val0 = and i64 %0, 255 |
| |
| ; OPT: bb1: |
| ; OPT: %1 = lshr i64 %arg1, 35 |
| ; OPT-NEXT: %val1 = and i64 %1, 127 |
| |
| ; OPT: ret: |
| ; OPT: store |
| ; OPT: ret |
| |
| ; GCN-LABEL: {{^}}sink_ubfe_i64_high32: |
| ; GCN: s_cbranch_scc1 BB5_2 |
| ; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80003 |
| |
| ; GCN: BB5_2: |
| ; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70003 |
| |
| ; GCN: BB5_3: |
| ; GCN: buffer_store_dwordx2 |
| define amdgpu_kernel void @sink_ubfe_i64_high32(i64 addrspace(1)* %out, i64 %arg1) #0 { |
| entry: |
| %shr = lshr i64 %arg1, 35 |
| br i1 undef, label %bb0, label %bb1 |
| |
| bb0: |
| %val0 = and i64 %shr, 255 |
| store volatile i32 0, i32 addrspace(1)* undef |
| br label %ret |
| |
| bb1: |
| %val1 = and i64 %shr, 127 |
| store volatile i32 0, i32 addrspace(1)* undef |
| br label %ret |
| |
| ret: |
| %phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ] |
| store i64 %phi, i64 addrspace(1)* %out |
| ret void |
| } |
| |
| attributes #0 = { nounwind } |