; RUN: opt -S -codegenprepare < %s | FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"

@x = external global [1 x [2 x <4 x float>]]

; Can we sink a single addressing-mode computation to its use?
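; The sunk address appears as a byte-offset GEP: index 5 over i64 is
; 5 * 8 = 40 bytes, which is what the "getelementptr i8, {{.+}} 40"
; patterns throughout this file match.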
define void @test1(i1 %cond, i64* %base) {
; CHECK-LABEL: @test1
; CHECK: getelementptr i8, {{.+}} 40
entry:
  %addr = getelementptr inbounds i64, i64* %base, i64 5
  %casted = bitcast i64* %addr to i32*
  br i1 %cond, label %if.then, label %fallthrough

if.then:
  %v = load i32, i32* %casted, align 4
  br label %fallthrough

fallthrough:
  ret void
}

declare void @foo(i32)

; Make sure sinking two copies of the addressing-mode computation into
; different blocks works.
define void @test2(i1 %cond, i64* %base) {
; CHECK-LABEL: @test2
entry:
  %addr = getelementptr inbounds i64, i64* %base, i64 5
  %casted = bitcast i64* %addr to i32*
  br i1 %cond, label %if.then, label %fallthrough

if.then:
; CHECK-LABEL: if.then:
; CHECK: getelementptr i8, {{.+}} 40
  %v1 = load i32, i32* %casted, align 4
  call void @foo(i32 %v1)
  %cmp = icmp eq i32 %v1, 0
  br i1 %cmp, label %next, label %fallthrough

next:
; CHECK-LABEL: next:
; CHECK: getelementptr i8, {{.+}} 40
  %v2 = load i32, i32* %casted, align 4
  call void @foo(i32 %v2)
  br label %fallthrough

fallthrough:
  ret void
}

; If we have two loads in the same block, we need only one copy of the
; addressing mode - instruction selection will duplicate it if needed.
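; The CHECK-NOT below verifies that the second load reuses the GEP sunk for
; the first one rather than getting a copy of its own.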
define void @test3(i1 %cond, i64* %base) {
; CHECK-LABEL: @test3
entry:
  %addr = getelementptr inbounds i64, i64* %base, i64 5
  %casted = bitcast i64* %addr to i32*
  br i1 %cond, label %if.then, label %fallthrough

if.then:
; CHECK-LABEL: if.then:
; CHECK: getelementptr i8, {{.+}} 40
  %v1 = load i32, i32* %casted, align 4
  call void @foo(i32 %v1)
; CHECK-NOT: getelementptr i8, {{.+}} 40
  %v2 = load i32, i32* %casted, align 4
  call void @foo(i32 %v2)
  br label %fallthrough

fallthrough:
  ret void
}

; Can we still sink the addressing mode if there's a cold use of the
; address itself?
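; The cold call-site attribute on @slowpath marks that use as rarely
; executed, so duplicating the address computation into rare.1 does not
; penalize the hot path; compare test5 below, where the same call without
; "cold" blocks sinking.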
define void @test4(i1 %cond, i64* %base) {
; CHECK-LABEL: @test4
entry:
  %addr = getelementptr inbounds i64, i64* %base, i64 5
  %casted = bitcast i64* %addr to i32*
  br i1 %cond, label %if.then, label %fallthrough

if.then:
; CHECK-LABEL: if.then:
; CHECK: getelementptr i8, {{.+}} 40
  %v1 = load i32, i32* %casted, align 4
  call void @foo(i32 %v1)
  %cmp = icmp eq i32 %v1, 0
  br i1 %cmp, label %rare.1, label %fallthrough

fallthrough:
  ret void

rare.1:
; CHECK-LABEL: rare.1:
; CHECK: getelementptr i8, {{.+}} 40
  call void @slowpath(i32 %v1, i32* %casted) cold
  br label %fallthrough
}

; Negative test - don't want to duplicate the address computation into the
; hot path.
define void @test5(i1 %cond, i64* %base) {
; CHECK-LABEL: @test5
entry:
; CHECK: %addr = getelementptr
  %addr = getelementptr inbounds i64, i64* %base, i64 5
  %casted = bitcast i64* %addr to i32*
  br i1 %cond, label %if.then, label %fallthrough

if.then:
; CHECK-LABEL: if.then:
; CHECK-NOT: getelementptr i8, {{.+}} 40
  %v1 = load i32, i32* %casted, align 4
  call void @foo(i32 %v1)
  %cmp = icmp eq i32 %v1, 0
  br i1 %cmp, label %rare.1, label %fallthrough

fallthrough:
  ret void

rare.1:
  call void @slowpath(i32 %v1, i32* %casted) ;; NOT COLD
  br label %fallthrough
}

; Negative test - don't duplicate the address computation when optimizing
; for size (minsize).
define void @test6(i1 %cond, i64* %base) minsize {
; CHECK-LABEL: @test6
entry:
; CHECK: %addr = getelementptr
  %addr = getelementptr inbounds i64, i64* %base, i64 5
  %casted = bitcast i64* %addr to i32*
  br i1 %cond, label %if.then, label %fallthrough

if.then:
; CHECK-LABEL: if.then:
; CHECK-NOT: getelementptr i8, {{.+}} 40
  %v1 = load i32, i32* %casted, align 4
  call void @foo(i32 %v1)
  %cmp = icmp eq i32 %v1, 0
  br i1 %cmp, label %rare.1, label %fallthrough

fallthrough:
  ret void

rare.1:
  call void @slowpath(i32 %v1, i32* %casted) cold
  br label %fallthrough
}


; Make sure sinking two copies of the addressing-mode computation into
; different blocks works when there are cold paths for each.
define void @test7(i1 %cond, i64* %base) {
; CHECK-LABEL: @test7
entry:
  %addr = getelementptr inbounds i64, i64* %base, i64 5
  %casted = bitcast i64* %addr to i32*
  br i1 %cond, label %if.then, label %fallthrough

if.then:
; CHECK-LABEL: if.then:
; CHECK: getelementptr i8, {{.+}} 40
  %v1 = load i32, i32* %casted, align 4
  call void @foo(i32 %v1)
  %cmp = icmp eq i32 %v1, 0
  br i1 %cmp, label %rare.1, label %next

next:
; CHECK-LABEL: next:
; CHECK: getelementptr i8, {{.+}} 40
  %v2 = load i32, i32* %casted, align 4
  call void @foo(i32 %v2)
  %cmp2 = icmp eq i32 %v2, 0
  br i1 %cmp2, label %rare.2, label %fallthrough

fallthrough:
  ret void

rare.1:
; CHECK-LABEL: rare.1:
; CHECK: getelementptr i8, {{.+}} 40
  call void @slowpath(i32 %v1, i32* %casted) cold
  br label %next

rare.2:
; CHECK-LABEL: rare.2:
; CHECK: getelementptr i8, {{.+}} 40
  call void @slowpath(i32 %v2, i32* %casted) cold
  br label %fallthrough
}

declare void @slowpath(i32, i32*)

; Make sure we don't end up in an infinite loop after we fail to sink.
; CHECK-LABEL: define void @test8
; CHECK: %ptr = getelementptr i8, i8* %aFOO_load_ptr2int_2void, i32 undef
define void @test8() {
allocas:
  %aFOO_load = load float*, float** undef
  %aFOO_load_ptr2int = ptrtoint float* %aFOO_load to i64
  %aFOO_load_ptr2int_broadcast_init = insertelement <4 x i64> undef, i64 %aFOO_load_ptr2int, i32 0
  %aFOO_load_ptr2int_2void = inttoptr i64 %aFOO_load_ptr2int to i8*
  %ptr = getelementptr i8, i8* %aFOO_load_ptr2int_2void, i32 undef
  br label %load.i145

load.i145:
  %ptr.i143 = bitcast i8* %ptr to <4 x float>*
  %valall.i144 = load <4 x float>, <4 x float>* %ptr.i143, align 4
  %x_offset = getelementptr [1 x [2 x <4 x float>]], [1 x [2 x <4 x float>]]* @x, i32 0, i64 0
  br label %pl_loop.i.i122

pl_loop.i.i122:
  br label %pl_loop.i.i122
}

; Make sure we can sink the address computation even if there is a cycle in
; phi nodes.
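; Both GEPs below compute the same address (%base plus 40 bytes), so the phi
; cycle through %casted.loop and %casted.merged collapses and the computation
; can be sunk into the backedge block.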
define void @test9(i1 %cond, i64* %base) {
; CHECK-LABEL: @test9
entry:
  %addr = getelementptr inbounds i64, i64* %base, i64 5
  %casted = bitcast i64* %addr to i32*
  br label %header

header:
  %iv = phi i32 [0, %entry], [%iv.inc, %backedge]
  %casted.loop = phi i32* [%casted, %entry], [%casted.merged, %backedge]
  br i1 %cond, label %if.then, label %backedge

if.then:
  call void @foo(i32 %iv)
  %addr.1 = getelementptr inbounds i64, i64* %base, i64 5
  %casted.1 = bitcast i64* %addr.1 to i32*
  br label %backedge

backedge:
; CHECK-LABEL: backedge:
; CHECK: getelementptr i8, {{.+}} 40
  %casted.merged = phi i32* [%casted.loop, %header], [%casted.1, %if.then]
  %v = load i32, i32* %casted.merged, align 4
  call void @foo(i32 %v)
  %iv.inc = add i32 %iv, 1
  %cmp = icmp slt i32 %iv.inc, 1000
  br i1 %cmp, label %header, label %exit

exit:
  ret void
}

; Make sure we can eliminate a select when both arguments perform equivalent
; address computation.
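; Both select operands resolve to the same byte offset: 5 * sizeof(i64) and
; 10 * sizeof(i32) are both 40 bytes past %base.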
define void @test10(i1 %cond, i64* %base) {
; CHECK-LABEL: @test10
; CHECK: getelementptr i8, {{.+}} 40
; CHECK-NOT: select
entry:
  %gep1 = getelementptr inbounds i64, i64* %base, i64 5
  %gep1.casted = bitcast i64* %gep1 to i32*
  %base.casted = bitcast i64* %base to i32*
  %gep2 = getelementptr inbounds i32, i32* %base.casted, i64 10
  %casted.merged = select i1 %cond, i32* %gep1.casted, i32* %gep2
  %v = load i32, i32* %casted.merged, align 4
  call void @foo(i32 %v)
  ret void
}

; Found by fuzzer: getSExtValue of a constant wider than 64 bits.
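; APInt::getSExtValue asserts when the value does not fit in 64 bits, which
; is what this regression test guards against in the addressing-mode matcher.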
define void @i96_mul(i1* %base, i96 %offset) {
BB:
  ;; RHS = 0x7FFFFFFFFFFFFFFFFFFFFFFF (2^95 - 1)
  %B84 = mul i96 %offset, 39614081257132168796771975167
  %G23 = getelementptr i1, i1* %base, i96 %B84
  store i1 false, i1* %G23
  ret void
}