Add SwiftShader dump from Feb 6 2013
diff --git a/src/LLVM/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll b/src/LLVM/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll new file mode 100644 index 0000000..ff6fd3e --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -march=arm -mattr=+v6 + +%struct.layer_data = type { i32, [2048 x i8], i8*, [16 x i8], i32, i8*, i32, i32, [64 x i32], [64 x i32], [64 x i32], [64 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [12 x [64 x i16]] } +@ld = external global %struct.layer_data* ; <%struct.layer_data**> [#uses=1] + +define void @main() { +entry: + br i1 false, label %bb169.i, label %cond_true11 + +bb169.i: ; preds = %entry + ret void + +cond_true11: ; preds = %entry + %tmp.i32 = load %struct.layer_data** @ld ; <%struct.layer_data*> [#uses=2] + %tmp3.i35 = getelementptr %struct.layer_data* %tmp.i32, i32 0, i32 1, i32 2048; <i8*> [#uses=2] + %tmp.i36 = getelementptr %struct.layer_data* %tmp.i32, i32 0, i32 2 ; <i8**> [#uses=1] + store i8* %tmp3.i35, i8** %tmp.i36 + store i8* %tmp3.i35, i8** null + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll b/src/LLVM/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll new file mode 100644 index 0000000..6818446 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
@@ -0,0 +1,108 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 | FileCheck %s + +@quant_coef = external global [6 x [4 x [4 x i32]]] ; <[6 x [4 x [4 x i32]]]*> [#uses=1] +@dequant_coef = external global [6 x [4 x [4 x i32]]] ; <[6 x [4 x [4 x i32]]]*> [#uses=1] +@A = external global [4 x [4 x i32]] ; <[4 x [4 x i32]]*> [#uses=1] + +; CHECK: dct_luma_sp: +define fastcc i32 @dct_luma_sp(i32 %block_x, i32 %block_y, i32* %coeff_cost) { +entry: +; Make sure to use base-updating stores for saving callee-saved registers. +; CHECK: push +; CHECK-NOT: sub sp +; CHECK: push + %predicted_block = alloca [4 x [4 x i32]], align 4 ; <[4 x [4 x i32]]*> [#uses=1] + br label %cond_next489 + +cond_next489: ; preds = %cond_false, %bb471 + %j.7.in = load i8* null ; <i8> [#uses=1] + %i.8.in = load i8* null ; <i8> [#uses=1] + %i.8 = zext i8 %i.8.in to i32 ; <i32> [#uses=4] + %j.7 = zext i8 %j.7.in to i32 ; <i32> [#uses=4] + %tmp495 = getelementptr [4 x [4 x i32]]* %predicted_block, i32 0, i32 %i.8, i32 %j.7 ; <i32*> [#uses=2] + %tmp496 = load i32* %tmp495 ; <i32> [#uses=2] + %tmp502 = load i32* null ; <i32> [#uses=1] + %tmp542 = getelementptr [6 x [4 x [4 x i32]]]* @quant_coef, i32 0, i32 0, i32 %i.8, i32 %j.7 ; <i32*> [#uses=1] + %tmp543 = load i32* %tmp542 ; <i32> [#uses=1] + %tmp548 = ashr i32 0, 0 ; <i32> [#uses=3] + %tmp561 = sub i32 0, %tmp496 ; <i32> [#uses=3] + %abscond563 = icmp sgt i32 %tmp561, -1 ; <i1> [#uses=1] + %abs564 = select i1 %abscond563, i32 %tmp561, i32 0 ; <i32> [#uses=1] + %tmp572 = mul i32 %abs564, %tmp543 ; <i32> [#uses=1] + %tmp574 = add i32 %tmp572, 0 ; <i32> [#uses=1] + %tmp576 = ashr i32 %tmp574, 0 ; <i32> [#uses=7] + %tmp579 = icmp eq i32 %tmp548, %tmp576 ; <i1> [#uses=1] + br i1 %tmp579, label %bb712, label %cond_next589 + +cond_next589: ; preds = %cond_next489 + %tmp605 = getelementptr [6 x [4 x [4 x i32]]]* @dequant_coef, i32 0, i32 0, i32 %i.8, i32 %j.7 ; <i32*> [#uses=1] + %tmp606 = load i32* %tmp605 ; <i32> [#uses=1] + %tmp612 = load 
i32* null ; <i32> [#uses=1] + %tmp629 = load i32* null ; <i32> [#uses=1] + %tmp629a = sitofp i32 %tmp629 to double ; <double> [#uses=1] + %tmp631 = fmul double %tmp629a, 0.000000e+00 ; <double> [#uses=1] + %tmp632 = fadd double 0.000000e+00, %tmp631 ; <double> [#uses=1] + %tmp642 = call fastcc i32 @sign( i32 %tmp576, i32 %tmp561 ) ; <i32> [#uses=1] + %tmp650 = mul i32 %tmp606, %tmp642 ; <i32> [#uses=1] + %tmp656 = mul i32 %tmp650, %tmp612 ; <i32> [#uses=1] + %tmp658 = shl i32 %tmp656, 0 ; <i32> [#uses=1] + %tmp659 = ashr i32 %tmp658, 6 ; <i32> [#uses=1] + %tmp660 = sub i32 0, %tmp659 ; <i32> [#uses=1] + %tmp666 = sub i32 %tmp660, %tmp496 ; <i32> [#uses=1] + %tmp667 = sitofp i32 %tmp666 to double ; <double> [#uses=2] + call void @levrun_linfo_inter( i32 %tmp576, i32 0, i32* null, i32* null ) + %tmp671 = fmul double %tmp667, %tmp667 ; <double> [#uses=1] + %tmp675 = fadd double %tmp671, 0.000000e+00 ; <double> [#uses=1] + %tmp678 = fcmp oeq double %tmp632, %tmp675 ; <i1> [#uses=1] + br i1 %tmp678, label %cond_true679, label %cond_false693 + +cond_true679: ; preds = %cond_next589 + %abscond681 = icmp sgt i32 %tmp548, -1 ; <i1> [#uses=1] + %abs682 = select i1 %abscond681, i32 %tmp548, i32 0 ; <i32> [#uses=1] + %abscond684 = icmp sgt i32 %tmp576, -1 ; <i1> [#uses=1] + %abs685 = select i1 %abscond684, i32 %tmp576, i32 0 ; <i32> [#uses=1] + %tmp686 = icmp slt i32 %abs682, %abs685 ; <i1> [#uses=1] + br i1 %tmp686, label %cond_next702, label %cond_false689 + +cond_false689: ; preds = %cond_true679 + %tmp739 = icmp eq i32 %tmp576, 0 ; <i1> [#uses=1] + br i1 %tmp579, label %bb737, label %cond_false708 + +cond_false693: ; preds = %cond_next589 + ret i32 0 + +cond_next702: ; preds = %cond_true679 + ret i32 0 + +cond_false708: ; preds = %cond_false689 + ret i32 0 + +bb712: ; preds = %cond_next489 + ret i32 0 + +bb737: ; preds = %cond_false689 + br i1 %tmp739, label %cond_next791, label %cond_true740 + +cond_true740: ; preds = %bb737 + %tmp761 = call fastcc i32 @sign( i32 %tmp576, 
i32 0 ) ; <i32> [#uses=1] + %tmp780 = load i32* null ; <i32> [#uses=1] + %tmp785 = getelementptr [4 x [4 x i32]]* @A, i32 0, i32 %i.8, i32 %j.7 ; <i32*> [#uses=1] + %tmp786 = load i32* %tmp785 ; <i32> [#uses=1] + %tmp781 = mul i32 %tmp780, %tmp761 ; <i32> [#uses=1] + %tmp787 = mul i32 %tmp781, %tmp786 ; <i32> [#uses=1] + %tmp789 = shl i32 %tmp787, 0 ; <i32> [#uses=1] + %tmp790 = ashr i32 %tmp789, 6 ; <i32> [#uses=1] + br label %cond_next791 + +cond_next791: ; preds = %cond_true740, %bb737 + %ilev.1 = phi i32 [ %tmp790, %cond_true740 ], [ 0, %bb737 ] ; <i32> [#uses=1] + %tmp796 = load i32* %tmp495 ; <i32> [#uses=1] + %tmp798 = add i32 %tmp796, %ilev.1 ; <i32> [#uses=1] + %tmp812 = mul i32 0, %tmp502 ; <i32> [#uses=0] + %tmp818 = call fastcc i32 @sign( i32 0, i32 %tmp798 ) ; <i32> [#uses=0] + unreachable +} + +declare i32 @sign(i32, i32) + +declare void @levrun_linfo_inter(i32, i32, i32*, i32*)
diff --git a/src/LLVM/test/CodeGen/ARM/2007-03-07-CombinerCrash.ll b/src/LLVM/test/CodeGen/ARM/2007-03-07-CombinerCrash.ll new file mode 100644 index 0000000..38781ab --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2007-03-07-CombinerCrash.ll
@@ -0,0 +1,21 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 + +define fastcc i8* @read_sleb128(i8* %p, i32* %val) { + br label %bb + +bb: ; preds = %bb, %0 + %p_addr.0 = getelementptr i8* %p, i32 0 ; <i8*> [#uses=1] + %tmp2 = load i8* %p_addr.0 ; <i8> [#uses=2] + %tmp4.rec = add i32 0, 1 ; <i32> [#uses=1] + %tmp4 = getelementptr i8* %p, i32 %tmp4.rec ; <i8*> [#uses=1] + %tmp56 = zext i8 %tmp2 to i32 ; <i32> [#uses=1] + %tmp7 = and i32 %tmp56, 127 ; <i32> [#uses=1] + %tmp9 = shl i32 %tmp7, 0 ; <i32> [#uses=1] + %tmp11 = or i32 %tmp9, 0 ; <i32> [#uses=1] + icmp slt i8 %tmp2, 0 ; <i1>:1 [#uses=1] + br i1 %1, label %bb, label %cond_next28 + +cond_next28: ; preds = %bb + store i32 %tmp11, i32* %val + ret i8* %tmp4 +}
diff --git a/src/LLVM/test/CodeGen/ARM/2007-03-13-InstrSched.ll b/src/LLVM/test/CodeGen/ARM/2007-03-13-InstrSched.ll new file mode 100644 index 0000000..3a63418 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2007-03-13-InstrSched.ll
@@ -0,0 +1,51 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic \ +; RUN: -mattr=+v6 | grep r9 +; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic \ +; RUN: -mattr=+v6 -arm-reserve-r9 -ifcvt-limit=0 -stats |& grep asm-printer +; | grep 35 + +define void @test(i32 %tmp56222, i32 %tmp36224, i32 %tmp46223, i32 %i.0196.0.ph, i32 %tmp8, i32* %tmp1011, i32** %tmp1, i32* %d2.1.out, i32* %d3.1.out, i32* %d0.1.out, i32* %d1.1.out) { +newFuncRoot: + br label %bb74 + +bb78.exitStub: ; preds = %bb74 + store i32 %d2.1, i32* %d2.1.out + store i32 %d3.1, i32* %d3.1.out + store i32 %d0.1, i32* %d0.1.out + store i32 %d1.1, i32* %d1.1.out + ret void + +bb74: ; preds = %bb26, %newFuncRoot + %fp.1.rec = phi i32 [ 0, %newFuncRoot ], [ %tmp71.rec, %bb26 ] ; <i32> [#uses=3] + %fm.1.in = phi i32* [ %tmp71, %bb26 ], [ %tmp1011, %newFuncRoot ] ; <i32*> [#uses=1] + %d0.1 = phi i32 [ %tmp44, %bb26 ], [ 8192, %newFuncRoot ] ; <i32> [#uses=2] + %d1.1 = phi i32 [ %tmp54, %bb26 ], [ 8192, %newFuncRoot ] ; <i32> [#uses=2] + %d2.1 = phi i32 [ %tmp64, %bb26 ], [ 8192, %newFuncRoot ] ; <i32> [#uses=2] + %d3.1 = phi i32 [ %tmp69, %bb26 ], [ 8192, %newFuncRoot ] ; <i32> [#uses=2] + %fm.1 = load i32* %fm.1.in ; <i32> [#uses=4] + icmp eq i32 %fp.1.rec, %tmp8 ; <i1>:0 [#uses=1] + br i1 %0, label %bb78.exitStub, label %bb26 + +bb26: ; preds = %bb74 + %tmp28 = getelementptr i32** %tmp1, i32 %fp.1.rec ; <i32**> [#uses=1] + %tmp30 = load i32** %tmp28 ; <i32*> [#uses=4] + %tmp33 = getelementptr i32* %tmp30, i32 %i.0196.0.ph ; <i32*> [#uses=1] + %tmp34 = load i32* %tmp33 ; <i32> [#uses=1] + %tmp38 = getelementptr i32* %tmp30, i32 %tmp36224 ; <i32*> [#uses=1] + %tmp39 = load i32* %tmp38 ; <i32> [#uses=1] + %tmp42 = mul i32 %tmp34, %fm.1 ; <i32> [#uses=1] + %tmp44 = add i32 %tmp42, %d0.1 ; <i32> [#uses=1] + %tmp48 = getelementptr i32* %tmp30, i32 %tmp46223 ; <i32*> [#uses=1] + %tmp49 = load i32* %tmp48 ; <i32> [#uses=1] + %tmp52 = mul i32 %tmp39, %fm.1 ; <i32> [#uses=1] + %tmp54 = 
add i32 %tmp52, %d1.1 ; <i32> [#uses=1] + %tmp58 = getelementptr i32* %tmp30, i32 %tmp56222 ; <i32*> [#uses=1] + %tmp59 = load i32* %tmp58 ; <i32> [#uses=1] + %tmp62 = mul i32 %tmp49, %fm.1 ; <i32> [#uses=1] + %tmp64 = add i32 %tmp62, %d2.1 ; <i32> [#uses=1] + %tmp67 = mul i32 %tmp59, %fm.1 ; <i32> [#uses=1] + %tmp69 = add i32 %tmp67, %d3.1 ; <i32> [#uses=1] + %tmp71.rec = add i32 %fp.1.rec, 1 ; <i32> [#uses=2] + %tmp71 = getelementptr i32* %tmp1011, i32 %tmp71.rec ; <i32*> [#uses=1] + br label %bb74 +}
diff --git a/src/LLVM/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll b/src/LLVM/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll new file mode 100644 index 0000000..77a03e4 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll
@@ -0,0 +1,96 @@ +; RUN: llc < %s -mtriple=arm-linux-gnueabi +; PR1257 + + %struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32 } + %struct.arm_stack_offsets = type { i32, i32, i32, i32, i32 } + %struct.c_arg_info = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i8 } + %struct.c_language_function = type { %struct.stmt_tree_s } + %struct.c_switch = type opaque + %struct.eh_status = type opaque + %struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack*, i32, %struct.location_t, i32, i8*, %struct.rtx_def** } + %struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* } + %struct.function = type { %struct.eh_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.CUMULATIVE_ARGS, %struct.rtx_def*, %struct.rtx_def*, %struct.initial_value_struct*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i8, i32, i64, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.varray_head_tag*, %struct.temp_slot*, i32, %struct.var_refs_queue*, i32, i32, %struct.rtvec_def*, %struct.tree_node*, i32, i32, i32, %struct.machine_function*, i32, i32, i8, i8, %struct.language_function*, %struct.rtx_def*, i32, i32, i32, i32, %struct.location_t, %struct.varray_head_tag*, %struct.tree_node*, i8, i8, i8 } + %struct.ht_identifier = type { i8*, i32, i32 } + %struct.initial_value_struct = type opaque + %struct.lang_decl = type { i8 } + %struct.language_function = type { %struct.c_language_function, %struct.tree_node*, %struct.tree_node*, %struct.c_switch*, %struct.c_arg_info*, i32, i32, i32, i32 } + %struct.location_t = type { i8*, i32 } + %struct.machine_function = type { %struct.rtx_def*, i32, i32, i32, %struct.arm_stack_offsets, i32, 
i32, i32, [14 x %struct.rtx_def*] } + %struct.rtvec_def = type { i32, [1 x %struct.rtx_def*] } + %struct.rtx_def = type { i16, i8, i8, %struct.u } + %struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack* } + %struct.stmt_tree_s = type { %struct.tree_node*, i32 } + %struct.temp_slot = type opaque + %struct.tree_common = type { %struct.tree_node*, %struct.tree_node*, %union.tree_ann_d*, i8, i8, i8, i8, i8 } + %struct.tree_decl = type { %struct.tree_common, %struct.location_t, i32, %struct.tree_node*, i8, i8, i8, i8, i8, i8, i8, i8, i32, %struct.tree_decl_u1, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, i32, %struct.tree_decl_u2, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_decl* } + %struct.tree_decl_u1 = type { i64 } + %struct.tree_decl_u2 = type { %struct.function* } + %struct.tree_identifier = type { %struct.tree_common, %struct.ht_identifier } + %struct.tree_node = type { %struct.tree_decl } + %struct.u = type { [1 x i64] } + %struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* } + %struct.varasm_status = type opaque + %struct.varray_head_tag = type opaque + %union.tree_ann_d = type opaque + + +define void @declspecs_add_type(i32 %spec.1) { +entry: + %spec.1961 = zext i32 %spec.1 to i64 ; <i64> [#uses=1] + %spec.1961.adj = shl i64 %spec.1961, 32 ; <i64> [#uses=1] + %spec.1961.adj.ins = or i64 %spec.1961.adj, 0 ; <i64> [#uses=2] + %tmp10959 = lshr i64 %spec.1961.adj.ins, 32 ; <i64> [#uses=2] + %tmp1920 = inttoptr i64 %tmp10959 to %struct.tree_common* ; <%struct.tree_common*> [#uses=1] + %tmp21 = getelementptr %struct.tree_common* %tmp1920, i32 0, i32 3 ; <i8*> [#uses=1] + %tmp2122 = bitcast i8* %tmp21 to i32* ; <i32*> [#uses=1] + br i1 false, label %cond_next53, label %cond_true + +cond_true: ; preds = %entry + ret 
void + +cond_next53: ; preds = %entry + br i1 false, label %cond_true63, label %cond_next689 + +cond_true63: ; preds = %cond_next53 + ret void + +cond_next689: ; preds = %cond_next53 + br i1 false, label %cond_false841, label %bb743 + +bb743: ; preds = %cond_next689 + ret void + +cond_false841: ; preds = %cond_next689 + br i1 false, label %cond_true851, label %cond_true918 + +cond_true851: ; preds = %cond_false841 + tail call void @lookup_name( ) + br i1 false, label %bb866, label %cond_next856 + +cond_next856: ; preds = %cond_true851 + ret void + +bb866: ; preds = %cond_true851 + %tmp874 = load i32* %tmp2122 ; <i32> [#uses=1] + %tmp876877 = trunc i32 %tmp874 to i8 ; <i8> [#uses=1] + icmp eq i8 %tmp876877, 1 ; <i1>:0 [#uses=1] + br i1 %0, label %cond_next881, label %cond_true878 + +cond_true878: ; preds = %bb866 + unreachable + +cond_next881: ; preds = %bb866 + %tmp884885 = inttoptr i64 %tmp10959 to %struct.tree_identifier* ; <%struct.tree_identifier*> [#uses=1] + %tmp887 = getelementptr %struct.tree_identifier* %tmp884885, i32 0, i32 1, i32 0 ; <i8**> [#uses=1] + %tmp888 = load i8** %tmp887 ; <i8*> [#uses=1] + tail call void (i32, ...)* @error( i32 undef, i8* %tmp888 ) + ret void + +cond_true918: ; preds = %cond_false841 + %tmp920957 = trunc i64 %spec.1961.adj.ins to i32 ; <i32> [#uses=0] + ret void +} + +declare void @error(i32, ...) + +declare void @lookup_name()
diff --git a/src/LLVM/test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll b/src/LLVM/test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll new file mode 100644 index 0000000..8e121ca --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll
@@ -0,0 +1,35 @@ +; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi +; PR1279 + + %struct.rtx_def = type { i16, i8, i8, %struct.u } + %struct.u = type { [1 x i64] } + +define fastcc void @find_reloads_address(%struct.rtx_def** %loc) { +entry: + %ad_addr = alloca %struct.rtx_def* ; <%struct.rtx_def**> [#uses=2] + br i1 false, label %cond_next416, label %cond_true340 + +cond_true340: ; preds = %entry + ret void + +cond_next416: ; preds = %entry + %tmp1085 = load %struct.rtx_def** %ad_addr ; <%struct.rtx_def*> [#uses=1] + br i1 false, label %bb1084, label %cond_true418 + +cond_true418: ; preds = %cond_next416 + ret void + +bb1084: ; preds = %cond_next416 + br i1 false, label %cond_true1092, label %cond_next1102 + +cond_true1092: ; preds = %bb1084 + %tmp1094 = getelementptr %struct.rtx_def* %tmp1085, i32 0, i32 3 ; <%struct.u*> [#uses=1] + %tmp10981099 = bitcast %struct.u* %tmp1094 to %struct.rtx_def** ; <%struct.rtx_def**> [#uses=2] + %tmp1101 = load %struct.rtx_def** %tmp10981099 ; <%struct.rtx_def*> [#uses=1] + store %struct.rtx_def* %tmp1101, %struct.rtx_def** %ad_addr + br label %cond_next1102 + +cond_next1102: ; preds = %cond_true1092, %bb1084 + %loc_addr.0 = phi %struct.rtx_def** [ %tmp10981099, %cond_true1092 ], [ %loc, %bb1084 ] ; <%struct.rtx_def**> [#uses=0] + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2007-03-30-RegScavengerAssert.ll b/src/LLVM/test/CodeGen/ARM/2007-03-30-RegScavengerAssert.ll new file mode 100644 index 0000000..9c5e7cd --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2007-03-30-RegScavengerAssert.ll
@@ -0,0 +1,101 @@ +; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi +; PR1279 + + %struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32 } + %struct.arm_stack_offsets = type { i32, i32, i32, i32, i32 } + %struct.eh_status = type opaque + %struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack*, i32, %struct.location_t, i32, i8*, %struct.rtx_def** } + %struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* } + %struct.function = type { %struct.eh_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.CUMULATIVE_ARGS, %struct.rtx_def*, %struct.rtx_def*, %struct.initial_value_struct*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i8, i32, i64, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.varray_head_tag*, %struct.temp_slot*, i32, %struct.var_refs_queue*, i32, i32, %struct.rtvec_def*, %struct.tree_node*, i32, i32, i32, %struct.machine_function*, i32, i32, i8, i8, %struct.language_function*, %struct.rtx_def*, i32, i32, i32, i32, %struct.location_t, %struct.varray_head_tag*, %struct.tree_node*, i8, i8, i8 } + %struct.initial_value_struct = type opaque + %struct.lang_decl = type opaque + %struct.language_function = type opaque + %struct.location_t = type { i8*, i32 } + %struct.machine_function = type { %struct.rtx_def*, i32, i32, i32, %struct.arm_stack_offsets, i32, i32, i32, [14 x %struct.rtx_def*] } + %struct.rtvec_def = type { i32, [1 x %struct.rtx_def*] } + %struct.rtx_def = type { i16, i8, i8, %struct.u } + %struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack* } + %struct.temp_slot = type opaque + %struct.tree_common = type { %struct.tree_node*, %struct.tree_node*, %union.tree_ann_d*, i8, 
i8, i8, i8, i8 } + %struct.tree_decl = type { %struct.tree_common, %struct.location_t, i32, %struct.tree_node*, i8, i8, i8, i8, i8, i8, i8, i8, i32, %struct.tree_decl_u1, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, i32, %struct.tree_decl_u2, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_decl* } + %struct.tree_decl_u1 = type { i64 } + %struct.tree_decl_u2 = type { %struct.function* } + %struct.tree_node = type { %struct.tree_decl } + %struct.u = type { [1 x i64] } + %struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* } + %struct.varasm_status = type opaque + %struct.varray_head_tag = type { i32, i32, i32, i8*, %struct.u } + %union.tree_ann_d = type opaque +@str469 = external global [42 x i8] ; <[42 x i8]*> [#uses=0] +@__FUNCTION__.24265 = external global [19 x i8] ; <[19 x i8]*> [#uses=0] + +declare void @fancy_abort() + +define fastcc void @fold_builtin_bitop() { +entry: + br i1 false, label %cond_true105, label %UnifiedReturnBlock + +cond_true105: ; preds = %entry + br i1 false, label %cond_true134, label %UnifiedReturnBlock + +cond_true134: ; preds = %cond_true105 + switch i32 0, label %bb479 [ + i32 378, label %bb313 + i32 380, label %bb313 + i32 381, label %bb313 + i32 383, label %bb366 + i32 385, label %bb366 + i32 386, label %bb366 + i32 403, label %bb250 + i32 405, label %bb250 + i32 406, label %bb250 + i32 434, label %bb464 + i32 436, label %bb464 + i32 437, label %bb464 + i32 438, label %bb441 + i32 440, label %bb441 + i32 441, label %bb441 + ] + +bb250: ; preds = %cond_true134, %cond_true134, %cond_true134 + ret void + +bb313: ; preds = %cond_true134, %cond_true134, %cond_true134 + ret void + +bb366: ; preds = %cond_true134, %cond_true134, %cond_true134 + ret void + +bb441: ; preds = %cond_true134, %cond_true134, %cond_true134 + ret void + 
+bb457: ; preds = %bb464, %bb457 + %tmp459 = add i64 0, 1 ; <i64> [#uses=1] + br i1 false, label %bb474.preheader, label %bb457 + +bb464: ; preds = %cond_true134, %cond_true134, %cond_true134 + br i1 false, label %bb474.preheader, label %bb457 + +bb474.preheader: ; preds = %bb464, %bb457 + %result.5.ph = phi i64 [ 0, %bb464 ], [ %tmp459, %bb457 ] ; <i64> [#uses=1] + br label %bb474 + +bb467: ; preds = %bb474 + %indvar.next586 = add i64 %indvar585, 1 ; <i64> [#uses=1] + br label %bb474 + +bb474: ; preds = %bb467, %bb474.preheader + %indvar585 = phi i64 [ 0, %bb474.preheader ], [ %indvar.next586, %bb467 ] ; <i64> [#uses=2] + br i1 false, label %bb476, label %bb467 + +bb476: ; preds = %bb474 + %result.5 = add i64 %indvar585, %result.5.ph ; <i64> [#uses=0] + ret void + +bb479: ; preds = %cond_true134 + tail call void @fancy_abort( ) + unreachable + +UnifiedReturnBlock: ; preds = %cond_true105, %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll b/src/LLVM/test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll new file mode 100644 index 0000000..b8f957c --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll
@@ -0,0 +1,55 @@ +; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin + + %struct.H_TBL = type { [17 x i8], [256 x i8], i32 } + %struct.Q_TBL = type { [64 x i16], i32 } + %struct.anon = type { [80 x i8] } + %struct.X_c_coef_ccler = type { void (%struct.X_Y*, i32)*, i32 (%struct.X_Y*, i8***)* } + %struct.X_c_main_ccler = type { void (%struct.X_Y*, i32)*, void (%struct.X_Y*, i8**, i32*, i32)* } + %struct.X_c_prep_ccler = type { void (%struct.X_Y*, i32)*, void (%struct.X_Y*, i8**, i32*, i32, i8***, i32*, i32)* } + %struct.X_color_converter = type { void (%struct.X_Y*)*, void (%struct.X_Y*, i8**, i8***, i32, i32)* } + %struct.X_common_struct = type { %struct.X_error_mgr*, %struct.X_memory_mgr*, %struct.X_progress_mgr*, i8*, i32, i32 } + %struct.X_comp_master = type { void (%struct.X_Y*)*, void (%struct.X_Y*)*, void (%struct.X_Y*)*, i32, i32 } + %struct.X_component_info = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.Q_TBL*, i8* } + %struct.X_Y = type { %struct.X_error_mgr*, %struct.X_memory_mgr*, %struct.X_progress_mgr*, i8*, i32, i32, %struct.X_destination_mgr*, i32, i32, i32, i32, double, i32, i32, i32, %struct.X_component_info*, [4 x %struct.Q_TBL*], [4 x %struct.H_TBL*], [4 x %struct.H_TBL*], [16 x i8], [16 x i8], [16 x i8], i32, %struct.X_scan_info*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8, i8, i8, i16, i16, i32, i32, i32, i32, i32, i32, i32, [4 x %struct.X_component_info*], i32, i32, i32, [10 x i32], i32, i32, i32, i32, %struct.X_comp_master*, %struct.X_c_main_ccler*, %struct.X_c_prep_ccler*, %struct.X_c_coef_ccler*, %struct.X_marker_writer*, %struct.X_color_converter*, %struct.X_downssr*, %struct.X_forward_D*, %struct.X_entropy_en*, %struct.X_scan_info*, i32 } + %struct.X_destination_mgr = type { i8*, i32, void (%struct.X_Y*)*, i32 (%struct.X_Y*)*, void (%struct.X_Y*)* } + %struct.X_downssr = type { void (%struct.X_Y*)*, void (%struct.X_Y*, i8***, i32, i8***, i32)*, i32 } + 
%struct.X_entropy_en = type { void (%struct.X_Y*, i32)*, i32 (%struct.X_Y*, [64 x i16]**)*, void (%struct.X_Y*)* } + %struct.X_error_mgr = type { void (%struct.X_common_struct*)*, void (%struct.X_common_struct*, i32)*, void (%struct.X_common_struct*)*, void (%struct.X_common_struct*, i8*)*, void (%struct.X_common_struct*)*, i32, %struct.anon, i32, i32, i8**, i32, i8**, i32, i32 } + %struct.X_forward_D = type { void (%struct.X_Y*)*, void (%struct.X_Y*, %struct.X_component_info*, i8**, [64 x i16]*, i32, i32, i32)* } + %struct.X_marker_writer = type { void (%struct.X_Y*)*, void (%struct.X_Y*)*, void (%struct.X_Y*)*, void (%struct.X_Y*)*, void (%struct.X_Y*)*, void (%struct.X_Y*, i32, i32)*, void (%struct.X_Y*, i32)* } + %struct.X_memory_mgr = type { i8* (%struct.X_common_struct*, i32, i32)*, i8* (%struct.X_common_struct*, i32, i32)*, i8** (%struct.X_common_struct*, i32, i32, i32)*, [64 x i16]** (%struct.X_common_struct*, i32, i32, i32)*, %struct.jvirt_sAY_cc* (%struct.X_common_struct*, i32, i32, i32, i32, i32)*, %struct.jvirt_bAY_cc* (%struct.X_common_struct*, i32, i32, i32, i32, i32)*, void (%struct.X_common_struct*)*, i8** (%struct.X_common_struct*, %struct.jvirt_sAY_cc*, i32, i32, i32)*, [64 x i16]** (%struct.X_common_struct*, %struct.jvirt_bAY_cc*, i32, i32, i32)*, void (%struct.X_common_struct*, i32)*, void (%struct.X_common_struct*)*, i32, i32 } + %struct.X_progress_mgr = type { void (%struct.X_common_struct*)*, i32, i32, i32, i32 } + %struct.X_scan_info = type { i32, [4 x i32], i32, i32, i32, i32 } + %struct.jvirt_bAY_cc = type opaque + %struct.jvirt_sAY_cc = type opaque + +define void @test(%struct.X_Y* %cinfo) { +entry: + br i1 false, label %bb.preheader, label %return + +bb.preheader: ; preds = %entry + %tbl.014.us = load i32* null ; <i32> [#uses=1] + br i1 false, label %cond_next.us, label %bb + +cond_next51.us: ; preds = %cond_next.us, %cond_true33.us.cond_true46.us_crit_edge + %htblptr.019.1.us = phi %struct.H_TBL** [ %tmp37.us, 
%cond_true33.us.cond_true46.us_crit_edge ], [ %tmp37.us, %cond_next.us ] ; <%struct.H_TBL**> [#uses=0] + ret void + +cond_true33.us.cond_true46.us_crit_edge: ; preds = %cond_next.us + call void @_C_X_a_HT( ) + br label %cond_next51.us + +cond_next.us: ; preds = %bb.preheader + %tmp37.us = getelementptr %struct.X_Y* %cinfo, i32 0, i32 17, i32 %tbl.014.us ; <%struct.H_TBL**> [#uses=3] + %tmp4524.us = load %struct.H_TBL** %tmp37.us ; <%struct.H_TBL*> [#uses=1] + icmp eq %struct.H_TBL* %tmp4524.us, null ; <i1>:0 [#uses=1] + br i1 %0, label %cond_true33.us.cond_true46.us_crit_edge, label %cond_next51.us + +bb: ; preds = %bb.preheader + ret void + +return: ; preds = %entry + ret void +} + +declare void @_C_X_a_HT()
diff --git a/src/LLVM/test/CodeGen/ARM/2007-04-03-PEIBug.ll b/src/LLVM/test/CodeGen/ARM/2007-04-03-PEIBug.ll new file mode 100644 index 0000000..a29c800 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2007-04-03-PEIBug.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=arm | not grep {add.*#0} + +define i32 @foo() { +entry: + %A = alloca [1123 x i32], align 16 ; <[1123 x i32]*> [#uses=1] + %B = alloca [3123 x i32], align 16 ; <[3123 x i32]*> [#uses=1] + %C = alloca [12312 x i32], align 16 ; <[12312 x i32]*> [#uses=1] + %tmp = call i32 (...)* @bar( [3123 x i32]* %B, [1123 x i32]* %A, [12312 x i32]* %C ) ; <i32> [#uses=0] + ret i32 undef +} + +declare i32 @bar(...)
diff --git a/src/LLVM/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll b/src/LLVM/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll new file mode 100644 index 0000000..ff32af3 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll
@@ -0,0 +1,99 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic | \ +; RUN: not grep LPC9 + + %struct.B = type { i32 } + %struct.anon = type { void (%struct.B*)*, i32 } +@str = internal constant [7 x i8] c"i, %d\0A\00" ; <[7 x i8]*> [#uses=1] +@str1 = internal constant [7 x i8] c"j, %d\0A\00" ; <[7 x i8]*> [#uses=1] + +define internal void @_ZN1B1iEv(%struct.B* %this) { +entry: + %tmp1 = getelementptr %struct.B* %this, i32 0, i32 0 ; <i32*> [#uses=1] + %tmp2 = load i32* %tmp1 ; <i32> [#uses=1] + %tmp4 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([7 x i8]* @str, i32 0, i32 0), i32 %tmp2 ) ; <i32> [#uses=0] + ret void +} + +declare i32 @printf(i8*, ...) + +define internal void @_ZN1B1jEv(%struct.B* %this) { +entry: + %tmp1 = getelementptr %struct.B* %this, i32 0, i32 0 ; <i32*> [#uses=1] + %tmp2 = load i32* %tmp1 ; <i32> [#uses=1] + %tmp4 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([7 x i8]* @str1, i32 0, i32 0), i32 %tmp2 ) ; <i32> [#uses=0] + ret void +} + +define i32 @main() { +entry: + %b.i29 = alloca %struct.B, align 4 ; <%struct.B*> [#uses=3] + %b.i1 = alloca %struct.B, align 4 ; <%struct.B*> [#uses=3] + %b.i = alloca %struct.B, align 4 ; <%struct.B*> [#uses=3] + %tmp2.i = getelementptr %struct.B* %b.i, i32 0, i32 0 ; <i32*> [#uses=1] + store i32 4, i32* %tmp2.i + br i1 icmp eq (i64 and (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) to i64), i64 4294967296), i64 0), label %_Z3fooiM1BFvvE.exit, label %cond_true.i + +cond_true.i: ; preds = %entry + %b2.i = bitcast %struct.B* %b.i to i8* ; <i8*> [#uses=1] + %ctg23.i = getelementptr i8* %b2.i, i32 ashr (i32 trunc (i64 lshr (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) to i64), i64 32) to i32), i32 1) ; <i8*> [#uses=1] + %tmp121314.i = bitcast i8* %ctg23.i to i32 (...)*** ; <i32 (...)***> [#uses=1] + %tmp15.i = load i32 (...)*** %tmp121314.i ; <i32 (...)**> [#uses=1] + %tmp151.i = bitcast i32 (...)** %tmp15.i to i8* ; <i8*> [#uses=1] + %ctg2.i 
= getelementptr i8* %tmp151.i, i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) ; <i8*> [#uses=1] + %tmp2021.i = bitcast i8* %ctg2.i to i32 (...)** ; <i32 (...)**> [#uses=1] + %tmp22.i = load i32 (...)** %tmp2021.i ; <i32 (...)*> [#uses=1] + %tmp2223.i = bitcast i32 (...)* %tmp22.i to void (%struct.B*)* ; <void (%struct.B*)*> [#uses=1] + br label %_Z3fooiM1BFvvE.exit + +_Z3fooiM1BFvvE.exit: ; preds = %cond_true.i, %entry + %iftmp.2.0.i = phi void (%struct.B*)* [ %tmp2223.i, %cond_true.i ], [ inttoptr (i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) to void (%struct.B*)*), %entry ] ; <void (%struct.B*)*> [#uses=1] + %b4.i = bitcast %struct.B* %b.i to i8* ; <i8*> [#uses=1] + %ctg25.i = getelementptr i8* %b4.i, i32 ashr (i32 trunc (i64 lshr (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) to i64), i64 32) to i32), i32 1) ; <i8*> [#uses=1] + %tmp3031.i = bitcast i8* %ctg25.i to %struct.B* ; <%struct.B*> [#uses=1] + call void %iftmp.2.0.i( %struct.B* %tmp3031.i ) + %tmp2.i30 = getelementptr %struct.B* %b.i29, i32 0, i32 0 ; <i32*> [#uses=1] + store i32 6, i32* %tmp2.i30 + br i1 icmp eq (i64 and (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1jEv to i32) to i64), i64 4294967296), i64 0), label %_Z3fooiM1BFvvE.exit56, label %cond_true.i46 + +cond_true.i46: ; preds = %_Z3fooiM1BFvvE.exit + %b2.i35 = bitcast %struct.B* %b.i29 to i8* ; <i8*> [#uses=1] + %ctg23.i36 = getelementptr i8* %b2.i35, i32 ashr (i32 trunc (i64 lshr (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1jEv to i32) to i64), i64 32) to i32), i32 1) ; <i8*> [#uses=1] + %tmp121314.i37 = bitcast i8* %ctg23.i36 to i32 (...)*** ; <i32 (...)***> [#uses=1] + %tmp15.i38 = load i32 (...)*** %tmp121314.i37 ; <i32 (...)**> [#uses=1] + %tmp151.i41 = bitcast i32 (...)** %tmp15.i38 to i8* ; <i8*> [#uses=1] + %ctg2.i42 = getelementptr i8* %tmp151.i41, i32 ptrtoint (void (%struct.B*)* @_ZN1B1jEv to i32) ; <i8*> [#uses=1] + %tmp2021.i43 = bitcast i8* %ctg2.i42 to i32 (...)** ; <i32 (...)**> [#uses=1] 
+ %tmp22.i44 = load i32 (...)** %tmp2021.i43 ; <i32 (...)*> [#uses=1] + %tmp2223.i45 = bitcast i32 (...)* %tmp22.i44 to void (%struct.B*)* ; <void (%struct.B*)*> [#uses=1] + br label %_Z3fooiM1BFvvE.exit56 + +_Z3fooiM1BFvvE.exit56: ; preds = %cond_true.i46, %_Z3fooiM1BFvvE.exit + %iftmp.2.0.i49 = phi void (%struct.B*)* [ %tmp2223.i45, %cond_true.i46 ], [ inttoptr (i32 ptrtoint (void (%struct.B*)* @_ZN1B1jEv to i32) to void (%struct.B*)*), %_Z3fooiM1BFvvE.exit ] ; <void (%struct.B*)*> [#uses=1] + %b4.i53 = bitcast %struct.B* %b.i29 to i8* ; <i8*> [#uses=1] + %ctg25.i54 = getelementptr i8* %b4.i53, i32 ashr (i32 trunc (i64 lshr (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1jEv to i32) to i64), i64 32) to i32), i32 1) ; <i8*> [#uses=1] + %tmp3031.i55 = bitcast i8* %ctg25.i54 to %struct.B* ; <%struct.B*> [#uses=1] + call void %iftmp.2.0.i49( %struct.B* %tmp3031.i55 ) + %tmp2.i2 = getelementptr %struct.B* %b.i1, i32 0, i32 0 ; <i32*> [#uses=1] + store i32 -1, i32* %tmp2.i2 + br i1 icmp eq (i64 and (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) to i64), i64 4294967296), i64 0), label %_Z3fooiM1BFvvE.exit28, label %cond_true.i18 + +cond_true.i18: ; preds = %_Z3fooiM1BFvvE.exit56 + %b2.i7 = bitcast %struct.B* %b.i1 to i8* ; <i8*> [#uses=1] + %ctg23.i8 = getelementptr i8* %b2.i7, i32 ashr (i32 trunc (i64 lshr (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) to i64), i64 32) to i32), i32 1) ; <i8*> [#uses=1] + %tmp121314.i9 = bitcast i8* %ctg23.i8 to i32 (...)*** ; <i32 (...)***> [#uses=1] + %tmp15.i10 = load i32 (...)*** %tmp121314.i9 ; <i32 (...)**> [#uses=1] + %tmp151.i13 = bitcast i32 (...)** %tmp15.i10 to i8* ; <i8*> [#uses=1] + %ctg2.i14 = getelementptr i8* %tmp151.i13, i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) ; <i8*> [#uses=1] + %tmp2021.i15 = bitcast i8* %ctg2.i14 to i32 (...)** ; <i32 (...)**> [#uses=1] + %tmp22.i16 = load i32 (...)** %tmp2021.i15 ; <i32 (...)*> [#uses=1] + %tmp2223.i17 = bitcast i32 (...)* %tmp22.i16 to 
void (%struct.B*)* ; <void (%struct.B*)*> [#uses=1] + br label %_Z3fooiM1BFvvE.exit28 + +_Z3fooiM1BFvvE.exit28: ; preds = %cond_true.i18, %_Z3fooiM1BFvvE.exit56 + %iftmp.2.0.i21 = phi void (%struct.B*)* [ %tmp2223.i17, %cond_true.i18 ], [ inttoptr (i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) to void (%struct.B*)*), %_Z3fooiM1BFvvE.exit56 ] ; <void (%struct.B*)*> [#uses=1] + %b4.i25 = bitcast %struct.B* %b.i1 to i8* ; <i8*> [#uses=1] + %ctg25.i26 = getelementptr i8* %b4.i25, i32 ashr (i32 trunc (i64 lshr (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) to i64), i64 32) to i32), i32 1) ; <i8*> [#uses=1] + %tmp3031.i27 = bitcast i8* %ctg25.i26 to %struct.B* ; <%struct.B*> [#uses=1] + call void %iftmp.2.0.i21( %struct.B* %tmp3031.i27 ) + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/ARM/2007-04-30-CombinerCrash.ll b/src/LLVM/test/CodeGen/ARM/2007-04-30-CombinerCrash.ll new file mode 100644 index 0000000..ba61d9b --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2007-04-30-CombinerCrash.ll
@@ -0,0 +1,32 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64" +target triple = "arm-apple-darwin8" + %struct.CHESS_POSITION = type { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i32, i32, i8, i8, [64 x i8], i8, i8, i8, i8, i8 } +@search = external global %struct.CHESS_POSITION ; <%struct.CHESS_POSITION*> [#uses=3] +@file_mask = external global [8 x i64] ; <[8 x i64]*> [#uses=1] +@rank_mask.1.b = external global i1 ; <i1*> [#uses=1] + +define fastcc void @EvaluateDevelopment() { +entry: + %tmp7 = load i64* getelementptr (%struct.CHESS_POSITION* @search, i32 0, i32 7) ; <i64> [#uses=1] + %tmp50 = load i64* getelementptr (%struct.CHESS_POSITION* @search, i32 0, i32 0) ; <i64> [#uses=1] + %tmp52 = load i64* getelementptr (%struct.CHESS_POSITION* @search, i32 0, i32 1) ; <i64> [#uses=1] + %tmp53 = or i64 %tmp52, %tmp50 ; <i64> [#uses=1] + %tmp57.b = load i1* @rank_mask.1.b ; <i1> [#uses=1] + %tmp57 = select i1 %tmp57.b, i64 71776119061217280, i64 0 ; <i64> [#uses=1] + %tmp58 = and i64 %tmp57, %tmp7 ; <i64> [#uses=1] + %tmp59 = lshr i64 %tmp58, 8 ; <i64> [#uses=1] + %tmp63 = load i64* getelementptr ([8 x i64]* @file_mask, i32 0, i32 4) ; <i64> [#uses=1] + %tmp64 = or i64 %tmp63, 0 ; <i64> [#uses=1] + %tmp65 = and i64 %tmp59, %tmp53 ; <i64> [#uses=1] + %tmp66 = and i64 %tmp65, %tmp64 ; <i64> [#uses=1] + %tmp67 = icmp eq i64 %tmp66, 0 ; <i1> [#uses=1] + br i1 %tmp67, label %cond_next145, label %cond_true70 + +cond_true70: ; preds = %entry + ret void + +cond_next145: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll b/src/LLVM/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll new file mode 100644 index 0000000..a24a872 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll
@@ -0,0 +1,113 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin + + %struct.Connection = type { i32, [10 x i8], i32 } + %struct.IntChunk = type { %struct.cppobjtype, i32, i32*, i32 } + %struct.Point = type { i8*, %struct.cppobjtype, i16 (%struct.Point*) *, i16 (%struct.Point*) *, double (%struct.Point*)*, double (%struct.Point*)* } + %struct.RefPoint = type { %struct.Point*, %struct.cppobjtype } + %struct.ShortArray = type { %struct.cppobjtype, i32, i16* } + %struct.TestObj = type { i8*, %struct.cppobjtype, i8, [32 x i8], i8*, i8**, i16, i16, i32, i32, i32, i32, float, double, %struct.cppobjtype, i32, i16*, i16**, i8**, i32, %struct.XyPoint, [3 x %struct.Connection], %struct.Point*, %struct.XyPoint*, i32, i8*, i8*, i16*, %struct.ShortArray, %struct.IntChunk, %struct.cppobjtype, %struct.cppobjtype, %struct.RefPoint, i32, %struct.cppobjtype, %struct.cppobjtype } + %struct.XyPoint = type { i16, i16 } + %struct.cppobjtype = type { i32, i16, i16 } +@Msg = external global [256 x i8] ; <[256 x i8]*> [#uses=1] +@.str53615 = external constant [48 x i8] ; <[48 x i8]*> [#uses=1] +@FirstTime.4637.b = external global i1 ; <i1*> [#uses=1] + +define fastcc void @Draw7(i32 %Option, i32* %Status) { +entry: + %tmp115.b = load i1* @FirstTime.4637.b ; <i1> [#uses=1] + br i1 %tmp115.b, label %cond_next239, label %cond_next.i + +cond_next.i: ; preds = %entry + ret void + +cond_next239: ; preds = %entry + %tmp242 = icmp eq i32 0, 0 ; <i1> [#uses=1] + br i1 %tmp242, label %cond_next253, label %cond_next296 + +cond_next253: ; preds = %cond_next239 + switch i32 %Option, label %bb1326 [ + i32 3, label %cond_true258 + i32 4, label %cond_true268 + i32 2, label %cond_true279 + i32 1, label %cond_next315 + ] + +cond_true258: ; preds = %cond_next253 + ret void + +cond_true268: ; preds = %cond_next253 + ret void + +cond_true279: ; preds = %cond_next253 + ret void + +cond_next296: ; preds = %cond_next239 + ret void + +cond_next315: ; preds = %cond_next253 + %tmp1140 = icmp eq i32 0, 0 ; <i1> [#uses=1] 
+ br i1 %tmp1140, label %cond_true1143, label %bb1326 + +cond_true1143: ; preds = %cond_next315 + %tmp1148 = icmp eq i32 0, 0 ; <i1> [#uses=4] + br i1 %tmp1148, label %cond_next1153, label %cond_true1151 + +cond_true1151: ; preds = %cond_true1143 + ret void + +cond_next1153: ; preds = %cond_true1143 + %tmp8.i.i185 = icmp eq i32 0, 0 ; <i1> [#uses=1] + br i1 %tmp8.i.i185, label %TestObj_new1.exit, label %cond_true.i.i187 + +cond_true.i.i187: ; preds = %cond_next1153 + ret void + +TestObj_new1.exit: ; preds = %cond_next1153 + %tmp1167 = icmp eq i16 0, 0 ; <i1> [#uses=1] + %tmp1178 = icmp eq i32 0, 0 ; <i1> [#uses=1] + %bothcond = and i1 %tmp1167, %tmp1178 ; <i1> [#uses=1] + br i1 %bothcond, label %bb1199, label %bb1181 + +bb1181: ; preds = %TestObj_new1.exit + ret void + +bb1199: ; preds = %TestObj_new1.exit + br i1 %tmp1148, label %cond_next1235, label %Object_Dump.exit302 + +Object_Dump.exit302: ; preds = %bb1199 + ret void + +cond_next1235: ; preds = %bb1199 + %bothcond10485 = or i1 false, %tmp1148 ; <i1> [#uses=1] + br i1 %bothcond10485, label %cond_next1267, label %cond_true1248 + +cond_true1248: ; preds = %cond_next1235 + ret void + +cond_next1267: ; preds = %cond_next1235 + br i1 %tmp1148, label %cond_next1275, label %cond_true1272 + +cond_true1272: ; preds = %cond_next1267 + %tmp1273 = load %struct.TestObj** null ; <%struct.TestObj*> [#uses=2] + %tmp2930.i = ptrtoint %struct.TestObj* %tmp1273 to i32 ; <i32> [#uses=1] + %tmp42.i348 = sub i32 0, %tmp2930.i ; <i32> [#uses=1] + %tmp45.i = getelementptr %struct.TestObj* %tmp1273, i32 0, i32 0 ; <i8**> [#uses=2] + %tmp48.i = load i8** %tmp45.i ; <i8*> [#uses=1] + %tmp50.i350 = call i32 (i8*, i8*, ...)* @sprintf( i8* getelementptr ([256 x i8]* @Msg, i32 0, i32 0), i8* getelementptr ([48 x i8]* @.str53615, i32 0, i32 0), i8* null, i8** %tmp45.i, i8* %tmp48.i ) ; <i32> [#uses=0] + br i1 false, label %cond_true.i632.i, label %Ut_TraceMsg.exit648.i + +cond_true.i632.i: ; preds = %cond_true1272 + ret void + 
+Ut_TraceMsg.exit648.i: ; preds = %cond_true1272 + %tmp57.i = getelementptr i8* null, i32 %tmp42.i348 ; <i8*> [#uses=0] + ret void + +cond_next1275: ; preds = %cond_next1267 + ret void + +bb1326: ; preds = %cond_next315, %cond_next253 + ret void +} + +declare i32 @sprintf(i8*, i8*, ...)
diff --git a/src/LLVM/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll b/src/LLVM/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll new file mode 100644 index 0000000..be5d198 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll
@@ -0,0 +1,65 @@ +; RUN: llc < %s -march=arm -enable-tail-merge | grep bl.*baz | count 1 +; RUN: llc < %s -march=arm -enable-tail-merge | grep bl.*quux | count 1 +; Check that calls to baz and quux are tail-merged. +; PR1628 + +; ModuleID = 'tail.c' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "i686-apple-darwin8" + +define i32 @f(i32 %i, i32 %q) { +entry: + %i_addr = alloca i32 ; <i32*> [#uses=2] + %q_addr = alloca i32 ; <i32*> [#uses=2] + %retval = alloca i32, align 4 ; <i32*> [#uses=1] + store i32 %i, i32* %i_addr + store i32 %q, i32* %q_addr + %tmp = load i32* %i_addr ; <i32> [#uses=1] + %tmp1 = icmp ne i32 %tmp, 0 ; <i1> [#uses=1] + %tmp12 = zext i1 %tmp1 to i8 ; <i8> [#uses=1] + %toBool = icmp ne i8 %tmp12, 0 ; <i1> [#uses=1] + br i1 %toBool, label %cond_true, label %cond_false + +cond_true: ; preds = %entry + %tmp3 = call i32 (...)* @bar( ) ; <i32> [#uses=0] + %tmp4 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0] + br label %cond_next + +cond_false: ; preds = %entry + %tmp5 = call i32 (...)* @foo( ) ; <i32> [#uses=0] + %tmp6 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0] + br label %cond_next + +cond_next: ; preds = %cond_false, %cond_true + %tmp7 = load i32* %q_addr ; <i32> [#uses=1] + %tmp8 = icmp ne i32 %tmp7, 0 ; <i1> [#uses=1] + %tmp89 = zext i1 %tmp8 to i8 ; <i8> [#uses=1] + %toBool10 = icmp ne i8 %tmp89, 0 ; <i1> [#uses=1] + br i1 %toBool10, label %cond_true11, label %cond_false15 + +cond_true11: ; preds = %cond_next + %tmp13 = call i32 (...)* @foo( ) ; <i32> [#uses=0] + %tmp14 = call i32 (...)* @quux( i32 3, i32 4 ) ; <i32> [#uses=0] + br label %cond_next18 + +cond_false15: ; preds = %cond_next + %tmp16 = call i32 (...)* @bar( ) ; <i32> [#uses=0] + %tmp17 = call i32 (...)* @quux( i32 3, i32 4 ) ; <i32> [#uses=0] + br label %cond_next18 + +cond_next18: ; preds = %cond_false15, %cond_true11 + %tmp19 = call i32 (...)* @bar( ) ; <i32> 
[#uses=0] + br label %return + +return: ; preds = %cond_next18 + %retval20 = load i32* %retval ; <i32> [#uses=1] + ret i32 %retval20 +} + +declare i32 @bar(...) + +declare i32 @baz(...) + +declare i32 @foo(...) + +declare i32 @quux(...)
diff --git a/src/LLVM/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll b/src/LLVM/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll new file mode 100644 index 0000000..1ec382a --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll
@@ -0,0 +1,66 @@ +; RUN: llc < %s -march=arm -enable-tail-merge | grep bl.*baz | count 1 +; RUN: llc < %s -march=arm -enable-tail-merge | grep bl.*quux | count 1 +; Check that calls to baz and quux are tail-merged. +; PR1628 + +; ModuleID = 'tail.c' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "i686-apple-darwin8" + +define i32 @f(i32 %i, i32 %q) { +entry: + %i_addr = alloca i32 ; <i32*> [#uses=2] + %q_addr = alloca i32 ; <i32*> [#uses=2] + %retval = alloca i32, align 4 ; <i32*> [#uses=1] + store i32 %i, i32* %i_addr + store i32 %q, i32* %q_addr + %tmp = load i32* %i_addr ; <i32> [#uses=1] + %tmp1 = icmp ne i32 %tmp, 0 ; <i1> [#uses=1] + %tmp12 = zext i1 %tmp1 to i8 ; <i8> [#uses=1] + %toBool = icmp ne i8 %tmp12, 0 ; <i1> [#uses=1] + br i1 %toBool, label %cond_true, label %cond_false + +cond_true: ; preds = %entry + %tmp3 = call i32 (...)* @bar( ) ; <i32> [#uses=0] + %tmp4 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0] + %tmp7 = load i32* %q_addr ; <i32> [#uses=1] + %tmp8 = icmp ne i32 %tmp7, 0 ; <i1> [#uses=1] + %tmp89 = zext i1 %tmp8 to i8 ; <i8> [#uses=1] + %toBool10 = icmp ne i8 %tmp89, 0 ; <i1> [#uses=1] + br i1 %toBool10, label %cond_true11, label %cond_false15 + +cond_false: ; preds = %entry + %tmp5 = call i32 (...)* @foo( ) ; <i32> [#uses=0] + %tmp6 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0] + %tmp27 = load i32* %q_addr ; <i32> [#uses=1] + %tmp28 = icmp ne i32 %tmp27, 0 ; <i1> [#uses=1] + %tmp289 = zext i1 %tmp28 to i8 ; <i8> [#uses=1] + %toBool210 = icmp ne i8 %tmp289, 0 ; <i1> [#uses=1] + br i1 %toBool210, label %cond_true11, label %cond_false15 + +cond_true11: ; preds = %cond_next + %tmp13 = call i32 (...)* @foo( ) ; <i32> [#uses=0] + %tmp14 = call i32 (...)* @quux( i32 3, i32 4 ) ; <i32> [#uses=0] + br label %cond_next18 + +cond_false15: ; preds = %cond_next + %tmp16 = call i32 (...)* @bar( ) ; <i32> [#uses=0] + %tmp17 = call 
i32 (...)* @quux( i32 3, i32 4 ) ; <i32> [#uses=0] + br label %cond_next18 + +cond_next18: ; preds = %cond_false15, %cond_true11 + %tmp19 = call i32 (...)* @bar( ) ; <i32> [#uses=0] + br label %return + +return: ; preds = %cond_next18 + %retval20 = load i32* %retval ; <i32> [#uses=1] + ret i32 %retval20 +} + +declare i32 @bar(...) + +declare i32 @baz(...) + +declare i32 @foo(...) + +declare i32 @quux(...)
diff --git a/src/LLVM/test/CodeGen/ARM/2007-05-14-InlineAsmCstCrash.ll b/src/LLVM/test/CodeGen/ARM/2007-05-14-InlineAsmCstCrash.ll new file mode 100644 index 0000000..c88a2d6 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2007-05-14-InlineAsmCstCrash.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=arm -mattr=+v6 + +define i32 @test3() { + tail call void asm sideeffect "/* number: ${0:c} */", "i"( i32 1 ) + ret i32 11 +}
diff --git a/src/LLVM/test/CodeGen/ARM/2007-05-14-RegScavengerAssert.ll b/src/LLVM/test/CodeGen/ARM/2007-05-14-RegScavengerAssert.ll new file mode 100644 index 0000000..c0fb59d --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2007-05-14-RegScavengerAssert.ll
@@ -0,0 +1,30 @@ +; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi +; PR1406 + + %struct.AVClass = type { i8*, i8* (i8*)*, %struct.AVOption* } + %struct.AVCodec = type { i8*, i32, i32, i32, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32, i8*)*, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32*, i8*, i32)*, i32, %struct.AVCodec*, void (%struct.AVCodecContext*)*, %struct.AVRational*, i32* } + %struct.AVCodecContext = type { %struct.AVClass*, i32, i32, i32, i32, i32, i8*, i32, %struct.AVRational, i32, i32, i32, i32, i32, void (%struct.AVCodecContext*, %struct.AVFrame*, i32*, i32, i32, i32)*, i32, i32, i32, i32, i32, i32, i32, float, float, i32, i32, i32, i32, float, i32, i32, i32, %struct.AVCodec*, i8*, i32, i32, void (%struct.AVCodecContext*, i8*, i32, i32)*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, [32 x i8], i32, i32, i32, i32, i32, i32, i32, float, i32, i32 (%struct.AVCodecContext*, %struct.AVFrame*)*, void (%struct.AVCodecContext*, %struct.AVFrame*)*, i32, i32, i32, i32, i8*, i8*, float, float, i32, %struct.RcOverride*, i32, i8*, i32, i32, i32, float, float, float, float, i32, float, float, float, float, float, i32, i32, i32, i32*, i32, i32, i32, i32, %struct.AVRational, %struct.AVFrame*, i32, i32, [4 x i64], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 (%struct.AVCodecContext*, i32*)*, i32, i32, i32, i32, i32, i32, i8*, i32, i32, i32, i32, i32, i32, i16*, i16*, i32, i32, i32, i32, %struct.AVPaletteControl*, i32, i32 (%struct.AVCodecContext*, %struct.AVFrame*)*, i32, i32, i32, i32, i32, i32, i32, i32 (%struct.AVCodecContext*, i32 (%struct.AVCodecContext*, i8*)*, i8**, i32*, i32)*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64 } + %struct.AVFrame = type { [4 x i8*], [4 x i32], [4 x 
i8*], i32, i32, i64, i32, i32, i32, i32, i32, i8*, i32, i8*, [2 x [2 x i16]*], i32*, i8, i8*, [4 x i64], i32, i32, i32, i32, i32, %struct.AVPanScan*, i32, i32, i16*, [2 x i8*] } + %struct.AVOption = type opaque + %struct.AVPaletteControl = type { i32, [256 x i32] } + %struct.AVPanScan = type { i32, i32, i32, [3 x [2 x i16]] } + %struct.AVRational = type { i32, i32 } + %struct.RcOverride = type { i32, i32, i32, float } + +define i32 @decode_init(%struct.AVCodecContext* %avctx) { +entry: + br i1 false, label %bb, label %cond_next789 + +bb: ; preds = %bb, %entry + br i1 false, label %bb59, label %bb + +bb59: ; preds = %bb + %tmp68 = sdiv i64 0, 0 ; <i64> [#uses=1] + %tmp6869 = trunc i64 %tmp68 to i32 ; <i32> [#uses=2] + %tmp81 = call i32 asm "smull $0, $1, $2, $3 \0A\09mov $0, $0, lsr $4\0A\09add $1, $0, $1, lsl $5\0A\09", "=&r,=*&r,r,r,i,i"( i32* null, i32 %tmp6869, i32 13316085, i32 23, i32 9 ) ; <i32> [#uses=0] + %tmp90 = call i32 asm "smull $0, $1, $2, $3 \0A\09mov $0, $0, lsr $4\0A\09add $1, $0, $1, lsl $5\0A\09", "=&r,=*&r,r,r,i,i"( i32* null, i32 %tmp6869, i32 10568984, i32 23, i32 9 ) ; <i32> [#uses=0] + unreachable + +cond_next789: ; preds = %entry + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll b/src/LLVM/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll new file mode 100644 index 0000000..16a74ff --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
@@ -0,0 +1,68 @@ +; RUN: llc < %s -march=arm | grep bl.*baz | count 1 +; RUN: llc < %s -march=arm | grep bl.*quux | count 1 +; RUN: llc < %s -march=arm -enable-tail-merge=0 | grep bl.*baz | count 2 +; RUN: llc < %s -march=arm -enable-tail-merge=0 | grep bl.*quux | count 2 +; Check that tail merging is the default on ARM, and that -enable-tail-merge=0 works. +; PR1628 + +; ModuleID = 'tail.c' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "i686-apple-darwin8" + +define i32 @f(i32 %i, i32 %q) { +entry: + %i_addr = alloca i32 ; <i32*> [#uses=2] + %q_addr = alloca i32 ; <i32*> [#uses=2] + %retval = alloca i32, align 4 ; <i32*> [#uses=1] + store i32 %i, i32* %i_addr + store i32 %q, i32* %q_addr + %tmp = load i32* %i_addr ; <i32> [#uses=1] + %tmp1 = icmp ne i32 %tmp, 0 ; <i1> [#uses=1] + %tmp12 = zext i1 %tmp1 to i8 ; <i8> [#uses=1] + %toBool = icmp ne i8 %tmp12, 0 ; <i1> [#uses=1] + br i1 %toBool, label %cond_true, label %cond_false + +cond_true: ; preds = %entry + %tmp3 = call i32 (...)* @bar( ) ; <i32> [#uses=0] + %tmp4 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0] + %tmp7 = load i32* %q_addr ; <i32> [#uses=1] + %tmp8 = icmp ne i32 %tmp7, 0 ; <i1> [#uses=1] + %tmp89 = zext i1 %tmp8 to i8 ; <i8> [#uses=1] + %toBool10 = icmp ne i8 %tmp89, 0 ; <i1> [#uses=1] + br i1 %toBool10, label %cond_true11, label %cond_false15 + +cond_false: ; preds = %entry + %tmp5 = call i32 (...)* @foo( ) ; <i32> [#uses=0] + %tmp6 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0] + %tmp27 = load i32* %q_addr ; <i32> [#uses=1] + %tmp28 = icmp ne i32 %tmp27, 0 ; <i1> [#uses=1] + %tmp289 = zext i1 %tmp28 to i8 ; <i8> [#uses=1] + %toBool210 = icmp ne i8 %tmp289, 0 ; <i1> [#uses=1] + br i1 %toBool210, label %cond_true11, label %cond_false15 + +cond_true11: ; preds = %cond_next + %tmp13 = call i32 (...)* @foo( ) ; <i32> [#uses=0] + %tmp14 = call i32 (...)* @quux( i32 3, i32 4 ) ; 
<i32> [#uses=0] + br label %cond_next18 + +cond_false15: ; preds = %cond_next + %tmp16 = call i32 (...)* @bar( ) ; <i32> [#uses=0] + %tmp17 = call i32 (...)* @quux( i32 3, i32 4 ) ; <i32> [#uses=0] + br label %cond_next18 + +cond_next18: ; preds = %cond_false15, %cond_true11 + %tmp19 = call i32 (...)* @bar( ) ; <i32> [#uses=0] + br label %return + +return: ; preds = %cond_next18 + %retval20 = load i32* %retval ; <i32> [#uses=1] + ret i32 %retval20 +} + +declare i32 @bar(...) + +declare i32 @baz(...) + +declare i32 @foo(...) + +declare i32 @quux(...)
diff --git a/src/LLVM/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll b/src/LLVM/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll new file mode 100644 index 0000000..9977816 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll
@@ -0,0 +1,34 @@ +; RUN: llc < %s -march=arm | not grep {str.*\\!} + + %struct.shape_edge_t = type { %struct.shape_edge_t*, %struct.shape_edge_t*, i32, i32, i32, i32 } + %struct.shape_path_t = type { %struct.shape_edge_t*, %struct.shape_edge_t*, i32, i32, i32, i32, i32, i32 } + %struct.shape_pool_t = type { i8* (%struct.shape_pool_t*, i8*, i32)*, i8* (%struct.shape_pool_t*, i32)*, void (%struct.shape_pool_t*, i8*)* } + +define %struct.shape_path_t* @shape_path_alloc(%struct.shape_pool_t* %pool, i32* %shape) { +entry: + br i1 false, label %cond_false, label %bb45 + +bb45: ; preds = %entry + ret %struct.shape_path_t* null + +cond_false: ; preds = %entry + br i1 false, label %bb140, label %bb174 + +bb140: ; preds = %bb140, %cond_false + %indvar = phi i32 [ 0, %cond_false ], [ %indvar.next, %bb140 ] ; <i32> [#uses=2] + %edge.230.0.rec = shl i32 %indvar, 1 ; <i32> [#uses=3] + %edge.230.0 = getelementptr %struct.shape_edge_t* null, i32 %edge.230.0.rec ; <%struct.shape_edge_t*> [#uses=1] + %edge.230.0.sum6970 = or i32 %edge.230.0.rec, 1 ; <i32> [#uses=2] + %tmp154 = getelementptr %struct.shape_edge_t* null, i32 %edge.230.0.sum6970 ; <%struct.shape_edge_t*> [#uses=1] + %tmp11.i5 = getelementptr %struct.shape_edge_t* null, i32 %edge.230.0.sum6970, i32 0 ; <%struct.shape_edge_t**> [#uses=1] + store %struct.shape_edge_t* %edge.230.0, %struct.shape_edge_t** %tmp11.i5 + store %struct.shape_edge_t* %tmp154, %struct.shape_edge_t** null + %tmp16254.0.rec = add i32 %edge.230.0.rec, 2 ; <i32> [#uses=1] + %xp.350.sum = add i32 0, %tmp16254.0.rec ; <i32> [#uses=1] + %tmp168 = icmp slt i32 %xp.350.sum, 0 ; <i1> [#uses=1] + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1] + br i1 %tmp168, label %bb140, label %bb174 + +bb174: ; preds = %bb140, %cond_false + ret %struct.shape_path_t* null +}
diff --git a/src/LLVM/test/CodeGen/ARM/2007-08-15-ReuseBug.ll b/src/LLVM/test/CodeGen/ARM/2007-08-15-ReuseBug.ll new file mode 100644 index 0000000..30b72e0 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2007-08-15-ReuseBug.ll
@@ -0,0 +1,106 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic -mattr=+v6 +; PR1609 + + %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } + %struct.__sFILEX = type opaque + %struct.__sbuf = type { i8*, i32 } +@_C_nextcmd = external global i32 ; <i32*> [#uses=2] +@_C_cmds = external global [100 x i8*] ; <[100 x i8*]*> [#uses=2] +@.str44 = external constant [2 x i8] ; <[2 x i8]*> [#uses=1] + +define i32 @main(i32 %argc, i8** %argv) { +entry: + br label %cond_next212.i + +bb21.i: ; preds = %cond_next212.i + br label %cond_next212.i + +bb24.i: ; preds = %cond_next212.i + ret i32 0 + +bb27.i: ; preds = %cond_next212.i + ret i32 0 + +bb30.i: ; preds = %cond_next212.i + %tmp205399.i = add i32 %argc_addr.2358.0.i, -1 ; <i32> [#uses=1] + br label %cond_next212.i + +bb33.i: ; preds = %cond_next212.i + ret i32 0 + +cond_next73.i: ; preds = %cond_next212.i + ret i32 0 + +bb75.i: ; preds = %cond_next212.i + ret i32 0 + +bb77.i: ; preds = %cond_next212.i + ret i32 0 + +bb79.i: ; preds = %cond_next212.i + ret i32 0 + +bb102.i: ; preds = %cond_next212.i + br i1 false, label %cond_true110.i, label %cond_next123.i + +cond_true110.i: ; preds = %bb102.i + %tmp116.i = getelementptr i8** %argv_addr.2321.0.i, i32 2 ; <i8**> [#uses=1] + %tmp117.i = load i8** %tmp116.i ; <i8*> [#uses=1] + %tmp126425.i = call %struct.FILE* @fopen( i8* %tmp117.i, i8* getelementptr ([2 x i8]* @.str44, i32 0, i32 0) ) ; <%struct.FILE*> [#uses=0] + ret i32 0 + +cond_next123.i: ; preds = %bb102.i + %tmp122.i = getelementptr i8* %tmp215.i, i32 2 ; <i8*> [#uses=0] + ret i32 0 + +bb162.i: ; preds = %cond_next212.i + ret i32 0 + +C_addcmd.exit120.i: ; preds = %cond_next212.i + %tmp3.i.i.i.i105.i = call i8* @calloc( i32 15, i32 1 ) ; <i8*> [#uses=1] + %tmp1.i108.i = getelementptr [100 x i8*]* @_C_cmds, i32 0, i32 
0 ; <i8**> [#uses=1] + store i8* %tmp3.i.i.i.i105.i, i8** %tmp1.i108.i, align 4 + %tmp.i91.i = load i32* @_C_nextcmd, align 4 ; <i32> [#uses=1] + store i32 0, i32* @_C_nextcmd, align 4 + %tmp3.i.i.i.i95.i = call i8* @calloc( i32 15, i32 1 ) ; <i8*> [#uses=1] + %tmp1.i98.i = getelementptr [100 x i8*]* @_C_cmds, i32 0, i32 %tmp.i91.i ; <i8**> [#uses=1] + store i8* %tmp3.i.i.i.i95.i, i8** %tmp1.i98.i, align 4 + br label %cond_next212.i + +bb174.i: ; preds = %cond_next212.i + ret i32 0 + +bb192.i: ; preds = %cond_next212.i + br label %cond_next212.i + +cond_next212.i: ; preds = %cond_next212.i, %cond_next212.i, %cond_next212.i, %cond_next212.i, %bb192.i, %C_addcmd.exit120.i, %bb30.i, %bb21.i, %entry + %max_d.3 = phi i32 [ -1, %entry ], [ %max_d.3, %bb30.i ], [ %max_d.3, %bb21.i ], [ %max_d.3, %C_addcmd.exit120.i ], [ 0, %bb192.i ], [ %max_d.3, %cond_next212.i ], [ %max_d.3, %cond_next212.i ], [ %max_d.3, %cond_next212.i ], [ %max_d.3, %cond_next212.i ] ; <i32> [#uses=7] + %argv_addr.2321.0.i = phi i8** [ %argv, %entry ], [ %tmp214.i, %bb192.i ], [ %tmp214.i, %C_addcmd.exit120.i ], [ %tmp214.i, %bb30.i ], [ %tmp214.i, %bb21.i ], [ %tmp214.i, %cond_next212.i ], [ %tmp214.i, %cond_next212.i ], [ %tmp214.i, %cond_next212.i ], [ %tmp214.i, %cond_next212.i ] ; <i8**> [#uses=2] + %argc_addr.2358.0.i = phi i32 [ %argc, %entry ], [ %tmp205399.i, %bb30.i ], [ 0, %bb21.i ], [ 0, %C_addcmd.exit120.i ], [ 0, %bb192.i ], [ 0, %cond_next212.i ], [ 0, %cond_next212.i ], [ 0, %cond_next212.i ], [ 0, %cond_next212.i ] ; <i32> [#uses=1] + %tmp214.i = getelementptr i8** %argv_addr.2321.0.i, i32 1 ; <i8**> [#uses=9] + %tmp215.i = load i8** %tmp214.i ; <i8*> [#uses=1] + %tmp1314.i = sext i8 0 to i32 ; <i32> [#uses=1] + switch i32 %tmp1314.i, label %bb192.i [ + i32 76, label %C_addcmd.exit120.i + i32 77, label %bb174.i + i32 83, label %bb162.i + i32 97, label %bb33.i + i32 98, label %bb21.i + i32 99, label %bb24.i + i32 100, label %bb27.i + i32 101, label %cond_next212.i + i32 102, label 
%bb102.i + i32 105, label %bb75.i + i32 109, label %bb30.i + i32 113, label %cond_next212.i + i32 114, label %cond_next73.i + i32 115, label %bb79.i + i32 116, label %cond_next212.i + i32 118, label %bb77.i + i32 119, label %cond_next212.i + ] +} + +declare %struct.FILE* @fopen(i8*, i8*) + +declare i8* @calloc(i32, i32)
diff --git a/src/LLVM/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll b/src/LLVM/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll new file mode 100644 index 0000000..fd2f462 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -mtriple=arm-linux-gnueabi -regalloc=fast +; PR1925 + + %struct.encode_aux_nearestmatch = type { i32*, i32*, i32*, i32*, i32, i32 } + %struct.encode_aux_pigeonhole = type { float, float, i32, i32, i32*, i32, i32*, i32*, i32* } + %struct.encode_aux_threshmatch = type { float*, i32*, i32, i32 } + %struct.oggpack_buffer = type { i32, i32, i8*, i8*, i32 } + %struct.static_codebook = type { i32, i32, i32*, i32, i32, i32, i32, i32, i32*, %struct.encode_aux_nearestmatch*, %struct.encode_aux_threshmatch*, %struct.encode_aux_pigeonhole*, i32 } + +define i32 @vorbis_staticbook_pack(%struct.static_codebook* %c, %struct.oggpack_buffer* %opb) { +entry: + %opb_addr = alloca %struct.oggpack_buffer* ; <%struct.oggpack_buffer**> [#uses=1] + %tmp1 = load %struct.oggpack_buffer** %opb_addr, align 4 ; <%struct.oggpack_buffer*> [#uses=1] + call void @oggpack_write( %struct.oggpack_buffer* %tmp1, i32 5653314, i32 24 ) nounwind + call void @oggpack_write( %struct.oggpack_buffer* null, i32 0, i32 24 ) nounwind + unreachable +} + +declare void @oggpack_write(%struct.oggpack_buffer*, i32, i32)
diff --git a/src/LLVM/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll b/src/LLVM/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll new file mode 100644 index 0000000..44da8e7 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll
@@ -0,0 +1,21 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=fast +; PR1925 + + %"struct.kc::impl_Ccode_option" = type { %"struct.kc::impl_abstract_phylum" } + %"struct.kc::impl_ID" = type { %"struct.kc::impl_abstract_phylum", %"struct.kc::impl_Ccode_option"*, %"struct.kc::impl_casestring__Str"*, i32, %"struct.kc::impl_casestring__Str"* } + %"struct.kc::impl_abstract_phylum" = type { i32 (...)** } + %"struct.kc::impl_casestring__Str" = type { %"struct.kc::impl_abstract_phylum", i8* } + +define %"struct.kc::impl_ID"* @_ZN2kc18f_typeofunpsubtermEPNS_15impl_unpsubtermEPNS_7impl_IDE(%"struct.kc::impl_Ccode_option"* %a_unpsubterm, %"struct.kc::impl_ID"* %a_operator) { +entry: + %tmp8 = getelementptr %"struct.kc::impl_Ccode_option"* %a_unpsubterm, i32 0, i32 0, i32 0 ; <i32 (...)***> [#uses=0] + br i1 false, label %bb41, label %bb55 + +bb41: ; preds = %entry + ret %"struct.kc::impl_ID"* null + +bb55: ; preds = %entry + %tmp67 = tail call i32 null( %"struct.kc::impl_abstract_phylum"* null ) ; <i32> [#uses=0] + %tmp97 = tail call i32 null( %"struct.kc::impl_abstract_phylum"* null ) ; <i32> [#uses=0] + ret %"struct.kc::impl_ID"* null +}
diff --git a/src/LLVM/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll b/src/LLVM/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll new file mode 100644 index 0000000..a604c5c --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=arm -mattr=+v6 | not grep 255 + +define i32 @main(i32 %argc, i8** %argv) { +entry: + br label %bb1 +bb1: ; preds = %entry + %tmp3.i.i = load i8* null, align 1 ; <i8> [#uses=1] + %tmp4.i.i = icmp slt i8 %tmp3.i.i, 0 ; <i1> [#uses=1] + br i1 %tmp4.i.i, label %bb2, label %bb3 +bb2: ; preds = %bb1 + ret i32 1 +bb3: ; preds = %bb1 + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll b/src/LLVM/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll new file mode 100644 index 0000000..78c6222 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 + +@accum = external global { double, double } ; <{ double, double }*> [#uses=1] +@.str = external constant [4 x i8] ; <[4 x i8]*> [#uses=1] + +define i32 @main() { +entry: + br label %bb74.i +bb74.i: ; preds = %bb88.i, %bb74.i, %entry + br i1 false, label %bb88.i, label %bb74.i +bb88.i: ; preds = %bb74.i + br i1 false, label %mandel.exit, label %bb74.i +mandel.exit: ; preds = %bb88.i + %tmp2 = volatile load double* getelementptr ({ double, double }* @accum, i32 0, i32 0), align 8 ; <double> [#uses=1] + %tmp23 = fptosi double %tmp2 to i32 ; <i32> [#uses=1] + %tmp5 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i32 %tmp23 ) ; <i32> [#uses=0] + ret i32 0 +} + +declare i32 @printf(i8*, ...)
diff --git a/src/LLVM/test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll b/src/LLVM/test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll new file mode 100644 index 0000000..6b39a76 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll
@@ -0,0 +1,63 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin + +@numBinsY = external global i32 ; <i32*> [#uses=1] + +declare double @pow(double, double) + +define void @main(i32 %argc, i8** %argv) noreturn nounwind { +entry: + br i1 false, label %bb34.outer.i.i.i, label %cond_false674 +bb34.outer.i.i.i: ; preds = %entry + br i1 false, label %bb2.i.i.i, label %bb47.i.i.i +bb2.i.i.i: ; preds = %bb34.outer.i.i.i + %tmp24.i.i.i = call double @pow( double 0.000000e+00, double 2.000000e+00 ) ; <double> [#uses=0] + ret void +bb47.i.i.i: ; preds = %bb34.outer.i.i.i + br i1 false, label %bb220.i.i.i, label %bb62.preheader.i.i.i +bb62.preheader.i.i.i: ; preds = %bb47.i.i.i + ret void +bb220.i.i.i: ; preds = %bb47.i.i.i + br i1 false, label %bb248.i.i.i, label %cond_next232.i.i.i +cond_next232.i.i.i: ; preds = %bb220.i.i.i + ret void +bb248.i.i.i: ; preds = %bb220.i.i.i + br i1 false, label %bb300.i.i.i, label %cond_false256.i.i.i +cond_false256.i.i.i: ; preds = %bb248.i.i.i + ret void +bb300.i.i.i: ; preds = %bb248.i.i.i + store i32 undef, i32* @numBinsY, align 4 + ret void +cond_false674: ; preds = %entry + ret void +} + + %struct.anon = type { %struct.rnode*, %struct.rnode* } + %struct.ch_set = type { { i8, i8 }*, %struct.ch_set* } + %struct.pat_list = type { i32, %struct.pat_list* } + %struct.rnode = type { i16, { %struct.anon }, i16, %struct.pat_list*, %struct.pat_list* } + +define fastcc { i16, %struct.rnode* }* @get_token(i8** %s) nounwind { +entry: + br i1 false, label %bb42, label %bb78 +bb42: ; preds = %entry + br label %cond_next119.i +bb17.i: ; preds = %cond_next119.i + br i1 false, label %cond_true53.i, label %cond_false99.i +cond_true53.i: ; preds = %bb17.i + ret { i16, %struct.rnode* }* null +cond_false99.i: ; preds = %bb17.i + %malloccall = tail call i8* @malloc(i32 trunc (i64 mul nuw (i64 ptrtoint (i1** getelementptr (i1** null, i32 1) to i64), i64 2) to i32)) + %tmp106.i = bitcast i8* %malloccall to %struct.ch_set* + br i1 false, label %bb126.i, label 
%cond_next119.i +cond_next119.i: ; preds = %cond_false99.i, %bb42 + %curr_ptr.0.reg2mem.0.i = phi %struct.ch_set* [ %tmp106.i, %cond_false99.i ], [ null, %bb42 ] ; <%struct.ch_set*> [#uses=2] + %prev_ptr.0.reg2mem.0.i = phi %struct.ch_set* [ %curr_ptr.0.reg2mem.0.i, %cond_false99.i ], [ undef, %bb42 ] ; <%struct.ch_set*> [#uses=1] + br i1 false, label %bb126.i, label %bb17.i +bb126.i: ; preds = %cond_next119.i, %cond_false99.i + %prev_ptr.0.reg2mem.1.i = phi %struct.ch_set* [ %prev_ptr.0.reg2mem.0.i, %cond_next119.i ], [ %curr_ptr.0.reg2mem.0.i, %cond_false99.i ] ; <%struct.ch_set*> [#uses=0] + ret { i16, %struct.rnode* }* null +bb78: ; preds = %entry + ret { i16, %struct.rnode* }* null +} + +declare noalias i8* @malloc(i32)
diff --git a/src/LLVM/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll b/src/LLVM/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll new file mode 100644 index 0000000..c9a8a67 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll
@@ -0,0 +1,253 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin + + %struct.CONTENTBOX = type { i32, i32, i32, i32, i32 } + %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } + %struct.LOCBOX = type { i32, i32, i32, i32 } + %struct.SIDEBOX = type { i32, i32 } + %struct.UNCOMBOX = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } + %struct.__sFILEX = type opaque + %struct.__sbuf = type { i8*, i32 } + %struct.cellbox = type { i8*, i32, i32, i32, [9 x i32], i32, i32, i32, i32, i32, i32, i32, double, double, double, double, double, i32, i32, %struct.CONTENTBOX*, %struct.UNCOMBOX*, [8 x %struct.tilebox*], %struct.SIDEBOX* } + %struct.termbox = type { %struct.termbox*, i32, i32, i32, i32, i32 } + %struct.tilebox = type { %struct.tilebox*, double, double, double, double, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.termbox*, %struct.LOCBOX* } +@.str127 = external constant [2 x i8] ; <[2 x i8]*> [#uses=1] +@.str584 = external constant [5 x i8] ; <[5 x i8]*> [#uses=1] +@.str8115 = external constant [9 x i8] ; <[9 x i8]*> [#uses=1] + +declare %struct.FILE* @fopen(i8*, i8*) + +declare i32 @strcmp(i8*, i8*) + +declare i32 @fscanf(%struct.FILE*, i8*, ...) 
+ +define void @main(i32 %argc, i8** %argv) noreturn { +entry: + br i1 false, label %cond_next48, label %cond_false674 +cond_next48: ; preds = %entry + %tmp61 = call %struct.FILE* @fopen( i8* null, i8* getelementptr ([2 x i8]* @.str127, i32 0, i32 0) ) ; <%struct.FILE*> [#uses=2] + br i1 false, label %bb220.i.i.i, label %bb62.preheader.i.i.i +bb62.preheader.i.i.i: ; preds = %cond_next48 + ret void +bb220.i.i.i: ; preds = %cond_next48 + br i1 false, label %bb248.i.i.i, label %cond_next232.i.i.i +cond_next232.i.i.i: ; preds = %bb220.i.i.i + ret void +bb248.i.i.i: ; preds = %bb220.i.i.i + br i1 false, label %bb300.i.i.i, label %cond_false256.i.i.i +cond_false256.i.i.i: ; preds = %bb248.i.i.i + ret void +bb300.i.i.i: ; preds = %bb248.i.i.i + br label %bb.i.i347.i +bb.i.i347.i: ; preds = %bb.i.i347.i, %bb300.i.i.i + br i1 false, label %bb894.loopexit.i.i, label %bb.i.i347.i +bb.i350.i: ; preds = %bb894.i.i + br i1 false, label %bb24.i.i, label %cond_false373.i.i +bb24.i.i: ; preds = %bb24.i.i, %bb.i350.i + br i1 false, label %bb40.i.i, label %bb24.i.i +bb40.i.i: ; preds = %bb24.i.i + br i1 false, label %bb177.i393.i, label %bb82.i.i +bb82.i.i: ; preds = %bb40.i.i + ret void +bb177.i393.i: ; preds = %bb40.i.i + br i1 false, label %bb894.i.i, label %bb192.i.i +bb192.i.i: ; preds = %bb177.i393.i + ret void +cond_false373.i.i: ; preds = %bb.i350.i + %tmp376.i.i = call i32 @strcmp( i8* null, i8* getelementptr ([9 x i8]* @.str8115, i32 0, i32 0) ) ; <i32> [#uses=0] + br i1 false, label %cond_true380.i.i, label %cond_next602.i.i +cond_true380.i.i: ; preds = %cond_false373.i.i + %tmp394.i418.i = add i32 %cell.0.i.i, 1 ; <i32> [#uses=1] + %tmp397.i420.i = load %struct.cellbox** null, align 4 ; <%struct.cellbox*> [#uses=1] + br label %bb398.i.i +bb398.i.i: ; preds = %bb398.i.i, %cond_true380.i.i + br i1 false, label %bb414.i.i, label %bb398.i.i +bb414.i.i: ; preds = %bb398.i.i + br i1 false, label %bb581.i.i, label %bb455.i442.i +bb455.i442.i: ; preds = %bb414.i.i + ret void 
+bb581.i.i: ; preds = %bb581.i.i, %bb414.i.i + br i1 false, label %bb894.i.i, label %bb581.i.i +cond_next602.i.i: ; preds = %cond_false373.i.i + br i1 false, label %bb609.i.i, label %bb661.i.i +bb609.i.i: ; preds = %cond_next602.i.i + br label %bb620.i.i +bb620.i.i: ; preds = %bb620.i.i, %bb609.i.i + %indvar166.i465.i = phi i32 [ %indvar.next167.i.i, %bb620.i.i ], [ 0, %bb609.i.i ] ; <i32> [#uses=1] + %tmp640.i.i = call i32 (%struct.FILE*, i8*, ...)* @fscanf( %struct.FILE* %tmp61, i8* getelementptr ([5 x i8]* @.str584, i32 0, i32 0), [1024 x i8]* null ) ; <i32> [#uses=0] + %tmp648.i.i = load i32* null, align 4 ; <i32> [#uses=1] + %tmp650.i468.i = icmp sgt i32 0, %tmp648.i.i ; <i1> [#uses=1] + %tmp624.i469.i = call i32 (%struct.FILE*, i8*, ...)* @fscanf( %struct.FILE* %tmp61, i8* getelementptr ([5 x i8]* @.str584, i32 0, i32 0), [1024 x i8]* null ) ; <i32> [#uses=0] + %indvar.next167.i.i = add i32 %indvar166.i465.i, 1 ; <i32> [#uses=1] + br i1 %tmp650.i468.i, label %bb653.i.i.loopexit, label %bb620.i.i +bb653.i.i.loopexit: ; preds = %bb620.i.i + %tmp642.i466.i = add i32 0, 1 ; <i32> [#uses=1] + br label %bb894.i.i +bb661.i.i: ; preds = %cond_next602.i.i + ret void +bb894.loopexit.i.i: ; preds = %bb.i.i347.i + br label %bb894.i.i +bb894.i.i: ; preds = %bb894.loopexit.i.i, %bb653.i.i.loopexit, %bb581.i.i, %bb177.i393.i + %pinctr.0.i.i = phi i32 [ 0, %bb894.loopexit.i.i ], [ %tmp642.i466.i, %bb653.i.i.loopexit ], [ %pinctr.0.i.i, %bb177.i393.i ], [ %pinctr.0.i.i, %bb581.i.i ] ; <i32> [#uses=2] + %soft.0.i.i = phi i32 [ undef, %bb894.loopexit.i.i ], [ %soft.0.i.i, %bb653.i.i.loopexit ], [ 0, %bb177.i393.i ], [ 1, %bb581.i.i ] ; <i32> [#uses=1] + %cell.0.i.i = phi i32 [ 0, %bb894.loopexit.i.i ], [ %cell.0.i.i, %bb653.i.i.loopexit ], [ 0, %bb177.i393.i ], [ %tmp394.i418.i, %bb581.i.i ] ; <i32> [#uses=2] + %ptr.0.i.i = phi %struct.cellbox* [ undef, %bb894.loopexit.i.i ], [ %ptr.0.i.i, %bb653.i.i.loopexit ], [ null, %bb177.i393.i ], [ %tmp397.i420.i, %bb581.i.i ] ; 
<%struct.cellbox*> [#uses=1] + br i1 false, label %bb.i350.i, label %bb902.i502.i +bb902.i502.i: ; preds = %bb894.i.i + ret void +cond_false674: ; preds = %entry + ret void +} + + %struct.III_psy_xmin = type { [22 x double], [13 x [3 x double]] } + %struct.III_scalefac_t = type { [22 x i32], [13 x [3 x i32]] } + %struct.gr_info = type { i32, i32, i32, i32, i32, i32, i32, i32, [3 x i32], [3 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32*, [4 x i32] } + %struct.lame_global_flags = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, float, float, float, float, i32, i32, i32, i32, i32, i32, i32, i32 } +@scalefac_band.1 = external global [14 x i32] ; <[14 x i32]*> [#uses=2] + +declare fastcc i32 @init_outer_loop(%struct.lame_global_flags*, double*, %struct.gr_info*) + +define fastcc void @outer_loop(%struct.lame_global_flags* %gfp, double* %xr, i32 %targ_bits, double* %best_noise, %struct.III_psy_xmin* %l3_xmin, i32* %l3_enc, %struct.III_scalefac_t* %scalefac, %struct.gr_info* %cod_info, i32 %ch) { +entry: + %cod_info.182 = getelementptr %struct.gr_info* %cod_info, i32 0, i32 1 ; <i32*> [#uses=1] + br label %bb +bb: ; preds = %bb226, %entry + %save_cod_info.1.1 = phi i32 [ undef, %entry ], [ %save_cod_info.1.1, %bb226 ] ; <i32> [#uses=2] + br i1 false, label %cond_next, label %cond_true +cond_true: ; preds = %bb + ret void +cond_next: ; preds = %bb + br i1 false, label %cond_next144, label %cond_false +cond_false: ; preds = %cond_next + ret void +cond_next144: ; preds = %cond_next + br i1 false, label %cond_next205, label %cond_true163 +cond_true163: ; preds = %cond_next144 + br i1 false, label %bb34.i, label %bb.i53 +bb.i53: ; preds = %cond_true163 + ret void +bb34.i: ; preds = %cond_true163 + %tmp37.i55 = load i32* null, align 4 ; <i32> 
[#uses=1] + br i1 false, label %bb65.preheader.i, label %bb78.i +bb65.preheader.i: ; preds = %bb34.i + br label %bb65.outer.us.i +bb65.outer.us.i: ; preds = %bb65.outer.us.i, %bb65.preheader.i + br i1 false, label %bb78.i, label %bb65.outer.us.i +bb78.i: ; preds = %bb65.outer.us.i, %bb34.i + br i1 false, label %bb151.i.preheader, label %bb90.i +bb90.i: ; preds = %bb78.i + ret void +bb151.i.preheader: ; preds = %bb78.i + br label %bb151.i +bb151.i: ; preds = %bb226.backedge.i, %bb151.i.preheader + %i.154.i = phi i32 [ %tmp15747.i, %bb226.backedge.i ], [ 0, %bb151.i.preheader ] ; <i32> [#uses=2] + %tmp15747.i = add i32 %i.154.i, 1 ; <i32> [#uses=3] + br i1 false, label %bb155.i, label %bb226.backedge.i +bb226.backedge.i: ; preds = %cond_next215.i, %bb151.i + %tmp228.i71 = icmp slt i32 %tmp15747.i, 3 ; <i1> [#uses=1] + br i1 %tmp228.i71, label %bb151.i, label %amp_scalefac_bands.exit +bb155.i: ; preds = %cond_next215.i, %bb151.i + %indvar90.i = phi i32 [ %indvar.next91.i, %cond_next215.i ], [ 0, %bb151.i ] ; <i32> [#uses=2] + %sfb.3.reg2mem.0.i = add i32 %indvar90.i, %tmp37.i55 ; <i32> [#uses=4] + %tmp161.i = getelementptr [4 x [21 x double]]* null, i32 0, i32 %tmp15747.i, i32 %sfb.3.reg2mem.0.i ; <double*> [#uses=1] + %tmp162.i74 = load double* %tmp161.i, align 4 ; <double> [#uses=0] + br i1 false, label %cond_true167.i, label %cond_next215.i +cond_true167.i: ; preds = %bb155.i + %tmp173.i = getelementptr %struct.III_scalefac_t* null, i32 0, i32 1, i32 %sfb.3.reg2mem.0.i, i32 %i.154.i ; <i32*> [#uses=1] + store i32 0, i32* %tmp173.i, align 4 + %tmp182.1.i = getelementptr [14 x i32]* @scalefac_band.1, i32 0, i32 %sfb.3.reg2mem.0.i ; <i32*> [#uses=0] + %tmp185.i78 = add i32 %sfb.3.reg2mem.0.i, 1 ; <i32> [#uses=1] + %tmp187.1.i = getelementptr [14 x i32]* @scalefac_band.1, i32 0, i32 %tmp185.i78 ; <i32*> [#uses=1] + %tmp188.i = load i32* %tmp187.1.i, align 4 ; <i32> [#uses=1] + %tmp21153.i = icmp slt i32 0, %tmp188.i ; <i1> [#uses=1] + br i1 %tmp21153.i, label 
%bb190.preheader.i, label %cond_next215.i +bb190.preheader.i: ; preds = %cond_true167.i + ret void +cond_next215.i: ; preds = %cond_true167.i, %bb155.i + %indvar.next91.i = add i32 %indvar90.i, 1 ; <i32> [#uses=2] + %exitcond99.i87 = icmp eq i32 %indvar.next91.i, 0 ; <i1> [#uses=1] + br i1 %exitcond99.i87, label %bb226.backedge.i, label %bb155.i +amp_scalefac_bands.exit: ; preds = %bb226.backedge.i + br i1 false, label %bb19.i, label %bb.i16 +bb.i16: ; preds = %amp_scalefac_bands.exit + ret void +bb19.i: ; preds = %amp_scalefac_bands.exit + br i1 false, label %bb40.outer.i, label %cond_next205 +bb40.outer.i: ; preds = %bb19.i + ret void +cond_next205: ; preds = %bb19.i, %cond_next144 + br i1 false, label %bb226, label %cond_true210 +cond_true210: ; preds = %cond_next205 + br i1 false, label %bb226, label %cond_true217 +cond_true217: ; preds = %cond_true210 + %tmp221 = call fastcc i32 @init_outer_loop( %struct.lame_global_flags* %gfp, double* %xr, %struct.gr_info* %cod_info ) ; <i32> [#uses=0] + ret void +bb226: ; preds = %cond_true210, %cond_next205 + br i1 false, label %bb231, label %bb +bb231: ; preds = %bb226 + store i32 %save_cod_info.1.1, i32* %cod_info.182 + ret void +} + +define fastcc void @outer_loop2(%struct.lame_global_flags* %gfp, double* %xr, i32 %targ_bits, double* %best_noise, %struct.III_psy_xmin* %l3_xmin, i32* %l3_enc, %struct.III_scalefac_t* %scalefac, %struct.gr_info* %cod_info, i32 %ch) { +entry: + %cod_info.20128.1 = getelementptr %struct.gr_info* %cod_info, i32 0, i32 20, i32 1 ; <i32*> [#uses=1] + %cod_info.20128.2 = getelementptr %struct.gr_info* %cod_info, i32 0, i32 20, i32 2 ; <i32*> [#uses=1] + %cod_info.20128.3 = getelementptr %struct.gr_info* %cod_info, i32 0, i32 20, i32 3 ; <i32*> [#uses=1] + br label %bb +bb: ; preds = %bb226, %entry + %save_cod_info.19.1 = phi i32* [ undef, %entry ], [ %save_cod_info.19.0, %bb226 ] ; <i32*> [#uses=1] + %save_cod_info.0.1 = phi i32 [ undef, %entry ], [ %save_cod_info.0.0, %bb226 ] ; <i32> [#uses=1] 
+ br i1 false, label %cond_next144, label %cond_false +cond_false: ; preds = %bb + br i1 false, label %cond_true56, label %cond_false78 +cond_true56: ; preds = %cond_false + br i1 false, label %inner_loop.exit, label %cond_next85 +inner_loop.exit: ; preds = %cond_true56 + br i1 false, label %cond_next104, label %cond_false96 +cond_false78: ; preds = %cond_false + ret void +cond_next85: ; preds = %cond_true56 + ret void +cond_false96: ; preds = %inner_loop.exit + ret void +cond_next104: ; preds = %inner_loop.exit + br i1 false, label %cond_next144, label %cond_false110 +cond_false110: ; preds = %cond_next104 + ret void +cond_next144: ; preds = %cond_next104, %bb + %save_cod_info.19.0 = phi i32* [ %save_cod_info.19.1, %bb ], [ null, %cond_next104 ] ; <i32*> [#uses=1] + %save_cod_info.4.0 = phi i32 [ 0, %bb ], [ 0, %cond_next104 ] ; <i32> [#uses=1] + %save_cod_info.3.0 = phi i32 [ 0, %bb ], [ 0, %cond_next104 ] ; <i32> [#uses=1] + %save_cod_info.2.0 = phi i32 [ 0, %bb ], [ 0, %cond_next104 ] ; <i32> [#uses=1] + %save_cod_info.1.0 = phi i32 [ 0, %bb ], [ 0, %cond_next104 ] ; <i32> [#uses=1] + %save_cod_info.0.0 = phi i32 [ %save_cod_info.0.1, %bb ], [ 0, %cond_next104 ] ; <i32> [#uses=1] + %over.1 = phi i32 [ 0, %bb ], [ 0, %cond_next104 ] ; <i32> [#uses=1] + %best_over.0 = phi i32 [ 0, %bb ], [ 0, %cond_next104 ] ; <i32> [#uses=1] + %notdone.0 = phi i32 [ 0, %bb ], [ 0, %cond_next104 ] ; <i32> [#uses=1] + %tmp147 = load i32* null, align 4 ; <i32> [#uses=1] + %tmp148 = icmp eq i32 %tmp147, 0 ; <i1> [#uses=1] + %tmp153 = icmp eq i32 %over.1, 0 ; <i1> [#uses=1] + %bothcond = and i1 %tmp148, %tmp153 ; <i1> [#uses=1] + %notdone.2 = select i1 %bothcond, i32 0, i32 %notdone.0 ; <i32> [#uses=1] + br i1 false, label %cond_next205, label %cond_true163 +cond_true163: ; preds = %cond_next144 + ret void +cond_next205: ; preds = %cond_next144 + br i1 false, label %bb226, label %cond_true210 +cond_true210: ; preds = %cond_next205 + ret void +bb226: ; preds = %cond_next205 + %tmp228 
= icmp eq i32 %notdone.2, 0 ; <i1> [#uses=1] + br i1 %tmp228, label %bb231, label %bb +bb231: ; preds = %bb226 + store i32 %save_cod_info.1.0, i32* null + store i32 %save_cod_info.2.0, i32* null + store i32 %save_cod_info.3.0, i32* null + store i32 %save_cod_info.4.0, i32* null + store i32 0, i32* %cod_info.20128.1 + store i32 0, i32* %cod_info.20128.2 + store i32 0, i32* %cod_info.20128.3 + %tmp244245 = sitofp i32 %best_over.0 to double ; <double> [#uses=1] + store double %tmp244245, double* %best_noise, align 4 + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2008-04-11-PHIofImpDef.ll b/src/LLVM/test/CodeGen/ARM/2008-04-11-PHIofImpDef.ll new file mode 100644 index 0000000..33bd4de --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2008-04-11-PHIofImpDef.ll
@@ -0,0 +1,3544 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin + +declare void @foo(i8*, i8*, i32, i32, i32, i32, i32, i32, i32) + +define void @t() nounwind { + br label %1 +; <label>:1 ; preds = %0 + br label %bb4351.i +bb4351.i: ; preds = %1 + switch i32 0, label %bb4411.i [ + i32 1, label %bb4354.i + i32 2, label %bb4369.i + ] +bb4354.i: ; preds = %bb4351.i + br label %t.exit +bb4369.i: ; preds = %bb4351.i + br label %bb4374.i +bb4374.i: ; preds = %bb4369.i + br label %bb4411.i +bb4411.i: ; preds = %bb4374.i, %bb4351.i + %sf4083.0.i = phi i32 [ 0, %bb4374.i ], [ 6, %bb4351.i ] ; <i32> [#uses=8] + br label %bb4498.i +bb4498.i: ; preds = %bb4411.i + %sfComp4077.1.i = phi i32 [ undef, %bb4411.i ] ; <i32> [#uses=2] + %stComp4075.1.i = phi i32 [ undef, %bb4411.i ] ; <i32> [#uses=1] + switch i32 0, label %bb4553.i [ + i32 1, label %bb4501.i + i32 2, label %bb4521.i + ] +bb4501.i: ; preds = %bb4498.i + %sfComp4077.1.reg2mem.0.i = phi i32 [ %sfComp4077.1.i, %bb4498.i ] ; <i32> [#uses=1] + call void @foo( i8* null, i8* null, i32 %sfComp4077.1.reg2mem.0.i, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0 ) nounwind + br i1 false, label %UnifiedReturnBlock.i, label %bb4517.i +bb4517.i: ; preds = %bb4501.i + br label %t.exit +bb4521.i: ; preds = %bb4498.i + br label %bb4526.i +bb4526.i: ; preds = %bb4521.i + switch i32 0, label %bb4529.i [ + i32 6, label %bb4530.i + i32 7, label %bb4530.i + ] +bb4529.i: ; preds = %bb4526.i + br label %bb4530.i +bb4530.i: ; preds = %bb4529.i, %bb4526.i, %bb4526.i + br label %bb4553.i +bb4553.i: ; preds = %bb4530.i, %bb4498.i + %dt4080.0.i = phi i32 [ %stComp4075.1.i, %bb4530.i ], [ 7, %bb4498.i ] ; <i32> [#uses=32] + %df4081.0.i = phi i32 [ %sfComp4077.1.i, %bb4530.i ], [ 8, %bb4498.i ] ; <i32> [#uses=17] + switch i32 %sf4083.0.i, label %bb4559.i [ + i32 0, label %bb4558.i + i32 1, label %bb4558.i + i32 2, label %bb4558.i + i32 5, label %bb4561.i + i32 6, label %bb4561.i + i32 7, label %bb4561.i + i32 9, label %bb4557.i + ] +bb4557.i: ; preds = 
%bb4553.i + switch i32 %df4081.0.i, label %bb4569.i [ + i32 0, label %bb4568.i + i32 1, label %bb4568.i + i32 2, label %bb4568.i + i32 5, label %bb4571.i + i32 6, label %bb4571.i + i32 7, label %bb4571.i + i32 9, label %bb4567.i + ] +bb4558.i: ; preds = %bb4553.i, %bb4553.i, %bb4553.i + switch i32 %df4081.0.i, label %bb4569.i [ + i32 0, label %bb4568.i + i32 1, label %bb4568.i + i32 2, label %bb4568.i + i32 5, label %bb4571.i + i32 6, label %bb4571.i + i32 7, label %bb4571.i + i32 9, label %bb4567.i + ] +bb4559.i: ; preds = %bb4553.i + br label %bb4561.i +bb4561.i: ; preds = %bb4559.i, %bb4553.i, %bb4553.i, %bb4553.i + switch i32 %df4081.0.i, label %bb4569.i [ + i32 0, label %bb4568.i + i32 1, label %bb4568.i + i32 2, label %bb4568.i + i32 5, label %bb4571.i + i32 6, label %bb4571.i + i32 7, label %bb4571.i + i32 9, label %bb4567.i + ] +bb4567.i: ; preds = %bb4561.i, %bb4558.i, %bb4557.i + br label %bb4580.i +bb4568.i: ; preds = %bb4561.i, %bb4561.i, %bb4561.i, %bb4558.i, %bb4558.i, %bb4558.i, %bb4557.i, %bb4557.i, %bb4557.i + br label %bb4580.i +bb4569.i: ; preds = %bb4561.i, %bb4558.i, %bb4557.i + br label %bb4571.i +bb4571.i: ; preds = %bb4569.i, %bb4561.i, %bb4561.i, %bb4561.i, %bb4558.i, %bb4558.i, %bb4558.i, %bb4557.i, %bb4557.i, %bb4557.i + br label %bb4580.i +bb4580.i: ; preds = %bb4571.i, %bb4568.i, %bb4567.i + br i1 false, label %bb4611.i, label %bb4593.i +bb4593.i: ; preds = %bb4580.i + br i1 false, label %bb4610.i, label %bb4611.i +bb4610.i: ; preds = %bb4593.i + br label %bb4611.i +bb4611.i: ; preds = %bb4610.i, %bb4593.i, %bb4580.i + br i1 false, label %bb4776.i, label %bb4620.i +bb4620.i: ; preds = %bb4611.i + switch i32 0, label %bb4776.i [ + i32 0, label %bb4691.i + i32 2, label %bb4740.i + i32 4, label %bb4755.i + i32 8, label %bb4622.i + i32 9, label %bb4622.i + i32 10, label %bb4629.i + i32 11, label %bb4629.i + i32 12, label %bb4651.i + i32 13, label %bb4651.i + i32 14, label %bb4665.i + i32 15, label %bb4665.i + i32 16, label %bb4691.i + i32 
17, label %bb4691.i + i32 18, label %bb4712.i + i32 19, label %bb4712.i + i32 22, label %bb4733.i + i32 23, label %bb4733.i + ] +bb4622.i: ; preds = %bb4620.i, %bb4620.i + br i1 false, label %bb4628.i, label %bb4776.i +bb4628.i: ; preds = %bb4622.i + br label %bb4776.i +bb4629.i: ; preds = %bb4620.i, %bb4620.i + br i1 false, label %bb4776.i, label %bb4644.i +bb4644.i: ; preds = %bb4629.i + br i1 false, label %bb4650.i, label %bb4776.i +bb4650.i: ; preds = %bb4644.i + br label %bb4776.i +bb4651.i: ; preds = %bb4620.i, %bb4620.i + br i1 false, label %bb4776.i, label %bb4658.i +bb4658.i: ; preds = %bb4651.i + br i1 false, label %bb4664.i, label %bb4776.i +bb4664.i: ; preds = %bb4658.i + br label %bb4776.i +bb4665.i: ; preds = %bb4620.i, %bb4620.i + br i1 false, label %bb4776.i, label %bb4684.i +bb4684.i: ; preds = %bb4665.i + br i1 false, label %bb4690.i, label %bb4776.i +bb4690.i: ; preds = %bb4684.i + br label %bb4776.i +bb4691.i: ; preds = %bb4620.i, %bb4620.i, %bb4620.i + br i1 false, label %bb4776.i, label %bb4698.i +bb4698.i: ; preds = %bb4691.i + br i1 false, label %bb4711.i, label %bb4776.i +bb4711.i: ; preds = %bb4698.i + br label %bb4776.i +bb4712.i: ; preds = %bb4620.i, %bb4620.i + br i1 false, label %bb4776.i, label %bb4726.i +bb4726.i: ; preds = %bb4712.i + br i1 false, label %bb4732.i, label %bb4776.i +bb4732.i: ; preds = %bb4726.i + br label %bb4776.i +bb4733.i: ; preds = %bb4620.i, %bb4620.i + br i1 false, label %bb4739.i, label %bb4776.i +bb4739.i: ; preds = %bb4733.i + br label %bb4776.i +bb4740.i: ; preds = %bb4620.i + br i1 false, label %bb4776.i, label %bb4754.i +bb4754.i: ; preds = %bb4740.i + br label %bb4776.i +bb4755.i: ; preds = %bb4620.i + br i1 false, label %bb4776.i, label %bb4774.i +bb4774.i: ; preds = %bb4755.i + br label %bb4776.i +bb4776.i: ; preds = %bb4774.i, %bb4755.i, %bb4754.i, %bb4740.i, %bb4739.i, %bb4733.i, %bb4732.i, %bb4726.i, %bb4712.i, %bb4711.i, %bb4698.i, %bb4691.i, %bb4690.i, %bb4684.i, %bb4665.i, %bb4664.i, %bb4658.i, 
%bb4651.i, %bb4650.i, %bb4644.i, %bb4629.i, %bb4628.i, %bb4622.i, %bb4620.i, %bb4611.i + switch i32 0, label %bb4790.i [ + i32 0, label %bb4786.i + i32 1, label %bb4784.i + i32 3, label %bb4784.i + i32 5, label %bb4784.i + i32 6, label %bb4785.i + i32 7, label %bb4785.i + i32 8, label %bb4791.i + i32 9, label %bb4791.i + i32 10, label %bb4791.i + i32 11, label %bb4791.i + i32 12, label %bb4791.i + i32 13, label %bb4791.i + i32 14, label %bb4791.i + i32 15, label %bb4791.i + i32 16, label %bb4791.i + i32 17, label %bb4791.i + i32 18, label %bb4791.i + i32 19, label %bb4791.i + ] +bb4784.i: ; preds = %bb4776.i, %bb4776.i, %bb4776.i + br label %bb4791.i +bb4785.i: ; preds = %bb4776.i, %bb4776.i + br label %bb4791.i +bb4786.i: ; preds = %bb4776.i + br label %bb4791.i +bb4790.i: ; preds = %bb4776.i + br label %bb4791.i +bb4791.i: ; preds = %bb4790.i, %bb4786.i, %bb4785.i, %bb4784.i, %bb4776.i, %bb4776.i, %bb4776.i, %bb4776.i, %bb4776.i, %bb4776.i, %bb4776.i, %bb4776.i, %bb4776.i, %bb4776.i, %bb4776.i, %bb4776.i + switch i32 %dt4080.0.i, label %bb4803.i [ + i32 0, label %bb4799.i + i32 6, label %bb4794.i + i32 7, label %bb4794.i + i32 8, label %bb4804.i + i32 9, label %bb4804.i + i32 10, label %bb4804.i + i32 11, label %bb4804.i + i32 12, label %bb4804.i + i32 13, label %bb4804.i + i32 14, label %bb4804.i + i32 15, label %bb4804.i + i32 16, label %bb4804.i + i32 17, label %bb4804.i + i32 18, label %bb4804.i + i32 19, label %bb4804.i + ] +bb4794.i: ; preds = %bb4791.i, %bb4791.i + br i1 false, label %bb4809.i, label %bb4819.i +bb4799.i: ; preds = %bb4791.i + br i1 false, label %bb4809.i, label %bb4819.i +bb4803.i: ; preds = %bb4791.i + br label %bb4804.i +bb4804.i: ; preds = %bb4803.i, %bb4791.i, %bb4791.i, %bb4791.i, %bb4791.i, %bb4791.i, %bb4791.i, %bb4791.i, %bb4791.i, %bb4791.i, %bb4791.i, %bb4791.i, %bb4791.i + br i1 false, label %bb4809.i, label %bb4819.i +bb4809.i: ; preds = %bb4804.i, %bb4799.i, %bb4794.i + switch i32 %df4081.0.i, label %bb71.i.i [ + i32 3, label 
%bb61.i.i + i32 4, label %bb.i.i + i32 5, label %bb.i.i + i32 6, label %bb.i.i + i32 7, label %bb.i.i + i32 8, label %bb38.i.i + i32 9, label %bb38.i.i + i32 10, label %bb50.i.i + i32 11, label %bb40.i.i + i32 16, label %bb38.i.i + ] +bb.i.i: ; preds = %bb4809.i, %bb4809.i, %bb4809.i, %bb4809.i + br label %bb403.i.i +bb38.i.i: ; preds = %bb4809.i, %bb4809.i, %bb4809.i + br label %bb403.i.i +bb40.i.i: ; preds = %bb4809.i + br label %bb403.i.i +bb50.i.i: ; preds = %bb4809.i + br label %bb403.i.i +bb61.i.i: ; preds = %bb4809.i + br label %bb403.i.i +bb71.i.i: ; preds = %bb4809.i + br label %bb403.i.i +bb403.i.i: ; preds = %bb71.i.i, %bb61.i.i, %bb50.i.i, %bb40.i.i, %bb38.i.i, %bb.i.i + br i1 false, label %bb408.i.i, label %bb502.i.i +bb408.i.i: ; preds = %bb403.i.i + br label %bb708.i.i +bb502.i.i: ; preds = %bb403.i.i + br label %bb708.i.i +bb708.i.i: ; preds = %bb502.i.i, %bb408.i.i + switch i32 0, label %bb758.i.i [ + i32 0, label %bb710.i.i + i32 1, label %bb713.i.i + i32 2, label %bb718.i.i + i32 3, label %bb721.i.i + i32 4, label %bb726.i.i + i32 5, label %bb729.i.i + i32 8, label %bb732.i.i + i32 9, label %bb732.i.i + i32 10, label %bb737.i.i + i32 11, label %bb737.i.i + i32 12, label %bb742.i.i + i32 13, label %bb742.i.i + i32 14, label %bb745.i.i + i32 15, label %bb745.i.i + i32 16, label %bb750.i.i + i32 17, label %bb750.i.i + i32 18, label %bb753.i.i + i32 19, label %bb753.i.i + i32 22, label %bb750.i.i + i32 23, label %bb750.i.i + ] +bb710.i.i: ; preds = %bb708.i.i + br label %bb758.i.i +bb713.i.i: ; preds = %bb708.i.i + br label %bb758.i.i +bb718.i.i: ; preds = %bb708.i.i + br label %bb758.i.i +bb721.i.i: ; preds = %bb708.i.i + br label %bb758.i.i +bb726.i.i: ; preds = %bb708.i.i + br label %bb758.i.i +bb729.i.i: ; preds = %bb708.i.i + br label %bb758.i.i +bb732.i.i: ; preds = %bb708.i.i, %bb708.i.i + br label %bb758.i.i +bb737.i.i: ; preds = %bb708.i.i, %bb708.i.i + br label %bb758.i.i +bb742.i.i: ; preds = %bb708.i.i, %bb708.i.i + br label %bb758.i.i 
+bb745.i.i: ; preds = %bb708.i.i, %bb708.i.i + br label %bb758.i.i +bb750.i.i: ; preds = %bb708.i.i, %bb708.i.i, %bb708.i.i, %bb708.i.i + br label %bb758.i.i +bb753.i.i: ; preds = %bb708.i.i, %bb708.i.i + br label %bb758.i.i +bb758.i.i: ; preds = %bb753.i.i, %bb750.i.i, %bb745.i.i, %bb742.i.i, %bb737.i.i, %bb732.i.i, %bb729.i.i, %bb726.i.i, %bb721.i.i, %bb718.i.i, %bb713.i.i, %bb710.i.i, %bb708.i.i + switch i32 %dt4080.0.i, label %bb808.i.i [ + i32 0, label %bb760.i.i + i32 1, label %bb763.i.i + i32 2, label %bb768.i.i + i32 3, label %bb771.i.i + i32 4, label %bb776.i.i + i32 5, label %bb779.i.i + i32 8, label %bb782.i.i + i32 9, label %bb782.i.i + i32 10, label %bb787.i.i + i32 11, label %bb787.i.i + i32 12, label %bb792.i.i + i32 13, label %bb792.i.i + i32 14, label %bb795.i.i + i32 15, label %bb795.i.i + i32 16, label %bb800.i.i + i32 17, label %bb800.i.i + i32 18, label %bb803.i.i + i32 19, label %bb803.i.i + i32 22, label %bb800.i.i + i32 23, label %bb800.i.i + ] +bb760.i.i: ; preds = %bb758.i.i + br label %bb811.i.i +bb763.i.i: ; preds = %bb758.i.i + br label %bb811.i.i +bb768.i.i: ; preds = %bb758.i.i + br label %bb811.i.i +bb771.i.i: ; preds = %bb758.i.i + br label %bb811.i.i +bb776.i.i: ; preds = %bb758.i.i + br label %bb811.i.i +bb779.i.i: ; preds = %bb758.i.i + br label %bb811.i.i +bb782.i.i: ; preds = %bb758.i.i, %bb758.i.i + br label %bb811.i.i +bb787.i.i: ; preds = %bb758.i.i, %bb758.i.i + br label %bb811.i.i +bb792.i.i: ; preds = %bb758.i.i, %bb758.i.i + br label %bb811.i.i +bb795.i.i: ; preds = %bb758.i.i, %bb758.i.i + br label %bb811.i.i +bb800.i.i: ; preds = %bb758.i.i, %bb758.i.i, %bb758.i.i, %bb758.i.i + br label %bb811.i.i +bb803.i.i: ; preds = %bb758.i.i, %bb758.i.i + br label %bb808.i.i +bb808.i.i: ; preds = %bb803.i.i, %bb758.i.i + br label %bb811.i.i +bb811.i.i: ; preds = %bb808.i.i, %bb800.i.i, %bb795.i.i, %bb792.i.i, %bb787.i.i, %bb782.i.i, %bb779.i.i, %bb776.i.i, %bb771.i.i, %bb768.i.i, %bb763.i.i, %bb760.i.i + switch i32 0, label 
%bb928.i.i [ + i32 0, label %bb813.i.i + i32 1, label %bb833.i.i + i32 2, label %bb813.i.i + i32 3, label %bb833.i.i + i32 4, label %bb813.i.i + i32 5, label %bb813.i.i + i32 8, label %bb872.i.i + i32 9, label %bb872.i.i + i32 10, label %bb890.i.i + i32 11, label %bb890.i.i + i32 12, label %bb813.i.i + i32 13, label %bb813.i.i + i32 14, label %bb908.i.i + i32 15, label %bb908.i.i + i32 16, label %bb813.i.i + i32 17, label %bb813.i.i + i32 18, label %bb908.i.i + i32 19, label %bb908.i.i + i32 22, label %bb813.i.i + i32 23, label %bb813.i.i + ] +bb813.i.i: ; preds = %bb811.i.i, %bb811.i.i, %bb811.i.i, %bb811.i.i, %bb811.i.i, %bb811.i.i, %bb811.i.i, %bb811.i.i, %bb811.i.i, %bb811.i.i + switch i32 %dt4080.0.i, label %bb1065.i.i [ + i32 0, label %bb930.i.i + i32 1, label %bb950.i.i + i32 2, label %bb930.i.i + i32 3, label %bb950.i.i + i32 4, label %bb989.i.i + i32 5, label %bb989.i.i + i32 8, label %bb1009.i.i + i32 9, label %bb1009.i.i + i32 10, label %bb1027.i.i + i32 11, label %bb1027.i.i + i32 12, label %bb930.i.i + i32 13, label %bb930.i.i + i32 14, label %bb1045.i.i + i32 15, label %bb1045.i.i + i32 16, label %bb930.i.i + i32 17, label %bb930.i.i + i32 18, label %bb1045.i.i + i32 19, label %bb1045.i.i + i32 22, label %bb930.i.i + i32 23, label %bb930.i.i + ] +bb833.i.i: ; preds = %bb811.i.i, %bb811.i.i + switch i32 %dt4080.0.i, label %bb1065.i.i [ + i32 0, label %bb930.i.i + i32 1, label %bb950.i.i + i32 2, label %bb930.i.i + i32 3, label %bb950.i.i + i32 4, label %bb989.i.i + i32 5, label %bb989.i.i + i32 8, label %bb1009.i.i + i32 9, label %bb1009.i.i + i32 10, label %bb1027.i.i + i32 11, label %bb1027.i.i + i32 12, label %bb930.i.i + i32 13, label %bb930.i.i + i32 14, label %bb1045.i.i + i32 15, label %bb1045.i.i + i32 16, label %bb930.i.i + i32 17, label %bb930.i.i + i32 18, label %bb1045.i.i + i32 19, label %bb1045.i.i + i32 22, label %bb930.i.i + i32 23, label %bb930.i.i + ] +bb872.i.i: ; preds = %bb811.i.i, %bb811.i.i + switch i32 %dt4080.0.i, label 
%bb1065.i.i [ + i32 0, label %bb930.i.i + i32 1, label %bb950.i.i + i32 2, label %bb930.i.i + i32 3, label %bb950.i.i + i32 4, label %bb989.i.i + i32 5, label %bb989.i.i + i32 8, label %bb1009.i.i + i32 9, label %bb1009.i.i + i32 10, label %bb1027.i.i + i32 11, label %bb1027.i.i + i32 12, label %bb930.i.i + i32 13, label %bb930.i.i + i32 14, label %bb1045.i.i + i32 15, label %bb1045.i.i + i32 16, label %bb930.i.i + i32 17, label %bb930.i.i + i32 18, label %bb1045.i.i + i32 19, label %bb1045.i.i + i32 22, label %bb930.i.i + i32 23, label %bb930.i.i + ] +bb890.i.i: ; preds = %bb811.i.i, %bb811.i.i + switch i32 %dt4080.0.i, label %bb1065.i.i [ + i32 0, label %bb930.i.i + i32 1, label %bb950.i.i + i32 2, label %bb930.i.i + i32 3, label %bb950.i.i + i32 4, label %bb989.i.i + i32 5, label %bb989.i.i + i32 8, label %bb1009.i.i + i32 9, label %bb1009.i.i + i32 10, label %bb1027.i.i + i32 11, label %bb1027.i.i + i32 12, label %bb930.i.i + i32 13, label %bb930.i.i + i32 14, label %bb1045.i.i + i32 15, label %bb1045.i.i + i32 16, label %bb930.i.i + i32 17, label %bb930.i.i + i32 18, label %bb1045.i.i + i32 19, label %bb1045.i.i + i32 22, label %bb930.i.i + i32 23, label %bb930.i.i + ] +bb908.i.i: ; preds = %bb811.i.i, %bb811.i.i, %bb811.i.i, %bb811.i.i + br label %bb928.i.i +bb928.i.i: ; preds = %bb908.i.i, %bb811.i.i + switch i32 %dt4080.0.i, label %bb1065.i.i [ + i32 0, label %bb930.i.i + i32 1, label %bb950.i.i + i32 2, label %bb930.i.i + i32 3, label %bb950.i.i + i32 4, label %bb989.i.i + i32 5, label %bb989.i.i + i32 8, label %bb1009.i.i + i32 9, label %bb1009.i.i + i32 10, label %bb1027.i.i + i32 11, label %bb1027.i.i + i32 12, label %bb930.i.i + i32 13, label %bb930.i.i + i32 14, label %bb1045.i.i + i32 15, label %bb1045.i.i + i32 16, label %bb930.i.i + i32 17, label %bb930.i.i + i32 18, label %bb1045.i.i + i32 19, label %bb1045.i.i + i32 22, label %bb930.i.i + i32 23, label %bb930.i.i + ] +bb930.i.i: ; preds = %bb928.i.i, %bb928.i.i, %bb928.i.i, %bb928.i.i, 
%bb928.i.i, %bb928.i.i, %bb928.i.i, %bb928.i.i, %bb890.i.i, %bb890.i.i, %bb890.i.i, %bb890.i.i, %bb890.i.i, %bb890.i.i, %bb890.i.i, %bb890.i.i, %bb872.i.i, %bb872.i.i, %bb872.i.i, %bb872.i.i, %bb872.i.i, %bb872.i.i, %bb872.i.i, %bb872.i.i, %bb833.i.i, %bb833.i.i, %bb833.i.i, %bb833.i.i, %bb833.i.i, %bb833.i.i, %bb833.i.i, %bb833.i.i, %bb813.i.i, %bb813.i.i, %bb813.i.i, %bb813.i.i, %bb813.i.i, %bb813.i.i, %bb813.i.i, %bb813.i.i + br label %bb5235.i +bb950.i.i: ; preds = %bb928.i.i, %bb928.i.i, %bb890.i.i, %bb890.i.i, %bb872.i.i, %bb872.i.i, %bb833.i.i, %bb833.i.i, %bb813.i.i, %bb813.i.i + br label %bb5235.i +bb989.i.i: ; preds = %bb928.i.i, %bb928.i.i, %bb890.i.i, %bb890.i.i, %bb872.i.i, %bb872.i.i, %bb833.i.i, %bb833.i.i, %bb813.i.i, %bb813.i.i + br label %bb5235.i +bb1009.i.i: ; preds = %bb928.i.i, %bb928.i.i, %bb890.i.i, %bb890.i.i, %bb872.i.i, %bb872.i.i, %bb833.i.i, %bb833.i.i, %bb813.i.i, %bb813.i.i + br label %bb5235.i +bb1027.i.i: ; preds = %bb928.i.i, %bb928.i.i, %bb890.i.i, %bb890.i.i, %bb872.i.i, %bb872.i.i, %bb833.i.i, %bb833.i.i, %bb813.i.i, %bb813.i.i + br label %bb5235.i +bb1045.i.i: ; preds = %bb928.i.i, %bb928.i.i, %bb928.i.i, %bb928.i.i, %bb890.i.i, %bb890.i.i, %bb890.i.i, %bb890.i.i, %bb872.i.i, %bb872.i.i, %bb872.i.i, %bb872.i.i, %bb833.i.i, %bb833.i.i, %bb833.i.i, %bb833.i.i, %bb813.i.i, %bb813.i.i, %bb813.i.i, %bb813.i.i + br label %bb1065.i.i +bb1065.i.i: ; preds = %bb1045.i.i, %bb928.i.i, %bb890.i.i, %bb872.i.i, %bb833.i.i, %bb813.i.i + br label %bb5235.i +bb4819.i: ; preds = %bb4804.i, %bb4799.i, %bb4794.i + br i1 false, label %bb5208.i, label %bb5011.i +bb5011.i: ; preds = %bb4819.i + switch i32 0, label %bb5039.i [ + i32 10, label %bb5016.i + i32 3, label %bb5103.i + ] +bb5016.i: ; preds = %bb5011.i + br i1 false, label %bb5103.i, label %bb5039.i +bb5039.i: ; preds = %bb5016.i, %bb5011.i + switch i32 0, label %bb5052.i [ + i32 3, label %bb5103.i + i32 10, label %bb5103.i + ] +bb5052.i: ; preds = %bb5039.i + br i1 false, label %bb5103.i, 
label %bb5065.i +bb5065.i: ; preds = %bb5052.i + br i1 false, label %bb5078.i, label %bb5103.i +bb5078.i: ; preds = %bb5065.i + br i1 false, label %bb5103.i, label %bb5084.i +bb5084.i: ; preds = %bb5078.i + br i1 false, label %bb5103.i, label %bb5090.i +bb5090.i: ; preds = %bb5084.i + br i1 false, label %bb5103.i, label %bb5096.i +bb5096.i: ; preds = %bb5090.i + br i1 false, label %bb5103.i, label %bb5102.i +bb5102.i: ; preds = %bb5096.i + br label %bb5103.i +bb5103.i: ; preds = %bb5102.i, %bb5096.i, %bb5090.i, %bb5084.i, %bb5078.i, %bb5065.i, %bb5052.i, %bb5039.i, %bb5039.i, %bb5016.i, %bb5011.i + switch i32 0, label %bb5208.i [ + i32 0, label %bb5133.i + i32 2, label %bb5162.i + i32 4, label %bb5182.i + i32 10, label %bb5113.i + i32 11, label %bb5113.i + i32 12, label %bb5121.i + i32 13, label %bb5121.i + i32 14, label %bb5125.i + i32 15, label %bb5125.i + i32 16, label %bb5133.i + i32 17, label %bb5133.i + i32 18, label %bb5146.i + i32 19, label %bb5146.i + ] +bb5113.i: ; preds = %bb5103.i, %bb5103.i + switch i32 %dt4080.0.i, label %bb5208.i [ + i32 8, label %bb5115.i + i32 9, label %bb5115.i + i32 12, label %bb5117.i + i32 13, label %bb5117.i + i32 14, label %bb5119.i + i32 15, label %bb5119.i + ] +bb5115.i: ; preds = %bb5113.i, %bb5113.i + br label %bb5208.i +bb5117.i: ; preds = %bb5113.i, %bb5113.i + br label %bb5208.i +bb5119.i: ; preds = %bb5113.i, %bb5113.i + br label %bb5208.i +bb5121.i: ; preds = %bb5103.i, %bb5103.i + switch i32 %dt4080.0.i, label %bb5208.i [ + i32 8, label %bb5123.i + i32 9, label %bb5123.i + ] +bb5123.i: ; preds = %bb5121.i, %bb5121.i + br label %bb5208.i +bb5125.i: ; preds = %bb5103.i, %bb5103.i + switch i32 %dt4080.0.i, label %bb5208.i [ + i32 8, label %bb5127.i + i32 9, label %bb5127.i + i32 12, label %bb5129.i + i32 13, label %bb5129.i + ] +bb5127.i: ; preds = %bb5125.i, %bb5125.i + br label %bb5208.i +bb5129.i: ; preds = %bb5125.i, %bb5125.i + br label %bb5208.i +bb5133.i: ; preds = %bb5103.i, %bb5103.i, %bb5103.i + switch i32 
%dt4080.0.i, label %bb5208.i [ + i32 8, label %bb5135.i + i32 9, label %bb5135.i + i32 10, label %bb5137.i + i32 11, label %bb5137.i + i32 12, label %bb5139.i + i32 13, label %bb5139.i + i32 14, label %bb5143.i + i32 15, label %bb5143.i + ] +bb5135.i: ; preds = %bb5133.i, %bb5133.i + br label %bb5208.i +bb5137.i: ; preds = %bb5133.i, %bb5133.i + br label %bb5208.i +bb5139.i: ; preds = %bb5133.i, %bb5133.i + br label %bb5208.i +bb5143.i: ; preds = %bb5133.i, %bb5133.i + br label %bb5208.i +bb5146.i: ; preds = %bb5103.i, %bb5103.i + switch i32 %dt4080.0.i, label %bb5208.i [ + i32 0, label %bb5158.i + i32 8, label %bb5148.i + i32 9, label %bb5148.i + i32 10, label %bb5150.i + i32 11, label %bb5150.i + i32 12, label %bb5152.i + i32 13, label %bb5152.i + i32 14, label %bb5155.i + i32 15, label %bb5155.i + i32 16, label %bb5158.i + i32 17, label %bb5158.i + ] +bb5148.i: ; preds = %bb5146.i, %bb5146.i + br label %bb5208.i +bb5150.i: ; preds = %bb5146.i, %bb5146.i + br label %bb5208.i +bb5152.i: ; preds = %bb5146.i, %bb5146.i + br label %bb5208.i +bb5155.i: ; preds = %bb5146.i, %bb5146.i + br label %bb5208.i +bb5158.i: ; preds = %bb5146.i, %bb5146.i, %bb5146.i + br label %bb5208.i +bb5162.i: ; preds = %bb5103.i + switch i32 %dt4080.0.i, label %bb5208.i [ + i32 0, label %bb5175.i + i32 8, label %bb5164.i + i32 9, label %bb5164.i + i32 10, label %bb5166.i + i32 11, label %bb5166.i + i32 12, label %bb5168.i + i32 13, label %bb5168.i + i32 14, label %bb5172.i + i32 15, label %bb5172.i + i32 16, label %bb5175.i + i32 17, label %bb5175.i + i32 18, label %bb5179.i + i32 19, label %bb5179.i + ] +bb5164.i: ; preds = %bb5162.i, %bb5162.i + br label %bb5208.i +bb5166.i: ; preds = %bb5162.i, %bb5162.i + br label %bb5208.i +bb5168.i: ; preds = %bb5162.i, %bb5162.i + br label %bb5208.i +bb5172.i: ; preds = %bb5162.i, %bb5162.i + br label %bb5208.i +bb5175.i: ; preds = %bb5162.i, %bb5162.i, %bb5162.i + br label %bb5208.i +bb5179.i: ; preds = %bb5162.i, %bb5162.i + br label %bb5208.i 
+bb5182.i: ; preds = %bb5103.i + switch i32 %dt4080.0.i, label %bb5208.i [ + i32 0, label %bb5195.i + i32 2, label %bb5202.i + i32 8, label %bb5184.i + i32 9, label %bb5184.i + i32 10, label %bb5186.i + i32 11, label %bb5186.i + i32 12, label %bb5188.i + i32 13, label %bb5188.i + i32 14, label %bb5192.i + i32 15, label %bb5192.i + i32 16, label %bb5195.i + i32 17, label %bb5195.i + i32 18, label %bb5199.i + i32 19, label %bb5199.i + ] +bb5184.i: ; preds = %bb5182.i, %bb5182.i + br label %bb5208.i +bb5186.i: ; preds = %bb5182.i, %bb5182.i + br label %bb5208.i +bb5188.i: ; preds = %bb5182.i, %bb5182.i + br label %bb5208.i +bb5192.i: ; preds = %bb5182.i, %bb5182.i + br label %bb5208.i +bb5195.i: ; preds = %bb5182.i, %bb5182.i, %bb5182.i + br label %bb5208.i +bb5199.i: ; preds = %bb5182.i, %bb5182.i + br label %bb5208.i +bb5202.i: ; preds = %bb5182.i + br label %bb5208.i +bb5208.i: ; preds = %bb5202.i, %bb5199.i, %bb5195.i, %bb5192.i, %bb5188.i, %bb5186.i, %bb5184.i, %bb5182.i, %bb5179.i, %bb5175.i, %bb5172.i, %bb5168.i, %bb5166.i, %bb5164.i, %bb5162.i, %bb5158.i, %bb5155.i, %bb5152.i, %bb5150.i, %bb5148.i, %bb5146.i, %bb5143.i, %bb5139.i, %bb5137.i, %bb5135.i, %bb5133.i, %bb5129.i, %bb5127.i, %bb5125.i, %bb5123.i, %bb5121.i, %bb5119.i, %bb5117.i, %bb5115.i, %bb5113.i, %bb5103.i, %bb4819.i + switch i32 0, label %bb5221.i [ + i32 0, label %bb5210.i + i32 1, label %bb5211.i + i32 2, label %bb5212.i + i32 3, label %bb5213.i + i32 4, label %bb5214.i + i32 5, label %bb5215.i + i32 6, label %bb5217.i + i32 7, label %bb5216.i + i32 12, label %bb5218.i + i32 13, label %bb5218.i + i32 14, label %bb5219.i + i32 15, label %bb5219.i + i32 16, label %bb5210.i + i32 17, label %bb5210.i + i32 22, label %bb5210.i + i32 23, label %bb5210.i + ] +bb5210.i: ; preds = %bb5208.i, %bb5208.i, %bb5208.i, %bb5208.i, %bb5208.i + br label %bb5224.i +bb5211.i: ; preds = %bb5208.i + br label %bb5224.i +bb5212.i: ; preds = %bb5208.i + br label %bb5224.i +bb5213.i: ; preds = %bb5208.i + br label 
%bb5224.i +bb5214.i: ; preds = %bb5208.i + br label %bb5224.i +bb5215.i: ; preds = %bb5208.i + br label %bb5224.i +bb5216.i: ; preds = %bb5208.i + br label %bb5224.i +bb5217.i: ; preds = %bb5208.i + br label %bb5224.i +bb5218.i: ; preds = %bb5208.i, %bb5208.i + br label %bb5224.i +bb5219.i: ; preds = %bb5208.i, %bb5208.i + br label %bb5224.i +bb5221.i: ; preds = %bb5208.i + br label %bb5224.i +bb5224.i: ; preds = %bb5221.i, %bb5219.i, %bb5218.i, %bb5217.i, %bb5216.i, %bb5215.i, %bb5214.i, %bb5213.i, %bb5212.i, %bb5211.i, %bb5210.i + br label %bb5235.i +bb5235.i: ; preds = %bb5224.i, %bb1065.i.i, %bb1027.i.i, %bb1009.i.i, %bb989.i.i, %bb950.i.i, %bb930.i.i + br label %bb5272.i +bb5272.i: ; preds = %bb5235.i + br label %bb5276.i +bb5276.i: ; preds = %bb19808.i, %bb5272.i + br label %bb16607.i +bb5295.i: ; preds = %bb5295.preheader.i, %storeVecColor_RGB_UI.exit + br label %loadVecColor_BGRA_UI8888R.exit +loadVecColor_BGRA_UI8888R.exit: ; preds = %bb5295.i + br i1 false, label %bb5325.i, label %bb5351.i +bb5325.i: ; preds = %loadVecColor_BGRA_UI8888R.exit + br i1 false, label %bb4527.i, label %bb.i +bb.i: ; preds = %bb5325.i + switch i32 0, label %bb4527.i [ + i32 4, label %bb4362.i + i32 8, label %bb4448.i + ] +bb4362.i: ; preds = %bb.i + br i1 false, label %bb4532.i, label %bb5556.i +bb4448.i: ; preds = %bb.i + br label %bb4527.i +bb4527.i: ; preds = %bb4448.i, %bb.i, %bb5325.i + br i1 false, label %bb4532.i, label %bb5556.i +bb4532.i: ; preds = %bb4527.i, %bb4362.i + switch i32 0, label %bb4997.i [ + i32 6, label %bb4534.i + i32 7, label %bb4982.i + ] +bb4534.i: ; preds = %bb4532.i + br i1 false, label %bb4875.i, label %bb4619.i +bb4619.i: ; preds = %bb4534.i + br i1 false, label %bb4875.i, label %bb4663.i +bb4663.i: ; preds = %bb4619.i + br label %bb4855.i +bb4759.i: ; preds = %bb4855.i + br label %bb4855.i +bb4855.i: ; preds = %bb4759.i, %bb4663.i + br i1 false, label %bb4866.i, label %bb4759.i +bb4866.i: ; preds = %bb4855.i + br label %bb4875.i +bb4875.i: ; preds 
= %bb4866.i, %bb4619.i, %bb4534.i + br i1 false, label %bb4973.i, label %bb4922.i +bb4922.i: ; preds = %bb4875.i + br label %bb4973.i +bb4973.i: ; preds = %bb4922.i, %bb4875.i + br label %bb4982.i +bb4982.i: ; preds = %bb4973.i, %bb4532.i + br label %bb5041.i +bb4997.i: ; preds = %bb4532.i + br label %bb5041.i +bb5041.i: ; preds = %bb4997.i, %bb4982.i + switch i32 0, label %bb5464.i [ + i32 0, label %bb5344.i + i32 1, label %bb5374.i + i32 2, label %bb5404.i + i32 3, label %bb5434.i + i32 11, label %bb5263.i + ] +bb5263.i: ; preds = %bb5041.i + br i1 false, label %bb12038.i, label %bb5467.i +bb5344.i: ; preds = %bb5041.i + br i1 false, label %bb12038.i, label %bb5467.i +bb5374.i: ; preds = %bb5041.i + br i1 false, label %bb12038.i, label %bb5467.i +bb5404.i: ; preds = %bb5041.i + br i1 false, label %bb12038.i, label %bb5467.i +bb5434.i: ; preds = %bb5041.i + br label %bb5464.i +bb5464.i: ; preds = %bb5434.i, %bb5041.i + br i1 false, label %bb12038.i, label %bb5467.i +bb5467.i: ; preds = %bb5464.i, %bb5404.i, %bb5374.i, %bb5344.i, %bb5263.i + switch i32 0, label %bb15866.i [ + i32 3, label %bb13016.i + i32 4, label %bb12040.i + i32 8, label %bb12514.i + i32 10, label %bb12903.i + i32 11, label %bb12553.i + i32 16, label %bb12514.i + ] +bb5556.i: ; preds = %bb4527.i, %bb4362.i + switch i32 0, label %bb8990.i [ + i32 3, label %bb6403.i + i32 4, label %bb6924.i + i32 8, label %bb6924.i + i32 10, label %bb6403.i + i32 11, label %bb5882.i + i32 16, label %bb5558.i + ] +bb5558.i: ; preds = %bb5556.i + br label %bb8990.i +bb5882.i: ; preds = %bb5556.i + switch i32 0, label %bb6387.i [ + i32 1, label %bb6332.i + i32 3, label %bb6332.i + i32 4, label %bb6352.i + i32 6, label %bb5884.i + i32 7, label %bb8990.i + ] +bb5884.i: ; preds = %bb5882.i + br i1 false, label %bb6225.i, label %bb5969.i +bb5969.i: ; preds = %bb5884.i + br i1 false, label %bb6225.i, label %bb6013.i +bb6013.i: ; preds = %bb5969.i + br label %bb6205.i +bb6109.i: ; preds = %bb6205.i + br label %bb6205.i 
+bb6205.i: ; preds = %bb6109.i, %bb6013.i + br i1 false, label %bb6216.i, label %bb6109.i +bb6216.i: ; preds = %bb6205.i + br label %bb6225.i +bb6225.i: ; preds = %bb6216.i, %bb5969.i, %bb5884.i + br i1 false, label %bb6323.i, label %bb6272.i +bb6272.i: ; preds = %bb6225.i + switch i32 0, label %bb6908.i [ + i32 1, label %bb6853.i48 + i32 3, label %bb6853.i48 + i32 4, label %bb6873.i + i32 6, label %bb6405.i + i32 7, label %bb8990.i + ] +bb6323.i: ; preds = %bb6225.i + switch i32 0, label %bb6908.i [ + i32 1, label %bb6853.i48 + i32 3, label %bb6853.i48 + i32 4, label %bb6873.i + i32 6, label %bb6405.i + i32 7, label %bb8990.i + ] +bb6332.i: ; preds = %bb5882.i, %bb5882.i + switch i32 0, label %bb6908.i [ + i32 1, label %bb6853.i48 + i32 3, label %bb6853.i48 + i32 4, label %bb6873.i + i32 6, label %bb6405.i + i32 7, label %bb8990.i + ] +bb6352.i: ; preds = %bb5882.i + br label %bb6873.i +bb6387.i: ; preds = %bb5882.i + br label %bb6403.i +bb6403.i: ; preds = %bb6387.i, %bb5556.i, %bb5556.i + switch i32 0, label %bb6908.i [ + i32 1, label %bb6853.i48 + i32 3, label %bb6853.i48 + i32 4, label %bb6873.i + i32 6, label %bb6405.i + i32 7, label %bb8990.i + ] +bb6405.i: ; preds = %bb6403.i, %bb6332.i, %bb6323.i, %bb6272.i + br i1 false, label %bb6746.i, label %bb6490.i +bb6490.i: ; preds = %bb6405.i + br i1 false, label %bb6746.i, label %bb6534.i +bb6534.i: ; preds = %bb6490.i + br label %bb6726.i +bb6630.i: ; preds = %bb6726.i + br label %bb6726.i +bb6726.i: ; preds = %bb6630.i, %bb6534.i + br i1 false, label %bb6737.i, label %bb6630.i +bb6737.i: ; preds = %bb6726.i + br label %bb6746.i +bb6746.i: ; preds = %bb6737.i, %bb6490.i, %bb6405.i + br i1 false, label %bb6844.i, label %bb6793.i +bb6793.i: ; preds = %bb6746.i + br label %bb8990.i +bb6844.i: ; preds = %bb6746.i + br label %bb8990.i +bb6853.i48: ; preds = %bb6403.i, %bb6403.i, %bb6332.i, %bb6332.i, %bb6323.i, %bb6323.i, %bb6272.i, %bb6272.i + br label %bb8990.i +bb6873.i: ; preds = %bb6403.i, %bb6352.i, %bb6332.i, 
%bb6323.i, %bb6272.i + br label %bb8990.i +bb6908.i: ; preds = %bb6403.i, %bb6332.i, %bb6323.i, %bb6272.i + br label %bb8990.i +bb6924.i: ; preds = %bb5556.i, %bb5556.i + switch i32 0, label %bb8929.i [ + i32 1, label %bb8715.i + i32 3, label %bb8715.i + i32 4, label %bb8792.i + i32 6, label %bb6926.i + i32 7, label %bb8990.i + ] +bb6926.i: ; preds = %bb6924.i + br i1 false, label %bb7267.i, label %bb7011.i +bb7011.i: ; preds = %bb6926.i + br i1 false, label %bb7267.i, label %bb7055.i +bb7055.i: ; preds = %bb7011.i + br label %bb7247.i +bb7151.i: ; preds = %bb7247.i + br label %bb7247.i +bb7247.i: ; preds = %bb7151.i, %bb7055.i + br i1 false, label %bb7258.i, label %bb7151.i +bb7258.i: ; preds = %bb7247.i + br label %bb7267.i +bb7267.i: ; preds = %bb7258.i, %bb7011.i, %bb6926.i + br i1 false, label %bb7365.i, label %bb7314.i +bb7314.i: ; preds = %bb7267.i + br label %bb7365.i +bb7365.i: ; preds = %bb7314.i, %bb7267.i + br i1 false, label %bb7714.i, label %bb7458.i +bb7458.i: ; preds = %bb7365.i + br i1 false, label %bb7714.i, label %bb7502.i +bb7502.i: ; preds = %bb7458.i + br label %bb7694.i +bb7598.i: ; preds = %bb7694.i + br label %bb7694.i +bb7694.i: ; preds = %bb7598.i, %bb7502.i + br i1 false, label %bb7705.i, label %bb7598.i +bb7705.i: ; preds = %bb7694.i + br label %bb7714.i +bb7714.i: ; preds = %bb7705.i, %bb7458.i, %bb7365.i + br i1 false, label %bb7812.i, label %bb7761.i +bb7761.i: ; preds = %bb7714.i + br label %bb7812.i +bb7812.i: ; preds = %bb7761.i, %bb7714.i + br i1 false, label %bb8161.i, label %bb7905.i +bb7905.i: ; preds = %bb7812.i + br i1 false, label %bb8161.i, label %bb7949.i +bb7949.i: ; preds = %bb7905.i + br label %bb8141.i +bb8045.i: ; preds = %bb8141.i + br label %bb8141.i +bb8141.i: ; preds = %bb8045.i, %bb7949.i + br i1 false, label %bb8152.i, label %bb8045.i +bb8152.i: ; preds = %bb8141.i + br label %bb8161.i +bb8161.i: ; preds = %bb8152.i, %bb7905.i, %bb7812.i + br i1 false, label %bb8259.i, label %bb8208.i +bb8208.i: ; preds = 
%bb8161.i + br label %bb8259.i +bb8259.i: ; preds = %bb8208.i, %bb8161.i + br i1 false, label %bb8608.i, label %bb8352.i +bb8352.i: ; preds = %bb8259.i + br i1 false, label %bb8608.i, label %bb8396.i +bb8396.i: ; preds = %bb8352.i + br label %bb8588.i63 +bb8492.i: ; preds = %bb8588.i63 + br label %bb8588.i63 +bb8588.i63: ; preds = %bb8492.i, %bb8396.i + br i1 false, label %bb8599.i, label %bb8492.i +bb8599.i: ; preds = %bb8588.i63 + br label %bb8608.i +bb8608.i: ; preds = %bb8599.i, %bb8352.i, %bb8259.i + br i1 false, label %bb8706.i, label %bb8655.i +bb8655.i: ; preds = %bb8608.i + br label %bb8990.i +bb8706.i: ; preds = %bb8608.i + br label %bb8990.i +bb8715.i: ; preds = %bb6924.i, %bb6924.i + br label %bb8990.i +bb8792.i: ; preds = %bb6924.i + br label %bb8990.i +bb8929.i: ; preds = %bb6924.i + br label %bb8990.i +bb8990.i: ; preds = %bb8929.i, %bb8792.i, %bb8715.i, %bb8706.i, %bb8655.i, %bb6924.i, %bb6908.i, %bb6873.i, %bb6853.i48, %bb6844.i, %bb6793.i, %bb6403.i, %bb6332.i, %bb6323.i, %bb6272.i, %bb5882.i, %bb5558.i, %bb5556.i + switch i32 %sf4083.0.i, label %bb11184.i [ + i32 0, label %bb10372.i + i32 1, label %bb10609.i + i32 2, label %bb10811.i + i32 3, label %bb11013.i + i32 4, label %bb8992.i + i32 5, label %bb8992.i + i32 6, label %bb8992.i + i32 7, label %bb8992.i + i32 8, label %bb9195.i + i32 9, label %bb9195.i + i32 10, label %bb9965.i + i32 11, label %bb9585.i + i32 16, label %bb9195.i + ] +bb8992.i: ; preds = %bb8990.i, %bb8990.i, %bb8990.i, %bb8990.i + switch i32 0, label %bb11184.i [ + i32 0, label %bb9075.i + i32 1, label %bb9105.i + i32 2, label %bb9135.i + i32 3, label %bb9165.i + i32 11, label %bb8994.i + ] +bb8994.i: ; preds = %bb8992.i + br label %bb11247.i +bb9075.i: ; preds = %bb8992.i + br label %bb11247.i +bb9105.i: ; preds = %bb8992.i + br label %bb11247.i +bb9135.i: ; preds = %bb8992.i + br label %bb11247.i +bb9165.i: ; preds = %bb8992.i + br label %bb11247.i +bb9195.i: ; preds = %bb8990.i, %bb8990.i, %bb8990.i + switch i32 0, label 
%bb11184.i [ + i32 0, label %bb9491.i + i32 1, label %bb9521.i + i32 2, label %bb9551.i + i32 3, label %bb9581.i + i32 4, label %bb9197.i + i32 11, label %bb9342.i + ] +bb9197.i: ; preds = %bb9195.i + br label %bb11247.i +bb9342.i: ; preds = %bb9195.i + br label %bb11247.i +bb9491.i: ; preds = %bb9195.i + br label %bb11247.i +bb9521.i: ; preds = %bb9195.i + br label %bb11247.i +bb9551.i: ; preds = %bb9195.i + br label %bb11247.i +bb9581.i: ; preds = %bb9195.i + br label %bb11247.i +bb9585.i: ; preds = %bb8990.i + switch i32 0, label %bb11184.i [ + i32 0, label %bb9879.i + i32 1, label %bb9920.i + i32 2, label %bb9920.i + i32 3, label %bb9924.i + i32 4, label %bb9587.i + i32 8, label %bb9587.i + ] +bb9587.i: ; preds = %bb9585.i, %bb9585.i + br label %bb11247.i +bb9879.i: ; preds = %bb9585.i + br label %bb11247.i +bb9920.i: ; preds = %bb9585.i, %bb9585.i + br label %bb11247.i +bb9924.i: ; preds = %bb9585.i + br label %bb11247.i +bb9965.i: ; preds = %bb8990.i + switch i32 0, label %bb11184.i [ + i32 1, label %bb10368.i + i32 2, label %bb10368.i + i32 3, label %bb10364.i + i32 4, label %bb9967.i + i32 8, label %bb10127.i + i32 11, label %bb10287.i + ] +bb9967.i: ; preds = %bb9965.i + br label %bb11247.i +bb10127.i: ; preds = %bb9965.i + br label %bb11247.i +bb10287.i: ; preds = %bb9965.i + br label %bb11247.i +bb10364.i: ; preds = %bb9965.i + br label %bb11247.i +bb10368.i: ; preds = %bb9965.i, %bb9965.i + br label %bb11247.i +bb10372.i: ; preds = %bb8990.i + switch i32 0, label %bb11184.i [ + i32 1, label %bb10605.i + i32 2, label %bb10605.i + i32 3, label %bb10601.i + i32 4, label %bb10374.i + i32 8, label %bb10449.i + i32 11, label %bb10524.i + ] +bb10374.i: ; preds = %bb10372.i + br label %bb11247.i +bb10449.i: ; preds = %bb10372.i + br label %bb11247.i +bb10524.i: ; preds = %bb10372.i + br label %bb11247.i +bb10601.i: ; preds = %bb10372.i + br label %bb11247.i +bb10605.i: ; preds = %bb10372.i, %bb10372.i + br label %bb11247.i +bb10609.i: ; preds = %bb8990.i + 
switch i32 0, label %bb11184.i [ + i32 0, label %bb10807.i + i32 2, label %bb10807.i + i32 3, label %bb10803.i + i32 4, label %bb10611.i + i32 8, label %bb10686.i + i32 11, label %bb10761.i + ] +bb10611.i: ; preds = %bb10609.i + br label %bb11247.i +bb10686.i: ; preds = %bb10609.i + br label %bb11247.i +bb10761.i: ; preds = %bb10609.i + br label %bb11247.i +bb10803.i: ; preds = %bb10609.i + br label %bb11247.i +bb10807.i: ; preds = %bb10609.i, %bb10609.i + br label %bb11247.i +bb10811.i: ; preds = %bb8990.i + switch i32 0, label %bb11184.i [ + i32 0, label %bb11009.i + i32 1, label %bb11009.i + i32 3, label %bb11005.i + i32 4, label %bb10813.i + i32 8, label %bb10888.i + i32 11, label %bb10963.i + ] +bb10813.i: ; preds = %bb10811.i + br label %bb11247.i +bb10888.i: ; preds = %bb10811.i + br label %bb11247.i +bb10963.i: ; preds = %bb10811.i + br label %bb11247.i +bb11005.i: ; preds = %bb10811.i + br label %bb11247.i +bb11009.i: ; preds = %bb10811.i, %bb10811.i + br label %bb11247.i +bb11013.i: ; preds = %bb8990.i + switch i32 0, label %bb11184.i [ + i32 0, label %bb11180.i + i32 1, label %bb11180.i + i32 2, label %bb11180.i + i32 4, label %bb11015.i + i32 8, label %bb11090.i + i32 11, label %bb11103.i + ] +bb11015.i: ; preds = %bb11013.i + br label %bb11247.i +bb11090.i: ; preds = %bb11013.i + br label %bb11247.i +bb11103.i: ; preds = %bb11013.i + br label %bb11247.i +bb11180.i: ; preds = %bb11013.i, %bb11013.i, %bb11013.i + br label %bb11184.i +bb11184.i: ; preds = %bb11180.i, %bb11013.i, %bb10811.i, %bb10609.i, %bb10372.i, %bb9965.i, %bb9585.i, %bb9195.i, %bb8992.i, %bb8990.i + br label %bb11247.i +bb11247.i: ; preds = %bb11184.i, %bb11103.i, %bb11090.i, %bb11015.i, %bb11009.i, %bb11005.i, %bb10963.i, %bb10888.i, %bb10813.i, %bb10807.i, %bb10803.i, %bb10761.i, %bb10686.i, %bb10611.i, %bb10605.i, %bb10601.i, %bb10524.i, %bb10449.i, %bb10374.i, %bb10368.i, %bb10364.i, %bb10287.i, %bb10127.i, %bb9967.i, %bb9924.i, %bb9920.i, %bb9879.i, %bb9587.i, %bb9581.i, 
%bb9551.i, %bb9521.i, %bb9491.i, %bb9342.i, %bb9197.i, %bb9165.i, %bb9135.i, %bb9105.i, %bb9075.i, %bb8994.i + br i1 false, label %bb11250.i, label %bb11256.i +bb11250.i: ; preds = %bb11247.i + br label %bb11378.i +bb11256.i: ; preds = %bb11247.i + switch i32 0, label %bb11348.i [ + i32 4, label %bb11258.i + i32 8, label %bb11258.i + i32 11, label %bb11318.i + ] +bb11258.i: ; preds = %bb11256.i, %bb11256.i + br i1 false, label %bb11273.i, label %bb11261.i +bb11261.i: ; preds = %bb11258.i + br label %bb11273.i +bb11273.i: ; preds = %bb11261.i, %bb11258.i + br i1 false, label %bb11288.i, label %bb11276.i +bb11276.i: ; preds = %bb11273.i + br label %bb11288.i +bb11288.i: ; preds = %bb11276.i, %bb11273.i + br i1 false, label %bb11303.i, label %bb11291.i +bb11291.i: ; preds = %bb11288.i + br label %bb11303.i +bb11303.i: ; preds = %bb11291.i, %bb11288.i + br i1 false, label %bb11318.i, label %bb11306.i +bb11306.i: ; preds = %bb11303.i + br label %bb11318.i +bb11318.i: ; preds = %bb11306.i, %bb11303.i, %bb11256.i + br i1 false, label %bb11333.i, label %bb11321.i +bb11321.i: ; preds = %bb11318.i + br label %bb11333.i +bb11333.i: ; preds = %bb11321.i, %bb11318.i + br i1 false, label %bb11348.i, label %bb11336.i +bb11336.i: ; preds = %bb11333.i + br label %bb11348.i +bb11348.i: ; preds = %bb11336.i, %bb11333.i, %bb11256.i + br i1 false, label %bb11363.i, label %bb11351.i +bb11351.i: ; preds = %bb11348.i + br label %bb11363.i +bb11363.i: ; preds = %bb11351.i, %bb11348.i + br i1 false, label %bb11378.i, label %bb11366.i +bb11366.i: ; preds = %bb11363.i + br label %bb11378.i +bb11378.i: ; preds = %bb11366.i, %bb11363.i, %bb11250.i + br label %bb12038.i +bb12038.i: ; preds = %bb11378.i, %bb5464.i, %bb5404.i, %bb5374.i, %bb5344.i, %bb5263.i + switch i32 0, label %bb15866.i [ + i32 3, label %bb13016.i + i32 4, label %bb12040.i + i32 8, label %bb12514.i + i32 10, label %bb12903.i + i32 11, label %bb12553.i + i32 16, label %bb12514.i + ] +bb12040.i: ; preds = %bb12038.i, %bb5467.i + 
br label %bb13026.i +bb12514.i: ; preds = %bb12038.i, %bb12038.i, %bb5467.i, %bb5467.i + br label %bb13026.i +bb12553.i: ; preds = %bb12038.i, %bb5467.i + br i1 false, label %bb12558.i, label %bb12747.i +bb12558.i: ; preds = %bb12553.i + br i1 false, label %bb12666.i, label %bb12654.i +bb12654.i: ; preds = %bb12558.i + br label %bb12666.i +bb12666.i: ; preds = %bb12654.i, %bb12558.i + br label %bb12747.i +bb12747.i: ; preds = %bb12666.i, %bb12553.i + br label %bb13026.i +bb12903.i: ; preds = %bb12038.i, %bb5467.i + br i1 false, label %bb12908.i, label %bb13026.i +bb12908.i: ; preds = %bb12903.i + br i1 false, label %bb13026.i, label %bb13004.i +bb13004.i: ; preds = %bb12908.i + switch i32 0, label %bb15866.i [ + i32 3, label %bb13752.i + i32 4, label %bb14197.i + i32 8, label %bb14197.i + i32 10, label %bb13752.i + i32 11, label %bb13307.i + i32 16, label %bb13028.i + ] +bb13016.i: ; preds = %bb12038.i, %bb5467.i + br label %bb13026.i +bb13026.i: ; preds = %bb13016.i, %bb12908.i, %bb12903.i, %bb12747.i, %bb12514.i, %bb12040.i + switch i32 0, label %bb15866.i [ + i32 3, label %bb13752.i + i32 4, label %bb14197.i + i32 8, label %bb14197.i + i32 10, label %bb13752.i + i32 11, label %bb13307.i + i32 16, label %bb13028.i + ] +bb13028.i: ; preds = %bb13026.i, %bb13004.i + br i1 false, label %UnifiedReturnBlock.i177, label %bb15869.i +bb13307.i: ; preds = %bb13026.i, %bb13004.i + switch i32 %dt4080.0.i, label %bb13736.i [ + i32 6, label %bb13312.i + i32 1, label %bb13624.i + i32 3, label %bb13624.i + i32 5, label %bb13649.i + i32 4, label %bb13688.i + i32 7, label %bb15866.i + ] +bb13312.i: ; preds = %bb13307.i + br i1 false, label %bb13483.i, label %bb13400.i +bb13400.i: ; preds = %bb13312.i + br label %bb13483.i +bb13483.i: ; preds = %bb13400.i, %bb13312.i + br i1 false, label %bb13593.i, label %bb13505.i +bb13505.i: ; preds = %bb13483.i + switch i32 %dt4080.0.i, label %bb14181.i [ + i32 6, label %bb13757.i + i32 1, label %bb14069.i + i32 3, label %bb14069.i + i32 5, 
label %bb14094.i + i32 4, label %bb14133.i + i32 7, label %bb15866.i + ] +bb13593.i: ; preds = %bb13483.i + switch i32 %dt4080.0.i, label %bb14181.i [ + i32 6, label %bb13757.i + i32 1, label %bb14069.i + i32 3, label %bb14069.i + i32 5, label %bb14094.i + i32 4, label %bb14133.i + i32 7, label %bb15866.i + ] +bb13624.i: ; preds = %bb13307.i, %bb13307.i + switch i32 %dt4080.0.i, label %bb14181.i [ + i32 6, label %bb13757.i + i32 1, label %bb14069.i + i32 3, label %bb14069.i + i32 5, label %bb14094.i + i32 4, label %bb14133.i + i32 7, label %bb15866.i + ] +bb13649.i: ; preds = %bb13307.i + br label %bb14094.i +bb13688.i: ; preds = %bb13307.i + br label %bb14133.i +bb13736.i: ; preds = %bb13307.i + br label %bb13752.i +bb13752.i: ; preds = %bb13736.i, %bb13026.i, %bb13026.i, %bb13004.i, %bb13004.i + switch i32 %dt4080.0.i, label %bb14181.i [ + i32 6, label %bb13757.i + i32 1, label %bb14069.i + i32 3, label %bb14069.i + i32 5, label %bb14094.i + i32 4, label %bb14133.i + i32 7, label %bb15866.i + ] +bb13757.i: ; preds = %bb13752.i, %bb13624.i, %bb13593.i, %bb13505.i + br i1 false, label %bb13928.i, label %bb13845.i +bb13845.i: ; preds = %bb13757.i + br label %bb13928.i +bb13928.i: ; preds = %bb13845.i, %bb13757.i + br i1 false, label %bb14038.i, label %bb13950.i +bb13950.i: ; preds = %bb13928.i + br i1 false, label %UnifiedReturnBlock.i177, label %bb15869.i +bb14038.i: ; preds = %bb13928.i + br i1 false, label %UnifiedReturnBlock.i177, label %bb15869.i +bb14069.i: ; preds = %bb13752.i, %bb13752.i, %bb13624.i, %bb13624.i, %bb13593.i, %bb13593.i, %bb13505.i, %bb13505.i + br i1 false, label %UnifiedReturnBlock.i177, label %bb15869.i +bb14094.i: ; preds = %bb13752.i, %bb13649.i, %bb13624.i, %bb13593.i, %bb13505.i + br i1 false, label %UnifiedReturnBlock.i177, label %bb15869.i +bb14133.i: ; preds = %bb13752.i, %bb13688.i, %bb13624.i, %bb13593.i, %bb13505.i + br i1 false, label %UnifiedReturnBlock.i177, label %bb15869.i +bb14181.i: ; preds = %bb13752.i, %bb13624.i, 
%bb13593.i, %bb13505.i + br i1 false, label %UnifiedReturnBlock.i177, label %bb15869.i +bb14197.i: ; preds = %bb13026.i, %bb13026.i, %bb13004.i, %bb13004.i + switch i32 %dt4080.0.i, label %bb15805.i [ + i32 6, label %bb14202.i + i32 1, label %bb15411.i + i32 3, label %bb15411.i + i32 5, label %bb15493.i + i32 4, label %bb15631.i + i32 7, label %bb15866.i + ] +bb14202.i: ; preds = %bb14197.i + br i1 false, label %bb14373.i, label %bb14290.i +bb14290.i: ; preds = %bb14202.i + br label %bb14373.i +bb14373.i: ; preds = %bb14290.i, %bb14202.i + br i1 false, label %bb14483.i, label %bb14395.i +bb14395.i: ; preds = %bb14373.i + br label %bb14483.i +bb14483.i: ; preds = %bb14395.i, %bb14373.i + br i1 false, label %bb14672.i, label %bb14589.i +bb14589.i: ; preds = %bb14483.i + br label %bb14672.i +bb14672.i: ; preds = %bb14589.i, %bb14483.i + br i1 false, label %bb14782.i, label %bb14694.i +bb14694.i: ; preds = %bb14672.i + br label %bb14782.i +bb14782.i: ; preds = %bb14694.i, %bb14672.i + br i1 false, label %bb14971.i, label %bb14888.i +bb14888.i: ; preds = %bb14782.i + br label %bb14971.i +bb14971.i: ; preds = %bb14888.i, %bb14782.i + br i1 false, label %bb15081.i, label %bb14993.i +bb14993.i: ; preds = %bb14971.i + br label %bb15081.i +bb15081.i: ; preds = %bb14993.i, %bb14971.i + br i1 false, label %bb15270.i, label %bb15187.i +bb15187.i: ; preds = %bb15081.i + br label %bb15270.i +bb15270.i: ; preds = %bb15187.i, %bb15081.i + br i1 false, label %bb15380.i, label %bb15292.i +bb15292.i: ; preds = %bb15270.i + br i1 false, label %UnifiedReturnBlock.i177, label %bb15869.i +bb15380.i: ; preds = %bb15270.i + br i1 false, label %UnifiedReturnBlock.i177, label %bb15869.i +bb15411.i: ; preds = %bb14197.i, %bb14197.i + br i1 false, label %UnifiedReturnBlock.i177, label %bb15869.i +bb15493.i: ; preds = %bb14197.i + br i1 false, label %UnifiedReturnBlock.i177, label %bb15869.i +bb15631.i: ; preds = %bb14197.i + br i1 false, label %UnifiedReturnBlock.i177, label %bb15869.i 
+bb15805.i: ; preds = %bb14197.i + br label %bb15866.i +bb15866.i: ; preds = %bb15805.i, %bb14197.i, %bb13752.i, %bb13624.i, %bb13593.i, %bb13505.i, %bb13307.i, %bb13026.i, %bb13004.i, %bb12038.i, %bb5467.i + br i1 false, label %UnifiedReturnBlock.i177, label %bb15869.i +bb15869.i: ; preds = %bb15866.i, %bb15631.i, %bb15493.i, %bb15411.i, %bb15380.i, %bb15292.i, %bb14181.i, %bb14133.i, %bb14094.i, %bb14069.i, %bb14038.i, %bb13950.i, %bb13028.i + switch i32 0, label %UnifiedReturnBlock.i177 [ + i32 4, label %bb15874.i + i32 8, label %bb15960.i + ] +bb15874.i: ; preds = %bb15869.i + br label %glgVectorFloatConversion.exit +bb15960.i: ; preds = %bb15869.i + br label %glgVectorFloatConversion.exit +UnifiedReturnBlock.i177: ; preds = %bb15869.i, %bb15866.i, %bb15631.i, %bb15493.i, %bb15411.i, %bb15380.i, %bb15292.i, %bb14181.i, %bb14133.i, %bb14094.i, %bb14069.i, %bb14038.i, %bb13950.i, %bb13028.i + br label %glgVectorFloatConversion.exit +glgVectorFloatConversion.exit: ; preds = %UnifiedReturnBlock.i177, %bb15960.i, %bb15874.i + br label %bb16581.i +bb5351.i: ; preds = %loadVecColor_BGRA_UI8888R.exit + br i1 false, label %bb5359.i, label %bb5586.i +bb5359.i: ; preds = %bb5351.i + switch i32 0, label %bb5586.i [ + i32 0, label %bb5361.i + i32 1, label %bb5511.i + i32 2, label %bb5511.i + ] +bb5361.i: ; preds = %bb5359.i + br i1 false, label %bb5366.i, label %bb5379.i +bb5366.i: ; preds = %bb5361.i + br label %bb7230.i +bb5379.i: ; preds = %bb5361.i + switch i32 %sf4083.0.i, label %bb5415.i [ + i32 1, label %bb5384.i + i32 2, label %bb5402.i + ] +bb5384.i: ; preds = %bb5379.i + switch i32 0, label %bb7230.i [ + i32 4, label %bb5445.i + i32 8, label %bb5445.i + i32 11, label %bb5445.i + ] +bb5402.i: ; preds = %bb5379.i + switch i32 0, label %bb7230.i [ + i32 4, label %bb5445.i + i32 8, label %bb5445.i + i32 11, label %bb5445.i + ] +bb5415.i: ; preds = %bb5379.i + switch i32 0, label %bb7230.i [ + i32 4, label %bb5445.i + i32 8, label %bb5445.i + i32 11, label %bb5445.i + 
] +bb5445.i: ; preds = %bb5415.i, %bb5415.i, %bb5415.i, %bb5402.i, %bb5402.i, %bb5402.i, %bb5384.i, %bb5384.i, %bb5384.i + switch i32 0, label %bb7230.i [ + i32 4, label %bb5470.i + i32 8, label %bb5470.i + i32 11, label %bb6853.i + ] +bb5470.i: ; preds = %bb5445.i, %bb5445.i + switch i32 0, label %bb7230.i [ + i32 4, label %bb5498.i + i32 8, label %bb5493.i + i32 11, label %bb6853.i + ] +bb5493.i: ; preds = %bb5470.i + br i1 false, label %bb5498.i, label %bb5586.i +bb5498.i: ; preds = %bb5493.i, %bb5470.i + switch i32 0, label %bb7230.i [ + i32 4, label %bb5591.i + i32 8, label %bb6153.i + i32 11, label %bb6853.i + ] +bb5511.i: ; preds = %bb5359.i, %bb5359.i + br i1 false, label %bb5568.i, label %bb5586.i +bb5568.i: ; preds = %bb5511.i + br label %bb5586.i +bb5586.i: ; preds = %bb5568.i, %bb5511.i, %bb5493.i, %bb5359.i, %bb5351.i + switch i32 0, label %bb7230.i [ + i32 4, label %bb5591.i + i32 8, label %bb6153.i + i32 11, label %bb6853.i + ] +bb5591.i: ; preds = %bb5586.i, %bb5498.i + switch i32 0, label %bb5995.i [ + i32 4, label %bb5596.i + i32 8, label %bb5680.i + i32 11, label %bb5842.i + ] +bb5596.i: ; preds = %bb5591.i + br i1 false, label %bb8428.i, label %bb5602.i +bb5602.i: ; preds = %bb5596.i + br i1 false, label %bb8668.i, label %bb8434.i +bb5680.i: ; preds = %bb5591.i + br i1 false, label %bb5692.i, label %bb5764.i +bb5692.i: ; preds = %bb5680.i + br i1 false, label %bb8668.i, label %bb8434.i +bb5764.i: ; preds = %bb5680.i + br i1 false, label %bb8428.i, label %bb5772.i +bb5772.i: ; preds = %bb5764.i + br i1 false, label %bb8668.i, label %bb8434.i +bb5842.i: ; preds = %bb5591.i + br i1 false, label %bb5920.i, label %bb5845.i +bb5845.i: ; preds = %bb5842.i + br i1 false, label %bb8668.i, label %bb8434.i +bb5920.i: ; preds = %bb5842.i + br i1 false, label %bb8668.i, label %bb8434.i +bb5995.i: ; preds = %bb5591.i + switch i32 %df4081.0.i, label %bb8428.i [ + i32 0, label %bb6007.i + i32 10, label %bb6007.i + i32 1, label %bb6042.i + i32 2, label %bb6079.i 
+ i32 3, label %bb6116.i + ] +bb6007.i: ; preds = %bb5995.i, %bb5995.i + br i1 false, label %bb6012.i, label %bb8428.i +bb6012.i: ; preds = %bb6007.i + br i1 false, label %bb8668.i, label %bb8434.i +bb6042.i: ; preds = %bb5995.i + br i1 false, label %bb6049.i, label %bb6045.i +bb6045.i: ; preds = %bb6042.i + br i1 false, label %bb8668.i, label %bb8434.i +bb6049.i: ; preds = %bb6042.i + br i1 false, label %bb8668.i, label %bb8434.i +bb6079.i: ; preds = %bb5995.i + br i1 false, label %bb6086.i, label %bb6082.i +bb6082.i: ; preds = %bb6079.i + br i1 false, label %bb8668.i, label %bb8434.i +bb6086.i: ; preds = %bb6079.i + br i1 false, label %bb8668.i, label %bb8434.i +bb6116.i: ; preds = %bb5995.i + br i1 false, label %bb6123.i, label %bb6119.i +bb6119.i: ; preds = %bb6116.i + br i1 false, label %bb8668.i, label %bb8434.i +bb6123.i: ; preds = %bb6116.i + br i1 false, label %bb8668.i, label %bb8434.i +bb6153.i: ; preds = %bb5586.i, %bb5498.i + switch i32 0, label %bb6724.i [ + i32 4, label %bb6158.i + i32 8, label %bb6459.i + i32 11, label %bb6621.i + ] +bb6158.i: ; preds = %bb6153.i + br i1 false, label %bb6242.i, label %bb6161.i +bb6161.i: ; preds = %bb6158.i + br i1 false, label %bb6239.i, label %bb6166.i +bb6166.i: ; preds = %bb6161.i + br i1 false, label %bb8668.i, label %bb8434.i +bb6239.i: ; preds = %bb6161.i + br i1 false, label %bb8668.i, label %bb8434.i +bb6242.i: ; preds = %bb6158.i + br i1 false, label %bb6245.i, label %bb6317.i +bb6245.i: ; preds = %bb6242.i + br i1 false, label %bb8668.i, label %bb8434.i +bb6317.i: ; preds = %bb6242.i + br i1 false, label %bb8668.i, label %bb8434.i +bb6459.i: ; preds = %bb6153.i + br i1 false, label %bb6471.i, label %bb6543.i +bb6471.i: ; preds = %bb6459.i + br i1 false, label %bb8668.i, label %bb8434.i +bb6543.i: ; preds = %bb6459.i + br i1 false, label %bb8428.i, label %bb6551.i +bb6551.i: ; preds = %bb6543.i + br i1 false, label %bb8668.i, label %bb8434.i +bb6621.i: ; preds = %bb6153.i + br i1 false, label %bb6626.i, 
label %bb6651.i +bb6626.i: ; preds = %bb6621.i + br label %bb6651.i +bb6651.i: ; preds = %bb6626.i, %bb6621.i + br i1 false, label %bb8668.i, label %bb8434.i +bb6724.i: ; preds = %bb6153.i + switch i32 %df4081.0.i, label %bb8428.i [ + i32 0, label %bb6736.i + i32 10, label %bb6736.i + i32 1, label %bb6771.i + i32 2, label %bb6808.i + i32 3, label %bb6845.i + ] +bb6736.i: ; preds = %bb6724.i, %bb6724.i + br i1 false, label %bb6741.i, label %bb8428.i +bb6741.i: ; preds = %bb6736.i + br i1 false, label %bb8668.i, label %bb8434.i +bb6771.i: ; preds = %bb6724.i + br i1 false, label %bb6778.i, label %bb6774.i +bb6774.i: ; preds = %bb6771.i + br i1 false, label %bb8668.i, label %bb8434.i +bb6778.i: ; preds = %bb6771.i + br i1 false, label %bb8668.i, label %bb8434.i +bb6808.i: ; preds = %bb6724.i + br i1 false, label %bb6815.i, label %bb6811.i +bb6811.i: ; preds = %bb6808.i + br i1 false, label %bb8668.i, label %bb8434.i +bb6815.i: ; preds = %bb6808.i + br i1 false, label %bb8668.i, label %bb8434.i +bb6845.i: ; preds = %bb6724.i + br i1 false, label %bb8668.i, label %bb8434.i +bb6853.i: ; preds = %bb5586.i, %bb5498.i, %bb5470.i, %bb5445.i + switch i32 0, label %bb8428.i [ + i32 4, label %bb6858.i + i32 8, label %bb7072.i + i32 10, label %bb7149.i + i32 3, label %bb7192.i + ] +bb6858.i: ; preds = %bb6853.i + br i1 false, label %bb6942.i, label %bb6861.i +bb6861.i: ; preds = %bb6858.i + br i1 false, label %bb8668.i, label %bb8434.i +bb6942.i: ; preds = %bb6858.i + br i1 false, label %bb8668.i, label %bb8434.i +bb7072.i: ; preds = %bb6853.i + br i1 false, label %bb7119.i, label %bb7075.i +bb7075.i: ; preds = %bb7072.i + br i1 false, label %bb8668.i, label %bb8434.i +bb7119.i: ; preds = %bb7072.i + br i1 false, label %bb8668.i, label %bb8434.i +bb7149.i: ; preds = %bb6853.i + br i1 false, label %bb8668.i, label %bb8434.i +bb7192.i: ; preds = %bb6853.i + br i1 false, label %bb8668.i, label %bb8434.i +bb7230.i: ; preds = %bb5586.i, %bb5498.i, %bb5470.i, %bb5445.i, %bb5415.i, 
%bb5402.i, %bb5384.i, %bb5366.i + switch i32 %sf4083.0.i, label %bb8428.i [ + i32 10, label %bb7235.i + i32 0, label %bb7455.i + i32 1, label %bb7725.i + i32 2, label %bb7978.i + i32 3, label %bb8231.i + ] +bb7235.i: ; preds = %bb7230.i + switch i32 0, label %bb7442.i [ + i32 4, label %bb7240.i + i32 8, label %bb7329.i + i32 11, label %bb7369.i + ] +bb7240.i: ; preds = %bb7235.i + br i1 false, label %bb7252.i, label %bb7243.i +bb7243.i: ; preds = %bb7240.i + br i1 false, label %bb8668.i, label %bb8434.i +bb7252.i: ; preds = %bb7240.i + br i1 false, label %bb8668.i, label %bb8434.i +bb7329.i: ; preds = %bb7235.i + br i1 false, label %bb7339.i, label %bb7332.i +bb7332.i: ; preds = %bb7329.i + br i1 false, label %bb8668.i, label %bb8434.i +bb7339.i: ; preds = %bb7329.i + br i1 false, label %bb8668.i, label %bb8434.i +bb7369.i: ; preds = %bb7235.i + br i1 false, label %bb8668.i, label %bb8434.i +bb7442.i: ; preds = %bb7235.i + br i1 false, label %bb7447.i, label %bb8428.i +bb7447.i: ; preds = %bb7442.i + br i1 false, label %bb8668.i, label %bb8434.i +bb7455.i: ; preds = %bb7230.i + switch i32 0, label %bb7703.i [ + i32 4, label %bb7460.i + i32 8, label %bb7546.i + i32 11, label %bb7630.i + ] +bb7460.i: ; preds = %bb7455.i + br i1 false, label %bb7471.i, label %bb7463.i +bb7463.i: ; preds = %bb7460.i + br i1 false, label %bb8668.i, label %bb8434.i +bb7471.i: ; preds = %bb7460.i + br i1 false, label %bb8668.i, label %bb8434.i +bb7546.i: ; preds = %bb7455.i + br i1 false, label %bb7555.i, label %bb7549.i +bb7549.i: ; preds = %bb7546.i + br i1 false, label %bb8668.i, label %bb8434.i +bb7555.i: ; preds = %bb7546.i + br i1 false, label %bb8668.i, label %bb8434.i +bb7630.i: ; preds = %bb7455.i + br i1 false, label %bb8668.i, label %bb8434.i +bb7703.i: ; preds = %bb7455.i + br i1 false, label %bb7709.i, label %bb7712.i +bb7709.i: ; preds = %bb7703.i + br i1 false, label %bb8668.i, label %bb8434.i +bb7712.i: ; preds = %bb7703.i + br i1 false, label %bb7717.i, label %bb8428.i 
+bb7717.i: ; preds = %bb7712.i + br i1 false, label %bb8668.i, label %bb8434.i +bb7725.i: ; preds = %bb7230.i + switch i32 0, label %bb7945.i [ + i32 4, label %bb7730.i + i32 8, label %bb7819.i + i32 11, label %bb7906.i + ] +bb7730.i: ; preds = %bb7725.i + br i1 false, label %bb7744.i, label %bb7733.i +bb7733.i: ; preds = %bb7730.i + br i1 false, label %bb8668.i, label %bb8434.i +bb7744.i: ; preds = %bb7730.i + br i1 false, label %bb8668.i, label %bb8434.i +bb7819.i: ; preds = %bb7725.i + br i1 false, label %bb7831.i, label %bb7822.i +bb7822.i: ; preds = %bb7819.i + br i1 false, label %bb8668.i, label %bb8434.i +bb7831.i: ; preds = %bb7819.i + br i1 false, label %bb8668.i, label %bb8434.i +bb7906.i: ; preds = %bb7725.i + br i1 false, label %bb8668.i, label %bb8434.i +bb7945.i: ; preds = %bb7725.i + switch i32 %df4081.0.i, label %bb8428.i [ + i32 0, label %bb7962.i + i32 2, label %bb7962.i + i32 10, label %bb7962.i + i32 3, label %bb7970.i + ] +bb7962.i: ; preds = %bb7945.i, %bb7945.i, %bb7945.i + br i1 false, label %bb8668.i, label %bb8434.i +bb7970.i: ; preds = %bb7945.i + br i1 false, label %bb8668.i, label %bb8434.i +bb7978.i: ; preds = %bb7230.i + switch i32 0, label %bb8198.i [ + i32 4, label %bb7983.i + i32 8, label %bb8072.i + i32 11, label %bb8159.i + ] +bb7983.i: ; preds = %bb7978.i + br i1 false, label %bb7997.i, label %bb7986.i +bb7986.i: ; preds = %bb7983.i + br i1 false, label %bb8668.i, label %bb8434.i +bb7997.i: ; preds = %bb7983.i + br i1 false, label %bb8668.i, label %bb8434.i +bb8072.i: ; preds = %bb7978.i + br i1 false, label %bb8084.i, label %bb8075.i +bb8075.i: ; preds = %bb8072.i + br i1 false, label %bb8668.i, label %bb8434.i +bb8084.i: ; preds = %bb8072.i + br i1 false, label %bb8668.i, label %bb8434.i +bb8159.i: ; preds = %bb7978.i + br i1 false, label %bb8668.i, label %bb8434.i +bb8198.i: ; preds = %bb7978.i + switch i32 %df4081.0.i, label %bb8428.i [ + i32 0, label %bb8215.i + i32 1, label %bb8215.i + i32 10, label %bb8215.i + i32 3, 
label %bb8223.i + ] +bb8215.i: ; preds = %bb8198.i, %bb8198.i, %bb8198.i + br i1 false, label %bb8668.i, label %bb8434.i +bb8223.i: ; preds = %bb8198.i + br i1 false, label %bb8668.i, label %bb8434.i +bb8231.i: ; preds = %bb7230.i + switch i32 0, label %bb8428.i [ + i32 4, label %bb8236.i + i32 8, label %bb8326.i + i32 11, label %bb8347.i + i32 10, label %bb8425.i + ] +bb8236.i: ; preds = %bb8231.i + br i1 false, label %bb8251.i, label %bb8239.i +bb8239.i: ; preds = %bb8236.i + br i1 false, label %bb8668.i, label %bb8434.i +bb8251.i: ; preds = %bb8236.i + br i1 false, label %bb8668.i, label %bb8434.i +bb8326.i: ; preds = %bb8231.i + br i1 false, label %bb8339.i, label %bb8428.i +bb8339.i: ; preds = %bb8326.i + br i1 false, label %bb8668.i, label %bb8434.i +bb8347.i: ; preds = %bb8231.i + br i1 false, label %bb8668.i, label %bb8434.i +bb8425.i: ; preds = %bb8231.i + br label %bb8428.i +bb8428.i: ; preds = %bb8425.i, %bb8326.i, %bb8231.i, %bb8198.i, %bb7945.i, %bb7712.i, %bb7442.i, %bb7230.i, %bb6853.i, %bb6736.i, %bb6724.i, %bb6543.i, %bb6007.i, %bb5995.i, %bb5764.i, %bb5596.i + br i1 false, label %bb8668.i, label %bb8434.i +bb8434.i: ; preds = %bb8428.i, %bb8347.i, %bb8339.i, %bb8251.i, %bb8239.i, %bb8223.i, %bb8215.i, %bb8159.i, %bb8084.i, %bb8075.i, %bb7997.i, %bb7986.i, %bb7970.i, %bb7962.i, %bb7906.i, %bb7831.i, %bb7822.i, %bb7744.i, %bb7733.i, %bb7717.i, %bb7709.i, %bb7630.i, %bb7555.i, %bb7549.i, %bb7471.i, %bb7463.i, %bb7447.i, %bb7369.i, %bb7339.i, %bb7332.i, %bb7252.i, %bb7243.i, %bb7192.i, %bb7149.i, %bb7119.i, %bb7075.i, %bb6942.i, %bb6861.i, %bb6845.i, %bb6815.i, %bb6811.i, %bb6778.i, %bb6774.i, %bb6741.i, %bb6651.i, %bb6551.i, %bb6471.i, %bb6317.i, %bb6245.i, %bb6239.i, %bb6166.i, %bb6123.i, %bb6119.i, %bb6086.i, %bb6082.i, %bb6049.i, %bb6045.i, %bb6012.i, %bb5920.i, %bb5845.i, %bb5772.i, %bb5692.i, %bb5602.i + switch i32 0, label %bb8668.i [ + i32 0, label %bb8436.i + i32 1, label %bb8531.i + i32 2, label %bb8531.i + ] +bb8436.i: ; preds = %bb8434.i + 
switch i32 0, label %bb9310.i [ + i32 4, label %bb8465.i + i32 8, label %bb8465.i + i32 11, label %bb8465.i + i32 3, label %bb9301.i + ] +bb8465.i: ; preds = %bb8436.i, %bb8436.i, %bb8436.i + switch i32 0, label %bb9310.i [ + i32 4, label %bb8490.i + i32 8, label %bb8490.i + i32 3, label %bb9301.i + i32 11, label %bb9153.i + ] +bb8490.i: ; preds = %bb8465.i, %bb8465.i + switch i32 0, label %bb9310.i [ + i32 4, label %bb8518.i + i32 8, label %bb8513.i + i32 3, label %bb9301.i + i32 11, label %bb9153.i + ] +bb8513.i: ; preds = %bb8490.i + br i1 false, label %bb8518.i, label %bb8668.i +bb8518.i: ; preds = %bb8513.i, %bb8490.i + switch i32 0, label %bb9310.i [ + i32 3, label %bb9301.i + i32 4, label %bb8670.i + i32 8, label %bb9112.i + i32 11, label %bb9153.i + ] +bb8531.i: ; preds = %bb8434.i, %bb8434.i + br i1 false, label %bb8536.i, label %bb8575.i +bb8536.i: ; preds = %bb8531.i + br i1 false, label %bb8557.i, label %bb8588.i +bb8557.i: ; preds = %bb8536.i + switch i32 0, label %bb9310.i [ + i32 4, label %bb8600.i + i32 8, label %bb8600.i + i32 3, label %bb9301.i + i32 11, label %bb9153.i + ] +bb8575.i: ; preds = %bb8531.i + br label %bb8588.i +bb8588.i: ; preds = %bb8575.i, %bb8536.i + switch i32 0, label %bb9310.i [ + i32 4, label %bb8600.i + i32 8, label %bb8600.i + i32 3, label %bb9301.i + i32 11, label %bb9153.i + ] +bb8600.i: ; preds = %bb8588.i, %bb8588.i, %bb8557.i, %bb8557.i + switch i32 0, label %bb9310.i [ + i32 4, label %bb8629.i + i32 3, label %bb9301.i + i32 8, label %bb9112.i + i32 11, label %bb9153.i + ] +bb8629.i: ; preds = %bb8600.i + br i1 false, label %bb8650.i, label %bb8668.i +bb8650.i: ; preds = %bb8629.i + br label %bb8668.i +bb8668.i: ; preds = %bb8650.i, %bb8629.i, %bb8513.i, %bb8434.i, %bb8428.i, %bb8347.i, %bb8339.i, %bb8251.i, %bb8239.i, %bb8223.i, %bb8215.i, %bb8159.i, %bb8084.i, %bb8075.i, %bb7997.i, %bb7986.i, %bb7970.i, %bb7962.i, %bb7906.i, %bb7831.i, %bb7822.i, %bb7744.i, %bb7733.i, %bb7717.i, %bb7709.i, %bb7630.i, %bb7555.i, 
%bb7549.i, %bb7471.i, %bb7463.i, %bb7447.i, %bb7369.i, %bb7339.i, %bb7332.i, %bb7252.i, %bb7243.i, %bb7192.i, %bb7149.i, %bb7119.i, %bb7075.i, %bb6942.i, %bb6861.i, %bb6845.i, %bb6815.i, %bb6811.i, %bb6778.i, %bb6774.i, %bb6741.i, %bb6651.i, %bb6551.i, %bb6471.i, %bb6317.i, %bb6245.i, %bb6239.i, %bb6166.i, %bb6123.i, %bb6119.i, %bb6086.i, %bb6082.i, %bb6049.i, %bb6045.i, %bb6012.i, %bb5920.i, %bb5845.i, %bb5772.i, %bb5692.i, %bb5602.i + switch i32 0, label %bb9310.i [ + i32 3, label %bb9301.i + i32 4, label %bb8670.i + i32 8, label %bb9112.i + i32 11, label %bb9153.i + ] +bb8670.i: ; preds = %bb8668.i, %bb8518.i + br label %bb9310.i +bb9112.i: ; preds = %bb8668.i, %bb8600.i, %bb8518.i + br label %bb9310.i +bb9153.i: ; preds = %bb8668.i, %bb8600.i, %bb8588.i, %bb8557.i, %bb8518.i, %bb8490.i, %bb8465.i + br label %bb9310.i +bb9301.i: ; preds = %bb8668.i, %bb8600.i, %bb8588.i, %bb8557.i, %bb8518.i, %bb8490.i, %bb8465.i, %bb8436.i + br label %bb9310.i +bb9310.i: ; preds = %bb9301.i, %bb9153.i, %bb9112.i, %bb8670.i, %bb8668.i, %bb8600.i, %bb8588.i, %bb8557.i, %bb8518.i, %bb8490.i, %bb8465.i, %bb8436.i + br i1 false, label %bb16581.i, label %bb9313.i +bb9313.i: ; preds = %bb9310.i + switch i32 %dt4080.0.i, label %bb16578.i [ + i32 0, label %bb9315.i + i32 1, label %bb9890.i + i32 2, label %bb10465.i + i32 3, label %bb11040.i + i32 4, label %bb11615.i + i32 5, label %bb11823.i + i32 8, label %bb12398.i + i32 9, label %bb12833.i + i32 10, label %bb13268.i + i32 11, label %bb13268.i + i32 12, label %bb13703.i + i32 13, label %bb13703.i + i32 14, label %bb14278.i + i32 15, label %bb14853.i + i32 16, label %bb9315.i + i32 17, label %bb9315.i + i32 18, label %bb15428.i + i32 19, label %bb16003.i + ] +bb9315.i: ; preds = %bb9313.i, %bb9313.i, %bb9313.i + br i1 false, label %bb9535.i, label %bb9323.i +bb9323.i: ; preds = %bb9315.i + br label %bb9535.i +bb9535.i: ; preds = %bb9323.i, %bb9315.i + br label %bb16581.i +bb9890.i: ; preds = %bb9313.i + br i1 false, label %bb10255.i, 
label %bb9898.i +bb9898.i: ; preds = %bb9890.i + br label %bb10255.i +bb10255.i: ; preds = %bb9898.i, %bb9890.i + br label %bb16581.i +bb10465.i: ; preds = %bb9313.i + br i1 false, label %bb10685.i, label %bb10473.i +bb10473.i: ; preds = %bb10465.i + br label %bb10685.i +bb10685.i: ; preds = %bb10473.i, %bb10465.i + br label %bb16581.i +bb11040.i: ; preds = %bb9313.i + br i1 false, label %bb11405.i, label %bb11048.i +bb11048.i: ; preds = %bb11040.i + br label %bb11405.i +bb11405.i: ; preds = %bb11048.i, %bb11040.i + br label %bb16581.i +bb11615.i: ; preds = %bb9313.i + br i1 false, label %bb16581.i, label %bb11618.i +bb11618.i: ; preds = %bb11615.i + br label %bb16581.i +bb11823.i: ; preds = %bb9313.i + br i1 false, label %bb12188.i, label %bb11831.i +bb11831.i: ; preds = %bb11823.i + br label %bb12188.i +bb12188.i: ; preds = %bb11831.i, %bb11823.i + br label %bb16581.i +bb12398.i: ; preds = %bb9313.i + br i1 false, label %bb12566.i, label %bb12406.i +bb12406.i: ; preds = %bb12398.i + br label %bb12566.i +bb12566.i: ; preds = %bb12406.i, %bb12398.i + br label %bb16581.i +bb12833.i: ; preds = %bb9313.i + br i1 false, label %bb13001.i, label %bb12841.i +bb12841.i: ; preds = %bb12833.i + br label %bb13001.i +bb13001.i: ; preds = %bb12841.i, %bb12833.i + br label %bb16581.i +bb13268.i: ; preds = %bb9313.i, %bb9313.i + br i1 false, label %bb13436.i, label %bb13276.i +bb13276.i: ; preds = %bb13268.i + br label %bb13436.i +bb13436.i: ; preds = %bb13276.i, %bb13268.i + br label %bb16581.i +bb13703.i: ; preds = %bb9313.i, %bb9313.i + br i1 false, label %bb13923.i, label %bb13711.i +bb13711.i: ; preds = %bb13703.i + br label %bb13923.i +bb13923.i: ; preds = %bb13711.i, %bb13703.i + br label %bb16581.i +bb14278.i: ; preds = %bb9313.i + br i1 false, label %bb14498.i, label %bb14286.i +bb14286.i: ; preds = %bb14278.i + br label %bb14498.i +bb14498.i: ; preds = %bb14286.i, %bb14278.i + br label %bb16581.i +bb14853.i: ; preds = %bb9313.i + br i1 false, label %bb15073.i, label 
%bb14861.i +bb14861.i: ; preds = %bb14853.i + br label %bb15073.i +bb15073.i: ; preds = %bb14861.i, %bb14853.i + br label %bb16581.i +bb15428.i: ; preds = %bb9313.i + br i1 false, label %bb15648.i, label %bb15436.i +bb15436.i: ; preds = %bb15428.i + br label %bb15648.i +bb15648.i: ; preds = %bb15436.i, %bb15428.i + br label %bb16581.i +bb16003.i: ; preds = %bb9313.i + br i1 false, label %bb16223.i, label %bb16011.i +bb16011.i: ; preds = %bb16003.i + br label %bb16223.i +bb16223.i: ; preds = %bb16011.i, %bb16003.i + br label %bb16581.i +bb16578.i: ; preds = %bb9313.i + unreachable +bb16581.i: ; preds = %bb16223.i, %bb15648.i, %bb15073.i, %bb14498.i, %bb13923.i, %bb13436.i, %bb13001.i, %bb12566.i, %bb12188.i, %bb11618.i, %bb11615.i, %bb11405.i, %bb10685.i, %bb10255.i, %bb9535.i, %bb9310.i, %glgVectorFloatConversion.exit + br label %storeVecColor_RGB_UI.exit +storeVecColor_RGB_UI.exit: ; preds = %bb16581.i + br i1 false, label %bb5295.i, label %bb16621.i +bb16607.i: ; preds = %bb5276.i + br i1 false, label %bb5295.preheader.i, label %bb16621.i +bb5295.preheader.i: ; preds = %bb16607.i + br label %bb5295.i +bb16621.i: ; preds = %bb16607.i, %storeVecColor_RGB_UI.exit + br label %bb16650.outer.i +bb16650.outer.i: ; preds = %bb16621.i + br label %bb16650.i +bb16650.i: ; preds = %storeColor_RGB_UI.exit, %bb16650.outer.i + br label %loadColor_BGRA_UI8888R.exit +loadColor_BGRA_UI8888R.exit: ; preds = %bb16650.i + br i1 false, label %bb16671.i, label %bb16697.i +bb16671.i: ; preds = %loadColor_BGRA_UI8888R.exit + br i1 false, label %bb.i179, label %bb662.i +bb.i179: ; preds = %bb16671.i + switch i32 0, label %bb513.i [ + i32 7, label %bb418.i + i32 6, label %bb433.i + ] +bb418.i: ; preds = %bb.i179 + br label %bb559.i +bb433.i: ; preds = %bb.i179 + switch i32 0, label %bb493.i [ + i32 31744, label %bb455.i + i32 0, label %bb471.i + ] +bb455.i: ; preds = %bb433.i + br i1 false, label %bb463.i, label %bb504.i +bb463.i: ; preds = %bb455.i + br label %bb559.i +bb471.i: ; preds = 
%bb433.i + br i1 false, label %bb497.i, label %bb484.preheader.i +bb484.preheader.i: ; preds = %bb471.i + br i1 false, label %bb479.i, label %bb490.i +bb479.i: ; preds = %bb479.i, %bb484.preheader.i + br i1 false, label %bb479.i, label %bb490.i +bb490.i: ; preds = %bb479.i, %bb484.preheader.i + br label %bb559.i +bb493.i: ; preds = %bb433.i + br label %bb497.i +bb497.i: ; preds = %bb493.i, %bb471.i + br label %bb504.i +bb504.i: ; preds = %bb497.i, %bb455.i + br label %bb513.i +bb513.i: ; preds = %bb504.i, %bb.i179 + br label %bb559.i +bb559.i: ; preds = %bb513.i, %bb490.i, %bb463.i, %bb418.i + br i1 false, label %bb2793.i, label %bb614.i +bb614.i: ; preds = %bb559.i + br i1 false, label %bb626.i, label %bb620.i +bb620.i: ; preds = %bb614.i + br i1 false, label %bb625.i, label %bb626.i +bb625.i: ; preds = %bb620.i + br label %bb626.i +bb626.i: ; preds = %bb625.i, %bb620.i, %bb614.i + br i1 false, label %bb638.i, label %bb632.i +bb632.i: ; preds = %bb626.i + br i1 false, label %bb637.i, label %bb638.i +bb637.i: ; preds = %bb632.i + br label %bb638.i +bb638.i: ; preds = %bb637.i, %bb632.i, %bb626.i + br i1 false, label %bb650.i, label %bb644.i +bb644.i: ; preds = %bb638.i + br i1 false, label %bb649.i, label %bb650.i +bb649.i: ; preds = %bb644.i + br label %bb650.i +bb650.i: ; preds = %bb649.i, %bb644.i, %bb638.i + br i1 false, label %bb2793.i, label %bb656.i +bb656.i: ; preds = %bb650.i + br i1 false, label %bb661.i, label %bb2793.i +bb661.i: ; preds = %bb656.i + switch i32 0, label %bb2883.i [ + i32 3, label %bb2874.i + i32 4, label %bb2795.i + i32 8, label %bb2810.i + i32 10, label %bb2834.i + i32 11, label %bb2819.i + i32 16, label %bb2810.i + ] +bb662.i: ; preds = %bb16671.i + switch i32 0, label %bb1937.i [ + i32 3, label %bb902.i + i32 4, label %bb1416.i + i32 8, label %bb1020.i + i32 10, label %bb902.i + i32 11, label %bb784.i + i32 16, label %bb664.i + ] +bb664.i: ; preds = %bb662.i + br i1 false, label %bb682.i, label %bb669.i +bb669.i: ; preds = %bb664.i + 
br label %bb710.i +bb682.i: ; preds = %bb664.i + br label %bb710.i +bb710.i: ; preds = %bb682.i, %bb669.i + br i1 false, label %bb760.i, label %bb754.i +bb754.i: ; preds = %bb710.i + br i1 false, label %bb759.i, label %bb760.i +bb759.i: ; preds = %bb754.i + br label %bb760.i +bb760.i: ; preds = %bb759.i, %bb754.i, %bb710.i + br i1 false, label %bb772.i, label %bb766.i +bb766.i: ; preds = %bb760.i + br i1 false, label %bb771.i, label %bb772.i +bb771.i: ; preds = %bb766.i + br label %bb772.i +bb772.i: ; preds = %bb771.i, %bb766.i, %bb760.i + br i1 false, label %bb1937.i, label %bb778.i +bb778.i: ; preds = %bb772.i + br i1 false, label %bb783.i, label %bb1937.i +bb783.i: ; preds = %bb778.i + br label %bb1937.i +bb784.i: ; preds = %bb662.i + switch i32 0, label %bb892.i [ + i32 1, label %bb868.i + i32 3, label %bb868.i + i32 4, label %bb882.i + i32 6, label %bb792.i + i32 7, label %bb786.i + ] +bb786.i: ; preds = %bb784.i + br label %bb904.i +bb792.i: ; preds = %bb784.i + switch i32 0, label %bb852.i [ + i32 31744, label %bb814.i + i32 0, label %bb830.i + ] +bb814.i: ; preds = %bb792.i + br i1 false, label %bb822.i, label %bb863.i +bb822.i: ; preds = %bb814.i + switch i32 0, label %bb1010.i [ + i32 1, label %bb986.i + i32 3, label %bb986.i + i32 4, label %bb1000.i + i32 6, label %bb910.i + i32 7, label %bb904.i + ] +bb830.i: ; preds = %bb792.i + br i1 false, label %bb856.i, label %bb843.preheader.i +bb843.preheader.i: ; preds = %bb830.i + br i1 false, label %bb838.i, label %bb849.i +bb838.i: ; preds = %bb838.i, %bb843.preheader.i + br i1 false, label %bb838.i, label %bb849.i +bb849.i: ; preds = %bb838.i, %bb843.preheader.i + switch i32 0, label %bb1010.i [ + i32 1, label %bb986.i + i32 3, label %bb986.i + i32 4, label %bb1000.i + i32 6, label %bb910.i + i32 7, label %bb904.i + ] +bb852.i: ; preds = %bb792.i + br label %bb856.i +bb856.i: ; preds = %bb852.i, %bb830.i + switch i32 0, label %bb1010.i [ + i32 1, label %bb986.i + i32 3, label %bb986.i + i32 4, label 
%bb1000.i + i32 6, label %bb910.i + i32 7, label %bb904.i + ] +bb863.i: ; preds = %bb814.i + switch i32 0, label %bb1010.i [ + i32 1, label %bb986.i + i32 3, label %bb986.i + i32 4, label %bb1000.i + i32 6, label %bb910.i + i32 7, label %bb904.i + ] +bb868.i: ; preds = %bb784.i, %bb784.i + switch i32 0, label %bb1010.i [ + i32 1, label %bb986.i + i32 3, label %bb986.i + i32 4, label %bb1000.i + i32 6, label %bb910.i + i32 7, label %bb904.i + ] +bb882.i: ; preds = %bb784.i + br label %bb1000.i +bb892.i: ; preds = %bb784.i + br label %bb902.i +bb902.i: ; preds = %bb892.i, %bb662.i, %bb662.i + switch i32 0, label %bb1010.i [ + i32 1, label %bb986.i + i32 3, label %bb986.i + i32 4, label %bb1000.i + i32 6, label %bb910.i + i32 7, label %bb904.i + ] +bb904.i: ; preds = %bb902.i, %bb868.i, %bb863.i, %bb856.i, %bb849.i, %bb822.i, %bb786.i + br label %bb1937.i +bb910.i: ; preds = %bb902.i, %bb868.i, %bb863.i, %bb856.i, %bb849.i, %bb822.i + switch i32 0, label %bb970.i [ + i32 31744, label %bb932.i + i32 0, label %bb948.i + ] +bb932.i: ; preds = %bb910.i + br i1 false, label %bb940.i, label %bb981.i +bb940.i: ; preds = %bb932.i + br label %bb1937.i +bb948.i: ; preds = %bb910.i + br i1 false, label %bb974.i, label %bb961.preheader.i +bb961.preheader.i: ; preds = %bb948.i + br i1 false, label %bb956.i, label %bb967.i +bb956.i: ; preds = %bb956.i, %bb961.preheader.i + br i1 false, label %bb956.i, label %bb967.i +bb967.i: ; preds = %bb956.i, %bb961.preheader.i + br label %bb1937.i +bb970.i: ; preds = %bb910.i + br label %bb974.i +bb974.i: ; preds = %bb970.i, %bb948.i + br label %bb1937.i +bb981.i: ; preds = %bb932.i + br label %bb1937.i +bb986.i: ; preds = %bb902.i, %bb902.i, %bb868.i, %bb868.i, %bb863.i, %bb863.i, %bb856.i, %bb856.i, %bb849.i, %bb849.i, %bb822.i, %bb822.i + br label %bb1937.i +bb1000.i: ; preds = %bb902.i, %bb882.i, %bb868.i, %bb863.i, %bb856.i, %bb849.i, %bb822.i + br label %bb1937.i +bb1010.i: ; preds = %bb902.i, %bb868.i, %bb863.i, %bb856.i, %bb849.i, 
%bb822.i + br label %bb1937.i +bb1020.i: ; preds = %bb662.i + switch i32 0, label %bb1388.i [ + i32 1, label %bb1264.i + i32 3, label %bb1264.i + i32 4, label %bb1304.i + i32 6, label %bb1038.i + i32 7, label %bb1022.i + i32 8, label %bb1332.i + i32 9, label %bb1332.i + i32 10, label %bb1360.i + i32 11, label %bb1360.i + ] +bb1022.i: ; preds = %bb1020.i + br label %bb1937.i +bb1038.i: ; preds = %bb1020.i + switch i32 0, label %bb1098.i [ + i32 31744, label %bb1060.i + i32 0, label %bb1076.i + ] +bb1060.i: ; preds = %bb1038.i + br i1 false, label %bb1068.i, label %bb1109.i +bb1068.i: ; preds = %bb1060.i + br label %bb1109.i +bb1076.i: ; preds = %bb1038.i + br i1 false, label %bb1102.i, label %bb1089.preheader.i +bb1089.preheader.i: ; preds = %bb1076.i + br i1 false, label %bb1084.i, label %bb1095.i +bb1084.i: ; preds = %bb1084.i, %bb1089.preheader.i + br i1 false, label %bb1084.i, label %bb1095.i +bb1095.i: ; preds = %bb1084.i, %bb1089.preheader.i + br label %bb1109.i +bb1098.i: ; preds = %bb1038.i + br label %bb1102.i +bb1102.i: ; preds = %bb1098.i, %bb1076.i + br label %bb1109.i +bb1109.i: ; preds = %bb1102.i, %bb1095.i, %bb1068.i, %bb1060.i + switch i32 0, label %bb1173.i [ + i32 31744, label %bb1135.i + i32 0, label %bb1151.i + ] +bb1135.i: ; preds = %bb1109.i + br i1 false, label %bb1143.i, label %bb1184.i +bb1143.i: ; preds = %bb1135.i + br label %bb1184.i +bb1151.i: ; preds = %bb1109.i + br i1 false, label %bb1177.i, label %bb1164.preheader.i +bb1164.preheader.i: ; preds = %bb1151.i + br i1 false, label %bb1159.i, label %bb1170.i +bb1159.i: ; preds = %bb1159.i, %bb1164.preheader.i + br i1 false, label %bb1159.i, label %bb1170.i +bb1170.i: ; preds = %bb1159.i, %bb1164.preheader.i + br label %bb1184.i +bb1173.i: ; preds = %bb1109.i + br label %bb1177.i +bb1177.i: ; preds = %bb1173.i, %bb1151.i + br label %bb1184.i +bb1184.i: ; preds = %bb1177.i, %bb1170.i, %bb1143.i, %bb1135.i + switch i32 0, label %bb1248.i [ + i32 31744, label %bb1210.i + i32 0, label 
%bb1226.i + ] +bb1210.i: ; preds = %bb1184.i + br i1 false, label %bb1218.i, label %bb1259.i +bb1218.i: ; preds = %bb1210.i + br label %bb1937.i +bb1226.i: ; preds = %bb1184.i + br i1 false, label %bb1252.i, label %bb1239.preheader.i +bb1239.preheader.i: ; preds = %bb1226.i + br i1 false, label %bb1234.i, label %bb1245.i +bb1234.i: ; preds = %bb1234.i, %bb1239.preheader.i + br i1 false, label %bb1234.i, label %bb1245.i +bb1245.i: ; preds = %bb1234.i, %bb1239.preheader.i + br label %bb1937.i +bb1248.i: ; preds = %bb1184.i + br label %bb1252.i +bb1252.i: ; preds = %bb1248.i, %bb1226.i + br label %bb1937.i +bb1259.i: ; preds = %bb1210.i + br label %bb1937.i +bb1264.i: ; preds = %bb1020.i, %bb1020.i + br label %bb1937.i +bb1304.i: ; preds = %bb1020.i + br label %bb1937.i +bb1332.i: ; preds = %bb1020.i, %bb1020.i + br label %bb1937.i +bb1360.i: ; preds = %bb1020.i, %bb1020.i + br label %bb1937.i +bb1388.i: ; preds = %bb1020.i + br label %bb1937.i +bb1416.i: ; preds = %bb662.i + switch i32 0, label %bb1900.i [ + i32 1, label %bb1740.i + i32 3, label %bb1740.i + i32 4, label %bb1793.i + i32 6, label %bb1439.i + i32 7, label %bb1418.i + i32 14, label %bb1830.i + i32 15, label %bb1830.i + i32 18, label %bb1863.i + i32 19, label %bb1863.i + ] +bb1418.i: ; preds = %bb1416.i + br label %bb1937.i +bb1439.i: ; preds = %bb1416.i + switch i32 0, label %bb1499.i [ + i32 31744, label %bb1461.i + i32 0, label %bb1477.i + ] +bb1461.i: ; preds = %bb1439.i + br i1 false, label %bb1469.i, label %bb1510.i +bb1469.i: ; preds = %bb1461.i + br label %bb1510.i +bb1477.i: ; preds = %bb1439.i + br i1 false, label %bb1503.i, label %bb1490.preheader.i +bb1490.preheader.i: ; preds = %bb1477.i + br i1 false, label %bb1485.i, label %bb1496.i +bb1485.i: ; preds = %bb1485.i, %bb1490.preheader.i + br i1 false, label %bb1485.i, label %bb1496.i +bb1496.i: ; preds = %bb1485.i, %bb1490.preheader.i + br label %bb1510.i +bb1499.i: ; preds = %bb1439.i + br label %bb1503.i +bb1503.i: ; preds = %bb1499.i, 
%bb1477.i + br label %bb1510.i +bb1510.i: ; preds = %bb1503.i, %bb1496.i, %bb1469.i, %bb1461.i + switch i32 0, label %bb1574.i [ + i32 31744, label %bb1536.i + i32 0, label %bb1552.i + ] +bb1536.i: ; preds = %bb1510.i + br i1 false, label %bb1544.i, label %bb1585.i +bb1544.i: ; preds = %bb1536.i + br label %bb1585.i +bb1552.i: ; preds = %bb1510.i + br i1 false, label %bb1578.i, label %bb1565.preheader.i +bb1565.preheader.i: ; preds = %bb1552.i + br i1 false, label %bb1560.i, label %bb1571.i +bb1560.i: ; preds = %bb1560.i, %bb1565.preheader.i + br i1 false, label %bb1560.i, label %bb1571.i +bb1571.i: ; preds = %bb1560.i, %bb1565.preheader.i + br label %bb1585.i +bb1574.i: ; preds = %bb1510.i + br label %bb1578.i +bb1578.i: ; preds = %bb1574.i, %bb1552.i + br label %bb1585.i +bb1585.i: ; preds = %bb1578.i, %bb1571.i, %bb1544.i, %bb1536.i + switch i32 0, label %bb1649.i [ + i32 31744, label %bb1611.i + i32 0, label %bb1627.i + ] +bb1611.i: ; preds = %bb1585.i + br i1 false, label %bb1619.i, label %bb1660.i +bb1619.i: ; preds = %bb1611.i + br label %bb1660.i +bb1627.i: ; preds = %bb1585.i + br i1 false, label %bb1653.i, label %bb1640.preheader.i +bb1640.preheader.i: ; preds = %bb1627.i + br i1 false, label %bb1635.i, label %bb1646.i +bb1635.i: ; preds = %bb1635.i, %bb1640.preheader.i + br i1 false, label %bb1635.i, label %bb1646.i +bb1646.i: ; preds = %bb1635.i, %bb1640.preheader.i + br label %bb1660.i +bb1649.i: ; preds = %bb1585.i + br label %bb1653.i +bb1653.i: ; preds = %bb1649.i, %bb1627.i + br label %bb1660.i +bb1660.i: ; preds = %bb1653.i, %bb1646.i, %bb1619.i, %bb1611.i + switch i32 0, label %bb1724.i [ + i32 31744, label %bb1686.i + i32 0, label %bb1702.i + ] +bb1686.i: ; preds = %bb1660.i + br i1 false, label %bb1694.i, label %bb1735.i +bb1694.i: ; preds = %bb1686.i + br label %bb1937.i +bb1702.i: ; preds = %bb1660.i + br i1 false, label %bb1728.i, label %bb1715.preheader.i +bb1715.preheader.i: ; preds = %bb1702.i + br i1 false, label %bb1710.i, label 
%bb1721.i +bb1710.i: ; preds = %bb1710.i, %bb1715.preheader.i + br i1 false, label %bb1710.i, label %bb1721.i +bb1721.i: ; preds = %bb1710.i, %bb1715.preheader.i + br label %bb1937.i +bb1724.i: ; preds = %bb1660.i + br label %bb1728.i +bb1728.i: ; preds = %bb1724.i, %bb1702.i + br label %bb1937.i +bb1735.i: ; preds = %bb1686.i + br label %bb1937.i +bb1740.i: ; preds = %bb1416.i, %bb1416.i + br label %bb1937.i +bb1793.i: ; preds = %bb1416.i + br label %bb1937.i +bb1830.i: ; preds = %bb1416.i, %bb1416.i + br label %bb1937.i +bb1863.i: ; preds = %bb1416.i, %bb1416.i + br label %bb1937.i +bb1900.i: ; preds = %bb1416.i + br label %bb1937.i +bb1937.i: ; preds = %bb1900.i, %bb1863.i, %bb1830.i, %bb1793.i, %bb1740.i, %bb1735.i, %bb1728.i, %bb1721.i, %bb1694.i, %bb1418.i, %bb1388.i, %bb1360.i, %bb1332.i, %bb1304.i, %bb1264.i, %bb1259.i, %bb1252.i, %bb1245.i, %bb1218.i, %bb1022.i, %bb1010.i, %bb1000.i, %bb986.i, %bb981.i, %bb974.i, %bb967.i, %bb940.i, %bb904.i, %bb783.i, %bb778.i, %bb772.i, %bb662.i + switch i32 %sf4083.0.i, label %bb2321.i [ + i32 0, label %bb2027.i + i32 1, label %bb2081.i + i32 2, label %bb2161.i + i32 3, label %bb2241.i + i32 8, label %bb1939.i + i32 9, label %bb1939.i + i32 10, label %bb1957.i + i32 11, label %bb1975.i + i32 16, label %bb1939.i + ] +bb1939.i: ; preds = %bb1937.i, %bb1937.i, %bb1937.i + switch i32 0, label %bb2321.i [ + i32 3, label %bb1956.i + i32 4, label %bb1956.i + i32 11, label %bb1956.i + ] +bb1956.i: ; preds = %bb1939.i, %bb1939.i, %bb1939.i + br label %bb2337.i +bb1957.i: ; preds = %bb1937.i + switch i32 0, label %bb1975.i [ + i32 3, label %bb1974.i + i32 4, label %bb1974.i + i32 11, label %bb1974.i + ] +bb1974.i: ; preds = %bb1957.i, %bb1957.i, %bb1957.i + br label %bb1975.i +bb1975.i: ; preds = %bb1974.i, %bb1957.i, %bb1937.i + switch i32 0, label %bb2001.i [ + i32 1, label %bb1992.i + i32 4, label %bb1992.i + i32 8, label %bb1992.i + ] +bb1992.i: ; preds = %bb1975.i, %bb1975.i, %bb1975.i + br label %bb2001.i +bb2001.i: ; preds 
= %bb1992.i, %bb1975.i + switch i32 0, label %bb2321.i [ + i32 2, label %bb2018.i + i32 4, label %bb2018.i + i32 8, label %bb2018.i + ] +bb2018.i: ; preds = %bb2001.i, %bb2001.i, %bb2001.i + br label %bb2321.i +bb2027.i: ; preds = %bb1937.i + switch i32 0, label %bb2045.i [ + i32 1, label %bb2044.i + i32 4, label %bb2044.i + i32 8, label %bb2044.i + ] +bb2044.i: ; preds = %bb2027.i, %bb2027.i, %bb2027.i + br label %bb2045.i +bb2045.i: ; preds = %bb2044.i, %bb2027.i + switch i32 0, label %bb2063.i [ + i32 2, label %bb2062.i + i32 4, label %bb2062.i + i32 8, label %bb2062.i + ] +bb2062.i: ; preds = %bb2045.i, %bb2045.i, %bb2045.i + br label %bb2063.i +bb2063.i: ; preds = %bb2062.i, %bb2045.i + switch i32 0, label %bb2321.i [ + i32 3, label %bb2080.i + i32 4, label %bb2080.i + i32 11, label %bb2080.i + ] +bb2080.i: ; preds = %bb2063.i, %bb2063.i, %bb2063.i + br label %bb2321.i +bb2081.i: ; preds = %bb1937.i + switch i32 0, label %bb2100.i [ + i32 1, label %bb2098.i + i32 4, label %bb2098.i + i32 8, label %bb2098.i + ] +bb2098.i: ; preds = %bb2081.i, %bb2081.i, %bb2081.i + br label %bb2100.i +bb2100.i: ; preds = %bb2098.i, %bb2081.i + switch i32 0, label %bb2125.i [ + i32 4, label %bb2124.i + i32 8, label %bb2124.i + i32 0, label %bb2124.i + i32 11, label %bb2124.i + ] +bb2124.i: ; preds = %bb2100.i, %bb2100.i, %bb2100.i, %bb2100.i + br label %bb2125.i +bb2125.i: ; preds = %bb2124.i, %bb2100.i + switch i32 0, label %bb2143.i [ + i32 2, label %bb2142.i + i32 4, label %bb2142.i + i32 8, label %bb2142.i + ] +bb2142.i: ; preds = %bb2125.i, %bb2125.i, %bb2125.i + br label %bb2143.i +bb2143.i: ; preds = %bb2142.i, %bb2125.i + switch i32 0, label %bb2321.i [ + i32 3, label %bb2160.i + i32 4, label %bb2160.i + i32 11, label %bb2160.i + ] +bb2160.i: ; preds = %bb2143.i, %bb2143.i, %bb2143.i + br label %bb2321.i +bb2161.i: ; preds = %bb1937.i + switch i32 0, label %bb2180.i [ + i32 2, label %bb2178.i + i32 4, label %bb2178.i + i32 8, label %bb2178.i + ] +bb2178.i: ; preds = 
%bb2161.i, %bb2161.i, %bb2161.i + br label %bb2180.i +bb2180.i: ; preds = %bb2178.i, %bb2161.i + switch i32 0, label %bb2205.i [ + i32 4, label %bb2204.i + i32 8, label %bb2204.i + i32 0, label %bb2204.i + i32 11, label %bb2204.i + ] +bb2204.i: ; preds = %bb2180.i, %bb2180.i, %bb2180.i, %bb2180.i + br label %bb2205.i +bb2205.i: ; preds = %bb2204.i, %bb2180.i + switch i32 0, label %bb2223.i [ + i32 1, label %bb2222.i + i32 4, label %bb2222.i + i32 8, label %bb2222.i + ] +bb2222.i: ; preds = %bb2205.i, %bb2205.i, %bb2205.i + br label %bb2223.i +bb2223.i: ; preds = %bb2222.i, %bb2205.i + switch i32 0, label %bb2321.i [ + i32 3, label %bb2240.i + i32 4, label %bb2240.i + i32 11, label %bb2240.i + ] +bb2240.i: ; preds = %bb2223.i, %bb2223.i, %bb2223.i + br label %bb2321.i +bb2241.i: ; preds = %bb1937.i + switch i32 0, label %bb2260.i [ + i32 3, label %bb2258.i + i32 4, label %bb2258.i + i32 11, label %bb2258.i + ] +bb2258.i: ; preds = %bb2241.i, %bb2241.i, %bb2241.i + br label %bb2260.i +bb2260.i: ; preds = %bb2258.i, %bb2241.i + switch i32 0, label %bb2285.i [ + i32 4, label %bb2284.i + i32 11, label %bb2284.i + i32 0, label %bb2284.i + i32 8, label %bb2284.i + ] +bb2284.i: ; preds = %bb2260.i, %bb2260.i, %bb2260.i, %bb2260.i + br label %bb2285.i +bb2285.i: ; preds = %bb2284.i, %bb2260.i + switch i32 0, label %bb2303.i [ + i32 1, label %bb2302.i + i32 4, label %bb2302.i + i32 8, label %bb2302.i + ] +bb2302.i: ; preds = %bb2285.i, %bb2285.i, %bb2285.i + br label %bb2303.i +bb2303.i: ; preds = %bb2302.i, %bb2285.i + switch i32 0, label %bb2321.i [ + i32 2, label %bb2320.i + i32 4, label %bb2320.i + i32 8, label %bb2320.i + ] +bb2320.i: ; preds = %bb2303.i, %bb2303.i, %bb2303.i + br label %bb2321.i +bb2321.i: ; preds = %bb2320.i, %bb2303.i, %bb2240.i, %bb2223.i, %bb2160.i, %bb2143.i, %bb2080.i, %bb2063.i, %bb2018.i, %bb2001.i, %bb1939.i, %bb1937.i + br label %bb2337.i +bb2337.i: ; preds = %bb2321.i, %bb1956.i + br label %bb2353.i +bb2353.i: ; preds = %bb2337.i + br label 
%bb2369.i +bb2369.i: ; preds = %bb2353.i + br label %bb2385.i +bb2385.i: ; preds = %bb2369.i + br i1 false, label %bb2388.i, label %bb2394.i +bb2388.i: ; preds = %bb2385.i + br label %bb2600.i +bb2394.i: ; preds = %bb2385.i + switch i32 0, label %bb2600.i [ + i32 0, label %bb2504.i + i32 1, label %bb2528.i + i32 2, label %bb2552.i + i32 3, label %bb2576.i + i32 4, label %bb2396.i + i32 8, label %bb2420.i + i32 11, label %bb2480.i + ] +bb2396.i: ; preds = %bb2394.i + br i1 false, label %bb2411.i, label %bb2399.i +bb2399.i: ; preds = %bb2396.i + br i1 false, label %bb2420.i, label %bb2405.i +bb2405.i: ; preds = %bb2399.i + br i1 false, label %bb2410.i, label %bb2420.i +bb2410.i: ; preds = %bb2405.i + br i1 false, label %bb2459.i, label %bb2423.i +bb2411.i: ; preds = %bb2396.i + br i1 false, label %bb2420.i, label %bb2414.i +bb2414.i: ; preds = %bb2411.i + br i1 false, label %bb2419.i, label %bb2420.i +bb2419.i: ; preds = %bb2414.i + br label %bb2420.i +bb2420.i: ; preds = %bb2419.i, %bb2414.i, %bb2411.i, %bb2405.i, %bb2399.i, %bb2394.i + br i1 false, label %bb2459.i, label %bb2423.i +bb2423.i: ; preds = %bb2420.i, %bb2410.i + br i1 false, label %bb2435.i, label %bb2429.i +bb2429.i: ; preds = %bb2423.i + br i1 false, label %bb2434.i, label %bb2435.i +bb2434.i: ; preds = %bb2429.i + br label %bb2435.i +bb2435.i: ; preds = %bb2434.i, %bb2429.i, %bb2423.i + br i1 false, label %bb2447.i, label %bb2441.i +bb2441.i: ; preds = %bb2435.i + br i1 false, label %bb2446.i, label %bb2447.i +bb2446.i: ; preds = %bb2441.i + br label %bb2447.i +bb2447.i: ; preds = %bb2446.i, %bb2441.i, %bb2435.i + br i1 false, label %bb2600.i, label %bb2453.i +bb2453.i: ; preds = %bb2447.i + br i1 false, label %bb2458.i, label %bb2600.i +bb2458.i: ; preds = %bb2453.i + br label %bb2793.i +bb2459.i: ; preds = %bb2420.i, %bb2410.i + br i1 false, label %bb2600.i, label %bb2462.i +bb2462.i: ; preds = %bb2459.i + br i1 false, label %bb2479.i, label %bb2600.i +bb2479.i: ; preds = %bb2462.i + br label 
%bb2600.i +bb2480.i: ; preds = %bb2394.i + br i1 false, label %bb2495.i, label %bb2483.i +bb2483.i: ; preds = %bb2480.i + br i1 false, label %bb2504.i, label %bb2489.i +bb2489.i: ; preds = %bb2483.i + br i1 false, label %bb2494.i, label %bb2504.i +bb2494.i: ; preds = %bb2489.i + br i1 false, label %bb2519.i, label %bb2507.i +bb2495.i: ; preds = %bb2480.i + br i1 false, label %bb2504.i, label %bb2498.i +bb2498.i: ; preds = %bb2495.i + br i1 false, label %bb2503.i, label %bb2504.i +bb2503.i: ; preds = %bb2498.i + br label %bb2504.i +bb2504.i: ; preds = %bb2503.i, %bb2498.i, %bb2495.i, %bb2489.i, %bb2483.i, %bb2394.i + br i1 false, label %bb2519.i, label %bb2507.i +bb2507.i: ; preds = %bb2504.i, %bb2494.i + br i1 false, label %bb2600.i, label %bb2513.i +bb2513.i: ; preds = %bb2507.i + br i1 false, label %bb2518.i, label %bb2600.i +bb2518.i: ; preds = %bb2513.i + br label %bb2600.i +bb2519.i: ; preds = %bb2504.i, %bb2494.i + br i1 false, label %bb2600.i, label %bb2522.i +bb2522.i: ; preds = %bb2519.i + br i1 false, label %bb2527.i, label %bb2600.i +bb2527.i: ; preds = %bb2522.i + br label %bb2600.i +bb2528.i: ; preds = %bb2394.i + br i1 false, label %bb2543.i, label %bb2531.i +bb2531.i: ; preds = %bb2528.i + br i1 false, label %bb2600.i, label %bb2537.i +bb2537.i: ; preds = %bb2531.i + br i1 false, label %bb2542.i, label %bb2600.i +bb2542.i: ; preds = %bb2537.i + br label %bb2600.i +bb2543.i: ; preds = %bb2528.i + br i1 false, label %bb2600.i, label %bb2546.i +bb2546.i: ; preds = %bb2543.i + br i1 false, label %bb2551.i, label %bb2600.i +bb2551.i: ; preds = %bb2546.i + br label %bb2600.i +bb2552.i: ; preds = %bb2394.i + br i1 false, label %bb2567.i, label %bb2555.i +bb2555.i: ; preds = %bb2552.i + br i1 false, label %bb2600.i, label %bb2561.i +bb2561.i: ; preds = %bb2555.i + br i1 false, label %bb2566.i, label %bb2600.i +bb2566.i: ; preds = %bb2561.i + br label %bb2600.i +bb2567.i: ; preds = %bb2552.i + br i1 false, label %bb2600.i, label %bb2570.i +bb2570.i: ; preds = 
%bb2567.i + br i1 false, label %bb2575.i, label %bb2600.i +bb2575.i: ; preds = %bb2570.i + br label %bb2600.i +bb2576.i: ; preds = %bb2394.i + br i1 false, label %bb2591.i, label %bb2579.i +bb2579.i: ; preds = %bb2576.i + br i1 false, label %bb2600.i, label %bb2585.i +bb2585.i: ; preds = %bb2579.i + br i1 false, label %bb2590.i, label %bb2600.i +bb2590.i: ; preds = %bb2585.i + br label %bb2600.i +bb2591.i: ; preds = %bb2576.i + br i1 false, label %bb2600.i, label %bb2594.i +bb2594.i: ; preds = %bb2591.i + br i1 false, label %bb2599.i, label %bb2600.i +bb2599.i: ; preds = %bb2594.i + br label %bb2600.i +bb2600.i: ; preds = %bb2599.i, %bb2594.i, %bb2591.i, %bb2590.i, %bb2585.i, %bb2579.i, %bb2575.i, %bb2570.i, %bb2567.i, %bb2566.i, %bb2561.i, %bb2555.i, %bb2551.i, %bb2546.i, %bb2543.i, %bb2542.i, %bb2537.i, %bb2531.i, %bb2527.i, %bb2522.i, %bb2519.i, %bb2518.i, %bb2513.i, %bb2507.i, %bb2479.i, %bb2462.i, %bb2459.i, %bb2453.i, %bb2447.i, %bb2394.i, %bb2388.i + br label %bb2793.i +bb2793.i: ; preds = %bb2600.i, %bb2458.i, %bb656.i, %bb650.i, %bb559.i + switch i32 0, label %bb2883.i [ + i32 3, label %bb2874.i + i32 4, label %bb2795.i + i32 8, label %bb2810.i + i32 10, label %bb2834.i + i32 11, label %bb2819.i + i32 16, label %bb2810.i + ] +bb2795.i: ; preds = %bb2793.i, %bb661.i + br label %bb2810.i +bb2810.i: ; preds = %bb2795.i, %bb2793.i, %bb2793.i, %bb661.i, %bb661.i + br label %bb2883.i +bb2819.i: ; preds = %bb2793.i, %bb661.i + br label %bb2834.i +bb2834.i: ; preds = %bb2819.i, %bb2793.i, %bb661.i + switch i32 0, label %bb2860.i [ + i32 4, label %bb2846.i + i32 8, label %bb2846.i + ] +bb2846.i: ; preds = %bb2834.i, %bb2834.i + br i1 false, label %bb2859.i, label %bb2860.i +bb2859.i: ; preds = %bb2846.i + br label %bb2860.i +bb2860.i: ; preds = %bb2859.i, %bb2846.i, %bb2834.i + switch i32 %df4081.0.i, label %bb2867.bb2883_crit_edge.i [ + i32 1, label %bb2883.i + i32 2, label %bb2872.i + ] +bb2867.bb2883_crit_edge.i: ; preds = %bb2860.i + br label %bb2883.i 
+bb2872.i: ; preds = %bb2860.i + switch i32 0, label %UnifiedReturnBlock.i235 [ + i32 3, label %bb3253.i + i32 4, label %bb4173.i + i32 8, label %bb3485.i + i32 10, label %bb3253.i + i32 11, label %bb3021.i + i32 16, label %bb2885.i + ] +bb2874.i: ; preds = %bb2793.i, %bb661.i + br label %bb2883.i +bb2883.i: ; preds = %bb2874.i, %bb2867.bb2883_crit_edge.i, %bb2860.i, %bb2810.i, %bb2793.i, %bb661.i + %f_alpha.1.i = phi i32 [ 0, %bb2867.bb2883_crit_edge.i ], [ 0, %bb2874.i ], [ 1065353216, %bb661.i ], [ 0, %bb2793.i ], [ 0, %bb2810.i ], [ 0, %bb2860.i ] ; <i32> [#uses=1] + switch i32 0, label %UnifiedReturnBlock.i235 [ + i32 3, label %bb3253.i + i32 4, label %bb4173.i + i32 8, label %bb3485.i + i32 10, label %bb3253.i + i32 11, label %bb3021.i + i32 16, label %bb2885.i + ] +bb2885.i: ; preds = %bb2883.i, %bb2872.i + br i1 false, label %bb3011.i, label %bb2890.i +bb2890.i: ; preds = %bb2885.i + br i1 false, label %bb2960.i, label %bb2954.i +bb2954.i: ; preds = %bb2890.i + br i1 false, label %bb2959.i, label %bb2960.i +bb2959.i: ; preds = %bb2954.i + br label %bb2960.i +bb2960.i: ; preds = %bb2959.i, %bb2954.i, %bb2890.i + br i1 false, label %bb2972.i, label %bb2966.i +bb2966.i: ; preds = %bb2960.i + br i1 false, label %bb2971.i, label %bb2972.i +bb2971.i: ; preds = %bb2966.i + br label %bb2972.i +bb2972.i: ; preds = %bb2971.i, %bb2966.i, %bb2960.i + br label %glgScalarFloatConversion.exit +bb3011.i: ; preds = %bb2885.i + br label %glgScalarFloatConversion.exit +bb3021.i: ; preds = %bb2883.i, %bb2872.i + switch i32 %dt4080.0.i, label %bb3192.i [ + i32 7, label %bb3026.i + i32 6, label %bb3037.i + i32 1, label %bb3125.i + i32 3, label %bb3125.i + i32 5, label %bb3144.i + ] +bb3026.i: ; preds = %bb3021.i + br label %bb3258.i +bb3037.i: ; preds = %bb3021.i + br i1 false, label %bb3052.i, label %bb3074.i +bb3052.i: ; preds = %bb3037.i + br i1 false, label %bb3105.i, label %bb3069.i +bb3069.i: ; preds = %bb3052.i + switch i32 %dt4080.0.i, label %bb3424.i [ + i32 7, label 
%bb3258.i + i32 6, label %bb3269.i + i32 1, label %bb3357.i + i32 3, label %bb3357.i + i32 5, label %bb3376.i + ] +bb3074.i: ; preds = %bb3037.i + br i1 false, label %bb3079.i, label %bb3092.i +bb3079.i: ; preds = %bb3074.i + switch i32 %dt4080.0.i, label %bb3424.i [ + i32 7, label %bb3258.i + i32 6, label %bb3269.i + i32 1, label %bb3357.i + i32 3, label %bb3357.i + i32 5, label %bb3376.i + ] +bb3092.i: ; preds = %bb3074.i + switch i32 %dt4080.0.i, label %bb3424.i [ + i32 7, label %bb3258.i + i32 6, label %bb3269.i + i32 1, label %bb3357.i + i32 3, label %bb3357.i + i32 5, label %bb3376.i + ] +bb3105.i: ; preds = %bb3052.i + switch i32 %dt4080.0.i, label %bb3424.i [ + i32 7, label %bb3258.i + i32 6, label %bb3269.i + i32 1, label %bb3357.i + i32 3, label %bb3357.i + i32 5, label %bb3376.i + ] +bb3125.i: ; preds = %bb3021.i, %bb3021.i + switch i32 %dt4080.0.i, label %bb3424.i [ + i32 7, label %bb3258.i + i32 6, label %bb3269.i + i32 1, label %bb3357.i + i32 3, label %bb3357.i + i32 5, label %bb3376.i + ] +bb3144.i: ; preds = %bb3021.i + br label %bb3376.i +bb3192.i: ; preds = %bb3021.i + br i1 false, label %bb3197.i, label %bb3243.i +bb3197.i: ; preds = %bb3192.i + br label %bb3424.i +bb3243.i: ; preds = %bb3192.i + br label %bb3253.i +bb3253.i: ; preds = %bb3243.i, %bb2883.i, %bb2883.i, %bb2872.i, %bb2872.i + switch i32 %dt4080.0.i, label %bb3424.i [ + i32 7, label %bb3258.i + i32 6, label %bb3269.i + i32 1, label %bb3357.i + i32 3, label %bb3357.i + i32 5, label %bb3376.i + ] +bb3258.i: ; preds = %bb3253.i, %bb3125.i, %bb3105.i, %bb3092.i, %bb3079.i, %bb3069.i, %bb3026.i + br label %glgScalarFloatConversion.exit +bb3269.i: ; preds = %bb3253.i, %bb3125.i, %bb3105.i, %bb3092.i, %bb3079.i, %bb3069.i + br i1 false, label %bb3284.i, label %bb3306.i +bb3284.i: ; preds = %bb3269.i + br i1 false, label %bb3337.i, label %bb3301.i +bb3301.i: ; preds = %bb3284.i + br label %glgScalarFloatConversion.exit +bb3306.i: ; preds = %bb3269.i + br i1 false, label %bb3311.i, label 
%bb3324.i +bb3311.i: ; preds = %bb3306.i + br label %glgScalarFloatConversion.exit +bb3324.i: ; preds = %bb3306.i + br label %glgScalarFloatConversion.exit +bb3337.i: ; preds = %bb3284.i + br label %glgScalarFloatConversion.exit +bb3357.i: ; preds = %bb3253.i, %bb3253.i, %bb3125.i, %bb3125.i, %bb3105.i, %bb3105.i, %bb3092.i, %bb3092.i, %bb3079.i, %bb3079.i, %bb3069.i, %bb3069.i + br label %glgScalarFloatConversion.exit +bb3376.i: ; preds = %bb3253.i, %bb3144.i, %bb3125.i, %bb3105.i, %bb3092.i, %bb3079.i, %bb3069.i + br label %glgScalarFloatConversion.exit +bb3424.i: ; preds = %bb3253.i, %bb3197.i, %bb3125.i, %bb3105.i, %bb3092.i, %bb3079.i, %bb3069.i + br i1 false, label %bb3429.i, label %bb3475.i +bb3429.i: ; preds = %bb3424.i + br label %glgScalarFloatConversion.exit +bb3475.i: ; preds = %bb3424.i + br label %glgScalarFloatConversion.exit +bb3485.i: ; preds = %bb2883.i, %bb2872.i + switch i32 %dt4080.0.i, label %bb4077.i [ + i32 7, label %bb3490.i + i32 6, label %bb3511.i + i32 1, label %bb3749.i + i32 3, label %bb3749.i + i32 5, label %bb3794.i + i32 4, label %bb3941.i + ] +bb3490.i: ; preds = %bb3485.i + br label %glgScalarFloatConversion.exit +bb3511.i: ; preds = %bb3485.i + br i1 false, label %bb3526.i, label %bb3548.i +bb3526.i: ; preds = %bb3511.i + br i1 false, label %bb3579.i, label %bb3543.i +bb3543.i: ; preds = %bb3526.i + br label %bb3579.i +bb3548.i: ; preds = %bb3511.i + br i1 false, label %bb3553.i, label %bb3566.i +bb3553.i: ; preds = %bb3548.i + br label %bb3579.i +bb3566.i: ; preds = %bb3548.i + br label %bb3579.i +bb3579.i: ; preds = %bb3566.i, %bb3553.i, %bb3543.i, %bb3526.i + br i1 false, label %bb3601.i, label %bb3623.i +bb3601.i: ; preds = %bb3579.i + br i1 false, label %bb3654.i, label %bb3618.i +bb3618.i: ; preds = %bb3601.i + br label %bb3654.i +bb3623.i: ; preds = %bb3579.i + br i1 false, label %bb3628.i, label %bb3641.i +bb3628.i: ; preds = %bb3623.i + br label %bb3654.i +bb3641.i: ; preds = %bb3623.i + br label %bb3654.i +bb3654.i: ; 
preds = %bb3641.i, %bb3628.i, %bb3618.i, %bb3601.i + br i1 false, label %bb3676.i, label %bb3698.i +bb3676.i: ; preds = %bb3654.i + br i1 false, label %bb3729.i, label %bb3693.i +bb3693.i: ; preds = %bb3676.i + br label %glgScalarFloatConversion.exit +bb3698.i: ; preds = %bb3654.i + br i1 false, label %bb3703.i, label %bb3716.i +bb3703.i: ; preds = %bb3698.i + br label %glgScalarFloatConversion.exit +bb3716.i: ; preds = %bb3698.i + br label %glgScalarFloatConversion.exit +bb3729.i: ; preds = %bb3676.i + br label %glgScalarFloatConversion.exit +bb3749.i: ; preds = %bb3485.i, %bb3485.i + br label %glgScalarFloatConversion.exit +bb3794.i: ; preds = %bb3485.i + br label %glgScalarFloatConversion.exit +bb3941.i: ; preds = %bb3485.i + br label %glgScalarFloatConversion.exit +bb4077.i: ; preds = %bb3485.i + br i1 false, label %bb4083.i, label %bb4111.i +bb4083.i: ; preds = %bb4077.i + br label %glgScalarFloatConversion.exit +bb4111.i: ; preds = %bb4077.i + br i1 false, label %bb4117.i, label %bb4145.i +bb4117.i: ; preds = %bb4111.i + br label %glgScalarFloatConversion.exit +bb4145.i: ; preds = %bb4111.i + br label %glgScalarFloatConversion.exit +bb4173.i: ; preds = %bb2883.i, %bb2872.i + %f_red.0.reg2mem.4.i = phi i32 [ 0, %bb2872.i ], [ 0, %bb2883.i ] ; <i32> [#uses=2] + %f_green.0.reg2mem.2.i = phi i32 [ 0, %bb2872.i ], [ 0, %bb2883.i ] ; <i32> [#uses=1] + %f_blue.0.reg2mem.2.i = phi i32 [ 0, %bb2872.i ], [ 0, %bb2883.i ] ; <i32> [#uses=1] + %f_alpha.1.reg2mem.1.i = phi i32 [ 0, %bb2872.i ], [ %f_alpha.1.i, %bb2883.i ] ; <i32> [#uses=1] + switch i32 %dt4080.0.i, label %bb4950.i [ + i32 7, label %bb4178.i + i32 6, label %bb4204.i + i32 1, label %bb4517.i202 + i32 3, label %bb4517.i202 + i32 5, label %bb4575.i + i32 4, label %bb4769.i + ] +bb4178.i: ; preds = %bb4173.i + br label %glgScalarFloatConversion.exit +bb4204.i: ; preds = %bb4173.i + %tmp4210.i = and i32 0, 32768 ; <i32> [#uses=4] + %tmp4212.i = and i32 %f_red.0.reg2mem.4.i, 2139095040 ; <i32> [#uses=1] + 
%tmp4214.i = and i32 %f_red.0.reg2mem.4.i, 8388607 ; <i32> [#uses=1] + br i1 false, label %bb4219.i, label %bb4241.i +bb4219.i: ; preds = %bb4204.i + br i1 false, label %bb4272.i, label %bb4236.i +bb4236.i: ; preds = %bb4219.i + br label %bb4272.i +bb4241.i: ; preds = %bb4204.i + br i1 false, label %bb4246.i, label %bb4259.i +bb4246.i: ; preds = %bb4241.i + %tmp4253.i = lshr i32 %tmp4214.i, 0 ; <i32> [#uses=1] + %tmp4253.masked.i = and i32 %tmp4253.i, 65535 ; <i32> [#uses=1] + br label %bb4272.i +bb4259.i: ; preds = %bb4241.i + %tmp4261.i187 = add i32 %tmp4212.i, 134217728 ; <i32> [#uses=1] + %tmp4262.i188 = lshr i32 %tmp4261.i187, 13 ; <i32> [#uses=1] + %tmp4262.masked.i = and i32 %tmp4262.i188, 64512 ; <i32> [#uses=1] + %tmp42665693.masked.i = or i32 %tmp4262.masked.i, %tmp4210.i ; <i32> [#uses=1] + br label %bb4272.i +bb4272.i: ; preds = %bb4259.i, %bb4246.i, %bb4236.i, %bb4219.i + %tmp42665693.masked.pn.i = phi i32 [ %tmp42665693.masked.i, %bb4259.i ], [ %tmp4253.masked.i, %bb4246.i ], [ %tmp4210.i, %bb4236.i ], [ %tmp4210.i, %bb4219.i ] ; <i32> [#uses=1] + %tmp4268.pn.i = phi i32 [ 0, %bb4259.i ], [ %tmp4210.i, %bb4246.i ], [ 31744, %bb4236.i ], [ 32767, %bb4219.i ] ; <i32> [#uses=1] + %tmp100.0.i = or i32 %tmp4268.pn.i, %tmp42665693.masked.pn.i ; <i32> [#uses=0] + %tmp4289.i = and i32 %f_green.0.reg2mem.2.i, 8388607 ; <i32> [#uses=1] + br i1 false, label %bb4294.i, label %bb4316.i +bb4294.i: ; preds = %bb4272.i + br i1 false, label %bb4347.i, label %bb4311.i +bb4311.i: ; preds = %bb4294.i + br label %bb4347.i +bb4316.i: ; preds = %bb4272.i + br i1 false, label %bb4321.i, label %bb4334.i +bb4321.i: ; preds = %bb4316.i + br label %bb4347.i +bb4334.i: ; preds = %bb4316.i + %tmp4343.i = lshr i32 %tmp4289.i, 13 ; <i32> [#uses=0] + br label %bb4347.i +bb4347.i: ; preds = %bb4334.i, %bb4321.i, %bb4311.i, %bb4294.i + %tmp4364.i190 = and i32 %f_blue.0.reg2mem.2.i, 8388607 ; <i32> [#uses=1] + br i1 false, label %bb4369.i192, label %bb4391.i +bb4369.i192: ; preds = 
%bb4347.i + br i1 false, label %bb4422.i, label %bb4386.i +bb4386.i: ; preds = %bb4369.i192 + br label %bb4422.i +bb4391.i: ; preds = %bb4347.i + br i1 false, label %bb4396.i, label %bb4409.i +bb4396.i: ; preds = %bb4391.i + br label %bb4422.i +bb4409.i: ; preds = %bb4391.i + %tmp4418.i = lshr i32 %tmp4364.i190, 13 ; <i32> [#uses=0] + br label %bb4422.i +bb4422.i: ; preds = %bb4409.i, %bb4396.i, %bb4386.i, %bb4369.i192 + %tmp4439.i194 = and i32 %f_alpha.1.reg2mem.1.i, 8388607 ; <i32> [#uses=1] + br i1 false, label %bb4444.i, label %bb4466.i +bb4444.i: ; preds = %bb4422.i + br i1 false, label %bb4497.i, label %bb4461.i +bb4461.i: ; preds = %bb4444.i + br label %glgScalarFloatConversion.exit +bb4466.i: ; preds = %bb4422.i + br i1 false, label %bb4471.i, label %bb4484.i +bb4471.i: ; preds = %bb4466.i + br label %glgScalarFloatConversion.exit +bb4484.i: ; preds = %bb4466.i + %tmp4493.i = lshr i32 %tmp4439.i194, 13 ; <i32> [#uses=0] + br label %glgScalarFloatConversion.exit +bb4497.i: ; preds = %bb4444.i + br label %glgScalarFloatConversion.exit +bb4517.i202: ; preds = %bb4173.i, %bb4173.i + br label %glgScalarFloatConversion.exit +bb4575.i: ; preds = %bb4173.i + br label %glgScalarFloatConversion.exit +bb4769.i: ; preds = %bb4173.i + br label %glgScalarFloatConversion.exit +bb4950.i: ; preds = %bb4173.i + br i1 false, label %bb4956.i, label %bb4993.i +bb4956.i: ; preds = %bb4950.i + br label %glgScalarFloatConversion.exit +bb4993.i: ; preds = %bb4950.i + br i1 false, label %bb4999.i, label %bb5036.i +bb4999.i: ; preds = %bb4993.i + br label %glgScalarFloatConversion.exit +bb5036.i: ; preds = %bb4993.i + br label %glgScalarFloatConversion.exit +UnifiedReturnBlock.i235: ; preds = %bb2883.i, %bb2872.i + br label %glgScalarFloatConversion.exit +glgScalarFloatConversion.exit: ; preds = %UnifiedReturnBlock.i235, %bb5036.i, %bb4999.i, %bb4956.i, %bb4769.i, %bb4575.i, %bb4517.i202, %bb4497.i, %bb4484.i, %bb4471.i, %bb4461.i, %bb4178.i, %bb4145.i, %bb4117.i, %bb4083.i, 
%bb3941.i, %bb3794.i, %bb3749.i, %bb3729.i, %bb3716.i, %bb3703.i, %bb3693.i, %bb3490.i, %bb3475.i, %bb3429.i, %bb3376.i, %bb3357.i, %bb3337.i, %bb3324.i, %bb3311.i, %bb3301.i, %bb3258.i, %bb3011.i, %bb2972.i + br label %bb18851.i +bb16697.i: ; preds = %loadColor_BGRA_UI8888R.exit + br i1 false, label %bb17749.i, label %bb16700.i +bb16700.i: ; preds = %bb16697.i + switch i32 0, label %bb16829.i [ + i32 4, label %bb16705.i + i32 8, label %bb16743.i + i32 11, label %bb16795.i + ] +bb16705.i: ; preds = %bb16700.i + switch i32 %df4081.0.i, label %bb17183.i [ + i32 1, label %bb16710.i + i32 2, label %bb16721.i + i32 3, label %bb16732.i + ] +bb16710.i: ; preds = %bb16705.i + br label %bb17195.i +bb16721.i: ; preds = %bb16705.i + br label %bb17195.i +bb16732.i: ; preds = %bb16705.i + br label %bb17195.i +bb16743.i: ; preds = %bb16700.i + switch i32 0, label %bb16759.i [ + i32 4, label %bb16755.i + i32 11, label %bb16755.i + ] +bb16755.i: ; preds = %bb16743.i, %bb16743.i + br label %bb17195.i +bb16759.i: ; preds = %bb16743.i + switch i32 %df4081.0.i, label %bb17183.i [ + i32 1, label %bb16764.i + i32 2, label %bb16775.i + i32 3, label %bb16786.i + ] +bb16764.i: ; preds = %bb16759.i + br label %bb17195.i +bb16775.i: ; preds = %bb16759.i + br label %bb17195.i +bb16786.i: ; preds = %bb16759.i + br label %bb17195.i +bb16795.i: ; preds = %bb16700.i + switch i32 0, label %bb17183.i [ + i32 4, label %bb16807.i + i32 8, label %bb16807.i + i32 3, label %bb16823.i + ] +bb16807.i: ; preds = %bb16795.i, %bb16795.i + br label %bb17195.i +bb16823.i: ; preds = %bb16795.i + br label %bb17195.i +bb16829.i: ; preds = %bb16700.i + switch i32 %sf4083.0.i, label %bb17183.i [ + i32 10, label %bb16834.i + i32 0, label %bb16892.i + i32 1, label %bb16953.i + i32 2, label %bb17037.i + i32 3, label %bb17121.i + ] +bb16834.i: ; preds = %bb16829.i + switch i32 0, label %bb16878.i [ + i32 4, label %bb16839.i + i32 8, label %bb16858.i + i32 11, label %bb16874.i + ] +bb16839.i: ; preds = %bb16834.i + br 
label %bb17195.i +bb16858.i: ; preds = %bb16834.i + br label %bb17195.i +bb16874.i: ; preds = %bb16834.i + br label %bb17195.i +bb16878.i: ; preds = %bb16834.i + br i1 false, label %bb16883.i, label %bb17183.i +bb16883.i: ; preds = %bb16878.i + br label %bb17195.i +bb16892.i: ; preds = %bb16829.i + switch i32 0, label %bb16930.i [ + i32 4, label %bb16897.i + i32 8, label %bb16913.i + i32 11, label %bb16926.i + ] +bb16897.i: ; preds = %bb16892.i + br label %bb17195.i +bb16913.i: ; preds = %bb16892.i + br label %bb17195.i +bb16926.i: ; preds = %bb16892.i + br label %bb17195.i +bb16930.i: ; preds = %bb16892.i + br i1 false, label %bb16936.i, label %bb16939.i +bb16936.i: ; preds = %bb16930.i + br label %bb17195.i +bb16939.i: ; preds = %bb16930.i + br i1 false, label %bb16944.i, label %bb17183.i +bb16944.i: ; preds = %bb16939.i + br label %bb17195.i +bb16953.i: ; preds = %bb16829.i + switch i32 0, label %bb17003.i [ + i32 4, label %bb16958.i + i32 8, label %bb16979.i + i32 11, label %bb16997.i + ] +bb16958.i: ; preds = %bb16953.i + br label %bb17195.i +bb16979.i: ; preds = %bb16953.i + br label %bb17195.i +bb16997.i: ; preds = %bb16953.i + br label %bb17195.i +bb17003.i: ; preds = %bb16953.i + switch i32 %df4081.0.i, label %bb17183.i [ + i32 0, label %bb17020.i + i32 2, label %bb17020.i + i32 10, label %bb17020.i + i32 3, label %bb17028.i + ] +bb17020.i: ; preds = %bb17003.i, %bb17003.i, %bb17003.i + br label %bb17195.i +bb17028.i: ; preds = %bb17003.i + br label %bb17195.i +bb17037.i: ; preds = %bb16829.i + switch i32 0, label %bb17087.i [ + i32 4, label %bb17042.i + i32 8, label %bb17063.i + i32 11, label %bb17081.i + ] +bb17042.i: ; preds = %bb17037.i + br label %bb17195.i +bb17063.i: ; preds = %bb17037.i + br label %bb17195.i +bb17081.i: ; preds = %bb17037.i + br label %bb17195.i +bb17087.i: ; preds = %bb17037.i + switch i32 %df4081.0.i, label %bb17183.i [ + i32 0, label %bb17104.i + i32 1, label %bb17104.i + i32 10, label %bb17104.i + i32 3, label %bb17112.i + ] 
+bb17104.i: ; preds = %bb17087.i, %bb17087.i, %bb17087.i + br label %bb17195.i +bb17112.i: ; preds = %bb17087.i + br label %bb17195.i +bb17121.i: ; preds = %bb16829.i + switch i32 0, label %bb17183.i [ + i32 4, label %bb17126.i + i32 8, label %bb17149.i + i32 11, label %bb17167.i + i32 10, label %bb17180.i + ] +bb17126.i: ; preds = %bb17121.i + br label %bb17195.i +bb17149.i: ; preds = %bb17121.i + br label %bb17195.i +bb17167.i: ; preds = %bb17121.i + br label %bb17195.i +bb17180.i: ; preds = %bb17121.i + br label %bb17183.i +bb17183.i: ; preds = %bb17180.i, %bb17121.i, %bb17087.i, %bb17003.i, %bb16939.i, %bb16878.i, %bb16829.i, %bb16795.i, %bb16759.i, %bb16705.i + br label %bb17195.i +bb17195.i: ; preds = %bb17183.i, %bb17167.i, %bb17149.i, %bb17126.i, %bb17112.i, %bb17104.i, %bb17081.i, %bb17063.i, %bb17042.i, %bb17028.i, %bb17020.i, %bb16997.i, %bb16979.i, %bb16958.i, %bb16944.i, %bb16936.i, %bb16926.i, %bb16913.i, %bb16897.i, %bb16883.i, %bb16874.i, %bb16858.i, %bb16839.i, %bb16823.i, %bb16807.i, %bb16786.i, %bb16775.i, %bb16764.i, %bb16755.i, %bb16732.i, %bb16721.i, %bb16710.i + br i1 false, label %bb18845.i, label %bb17225.i +bb17225.i: ; preds = %bb17195.i + switch i32 %dt4080.0.i, label %bb17677.i [ + i32 4, label %bb17227.i + i32 8, label %bb17259.i + i32 9, label %bb17309.i + i32 10, label %bb17359.i + i32 11, label %bb17359.i + i32 14, label %bb17409.i + i32 15, label %bb17474.i + i32 18, label %bb17539.i + i32 19, label %bb17604.i + i32 0, label %bb17680.i + i32 1, label %bb17672.i + i32 2, label %bb17673.i + i32 3, label %bb17674.i + i32 5, label %bb17675.i + i32 12, label %bb17676.i + i32 13, label %bb17676.i + i32 16, label %bb17680.i + i32 17, label %bb17680.i + ] +bb17227.i: ; preds = %bb17225.i + br i1 false, label %bb18845.i, label %bb17230.i +bb17230.i: ; preds = %bb17227.i + br label %bb18851.i +bb17259.i: ; preds = %bb17225.i + br i1 false, label %bb17284.i, label %bb17262.i +bb17262.i: ; preds = %bb17259.i + br label %bb17284.i +bb17284.i: ; 
preds = %bb17262.i, %bb17259.i + br label %bb18851.i +bb17309.i: ; preds = %bb17225.i + br i1 false, label %bb17334.i, label %bb17312.i +bb17312.i: ; preds = %bb17309.i + br label %bb17334.i +bb17334.i: ; preds = %bb17312.i, %bb17309.i + br label %bb18851.i +bb17359.i: ; preds = %bb17225.i, %bb17225.i + br i1 false, label %bb17384.i, label %bb17362.i +bb17362.i: ; preds = %bb17359.i + br label %bb17384.i +bb17384.i: ; preds = %bb17362.i, %bb17359.i + br label %bb18851.i +bb17409.i: ; preds = %bb17225.i + br i1 false, label %bb17441.i, label %bb17412.i +bb17412.i: ; preds = %bb17409.i + br label %bb17441.i +bb17441.i: ; preds = %bb17412.i, %bb17409.i + br label %bb18851.i +bb17474.i: ; preds = %bb17225.i + br i1 false, label %bb17506.i, label %bb17477.i +bb17477.i: ; preds = %bb17474.i + br label %bb17506.i +bb17506.i: ; preds = %bb17477.i, %bb17474.i + br label %bb18851.i +bb17539.i: ; preds = %bb17225.i + br i1 false, label %bb17571.i, label %bb17542.i +bb17542.i: ; preds = %bb17539.i + br label %bb17571.i +bb17571.i: ; preds = %bb17542.i, %bb17539.i + br label %bb18851.i +bb17604.i: ; preds = %bb17225.i + br i1 false, label %bb17636.i, label %bb17607.i +bb17607.i: ; preds = %bb17604.i + br label %bb17636.i +bb17636.i: ; preds = %bb17607.i, %bb17604.i + br label %bb18851.i +bb17672.i: ; preds = %bb17225.i + br i1 false, label %bb17716.i, label %bb17683.i +bb17673.i: ; preds = %bb17225.i + br i1 false, label %bb17716.i, label %bb17683.i +bb17674.i: ; preds = %bb17225.i + br i1 false, label %bb17716.i, label %bb17683.i +bb17675.i: ; preds = %bb17225.i + br i1 false, label %bb17716.i, label %bb17683.i +bb17676.i: ; preds = %bb17225.i, %bb17225.i + br i1 false, label %bb17716.i, label %bb17683.i +bb17677.i: ; preds = %bb17225.i + unreachable +bb17680.i: ; preds = %bb17225.i, %bb17225.i, %bb17225.i + br i1 false, label %bb17716.i, label %bb17683.i +bb17683.i: ; preds = %bb17680.i, %bb17676.i, %bb17675.i, %bb17674.i, %bb17673.i, %bb17672.i + br label %bb17716.i 
+bb17716.i: ; preds = %bb17683.i, %bb17680.i, %bb17676.i, %bb17675.i, %bb17674.i, %bb17673.i, %bb17672.i + br label %bb18851.i +bb17749.i: ; preds = %bb16697.i + br i1 false, label %bb17757.i, label %bb17903.i +bb17757.i: ; preds = %bb17749.i + switch i32 0, label %bb17903.i [ + i32 0, label %bb17759.i + i32 1, label %bb17853.i + i32 2, label %bb17853.i + ] +bb17759.i: ; preds = %bb17757.i + br i1 false, label %bb17764.i, label %bb17772.i +bb17764.i: ; preds = %bb17759.i + br label %bb18032.i +bb17772.i: ; preds = %bb17759.i + switch i32 %sf4083.0.i, label %bb17798.i [ + i32 1, label %bb17777.i + i32 2, label %bb17790.i + ] +bb17777.i: ; preds = %bb17772.i + switch i32 0, label %bb18032.i [ + i32 4, label %bb17818.i + i32 8, label %bb17818.i + i32 11, label %bb17845.i + ] +bb17790.i: ; preds = %bb17772.i + switch i32 0, label %bb18032.i [ + i32 4, label %bb17818.i + i32 8, label %bb17818.i + i32 11, label %bb17845.i + ] +bb17798.i: ; preds = %bb17772.i + switch i32 0, label %bb18032.i [ + i32 4, label %bb17818.i + i32 8, label %bb17818.i + i32 11, label %bb17845.i + ] +bb17818.i: ; preds = %bb17798.i, %bb17798.i, %bb17790.i, %bb17790.i, %bb17777.i, %bb17777.i + switch i32 0, label %bb18032.i [ + i32 4, label %bb17845.i + i32 11, label %bb17845.i + i32 8, label %bb17946.i + ] +bb17845.i: ; preds = %bb17818.i, %bb17818.i, %bb17798.i, %bb17790.i, %bb17777.i + switch i32 0, label %bb18032.i [ + i32 4, label %bb17908.i + i32 8, label %bb17946.i + i32 11, label %bb17998.i + ] +bb17853.i: ; preds = %bb17757.i, %bb17757.i + br i1 false, label %bb17890.i, label %bb17903.i +bb17890.i: ; preds = %bb17853.i + br label %bb17903.i +bb17903.i: ; preds = %bb17890.i, %bb17853.i, %bb17757.i, %bb17749.i + switch i32 0, label %bb18032.i [ + i32 4, label %bb17908.i + i32 8, label %bb17946.i + i32 11, label %bb17998.i + ] +bb17908.i: ; preds = %bb17903.i, %bb17845.i + switch i32 %df4081.0.i, label %bb18386.i [ + i32 1, label %bb17913.i + i32 2, label %bb17924.i + i32 3, label %bb17935.i 
+ ] +bb17913.i: ; preds = %bb17908.i + br label %bb18398.i +bb17924.i: ; preds = %bb17908.i + br label %bb18398.i +bb17935.i: ; preds = %bb17908.i + br label %bb18398.i +bb17946.i: ; preds = %bb17903.i, %bb17845.i, %bb17818.i + switch i32 0, label %bb17962.i [ + i32 4, label %bb17958.i + i32 11, label %bb17958.i + ] +bb17958.i: ; preds = %bb17946.i, %bb17946.i + br label %bb18398.i +bb17962.i: ; preds = %bb17946.i + switch i32 %df4081.0.i, label %bb18386.i [ + i32 1, label %bb17967.i + i32 2, label %bb17978.i + i32 3, label %bb17989.i + ] +bb17967.i: ; preds = %bb17962.i + br label %bb18398.i +bb17978.i: ; preds = %bb17962.i + br label %bb18398.i +bb17989.i: ; preds = %bb17962.i + br label %bb18398.i +bb17998.i: ; preds = %bb17903.i, %bb17845.i + switch i32 0, label %bb18386.i [ + i32 4, label %bb18010.i + i32 8, label %bb18010.i + i32 3, label %bb18026.i + ] +bb18010.i: ; preds = %bb17998.i, %bb17998.i + br label %bb18398.i +bb18026.i: ; preds = %bb17998.i + br label %bb18398.i +bb18032.i: ; preds = %bb17903.i, %bb17845.i, %bb17818.i, %bb17798.i, %bb17790.i, %bb17777.i, %bb17764.i + switch i32 %sf4083.0.i, label %bb18386.i [ + i32 10, label %bb18037.i + i32 0, label %bb18095.i + i32 1, label %bb18156.i + i32 2, label %bb18240.i + i32 3, label %bb18324.i + ] +bb18037.i: ; preds = %bb18032.i + switch i32 0, label %bb18081.i [ + i32 4, label %bb18042.i + i32 8, label %bb18061.i + i32 11, label %bb18077.i + ] +bb18042.i: ; preds = %bb18037.i + br label %bb18398.i +bb18061.i: ; preds = %bb18037.i + br label %bb18398.i +bb18077.i: ; preds = %bb18037.i + br label %bb18398.i +bb18081.i: ; preds = %bb18037.i + br i1 false, label %bb18086.i, label %bb18386.i +bb18086.i: ; preds = %bb18081.i + br label %bb18398.i +bb18095.i: ; preds = %bb18032.i + switch i32 0, label %bb18133.i [ + i32 4, label %bb18100.i + i32 8, label %bb18116.i + i32 11, label %bb18129.i + ] +bb18100.i: ; preds = %bb18095.i + br label %bb18398.i +bb18116.i: ; preds = %bb18095.i + br label %bb18398.i 
+bb18129.i: ; preds = %bb18095.i + br label %bb18398.i +bb18133.i: ; preds = %bb18095.i + br i1 false, label %bb18139.i, label %bb18142.i +bb18139.i: ; preds = %bb18133.i + br label %bb18398.i +bb18142.i: ; preds = %bb18133.i + br i1 false, label %bb18147.i, label %bb18386.i +bb18147.i: ; preds = %bb18142.i + br label %bb18398.i +bb18156.i: ; preds = %bb18032.i + switch i32 0, label %bb18206.i [ + i32 4, label %bb18161.i + i32 8, label %bb18182.i + i32 11, label %bb18200.i + ] +bb18161.i: ; preds = %bb18156.i + br label %bb18398.i +bb18182.i: ; preds = %bb18156.i + br label %bb18398.i +bb18200.i: ; preds = %bb18156.i + br label %bb18398.i +bb18206.i: ; preds = %bb18156.i + switch i32 %df4081.0.i, label %bb18386.i [ + i32 0, label %bb18223.i + i32 2, label %bb18223.i + i32 10, label %bb18223.i + i32 3, label %bb18231.i + ] +bb18223.i: ; preds = %bb18206.i, %bb18206.i, %bb18206.i + br label %bb18398.i +bb18231.i: ; preds = %bb18206.i + br label %bb18398.i +bb18240.i: ; preds = %bb18032.i + switch i32 0, label %bb18290.i [ + i32 4, label %bb18245.i + i32 8, label %bb18266.i + i32 11, label %bb18284.i + ] +bb18245.i: ; preds = %bb18240.i + br label %bb18398.i +bb18266.i: ; preds = %bb18240.i + br label %bb18398.i +bb18284.i: ; preds = %bb18240.i + br label %bb18398.i +bb18290.i: ; preds = %bb18240.i + switch i32 %df4081.0.i, label %bb18386.i [ + i32 0, label %bb18307.i + i32 1, label %bb18307.i + i32 10, label %bb18307.i + i32 3, label %bb18315.i + ] +bb18307.i: ; preds = %bb18290.i, %bb18290.i, %bb18290.i + br label %bb18398.i +bb18315.i: ; preds = %bb18290.i + br label %bb18398.i +bb18324.i: ; preds = %bb18032.i + switch i32 0, label %bb18386.i [ + i32 4, label %bb18329.i + i32 8, label %bb18352.i + i32 11, label %bb18370.i + i32 10, label %bb18383.i + ] +bb18329.i: ; preds = %bb18324.i + br label %bb18398.i +bb18352.i: ; preds = %bb18324.i + br label %bb18398.i +bb18370.i: ; preds = %bb18324.i + br label %bb18398.i +bb18383.i: ; preds = %bb18324.i + br label 
%bb18386.i +bb18386.i: ; preds = %bb18383.i, %bb18324.i, %bb18290.i, %bb18206.i, %bb18142.i, %bb18081.i, %bb18032.i, %bb17998.i, %bb17962.i, %bb17908.i + br label %bb18398.i +bb18398.i: ; preds = %bb18386.i, %bb18370.i, %bb18352.i, %bb18329.i, %bb18315.i, %bb18307.i, %bb18284.i, %bb18266.i, %bb18245.i, %bb18231.i, %bb18223.i, %bb18200.i, %bb18182.i, %bb18161.i, %bb18147.i, %bb18139.i, %bb18129.i, %bb18116.i, %bb18100.i, %bb18086.i, %bb18077.i, %bb18061.i, %bb18042.i, %bb18026.i, %bb18010.i, %bb17989.i, %bb17978.i, %bb17967.i, %bb17958.i, %bb17935.i, %bb17924.i, %bb17913.i + br i1 false, label %bb18589.i, label %bb18431.i +bb18431.i: ; preds = %bb18398.i + switch i32 0, label %bb18589.i [ + i32 0, label %bb18433.i + i32 1, label %bb18487.i + i32 2, label %bb18487.i + ] +bb18433.i: ; preds = %bb18431.i + switch i32 0, label %bb18589.i [ + i32 4, label %bb18452.i + i32 8, label %bb18452.i + i32 11, label %bb18479.i + ] +bb18452.i: ; preds = %bb18433.i, %bb18433.i + switch i32 0, label %bb18589.i [ + i32 4, label %bb18479.i + i32 11, label %bb18479.i + ] +bb18479.i: ; preds = %bb18452.i, %bb18452.i, %bb18433.i + br i1 false, label %bb18845.i, label %bb18592.i +bb18487.i: ; preds = %bb18431.i, %bb18431.i + br i1 false, label %bb18492.i, label %bb18521.i +bb18492.i: ; preds = %bb18487.i + br i1 false, label %bb18508.i, label %bb18529.i +bb18508.i: ; preds = %bb18492.i + switch i32 0, label %bb18589.i [ + i32 4, label %bb18541.i + i32 8, label %bb18541.i + ] +bb18521.i: ; preds = %bb18487.i + br label %bb18529.i +bb18529.i: ; preds = %bb18521.i, %bb18492.i + switch i32 0, label %bb18589.i [ + i32 4, label %bb18541.i + i32 8, label %bb18541.i + ] +bb18541.i: ; preds = %bb18529.i, %bb18529.i, %bb18508.i, %bb18508.i + br i1 false, label %bb18560.i, label %bb18589.i +bb18560.i: ; preds = %bb18541.i + br i1 false, label %bb18576.i, label %bb18589.i +bb18576.i: ; preds = %bb18560.i + br label %bb18589.i +bb18589.i: ; preds = %bb18576.i, %bb18560.i, %bb18541.i, %bb18529.i, 
%bb18508.i, %bb18452.i, %bb18433.i, %bb18431.i, %bb18398.i + br i1 false, label %bb18845.i, label %bb18592.i +bb18592.i: ; preds = %bb18589.i, %bb18479.i + switch i32 %dt4080.0.i, label %bb18809.i [ + i32 4, label %bb18845.i + i32 8, label %bb18594.i + i32 9, label %bb18619.i + i32 10, label %bb18644.i + i32 11, label %bb18644.i + i32 14, label %bb18669.i + i32 15, label %bb18702.i + i32 18, label %bb18735.i + i32 19, label %bb18768.i + i32 0, label %bb18812.i + i32 1, label %bb18804.i + i32 2, label %bb18805.i + i32 3, label %bb18806.i + i32 5, label %bb18807.i + i32 12, label %bb18808.i + i32 13, label %bb18808.i + i32 16, label %bb18812.i + i32 17, label %bb18812.i + ] +bb18594.i: ; preds = %bb18592.i + br label %bb18851.i +bb18619.i: ; preds = %bb18592.i + br label %bb18851.i +bb18644.i: ; preds = %bb18592.i, %bb18592.i + br label %bb18851.i +bb18669.i: ; preds = %bb18592.i + br label %bb18851.i +bb18702.i: ; preds = %bb18592.i + br label %bb18851.i +bb18735.i: ; preds = %bb18592.i + br label %bb18851.i +bb18768.i: ; preds = %bb18592.i + br label %bb18851.i +bb18804.i: ; preds = %bb18592.i + br label %bb18812.i +bb18805.i: ; preds = %bb18592.i + br label %bb18812.i +bb18806.i: ; preds = %bb18592.i + br label %bb18812.i +bb18807.i: ; preds = %bb18592.i + br label %bb18812.i +bb18808.i: ; preds = %bb18592.i, %bb18592.i + br label %bb18812.i +bb18809.i: ; preds = %bb18592.i + unreachable +bb18812.i: ; preds = %bb18808.i, %bb18807.i, %bb18806.i, %bb18805.i, %bb18804.i, %bb18592.i, %bb18592.i, %bb18592.i + br label %bb18845.i +bb18845.i: ; preds = %bb18812.i, %bb18592.i, %bb18589.i, %bb18479.i, %bb17227.i, %bb17195.i + br label %bb18851.i +bb18851.i: ; preds = %bb18845.i, %bb18768.i, %bb18735.i, %bb18702.i, %bb18669.i, %bb18644.i, %bb18619.i, %bb18594.i, %bb17716.i, %bb17636.i, %bb17571.i, %bb17506.i, %bb17441.i, %bb17384.i, %bb17334.i, %bb17284.i, %bb17230.i, %glgScalarFloatConversion.exit + br label %storeColor_RGB_UI.exit +storeColor_RGB_UI.exit: ; preds = 
%bb18851.i + br i1 false, label %bb19786.i, label %bb16650.i +bb19786.i: ; preds = %storeColor_RGB_UI.exit + br label %bb19808.i +bb19808.i: ; preds = %bb19786.i + br i1 false, label %bb19818.i, label %bb5276.i +bb19818.i: ; preds = %bb19808.i + br i1 false, label %bb19840.i, label %bb19821.i +bb19821.i: ; preds = %bb19818.i + br label %bb19840.i +bb19840.i: ; preds = %bb19821.i, %bb19818.i + br i1 false, label %UnifiedReturnBlock.i, label %bb19843.i +bb19843.i: ; preds = %bb19840.i + br label %t.exit +UnifiedReturnBlock.i: ; preds = %bb19840.i, %bb4501.i + br label %t.exit +t.exit: ; preds = %UnifiedReturnBlock.i, %bb19843.i, %bb4517.i, %bb4354.i + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll b/src/LLVM/test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll new file mode 100644 index 0000000..71aa603 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll
@@ -0,0 +1,55 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin + + %struct.BiContextType = type { i16, i8, i32 } + %struct.Bitstream = type { i32, i32, i8, i32, i32, i8, i8, i32, i32, i8*, i32 } + %struct.DataPartition = type { %struct.Bitstream*, %struct.EncodingEnvironment, %struct.EncodingEnvironment } + %struct.DecRefPicMarking_t = type { i32, i32, i32, i32, i32, %struct.DecRefPicMarking_t* } + %struct.EncodingEnvironment = type { i32, i32, i32, i32, i32, i8*, i32*, i32, i32 } + %struct.ImageParameters = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8**, i8**, i32, i32***, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [9 x [16 x [16 x i16]]], [5 x [16 x [16 x i16]]], [9 x [8 x [8 x i16]]], [2 x [4 x [16 x [16 x i16]]]], [16 x [16 x i16]], [16 x [16 x i32]], i32****, i32***, i32***, i32***, i32****, i32****, %struct.Picture*, %struct.Slice*, %struct.Macroblock*, i32*, i32*, i32, i32, i32, i32, [4 x [4 x i32]], i32, i32, i32, i32, i32, double, i32, i32, i32, i32, i16******, i16******, i16******, i16******, [15 x i16], i32, i32, i32, i32, i32, i32, i32, i32, [6 x [32 x i32]], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [1 x i32], i32, i32, [2 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.DecRefPicMarking_t*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, double**, double***, i32***, double**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [3 x [2 x i32]], [2 x i32], i32, i32, i16, i32, i32, i32, i32, i32 } + %struct.Macroblock = type { i32, i32, i32, [2 x i32], i32, [8 x i32], %struct.Macroblock*, %struct.Macroblock*, i32, [2 x [4 x [4 x [2 x i32]]]], [16 x i8], [16 x i8], i32, i64, [4 x i32], [4 x i32], i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, 
double, i32, i32, i32, i32, i32, i32, i32, i32, i32 } + %struct.MotionInfoContexts = type { [3 x [11 x %struct.BiContextType]], [2 x [9 x %struct.BiContextType]], [2 x [10 x %struct.BiContextType]], [2 x [6 x %struct.BiContextType]], [4 x %struct.BiContextType], [4 x %struct.BiContextType], [3 x %struct.BiContextType] } + %struct.Picture = type { i32, i32, [100 x %struct.Slice*], i32, float, float, float } + %struct.Slice = type { i32, i32, i32, i32, i32, i32, %struct.DataPartition*, %struct.MotionInfoContexts*, %struct.TextureInfoContexts*, i32, i32*, i32*, i32*, i32, i32*, i32*, i32*, i32 (i32)*, [3 x [2 x i32]] } + %struct.TextureInfoContexts = type { [2 x %struct.BiContextType], [4 x %struct.BiContextType], [3 x [4 x %struct.BiContextType]], [10 x [4 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]], [10 x [5 x %struct.BiContextType]], [10 x [5 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]] } +@images = external global %struct.ImageParameters ; <%struct.ImageParameters*> [#uses=2] + +declare i8* @calloc(i32, i32) + +define fastcc void @init_global_buffers() nounwind { +entry: + %tmp50.i.i = mul i32 0, 0 ; <i32> [#uses=2] + br i1 false, label %init_orig_buffers.exit, label %cond_true.i29 + +cond_true.i29: ; preds = %entry + %tmp17.i = load i32* getelementptr (%struct.ImageParameters* @images, i32 0, i32 20), align 8 ; <i32> [#uses=1] + %tmp20.i27 = load i32* getelementptr (%struct.ImageParameters* @images, i32 0, i32 16), align 8 ; <i32> [#uses=1] + %tmp8.i.i = select i1 false, i32 1, i32 0 ; <i32> [#uses=1] + br label %bb.i8.us.i + +bb.i8.us.i: ; preds = %get_mem2Dpel.exit.i.us.i, %cond_true.i29 + %j.04.i.us.i = phi i32 [ %indvar.next39.i, %get_mem2Dpel.exit.i.us.i ], [ 0, %cond_true.i29 ] ; <i32> [#uses=2] + %tmp13.i.us.i = getelementptr i16*** null, i32 %j.04.i.us.i ; <i16***> [#uses=0] + %tmp15.i.i.us.i = tail call i8* @calloc( i32 0, i32 2 ) ; <i8*> 
[#uses=0] + store i16* null, i16** null, align 4 + br label %bb.i.i.us.i + +get_mem2Dpel.exit.i.us.i: ; preds = %bb.i.i.us.i + %indvar.next39.i = add i32 %j.04.i.us.i, 1 ; <i32> [#uses=2] + %exitcond40.i = icmp eq i32 %indvar.next39.i, 2 ; <i1> [#uses=1] + br i1 %exitcond40.i, label %get_mem3Dpel.exit.split.i, label %bb.i8.us.i + +bb.i.i.us.i: ; preds = %bb.i.i.us.i, %bb.i8.us.i + %exitcond.i = icmp eq i32 0, %tmp8.i.i ; <i1> [#uses=1] + br i1 %exitcond.i, label %get_mem2Dpel.exit.i.us.i, label %bb.i.i.us.i + +get_mem3Dpel.exit.split.i: ; preds = %get_mem2Dpel.exit.i.us.i + %tmp30.i.i = shl i32 %tmp17.i, 2 ; <i32> [#uses=1] + %tmp31.i.i = mul i32 %tmp30.i.i, %tmp20.i27 ; <i32> [#uses=1] + %tmp23.i31 = add i32 %tmp31.i.i, %tmp50.i.i ; <i32> [#uses=1] + br label %init_orig_buffers.exit + +init_orig_buffers.exit: ; preds = %get_mem3Dpel.exit.split.i, %entry + %memory_size.0.i = phi i32 [ %tmp23.i31, %get_mem3Dpel.exit.split.i ], [ %tmp50.i.i, %entry ] ; <i32> [#uses=1] + %tmp41 = add i32 0, %memory_size.0.i ; <i32> [#uses=0] + unreachable +}
diff --git a/src/LLVM/test/CodeGen/ARM/2008-05-19-ScavengerAssert.ll b/src/LLVM/test/CodeGen/ARM/2008-05-19-ScavengerAssert.ll new file mode 100644 index 0000000..aa61d86 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2008-05-19-ScavengerAssert.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin + + %struct.Decoders = type { i32**, i16***, i16****, i16***, i16**, i8**, i8** } +@decoders = external global %struct.Decoders ; <%struct.Decoders*> [#uses=1] + +declare i8* @calloc(i32, i32) + +declare fastcc i32 @get_mem2Dint(i32***, i32, i32) + +define fastcc void @init_global_buffers() nounwind { +entry: + %tmp151 = tail call fastcc i32 @get_mem2Dint( i32*** getelementptr (%struct.Decoders* @decoders, i32 0, i32 0), i32 16, i32 16 ) ; <i32> [#uses=1] + %tmp158 = tail call i8* @calloc( i32 0, i32 4 ) ; <i8*> [#uses=0] + br i1 false, label %cond_true166, label %bb190.preheader + +bb190.preheader: ; preds = %entry + %memory_size.3555 = add i32 0, %tmp151 ; <i32> [#uses=0] + unreachable + +cond_true166: ; preds = %entry + unreachable +}
diff --git a/src/LLVM/test/CodeGen/ARM/2008-07-17-Fdiv.ll b/src/LLVM/test/CodeGen/ARM/2008-07-17-Fdiv.ll new file mode 100644 index 0000000..4cb768e --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2008-07-17-Fdiv.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=arm + +define float @f(float %a, float %b) nounwind { + %tmp = fdiv float %a, %b + ret float %tmp +}
diff --git a/src/LLVM/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll b/src/LLVM/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll new file mode 100644 index 0000000..83fde07 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=arm +; PR2589 + +define void @main({ i32 }*) { +entry: + %sret1 = alloca { i32 } ; <{ i32 }*> [#uses=1] + load { i32 }* %sret1 ; <{ i32 }>:1 [#uses=0] + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll b/src/LLVM/test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll new file mode 100644 index 0000000..adb0112 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6 -relocation-model=pic | grep comm + + %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } + %struct.__gcov_var = type { %struct.FILE*, i32, i32, i32, i32, i32, i32, [1025 x i32] } + %struct.__sFILEX = type opaque + %struct.__sbuf = type { i8*, i32 } +@__gcov_var = common global %struct.__gcov_var zeroinitializer ; <%struct.__gcov_var*> [#uses=1] + +define i32 @__gcov_close() nounwind { +entry: + load i32* getelementptr (%struct.__gcov_var* @__gcov_var, i32 0, i32 5), align 4 ; <i32>:0 [#uses=1] + ret i32 %0 +}
diff --git a/src/LLVM/test/CodeGen/ARM/2008-09-17-CoalescerBug.ll b/src/LLVM/test/CodeGen/ARM/2008-09-17-CoalescerBug.ll new file mode 100644 index 0000000..d3bc3e1 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2008-09-17-CoalescerBug.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin + +define void @gcov_exit() nounwind { +entry: + br i1 false, label %bb24, label %bb33.thread + +bb24: ; preds = %entry + br label %bb39 + +bb33.thread: ; preds = %entry + %0 = alloca i8, i32 0 ; <i8*> [#uses=1] + br label %bb39 + +bb39: ; preds = %bb33.thread, %bb24 + %.reg2mem.0 = phi i8* [ %0, %bb33.thread ], [ null, %bb24 ] ; <i8*> [#uses=0] + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2008-11-18-ScavengerAssert.ll b/src/LLVM/test/CodeGen/ARM/2008-11-18-ScavengerAssert.ll new file mode 100644 index 0000000..601a516 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2008-11-18-ScavengerAssert.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 + +define hidden i64 @__muldi3(i64 %u, i64 %v) nounwind { +entry: + %0 = trunc i64 %u to i32 ; <i32> [#uses=1] + %asmtmp = tail call { i32, i32, i32, i32, i32 } asm "@ Inlined umul_ppmm\0A\09mov\09$2, $5, lsr #16\0A\09mov\09$0, $6, lsr #16\0A\09bic\09$3, $5, $2, lsl #16\0A\09bic\09$4, $6, $0, lsl #16\0A\09mul\09$1, $3, $4\0A\09mul\09$4, $2, $4\0A\09mul\09$3, $0, $3\0A\09mul\09$0, $2, $0\0A\09adds\09$3, $4, $3\0A\09addcs\09$0, $0, #65536\0A\09adds\09$1, $1, $3, lsl #16\0A\09adc\09$0, $0, $3, lsr #16", "=&r,=r,=&r,=&r,=r,r,r,~{cc}"(i32 %0, i32 0) nounwind ; <{ i32, i32, i32, i32, i32 }> [#uses=1] + %asmresult1 = extractvalue { i32, i32, i32, i32, i32 } %asmtmp, 1 ; <i32> [#uses=1] + %asmresult116 = zext i32 %asmresult1 to i64 ; <i64> [#uses=1] + %asmresult116.ins = or i64 0, %asmresult116 ; <i64> [#uses=1] + %1 = lshr i64 %v, 32 ; <i64> [#uses=1] + %2 = mul i64 %1, %u ; <i64> [#uses=1] + %3 = add i64 %2, 0 ; <i64> [#uses=1] + %4 = shl i64 %3, 32 ; <i64> [#uses=1] + %5 = add i64 %asmresult116.ins, %4 ; <i64> [#uses=1] + ret i64 %5 +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-02-16-SpillerBug.ll b/src/LLVM/test/CodeGen/ARM/2009-02-16-SpillerBug.ll new file mode 100644 index 0000000..4c0c59c --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-02-16-SpillerBug.ll
@@ -0,0 +1,117 @@ +; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 + +target triple = "arm-apple-darwin9" + %struct.FILE_POS = type { i8, i8, i16, i32 } + %struct.FIRST_UNION = type { %struct.FILE_POS } + %struct.FOURTH_UNION = type { %struct.STYLE } + %struct.GAP = type { i8, i8, i16 } + %struct.LIST = type { %struct.rec*, %struct.rec* } + %struct.SECOND_UNION = type { { i16, i8, i8 } } + %struct.STYLE = type { { %struct.GAP }, { %struct.GAP }, i16, i16, i32 } + %struct.THIRD_UNION = type { { [2 x i32], [2 x i32] } } + %struct.head_type = type { [2 x %struct.LIST], %struct.FIRST_UNION, %struct.SECOND_UNION, %struct.THIRD_UNION, %struct.FOURTH_UNION, %struct.rec*, { %struct.rec* }, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, i32 } + %struct.rec = type { %struct.head_type } +@no_file_pos = external global %struct.FILE_POS ; <%struct.FILE_POS*> [#uses=1] +@"\01LC13423" = external constant [23 x i8] ; <[23 x i8]*> [#uses=1] +@"\01LC18972" = external constant [13 x i8] ; <[13 x i8]*> [#uses=1] + +define fastcc void @FlushGalley(%struct.rec* %hd) nounwind { +entry: + br label %RESUME + +RESUME: ; preds = %bb520.preheader, %entry + br label %bb396 + +bb122: ; preds = %bb396 + switch i32 0, label %bb394 [ + i32 1, label %bb131 + i32 2, label %bb244 + i32 4, label %bb244 + i32 5, label %bb244 + i32 6, label %bb244 + i32 7, label %bb244 + i32 11, label %bb244 + i32 12, label %bb244 + i32 15, label %bb244 + i32 17, label %bb244 + i32 18, label %bb244 + i32 19, label %bb244 + i32 20, label %bb396 + i32 21, label %bb396 + i32 22, label %bb396 + i32 23, label %bb396 + i32 24, label %bb244 + i32 25, label %bb244 + i32 26, label %bb244 + i32 27, label %bb244 + i32 28, label %bb244 + i32 29, label %bb244 + i32 30, label %bb244 + i32 31, label %bb244 + i32 32, label %bb244 + i32 33, label %bb244 + i32 34, label %bb244 + i32 35, label %bb244 + i32 36, label %bb244 + i32 37, label %bb244 + i32 38, label %bb244 + i32 39, 
label %bb244 + i32 40, label %bb244 + i32 41, label %bb244 + i32 42, label %bb244 + i32 43, label %bb244 + i32 44, label %bb244 + i32 45, label %bb244 + i32 46, label %bb244 + i32 50, label %bb244 + i32 51, label %bb244 + i32 94, label %bb244 + i32 95, label %bb244 + i32 96, label %bb244 + i32 97, label %bb244 + i32 98, label %bb244 + i32 99, label %bb244 + ] + +bb131: ; preds = %bb122 + br label %bb396 + +bb244: ; preds = %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122 + %0 = icmp eq %struct.rec* %stop_link.3, null ; <i1> [#uses=1] + br i1 %0, label %bb435, label %bb433 + +bb394: ; preds = %bb122 + call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 1, i32 3, i8* getelementptr ([23 x i8]* @"\01LC13423", i32 0, i32 0), i32 0, %struct.FILE_POS* @no_file_pos, i8* getelementptr ([13 x i8]* @"\01LC18972", i32 0, i32 0), i8* null) nounwind + br label %bb396 + +bb396: ; preds = %bb394, %bb131, %bb122, %bb122, %bb122, %bb122, %RESUME + %stop_link.3 = phi %struct.rec* [ null, %RESUME ], [ %stop_link.3, %bb394 ], [ %stop_link.3, %bb122 ], [ %stop_link.3, %bb122 ], [ %stop_link.3, %bb122 ], [ %stop_link.3, %bb122 ], [ %link.1, %bb131 ] ; <%struct.rec*> [#uses=7] + %headers_seen.1 = phi i32 [ 0, %RESUME ], [ %headers_seen.1, %bb394 ], [ 1, %bb122 ], [ 1, %bb122 ], [ 1, %bb122 ], [ 1, %bb122 ], [ %headers_seen.1, %bb131 ] ; <i32> [#uses=2] + %link.1 = load %struct.rec** null ; <%struct.rec*> [#uses=2] + %1 = icmp eq %struct.rec* %link.1, %hd ; <i1> [#uses=1] + br i1 %1, label %bb398, label %bb122 + +bb398: ; preds = %bb396 + unreachable + +bb433: ; preds = %bb244 + call fastcc void @Promote(%struct.rec* %hd, %struct.rec* %stop_link.3, %struct.rec* null, i32 1) nounwind + br label 
%bb435 + +bb435: ; preds = %bb433, %bb244 + br i1 false, label %bb491, label %bb499 + +bb491: ; preds = %bb435 + br label %bb499 + +bb499: ; preds = %bb499, %bb491, %bb435 + %2 = icmp eq %struct.rec* null, null ; <i1> [#uses=1] + br i1 %2, label %bb520.preheader, label %bb499 + +bb520.preheader: ; preds = %bb499 + br label %RESUME +} + +declare fastcc void @Promote(%struct.rec*, %struct.rec*, %struct.rec* nocapture, i32) nounwind + +declare void @Error(i32, i32, i8*, i32, %struct.FILE_POS*, ...) nounwind
diff --git a/src/LLVM/test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll b/src/LLVM/test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll new file mode 100644 index 0000000..a48f003 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s +; PR3610 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-s0:0:64-f80:32:32" +target triple = "arm-elf" + +define i32 @main(i8*) nounwind { +entry: + %ap = alloca i8* ; <i8**> [#uses=2] + store i8* %0, i8** %ap + %retval = alloca i32 ; <i32*> [#uses=2] + store i32 0, i32* %retval + %tmp = alloca float ; <float*> [#uses=1] + %1 = va_arg i8** %ap, float ; <float> [#uses=1] + store float %1, float* %tmp + br label %return + +return: ; preds = %entry + %2 = load i32* %retval ; <i32> [#uses=1] + ret i32 %2 +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-02-27-SpillerBug.ll b/src/LLVM/test/CodeGen/ARM/2009-02-27-SpillerBug.ll new file mode 100644 index 0000000..bc5e602 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-02-27-SpillerBug.ll
@@ -0,0 +1,229 @@ +; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 + +target triple = "arm-apple-darwin9" +@a = external global double ; <double*> [#uses=1] +@N = external global double ; <double*> [#uses=1] + +declare double @llvm.exp.f64(double) nounwind readonly + +define fastcc void @findratio(double* nocapture %res1, double* nocapture %res2) nounwind { +bb.thread: + br label %bb52 + +bb32: ; preds = %bb52 + %0 = fadd double 0.000000e+00, 0.000000e+00 ; <double> [#uses=1] + %1 = add i32 %j.1, 1 ; <i32> [#uses=1] + br label %bb52 + +bb52: ; preds = %bb53, %bb32, %bb.thread + %i.3494 = phi i32 [ 0, %bb.thread ], [ %3, %bb53 ], [ %i.3494, %bb32 ] ; <i32> [#uses=2] + %k.4 = phi double [ %0, %bb32 ], [ 0.000000e+00, %bb53 ], [ 0.000000e+00, %bb.thread ] ; <double> [#uses=2] + %j.1 = phi i32 [ %1, %bb32 ], [ 0, %bb53 ], [ 0, %bb.thread ] ; <i32> [#uses=2] + %2 = icmp sgt i32 %j.1, 99 ; <i1> [#uses=1] + br i1 %2, label %bb53, label %bb32 + +bb53: ; preds = %bb52 + %3 = add i32 %i.3494, 1 ; <i32> [#uses=2] + %phitmp = icmp sgt i32 %3, 999999 ; <i1> [#uses=1] + br i1 %phitmp, label %bb55, label %bb52 + +bb55: ; preds = %bb53 + %4 = load double* @a, align 4 ; <double> [#uses=10] + %5 = fadd double %4, 0.000000e+00 ; <double> [#uses=16] + %6 = fcmp ogt double %k.4, 0.000000e+00 ; <i1> [#uses=1] + %.pn404 = fmul double %4, %4 ; <double> [#uses=4] + %.pn402 = fmul double %5, %5 ; <double> [#uses=5] + %.pn165.in = load double* @N ; <double> [#uses=5] + %.pn198 = fmul double 0.000000e+00, %5 ; <double> [#uses=1] + %.pn185 = fsub double -0.000000e+00, 0.000000e+00 ; <double> [#uses=1] + %.pn147 = fsub double -0.000000e+00, 0.000000e+00 ; <double> [#uses=1] + %.pn141 = fdiv double 0.000000e+00, %4 ; <double> [#uses=1] + %.pn142 = fdiv double 0.000000e+00, %5 ; <double> [#uses=1] + %.pn136 = fdiv double 0.000000e+00, 0.000000e+00 ; <double> [#uses=1] + %.pn132 = fdiv double 0.000000e+00, %5 ; <double> [#uses=1] + %.pn123 = fdiv double 0.000000e+00, 0.000000e+00 ; <double> [#uses=1] 
+ %.pn124 = fdiv double 0.000000e+00, %.pn198 ; <double> [#uses=1] + %.pn120 = fdiv double 0.000000e+00, 0.000000e+00 ; <double> [#uses=1] + %.pn117 = fdiv double 0.000000e+00, %4 ; <double> [#uses=1] + %.pn118 = fdiv double %.pn185, %5 ; <double> [#uses=1] + %.pn88 = fdiv double %.pn147, %5 ; <double> [#uses=1] + %.pn81 = fsub double %.pn141, %.pn142 ; <double> [#uses=1] + %.pn77 = fsub double 0.000000e+00, %.pn136 ; <double> [#uses=1] + %.pn75 = fsub double 0.000000e+00, %.pn132 ; <double> [#uses=1] + %.pn69 = fsub double %.pn123, %.pn124 ; <double> [#uses=1] + %.pn67 = fsub double 0.000000e+00, %.pn120 ; <double> [#uses=1] + %.pn56 = fsub double %.pn117, %.pn118 ; <double> [#uses=1] + %.pn42 = fsub double 0.000000e+00, %.pn88 ; <double> [#uses=1] + %.pn60 = fmul double %.pn81, 0.000000e+00 ; <double> [#uses=1] + %.pn57 = fadd double %.pn77, 0.000000e+00 ; <double> [#uses=1] + %.pn58 = fmul double %.pn75, %.pn165.in ; <double> [#uses=1] + %.pn32 = fadd double %.pn69, 0.000000e+00 ; <double> [#uses=1] + %.pn33 = fmul double %.pn67, %.pn165.in ; <double> [#uses=1] + %.pn17 = fsub double 0.000000e+00, %.pn60 ; <double> [#uses=1] + %.pn9 = fadd double %.pn57, %.pn58 ; <double> [#uses=1] + %.pn30 = fmul double 0.000000e+00, %.pn56 ; <double> [#uses=1] + %.pn24 = fmul double 0.000000e+00, %.pn42 ; <double> [#uses=1] + %.pn1 = fadd double %.pn32, %.pn33 ; <double> [#uses=1] + %.pn28 = fsub double %.pn30, 0.000000e+00 ; <double> [#uses=1] + %.pn26 = fadd double %.pn28, 0.000000e+00 ; <double> [#uses=1] + %.pn22 = fsub double %.pn26, 0.000000e+00 ; <double> [#uses=1] + %.pn20 = fsub double %.pn24, 0.000000e+00 ; <double> [#uses=1] + %.pn18 = fadd double %.pn22, 0.000000e+00 ; <double> [#uses=1] + %.pn16 = fadd double %.pn20, 0.000000e+00 ; <double> [#uses=1] + %.pn14 = fsub double %.pn18, 0.000000e+00 ; <double> [#uses=1] + %.pn12 = fsub double %.pn16, %.pn17 ; <double> [#uses=1] + %.pn10 = fadd double %.pn14, 0.000000e+00 ; <double> [#uses=1] + %.pn8 = fadd double 
%.pn12, 0.000000e+00 ; <double> [#uses=1] + %.pn6 = fsub double %.pn10, 0.000000e+00 ; <double> [#uses=1] + %.pn4 = fsub double %.pn8, %.pn9 ; <double> [#uses=1] + %.pn2 = fadd double %.pn6, 0.000000e+00 ; <double> [#uses=1] + %.pn = fadd double %.pn4, 0.000000e+00 ; <double> [#uses=1] + %N1.0 = fsub double %.pn2, 0.000000e+00 ; <double> [#uses=2] + %D1.0 = fsub double %.pn, %.pn1 ; <double> [#uses=2] + br i1 %6, label %bb62, label %bb64 + +bb62: ; preds = %bb55 + %7 = fmul double 0.000000e+00, %4 ; <double> [#uses=1] + %8 = fsub double -0.000000e+00, %7 ; <double> [#uses=3] + %9 = fmul double 0.000000e+00, %5 ; <double> [#uses=1] + %10 = fsub double -0.000000e+00, %9 ; <double> [#uses=3] + %11 = fmul double %.pn404, %4 ; <double> [#uses=5] + %12 = fmul double %.pn402, %5 ; <double> [#uses=5] + %13 = fmul double 0.000000e+00, -2.000000e+00 ; <double> [#uses=1] + %14 = fdiv double 0.000000e+00, %.pn402 ; <double> [#uses=1] + %15 = fsub double 0.000000e+00, %14 ; <double> [#uses=1] + %16 = fmul double 0.000000e+00, %15 ; <double> [#uses=1] + %17 = fadd double %13, %16 ; <double> [#uses=1] + %18 = fmul double %.pn165.in, -2.000000e+00 ; <double> [#uses=5] + %19 = fmul double %18, 0.000000e+00 ; <double> [#uses=1] + %20 = fadd double %17, %19 ; <double> [#uses=1] + %21 = fmul double 0.000000e+00, %20 ; <double> [#uses=1] + %22 = fadd double 0.000000e+00, %21 ; <double> [#uses=1] + %23 = fdiv double 0.000000e+00, %12 ; <double> [#uses=1] + %24 = fsub double 0.000000e+00, %23 ; <double> [#uses=0] + %25 = fmul double %18, 0.000000e+00 ; <double> [#uses=1] + %26 = fadd double 0.000000e+00, %25 ; <double> [#uses=1] + %27 = fmul double 0.000000e+00, %26 ; <double> [#uses=1] + %28 = fsub double %22, %27 ; <double> [#uses=1] + %29 = fmul double %11, %4 ; <double> [#uses=1] + %30 = fmul double %12, %5 ; <double> [#uses=3] + %31 = fmul double %.pn165.in, -4.000000e+00 ; <double> [#uses=1] + %32 = fmul double %.pn165.in, 0x3FF5555555555555 ; <double> [#uses=1] + %33 = fmul double 
%32, 0.000000e+00 ; <double> [#uses=2] + %34 = fadd double %28, 0.000000e+00 ; <double> [#uses=1] + %35 = fsub double -0.000000e+00, 0.000000e+00 ; <double> [#uses=1] + %36 = fdiv double %35, %11 ; <double> [#uses=1] + %37 = fdiv double 0.000000e+00, %12 ; <double> [#uses=1] + %38 = fsub double %36, %37 ; <double> [#uses=1] + %39 = fmul double 0.000000e+00, %38 ; <double> [#uses=1] + %40 = fadd double 0.000000e+00, %39 ; <double> [#uses=1] + %41 = fadd double %40, 0.000000e+00 ; <double> [#uses=1] + %42 = fadd double %41, 0.000000e+00 ; <double> [#uses=1] + %43 = fmul double %42, 0.000000e+00 ; <double> [#uses=1] + %44 = fsub double %34, %43 ; <double> [#uses=1] + %45 = tail call double @llvm.exp.f64(double %8) nounwind ; <double> [#uses=1] + %46 = fsub double -0.000000e+00, %45 ; <double> [#uses=2] + %47 = fdiv double %46, 0.000000e+00 ; <double> [#uses=1] + %48 = fmul double %30, %5 ; <double> [#uses=1] + %49 = fdiv double 0.000000e+00, %48 ; <double> [#uses=1] + %50 = fsub double %47, %49 ; <double> [#uses=1] + %51 = fmul double %50, -4.000000e+00 ; <double> [#uses=1] + %52 = fadd double %51, 0.000000e+00 ; <double> [#uses=1] + %53 = fdiv double %46, %11 ; <double> [#uses=1] + %54 = fsub double %53, 0.000000e+00 ; <double> [#uses=1] + %55 = fmul double %31, %54 ; <double> [#uses=1] + %56 = fadd double %52, %55 ; <double> [#uses=1] + %57 = fadd double %56, 0.000000e+00 ; <double> [#uses=1] + %58 = fadd double %44, %57 ; <double> [#uses=1] + %59 = fsub double %58, 0.000000e+00 ; <double> [#uses=1] + %60 = tail call double @llvm.exp.f64(double 0.000000e+00) nounwind ; <double> [#uses=1] + %61 = fsub double -0.000000e+00, %60 ; <double> [#uses=1] + %62 = fdiv double 0.000000e+00, -6.000000e+00 ; <double> [#uses=1] + %63 = fdiv double %61, %5 ; <double> [#uses=1] + %64 = fsub double 0.000000e+00, %63 ; <double> [#uses=1] + %65 = fmul double %62, %64 ; <double> [#uses=1] + %66 = fsub double 0.000000e+00, %65 ; <double> [#uses=1] + %67 = fsub double -0.000000e+00, 
0.000000e+00 ; <double> [#uses=2] + %68 = tail call double @llvm.exp.f64(double %10) nounwind ; <double> [#uses=1] + %69 = fsub double -0.000000e+00, %68 ; <double> [#uses=2] + %70 = fdiv double %67, %.pn404 ; <double> [#uses=1] + %71 = fdiv double %69, %.pn402 ; <double> [#uses=1] + %72 = fsub double %70, %71 ; <double> [#uses=1] + %73 = fmul double %72, -5.000000e-01 ; <double> [#uses=1] + %74 = fdiv double %67, %4 ; <double> [#uses=1] + %75 = fdiv double %69, %5 ; <double> [#uses=1] + %76 = fsub double %74, %75 ; <double> [#uses=1] + %77 = fmul double %76, 0.000000e+00 ; <double> [#uses=1] + %78 = fadd double %73, %77 ; <double> [#uses=1] + %79 = fmul double 0.000000e+00, %78 ; <double> [#uses=1] + %80 = fadd double %66, %79 ; <double> [#uses=1] + %81 = fdiv double 0.000000e+00, %.pn404 ; <double> [#uses=1] + %82 = fdiv double 0.000000e+00, %.pn402 ; <double> [#uses=1] + %83 = fsub double %81, %82 ; <double> [#uses=1] + %84 = fmul double %83, -5.000000e-01 ; <double> [#uses=1] + %85 = fdiv double 0.000000e+00, %4 ; <double> [#uses=1] + %86 = fdiv double 0.000000e+00, %5 ; <double> [#uses=1] + %87 = fsub double %85, %86 ; <double> [#uses=1] + %88 = fmul double %87, 0.000000e+00 ; <double> [#uses=1] + %89 = fadd double %84, %88 ; <double> [#uses=1] + %90 = fmul double 0.000000e+00, %89 ; <double> [#uses=1] + %91 = fsub double %80, %90 ; <double> [#uses=1] + %92 = tail call double @llvm.exp.f64(double %8) nounwind ; <double> [#uses=1] + %93 = fsub double -0.000000e+00, %92 ; <double> [#uses=1] + %94 = tail call double @llvm.exp.f64(double %10) nounwind ; <double> [#uses=1] + %95 = fsub double -0.000000e+00, %94 ; <double> [#uses=3] + %96 = fdiv double %95, %.pn402 ; <double> [#uses=1] + %97 = fsub double 0.000000e+00, %96 ; <double> [#uses=1] + %98 = fmul double 0.000000e+00, %97 ; <double> [#uses=1] + %99 = fdiv double %93, %11 ; <double> [#uses=1] + %100 = fdiv double %95, %12 ; <double> [#uses=1] + %101 = fsub double %99, %100 ; <double> [#uses=1] + %102 = fsub 
double %98, %101 ; <double> [#uses=1] + %103 = fdiv double %95, %5 ; <double> [#uses=1] + %104 = fsub double 0.000000e+00, %103 ; <double> [#uses=1] + %105 = fmul double %18, %104 ; <double> [#uses=1] + %106 = fadd double %102, %105 ; <double> [#uses=1] + %107 = fmul double %106, %k.4 ; <double> [#uses=1] + %108 = fadd double %91, %107 ; <double> [#uses=1] + %109 = fsub double %108, 0.000000e+00 ; <double> [#uses=1] + %110 = tail call double @llvm.exp.f64(double %8) nounwind ; <double> [#uses=1] + %111 = fsub double -0.000000e+00, %110 ; <double> [#uses=2] + %112 = tail call double @llvm.exp.f64(double %10) nounwind ; <double> [#uses=1] + %113 = fsub double -0.000000e+00, %112 ; <double> [#uses=2] + %114 = fdiv double %111, %11 ; <double> [#uses=1] + %115 = fdiv double %113, %12 ; <double> [#uses=1] + %116 = fsub double %114, %115 ; <double> [#uses=1] + %117 = fmul double 0.000000e+00, %116 ; <double> [#uses=1] + %118 = fdiv double %111, %29 ; <double> [#uses=1] + %119 = fdiv double %113, %30 ; <double> [#uses=1] + %120 = fsub double %118, %119 ; <double> [#uses=1] + %121 = fsub double %117, %120 ; <double> [#uses=1] + %122 = fmul double %18, 0.000000e+00 ; <double> [#uses=1] + %123 = fadd double %121, %122 ; <double> [#uses=1] + %124 = fmul double %33, 0.000000e+00 ; <double> [#uses=1] + %125 = fadd double %123, %124 ; <double> [#uses=1] + %126 = fadd double %109, %125 ; <double> [#uses=1] + %127 = tail call double @llvm.exp.f64(double 0.000000e+00) nounwind ; <double> [#uses=1] + %128 = fsub double -0.000000e+00, %127 ; <double> [#uses=2] + %129 = fdiv double %128, %30 ; <double> [#uses=1] + %130 = fsub double 0.000000e+00, %129 ; <double> [#uses=1] + %131 = fsub double 0.000000e+00, %130 ; <double> [#uses=1] + %132 = fdiv double 0.000000e+00, %.pn404 ; <double> [#uses=1] + %133 = fsub double %132, 0.000000e+00 ; <double> [#uses=1] + %134 = fmul double %18, %133 ; <double> [#uses=1] + %135 = fadd double %131, %134 ; <double> [#uses=1] + %136 = fdiv double %128, 
%5 ; <double> [#uses=1] + %137 = fsub double 0.000000e+00, %136 ; <double> [#uses=1] + %138 = fmul double %33, %137 ; <double> [#uses=1] + %139 = fadd double %135, %138 ; <double> [#uses=1] + %140 = fsub double %126, %139 ; <double> [#uses=1] + %141 = fadd double %N1.0, %59 ; <double> [#uses=1] + %142 = fadd double %D1.0, %140 ; <double> [#uses=1] + br label %bb64 + +bb64: ; preds = %bb62, %bb55 + %N1.0.pn = phi double [ %141, %bb62 ], [ %N1.0, %bb55 ] ; <double> [#uses=1] + %D1.0.pn = phi double [ %142, %bb62 ], [ %D1.0, %bb55 ] ; <double> [#uses=1] + %x.1 = fdiv double %N1.0.pn, %D1.0.pn ; <double> [#uses=0] + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-03-07-SpillerBug.ll b/src/LLVM/test/CodeGen/ARM/2009-03-07-SpillerBug.ll new file mode 100644 index 0000000..377bbd2 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-03-07-SpillerBug.ll
@@ -0,0 +1,79 @@ +; RUN: llc < %s -mtriple=armv6-apple-darwin9 -mattr=+vfp2 +; rdar://6653182 + + +%struct.ggBRDF = type { i32 (...)** } +%struct.ggPoint2 = type { [2 x double] } +%struct.ggPoint3 = type { [3 x double] } +%struct.ggSpectrum = type { [8 x float] } +%struct.ggSphere = type { %struct.ggPoint3, double } +%struct.mrDiffuseAreaSphereLuminaire = type { %struct.mrSphere, %struct.ggSpectrum } +%struct.mrDiffuseCosineSphereLuminaire = type { %struct.mrDiffuseAreaSphereLuminaire } +%struct.mrSphere = type { %struct.ggBRDF, %struct.ggSphere } + +declare double @llvm.sqrt.f64(double) nounwind readonly + +declare double @sin(double) nounwind readonly + +declare double @acos(double) nounwind readonly + +define i32 @_ZNK34mrDiffuseSolidAngleSphereLuminaire18selectVisiblePointERK8ggPoint3RK9ggVector3RK8ggPoint2dRS0_Rd(%struct.mrDiffuseCosineSphereLuminaire* nocapture %this, %struct.ggPoint3* nocapture %x, %struct.ggPoint3* nocapture %unnamed_arg, %struct.ggPoint2* nocapture %uv, double %unnamed_arg2, %struct.ggPoint3* nocapture %on_light, double* nocapture %invProb) nounwind { +entry: + %0 = call double @llvm.sqrt.f64(double 0.000000e+00) nounwind + %1 = fcmp ult double 0.000000e+00, %0 + br i1 %1, label %bb3, label %bb7 + +bb3: ; preds = %entry + %2 = fdiv double 1.000000e+00, 0.000000e+00 + %3 = fmul double 0.000000e+00, %2 + %4 = call double @llvm.sqrt.f64(double 0.000000e+00) nounwind + %5 = fdiv double 1.000000e+00, %4 + %6 = fmul double %3, %5 + %7 = fmul double 0.000000e+00, %5 + %8 = fmul double %3, %7 + %9 = fsub double %8, 0.000000e+00 + %10 = fmul double 0.000000e+00, %6 + %11 = fsub double 0.000000e+00, %10 + %12 = fsub double -0.000000e+00, %11 + %13 = fmul double %0, %0 + %14 = fsub double %13, 0.000000e+00 + %15 = call double @llvm.sqrt.f64(double %14) + %16 = fmul double 0.000000e+00, %15 + %17 = fdiv double %16, %0 + %18 = fadd double 0.000000e+00, %17 + %19 = call double @acos(double %18) nounwind readonly + %20 = load double* null, align 4 + %21 
= fmul double %20, 0x401921FB54442D18 + %22 = call double @sin(double %19) nounwind readonly + %23 = fmul double %22, 0.000000e+00 + %24 = fmul double %6, %23 + %25 = fmul double %7, %23 + %26 = call double @sin(double %21) nounwind readonly + %27 = fmul double %22, %26 + %28 = fmul double %9, %27 + %29 = fmul double %27, %12 + %30 = fadd double %24, %28 + %31 = fadd double 0.000000e+00, %29 + %32 = fadd double %25, 0.000000e+00 + %33 = fadd double %30, 0.000000e+00 + %34 = fadd double %31, 0.000000e+00 + %35 = fadd double %32, 0.000000e+00 + %36 = bitcast %struct.ggPoint3* %x to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* null, i8* %36, i32 24, i32 4, i1 false) + store double %33, double* null, align 8 + br i1 false, label %_Z20ggRaySphereIntersectRK6ggRay3RK8ggSphereddRd.exit, label %bb5.i.i.i + +bb5.i.i.i: ; preds = %bb3 + unreachable + +_Z20ggRaySphereIntersectRK6ggRay3RK8ggSphereddRd.exit: ; preds = %bb3 + %37 = fsub double %13, 0.000000e+00 + %38 = fsub double -0.000000e+00, %34 + %39 = fsub double -0.000000e+00, %35 + ret i32 1 + +bb7: ; preds = %entry + ret i32 0 +} + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/src/LLVM/test/CodeGen/ARM/2009-03-09-AddrModeBug.ll b/src/LLVM/test/CodeGen/ARM/2009-03-09-AddrModeBug.ll new file mode 100644 index 0000000..a1ce384 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-03-09-AddrModeBug.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=arm + + %struct.hit_t = type { %struct.v_t, double } + %struct.node_t = type { %struct.hit_t, %struct.hit_t, i32 } + %struct.v_t = type { double, double, double } + +define fastcc %struct.node_t* @_ZL6createP6node_tii3v_tS1_d(%struct.node_t* %n, i32 %lvl, i32 %dist, i64 %c.0.0, i64 %c.0.1, i64 %c.0.2, i64 %d.0.0, i64 %d.0.1, i64 %d.0.2, double %r) nounwind { +entry: + %0 = getelementptr %struct.node_t* %n, i32 0, i32 1 ; <%struct.hit_t*> [#uses=1] + %1 = bitcast %struct.hit_t* %0 to i256* ; <i256*> [#uses=1] + store i256 0, i256* %1, align 4 + unreachable +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-04-06-AsmModifier.ll b/src/LLVM/test/CodeGen/ARM/2009-04-06-AsmModifier.ll new file mode 100644 index 0000000..3526722 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-04-06-AsmModifier.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -march=arm | grep {swi 107} + +define i32 @_swilseek(i32) nounwind { +entry: + %ptr = alloca i32 ; <i32*> [#uses=2] + store i32 %0, i32* %ptr + %retval = alloca i32 ; <i32*> [#uses=2] + store i32 0, i32* %retval + %res = alloca i32 ; <i32*> [#uses=0] + %fh = alloca i32 ; <i32*> [#uses=1] + %1 = load i32* %fh ; <i32> [#uses=1] + %2 = load i32* %ptr ; <i32> [#uses=1] + %3 = call i32 asm "mov r0, $2; mov r1, $3; swi ${1:a}; mov $0, r0", "=r,i,r,r,~{r0},~{r1}"(i32 107, i32 %1, i32 %2) nounwind ; <i32> [#uses=1] + store i32 %3, i32* %retval + br label %return + +return: ; preds = %entry + %4 = load i32* %retval ; <i32> [#uses=1] + ret i32 %4 +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll b/src/LLVM/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll new file mode 100644 index 0000000..f6b3d2c --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=arm +; PR3795 + +define fastcc void @_D3foo3fooFAriZv({ i32, { double, double }* } %d_arg, i32 %x_arg) { +entry: + %d = alloca { i32, { double, double }* } ; <{ i32, { double, double }* }*> [#uses=2] + %x = alloca i32 ; <i32*> [#uses=2] + %b = alloca { double, double } ; <{ double, double }*> [#uses=1] + store { i32, { double, double }* } %d_arg, { i32, { double, double }* }* %d + store i32 %x_arg, i32* %x + %tmp = load i32* %x ; <i32> [#uses=1] + %tmp1 = getelementptr { i32, { double, double }* }* %d, i32 0, i32 1 ; <{ double, double }**> [#uses=1] + %.ptr = load { double, double }** %tmp1 ; <{ double, double }*> [#uses=1] + %tmp2 = getelementptr { double, double }* %.ptr, i32 %tmp ; <{ double, double }*> [#uses=1] + %tmp3 = load { double, double }* %tmp2 ; <{ double, double }> [#uses=1] + store { double, double } %tmp3, { double, double }* %b + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-04-08-FREM.ll b/src/LLVM/test/CodeGen/ARM/2009-04-08-FREM.ll new file mode 100644 index 0000000..99907fc --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-04-08-FREM.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=arm + +declare i32 @printf(i8*, ...) + +define i32 @main() { + %rem_r = frem double 0.000000e+00, 0.000000e+00 ; <double> [#uses=1] + %1 = call i32 (i8*, ...)* @printf(i8* null, double %rem_r) ; <i32> [#uses=0] + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-04-08-FloatUndef.ll b/src/LLVM/test/CodeGen/ARM/2009-04-08-FloatUndef.ll new file mode 100644 index 0000000..05d2f26 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-04-08-FloatUndef.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=arm + +define void @execute_shader(<4 x float>* %OUT, <4 x float>* %IN, <4 x float>* %CONST) { +entry: + %input2 = load <4 x float>* null, align 16 ; <<4 x float>> [#uses=2] + %shuffle7 = shufflevector <4 x float> %input2, <4 x float> <float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00>, <4 x i32> <i32 2, i32 2, i32 2, i32 2> ; <<4 x float>> [#uses=1] + %mul1 = fmul <4 x float> %shuffle7, zeroinitializer ; <<4 x float>> [#uses=1] + %add2 = fadd <4 x float> %mul1, %input2 ; <<4 x float>> [#uses=1] + store <4 x float> %add2, <4 x float>* null, align 16 + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll b/src/LLVM/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll new file mode 100644 index 0000000..deb092b --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=arm +; PR3954 + +define void @foo(...) nounwind { +entry: + %rr = alloca i32 ; <i32*> [#uses=2] + %0 = load i32* %rr ; <i32> [#uses=1] + %1 = call i32 asm "nop", "=r,0"(i32 %0) nounwind ; <i32> [#uses=1] + store i32 %1, i32* %rr + br label %return + +return: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-05-05-DAGCombineBug.ll b/src/LLVM/test/CodeGen/ARM/2009-05-05-DAGCombineBug.ll new file mode 100644 index 0000000..a48e41f --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-05-05-DAGCombineBug.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -mtriple=arm-unknown-linux-gnueabi -mattr=+v6 +; PR4166 + + %"byte[]" = type { i32, i8* } + %tango.time.Time.Time = type { i64 } + +define fastcc void @t() { +entry: + %tmp28 = call fastcc i1 null(i32* null, %"byte[]" undef, %"byte[]" undef, %tango.time.Time.Time* byval null) ; <i1> [#uses=0] + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll b/src/LLVM/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll new file mode 100644 index 0000000..524b5eb --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -mtriple=armv5-unknown-linux-gnueabi -O0 -regalloc=fast +; PR4100 +@.str = external constant [30 x i8] ; <[30 x i8]*> [#uses=1] + +define i16 @fn16(i16 %arg0.0, <2 x i16> %arg1, i16 %arg2.0) nounwind { +entry: + store <2 x i16> %arg1, <2 x i16>* null + %0 = call i32 (i8*, ...)* @printf(i8* getelementptr ([30 x i8]* @.str, i32 0, i32 0), i32 0) nounwind ; <i32> [#uses=0] + ret i16 0 +} + +declare i32 @printf(i8*, ...) nounwind
diff --git a/src/LLVM/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll b/src/LLVM/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll new file mode 100644 index 0000000..7046fcc --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll
@@ -0,0 +1,30 @@ +; RUN: llc < %s -march=arm + %struct.List = type { %struct.List*, i32 } +@Node5 = external constant %struct.List ; <%struct.List*> [#uses=1] +@"\01LC" = external constant [7 x i8] ; <[7 x i8]*> [#uses=1] + +define i32 @main() nounwind { +entry: + br label %bb + +bb: ; preds = %bb3, %entry + %CurL.02 = phi %struct.List* [ @Node5, %entry ], [ %2, %bb3 ] ; <%struct.List*> [#uses=1] + %PrevL.01 = phi %struct.List* [ null, %entry ], [ %CurL.02, %bb3 ] ; <%struct.List*> [#uses=1] + %0 = icmp eq %struct.List* %PrevL.01, null ; <i1> [#uses=1] + br i1 %0, label %bb3, label %bb1 + +bb1: ; preds = %bb + br label %bb3 + +bb3: ; preds = %bb1, %bb + %iftmp.0.0 = phi i32 [ 0, %bb1 ], [ -1, %bb ] ; <i32> [#uses=1] + %1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([7 x i8]* @"\01LC", i32 0, i32 0), i32 0, i32 %iftmp.0.0) nounwind ; <i32> [#uses=0] + %2 = load %struct.List** null, align 4 ; <%struct.List*> [#uses=2] + %phitmp = icmp eq %struct.List* %2, null ; <i1> [#uses=1] + br i1 %phitmp, label %bb5, label %bb + +bb5: ; preds = %bb3 + ret i32 0 +} + +declare i32 @printf(i8* nocapture, ...) nounwind
diff --git a/src/LLVM/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll b/src/LLVM/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll new file mode 100644 index 0000000..1e2707f --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=arm | FileCheck %s +; RUN: llc < %s -march=thumb | FileCheck %s +; PR4091 + +define void @foo(i32 %i, i32* %p) nounwind { +;CHECK: swp r2, r0, [r1] + %asmtmp = call i32 asm sideeffect "swp $0, $2, $3", "=&r,=*m,r,*m,~{memory}"(i32* %p, i32 %i, i32* %p) nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-06-02-ISelCrash.ll b/src/LLVM/test/CodeGen/ARM/2009-06-02-ISelCrash.ll new file mode 100644 index 0000000..403e3f65 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-06-02-ISelCrash.ll
@@ -0,0 +1,62 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic -mattr=+v6,+vfp2 + +@"\01LC" = external constant [15 x i8] ; <[15 x i8]*> [#uses=1] + +declare i32 @printf(i8* nocapture, ...) nounwind + +define i32 @main() nounwind { +entry: + br label %bb.i1.i + +bb.i1.i: ; preds = %Cos.exit.i.i, %entry + br label %bb.i.i.i + +bb.i.i.i: ; preds = %bb.i.i.i, %bb.i1.i + br i1 undef, label %Cos.exit.i.i, label %bb.i.i.i + +Cos.exit.i.i: ; preds = %bb.i.i.i + br i1 undef, label %bb2.i.i, label %bb.i1.i + +bb2.i.i: ; preds = %Cos.exit.i.i + br label %bb3.i.i + +bb3.i.i: ; preds = %bb5.i.i, %bb2.i.i + br label %bb4.i.i + +bb4.i.i: ; preds = %bb4.i.i, %bb3.i.i + br i1 undef, label %bb5.i.i, label %bb4.i.i + +bb5.i.i: ; preds = %bb4.i.i + br i1 undef, label %bb.i, label %bb3.i.i + +bb.i: ; preds = %bb.i, %bb5.i.i + br i1 undef, label %bb1.outer2.i.i.outer, label %bb.i + +bb1.outer2.i.i.outer: ; preds = %Fft.exit.i, %bb5.i12.i, %bb.i + br label %bb1.outer2.i.i + +bb1.outer2.i.i: ; preds = %bb2.i9.i, %bb1.outer2.i.i.outer + br label %bb1.i.i + +bb1.i.i: ; preds = %bb1.i.i, %bb1.outer2.i.i + br i1 undef, label %bb2.i9.i, label %bb1.i.i + +bb2.i9.i: ; preds = %bb1.i.i + br i1 undef, label %bb4.i11.i, label %bb1.outer2.i.i + +bb4.i11.i: ; preds = %bb4.i11.i, %bb2.i9.i + br i1 undef, label %bb5.i12.i, label %bb4.i11.i + +bb5.i12.i: ; preds = %bb4.i11.i + br i1 undef, label %bb7.i.i, label %bb1.outer2.i.i.outer + +bb7.i.i: ; preds = %bb7.i.i, %bb5.i12.i + br i1 undef, label %Fft.exit.i, label %bb7.i.i + +Fft.exit.i: ; preds = %bb7.i.i + br i1 undef, label %bb5.i, label %bb1.outer2.i.i.outer + +bb5.i: ; preds = %Fft.exit.i + %0 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([15 x i8]* @"\01LC", i32 0, i32 0), double undef, double undef) nounwind ; <i32> [#uses=0] + unreachable +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll b/src/LLVM/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll new file mode 100644 index 0000000..98e0023 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll
@@ -0,0 +1,263 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6 + + %struct.anon = type { i16, i16 } + %struct.cab_archive = type { i32, i16, i16, i16, i16, i8, %struct.cab_folder*, %struct.cab_file* } + %struct.cab_file = type { i32, i16, i64, i8*, i32, i32, i32, %struct.cab_folder*, %struct.cab_file*, %struct.cab_archive*, %struct.cab_state* } + %struct.cab_folder = type { i16, i16, %struct.cab_archive*, i64, %struct.cab_folder* } + %struct.cab_state = type { i8*, i8*, [38912 x i8], i16, i16, i8*, i16 } + %struct.qtm_model = type { i32, i32, %struct.anon* } + %struct.qtm_stream = type { i32, i32, i8, i8*, i32, i32, i32, i16, i16, i16, i8, i32, i8*, i8*, i8*, i8*, i8*, i32, i32, i8, [42 x i32], [42 x i8], [27 x i8], [27 x i8], %struct.qtm_model, %struct.qtm_model, %struct.qtm_model, %struct.qtm_model, %struct.qtm_model, %struct.qtm_model, %struct.qtm_model, %struct.qtm_model, %struct.qtm_model, [65 x %struct.anon], [65 x %struct.anon], [65 x %struct.anon], [65 x %struct.anon], [25 x %struct.anon], [37 x %struct.anon], [43 x %struct.anon], [28 x %struct.anon], [8 x %struct.anon], %struct.cab_file*, i32 (%struct.cab_file*, i8*, i32)* } + +declare fastcc i32 @qtm_read_input(%struct.qtm_stream* nocapture) nounwind + +define fastcc i32 @qtm_decompress(%struct.qtm_stream* %qtm, i64 %out_bytes) nounwind { +entry: + br i1 undef, label %bb245, label %bb3 + +bb3: ; preds = %entry + br i1 undef, label %bb5, label %bb4 + +bb4: ; preds = %bb3 + ret i32 undef + +bb5: ; preds = %bb3 + br i1 undef, label %bb245, label %bb14 + +bb14: ; preds = %bb5 + br label %bb238 + +bb28: ; preds = %bb215 + br label %bb31 + +bb29: ; preds = %bb31 + br i1 undef, label %bb31, label %bb32 + +bb31: ; preds = %bb29, %bb28 + br i1 undef, label %bb29, label %bb32 + +bb32: ; preds = %bb31, %bb29 + br label %bb33 + +bb33: ; preds = %bb33, %bb32 + br i1 undef, label %bb34, label %bb33 + +bb34: ; preds = %bb33 + br i1 undef, label %bb35, label %bb36 + +bb35: ; preds = %bb34 + br label %bb36 + 
+bb36: ; preds = %bb46, %bb35, %bb34 + br i1 undef, label %bb40, label %bb37 + +bb37: ; preds = %bb36 + br i1 undef, label %bb77, label %bb60 + +bb40: ; preds = %bb36 + br i1 undef, label %bb46, label %bb41 + +bb41: ; preds = %bb40 + br i1 undef, label %bb45, label %bb42 + +bb42: ; preds = %bb41 + ret i32 undef + +bb45: ; preds = %bb41 + br label %bb46 + +bb46: ; preds = %bb45, %bb40 + br label %bb36 + +bb60: ; preds = %bb60, %bb37 + br label %bb60 + +bb77: ; preds = %bb37 + switch i32 undef, label %bb197 [ + i32 5, label %bb108 + i32 6, label %bb138 + ] + +bb108: ; preds = %bb77 + br label %bb111 + +bb109: ; preds = %bb111 + br i1 undef, label %bb111, label %bb112 + +bb111: ; preds = %bb109, %bb108 + br i1 undef, label %bb109, label %bb112 + +bb112: ; preds = %bb111, %bb109 + br label %bb113 + +bb113: ; preds = %bb113, %bb112 + br i1 undef, label %bb114, label %bb113 + +bb114: ; preds = %bb113 + br i1 undef, label %bb115, label %bb116 + +bb115: ; preds = %bb114 + br label %bb116 + +bb116: ; preds = %bb115, %bb114 + br i1 undef, label %bb120, label %bb117 + +bb117: ; preds = %bb116 + br label %bb136 + +bb120: ; preds = %bb116 + ret i32 undef + +bb128: ; preds = %bb136 + br i1 undef, label %bb134, label %bb129 + +bb129: ; preds = %bb128 + br i1 undef, label %bb133, label %bb130 + +bb130: ; preds = %bb129 + br i1 undef, label %bb132, label %bb131 + +bb131: ; preds = %bb130 + ret i32 undef + +bb132: ; preds = %bb130 + br label %bb133 + +bb133: ; preds = %bb132, %bb129 + br label %bb134 + +bb134: ; preds = %bb133, %bb128 + br label %bb136 + +bb136: ; preds = %bb134, %bb117 + br i1 undef, label %bb198, label %bb128 + +bb138: ; preds = %bb77 + %0 = trunc i32 undef to i16 ; <i16> [#uses=1] + br label %bb141 + +bb139: ; preds = %bb141 + %scevgep441442881 = load i16* undef ; <i16> [#uses=1] + %1 = icmp ugt i16 %scevgep441442881, %0 ; <i1> [#uses=1] + br i1 %1, label %bb141, label %bb142 + +bb141: ; preds = %bb139, %bb138 + br i1 undef, label %bb139, label %bb142 + +bb142: ; 
preds = %bb141, %bb139 + br label %bb143 + +bb143: ; preds = %bb143, %bb142 + br i1 undef, label %bb144, label %bb143 + +bb144: ; preds = %bb143 + br i1 undef, label %bb145, label %bb146 + +bb145: ; preds = %bb144 + unreachable + +bb146: ; preds = %bb156, %bb144 + br i1 undef, label %bb150, label %bb147 + +bb147: ; preds = %bb146 + br i1 undef, label %bb157, label %bb148 + +bb148: ; preds = %bb147 + br i1 undef, label %bb149, label %bb157 + +bb149: ; preds = %bb148 + br label %bb150 + +bb150: ; preds = %bb149, %bb146 + br i1 undef, label %bb156, label %bb152 + +bb152: ; preds = %bb150 + unreachable + +bb156: ; preds = %bb150 + br label %bb146 + +bb157: ; preds = %bb148, %bb147 + br i1 undef, label %bb167, label %bb160 + +bb160: ; preds = %bb157 + ret i32 undef + +bb167: ; preds = %bb157 + br label %bb170 + +bb168: ; preds = %bb170 + br i1 undef, label %bb170, label %bb171 + +bb170: ; preds = %bb168, %bb167 + br i1 undef, label %bb168, label %bb171 + +bb171: ; preds = %bb170, %bb168 + br label %bb172 + +bb172: ; preds = %bb172, %bb171 + br i1 undef, label %bb173, label %bb172 + +bb173: ; preds = %bb172 + br i1 undef, label %bb174, label %bb175 + +bb174: ; preds = %bb173 + unreachable + +bb175: ; preds = %bb179, %bb173 + br i1 undef, label %bb179, label %bb176 + +bb176: ; preds = %bb175 + br i1 undef, label %bb186, label %bb177 + +bb177: ; preds = %bb176 + br i1 undef, label %bb178, label %bb186 + +bb178: ; preds = %bb177 + br label %bb179 + +bb179: ; preds = %bb178, %bb175 + br label %bb175 + +bb186: ; preds = %bb177, %bb176 + br label %bb195 + +bb187: ; preds = %bb195 + br i1 undef, label %bb193, label %bb189 + +bb189: ; preds = %bb187 + %2 = tail call fastcc i32 @qtm_read_input(%struct.qtm_stream* %qtm) nounwind ; <i32> [#uses=0] + ret i32 undef + +bb193: ; preds = %bb187 + br label %bb195 + +bb195: ; preds = %bb193, %bb186 + br i1 undef, label %bb198, label %bb187 + +bb197: ; preds = %bb77 + ret i32 -124 + +bb198: ; preds = %bb195, %bb136 + br i1 undef, label 
%bb211.preheader, label %bb214 + +bb211.preheader: ; preds = %bb198 + br label %bb211 + +bb211: ; preds = %bb211, %bb211.preheader + br i1 undef, label %bb214, label %bb211 + +bb214: ; preds = %bb211, %bb198 + br label %bb215 + +bb215: ; preds = %bb238, %bb214 + br i1 undef, label %bb28, label %bb216 + +bb216: ; preds = %bb215 + br label %bb238 + +bb238: ; preds = %bb216, %bb14 + br label %bb215 + +bb245: ; preds = %bb5, %entry + ret i32 undef +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-06-15-RegScavengerAssert.ll b/src/LLVM/test/CodeGen/ARM/2009-06-15-RegScavengerAssert.ll new file mode 100644 index 0000000..a0f903b --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-06-15-RegScavengerAssert.ll
@@ -0,0 +1,344 @@ +; RUN: llc < %s -mtriple=armv6-apple-darwin + + %struct.term = type { i32, i32, i32 } + +declare fastcc i8* @memory_Malloc(i32) nounwind + +define fastcc %struct.term* @t1() nounwind { +entry: + br i1 undef, label %bb, label %bb1 + +bb: ; preds = %entry + ret %struct.term* undef + +bb1: ; preds = %entry + %0 = tail call fastcc i8* @memory_Malloc(i32 12) nounwind ; <i8*> [#uses=0] + %1 = tail call fastcc i8* @memory_Malloc(i32 12) nounwind ; <i8*> [#uses=0] + ret %struct.term* undef +} + + +define i32 @t2(i32 %argc, i8** nocapture %argv) nounwind { +entry: + br label %bb6.i8 + +bb6.i8: ; preds = %memory_CalculateRealBlockSize1374.exit.i, %entry + br i1 undef, label %memory_CalculateRealBlockSize1374.exit.i, label %bb.i.i9 + +bb.i.i9: ; preds = %bb6.i8 + br label %memory_CalculateRealBlockSize1374.exit.i + +memory_CalculateRealBlockSize1374.exit.i: ; preds = %bb.i.i9, %bb6.i8 + %0 = phi i32 [ undef, %bb.i.i9 ], [ undef, %bb6.i8 ] ; <i32> [#uses=2] + store i32 %0, i32* undef, align 4 + %1 = urem i32 8184, %0 ; <i32> [#uses=1] + %2 = sub i32 8188, %1 ; <i32> [#uses=1] + store i32 %2, i32* undef, align 4 + br i1 undef, label %memory_Init.exit, label %bb6.i8 + +memory_Init.exit: ; preds = %memory_CalculateRealBlockSize1374.exit.i + br label %bb.i.i + +bb.i.i: ; preds = %bb.i.i, %memory_Init.exit + br i1 undef, label %symbol_Init.exit, label %bb.i.i + +symbol_Init.exit: ; preds = %bb.i.i + br label %bb.i.i67 + +bb.i.i67: ; preds = %bb.i.i67, %symbol_Init.exit + br i1 undef, label %symbol_CreatePrecedence3522.exit, label %bb.i.i67 + +symbol_CreatePrecedence3522.exit: ; preds = %bb.i.i67 + br label %bb.i.i8.i + +bb.i.i8.i: ; preds = %bb.i.i8.i, %symbol_CreatePrecedence3522.exit + br i1 undef, label %cont_Create.exit9.i, label %bb.i.i8.i + +cont_Create.exit9.i: ; preds = %bb.i.i8.i + br label %bb.i.i.i72 + +bb.i.i.i72: ; preds = %bb.i.i.i72, %cont_Create.exit9.i + br i1 undef, label %cont_Init.exit, label %bb.i.i.i72 + +cont_Init.exit: ; preds = 
%bb.i.i.i72 + br label %bb.i103 + +bb.i103: ; preds = %bb.i103, %cont_Init.exit + br i1 undef, label %subs_Init.exit, label %bb.i103 + +subs_Init.exit: ; preds = %bb.i103 + br i1 undef, label %bb1.i.i.i80, label %cc_Init.exit + +bb1.i.i.i80: ; preds = %subs_Init.exit + unreachable + +cc_Init.exit: ; preds = %subs_Init.exit + br label %bb.i.i375 + +bb.i.i375: ; preds = %bb.i.i375, %cc_Init.exit + br i1 undef, label %bb.i439, label %bb.i.i375 + +bb.i439: ; preds = %bb.i439, %bb.i.i375 + br i1 undef, label %opts_DeclareSPASSFlagsAsOptions.exit, label %bb.i439 + +opts_DeclareSPASSFlagsAsOptions.exit: ; preds = %bb.i439 + br i1 undef, label %opts_TranslateShortOptDeclarations.exit.i, label %bb.i.i82 + +bb.i.i82: ; preds = %opts_DeclareSPASSFlagsAsOptions.exit + unreachable + +opts_TranslateShortOptDeclarations.exit.i: ; preds = %opts_DeclareSPASSFlagsAsOptions.exit + br i1 undef, label %list_Length.exit.i.thread.i, label %bb.i.i4.i + +list_Length.exit.i.thread.i: ; preds = %opts_TranslateShortOptDeclarations.exit.i + br i1 undef, label %bb18.i.i.i, label %bb26.i.i.i + +bb.i.i4.i: ; preds = %opts_TranslateShortOptDeclarations.exit.i + unreachable + +bb18.i.i.i: ; preds = %list_Length.exit.i.thread.i + unreachable + +bb26.i.i.i: ; preds = %list_Length.exit.i.thread.i + br i1 undef, label %bb27.i142, label %opts_GetOptLongOnly.exit.thread97.i + +opts_GetOptLongOnly.exit.thread97.i: ; preds = %bb26.i.i.i + br label %bb27.i142 + +bb27.i142: ; preds = %opts_GetOptLongOnly.exit.thread97.i, %bb26.i.i.i + br label %bb1.i3.i + +bb1.i3.i: ; preds = %bb1.i3.i, %bb27.i142 + br i1 undef, label %opts_FreeLongOptsArray.exit.i, label %bb1.i3.i + +opts_FreeLongOptsArray.exit.i: ; preds = %bb1.i3.i + br label %bb.i443 + +bb.i443: ; preds = %bb.i443, %opts_FreeLongOptsArray.exit.i + br i1 undef, label %flag_InitStoreByDefaults3542.exit, label %bb.i443 + +flag_InitStoreByDefaults3542.exit: ; preds = %bb.i443 + br i1 undef, label %bb6.i449, label %bb.i503 + +bb6.i449: ; preds = 
%flag_InitStoreByDefaults3542.exit + unreachable + +bb.i503: ; preds = %bb.i503, %flag_InitStoreByDefaults3542.exit + br i1 undef, label %flag_CleanStore3464.exit, label %bb.i503 + +flag_CleanStore3464.exit: ; preds = %bb.i503 + br i1 undef, label %bb1.i81.i.preheader, label %bb.i173 + +bb.i173: ; preds = %flag_CleanStore3464.exit + unreachable + +bb1.i81.i.preheader: ; preds = %flag_CleanStore3464.exit + br i1 undef, label %bb1.i64.i.preheader, label %bb5.i179 + +bb5.i179: ; preds = %bb1.i81.i.preheader + unreachable + +bb1.i64.i.preheader: ; preds = %bb1.i81.i.preheader + br i1 undef, label %dfg_DeleteProofList.exit.i, label %bb.i9.i + +bb.i9.i: ; preds = %bb1.i64.i.preheader + unreachable + +dfg_DeleteProofList.exit.i: ; preds = %bb1.i64.i.preheader + br i1 undef, label %term_DeleteTermList621.exit.i, label %bb.i.i62.i + +bb.i.i62.i: ; preds = %bb.i.i62.i, %dfg_DeleteProofList.exit.i + br i1 undef, label %term_DeleteTermList621.exit.i, label %bb.i.i62.i + +term_DeleteTermList621.exit.i: ; preds = %bb.i.i62.i, %dfg_DeleteProofList.exit.i + br i1 undef, label %dfg_DFGParser.exit, label %bb.i.i211 + +bb.i.i211: ; preds = %term_DeleteTermList621.exit.i + unreachable + +dfg_DFGParser.exit: ; preds = %term_DeleteTermList621.exit.i + br label %bb.i513 + +bb.i513: ; preds = %bb2.i516, %dfg_DFGParser.exit + br i1 undef, label %bb2.i516, label %bb1.i514 + +bb1.i514: ; preds = %bb.i513 + unreachable + +bb2.i516: ; preds = %bb.i513 + br i1 undef, label %bb.i509, label %bb.i513 + +bb.i509: ; preds = %bb.i509, %bb2.i516 + br i1 undef, label %symbol_TransferPrecedence3468.exit511, label %bb.i509 + +symbol_TransferPrecedence3468.exit511: ; preds = %bb.i509 + br i1 undef, label %bb20, label %bb21 + +bb20: ; preds = %symbol_TransferPrecedence3468.exit511 + unreachable + +bb21: ; preds = %symbol_TransferPrecedence3468.exit511 + br i1 undef, label %cnf_Init.exit, label %bb.i498 + +bb.i498: ; preds = %bb21 + unreachable + +cnf_Init.exit: ; preds = %bb21 + br i1 undef, label %bb23, 
label %bb22 + +bb22: ; preds = %cnf_Init.exit + br i1 undef, label %bb2.i.i496, label %bb.i.i494 + +bb.i.i494: ; preds = %bb22 + unreachable + +bb2.i.i496: ; preds = %bb22 + unreachable + +bb23: ; preds = %cnf_Init.exit + br i1 undef, label %bb28, label %bb24 + +bb24: ; preds = %bb23 + unreachable + +bb28: ; preds = %bb23 + br i1 undef, label %bb31, label %bb29 + +bb29: ; preds = %bb28 + unreachable + +bb31: ; preds = %bb28 + br i1 undef, label %bb34, label %bb32 + +bb32: ; preds = %bb31 + unreachable + +bb34: ; preds = %bb31 + br i1 undef, label %bb83, label %bb66 + +bb66: ; preds = %bb34 + unreachable + +bb83: ; preds = %bb34 + br i1 undef, label %bb2.i1668, label %bb.i1667 + +bb.i1667: ; preds = %bb83 + unreachable + +bb2.i1668: ; preds = %bb83 + br i1 undef, label %bb5.i205, label %bb3.i204 + +bb3.i204: ; preds = %bb2.i1668 + unreachable + +bb5.i205: ; preds = %bb2.i1668 + br i1 undef, label %bb.i206.i, label %ana_AnalyzeSortStructure.exit.i + +bb.i206.i: ; preds = %bb5.i205 + br i1 undef, label %bb1.i207.i, label %ana_AnalyzeSortStructure.exit.i + +bb1.i207.i: ; preds = %bb.i206.i + br i1 undef, label %bb25.i1801.thread, label %bb.i1688 + +bb.i1688: ; preds = %bb1.i207.i + unreachable + +bb25.i1801.thread: ; preds = %bb1.i207.i + unreachable + +ana_AnalyzeSortStructure.exit.i: ; preds = %bb.i206.i, %bb5.i205 + br i1 undef, label %bb7.i207, label %bb.i1806 + +bb.i1806: ; preds = %ana_AnalyzeSortStructure.exit.i + br i1 undef, label %bb2.i.i.i1811, label %bb.i.i.i1809 + +bb.i.i.i1809: ; preds = %bb.i1806 + unreachable + +bb2.i.i.i1811: ; preds = %bb.i1806 + unreachable + +bb7.i207: ; preds = %ana_AnalyzeSortStructure.exit.i + br i1 undef, label %bb9.i, label %bb8.i + +bb8.i: ; preds = %bb7.i207 + unreachable + +bb9.i: ; preds = %bb7.i207 + br i1 undef, label %bb23.i, label %bb26.i + +bb23.i: ; preds = %bb9.i + br i1 undef, label %bb25.i, label %bb24.i + +bb24.i: ; preds = %bb23.i + br i1 undef, label %sort_SortTheoryIsTrivial.exit.i, label %bb.i2093 + +bb.i2093: 
; preds = %bb.i2093, %bb24.i + br label %bb.i2093 + +sort_SortTheoryIsTrivial.exit.i: ; preds = %bb24.i + br i1 undef, label %bb3.i2141, label %bb4.i2143 + +bb3.i2141: ; preds = %sort_SortTheoryIsTrivial.exit.i + unreachable + +bb4.i2143: ; preds = %sort_SortTheoryIsTrivial.exit.i + br i1 undef, label %bb8.i2178, label %bb5.i2144 + +bb5.i2144: ; preds = %bb4.i2143 + br i1 undef, label %bb7.i2177, label %bb1.i28.i + +bb1.i28.i: ; preds = %bb5.i2144 + br i1 undef, label %bb4.i43.i, label %bb2.i.i2153 + +bb2.i.i2153: ; preds = %bb1.i28.i + br i1 undef, label %bb4.i.i33.i, label %bb.i.i30.i + +bb.i.i30.i: ; preds = %bb2.i.i2153 + unreachable + +bb4.i.i33.i: ; preds = %bb2.i.i2153 + br i1 undef, label %bb9.i.i36.i, label %bb5.i.i34.i + +bb5.i.i34.i: ; preds = %bb4.i.i33.i + unreachable + +bb9.i.i36.i: ; preds = %bb4.i.i33.i + br i1 undef, label %bb14.i.i.i2163, label %bb10.i.i37.i + +bb10.i.i37.i: ; preds = %bb9.i.i36.i + unreachable + +bb14.i.i.i2163: ; preds = %bb9.i.i36.i + br i1 undef, label %sort_LinkPrint.exit.i.i, label %bb15.i.i.i2164 + +bb15.i.i.i2164: ; preds = %bb14.i.i.i2163 + unreachable + +sort_LinkPrint.exit.i.i: ; preds = %bb14.i.i.i2163 + unreachable + +bb4.i43.i: ; preds = %bb1.i28.i + unreachable + +bb7.i2177: ; preds = %bb5.i2144 + unreachable + +bb8.i2178: ; preds = %bb4.i2143 + br i1 undef, label %sort_ApproxStaticSortTheory.exit, label %bb.i5.i2185.preheader + +bb.i5.i2185.preheader: ; preds = %bb8.i2178 + br label %bb.i5.i2185 + +bb.i5.i2185: ; preds = %bb.i5.i2185, %bb.i5.i2185.preheader + br i1 undef, label %sort_ApproxStaticSortTheory.exit, label %bb.i5.i2185 + +sort_ApproxStaticSortTheory.exit: ; preds = %bb.i5.i2185, %bb8.i2178 + br label %bb25.i + +bb25.i: ; preds = %sort_ApproxStaticSortTheory.exit, %bb23.i + unreachable + +bb26.i: ; preds = %bb9.i + unreachable +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-06-19-RegScavengerAssert.ll b/src/LLVM/test/CodeGen/ARM/2009-06-19-RegScavengerAssert.ll new file mode 100644 index 0000000..b56b684 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-06-19-RegScavengerAssert.ll
@@ -0,0 +1,30 @@ +; RUN: llc < %s -mtriple=armv6-eabi -mattr=+vfp2 -float-abi=hard +; PR4419 + +define float @__ieee754_acosf(float %x) nounwind { +entry: + br i1 undef, label %bb, label %bb4 + +bb: ; preds = %entry + ret float undef + +bb4: ; preds = %entry + br i1 undef, label %bb5, label %bb6 + +bb5: ; preds = %bb4 + ret float undef + +bb6: ; preds = %bb4 + br i1 undef, label %bb11, label %bb12 + +bb11: ; preds = %bb6 + %0 = tail call float @__ieee754_sqrtf(float undef) nounwind ; <float> [#uses=1] + %1 = fmul float %0, -2.000000e+00 ; <float> [#uses=1] + %2 = fadd float %1, 0x400921FB40000000 ; <float> [#uses=1] + ret float %2 + +bb12: ; preds = %bb6 + ret float undef +} + +declare float @__ieee754_sqrtf(float)
diff --git a/src/LLVM/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll b/src/LLVM/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll new file mode 100644 index 0000000..7e9b066 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll
@@ -0,0 +1,43 @@ +; RUN: llc < %s -mtriple=armv6-apple-darwin + + %struct.rtunion = type { i64 } + %struct.rtx_def = type { i16, i8, i8, [1 x %struct.rtunion] } + +define void @simplify_unary_real(i8* nocapture %p) nounwind { +entry: + %tmp121 = load i64* null, align 4 ; <i64> [#uses=1] + %0 = getelementptr %struct.rtx_def* null, i32 0, i32 3, i32 3, i32 0 ; <i64*> [#uses=1] + %tmp122 = load i64* %0, align 4 ; <i64> [#uses=1] + %1 = zext i64 undef to i192 ; <i192> [#uses=2] + %2 = zext i64 %tmp121 to i192 ; <i192> [#uses=1] + %3 = shl i192 %2, 64 ; <i192> [#uses=2] + %4 = zext i64 %tmp122 to i192 ; <i192> [#uses=1] + %5 = shl i192 %4, 128 ; <i192> [#uses=1] + %6 = or i192 %3, %1 ; <i192> [#uses=1] + %7 = or i192 %6, %5 ; <i192> [#uses=2] + switch i32 undef, label %bb82 [ + i32 77, label %bb38 + i32 129, label %bb21 + i32 130, label %bb20 + ] + +bb20: ; preds = %entry + ret void + +bb21: ; preds = %entry + br i1 undef, label %bb82, label %bb29 + +bb29: ; preds = %bb21 + %tmp18.i = and i192 %3, 1208907372870555465154560 ; <i192> [#uses=1] + %mask.i = or i192 %tmp18.i, %1 ; <i192> [#uses=1] + %mask41.i = or i192 %mask.i, 0 ; <i192> [#uses=1] + br label %bb82 + +bb38: ; preds = %entry + br label %bb82 + +bb82: ; preds = %bb38, %bb29, %bb21, %entry + %d.0 = phi i192 [ %mask41.i, %bb29 ], [ undef, %bb38 ], [ %7, %entry ], [ %7, %bb21 ] ; <i192> [#uses=1] + %tmp51 = trunc i192 %d.0 to i64 ; <i64> [#uses=0] + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll b/src/LLVM/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll new file mode 100644 index 0000000..812f018 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll
@@ -0,0 +1,122 @@ +; RUN: llc < %s -march=arm -mtriple=armv6-apple-darwin9 + +@nn = external global i32 ; <i32*> [#uses=1] +@al_len = external global i32 ; <i32*> [#uses=2] +@no_mat = external global i32 ; <i32*> [#uses=2] +@no_mis = external global i32 ; <i32*> [#uses=2] +@"\01LC12" = external constant [29 x i8], align 1 ; <[29 x i8]*> [#uses=1] +@"\01LC16" = external constant [33 x i8], align 1 ; <[33 x i8]*> [#uses=1] +@"\01LC17" = external constant [47 x i8], align 1 ; <[47 x i8]*> [#uses=1] + +declare i32 @printf(i8* nocapture, ...) nounwind + +declare void @diff(i8*, i8*, i32, i32, i32, i32) nounwind + +define void @SIM(i8* %A, i8* %B, i32 %M, i32 %N, i32 %K, [256 x i32]* %V, i32 %Q, i32 %R, i32 %nseq) nounwind { +entry: + br i1 undef, label %bb5, label %bb + +bb: ; preds = %bb, %entry + br label %bb + +bb5: ; preds = %entry + br i1 undef, label %bb6, label %bb8 + +bb6: ; preds = %bb6, %bb5 + br i1 undef, label %bb8, label %bb6 + +bb8: ; preds = %bb6, %bb5 + br label %bb15 + +bb9: ; preds = %bb15 + br i1 undef, label %bb10, label %bb11 + +bb10: ; preds = %bb9 + unreachable + +bb11: ; preds = %bb9 + %0 = load i32* undef, align 4 ; <i32> [#uses=2] + %1 = add i32 %0, 1 ; <i32> [#uses=2] + store i32 %1, i32* undef, align 4 + %2 = load i32* undef, align 4 ; <i32> [#uses=1] + store i32 %2, i32* @nn, align 4 + store i32 0, i32* @al_len, align 4 + store i32 0, i32* @no_mat, align 4 + store i32 0, i32* @no_mis, align 4 + %3 = getelementptr i8* %B, i32 %0 ; <i8*> [#uses=1] + tail call void @diff(i8* undef, i8* %3, i32 undef, i32 undef, i32 undef, i32 undef) nounwind + %4 = sitofp i32 undef to double ; <double> [#uses=1] + %5 = fdiv double %4, 1.000000e+01 ; <double> [#uses=1] + %6 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([29 x i8]* @"\01LC12", i32 0, i32 0), double %5) nounwind ; <i32> [#uses=0] + %7 = load i32* @al_len, align 4 ; <i32> [#uses=1] + %8 = load i32* @no_mat, align 4 ; <i32> [#uses=1] + %9 = load i32* @no_mis, align 4 ; <i32> [#uses=1] + %10 
= sub i32 %7, %8 ; <i32> [#uses=1] + %11 = sub i32 %10, %9 ; <i32> [#uses=1] + %12 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([33 x i8]* @"\01LC16", i32 0, i32 0), i32 %11) nounwind ; <i32> [#uses=0] + %13 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([47 x i8]* @"\01LC17", i32 0, i32 0), i32 undef, i32 %1, i32 undef, i32 undef) nounwind ; <i32> [#uses=0] + br i1 undef, label %bb15, label %bb12 + +bb12: ; preds = %bb11 + br label %bb228.i + +bb74.i: ; preds = %bb228.i + br i1 undef, label %bb138.i, label %bb145.i + +bb138.i: ; preds = %bb74.i + br label %bb145.i + +bb145.i: ; preds = %bb228.i, %bb138.i, %bb74.i + br i1 undef, label %bb146.i, label %bb151.i + +bb146.i: ; preds = %bb145.i + br i1 undef, label %bb228.i, label %bb151.i + +bb151.i: ; preds = %bb146.i, %bb145.i + br i1 undef, label %bb153.i, label %bb228.i + +bb153.i: ; preds = %bb151.i + br i1 undef, label %bb220.i, label %bb.nph.i98 + +bb.nph.i98: ; preds = %bb153.i + br label %bb158.i + +bb158.i: ; preds = %bb218.i, %bb.nph.i98 + br i1 undef, label %bb168.i, label %bb160.i + +bb160.i: ; preds = %bb158.i + br i1 undef, label %bb161.i, label %bb168.i + +bb161.i: ; preds = %bb160.i + br i1 undef, label %bb168.i, label %bb163.i + +bb163.i: ; preds = %bb161.i + br i1 undef, label %bb167.i, label %bb168.i + +bb167.i: ; preds = %bb163.i + br label %bb168.i + +bb168.i: ; preds = %bb167.i, %bb163.i, %bb161.i, %bb160.i, %bb158.i + br i1 undef, label %bb211.i, label %bb218.i + +bb211.i: ; preds = %bb168.i + br label %bb218.i + +bb218.i: ; preds = %bb211.i, %bb168.i + br i1 undef, label %bb220.i, label %bb158.i + +bb220.i: ; preds = %bb218.i, %bb153.i + br i1 undef, label %bb221.i, label %bb228.i + +bb221.i: ; preds = %bb220.i + br label %bb228.i + +bb228.i: ; preds = %bb221.i, %bb220.i, %bb151.i, %bb146.i, %bb12 + br i1 undef, label %bb74.i, label %bb145.i + +bb15: ; preds = %bb11, %bb8 + br i1 undef, label %return, label %bb9 + +return: ; preds = %bb15 + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll b/src/LLVM/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll new file mode 100644 index 0000000..f5fb97c --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll
@@ -0,0 +1,116 @@ +; RUN: llc < %s -march=arm -mtriple=armv6-apple-darwin9 + +@no_mat = external global i32 ; <i32*> [#uses=1] +@no_mis = external global i32 ; <i32*> [#uses=2] +@"\01LC11" = external constant [33 x i8], align 1 ; <[33 x i8]*> [#uses=1] +@"\01LC15" = external constant [33 x i8], align 1 ; <[33 x i8]*> [#uses=1] +@"\01LC17" = external constant [47 x i8], align 1 ; <[47 x i8]*> [#uses=1] + +declare i32 @printf(i8* nocapture, ...) nounwind + +declare void @diff(i8*, i8*, i32, i32, i32, i32) nounwind + +define void @SIM(i8* %A, i8* %B, i32 %M, i32 %N, i32 %K, [256 x i32]* %V, i32 %Q, i32 %R, i32 %nseq) nounwind { +entry: + br i1 undef, label %bb5, label %bb + +bb: ; preds = %bb, %entry + br label %bb + +bb5: ; preds = %entry + br i1 undef, label %bb6, label %bb8 + +bb6: ; preds = %bb6, %bb5 + br i1 undef, label %bb8, label %bb6 + +bb8: ; preds = %bb6, %bb5 + br label %bb15 + +bb9: ; preds = %bb15 + br i1 undef, label %bb10, label %bb11 + +bb10: ; preds = %bb9 + unreachable + +bb11: ; preds = %bb9 + %0 = load i32* undef, align 4 ; <i32> [#uses=3] + %1 = add i32 %0, 1 ; <i32> [#uses=2] + store i32 %1, i32* undef, align 4 + %2 = load i32* undef, align 4 ; <i32> [#uses=2] + %3 = sub i32 %2, %0 ; <i32> [#uses=1] + store i32 0, i32* @no_mat, align 4 + store i32 0, i32* @no_mis, align 4 + %4 = getelementptr i8* %B, i32 %0 ; <i8*> [#uses=1] + tail call void @diff(i8* undef, i8* %4, i32 undef, i32 %3, i32 undef, i32 undef) nounwind + %5 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([33 x i8]* @"\01LC11", i32 0, i32 0), i32 %tmp13) nounwind ; <i32> [#uses=0] + %6 = load i32* @no_mis, align 4 ; <i32> [#uses=1] + %7 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([33 x i8]* @"\01LC15", i32 0, i32 0), i32 %6) nounwind ; <i32> [#uses=0] + %8 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([47 x i8]* @"\01LC17", i32 0, i32 0), i32 undef, i32 %1, i32 undef, i32 %2) nounwind ; <i32> [#uses=0] + br i1 undef, label %bb15, label %bb12 + +bb12: ; 
preds = %bb11 + br label %bb228.i + +bb74.i: ; preds = %bb228.i + br i1 undef, label %bb138.i, label %bb145.i + +bb138.i: ; preds = %bb74.i + br label %bb145.i + +bb145.i: ; preds = %bb228.i, %bb138.i, %bb74.i + br i1 undef, label %bb146.i, label %bb151.i + +bb146.i: ; preds = %bb145.i + br i1 undef, label %bb228.i, label %bb151.i + +bb151.i: ; preds = %bb146.i, %bb145.i + br i1 undef, label %bb153.i, label %bb228.i + +bb153.i: ; preds = %bb151.i + br i1 undef, label %bb220.i, label %bb.nph.i98 + +bb.nph.i98: ; preds = %bb153.i + br label %bb158.i + +bb158.i: ; preds = %bb218.i, %bb.nph.i98 + br i1 undef, label %bb168.i, label %bb160.i + +bb160.i: ; preds = %bb158.i + br i1 undef, label %bb161.i, label %bb168.i + +bb161.i: ; preds = %bb160.i + br i1 undef, label %bb168.i, label %bb163.i + +bb163.i: ; preds = %bb161.i + br i1 undef, label %bb167.i, label %bb168.i + +bb167.i: ; preds = %bb163.i + br label %bb168.i + +bb168.i: ; preds = %bb167.i, %bb163.i, %bb161.i, %bb160.i, %bb158.i + br i1 undef, label %bb211.i, label %bb218.i + +bb211.i: ; preds = %bb168.i + br label %bb218.i + +bb218.i: ; preds = %bb211.i, %bb168.i + br i1 undef, label %bb220.i, label %bb158.i + +bb220.i: ; preds = %bb218.i, %bb153.i + br i1 undef, label %bb221.i, label %bb228.i + +bb221.i: ; preds = %bb220.i + br label %bb228.i + +bb228.i: ; preds = %bb221.i, %bb220.i, %bb151.i, %bb146.i, %bb12 + br i1 undef, label %bb74.i, label %bb145.i + +bb15: ; preds = %bb11, %bb8 + %indvar11 = phi i32 [ 0, %bb8 ], [ %tmp13, %bb11 ] ; <i32> [#uses=2] + %tmp13 = add i32 %indvar11, 1 ; <i32> [#uses=2] + %count.0 = sub i32 undef, %indvar11 ; <i32> [#uses=0] + br i1 undef, label %return, label %bb9 + +return: ; preds = %bb15 + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll b/src/LLVM/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll new file mode 100644 index 0000000..d7e4c90 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll
@@ -0,0 +1,128 @@ +; RUN: llc < %s -march=arm -mtriple=armv6-apple-darwin9 + +@JJ = external global i32* ; <i32**> [#uses=1] + +define void @SIM(i8* %A, i8* %B, i32 %M, i32 %N, i32 %K, [256 x i32]* %V, i32 %Q, i32 %R, i32 %nseq) nounwind { +entry: + br i1 undef, label %bb5, label %bb + +bb: ; preds = %bb, %entry + br label %bb + +bb5: ; preds = %entry + br i1 undef, label %bb6, label %bb8 + +bb6: ; preds = %bb6, %bb5 + br i1 undef, label %bb8, label %bb6 + +bb8: ; preds = %bb6, %bb5 + br label %bb15 + +bb9: ; preds = %bb15 + br i1 undef, label %bb10, label %bb11 + +bb10: ; preds = %bb9 + unreachable + +bb11: ; preds = %bb9 + br i1 undef, label %bb15, label %bb12 + +bb12: ; preds = %bb11 + %0 = load i32** @JJ, align 4 ; <i32*> [#uses=1] + br label %bb228.i + +bb74.i: ; preds = %bb228.i + br i1 undef, label %bb138.i, label %bb145.i + +bb138.i: ; preds = %bb74.i + br label %bb145.i + +bb145.i: ; preds = %bb228.i, %bb138.i, %bb74.i + %cflag.0.i = phi i16 [ 0, %bb228.i ], [ 0, %bb74.i ], [ 1, %bb138.i ] ; <i16> [#uses=1] + br i1 undef, label %bb146.i, label %bb151.i + +bb146.i: ; preds = %bb145.i + br i1 undef, label %bb228.i, label %bb151.i + +bb151.i: ; preds = %bb146.i, %bb145.i + %.not297 = icmp ne i16 %cflag.0.i, 0 ; <i1> [#uses=1] + %or.cond298 = and i1 undef, %.not297 ; <i1> [#uses=1] + br i1 %or.cond298, label %bb153.i, label %bb228.i + +bb153.i: ; preds = %bb151.i + br i1 undef, label %bb220.i, label %bb.nph.i98 + +bb.nph.i98: ; preds = %bb153.i + br label %bb158.i + +bb158.i: ; preds = %bb218.i, %bb.nph.i98 + %c.1020.i = phi i32 [ 0, %bb.nph.i98 ], [ %c.14.i, %bb218.i ] ; <i32> [#uses=1] + %cflag.418.i = phi i16 [ 0, %bb.nph.i98 ], [ %cflag.3.i, %bb218.i ] ; <i16> [#uses=1] + %pj.317.i = phi i32 [ undef, %bb.nph.i98 ], [ %8, %bb218.i ] ; <i32> [#uses=1] + %pi.316.i = phi i32 [ undef, %bb.nph.i98 ], [ %7, %bb218.i ] ; <i32> [#uses=1] + %fj.515.i = phi i32 [ undef, %bb.nph.i98 ], [ %fj.4.i, %bb218.i ] ; <i32> [#uses=3] + %ci.910.i = phi i32 [ undef, %bb.nph.i98 
], [ %ci.12.i, %bb218.i ] ; <i32> [#uses=2] + %i.121.i = sub i32 undef, undef ; <i32> [#uses=3] + %tmp105.i = sub i32 undef, undef ; <i32> [#uses=1] + %1 = sub i32 %c.1020.i, undef ; <i32> [#uses=0] + br i1 undef, label %bb168.i, label %bb160.i + +bb160.i: ; preds = %bb158.i + br i1 undef, label %bb161.i, label %bb168.i + +bb161.i: ; preds = %bb160.i + br i1 undef, label %bb168.i, label %bb163.i + +bb163.i: ; preds = %bb161.i + %2 = icmp slt i32 %fj.515.i, undef ; <i1> [#uses=1] + %3 = and i1 %2, undef ; <i1> [#uses=1] + br i1 %3, label %bb167.i, label %bb168.i + +bb167.i: ; preds = %bb163.i + br label %bb168.i + +bb168.i: ; preds = %bb167.i, %bb163.i, %bb161.i, %bb160.i, %bb158.i + %fi.5.i = phi i32 [ undef, %bb167.i ], [ %ci.910.i, %bb158.i ], [ undef, %bb160.i ], [ %ci.910.i, %bb161.i ], [ undef, %bb163.i ] ; <i32> [#uses=1] + %fj.4.i = phi i32 [ undef, %bb167.i ], [ undef, %bb158.i ], [ %fj.515.i, %bb160.i ], [ undef, %bb161.i ], [ %fj.515.i, %bb163.i ] ; <i32> [#uses=2] + %scevgep88.i = getelementptr i32* null, i32 %i.121.i ; <i32*> [#uses=3] + %4 = load i32* %scevgep88.i, align 4 ; <i32> [#uses=2] + %scevgep89.i = getelementptr i32* %0, i32 %i.121.i ; <i32*> [#uses=3] + %5 = load i32* %scevgep89.i, align 4 ; <i32> [#uses=1] + %ci.10.i = select i1 undef, i32 %pi.316.i, i32 %i.121.i ; <i32> [#uses=0] + %cj.9.i = select i1 undef, i32 %pj.317.i, i32 undef ; <i32> [#uses=0] + %6 = icmp slt i32 undef, 0 ; <i1> [#uses=3] + %ci.12.i = select i1 %6, i32 %fi.5.i, i32 %4 ; <i32> [#uses=2] + %cj.11.i100 = select i1 %6, i32 %fj.4.i, i32 %5 ; <i32> [#uses=1] + %c.14.i = select i1 %6, i32 0, i32 undef ; <i32> [#uses=2] + store i32 %c.14.i, i32* undef, align 4 + %7 = load i32* %scevgep88.i, align 4 ; <i32> [#uses=1] + %8 = load i32* %scevgep89.i, align 4 ; <i32> [#uses=1] + store i32 %ci.12.i, i32* %scevgep88.i, align 4 + store i32 %cj.11.i100, i32* %scevgep89.i, align 4 + store i32 %4, i32* undef, align 4 + br i1 undef, label %bb211.i, label %bb218.i + +bb211.i: ; preds = 
%bb168.i + br label %bb218.i + +bb218.i: ; preds = %bb211.i, %bb168.i + %cflag.3.i = phi i16 [ %cflag.418.i, %bb168.i ], [ 1, %bb211.i ] ; <i16> [#uses=2] + %9 = icmp slt i32 %tmp105.i, undef ; <i1> [#uses=1] + br i1 %9, label %bb220.i, label %bb158.i + +bb220.i: ; preds = %bb218.i, %bb153.i + %cflag.4.lcssa.i = phi i16 [ 0, %bb153.i ], [ %cflag.3.i, %bb218.i ] ; <i16> [#uses=0] + br i1 undef, label %bb221.i, label %bb228.i + +bb221.i: ; preds = %bb220.i + br label %bb228.i + +bb228.i: ; preds = %bb221.i, %bb220.i, %bb151.i, %bb146.i, %bb12 + br i1 undef, label %bb74.i, label %bb145.i + +bb15: ; preds = %bb11, %bb8 + br i1 undef, label %return, label %bb9 + +return: ; preds = %bb15 + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll b/src/LLVM/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll new file mode 100644 index 0000000..77c133a --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll
@@ -0,0 +1,128 @@ +; RUN: llc < %s -march=arm -mtriple=armv6-apple-darwin9 + +@r = external global i32 ; <i32*> [#uses=1] +@qr = external global i32 ; <i32*> [#uses=1] +@II = external global i32* ; <i32**> [#uses=1] +@no_mis = external global i32 ; <i32*> [#uses=1] +@name1 = external global i8* ; <i8**> [#uses=1] + +declare void @diff(i8*, i8*, i32, i32, i32, i32) nounwind + +define void @SIM(i8* %A, i8* %B, i32 %M, i32 %N, i32 %K, [256 x i32]* %V, i32 %Q, i32 %R, i32 %nseq) nounwind { +entry: + br i1 undef, label %bb5, label %bb + +bb: ; preds = %bb, %entry + br label %bb + +bb5: ; preds = %entry + br i1 undef, label %bb6, label %bb8 + +bb6: ; preds = %bb6, %bb5 + br i1 undef, label %bb8, label %bb6 + +bb8: ; preds = %bb6, %bb5 + %0 = load i8** @name1, align 4 ; <i8*> [#uses=0] + br label %bb15 + +bb9: ; preds = %bb15 + br i1 undef, label %bb10, label %bb11 + +bb10: ; preds = %bb9 + unreachable + +bb11: ; preds = %bb9 + store i32 0, i32* @no_mis, align 4 + %1 = getelementptr i8* %A, i32 0 ; <i8*> [#uses=1] + %2 = getelementptr i8* %B, i32 0 ; <i8*> [#uses=1] + tail call void @diff(i8* %1, i8* %2, i32 undef, i32 undef, i32 undef, i32 undef) nounwind + br i1 undef, label %bb15, label %bb12 + +bb12: ; preds = %bb11 + %3 = load i32** @II, align 4 ; <i32*> [#uses=1] + %4 = load i32* @r, align 4 ; <i32> [#uses=1] + %5 = load i32* @qr, align 4 ; <i32> [#uses=1] + br label %bb228.i + +bb74.i: ; preds = %bb228.i + br i1 undef, label %bb138.i, label %bb145.i + +bb138.i: ; preds = %bb74.i + br label %bb145.i + +bb145.i: ; preds = %bb228.i, %bb138.i, %bb74.i + br i1 undef, label %bb146.i, label %bb151.i + +bb146.i: ; preds = %bb145.i + br i1 undef, label %bb228.i, label %bb151.i + +bb151.i: ; preds = %bb146.i, %bb145.i + br i1 undef, label %bb153.i, label %bb228.i + +bb153.i: ; preds = %bb151.i + %6 = add i32 undef, -1 ; <i32> [#uses=3] + br i1 undef, label %bb220.i, label %bb.nph.i98 + +bb.nph.i98: ; preds = %bb153.i + br label %bb158.i + +bb158.i: ; preds = %bb218.i, 
%bb.nph.i98 + %c.1020.i = phi i32 [ 0, %bb.nph.i98 ], [ %c.14.i, %bb218.i ] ; <i32> [#uses=1] + %f.419.i = phi i32 [ undef, %bb.nph.i98 ], [ %f.5.i, %bb218.i ] ; <i32> [#uses=1] + %pi.316.i = phi i32 [ undef, %bb.nph.i98 ], [ %10, %bb218.i ] ; <i32> [#uses=1] + %fj.515.i = phi i32 [ %6, %bb.nph.i98 ], [ %fj.4.i, %bb218.i ] ; <i32> [#uses=2] + %fi.614.i = phi i32 [ undef, %bb.nph.i98 ], [ %fi.5.i, %bb218.i ] ; <i32> [#uses=3] + %cj.811.i = phi i32 [ %6, %bb.nph.i98 ], [ %cj.11.i100, %bb218.i ] ; <i32> [#uses=3] + %ci.910.i = phi i32 [ undef, %bb.nph.i98 ], [ %ci.12.i, %bb218.i ] ; <i32> [#uses=2] + %7 = sub i32 %f.419.i, %4 ; <i32> [#uses=5] + %8 = sub i32 %c.1020.i, %5 ; <i32> [#uses=2] + %9 = icmp slt i32 %7, %8 ; <i1> [#uses=1] + br i1 %9, label %bb168.i, label %bb160.i + +bb160.i: ; preds = %bb158.i + br i1 undef, label %bb161.i, label %bb168.i + +bb161.i: ; preds = %bb160.i + br i1 undef, label %bb168.i, label %bb163.i + +bb163.i: ; preds = %bb161.i + br i1 undef, label %bb167.i, label %bb168.i + +bb167.i: ; preds = %bb163.i + br label %bb168.i + +bb168.i: ; preds = %bb167.i, %bb163.i, %bb161.i, %bb160.i, %bb158.i + %fi.5.i = phi i32 [ %fi.614.i, %bb167.i ], [ %ci.910.i, %bb158.i ], [ %fi.614.i, %bb160.i ], [ %ci.910.i, %bb161.i ], [ %fi.614.i, %bb163.i ] ; <i32> [#uses=2] + %fj.4.i = phi i32 [ %cj.811.i, %bb167.i ], [ %cj.811.i, %bb158.i ], [ %fj.515.i, %bb160.i ], [ %cj.811.i, %bb161.i ], [ %fj.515.i, %bb163.i ] ; <i32> [#uses=2] + %f.5.i = phi i32 [ %7, %bb167.i ], [ %8, %bb158.i ], [ %7, %bb160.i ], [ %7, %bb161.i ], [ %7, %bb163.i ] ; <i32> [#uses=2] + %scevgep88.i = getelementptr i32* %3, i32 undef ; <i32*> [#uses=1] + %ci.10.i = select i1 undef, i32 %pi.316.i, i32 undef ; <i32> [#uses=0] + %ci.12.i = select i1 undef, i32 %fi.5.i, i32 undef ; <i32> [#uses=1] + %cj.11.i100 = select i1 undef, i32 %fj.4.i, i32 undef ; <i32> [#uses=1] + %c.14.i = select i1 undef, i32 %f.5.i, i32 undef ; <i32> [#uses=1] + %10 = load i32* %scevgep88.i, align 4 ; <i32> [#uses=1] 
+ br i1 undef, label %bb211.i, label %bb218.i + +bb211.i: ; preds = %bb168.i + br label %bb218.i + +bb218.i: ; preds = %bb211.i, %bb168.i + br i1 undef, label %bb220.i, label %bb158.i + +bb220.i: ; preds = %bb218.i, %bb153.i + %11 = getelementptr i32* null, i32 %6 ; <i32*> [#uses=1] + store i32 undef, i32* %11, align 4 + br i1 undef, label %bb221.i, label %bb228.i + +bb221.i: ; preds = %bb220.i + br label %bb228.i + +bb228.i: ; preds = %bb221.i, %bb220.i, %bb151.i, %bb146.i, %bb12 + br i1 undef, label %bb74.i, label %bb145.i + +bb15: ; preds = %bb11, %bb8 + br i1 undef, label %return, label %bb9 + +return: ; preds = %bb15 + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll b/src/LLVM/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll new file mode 100644 index 0000000..16f5d1d --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll
@@ -0,0 +1,99 @@ +; RUN: llc < %s -march=arm -mtriple=armv6-apple-darwin9 + +@XX = external global i32* ; <i32**> [#uses=1] + +define void @SIM(i8* %A, i8* %B, i32 %M, i32 %N, i32 %K, [256 x i32]* %V, i32 %Q, i32 %R, i32 %nseq) nounwind { +entry: + br i1 undef, label %bb5, label %bb + +bb: ; preds = %bb, %entry + br label %bb + +bb5: ; preds = %entry + br i1 undef, label %bb6, label %bb8 + +bb6: ; preds = %bb6, %bb5 + br i1 undef, label %bb8, label %bb6 + +bb8: ; preds = %bb6, %bb5 + br label %bb15 + +bb9: ; preds = %bb15 + br i1 undef, label %bb10, label %bb11 + +bb10: ; preds = %bb9 + unreachable + +bb11: ; preds = %bb9 + br i1 undef, label %bb15, label %bb12 + +bb12: ; preds = %bb11 + %0 = load i32** @XX, align 4 ; <i32*> [#uses=0] + br label %bb228.i + +bb74.i: ; preds = %bb228.i + br i1 undef, label %bb138.i, label %bb145.i + +bb138.i: ; preds = %bb74.i + br label %bb145.i + +bb145.i: ; preds = %bb228.i, %bb138.i, %bb74.i + br i1 undef, label %bb146.i, label %bb151.i + +bb146.i: ; preds = %bb145.i + br i1 undef, label %bb228.i, label %bb151.i + +bb151.i: ; preds = %bb146.i, %bb145.i + br i1 undef, label %bb153.i, label %bb228.i + +bb153.i: ; preds = %bb151.i + br i1 undef, label %bb220.i, label %bb.nph.i98 + +bb.nph.i98: ; preds = %bb153.i + br label %bb158.i + +bb158.i: ; preds = %bb218.i, %bb.nph.i98 + %1 = sub i32 undef, undef ; <i32> [#uses=4] + %2 = sub i32 undef, undef ; <i32> [#uses=1] + br i1 undef, label %bb168.i, label %bb160.i + +bb160.i: ; preds = %bb158.i + br i1 undef, label %bb161.i, label %bb168.i + +bb161.i: ; preds = %bb160.i + br i1 undef, label %bb168.i, label %bb163.i + +bb163.i: ; preds = %bb161.i + br i1 undef, label %bb167.i, label %bb168.i + +bb167.i: ; preds = %bb163.i + br label %bb168.i + +bb168.i: ; preds = %bb167.i, %bb163.i, %bb161.i, %bb160.i, %bb158.i + %f.5.i = phi i32 [ %1, %bb167.i ], [ %2, %bb158.i ], [ %1, %bb160.i ], [ %1, %bb161.i ], [ %1, %bb163.i ] ; <i32> [#uses=1] + %c.14.i = select i1 undef, i32 %f.5.i, i32 undef ; 
<i32> [#uses=1] + store i32 %c.14.i, i32* undef, align 4 + store i32 undef, i32* null, align 4 + br i1 undef, label %bb211.i, label %bb218.i + +bb211.i: ; preds = %bb168.i + br label %bb218.i + +bb218.i: ; preds = %bb211.i, %bb168.i + br i1 undef, label %bb220.i, label %bb158.i + +bb220.i: ; preds = %bb218.i, %bb153.i + br i1 undef, label %bb221.i, label %bb228.i + +bb221.i: ; preds = %bb220.i + br label %bb228.i + +bb228.i: ; preds = %bb221.i, %bb220.i, %bb151.i, %bb146.i, %bb12 + br i1 undef, label %bb74.i, label %bb145.i + +bb15: ; preds = %bb11, %bb8 + br i1 undef, label %return, label %bb9 + +return: ; preds = %bb15 + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-07-01-CommuteBug.ll b/src/LLVM/test/CodeGen/ARM/2009-07-01-CommuteBug.ll new file mode 100644 index 0000000..f0d79ce --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-07-01-CommuteBug.ll
@@ -0,0 +1,130 @@ +; RUN: llc < %s -march=arm -mtriple=armv6-apple-darwin9 + +@qr = external global i32 ; <i32*> [#uses=1] +@II = external global i32* ; <i32**> [#uses=1] +@JJ = external global i32* ; <i32**> [#uses=1] + +define void @SIM(i8* %A, i8* %B, i32 %M, i32 %N, i32 %K, [256 x i32]* %V, i32 %Q, i32 %R, i32 %nseq) nounwind { +entry: + br i1 undef, label %bb5, label %bb + +bb: ; preds = %bb, %entry + br label %bb + +bb5: ; preds = %entry + br i1 undef, label %bb6, label %bb8 + +bb6: ; preds = %bb6, %bb5 + br i1 undef, label %bb8, label %bb6 + +bb8: ; preds = %bb6, %bb5 + br label %bb15 + +bb9: ; preds = %bb15 + br i1 undef, label %bb10, label %bb11 + +bb10: ; preds = %bb9 + unreachable + +bb11: ; preds = %bb9 + br i1 undef, label %bb15, label %bb12 + +bb12: ; preds = %bb11 + %0 = load i32** @II, align 4 ; <i32*> [#uses=1] + %1 = load i32** @JJ, align 4 ; <i32*> [#uses=1] + %2 = load i32* @qr, align 4 ; <i32> [#uses=1] + br label %bb228.i + +bb74.i: ; preds = %bb228.i + br i1 undef, label %bb138.i, label %bb145.i + +bb138.i: ; preds = %bb74.i + br label %bb145.i + +bb145.i: ; preds = %bb228.i, %bb138.i, %bb74.i + %cflag.0.i = phi i16 [ %cflag.1.i, %bb228.i ], [ %cflag.1.i, %bb74.i ], [ 1, %bb138.i ] ; <i16> [#uses=2] + br i1 undef, label %bb146.i, label %bb151.i + +bb146.i: ; preds = %bb145.i + br i1 undef, label %bb228.i, label %bb151.i + +bb151.i: ; preds = %bb146.i, %bb145.i + %.not297 = icmp ne i16 %cflag.0.i, 0 ; <i1> [#uses=1] + %or.cond298 = and i1 undef, %.not297 ; <i1> [#uses=1] + br i1 %or.cond298, label %bb153.i, label %bb228.i + +bb153.i: ; preds = %bb151.i + br i1 undef, label %bb220.i, label %bb.nph.i98 + +bb.nph.i98: ; preds = %bb153.i + br label %bb158.i + +bb158.i: ; preds = %bb218.i, %bb.nph.i98 + %c.1020.i = phi i32 [ 0, %bb.nph.i98 ], [ %c.14.i, %bb218.i ] ; <i32> [#uses=1] + %f.419.i = phi i32 [ undef, %bb.nph.i98 ], [ %f.5.i, %bb218.i ] ; <i32> [#uses=1] + %cflag.418.i = phi i16 [ 0, %bb.nph.i98 ], [ %cflag.3.i, %bb218.i ] ; <i16> 
[#uses=1] + %pj.317.i = phi i32 [ undef, %bb.nph.i98 ], [ %7, %bb218.i ] ; <i32> [#uses=1] + %pi.316.i = phi i32 [ undef, %bb.nph.i98 ], [ %6, %bb218.i ] ; <i32> [#uses=1] + %fj.515.i = phi i32 [ undef, %bb.nph.i98 ], [ %fj.4.i, %bb218.i ] ; <i32> [#uses=2] + %fi.614.i = phi i32 [ undef, %bb.nph.i98 ], [ %fi.5.i, %bb218.i ] ; <i32> [#uses=3] + %cj.811.i = phi i32 [ undef, %bb.nph.i98 ], [ %cj.11.i100, %bb218.i ] ; <i32> [#uses=3] + %ci.910.i = phi i32 [ undef, %bb.nph.i98 ], [ %ci.12.i, %bb218.i ] ; <i32> [#uses=2] + %3 = sub i32 %f.419.i, 0 ; <i32> [#uses=5] + %4 = sub i32 %c.1020.i, %2 ; <i32> [#uses=2] + %5 = icmp slt i32 %3, %4 ; <i1> [#uses=1] + br i1 %5, label %bb168.i, label %bb160.i + +bb160.i: ; preds = %bb158.i + br i1 undef, label %bb161.i, label %bb168.i + +bb161.i: ; preds = %bb160.i + br i1 undef, label %bb168.i, label %bb163.i + +bb163.i: ; preds = %bb161.i + br i1 undef, label %bb167.i, label %bb168.i + +bb167.i: ; preds = %bb163.i + br label %bb168.i + +bb168.i: ; preds = %bb167.i, %bb163.i, %bb161.i, %bb160.i, %bb158.i + %fi.5.i = phi i32 [ %fi.614.i, %bb167.i ], [ %ci.910.i, %bb158.i ], [ %fi.614.i, %bb160.i ], [ %ci.910.i, %bb161.i ], [ %fi.614.i, %bb163.i ] ; <i32> [#uses=2] + %fj.4.i = phi i32 [ %cj.811.i, %bb167.i ], [ %cj.811.i, %bb158.i ], [ %fj.515.i, %bb160.i ], [ %cj.811.i, %bb161.i ], [ %fj.515.i, %bb163.i ] ; <i32> [#uses=2] + %f.5.i = phi i32 [ %3, %bb167.i ], [ %4, %bb158.i ], [ %3, %bb160.i ], [ %3, %bb161.i ], [ %3, %bb163.i ] ; <i32> [#uses=2] + %scevgep88.i = getelementptr i32* %0, i32 undef ; <i32*> [#uses=2] + %scevgep89.i = getelementptr i32* %1, i32 undef ; <i32*> [#uses=2] + %ci.10.i = select i1 undef, i32 %pi.316.i, i32 undef ; <i32> [#uses=0] + %cj.9.i = select i1 undef, i32 %pj.317.i, i32 undef ; <i32> [#uses=0] + %ci.12.i = select i1 undef, i32 %fi.5.i, i32 undef ; <i32> [#uses=2] + %cj.11.i100 = select i1 undef, i32 %fj.4.i, i32 undef ; <i32> [#uses=2] + %c.14.i = select i1 undef, i32 %f.5.i, i32 undef ; <i32> [#uses=1] 
+ %6 = load i32* %scevgep88.i, align 4 ; <i32> [#uses=1] + %7 = load i32* %scevgep89.i, align 4 ; <i32> [#uses=1] + store i32 %ci.12.i, i32* %scevgep88.i, align 4 + store i32 %cj.11.i100, i32* %scevgep89.i, align 4 + br i1 undef, label %bb211.i, label %bb218.i + +bb211.i: ; preds = %bb168.i + br label %bb218.i + +bb218.i: ; preds = %bb211.i, %bb168.i + %cflag.3.i = phi i16 [ %cflag.418.i, %bb168.i ], [ 1, %bb211.i ] ; <i16> [#uses=2] + %8 = icmp slt i32 undef, undef ; <i1> [#uses=1] + br i1 %8, label %bb220.i, label %bb158.i + +bb220.i: ; preds = %bb218.i, %bb153.i + %cflag.4.lcssa.i = phi i16 [ 0, %bb153.i ], [ %cflag.3.i, %bb218.i ] ; <i16> [#uses=2] + br i1 undef, label %bb221.i, label %bb228.i + +bb221.i: ; preds = %bb220.i + br label %bb228.i + +bb228.i: ; preds = %bb221.i, %bb220.i, %bb151.i, %bb146.i, %bb12 + %cflag.1.i = phi i16 [ 0, %bb146.i ], [ %cflag.0.i, %bb151.i ], [ %cflag.4.lcssa.i, %bb220.i ], [ 1, %bb12 ], [ %cflag.4.lcssa.i, %bb221.i ] ; <i16> [#uses=2] + br i1 false, label %bb74.i, label %bb145.i + +bb15: ; preds = %bb11, %bb8 + br i1 false, label %return, label %bb9 + +return: ; preds = %bb15 + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-07-09-asm-p-constraint.ll b/src/LLVM/test/CodeGen/ARM/2009-07-09-asm-p-constraint.ll new file mode 100644 index 0000000..e1e94b6 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-07-09-asm-p-constraint.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=arm -mattr=+v6 + +define void @test(i8* %x) nounwind { +entry: + call void asm sideeffect "pld\09${0:a}", "r,~{cc}"(i8* %x) nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-07-18-RewriterBug.ll b/src/LLVM/test/CodeGen/ARM/2009-07-18-RewriterBug.ll new file mode 100644 index 0000000..454fee5 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-07-18-RewriterBug.ll
@@ -0,0 +1,1323 @@ +; RUN: llc < %s -mtriple=armv6-apple-darwin10 -mattr=+vfp2 | grep vcmpe | count 13 + + %struct.EDGE_PAIR = type { %struct.edge_rec*, %struct.edge_rec* } + %struct.VEC2 = type { double, double, double } + %struct.VERTEX = type { %struct.VEC2, %struct.VERTEX*, %struct.VERTEX* } + %struct.edge_rec = type { %struct.VERTEX*, %struct.edge_rec*, i32, i8* } +@avail_edge = internal global %struct.edge_rec* null ; <%struct.edge_rec**> [#uses=6] +@_2E_str7 = internal constant [21 x i8] c"ERROR: Only 1 point!\00", section "__TEXT,__cstring,cstring_literals", align 1 ; <[21 x i8]*> [#uses=1] +@llvm.used = appending global [1 x i8*] [i8* bitcast (void (%struct.EDGE_PAIR*, %struct.VERTEX*, %struct.VERTEX*)* @build_delaunay to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +define void @build_delaunay(%struct.EDGE_PAIR* noalias nocapture sret %agg.result, %struct.VERTEX* %tree, %struct.VERTEX* %extra) nounwind { +entry: + %delright = alloca %struct.EDGE_PAIR, align 8 ; <%struct.EDGE_PAIR*> [#uses=3] + %delleft = alloca %struct.EDGE_PAIR, align 8 ; <%struct.EDGE_PAIR*> [#uses=3] + %0 = icmp eq %struct.VERTEX* %tree, null ; <i1> [#uses=1] + br i1 %0, label %bb8, label %bb + +bb: ; preds = %entry + %1 = getelementptr %struct.VERTEX* %tree, i32 0, i32 2 ; <%struct.VERTEX**> [#uses=1] + %2 = load %struct.VERTEX** %1, align 4 ; <%struct.VERTEX*> [#uses=2] + %3 = icmp eq %struct.VERTEX* %2, null ; <i1> [#uses=1] + br i1 %3, label %bb7, label %bb1.i + +bb1.i: ; preds = %bb1.i, %bb + %tree_addr.0.i = phi %struct.VERTEX* [ %5, %bb1.i ], [ %tree, %bb ] ; <%struct.VERTEX*> [#uses=3] + %4 = getelementptr %struct.VERTEX* %tree_addr.0.i, i32 0, i32 1 ; <%struct.VERTEX**> [#uses=1] + %5 = load %struct.VERTEX** %4, align 4 ; <%struct.VERTEX*> [#uses=2] + %6 = icmp eq %struct.VERTEX* %5, null ; <i1> [#uses=1] + br i1 %6, label %get_low.exit, label %bb1.i + +get_low.exit: ; preds = %bb1.i + call void @build_delaunay(%struct.EDGE_PAIR* noalias sret %delright, 
%struct.VERTEX* %2, %struct.VERTEX* %extra) nounwind + %7 = getelementptr %struct.VERTEX* %tree, i32 0, i32 1 ; <%struct.VERTEX**> [#uses=1] + %8 = load %struct.VERTEX** %7, align 4 ; <%struct.VERTEX*> [#uses=1] + call void @build_delaunay(%struct.EDGE_PAIR* noalias sret %delleft, %struct.VERTEX* %8, %struct.VERTEX* %tree) nounwind + %9 = getelementptr %struct.EDGE_PAIR* %delleft, i32 0, i32 0 ; <%struct.edge_rec**> [#uses=1] + %10 = load %struct.edge_rec** %9, align 8 ; <%struct.edge_rec*> [#uses=2] + %11 = getelementptr %struct.EDGE_PAIR* %delleft, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + %12 = load %struct.edge_rec** %11, align 4 ; <%struct.edge_rec*> [#uses=1] + %13 = getelementptr %struct.EDGE_PAIR* %delright, i32 0, i32 0 ; <%struct.edge_rec**> [#uses=1] + %14 = load %struct.edge_rec** %13, align 8 ; <%struct.edge_rec*> [#uses=1] + %15 = getelementptr %struct.EDGE_PAIR* %delright, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + %16 = load %struct.edge_rec** %15, align 4 ; <%struct.edge_rec*> [#uses=2] + br label %bb.i + +bb.i: ; preds = %bb4.i, %get_low.exit + %rdi_addr.0.i = phi %struct.edge_rec* [ %14, %get_low.exit ], [ %72, %bb4.i ] ; <%struct.edge_rec*> [#uses=2] + %ldi_addr.1.i = phi %struct.edge_rec* [ %12, %get_low.exit ], [ %ldi_addr.0.i, %bb4.i ] ; <%struct.edge_rec*> [#uses=3] + %17 = getelementptr %struct.edge_rec* %rdi_addr.0.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + %18 = load %struct.VERTEX** %17, align 4 ; <%struct.VERTEX*> [#uses=3] + %19 = ptrtoint %struct.edge_rec* %ldi_addr.1.i to i32 ; <i32> [#uses=1] + %20 = getelementptr %struct.VERTEX* %18, i32 0, i32 0, i32 0 ; <double*> [#uses=1] + %21 = load double* %20, align 4 ; <double> [#uses=3] + %22 = getelementptr %struct.VERTEX* %18, i32 0, i32 0, i32 1 ; <double*> [#uses=1] + %23 = load double* %22, align 4 ; <double> [#uses=3] + br label %bb2.i + +bb1.i1: ; preds = %bb2.i + %24 = ptrtoint %struct.edge_rec* %ldi_addr.0.i to i32 ; <i32> [#uses=2] + %25 = add i32 %24, 48 ; <i32> 
[#uses=1] + %26 = and i32 %25, 63 ; <i32> [#uses=1] + %27 = and i32 %24, -64 ; <i32> [#uses=1] + %28 = or i32 %26, %27 ; <i32> [#uses=1] + %29 = inttoptr i32 %28 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %30 = getelementptr %struct.edge_rec* %29, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + %31 = load %struct.edge_rec** %30, align 4 ; <%struct.edge_rec*> [#uses=1] + %32 = ptrtoint %struct.edge_rec* %31 to i32 ; <i32> [#uses=2] + %33 = add i32 %32, 16 ; <i32> [#uses=1] + %34 = and i32 %33, 63 ; <i32> [#uses=1] + %35 = and i32 %32, -64 ; <i32> [#uses=1] + %36 = or i32 %34, %35 ; <i32> [#uses=2] + %37 = inttoptr i32 %36 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] + br label %bb2.i + +bb2.i: ; preds = %bb1.i1, %bb.i + %ldi_addr.1.pn.i = phi %struct.edge_rec* [ %ldi_addr.1.i, %bb.i ], [ %37, %bb1.i1 ] ; <%struct.edge_rec*> [#uses=1] + %.pn6.in.in.i = phi i32 [ %19, %bb.i ], [ %36, %bb1.i1 ] ; <i32> [#uses=1] + %ldi_addr.0.i = phi %struct.edge_rec* [ %ldi_addr.1.i, %bb.i ], [ %37, %bb1.i1 ] ; <%struct.edge_rec*> [#uses=4] + %.pn6.in.i = xor i32 %.pn6.in.in.i, 32 ; <i32> [#uses=1] + %.pn6.i = inttoptr i32 %.pn6.in.i to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %t1.0.in.i = getelementptr %struct.edge_rec* %ldi_addr.1.pn.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + %t2.0.in.i = getelementptr %struct.edge_rec* %.pn6.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + %t1.0.i = load %struct.VERTEX** %t1.0.in.i ; <%struct.VERTEX*> [#uses=2] + %t2.0.i = load %struct.VERTEX** %t2.0.in.i ; <%struct.VERTEX*> [#uses=2] + %38 = getelementptr %struct.VERTEX* %t1.0.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1] + %39 = load double* %38, align 4 ; <double> [#uses=3] + %40 = getelementptr %struct.VERTEX* %t1.0.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1] + %41 = load double* %40, align 4 ; <double> [#uses=3] + %42 = getelementptr %struct.VERTEX* %t2.0.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1] + %43 = load double* %42, align 4 ; <double> [#uses=1] + 
%44 = getelementptr %struct.VERTEX* %t2.0.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1] + %45 = load double* %44, align 4 ; <double> [#uses=1] + %46 = fsub double %39, %21 ; <double> [#uses=1] + %47 = fsub double %45, %23 ; <double> [#uses=1] + %48 = fmul double %46, %47 ; <double> [#uses=1] + %49 = fsub double %43, %21 ; <double> [#uses=1] + %50 = fsub double %41, %23 ; <double> [#uses=1] + %51 = fmul double %49, %50 ; <double> [#uses=1] + %52 = fsub double %48, %51 ; <double> [#uses=1] + %53 = fcmp ogt double %52, 0.000000e+00 ; <i1> [#uses=1] + br i1 %53, label %bb1.i1, label %bb3.i + +bb3.i: ; preds = %bb2.i + %54 = ptrtoint %struct.edge_rec* %rdi_addr.0.i to i32 ; <i32> [#uses=1] + %55 = xor i32 %54, 32 ; <i32> [#uses=3] + %56 = inttoptr i32 %55 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] + %57 = getelementptr %struct.edge_rec* %56, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + %58 = load %struct.VERTEX** %57, align 4 ; <%struct.VERTEX*> [#uses=2] + %59 = getelementptr %struct.VERTEX* %58, i32 0, i32 0, i32 0 ; <double*> [#uses=1] + %60 = load double* %59, align 4 ; <double> [#uses=1] + %61 = getelementptr %struct.VERTEX* %58, i32 0, i32 0, i32 1 ; <double*> [#uses=1] + %62 = load double* %61, align 4 ; <double> [#uses=1] + %63 = fsub double %60, %39 ; <double> [#uses=1] + %64 = fsub double %23, %41 ; <double> [#uses=1] + %65 = fmul double %63, %64 ; <double> [#uses=1] + %66 = fsub double %21, %39 ; <double> [#uses=1] + %67 = fsub double %62, %41 ; <double> [#uses=1] + %68 = fmul double %66, %67 ; <double> [#uses=1] + %69 = fsub double %65, %68 ; <double> [#uses=1] + %70 = fcmp ogt double %69, 0.000000e+00 ; <i1> [#uses=1] + br i1 %70, label %bb4.i, label %bb5.i + +bb4.i: ; preds = %bb3.i + %71 = getelementptr %struct.edge_rec* %56, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + %72 = load %struct.edge_rec** %71, align 4 ; <%struct.edge_rec*> [#uses=1] + br label %bb.i + +bb5.i: ; preds = %bb3.i + %73 = add i32 %55, 48 ; <i32> [#uses=1] + %74 = and 
i32 %73, 63 ; <i32> [#uses=1] + %75 = and i32 %55, -64 ; <i32> [#uses=1] + %76 = or i32 %74, %75 ; <i32> [#uses=1] + %77 = inttoptr i32 %76 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %78 = getelementptr %struct.edge_rec* %77, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + %79 = load %struct.edge_rec** %78, align 4 ; <%struct.edge_rec*> [#uses=1] + %80 = ptrtoint %struct.edge_rec* %79 to i32 ; <i32> [#uses=2] + %81 = add i32 %80, 16 ; <i32> [#uses=1] + %82 = and i32 %81, 63 ; <i32> [#uses=1] + %83 = and i32 %80, -64 ; <i32> [#uses=1] + %84 = or i32 %82, %83 ; <i32> [#uses=1] + %85 = inttoptr i32 %84 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %86 = getelementptr %struct.edge_rec* %ldi_addr.0.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + %87 = load %struct.VERTEX** %86, align 4 ; <%struct.VERTEX*> [#uses=1] + %88 = call %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=6] + %89 = getelementptr %struct.edge_rec* %88, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=4] + store %struct.edge_rec* %88, %struct.edge_rec** %89, align 4 + %90 = getelementptr %struct.edge_rec* %88, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=2] + store %struct.VERTEX* %18, %struct.VERTEX** %90, align 4 + %91 = ptrtoint %struct.edge_rec* %88 to i32 ; <i32> [#uses=5] + %92 = add i32 %91, 16 ; <i32> [#uses=2] + %93 = inttoptr i32 %92 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] + %94 = add i32 %91, 48 ; <i32> [#uses=1] + %95 = inttoptr i32 %94 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] + %96 = getelementptr %struct.edge_rec* %93, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %95, %struct.edge_rec** %96, align 4 + %97 = add i32 %91, 32 ; <i32> [#uses=1] + %98 = inttoptr i32 %97 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3] + %99 = getelementptr %struct.edge_rec* %98, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %98, %struct.edge_rec** %99, align 4 + %100 = getelementptr 
%struct.edge_rec* %98, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + store %struct.VERTEX* %87, %struct.VERTEX** %100, align 4 + %101 = getelementptr %struct.edge_rec* %95, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %93, %struct.edge_rec** %101, align 4 + %102 = load %struct.edge_rec** %89, align 4 ; <%struct.edge_rec*> [#uses=1] + %103 = ptrtoint %struct.edge_rec* %102 to i32 ; <i32> [#uses=2] + %104 = add i32 %103, 16 ; <i32> [#uses=1] + %105 = and i32 %104, 63 ; <i32> [#uses=1] + %106 = and i32 %103, -64 ; <i32> [#uses=1] + %107 = or i32 %105, %106 ; <i32> [#uses=1] + %108 = inttoptr i32 %107 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %109 = getelementptr %struct.edge_rec* %85, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] + %110 = load %struct.edge_rec** %109, align 4 ; <%struct.edge_rec*> [#uses=1] + %111 = ptrtoint %struct.edge_rec* %110 to i32 ; <i32> [#uses=2] + %112 = add i32 %111, 16 ; <i32> [#uses=1] + %113 = and i32 %112, 63 ; <i32> [#uses=1] + %114 = and i32 %111, -64 ; <i32> [#uses=1] + %115 = or i32 %113, %114 ; <i32> [#uses=1] + %116 = inttoptr i32 %115 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %117 = getelementptr %struct.edge_rec* %116, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %118 = load %struct.edge_rec** %117, align 4 ; <%struct.edge_rec*> [#uses=1] + %119 = getelementptr %struct.edge_rec* %108, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %120 = load %struct.edge_rec** %119, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %118, %struct.edge_rec** %119, align 4 + store %struct.edge_rec* %120, %struct.edge_rec** %117, align 4 + %121 = load %struct.edge_rec** %89, align 4 ; <%struct.edge_rec*> [#uses=1] + %122 = load %struct.edge_rec** %109, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %121, %struct.edge_rec** %109, align 4 + store %struct.edge_rec* %122, %struct.edge_rec** %89, align 4 + %123 = xor i32 %91, 32 ; <i32> [#uses=1] + %124 = 
inttoptr i32 %123 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3] + %125 = getelementptr %struct.edge_rec* %124, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] + %126 = load %struct.edge_rec** %125, align 4 ; <%struct.edge_rec*> [#uses=1] + %127 = ptrtoint %struct.edge_rec* %126 to i32 ; <i32> [#uses=2] + %128 = add i32 %127, 16 ; <i32> [#uses=1] + %129 = and i32 %128, 63 ; <i32> [#uses=1] + %130 = and i32 %127, -64 ; <i32> [#uses=1] + %131 = or i32 %129, %130 ; <i32> [#uses=1] + %132 = inttoptr i32 %131 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %133 = getelementptr %struct.edge_rec* %ldi_addr.0.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] + %134 = load %struct.edge_rec** %133, align 4 ; <%struct.edge_rec*> [#uses=1] + %135 = ptrtoint %struct.edge_rec* %134 to i32 ; <i32> [#uses=2] + %136 = add i32 %135, 16 ; <i32> [#uses=1] + %137 = and i32 %136, 63 ; <i32> [#uses=1] + %138 = and i32 %135, -64 ; <i32> [#uses=1] + %139 = or i32 %137, %138 ; <i32> [#uses=1] + %140 = inttoptr i32 %139 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %141 = getelementptr %struct.edge_rec* %140, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %142 = load %struct.edge_rec** %141, align 4 ; <%struct.edge_rec*> [#uses=1] + %143 = getelementptr %struct.edge_rec* %132, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %144 = load %struct.edge_rec** %143, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %142, %struct.edge_rec** %143, align 4 + store %struct.edge_rec* %144, %struct.edge_rec** %141, align 4 + %145 = load %struct.edge_rec** %125, align 4 ; <%struct.edge_rec*> [#uses=1] + %146 = load %struct.edge_rec** %133, align 4 ; <%struct.edge_rec*> [#uses=2] + store %struct.edge_rec* %145, %struct.edge_rec** %133, align 4 + store %struct.edge_rec* %146, %struct.edge_rec** %125, align 4 + %147 = and i32 %92, 63 ; <i32> [#uses=1] + %148 = and i32 %91, -64 ; <i32> [#uses=1] + %149 = or i32 %147, %148 ; <i32> [#uses=1] + %150 = inttoptr i32 %149 to 
%struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %151 = getelementptr %struct.edge_rec* %150, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + %152 = load %struct.edge_rec** %151, align 4 ; <%struct.edge_rec*> [#uses=1] + %153 = ptrtoint %struct.edge_rec* %152 to i32 ; <i32> [#uses=2] + %154 = add i32 %153, 16 ; <i32> [#uses=1] + %155 = and i32 %154, 63 ; <i32> [#uses=1] + %156 = and i32 %153, -64 ; <i32> [#uses=1] + %157 = or i32 %155, %156 ; <i32> [#uses=1] + %158 = inttoptr i32 %157 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %159 = load %struct.VERTEX** %90, align 4 ; <%struct.VERTEX*> [#uses=1] + %160 = getelementptr %struct.edge_rec* %124, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + %161 = load %struct.VERTEX** %160, align 4 ; <%struct.VERTEX*> [#uses=1] + %162 = getelementptr %struct.edge_rec* %16, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + %163 = load %struct.VERTEX** %162, align 4 ; <%struct.VERTEX*> [#uses=1] + %164 = icmp eq %struct.VERTEX* %163, %159 ; <i1> [#uses=1] + %rdo_addr.0.i = select i1 %164, %struct.edge_rec* %88, %struct.edge_rec* %16 ; <%struct.edge_rec*> [#uses=3] + %165 = getelementptr %struct.edge_rec* %10, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + %166 = load %struct.VERTEX** %165, align 4 ; <%struct.VERTEX*> [#uses=1] + %167 = icmp eq %struct.VERTEX* %166, %161 ; <i1> [#uses=1] + %ldo_addr.0.ph.i = select i1 %167, %struct.edge_rec* %124, %struct.edge_rec* %10 ; <%struct.edge_rec*> [#uses=3] + br label %bb9.i + +bb9.i: ; preds = %bb25.i, %bb24.i, %bb5.i + %lcand.2.i = phi %struct.edge_rec* [ %146, %bb5.i ], [ %lcand.1.i, %bb24.i ], [ %739, %bb25.i ] ; <%struct.edge_rec*> [#uses=5] + %rcand.2.i = phi %struct.edge_rec* [ %158, %bb5.i ], [ %666, %bb24.i ], [ %rcand.1.i, %bb25.i ] ; <%struct.edge_rec*> [#uses=5] + %basel.0.i = phi %struct.edge_rec* [ %88, %bb5.i ], [ %595, %bb24.i ], [ %716, %bb25.i ] ; <%struct.edge_rec*> [#uses=2] + %168 = getelementptr %struct.edge_rec* %lcand.2.i, i32 0, i32 1 ; <%struct.edge_rec**> 
[#uses=1] + %169 = load %struct.edge_rec** %168, align 4 ; <%struct.edge_rec*> [#uses=3] + %170 = getelementptr %struct.edge_rec* %basel.0.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=3] + %171 = load %struct.VERTEX** %170, align 4 ; <%struct.VERTEX*> [#uses=4] + %172 = ptrtoint %struct.edge_rec* %basel.0.i to i32 ; <i32> [#uses=3] + %173 = xor i32 %172, 32 ; <i32> [#uses=1] + %174 = inttoptr i32 %173 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] + %175 = getelementptr %struct.edge_rec* %174, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=3] + %176 = load %struct.VERTEX** %175, align 4 ; <%struct.VERTEX*> [#uses=3] + %177 = ptrtoint %struct.edge_rec* %169 to i32 ; <i32> [#uses=1] + %178 = xor i32 %177, 32 ; <i32> [#uses=1] + %179 = inttoptr i32 %178 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %180 = getelementptr %struct.edge_rec* %179, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + %181 = load %struct.VERTEX** %180, align 4 ; <%struct.VERTEX*> [#uses=2] + %182 = getelementptr %struct.VERTEX* %171, i32 0, i32 0, i32 0 ; <double*> [#uses=2] + %183 = load double* %182, align 4 ; <double> [#uses=2] + %184 = getelementptr %struct.VERTEX* %171, i32 0, i32 0, i32 1 ; <double*> [#uses=2] + %185 = load double* %184, align 4 ; <double> [#uses=2] + %186 = getelementptr %struct.VERTEX* %181, i32 0, i32 0, i32 0 ; <double*> [#uses=1] + %187 = load double* %186, align 4 ; <double> [#uses=1] + %188 = getelementptr %struct.VERTEX* %181, i32 0, i32 0, i32 1 ; <double*> [#uses=1] + %189 = load double* %188, align 4 ; <double> [#uses=1] + %190 = getelementptr %struct.VERTEX* %176, i32 0, i32 0, i32 0 ; <double*> [#uses=1] + %191 = load double* %190, align 4 ; <double> [#uses=2] + %192 = getelementptr %struct.VERTEX* %176, i32 0, i32 0, i32 1 ; <double*> [#uses=1] + %193 = load double* %192, align 4 ; <double> [#uses=2] + %194 = fsub double %183, %191 ; <double> [#uses=1] + %195 = fsub double %189, %193 ; <double> [#uses=1] + %196 = fmul double %194, %195 ; <double> 
[#uses=1] + %197 = fsub double %187, %191 ; <double> [#uses=1] + %198 = fsub double %185, %193 ; <double> [#uses=1] + %199 = fmul double %197, %198 ; <double> [#uses=1] + %200 = fsub double %196, %199 ; <double> [#uses=1] + %201 = fcmp ogt double %200, 0.000000e+00 ; <i1> [#uses=1] + br i1 %201, label %bb10.i, label %bb13.i + +bb10.i: ; preds = %bb9.i + %202 = getelementptr %struct.VERTEX* %171, i32 0, i32 0, i32 2 ; <double*> [#uses=1] + %avail_edge.promoted25 = load %struct.edge_rec** @avail_edge ; <%struct.edge_rec*> [#uses=1] + br label %bb12.i + +bb11.i: ; preds = %bb12.i + %203 = ptrtoint %struct.edge_rec* %lcand.0.i to i32 ; <i32> [#uses=3] + %204 = add i32 %203, 16 ; <i32> [#uses=1] + %205 = and i32 %204, 63 ; <i32> [#uses=1] + %206 = and i32 %203, -64 ; <i32> [#uses=3] + %207 = or i32 %205, %206 ; <i32> [#uses=1] + %208 = inttoptr i32 %207 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %209 = getelementptr %struct.edge_rec* %208, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + %210 = load %struct.edge_rec** %209, align 4 ; <%struct.edge_rec*> [#uses=1] + %211 = ptrtoint %struct.edge_rec* %210 to i32 ; <i32> [#uses=2] + %212 = add i32 %211, 16 ; <i32> [#uses=1] + %213 = and i32 %212, 63 ; <i32> [#uses=1] + %214 = and i32 %211, -64 ; <i32> [#uses=1] + %215 = or i32 %213, %214 ; <i32> [#uses=1] + %216 = inttoptr i32 %215 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %217 = getelementptr %struct.edge_rec* %lcand.0.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] + %218 = load %struct.edge_rec** %217, align 4 ; <%struct.edge_rec*> [#uses=1] + %219 = ptrtoint %struct.edge_rec* %218 to i32 ; <i32> [#uses=2] + %220 = add i32 %219, 16 ; <i32> [#uses=1] + %221 = and i32 %220, 63 ; <i32> [#uses=1] + %222 = and i32 %219, -64 ; <i32> [#uses=1] + %223 = or i32 %221, %222 ; <i32> [#uses=1] + %224 = inttoptr i32 %223 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %225 = getelementptr %struct.edge_rec* %216, i32 0, i32 1 ; <%struct.edge_rec**> 
[#uses=3] + %226 = load %struct.edge_rec** %225, align 4 ; <%struct.edge_rec*> [#uses=1] + %227 = ptrtoint %struct.edge_rec* %226 to i32 ; <i32> [#uses=2] + %228 = add i32 %227, 16 ; <i32> [#uses=1] + %229 = and i32 %228, 63 ; <i32> [#uses=1] + %230 = and i32 %227, -64 ; <i32> [#uses=1] + %231 = or i32 %229, %230 ; <i32> [#uses=1] + %232 = inttoptr i32 %231 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %233 = getelementptr %struct.edge_rec* %232, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %234 = load %struct.edge_rec** %233, align 4 ; <%struct.edge_rec*> [#uses=1] + %235 = getelementptr %struct.edge_rec* %224, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %236 = load %struct.edge_rec** %235, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %234, %struct.edge_rec** %235, align 4 + store %struct.edge_rec* %236, %struct.edge_rec** %233, align 4 + %237 = load %struct.edge_rec** %217, align 4 ; <%struct.edge_rec*> [#uses=1] + %238 = load %struct.edge_rec** %225, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %237, %struct.edge_rec** %225, align 4 + store %struct.edge_rec* %238, %struct.edge_rec** %217, align 4 + %239 = xor i32 %203, 32 ; <i32> [#uses=2] + %240 = add i32 %239, 16 ; <i32> [#uses=1] + %241 = and i32 %240, 63 ; <i32> [#uses=1] + %242 = or i32 %241, %206 ; <i32> [#uses=1] + %243 = inttoptr i32 %242 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %244 = getelementptr %struct.edge_rec* %243, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + %245 = load %struct.edge_rec** %244, align 4 ; <%struct.edge_rec*> [#uses=1] + %246 = ptrtoint %struct.edge_rec* %245 to i32 ; <i32> [#uses=2] + %247 = add i32 %246, 16 ; <i32> [#uses=1] + %248 = and i32 %247, 63 ; <i32> [#uses=1] + %249 = and i32 %246, -64 ; <i32> [#uses=1] + %250 = or i32 %248, %249 ; <i32> [#uses=1] + %251 = inttoptr i32 %250 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %252 = inttoptr i32 %239 to %struct.edge_rec* ; <%struct.edge_rec*> 
[#uses=1] + %253 = getelementptr %struct.edge_rec* %252, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] + %254 = load %struct.edge_rec** %253, align 4 ; <%struct.edge_rec*> [#uses=1] + %255 = ptrtoint %struct.edge_rec* %254 to i32 ; <i32> [#uses=2] + %256 = add i32 %255, 16 ; <i32> [#uses=1] + %257 = and i32 %256, 63 ; <i32> [#uses=1] + %258 = and i32 %255, -64 ; <i32> [#uses=1] + %259 = or i32 %257, %258 ; <i32> [#uses=1] + %260 = inttoptr i32 %259 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %261 = getelementptr %struct.edge_rec* %251, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] + %262 = load %struct.edge_rec** %261, align 4 ; <%struct.edge_rec*> [#uses=1] + %263 = ptrtoint %struct.edge_rec* %262 to i32 ; <i32> [#uses=2] + %264 = add i32 %263, 16 ; <i32> [#uses=1] + %265 = and i32 %264, 63 ; <i32> [#uses=1] + %266 = and i32 %263, -64 ; <i32> [#uses=1] + %267 = or i32 %265, %266 ; <i32> [#uses=1] + %268 = inttoptr i32 %267 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %269 = getelementptr %struct.edge_rec* %268, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %270 = load %struct.edge_rec** %269, align 4 ; <%struct.edge_rec*> [#uses=1] + %271 = getelementptr %struct.edge_rec* %260, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %272 = load %struct.edge_rec** %271, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %270, %struct.edge_rec** %271, align 4 + store %struct.edge_rec* %272, %struct.edge_rec** %269, align 4 + %273 = load %struct.edge_rec** %253, align 4 ; <%struct.edge_rec*> [#uses=1] + %274 = load %struct.edge_rec** %261, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %273, %struct.edge_rec** %261, align 4 + store %struct.edge_rec* %274, %struct.edge_rec** %253, align 4 + %275 = inttoptr i32 %206 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] + %276 = getelementptr %struct.edge_rec* %275, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %avail_edge.tmp.026, 
%struct.edge_rec** %276, align 4 + %277 = getelementptr %struct.edge_rec* %t.0.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + %278 = load %struct.edge_rec** %277, align 4 ; <%struct.edge_rec*> [#uses=2] + %.pre.i = load double* %182, align 4 ; <double> [#uses=1] + %.pre22.i = load double* %184, align 4 ; <double> [#uses=1] + br label %bb12.i + +bb12.i: ; preds = %bb11.i, %bb10.i + %avail_edge.tmp.026 = phi %struct.edge_rec* [ %avail_edge.promoted25, %bb10.i ], [ %275, %bb11.i ] ; <%struct.edge_rec*> [#uses=2] + %279 = phi double [ %.pre22.i, %bb11.i ], [ %185, %bb10.i ] ; <double> [#uses=3] + %280 = phi double [ %.pre.i, %bb11.i ], [ %183, %bb10.i ] ; <double> [#uses=3] + %lcand.0.i = phi %struct.edge_rec* [ %lcand.2.i, %bb10.i ], [ %t.0.i, %bb11.i ] ; <%struct.edge_rec*> [#uses=3] + %t.0.i = phi %struct.edge_rec* [ %169, %bb10.i ], [ %278, %bb11.i ] ; <%struct.edge_rec*> [#uses=4] + %.pn5.in.in.in.i = phi %struct.edge_rec* [ %lcand.2.i, %bb10.i ], [ %t.0.i, %bb11.i ] ; <%struct.edge_rec*> [#uses=1] + %.pn4.in.in.in.i = phi %struct.edge_rec* [ %169, %bb10.i ], [ %278, %bb11.i ] ; <%struct.edge_rec*> [#uses=1] + %lcand.2.pn.i = phi %struct.edge_rec* [ %lcand.2.i, %bb10.i ], [ %t.0.i, %bb11.i ] ; <%struct.edge_rec*> [#uses=1] + %.pn5.in.in.i = ptrtoint %struct.edge_rec* %.pn5.in.in.in.i to i32 ; <i32> [#uses=1] + %.pn4.in.in.i = ptrtoint %struct.edge_rec* %.pn4.in.in.in.i to i32 ; <i32> [#uses=1] + %.pn5.in.i = xor i32 %.pn5.in.in.i, 32 ; <i32> [#uses=1] + %.pn4.in.i = xor i32 %.pn4.in.in.i, 32 ; <i32> [#uses=1] + %.pn5.i = inttoptr i32 %.pn5.in.i to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %.pn4.i = inttoptr i32 %.pn4.in.i to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %v1.0.in.i = getelementptr %struct.edge_rec* %.pn5.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + %v2.0.in.i = getelementptr %struct.edge_rec* %.pn4.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + %v3.0.in.i = getelementptr %struct.edge_rec* %lcand.2.pn.i, i32 0, i32 0 ; 
<%struct.VERTEX**> [#uses=1] + %v1.0.i = load %struct.VERTEX** %v1.0.in.i ; <%struct.VERTEX*> [#uses=3] + %v2.0.i = load %struct.VERTEX** %v2.0.in.i ; <%struct.VERTEX*> [#uses=3] + %v3.0.i = load %struct.VERTEX** %v3.0.in.i ; <%struct.VERTEX*> [#uses=3] + %281 = load double* %202, align 4 ; <double> [#uses=3] + %282 = getelementptr %struct.VERTEX* %v1.0.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1] + %283 = load double* %282, align 4 ; <double> [#uses=1] + %284 = fsub double %283, %280 ; <double> [#uses=2] + %285 = getelementptr %struct.VERTEX* %v1.0.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1] + %286 = load double* %285, align 4 ; <double> [#uses=1] + %287 = fsub double %286, %279 ; <double> [#uses=2] + %288 = getelementptr %struct.VERTEX* %v1.0.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1] + %289 = load double* %288, align 4 ; <double> [#uses=1] + %290 = getelementptr %struct.VERTEX* %v2.0.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1] + %291 = load double* %290, align 4 ; <double> [#uses=1] + %292 = fsub double %291, %280 ; <double> [#uses=2] + %293 = getelementptr %struct.VERTEX* %v2.0.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1] + %294 = load double* %293, align 4 ; <double> [#uses=1] + %295 = fsub double %294, %279 ; <double> [#uses=2] + %296 = getelementptr %struct.VERTEX* %v2.0.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1] + %297 = load double* %296, align 4 ; <double> [#uses=1] + %298 = getelementptr %struct.VERTEX* %v3.0.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1] + %299 = load double* %298, align 4 ; <double> [#uses=1] + %300 = fsub double %299, %280 ; <double> [#uses=2] + %301 = getelementptr %struct.VERTEX* %v3.0.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1] + %302 = load double* %301, align 4 ; <double> [#uses=1] + %303 = fsub double %302, %279 ; <double> [#uses=2] + %304 = getelementptr %struct.VERTEX* %v3.0.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1] + %305 = load double* %304, align 4 ; <double> [#uses=1] + %306 = fsub double %289, %281 ; <double> 
[#uses=1] + %307 = fmul double %292, %303 ; <double> [#uses=1] + %308 = fmul double %295, %300 ; <double> [#uses=1] + %309 = fsub double %307, %308 ; <double> [#uses=1] + %310 = fmul double %306, %309 ; <double> [#uses=1] + %311 = fsub double %297, %281 ; <double> [#uses=1] + %312 = fmul double %300, %287 ; <double> [#uses=1] + %313 = fmul double %303, %284 ; <double> [#uses=1] + %314 = fsub double %312, %313 ; <double> [#uses=1] + %315 = fmul double %311, %314 ; <double> [#uses=1] + %316 = fadd double %315, %310 ; <double> [#uses=1] + %317 = fsub double %305, %281 ; <double> [#uses=1] + %318 = fmul double %284, %295 ; <double> [#uses=1] + %319 = fmul double %287, %292 ; <double> [#uses=1] + %320 = fsub double %318, %319 ; <double> [#uses=1] + %321 = fmul double %317, %320 ; <double> [#uses=1] + %322 = fadd double %321, %316 ; <double> [#uses=1] + %323 = fcmp ogt double %322, 0.000000e+00 ; <i1> [#uses=1] + br i1 %323, label %bb11.i, label %bb13.loopexit.i + +bb13.loopexit.i: ; preds = %bb12.i + store %struct.edge_rec* %avail_edge.tmp.026, %struct.edge_rec** @avail_edge + %.pre23.i = load %struct.VERTEX** %170, align 4 ; <%struct.VERTEX*> [#uses=1] + %.pre24.i = load %struct.VERTEX** %175, align 4 ; <%struct.VERTEX*> [#uses=1] + br label %bb13.i + +bb13.i: ; preds = %bb13.loopexit.i, %bb9.i + %324 = phi %struct.VERTEX* [ %.pre24.i, %bb13.loopexit.i ], [ %176, %bb9.i ] ; <%struct.VERTEX*> [#uses=4] + %325 = phi %struct.VERTEX* [ %.pre23.i, %bb13.loopexit.i ], [ %171, %bb9.i ] ; <%struct.VERTEX*> [#uses=3] + %lcand.1.i = phi %struct.edge_rec* [ %lcand.0.i, %bb13.loopexit.i ], [ %lcand.2.i, %bb9.i ] ; <%struct.edge_rec*> [#uses=3] + %326 = ptrtoint %struct.edge_rec* %rcand.2.i to i32 ; <i32> [#uses=2] + %327 = add i32 %326, 16 ; <i32> [#uses=1] + %328 = and i32 %327, 63 ; <i32> [#uses=1] + %329 = and i32 %326, -64 ; <i32> [#uses=1] + %330 = or i32 %328, %329 ; <i32> [#uses=1] + %331 = inttoptr i32 %330 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %332 = 
getelementptr %struct.edge_rec* %331, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + %333 = load %struct.edge_rec** %332, align 4 ; <%struct.edge_rec*> [#uses=1] + %334 = ptrtoint %struct.edge_rec* %333 to i32 ; <i32> [#uses=2] + %335 = add i32 %334, 16 ; <i32> [#uses=1] + %336 = and i32 %335, 63 ; <i32> [#uses=1] + %337 = and i32 %334, -64 ; <i32> [#uses=1] + %338 = or i32 %336, %337 ; <i32> [#uses=3] + %339 = xor i32 %338, 32 ; <i32> [#uses=1] + %340 = inttoptr i32 %339 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %341 = getelementptr %struct.edge_rec* %340, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + %342 = load %struct.VERTEX** %341, align 4 ; <%struct.VERTEX*> [#uses=2] + %343 = getelementptr %struct.VERTEX* %325, i32 0, i32 0, i32 0 ; <double*> [#uses=1] + %344 = load double* %343, align 4 ; <double> [#uses=1] + %345 = getelementptr %struct.VERTEX* %325, i32 0, i32 0, i32 1 ; <double*> [#uses=1] + %346 = load double* %345, align 4 ; <double> [#uses=1] + %347 = getelementptr %struct.VERTEX* %342, i32 0, i32 0, i32 0 ; <double*> [#uses=1] + %348 = load double* %347, align 4 ; <double> [#uses=1] + %349 = getelementptr %struct.VERTEX* %342, i32 0, i32 0, i32 1 ; <double*> [#uses=1] + %350 = load double* %349, align 4 ; <double> [#uses=1] + %351 = getelementptr %struct.VERTEX* %324, i32 0, i32 0, i32 0 ; <double*> [#uses=2] + %352 = load double* %351, align 4 ; <double> [#uses=3] + %353 = getelementptr %struct.VERTEX* %324, i32 0, i32 0, i32 1 ; <double*> [#uses=2] + %354 = load double* %353, align 4 ; <double> [#uses=3] + %355 = fsub double %344, %352 ; <double> [#uses=1] + %356 = fsub double %350, %354 ; <double> [#uses=1] + %357 = fmul double %355, %356 ; <double> [#uses=1] + %358 = fsub double %348, %352 ; <double> [#uses=1] + %359 = fsub double %346, %354 ; <double> [#uses=1] + %360 = fmul double %358, %359 ; <double> [#uses=1] + %361 = fsub double %357, %360 ; <double> [#uses=1] + %362 = fcmp ogt double %361, 0.000000e+00 ; <i1> [#uses=1] + br 
i1 %362, label %bb14.i, label %bb17.i + +bb14.i: ; preds = %bb13.i + %363 = getelementptr %struct.VERTEX* %324, i32 0, i32 0, i32 2 ; <double*> [#uses=1] + %avail_edge.promoted = load %struct.edge_rec** @avail_edge ; <%struct.edge_rec*> [#uses=1] + br label %bb16.i + +bb15.i: ; preds = %bb16.i + %364 = ptrtoint %struct.edge_rec* %rcand.0.i to i32 ; <i32> [#uses=3] + %365 = add i32 %364, 16 ; <i32> [#uses=1] + %366 = and i32 %365, 63 ; <i32> [#uses=1] + %367 = and i32 %364, -64 ; <i32> [#uses=3] + %368 = or i32 %366, %367 ; <i32> [#uses=1] + %369 = inttoptr i32 %368 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %370 = getelementptr %struct.edge_rec* %369, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + %371 = load %struct.edge_rec** %370, align 4 ; <%struct.edge_rec*> [#uses=1] + %372 = ptrtoint %struct.edge_rec* %371 to i32 ; <i32> [#uses=2] + %373 = add i32 %372, 16 ; <i32> [#uses=1] + %374 = and i32 %373, 63 ; <i32> [#uses=1] + %375 = and i32 %372, -64 ; <i32> [#uses=1] + %376 = or i32 %374, %375 ; <i32> [#uses=1] + %377 = inttoptr i32 %376 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %378 = getelementptr %struct.edge_rec* %rcand.0.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] + %379 = load %struct.edge_rec** %378, align 4 ; <%struct.edge_rec*> [#uses=1] + %380 = ptrtoint %struct.edge_rec* %379 to i32 ; <i32> [#uses=2] + %381 = add i32 %380, 16 ; <i32> [#uses=1] + %382 = and i32 %381, 63 ; <i32> [#uses=1] + %383 = and i32 %380, -64 ; <i32> [#uses=1] + %384 = or i32 %382, %383 ; <i32> [#uses=1] + %385 = inttoptr i32 %384 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %386 = getelementptr %struct.edge_rec* %377, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] + %387 = load %struct.edge_rec** %386, align 4 ; <%struct.edge_rec*> [#uses=1] + %388 = ptrtoint %struct.edge_rec* %387 to i32 ; <i32> [#uses=2] + %389 = add i32 %388, 16 ; <i32> [#uses=1] + %390 = and i32 %389, 63 ; <i32> [#uses=1] + %391 = and i32 %388, -64 ; <i32> [#uses=1] + 
%392 = or i32 %390, %391 ; <i32> [#uses=1] + %393 = inttoptr i32 %392 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %394 = getelementptr %struct.edge_rec* %393, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %395 = load %struct.edge_rec** %394, align 4 ; <%struct.edge_rec*> [#uses=1] + %396 = getelementptr %struct.edge_rec* %385, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %397 = load %struct.edge_rec** %396, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %395, %struct.edge_rec** %396, align 4 + store %struct.edge_rec* %397, %struct.edge_rec** %394, align 4 + %398 = load %struct.edge_rec** %378, align 4 ; <%struct.edge_rec*> [#uses=1] + %399 = load %struct.edge_rec** %386, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %398, %struct.edge_rec** %386, align 4 + store %struct.edge_rec* %399, %struct.edge_rec** %378, align 4 + %400 = xor i32 %364, 32 ; <i32> [#uses=2] + %401 = add i32 %400, 16 ; <i32> [#uses=1] + %402 = and i32 %401, 63 ; <i32> [#uses=1] + %403 = or i32 %402, %367 ; <i32> [#uses=1] + %404 = inttoptr i32 %403 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %405 = getelementptr %struct.edge_rec* %404, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + %406 = load %struct.edge_rec** %405, align 4 ; <%struct.edge_rec*> [#uses=1] + %407 = ptrtoint %struct.edge_rec* %406 to i32 ; <i32> [#uses=2] + %408 = add i32 %407, 16 ; <i32> [#uses=1] + %409 = and i32 %408, 63 ; <i32> [#uses=1] + %410 = and i32 %407, -64 ; <i32> [#uses=1] + %411 = or i32 %409, %410 ; <i32> [#uses=1] + %412 = inttoptr i32 %411 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %413 = inttoptr i32 %400 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %414 = getelementptr %struct.edge_rec* %413, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] + %415 = load %struct.edge_rec** %414, align 4 ; <%struct.edge_rec*> [#uses=1] + %416 = ptrtoint %struct.edge_rec* %415 to i32 ; <i32> [#uses=2] + %417 = add i32 %416, 16 ; <i32> [#uses=1] 
+ %418 = and i32 %417, 63 ; <i32> [#uses=1] + %419 = and i32 %416, -64 ; <i32> [#uses=1] + %420 = or i32 %418, %419 ; <i32> [#uses=1] + %421 = inttoptr i32 %420 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %422 = getelementptr %struct.edge_rec* %412, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] + %423 = load %struct.edge_rec** %422, align 4 ; <%struct.edge_rec*> [#uses=1] + %424 = ptrtoint %struct.edge_rec* %423 to i32 ; <i32> [#uses=2] + %425 = add i32 %424, 16 ; <i32> [#uses=1] + %426 = and i32 %425, 63 ; <i32> [#uses=1] + %427 = and i32 %424, -64 ; <i32> [#uses=1] + %428 = or i32 %426, %427 ; <i32> [#uses=1] + %429 = inttoptr i32 %428 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %430 = getelementptr %struct.edge_rec* %429, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %431 = load %struct.edge_rec** %430, align 4 ; <%struct.edge_rec*> [#uses=1] + %432 = getelementptr %struct.edge_rec* %421, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %433 = load %struct.edge_rec** %432, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %431, %struct.edge_rec** %432, align 4 + store %struct.edge_rec* %433, %struct.edge_rec** %430, align 4 + %434 = load %struct.edge_rec** %414, align 4 ; <%struct.edge_rec*> [#uses=1] + %435 = load %struct.edge_rec** %422, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %434, %struct.edge_rec** %422, align 4 + store %struct.edge_rec* %435, %struct.edge_rec** %414, align 4 + %436 = inttoptr i32 %367 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] + %437 = getelementptr %struct.edge_rec* %436, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %avail_edge.tmp.0, %struct.edge_rec** %437, align 4 + %438 = add i32 %t.1.in.i, 16 ; <i32> [#uses=1] + %439 = and i32 %438, 63 ; <i32> [#uses=1] + %440 = and i32 %t.1.in.i, -64 ; <i32> [#uses=1] + %441 = or i32 %439, %440 ; <i32> [#uses=1] + %442 = inttoptr i32 %441 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %443 = 
getelementptr %struct.edge_rec* %442, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + %444 = load %struct.edge_rec** %443, align 4 ; <%struct.edge_rec*> [#uses=1] + %445 = ptrtoint %struct.edge_rec* %444 to i32 ; <i32> [#uses=2] + %446 = add i32 %445, 16 ; <i32> [#uses=1] + %447 = and i32 %446, 63 ; <i32> [#uses=1] + %448 = and i32 %445, -64 ; <i32> [#uses=1] + %449 = or i32 %447, %448 ; <i32> [#uses=2] + %.pre25.i = load double* %351, align 4 ; <double> [#uses=1] + %.pre26.i = load double* %353, align 4 ; <double> [#uses=1] + br label %bb16.i + +bb16.i: ; preds = %bb15.i, %bb14.i + %avail_edge.tmp.0 = phi %struct.edge_rec* [ %avail_edge.promoted, %bb14.i ], [ %436, %bb15.i ] ; <%struct.edge_rec*> [#uses=2] + %450 = phi double [ %.pre26.i, %bb15.i ], [ %354, %bb14.i ] ; <double> [#uses=3] + %451 = phi double [ %.pre25.i, %bb15.i ], [ %352, %bb14.i ] ; <double> [#uses=3] + %rcand.0.i = phi %struct.edge_rec* [ %rcand.2.i, %bb14.i ], [ %t.1.i, %bb15.i ] ; <%struct.edge_rec*> [#uses=3] + %t.1.in.i = phi i32 [ %338, %bb14.i ], [ %449, %bb15.i ] ; <i32> [#uses=3] + %.pn3.in.in.i = phi i32 [ %338, %bb14.i ], [ %449, %bb15.i ] ; <i32> [#uses=1] + %.pn.in.in.in.i = phi %struct.edge_rec* [ %rcand.2.i, %bb14.i ], [ %t.1.i, %bb15.i ] ; <%struct.edge_rec*> [#uses=1] + %rcand.2.pn.i = phi %struct.edge_rec* [ %rcand.2.i, %bb14.i ], [ %t.1.i, %bb15.i ] ; <%struct.edge_rec*> [#uses=1] + %t.1.i = inttoptr i32 %t.1.in.i to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3] + %.pn.in.in.i = ptrtoint %struct.edge_rec* %.pn.in.in.in.i to i32 ; <i32> [#uses=1] + %.pn3.in.i = xor i32 %.pn3.in.in.i, 32 ; <i32> [#uses=1] + %.pn.in.i = xor i32 %.pn.in.in.i, 32 ; <i32> [#uses=1] + %.pn3.i = inttoptr i32 %.pn3.in.i to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %.pn.i = inttoptr i32 %.pn.in.i to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %v1.1.in.i = getelementptr %struct.edge_rec* %.pn3.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + %v2.1.in.i = getelementptr 
%struct.edge_rec* %.pn.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + %v3.1.in.i = getelementptr %struct.edge_rec* %rcand.2.pn.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + %v1.1.i = load %struct.VERTEX** %v1.1.in.i ; <%struct.VERTEX*> [#uses=3] + %v2.1.i = load %struct.VERTEX** %v2.1.in.i ; <%struct.VERTEX*> [#uses=3] + %v3.1.i = load %struct.VERTEX** %v3.1.in.i ; <%struct.VERTEX*> [#uses=3] + %452 = load double* %363, align 4 ; <double> [#uses=3] + %453 = getelementptr %struct.VERTEX* %v1.1.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1] + %454 = load double* %453, align 4 ; <double> [#uses=1] + %455 = fsub double %454, %451 ; <double> [#uses=2] + %456 = getelementptr %struct.VERTEX* %v1.1.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1] + %457 = load double* %456, align 4 ; <double> [#uses=1] + %458 = fsub double %457, %450 ; <double> [#uses=2] + %459 = getelementptr %struct.VERTEX* %v1.1.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1] + %460 = load double* %459, align 4 ; <double> [#uses=1] + %461 = getelementptr %struct.VERTEX* %v2.1.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1] + %462 = load double* %461, align 4 ; <double> [#uses=1] + %463 = fsub double %462, %451 ; <double> [#uses=2] + %464 = getelementptr %struct.VERTEX* %v2.1.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1] + %465 = load double* %464, align 4 ; <double> [#uses=1] + %466 = fsub double %465, %450 ; <double> [#uses=2] + %467 = getelementptr %struct.VERTEX* %v2.1.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1] + %468 = load double* %467, align 4 ; <double> [#uses=1] + %469 = getelementptr %struct.VERTEX* %v3.1.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1] + %470 = load double* %469, align 4 ; <double> [#uses=1] + %471 = fsub double %470, %451 ; <double> [#uses=2] + %472 = getelementptr %struct.VERTEX* %v3.1.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1] + %473 = load double* %472, align 4 ; <double> [#uses=1] + %474 = fsub double %473, %450 ; <double> [#uses=2] + %475 = getelementptr %struct.VERTEX* %v3.1.i, 
i32 0, i32 0, i32 2 ; <double*> [#uses=1] + %476 = load double* %475, align 4 ; <double> [#uses=1] + %477 = fsub double %460, %452 ; <double> [#uses=1] + %478 = fmul double %463, %474 ; <double> [#uses=1] + %479 = fmul double %466, %471 ; <double> [#uses=1] + %480 = fsub double %478, %479 ; <double> [#uses=1] + %481 = fmul double %477, %480 ; <double> [#uses=1] + %482 = fsub double %468, %452 ; <double> [#uses=1] + %483 = fmul double %471, %458 ; <double> [#uses=1] + %484 = fmul double %474, %455 ; <double> [#uses=1] + %485 = fsub double %483, %484 ; <double> [#uses=1] + %486 = fmul double %482, %485 ; <double> [#uses=1] + %487 = fadd double %486, %481 ; <double> [#uses=1] + %488 = fsub double %476, %452 ; <double> [#uses=1] + %489 = fmul double %455, %466 ; <double> [#uses=1] + %490 = fmul double %458, %463 ; <double> [#uses=1] + %491 = fsub double %489, %490 ; <double> [#uses=1] + %492 = fmul double %488, %491 ; <double> [#uses=1] + %493 = fadd double %492, %487 ; <double> [#uses=1] + %494 = fcmp ogt double %493, 0.000000e+00 ; <i1> [#uses=1] + br i1 %494, label %bb15.i, label %bb17.loopexit.i + +bb17.loopexit.i: ; preds = %bb16.i + store %struct.edge_rec* %avail_edge.tmp.0, %struct.edge_rec** @avail_edge + %.pre27.i = load %struct.VERTEX** %170, align 4 ; <%struct.VERTEX*> [#uses=1] + %.pre28.i = load %struct.VERTEX** %175, align 4 ; <%struct.VERTEX*> [#uses=1] + br label %bb17.i + +bb17.i: ; preds = %bb17.loopexit.i, %bb13.i + %495 = phi %struct.VERTEX* [ %.pre28.i, %bb17.loopexit.i ], [ %324, %bb13.i ] ; <%struct.VERTEX*> [#uses=3] + %496 = phi %struct.VERTEX* [ %.pre27.i, %bb17.loopexit.i ], [ %325, %bb13.i ] ; <%struct.VERTEX*> [#uses=3] + %rcand.1.i = phi %struct.edge_rec* [ %rcand.0.i, %bb17.loopexit.i ], [ %rcand.2.i, %bb13.i ] ; <%struct.edge_rec*> [#uses=3] + %497 = ptrtoint %struct.edge_rec* %lcand.1.i to i32 ; <i32> [#uses=1] + %498 = xor i32 %497, 32 ; <i32> [#uses=1] + %499 = inttoptr i32 %498 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] + 
%500 = getelementptr %struct.edge_rec* %499, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + %501 = load %struct.VERTEX** %500, align 4 ; <%struct.VERTEX*> [#uses=4] + %502 = getelementptr %struct.VERTEX* %496, i32 0, i32 0, i32 0 ; <double*> [#uses=1] + %503 = load double* %502, align 4 ; <double> [#uses=1] + %504 = getelementptr %struct.VERTEX* %496, i32 0, i32 0, i32 1 ; <double*> [#uses=1] + %505 = load double* %504, align 4 ; <double> [#uses=1] + %506 = getelementptr %struct.VERTEX* %501, i32 0, i32 0, i32 0 ; <double*> [#uses=1] + %507 = load double* %506, align 4 ; <double> [#uses=2] + %508 = getelementptr %struct.VERTEX* %501, i32 0, i32 0, i32 1 ; <double*> [#uses=1] + %509 = load double* %508, align 4 ; <double> [#uses=2] + %510 = getelementptr %struct.VERTEX* %495, i32 0, i32 0, i32 0 ; <double*> [#uses=1] + %511 = load double* %510, align 4 ; <double> [#uses=3] + %512 = getelementptr %struct.VERTEX* %495, i32 0, i32 0, i32 1 ; <double*> [#uses=1] + %513 = load double* %512, align 4 ; <double> [#uses=3] + %514 = fsub double %503, %511 ; <double> [#uses=2] + %515 = fsub double %509, %513 ; <double> [#uses=1] + %516 = fmul double %514, %515 ; <double> [#uses=1] + %517 = fsub double %507, %511 ; <double> [#uses=1] + %518 = fsub double %505, %513 ; <double> [#uses=2] + %519 = fmul double %517, %518 ; <double> [#uses=1] + %520 = fsub double %516, %519 ; <double> [#uses=1] + %521 = fcmp ogt double %520, 0.000000e+00 ; <i1> [#uses=2] + %522 = ptrtoint %struct.edge_rec* %rcand.1.i to i32 ; <i32> [#uses=3] + %523 = xor i32 %522, 32 ; <i32> [#uses=1] + %524 = inttoptr i32 %523 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %525 = getelementptr %struct.edge_rec* %524, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + %526 = load %struct.VERTEX** %525, align 4 ; <%struct.VERTEX*> [#uses=4] + %527 = getelementptr %struct.VERTEX* %526, i32 0, i32 0, i32 0 ; <double*> [#uses=1] + %528 = load double* %527, align 4 ; <double> [#uses=4] + %529 = getelementptr 
%struct.VERTEX* %526, i32 0, i32 0, i32 1 ; <double*> [#uses=1] + %530 = load double* %529, align 4 ; <double> [#uses=4] + %531 = fsub double %530, %513 ; <double> [#uses=1] + %532 = fmul double %514, %531 ; <double> [#uses=1] + %533 = fsub double %528, %511 ; <double> [#uses=1] + %534 = fmul double %533, %518 ; <double> [#uses=1] + %535 = fsub double %532, %534 ; <double> [#uses=1] + %536 = fcmp ogt double %535, 0.000000e+00 ; <i1> [#uses=2] + %537 = or i1 %536, %521 ; <i1> [#uses=1] + br i1 %537, label %bb21.i, label %do_merge.exit + +bb21.i: ; preds = %bb17.i + %538 = getelementptr %struct.edge_rec* %lcand.1.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + %539 = load %struct.VERTEX** %538, align 4 ; <%struct.VERTEX*> [#uses=3] + %540 = getelementptr %struct.edge_rec* %rcand.1.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + %541 = load %struct.VERTEX** %540, align 4 ; <%struct.VERTEX*> [#uses=3] + br i1 %521, label %bb22.i, label %bb24.i + +bb22.i: ; preds = %bb21.i + br i1 %536, label %bb23.i, label %bb25.i + +bb23.i: ; preds = %bb22.i + %542 = getelementptr %struct.VERTEX* %526, i32 0, i32 0, i32 2 ; <double*> [#uses=1] + %543 = load double* %542, align 4 ; <double> [#uses=3] + %544 = fsub double %507, %528 ; <double> [#uses=2] + %545 = fsub double %509, %530 ; <double> [#uses=2] + %546 = getelementptr %struct.VERTEX* %501, i32 0, i32 0, i32 2 ; <double*> [#uses=1] + %547 = load double* %546, align 4 ; <double> [#uses=1] + %548 = getelementptr %struct.VERTEX* %539, i32 0, i32 0, i32 0 ; <double*> [#uses=1] + %549 = load double* %548, align 4 ; <double> [#uses=1] + %550 = fsub double %549, %528 ; <double> [#uses=2] + %551 = getelementptr %struct.VERTEX* %539, i32 0, i32 0, i32 1 ; <double*> [#uses=1] + %552 = load double* %551, align 4 ; <double> [#uses=1] + %553 = fsub double %552, %530 ; <double> [#uses=2] + %554 = getelementptr %struct.VERTEX* %539, i32 0, i32 0, i32 2 ; <double*> [#uses=1] + %555 = load double* %554, align 4 ; <double> [#uses=1] + %556 = 
getelementptr %struct.VERTEX* %541, i32 0, i32 0, i32 0 ; <double*> [#uses=1] + %557 = load double* %556, align 4 ; <double> [#uses=1] + %558 = fsub double %557, %528 ; <double> [#uses=2] + %559 = getelementptr %struct.VERTEX* %541, i32 0, i32 0, i32 1 ; <double*> [#uses=1] + %560 = load double* %559, align 4 ; <double> [#uses=1] + %561 = fsub double %560, %530 ; <double> [#uses=2] + %562 = getelementptr %struct.VERTEX* %541, i32 0, i32 0, i32 2 ; <double*> [#uses=1] + %563 = load double* %562, align 4 ; <double> [#uses=1] + %564 = fsub double %547, %543 ; <double> [#uses=1] + %565 = fmul double %550, %561 ; <double> [#uses=1] + %566 = fmul double %553, %558 ; <double> [#uses=1] + %567 = fsub double %565, %566 ; <double> [#uses=1] + %568 = fmul double %564, %567 ; <double> [#uses=1] + %569 = fsub double %555, %543 ; <double> [#uses=1] + %570 = fmul double %558, %545 ; <double> [#uses=1] + %571 = fmul double %561, %544 ; <double> [#uses=1] + %572 = fsub double %570, %571 ; <double> [#uses=1] + %573 = fmul double %569, %572 ; <double> [#uses=1] + %574 = fadd double %573, %568 ; <double> [#uses=1] + %575 = fsub double %563, %543 ; <double> [#uses=1] + %576 = fmul double %544, %553 ; <double> [#uses=1] + %577 = fmul double %545, %550 ; <double> [#uses=1] + %578 = fsub double %576, %577 ; <double> [#uses=1] + %579 = fmul double %575, %578 ; <double> [#uses=1] + %580 = fadd double %579, %574 ; <double> [#uses=1] + %581 = fcmp ogt double %580, 0.000000e+00 ; <i1> [#uses=1] + br i1 %581, label %bb24.i, label %bb25.i + +bb24.i: ; preds = %bb23.i, %bb21.i + %582 = add i32 %522, 48 ; <i32> [#uses=1] + %583 = and i32 %582, 63 ; <i32> [#uses=1] + %584 = and i32 %522, -64 ; <i32> [#uses=1] + %585 = or i32 %583, %584 ; <i32> [#uses=1] + %586 = inttoptr i32 %585 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %587 = getelementptr %struct.edge_rec* %586, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + %588 = load %struct.edge_rec** %587, align 4 ; <%struct.edge_rec*> 
[#uses=1] + %589 = ptrtoint %struct.edge_rec* %588 to i32 ; <i32> [#uses=2] + %590 = add i32 %589, 16 ; <i32> [#uses=1] + %591 = and i32 %590, 63 ; <i32> [#uses=1] + %592 = and i32 %589, -64 ; <i32> [#uses=1] + %593 = or i32 %591, %592 ; <i32> [#uses=1] + %594 = inttoptr i32 %593 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %595 = call %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=5] + %596 = getelementptr %struct.edge_rec* %595, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=4] + store %struct.edge_rec* %595, %struct.edge_rec** %596, align 4 + %597 = getelementptr %struct.edge_rec* %595, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + store %struct.VERTEX* %526, %struct.VERTEX** %597, align 4 + %598 = ptrtoint %struct.edge_rec* %595 to i32 ; <i32> [#uses=5] + %599 = add i32 %598, 16 ; <i32> [#uses=1] + %600 = inttoptr i32 %599 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] + %601 = add i32 %598, 48 ; <i32> [#uses=1] + %602 = inttoptr i32 %601 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] + %603 = getelementptr %struct.edge_rec* %600, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %602, %struct.edge_rec** %603, align 4 + %604 = add i32 %598, 32 ; <i32> [#uses=1] + %605 = inttoptr i32 %604 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3] + %606 = getelementptr %struct.edge_rec* %605, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %605, %struct.edge_rec** %606, align 4 + %607 = getelementptr %struct.edge_rec* %605, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + store %struct.VERTEX* %495, %struct.VERTEX** %607, align 4 + %608 = getelementptr %struct.edge_rec* %602, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %600, %struct.edge_rec** %608, align 4 + %609 = load %struct.edge_rec** %596, align 4 ; <%struct.edge_rec*> [#uses=1] + %610 = ptrtoint %struct.edge_rec* %609 to i32 ; <i32> [#uses=2] + %611 = add i32 %610, 16 ; <i32> [#uses=1] + %612 = and 
i32 %611, 63 ; <i32> [#uses=1] + %613 = and i32 %610, -64 ; <i32> [#uses=1] + %614 = or i32 %612, %613 ; <i32> [#uses=1] + %615 = inttoptr i32 %614 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %616 = getelementptr %struct.edge_rec* %594, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] + %617 = load %struct.edge_rec** %616, align 4 ; <%struct.edge_rec*> [#uses=1] + %618 = ptrtoint %struct.edge_rec* %617 to i32 ; <i32> [#uses=2] + %619 = add i32 %618, 16 ; <i32> [#uses=1] + %620 = and i32 %619, 63 ; <i32> [#uses=1] + %621 = and i32 %618, -64 ; <i32> [#uses=1] + %622 = or i32 %620, %621 ; <i32> [#uses=1] + %623 = inttoptr i32 %622 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %624 = getelementptr %struct.edge_rec* %623, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %625 = load %struct.edge_rec** %624, align 4 ; <%struct.edge_rec*> [#uses=1] + %626 = getelementptr %struct.edge_rec* %615, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %627 = load %struct.edge_rec** %626, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %625, %struct.edge_rec** %626, align 4 + store %struct.edge_rec* %627, %struct.edge_rec** %624, align 4 + %628 = load %struct.edge_rec** %596, align 4 ; <%struct.edge_rec*> [#uses=1] + %629 = load %struct.edge_rec** %616, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %628, %struct.edge_rec** %616, align 4 + store %struct.edge_rec* %629, %struct.edge_rec** %596, align 4 + %630 = xor i32 %598, 32 ; <i32> [#uses=2] + %631 = inttoptr i32 %630 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %632 = getelementptr %struct.edge_rec* %631, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] + %633 = load %struct.edge_rec** %632, align 4 ; <%struct.edge_rec*> [#uses=1] + %634 = ptrtoint %struct.edge_rec* %633 to i32 ; <i32> [#uses=2] + %635 = add i32 %634, 16 ; <i32> [#uses=1] + %636 = and i32 %635, 63 ; <i32> [#uses=1] + %637 = and i32 %634, -64 ; <i32> [#uses=1] + %638 = or i32 %636, %637 ; <i32> [#uses=1] 
+ %639 = inttoptr i32 %638 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %640 = getelementptr %struct.edge_rec* %174, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] + %641 = load %struct.edge_rec** %640, align 4 ; <%struct.edge_rec*> [#uses=1] + %642 = ptrtoint %struct.edge_rec* %641 to i32 ; <i32> [#uses=2] + %643 = add i32 %642, 16 ; <i32> [#uses=1] + %644 = and i32 %643, 63 ; <i32> [#uses=1] + %645 = and i32 %642, -64 ; <i32> [#uses=1] + %646 = or i32 %644, %645 ; <i32> [#uses=1] + %647 = inttoptr i32 %646 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %648 = getelementptr %struct.edge_rec* %647, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %649 = load %struct.edge_rec** %648, align 4 ; <%struct.edge_rec*> [#uses=1] + %650 = getelementptr %struct.edge_rec* %639, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %651 = load %struct.edge_rec** %650, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %649, %struct.edge_rec** %650, align 4 + store %struct.edge_rec* %651, %struct.edge_rec** %648, align 4 + %652 = load %struct.edge_rec** %632, align 4 ; <%struct.edge_rec*> [#uses=1] + %653 = load %struct.edge_rec** %640, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %652, %struct.edge_rec** %640, align 4 + store %struct.edge_rec* %653, %struct.edge_rec** %632, align 4 + %654 = add i32 %630, 48 ; <i32> [#uses=1] + %655 = and i32 %654, 63 ; <i32> [#uses=1] + %656 = and i32 %598, -64 ; <i32> [#uses=1] + %657 = or i32 %655, %656 ; <i32> [#uses=1] + %658 = inttoptr i32 %657 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %659 = getelementptr %struct.edge_rec* %658, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + %660 = load %struct.edge_rec** %659, align 4 ; <%struct.edge_rec*> [#uses=1] + %661 = ptrtoint %struct.edge_rec* %660 to i32 ; <i32> [#uses=2] + %662 = add i32 %661, 16 ; <i32> [#uses=1] + %663 = and i32 %662, 63 ; <i32> [#uses=1] + %664 = and i32 %661, -64 ; <i32> [#uses=1] + %665 = or i32 %663, %664 ; 
<i32> [#uses=1] + %666 = inttoptr i32 %665 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + br label %bb9.i + +bb25.i: ; preds = %bb23.i, %bb22.i + %667 = add i32 %172, 16 ; <i32> [#uses=1] + %668 = and i32 %667, 63 ; <i32> [#uses=1] + %669 = and i32 %172, -64 ; <i32> [#uses=1] + %670 = or i32 %668, %669 ; <i32> [#uses=1] + %671 = inttoptr i32 %670 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %672 = getelementptr %struct.edge_rec* %671, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + %673 = load %struct.edge_rec** %672, align 4 ; <%struct.edge_rec*> [#uses=1] + %674 = ptrtoint %struct.edge_rec* %673 to i32 ; <i32> [#uses=2] + %675 = add i32 %674, 16 ; <i32> [#uses=1] + %676 = and i32 %675, 63 ; <i32> [#uses=1] + %677 = and i32 %674, -64 ; <i32> [#uses=1] + %678 = or i32 %676, %677 ; <i32> [#uses=1] + %679 = inttoptr i32 %678 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %680 = call %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=4] + %681 = getelementptr %struct.edge_rec* %680, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=5] + store %struct.edge_rec* %680, %struct.edge_rec** %681, align 4 + %682 = getelementptr %struct.edge_rec* %680, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + store %struct.VERTEX* %501, %struct.VERTEX** %682, align 4 + %683 = ptrtoint %struct.edge_rec* %680 to i32 ; <i32> [#uses=4] + %684 = add i32 %683, 16 ; <i32> [#uses=1] + %685 = inttoptr i32 %684 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] + %686 = add i32 %683, 48 ; <i32> [#uses=1] + %687 = inttoptr i32 %686 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] + %688 = getelementptr %struct.edge_rec* %685, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %687, %struct.edge_rec** %688, align 4 + %689 = add i32 %683, 32 ; <i32> [#uses=1] + %690 = inttoptr i32 %689 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3] + %691 = getelementptr %struct.edge_rec* %690, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store 
%struct.edge_rec* %690, %struct.edge_rec** %691, align 4 + %692 = getelementptr %struct.edge_rec* %690, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + store %struct.VERTEX* %496, %struct.VERTEX** %692, align 4 + %693 = getelementptr %struct.edge_rec* %687, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %685, %struct.edge_rec** %693, align 4 + %694 = load %struct.edge_rec** %681, align 4 ; <%struct.edge_rec*> [#uses=1] + %695 = ptrtoint %struct.edge_rec* %694 to i32 ; <i32> [#uses=2] + %696 = add i32 %695, 16 ; <i32> [#uses=1] + %697 = and i32 %696, 63 ; <i32> [#uses=1] + %698 = and i32 %695, -64 ; <i32> [#uses=1] + %699 = or i32 %697, %698 ; <i32> [#uses=1] + %700 = inttoptr i32 %699 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %701 = getelementptr %struct.edge_rec* %499, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] + %702 = load %struct.edge_rec** %701, align 4 ; <%struct.edge_rec*> [#uses=1] + %703 = ptrtoint %struct.edge_rec* %702 to i32 ; <i32> [#uses=2] + %704 = add i32 %703, 16 ; <i32> [#uses=1] + %705 = and i32 %704, 63 ; <i32> [#uses=1] + %706 = and i32 %703, -64 ; <i32> [#uses=1] + %707 = or i32 %705, %706 ; <i32> [#uses=1] + %708 = inttoptr i32 %707 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %709 = getelementptr %struct.edge_rec* %708, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %710 = load %struct.edge_rec** %709, align 4 ; <%struct.edge_rec*> [#uses=1] + %711 = getelementptr %struct.edge_rec* %700, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %712 = load %struct.edge_rec** %711, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %710, %struct.edge_rec** %711, align 4 + store %struct.edge_rec* %712, %struct.edge_rec** %709, align 4 + %713 = load %struct.edge_rec** %681, align 4 ; <%struct.edge_rec*> [#uses=1] + %714 = load %struct.edge_rec** %701, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %713, %struct.edge_rec** %701, align 4 + store %struct.edge_rec* %714, 
%struct.edge_rec** %681, align 4 + %715 = xor i32 %683, 32 ; <i32> [#uses=1] + %716 = inttoptr i32 %715 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] + %717 = getelementptr %struct.edge_rec* %716, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] + %718 = load %struct.edge_rec** %717, align 4 ; <%struct.edge_rec*> [#uses=1] + %719 = ptrtoint %struct.edge_rec* %718 to i32 ; <i32> [#uses=2] + %720 = add i32 %719, 16 ; <i32> [#uses=1] + %721 = and i32 %720, 63 ; <i32> [#uses=1] + %722 = and i32 %719, -64 ; <i32> [#uses=1] + %723 = or i32 %721, %722 ; <i32> [#uses=1] + %724 = inttoptr i32 %723 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %725 = getelementptr %struct.edge_rec* %679, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] + %726 = load %struct.edge_rec** %725, align 4 ; <%struct.edge_rec*> [#uses=1] + %727 = ptrtoint %struct.edge_rec* %726 to i32 ; <i32> [#uses=2] + %728 = add i32 %727, 16 ; <i32> [#uses=1] + %729 = and i32 %728, 63 ; <i32> [#uses=1] + %730 = and i32 %727, -64 ; <i32> [#uses=1] + %731 = or i32 %729, %730 ; <i32> [#uses=1] + %732 = inttoptr i32 %731 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %733 = getelementptr %struct.edge_rec* %732, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %734 = load %struct.edge_rec** %733, align 4 ; <%struct.edge_rec*> [#uses=1] + %735 = getelementptr %struct.edge_rec* %724, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %736 = load %struct.edge_rec** %735, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %734, %struct.edge_rec** %735, align 4 + store %struct.edge_rec* %736, %struct.edge_rec** %733, align 4 + %737 = load %struct.edge_rec** %717, align 4 ; <%struct.edge_rec*> [#uses=1] + %738 = load %struct.edge_rec** %725, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %737, %struct.edge_rec** %725, align 4 + store %struct.edge_rec* %738, %struct.edge_rec** %717, align 4 + %739 = load %struct.edge_rec** %681, align 4 ; <%struct.edge_rec*> [#uses=1] + br 
label %bb9.i + +do_merge.exit: ; preds = %bb17.i + %740 = getelementptr %struct.edge_rec* %ldo_addr.0.ph.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + %741 = load %struct.VERTEX** %740, align 4 ; <%struct.VERTEX*> [#uses=1] + %742 = icmp eq %struct.VERTEX* %741, %tree_addr.0.i ; <i1> [#uses=1] + br i1 %742, label %bb5.loopexit, label %bb2 + +bb2: ; preds = %bb2, %do_merge.exit + %ldo.07 = phi %struct.edge_rec* [ %747, %bb2 ], [ %ldo_addr.0.ph.i, %do_merge.exit ] ; <%struct.edge_rec*> [#uses=1] + %743 = ptrtoint %struct.edge_rec* %ldo.07 to i32 ; <i32> [#uses=1] + %744 = xor i32 %743, 32 ; <i32> [#uses=1] + %745 = inttoptr i32 %744 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %746 = getelementptr %struct.edge_rec* %745, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + %747 = load %struct.edge_rec** %746, align 4 ; <%struct.edge_rec*> [#uses=3] + %748 = getelementptr %struct.edge_rec* %747, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + %749 = load %struct.VERTEX** %748, align 4 ; <%struct.VERTEX*> [#uses=1] + %750 = icmp eq %struct.VERTEX* %749, %tree_addr.0.i ; <i1> [#uses=1] + br i1 %750, label %bb5.loopexit, label %bb2 + +bb4: ; preds = %bb5.loopexit, %bb4 + %rdo.05 = phi %struct.edge_rec* [ %755, %bb4 ], [ %rdo_addr.0.i, %bb5.loopexit ] ; <%struct.edge_rec*> [#uses=1] + %751 = getelementptr %struct.edge_rec* %rdo.05, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + %752 = load %struct.edge_rec** %751, align 4 ; <%struct.edge_rec*> [#uses=1] + %753 = ptrtoint %struct.edge_rec* %752 to i32 ; <i32> [#uses=1] + %754 = xor i32 %753, 32 ; <i32> [#uses=1] + %755 = inttoptr i32 %754 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3] + %756 = getelementptr %struct.edge_rec* %755, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + %757 = load %struct.VERTEX** %756, align 4 ; <%struct.VERTEX*> [#uses=1] + %758 = icmp eq %struct.VERTEX* %757, %extra ; <i1> [#uses=1] + br i1 %758, label %bb6, label %bb4 + +bb5.loopexit: ; preds = %bb2, %do_merge.exit + %ldo.0.lcssa = 
phi %struct.edge_rec* [ %ldo_addr.0.ph.i, %do_merge.exit ], [ %747, %bb2 ] ; <%struct.edge_rec*> [#uses=1] + %759 = getelementptr %struct.edge_rec* %rdo_addr.0.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + %760 = load %struct.VERTEX** %759, align 4 ; <%struct.VERTEX*> [#uses=1] + %761 = icmp eq %struct.VERTEX* %760, %extra ; <i1> [#uses=1] + br i1 %761, label %bb6, label %bb4 + +bb6: ; preds = %bb5.loopexit, %bb4 + %rdo.0.lcssa = phi %struct.edge_rec* [ %rdo_addr.0.i, %bb5.loopexit ], [ %755, %bb4 ] ; <%struct.edge_rec*> [#uses=1] + %tmp16 = ptrtoint %struct.edge_rec* %ldo.0.lcssa to i32 ; <i32> [#uses=1] + %tmp4 = ptrtoint %struct.edge_rec* %rdo.0.lcssa to i32 ; <i32> [#uses=1] + br label %bb15 + +bb7: ; preds = %bb + %762 = getelementptr %struct.VERTEX* %tree, i32 0, i32 1 ; <%struct.VERTEX**> [#uses=1] + %763 = load %struct.VERTEX** %762, align 4 ; <%struct.VERTEX*> [#uses=4] + %764 = icmp eq %struct.VERTEX* %763, null ; <i1> [#uses=1] + %765 = call %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=5] + %766 = getelementptr %struct.edge_rec* %765, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=4] + store %struct.edge_rec* %765, %struct.edge_rec** %766, align 4 + %767 = getelementptr %struct.edge_rec* %765, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=3] + br i1 %764, label %bb10, label %bb11 + +bb8: ; preds = %entry + %768 = call i32 @puts(i8* getelementptr ([21 x i8]* @_2E_str7, i32 0, i32 0)) nounwind ; <i32> [#uses=0] + call void @exit(i32 -1) noreturn nounwind + unreachable + +bb10: ; preds = %bb7 + store %struct.VERTEX* %tree, %struct.VERTEX** %767, align 4 + %769 = ptrtoint %struct.edge_rec* %765 to i32 ; <i32> [#uses=5] + %770 = add i32 %769, 16 ; <i32> [#uses=1] + %771 = inttoptr i32 %770 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] + %772 = add i32 %769, 48 ; <i32> [#uses=1] + %773 = inttoptr i32 %772 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] + %774 = getelementptr %struct.edge_rec* %771, i32 0, i32 1 ; 
<%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %773, %struct.edge_rec** %774, align 4 + %775 = add i32 %769, 32 ; <i32> [#uses=1] + %776 = inttoptr i32 %775 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3] + %777 = getelementptr %struct.edge_rec* %776, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %776, %struct.edge_rec** %777, align 4 + %778 = getelementptr %struct.edge_rec* %776, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + store %struct.VERTEX* %extra, %struct.VERTEX** %778, align 4 + %779 = getelementptr %struct.edge_rec* %773, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %771, %struct.edge_rec** %779, align 4 + %780 = xor i32 %769, 32 ; <i32> [#uses=1] + br label %bb15 + +bb11: ; preds = %bb7 + store %struct.VERTEX* %763, %struct.VERTEX** %767, align 4 + %781 = ptrtoint %struct.edge_rec* %765 to i32 ; <i32> [#uses=6] + %782 = add i32 %781, 16 ; <i32> [#uses=1] + %783 = inttoptr i32 %782 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] + %784 = add i32 %781, 48 ; <i32> [#uses=1] + %785 = inttoptr i32 %784 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] + %786 = getelementptr %struct.edge_rec* %783, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %785, %struct.edge_rec** %786, align 4 + %787 = add i32 %781, 32 ; <i32> [#uses=1] + %788 = inttoptr i32 %787 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3] + %789 = getelementptr %struct.edge_rec* %788, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %788, %struct.edge_rec** %789, align 4 + %790 = getelementptr %struct.edge_rec* %788, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + store %struct.VERTEX* %tree, %struct.VERTEX** %790, align 4 + %791 = getelementptr %struct.edge_rec* %785, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %783, %struct.edge_rec** %791, align 4 + %792 = call %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=4] + %793 = 
getelementptr %struct.edge_rec* %792, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=4] + store %struct.edge_rec* %792, %struct.edge_rec** %793, align 4 + %794 = getelementptr %struct.edge_rec* %792, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + store %struct.VERTEX* %tree, %struct.VERTEX** %794, align 4 + %795 = ptrtoint %struct.edge_rec* %792 to i32 ; <i32> [#uses=5] + %796 = add i32 %795, 16 ; <i32> [#uses=1] + %797 = inttoptr i32 %796 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] + %798 = add i32 %795, 48 ; <i32> [#uses=2] + %799 = inttoptr i32 %798 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] + %800 = getelementptr %struct.edge_rec* %797, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %799, %struct.edge_rec** %800, align 4 + %801 = add i32 %795, 32 ; <i32> [#uses=1] + %802 = inttoptr i32 %801 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3] + %803 = getelementptr %struct.edge_rec* %802, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %802, %struct.edge_rec** %803, align 4 + %804 = getelementptr %struct.edge_rec* %802, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + store %struct.VERTEX* %extra, %struct.VERTEX** %804, align 4 + %805 = getelementptr %struct.edge_rec* %799, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %797, %struct.edge_rec** %805, align 4 + %806 = xor i32 %781, 32 ; <i32> [#uses=1] + %807 = inttoptr i32 %806 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %808 = getelementptr %struct.edge_rec* %807, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] + %809 = load %struct.edge_rec** %808, align 4 ; <%struct.edge_rec*> [#uses=1] + %810 = ptrtoint %struct.edge_rec* %809 to i32 ; <i32> [#uses=2] + %811 = add i32 %810, 16 ; <i32> [#uses=1] + %812 = and i32 %811, 63 ; <i32> [#uses=1] + %813 = and i32 %810, -64 ; <i32> [#uses=1] + %814 = or i32 %812, %813 ; <i32> [#uses=1] + %815 = inttoptr i32 %814 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %816 = 
load %struct.edge_rec** %793, align 4 ; <%struct.edge_rec*> [#uses=1] + %817 = ptrtoint %struct.edge_rec* %816 to i32 ; <i32> [#uses=2] + %818 = add i32 %817, 16 ; <i32> [#uses=1] + %819 = and i32 %818, 63 ; <i32> [#uses=1] + %820 = and i32 %817, -64 ; <i32> [#uses=1] + %821 = or i32 %819, %820 ; <i32> [#uses=1] + %822 = inttoptr i32 %821 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %823 = getelementptr %struct.edge_rec* %822, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %824 = load %struct.edge_rec** %823, align 4 ; <%struct.edge_rec*> [#uses=1] + %825 = getelementptr %struct.edge_rec* %815, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %826 = load %struct.edge_rec** %825, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %824, %struct.edge_rec** %825, align 4 + store %struct.edge_rec* %826, %struct.edge_rec** %823, align 4 + %827 = load %struct.edge_rec** %808, align 4 ; <%struct.edge_rec*> [#uses=1] + %828 = load %struct.edge_rec** %793, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %827, %struct.edge_rec** %793, align 4 + store %struct.edge_rec* %828, %struct.edge_rec** %808, align 4 + %829 = xor i32 %795, 32 ; <i32> [#uses=3] + %830 = inttoptr i32 %829 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %831 = getelementptr %struct.edge_rec* %830, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + %832 = load %struct.VERTEX** %831, align 4 ; <%struct.VERTEX*> [#uses=1] + %833 = and i32 %798, 63 ; <i32> [#uses=1] + %834 = and i32 %795, -64 ; <i32> [#uses=1] + %835 = or i32 %833, %834 ; <i32> [#uses=1] + %836 = inttoptr i32 %835 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %837 = getelementptr %struct.edge_rec* %836, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + %838 = load %struct.edge_rec** %837, align 4 ; <%struct.edge_rec*> [#uses=1] + %839 = ptrtoint %struct.edge_rec* %838 to i32 ; <i32> [#uses=2] + %840 = add i32 %839, 16 ; <i32> [#uses=1] + %841 = and i32 %840, 63 ; <i32> [#uses=1] + %842 = and 
i32 %839, -64 ; <i32> [#uses=1] + %843 = or i32 %841, %842 ; <i32> [#uses=1] + %844 = inttoptr i32 %843 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %845 = load %struct.VERTEX** %767, align 4 ; <%struct.VERTEX*> [#uses=1] + %846 = call %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=4] + %847 = getelementptr %struct.edge_rec* %846, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=7] + store %struct.edge_rec* %846, %struct.edge_rec** %847, align 4 + %848 = getelementptr %struct.edge_rec* %846, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + store %struct.VERTEX* %832, %struct.VERTEX** %848, align 4 + %849 = ptrtoint %struct.edge_rec* %846 to i32 ; <i32> [#uses=6] + %850 = add i32 %849, 16 ; <i32> [#uses=2] + %851 = inttoptr i32 %850 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] + %852 = add i32 %849, 48 ; <i32> [#uses=1] + %853 = inttoptr i32 %852 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] + %854 = getelementptr %struct.edge_rec* %851, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %853, %struct.edge_rec** %854, align 4 + %855 = add i32 %849, 32 ; <i32> [#uses=1] + %856 = inttoptr i32 %855 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3] + %857 = getelementptr %struct.edge_rec* %856, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %856, %struct.edge_rec** %857, align 4 + %858 = getelementptr %struct.edge_rec* %856, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + store %struct.VERTEX* %845, %struct.VERTEX** %858, align 4 + %859 = getelementptr %struct.edge_rec* %853, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %851, %struct.edge_rec** %859, align 4 + %860 = load %struct.edge_rec** %847, align 4 ; <%struct.edge_rec*> [#uses=1] + %861 = ptrtoint %struct.edge_rec* %860 to i32 ; <i32> [#uses=2] + %862 = add i32 %861, 16 ; <i32> [#uses=1] + %863 = and i32 %862, 63 ; <i32> [#uses=1] + %864 = and i32 %861, -64 ; <i32> [#uses=1] + %865 = or i32 %863, %864 ; 
<i32> [#uses=1] + %866 = inttoptr i32 %865 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %867 = getelementptr %struct.edge_rec* %844, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] + %868 = load %struct.edge_rec** %867, align 4 ; <%struct.edge_rec*> [#uses=1] + %869 = ptrtoint %struct.edge_rec* %868 to i32 ; <i32> [#uses=2] + %870 = add i32 %869, 16 ; <i32> [#uses=1] + %871 = and i32 %870, 63 ; <i32> [#uses=1] + %872 = and i32 %869, -64 ; <i32> [#uses=1] + %873 = or i32 %871, %872 ; <i32> [#uses=1] + %874 = inttoptr i32 %873 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %875 = getelementptr %struct.edge_rec* %874, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %876 = load %struct.edge_rec** %875, align 4 ; <%struct.edge_rec*> [#uses=1] + %877 = getelementptr %struct.edge_rec* %866, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %878 = load %struct.edge_rec** %877, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %876, %struct.edge_rec** %877, align 4 + store %struct.edge_rec* %878, %struct.edge_rec** %875, align 4 + %879 = load %struct.edge_rec** %847, align 4 ; <%struct.edge_rec*> [#uses=1] + %880 = load %struct.edge_rec** %867, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %879, %struct.edge_rec** %867, align 4 + store %struct.edge_rec* %880, %struct.edge_rec** %847, align 4 + %881 = xor i32 %849, 32 ; <i32> [#uses=3] + %882 = inttoptr i32 %881 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %883 = getelementptr %struct.edge_rec* %882, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=6] + %884 = load %struct.edge_rec** %883, align 4 ; <%struct.edge_rec*> [#uses=1] + %885 = ptrtoint %struct.edge_rec* %884 to i32 ; <i32> [#uses=2] + %886 = add i32 %885, 16 ; <i32> [#uses=1] + %887 = and i32 %886, 63 ; <i32> [#uses=1] + %888 = and i32 %885, -64 ; <i32> [#uses=1] + %889 = or i32 %887, %888 ; <i32> [#uses=1] + %890 = inttoptr i32 %889 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %891 = load 
%struct.edge_rec** %766, align 4 ; <%struct.edge_rec*> [#uses=1] + %892 = ptrtoint %struct.edge_rec* %891 to i32 ; <i32> [#uses=2] + %893 = add i32 %892, 16 ; <i32> [#uses=1] + %894 = and i32 %893, 63 ; <i32> [#uses=1] + %895 = and i32 %892, -64 ; <i32> [#uses=1] + %896 = or i32 %894, %895 ; <i32> [#uses=1] + %897 = inttoptr i32 %896 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %898 = getelementptr %struct.edge_rec* %897, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %899 = load %struct.edge_rec** %898, align 4 ; <%struct.edge_rec*> [#uses=1] + %900 = getelementptr %struct.edge_rec* %890, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %901 = load %struct.edge_rec** %900, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %899, %struct.edge_rec** %900, align 4 + store %struct.edge_rec* %901, %struct.edge_rec** %898, align 4 + %902 = load %struct.edge_rec** %883, align 4 ; <%struct.edge_rec*> [#uses=1] + %903 = load %struct.edge_rec** %766, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %902, %struct.edge_rec** %766, align 4 + store %struct.edge_rec* %903, %struct.edge_rec** %883, align 4 + %904 = getelementptr %struct.VERTEX* %763, i32 0, i32 0, i32 0 ; <double*> [#uses=1] + %905 = load double* %904, align 4 ; <double> [#uses=2] + %906 = getelementptr %struct.VERTEX* %763, i32 0, i32 0, i32 1 ; <double*> [#uses=1] + %907 = load double* %906, align 4 ; <double> [#uses=2] + %908 = getelementptr %struct.VERTEX* %extra, i32 0, i32 0, i32 0 ; <double*> [#uses=1] + %909 = load double* %908, align 4 ; <double> [#uses=3] + %910 = getelementptr %struct.VERTEX* %extra, i32 0, i32 0, i32 1 ; <double*> [#uses=1] + %911 = load double* %910, align 4 ; <double> [#uses=3] + %912 = getelementptr %struct.VERTEX* %tree, i32 0, i32 0, i32 0 ; <double*> [#uses=1] + %913 = load double* %912, align 4 ; <double> [#uses=3] + %914 = getelementptr %struct.VERTEX* %tree, i32 0, i32 0, i32 1 ; <double*> [#uses=1] + %915 = load double* %914, align 4 
; <double> [#uses=3] + %916 = fsub double %905, %913 ; <double> [#uses=1] + %917 = fsub double %911, %915 ; <double> [#uses=1] + %918 = fmul double %916, %917 ; <double> [#uses=1] + %919 = fsub double %909, %913 ; <double> [#uses=1] + %920 = fsub double %907, %915 ; <double> [#uses=1] + %921 = fmul double %919, %920 ; <double> [#uses=1] + %922 = fsub double %918, %921 ; <double> [#uses=1] + %923 = fcmp ogt double %922, 0.000000e+00 ; <i1> [#uses=1] + br i1 %923, label %bb15, label %bb13 + +bb13: ; preds = %bb11 + %924 = fsub double %905, %909 ; <double> [#uses=1] + %925 = fsub double %915, %911 ; <double> [#uses=1] + %926 = fmul double %924, %925 ; <double> [#uses=1] + %927 = fsub double %913, %909 ; <double> [#uses=1] + %928 = fsub double %907, %911 ; <double> [#uses=1] + %929 = fmul double %927, %928 ; <double> [#uses=1] + %930 = fsub double %926, %929 ; <double> [#uses=1] + %931 = fcmp ogt double %930, 0.000000e+00 ; <i1> [#uses=1] + br i1 %931, label %bb15, label %bb14 + +bb14: ; preds = %bb13 + %932 = and i32 %850, 63 ; <i32> [#uses=1] + %933 = and i32 %849, -64 ; <i32> [#uses=3] + %934 = or i32 %932, %933 ; <i32> [#uses=1] + %935 = inttoptr i32 %934 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %936 = getelementptr %struct.edge_rec* %935, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + %937 = load %struct.edge_rec** %936, align 4 ; <%struct.edge_rec*> [#uses=1] + %938 = ptrtoint %struct.edge_rec* %937 to i32 ; <i32> [#uses=2] + %939 = add i32 %938, 16 ; <i32> [#uses=1] + %940 = and i32 %939, 63 ; <i32> [#uses=1] + %941 = and i32 %938, -64 ; <i32> [#uses=1] + %942 = or i32 %940, %941 ; <i32> [#uses=1] + %943 = inttoptr i32 %942 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %944 = load %struct.edge_rec** %847, align 4 ; <%struct.edge_rec*> [#uses=1] + %945 = ptrtoint %struct.edge_rec* %944 to i32 ; <i32> [#uses=2] + %946 = add i32 %945, 16 ; <i32> [#uses=1] + %947 = and i32 %946, 63 ; <i32> [#uses=1] + %948 = and i32 %945, -64 ; <i32> 
[#uses=1] + %949 = or i32 %947, %948 ; <i32> [#uses=1] + %950 = inttoptr i32 %949 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %951 = getelementptr %struct.edge_rec* %943, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] + %952 = load %struct.edge_rec** %951, align 4 ; <%struct.edge_rec*> [#uses=1] + %953 = ptrtoint %struct.edge_rec* %952 to i32 ; <i32> [#uses=2] + %954 = add i32 %953, 16 ; <i32> [#uses=1] + %955 = and i32 %954, 63 ; <i32> [#uses=1] + %956 = and i32 %953, -64 ; <i32> [#uses=1] + %957 = or i32 %955, %956 ; <i32> [#uses=1] + %958 = inttoptr i32 %957 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %959 = getelementptr %struct.edge_rec* %958, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %960 = load %struct.edge_rec** %959, align 4 ; <%struct.edge_rec*> [#uses=1] + %961 = getelementptr %struct.edge_rec* %950, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %962 = load %struct.edge_rec** %961, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %960, %struct.edge_rec** %961, align 4 + store %struct.edge_rec* %962, %struct.edge_rec** %959, align 4 + %963 = load %struct.edge_rec** %847, align 4 ; <%struct.edge_rec*> [#uses=1] + %964 = load %struct.edge_rec** %951, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %963, %struct.edge_rec** %951, align 4 + store %struct.edge_rec* %964, %struct.edge_rec** %847, align 4 + %965 = add i32 %881, 16 ; <i32> [#uses=1] + %966 = and i32 %965, 63 ; <i32> [#uses=1] + %967 = or i32 %966, %933 ; <i32> [#uses=1] + %968 = inttoptr i32 %967 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %969 = getelementptr %struct.edge_rec* %968, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + %970 = load %struct.edge_rec** %969, align 4 ; <%struct.edge_rec*> [#uses=1] + %971 = ptrtoint %struct.edge_rec* %970 to i32 ; <i32> [#uses=2] + %972 = add i32 %971, 16 ; <i32> [#uses=1] + %973 = and i32 %972, 63 ; <i32> [#uses=1] + %974 = and i32 %971, -64 ; <i32> [#uses=1] + %975 = or i32 %973, 
%974 ; <i32> [#uses=1] + %976 = inttoptr i32 %975 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %977 = load %struct.edge_rec** %883, align 4 ; <%struct.edge_rec*> [#uses=1] + %978 = ptrtoint %struct.edge_rec* %977 to i32 ; <i32> [#uses=2] + %979 = add i32 %978, 16 ; <i32> [#uses=1] + %980 = and i32 %979, 63 ; <i32> [#uses=1] + %981 = and i32 %978, -64 ; <i32> [#uses=1] + %982 = or i32 %980, %981 ; <i32> [#uses=1] + %983 = inttoptr i32 %982 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %984 = getelementptr %struct.edge_rec* %976, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] + %985 = load %struct.edge_rec** %984, align 4 ; <%struct.edge_rec*> [#uses=1] + %986 = ptrtoint %struct.edge_rec* %985 to i32 ; <i32> [#uses=2] + %987 = add i32 %986, 16 ; <i32> [#uses=1] + %988 = and i32 %987, 63 ; <i32> [#uses=1] + %989 = and i32 %986, -64 ; <i32> [#uses=1] + %990 = or i32 %988, %989 ; <i32> [#uses=1] + %991 = inttoptr i32 %990 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %992 = getelementptr %struct.edge_rec* %991, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %993 = load %struct.edge_rec** %992, align 4 ; <%struct.edge_rec*> [#uses=1] + %994 = getelementptr %struct.edge_rec* %983, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %995 = load %struct.edge_rec** %994, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %993, %struct.edge_rec** %994, align 4 + store %struct.edge_rec* %995, %struct.edge_rec** %992, align 4 + %996 = load %struct.edge_rec** %883, align 4 ; <%struct.edge_rec*> [#uses=1] + %997 = load %struct.edge_rec** %984, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %996, %struct.edge_rec** %984, align 4 + store %struct.edge_rec* %997, %struct.edge_rec** %883, align 4 + %998 = inttoptr i32 %933 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] + %999 = load %struct.edge_rec** @avail_edge, align 4 ; <%struct.edge_rec*> [#uses=1] + %1000 = getelementptr %struct.edge_rec* %998, i32 0, i32 1 ; 
<%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %999, %struct.edge_rec** %1000, align 4 + store %struct.edge_rec* %998, %struct.edge_rec** @avail_edge, align 4 + br label %bb15 + +bb15: ; preds = %bb14, %bb13, %bb11, %bb10, %bb6 + %retval.1.0 = phi i32 [ %780, %bb10 ], [ %829, %bb13 ], [ %829, %bb14 ], [ %tmp4, %bb6 ], [ %849, %bb11 ] ; <i32> [#uses=1] + %retval.0.0 = phi i32 [ %769, %bb10 ], [ %781, %bb13 ], [ %781, %bb14 ], [ %tmp16, %bb6 ], [ %881, %bb11 ] ; <i32> [#uses=1] + %agg.result162 = bitcast %struct.EDGE_PAIR* %agg.result to i64* ; <i64*> [#uses=1] + %1001 = zext i32 %retval.0.0 to i64 ; <i64> [#uses=1] + %1002 = zext i32 %retval.1.0 to i64 ; <i64> [#uses=1] + %1003 = shl i64 %1002, 32 ; <i64> [#uses=1] + %1004 = or i64 %1003, %1001 ; <i64> [#uses=1] + store i64 %1004, i64* %agg.result162, align 4 + ret void +} + +declare i32 @puts(i8* nocapture) nounwind + +declare void @exit(i32) noreturn nounwind + +declare %struct.edge_rec* @alloc_edge() nounwind
diff --git a/src/LLVM/test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll b/src/LLVM/test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll new file mode 100644 index 0000000..d477ba9 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll
@@ -0,0 +1,94 @@ +; RUN: llc < %s -mtriple=armv6-apple-darwin10 + + %struct.cli_ac_alt = type { i8, i8*, i16, i16, %struct.cli_ac_alt* } + %struct.cli_ac_node = type { i8, i8, %struct.cli_ac_patt*, %struct.cli_ac_node**, %struct.cli_ac_node* } + %struct.cli_ac_patt = type { i16*, i16*, i16, i16, i8, i32, i32, i8*, i8*, i32, i16, i16, i16, i16, %struct.cli_ac_alt**, i8, i16, %struct.cli_ac_patt*, %struct.cli_ac_patt* } + %struct.cli_bm_patt = type { i8*, i8*, i16, i16, i8*, i8*, i8, %struct.cli_bm_patt*, i16 } + %struct.cli_matcher = type { i16, i8, i8*, %struct.cli_bm_patt**, i32*, i32, i8, i8, %struct.cli_ac_node*, %struct.cli_ac_node**, %struct.cli_ac_patt**, i32, i32, i32 } + +declare i32 @strlen(i8* nocapture) nounwind readonly + +define i32 @cli_ac_addsig(%struct.cli_matcher* nocapture %root, i8* %virname, i8* %hexsig, i32 %sigid, i16 zeroext %parts, i16 zeroext %partno, i16 zeroext %type, i32 %mindist, i32 %maxdist, i8* %offset, i8 zeroext %target) nounwind { +entry: + br i1 undef, label %bb126, label %bb1 + +bb1: ; preds = %entry + br i1 undef, label %cli_calloc.exit.thread, label %cli_calloc.exit + +cli_calloc.exit.thread: ; preds = %bb1 + ret i32 -114 + +cli_calloc.exit: ; preds = %bb1 + store i16 %parts, i16* undef, align 4 + br i1 undef, label %bb52, label %bb4 + +bb4: ; preds = %cli_calloc.exit + br i1 undef, label %bb.i, label %bb1.i3 + +bb.i: ; preds = %bb4 + unreachable + +bb1.i3: ; preds = %bb4 + br i1 undef, label %bb2.i4, label %cli_strdup.exit + +bb2.i4: ; preds = %bb1.i3 + ret i32 -114 + +cli_strdup.exit: ; preds = %bb1.i3 + br i1 undef, label %cli_calloc.exit54.thread, label %cli_calloc.exit54 + +cli_calloc.exit54.thread: ; preds = %cli_strdup.exit + ret i32 -114 + +cli_calloc.exit54: ; preds = %cli_strdup.exit + br label %bb45 + +cli_calloc.exit70.thread: ; preds = %bb45 + unreachable + +cli_calloc.exit70: ; preds = %bb45 + br i1 undef, label %bb.i83, label %bb1.i84 + +bb.i83: ; preds = %cli_calloc.exit70 + unreachable + +bb1.i84: ; preds = 
%cli_calloc.exit70 + br i1 undef, label %bb2.i85, label %bb17 + +bb2.i85: ; preds = %bb1.i84 + unreachable + +bb17: ; preds = %bb1.i84 + br i1 undef, label %bb22, label %bb.nph + +bb.nph: ; preds = %bb17 + br label %bb18 + +bb18: ; preds = %bb18, %bb.nph + br i1 undef, label %bb18, label %bb22 + +bb22: ; preds = %bb18, %bb17 + br i1 undef, label %bb25, label %bb43.preheader + +bb43.preheader: ; preds = %bb22 + br i1 undef, label %bb28, label %bb45 + +bb25: ; preds = %bb22 + unreachable + +bb28: ; preds = %bb43.preheader + unreachable + +bb45: ; preds = %bb43.preheader, %cli_calloc.exit54 + br i1 undef, label %cli_calloc.exit70.thread, label %cli_calloc.exit70 + +bb52: ; preds = %cli_calloc.exit + %0 = load i16* undef, align 4 ; <i16> [#uses=1] + %1 = icmp eq i16 %0, 0 ; <i1> [#uses=1] + %iftmp.20.0 = select i1 %1, i8* %hexsig, i8* null ; <i8*> [#uses=1] + %2 = tail call i32 @strlen(i8* %iftmp.20.0) nounwind readonly ; <i32> [#uses=0] + unreachable + +bb126: ; preds = %entry + ret i32 -117 +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll b/src/LLVM/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll new file mode 100644 index 0000000..6761687 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll
@@ -0,0 +1,95 @@ +; RUN: llc < %s -march=arm + + %struct.cli_ac_alt = type { i8, i8*, i16, i16, %struct.cli_ac_alt* } + %struct.cli_ac_node = type { i8, i8, %struct.cli_ac_patt*, %struct.cli_ac_node**, %struct.cli_ac_node* } + %struct.cli_ac_patt = type { i16*, i16*, i16, i16, i8, i32, i32, i8*, i8*, i32, i16, i16, i16, i16, %struct.cli_ac_alt**, i8, i16, %struct.cli_ac_patt*, %struct.cli_ac_patt* } + %struct.cli_bm_patt = type { i8*, i8*, i16, i16, i8*, i8*, i8, %struct.cli_bm_patt*, i16 } + %struct.cli_matcher = type { i16, i8, i8*, %struct.cli_bm_patt**, i32*, i32, i8, i8, %struct.cli_ac_node*, %struct.cli_ac_node**, %struct.cli_ac_patt**, i32, i32, i32 } + +define i32 @cli_ac_addsig(%struct.cli_matcher* nocapture %root, i8* %virname, i8* %hexsig, i32 %sigid, i16 zeroext %parts, i16 zeroext %partno, i16 zeroext %type, i32 %mindist, i32 %maxdist, i8* %offset, i8 zeroext %target) nounwind { +entry: + br i1 undef, label %bb126, label %bb1 + +bb1: ; preds = %entry + br i1 undef, label %cli_calloc.exit.thread, label %cli_calloc.exit + +cli_calloc.exit.thread: ; preds = %bb1 + ret i32 -114 + +cli_calloc.exit: ; preds = %bb1 + br i1 undef, label %bb52, label %bb4 + +bb4: ; preds = %cli_calloc.exit + br i1 undef, label %bb.i, label %bb1.i3 + +bb.i: ; preds = %bb4 + unreachable + +bb1.i3: ; preds = %bb4 + br i1 undef, label %bb2.i4, label %cli_strdup.exit + +bb2.i4: ; preds = %bb1.i3 + ret i32 -114 + +cli_strdup.exit: ; preds = %bb1.i3 + br i1 undef, label %cli_calloc.exit54.thread, label %cli_calloc.exit54 + +cli_calloc.exit54.thread: ; preds = %cli_strdup.exit + ret i32 -114 + +cli_calloc.exit54: ; preds = %cli_strdup.exit + br label %bb45 + +cli_calloc.exit70.thread: ; preds = %bb45 + unreachable + +cli_calloc.exit70: ; preds = %bb45 + br i1 undef, label %bb.i83, label %bb1.i84 + +bb.i83: ; preds = %cli_calloc.exit70 + unreachable + +bb1.i84: ; preds = %cli_calloc.exit70 + br i1 undef, label %bb2.i85, label %bb17 + +bb2.i85: ; preds = %bb1.i84 + unreachable + +bb17: ; 
preds = %bb1.i84 + br i1 undef, label %bb22, label %bb.nph + +bb.nph: ; preds = %bb17 + br label %bb18 + +bb18: ; preds = %bb18, %bb.nph + br i1 undef, label %bb18, label %bb22 + +bb22: ; preds = %bb18, %bb17 + %0 = getelementptr i8* null, i32 10 ; <i8*> [#uses=1] + %1 = bitcast i8* %0 to i16* ; <i16*> [#uses=1] + %2 = load i16* %1, align 2 ; <i16> [#uses=1] + %3 = add i16 %2, 1 ; <i16> [#uses=1] + %4 = zext i16 %3 to i32 ; <i32> [#uses=1] + %5 = mul i32 %4, 3 ; <i32> [#uses=1] + %6 = add i32 %5, -1 ; <i32> [#uses=1] + %7 = icmp eq i32 %6, undef ; <i1> [#uses=1] + br i1 %7, label %bb25, label %bb43.preheader + +bb43.preheader: ; preds = %bb22 + br i1 undef, label %bb28, label %bb45 + +bb25: ; preds = %bb22 + unreachable + +bb28: ; preds = %bb43.preheader + unreachable + +bb45: ; preds = %bb43.preheader, %cli_calloc.exit54 + br i1 undef, label %cli_calloc.exit70.thread, label %cli_calloc.exit70 + +bb52: ; preds = %cli_calloc.exit + unreachable + +bb126: ; preds = %entry + ret i32 -117 +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-07-29-VFP3Registers.ll b/src/LLVM/test/CodeGen/ARM/2009-07-29-VFP3Registers.ll new file mode 100644 index 0000000..5003fbd --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-07-29-VFP3Registers.ll
@@ -0,0 +1,108 @@ +; RUN: llc < %s -mtriple=armv7-apple-darwin10 -mattr=+vfp3 + +@a = external global double ; <double*> [#uses=1] + +declare double @llvm.exp.f64(double) nounwind readonly + +define void @findratio(double* nocapture %res1, double* nocapture %res2) nounwind { +entry: + br label %bb + +bb: ; preds = %bb, %entry + br i1 undef, label %bb28, label %bb + +bb28: ; preds = %bb + %0 = load double* @a, align 4 ; <double> [#uses=2] + %1 = fadd double %0, undef ; <double> [#uses=2] + br i1 undef, label %bb59, label %bb60 + +bb59: ; preds = %bb28 + %2 = fsub double -0.000000e+00, undef ; <double> [#uses=2] + br label %bb61 + +bb60: ; preds = %bb28 + %3 = tail call double @llvm.exp.f64(double undef) nounwind ; <double> [#uses=1] + %4 = fsub double -0.000000e+00, %3 ; <double> [#uses=2] + %5 = fsub double -0.000000e+00, undef ; <double> [#uses=1] + %6 = fsub double -0.000000e+00, undef ; <double> [#uses=1] + br label %bb61 + +bb61: ; preds = %bb60, %bb59 + %.pn201 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1] + %.pn111 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1] + %.pn452 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1] + %.pn85 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1] + %.pn238 = phi double [ 0.000000e+00, %bb59 ], [ 0.000000e+00, %bb60 ] ; <double> [#uses=1] + %.pn39 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1] + %.pn230 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1] + %.pn228 = phi double [ 0.000000e+00, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1] + %.pn224 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1] + %.pn222 = phi double [ 0.000000e+00, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1] + %.pn218 = phi double [ %2, %bb59 ], [ %4, %bb60 ] ; <double> [#uses=1] + %.pn214 = phi double [ 0.000000e+00, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1] + %.pn212 = phi double [ %2, %bb59 ], [ %4, %bb60 ] ; 
<double> [#uses=1] + %.pn213 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; <double> [#uses=1] + %.pn210 = phi double [ undef, %bb59 ], [ %5, %bb60 ] ; <double> [#uses=1] + %.pn202 = phi double [ undef, %bb59 ], [ %6, %bb60 ] ; <double> [#uses=0] + %.pn390 = fdiv double %.pn452, undef ; <double> [#uses=0] + %.pn145 = fdiv double %.pn238, %1 ; <double> [#uses=0] + %.pn138 = fdiv double %.pn230, undef ; <double> [#uses=1] + %.pn139 = fdiv double %.pn228, undef ; <double> [#uses=1] + %.pn134 = fdiv double %.pn224, %0 ; <double> [#uses=1] + %.pn135 = fdiv double %.pn222, %1 ; <double> [#uses=1] + %.pn133 = fdiv double %.pn218, undef ; <double> [#uses=0] + %.pn128 = fdiv double %.pn214, undef ; <double> [#uses=1] + %.pn129 = fdiv double %.pn212, %.pn213 ; <double> [#uses=1] + %.pn126 = fdiv double %.pn210, undef ; <double> [#uses=0] + %.pn54.in = fmul double undef, %.pn201 ; <double> [#uses=1] + %.pn42.in = fmul double undef, undef ; <double> [#uses=1] + %.pn76 = fsub double %.pn138, %.pn139 ; <double> [#uses=1] + %.pn74 = fsub double %.pn134, %.pn135 ; <double> [#uses=1] + %.pn70 = fsub double %.pn128, %.pn129 ; <double> [#uses=1] + %.pn54 = fdiv double %.pn54.in, 6.000000e+00 ; <double> [#uses=1] + %.pn64 = fmul double undef, 0x3FE5555555555555 ; <double> [#uses=1] + %.pn65 = fmul double undef, undef ; <double> [#uses=1] + %.pn50 = fmul double undef, %.pn111 ; <double> [#uses=0] + %.pn42 = fdiv double %.pn42.in, 6.000000e+00 ; <double> [#uses=1] + %.pn40 = fmul double undef, %.pn85 ; <double> [#uses=0] + %.pn56 = fadd double %.pn76, undef ; <double> [#uses=1] + %.pn57 = fmul double %.pn74, undef ; <double> [#uses=1] + %.pn36 = fadd double undef, undef ; <double> [#uses=1] + %.pn37 = fmul double %.pn70, undef ; <double> [#uses=1] + %.pn33 = fmul double undef, 0x3FC5555555555555 ; <double> [#uses=1] + %.pn29 = fsub double %.pn64, %.pn65 ; <double> [#uses=1] + %.pn21 = fadd double undef, undef ; <double> [#uses=1] + %.pn27 = fmul double undef, 0x3FC5555555555555 ; 
<double> [#uses=1] + %.pn11 = fadd double %.pn56, %.pn57 ; <double> [#uses=1] + %.pn32 = fmul double %.pn54, undef ; <double> [#uses=1] + %.pn26 = fmul double %.pn42, undef ; <double> [#uses=1] + %.pn15 = fmul double 0.000000e+00, %.pn39 ; <double> [#uses=1] + %.pn7 = fadd double %.pn36, %.pn37 ; <double> [#uses=1] + %.pn30 = fsub double %.pn32, %.pn33 ; <double> [#uses=1] + %.pn28 = fadd double %.pn30, 0.000000e+00 ; <double> [#uses=1] + %.pn24 = fsub double %.pn28, %.pn29 ; <double> [#uses=1] + %.pn22 = fsub double %.pn26, %.pn27 ; <double> [#uses=1] + %.pn20 = fadd double %.pn24, undef ; <double> [#uses=1] + %.pn18 = fadd double %.pn22, 0.000000e+00 ; <double> [#uses=1] + %.pn16 = fsub double %.pn20, %.pn21 ; <double> [#uses=1] + %.pn14 = fsub double %.pn18, undef ; <double> [#uses=1] + %.pn12 = fadd double %.pn16, undef ; <double> [#uses=1] + %.pn10 = fadd double %.pn14, %.pn15 ; <double> [#uses=1] + %.pn8 = fsub double %.pn12, undef ; <double> [#uses=1] + %.pn6 = fsub double %.pn10, %.pn11 ; <double> [#uses=1] + %.pn4 = fadd double %.pn8, undef ; <double> [#uses=1] + %.pn2 = fadd double %.pn6, %.pn7 ; <double> [#uses=1] + %N1.0 = fsub double %.pn4, undef ; <double> [#uses=1] + %D1.0 = fsub double %.pn2, undef ; <double> [#uses=2] + br i1 undef, label %bb62, label %bb64 + +bb62: ; preds = %bb61 + %7 = fadd double %D1.0, undef ; <double> [#uses=1] + br label %bb64 + +bb64: ; preds = %bb62, %bb61 + %.pn = phi double [ undef, %bb62 ], [ %N1.0, %bb61 ] ; <double> [#uses=1] + %.pn1 = phi double [ %7, %bb62 ], [ %D1.0, %bb61 ] ; <double> [#uses=1] + %x.1 = fdiv double %.pn, %.pn1 ; <double> [#uses=0] + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll b/src/LLVM/test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll new file mode 100644 index 0000000..a656c49 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s -march=arm -mattr=+neon +; PR4657 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64" +target triple = "armv7-apple-darwin9" + +define <4 x i32> @scale(<4 x i32> %v, i32 %f) nounwind { +entry: + %v_addr = alloca <4 x i32> ; <<4 x i32>*> [#uses=2] + %f_addr = alloca i32 ; <i32*> [#uses=2] + %retval = alloca <4 x i32> ; <<4 x i32>*> [#uses=2] + %0 = alloca <4 x i32> ; <<4 x i32>*> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store <4 x i32> %v, <4 x i32>* %v_addr + store i32 %f, i32* %f_addr + %1 = load <4 x i32>* %v_addr, align 16 ; <<4 x i32>> [#uses=1] + %2 = load i32* %f_addr, align 4 ; <i32> [#uses=1] + %3 = insertelement <4 x i32> undef, i32 %2, i32 0 ; <<4 x i32>> [#uses=1] + %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer ; <<4 x i32>> [#uses=1] + %5 = mul <4 x i32> %1, %4 ; <<4 x i32>> [#uses=1] + store <4 x i32> %5, <4 x i32>* %0, align 16 + %6 = load <4 x i32>* %0, align 16 ; <<4 x i32>> [#uses=1] + store <4 x i32> %6, <4 x i32>* %retval, align 16 + br label %return + +return: ; preds = %entry + %retval1 = load <4 x i32>* %retval ; <<4 x i32>> [#uses=1] + ret <4 x i32> %retval1 +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-08-04-RegScavengerAssert-2.ll b/src/LLVM/test/CodeGen/ARM/2009-08-04-RegScavengerAssert-2.ll new file mode 100644 index 0000000..3097522 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-08-04-RegScavengerAssert-2.ll
@@ -0,0 +1,33 @@ +; RUN: llc < %s -mtriple=arm-linux-gnueabi +; PR4528 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64" +target triple = "armv6-elf" + +define i32 @file_read_actor(i32* nocapture %desc, i32* %page, i32 %offset, i32 %size) nounwind optsize { +entry: + br i1 undef, label %fault_in_pages_writeable.exit, label %bb5.i + +bb5.i: ; preds = %entry + %asmtmp.i = tail call i32 asm sideeffect "1:\09strbt\09$1,[$2]\0A2:\0A\09.section .fixup,\22ax\22\0A\09.align\092\0A3:\09mov\09$0, $3\0A\09b\092b\0A\09.previous\0A\09.section __ex_table,\22a\22\0A\09.align\093\0A\09.long\091b, 3b\0A\09.previous", "=r,r,r,i,0,~{cc}"(i8 0, i32 undef, i32 -14, i32 0) nounwind ; <i32> [#uses=1] + %0 = icmp eq i32 %asmtmp.i, 0 ; <i1> [#uses=1] + br i1 %0, label %bb6.i, label %fault_in_pages_writeable.exit + +bb6.i: ; preds = %bb5.i + br i1 undef, label %fault_in_pages_writeable.exit, label %bb7.i + +bb7.i: ; preds = %bb6.i + unreachable + +fault_in_pages_writeable.exit: ; preds = %bb6.i, %bb5.i, %entry + br i1 undef, label %bb2, label %bb3 + +bb2: ; preds = %fault_in_pages_writeable.exit + unreachable + +bb3: ; preds = %fault_in_pages_writeable.exit + %1 = tail call i32 @__copy_to_user(i8* undef, i8* undef, i32 undef) nounwind ; <i32> [#uses=0] + unreachable +} + +declare i32 @__copy_to_user(i8*, i8*, i32)
diff --git a/src/LLVM/test/CodeGen/ARM/2009-08-04-RegScavengerAssert.ll b/src/LLVM/test/CodeGen/ARM/2009-08-04-RegScavengerAssert.ll new file mode 100644 index 0000000..d666f12 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-08-04-RegScavengerAssert.ll
@@ -0,0 +1,25 @@ +; RUN: llc < %s -mtriple=arm-linux-gnueabi +; PR4528 + +define i32 @file_read_actor(i32 %desc, i32 %page, i32 %offset, i32 %size) nounwind optsize { +entry: + br i1 undef, label %fault_in_pages_writeable.exit, label %bb5.i + +bb5.i: ; preds = %entry + %asmtmp.i = tail call i32 asm sideeffect "1:\09strbt\09$1,[$2]\0A2:\0A\09.section .fixup,\22ax\22\0A\09.align\092\0A3:\09mov\09$0, $3\0A\09b\092b\0A\09.previous\0A\09.section __ex_table,\22a\22\0A\09.align\093\0A\09.long\091b, 3b\0A\09.previous", "=r,r,r,i,0,~{cc}"(i8 0, i32 undef, i32 -14, i32 0) nounwind ; <i32> [#uses=1] + br label %fault_in_pages_writeable.exit + +fault_in_pages_writeable.exit: ; preds = %bb5.i, %entry + %0 = phi i32 [ 0, %entry ], [ %asmtmp.i, %bb5.i ] ; <i32> [#uses=1] + %1 = icmp eq i32 %0, 0 ; <i1> [#uses=1] + br i1 %1, label %bb2, label %bb3 + +bb2: ; preds = %fault_in_pages_writeable.exit + unreachable + +bb3: ; preds = %fault_in_pages_writeable.exit + %2 = tail call i32 @__copy_to_user(i8* undef, i8* undef, i32 undef) nounwind ; <i32> [#uses=0] + unreachable +} + +declare i32 @__copy_to_user(i8*, i8*, i32)
diff --git a/src/LLVM/test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll b/src/LLVM/test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll new file mode 100644 index 0000000..4b41015 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll
@@ -0,0 +1,42 @@ +; RUN: llc < %s -mtriple=arm-linux-gnueabi +; PR4528 + +; Inline asm is allowed to contain operands "=&r", "0". + +%struct.device_dma_parameters = type { i32, i32 } +%struct.iovec = type { i8*, i32 } + +define i32 @generic_segment_checks(%struct.iovec* nocapture %iov, i32* nocapture %nr_segs, i32* nocapture %count, i32 %access_flags) nounwind optsize { +entry: + br label %bb8 + +bb: ; preds = %bb8 + br i1 undef, label %bb10, label %bb2 + +bb2: ; preds = %bb + %asmtmp = tail call %struct.device_dma_parameters asm "adds $1, $2, $3; sbcccs $1, $1, $0; movcc $0, #0", "=&r,=&r,r,Ir,0,~{cc}"(i8* undef, i32 undef, i32 0) nounwind; <%struct.device_dma_parameters> [#uses=1] + %asmresult = extractvalue %struct.device_dma_parameters %asmtmp, 0; <i32> [#uses=1] + %0 = icmp eq i32 %asmresult, 0 ; <i1> [#uses=1] + br i1 %0, label %bb7, label %bb4 + +bb4: ; preds = %bb2 + br i1 undef, label %bb10, label %bb9 + +bb7: ; preds = %bb2 + %1 = add i32 %2, 1 ; <i32> [#uses=1] + br label %bb8 + +bb8: ; preds = %bb7, %entry + %2 = phi i32 [ 0, %entry ], [ %1, %bb7 ] ; <i32> [#uses=3] + %scevgep22 = getelementptr %struct.iovec* %iov, i32 %2, i32 0; <i8**> [#uses=0] + %3 = load i32* %nr_segs, align 4 ; <i32> [#uses=1] + %4 = icmp ult i32 %2, %3 ; <i1> [#uses=1] + br i1 %4, label %bb, label %bb9 + +bb9: ; preds = %bb8, %bb4 + store i32 undef, i32* %count, align 4 + ret i32 0 + +bb10: ; preds = %bb4, %bb + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-08-15-RegScavengerAssert.ll b/src/LLVM/test/CodeGen/ARM/2009-08-15-RegScavengerAssert.ll new file mode 100644 index 0000000..2993647 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-08-15-RegScavengerAssert.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -mtriple=arm-linux-gnueabi +; PR4716 + +define void @_start() nounwind naked { +entry: + tail call void @exit(i32 undef) noreturn nounwind + unreachable +} + +declare void @exit(i32) noreturn nounwind
diff --git a/src/LLVM/test/CodeGen/ARM/2009-08-21-PostRAKill.ll b/src/LLVM/test/CodeGen/ARM/2009-08-21-PostRAKill.ll new file mode 100644 index 0000000..c598fe6 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-08-21-PostRAKill.ll
@@ -0,0 +1,40 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 -post-RA-scheduler -mcpu=cortex-a8 + +; ModuleID = '<stdin>' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64" +target triple = "armv7-apple-darwin9" + +%struct.tree = type { i32, double, double, %struct.tree*, %struct.tree*, %struct.tree*, %struct.tree* } +@g = common global %struct.tree* null + +define %struct.tree* @tsp(%struct.tree* %t, i32 %nproc) nounwind { +entry: + %t.idx51.val.i = load double* null ; <double> [#uses=1] + br i1 undef, label %bb4.i, label %bb.i + +bb.i: ; preds = %entry + unreachable + +bb4.i: ; preds = %entry + %0 = load %struct.tree** @g, align 4 ; <%struct.tree*> [#uses=2] + %.idx45.i = getelementptr %struct.tree* %0, i32 0, i32 1 ; <double*> [#uses=1] + %.idx45.val.i = load double* %.idx45.i ; <double> [#uses=1] + %.idx46.i = getelementptr %struct.tree* %0, i32 0, i32 2 ; <double*> [#uses=1] + %.idx46.val.i = load double* %.idx46.i ; <double> [#uses=1] + %1 = fsub double 0.000000e+00, %.idx45.val.i ; <double> [#uses=2] + %2 = fmul double %1, %1 ; <double> [#uses=1] + %3 = fsub double %t.idx51.val.i, %.idx46.val.i ; <double> [#uses=2] + %4 = fmul double %3, %3 ; <double> [#uses=1] + %5 = fadd double %2, %4 ; <double> [#uses=1] + %6 = tail call double @llvm.sqrt.f64(double %5) nounwind ; <double> [#uses=1] + br i1 undef, label %bb7.i4, label %bb6.i + +bb6.i: ; preds = %bb4.i + br label %bb7.i4 + +bb7.i4: ; preds = %bb6.i, %bb4.i + %tton1.0.i = phi double [ %6, %bb6.i ], [ undef, %bb4.i ] ; <double> [#uses=0] + unreachable +} + +declare double @llvm.sqrt.f64(double) nounwind readonly
diff --git a/src/LLVM/test/CodeGen/ARM/2009-08-21-PostRAKill2.ll b/src/LLVM/test/CodeGen/ARM/2009-08-21-PostRAKill2.ll new file mode 100644 index 0000000..cc92c26 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-08-21-PostRAKill2.ll
@@ -0,0 +1,38 @@ +; RUN: llc < %s -asm-verbose=false -O3 -relocation-model=pic -disable-fp-elim -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -post-RA-scheduler + +; ModuleID = '<stdin>' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64" +target triple = "armv7-apple-darwin9" + +%struct.anon = type { [3 x double], double, %struct.node*, [64 x %struct.bnode*], [64 x %struct.bnode*] } +%struct.bnode = type { i16, double, [3 x double], i32, i32, [3 x double], [3 x double], [3 x double], double, %struct.bnode*, %struct.bnode* } +%struct.icstruct = type { [3 x i32], i16 } +%struct.node = type { i16, double, [3 x double], i32, i32 } + +declare double @floor(double) nounwind readnone + +define void @intcoord(%struct.icstruct* noalias nocapture sret %agg.result, i1 %a, double %b) { +entry: + br i1 %a, label %bb3, label %bb1 + +bb1: ; preds = %entry + unreachable + +bb3: ; preds = %entry + br i1 %a, label %bb7, label %bb5 + +bb5: ; preds = %bb3 + unreachable + +bb7: ; preds = %bb3 + br i1 %a, label %bb11, label %bb9 + +bb9: ; preds = %bb7 + %0 = tail call double @floor(double %b) nounwind readnone ; <double> [#uses=0] + br label %bb11 + +bb11: ; preds = %bb9, %bb7 + %1 = getelementptr %struct.icstruct* %agg.result, i32 0, i32 0, i32 0 ; <i32*> [#uses=1] + store i32 0, i32* %1 + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-08-21-PostRAKill3.ll b/src/LLVM/test/CodeGen/ARM/2009-08-21-PostRAKill3.ll new file mode 100644 index 0000000..382038e --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-08-21-PostRAKill3.ll
@@ -0,0 +1,33 @@ +; RUN: llc < %s -asm-verbose=false -O3 -relocation-model=pic -disable-fp-elim -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -post-RA-scheduler + +; ModuleID = '<stdin>' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64" +target triple = "armv7-apple-darwin9" + +%struct.Hosp = type { i32, i32, i32, %struct.List, %struct.List, %struct.List, %struct.List } +%struct.List = type { %struct.List*, %struct.Patient*, %struct.List* } +%struct.Patient = type { i32, i32, i32, %struct.Village* } +%struct.Village = type { [4 x %struct.Village*], %struct.Village*, %struct.List, %struct.Hosp, i32, i32 } + +define %struct.Village* @alloc_tree(i32 %level, i32 %label, %struct.Village* %back, i1 %p) nounwind { +entry: + br i1 %p, label %bb8, label %bb1 + +bb1: ; preds = %entry + %malloccall = tail call i8* @malloc(i32 ptrtoint (%struct.Village* getelementptr (%struct.Village* null, i32 1) to i32)) + %0 = bitcast i8* %malloccall to %struct.Village* + %exp2 = call double @ldexp(double 1.000000e+00, i32 %level) nounwind ; <double> [#uses=1] + %.c = fptosi double %exp2 to i32 ; <i32> [#uses=1] + store i32 %.c, i32* null + %1 = getelementptr %struct.Village* %0, i32 0, i32 3, i32 6, i32 0 ; <%struct.List**> [#uses=1] + store %struct.List* null, %struct.List** %1 + %2 = getelementptr %struct.Village* %0, i32 0, i32 3, i32 6, i32 2 ; <%struct.List**> [#uses=1] + store %struct.List* null, %struct.List** %2 + ret %struct.Village* %0 + +bb8: ; preds = %entry + ret %struct.Village* null +} + +declare double @ldexp(double, i32) +declare noalias i8* @malloc(i32)
diff --git a/src/LLVM/test/CodeGen/ARM/2009-08-23-linkerprivate.ll b/src/LLVM/test/CodeGen/ARM/2009-08-23-linkerprivate.ll new file mode 100644 index 0000000..392c70a --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-08-23-linkerprivate.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | FileCheck %s + +; ModuleID = '/Volumes/MacOS9/tests/WebKit/JavaScriptCore/profiler/ProfilerServer.mm' + +@"\01l_objc_msgSend_fixup_alloc" = linker_private_weak hidden global i32 0, section "__DATA, __objc_msgrefs, coalesced", align 16 + +; CHECK: .globl l_objc_msgSend_fixup_alloc +; CHECK: .weak_definition l_objc_msgSend_fixup_alloc
diff --git a/src/LLVM/test/CodeGen/ARM/2009-08-26-ScalarToVector.ll b/src/LLVM/test/CodeGen/ARM/2009-08-26-ScalarToVector.ll new file mode 100644 index 0000000..5407013 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-08-26-ScalarToVector.ll
@@ -0,0 +1,27 @@ +; RUN: llc < %s -mattr=+neon | not grep fldmfdd +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32" +target triple = "thumbv7-elf" + +%bar = type { float, float, float } +%baz = type { i32, [16 x %bar], [16 x float], [16 x i32], i8 } +%foo = type { <4 x float> } +%quux = type { i32 (...)**, %baz*, i32 } +%quuz = type { %quux, i32, %bar, [128 x i8], [16 x %foo], %foo, %foo, %foo } + +declare <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone + +define void @_ZN6squish10ClusterFit9Compress3EPv(%quuz* %this, i8* %block) { +entry: + %0 = lshr <4 x i32> zeroinitializer, <i32 31, i32 31, i32 31, i32 31> ; <<4 x i32>> [#uses=1] + %1 = shufflevector <4 x i32> %0, <4 x i32> undef, <2 x i32> <i32 2, i32 3> ; <<2 x i32>> [#uses=1] + %2 = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> undef, <2 x i32> %1) nounwind ; <<2 x i32>> [#uses=1] + %3 = extractelement <2 x i32> %2, i32 0 ; <i32> [#uses=1] + %not..i = icmp eq i32 %3, undef ; <i1> [#uses=1] + br i1 %not..i, label %return, label %bb221 + +bb221: ; preds = %bb221, %entry + br label %bb221 + +return: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-08-27-ScalarToVector.ll b/src/LLVM/test/CodeGen/ARM/2009-08-27-ScalarToVector.ll new file mode 100644 index 0000000..cac8569 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-08-27-ScalarToVector.ll
@@ -0,0 +1,35 @@ +; RUN: llc < %s -mattr=+neon | not grep fldmfdd +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32" +target triple = "thumbv7-elf" + +%bar = type { float, float, float } +%baz = type { i32, [16 x %bar], [16 x float], [16 x i32], i8 } +%foo = type { <4 x float> } +%quux = type { i32 (...)**, %baz*, i32 } +%quuz = type { %quux, i32, %bar, [128 x i8], [16 x %foo], %foo, %foo, %foo } + +define void @aaaa(%quuz* %this, i8* %block) { +entry: + br i1 undef, label %bb.nph269, label %bb201 + +bb.nph269: ; preds = %entry + br label %bb12 + +bb12: ; preds = %bb194, %bb.nph269 + %0 = fmul <4 x float> undef, undef ; <<4 x float>> [#uses=1] + %1 = shufflevector <4 x float> %0, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1] + %2 = shufflevector <2 x float> %1, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] + %3 = fadd <4 x float> undef, %2 ; <<4 x float>> [#uses=1] + br i1 undef, label %bb194, label %bb186 + +bb186: ; preds = %bb12 + br label %bb194 + +bb194: ; preds = %bb186, %bb12 + %besterror.0.0 = phi <4 x float> [ %3, %bb186 ], [ undef, %bb12 ] ; <<4 x float>> [#uses=0] + %indvar.next294 = add i32 undef, 1 ; <i32> [#uses=0] + br label %bb12 + +bb201: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-08-29-ExtractEltf32.ll b/src/LLVM/test/CodeGen/ARM/2009-08-29-ExtractEltf32.ll new file mode 100644 index 0000000..5bd30ea --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-08-29-ExtractEltf32.ll
@@ -0,0 +1,25 @@ +; RUN: llc < %s -mattr=+neon +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32" +target triple = "thumbv7-elf" + +define void @foo() nounwind { +entry: + %0 = tail call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> undef, <2 x float> undef) nounwind ; <<2 x float>> [#uses=1] + %tmp28 = extractelement <2 x float> %0, i32 0 ; <float> [#uses=1] + %1 = fcmp une float %tmp28, 4.900000e+01 ; <i1> [#uses=1] + br i1 %1, label %bb, label %bb7 + +bb: ; preds = %entry + unreachable + +bb7: ; preds = %entry + br i1 undef, label %bb8, label %bb9 + +bb8: ; preds = %bb7 + unreachable + +bb9: ; preds = %bb7 + ret void +} + +declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/ARM/2009-08-29-TooLongSplat.ll b/src/LLVM/test/CodeGen/ARM/2009-08-29-TooLongSplat.ll new file mode 100644 index 0000000..4655962 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-08-29-TooLongSplat.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -mattr=+neon +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32" +target triple = "thumbv7-elf" + +define void @aaa() nounwind { +entry: + %0 = fmul <4 x float> undef, <float 1.000000e+00, float 1.000000e+01, float 1.000000e+02, float 0x3EB0C6F7A0000000> ; <<4 x float>> [#uses=1] + %tmp31 = extractelement <4 x float> %0, i32 0 ; <float> [#uses=1] + %1 = fpext float %tmp31 to double ; <double> [#uses=1] + %2 = fsub double 1.000000e+00, %1 ; <double> [#uses=1] + %3 = fdiv double %2, 1.000000e+00 ; <double> [#uses=1] + %4 = tail call double @fabs(double %3) nounwind readnone ; <double> [#uses=1] + %5 = fcmp ogt double %4, 1.000000e-05 ; <i1> [#uses=1] + br i1 %5, label %bb, label %bb7 + +bb: ; preds = %entry + unreachable + +bb7: ; preds = %entry + unreachable +} + +declare double @fabs(double)
diff --git a/src/LLVM/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll b/src/LLVM/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll new file mode 100644 index 0000000..8bde748 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
@@ -0,0 +1,105 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin9 -march=arm | FileCheck %s + +; CHECK: L_LSDA_0: + + +%struct.A = type { i32* } + +define void @"\01-[MyFunction Name:]"() { +entry: + %save_filt.1 = alloca i32 + %save_eptr.0 = alloca i8* + %a = alloca %struct.A + %eh_exception = alloca i8* + %eh_selector = alloca i32 + %"alloca point" = bitcast i32 0 to i32 + call void @_ZN1AC1Ev(%struct.A* %a) + invoke void @_Z3barv() + to label %invcont unwind label %lpad + +invcont: ; preds = %entry + call void @_ZN1AD1Ev(%struct.A* %a) nounwind + br label %return + +bb: ; preds = %ppad + %eh_select = load i32* %eh_selector + store i32 %eh_select, i32* %save_filt.1, align 4 + %eh_value = load i8** %eh_exception + store i8* %eh_value, i8** %save_eptr.0, align 4 + call void @_ZN1AD1Ev(%struct.A* %a) nounwind + %0 = load i8** %save_eptr.0, align 4 + store i8* %0, i8** %eh_exception, align 4 + %1 = load i32* %save_filt.1, align 4 + store i32 %1, i32* %eh_selector, align 4 + br label %Unwind + +return: ; preds = %invcont + ret void + +lpad: ; preds = %entry + %eh_ptr = call i8* @llvm.eh.exception() + store i8* %eh_ptr, i8** %eh_exception + %eh_ptr1 = load i8** %eh_exception + %eh_select2 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh_ptr1, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i32 0) + store i32 %eh_select2, i32* %eh_selector + br label %ppad + +ppad: ; preds = %lpad + br label %bb + +Unwind: ; preds = %bb + %eh_ptr3 = load i8** %eh_exception + call void @_Unwind_SjLj_Resume(i8* %eh_ptr3) + unreachable +} + +define linkonce_odr void @_ZN1AC1Ev(%struct.A* %this) { +entry: + %this_addr = alloca %struct.A* + %"alloca point" = bitcast i32 0 to i32 + store %struct.A* %this, %struct.A** %this_addr + %0 = call i8* @_Znwm(i32 4) + %1 = bitcast i8* %0 to i32* + %2 = load %struct.A** %this_addr, align 4 + %3 = getelementptr inbounds %struct.A* %2, i32 0, i32 0 + store i32* %1, i32** %3, align 4 + br label %return + +return: ; preds = %entry + ret void +} + 
+declare i8* @_Znwm(i32) + +define linkonce_odr void @_ZN1AD1Ev(%struct.A* %this) nounwind { +entry: + %this_addr = alloca %struct.A* + %"alloca point" = bitcast i32 0 to i32 + store %struct.A* %this, %struct.A** %this_addr + %0 = load %struct.A** %this_addr, align 4 + %1 = getelementptr inbounds %struct.A* %0, i32 0, i32 0 + %2 = load i32** %1, align 4 + %3 = bitcast i32* %2 to i8* + call void @_ZdlPv(i8* %3) nounwind + br label %bb + +bb: ; preds = %entry + br label %return + +return: ; preds = %bb + ret void +} + +declare void @_ZdlPv(i8*) nounwind + +declare void @_Z3barv() + +declare i8* @llvm.eh.exception() nounwind readonly + +declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind + +declare i32 @llvm.eh.typeid.for(i8*) nounwind + +declare i32 @__gxx_personality_sj0(...) + +declare void @_Unwind_SjLj_Resume(i8*)
diff --git a/src/LLVM/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll b/src/LLVM/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll new file mode 100644 index 0000000..e1e60e6 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; pr4843 +define <4 x i16> @v2regbug(<4 x i16>* %B) nounwind { +;CHECK: v2regbug: +;CHECK: vzip.16 + %tmp1 = load <4 x i16>* %B + %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32><i32 0, i32 0, i32 1, i32 1> + ret <4 x i16> %tmp2 +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-09-09-AllOnes.ll b/src/LLVM/test/CodeGen/ARM/2009-09-09-AllOnes.ll new file mode 100644 index 0000000..8522a77 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-09-09-AllOnes.ll
@@ -0,0 +1,10 @@ +; RUN: llc -mattr=+neon < %s +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32" +target triple = "thumbv7-elf" + +define void @foo() { +entry: + %0 = insertelement <4 x i32> undef, i32 -1, i32 3 + store <4 x i32> %0, <4 x i32>* undef, align 16 + unreachable +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll b/src/LLVM/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll new file mode 100644 index 0000000..0a157c9 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
@@ -0,0 +1,18 @@ +; RUN: llc -O1 -march=arm -mattr=+vfp2 -mtriple=arm-linux-gnueabi < %s | FileCheck %s +; pr4939 + +define void @test(double* %x, double* %y) nounwind { + %1 = load double* %x + %2 = load double* %y + %3 = fsub double -0.000000e+00, %1 + %4 = fcmp ugt double %2, %3 + br i1 %4, label %bb1, label %bb2 + +bb1: +;CHECK: vstrhi.64 + store double %1, double* %y + br label %bb2 + +bb2: + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-09-10-postdec.ll b/src/LLVM/test/CodeGen/ARM/2009-09-10-postdec.ll new file mode 100644 index 0000000..10653b5 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-09-10-postdec.ll
@@ -0,0 +1,11 @@ +; RUN: llc -march=arm < %s | FileCheck %s +; Radar 7213850 + +define i32 @test(i8* %d, i32 %x, i32 %y) nounwind { + %1 = ptrtoint i8* %d to i32 +;CHECK: sub + %2 = sub i32 %x, %1 + %3 = add nsw i32 %2, %y + store i8 0, i8* %d, align 1 + ret i32 %3 +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-09-13-InvalidSubreg.ll b/src/LLVM/test/CodeGen/ARM/2009-09-13-InvalidSubreg.ll new file mode 100644 index 0000000..13adb24 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-09-13-InvalidSubreg.ll
@@ -0,0 +1,61 @@ +; RUN: llc -mattr=+neon < %s +; PR4965 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "armv7-eabi" + +%struct.fr = type { [6 x %struct.pl] } +%struct.obb = type { %"struct.m4", %"struct.p3" } +%struct.pl = type { %"struct.p3" } +%"struct.m4" = type { %"struct.p3", %"struct.p3", %"struct.p3", %"struct.p3" } +%"struct.p3" = type { <4 x float> } + +declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwind readnone + +define arm_aapcs_vfpcc i8 @foo(%struct.fr* nocapture %this, %struct.obb* %box) nounwind { +entry: + %val.i.i = load <4 x float>* undef ; <<4 x float>> [#uses=1] + %val2.i.i = load <4 x float>* null ; <<4 x float>> [#uses=1] + %elt3.i.i = getelementptr inbounds %struct.obb* %box, i32 0, i32 0, i32 2, i32 0 ; <<4 x float>*> [#uses=1] + %val4.i.i = load <4 x float>* %elt3.i.i ; <<4 x float>> [#uses=1] + %0 = shufflevector <2 x float> undef, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1] + %1 = fadd <4 x float> undef, zeroinitializer ; <<4 x float>> [#uses=1] + br label %bb33 + +bb: ; preds = %bb33 + %2 = fmul <4 x float> %val.i.i, undef ; <<4 x float>> [#uses=1] + %3 = fmul <4 x float> %val2.i.i, undef ; <<4 x float>> [#uses=1] + %4 = fadd <4 x float> %3, %2 ; <<4 x float>> [#uses=1] + %5 = fmul <4 x float> %val4.i.i, undef ; <<4 x float>> [#uses=1] + %6 = fadd <4 x float> %5, %4 ; <<4 x float>> [#uses=1] + %7 = bitcast <4 x float> %6 to <4 x i32> ; <<4 x i32>> [#uses=1] + %8 = and <4 x i32> %7, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648> ; <<4 x i32>> [#uses=1] + %9 = or <4 x i32> %8, undef ; <<4 x i32>> [#uses=1] + %10 = bitcast <4 x i32> %9 to <4 x float> ; <<4 x float>> [#uses=1] + %11 = shufflevector <4 x float> %10, <4 x float> undef, <2 x i32> <i32 0, i32 1> ; <<2 x float>> [#uses=1] + %12 = shufflevector <2 x float> %11, <2 x float> undef, 
<4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] + %13 = fmul <4 x float> undef, %12 ; <<4 x float>> [#uses=1] + %14 = fmul <4 x float> %0, undef ; <<4 x float>> [#uses=1] + %15 = fadd <4 x float> %14, %13 ; <<4 x float>> [#uses=1] + %16 = fadd <4 x float> undef, %15 ; <<4 x float>> [#uses=1] + %17 = fadd <4 x float> %1, %16 ; <<4 x float>> [#uses=1] + %18 = fmul <4 x float> zeroinitializer, %17 ; <<4 x float>> [#uses=1] + %19 = insertelement <4 x float> %18, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=2] + %20 = shufflevector <4 x float> %19, <4 x float> undef, <2 x i32> <i32 0, i32 1> ; <<2 x float>> [#uses=1] + %21 = shufflevector <4 x float> %19, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1] + %22 = tail call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %20, <2 x float> %21) nounwind ; <<2 x float>> [#uses=2] + %23 = tail call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %22, <2 x float> %22) nounwind ; <<2 x float>> [#uses=2] + %24 = shufflevector <2 x float> %23, <2 x float> %23, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] + %25 = fadd <4 x float> %24, zeroinitializer ; <<4 x float>> [#uses=1] + %tmp46 = extractelement <4 x float> %25, i32 0 ; <float> [#uses=1] + %26 = fcmp olt float %tmp46, 0.000000e+00 ; <i1> [#uses=1] + br i1 %26, label %bb41, label %bb33 + +bb33: ; preds = %bb, %entry + br i1 undef, label %bb34, label %bb + +bb34: ; preds = %bb33 + ret i8 undef + +bb41: ; preds = %bb + ret i8 1 +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll b/src/LLVM/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll new file mode 100644 index 0000000..758b59a --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll
@@ -0,0 +1,41 @@ +; RUN: llc < %s -march=arm -mattr=+neon -mcpu=cortex-a9 + +define arm_aapcs_vfpcc <4 x float> @foo(i8* nocapture %pBuffer, i32 %numItems) nounwind { + %1 = ptrtoint i8* %pBuffer to i32 + + %lsr.iv2641 = inttoptr i32 %1 to float* + %tmp29 = add i32 %1, 4 + %tmp2930 = inttoptr i32 %tmp29 to float* + %tmp31 = add i32 %1, 8 + %tmp3132 = inttoptr i32 %tmp31 to float* + %tmp33 = add i32 %1, 12 + %tmp3334 = inttoptr i32 %tmp33 to float* + %tmp35 = add i32 %1, 16 + %tmp3536 = inttoptr i32 %tmp35 to float* + %tmp37 = add i32 %1, 20 + %tmp3738 = inttoptr i32 %tmp37 to float* + %tmp39 = add i32 %1, 24 + %tmp3940 = inttoptr i32 %tmp39 to float* + %2 = load float* %lsr.iv2641, align 4 + %3 = load float* %tmp2930, align 4 + %4 = load float* %tmp3132, align 4 + %5 = load float* %tmp3334, align 4 + %6 = load float* %tmp3536, align 4 + %7 = load float* %tmp3738, align 4 + %8 = load float* %tmp3940, align 4 + %9 = insertelement <4 x float> undef, float %6, i32 0 + %10 = shufflevector <4 x float> %9, <4 x float> undef, <4 x i32> zeroinitializer + %11 = insertelement <4 x float> %10, float %7, i32 1 + %12 = insertelement <4 x float> %11, float %8, i32 2 + %13 = insertelement <4 x float> undef, float %2, i32 0 + %14 = shufflevector <4 x float> %13, <4 x float> undef, <4 x i32> zeroinitializer + %15 = insertelement <4 x float> %14, float %3, i32 1 + %16 = insertelement <4 x float> %15, float %4, i32 2 + %17 = insertelement <4 x float> %16, float %5, i32 3 + %18 = fsub <4 x float> zeroinitializer, %12 + %19 = shufflevector <4 x float> %18, <4 x float> undef, <4 x i32> zeroinitializer + %20 = shufflevector <4 x float> %17, <4 x float> undef, <2 x i32> <i32 0, i32 1> + %21 = shufflevector <2 x float> %20, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + + ret <4 x float> %21 +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-09-20-LiveIntervalsBug.ll b/src/LLVM/test/CodeGen/ARM/2009-09-20-LiveIntervalsBug.ll new file mode 100644 index 0000000..980f8ce --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-09-20-LiveIntervalsBug.ll
@@ -0,0 +1,34 @@ +; RUN: llc < %s -mtriple=arm-eabi -mattr=+neon -mcpu=cortex-a9 + +; PR4986 + +define arm_aapcs_vfpcc void @foo(i8* nocapture %pBuffer, i32 %numItems) nounwind { +entry: + br i1 undef, label %return, label %bb.preheader + +bb.preheader: ; preds = %entry + br label %bb + +bb: ; preds = %bb, %bb.preheader + %0 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] + %1 = insertelement <4 x float> %0, float undef, i32 1 ; <<4 x float>> [#uses=1] + %2 = insertelement <4 x float> %1, float undef, i32 2 ; <<4 x float>> [#uses=1] + %3 = insertelement <4 x float> %2, float undef, i32 3 ; <<4 x float>> [#uses=1] + %4 = fmul <4 x float> undef, %3 ; <<4 x float>> [#uses=1] + %5 = extractelement <4 x float> %4, i32 3 ; <float> [#uses=1] + store float %5, float* undef, align 4 + br i1 undef, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +} + +define arm_aapcs_vfpcc <4 x float> @bar(i8* nocapture %pBuffer, i32 %numItems) nounwind { + %1 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] + %2 = insertelement <4 x float> %1, float undef, i32 1 ; <<4 x float>> [#uses=1] + %3 = insertelement <4 x float> %2, float undef, i32 2 ; <<4 x float>> [#uses=1] + %4 = insertelement <4 x float> %3, float undef, i32 3 ; <<4 x float>> [#uses=1] + %5 = shufflevector <4 x float> %4, <4 x float> undef, <2 x i32> <i32 0, i32 1> ; <<2 x float>> [#uses=1] + %6 = shufflevector <2 x float> %5, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>> [#uses=1] + ret <4 x float> %6 +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-09-21-LiveVariablesBug.ll b/src/LLVM/test/CodeGen/ARM/2009-09-21-LiveVariablesBug.ll new file mode 100644 index 0000000..aace475 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-09-21-LiveVariablesBug.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mattr=+neon + +; PR5024 + +%bar = type { <4 x float> } +%foo = type { %bar, %bar, %bar, %bar } + +declare arm_aapcs_vfpcc <4 x float> @bbb(%bar*) nounwind + +define arm_aapcs_vfpcc void @aaa(%foo* noalias sret %agg.result, %foo* %tfrm) nounwind { +entry: + %0 = call arm_aapcs_vfpcc <4 x float> @bbb(%bar* undef) nounwind ; <<4 x float>> [#uses=0] + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-09-22-LiveVariablesBug.ll b/src/LLVM/test/CodeGen/ARM/2009-09-22-LiveVariablesBug.ll new file mode 100644 index 0000000..30931a2 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-09-22-LiveVariablesBug.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mattr=+neon + +; PR5024 + +%bar = type { %foo, %foo } +%foo = type { <4 x float> } + +declare arm_aapcs_vfpcc float @aaa(%foo* nocapture) nounwind readonly + +declare arm_aapcs_vfpcc %bar* @bbb(%bar*, <4 x float>, <4 x float>) nounwind + +define arm_aapcs_vfpcc void @ccc(i8* nocapture %pBuffer, i32 %numItems) nounwind { +entry: + br i1 undef, label %return, label %bb.nph + +bb.nph: ; preds = %entry + %0 = call arm_aapcs_vfpcc %bar* @bbb(%bar* undef, <4 x float> undef, <4 x float> undef) nounwind ; <%bar*> [#uses=0] + %1 = call arm_aapcs_vfpcc float @aaa(%foo* undef) nounwind ; <float> [#uses=0] + unreachable + +return: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-09-23-LiveVariablesBug.ll b/src/LLVM/test/CodeGen/ARM/2009-09-23-LiveVariablesBug.ll new file mode 100644 index 0000000..2ff479b --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-09-23-LiveVariablesBug.ll
@@ -0,0 +1,21 @@ +; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mattr=+neon + +; PR5024 + +%struct.1 = type { %struct.4, %struct.4 } +%struct.4 = type { <4 x float> } + +define arm_aapcs_vfpcc %struct.1* @hhh3(%struct.1* %this, <4 x float> %lenation.0, <4 x float> %legalation.0) nounwind { +entry: + %0 = call arm_aapcs_vfpcc %struct.4* @sss1(%struct.4* undef, float 0.000000e+00) nounwind ; <%struct.4*> [#uses=0] + %1 = call arm_aapcs_vfpcc %struct.4* @qqq1(%struct.4* null, float 5.000000e-01) nounwind ; <%struct.4*> [#uses=0] + %val92 = load <4 x float>* null ; <<4 x float>> [#uses=1] + %2 = call arm_aapcs_vfpcc %struct.4* @zzz2(%struct.4* undef, <4 x float> %val92) nounwind ; <%struct.4*> [#uses=0] + ret %struct.1* %this +} + +declare arm_aapcs_vfpcc %struct.4* @qqq1(%struct.4*, float) nounwind + +declare arm_aapcs_vfpcc %struct.4* @sss1(%struct.4*, float) nounwind + +declare arm_aapcs_vfpcc %struct.4* @zzz2(%struct.4*, <4 x float>) nounwind
diff --git a/src/LLVM/test/CodeGen/ARM/2009-09-24-spill-align.ll b/src/LLVM/test/CodeGen/ARM/2009-09-24-spill-align.ll new file mode 100644 index 0000000..8bfd026 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-09-24-spill-align.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; pr4926 + +define void @test_vget_lanep16() nounwind { +entry: + %arg0_poly16x4_t = alloca <4 x i16> ; <<4 x i16>*> [#uses=1] + %out_poly16_t = alloca i16 ; <i16*> [#uses=1] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] +; CHECK: vldr.64 + %0 = load <4 x i16>* %arg0_poly16x4_t, align 8 ; <<4 x i16>> [#uses=1] + %1 = extractelement <4 x i16> %0, i32 1 ; <i16> [#uses=1] + store i16 %1, i16* %out_poly16_t, align 2 + br label %return + +return: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-09-27-CoalescerBug.ll b/src/LLVM/test/CodeGen/ARM/2009-09-27-CoalescerBug.ll new file mode 100644 index 0000000..ea2693a --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-09-27-CoalescerBug.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -mtriple=armv7-eabi -mcpu=cortex-a8 +; PR5055 + +module asm ".globl\09__aeabi_f2lz" +module asm ".set\09__aeabi_f2lz, __fixsfdi" +module asm "" + +define arm_aapcs_vfpcc i64 @__fixsfdi(float %a) nounwind { +entry: + %0 = fcmp olt float %a, 0.000000e+00 ; <i1> [#uses=1] + br i1 %0, label %bb, label %bb1 + +bb: ; preds = %entry + %1 = fsub float -0.000000e+00, %a ; <float> [#uses=1] + %2 = tail call arm_aapcs_vfpcc i64 @__fixunssfdi(float %1) nounwind ; <i64> [#uses=1] + %3 = sub i64 0, %2 ; <i64> [#uses=1] + ret i64 %3 + +bb1: ; preds = %entry + %4 = tail call arm_aapcs_vfpcc i64 @__fixunssfdi(float %a) nounwind ; <i64> [#uses=1] + ret i64 %4 +} + +declare arm_aapcs_vfpcc i64 @__fixunssfdi(float)
diff --git a/src/LLVM/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll b/src/LLVM/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll new file mode 100644 index 0000000..0fe3b39 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -mtriple=armv5-unknown-linux-gnueabi -mcpu=arm10tdmi | FileCheck %s +; PR4687 + +%0 = type { double, double } + +define void @foo(%0* noalias nocapture sret %agg.result, double %x.0, double %y.0) nounwind { +; CHECK: foo: +; CHECK: bl __aeabi_dadd +; CHECK-NOT: strd +; CHECK: mov + %x76 = fmul double %y.0, 0.000000e+00 ; <double> [#uses=1] + %x77 = fadd double %y.0, 0.000000e+00 ; <double> [#uses=1] + %tmpr = fadd double %x.0, %x76 ; <double> [#uses=1] + %agg.result.0 = getelementptr %0* %agg.result, i32 0, i32 0 ; <double*> [#uses=1] + store double %tmpr, double* %agg.result.0, align 8 + %agg.result.1 = getelementptr %0* %agg.result, i32 0, i32 1 ; <double*> [#uses=1] + store double %x77, double* %agg.result.1, align 8 + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-10-02-NEONSubregsBug.ll b/src/LLVM/test/CodeGen/ARM/2009-10-02-NEONSubregsBug.ll new file mode 100644 index 0000000..465368b --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-10-02-NEONSubregsBug.ll
@@ -0,0 +1,63 @@ +; RUN: llc -mtriple=armv7-eabi -mcpu=cortex-a8 -enable-unsafe-fp-math < %s +; PR5367 + +define arm_aapcs_vfpcc void @_Z27Benchmark_SceDualQuaternionPvm(i8* nocapture %pBuffer, i32 %numItems) nounwind { +entry: + br i1 undef, label %return, label %bb + +bb: ; preds = %bb, %entry + %0 = load float* undef, align 4 ; <float> [#uses=1] + %1 = load float* null, align 4 ; <float> [#uses=1] + %2 = insertelement <4 x float> undef, float undef, i32 1 ; <<4 x float>> [#uses=1] + %3 = insertelement <4 x float> %2, float %1, i32 2 ; <<4 x float>> [#uses=2] + %4 = insertelement <4 x float> undef, float %0, i32 2 ; <<4 x float>> [#uses=1] + %5 = insertelement <4 x float> %4, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=4] + %6 = fsub <4 x float> zeroinitializer, %3 ; <<4 x float>> [#uses=1] + %7 = shufflevector <4 x float> %6, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=2] + %8 = shufflevector <4 x float> %5, <4 x float> undef, <2 x i32> <i32 0, i32 1> ; <<2 x float>> [#uses=1] + %9 = shufflevector <2 x float> %8, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>> [#uses=2] + %10 = fmul <4 x float> %7, %9 ; <<4 x float>> [#uses=1] + %11 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] + %12 = shufflevector <4 x float> %5, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=2] + %13 = shufflevector <2 x float> %12, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] + %14 = fmul <4 x float> %11, %13 ; <<4 x float>> [#uses=1] + %15 = fadd <4 x float> %10, %14 ; <<4 x float>> [#uses=1] + %16 = shufflevector <2 x float> %12, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>> [#uses=1] + %17 = fadd <4 x float> %15, zeroinitializer ; <<4 x float>> [#uses=1] + %18 = shufflevector <4 x float> %17, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef> ; <<4 x float>> [#uses=1] 
+ %19 = fmul <4 x float> %7, %16 ; <<4 x float>> [#uses=1] + %20 = fadd <4 x float> %19, zeroinitializer ; <<4 x float>> [#uses=1] + %21 = shufflevector <4 x float> %3, <4 x float> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef> ; <<4 x float>> [#uses=1] + %22 = shufflevector <4 x float> %21, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] + %23 = fmul <4 x float> %22, %9 ; <<4 x float>> [#uses=1] + %24 = fadd <4 x float> %20, %23 ; <<4 x float>> [#uses=1] + %25 = shufflevector <4 x float> %18, <4 x float> %24, <4 x i32> <i32 0, i32 1, i32 6, i32 undef> ; <<4 x float>> [#uses=1] + %26 = shufflevector <4 x float> %25, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 7> ; <<4 x float>> [#uses=1] + %27 = fmul <4 x float> %26, <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01> ; <<4 x float>> [#uses=1] + %28 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %5 ; <<4 x float>> [#uses=1] + %29 = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1] + %30 = fmul <4 x float> zeroinitializer, %29 ; <<4 x float>> [#uses=1] + %31 = fmul <4 x float> %30, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00> ; <<4 x float>> [#uses=1] + %32 = shufflevector <4 x float> %27, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] + %33 = shufflevector <4 x float> %28, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1] + %34 = shufflevector <2 x float> %33, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>> [#uses=1] + %35 = fmul <4 x float> %32, %34 ; <<4 x float>> [#uses=1] + %36 = fadd <4 x float> %35, zeroinitializer ; <<4 x float>> [#uses=1] + %37 = shufflevector <4 x float> %5, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> ; <<4 x float>> [#uses=1] + %38 = shufflevector <4 x float> %37, <4 
x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] + %39 = fmul <4 x float> zeroinitializer, %38 ; <<4 x float>> [#uses=1] + %40 = fadd <4 x float> %36, %39 ; <<4 x float>> [#uses=1] + %41 = fadd <4 x float> %40, zeroinitializer ; <<4 x float>> [#uses=1] + %42 = shufflevector <4 x float> undef, <4 x float> %41, <4 x i32> <i32 0, i32 1, i32 6, i32 3> ; <<4 x float>> [#uses=1] + %43 = fmul <4 x float> %42, %31 ; <<4 x float>> [#uses=1] + store float undef, float* undef, align 4 + store float 0.000000e+00, float* null, align 4 + %44 = extractelement <4 x float> %43, i32 1 ; <float> [#uses=1] + store float %44, float* undef, align 4 + br i1 undef, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +} + +declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/ARM/2009-10-16-Scope.ll b/src/LLVM/test/CodeGen/ARM/2009-10-16-Scope.ll new file mode 100644 index 0000000..a2e7ff7 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-10-16-Scope.ll
@@ -0,0 +1,32 @@ +; RUN: llc %s -O0 -o /dev/null -mtriple=arm-apple-darwin +; PR 5197 +; There is not any llvm instruction assocated with !5. The code generator +; should be able to handle this. + +define void @bar() nounwind ssp { +entry: + %count_ = alloca i32, align 4 ; <i32*> [#uses=2] + br label %do.body, !dbg !0 + +do.body: ; preds = %entry + call void @llvm.dbg.declare(metadata !{i32* %count_}, metadata !4) + %conv = ptrtoint i32* %count_ to i32, !dbg !0 ; <i32> [#uses=1] + %call = call i32 @foo(i32 %conv) ssp, !dbg !0 ; <i32> [#uses=0] + br label %do.end, !dbg !0 + +do.end: ; preds = %do.body + ret void, !dbg !7 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +declare i32 @foo(i32) ssp + +!0 = metadata !{i32 5, i32 2, metadata !1, null} +!1 = metadata !{i32 458763, metadata !2, i32 1, i32 1}; [DW_TAG_lexical_block ] +!2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"bar", metadata !"bar", metadata !"bar", metadata !3, i32 4, null, i1 false, i1 true}; [DW_TAG_subprogram ] +!3 = metadata !{i32 458769, i32 0, i32 12, metadata !"genmodes.i", metadata !"/Users/yash/Downloads", metadata !"clang 1.1", i1 true, i1 false, metadata !"", i32 0}; [DW_TAG_compile_unit ] +!4 = metadata !{i32 459008, metadata !5, metadata !"count_", metadata !3, i32 5, metadata !6}; [ DW_TAG_auto_variable ] +!5 = metadata !{i32 458763, metadata !1, i32 1, i32 1}; [DW_TAG_lexical_block ] +!6 = metadata !{i32 458788, metadata !3, metadata !"int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}; [DW_TAG_base_type ] +!7 = metadata !{i32 6, i32 1, metadata !2, null}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-10-21-InvalidFNeg.ll b/src/LLVM/test/CodeGen/ARM/2009-10-21-InvalidFNeg.ll new file mode 100644 index 0000000..0f021d2 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-10-21-InvalidFNeg.ll
@@ -0,0 +1,48 @@ +; RUN: llc -mcpu=cortex-a8 -mattr=+neon < %s | grep vneg +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "armv7-eabi" + +%aaa = type { %fff, %fff } +%bbb = type { [6 x %ddd] } +%ccc = type { %eee, %fff } +%ddd = type { %fff } +%eee = type { %fff, %fff, %fff, %fff } +%fff = type { %struct.vec_float4 } +%struct.vec_float4 = type { <4 x float> } + +define linkonce_odr arm_aapcs_vfpcc void @foo(%eee* noalias sret %agg.result, i64 %tfrm.0.0, i64 %tfrm.0.1, i64 %tfrm.0.2, i64 %tfrm.0.3, i64 %tfrm.0.4, i64 %tfrm.0.5, i64 %tfrm.0.6, i64 %tfrm.0.7) nounwind noinline { +entry: + %tmp104 = zext i64 %tfrm.0.2 to i512 ; <i512> [#uses=1] + %tmp105 = shl i512 %tmp104, 128 ; <i512> [#uses=1] + %tmp118 = zext i64 %tfrm.0.3 to i512 ; <i512> [#uses=1] + %tmp119 = shl i512 %tmp118, 192 ; <i512> [#uses=1] + %ins121 = or i512 %tmp119, %tmp105 ; <i512> [#uses=1] + %tmp99 = zext i64 %tfrm.0.4 to i512 ; <i512> [#uses=1] + %tmp100 = shl i512 %tmp99, 256 ; <i512> [#uses=1] + %tmp123 = zext i64 %tfrm.0.5 to i512 ; <i512> [#uses=1] + %tmp124 = shl i512 %tmp123, 320 ; <i512> [#uses=1] + %tmp96 = zext i64 %tfrm.0.6 to i512 ; <i512> [#uses=1] + %tmp97 = shl i512 %tmp96, 384 ; <i512> [#uses=1] + %tmp128 = zext i64 %tfrm.0.7 to i512 ; <i512> [#uses=1] + %tmp129 = shl i512 %tmp128, 448 ; <i512> [#uses=1] + %mask.masked = or i512 %tmp124, %tmp100 ; <i512> [#uses=1] + %ins131 = or i512 %tmp129, %tmp97 ; <i512> [#uses=1] + %tmp109132 = zext i64 %tfrm.0.0 to i128 ; <i128> [#uses=1] + %tmp113134 = zext i64 %tfrm.0.1 to i128 ; <i128> [#uses=1] + %tmp114133 = shl i128 %tmp113134, 64 ; <i128> [#uses=1] + %tmp94 = or i128 %tmp114133, %tmp109132 ; <i128> [#uses=1] + %tmp95 = bitcast i128 %tmp94 to <4 x float> ; <<4 x float>> [#uses=0] + %tmp82 = lshr i512 %ins121, 128 ; <i512> [#uses=1] + %tmp83 = trunc i512 %tmp82 to i128 ; <i128> [#uses=1] + %tmp84 = bitcast i128 %tmp83 to <4 x float> ; <<4 
x float>> [#uses=0] + %tmp86 = lshr i512 %mask.masked, 256 ; <i512> [#uses=1] + %tmp87 = trunc i512 %tmp86 to i128 ; <i128> [#uses=1] + %tmp88 = bitcast i128 %tmp87 to <4 x float> ; <<4 x float>> [#uses=0] + %tmp90 = lshr i512 %ins131, 384 ; <i512> [#uses=1] + %tmp91 = trunc i512 %tmp90 to i128 ; <i128> [#uses=1] + %tmp92 = bitcast i128 %tmp91 to <4 x float> ; <<4 x float>> [#uses=1] + %tmp = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %tmp92 ; <<4 x float>> [#uses=1] + %tmp28 = getelementptr inbounds %eee* %agg.result, i32 0, i32 3, i32 0, i32 0 ; <<4 x float>*> [#uses=1] + store <4 x float> %tmp, <4 x float>* %tmp28, align 16 + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-10-27-double-align.ll b/src/LLVM/test/CodeGen/ARM/2009-10-27-double-align.ll new file mode 100644 index 0000000..b37de9d --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-10-27-double-align.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s +; RUN: llc < %s -mtriple=arm-linux-gnueabi -regalloc=basic | FileCheck %s + +@.str = private constant [1 x i8] zeroinitializer, align 1 + +define void @g() { +entry: +;CHECK: [sp, #8] +;CHECK: [sp, #12] +;CHECK: [sp] + tail call void (i8*, ...)* @f(i8* getelementptr ([1 x i8]* @.str, i32 0, i32 0), i32 1, double 2.000000e+00, i32 3, double 4.000000e+00) + ret void +} + +declare void @f(i8*, ...)
diff --git a/src/LLVM/test/CodeGen/ARM/2009-10-30.ll b/src/LLVM/test/CodeGen/ARM/2009-10-30.ll new file mode 100644 index 0000000..e46ab1e --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-10-30.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -mtriple=armv6-linux-gnueabi | FileCheck %s +; This test checks that the address of the varg arguments is correctly +; computed when there are 5 or more regular arguments. + +define void @f(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, ...) { +entry: +;CHECK: sub sp, sp, #4 +;CHECK: add r{{[0-9]+}}, sp, #8 +;CHECK: str r{{[0-9]+}}, [sp], #4 +;CHECK: bx lr + %ap = alloca i8*, align 4 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + ret void +} + +declare void @llvm.va_start(i8*) nounwind
diff --git a/src/LLVM/test/CodeGen/ARM/2009-11-01-NeonMoves.ll b/src/LLVM/test/CodeGen/ARM/2009-11-01-NeonMoves.ll new file mode 100644 index 0000000..a18a830 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-11-01-NeonMoves.ll
@@ -0,0 +1,40 @@ +; RUN: llc -mcpu=cortex-a8 < %s | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "armv7-eabi" + +%foo = type { <4 x float> } + +define arm_aapcs_vfpcc void @bar(%foo* noalias sret %agg.result, <4 x float> %quat.0) nounwind { +entry: + %quat_addr = alloca %foo, align 16 ; <%foo*> [#uses=2] + %0 = getelementptr inbounds %foo* %quat_addr, i32 0, i32 0 ; <<4 x float>*> [#uses=1] + store <4 x float> %quat.0, <4 x float>* %0 + %1 = call arm_aapcs_vfpcc <4 x float> @quux(%foo* %quat_addr) nounwind ; <<4 x float>> [#uses=3] + %2 = fmul <4 x float> %1, %1 ; <<4 x float>> [#uses=2] + %3 = shufflevector <4 x float> %2, <4 x float> undef, <2 x i32> <i32 0, i32 1> ; <<2 x float>> [#uses=1] + %4 = shufflevector <4 x float> %2, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1] +;CHECK-NOT: vmov +;CHECK: vpadd + %5 = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %3, <2 x float> %4) nounwind ; <<2 x float>> [#uses=2] + %6 = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %5, <2 x float> %5) nounwind ; <<2 x float>> [#uses=2] + %7 = shufflevector <2 x float> %6, <2 x float> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=2] +;CHECK: vorr + %8 = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %7) nounwind ; <<4 x float>> [#uses=3] + %9 = fmul <4 x float> %8, %8 ; <<4 x float>> [#uses=1] + %10 = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %9, <4 x float> %7) nounwind ; <<4 x float>> [#uses=1] + %11 = fmul <4 x float> %10, %8 ; <<4 x float>> [#uses=1] + %12 = fmul <4 x float> %11, %1 ; <<4 x float>> [#uses=1] + %13 = call arm_aapcs_vfpcc %foo* @baz(%foo* %agg.result, <4 x float> %12) nounwind ; <%foo*> [#uses=0] + ret void +} + +declare arm_aapcs_vfpcc %foo* @baz(%foo*, <4 x float>) nounwind + +declare arm_aapcs_vfpcc <4 x float> @quux(%foo* nocapture) nounwind readonly + 
+declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwind readnone + +declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone + +declare <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float>, <4 x float>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/ARM/2009-11-02-NegativeLane.ll b/src/LLVM/test/CodeGen/ARM/2009-11-02-NegativeLane.ll new file mode 100644 index 0000000..ca5ae8b --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-11-02-NegativeLane.ll
@@ -0,0 +1,21 @@ +; RUN: llc -mcpu=cortex-a8 < %s | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "armv7-eabi" + +define arm_aapcs_vfpcc void @foo(i8* nocapture %pBuffer, i32 %numItems) nounwind { +entry: + br i1 undef, label %return, label %bb + +bb: ; preds = %bb, %entry +; CHECK: vld1.16 {d16[], d17[]} + %0 = load i16* undef, align 2 + %1 = insertelement <8 x i16> undef, i16 %0, i32 2 + %2 = insertelement <8 x i16> %1, i16 undef, i32 3 + %3 = mul <8 x i16> %2, %2 + %4 = extractelement <8 x i16> %3, i32 2 + store i16 %4, i16* undef, align 2 + br i1 undef, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll b/src/LLVM/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll new file mode 100644 index 0000000..7aae3ac --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll
@@ -0,0 +1,66 @@ +; RUN: llc -mcpu=cortex-a8 < %s | FileCheck %s +; PR5423 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "armv7-eabi" + +define arm_aapcs_vfpcc void @foo() { +entry: + %0 = load float* null, align 4 ; <float> [#uses=2] + %1 = fmul float %0, undef ; <float> [#uses=2] + %2 = fmul float 0.000000e+00, %1 ; <float> [#uses=2] + %3 = fmul float %0, %1 ; <float> [#uses=1] + %4 = fadd float 0.000000e+00, %3 ; <float> [#uses=1] + %5 = fsub float 1.000000e+00, %4 ; <float> [#uses=1] +; CHECK: foo: +; CHECK: vmov.f32 s{{[0-9]+}}, #1.000000e+00 + %6 = fsub float 1.000000e+00, undef ; <float> [#uses=2] + %7 = fsub float %2, undef ; <float> [#uses=1] + %8 = fsub float 0.000000e+00, undef ; <float> [#uses=3] + %9 = fadd float %2, undef ; <float> [#uses=3] + %10 = load float* undef, align 8 ; <float> [#uses=3] + %11 = fmul float %8, %10 ; <float> [#uses=1] + %12 = fadd float undef, %11 ; <float> [#uses=2] + %13 = fmul float undef, undef ; <float> [#uses=1] + %14 = fmul float %6, 0.000000e+00 ; <float> [#uses=1] + %15 = fadd float %13, %14 ; <float> [#uses=1] + %16 = fmul float %9, %10 ; <float> [#uses=1] + %17 = fadd float %15, %16 ; <float> [#uses=2] + %18 = fmul float 0.000000e+00, undef ; <float> [#uses=1] + %19 = fadd float %18, 0.000000e+00 ; <float> [#uses=1] + %20 = fmul float undef, %10 ; <float> [#uses=1] + %21 = fadd float %19, %20 ; <float> [#uses=1] + %22 = load float* undef, align 8 ; <float> [#uses=1] + %23 = fmul float %5, %22 ; <float> [#uses=1] + %24 = fadd float %23, undef ; <float> [#uses=1] + %25 = load float* undef, align 8 ; <float> [#uses=2] + %26 = fmul float %8, %25 ; <float> [#uses=1] + %27 = fadd float %24, %26 ; <float> [#uses=1] + %28 = fmul float %9, %25 ; <float> [#uses=1] + %29 = fadd float undef, %28 ; <float> [#uses=1] + %30 = fmul float %8, undef ; <float> [#uses=1] + %31 = fadd float undef, %30 ; <float> [#uses=1] + %32 = fmul 
float %6, undef ; <float> [#uses=1] + %33 = fadd float undef, %32 ; <float> [#uses=1] + %34 = fmul float %9, undef ; <float> [#uses=1] + %35 = fadd float %33, %34 ; <float> [#uses=1] + %36 = fmul float 0.000000e+00, undef ; <float> [#uses=1] + %37 = fmul float %7, undef ; <float> [#uses=1] + %38 = fadd float %36, %37 ; <float> [#uses=1] + %39 = fmul float undef, undef ; <float> [#uses=1] + %40 = fadd float %38, %39 ; <float> [#uses=1] + store float %12, float* undef, align 8 + store float %17, float* undef, align 4 + store float %21, float* undef, align 8 + store float %27, float* undef, align 8 + store float %29, float* undef, align 4 + store float %31, float* undef, align 8 + store float %40, float* undef, align 8 + store float %12, float* null, align 8 + %41 = fmul float %17, undef ; <float> [#uses=1] + %42 = fadd float %41, undef ; <float> [#uses=1] + %43 = fmul float %35, undef ; <float> [#uses=1] + %44 = fadd float %42, %43 ; <float> [#uses=1] + store float %44, float* null, align 4 + unreachable +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-11-13-CoalescerCrash.ll b/src/LLVM/test/CodeGen/ARM/2009-11-13-CoalescerCrash.ll new file mode 100644 index 0000000..efc4be1 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-11-13-CoalescerCrash.ll
@@ -0,0 +1,20 @@ +; RUN: llc -mtriple=armv7-eabi -mcpu=cortex-a8 < %s +; PR5410 + +%0 = type { float, float, float, float } +%pln = type { %vec, float } +%vec = type { [4 x float] } + +define arm_aapcs_vfpcc float @aaa(%vec* nocapture %ustart, %vec* nocapture %udir, %vec* nocapture %vstart, %vec* nocapture %vdir, %vec* %upoint, %vec* %vpoint) { +entry: + br i1 undef, label %bb81, label %bb48 + +bb48: ; preds = %entry + %0 = call arm_aapcs_vfpcc %0 @bbb(%pln* undef, %vec* %vstart, %vec* undef) nounwind ; <%0> [#uses=0] + ret float 0.000000e+00 + +bb81: ; preds = %entry + ret float 0.000000e+00 +} + +declare arm_aapcs_vfpcc %0 @bbb(%pln* nocapture, %vec* nocapture, %vec* nocapture) nounwind
diff --git a/src/LLVM/test/CodeGen/ARM/2009-11-13-ScavengerAssert.ll b/src/LLVM/test/CodeGen/ARM/2009-11-13-ScavengerAssert.ll new file mode 100644 index 0000000..6cce02d --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-11-13-ScavengerAssert.ll
@@ -0,0 +1,42 @@ +; RUN: llc -mtriple=armv7-eabi -mcpu=cortex-a8 < %s +; PR5411 + +%bar = type { %quad, float, float, [3 x %quux*], [3 x %bar*], [2 x %bar*], [3 x i8], i8 } +%baz = type { %bar*, i32 } +%foo = type { i8, %quuz, %quad, float, [64 x %quux], [128 x %bar], i32, %baz, %baz } +%quad = type { [4 x float] } +%quux = type { %quad, %quad } +%quuz = type { [4 x %quux*], [4 x float], i32 } + +define arm_aapcs_vfpcc %bar* @aaa(%foo* nocapture %this, %quux* %a, %quux* %b, %quux* %c, i8 zeroext %forced) { +entry: + br i1 undef, label %bb85, label %bb + +bb: ; preds = %entry + %0 = getelementptr inbounds %bar* null, i32 0, i32 0, i32 0, i32 2 ; <float*> [#uses=2] + %1 = load float* undef, align 4 ; <float> [#uses=1] + %2 = fsub float 0.000000e+00, undef ; <float> [#uses=2] + %3 = fmul float 0.000000e+00, undef ; <float> [#uses=1] + %4 = load float* %0, align 4 ; <float> [#uses=3] + %5 = fmul float %4, %2 ; <float> [#uses=1] + %6 = fsub float %3, %5 ; <float> [#uses=1] + %7 = fmul float %4, undef ; <float> [#uses=1] + %8 = fsub float %7, undef ; <float> [#uses=1] + %9 = fmul float undef, %2 ; <float> [#uses=1] + %10 = fmul float 0.000000e+00, undef ; <float> [#uses=1] + %11 = fsub float %9, %10 ; <float> [#uses=1] + %12 = fmul float undef, %6 ; <float> [#uses=1] + %13 = fmul float 0.000000e+00, %8 ; <float> [#uses=1] + %14 = fadd float %12, %13 ; <float> [#uses=1] + %15 = fmul float %1, %11 ; <float> [#uses=1] + %16 = fadd float %14, %15 ; <float> [#uses=1] + %17 = select i1 undef, float undef, float %16 ; <float> [#uses=1] + %18 = fdiv float %17, 0.000000e+00 ; <float> [#uses=1] + store float %18, float* undef, align 4 + %19 = fmul float %4, undef ; <float> [#uses=1] + store float %19, float* %0, align 4 + ret %bar* null + +bb85: ; preds = %entry + ret %bar* null +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-11-13-ScavengerAssert2.ll b/src/LLVM/test/CodeGen/ARM/2009-11-13-ScavengerAssert2.ll new file mode 100644 index 0000000..3ff6631 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-11-13-ScavengerAssert2.ll
@@ -0,0 +1,123 @@ +; RUN: llc -mtriple=armv7-eabi -mcpu=cortex-a8 < %s +; PR5412 + +%bar = type { %quad, float, float, [3 x %quuz*], [3 x %bar*], [2 x %bar*], [3 x i8], i8 } +%baz = type { %bar*, i32 } +%foo = type { i8, %quux, %quad, float, [64 x %quuz], [128 x %bar], i32, %baz, %baz } +%quad = type { [4 x float] } +%quux = type { [4 x %quuz*], [4 x float], i32 } +%quuz = type { %quad, %quad } + +define arm_aapcs_vfpcc %bar* @aaa(%foo* nocapture %this, %quuz* %a, %quuz* %b, %quuz* %c, i8 zeroext %forced) { +entry: + br i1 undef, label %bb85, label %bb + +bb: ; preds = %entry + br i1 undef, label %bb3.i, label %bb2.i + +bb2.i: ; preds = %bb + br label %bb3.i + +bb3.i: ; preds = %bb2.i, %bb + %0 = getelementptr inbounds %quuz* %a, i32 0, i32 1, i32 0, i32 0 ; <float*> [#uses=0] + %1 = fsub float 0.000000e+00, undef ; <float> [#uses=1] + %2 = getelementptr inbounds %quuz* %b, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=2] + %3 = load float* %2, align 4 ; <float> [#uses=1] + %4 = getelementptr inbounds %quuz* %a, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=1] + %5 = fsub float %3, undef ; <float> [#uses=2] + %6 = getelementptr inbounds %quuz* %b, i32 0, i32 1, i32 0, i32 2 ; <float*> [#uses=2] + %7 = load float* %6, align 4 ; <float> [#uses=1] + %8 = fsub float %7, undef ; <float> [#uses=1] + %9 = getelementptr inbounds %quuz* %c, i32 0, i32 1, i32 0, i32 0 ; <float*> [#uses=2] + %10 = load float* %9, align 4 ; <float> [#uses=1] + %11 = fsub float %10, undef ; <float> [#uses=2] + %12 = getelementptr inbounds %quuz* %c, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=2] + %13 = load float* %12, align 4 ; <float> [#uses=1] + %14 = fsub float %13, undef ; <float> [#uses=1] + %15 = load float* undef, align 4 ; <float> [#uses=1] + %16 = fsub float %15, undef ; <float> [#uses=1] + %17 = fmul float %5, %16 ; <float> [#uses=1] + %18 = fsub float %17, 0.000000e+00 ; <float> [#uses=5] + %19 = fmul float %8, %11 ; <float> [#uses=1] + %20 = fsub float %19, undef ; <float> 
[#uses=3] + %21 = fmul float %1, %14 ; <float> [#uses=1] + %22 = fmul float %5, %11 ; <float> [#uses=1] + %23 = fsub float %21, %22 ; <float> [#uses=2] + store float %18, float* undef + %24 = getelementptr inbounds %bar* null, i32 0, i32 0, i32 0, i32 1 ; <float*> [#uses=2] + store float %20, float* %24 + store float %23, float* undef + %25 = getelementptr inbounds %bar* null, i32 0, i32 0, i32 0, i32 3 ; <float*> [#uses=0] + %26 = fmul float %18, %18 ; <float> [#uses=1] + %27 = fadd float %26, undef ; <float> [#uses=1] + %28 = fadd float %27, undef ; <float> [#uses=1] + %29 = call arm_aapcs_vfpcc float @sqrtf(float %28) readnone ; <float> [#uses=1] + %30 = load float* null, align 4 ; <float> [#uses=2] + %31 = load float* %4, align 4 ; <float> [#uses=2] + %32 = load float* %2, align 4 ; <float> [#uses=2] + %33 = load float* null, align 4 ; <float> [#uses=3] + %34 = load float* %6, align 4 ; <float> [#uses=2] + %35 = fsub float %33, %34 ; <float> [#uses=2] + %36 = fmul float %20, %35 ; <float> [#uses=1] + %37 = fsub float %36, undef ; <float> [#uses=1] + %38 = fmul float %23, 0.000000e+00 ; <float> [#uses=1] + %39 = fmul float %18, %35 ; <float> [#uses=1] + %40 = fsub float %38, %39 ; <float> [#uses=1] + %41 = fmul float %18, 0.000000e+00 ; <float> [#uses=1] + %42 = fmul float %20, 0.000000e+00 ; <float> [#uses=1] + %43 = fsub float %41, %42 ; <float> [#uses=1] + %44 = fmul float 0.000000e+00, %37 ; <float> [#uses=1] + %45 = fmul float %31, %40 ; <float> [#uses=1] + %46 = fadd float %44, %45 ; <float> [#uses=1] + %47 = fmul float %33, %43 ; <float> [#uses=1] + %48 = fadd float %46, %47 ; <float> [#uses=2] + %49 = load float* %9, align 4 ; <float> [#uses=2] + %50 = fsub float %30, %49 ; <float> [#uses=1] + %51 = load float* %12, align 4 ; <float> [#uses=3] + %52 = fsub float %32, %51 ; <float> [#uses=2] + %53 = load float* undef, align 4 ; <float> [#uses=2] + %54 = load float* %24, align 4 ; <float> [#uses=2] + %55 = fmul float %54, undef ; <float> [#uses=1] + %56 = 
fmul float undef, %52 ; <float> [#uses=1] + %57 = fsub float %55, %56 ; <float> [#uses=1] + %58 = fmul float undef, %52 ; <float> [#uses=1] + %59 = fmul float %54, %50 ; <float> [#uses=1] + %60 = fsub float %58, %59 ; <float> [#uses=1] + %61 = fmul float %30, %57 ; <float> [#uses=1] + %62 = fmul float %32, 0.000000e+00 ; <float> [#uses=1] + %63 = fadd float %61, %62 ; <float> [#uses=1] + %64 = fmul float %34, %60 ; <float> [#uses=1] + %65 = fadd float %63, %64 ; <float> [#uses=2] + %66 = fcmp olt float %48, %65 ; <i1> [#uses=1] + %67 = fsub float %49, 0.000000e+00 ; <float> [#uses=1] + %68 = fsub float %51, %31 ; <float> [#uses=1] + %69 = fsub float %53, %33 ; <float> [#uses=1] + %70 = fmul float undef, %67 ; <float> [#uses=1] + %71 = load float* undef, align 4 ; <float> [#uses=2] + %72 = fmul float %71, %69 ; <float> [#uses=1] + %73 = fsub float %70, %72 ; <float> [#uses=1] + %74 = fmul float %71, %68 ; <float> [#uses=1] + %75 = fsub float %74, 0.000000e+00 ; <float> [#uses=1] + %76 = fmul float %51, %73 ; <float> [#uses=1] + %77 = fadd float undef, %76 ; <float> [#uses=1] + %78 = fmul float %53, %75 ; <float> [#uses=1] + %79 = fadd float %77, %78 ; <float> [#uses=1] + %80 = select i1 %66, float %48, float %65 ; <float> [#uses=1] + %81 = select i1 undef, float %80, float %79 ; <float> [#uses=1] + %iftmp.164.0 = select i1 undef, float %29, float 1.000000e+00 ; <float> [#uses=1] + %82 = fdiv float %81, %iftmp.164.0 ; <float> [#uses=1] + %iftmp.165.0 = select i1 undef, float %82, float 0.000000e+00 ; <float> [#uses=1] + store float %iftmp.165.0, float* undef, align 4 + br i1 false, label %bb4.i97, label %ccc.exit98 + +bb4.i97: ; preds = %bb3.i + br label %ccc.exit98 + +ccc.exit98: ; preds = %bb4.i97, %bb3.i + ret %bar* null + +bb85: ; preds = %entry + ret %bar* null +} + +declare arm_aapcs_vfpcc float @sqrtf(float) readnone
diff --git a/src/LLVM/test/CodeGen/ARM/2009-11-13-VRRewriterCrash.ll b/src/LLVM/test/CodeGen/ARM/2009-11-13-VRRewriterCrash.ll new file mode 100644 index 0000000..832ff4f --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-11-13-VRRewriterCrash.ll
@@ -0,0 +1,113 @@ +; RUN: llc -mtriple=armv7-eabi -mcpu=cortex-a8 < %s +; PR5412 +; rdar://7384107 + +%bar = type { %quad, float, float, [3 x %quuz*], [3 x %bar*], [2 x %bar*], [3 x i8], i8 } +%baz = type { %bar*, i32 } +%foo = type { i8, %quux, %quad, float, [64 x %quuz], [128 x %bar], i32, %baz, %baz } +%quad = type { [4 x float] } +%quux = type { [4 x %quuz*], [4 x float], i32 } +%quuz = type { %quad, %quad } + +define arm_aapcs_vfpcc %bar* @aaa(%foo* nocapture %this, %quuz* %a, %quuz* %b, %quuz* %c, i8 zeroext %forced) { +entry: + %0 = load %bar** undef, align 4 ; <%bar*> [#uses=2] + br i1 false, label %bb85, label %bb + +bb: ; preds = %entry + br i1 undef, label %bb3.i, label %bb2.i + +bb2.i: ; preds = %bb + br label %bb3.i + +bb3.i: ; preds = %bb2.i, %bb + %1 = getelementptr inbounds %quuz* %a, i32 0, i32 1, i32 0, i32 0 ; <float*> [#uses=1] + %2 = fsub float 0.000000e+00, undef ; <float> [#uses=1] + %3 = getelementptr inbounds %quuz* %b, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=1] + %4 = getelementptr inbounds %quuz* %b, i32 0, i32 1, i32 0, i32 2 ; <float*> [#uses=1] + %5 = fsub float 0.000000e+00, undef ; <float> [#uses=1] + %6 = getelementptr inbounds %quuz* %c, i32 0, i32 1, i32 0, i32 0 ; <float*> [#uses=1] + %7 = getelementptr inbounds %quuz* %c, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=1] + %8 = fsub float undef, undef ; <float> [#uses=1] + %9 = fmul float 0.000000e+00, %8 ; <float> [#uses=1] + %10 = fmul float %5, 0.000000e+00 ; <float> [#uses=1] + %11 = fsub float %9, %10 ; <float> [#uses=3] + %12 = fmul float %2, 0.000000e+00 ; <float> [#uses=1] + %13 = fmul float 0.000000e+00, undef ; <float> [#uses=1] + %14 = fsub float %12, %13 ; <float> [#uses=2] + store float %14, float* undef + %15 = getelementptr inbounds %bar* %0, i32 0, i32 0, i32 0, i32 3 ; <float*> [#uses=1] + store float 0.000000e+00, float* %15 + %16 = fmul float %11, %11 ; <float> [#uses=1] + %17 = fadd float %16, 0.000000e+00 ; <float> [#uses=1] + %18 = fadd float %17, undef 
; <float> [#uses=1] + %19 = call arm_aapcs_vfpcc float @sqrtf(float %18) readnone ; <float> [#uses=2] + %20 = fcmp ogt float %19, 0x3F1A36E2E0000000 ; <i1> [#uses=1] + %21 = load float* %1, align 4 ; <float> [#uses=2] + %22 = load float* %3, align 4 ; <float> [#uses=2] + %23 = load float* undef, align 4 ; <float> [#uses=2] + %24 = load float* %4, align 4 ; <float> [#uses=2] + %25 = fsub float %23, %24 ; <float> [#uses=2] + %26 = fmul float 0.000000e+00, %25 ; <float> [#uses=1] + %27 = fsub float %26, undef ; <float> [#uses=1] + %28 = fmul float %14, 0.000000e+00 ; <float> [#uses=1] + %29 = fmul float %11, %25 ; <float> [#uses=1] + %30 = fsub float %28, %29 ; <float> [#uses=1] + %31 = fsub float undef, 0.000000e+00 ; <float> [#uses=1] + %32 = fmul float %21, %27 ; <float> [#uses=1] + %33 = fmul float undef, %30 ; <float> [#uses=1] + %34 = fadd float %32, %33 ; <float> [#uses=1] + %35 = fmul float %23, %31 ; <float> [#uses=1] + %36 = fadd float %34, %35 ; <float> [#uses=1] + %37 = load float* %6, align 4 ; <float> [#uses=2] + %38 = load float* %7, align 4 ; <float> [#uses=2] + %39 = fsub float %22, %38 ; <float> [#uses=2] + %40 = load float* undef, align 4 ; <float> [#uses=1] + %41 = load float* null, align 4 ; <float> [#uses=2] + %42 = fmul float %41, undef ; <float> [#uses=1] + %43 = fmul float undef, %39 ; <float> [#uses=1] + %44 = fsub float %42, %43 ; <float> [#uses=1] + %45 = fmul float undef, %39 ; <float> [#uses=1] + %46 = fmul float %41, 0.000000e+00 ; <float> [#uses=1] + %47 = fsub float %45, %46 ; <float> [#uses=1] + %48 = fmul float 0.000000e+00, %44 ; <float> [#uses=1] + %49 = fmul float %22, undef ; <float> [#uses=1] + %50 = fadd float %48, %49 ; <float> [#uses=1] + %51 = fmul float %24, %47 ; <float> [#uses=1] + %52 = fadd float %50, %51 ; <float> [#uses=1] + %53 = fsub float %37, %21 ; <float> [#uses=2] + %54 = fmul float undef, undef ; <float> [#uses=1] + %55 = fmul float undef, undef ; <float> [#uses=1] + %56 = fsub float %54, %55 ; <float> 
[#uses=1] + %57 = fmul float undef, %53 ; <float> [#uses=1] + %58 = load float* undef, align 4 ; <float> [#uses=2] + %59 = fmul float %58, undef ; <float> [#uses=1] + %60 = fsub float %57, %59 ; <float> [#uses=1] + %61 = fmul float %58, undef ; <float> [#uses=1] + %62 = fmul float undef, %53 ; <float> [#uses=1] + %63 = fsub float %61, %62 ; <float> [#uses=1] + %64 = fmul float %37, %56 ; <float> [#uses=1] + %65 = fmul float %38, %60 ; <float> [#uses=1] + %66 = fadd float %64, %65 ; <float> [#uses=1] + %67 = fmul float %40, %63 ; <float> [#uses=1] + %68 = fadd float %66, %67 ; <float> [#uses=1] + %69 = select i1 undef, float %36, float %52 ; <float> [#uses=1] + %70 = select i1 undef, float %69, float %68 ; <float> [#uses=1] + %iftmp.164.0 = select i1 %20, float %19, float 1.000000e+00 ; <float> [#uses=1] + %71 = fdiv float %70, %iftmp.164.0 ; <float> [#uses=1] + store float %71, float* null, align 4 + %72 = icmp eq %bar* null, %0 ; <i1> [#uses=1] + br i1 %72, label %bb4.i97, label %ccc.exit98 + +bb4.i97: ; preds = %bb3.i + %73 = load %bar** undef, align 4 ; <%bar*> [#uses=0] + br label %ccc.exit98 + +ccc.exit98: ; preds = %bb4.i97, %bb3.i + ret %bar* null + +bb85: ; preds = %entry + ret %bar* null +} + +declare arm_aapcs_vfpcc float @sqrtf(float) readnone
diff --git a/src/LLVM/test/CodeGen/ARM/2009-11-30-LiveVariablesBug.ll b/src/LLVM/test/CodeGen/ARM/2009-11-30-LiveVariablesBug.ll new file mode 100644 index 0000000..efe74cf --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-11-30-LiveVariablesBug.ll
@@ -0,0 +1,41 @@ +; RUN: llc -mtriple=armv7-eabi -mcpu=cortex-a8 < %s +; PR5614 + +%"als" = type { i32 (...)** } +%"av" = type { %"als" } +%"c" = type { %"lsm", %"Vec3", %"av"*, float, i8, float, %"lsm", i8, %"Vec3", %"Vec3", %"Vec3", float, float, float, %"Vec3", %"Vec3" } +%"lsm" = type { %"als", %"Vec3", %"Vec3", %"Vec3", %"Vec3" } +%"Vec3" = type { float, float, float } + +define arm_aapcs_vfpcc void @foo(%"c"* %this, %"Vec3"* nocapture %adjustment) { +entry: + switch i32 undef, label %return [ + i32 1, label %bb + i32 2, label %bb72 + i32 3, label %bb31 + i32 4, label %bb79 + i32 5, label %bb104 + ] + +bb: ; preds = %entry + ret void + +bb31: ; preds = %entry + %0 = call arm_aapcs_vfpcc %"Vec3" undef(%"lsm"* undef) ; <%"Vec3"> [#uses=1] + %mrv_gr69 = extractvalue %"Vec3" %0, 1 ; <float> [#uses=1] + %1 = fsub float %mrv_gr69, undef ; <float> [#uses=1] + store float %1, float* undef, align 4 + ret void + +bb72: ; preds = %entry + ret void + +bb79: ; preds = %entry + ret void + +bb104: ; preds = %entry + ret void + +return: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2009-12-02-vtrn-undef.ll b/src/LLVM/test/CodeGen/ARM/2009-12-02-vtrn-undef.ll new file mode 100644 index 0000000..f89a5de --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2009-12-02-vtrn-undef.ll
@@ -0,0 +1,33 @@ +; RUN: llc -mcpu=cortex-a8 < %s | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" +target triple = "armv7-apple-darwin10" + +%struct.int16x8_t = type { <8 x i16> } +%struct.int16x8x2_t = type { [2 x %struct.int16x8_t] } + +define void @t(%struct.int16x8x2_t* noalias nocapture sret %agg.result, <8 x i16> %tmp.0, %struct.int16x8x2_t* nocapture %dst) nounwind { +entry: +;CHECK: vtrn.16 + %0 = shufflevector <8 x i16> %tmp.0, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> + %1 = shufflevector <8 x i16> %tmp.0, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7> + %agg.result1218.0 = getelementptr %struct.int16x8x2_t* %agg.result, i32 0, i32 0, i32 0, i32 0 ; <<8 x i16>*> + store <8 x i16> %0, <8 x i16>* %agg.result1218.0, align 16 + %agg.result12.1.0 = getelementptr %struct.int16x8x2_t* %agg.result, i32 0, i32 0, i32 1, i32 0 ; <<8 x i16>*> + store <8 x i16> %1, <8 x i16>* %agg.result12.1.0, align 16 + ret void +} + +; Radar 8290937: Ignore undef shuffle indices. +; CHECK: t2 +; CHECK: vtrn.16 +define void @t2(%struct.int16x8x2_t* nocapture %ptr, <4 x i16> %a.0, <4 x i16> %b.0) nounwind { +entry: + %0 = shufflevector <4 x i16> %a.0, <4 x i16> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 undef, i32 undef, i32 undef, i32 undef> + %1 = shufflevector <4 x i16> %a.0, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> + %ptr26.0 = getelementptr inbounds %struct.int16x8x2_t* %ptr, i32 0, i32 0, i32 0, i32 0 + store <8 x i16> %0, <8 x i16>* %ptr26.0, align 16 + %ptr20.1.0 = getelementptr inbounds %struct.int16x8x2_t* %ptr, i32 0, i32 0, i32 1, i32 0 + store <8 x i16> %1, <8 x i16>* %ptr20.1.0, align 16 + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2010-03-04-eabi-fp-spill.ll b/src/LLVM/test/CodeGen/ARM/2010-03-04-eabi-fp-spill.ll new file mode 100644 index 0000000..f7adf73 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2010-03-04-eabi-fp-spill.ll
@@ -0,0 +1,65 @@ +; RUN: llc < %s -mtriple=arm-unknown-linux-gnueabi + +define void @"java.lang.String::getChars"([84 x i8]* %method, i32 %base_pc, [788 x i8]* %thread) { + %1 = load i32* undef ; <i32> [#uses=1] + %2 = sub i32 %1, 48 ; <i32> [#uses=1] + br i1 undef, label %stack_overflow, label %no_overflow + +stack_overflow: ; preds = %0 + unreachable + +no_overflow: ; preds = %0 + %frame = inttoptr i32 %2 to [17 x i32]* ; <[17 x i32]*> [#uses=4] + %3 = load i32* undef ; <i32> [#uses=1] + %4 = load i32* null ; <i32> [#uses=1] + %5 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 13 ; <i32*> [#uses=1] + %6 = bitcast i32* %5 to [8 x i8]** ; <[8 x i8]**> [#uses=1] + %7 = load [8 x i8]** %6 ; <[8 x i8]*> [#uses=1] + %8 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 12 ; <i32*> [#uses=1] + %9 = load i32* %8 ; <i32> [#uses=1] + br i1 undef, label %bci_13, label %bci_4 + +bci_13: ; preds = %no_overflow + br i1 undef, label %bci_30, label %bci_21 + +bci_30: ; preds = %bci_13 + br i1 undef, label %bci_46, label %bci_35 + +bci_46: ; preds = %bci_30 + %10 = sub i32 %4, %3 ; <i32> [#uses=1] + %11 = load [8 x i8]** null ; <[8 x i8]*> [#uses=1] + %callee = bitcast [8 x i8]* %11 to [84 x i8]* ; <[84 x i8]*> [#uses=1] + %12 = bitcast i8* undef to i32* ; <i32*> [#uses=1] + %base_pc7 = load i32* %12 ; <i32> [#uses=2] + %13 = add i32 %base_pc7, 0 ; <i32> [#uses=1] + %14 = inttoptr i32 %13 to void ([84 x i8]*, i32, [788 x i8]*)** ; <void ([84 x i8]*, i32, [788 x i8]*)**> [#uses=1] + %entry_point = load void ([84 x i8]*, i32, [788 x i8]*)** %14 ; <void ([84 x i8]*, i32, [788 x i8]*)*> [#uses=1] + %15 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 1 ; <i32*> [#uses=1] + %16 = ptrtoint i32* %15 to i32 ; <i32> [#uses=1] + %stack_pointer_addr9 = bitcast i8* undef to i32* ; <i32*> [#uses=1] + store i32 %16, i32* %stack_pointer_addr9 + %17 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 2 ; <i32*> [#uses=1] + store i32 %9, i32* %17 + store i32 %10, i32* 
undef + store [84 x i8]* %method, [84 x i8]** undef + %18 = add i32 %base_pc, 20 ; <i32> [#uses=1] + store i32 %18, i32* undef + store [8 x i8]* %7, [8 x i8]** undef + call void %entry_point([84 x i8]* %callee, i32 %base_pc7, [788 x i8]* %thread) + br i1 undef, label %no_exception, label %exception + +exception: ; preds = %bci_46 + ret void + +no_exception: ; preds = %bci_46 + ret void + +bci_35: ; preds = %bci_30 + ret void + +bci_21: ; preds = %bci_13 + ret void + +bci_4: ; preds = %no_overflow + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2010-03-04-stm-undef-addr.ll b/src/LLVM/test/CodeGen/ARM/2010-03-04-stm-undef-addr.ll new file mode 100644 index 0000000..b0b4cb3 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2010-03-04-stm-undef-addr.ll
@@ -0,0 +1,54 @@ +; RUN: llc < %s -march=arm + +define void @"java.lang.String::getChars"([84 x i8]* %method, i32 %base_pc, [788 x i8]* %thread) { + %1 = sub i32 undef, 48 ; <i32> [#uses=1] + br i1 undef, label %stack_overflow, label %no_overflow + +stack_overflow: ; preds = %0 + unreachable + +no_overflow: ; preds = %0 + %frame = inttoptr i32 %1 to [17 x i32]* ; <[17 x i32]*> [#uses=4] + %2 = load i32* null ; <i32> [#uses=2] + %3 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 14 ; <i32*> [#uses=1] + %4 = load i32* %3 ; <i32> [#uses=2] + %5 = load [8 x i8]** undef ; <[8 x i8]*> [#uses=2] + br i1 undef, label %bci_13, label %bci_4 + +bci_13: ; preds = %no_overflow + br i1 undef, label %bci_30, label %bci_21 + +bci_30: ; preds = %bci_13 + %6 = icmp sle i32 %2, %4 ; <i1> [#uses=1] + br i1 %6, label %bci_46, label %bci_35 + +bci_46: ; preds = %bci_30 + store [84 x i8]* %method, [84 x i8]** undef + br i1 false, label %no_exception, label %exception + +exception: ; preds = %bci_46 + ret void + +no_exception: ; preds = %bci_46 + ret void + +bci_35: ; preds = %bci_30 + %7 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 15 ; <i32*> [#uses=1] + store i32 %2, i32* %7 + %8 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 14 ; <i32*> [#uses=1] + store i32 %4, i32* %8 + %9 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 13 ; <i32*> [#uses=1] + %10 = bitcast i32* %9 to [8 x i8]** ; <[8 x i8]**> [#uses=1] + store [8 x i8]* %5, [8 x i8]** %10 + call void inttoptr (i32 13839116 to void ([788 x i8]*, i32)*)([788 x i8]* %thread, i32 7) + ret void + +bci_21: ; preds = %bci_13 + ret void + +bci_4: ; preds = %no_overflow + store [8 x i8]* %5, [8 x i8]** undef + store i32 undef, i32* undef + call void inttoptr (i32 13839116 to void ([788 x i8]*, i32)*)([788 x i8]* %thread, i32 7) + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2010-03-18-ldm-rtrn.ll b/src/LLVM/test/CodeGen/ARM/2010-03-18-ldm-rtrn.ll new file mode 100644 index 0000000..fee8600 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2010-03-18-ldm-rtrn.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=armv4-unknown-eabi | FileCheck %s -check-prefix=V4 +; RUN: llc < %s -mtriple=armv5-unknown-eabi | FileCheck %s +; RUN: llc < %s -mtriple=armv6-unknown-eabi | FileCheck %s + +define i32 @bar(i32 %a) nounwind { +entry: + %0 = tail call i32 @foo(i32 %a) nounwind ; <i32> [#uses=1] + %1 = add nsw i32 %0, 3 ; <i32> [#uses=1] +; CHECK: pop {r11, pc} +; V4: pop +; V4-NEXT: mov pc, lr + ret i32 %1 +} + +declare i32 @foo(i32)
diff --git a/src/LLVM/test/CodeGen/ARM/2010-04-07-DbgValueOtherTargets.ll b/src/LLVM/test/CodeGen/ARM/2010-04-07-DbgValueOtherTargets.ll new file mode 100644 index 0000000..6422689 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2010-04-07-DbgValueOtherTargets.ll
@@ -0,0 +1,28 @@ +; RUN: llc -O0 -march=arm -asm-verbose < %s | FileCheck %s +; Check that DEBUG_VALUE comments come through on a variety of targets. + +define i32 @main() nounwind ssp { +entry: +; CHECK: DEBUG_VALUE + call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9 + ret i32 0, !dbg !10 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.sp = !{!0} + +!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5} +!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 0} +!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] +!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!9 = metadata !{i32 3, i32 11, metadata !8, null} +!10 = metadata !{i32 4, i32 2, metadata !8, null} +
diff --git a/src/LLVM/test/CodeGen/ARM/2010-04-09-NeonSelect.ll b/src/LLVM/test/CodeGen/ARM/2010-04-09-NeonSelect.ll new file mode 100644 index 0000000..89d6a68 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2010-04-09-NeonSelect.ll
@@ -0,0 +1,23 @@ +; RUN: llc -march=arm -mattr=+neon < %s +; Radar 7770501: Don't crash on SELECT and SELECT_CC with NEON vector values. + +define void @vDSP_FFT16_copv(float* nocapture %O, float* nocapture %I, i32 %Direction) nounwind { +entry: + %.22 = select i1 undef, <4 x float> undef, <4 x float> zeroinitializer ; <<4 x float>> [#uses=1] + %0 = fadd <4 x float> undef, %.22 ; <<4 x float>> [#uses=1] + %1 = fsub <4 x float> %0, undef ; <<4 x float>> [#uses=1] + %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 2, i32 6, i32 3, i32 7> ; <<4 x float>> [#uses=1] + %3 = shufflevector <4 x float> undef, <4 x float> %2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> ; <<4 x float>> [#uses=1] + %4 = fmul <4 x float> %3, <float 0.000000e+00, float 0x3FED906BC0000000, float 0x3FE6A09E60000000, float 0xBFD87DE2A0000000> ; <<4 x float>> [#uses=1] + %5 = fadd <4 x float> undef, %4 ; <<4 x float>> [#uses=1] + %6 = fadd <4 x float> undef, %5 ; <<4 x float>> [#uses=1] + %7 = fadd <4 x float> undef, %6 ; <<4 x float>> [#uses=1] + br i1 undef, label %bb4, label %bb3 + +bb3: ; preds = %entry + %8 = shufflevector <4 x float> undef, <4 x float> %7, <4 x i32> <i32 2, i32 6, i32 3, i32 7> ; <<4 x float>> [#uses=0] + ret void + +bb4: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2010-04-13-v2f64SplitArg.ll b/src/LLVM/test/CodeGen/ARM/2010-04-13-v2f64SplitArg.ll new file mode 100644 index 0000000..1354c79 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2010-04-13-v2f64SplitArg.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin -mcpu=cortex-a8 +; Radar 7855014 + +define void @test1(i32 %f0, i32 %f1, i32 %f2, <4 x i32> %f3) nounwind { +entry: + unreachable +}
diff --git a/src/LLVM/test/CodeGen/ARM/2010-04-14-SplitVector.ll b/src/LLVM/test/CodeGen/ARM/2010-04-14-SplitVector.ll new file mode 100644 index 0000000..5d0c3cf --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2010-04-14-SplitVector.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=arm -mcpu=arm1136jf-s +; Radar 7854640 + +define void @test() nounwind { +bb: + br i1 undef, label %bb9, label %bb10 + +bb9: + %tmp63 = bitcast <4 x float> zeroinitializer to i128 + %tmp64 = trunc i128 %tmp63 to i32 + br label %bb10 + +bb10: + %0 = phi i32 [ %tmp64, %bb9 ], [ undef, %bb ] + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll b/src/LLVM/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll new file mode 100644 index 0000000..05581c3 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll
@@ -0,0 +1,26 @@ +; RUN: llc < %s +; PR6847 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64-n32" +target triple = "armv4t-apple-darwin10" + +define hidden i32 @__addvsi3(i32 %a, i32 %b) nounwind { +entry: + tail call void @llvm.dbg.value(metadata !{i32 %b}, i64 0, metadata !0) + %0 = add nsw i32 %b, %a, !dbg !9 ; <i32> [#uses=1] + ret i32 %0, !dbg !11 +} + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!0 = metadata !{i32 524545, metadata !1, metadata !"b", metadata !2, i32 93, metadata !6} ; [ DW_TAG_arg_variable ] +!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"__addvsi3", metadata !"__addvsi3", metadata !"__addvsi3", metadata !2, i32 94, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 524329, metadata !"libgcc2.c", metadata !"/Users/bwilson/local/nightly/test-2010-04-14/build/llvmgcc.roots/llvmgcc~obj/src/gcc", metadata !3} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 524305, i32 0, i32 1, metadata !"libgcc2.c", metadata !"/Users/bwilson/local/nightly/test-2010-04-14/build/llvmgcc.roots/llvmgcc~obj/src/gcc", metadata !"4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build 00)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] +!5 = metadata !{metadata !6, metadata !6, metadata !6} +!6 = metadata !{i32 524310, metadata !2, metadata !"SItype", metadata !7, i32 152, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_typedef ] +!7 = metadata !{i32 524329, metadata !"libgcc2.h", metadata !"/Users/bwilson/local/nightly/test-2010-04-14/build/llvmgcc.roots/llvmgcc~obj/src/gcc", metadata !3} ; [ DW_TAG_file_type ] +!8 = metadata !{i32 524324, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!9 = metadata !{i32 95, i32 0, metadata !10, null} +!10 = metadata !{i32 524299, metadata !1, i32 94, i32 0} ; [ DW_TAG_lexical_block ] +!11 = metadata !{i32 100, i32 0, metadata !10, null}
diff --git a/src/LLVM/test/CodeGen/ARM/2010-05-14-IllegalType.ll b/src/LLVM/test/CodeGen/ARM/2010-05-14-IllegalType.ll new file mode 100644 index 0000000..99e5b09 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2010-05-14-IllegalType.ll
@@ -0,0 +1,10 @@ +; RUN: llc -march=thumb -mcpu=cortex-a8 -mtriple=thumbv7-eabi -float-abi=hard < %s | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin10" + +define <4 x i64> @f_4_i64(<4 x i64> %a, <4 x i64> %b) nounwind { +; CHECK: vadd.i64 + %y = add <4 x i64> %a, %b + ret <4 x i64> %y +}
diff --git a/src/LLVM/test/CodeGen/ARM/2010-05-17-FastAllocCrash.ll b/src/LLVM/test/CodeGen/ARM/2010-05-17-FastAllocCrash.ll new file mode 100644 index 0000000..813bf3c --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2010-05-17-FastAllocCrash.ll
@@ -0,0 +1,105 @@ +; RUN: llc < %s -regalloc=fast -verify-machineinstrs +target triple = "arm-pc-linux-gnu" + +; This test case would accidentally use the same physreg for two virtregs +; because allocVirtReg forgot to check if registers were already used in the +; instruction. +; This caused the RegScavenger to complain, but -verify-machineinstrs also +; catches it. + +%struct.CHESS_POSITION = type { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i32, i32, i8, i8, [64 x i8], i8, i8, i8, i8, i8 } + +@search = external global %struct.CHESS_POSITION ; <%struct.CHESS_POSITION*> [#uses=1] +@bishop_mobility_rr45 = external global [64 x [256 x i32]] ; <[64 x [256 x i32]]*> [#uses=1] + +declare fastcc i32 @FirstOne() + +define fastcc void @Evaluate() { +entry: + br i1 false, label %cond_false186, label %cond_true + +cond_true: ; preds = %entry + ret void + +cond_false186: ; preds = %entry + br i1 false, label %cond_true293, label %bb203 + +bb203: ; preds = %cond_false186 + ret void + +cond_true293: ; preds = %cond_false186 + br i1 false, label %cond_true298, label %cond_next317 + +cond_true298: ; preds = %cond_true293 + br i1 false, label %cond_next518, label %cond_true397.preheader + +cond_next317: ; preds = %cond_true293 + ret void + +cond_true397.preheader: ; preds = %cond_true298 + ret void + +cond_next518: ; preds = %cond_true298 + br i1 false, label %bb1069, label %cond_true522 + +cond_true522: ; preds = %cond_next518 + ret void + +bb1069: ; preds = %cond_next518 + br i1 false, label %cond_next1131, label %bb1096 + +bb1096: ; preds = %bb1069 + ret void + +cond_next1131: ; preds = %bb1069 + br i1 false, label %cond_next1207, label %cond_true1150 + +cond_true1150: ; preds = %cond_next1131 + ret void + +cond_next1207: ; preds = %cond_next1131 + br i1 false, label %cond_next1219, label %cond_true1211 + +cond_true1211: ; preds = %cond_next1207 + ret void + +cond_next1219: ; preds = %cond_next1207 + br i1 false, label 
%cond_true1223, label %cond_next1283 + +cond_true1223: ; preds = %cond_next1219 + br i1 false, label %cond_true1254, label %cond_true1264 + +cond_true1254: ; preds = %cond_true1223 + br i1 false, label %bb1567, label %cond_true1369.preheader + +cond_true1264: ; preds = %cond_true1223 + ret void + +cond_next1283: ; preds = %cond_next1219 + ret void + +cond_true1369.preheader: ; preds = %cond_true1254 + ret void + +bb1567: ; preds = %cond_true1254 + %tmp1591 = load i64* getelementptr inbounds (%struct.CHESS_POSITION* @search, i32 0, i32 4) ; <i64> [#uses=1] + %tmp1572 = tail call fastcc i32 @FirstOne() ; <i32> [#uses=1] + %tmp1594 = load i32* undef ; <i32> [#uses=1] + %tmp1594.upgrd.5 = trunc i32 %tmp1594 to i8 ; <i8> [#uses=1] + %shift.upgrd.6 = zext i8 %tmp1594.upgrd.5 to i64 ; <i64> [#uses=1] + %tmp1595 = lshr i64 %tmp1591, %shift.upgrd.6 ; <i64> [#uses=1] + %tmp1595.upgrd.7 = trunc i64 %tmp1595 to i32 ; <i32> [#uses=1] + %tmp1596 = and i32 %tmp1595.upgrd.7, 255 ; <i32> [#uses=1] + %gep.upgrd.8 = zext i32 %tmp1596 to i64 ; <i64> [#uses=1] + %tmp1598 = getelementptr [64 x [256 x i32]]* @bishop_mobility_rr45, i32 0, i32 %tmp1572, i64 %gep.upgrd.8 ; <i32*> [#uses=1] + %tmp1599 = load i32* %tmp1598 ; <i32> [#uses=1] + %tmp1602 = sub i32 0, %tmp1599 ; <i32> [#uses=1] + br i1 undef, label %cond_next1637, label %cond_true1607 + +cond_true1607: ; preds = %bb1567 + ret void + +cond_next1637: ; preds = %bb1567 + %tmp1662 = sub i32 %tmp1602, 0 ; <i32> [#uses=0] + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2010-05-18-LocalAllocCrash.ll b/src/LLVM/test/CodeGen/ARM/2010-05-18-LocalAllocCrash.ll new file mode 100644 index 0000000..9461643 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2010-05-18-LocalAllocCrash.ll
@@ -0,0 +1,36 @@ +; RUN: llc < %s -O0 -verify-machineinstrs -regalloc=fast +; rdar://problem/7948106 +;; This test would spill %R4 before the call to zz, but it forgot to move the +; 'last use' marker to the spill. + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64-n32" +target triple = "armv6-apple-darwin" + +%struct.q = type { i32, i32 } + +@.str = external constant [1 x i8] ; <[1 x i8]*> [#uses=1] + +define void @yy(%struct.q* %qq) nounwind { +entry: + %vla6 = alloca i8, i32 undef, align 1 ; <i8*> [#uses=1] + %vla10 = alloca i8, i32 undef, align 1 ; <i8*> [#uses=1] + %vla14 = alloca i8, i32 undef, align 1 ; <i8*> [#uses=1] + %vla18 = alloca i8, i32 undef, align 1 ; <i8*> [#uses=1] + %tmp21 = load i32* undef ; <i32> [#uses=1] + %0 = mul i32 1, %tmp21 ; <i32> [#uses=1] + %vla22 = alloca i8, i32 %0, align 1 ; <i8*> [#uses=1] + call void (...)* @zz(i8* getelementptr inbounds ([1 x i8]* @.str, i32 0, i32 0), i32 2, i32 1) + br i1 undef, label %if.then, label %if.end36 + +if.then: ; preds = %entry + %call = call i32 (...)* @x(%struct.q* undef, i8* undef, i8* %vla6, i8* %vla10, i32 undef) ; <i32> [#uses=0] + %call35 = call i32 (...)* @x(%struct.q* undef, i8* %vla14, i8* %vla18, i8* %vla22, i32 undef) ; <i32> [#uses=0] + unreachable + +if.end36: ; preds = %entry + ret void +} + +declare void @zz(...) + +declare i32 @x(...)
diff --git a/src/LLVM/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll b/src/LLVM/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll new file mode 100644 index 0000000..df9dbca --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
@@ -0,0 +1,25 @@ +; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s -check-prefix=ARM +; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s -check-prefix=THUMB +; rdar://7998649 + +%struct.foo = type { i64, i64 } + +define zeroext i8 @t(%struct.foo* %this) noreturn optsize { +entry: +; ARM: t: +; ARM: str r2, [r1], r0 + +; THUMB: t: +; THUMB-NOT: str r0, [r1], r0 +; THUMB: str r2, [r1] + %0 = getelementptr inbounds %struct.foo* %this, i32 0, i32 1 ; <i64*> [#uses=1] + store i32 0, i32* inttoptr (i32 8 to i32*), align 8 + br i1 undef, label %bb.nph96, label %bb3 + +bb3: ; preds = %entry + %1 = load i64* %0, align 4 ; <i64> [#uses=0] + unreachable + +bb.nph96: ; preds = %entry + unreachable +}
diff --git a/src/LLVM/test/CodeGen/ARM/2010-05-19-Shuffles.ll b/src/LLVM/test/CodeGen/ARM/2010-05-19-Shuffles.ll new file mode 100644 index 0000000..587c0af --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2010-05-19-Shuffles.ll
@@ -0,0 +1,21 @@ +; RUN: llc < %s -mtriple=armv7-eabi -mcpu=cortex-a8 +; pr7167 + +define <8 x i8> @f1(<8 x i8> %x) nounwind { + %y = shufflevector <8 x i8> %x, <8 x i8> undef, + <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5> + ret <8 x i8> %y +} + +define <8 x i8> @f2(<8 x i8> %x) nounwind { + %y = shufflevector <8 x i8> %x, <8 x i8> undef, + <8 x i32> <i32 1, i32 2, i32 0, i32 5, i32 3, i32 6, i32 7, i32 4> + ret <8 x i8> %y +} + +define void @f3(<4 x i64>* %xp) nounwind { + %x = load <4 x i64>* %xp + %y = shufflevector <4 x i64> %x, <4 x i64> undef, <4 x i32> <i32 0, i32 3, i32 2, i32 1> + store <4 x i64> %y, <4 x i64>* %xp + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll b/src/LLVM/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll new file mode 100644 index 0000000..e47c038 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll
@@ -0,0 +1,45 @@ +; RUN: llc < %s -march=arm -mattr=+neon -O0 -regalloc=linearscan + +; This test would crash the rewriter when trying to handle a spill after one of +; the @llvm.arm.neon.vld3.v8i8 defined three parts of a register. + +%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> } + +declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*, i32) nounwind readonly + +declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind + +define <8 x i8> @t3(i8* %A1, i8* %A2, i8* %A3, i8* %A4, i8* %A5, i8* %A6, i8* %A7, i8* %A8, i8* %B) nounwind { + %tmp1b = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A2, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2] + %tmp2b = extractvalue %struct.__neon_int8x8x3_t %tmp1b, 0 ; <<8 x i8>> [#uses=1] + %tmp4b = extractvalue %struct.__neon_int8x8x3_t %tmp1b, 1 ; <<8 x i8>> [#uses=1] + %tmp1d = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A4, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2] + %tmp2d = extractvalue %struct.__neon_int8x8x3_t %tmp1d, 0 ; <<8 x i8>> [#uses=1] + %tmp4d = extractvalue %struct.__neon_int8x8x3_t %tmp1d, 1 ; <<8 x i8>> [#uses=1] + %tmp1e = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A5, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=1] + %tmp2e = extractvalue %struct.__neon_int8x8x3_t %tmp1e, 0 ; <<8 x i8>> [#uses=1] + %tmp1f = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A6, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=1] + %tmp2f = extractvalue %struct.__neon_int8x8x3_t %tmp1f, 0 ; <<8 x i8>> [#uses=1] + %tmp1g = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A7, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2] + %tmp2g = extractvalue %struct.__neon_int8x8x3_t %tmp1g, 0 ; <<8 x i8>> [#uses=1] + %tmp4g = extractvalue %struct.__neon_int8x8x3_t %tmp1g, 1 ; <<8 x i8>> [#uses=1] + %tmp1h = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A8, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2] + 
%tmp2h = extractvalue %struct.__neon_int8x8x3_t %tmp1h, 0 ; <<8 x i8>> [#uses=1] + %tmp3h = extractvalue %struct.__neon_int8x8x3_t %tmp1h, 2 ; <<8 x i8>> [#uses=1] + %tmp2bd = add <8 x i8> %tmp2b, %tmp2d ; <<8 x i8>> [#uses=1] + %tmp4bd = add <8 x i8> %tmp4b, %tmp4d ; <<8 x i8>> [#uses=1] + %tmp2abcd = mul <8 x i8> undef, %tmp2bd ; <<8 x i8>> [#uses=1] + %tmp4abcd = mul <8 x i8> undef, %tmp4bd ; <<8 x i8>> [#uses=2] + call void @llvm.arm.neon.vst3.v8i8(i8* %A1, <8 x i8> %tmp4abcd, <8 x i8> zeroinitializer, <8 x i8> %tmp2abcd, i32 1) + %tmp2ef = sub <8 x i8> %tmp2e, %tmp2f ; <<8 x i8>> [#uses=1] + %tmp2gh = sub <8 x i8> %tmp2g, %tmp2h ; <<8 x i8>> [#uses=1] + %tmp3gh = sub <8 x i8> zeroinitializer, %tmp3h ; <<8 x i8>> [#uses=1] + %tmp4ef = sub <8 x i8> zeroinitializer, %tmp4g ; <<8 x i8>> [#uses=1] + %tmp2efgh = mul <8 x i8> %tmp2ef, %tmp2gh ; <<8 x i8>> [#uses=1] + %tmp3efgh = mul <8 x i8> undef, %tmp3gh ; <<8 x i8>> [#uses=1] + %tmp4efgh = mul <8 x i8> %tmp4ef, undef ; <<8 x i8>> [#uses=2] + call void @llvm.arm.neon.vst3.v8i8(i8* %A2, <8 x i8> %tmp4efgh, <8 x i8> %tmp3efgh, <8 x i8> %tmp2efgh, i32 1) + %tmp4 = sub <8 x i8> %tmp4efgh, %tmp4abcd ; <<8 x i8>> [#uses=1] + tail call void @llvm.arm.neon.vst3.v8i8(i8* %B, <8 x i8> zeroinitializer, <8 x i8> undef, <8 x i8> undef, i32 1) + ret <8 x i8> %tmp4 +}
diff --git a/src/LLVM/test/CodeGen/ARM/2010-05-21-BuildVector.ll b/src/LLVM/test/CodeGen/ARM/2010-05-21-BuildVector.ll new file mode 100644 index 0000000..cd1c9c8 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2010-05-21-BuildVector.ll
@@ -0,0 +1,43 @@ +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s +; Radar 7872877 + +define void @test(float* %fltp, i32 %packedValue, float* %table) nounwind { +entry: + %0 = load float* %fltp + %1 = insertelement <4 x float> undef, float %0, i32 0 + %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer + %3 = shl i32 %packedValue, 16 + %4 = ashr i32 %3, 30 + %.sum = add i32 %4, 4 + %5 = getelementptr inbounds float* %table, i32 %.sum +;CHECK: vldr.32 s + %6 = load float* %5, align 4 + %tmp11 = insertelement <4 x float> undef, float %6, i32 0 + %7 = shl i32 %packedValue, 18 + %8 = ashr i32 %7, 30 + %.sum12 = add i32 %8, 4 + %9 = getelementptr inbounds float* %table, i32 %.sum12 +;CHECK: vldr.32 s + %10 = load float* %9, align 4 + %tmp9 = insertelement <4 x float> %tmp11, float %10, i32 1 + %11 = shl i32 %packedValue, 20 + %12 = ashr i32 %11, 30 + %.sum13 = add i32 %12, 4 + %13 = getelementptr inbounds float* %table, i32 %.sum13 +;CHECK: vldr.32 s + %14 = load float* %13, align 4 + %tmp7 = insertelement <4 x float> %tmp9, float %14, i32 2 + %15 = shl i32 %packedValue, 22 + %16 = ashr i32 %15, 30 + %.sum14 = add i32 %16, 4 + %17 = getelementptr inbounds float* %table, i32 %.sum14 +;CHECK: vldr.32 s + %18 = load float* %17, align 4 + %tmp5 = insertelement <4 x float> %tmp7, float %18, i32 3 + %19 = fmul <4 x float> %tmp5, %2 + %20 = bitcast float* %fltp to i8* + tail call void @llvm.arm.neon.vst1.v4f32(i8* %20, <4 x float> %19, i32 1) + ret void +} + +declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
diff --git a/src/LLVM/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll b/src/LLVM/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll new file mode 100644 index 0000000..6f48796 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -march=arm -mattr=+neon +; Radar 8084742 + +%struct.__int8x8x2_t = type { [2 x <8 x i8>] } + +define void @foo(%struct.__int8x8x2_t* nocapture %a, i8* %b) nounwind { +entry: + %0 = bitcast %struct.__int8x8x2_t* %a to i128* ; <i128*> [#uses=1] + %srcval = load i128* %0, align 8 ; <i128> [#uses=2] + %tmp6 = trunc i128 %srcval to i64 ; <i64> [#uses=1] + %tmp8 = lshr i128 %srcval, 64 ; <i128> [#uses=1] + %tmp9 = trunc i128 %tmp8 to i64 ; <i64> [#uses=1] + %tmp16.i = bitcast i64 %tmp6 to <8 x i8> ; <<8 x i8>> [#uses=1] + %tmp20.i = bitcast i64 %tmp9 to <8 x i8> ; <<8 x i8>> [#uses=1] + tail call void @llvm.arm.neon.vst2.v8i8(i8* %b, <8 x i8> %tmp16.i, <8 x i8> %tmp20.i, i32 1) nounwind + ret void +} + +declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind
diff --git a/src/LLVM/test/CodeGen/ARM/2010-06-21-LdStMultipleBug.ll b/src/LLVM/test/CodeGen/ARM/2010-06-21-LdStMultipleBug.ll new file mode 100644 index 0000000..816a6d4 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2010-06-21-LdStMultipleBug.ll
@@ -0,0 +1,148 @@ +; RUN: llc < %s -mtriple=armv7-apple-darwin -O3 -mcpu=arm1136jf-s +; PR7421 + +%struct.CONTENTBOX = type { i32, i32, i32, i32, i32 } +%struct.FILE = type { i8* } +%struct.tilebox = type { %struct.tilebox*, double, double, double, double, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } +%struct.UNCOMBOX = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } +%struct.cellbox = type { i8*, i32, i32, i32, [9 x i32], i32, i32, i32, i32, i32, i32, i32, double, double, double, double, double, i32, i32, %struct.CONTENTBOX*, %struct.UNCOMBOX*, [8 x %struct.tilebox*] } +%struct.termbox = type { %struct.termbox*, i32, i32, i32, i32, i32 } + +@.str2708 = external constant [14 x i8], align 4 ; <[14 x i8]*> [#uses=1] + +define void @TW_oldinput(%struct.FILE* nocapture %fp) nounwind { +entry: + %xcenter = alloca i32, align 4 ; <i32*> [#uses=2] + %0 = call i32 (%struct.FILE*, i8*, ...)* @fscanf(%struct.FILE* %fp, i8* getelementptr inbounds ([14 x i8]* @.str2708, i32 0, i32 0), i32* undef, i32* undef, i32* %xcenter, i32* null) nounwind ; <i32> [#uses=1] + %1 = icmp eq i32 %0, 4 ; <i1> [#uses=1] + br i1 %1, label %bb, label %return + +bb: ; preds = %bb445, %entry + %2 = load %struct.cellbox** undef, align 4 ; <%struct.cellbox*> [#uses=2] + %3 = getelementptr inbounds %struct.cellbox* %2, i32 0, i32 3 ; <i32*> [#uses=1] + store i32 undef, i32* %3, align 4 + %4 = load i32* undef, align 4 ; <i32> [#uses=3] + %5 = icmp eq i32 undef, 1 ; <i1> [#uses=1] + br i1 %5, label %bb10, label %bb445 + +bb10: ; preds = %bb + br i1 undef, label %bb11, label %bb445 + +bb11: ; preds = %bb10 + %6 = load %struct.tilebox** undef, align 4 ; <%struct.tilebox*> [#uses=3] + %7 = load %struct.termbox** null, align 4 ; <%struct.termbox*> [#uses=1] + %8 = getelementptr inbounds %struct.tilebox* %6, i32 0, i32 13 ; <i32*> [#uses=1] + %9 = load i32* %8, align 4 ; <i32> [#uses=3] + %10 = getelementptr inbounds %struct.tilebox* %6, i32 0, i32 15 ; <i32*> [#uses=1] + %11 = 
load i32* %10, align 4 ; <i32> [#uses=1] + br i1 false, label %bb12, label %bb13 + +bb12: ; preds = %bb11 + unreachable + +bb13: ; preds = %bb11 + %iftmp.40.0.neg = sdiv i32 0, -2 ; <i32> [#uses=2] + %12 = sub nsw i32 0, %9 ; <i32> [#uses=1] + %13 = sitofp i32 %12 to double ; <double> [#uses=1] + %14 = fdiv double %13, 0.000000e+00 ; <double> [#uses=1] + %15 = fptosi double %14 to i32 ; <i32> [#uses=1] + %iftmp.41.0.in = add i32 0, %15 ; <i32> [#uses=1] + %iftmp.41.0.neg = sdiv i32 %iftmp.41.0.in, -2 ; <i32> [#uses=3] + br i1 undef, label %bb43.loopexit, label %bb21 + +bb21: ; preds = %bb13 + %16 = fptosi double undef to i32 ; <i32> [#uses=1] + %17 = fsub double undef, 0.000000e+00 ; <double> [#uses=1] + %not.460 = fcmp oge double %17, 5.000000e-01 ; <i1> [#uses=1] + %18 = zext i1 %not.460 to i32 ; <i32> [#uses=1] + %iftmp.42.0 = add i32 %16, %iftmp.41.0.neg ; <i32> [#uses=1] + %19 = add i32 %iftmp.42.0, %18 ; <i32> [#uses=1] + store i32 %19, i32* undef, align 4 + %20 = sub nsw i32 0, %9 ; <i32> [#uses=1] + %21 = sitofp i32 %20 to double ; <double> [#uses=1] + %22 = fdiv double %21, 0.000000e+00 ; <double> [#uses=2] + %23 = fptosi double %22 to i32 ; <i32> [#uses=1] + %24 = fsub double %22, undef ; <double> [#uses=1] + %not.461 = fcmp oge double %24, 5.000000e-01 ; <i1> [#uses=1] + %25 = zext i1 %not.461 to i32 ; <i32> [#uses=1] + %iftmp.43.0 = add i32 %23, %iftmp.41.0.neg ; <i32> [#uses=1] + %26 = add i32 %iftmp.43.0, %25 ; <i32> [#uses=1] + %27 = getelementptr inbounds %struct.tilebox* %6, i32 0, i32 10 ; <i32*> [#uses=1] + store i32 %26, i32* %27, align 4 + %28 = fptosi double undef to i32 ; <i32> [#uses=1] + %iftmp.45.0 = add i32 %28, %iftmp.40.0.neg ; <i32> [#uses=1] + %29 = add i32 %iftmp.45.0, 0 ; <i32> [#uses=1] + store i32 %29, i32* undef, align 4 + br label %bb43.loopexit + +bb36: ; preds = %bb43.loopexit, %bb36 + %termptr.0478 = phi %struct.termbox* [ %42, %bb36 ], [ %7, %bb43.loopexit ] ; <%struct.termbox*> [#uses=1] + %30 = load i32* undef, align 4 ; 
<i32> [#uses=1] + %31 = sub nsw i32 %30, %9 ; <i32> [#uses=1] + %32 = sitofp i32 %31 to double ; <double> [#uses=1] + %33 = fdiv double %32, 0.000000e+00 ; <double> [#uses=1] + %34 = fptosi double %33 to i32 ; <i32> [#uses=1] + %iftmp.46.0 = add i32 %34, %iftmp.41.0.neg ; <i32> [#uses=1] + %35 = add i32 %iftmp.46.0, 0 ; <i32> [#uses=1] + store i32 %35, i32* undef, align 4 + %36 = sub nsw i32 0, %11 ; <i32> [#uses=1] + %37 = sitofp i32 %36 to double ; <double> [#uses=1] + %38 = fmul double %37, 0.000000e+00 ; <double> [#uses=1] + %39 = fptosi double %38 to i32 ; <i32> [#uses=1] + %iftmp.47.0 = add i32 %39, %iftmp.40.0.neg ; <i32> [#uses=1] + %40 = add i32 %iftmp.47.0, 0 ; <i32> [#uses=1] + store i32 %40, i32* undef, align 4 + %41 = getelementptr inbounds %struct.termbox* %termptr.0478, i32 0, i32 0 ; <%struct.termbox**> [#uses=1] + %42 = load %struct.termbox** %41, align 4 ; <%struct.termbox*> [#uses=2] + %43 = icmp eq %struct.termbox* %42, null ; <i1> [#uses=1] + br i1 %43, label %bb52.loopexit, label %bb36 + +bb43.loopexit: ; preds = %bb21, %bb13 + br i1 undef, label %bb52.loopexit, label %bb36 + +bb52.loopexit: ; preds = %bb43.loopexit, %bb36 + %44 = icmp eq i32 %4, 0 ; <i1> [#uses=1] + br i1 %44, label %bb.nph485, label %bb54 + +bb54: ; preds = %bb52.loopexit + switch i32 %4, label %bb62 [ + i32 2, label %bb56 + i32 3, label %bb57 + ] + +bb56: ; preds = %bb54 + br label %bb62 + +bb57: ; preds = %bb54 + br label %bb62 + +bb62: ; preds = %bb57, %bb56, %bb54 + unreachable + +bb.nph485: ; preds = %bb52.loopexit + br label %bb248 + +bb248: ; preds = %bb322, %bb.nph485 + %45 = icmp eq i32 undef, %4 ; <i1> [#uses=1] + br i1 %45, label %bb322, label %bb249 + +bb249: ; preds = %bb248 + %46 = getelementptr inbounds %struct.cellbox* %2, i32 0, i32 21, i32 undef ; <%struct.tilebox**> [#uses=1] + %47 = load %struct.tilebox** %46, align 4 ; <%struct.tilebox*> [#uses=1] + %48 = getelementptr inbounds %struct.tilebox* %47, i32 0, i32 11 ; <i32*> [#uses=1] + store i32 undef, 
i32* %48, align 4 + unreachable + +bb322: ; preds = %bb248 + br i1 undef, label %bb248, label %bb445 + +bb445: ; preds = %bb322, %bb10, %bb + %49 = call i32 (%struct.FILE*, i8*, ...)* @fscanf(%struct.FILE* %fp, i8* getelementptr inbounds ([14 x i8]* @.str2708, i32 0, i32 0), i32* undef, i32* undef, i32* %xcenter, i32* null) nounwind ; <i32> [#uses=1] + %50 = icmp eq i32 %49, 4 ; <i1> [#uses=1] + br i1 %50, label %bb, label %return + +return: ; preds = %bb445, %entry + ret void +} + +declare i32 @fscanf(%struct.FILE* nocapture, i8* nocapture, ...) nounwind
diff --git a/src/LLVM/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll b/src/LLVM/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll new file mode 100644 index 0000000..ac8e809 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll
@@ -0,0 +1,146 @@ +; RUN: llc < %s -march=arm -mtriple=armv4t-unknown-linux-gnueabi | FileCheck %s +; PR 7433 +; XFAIL: * + +%0 = type { i8*, i8* } +%1 = type { i8*, i8*, i8* } +%"class.llvm::Record" = type { i32, %"class.std::basic_string", %"class.llvm::SMLoc", %"class.std::vector", %"class.std::vector", %"class.std::vector" } +%"class.llvm::RecordVal" = type { %"class.std::basic_string", %"struct.llvm::Init"*, i32, %"struct.llvm::Init"* } +%"class.llvm::SMLoc" = type { i8* } +%"class.llvm::StringInit" = type { [8 x i8], %"class.std::basic_string" } +%"class.std::basic_string" = type { %"class.llvm::SMLoc" } +%"class.std::vector" = type { [12 x i8] } +%"struct.llvm::Init" = type { i32 (...)** } + +@_ZTIN4llvm5RecTyE = external constant %0 ; <%0*> [#uses=1] +@_ZTIN4llvm4InitE = external constant %0 ; <%0*> [#uses=1] +@_ZTIN4llvm11RecordRecTyE = external constant %1 ; <%1*> [#uses=1] +@.str8 = external constant [47 x i8] ; <[47 x i8]*> [#uses=1] +@_ZTIN4llvm9UnsetInitE = external constant %1 ; <%1*> [#uses=1] +@.str51 = external constant [45 x i8] ; <[45 x i8]*> [#uses=1] +@__PRETTY_FUNCTION__._ZNK4llvm7VarInit12getFieldInitERNS_6RecordEPKNS_9RecordValERKSs = external constant [116 x i8] ; <[116 x i8]*> [#uses=1] + +@_ZN4llvm9RecordValC1ERKSsPNS_5RecTyEj = alias void (%"class.llvm::RecordVal"*, %"class.std::basic_string"*, %"struct.llvm::Init"*, i32)* @_ZN4llvm9RecordValC2ERKSsPNS_5RecTyEj ; <void (%"class.llvm::RecordVal"*, %"class.std::basic_string"*, %"struct.llvm::Init"*, i32)*> [#uses=0] + +declare i8* @__dynamic_cast(i8*, i8*, i8*, i32) + +declare void @__assert_fail(i8*, i8*, i32, i8*) noreturn + +declare void @_ZN4llvm9RecordValC2ERKSsPNS_5RecTyEj(%"class.llvm::RecordVal"*, %"class.std::basic_string"*, %"struct.llvm::Init"*, i32) align 2 + +define %"struct.llvm::Init"* @_ZNK4llvm7VarInit12getFieldInitERNS_6RecordEPKNS_9RecordValERKSs(%"class.llvm::StringInit"* %this, %"class.llvm::Record"* %R, %"class.llvm::RecordVal"* %RV, %"class.std::basic_string"* 
%FieldName) align 2 { +;CHECK: ldmia sp!, {r4, r5, r6, r7, r8, lr} +;CHECK: bx r12 @ TAILCALL +entry: + %.loc = alloca i32 ; <i32*> [#uses=2] + %tmp.i = getelementptr inbounds %"class.llvm::StringInit"* %this, i32 0, i32 0, i32 4 ; <i8*> [#uses=1] + %0 = bitcast i8* %tmp.i to %"struct.llvm::Init"** ; <%"struct.llvm::Init"**> [#uses=1] + %tmp2.i = load %"struct.llvm::Init"** %0 ; <%"struct.llvm::Init"*> [#uses=2] + %1 = icmp eq %"struct.llvm::Init"* %tmp2.i, null ; <i1> [#uses=1] + br i1 %1, label %entry.return_crit_edge, label %tmpbb + +entry.return_crit_edge: ; preds = %entry + br label %return + +tmpbb: ; preds = %entry + %2 = bitcast %"struct.llvm::Init"* %tmp2.i to i8* ; <i8*> [#uses=1] + %3 = tail call i8* @__dynamic_cast(i8* %2, i8* bitcast (%0* @_ZTIN4llvm5RecTyE to i8*), i8* bitcast (%1* @_ZTIN4llvm11RecordRecTyE to i8*), i32 -1) ; <i8*> [#uses=1] + %phitmp = icmp eq i8* %3, null ; <i1> [#uses=1] + br i1 %phitmp, label %.return_crit_edge, label %if.then + +.return_crit_edge: ; preds = %tmpbb + br label %return + +if.then: ; preds = %tmpbb + %tmp2.i.i.i.i = getelementptr inbounds %"class.llvm::StringInit"* %this, i32 0, i32 1, i32 0, i32 0 ; <i8**> [#uses=1] + %tmp3.i.i.i.i = load i8** %tmp2.i.i.i.i ; <i8*> [#uses=2] + %arrayidx.i.i.i.i = getelementptr inbounds i8* %tmp3.i.i.i.i, i32 -12 ; <i8*> [#uses=1] + %tmp.i.i.i = bitcast i8* %arrayidx.i.i.i.i to i32* ; <i32*> [#uses=1] + %tmp2.i.i.i = load i32* %tmp.i.i.i ; <i32> [#uses=1] + %tmp.i5 = getelementptr inbounds %"class.llvm::Record"* %R, i32 0, i32 4 ; <%"class.std::vector"*> [#uses=1] + %tmp2.i.i = getelementptr inbounds %"class.llvm::Record"* %R, i32 0, i32 4, i32 0, i32 4 ; <i8*> [#uses=1] + %4 = bitcast i8* %tmp2.i.i to %"class.llvm::RecordVal"** ; <%"class.llvm::RecordVal"**> [#uses=1] + %tmp3.i.i6 = load %"class.llvm::RecordVal"** %4 ; <%"class.llvm::RecordVal"*> [#uses=1] + %tmp5.i.i = bitcast %"class.std::vector"* %tmp.i5 to %"class.llvm::RecordVal"** ; <%"class.llvm::RecordVal"**> [#uses=1] + 
%tmp6.i.i = load %"class.llvm::RecordVal"** %tmp5.i.i ; <%"class.llvm::RecordVal"*> [#uses=5] + %sub.ptr.lhs.cast.i.i = ptrtoint %"class.llvm::RecordVal"* %tmp3.i.i6 to i32 ; <i32> [#uses=1] + %sub.ptr.rhs.cast.i.i = ptrtoint %"class.llvm::RecordVal"* %tmp6.i.i to i32 ; <i32> [#uses=1] + %sub.ptr.sub.i.i = sub i32 %sub.ptr.lhs.cast.i.i, %sub.ptr.rhs.cast.i.i ; <i32> [#uses=1] + %sub.ptr.div.i.i = ashr i32 %sub.ptr.sub.i.i, 4 ; <i32> [#uses=1] + br label %codeRepl + +codeRepl: ; preds = %if.then + %targetBlock = call i1 @_ZNK4llvm7VarInit12getFieldInitERNS_6RecordEPKNS_9RecordValERKSs_for.cond.i(i32 %sub.ptr.div.i.i, %"class.llvm::RecordVal"* %tmp6.i.i, i32 %tmp2.i.i.i, i8* %tmp3.i.i.i.i, i32* %.loc) ; <i1> [#uses=1] + %.reload = load i32* %.loc ; <i32> [#uses=3] + br i1 %targetBlock, label %for.cond.i.return_crit_edge, label %_ZN4llvm6Record8getValueENS_9StringRefE.exit + +for.cond.i.return_crit_edge: ; preds = %codeRepl + br label %return + +_ZN4llvm6Record8getValueENS_9StringRefE.exit: ; preds = %codeRepl + %add.ptr.i.i = getelementptr inbounds %"class.llvm::RecordVal"* %tmp6.i.i, i32 %.reload ; <%"class.llvm::RecordVal"*> [#uses=2] + %tobool5 = icmp eq %"class.llvm::RecordVal"* %add.ptr.i.i, null ; <i1> [#uses=1] + br i1 %tobool5, label %_ZN4llvm6Record8getValueENS_9StringRefE.exit.return_crit_edge, label %if.then6 + +_ZN4llvm6Record8getValueENS_9StringRefE.exit.return_crit_edge: ; preds = %_ZN4llvm6Record8getValueENS_9StringRefE.exit + br label %return + +if.then6: ; preds = %_ZN4llvm6Record8getValueENS_9StringRefE.exit + %cmp = icmp eq %"class.llvm::RecordVal"* %add.ptr.i.i, %RV ; <i1> [#uses=1] + br i1 %cmp, label %if.then6.if.end_crit_edge, label %land.lhs.true + +if.then6.if.end_crit_edge: ; preds = %if.then6 + br label %if.end + +land.lhs.true: ; preds = %if.then6 + %tobool10 = icmp eq %"class.llvm::RecordVal"* %RV, null ; <i1> [#uses=1] + br i1 %tobool10, label %lor.lhs.false, label %land.lhs.true.return_crit_edge + +land.lhs.true.return_crit_edge: ; 
preds = %land.lhs.true + br label %return + +lor.lhs.false: ; preds = %land.lhs.true + %tmp.i3 = getelementptr inbounds %"class.llvm::RecordVal"* %tmp6.i.i, i32 %.reload, i32 3 ; <%"struct.llvm::Init"**> [#uses=1] + %tmp2.i4 = load %"struct.llvm::Init"** %tmp.i3 ; <%"struct.llvm::Init"*> [#uses=2] + %5 = icmp eq %"struct.llvm::Init"* %tmp2.i4, null ; <i1> [#uses=1] + br i1 %5, label %lor.lhs.false.if.end_crit_edge, label %tmpbb1 + +lor.lhs.false.if.end_crit_edge: ; preds = %lor.lhs.false + br label %if.end + +tmpbb1: ; preds = %lor.lhs.false + %6 = bitcast %"struct.llvm::Init"* %tmp2.i4 to i8* ; <i8*> [#uses=1] + %7 = tail call i8* @__dynamic_cast(i8* %6, i8* bitcast (%0* @_ZTIN4llvm4InitE to i8*), i8* bitcast (%1* @_ZTIN4llvm9UnsetInitE to i8*), i32 -1) ; <i8*> [#uses=1] + %phitmp32 = icmp eq i8* %7, null ; <i1> [#uses=1] + br i1 %phitmp32, label %.if.end_crit_edge, label %.return_crit_edge1 + +.return_crit_edge1: ; preds = %tmpbb1 + br label %return + +.if.end_crit_edge: ; preds = %tmpbb1 + br label %if.end + +if.end: ; preds = %.if.end_crit_edge, %lor.lhs.false.if.end_crit_edge, %if.then6.if.end_crit_edge + %tmp.i1 = getelementptr inbounds %"class.llvm::RecordVal"* %tmp6.i.i, i32 %.reload, i32 3 ; <%"struct.llvm::Init"**> [#uses=1] + %tmp2.i2 = load %"struct.llvm::Init"** %tmp.i1 ; <%"struct.llvm::Init"*> [#uses=3] + %8 = bitcast %"class.llvm::StringInit"* %this to %"struct.llvm::Init"* ; <%"struct.llvm::Init"*> [#uses=1] + %cmp19 = icmp eq %"struct.llvm::Init"* %tmp2.i2, %8 ; <i1> [#uses=1] + br i1 %cmp19, label %cond.false, label %cond.end + +cond.false: ; preds = %if.end + tail call void @__assert_fail(i8* getelementptr inbounds ([45 x i8]* @.str51, i32 0, i32 0), i8* getelementptr inbounds ([47 x i8]* @.str8, i32 0, i32 0), i32 1141, i8* getelementptr inbounds ([116 x i8]* @__PRETTY_FUNCTION__._ZNK4llvm7VarInit12getFieldInitERNS_6RecordEPKNS_9RecordValERKSs, i32 0, i32 0)) noreturn + unreachable + +cond.end: ; preds = %if.end + %9 = bitcast 
%"struct.llvm::Init"* %tmp2.i2 to %"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)*** ; <%"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)***> [#uses=1] + %10 = load %"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)*** %9 ; <%"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)**> [#uses=1] + %vfn = getelementptr inbounds %"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)** %10, i32 8 ; <%"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)**> [#uses=1] + %11 = load %"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)** %vfn ; <%"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)*> [#uses=1] + %call25 = tail call %"struct.llvm::Init"* %11(%"struct.llvm::Init"* %tmp2.i2, %"class.llvm::Record"* %R, %"class.llvm::RecordVal"* %RV, %"class.std::basic_string"* %FieldName) ; <%"struct.llvm::Init"*> [#uses=1] + ret %"struct.llvm::Init"* %call25 + +return: ; preds = %.return_crit_edge1, %land.lhs.true.return_crit_edge, %_ZN4llvm6Record8getValueENS_9StringRefE.exit.return_crit_edge, %for.cond.i.return_crit_edge, %.return_crit_edge, %entry.return_crit_edge + ret %"struct.llvm::Init"* null +} + +declare i1 @_ZNK4llvm7VarInit12getFieldInitERNS_6RecordEPKNS_9RecordValERKSs_for.cond.i(i32, %"class.llvm::RecordVal"*, i32, i8*, i32*)
diff --git a/src/LLVM/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll b/src/LLVM/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll new file mode 100644 index 0000000..cdb11c7 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll
@@ -0,0 +1,75 @@ +; RUN: llc < %s + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin3.0.0-iphoneos" + +@length = common global i32 0, align 4 ; <i32*> [#uses=1] + +define void @x0(i8* nocapture %buf, i32 %nbytes) nounwind optsize { +entry: + tail call void @llvm.dbg.value(metadata !{i8* %buf}, i64 0, metadata !0), !dbg !15 + tail call void @llvm.dbg.value(metadata !{i32 %nbytes}, i64 0, metadata !8), !dbg !16 + %tmp = load i32* @length, !dbg !17 ; <i32> [#uses=3] + %cmp = icmp eq i32 %tmp, -1, !dbg !17 ; <i1> [#uses=1] + %cmp.not = xor i1 %cmp, true ; <i1> [#uses=1] + %cmp3 = icmp ult i32 %tmp, %nbytes, !dbg !17 ; <i1> [#uses=1] + %or.cond = and i1 %cmp.not, %cmp3 ; <i1> [#uses=1] + tail call void @llvm.dbg.value(metadata !{i32 %tmp}, i64 0, metadata !8), !dbg !17 + %nbytes.addr.0 = select i1 %or.cond, i32 %tmp, i32 %nbytes ; <i32> [#uses=1] + tail call void @llvm.dbg.value(metadata !18, i64 0, metadata !10), !dbg !19 + br label %while.cond, !dbg !20 + +while.cond: ; preds = %while.body, %entry + %0 = phi i32 [ 0, %entry ], [ %inc, %while.body ] ; <i32> [#uses=3] + %buf.addr.0 = getelementptr i8* %buf, i32 %0 ; <i8*> [#uses=1] + %cmp7 = icmp ult i32 %0, %nbytes.addr.0, !dbg !20 ; <i1> [#uses=1] + br i1 %cmp7, label %land.rhs, label %while.end, !dbg !20 + +land.rhs: ; preds = %while.cond + %call = tail call i32 @x1() nounwind optsize, !dbg !20 ; <i32> [#uses=2] + %cmp9 = icmp eq i32 %call, -1, !dbg !20 ; <i1> [#uses=1] + br i1 %cmp9, label %while.end, label %while.body, !dbg !20 + +while.body: ; preds = %land.rhs + %conv = trunc i32 %call to i8, !dbg !21 ; <i8> [#uses=1] + store i8 %conv, i8* %buf.addr.0, !dbg !21 + %inc = add i32 %0, 1, !dbg !23 ; <i32> [#uses=1] + br label %while.cond, !dbg !24 + +while.end: ; preds = %land.rhs, %while.cond + ret void, !dbg !25 +} + +declare i32 @x1() optsize + +declare void @llvm.dbg.value(metadata, i64, 
metadata) nounwind readnone + +!llvm.dbg.lv.fn = !{!0, !8, !10, !12} +!llvm.dbg.gv = !{!14} + +!0 = metadata !{i32 524545, metadata !1, metadata !"buf", metadata !2, i32 4, metadata !6} ; [ DW_TAG_arg_variable ] +!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"x0", metadata !"x0", metadata !"x0", metadata !2, i32 5, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 524329, metadata !"t.c", metadata !"/private/tmp", metadata !3} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 524305, i32 0, i32 12, metadata !"t.c", metadata !".", metadata !"clang 2.0", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] +!5 = metadata !{null} +!6 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ] +!7 = metadata !{i32 524324, metadata !2, metadata !"unsigned char", metadata !2, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ] +!8 = metadata !{i32 524545, metadata !1, metadata !"nbytes", metadata !2, i32 4, metadata !9} ; [ DW_TAG_arg_variable ] +!9 = metadata !{i32 524324, metadata !2, metadata !"unsigned long", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!10 = metadata !{i32 524544, metadata !11, metadata !"nread", metadata !2, i32 6, metadata !9} ; [ DW_TAG_auto_variable ] +!11 = metadata !{i32 524299, metadata !1, i32 5, i32 1} ; [ DW_TAG_lexical_block ] +!12 = metadata !{i32 524544, metadata !11, metadata !"c", metadata !2, i32 7, metadata !13} ; [ DW_TAG_auto_variable ] +!13 = metadata !{i32 524324, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!14 = metadata !{i32 524340, i32 0, metadata !2, metadata !"length", metadata 
!"length", metadata !"length", metadata !2, i32 1, metadata !13, i1 false, i1 true, i32* @length} ; [ DW_TAG_variable ] +!15 = metadata !{i32 4, i32 24, metadata !1, null} +!16 = metadata !{i32 4, i32 43, metadata !1, null} +!17 = metadata !{i32 9, i32 2, metadata !11, null} +!18 = metadata !{i32 0} +!19 = metadata !{i32 10, i32 2, metadata !11, null} +!20 = metadata !{i32 11, i32 2, metadata !11, null} +!21 = metadata !{i32 12, i32 3, metadata !22, null} +!22 = metadata !{i32 524299, metadata !11, i32 11, i32 45} ; [ DW_TAG_lexical_block ] +!23 = metadata !{i32 13, i32 3, metadata !22, null} +!24 = metadata !{i32 14, i32 2, metadata !22, null} +!25 = metadata !{i32 15, i32 1, metadata !11, null}
diff --git a/src/LLVM/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll b/src/LLVM/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll new file mode 100644 index 0000000..b9d5600 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll
@@ -0,0 +1,25 @@ +; RUN: llc < %s -O0 -mcpu=cortex-a8 | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin10" + +; This tests the fast register allocator's handling of partial redefines: +; +; %reg1028:dsub_0<def>, %reg1028:dsub_1<def> = VLD1q64 %reg1025... +; %reg1030:dsub_1<def> = COPY %reg1028:dsub_0<kill> +; +; %reg1028 gets allocated %Q0, and if %reg1030 is reloaded for the partial +; redef, it cannot also get %Q0. + +; CHECK: vld1.64 {d16, d17}, [r{{.}}] +; CHECK-NOT: vld1.64 {d16, d17} +; CHECK: vmov.f64 d19, d16 + +define i32 @test(i8* %arg) nounwind { +entry: + %0 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %arg, i32 1) + %1 = shufflevector <2 x i64> undef, <2 x i64> %0, <2 x i32> <i32 1, i32 2> + store <2 x i64> %1, <2 x i64>* undef, align 16 + ret i32 undef +} + +declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*, i32) nounwind readonly
diff --git a/src/LLVM/test/CodeGen/ARM/2010-06-29-SubregImpDefs.ll b/src/LLVM/test/CodeGen/ARM/2010-06-29-SubregImpDefs.ll new file mode 100644 index 0000000..984583e --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2010-06-29-SubregImpDefs.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=arm -mattr=+neon + +@.str271 = external constant [21 x i8], align 4 ; <[21 x i8]*> [#uses=1] +@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (i32, i8**)* @main to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +define i32 @main(i32 %argc, i8** %argv) nounwind { +entry: + %0 = shufflevector <2 x i64> undef, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 2> ; <<2 x i64>> [#uses=1] + store <2 x i64> %0, <2 x i64>* undef, align 16 + %val4723 = load <8 x i16>* undef ; <<8 x i16>> [#uses=1] + call void @PrintShortX(i8* getelementptr inbounds ([21 x i8]* @.str271, i32 0, i32 0), <8 x i16> %val4723, i32 0) nounwind + ret i32 undef +} + +declare void @PrintShortX(i8*, <8 x i16>, i32) nounwind
diff --git a/src/LLVM/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll b/src/LLVM/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll new file mode 100644 index 0000000..c03c815 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll
@@ -0,0 +1,95 @@ +; RUN: llc -enable-correct-eh-support < %s +; PR7716 +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin10.0.0" + +%0 = type { i8*, i8* } +%struct.A = type { i32 } + +@d = internal global i32 0, align 4 ; <i32*> [#uses=6] +@_ZTVN10__cxxabiv117__class_type_infoE = external global i8* ; <i8**> [#uses=1] +@_ZTS1A = internal constant [3 x i8] c"1A\00" ; <[3 x i8]*> [#uses=1] +@_ZTI1A = internal constant %0 { i8* bitcast (i8** getelementptr inbounds (i8** @_ZTVN10__cxxabiv117__class_type_infoE, i32 2) to i8*), i8* getelementptr inbounds ([3 x i8]* @_ZTS1A, i32 0, i32 0) } ; <%0*> [#uses=1] +@.str2 = private constant [18 x i8] c"c == %d, d == %d\0A\00" ; <[18 x i8]*> [#uses=1] +@.str3 = private constant [16 x i8] c"A(const A&) %d\0A\00" ; <[16 x i8]*> [#uses=1] +@.str4 = private constant [9 x i8] c"~A() %d\0A\00" ; <[9 x i8]*> [#uses=1] +@.str5 = private constant [8 x i8] c"A() %d\0A\00" ; <[8 x i8]*> [#uses=1] +@str = internal constant [14 x i8] c"Throwing 1...\00" ; <[14 x i8]*> [#uses=1] +@str1 = internal constant [8 x i8] c"Caught.\00" ; <[8 x i8]*> [#uses=1] + +declare i32 @printf(i8* nocapture, ...) nounwind + +declare i8* @__cxa_allocate_exception(i32) + +declare i8* @llvm.eh.exception() nounwind readonly + +declare i32 @__gxx_personality_sj0(...) + +declare i32 @llvm.eh.selector(i8*, i8*, ...) 
nounwind + +declare i32 @llvm.eh.typeid.for(i8*) nounwind + +declare void @_Unwind_SjLj_Resume(i8*) + +define internal void @_ZN1AD1Ev(%struct.A* nocapture %this) nounwind ssp align 2 { +entry: + %tmp.i = getelementptr inbounds %struct.A* %this, i32 0, i32 0 ; <i32*> [#uses=1] + %tmp2.i = load i32* %tmp.i ; <i32> [#uses=1] + %call.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str4, i32 0, i32 0), i32 %tmp2.i) nounwind ; <i32> [#uses=0] + %tmp3.i = load i32* @d ; <i32> [#uses=1] + %inc.i = add nsw i32 %tmp3.i, 1 ; <i32> [#uses=1] + store i32 %inc.i, i32* @d + ret void +} + +declare void @__cxa_throw(i8*, i8*, i8*) + +define i32 @main() ssp { +entry: + %puts.i = tail call i32 @puts(i8* getelementptr inbounds ([14 x i8]* @str, i32 0, i32 0)) ; <i32> [#uses=0] + %exception.i = tail call i8* @__cxa_allocate_exception(i32 4) nounwind ; <i8*> [#uses=2] + %tmp2.i.i.i = bitcast i8* %exception.i to i32* ; <i32*> [#uses=1] + store i32 1, i32* %tmp2.i.i.i + %call.i.i.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str5, i32 0, i32 0), i32 1) nounwind ; <i32> [#uses=0] + invoke void @__cxa_throw(i8* %exception.i, i8* bitcast (%0* @_ZTI1A to i8*), i8* bitcast (void (%struct.A*)* @_ZN1AD1Ev to i8*)) noreturn + to label %.noexc unwind label %lpad + +.noexc: ; preds = %entry + unreachable + +try.cont: ; preds = %lpad + %0 = tail call i8* @__cxa_get_exception_ptr(i8* %exn) nounwind ; <i8*> [#uses=0] + %call.i.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str3, i32 0, i32 0), i32 2) nounwind ; <i32> [#uses=0] + %1 = tail call i8* @__cxa_begin_catch(i8* %exn) nounwind ; <i8*> [#uses=0] + %puts = tail call i32 @puts(i8* getelementptr inbounds ([8 x i8]* @str1, i32 0, i32 0)) ; <i32> [#uses=0] + %call.i.i3 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str4, i32 0, i32 0), i32 2) nounwind ; <i32> [#uses=0] + %tmp3.i.i = load i32* @d ; <i32> [#uses=1] + %inc.i.i4 = 
add nsw i32 %tmp3.i.i, 1 ; <i32> [#uses=1] + store i32 %inc.i.i4, i32* @d + tail call void @__cxa_end_catch() + %tmp13 = load i32* @d ; <i32> [#uses=1] + %call14 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([18 x i8]* @.str2, i32 0, i32 0), i32 2, i32 %tmp13) ; <i32> [#uses=0] + %tmp16 = load i32* @d ; <i32> [#uses=1] + %cmp = icmp ne i32 %tmp16, 2 ; <i1> [#uses=1] + %conv = zext i1 %cmp to i32 ; <i32> [#uses=1] + ret i32 %conv + +lpad: ; preds = %entry + %exn = tail call i8* @llvm.eh.exception() nounwind ; <i8*> [#uses=4] + %eh.selector = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i8* bitcast (%0* @_ZTI1A to i8*), i8* null) nounwind ; <i32> [#uses=1] + %2 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (%0* @_ZTI1A to i8*)) nounwind ; <i32> [#uses=1] + %3 = icmp eq i32 %eh.selector, %2 ; <i1> [#uses=1] + br i1 %3, label %try.cont, label %eh.resume + +eh.resume: ; preds = %lpad + tail call void @_Unwind_SjLj_Resume(i8* %exn) noreturn + unreachable +} + +declare i8* @__cxa_get_exception_ptr(i8*) + +declare i8* @__cxa_begin_catch(i8*) + +declare void @__cxa_end_catch() + +declare i32 @puts(i8* nocapture) nounwind
diff --git a/src/LLVM/test/CodeGen/ARM/2010-08-04-EHCrash.ll b/src/LLVM/test/CodeGen/ARM/2010-08-04-EHCrash.ll new file mode 100644 index 0000000..f57b7e6 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2010-08-04-EHCrash.ll
@@ -0,0 +1,65 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 +; <rdar://problem/8264008> + +define linkonce_odr arm_apcscc void @func1() { +entry: + %save_filt.936 = alloca i32 ; <i32*> [#uses=2] + %save_eptr.935 = alloca i8* ; <i8**> [#uses=2] + %eh_exception = alloca i8* ; <i8**> [#uses=5] + %eh_selector = alloca i32 ; <i32*> [#uses=3] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + call arm_apcscc void @func2() + br label %return + +bb: ; No predecessors! + %eh_select = load i32* %eh_selector ; <i32> [#uses=1] + store i32 %eh_select, i32* %save_filt.936, align 4 + %eh_value = load i8** %eh_exception ; <i8*> [#uses=1] + store i8* %eh_value, i8** %save_eptr.935, align 4 + invoke arm_apcscc void @func3() + to label %invcont unwind label %lpad + +invcont: ; preds = %bb + %tmp6 = load i8** %save_eptr.935, align 4 ; <i8*> [#uses=1] + store i8* %tmp6, i8** %eh_exception, align 4 + %tmp7 = load i32* %save_filt.936, align 4 ; <i32> [#uses=1] + store i32 %tmp7, i32* %eh_selector, align 4 + br label %Unwind + +bb12: ; preds = %ppad + call arm_apcscc void @_ZSt9terminatev() noreturn nounwind + unreachable + +return: ; preds = %entry + ret void + +lpad: ; preds = %bb + %eh_ptr = call i8* @llvm.eh.exception() ; <i8*> [#uses=1] + store i8* %eh_ptr, i8** %eh_exception + %eh_ptr13 = load i8** %eh_exception ; <i8*> [#uses=1] + %eh_select14 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh_ptr13, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i32 1) + store i32 %eh_select14, i32* %eh_selector + br label %ppad + +ppad: + br label %bb12 + +Unwind: + %eh_ptr15 = load i8** %eh_exception + call arm_apcscc void @_Unwind_SjLj_Resume(i8* %eh_ptr15) + unreachable +} + +declare arm_apcscc void @func2() + +declare arm_apcscc void @_ZSt9terminatev() noreturn nounwind + +declare i8* @llvm.eh.exception() nounwind readonly + +declare i32 @llvm.eh.selector(i8*, i8*, ...) 
nounwind + +declare arm_apcscc void @_Unwind_SjLj_Resume(i8*) + +declare arm_apcscc void @func3() + +declare arm_apcscc i32 @__gxx_personality_sj0(...)
diff --git a/src/LLVM/test/CodeGen/ARM/2010-08-04-StackVariable.ll b/src/LLVM/test/CodeGen/ARM/2010-08-04-StackVariable.ll new file mode 100644 index 0000000..6aeaa26 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2010-08-04-StackVariable.ll
@@ -0,0 +1,124 @@ +; RUN: llc -O0 -mtriple=arm-apple-darwin < %s | grep DW_OP_breg +; Use DW_OP_breg in variable's location expression if the variable is in a stack slot. + +%struct.SVal = type { i8*, i32 } + +define i32 @_Z3fooi4SVal(i32 %i, %struct.SVal* noalias %location) nounwind ssp { +entry: + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata !23), !dbg !24 + call void @llvm.dbg.value(metadata !{%struct.SVal* %location}, i64 0, metadata !25), !dbg !24 + %0 = icmp ne i32 %i, 0, !dbg !27 ; <i1> [#uses=1] + br i1 %0, label %bb, label %bb1, !dbg !27 + +bb: ; preds = %entry + %1 = getelementptr inbounds %struct.SVal* %location, i32 0, i32 1, !dbg !29 ; <i32*> [#uses=1] + %2 = load i32* %1, align 8, !dbg !29 ; <i32> [#uses=1] + %3 = add i32 %2, %i, !dbg !29 ; <i32> [#uses=1] + br label %bb2, !dbg !29 + +bb1: ; preds = %entry + %4 = getelementptr inbounds %struct.SVal* %location, i32 0, i32 1, !dbg !30 ; <i32*> [#uses=1] + %5 = load i32* %4, align 8, !dbg !30 ; <i32> [#uses=1] + %6 = sub i32 %5, 1, !dbg !30 ; <i32> [#uses=1] + br label %bb2, !dbg !30 + +bb2: ; preds = %bb1, %bb + %.0 = phi i32 [ %3, %bb ], [ %6, %bb1 ] ; <i32> [#uses=1] + br label %return, !dbg !29 + +return: ; preds = %bb2 + ret i32 %.0, !dbg !29 +} + +define linkonce_odr void @_ZN4SValC1Ev(%struct.SVal* %this) nounwind ssp align 2 { +entry: + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + call void @llvm.dbg.value(metadata !{%struct.SVal* %this}, i64 0, metadata !31), !dbg !34 + %0 = getelementptr inbounds %struct.SVal* %this, i32 0, i32 0, !dbg !34 ; <i8**> [#uses=1] + store i8* null, i8** %0, align 8, !dbg !34 + %1 = getelementptr inbounds %struct.SVal* %this, i32 0, i32 1, !dbg !34 ; <i32*> [#uses=1] + store i32 0, i32* %1, align 8, !dbg !34 + br label %return, !dbg !34 + +return: ; preds = %entry + ret void, !dbg !35 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +define i32 @main() 
nounwind ssp { +entry: + %0 = alloca %struct.SVal ; <%struct.SVal*> [#uses=3] + %v = alloca %struct.SVal ; <%struct.SVal*> [#uses=4] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + call void @llvm.dbg.declare(metadata !{%struct.SVal* %v}, metadata !38), !dbg !41 + call void @_ZN4SValC1Ev(%struct.SVal* %v) nounwind, !dbg !41 + %1 = getelementptr inbounds %struct.SVal* %v, i32 0, i32 1, !dbg !42 ; <i32*> [#uses=1] + store i32 1, i32* %1, align 8, !dbg !42 + %2 = getelementptr inbounds %struct.SVal* %0, i32 0, i32 0, !dbg !43 ; <i8**> [#uses=1] + %3 = getelementptr inbounds %struct.SVal* %v, i32 0, i32 0, !dbg !43 ; <i8**> [#uses=1] + %4 = load i8** %3, align 8, !dbg !43 ; <i8*> [#uses=1] + store i8* %4, i8** %2, align 8, !dbg !43 + %5 = getelementptr inbounds %struct.SVal* %0, i32 0, i32 1, !dbg !43 ; <i32*> [#uses=1] + %6 = getelementptr inbounds %struct.SVal* %v, i32 0, i32 1, !dbg !43 ; <i32*> [#uses=1] + %7 = load i32* %6, align 8, !dbg !43 ; <i32> [#uses=1] + store i32 %7, i32* %5, align 8, !dbg !43 + %8 = call i32 @_Z3fooi4SVal(i32 2, %struct.SVal* noalias %0) nounwind, !dbg !43 ; <i32> [#uses=0] + call void @llvm.dbg.value(metadata !{i32 %8}, i64 0, metadata !44), !dbg !43 + br label %return, !dbg !45 + +return: ; preds = %entry + ret i32 0, !dbg !45 +} + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.sp = !{!0, !9, !16, !17, !20} + +!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", metadata !2, i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 524307, metadata !2, metadata !"SVal", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_structure_type ] +!2 = metadata !{i32 524329, metadata !"small.cc", metadata !"/Users/manav/R8248330", metadata !3} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 524305, i32 0, i32 4, metadata !"small.cc", 
metadata !"/Users/manav/R8248330", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!4 = metadata !{metadata !5, metadata !7, metadata !0, metadata !9} +!5 = metadata !{i32 524301, metadata !1, metadata !"Data", metadata !2, i32 7, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ] +!6 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] +!7 = metadata !{i32 524301, metadata !1, metadata !"Kind", metadata !2, i32 8, i64 32, i64 32, i64 64, i32 0, metadata !8} ; [ DW_TAG_member ] +!8 = metadata !{i32 524324, metadata !2, metadata !"unsigned int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!9 = metadata !{i32 524334, i32 0, metadata !1, metadata !"~SVal", metadata !"~SVal", metadata !"", metadata !2, i32 12, metadata !10, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!10 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ] +!11 = metadata !{null, metadata !12, metadata !13} +!12 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ] +!13 = metadata !{i32 524324, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!14 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null} ; [ DW_TAG_subroutine_type ] +!15 = metadata !{null, metadata !12} +!16 = metadata !{i32 524334, i32 0, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"_ZN4SValC1Ev", metadata !2, i32 11, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void (%struct.SVal*)* @_ZN4SValC1Ev} ; [ 
DW_TAG_subprogram ] +!17 = metadata !{i32 524334, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3fooi4SVal", metadata !2, i32 16, metadata !18, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, %struct.SVal*)* @_Z3fooi4SVal} ; [ DW_TAG_subprogram ] +!18 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !19, i32 0, null} ; [ DW_TAG_subroutine_type ] +!19 = metadata !{metadata !13, metadata !13, metadata !1} +!20 = metadata !{i32 524334, i32 0, metadata !2, metadata !"main", metadata !"main", metadata !"main", metadata !2, i32 23, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] +!21 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, null} ; [ DW_TAG_subroutine_type ] +!22 = metadata !{metadata !13} +!23 = metadata !{i32 524545, metadata !17, metadata !"i", metadata !2, i32 16, metadata !13} ; [ DW_TAG_arg_variable ] +!24 = metadata !{i32 16, i32 0, metadata !17, null} +!25 = metadata !{i32 524545, metadata !17, metadata !"location", metadata !2, i32 16, metadata !26} ; [ DW_TAG_arg_variable ] +!26 = metadata !{i32 524304, metadata !2, metadata !"SVal", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_reference_type ] +!27 = metadata !{i32 17, i32 0, metadata !28, null} +!28 = metadata !{i32 524299, metadata !17, i32 16, i32 0, metadata !2, i32 2} ; [ DW_TAG_lexical_block ] +!29 = metadata !{i32 18, i32 0, metadata !28, null} +!30 = metadata !{i32 20, i32 0, metadata !28, null} +!31 = metadata !{i32 524545, metadata !16, metadata !"this", metadata !2, i32 11, metadata !32} ; [ DW_TAG_arg_variable ] +!32 = metadata !{i32 524326, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !33} ; [ DW_TAG_const_type ] +!33 = metadata !{i32 524303, metadata !2, metadata !"", 
metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_pointer_type ] +!34 = metadata !{i32 11, i32 0, metadata !16, null} +!35 = metadata !{i32 11, i32 0, metadata !36, null} +!36 = metadata !{i32 524299, metadata !37, i32 11, i32 0, metadata !2, i32 1} ; [ DW_TAG_lexical_block ] +!37 = metadata !{i32 524299, metadata !16, i32 11, i32 0, metadata !2, i32 0} ; [ DW_TAG_lexical_block ] +!38 = metadata !{i32 524544, metadata !39, metadata !"v", metadata !2, i32 24, metadata !1} ; [ DW_TAG_auto_variable ] +!39 = metadata !{i32 524299, metadata !40, i32 23, i32 0, metadata !2, i32 4} ; [ DW_TAG_lexical_block ] +!40 = metadata !{i32 524299, metadata !20, i32 23, i32 0, metadata !2, i32 3} ; [ DW_TAG_lexical_block ] +!41 = metadata !{i32 24, i32 0, metadata !39, null} +!42 = metadata !{i32 25, i32 0, metadata !39, null} +!43 = metadata !{i32 26, i32 0, metadata !39, null} +!44 = metadata !{i32 524544, metadata !39, metadata !"k", metadata !2, i32 26, metadata !13} ; [ DW_TAG_auto_variable ] +!45 = metadata !{i32 27, i32 0, metadata !39, null}
diff --git a/src/LLVM/test/CodeGen/ARM/2010-09-21-OptCmpBug.ll b/src/LLVM/test/CodeGen/ARM/2010-09-21-OptCmpBug.ll new file mode 100644 index 0000000..d282091 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2010-09-21-OptCmpBug.ll
@@ -0,0 +1,84 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 + +declare noalias i8* @malloc(i32) nounwind + +define internal void @gl_DrawPixels(i32 %width, i32 %height, i32 %format, i32 %type, i8* %pixels) nounwind { +entry: + br i1 undef, label %bb3.i, label %bb3 + +bb3.i: ; preds = %entry + unreachable + +gl_error.exit: ; preds = %bb22 + ret void + +bb3: ; preds = %entry + br i1 false, label %bb5, label %bb4 + +bb4: ; preds = %bb3 + br label %bb5 + +bb5: ; preds = %bb4, %bb3 + br i1 undef, label %bb19, label %bb22 + +bb19: ; preds = %bb5 + switch i32 %type, label %bb3.i6.i [ + i32 5120, label %bb1.i13 + i32 5121, label %bb1.i13 + i32 6656, label %bb9.i.i6 + ] + +bb9.i.i6: ; preds = %bb19 + br label %bb1.i13 + +bb3.i6.i: ; preds = %bb19 + unreachable + +bb1.i13: ; preds = %bb9.i.i6, %bb19, %bb19 + br i1 undef, label %bb3.i17, label %bb2.i16 + +bb2.i16: ; preds = %bb1.i13 + unreachable + +bb3.i17: ; preds = %bb1.i13 + br i1 undef, label %bb4.i18, label %bb23.i + +bb4.i18: ; preds = %bb3.i17 + %0 = mul nsw i32 %height, %width + %1 = and i32 %0, 7 + %not..i = icmp ne i32 %1, 0 + %2 = zext i1 %not..i to i32 + %storemerge2.i = add i32 0, %2 + %3 = call noalias i8* @malloc(i32 %storemerge2.i) nounwind + br i1 undef, label %bb3.i9, label %bb9.i + +bb9.i: ; preds = %bb4.i18 + br i1 undef, label %bb13.i19, label %bb.i24.i + +bb13.i19: ; preds = %bb9.i + br i1 undef, label %bb14.i20, label %bb15.i + +bb14.i20: ; preds = %bb13.i19 + unreachable + +bb15.i: ; preds = %bb13.i19 + unreachable + +bb.i24.i: ; preds = %bb.i24.i, %bb9.i + %storemerge1.i21.i = phi i32 [ %4, %bb.i24.i ], [ 0, %bb9.i ] + %4 = add i32 %storemerge1.i21.i, 1 + %exitcond47.i = icmp eq i32 %4, %storemerge2.i + br i1 %exitcond47.i, label %bb22, label %bb.i24.i + +bb23.i: ; preds = %bb3.i17 + unreachable + +bb3.i9: ; preds = %bb4.i18 + unreachable + +bb22: ; preds = %bb.i24.i, %bb5 + br i1 undef, label %gl_error.exit, label %bb23 + +bb23: ; preds = %bb22 + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2010-09-29-mc-asm-header-test.ll b/src/LLVM/test/CodeGen/ARM/2010-09-29-mc-asm-header-test.ll new file mode 100644 index 0000000..bda14bc --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2010-09-29-mc-asm-header-test.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s +; This tests that MC/asm header conversion is smooth +; +; CHECK: .syntax unified +; CHECK: .eabi_attribute 20, 1 +; CHECK: .eabi_attribute 21, 1 +; CHECK: .eabi_attribute 23, 3 +; CHECK: .eabi_attribute 24, 1 +; CHECK: .eabi_attribute 25, 1 + +define i32 @f(i64 %z) { + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll b/src/LLVM/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll new file mode 100644 index 0000000..99db637 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll
@@ -0,0 +1,37 @@ +; RUN: llc %s -mtriple=arm-linux-gnueabi -filetype=obj -o - | \ +; RUN: elf-dump --dump-section-data | FileCheck -check-prefix=BASIC %s +; RUN: llc %s -mtriple=armv7-linux-gnueabi -march=arm -mcpu=cortex-a8 \ +; RUN: -mattr=-neon,-vfp3,+vfp2 \ +; RUN: -arm-reserve-r9 -filetype=obj -o - | \ +; RUN: elf-dump --dump-section-data | FileCheck -check-prefix=CORTEXA8 %s + + +; This tests that the expected ARM attributes are emitted. +; +; BASIC: .ARM.attributes +; BASIC-NEXT: 0x70000003 +; BASIC-NEXT: 0x00000000 +; BASIC-NEXT: 0x00000000 +; BASIC-NEXT: 0x0000003c +; BASIC-NEXT: 0x00000020 +; BASIC-NEXT: 0x00000000 +; BASIC-NEXT: 0x00000000 +; BASIC-NEXT: 0x00000001 +; BASIC-NEXT: 0x00000000 +; BASIC-NEXT: '411f0000 00616561 62690001 15000000 06020801 09011401 15011703 18011901' + +; CORTEXA8: .ARM.attributes +; CORTEXA8-NEXT: 0x70000003 +; CORTEXA8-NEXT: 0x00000000 +; CORTEXA8-NEXT: 0x00000000 +; CORTEXA8-NEXT: 0x0000003c +; CORTEXA8-NEXT: 0x0000002f +; CORTEXA8-NEXT: 0x00000000 +; CORTEXA8-NEXT: 0x00000000 +; CORTEXA8-NEXT: 0x00000001 +; CORTEXA8-NEXT: 0x00000000 +; CORTEXA8-NEXT: '412e0000 00616561 62690001 24000000 05434f52 5445582d 41380006 0a074108 0109020a 02140115 01170318 011901' + +define i32 @f(i64 %z) { + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/ARM/2010-10-25-ifcvt-ldm.ll b/src/LLVM/test/CodeGen/ARM/2010-10-25-ifcvt-ldm.ll new file mode 100644 index 0000000..32d350e --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2010-10-25-ifcvt-ldm.ll
@@ -0,0 +1,31 @@ +; RUN: llc < %s -mtriple=armv6-apple-darwin -mcpu=arm1136jf-s | FileCheck %s +; Radar 8589805: Counting the number of microcoded operations, such as for an +; LDM instruction, was causing an assertion failure because the microop count +; was being treated as an instruction count. + +; CHECK: push +; CHECK: pop +; CHECK: pop +; CHECK: pop + +define i32 @test(i32 %x) { +entry: + %0 = tail call signext i16 undef(i32* undef) + switch i32 undef, label %bb3 [ + i32 0, label %bb4 + i32 1, label %bb1 + i32 2, label %bb2 + ] + +bb1: + ret i32 1 + +bb2: + ret i32 2 + +bb3: + ret i32 1 + +bb4: + ret i32 3 +}
diff --git a/src/LLVM/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll b/src/LLVM/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll new file mode 100644 index 0000000..0422094 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll
@@ -0,0 +1,85 @@ +; RUN: llc < %s -verify-machineinstrs -spiller=standard +; RUN: llc < %s -verify-machineinstrs -spiller=inline +; PR8612 +; +; This test has an inline asm with early-clobber arguments. +; It is big enough that one of the early clobber registers is spilled. +; +; All the spillers would get the live ranges wrong when spilling an early +; clobber, allowing the undef register to be allocated to the same register as +; the early clobber. +; +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32" +target triple = "armv7-eabi" + +%0 = type { i32, i32 } + +define void @foo(i32* %in) nounwind { +entry: + br label %bb.i + +bb.i: ; preds = %bb.i, %entry + br i1 undef, label %bb10.preheader.i, label %bb.i + +bb10.preheader.i: ; preds = %bb.i + br label %bb10.i + +bb10.i: ; preds = %bb10.i, %bb10.preheader.i + br i1 undef, label %bb27.i, label %bb10.i + +bb27.i: ; preds = %bb10.i + br label %bb28.i + +bb28.i: ; preds = %bb28.i, %bb27.i + br i1 undef, label %presymmetry.exit, label %bb28.i + +presymmetry.exit: ; preds = %bb28.i + %tmp175387 = or i32 undef, 12 + %scevgep101.i = getelementptr i32* %in, i32 undef + %tmp189401 = or i32 undef, 7 + %scevgep97.i = getelementptr i32* %in, i32 undef + %tmp198410 = or i32 undef, 1 + %scevgep.i48 = getelementptr i32* %in, i32 undef + %0 = load i32* %scevgep.i48, align 4 + %1 = add nsw i32 %0, 0 + store i32 %1, i32* undef, align 4 + %asmtmp.i.i33.i.i.i = tail call %0 asm "smull\09$0, $1, $2, $3", "=&r,=&r,%r,r,~{cc}"(i32 undef, i32 1518500250) nounwind + %asmresult1.i.i34.i.i.i = extractvalue %0 %asmtmp.i.i33.i.i.i, 1 + %2 = shl i32 %asmresult1.i.i34.i.i.i, 1 + %3 = load i32* null, align 4 + %4 = load i32* undef, align 4 + %5 = sub nsw i32 %3, %4 + %6 = load i32* undef, align 4 + %7 = load i32* null, align 4 + %8 = sub nsw i32 %6, %7 + %9 = load i32* %scevgep97.i, align 4 + %10 = load i32* undef, align 4 + %11 = sub nsw i32 %9, %10 + %12 = load 
i32* null, align 4 + %13 = load i32* %scevgep101.i, align 4 + %14 = sub nsw i32 %12, %13 + %15 = load i32* %scevgep.i48, align 4 + %16 = load i32* null, align 4 + %17 = add nsw i32 %16, %15 + %18 = sub nsw i32 %15, %16 + %19 = load i32* undef, align 4 + %20 = add nsw i32 %19, %2 + %21 = sub nsw i32 %19, %2 + %22 = add nsw i32 %14, %5 + %23 = sub nsw i32 %5, %14 + %24 = add nsw i32 %11, %8 + %25 = sub nsw i32 %8, %11 + %26 = add nsw i32 %21, %23 + store i32 %26, i32* %scevgep.i48, align 4 + %27 = sub nsw i32 %25, %18 + store i32 %27, i32* null, align 4 + %28 = sub nsw i32 %23, %21 + store i32 %28, i32* undef, align 4 + %29 = add nsw i32 %18, %25 + store i32 %29, i32* undef, align 4 + %30 = add nsw i32 %17, %22 + store i32 %30, i32* %scevgep101.i, align 4 + %31 = add nsw i32 %20, %24 + store i32 %31, i32* null, align 4 + unreachable +}
diff --git a/src/LLVM/test/CodeGen/ARM/2010-11-29-PrologueBug.ll b/src/LLVM/test/CodeGen/ARM/2010-11-29-PrologueBug.ll new file mode 100644 index 0000000..e3c18ce --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2010-11-29-PrologueBug.ll
@@ -0,0 +1,28 @@ +; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB2 +; rdar://8690640 + +define i32* @t(i32* %x) nounwind { +entry: +; ARM: t: +; ARM: push +; ARM: mov r7, sp +; ARM: bl _foo +; ARM: bl _foo +; ARM: bl _foo +; ARM: pop {r7, pc} + +; THUMB2: t: +; THUMB2: push +; THUMB2: mov r7, sp +; THUMB2: blx _foo +; THUMB2: blx _foo +; THUMB2: blx _foo +; THUMB2: pop + %0 = tail call i32* @foo(i32* %x) nounwind + %1 = tail call i32* @foo(i32* %0) nounwind + %2 = tail call i32* @foo(i32* %1) nounwind + ret i32* %2 +} + +declare i32* @foo(i32*)
diff --git a/src/LLVM/test/CodeGen/ARM/2010-11-30-reloc-movt.ll b/src/LLVM/test/CodeGen/ARM/2010-11-30-reloc-movt.ll new file mode 100644 index 0000000..8b164c5 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2010-11-30-reloc-movt.ll
@@ -0,0 +1,42 @@ +; RUN: llc %s -mtriple=armv7-linux-gnueabi -filetype=obj -o - | \ +; RUN: elf-dump --dump-section-data | FileCheck -check-prefix=OBJ %s + +target triple = "armv7-none-linux-gnueabi" + +@a = external global i8 + +define arm_aapcs_vfpcc i32 @barf() nounwind { +entry: + %0 = tail call arm_aapcs_vfpcc i32 @foo(i8* @a) nounwind + ret i32 %0 +; OBJ: '.text' +; OBJ-NEXT: 'sh_type' +; OBJ-NEXT: 'sh_flags' +; OBJ-NEXT: 'sh_addr' +; OBJ-NEXT: 'sh_offset' +; OBJ-NEXT: 'sh_size' +; OBJ-NEXT: 'sh_link' +; OBJ-NEXT: 'sh_info' +; OBJ-NEXT: 'sh_addralign' +; OBJ-NEXT: 'sh_entsize' +; OBJ-NEXT: '_section_data', '00482de9 000000e3 000040e3 feffffeb 0088bde8' + +; OBJ: Relocation 0 +; OBJ-NEXT: 'r_offset', 0x00000004 +; OBJ-NEXT: 'r_sym', 0x000007 +; OBJ-NEXT: 'r_type', 0x2b + +; OBJ: Relocation 1 +; OBJ-NEXT: 'r_offset', 0x00000008 +; OBJ-NEXT: 'r_sym' +; OBJ-NEXT: 'r_type', 0x2c + +; OBJ: # Relocation 2 +; OBJ-NEXT: 'r_offset', 0x0000000c +; OBJ-NEXT: 'r_sym', 0x000008 +; OBJ-NEXT: 'r_type', 0x1c + +} + +declare arm_aapcs_vfpcc i32 @foo(i8*) +
diff --git a/src/LLVM/test/CodeGen/ARM/2010-12-07-PEIBug.ll b/src/LLVM/test/CodeGen/ARM/2010-12-07-PEIBug.ll new file mode 100644 index 0000000..c65952b --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2010-12-07-PEIBug.ll
@@ -0,0 +1,40 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8 | FileCheck %s +; rdar://8728956 + +define hidden void @foo() nounwind ssp { +entry: +; CHECK: foo: +; CHECK: push {r7, lr} +; CHECK-NEXT: mov r7, sp +; CHECK-NEXT: vpush {d8} +; CHECK-NEXT: vpush {d10, d11} + %tmp40 = load <4 x i8>* undef + %tmp41 = extractelement <4 x i8> %tmp40, i32 2 + %conv42 = zext i8 %tmp41 to i32 + %conv43 = sitofp i32 %conv42 to float + %div44 = fdiv float %conv43, 2.560000e+02 + %vecinit45 = insertelement <4 x float> undef, float %div44, i32 2 + %vecinit46 = insertelement <4 x float> %vecinit45, float 1.000000e+00, i32 3 + store <4 x float> %vecinit46, <4 x float>* undef + br i1 undef, label %if.then105, label %if.else109 + +if.then105: ; preds = %entry + br label %if.end114 + +if.else109: ; preds = %entry + br label %if.end114 + +if.end114: ; preds = %if.else109, %if.then105 + %call185 = call float @bar() + %vecinit186 = insertelement <4 x float> undef, float %call185, i32 1 + %call189 = call float @bar() + %vecinit190 = insertelement <4 x float> %vecinit186, float %call189, i32 2 + %vecinit191 = insertelement <4 x float> %vecinit190, float 1.000000e+00, i32 3 + store <4 x float> %vecinit191, <4 x float>* undef +; CHECK: vpop {d10, d11} +; CHECK-NEXT: vpop {d8} +; CHECK-NEXT: pop {r7, pc} + ret void +} + +declare hidden float @bar() nounwind readnone ssp
diff --git a/src/LLVM/test/CodeGen/ARM/2010-12-08-tpsoft.ll b/src/LLVM/test/CodeGen/ARM/2010-12-08-tpsoft.ll new file mode 100644 index 0000000..b8ed819 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2010-12-08-tpsoft.ll
@@ -0,0 +1,52 @@ +; RUN: llc %s -mtriple=armv7-linux-gnueabi -o - | \ +; RUN: FileCheck -check-prefix=ELFASM %s +; RUN: llc %s -mtriple=armv7-linux-gnueabi -filetype=obj -o - | \ +; RUN: elf-dump --dump-section-data | FileCheck -check-prefix=ELFOBJ %s + +;; Make sure that bl __aeabi_read_tp is materialized and fixed up correctly +;; in the obj case. + +@i = external thread_local global i32 +@a = external global i8 +@b = external global [10 x i8] + +define arm_aapcs_vfpcc i32 @main() nounwind { +entry: + %0 = load i32* @i, align 4 + switch i32 %0, label %bb2 [ + i32 12, label %bb + i32 13, label %bb1 + ] + +bb: ; preds = %entry + %1 = tail call arm_aapcs_vfpcc i32 @foo(i8* @a) nounwind + ret i32 %1 +; ELFASM: bl __aeabi_read_tp + + +; ELFOBJ: '.text' +; ELFOBJ-NEXT: 'sh_type' +; ELFOBJ-NEXT: 'sh_flags' +; ELFOBJ-NEXT: 'sh_addr' +; ELFOBJ-NEXT: 'sh_offset' +; ELFOBJ-NEXT: 'sh_size' +; ELFOBJ-NEXT: 'sh_link' +; ELFOBJ-NEXT: 'sh_info' +; ELFOBJ-NEXT: 'sh_addralign' +; ELFOBJ-NEXT: 'sh_entsize' +;;; BL __aeabi_read_tp is ---+ +;;; V +; ELFOBJ-NEXT: 00482de9 3c009fe5 00109fe7 feffffeb + + +bb1: ; preds = %entry + %2 = tail call arm_aapcs_vfpcc i32 @bar(i32* bitcast ([10 x i8]* @b to i32*)) nounwind + ret i32 %2 + +bb2: ; preds = %entry + ret i32 -1 +} + +declare arm_aapcs_vfpcc i32 @foo(i8*) + +declare arm_aapcs_vfpcc i32 @bar(i32*)
diff --git a/src/LLVM/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll b/src/LLVM/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll new file mode 100644 index 0000000..5cfbb4f --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll
@@ -0,0 +1,36 @@ +; RUN: llc %s -mtriple=armv7-linux-gnueabi -filetype=obj -o - | \ +; RUN: elf-dump --dump-section-data | FileCheck -check-prefix=OBJ %s +; RUN: llc %s -mtriple=armv7-linux-gnueabi -o - | \ +; RUN: FileCheck -check-prefix=ASM %s + + +@dummy = internal global i32 666 +@array00 = internal global [80 x i8] zeroinitializer, align 1 +@sum = internal global i32 55 +@STRIDE = internal global i32 8 + +; ASM: .type array00,%object @ @array00 +; ASM-NEXT: .lcomm array00,80 +; ASM-NEXT: .type _MergedGlobals,%object @ @_MergedGlobals + + + +; OBJ: Section 4 +; OBJ-NEXT: '.bss' + +; OBJ: 'array00' +; OBJ-NEXT: 'st_value', 0x00000000 +; OBJ-NEXT: 'st_size', 0x00000050 +; OBJ-NEXT: 'st_bind', 0x0 +; OBJ-NEXT: 'st_type', 0x1 +; OBJ-NEXT: 'st_other', 0x00 +; OBJ-NEXT: 'st_shndx', 0x0004 + +define i32 @main(i32 %argc) nounwind { + %1 = load i32* @sum, align 4 + %2 = getelementptr [80 x i8]* @array00, i32 0, i32 %argc + %3 = load i8* %2 + %4 = zext i8 %3 to i32 + %5 = add i32 %1, %4 + ret i32 %5 +}
diff --git a/src/LLVM/test/CodeGen/ARM/2010-12-17-LocalStackSlotCrash.ll b/src/LLVM/test/CodeGen/ARM/2010-12-17-LocalStackSlotCrash.ll new file mode 100644 index 0000000..a2f50b5 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2010-12-17-LocalStackSlotCrash.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=armv6-apple-darwin10 +; <rdar://problem/8782198> +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:64-n32" +target triple = "armv6-apple-darwin10" + +define void @func() nounwind optsize { +entry: + %buf = alloca [8096 x i8], align 1 + br label %bb + +bb: + %p.2 = getelementptr [8096 x i8]* %buf, i32 0, i32 0 + store i8 undef, i8* %p.2, align 1 + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll b/src/LLVM/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll new file mode 100644 index 0000000..9484212 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
@@ -0,0 +1,126 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin10" + +@x1 = internal global i8 1 +@x2 = internal global i8 1 +@x3 = internal global i8 1 +@x4 = internal global i8 1 +@x5 = global i8 1 + +; Check debug info output for merged global. +; DW_AT_location +; DW_OP_addr +; DW_OP_plus +; .long __MergedGlobals +; DW_OP_constu +; offset + +;CHECK: .ascii "x2" @ DW_AT_name +;CHECK-NEXT: .byte 0 +;CHECK-NEXT: @ DW_AT_type +;CHECK-NEXT: @ DW_AT_decl_file +;CHECK-NEXT: @ DW_AT_decl_line +;CHECK-NEXT: @ DW_AT_location +;CHECK-NEXT: .byte 3 +;CHECK-NEXT: .long __MergedGlobals +;CHECK-NEXT: .byte 16 +;CHECK-NEXT: .byte 1 +;CHECK-NEXT: .byte 34 + +define zeroext i8 @get1(i8 zeroext %a) nounwind optsize { +entry: + tail call void @llvm.dbg.value(metadata !{i8 %a}, i64 0, metadata !10), !dbg !30 + %0 = load i8* @x1, align 4, !dbg !30 + tail call void @llvm.dbg.value(metadata !{i8 %0}, i64 0, metadata !11), !dbg !30 + store i8 %a, i8* @x1, align 4, !dbg !30 + ret i8 %0, !dbg !31 +} + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +define zeroext i8 @get2(i8 zeroext %a) nounwind optsize { +entry: + tail call void @llvm.dbg.value(metadata !{i8 %a}, i64 0, metadata !18), !dbg !32 + %0 = load i8* @x2, align 4, !dbg !32 + tail call void @llvm.dbg.value(metadata !{i8 %0}, i64 0, metadata !19), !dbg !32 + store i8 %a, i8* @x2, align 4, !dbg !32 + ret i8 %0, !dbg !33 +} + +define zeroext i8 @get3(i8 zeroext %a) nounwind optsize { +entry: + tail call void @llvm.dbg.value(metadata !{i8 %a}, i64 0, metadata !21), !dbg !34 + %0 = load i8* @x3, align 4, !dbg !34 + tail call void @llvm.dbg.value(metadata !{i8 %0}, i64 0, metadata !22), !dbg !34 + store i8 %a, i8* @x3, align 4, !dbg !34 + ret i8 %0, !dbg !35 +} + +define zeroext i8 @get4(i8 zeroext %a) nounwind optsize { +entry: + tail call void 
@llvm.dbg.value(metadata !{i8 %a}, i64 0, metadata !24), !dbg !36 + %0 = load i8* @x4, align 4, !dbg !36 + tail call void @llvm.dbg.value(metadata !{i8 %0}, i64 0, metadata !25), !dbg !36 + store i8 %a, i8* @x4, align 4, !dbg !36 + ret i8 %0, !dbg !37 +} + +define zeroext i8 @get5(i8 zeroext %a) nounwind optsize { +entry: + tail call void @llvm.dbg.value(metadata !{i8 %a}, i64 0, metadata !27), !dbg !38 + %0 = load i8* @x5, align 4, !dbg !38 + tail call void @llvm.dbg.value(metadata !{i8 %0}, i64 0, metadata !28), !dbg !38 + store i8 %a, i8* @x5, align 4, !dbg !38 + ret i8 %0, !dbg !39 +} + +!llvm.dbg.sp = !{!0, !6, !7, !8, !9} +!llvm.dbg.lv.get1 = !{!10, !11} +!llvm.dbg.gv = !{!13, !14, !15, !16, !17} +!llvm.dbg.lv.get2 = !{!18, !19} +!llvm.dbg.lv.get3 = !{!21, !22} +!llvm.dbg.lv.get4 = !{!24, !25} +!llvm.dbg.lv.get5 = !{!27, !28} + +!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"get1", metadata !"get1", metadata !"get1", metadata !1, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get1} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 589865, metadata !"foo.c", metadata !"/tmp/", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 589841, i32 0, i32 1, metadata !"foo.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build 2369.8)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5, metadata !5} +!5 = metadata !{i32 589860, metadata !1, metadata !"_Bool", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 589870, i32 0, metadata !1, metadata !"get2", metadata !"get2", metadata !"get2", metadata !1, i32 7, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get2} ; [ DW_TAG_subprogram ] +!7 = metadata !{i32 589870, i32 0, metadata !1, metadata !"get3", metadata !"get3", metadata !"get3", metadata !1, i32 10, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get3} ; [ DW_TAG_subprogram ] +!8 = metadata !{i32 589870, i32 0, metadata !1, metadata !"get4", metadata !"get4", metadata !"get4", metadata !1, i32 13, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get4} ; [ DW_TAG_subprogram ] +!9 = metadata !{i32 589870, i32 0, metadata !1, metadata !"get5", metadata !"get5", metadata !"get5", metadata !1, i32 16, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get5} ; [ DW_TAG_subprogram ] +!10 = metadata !{i32 590081, metadata !0, metadata !"a", metadata !1, i32 4, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] +!11 = metadata !{i32 590080, metadata !12, metadata !"b", metadata !1, i32 4, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] +!12 = metadata !{i32 589835, metadata !0, i32 4, i32 0, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!13 = metadata !{i32 589876, i32 0, metadata !1, metadata !"x1", metadata !"x1", metadata !"", metadata !1, i32 3, metadata !5, i1 true, i1 true, i8* @x1} ; [ DW_TAG_variable ] +!14 = metadata !{i32 589876, i32 0, metadata !1, metadata !"x2", metadata !"x2", 
metadata !"", metadata !1, i32 6, metadata !5, i1 true, i1 true, i8* @x2} ; [ DW_TAG_variable ] +!15 = metadata !{i32 589876, i32 0, metadata !1, metadata !"x3", metadata !"x3", metadata !"", metadata !1, i32 9, metadata !5, i1 true, i1 true, i8* @x3} ; [ DW_TAG_variable ] +!16 = metadata !{i32 589876, i32 0, metadata !1, metadata !"x4", metadata !"x4", metadata !"", metadata !1, i32 12, metadata !5, i1 true, i1 true, i8* @x4} ; [ DW_TAG_variable ] +!17 = metadata !{i32 589876, i32 0, metadata !1, metadata !"x5", metadata !"x5", metadata !"", metadata !1, i32 15, metadata !5, i1 false, i1 true, i8* @x5} ; [ DW_TAG_variable ] +!18 = metadata !{i32 590081, metadata !6, metadata !"a", metadata !1, i32 7, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] +!19 = metadata !{i32 590080, metadata !20, metadata !"b", metadata !1, i32 7, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] +!20 = metadata !{i32 589835, metadata !6, i32 7, i32 0, metadata !1, i32 1} ; [ DW_TAG_lexical_block ] +!21 = metadata !{i32 590081, metadata !7, metadata !"a", metadata !1, i32 10, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] +!22 = metadata !{i32 590080, metadata !23, metadata !"b", metadata !1, i32 10, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] +!23 = metadata !{i32 589835, metadata !7, i32 10, i32 0, metadata !1, i32 2} ; [ DW_TAG_lexical_block ] +!24 = metadata !{i32 590081, metadata !8, metadata !"a", metadata !1, i32 13, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] +!25 = metadata !{i32 590080, metadata !26, metadata !"b", metadata !1, i32 13, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] +!26 = metadata !{i32 589835, metadata !8, i32 13, i32 0, metadata !1, i32 3} ; [ DW_TAG_lexical_block ] +!27 = metadata !{i32 590081, metadata !9, metadata !"a", metadata !1, i32 16, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] +!28 = metadata !{i32 590080, metadata !29, metadata !"b", metadata !1, i32 16, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] +!29 = metadata !{i32 589835, metadata !9, i32 
16, i32 0, metadata !1, i32 4} ; [ DW_TAG_lexical_block ] +!30 = metadata !{i32 4, i32 0, metadata !0, null} +!31 = metadata !{i32 4, i32 0, metadata !12, null} +!32 = metadata !{i32 7, i32 0, metadata !6, null} +!33 = metadata !{i32 7, i32 0, metadata !20, null} +!34 = metadata !{i32 10, i32 0, metadata !7, null} +!35 = metadata !{i32 10, i32 0, metadata !23, null} +!36 = metadata !{i32 13, i32 0, metadata !8, null} +!37 = metadata !{i32 13, i32 0, metadata !26, null} +!38 = metadata !{i32 16, i32 0, metadata !9, null} +!39 = metadata !{i32 16, i32 0, metadata !29, null}
diff --git a/src/LLVM/test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll b/src/LLVM/test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll new file mode 100644 index 0000000..85a1137 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll
@@ -0,0 +1,128 @@ +; RUN: llc < %s -asm-verbose=false -O3 -mtriple=armv6-apple-darwin -relocation-model=pic -mcpu=arm1136jf-s | FileCheck %s +; rdar://8959122 illegal register operands for UMULL instruction +; in cfrac nightly test. +; Armv6 generates a umull that must write to two distinct destination regs. + +; ModuleID = 'bugpoint-reduced-simplified.bc' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:64-n32" +target triple = "armv6-apple-darwin10" + +define void @ptoa() nounwind { +entry: + br i1 false, label %bb3, label %bb + +bb: ; preds = %entry + br label %bb3 + +bb3: ; preds = %bb, %entry + %0 = call noalias i8* @malloc() nounwind + br i1 undef, label %bb46, label %bb8 + +bb8: ; preds = %bb3 + %1 = getelementptr inbounds i8* %0, i32 0 + store i8 0, i8* %1, align 1 + %2 = call i32 @ptou() nounwind + ; CHECK: umull [[REGISTER:lr|r[0-9]+]], + ; CHECK-NOT: [[REGISTER]], + ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}} + ; CHECK: umull [[REGISTER:lr|r[0-9]+]], + ; CHECK-NOT: [[REGISTER]], + ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}} + %3 = udiv i32 %2, 10 + %4 = urem i32 %3, 10 + %5 = icmp ult i32 %4, 10 + %6 = trunc i32 %4 to i8 + %7 = or i8 %6, 48 + %8 = add i8 %6, 87 + %iftmp.5.0.1 = select i1 %5, i8 %7, i8 %8 + store i8 %iftmp.5.0.1, i8* undef, align 1 + ; CHECK: umull [[REGISTER:lr|r[0-9]+]], + ; CHECK-NOT: [[REGISTER]], + ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}} + ; CHECK: umull [[REGISTER:lr|r[0-9]+]], + ; CHECK-NOT: [[REGISTER]], + ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}} + %9 = udiv i32 %2, 100 + %10 = urem i32 %9, 10 + %11 = icmp ult i32 %10, 10 + %12 = trunc i32 %10 to i8 + %13 = or i8 %12, 48 + %14 = add i8 %12, 87 + %iftmp.5.0.2 = select i1 %11, i8 %13, i8 %14 + store i8 %iftmp.5.0.2, i8* undef, align 1 + ; CHECK: umull [[REGISTER:lr|r[0-9]+]], + ; CHECK-NOT: [[REGISTER]], + ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}} + ; CHECK: umull [[REGISTER:lr|r[0-9]+]], + ; CHECK-NOT: 
[[REGISTER]], + ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}} + %15 = udiv i32 %2, 10000 + %16 = urem i32 %15, 10 + %17 = icmp ult i32 %16, 10 + %18 = trunc i32 %16 to i8 + %19 = or i8 %18, 48 + %20 = add i8 %18, 87 + %iftmp.5.0.4 = select i1 %17, i8 %19, i8 %20 + store i8 %iftmp.5.0.4, i8* null, align 1 + ; CHECK: umull [[REGISTER:lr|r[0-9]+]], + ; CHECK-NOT: [[REGISTER]], + ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}} + ; CHECK: umull [[REGISTER:lr|r[0-9]+]], + ; CHECK-NOT: [[REGISTER]], + ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}} + %21 = udiv i32 %2, 100000 + %22 = urem i32 %21, 10 + %23 = icmp ult i32 %22, 10 + %iftmp.5.0.5 = select i1 %23, i8 0, i8 undef + store i8 %iftmp.5.0.5, i8* undef, align 1 + ; CHECK: umull [[REGISTER:lr|r[0-9]+]], + ; CHECK-NOT: [[REGISTER]], + ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}} + ; CHECK: umull [[REGISTER:lr|r[0-9]+]], + ; CHECK-NOT: [[REGISTER]], + ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}} + %24 = udiv i32 %2, 1000000 + %25 = urem i32 %24, 10 + %26 = icmp ult i32 %25, 10 + %27 = trunc i32 %25 to i8 + %28 = or i8 %27, 48 + %29 = add i8 %27, 87 + %iftmp.5.0.6 = select i1 %26, i8 %28, i8 %29 + store i8 %iftmp.5.0.6, i8* undef, align 1 + ; CHECK: umull [[REGISTER:lr|r[0-9]+]], + ; CHECK-NOT: [[REGISTER]], + ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}} + ; CHECK: umull [[REGISTER:lr|r[0-9]+]], + ; CHECK-NOT: [[REGISTER]], + ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}} + %30 = udiv i32 %2, 10000000 + %31 = urem i32 %30, 10 + %32 = icmp ult i32 %31, 10 + %33 = trunc i32 %31 to i8 + %34 = or i8 %33, 48 + %35 = add i8 %33, 87 + %iftmp.5.0.7 = select i1 %32, i8 %34, i8 %35 + store i8 %iftmp.5.0.7, i8* undef, align 1 + ; CHECK: umull [[REGISTER:lr|r[0-9]+]], + ; CHECK-NOT: [[REGISTER]], + ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}} + ; CHECK: umull [[REGISTER:lr|r[0-9]+]], + ; CHECK-NOT: [[REGISTER]], + ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}} + %36 = udiv i32 %2, 100000000 + %37 = urem i32 %36, 10 + %38 = icmp ult i32 %37, 10 + %39 = trunc i32 %37 to i8 + %40 
= or i8 %39, 48 + %41 = add i8 %39, 87 + %iftmp.5.0.8 = select i1 %38, i8 %40, i8 %41 + store i8 %iftmp.5.0.8, i8* null, align 1 + unreachable + +bb46: ; preds = %bb3 + ret void +} + +declare noalias i8* @malloc() nounwind + +declare i32 @ptou()
diff --git a/src/LLVM/test/CodeGen/ARM/2011-02-07-AntidepClobber.ll b/src/LLVM/test/CodeGen/ARM/2011-02-07-AntidepClobber.ll new file mode 100644 index 0000000..f3d7888 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2011-02-07-AntidepClobber.ll
@@ -0,0 +1,89 @@ +; RUN: llc < %s -asm-verbose=false -O3 -mtriple=armv5e-none-linux-gnueabi | FileCheck %s +; PR8986: PostRA antidependence breaker must respect "earlyclobber". +; armv5e generates mulv5 that cannot used the same reg for src/dest. + +; ModuleID = '<stdin>' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32" +target triple = "armv5e-none-linux-gnueabi" + +define hidden fastcc void @storeAtts() nounwind { +entry: + %.SV116 = alloca i8** + br i1 undef, label %meshBB520, label %meshBB464 + +bb15: ; preds = %meshBB424 + br i1 undef, label %bb216, label %meshBB396 + +bb22: ; preds = %meshBB396 + br label %cBB564 + +cBB564: ; preds = %cBB564, %bb22 + br label %cBB564 + +poolStoreString.exit.thread: ; preds = %meshBB424 + ret void + +bb78: ; preds = %meshBB412 + unreachable + +bb129: ; preds = %meshBB540 + br i1 undef, label %bb131.loopexit, label %meshBB540 + +bb131.loopexit: ; preds = %bb129 + br label %bb131 + +bb131: ; preds = %bb135, %bb131.loopexit + br i1 undef, label %bb134, label %meshBB396 + +bb134: ; preds = %bb131 + unreachable + +bb135: ; preds = %meshBB396 + %uriHash.1.phi.load = load i32* undef + %.load120 = load i8*** %.SV116 + %.phi24 = load i8* null + %.phi26 = load i8** null + store i8 %.phi24, i8* %.phi26, align 1 + %0 = getelementptr inbounds i8* %.phi26, i32 1 + store i8* %0, i8** %.load120, align 4 + ; CHECK: mul [[REGISTER:lr|r[0-9]+]], + ; CHECK-NOT: [[REGISTER]], + ; CHECK: {{(lr|r[0-9]+)$}} + %1 = mul i32 %uriHash.1.phi.load, 1000003 + %2 = xor i32 0, %1 + store i32 %2, i32* null + %3 = load i8* null, align 1 + %4 = icmp eq i8 %3, 0 + store i8* %0, i8** undef + br i1 %4, label %meshBB472, label %bb131 + +bb212: ; preds = %meshBB540 + unreachable + +bb216: ; preds = %bb15 + ret void + +meshBB396: ; preds = %bb131, %bb15 + br i1 undef, label %bb135, label %bb22 + +meshBB412: ; preds = %meshBB464 + br i1 undef, label %meshBB504, label %bb78 + 
+meshBB424: ; preds = %meshBB464 + br i1 undef, label %poolStoreString.exit.thread, label %bb15 + +meshBB464: ; preds = %entry + br i1 undef, label %meshBB424, label %meshBB412 + +meshBB472: ; preds = %meshBB504, %bb135 + unreachable + +meshBB504: ; preds = %meshBB412 + br label %meshBB472 + +meshBB520: ; preds = %entry + br label %meshBB540 + +meshBB540: ; preds = %meshBB520, %bb129 + br i1 undef, label %bb212, label %bb129 +}
diff --git a/src/LLVM/test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll b/src/LLVM/test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll new file mode 100644 index 0000000..81babe0 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll
@@ -0,0 +1,47 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 + +; rdar://9117613 + +%struct.mo = type { i32, %struct.mo_pops* } +%struct.mo_pops = type { void (%struct.mo*)*, void (%struct.mo*)*, i32 (%struct.mo*, i32*, i32)*, i32 (%struct.mo*)*, i32 (%struct.mo*, i64, i32, i32, i32*, i64, i32)*, i32 (%struct.mo*, i64, i32, i64*, i32*, i32, i32, i32)*, i32 (%struct.mo*, i64, i32)*, i32 (%struct.mo*, i64, i64, i32)*, i32 (%struct.mo*, i64, i64, i32)*, i32 (%struct.mo*, i32)*, i32 (%struct.mo*)*, i32 (%struct.mo*, i32)*, i8* } +%struct.ui = type { %struct.mo*, i32*, i32, i32*, i32*, i64, i32*, i32*, i32* } + + +define internal fastcc i32 @t(i32* %vp, i32 %withfsize, i64 %filesize) nounwind { +entry: + br i1 undef, label %bb1, label %bb + +bb: ; preds = %entry + unreachable + +bb1: ; preds = %entry + %0 = call %struct.ui* @vn_pp_to_ui(i32* undef) nounwind + call void @llvm.memset.p0i8.i32(i8* undef, i8 0, i32 40, i32 4, i1 false) + %1 = getelementptr inbounds %struct.ui* %0, i32 0, i32 0 + store %struct.mo* undef, %struct.mo** %1, align 4 + %2 = getelementptr inbounds %struct.ui* %0, i32 0, i32 5 + %3 = load i64* %2, align 4 + %4 = call i32 @mo_create_nnm(%struct.mo* undef, i64 %3, i32** undef) nounwind + br i1 undef, label %bb3, label %bb2 + +bb2: ; preds = %bb1 + unreachable + +bb3: ; preds = %bb1 + br i1 undef, label %bb4, label %bb6 + +bb4: ; preds = %bb3 + %5 = call i32 @vn_size(i32* %vp, i64* %2, i32* undef) nounwind + unreachable + +bb6: ; preds = %bb3 + ret i32 0 +} + +declare %struct.ui* @vn_pp_to_ui(i32*) + +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind + +declare i32 @mo_create_nnm(%struct.mo*, i64, i32**) + +declare i32 @vn_size(i32*, i64*, i32*)
diff --git a/src/LLVM/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll b/src/LLVM/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll new file mode 100644 index 0000000..ccda281 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
@@ -0,0 +1,55 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 | FileCheck %s + +; Do not form Thumb2 ldrd / strd if the offset is not multiple of 4. +; rdar://9133587 + +%struct.Outer = type { i32, [2 x %"struct.Outer::Inner"] } +%"struct.Outer::Inner" = type { i32, i32, i8, i8 } + +@oStruct = external global %struct.Outer, align 4 + +define void @main() nounwind { +; CHECK: main: +; CHECK-NOT: ldrd +; CHECK: mul +for.body.lr.ph: + br label %for.body + +for.body: ; preds = %_Z14printIsNotZeroi.exit17.for.body_crit_edge, %for.body.lr.ph + %tmp3 = phi i1 [ false, %for.body.lr.ph ], [ %phitmp27, %_Z14printIsNotZeroi.exit17.for.body_crit_edge ] + %i.022 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %_Z14printIsNotZeroi.exit17.for.body_crit_edge ] + %x = getelementptr %struct.Outer* @oStruct, i32 0, i32 1, i32 %i.022, i32 0 + %y = getelementptr %struct.Outer* @oStruct, i32 0, i32 1, i32 %i.022, i32 1 + %inc = add i32 %i.022, 1 + br i1 %tmp3, label %_Z14printIsNotZeroi.exit, label %if.then.i + +if.then.i: ; preds = %for.body + unreachable + +_Z14printIsNotZeroi.exit: ; preds = %for.body + %tmp8 = load i32* %x, align 4, !tbaa !0 + %tmp11 = load i32* %y, align 4, !tbaa !0 + %mul = mul nsw i32 %tmp11, %tmp8 + %tobool.i14 = icmp eq i32 %mul, 0 + br i1 %tobool.i14, label %_Z14printIsNotZeroi.exit17, label %if.then.i16 + +if.then.i16: ; preds = %_Z14printIsNotZeroi.exit + unreachable + +_Z14printIsNotZeroi.exit17: ; preds = %_Z14printIsNotZeroi.exit + br i1 undef, label %_Z14printIsNotZeroi.exit17.for.body_crit_edge, label %for.end + +_Z14printIsNotZeroi.exit17.for.body_crit_edge: ; preds = %_Z14printIsNotZeroi.exit17 + %b.phi.trans.insert = getelementptr %struct.Outer* @oStruct, i32 0, i32 1, i32 %inc, i32 3 + %tmp3.pre = load i8* %b.phi.trans.insert, align 1, !tbaa !3 + %phitmp27 = icmp eq i8 undef, 0 + br label %for.body + +for.end: ; preds = %_Z14printIsNotZeroi.exit17 + ret void +} + +!0 = metadata !{metadata !"int", 
metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null} +!3 = metadata !{metadata !"bool", metadata !1}
diff --git a/src/LLVM/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll b/src/LLVM/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll new file mode 100644 index 0000000..7c9af6f --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll
@@ -0,0 +1,41 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 | FileCheck %s + +; subs r4, #1 +; cmp r4, 0 +; bgt +; cmp cannot be optimized away since it will clear the overflow bit. +; gt / ge, lt, le conditions all depend on V bit. +; rdar://9172742 + +define i32 @t() nounwind { +; CHECK: t: +entry: + br label %bb2 + +bb: ; preds = %bb2 + %0 = tail call i32 @rand() nounwind + %1 = icmp eq i32 %0, 50 + br i1 %1, label %bb3, label %bb1 + +bb1: ; preds = %bb + %tmp = tail call i32 @puts() nounwind + %indvar.next = add i32 %indvar, 1 + br label %bb2 + +bb2: ; preds = %bb1, %entry +; CHECK: bb2 +; CHECK: subs [[REG:r[0-9]+]], #1 +; CHECK: cmp [[REG]], #0 +; CHECK: bgt + %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ] + %tries.0 = sub i32 2147483647, %indvar + %tmp1 = icmp sgt i32 %tries.0, 0 + br i1 %tmp1, label %bb, label %bb3 + +bb3: ; preds = %bb2, %bb + ret i32 0 +} + +declare i32 @rand() + +declare i32 @puts() nounwind
diff --git a/src/LLVM/test/CodeGen/ARM/2011-04-07-schediv.ll b/src/LLVM/test/CodeGen/ARM/2011-04-07-schediv.ll new file mode 100644 index 0000000..19f756f --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2011-04-07-schediv.ll
@@ -0,0 +1,32 @@ +; RUN: llc < %s -mcpu=cortex-a8 | FileCheck %s +; Tests preRAsched support for VRegCycle interference. + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin10" + +define void @t(i32 %src_width, float* nocapture %src_copy_start, float* nocapture %dst_copy_start, i32 %src_copy_start_index) nounwind optsize { +entry: + %src_copy_start6 = bitcast float* %src_copy_start to i8* + %0 = icmp eq i32 %src_width, 0 + br i1 %0, label %return, label %bb + +; Make sure the scheduler schedules all uses of the preincrement +; induction variable before defining the postincrement value. +; CHECK: t: +; CHECK: %bb +; CHECK-NOT: mov +bb: ; preds = %entry, %bb + %j.05 = phi i32 [ %2, %bb ], [ 0, %entry ] + %tmp = mul i32 %j.05, %src_copy_start_index + %uglygep = getelementptr i8* %src_copy_start6, i32 %tmp + %src_copy_start_addr.04 = bitcast i8* %uglygep to float* + %dst_copy_start_addr.03 = getelementptr float* %dst_copy_start, i32 %j.05 + %1 = load float* %src_copy_start_addr.04, align 4 + store float %1, float* %dst_copy_start_addr.03, align 4 + %2 = add i32 %j.05, 1 + %exitcond = icmp eq i32 %2, %src_width + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll b/src/LLVM/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll new file mode 100644 index 0000000..568718c --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll
@@ -0,0 +1,34 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s + +; Overly aggressive LICM simply adds copies of constants +; rdar://9266679 + +define zeroext i1 @t(i32* nocapture %A, i32 %size, i32 %value) nounwind readonly ssp { +; CHECK: t: +entry: + br label %for.cond + +for.cond: + %0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %cmp = icmp ult i32 %0, %size + br i1 %cmp, label %for.body, label %return + +for.body: +; CHECK: %for. +; CHECK: movs r{{[0-9]+}}, #{{[01]}} + %arrayidx = getelementptr i32* %A, i32 %0 + %tmp4 = load i32* %arrayidx, align 4 + %cmp6 = icmp eq i32 %tmp4, %value + br i1 %cmp6, label %return, label %for.inc + +; CHECK: %for. +; CHECK: movs r{{[0-9]+}}, #{{[01]}} + +for.inc: + %inc = add i32 %0, 1 + br label %for.cond + +return: + %retval.0 = phi i1 [ true, %for.body ], [ false, %for.cond ] + ret i1 %retval.0 +}
diff --git a/src/LLVM/test/CodeGen/ARM/2011-04-12-AlignBug.ll b/src/LLVM/test/CodeGen/ARM/2011-04-12-AlignBug.ll new file mode 100644 index 0000000..317be94 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2011-04-12-AlignBug.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin10.0.0" + +; CHECK: align 3 +@.v = linker_private unnamed_addr constant <4 x i32> <i32 1, i32 2, i32 3, i32 4>, align 8 +; CHECK: align 2 +@.strA = linker_private unnamed_addr constant [4 x i8] c"bar\00" +; CHECK-NOT: align +@.strB = linker_private unnamed_addr constant [4 x i8] c"foo\00", align 1 +@.strC = linker_private unnamed_addr constant [4 x i8] c"baz\00", section "__TEXT,__cstring,cstring_literals", align 1
diff --git a/src/LLVM/test/CodeGen/ARM/2011-04-12-FastRegAlloc.ll b/src/LLVM/test/CodeGen/ARM/2011-04-12-FastRegAlloc.ll new file mode 100644 index 0000000..eb23de0 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2011-04-12-FastRegAlloc.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -O0 -verify-machineinstrs -regalloc=fast +; Previously we'd crash as out of registers on this input by clobbering all of +; the aliases. +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin10.0.0" + +define void @_Z8TestCasev() nounwind ssp { +entry: + %a = alloca float, align 4 + %tmp = load float* %a, align 4 + call void asm sideeffect "", "w,~{s0},~{s16}"(float %tmp) nounwind, !srcloc !0 + ret void +} + +!0 = metadata !{i32 109}
diff --git a/src/LLVM/test/CodeGen/ARM/2011-04-15-AndVFlagPeepholeBug.ll b/src/LLVM/test/CodeGen/ARM/2011-04-15-AndVFlagPeepholeBug.ll new file mode 100644 index 0000000..e712e08 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2011-04-15-AndVFlagPeepholeBug.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 | FileCheck %s + +; CHECK: _f +; CHECK-NOT: ands +; CHECK: cmp +; CHECK: blxle _g + +define i32 @f(i32 %a, i32 %b) nounwind ssp { +entry: + %and = and i32 %b, %a + %cmp = icmp slt i32 %and, 1 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void (...)* @g(i32 %a, i32 %b) nounwind + br label %if.end + +if.end: ; preds = %if.then, %entry + ret i32 %and +} + +declare void @g(...)
diff --git a/src/LLVM/test/CodeGen/ARM/2011-04-15-RegisterCmpPeephole.ll b/src/LLVM/test/CodeGen/ARM/2011-04-15-RegisterCmpPeephole.ll new file mode 100644 index 0000000..5404cf5 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2011-04-15-RegisterCmpPeephole.ll
@@ -0,0 +1,41 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 | FileCheck %s + +; CHECK: _f +; CHECK: adds +; CHECK-NOT: cmp +; CHECK: blxeq _h + +define i32 @f(i32 %a, i32 %b) nounwind ssp { +entry: + %add = add nsw i32 %b, %a + %cmp = icmp eq i32 %add, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void (...)* @h(i32 %a, i32 %b) nounwind + br label %if.end + +if.end: ; preds = %if.then, %entry + ret i32 %add +} + +; CHECK: _g +; CHECK: orrs +; CHECK-NOT: cmp +; CHECK: blxeq _h + +define i32 @g(i32 %a, i32 %b) nounwind ssp { +entry: + %add = or i32 %b, %a + %cmp = icmp eq i32 %add, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void (...)* @h(i32 %a, i32 %b) nounwind + br label %if.end + +if.end: ; preds = %if.then, %entry + ret i32 %add +} + +declare void @h(...)
diff --git a/src/LLVM/test/CodeGen/ARM/2011-04-26-SchedTweak.ll b/src/LLVM/test/CodeGen/ARM/2011-04-26-SchedTweak.ll new file mode 100644 index 0000000..ed7dd03 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2011-04-26-SchedTweak.ll
@@ -0,0 +1,70 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-ios -relocation-model=pic -mcpu=cortex-a8 | FileCheck %s + +; Do not move the umull above previous call which would require use of +; more callee-saved registers and introduce copies. +; rdar://9329627 + +%struct.FF = type { i32 (i32*)*, i32 (i32*, i32*, i32, i32, i32, i32)*, i32 (i32, i32, i8*)*, void ()*, i32 (i32, i8*, i32*)*, i32 ()* } +%struct.BD = type { %struct.BD*, i32, i32, i32, i32, i64, i32 (%struct.BD*, i8*, i64, i32)*, i32 (%struct.BD*, i8*, i32, i32)*, i32 (%struct.BD*, i8*, i64, i32)*, i32 (%struct.BD*, i8*, i32, i32)*, i32 (%struct.BD*, i64, i32)*, [16 x i8], i64, i64 } + +@FuncPtr = external hidden unnamed_addr global %struct.FF* +@.str1 = external hidden unnamed_addr constant [6 x i8], align 4 +@G = external unnamed_addr global i32 +@.str2 = external hidden unnamed_addr constant [58 x i8], align 4 +@.str3 = external hidden unnamed_addr constant [58 x i8], align 4 + +define i32 @test() nounwind optsize ssp { +entry: +; CHECK: test: +; CHECK: push +; CHECK-NOT: push + %block_size = alloca i32, align 4 + %block_count = alloca i32, align 4 + %index_cache = alloca i32, align 4 + store i32 0, i32* %index_cache, align 4 + %tmp = load i32* @G, align 4 + %tmp1 = call i32 @bar(i32 0, i32 0, i32 %tmp) nounwind + switch i32 %tmp1, label %bb8 [ + i32 0, label %bb + i32 536870913, label %bb4 + i32 536870914, label %bb6 + ] + +bb: + %tmp2 = load i32* @G, align 4 + %tmp4 = icmp eq i32 %tmp2, 0 + br i1 %tmp4, label %bb1, label %bb8 + +bb1: +; CHECK: %bb1 +; CHECK-NOT: umull +; CHECK: blx _Get +; CHECK: umull +; CHECK: blx _foo + %tmp5 = load i32* %block_size, align 4 + %tmp6 = load i32* %block_count, align 4 + %tmp7 = call %struct.FF* @Get() nounwind + store %struct.FF* %tmp7, %struct.FF** @FuncPtr, align 4 + %tmp10 = zext i32 %tmp6 to i64 + %tmp11 = zext i32 %tmp5 to i64 + %tmp12 = mul nsw i64 %tmp10, %tmp11 + %tmp13 = call i32 @foo(i8* getelementptr inbounds ([6 x i8]* @.str1, i32 0, i32 0), i64 %tmp12, 
i32 %tmp5) nounwind + br label %bb8 + +bb4: + ret i32 0 + +bb6: + ret i32 1 + +bb8: + ret i32 -1 +} + +declare i32 @printf(i8*, ...) + +declare %struct.FF* @Get() + +declare i32 @foo(i8*, i64, i32) + +declare i32 @bar(i32, i32, i32)
diff --git a/src/LLVM/test/CodeGen/ARM/2011-04-27-IfCvtBug.ll b/src/LLVM/test/CodeGen/ARM/2011-04-27-IfCvtBug.ll new file mode 100644 index 0000000..0741049 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2011-04-27-IfCvtBug.ll
@@ -0,0 +1,59 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-ios + +; If converter was being too cute. It look for root BBs (which don't have +; successors) and use inverse depth first search to traverse the BBs. However +; that doesn't work when the CFG has infinite loops. Simply do a linear +; traversal of all BBs work just fine. + +; rdar://9344645 + +%struct.hc = type { i32, i32, i32, i32 } + +define i32 @t(i32 %type) optsize { +entry: + br i1 undef, label %if.then, label %if.else + +if.then: + unreachable + +if.else: + br i1 undef, label %if.then15, label %if.else18 + +if.then15: + unreachable + +if.else18: + switch i32 %type, label %if.else173 [ + i32 3, label %if.then115 + i32 1, label %if.then102 + ] + +if.then102: + br i1 undef, label %cond.true10.i, label %t.exit + +cond.true10.i: + br label %t.exit + +t.exit: + unreachable + +if.then115: + br i1 undef, label %if.else163, label %if.else145 + +if.else145: + %call150 = call fastcc %struct.hc* @foo(%struct.hc* undef, i32 34865152) optsize + br label %while.body172 + +if.else163: + %call168 = call fastcc %struct.hc* @foo(%struct.hc* undef, i32 34078720) optsize + br label %while.body172 + +while.body172: + br label %while.body172 + +if.else173: + ret i32 -1 +} + +declare hidden fastcc %struct.hc* @foo(%struct.hc* nocapture, i32) nounwind optsize +
diff --git a/src/LLVM/test/CodeGen/ARM/2011-05-04-MultipleLandingPadSuccs.ll b/src/LLVM/test/CodeGen/ARM/2011-05-04-MultipleLandingPadSuccs.ll new file mode 100644 index 0000000..0b5f962 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2011-05-04-MultipleLandingPadSuccs.ll
@@ -0,0 +1,93 @@ +; RUN: llc < %s -verify-machineinstrs +; <rdar://problem/9187612> +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin" + +define void @func() unnamed_addr align 2 { +entry: + br label %for.cond + +for.cond: + %tmp2 = phi i32 [ 0, %entry ], [ %add, %for.cond.backedge ] + %cmp = icmp ult i32 %tmp2, 14 + br i1 %cmp, label %for.body, label %for.end + +for.body: + %add = add i32 %tmp2, 1 + switch i32 %tmp2, label %sw.default [ + i32 0, label %sw.bb + i32 1, label %sw.bb + i32 2, label %sw.bb + i32 4, label %sw.bb + i32 5, label %sw.bb + i32 10, label %sw.bb + ] + +sw.bb: + invoke void @foo() + to label %invoke.cont17 unwind label %lpad + +invoke.cont17: + invoke void @foo() + to label %for.cond.backedge unwind label %lpad26 + +for.cond.backedge: + br label %for.cond + +lpad: + %exn = tail call i8* @llvm.eh.exception() nounwind + %eh.selector = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i8* null) nounwind + invoke void @foo() + to label %eh.resume unwind label %terminate.lpad + +lpad26: + %exn27 = tail call i8* @llvm.eh.exception() nounwind + %eh.selector28 = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn27, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i8* null) nounwind + invoke void @foo() + to label %eh.resume unwind label %terminate.lpad + +sw.default: + br label %for.cond.backedge + +for.end: + invoke void @foo() + to label %call8.i.i.i.noexc unwind label %lpad44 + +call8.i.i.i.noexc: + ret void + +lpad44: + %exn45 = tail call i8* @llvm.eh.exception() nounwind + %eh.selector46 = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn45, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i8* null) nounwind + invoke void @foo() + to label %eh.resume unwind label %terminate.lpad + +eh.resume: + %exn.slot.0 = phi i8* [ %exn27, 
%lpad26 ], [ %exn, %lpad ], [ %exn45, %lpad44 ] + tail call void @_Unwind_SjLj_Resume_or_Rethrow(i8* %exn.slot.0) noreturn + unreachable + +terminate.lpad: + %exn51 = tail call i8* @llvm.eh.exception() nounwind + %eh.selector52 = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn51, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i8* null) nounwind + tail call void @_ZSt9terminatev() noreturn nounwind + unreachable +} + +declare void @foo() + +declare i8* @llvm.eh.exception() nounwind readonly + +declare i32 @__gxx_personality_sj0(...) + +declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind + +declare void @_Unwind_SjLj_Resume_or_Rethrow(i8*) + +declare void @_ZSt9terminatev() + +!0 = metadata !{metadata !"any pointer", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null} +!3 = metadata !{metadata !"bool", metadata !1} +!4 = metadata !{metadata !"int", metadata !1}
diff --git a/src/LLVM/test/CodeGen/ARM/2011-06-09-TailCallByVal.ll b/src/LLVM/test/CodeGen/ARM/2011-06-09-TailCallByVal.ll new file mode 100644 index 0000000..7f0f795 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2011-06-09-TailCallByVal.ll
@@ -0,0 +1,40 @@ +; RUN: llc < %s -relocation-model=pic -mcpu=cortex-a8 -arm-tail-calls=1 | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin10" + +%struct._RuneCharClass = type { [14 x i8], i32 } +%struct._RuneEntry = type { i32, i32, i32, i32* } +%struct._RuneLocale = type { [8 x i8], [32 x i8], i32 (i8*, i32, i8**)*, i32 (i32, i8*, i32, i8**)*, i32, [256 x i32], [256 x i32], [256 x i32], %struct._RuneRange, %struct._RuneRange, %struct._RuneRange, i8*, i32, i32, %struct._RuneCharClass* } +%struct._RuneRange = type { i32, %struct._RuneEntry* } +%struct.__collate_st_chain_pri = type { [10 x i32], [2 x i32] } +%struct.__collate_st_char_pri = type { [2 x i32] } +%struct.__collate_st_info = type { [2 x i8], i8, i8, [2 x i32], [2 x i32], i32, i32 } +%struct.__collate_st_large_char_pri = type { i32, %struct.__collate_st_char_pri } +%struct.__collate_st_subst = type { i32, [10 x i32] } +%struct.__xlocale_st_collate = type { i32, void (i8*)*, [32 x i8], %struct.__collate_st_info, [2 x %struct.__collate_st_subst*], %struct.__collate_st_chain_pri*, %struct.__collate_st_large_char_pri*, [256 x %struct.__collate_st_char_pri] } +%struct.__xlocale_st_messages = type { i32, void (i8*)*, i8*, %struct.lc_messages_T } +%struct.__xlocale_st_monetary = type { i32, void (i8*)*, i8*, %struct.lc_monetary_T } +%struct.__xlocale_st_numeric = type { i32, void (i8*)*, i8*, %struct.lc_numeric_T } +%struct.__xlocale_st_runelocale = type { i32, void (i8*)*, [32 x i8], i32, i32, i32 (i32*, i8*, i32, %union.__mbstate_t*, %struct._xlocale*)*, i32 (%union.__mbstate_t*, %struct._xlocale*)*, i32 (i32*, i8**, i32, i32, %union.__mbstate_t*, %struct._xlocale*)*, i32 (i8*, i32, %union.__mbstate_t*, %struct._xlocale*)*, i32 (i8*, i32**, i32, i32, %union.__mbstate_t*, %struct._xlocale*)*, i32, %struct._RuneLocale } +%struct.__xlocale_st_time = type { i32, void (i8*)*, 
i8*, %struct.lc_time_T } +%struct._xlocale = type { i32, void (i8*)*, %union.__mbstate_t, %union.__mbstate_t, %union.__mbstate_t, %union.__mbstate_t, %union.__mbstate_t, %union.__mbstate_t, %union.__mbstate_t, %union.__mbstate_t, %union.__mbstate_t, %union.__mbstate_t, i32, i64, i8, i8, i8, i8, i8, i8, i8, i8, i8, %struct.__xlocale_st_collate*, %struct.__xlocale_st_runelocale*, %struct.__xlocale_st_messages*, %struct.__xlocale_st_monetary*, %struct.__xlocale_st_numeric*, %struct._xlocale*, %struct.__xlocale_st_time*, %struct.lconv } +%struct.lc_messages_T = type { i8*, i8*, i8*, i8* } +%struct.lc_monetary_T = type { i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8* } +%struct.lc_numeric_T = type { i8*, i8*, i8* } +%struct.lc_time_T = type { [12 x i8*], [12 x i8*], [7 x i8*], [7 x i8*], i8*, i8*, i8*, i8*, i8*, i8*, [12 x i8*], i8*, i8* } +%struct.lconv = type { i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 } +%union.__mbstate_t = type { i64, [120 x i8] } + +@"\01_fnmatch.initial" = external constant %union.__mbstate_t, align 4 + +; CHECK: _fnmatch +; CHECK: blx _fnmatch1 + +define i32 @"\01_fnmatch"(i8* %pattern, i8* %string, i32 %flags) nounwind optsize { +entry: + %call4 = tail call i32 @fnmatch1(i8* %pattern, i8* %string, i8* %string, i32 %flags, %union.__mbstate_t* byval @"\01_fnmatch.initial", %union.__mbstate_t* byval @"\01_fnmatch.initial", %struct._xlocale* undef, i32 64) optsize + ret i32 %call4 +} + +declare i32 @fnmatch1(i8*, i8*, i8*, i32, %union.__mbstate_t* byval, %union.__mbstate_t* byval, %struct._xlocale*, i32) nounwind optsize
diff --git a/src/LLVM/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll b/src/LLVM/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll new file mode 100644 index 0000000..7baacfe --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -arm-tail-calls=1 | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin10" + +%struct.A = type <{ i16, i16, i32, i16, i16, i32, i16, [8 x %struct.B], [418 x i8], %struct.C }> +%struct.B = type <{ i32, i16, i16 }> +%struct.C = type { i16, i32, i16, i16 } + +; CHECK: f +; CHECK: push {r1, r2, r3} +; CHECK: add sp, #12 +; CHECK: b.w _puts + +define void @f(i8* %s, %struct.A* nocapture byval %a) nounwind optsize { +entry: + %puts = tail call i32 @puts(i8* %s) + ret void +} + +declare i32 @puts(i8* nocapture) nounwind
diff --git a/src/LLVM/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll b/src/LLVM/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll new file mode 100644 index 0000000..1b5b8a9 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 | FileCheck %s +; CHECK: .zerofill __DATA,__bss,__MergedGlobals,16,2 + +%struct.config = type { i16, i16, i16, i16 } + +@prev = external global [0 x i16] +@max_lazy_match = internal unnamed_addr global i32 0, align 4 +@read_buf = external global i32 (i8*, i32)* +@window = external global [0 x i8] +@lookahead = internal unnamed_addr global i32 0, align 4 +@eofile.b = internal unnamed_addr global i1 false +@ins_h = internal unnamed_addr global i32 0, align 4
diff --git a/src/LLVM/test/CodeGen/ARM/2011-07-10-GlobalMergeBug.ll b/src/LLVM/test/CodeGen/ARM/2011-07-10-GlobalMergeBug.ll new file mode 100644 index 0000000..2970cd2 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2011-07-10-GlobalMergeBug.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin10" + +; CHECK-NOT: MergedGlobals + +@a = internal unnamed_addr global i1 false +@b = internal global [64 x i8] zeroinitializer, align 64
diff --git a/src/LLVM/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll b/src/LLVM/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll new file mode 100644 index 0000000..f681c34 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
@@ -0,0 +1,124 @@ +; RUN: llc < %s | FileCheck %s + +; Check debug info output for merged global. +; DW_AT_location +; DW_OP_addr +; DW_OP_plus +; .long __MergedGlobals +; DW_OP_constu +; offset + +;CHECK: .ascii "x2" @ DW_AT_name +;CHECK-NEXT: .byte 0 +;CHECK-NEXT: @ DW_AT_type +;CHECK-NEXT: @ DW_AT_decl_file +;CHECK-NEXT: @ DW_AT_decl_line +;CHECK-NEXT: @ DW_AT_location +;CHECK-NEXT: .byte 3 +;CHECK-NEXT: .long __MergedGlobals +;CHECK-NEXT: .byte 16 +; 4 is byte offset of x2 in __MergedGobals +;CHECK-NEXT: .byte 4 +;CHECK-NEXT: .byte 34 + + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32" +target triple = "thumbv7-apple-macosx10.7.0" + +@x1 = internal unnamed_addr global i32 1, align 4 +@x2 = internal unnamed_addr global i32 2, align 4 +@x3 = internal unnamed_addr global i32 3, align 4 +@x4 = internal unnamed_addr global i32 4, align 4 +@x5 = global i32 0, align 4 + +define i32 @get1(i32 %a) nounwind optsize ssp { + tail call void @llvm.dbg.value(metadata !{i32 %a}, i64 0, metadata !10), !dbg !30 + %1 = load i32* @x1, align 4, !dbg !31 + tail call void @llvm.dbg.value(metadata !{i32 %1}, i64 0, metadata !11), !dbg !31 + store i32 %a, i32* @x1, align 4, !dbg !31 + ret i32 %1, !dbg !31 +} + +define i32 @get2(i32 %a) nounwind optsize ssp { + tail call void @llvm.dbg.value(metadata !{i32 %a}, i64 0, metadata !13), !dbg !32 + %1 = load i32* @x2, align 4, !dbg !33 + tail call void @llvm.dbg.value(metadata !{i32 %1}, i64 0, metadata !14), !dbg !33 + store i32 %a, i32* @x2, align 4, !dbg !33 + ret i32 %1, !dbg !33 +} + +define i32 @get3(i32 %a) nounwind optsize ssp { + tail call void @llvm.dbg.value(metadata !{i32 %a}, i64 0, metadata !16), !dbg !34 + %1 = load i32* @x3, align 4, !dbg !35 + tail call void @llvm.dbg.value(metadata !{i32 %1}, i64 0, metadata !17), !dbg !35 + store i32 %a, i32* @x3, align 4, !dbg !35 + ret i32 %1, !dbg !35 +} + +define i32 @get4(i32 %a) nounwind optsize 
ssp { + tail call void @llvm.dbg.value(metadata !{i32 %a}, i64 0, metadata !19), !dbg !36 + %1 = load i32* @x4, align 4, !dbg !37 + tail call void @llvm.dbg.value(metadata !{i32 %1}, i64 0, metadata !20), !dbg !37 + store i32 %a, i32* @x4, align 4, !dbg !37 + ret i32 %1, !dbg !37 +} + +define i32 @get5(i32 %a) nounwind optsize ssp { + tail call void @llvm.dbg.value(metadata !{i32 %a}, i64 0, metadata !27), !dbg !38 + %1 = load i32* @x5, align 4, !dbg !39 + tail call void @llvm.dbg.value(metadata !{i32 %1}, i64 0, metadata !28), !dbg !39 + store i32 %a, i32* @x5, align 4, !dbg !39 + ret i32 %1, !dbg !39 +} + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.cu = !{!0} +!llvm.dbg.sp = !{!1, !6, !7, !8, !9} +!llvm.dbg.lv.get1 = !{!10, !11} +!llvm.dbg.lv.get2 = !{!13, !14} +!llvm.dbg.lv.get3 = !{!16, !17} +!llvm.dbg.lv.get4 = !{!19, !20} +!llvm.dbg.gv = !{!22, !23, !24, !25, !26} +!llvm.dbg.lv.get5 = !{!27, !28} + +!0 = metadata !{i32 589841, i32 0, i32 12, metadata !"ss3.c", metadata !"/private/tmp", metadata !"clang", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!1 = metadata !{i32 589870, i32 0, metadata !2, metadata !"get1", metadata !"get1", metadata !"", metadata !2, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get1, null, null} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 589865, metadata !"ss3.c", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 589845, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5} +!5 = metadata !{i32 589860, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 589870, i32 0, metadata !2, metadata !"get2", metadata !"get2", metadata !"", metadata !2, i32 8, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 
true, i32 (i32)* @get2, null, null} ; [ DW_TAG_subprogram ] +!7 = metadata !{i32 589870, i32 0, metadata !2, metadata !"get3", metadata !"get3", metadata !"", metadata !2, i32 11, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get3, null, null} ; [ DW_TAG_subprogram ] +!8 = metadata !{i32 589870, i32 0, metadata !2, metadata !"get4", metadata !"get4", metadata !"", metadata !2, i32 14, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get4, null, null} ; [ DW_TAG_subprogram ] +!9 = metadata !{i32 589870, i32 0, metadata !2, metadata !"get5", metadata !"get5", metadata !"", metadata !2, i32 17, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get5, null, null} ; [ DW_TAG_subprogram ] +!10 = metadata !{i32 590081, metadata !1, metadata !"a", metadata !2, i32 16777221, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] +!11 = metadata !{i32 590080, metadata !12, metadata !"b", metadata !2, i32 5, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] +!12 = metadata !{i32 589835, metadata !1, i32 5, i32 19, metadata !2, i32 0} ; [ DW_TAG_lexical_block ] +!13 = metadata !{i32 590081, metadata !6, metadata !"a", metadata !2, i32 16777224, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] +!14 = metadata !{i32 590080, metadata !15, metadata !"b", metadata !2, i32 8, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] +!15 = metadata !{i32 589835, metadata !6, i32 8, i32 17, metadata !2, i32 1} ; [ DW_TAG_lexical_block ] +!16 = metadata !{i32 590081, metadata !7, metadata !"a", metadata !2, i32 16777227, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] +!17 = metadata !{i32 590080, metadata !18, metadata !"b", metadata !2, i32 11, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] +!18 = metadata !{i32 589835, metadata !7, i32 11, i32 19, metadata !2, i32 2} ; [ DW_TAG_lexical_block ] +!19 = metadata !{i32 590081, metadata !8, metadata !"a", metadata !2, i32 16777230, metadata !5, i32 0} ; [ 
DW_TAG_arg_variable ] +!20 = metadata !{i32 590080, metadata !21, metadata !"b", metadata !2, i32 14, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] +!21 = metadata !{i32 589835, metadata !8, i32 14, i32 19, metadata !2, i32 3} ; [ DW_TAG_lexical_block ] +!22 = metadata !{i32 589876, i32 0, metadata !0, metadata !"x5", metadata !"x5", metadata !"", metadata !2, i32 16, metadata !5, i32 0, i32 1, i32* @x5} ; [ DW_TAG_variable ] +!23 = metadata !{i32 589876, i32 0, metadata !0, metadata !"x4", metadata !"x4", metadata !"", metadata !2, i32 13, metadata !5, i32 1, i32 1, i32* @x4} ; [ DW_TAG_variable ] +!24 = metadata !{i32 589876, i32 0, metadata !0, metadata !"x3", metadata !"x3", metadata !"", metadata !2, i32 10, metadata !5, i32 1, i32 1, i32* @x3} ; [ DW_TAG_variable ] +!25 = metadata !{i32 589876, i32 0, metadata !0, metadata !"x2", metadata !"x2", metadata !"", metadata !2, i32 7, metadata !5, i32 1, i32 1, i32* @x2} ; [ DW_TAG_variable ] +!26 = metadata !{i32 589876, i32 0, metadata !0, metadata !"x1", metadata !"x1", metadata !"", metadata !2, i32 4, metadata !5, i32 1, i32 1, i32* @x1} ; [ DW_TAG_variable ] +!27 = metadata !{i32 590081, metadata !9, metadata !"a", metadata !2, i32 16777233, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] +!28 = metadata !{i32 590080, metadata !29, metadata !"b", metadata !2, i32 17, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] +!29 = metadata !{i32 589835, metadata !9, i32 17, i32 19, metadata !2, i32 4} ; [ DW_TAG_lexical_block ] +!30 = metadata !{i32 5, i32 16, metadata !1, null} +!31 = metadata !{i32 5, i32 32, metadata !12, null} +!32 = metadata !{i32 8, i32 14, metadata !6, null} +!33 = metadata !{i32 8, i32 29, metadata !15, null} +!34 = metadata !{i32 11, i32 16, metadata !7, null} +!35 = metadata !{i32 11, i32 32, metadata !18, null} +!36 = metadata !{i32 14, i32 16, metadata !8, null} +!37 = metadata !{i32 14, i32 32, metadata !21, null} +!38 = metadata !{i32 17, i32 16, metadata !9, null} +!39 = metadata !{i32 17, 
i32 32, metadata !29, null}
diff --git a/src/LLVM/test/CodeGen/ARM/2011-08-12-vmovqqqq-pseudo.ll b/src/LLVM/test/CodeGen/ARM/2011-08-12-vmovqqqq-pseudo.ll new file mode 100644 index 0000000..3cbc4cd --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2011-08-12-vmovqqqq-pseudo.ll
@@ -0,0 +1,12 @@ +; RUN: llc %s -mtriple=thumbv7-apple-darwin -verify-machineinstrs -mcpu=cortex-a9 -O0 -o - +; Make sure that the VMOVQQQQ pseudo instruction is handled properly +; by codegen. + +define void @test_vmovqqqq_pseudo() nounwind ssp { +entry: + %vld3_lane = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16(i8* undef, <8 x i16> undef, <8 x i16> undef, <8 x i16> zeroinitializer, i32 7, i32 2) + store { <8 x i16>, <8 x i16>, <8 x i16> } %vld3_lane, { <8 x i16>, <8 x i16>, <8 x i16> }* undef + ret void +} + +declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
diff --git a/src/LLVM/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll b/src/LLVM/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll new file mode 100644 index 0000000..17264ee --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll
@@ -0,0 +1,100 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s +; Test that ldmia_ret preserves implicit operands for return values. +; +; This CFG is reduced from a benchmark miscompile. With current +; if-conversion heuristics, one of the return paths is if-converted +; into sw.bb18 resulting in an ldmia_ret in the middle of the +; block. The postra scheduler needs to know that the return implicitly +; uses the return register, otherwise its antidep breaker scavenges +; the register in order to hoist the constant load required to test +; the switch. + +declare i32 @getint() +declare i1 @getbool() +declare void @foo(i32) +declare i32 @bar(i32) + +define i32 @test(i32 %in1, i32 %in2) nounwind { +entry: + %call = tail call zeroext i1 @getbool() nounwind + br i1 %call, label %sw.bb18, label %sw.bb2 + +sw.bb2: ; preds = %entry + %cmp = tail call zeroext i1 @getbool() nounwind + br i1 %cmp, label %sw.epilog58, label %land.lhs.true + +land.lhs.true: ; preds = %sw.bb2 + %cmp13 = tail call zeroext i1 @getbool() nounwind + br i1 %cmp13, label %if.then, label %sw.epilog58 + +if.then: ; preds = %land.lhs.true + tail call void @foo(i32 %in1) nounwind + br label %sw.epilog58 + +; load the return value +; CHECK: movs [[RRET:r.]], #2 +; hoist the switch constant without clobbering RRET +; CHECK: movw +; CHECK-NOT: [[RRET]] +; CHECK: , #63707 +; CHECK-NOT: [[RRET]] +; CHECK: tst +; If-convert the return +; CHECK: it ne +; Fold the CSR+return into a pop +; CHECK: popne {r4, r5, r7, pc} +sw.bb18: + %call20 = tail call i32 @bar(i32 %in2) nounwind + switch i32 %call20, label %sw.default56 [ + i32 168, label %sw.bb21 + i32 165, label %sw.bb21 + i32 261, label %sw.epilog58 + i32 188, label %sw.epilog58 + i32 187, label %sw.epilog58 + i32 186, label %sw.epilog58 + i32 185, label %sw.epilog58 + i32 184, label %sw.epilog58 + i32 175, label %sw.epilog58 + i32 174, label %sw.epilog58 + i32 173, label %sw.epilog58 + i32 172, label %sw.epilog58 + i32 171, 
label %sw.epilog58 + i32 167, label %sw.epilog58 + i32 166, label %sw.epilog58 + i32 164, label %sw.epilog58 + i32 163, label %sw.epilog58 + i32 161, label %sw.epilog58 + i32 160, label %sw.epilog58 + i32 -1, label %sw.bb33 + ] + +sw.bb21: ; preds = %sw.bb18, %sw.bb18 + tail call void @foo(i32 %in2) nounwind + %call28 = tail call i32 @getint() nounwind + %tobool = icmp eq i32 %call28, 0 + br i1 %tobool, label %if.then29, label %sw.epilog58 + +if.then29: ; preds = %sw.bb21 + tail call void @foo(i32 %in2) nounwind + br label %sw.epilog58 + +sw.bb33: ; preds = %sw.bb18 + %cmp42 = tail call zeroext i1 @getbool() nounwind + br i1 %cmp42, label %sw.default56, label %land.lhs.true44 + +land.lhs.true44: ; preds = %sw.bb33 + %call50 = tail call i32 @getint() nounwind + %cmp51 = icmp slt i32 %call50, 0 + br i1 %cmp51, label %if.then53, label %sw.default56 + +if.then53: ; preds = %land.lhs.true44 + tail call void @foo(i32 %in2) nounwind + br label %sw.default56 + +sw.default56: ; preds = %sw.bb33, %land.lhs.true44, %if.then53, %sw.bb18 + br label %sw.epilog58 + +sw.epilog58: + %retval.0 = phi i32 [ 4, %sw.default56 ], [ 2, %sw.bb21 ], [ 2, %if.then29 ], [ 2, %sw.bb18 ], [ 2, %sw.bb18 ], [ 2, %sw.bb18 ], [ 2, %sw.bb18 ], [ 2, %sw.bb18 ], [ 2, %sw.bb18 ], [ 2, %sw.bb18 ], [ 2, %sw.bb18 ], [ 2, %sw.bb18 ], [ 2, %sw.bb18 ], [ 2, %sw.bb18 ], [ 2, %sw.bb18 ], [ 2, %sw.bb18 ], [ 2, %sw.bb18 ], [ 2, %sw.bb18 ], [ 2, %sw.bb18 ], [ 2, %sw.bb18 ], [ 2, %sw.bb2 ], [ 2, %land.lhs.true ], [ 2, %if.then ] + ret i32 %retval.0 +}
diff --git a/src/LLVM/test/CodeGen/ARM/2011-08-29-SchedCycle.ll b/src/LLVM/test/CodeGen/ARM/2011-08-29-SchedCycle.ll new file mode 100644 index 0000000..be188ef --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2011-08-29-SchedCycle.ll
@@ -0,0 +1,45 @@ +; RUN: llc %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -o - + +; When a i64 sub is expanded to subc + sube. +; libcall #1 +; \ +; \ subc +; \ / \ +; \ / \ +; \ / libcall #2 +; sube +; +; If the libcalls are not serialized (i.e. both have chains which are dag +; entry), legalizer can serialize them in arbitrary orders. If it's +; unlucky, it can force libcall #2 before libcall #1 in the above case. +; +; subc +; | +; libcall #2 +; | +; libcall #1 +; | +; sube +; +; However since subc and sube are "glued" together, this ends up being a +; cycle when the scheduler combine subc and sube as a single scheduling +; unit. +; +; The right solution is to fix LegalizeType too chains the libcalls together. +; However, LegalizeType is not processing nodes in order. The fix now is to +; fix subc / sube (and addc / adde) to use physical register dependency instead. +; rdar://10019576 + +define void @t() nounwind { +entry: + %tmp = load i64* undef, align 4 + %tmp5 = udiv i64 %tmp, 30 + %tmp13 = and i64 %tmp5, 64739244643450880 + %tmp16 = sub i64 0, %tmp13 + %tmp19 = and i64 %tmp16, 63 + %tmp20 = urem i64 %tmp19, 3 + %tmp22 = and i64 %tmp16, -272346829004752 + store i64 %tmp22, i64* undef, align 4 + store i64 %tmp20, i64* undef, align 4 + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2011-08-29-ldr_pre_imm.ll b/src/LLVM/test/CodeGen/ARM/2011-08-29-ldr_pre_imm.ll new file mode 100644 index 0000000..6647ed8 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2011-08-29-ldr_pre_imm.ll
@@ -0,0 +1,34 @@ +; RUN: llc -O3 -mtriple=armv6-apple-darwin -relocation-model=pic < %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:64-n32" + +define void @compdecomp() nounwind { +entry: + %heap = alloca [256 x i32], align 4 + br i1 undef, label %bb25.lr.ph, label %bb17 + +bb17: ; preds = %bb17, %entry + br label %bb17 + +bb25.lr.ph: ; preds = %entry + %0 = sdiv i32 undef, 2 + br label %bb5.i + +bb.i: ; preds = %bb5.i + %1 = shl nsw i32 %k_addr.0.i, 1 + %.sum8.i = add i32 %1, -1 + %2 = getelementptr inbounds [256 x i32]* %heap, i32 0, i32 %.sum8.i + %3 = load i32* %2, align 4 + br i1 false, label %bb5.i, label %bb4.i + +bb4.i: ; preds = %bb.i + %.sum10.i = add i32 %k_addr.0.i, -1 + %4 = getelementptr inbounds [256 x i32]* %heap, i32 0, i32 %.sum10.i + store i32 %3, i32* %4, align 4 + br label %bb5.i + +bb5.i: ; preds = %bb5.i, %bb4.i, %bb.i, %bb25.lr.ph + %k_addr.0.i = phi i32 [ %1, %bb4.i ], [ undef, %bb25.lr.ph ], [ undef, %bb5.i ], [ undef, %bb.i ] + %5 = icmp slt i32 %0, %k_addr.0.i + br i1 %5, label %bb5.i, label %bb.i +}
diff --git a/src/LLVM/test/CodeGen/ARM/2011-09-09-OddVectorDivision.ll b/src/LLVM/test/CodeGen/ARM/2011-09-09-OddVectorDivision.ll new file mode 100644 index 0000000..8fe9102 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2011-09-09-OddVectorDivision.ll
@@ -0,0 +1,23 @@ +; RUN: llc -mtriple=armv7-- < %s -mattr=-neon + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32" +target triple = "armv7-none-linux-gnueabi" + +@x1 = common global <3 x i16> zeroinitializer +@y1 = common global <3 x i16> zeroinitializer +@z1 = common global <3 x i16> zeroinitializer +@x2 = common global <4 x i16> zeroinitializer +@y2 = common global <4 x i16> zeroinitializer +@z2 = common global <4 x i16> zeroinitializer + +define void @f() { + %1 = load <3 x i16>* @x1 + %2 = load <3 x i16>* @y1 + %3 = sdiv <3 x i16> %1, %2 + store <3 x i16> %3, <3 x i16>* @z1 + %4 = load <4 x i16>* @x2 + %5 = load <4 x i16>* @y2 + %6 = sdiv <4 x i16> %4, %5 + store <4 x i16> %6, <4 x i16>* @z2 + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/2011-09-19-cpsr.ll b/src/LLVM/test/CodeGen/ARM/2011-09-19-cpsr.ll new file mode 100644 index 0000000..749a6d2 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2011-09-19-cpsr.ll
@@ -0,0 +1,54 @@ +; RUN: llc -march=thumb -mcpu=cortex-a8 < %s +; rdar://problem/10137436: sqlite3 miscompile +; +; CHECK: subs +; CHECK: cmp +; CHECK: it + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32" +target triple = "thumbv7-apple-ios4.0.0" + +declare i8* @__memset_chk(i8*, i32, i32, i32) nounwind + +define hidden fastcc i32 @sqlite3VdbeExec(i32* %p) nounwind { +entry: + br label %sqlite3VarintLen.exit7424 + +sqlite3VarintLen.exit7424: ; preds = %do.body.i7423 + br label %do.body.i + +do.body.i: ; preds = %do.body.i, %sqlite3VarintLen.exit7424 + br i1 undef, label %do.body.i, label %sqlite3VarintLen.exit + +sqlite3VarintLen.exit: ; preds = %do.body.i + %sub2322 = add i64 undef, undef + br i1 undef, label %too_big, label %if.end2327 + +if.end2327: ; preds = %sqlite3VarintLen.exit + br i1 undef, label %if.end2341, label %no_mem + +if.end2341: ; preds = %if.end2327 + br label %for.body2355 + +for.body2355: ; preds = %for.body2355, %if.end2341 + %add2366 = add nsw i32 undef, undef + br i1 undef, label %for.body2377, label %for.body2355 + +for.body2377: ; preds = %for.body2355 + %conv23836154 = zext i32 %add2366 to i64 + %sub2384 = sub i64 %sub2322, %conv23836154 + %conv2385 = trunc i64 %sub2384 to i32 + %len.0.i = select i1 undef, i32 %conv2385, i32 undef + %sub.i7384 = sub nsw i32 %len.0.i, 0 + %call.i.i7385 = call i8* @__memset_chk(i8* undef, i32 0, i32 %sub.i7384, i32 undef) nounwind + unreachable + +too_big: ; preds = %sqlite3VarintLen.exit + unreachable + +no_mem: ; preds = %if.end2327, %for.body, %entry.no_mem_crit_edge + unreachable + +sqlite3ErrStr.exit: ; preds = %if.then82 + unreachable +}
diff --git a/src/LLVM/test/CodeGen/ARM/2011-09-28-CMovCombineBug.ll b/src/LLVM/test/CodeGen/ARM/2011-09-28-CMovCombineBug.ll new file mode 100644 index 0000000..c6f4a93 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/2011-09-28-CMovCombineBug.ll
@@ -0,0 +1,30 @@ +; RUN: llc -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 < %s + +; rdar://10196296 +; ARM target specific dag combine created a cycle in DAG. + +define void @t() nounwind ssp { + %1 = load i64* undef, align 4 + %2 = shl i32 5, 0 + %3 = zext i32 %2 to i64 + %4 = and i64 %1, %3 + %5 = lshr i64 %4, undef + switch i64 %5, label %8 [ + i64 0, label %9 + i64 1, label %6 + i64 4, label %9 + i64 5, label %7 + ] + +; <label>:6 ; preds = %0 + unreachable + +; <label>:7 ; preds = %0 + unreachable + +; <label>:8 ; preds = %0 + unreachable + +; <label>:9 ; preds = %0, %0 + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/addrmode.ll b/src/LLVM/test/CodeGen/ARM/addrmode.ll new file mode 100644 index 0000000..0b8e2c0 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/addrmode.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=arm -stats |& grep asm-printer | grep 4 + +define i32 @t1(i32 %a) { + %b = mul i32 %a, 9 + %c = inttoptr i32 %b to i32* + %d = load i32* %c + ret i32 %d +} + +define i32 @t2(i32 %a) { + %b = mul i32 %a, -7 + %c = inttoptr i32 %b to i32* + %d = load i32* %c + ret i32 %d +}
diff --git a/src/LLVM/test/CodeGen/ARM/aliases.ll b/src/LLVM/test/CodeGen/ARM/aliases.ll new file mode 100644 index 0000000..62cbc14 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/aliases.ll
@@ -0,0 +1,31 @@ +; RUN: llc < %s -mtriple=arm-linux-gnueabi -o %t +; RUN: grep { = } %t | count 5 +; RUN: grep globl %t | count 4 +; RUN: grep weak %t | count 1 + +@bar = external global i32 +@foo1 = alias i32* @bar +@foo2 = alias i32* @bar + +%FunTy = type i32() + +declare i32 @foo_f() +@bar_f = alias weak %FunTy* @foo_f + +@bar_i = alias internal i32* @bar + +@A = alias bitcast (i32* @bar to i64*) + +define i32 @test() { +entry: + %tmp = load i32* @foo1 + %tmp1 = load i32* @foo2 + %tmp0 = load i32* @bar_i + %tmp2 = call i32 @foo_f() + %tmp3 = add i32 %tmp, %tmp2 + %tmp4 = call %FunTy* @bar_f() + %tmp5 = add i32 %tmp3, %tmp4 + %tmp6 = add i32 %tmp1, %tmp5 + %tmp7 = add i32 %tmp6, %tmp0 + ret i32 %tmp7 +}
diff --git a/src/LLVM/test/CodeGen/ARM/align.ll b/src/LLVM/test/CodeGen/ARM/align.ll new file mode 100644 index 0000000..dde8190 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/align.ll
@@ -0,0 +1,42 @@ +; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=ELF +; RUN: llc < %s -mtriple=arm-apple-darwin10 | FileCheck %s -check-prefix=DARWIN + +@a = global i1 true +; no alignment + +@b = global i8 1 +; no alignment + +@c = global i16 2 +;ELF: .align 1 +;ELF: c: +;DARWIN: .align 1 +;DARWIN: _c: + +@d = global i32 3 +;ELF: .align 2 +;ELF: d: +;DARWIN: .align 2 +;DARWIN: _d: + +@e = global i64 4 +;ELF: .align 3 +;ELF: e +;DARWIN: .align 3 +;DARWIN: _e: + +@f = global float 5.0 +;ELF: .align 2 +;ELF: f: +;DARWIN: .align 2 +;DARWIN: _f: + +@g = global double 6.0 +;ELF: .align 3 +;ELF: g: +;DARWIN: .align 3 +;DARWIN: _g: + +@bar = common global [75 x i8] zeroinitializer, align 128 +;ELF: .comm bar,75,128 +;DARWIN: .comm _bar,75,7
diff --git a/src/LLVM/test/CodeGen/ARM/alloca.ll b/src/LLVM/test/CodeGen/ARM/alloca.ll new file mode 100644 index 0000000..599690e --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/alloca.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=arm -mtriple=arm-linux-gnu | FileCheck %s + +define void @f(i32 %a) { +entry: +; CHECK: add r11, sp, #4 + %tmp = alloca i8, i32 %a ; <i8*> [#uses=1] + call void @g( i8* %tmp, i32 %a, i32 1, i32 2, i32 3 ) + ret void +; CHECK: sub sp, r11, #4 +} + +declare void @g(i8*, i32, i32, i32, i32)
diff --git a/src/LLVM/test/CodeGen/ARM/argaddr.ll b/src/LLVM/test/CodeGen/ARM/argaddr.ll new file mode 100644 index 0000000..15e8c19 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/argaddr.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -march=arm + +define void @f(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) { +entry: + %a_addr = alloca i32 ; <i32*> [#uses=2] + %b_addr = alloca i32 ; <i32*> [#uses=2] + %c_addr = alloca i32 ; <i32*> [#uses=2] + %d_addr = alloca i32 ; <i32*> [#uses=2] + %e_addr = alloca i32 ; <i32*> [#uses=2] + store i32 %a, i32* %a_addr + store i32 %b, i32* %b_addr + store i32 %c, i32* %c_addr + store i32 %d, i32* %d_addr + store i32 %e, i32* %e_addr + call void @g( i32* %a_addr, i32* %b_addr, i32* %c_addr, i32* %d_addr, i32* %e_addr ) + ret void +} + +declare void @g(i32*, i32*, i32*, i32*, i32*)
diff --git a/src/LLVM/test/CodeGen/ARM/arguments-nosplit-double.ll b/src/LLVM/test/CodeGen/ARM/arguments-nosplit-double.ll new file mode 100644 index 0000000..770e41d --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/arguments-nosplit-double.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -mtriple=arm-linux-gnueabi | not grep r3 +; PR4059 + +define i32 @f(i64 %z, i32 %a, double %b) { + %tmp = call i32 @g(double %b) + ret i32 %tmp +} + +declare i32 @g(double)
diff --git a/src/LLVM/test/CodeGen/ARM/arguments-nosplit-i64.ll b/src/LLVM/test/CodeGen/ARM/arguments-nosplit-i64.ll new file mode 100644 index 0000000..815edfd --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/arguments-nosplit-i64.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -mtriple=arm-linux-gnueabi | not grep r3 +; PR4058 + +define i32 @f(i64 %z, i32 %a, i64 %b) { + %tmp = call i32 @g(i64 %b) + ret i32 %tmp +} + +declare i32 @g(i64)
diff --git a/src/LLVM/test/CodeGen/ARM/arguments.ll b/src/LLVM/test/CodeGen/ARM/arguments.ll new file mode 100644 index 0000000..82912f1 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/arguments.ll
@@ -0,0 +1,43 @@ +; RUN: llc < %s -mtriple=arm-linux-gnueabi -mattr=+vfp2 | FileCheck %s -check-prefix=ELF +; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+vfp2 | FileCheck %s -check-prefix=DARWIN + +define i32 @f1(i32 %a, i64 %b) { +; ELF: f1: +; ELF: mov r0, r2 +; DARWIN: f1: +; DARWIN: mov r0, r1 + %tmp = call i32 @g1(i64 %b) + ret i32 %tmp +} + +; test that allocating the double to r2/r3 makes r1 unavailable on gnueabi. +define i32 @f2() nounwind optsize { +; ELF: f2: +; ELF: mov [[REGISTER:(r[0-9]+)]], #128 +; ELF: str [[REGISTER]], [ +; DARWIN: f2: +; DARWIN: mov r3, #128 +entry: + %0 = tail call i32 (i32, ...)* @g2(i32 5, double 1.600000e+01, i32 128) nounwind optsize ; <i32> [#uses=1] + %not. = icmp ne i32 %0, 128 ; <i1> [#uses=1] + %.0 = zext i1 %not. to i32 ; <i32> [#uses=1] + ret i32 %.0 +} + +; test that on gnueabi a 64 bit value at this position will cause r3 to go +; unused and the value stored in [sp] +; ELF: f3: +; ELF: ldr r0, [sp] +; ELF-NEXT: mov pc, lr +; DARWIN: f3: +; DARWIN: mov r0, r3 +; DARWIN-NEXT: mov pc, lr +define i32 @f3(i32 %i, i32 %j, i32 %k, i64 %l, ...) { +entry: + %0 = trunc i64 %l to i32 + ret i32 %0 +} + +declare i32 @g1(i64) + +declare i32 @g2(i32 %i, ...)
diff --git a/src/LLVM/test/CodeGen/ARM/arguments2.ll b/src/LLVM/test/CodeGen/ARM/arguments2.ll new file mode 100644 index 0000000..a515ad7 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/arguments2.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -mtriple=arm-linux-gnueabi +; RUN: llc < %s -mtriple=arm-apple-darwin + +define i32 @f(i32 %a, i128 %b) { + %tmp = call i32 @g(i128 %b) + ret i32 %tmp +} + +declare i32 @g(i128)
diff --git a/src/LLVM/test/CodeGen/ARM/arguments3.ll b/src/LLVM/test/CodeGen/ARM/arguments3.ll new file mode 100644 index 0000000..58f64c6 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/arguments3.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -mtriple=arm-linux-gnueabi +; RUN: llc < %s -mtriple=arm-apple-darwin + +define i64 @f(i32 %a, i128 %b) { + %tmp = call i64 @g(i128 %b) + ret i64 %tmp +} + +declare i64 @g(i128)
diff --git a/src/LLVM/test/CodeGen/ARM/arguments4.ll b/src/LLVM/test/CodeGen/ARM/arguments4.ll new file mode 100644 index 0000000..f5f4207b --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/arguments4.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -mtriple=arm-linux-gnueabi +; RUN: llc < %s -mtriple=arm-apple-darwin + +define float @f(i32 %a, i128 %b) { + %tmp = call float @g(i128 %b) + ret float %tmp +} + +declare float @g(i128)
diff --git a/src/LLVM/test/CodeGen/ARM/arguments5.ll b/src/LLVM/test/CodeGen/ARM/arguments5.ll new file mode 100644 index 0000000..388a8eb --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/arguments5.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -mtriple=arm-linux-gnueabi +; RUN: llc < %s -mtriple=arm-apple-darwin + +define double @f(i32 %a, i128 %b) { + %tmp = call double @g(i128 %b) + ret double %tmp +} + +declare double @g(i128)
diff --git a/src/LLVM/test/CodeGen/ARM/arguments6.ll b/src/LLVM/test/CodeGen/ARM/arguments6.ll new file mode 100644 index 0000000..3f757fe --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/arguments6.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -mtriple=arm-linux-gnueabi +; RUN: llc < %s -mtriple=arm-apple-darwin + +define i128 @f(i32 %a, i128 %b) { + %tmp = call i128 @g(i128 %b) + ret i128 %tmp +} + +declare i128 @g(i128)
diff --git a/src/LLVM/test/CodeGen/ARM/arguments7.ll b/src/LLVM/test/CodeGen/ARM/arguments7.ll new file mode 100644 index 0000000..fa97ee8 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/arguments7.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -mtriple=arm-linux-gnueabi +; RUN: llc < %s -mtriple=arm-apple-darwin + +define double @f(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, double %b) { + %tmp = call double @g(i32 %a2, i32 %a3, i32 %a4, i32 %a5, double %b) + ret double %tmp +} + +declare double @g(i32, i32, i32, i32, double)
diff --git a/src/LLVM/test/CodeGen/ARM/arguments8.ll b/src/LLVM/test/CodeGen/ARM/arguments8.ll new file mode 100644 index 0000000..abe059b --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/arguments8.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -mtriple=arm-linux-gnueabi +; RUN: llc < %s -mtriple=arm-apple-darwin + +define i64 @f(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i64 %b) { + %tmp = call i64 @g(i32 %a2, i32 %a3, i32 %a4, i32 %a5, i64 %b) + ret i64 %tmp +} + +declare i64 @g(i32, i32, i32, i32, i64)
diff --git a/src/LLVM/test/CodeGen/ARM/arguments_f64_backfill.ll b/src/LLVM/test/CodeGen/ARM/arguments_f64_backfill.ll new file mode 100644 index 0000000..062133e --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/arguments_f64_backfill.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -mtriple=arm-linux-gnueabi -mattr=+vfp2 -float-abi=hard | FileCheck %s + +define float @f(float %z, double %a, float %b) { +; CHECK: vmov.f32 s0, s1 + %tmp = call float @g(float %b) + ret float %tmp +} + +declare float @g(float)
diff --git a/src/LLVM/test/CodeGen/ARM/arm-and-tst-peephole.ll b/src/LLVM/test/CodeGen/ARM/arm-and-tst-peephole.ll new file mode 100644 index 0000000..0762070 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/arm-and-tst-peephole.ll
@@ -0,0 +1,112 @@ +; RUN: llc < %s -march=arm | FileCheck -check-prefix=ARM %s +; RUN: llc < %s -march=thumb | FileCheck -check-prefix=THUMB %s +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck -check-prefix=T2 %s + +; FIXME: The -march=thumb test doesn't change if -disable-peephole is specified. + +%struct.Foo = type { i8* } + +; ARM: foo +; THUMB: foo +; T2: foo +define %struct.Foo* @foo(%struct.Foo* %this, i32 %acc) nounwind readonly align 2 { +entry: + %scevgep = getelementptr %struct.Foo* %this, i32 1 + br label %tailrecurse + +tailrecurse: ; preds = %sw.bb, %entry + %lsr.iv2 = phi %struct.Foo* [ %scevgep3, %sw.bb ], [ %scevgep, %entry ] + %lsr.iv = phi i32 [ %lsr.iv.next, %sw.bb ], [ 1, %entry ] + %acc.tr = phi i32 [ %or, %sw.bb ], [ %acc, %entry ] + %lsr.iv24 = bitcast %struct.Foo* %lsr.iv2 to i8** + %scevgep5 = getelementptr i8** %lsr.iv24, i32 -1 + %tmp2 = load i8** %scevgep5 + %0 = ptrtoint i8* %tmp2 to i32 + +; ARM: ands {{r[0-9]+}}, {{r[0-9]+}}, #3 +; ARM-NEXT: beq + +; THUMB: movs r[[R0:[0-9]+]], #3 +; THUMB-NEXT: ands r[[R0]], r +; THUMB-NEXT: cmp r[[R0]], #0 +; THUMB-NEXT: beq + +; T2: ands {{r[0-9]+}}, {{r[0-9]+}}, #3 +; T2-NEXT: beq + + %and = and i32 %0, 3 + %tst = icmp eq i32 %and, 0 + br i1 %tst, label %sw.bb, label %tailrecurse.switch + +tailrecurse.switch: ; preds = %tailrecurse + switch i32 %and, label %sw.epilog [ + i32 1, label %sw.bb + i32 3, label %sw.bb6 + i32 2, label %sw.bb8 + ] + +sw.bb: ; preds = %tailrecurse.switch, %tailrecurse + %shl = shl i32 %acc.tr, 1 + %or = or i32 %and, %shl + %lsr.iv.next = add i32 %lsr.iv, 1 + %scevgep3 = getelementptr %struct.Foo* %lsr.iv2, i32 1 + br label %tailrecurse + +sw.bb6: ; preds = %tailrecurse.switch + ret %struct.Foo* %lsr.iv2 + +sw.bb8: ; preds = %tailrecurse.switch + %tmp1 = add i32 %acc.tr, %lsr.iv + %add.ptr11 = getelementptr inbounds %struct.Foo* %this, i32 %tmp1 + ret %struct.Foo* %add.ptr11 + +sw.epilog: ; preds = %tailrecurse.switch + ret %struct.Foo* undef +} + +; Another test 
that exercises the AND/TST peephole optimization and also +; generates a predicated ANDS instruction. Check that the predicate is printed +; after the "S" modifier on the instruction. + +%struct.S = type { i8* (i8*)*, [1 x i8] } + +; ARM: bar +; THUMB: bar +; T2: bar +define internal zeroext i8 @bar(%struct.S* %x, %struct.S* nocapture %y) nounwind readonly { +entry: + %0 = getelementptr inbounds %struct.S* %x, i32 0, i32 1, i32 0 + %1 = load i8* %0, align 1 + %2 = zext i8 %1 to i32 +; ARM: ands +; THUMB: ands +; T2: ands + %3 = and i32 %2, 112 + %4 = icmp eq i32 %3, 0 + br i1 %4, label %return, label %bb + +bb: ; preds = %entry + %5 = getelementptr inbounds %struct.S* %y, i32 0, i32 1, i32 0 + %6 = load i8* %5, align 1 + %7 = zext i8 %6 to i32 +; ARM: andsne +; THUMB: ands +; T2: andsne + %8 = and i32 %7, 112 + %9 = icmp eq i32 %8, 0 + br i1 %9, label %return, label %bb2 + +bb2: ; preds = %bb + %10 = icmp eq i32 %3, 16 + %11 = icmp eq i32 %8, 16 + %or.cond = or i1 %10, %11 + br i1 %or.cond, label %bb4, label %return + +bb4: ; preds = %bb2 + %12 = ptrtoint %struct.S* %x to i32 + %phitmp = trunc i32 %12 to i8 + ret i8 %phitmp + +return: ; preds = %bb2, %bb, %entry + ret i8 1 +}
diff --git a/src/LLVM/test/CodeGen/ARM/arm-asm.ll b/src/LLVM/test/CodeGen/ARM/arm-asm.ll new file mode 100644 index 0000000..bc4fb50 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/arm-asm.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=arm + +define void @frame_dummy() { +entry: + %tmp1 = tail call void (i8*)* (void (i8*)*)* asm "", "=r,0,~{dirflag},~{fpsr},~{flags}"( void (i8*)* null ) ; <void (i8*)*> [#uses=0] + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/arm-frameaddr.ll b/src/LLVM/test/CodeGen/ARM/arm-frameaddr.ll new file mode 100644 index 0000000..2cf1422 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/arm-frameaddr.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=DARWIN +; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=LINUX +; PR4344 +; PR4416 + +define i8* @t() nounwind { +entry: +; DARWIN: t: +; DARWIN: mov r0, r7 + +; LINUX: t: +; LINUX: mov r0, r11 + %0 = call i8* @llvm.frameaddress(i32 0) + ret i8* %0 +} + +declare i8* @llvm.frameaddress(i32) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/ARM/arm-modifier.ll b/src/LLVM/test/CodeGen/ARM/arm-modifier.ll new file mode 100644 index 0000000..396de37 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/arm-modifier.ll
@@ -0,0 +1,59 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s + +define i32 @foo(float %scale, float %scale2) nounwind { +entry: + %scale.addr = alloca float, align 4 + %scale2.addr = alloca float, align 4 + store float %scale, float* %scale.addr, align 4 + store float %scale2, float* %scale2.addr, align 4 + %tmp = load float* %scale.addr, align 4 + %tmp1 = load float* %scale2.addr, align 4 + call void asm sideeffect "vmul.f32 q0, q0, ${0:y} \0A\09vmul.f32 q1, q1, ${0:y} \0A\09vmul.f32 q1, q0, ${1:y} \0A\09", "w,w,~{q0},~{q1}"(float %tmp, float %tmp1) nounwind + ret i32 0 +} + +define void @f0() nounwind { +entry: +; CHECK: f0 +; CHECK: .word -1 +call void asm sideeffect ".word ${0:B} \0A\09", "i"(i32 0) nounwind +ret void +} + +define void @f1() nounwind { +entry: +; CHECK: f1 +; CHECK: .word 65535 +call void asm sideeffect ".word ${0:L} \0A\09", "i"(i32 -1) nounwind +ret void +} + +@f2_ptr = internal global i32* @f2_var, align 4 +@f2_var = external global i32 + +define void @f2() nounwind { +entry: +; CHECK: f2 +; CHECK: ldr r0, [r{{[0-9]+}}] +call void asm sideeffect "ldr r0, [${0:m}]\0A\09", "*m,~{r0}"(i32** @f2_ptr) nounwind +ret void +} + +@f3_ptr = internal global i64* @f3_var, align 4 +@f3_var = external global i64 +@f3_var2 = external global i64 + +define void @f3() nounwind { +entry: +; CHECK: f3 +; CHECK: stm {{lr|r[0-9]+}}, {[[REG1:(r[0-9]+)]], r{{[0-9]+}}} +; CHECK: adds {{lr|r[0-9]+}}, [[REG1]] +; CHECK: ldm {{lr|r[0-9]+}}, {r{{[0-9]+}}, r{{[0-9]+}}} +%tmp = load i64* @f3_var, align 4 +%tmp1 = load i64* @f3_var2, align 4 +%0 = call i64 asm sideeffect "stm ${0:m}, ${1:M}\0A\09adds $3, $1\0A\09", "=*m,=r,1,r"(i64** @f3_ptr, i64 %tmp, i64 %tmp1) nounwind +store i64 %0, i64* @f3_var, align 4 +%1 = call i64 asm sideeffect "ldm ${1:m}, ${0:M}\0A\09", "=r,*m"(i64** @f3_ptr) nounwind +store i64 %1, i64* @f3_var, align 4 +ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/arm-negative-stride.ll b/src/LLVM/test/CodeGen/ARM/arm-negative-stride.ll new file mode 100644 index 0000000..0dc58e5 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/arm-negative-stride.ll
@@ -0,0 +1,47 @@ +; RUN: llc < %s -march=arm | FileCheck %s + +; This loop is rewritten with an indvar which counts down, which +; frees up a register from holding the trip count. + +define void @test(i32* %P, i32 %A, i32 %i) nounwind { +entry: +; CHECK: str r1, [{{r.*}}, {{r.*}}, lsl #2] + icmp eq i32 %i, 0 ; <i1>:0 [#uses=1] + br i1 %0, label %return, label %bb + +bb: ; preds = %bb, %entry + %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2] + %i_addr.09.0 = sub i32 %i, %indvar ; <i32> [#uses=1] + %tmp2 = getelementptr i32* %P, i32 %i_addr.09.0 ; <i32*> [#uses=1] + store i32 %A, i32* %tmp2 + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2] + icmp eq i32 %indvar.next, %i ; <i1>:1 [#uses=1] + br i1 %1, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +} + +; This loop has a non-address use of the count-up indvar, so +; it'll remain. Now the original store uses a negative-stride address. + +define void @test_with_forced_iv(i32* %P, i32 %A, i32 %i) nounwind { +entry: +; CHECK: str r1, [{{r.*}}, -{{r.*}}, lsl #2] + icmp eq i32 %i, 0 ; <i1>:0 [#uses=1] + br i1 %0, label %return, label %bb + +bb: ; preds = %bb, %entry + %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2] + %i_addr.09.0 = sub i32 %i, %indvar ; <i32> [#uses=1] + %tmp2 = getelementptr i32* %P, i32 %i_addr.09.0 ; <i32*> [#uses=1] + store i32 %A, i32* %tmp2 + store i32 %indvar, i32* null + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2] + icmp eq i32 %indvar.next, %i ; <i1>:1 [#uses=1] + br i1 %1, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +} +
diff --git a/src/LLVM/test/CodeGen/ARM/arm-returnaddr.ll b/src/LLVM/test/CodeGen/ARM/arm-returnaddr.ll new file mode 100644 index 0000000..95edaad --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/arm-returnaddr.ll
@@ -0,0 +1,28 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=basic | FileCheck %s +; RUN: llc < %s -mtriple=thumbv6-apple-darwin -regalloc=basic | FileCheck %s +; rdar://8015977 +; rdar://8020118 + +define i8* @rt0(i32 %x) nounwind readnone { +entry: +; CHECK: rt0: +; CHECK: {r7, lr} +; CHECK: mov r0, lr + %0 = tail call i8* @llvm.returnaddress(i32 0) + ret i8* %0 +} + +define i8* @rt2() nounwind readnone { +entry: +; CHECK: rt2: +; CHECK: {r7, lr} +; CHECK: ldr r[[R0:[0-9]+]], [r7] +; CHECK: ldr r0, [r0] +; CHECK: ldr r0, [r0, #4] + %0 = tail call i8* @llvm.returnaddress(i32 2) + ret i8* %0 +} + +declare i8* @llvm.returnaddress(i32) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/ARM/armv4.ll b/src/LLVM/test/CodeGen/ARM/armv4.ll new file mode 100644 index 0000000..6b213d5 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/armv4.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -mtriple=armv4t-unknown-eabi | FileCheck %s -check-prefix=THUMB +; RUN: llc < %s -mtriple=armv4-unknown-eabi -mcpu=strongarm | FileCheck %s -check-prefix=ARM +; RUN: llc < %s -mtriple=armv7-unknown-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=THUMB +; RUN: llc < %s -mtriple=armv6-unknown-eabi | FileCheck %s -check-prefix=THUMB +; RUN: llc < %s -mtriple=armv4-unknown-eabi | FileCheck %s -check-prefix=ARM +; RUN: llc < %s -mtriple=armv4t-unknown-eabi | FileCheck %s -check-prefix=THUMB + +define i32 @test(i32 %a) nounwind readnone { +entry: +; ARM: mov pc +; THUMB: bx + ret i32 %a +}
diff --git a/src/LLVM/test/CodeGen/ARM/atomic-64bit.ll b/src/LLVM/test/CodeGen/ARM/atomic-64bit.ll new file mode 100644 index 0000000..e9609ac --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/atomic-64bit.ll
@@ -0,0 +1,128 @@ +; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s + +define i64 @test1(i64* %ptr, i64 %val) { +; CHECK: test1 +; CHECK: dmb ish +; CHECK: ldrexd r2, r3 +; CHECK: adds r0, r2 +; CHECK: adc r1, r3 +; CHECK: strexd {{[a-z0-9]+}}, r0, r1 +; CHECK: cmp +; CHECK: bne +; CHECK: dmb ish + %r = atomicrmw add i64* %ptr, i64 %val seq_cst + ret i64 %r +} + +define i64 @test2(i64* %ptr, i64 %val) { +; CHECK: test2 +; CHECK: dmb ish +; CHECK: ldrexd r2, r3 +; CHECK: subs r0, r2 +; CHECK: sbc r1, r3 +; CHECK: strexd {{[a-z0-9]+}}, r0, r1 +; CHECK: cmp +; CHECK: bne +; CHECK: dmb ish + %r = atomicrmw sub i64* %ptr, i64 %val seq_cst + ret i64 %r +} + +define i64 @test3(i64* %ptr, i64 %val) { +; CHECK: test3 +; CHECK: dmb ish +; CHECK: ldrexd r2, r3 +; CHECK: and r0, r2 +; CHECK: and r1, r3 +; CHECK: strexd {{[a-z0-9]+}}, r0, r1 +; CHECK: cmp +; CHECK: bne +; CHECK: dmb ish + %r = atomicrmw and i64* %ptr, i64 %val seq_cst + ret i64 %r +} + +define i64 @test4(i64* %ptr, i64 %val) { +; CHECK: test4 +; CHECK: dmb ish +; CHECK: ldrexd r2, r3 +; CHECK: orr r0, r2 +; CHECK: orr r1, r3 +; CHECK: strexd {{[a-z0-9]+}}, r0, r1 +; CHECK: cmp +; CHECK: bne +; CHECK: dmb ish + %r = atomicrmw or i64* %ptr, i64 %val seq_cst + ret i64 %r +} + +define i64 @test5(i64* %ptr, i64 %val) { +; CHECK: test5 +; CHECK: dmb ish +; CHECK: ldrexd r2, r3 +; CHECK: eor r0, r2 +; CHECK: eor r1, r3 +; CHECK: strexd {{[a-z0-9]+}}, r0, r1 +; CHECK: cmp +; CHECK: bne +; CHECK: dmb ish + %r = atomicrmw xor i64* %ptr, i64 %val seq_cst + ret i64 %r +} + +define i64 @test6(i64* %ptr, i64 %val) { +; CHECK: test6 +; CHECK: dmb ish +; CHECK: ldrexd r2, r3 +; CHECK: strexd {{[a-z0-9]+}}, r0, r1 +; CHECK: cmp +; CHECK: bne +; CHECK: dmb ish + %r = atomicrmw xchg i64* %ptr, i64 %val seq_cst + ret i64 %r +} + +define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) { +; CHECK: test7 +; CHECK: dmb ish +; CHECK: ldrexd r2, r3 +; CHECK: cmp r2 +; CHECK: cmpeq r3 +; CHECK: bne +; CHECK: strexd {{[a-z0-9]+}}, r0, 
r1 +; CHECK: cmp +; CHECK: bne +; CHECK: dmb ish + %r = cmpxchg i64* %ptr, i64 %val1, i64 %val2 seq_cst + ret i64 %r +} + +; Compiles down to cmpxchg +; FIXME: Should compile to a single ldrexd +define i64 @test8(i64* %ptr) { +; CHECK: test8 +; CHECK: ldrexd r2, r3 +; CHECK: cmp r2 +; CHECK: cmpeq r3 +; CHECK: bne +; CHECK: strexd {{[a-z0-9]+}}, r0, r1 +; CHECK: cmp +; CHECK: bne +; CHECK: dmb ish + %r = load atomic i64* %ptr seq_cst, align 8 + ret i64 %r +} + +; Compiles down to atomicrmw xchg; there really isn't any more efficient +; way to write it. +define void @test9(i64* %ptr, i64 %val) { +; CHECK: test9 +; CHECK: dmb ish +; CHECK: ldrexd r2, r3 +; CHECK: strexd {{[a-z0-9]+}}, r0, r1 +; CHECK: cmp +; CHECK: bne +; CHECK: dmb ish + store atomic i64 %val, i64* %ptr seq_cst, align 8 + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/atomic-cmp.ll b/src/LLVM/test/CodeGen/ARM/atomic-cmp.ll new file mode 100644 index 0000000..82726da --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/atomic-cmp.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=armv7-apple-darwin -verify-machineinstrs | FileCheck %s -check-prefix=ARM +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -verify-machineinstrs | FileCheck %s -check-prefix=T2 +; rdar://8964854 + +define i8 @t(i8* %a, i8 %b, i8 %c) nounwind { +; ARM: t: +; ARM: ldrexb +; ARM: strexb + +; T2: t: +; T2: ldrexb +; T2: strexb + %tmp0 = cmpxchg i8* %a, i8 %b, i8 %c monotonic + ret i8 %tmp0 +}
diff --git a/src/LLVM/test/CodeGen/ARM/atomic-load-store.ll b/src/LLVM/test/CodeGen/ARM/atomic-load-store.ll new file mode 100644 index 0000000..12a8fe4 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/atomic-load-store.ll
@@ -0,0 +1,56 @@ +; RUN: llc < %s -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s -check-prefix=ARM +; RUN: llc < %s -mtriple=armv7-apple-ios -O0 | FileCheck %s -check-prefix=ARM +; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s -check-prefix=THUMBTWO +; RUN: llc < %s -mtriple=thumbv6-apple-ios | FileCheck %s -check-prefix=THUMBONE + +define void @test1(i32* %ptr, i32 %val1) { +; ARM: test1 +; ARM: dmb ish +; ARM-NEXT: str +; ARM-NEXT: dmb ish +; THUMBONE: test1 +; THUMBONE: __sync_lock_test_and_set_4 +; THUMBTWO: test1 +; THUMBTWO: dmb ish +; THUMBTWO-NEXT: str +; THUMBTWO-NEXT: dmb ish + store atomic i32 %val1, i32* %ptr seq_cst, align 4 + ret void +} + +define i32 @test2(i32* %ptr) { +; ARM: test2 +; ARM: ldr +; ARM-NEXT: dmb ish +; THUMBONE: test2 +; THUMBONE: __sync_val_compare_and_swap_4 +; THUMBTWO: test2 +; THUMBTWO: ldr +; THUMBTWO-NEXT: dmb ish + %val = load atomic i32* %ptr seq_cst, align 4 + ret i32 %val +} + +define void @test3(i8* %ptr1, i8* %ptr2) { +; ARM: test3 +; ARM: ldrb +; ARM: strb +; THUMBTWO: test3 +; THUMBTWO: ldrb +; THUMBTWO: strb +; THUMBONE: test3 +; THUMBONE: ldrb +; THUMBONE: strb + %val = load atomic i8* %ptr1 unordered, align 1 + store atomic i8 %val, i8* %ptr2 unordered, align 1 + ret void +} + +define void @test4(i8* %ptr1, i8* %ptr2) { +; THUMBONE: test4 +; THUMBONE: ___sync_val_compare_and_swap_1 +; THUMBONE: ___sync_lock_test_and_set_1 + %val = load atomic i8* %ptr1 seq_cst, align 1 + store atomic i8 %val, i8* %ptr2 seq_cst, align 1 + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/atomic-op.ll b/src/LLVM/test/CodeGen/ARM/atomic-op.ll new file mode 100644 index 0000000..02ce5a1 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/atomic-op.ll
@@ -0,0 +1,81 @@ +; RUN: llc < %s -mtriple=armv7-apple-darwin10 -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -verify-machineinstrs | FileCheck %s + +define void @func(i32 %argc, i8** %argv) nounwind { +entry: + %argc.addr = alloca i32 ; <i32*> [#uses=1] + %argv.addr = alloca i8** ; <i8***> [#uses=1] + %val1 = alloca i32 ; <i32*> [#uses=2] + %val2 = alloca i32 ; <i32*> [#uses=15] + %andt = alloca i32 ; <i32*> [#uses=2] + %ort = alloca i32 ; <i32*> [#uses=2] + %xort = alloca i32 ; <i32*> [#uses=2] + %old = alloca i32 ; <i32*> [#uses=18] + %temp = alloca i32 ; <i32*> [#uses=2] + store i32 %argc, i32* %argc.addr + store i8** %argv, i8*** %argv.addr + store i32 0, i32* %val1 + store i32 31, i32* %val2 + store i32 3855, i32* %andt + store i32 3855, i32* %ort + store i32 3855, i32* %xort + store i32 4, i32* %temp + %tmp = load i32* %temp + ; CHECK: ldrex + ; CHECK: add + ; CHECK: strex + %0 = atomicrmw add i32* %val1, i32 %tmp monotonic + store i32 %0, i32* %old + ; CHECK: ldrex + ; CHECK: sub + ; CHECK: strex + %1 = atomicrmw sub i32* %val2, i32 30 monotonic + store i32 %1, i32* %old + ; CHECK: ldrex + ; CHECK: add + ; CHECK: strex + %2 = atomicrmw add i32* %val2, i32 1 monotonic + store i32 %2, i32* %old + ; CHECK: ldrex + ; CHECK: sub + ; CHECK: strex + %3 = atomicrmw sub i32* %val2, i32 1 monotonic + store i32 %3, i32* %old + ; CHECK: ldrex + ; CHECK: and + ; CHECK: strex + %4 = atomicrmw and i32* %andt, i32 4080 monotonic + store i32 %4, i32* %old + ; CHECK: ldrex + ; CHECK: or + ; CHECK: strex + %5 = atomicrmw or i32* %ort, i32 4080 monotonic + store i32 %5, i32* %old + ; CHECK: ldrex + ; CHECK: eor + ; CHECK: strex + %6 = atomicrmw xor i32* %xort, i32 4080 monotonic + store i32 %6, i32* %old + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %7 = atomicrmw min i32* %val2, i32 16 monotonic + store i32 %7, i32* %old + %neg = sub i32 0, 1 ; <i32> [#uses=1] + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %8 = atomicrmw min 
i32* %val2, i32 %neg monotonic + store i32 %8, i32* %old + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %9 = atomicrmw max i32* %val2, i32 1 monotonic + store i32 %9, i32* %old + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %10 = atomicrmw max i32* %val2, i32 0 monotonic + store i32 %10, i32* %old + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/available_externally.ll b/src/LLVM/test/CodeGen/ARM/available_externally.ll new file mode 100644 index 0000000..0f646d5 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/available_externally.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic | FileCheck %s +; rdar://9027648 + +@A = available_externally hidden constant i32 1 +@B = external hidden constant i32 + +define i32 @t1() { + %tmp = load i32* @A + store i32 %tmp, i32* @B + ret i32 %tmp +} + +; CHECK: L_A$non_lazy_ptr: +; CHECK-NEXT: .long _A +; CHECK: L_B$non_lazy_ptr: +; CHECK-NEXT: .long _B
diff --git a/src/LLVM/test/CodeGen/ARM/avoid-cpsr-rmw.ll b/src/LLVM/test/CodeGen/ARM/avoid-cpsr-rmw.ll new file mode 100644 index 0000000..92aff70 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/avoid-cpsr-rmw.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s +; Avoid some 's' 16-bit instruction which partially update CPSR (and add false +; dependency) when it isn't dependent on last CPSR defining instruction. +; rdar://8928208 + +define i32 @t(i32 %a, i32 %b, i32 %c, i32 %d) nounwind readnone { + entry: +; CHECK: t: +; CHECK: muls [[REG:(r[0-9]+)]], r2, r3 +; CHECK-NEXT: mul [[REG2:(r[0-9]+)]], r0, r1 +; CHECK-NEXT: muls r0, [[REG2]], [[REG]] + %0 = mul nsw i32 %a, %b + %1 = mul nsw i32 %c, %d + %2 = mul nsw i32 %0, %1 + ret i32 %2 +}
diff --git a/src/LLVM/test/CodeGen/ARM/bfc.ll b/src/LLVM/test/CodeGen/ARM/bfc.ll new file mode 100644 index 0000000..c4a44b4 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/bfc.ll
@@ -0,0 +1,25 @@ +; RUN: llc < %s -march=arm -mattr=+v6t2 | FileCheck %s + +; 4278190095 = 0xff00000f +define i32 @f1(i32 %a) { +; CHECK: f1: +; CHECK: bfc + %tmp = and i32 %a, 4278190095 + ret i32 %tmp +} + +; 4286578688 = 0xff800000 +define i32 @f2(i32 %a) { +; CHECK: f2: +; CHECK: bfc + %tmp = and i32 %a, 4286578688 + ret i32 %tmp +} + +; 4095 = 0x00000fff +define i32 @f3(i32 %a) { +; CHECK: f3: +; CHECK: bfc + %tmp = and i32 %a, 4095 + ret i32 %tmp +}
diff --git a/src/LLVM/test/CodeGen/ARM/bfi.ll b/src/LLVM/test/CodeGen/ARM/bfi.ll new file mode 100644 index 0000000..84f3813 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/bfi.ll
@@ -0,0 +1,76 @@ +; RUN: llc -march=arm -mattr=+v6t2 < %s | FileCheck %s + +%struct.F = type { [3 x i8], i8 } + +@X = common global %struct.F zeroinitializer, align 4 ; <%struct.F*> [#uses=1] + +define void @f1([1 x i32] %f.coerce0) nounwind { +entry: +; CHECK: f1 +; CHECK: mov r2, #10 +; CHECK: bfi r1, r2, #22, #4 + %0 = load i32* bitcast (%struct.F* @X to i32*), align 4 ; <i32> [#uses=1] + %1 = and i32 %0, -62914561 ; <i32> [#uses=1] + %2 = or i32 %1, 41943040 ; <i32> [#uses=1] + store i32 %2, i32* bitcast (%struct.F* @X to i32*), align 4 + ret void +} + +define i32 @f2(i32 %A, i32 %B) nounwind { +entry: +; CHECK: f2 +; CHECK: lsr{{.*}}#7 +; CHECK: bfi r0, r1, #7, #16 + %and = and i32 %A, -8388481 ; <i32> [#uses=1] + %and2 = and i32 %B, 8388480 ; <i32> [#uses=1] + %or = or i32 %and2, %and ; <i32> [#uses=1] + ret i32 %or +} + +define i32 @f3(i32 %A, i32 %B) nounwind { +entry: +; CHECK: f3 +; CHECK: lsr{{.*}} #7 +; CHECK: bfi {{.*}}, #7, #16 + %and = and i32 %A, 8388480 ; <i32> [#uses=1] + %and2 = and i32 %B, -8388481 ; <i32> [#uses=1] + %or = or i32 %and2, %and ; <i32> [#uses=1] + ret i32 %or +} + +; rdar://8752056 +define i32 @f4(i32 %a) nounwind { +; CHECK: f4 +; CHECK: movw [[R1:r[0-9]+]], #3137 +; CHECK: bfi [[R1]], {{r[0-9]+}}, #15, #5 + %1 = shl i32 %a, 15 + %ins7 = and i32 %1, 1015808 + %ins12 = or i32 %ins7, 3137 + ret i32 %ins12 +} + +; rdar://8458663 +define i32 @f5(i32 %a, i32 %b) nounwind { +entry: +; CHECK: f5: +; CHECK-NOT: bfc +; CHECK: bfi r0, r1, #20, #4 + %0 = and i32 %a, -15728641 + %1 = shl i32 %b, 20 + %2 = and i32 %1, 15728640 + %3 = or i32 %2, %0 + ret i32 %3 +} + +; rdar://9609030 +define i32 @f6(i32 %a, i32 %b) nounwind readnone { +entry: +; CHECK: f6: +; CHECK-NOT: bic +; CHECK: bfi r0, r1, #8, #9 + %and = and i32 %a, -130817 + %and2 = shl i32 %b, 8 + %shl = and i32 %and2, 130816 + %or = or i32 %shl, %and + ret i32 %or +}
diff --git a/src/LLVM/test/CodeGen/ARM/bfx.ll b/src/LLVM/test/CodeGen/ARM/bfx.ll new file mode 100644 index 0000000..519c135 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/bfx.ll
@@ -0,0 +1,28 @@ +; RUN: llc < %s -march=arm -mattr=+v7 | FileCheck %s + +define i32 @sbfx1(i32 %a) { +; CHECK: sbfx1 +; CHECK: sbfx r0, r0, #7, #11 + %t1 = lshr i32 %a, 7 + %t2 = trunc i32 %t1 to i11 + %t3 = sext i11 %t2 to i32 + ret i32 %t3 +} + +define i32 @ubfx1(i32 %a) { +; CHECK: ubfx1 +; CHECK: ubfx r0, r0, #7, #11 + %t1 = lshr i32 %a, 7 + %t2 = trunc i32 %t1 to i11 + %t3 = zext i11 %t2 to i32 + ret i32 %t3 +} + +define i32 @ubfx2(i32 %a) { +; CHECK: ubfx2 +; CHECK: ubfx r0, r0, #7, #11 + %t1 = lshr i32 %a, 7 + %t2 = and i32 %t1, 2047 + ret i32 %t2 +} +
diff --git a/src/LLVM/test/CodeGen/ARM/bic.ll b/src/LLVM/test/CodeGen/ARM/bic.ll new file mode 100644 index 0000000..1dfd627 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/bic.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=arm | FileCheck %s + +define i32 @f1(i32 %a, i32 %b) { + %tmp = xor i32 %b, 4294967295 + %tmp1 = and i32 %a, %tmp + ret i32 %tmp1 +} + +; CHECK: bic r0, r0, r1 + +define i32 @f2(i32 %a, i32 %b) { + %tmp = xor i32 %b, 4294967295 + %tmp1 = and i32 %tmp, %a + ret i32 %tmp1 +} + +; CHECK: bic r0, r0, r1
diff --git a/src/LLVM/test/CodeGen/ARM/bits.ll b/src/LLVM/test/CodeGen/ARM/bits.ll new file mode 100644 index 0000000..99444dd --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/bits.ll
@@ -0,0 +1,41 @@ +; RUN: llc < %s -march=arm | FileCheck %s + +define i32 @f1(i32 %a, i32 %b) { +entry: +; CHECK: f1 +; CHECK: and r0, r1, r0 + %tmp2 = and i32 %b, %a ; <i32> [#uses=1] + ret i32 %tmp2 +} + +define i32 @f2(i32 %a, i32 %b) { +entry: +; CHECK: f2 +; CHECK: orr r0, r1, r0 + %tmp2 = or i32 %b, %a ; <i32> [#uses=1] + ret i32 %tmp2 +} + +define i32 @f3(i32 %a, i32 %b) { +entry: +; CHECK: f3 +; CHECK: eor r0, r1, r0 + %tmp2 = xor i32 %b, %a ; <i32> [#uses=1] + ret i32 %tmp2 +} + +define i32 @f4(i32 %a, i32 %b) { +entry: +; CHECK: f4 +; CHECK: lsl + %tmp3 = shl i32 %a, %b ; <i32> [#uses=1] + ret i32 %tmp3 +} + +define i32 @f5(i32 %a, i32 %b) { +entry: +; CHECK: f5 +; CHECK: asr + %tmp3 = ashr i32 %a, %b ; <i32> [#uses=1] + ret i32 %tmp3 +}
diff --git a/src/LLVM/test/CodeGen/ARM/bswap-inline-asm.ll b/src/LLVM/test/CodeGen/ARM/bswap-inline-asm.ll new file mode 100644 index 0000000..472213d --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/bswap-inline-asm.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6 | FileCheck %s + +define i32 @t1(i32 %x) nounwind { +; CHECK: t1: +; CHECK-NOT: InlineAsm +; CHECK: rev + %asmtmp = tail call i32 asm "rev $0, $1\0A", "=l,l"(i32 %x) nounwind + ret i32 %asmtmp +}
diff --git a/src/LLVM/test/CodeGen/ARM/bx_fold.ll b/src/LLVM/test/CodeGen/ARM/bx_fold.ll new file mode 100644 index 0000000..574878a --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/bx_fold.ll
@@ -0,0 +1,31 @@ +; RUN: llc < %s -mtriple=armv5t-apple-darwin | FileCheck %s + +define void @test(i32 %Ptr, i8* %L) { +entry: + br label %bb1 + +bb: ; preds = %bb1 + %gep.upgrd.1 = zext i32 %indvar to i64 ; <i64> [#uses=1] + %tmp7 = getelementptr i8* %L, i64 %gep.upgrd.1 ; <i8*> [#uses=1] + store i8 0, i8* %tmp7 + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1] + br label %bb1 + +bb1: ; preds = %bb, %entry + %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=3] + %i.0 = bitcast i32 %indvar to i32 ; <i32> [#uses=2] + %tmp = tail call i32 (...)* @bar( ) ; <i32> [#uses=1] + %tmp2 = add i32 %i.0, %tmp ; <i32> [#uses=1] + %Ptr_addr.0 = sub i32 %Ptr, %tmp2 ; <i32> [#uses=0] + %tmp12 = icmp eq i32 %i.0, %Ptr ; <i1> [#uses=1] + %tmp12.not = xor i1 %tmp12, true ; <i1> [#uses=1] + %bothcond = and i1 %tmp12.not, false ; <i1> [#uses=1] + br i1 %bothcond, label %bb, label %bb18 + +bb18: ; preds = %bb1 +; CHECK-NOT: bx +; CHECK: pop + ret void +} + +declare i32 @bar(...)
diff --git a/src/LLVM/test/CodeGen/ARM/call-tc.ll b/src/LLVM/test/CodeGen/ARM/call-tc.ll new file mode 100644 index 0000000..f78d998 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/call-tc.ll
@@ -0,0 +1,98 @@ +; RUN: llc < %s -mtriple=armv6-apple-darwin -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKV6 +; RUN: llc < %s -mtriple=armv6-linux-gnueabi -relocation-model=pic -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKELF +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2D +; RUN: llc < %s -mtriple=thumbv7-apple-ios5.0 | FileCheck %s -check-prefix=CHECKT2D + +; Enable tailcall optimization for iOS 5.0 +; rdar://9120031 + +@t = weak global i32 ()* null ; <i32 ()**> [#uses=1] + +declare void @g(i32, i32, i32, i32) + +define void @t1() { +; CHECKELF: t1: +; CHECKELF: bl g(PLT) + call void @g( i32 1, i32 2, i32 3, i32 4 ) + ret void +} + +define void @t2() { +; CHECKV6: t2: +; CHECKV6: bx r0 +; CHECKT2D: t2: +; CHECKT2D: ldr +; CHECKT2D-NEXT: ldr +; CHECKT2D-NEXT: bx r0 + %tmp = load i32 ()** @t ; <i32 ()*> [#uses=1] + %tmp.upgrd.2 = tail call i32 %tmp( ) ; <i32> [#uses=0] + ret void +} + +define void @t3() { +; CHECKV6: t3: +; CHECKV6: b _t2 +; CHECKELF: t3: +; CHECKELF: b t2(PLT) +; CHECKT2D: t3: +; CHECKT2D: b.w _t2 + + tail call void @t2( ) ; <i32> [#uses=0] + ret void +} + +; Sibcall optimization of expanded libcalls. 
rdar://8707777 +define double @t4(double %a) nounwind readonly ssp { +entry: +; CHECKV6: t4: +; CHECKV6: b _sin +; CHECKELF: t4: +; CHECKELF: b sin(PLT) + %0 = tail call double @sin(double %a) nounwind readonly ; <double> [#uses=1] + ret double %0 +} + +define float @t5(float %a) nounwind readonly ssp { +entry: +; CHECKV6: t5: +; CHECKV6: b _sinf +; CHECKELF: t5: +; CHECKELF: b sinf(PLT) + %0 = tail call float @sinf(float %a) nounwind readonly ; <float> [#uses=1] + ret float %0 +} + +declare float @sinf(float) nounwind readonly + +declare double @sin(double) nounwind readonly + +define i32 @t6(i32 %a, i32 %b) nounwind readnone { +entry: +; CHECKV6: t6: +; CHECKV6: b ___divsi3 +; CHECKELF: t6: +; CHECKELF: b __aeabi_idiv(PLT) + %0 = sdiv i32 %a, %b + ret i32 %0 +} + +; Make sure the tail call instruction isn't deleted +; rdar://8309338 +declare void @foo() nounwind + +define void @t7() nounwind { +entry: +; CHECKT2D: t7: +; CHECKT2D: blxeq _foo +; CHECKT2D-NEXT: pop.w +; CHECKT2D-NEXT: b.w _foo + br i1 undef, label %bb, label %bb1.lr.ph + +bb1.lr.ph: + tail call void @foo() nounwind + unreachable + +bb: + tail call void @foo() nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/call.ll b/src/LLVM/test/CodeGen/ARM/call.ll new file mode 100644 index 0000000..8c6bd2f --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/call.ll
@@ -0,0 +1,36 @@ +; RUN: llc < %s -march=arm -mattr=+v4t | FileCheck %s -check-prefix=CHECKV4 +; RUN: llc < %s -march=arm -mattr=+v5t | FileCheck %s -check-prefix=CHECKV5 +; RUN: llc < %s -mtriple=armv6-linux-gnueabi\ +; RUN: -relocation-model=pic | FileCheck %s -check-prefix=CHECKELF + +@t = weak global i32 ()* null ; <i32 ()**> [#uses=1] + +declare void @g(i32, i32, i32, i32) + +define void @f() { +; CHECKELF: PLT + call void @g( i32 1, i32 2, i32 3, i32 4 ) + ret void +} + +define void @g.upgrd.1() { +; CHECKV4: mov lr, pc +; CHECKV5: blx + %tmp = load i32 ()** @t ; <i32 ()*> [#uses=1] + %tmp.upgrd.2 = call i32 %tmp( ) ; <i32> [#uses=0] + ret void +} + +define i32* @m_231b(i32, i32, i32*, i32*, i32*) nounwind { +; CHECKV4: m_231b +; CHECKV4: bx r{{.*}} +BB0: + %5 = inttoptr i32 %0 to i32* ; <i32*> [#uses=1] + %t35 = volatile load i32* %5 ; <i32> [#uses=1] + %6 = inttoptr i32 %t35 to i32** ; <i32**> [#uses=1] + %7 = getelementptr i32** %6, i32 86 ; <i32**> [#uses=1] + %8 = load i32** %7 ; <i32*> [#uses=1] + %9 = bitcast i32* %8 to i32* (i32, i32*, i32, i32*, i32*, i32*)* ; <i32* (i32, i32*, i32, i32*, i32*, i32*)*> [#uses=1] + %10 = call i32* %9(i32 %0, i32* null, i32 %1, i32* %2, i32* %3, i32* %4) ; <i32*> [#uses=1] + ret i32* %10 +}
diff --git a/src/LLVM/test/CodeGen/ARM/call_nolink.ll b/src/LLVM/test/CodeGen/ARM/call_nolink.ll new file mode 100644 index 0000000..e82037f --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/call_nolink.ll
@@ -0,0 +1,52 @@ +; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \ +; RUN: not grep {bx lr} + + %struct.anon = type { i32 (i32, i32, i32)*, i32, i32, [3 x i32], i8*, i8*, i8* } +@r = external global [14 x i32] ; <[14 x i32]*> [#uses=4] +@isa = external global [13 x %struct.anon] ; <[13 x %struct.anon]*> [#uses=1] +@pgm = external global [2 x { i32, [3 x i32] }] ; <[2 x { i32, [3 x i32] }]*> [#uses=4] +@numi = external global i32 ; <i32*> [#uses=1] +@counter = external global [2 x i32] ; <[2 x i32]*> [#uses=1] + + +define void @main_bb_2E_i_bb205_2E_i_2E_i_bb115_2E_i_2E_i() { +newFuncRoot: + br label %bb115.i.i + +bb115.i.i.bb170.i.i_crit_edge.exitStub: ; preds = %bb115.i.i + ret void + +bb115.i.i.bb115.i.i_crit_edge: ; preds = %bb115.i.i + br label %bb115.i.i + +bb115.i.i: ; preds = %bb115.i.i.bb115.i.i_crit_edge, %newFuncRoot + %i_addr.3210.0.i.i = phi i32 [ %tmp166.i.i, %bb115.i.i.bb115.i.i_crit_edge ], [ 0, %newFuncRoot ] ; <i32> [#uses=7] + %tmp124.i.i = getelementptr [2 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %i_addr.3210.0.i.i, i32 1, i32 0 ; <i32*> [#uses=1] + %tmp125.i.i = load i32* %tmp124.i.i ; <i32> [#uses=1] + %tmp126.i.i = getelementptr [14 x i32]* @r, i32 0, i32 %tmp125.i.i ; <i32*> [#uses=1] + %tmp127.i.i = load i32* %tmp126.i.i ; <i32> [#uses=1] + %tmp131.i.i = getelementptr [2 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %i_addr.3210.0.i.i, i32 1, i32 1 ; <i32*> [#uses=1] + %tmp132.i.i = load i32* %tmp131.i.i ; <i32> [#uses=1] + %tmp133.i.i = getelementptr [14 x i32]* @r, i32 0, i32 %tmp132.i.i ; <i32*> [#uses=1] + %tmp134.i.i = load i32* %tmp133.i.i ; <i32> [#uses=1] + %tmp138.i.i = getelementptr [2 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %i_addr.3210.0.i.i, i32 1, i32 2 ; <i32*> [#uses=1] + %tmp139.i.i = load i32* %tmp138.i.i ; <i32> [#uses=1] + %tmp140.i.i = getelementptr [14 x i32]* @r, i32 0, i32 %tmp139.i.i ; <i32*> [#uses=1] + %tmp141.i.i = load i32* %tmp140.i.i ; <i32> [#uses=1] + %tmp143.i.i = add i32 %i_addr.3210.0.i.i, 12 ; <i32> 
[#uses=1] + %tmp146.i.i = getelementptr [2 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %i_addr.3210.0.i.i, i32 0 ; <i32*> [#uses=1] + %tmp147.i.i = load i32* %tmp146.i.i ; <i32> [#uses=1] + %tmp149.i.i = getelementptr [13 x %struct.anon]* @isa, i32 0, i32 %tmp147.i.i, i32 0 ; <i32 (i32, i32, i32)**> [#uses=1] + %tmp150.i.i = load i32 (i32, i32, i32)** %tmp149.i.i ; <i32 (i32, i32, i32)*> [#uses=1] + %tmp154.i.i = tail call i32 %tmp150.i.i( i32 %tmp127.i.i, i32 %tmp134.i.i, i32 %tmp141.i.i ) ; <i32> [#uses=1] + %tmp155.i.i = getelementptr [14 x i32]* @r, i32 0, i32 %tmp143.i.i ; <i32*> [#uses=1] + store i32 %tmp154.i.i, i32* %tmp155.i.i + %tmp159.i.i = getelementptr [2 x i32]* @counter, i32 0, i32 %i_addr.3210.0.i.i ; <i32*> [#uses=2] + %tmp160.i.i = load i32* %tmp159.i.i ; <i32> [#uses=1] + %tmp161.i.i = add i32 %tmp160.i.i, 1 ; <i32> [#uses=1] + store i32 %tmp161.i.i, i32* %tmp159.i.i + %tmp166.i.i = add i32 %i_addr.3210.0.i.i, 1 ; <i32> [#uses=2] + %tmp168.i.i = load i32* @numi ; <i32> [#uses=1] + icmp slt i32 %tmp166.i.i, %tmp168.i.i ; <i1>:0 [#uses=1] + br i1 %0, label %bb115.i.i.bb115.i.i_crit_edge, label %bb115.i.i.bb170.i.i_crit_edge.exitStub +}
diff --git a/src/LLVM/test/CodeGen/ARM/carry.ll b/src/LLVM/test/CodeGen/ARM/carry.ll new file mode 100644 index 0000000..f84774d --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/carry.ll
@@ -0,0 +1,47 @@ +; RUN: llc < %s -march=arm | FileCheck %s + +define i64 @f1(i64 %a, i64 %b) { +; CHECK: f1: +; CHECK: subs r +; CHECK: sbc r +entry: + %tmp = sub i64 %a, %b + ret i64 %tmp +} + +define i64 @f2(i64 %a, i64 %b) { +; CHECK: f2: +; CHECK: adc r +; CHECK: subs r +; CHECK: sbc r +entry: + %tmp1 = shl i64 %a, 1 + %tmp2 = sub i64 %tmp1, %b + ret i64 %tmp2 +} + +; add with live carry +define i64 @f3(i32 %al, i32 %bl) { +; CHECK: f3: +; CHECK: adds r +; CHECK: adc r +entry: + ; unsigned wide add + %aw = zext i32 %al to i64 + %bw = zext i32 %bl to i64 + %cw = add i64 %aw, %bw + ; ch == carry bit + %ch = lshr i64 %cw, 32 + %dw = add i64 %ch, %bw + ret i64 %dw +} + +; rdar://10073745 +define i64 @f4(i64 %x) nounwind readnone { +entry: +; CHECK: f4: +; CHECK: rsbs r +; CHECK: rsc r + %0 = sub nsw i64 0, %x + ret i64 %0 +}
diff --git a/src/LLVM/test/CodeGen/ARM/clz.ll b/src/LLVM/test/CodeGen/ARM/clz.ll new file mode 100644 index 0000000..1183578 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/clz.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=arm -mattr=+v5t | FileCheck %s + +declare i32 @llvm.ctlz.i32(i32) + +define i32 @test(i32 %x) { +; CHECK: test +; CHECK: clz r0, r0 + %tmp.1 = call i32 @llvm.ctlz.i32( i32 %x ) + ret i32 %tmp.1 +}
diff --git a/src/LLVM/test/CodeGen/ARM/code-placement.ll b/src/LLVM/test/CodeGen/ARM/code-placement.ll new file mode 100644 index 0000000..91ef659 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/code-placement.ll
@@ -0,0 +1,79 @@ +; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s +; PHI elimination shouldn't break backedge. +; rdar://8263994 + +%struct.list_data_s = type { i16, i16 } +%struct.list_head = type { %struct.list_head*, %struct.list_data_s* } + +define arm_apcscc %struct.list_head* @t1(%struct.list_head* %list) nounwind { +entry: +; CHECK: t1: + %0 = icmp eq %struct.list_head* %list, null + br i1 %0, label %bb2, label %bb + +bb: +; CHECK: LBB0_2: +; CHECK: bne LBB0_2 +; CHECK-NOT: b LBB0_2 +; CHECK: bx lr + %list_addr.05 = phi %struct.list_head* [ %2, %bb ], [ %list, %entry ] + %next.04 = phi %struct.list_head* [ %list_addr.05, %bb ], [ null, %entry ] + %1 = getelementptr inbounds %struct.list_head* %list_addr.05, i32 0, i32 0 + %2 = load %struct.list_head** %1, align 4 + store %struct.list_head* %next.04, %struct.list_head** %1, align 4 + %3 = icmp eq %struct.list_head* %2, null + br i1 %3, label %bb2, label %bb + +bb2: + %next.0.lcssa = phi %struct.list_head* [ null, %entry ], [ %list_addr.05, %bb ] + ret %struct.list_head* %next.0.lcssa +} + +; Optimize loop entry, eliminate intra loop branches +; rdar://8117827 +define i32 @t2(i32 %passes, i32* nocapture %src, i32 %size) nounwind readonly { +entry: +; CHECK: t2: +; CHECK: beq LBB1_[[RET:.]] + %0 = icmp eq i32 %passes, 0 ; <i1> [#uses=1] + br i1 %0, label %bb5, label %bb.nph15 + +; CHECK: LBB1_[[PREHDR:.]]: @ %bb2.preheader +bb1: ; preds = %bb2.preheader, %bb1 +; CHECK: LBB1_[[BB1:.]]: @ %bb1 +; CHECK: bne LBB1_[[BB1]] + %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %bb2.preheader ] ; <i32> [#uses=2] + %sum.08 = phi i32 [ %2, %bb1 ], [ %sum.110, %bb2.preheader ] ; <i32> [#uses=1] + %tmp17 = sub i32 %i.07, %indvar ; <i32> [#uses=1] + %scevgep = getelementptr i32* %src, i32 %tmp17 ; <i32*> [#uses=1] + %1 = load i32* %scevgep, align 4 ; <i32> [#uses=1] + %2 = add nsw i32 %1, %sum.08 ; <i32> [#uses=2] + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %indvar.next, %size ; 
<i1> [#uses=1] + br i1 %exitcond, label %bb3, label %bb1 + +bb3: ; preds = %bb1, %bb2.preheader +; CHECK: LBB1_[[BB3:.]]: @ %bb3 +; CHECK: bne LBB1_[[PREHDR]] +; CHECK-NOT: b LBB1_ + %sum.0.lcssa = phi i32 [ %sum.110, %bb2.preheader ], [ %2, %bb1 ] ; <i32> [#uses=2] + %3 = add i32 %pass.011, 1 ; <i32> [#uses=2] + %exitcond18 = icmp eq i32 %3, %passes ; <i1> [#uses=1] + br i1 %exitcond18, label %bb5, label %bb2.preheader + +bb.nph15: ; preds = %entry + %i.07 = add i32 %size, -1 ; <i32> [#uses=2] + %4 = icmp sgt i32 %i.07, -1 ; <i1> [#uses=1] + br label %bb2.preheader + +bb2.preheader: ; preds = %bb3, %bb.nph15 + %pass.011 = phi i32 [ 0, %bb.nph15 ], [ %3, %bb3 ] ; <i32> [#uses=1] + %sum.110 = phi i32 [ 0, %bb.nph15 ], [ %sum.0.lcssa, %bb3 ] ; <i32> [#uses=2] + br i1 %4, label %bb1, label %bb3 + +; CHECK: LBB1_[[RET]]: @ %bb5 +; CHECK: pop +bb5: ; preds = %bb3, %entry + %sum.1.lcssa = phi i32 [ 0, %entry ], [ %sum.0.lcssa, %bb3 ] ; <i32> [#uses=1] + ret i32 %sum.1.lcssa +}
diff --git a/src/LLVM/test/CodeGen/ARM/compare-call.ll b/src/LLVM/test/CodeGen/ARM/compare-call.ll new file mode 100644 index 0000000..7fe35a5 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/compare-call.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | \ +; RUN: grep vcmpe.f32 + +define void @test3(float* %glob, i32 %X) { +entry: + %tmp = load float* %glob ; <float> [#uses=1] + %tmp2 = getelementptr float* %glob, i32 2 ; <float*> [#uses=1] + %tmp3 = load float* %tmp2 ; <float> [#uses=1] + %tmp.upgrd.1 = fcmp ogt float %tmp, %tmp3 ; <i1> [#uses=1] + br i1 %tmp.upgrd.1, label %cond_true, label %UnifiedReturnBlock + +cond_true: ; preds = %entry + %tmp.upgrd.2 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0] + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +declare i32 @bar(...)
diff --git a/src/LLVM/test/CodeGen/ARM/constants.ll b/src/LLVM/test/CodeGen/ARM/constants.ll new file mode 100644 index 0000000..11790d7 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/constants.ll
@@ -0,0 +1,59 @@ +; RUN: llc < %s -mtriple=armv4t-unknown-linux-gnueabi -disable-cgp-branch-opts | FileCheck %s + +define i32 @f1() { +; CHECK: f1 +; CHECK: mov r0, #0 + ret i32 0 +} + +define i32 @f2() { +; CHECK: f2 +; CHECK: mov r0, #255 + ret i32 255 +} + +define i32 @f3() { +; CHECK: f3 +; CHECK: mov r0, #256 + ret i32 256 +} + +define i32 @f4() { +; CHECK: f4 +; CHECK: orr{{.*}}#256 + ret i32 257 +} + +define i32 @f5() { +; CHECK: f5 +; CHECK: mov r0, #-1073741761 + ret i32 -1073741761 +} + +define i32 @f6() { +; CHECK: f6 +; CHECK: mov r0, #1008 + ret i32 1008 +} + +define void @f7(i32 %a) { +; CHECK: f7 +; CHECK: cmp r0, #65536 + %b = icmp ugt i32 %a, 65536 + br i1 %b, label %r, label %r +r: + ret void +} + +%t1 = type { <3 x float>, <3 x float> } + +@const1 = global %t1 { <3 x float> zeroinitializer, + <3 x float> <float 1.000000e+00, + float 2.000000e+00, + float 3.000000e+00> }, align 16 +; CHECK: const1 +; CHECK: .zero 16 +; CHECK: float 1.0 +; CHECK: float 2.0 +; CHECK: float 3.0 +; CHECK: .zero 4
diff --git a/src/LLVM/test/CodeGen/ARM/crash-O0.ll b/src/LLVM/test/CodeGen/ARM/crash-O0.ll new file mode 100644 index 0000000..8bce4e0 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/crash-O0.ll
@@ -0,0 +1,28 @@ +; RUN: llc < %s -O0 -relocation-model=pic -disable-fp-elim +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64-n32" +target triple = "armv6-apple-darwin10" + +%struct0 = type { i32, i32 } + +; This function would crash RegAllocFast because it tried to spill %CPSR. +define arm_apcscc void @clobber_cc() nounwind noinline ssp { +entry: + %asmtmp = call %struct0 asm sideeffect "...", "=&r,=&r,r,Ir,r,~{cc},~{memory}"(i32* undef, i32 undef, i32 1) nounwind ; <%0> [#uses=0] + unreachable +} + +@.str523 = private constant [256 x i8] c"<Unknown>\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 4 ; <[256 x i8]*> [#uses=1] +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind + +; This function uses the scavenger for an ADDri instruction. +; ARMBaseRegisterInfo::estimateRSStackSizeLimit must return a 255 limit. 
+define arm_apcscc void @scavence_ADDri() nounwind { +entry: + %letter = alloca i8 ; <i8*> [#uses=0] + %prodvers = alloca [256 x i8] ; <[256 x i8]*> [#uses=1] + %buildver = alloca [256 x i8] ; <[256 x i8]*> [#uses=0] + call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* getelementptr inbounds ([256 x i8]* @.str523, i32 0, i32 0), i32 256, i32 1, i1 false) + %prodvers2 = bitcast [256 x i8]* %prodvers to i8* ; <i8*> [#uses=1] + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %prodvers2, i8* getelementptr inbounds ([256 x i8]* @.str523, i32 0, i32 0), i32 256, i32 1, i1 false) + unreachable +}
diff --git a/src/LLVM/test/CodeGen/ARM/crash-greedy-v6.ll b/src/LLVM/test/CodeGen/ARM/crash-greedy-v6.ll new file mode 100644 index 0000000..fd42254 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/crash-greedy-v6.ll
@@ -0,0 +1,32 @@ +; RUN: llc -disable-fp-elim -relocation-model=pic < %s +target triple = "armv6-apple-ios" + +; Reduced from 177.mesa. This test causes a live range split before an LDR_POST instruction. +; That requires leaveIntvBefore to be very accurate about the redefined value number. +define internal void @sample_nearest_3d(i8* nocapture %tObj, i32 %n, float* nocapture %s, float* nocapture %t, float* nocapture %u, float* nocapture %lambda, i8* nocapture %red, i8* nocapture %green, i8* nocapture %blue, i8* nocapture %alpha) nounwind ssp { +entry: + br i1 undef, label %for.end, label %for.body.lr.ph + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body, %for.body.lr.ph + %i.031 = phi i32 [ 0, %for.body.lr.ph ], [ %0, %for.body ] + %arrayidx11 = getelementptr float* %t, i32 %i.031 + %arrayidx15 = getelementptr float* %u, i32 %i.031 + %arrayidx19 = getelementptr i8* %red, i32 %i.031 + %arrayidx22 = getelementptr i8* %green, i32 %i.031 + %arrayidx25 = getelementptr i8* %blue, i32 %i.031 + %arrayidx28 = getelementptr i8* %alpha, i32 %i.031 + %tmp12 = load float* %arrayidx11, align 4 + tail call fastcc void @sample_3d_nearest(i8* %tObj, i8* undef, float undef, float %tmp12, float undef, i8* %arrayidx19, i8* %arrayidx22, i8* %arrayidx25, i8* %arrayidx28) + %0 = add i32 %i.031, 1 + %exitcond = icmp eq i32 %0, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare fastcc void @sample_3d_nearest(i8* nocapture, i8* nocapture, float, float, float, i8* nocapture, i8* nocapture, i8* nocapture, i8* nocapture) nounwind ssp +
diff --git a/src/LLVM/test/CodeGen/ARM/crash-greedy.ll b/src/LLVM/test/CodeGen/ARM/crash-greedy.ll new file mode 100644 index 0000000..8a865e2 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/crash-greedy.ll
@@ -0,0 +1,84 @@ +; RUN: llc < %s -regalloc=greedy -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim -verify-machineinstrs | FileCheck %s +; +; ARM tests that crash or fail with the greedy register allocator. + +target triple = "thumbv7-apple-darwin" + +declare double @exp(double) + +; CHECK: remat_subreg +define void @remat_subreg(float* nocapture %x, i32* %y, i32 %n, i32 %z, float %c, float %lambda, float* nocapture %ret_f, float* nocapture %ret_df) nounwind { +entry: + %conv16 = fpext float %lambda to double + %mul17 = fmul double %conv16, -1.000000e+00 + br i1 undef, label %cond.end.us, label %cond.end + +cond.end.us: ; preds = %entry + unreachable + +cond.end: ; preds = %cond.end, %entry + %mul = fmul double undef, 0.000000e+00 + %add = fadd double undef, %mul + %add46 = fadd double undef, undef + %add75 = fadd double 0.000000e+00, undef + br i1 undef, label %for.end, label %cond.end + +for.end: ; preds = %cond.end + %conv78 = sitofp i32 %z to double + %conv83 = fpext float %c to double + %mul84 = fmul double %mul17, %conv83 + %call85 = tail call double @exp(double %mul84) nounwind + %mul86 = fmul double %conv78, %call85 + %add88 = fadd double 0.000000e+00, %mul86 +; CHECK: blx _exp + %call100 = tail call double @exp(double %mul84) nounwind + %mul101 = fmul double undef, %call100 + %add103 = fadd double %add46, %mul101 + %mul111 = fmul double undef, %conv83 + %mul119 = fmul double %mul111, undef + %add121 = fadd double undef, %mul119 + %div = fdiv double 1.000000e+00, %conv16 + %div126 = fdiv double %add, %add75 + %sub = fsub double %div, %div126 + %div129 = fdiv double %add103, %add88 + %add130 = fadd double %sub, %div129 + %conv131 = fptrunc double %add130 to float + store float %conv131, float* %ret_f, align 4 + %mul139 = fmul double %div129, %div129 + %div142 = fdiv double %add121, %add88 + %sub143 = fsub double %mul139, %div142 +; %lambda is passed on the stack, and the stack slot load is rematerialized. 
+; The rematted load of a float constrains the D register used for the mul. +; CHECK: vldr + %mul146 = fmul float %lambda, %lambda + %conv147 = fpext float %mul146 to double + %div148 = fdiv double 1.000000e+00, %conv147 + %sub149 = fsub double %sub143, %div148 + %conv150 = fptrunc double %sub149 to float + store float %conv150, float* %ret_df, align 4 + ret void +} + +; CHECK: insert_elem +; This test has a sub-register copy with a kill flag: +; %vreg6:ssub_3<def> = COPY %vreg6:ssub_2<kill>; QPR_VFP2:%vreg6 +; The rewriter must do something sensible with that, or the scavenger crashes. +define void @insert_elem() nounwind { +entry: + br i1 undef, label %if.end251, label %if.then84 + +if.then84: ; preds = %entry + br i1 undef, label %if.end251, label %if.then195 + +if.then195: ; preds = %if.then84 + %div = fdiv float 1.000000e+00, undef + %vecinit207 = insertelement <4 x float> undef, float %div, i32 1 + %vecinit208 = insertelement <4 x float> %vecinit207, float 1.000000e+00, i32 2 + %vecinit209 = insertelement <4 x float> %vecinit208, float 1.000000e+00, i32 3 + %mul216 = fmul <4 x float> zeroinitializer, %vecinit209 + store <4 x float> %mul216, <4 x float>* undef, align 16 + br label %if.end251 + +if.end251: ; preds = %if.then195, %if.then84, %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/crash.ll b/src/LLVM/test/CodeGen/ARM/crash.ll new file mode 100644 index 0000000..0f6f33e --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/crash.ll
@@ -0,0 +1,71 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -verify-arm-pseudo-expand + +; <rdar://problem/8529919> +%struct.foo = type { i32, i32 } + +define void @func() nounwind { +entry: + %tmp = load i32* undef, align 4 + br label %bb1 + +bb1: + %tmp1 = and i32 %tmp, 16 + %tmp2 = icmp eq i32 %tmp1, 0 + %invok.1.i = select i1 %tmp2, i32 undef, i32 0 + %tmp119 = add i32 %invok.1.i, 0 + br i1 undef, label %bb2, label %exit + +bb2: + %tmp120 = add i32 %tmp119, 0 + %scevgep810.i = getelementptr %struct.foo* null, i32 %tmp120, i32 1 + store i32 undef, i32* %scevgep810.i, align 4 + br i1 undef, label %bb2, label %bb3 + +bb3: + br i1 %tmp2, label %bb2, label %bb2 + +exit: + ret void +} + +; PR10520 - REG_SEQUENCE with implicit-def operands. +define arm_aapcs_vfpcc void @foo() nounwind align 2 { +bb: + %tmp = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> <i32 1> + %tmp8 = bitcast <1 x i64> %tmp to <2 x float> + %tmp9 = shufflevector <2 x float> %tmp8, <2 x float> %tmp8, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + %tmp10 = fmul <4 x float> undef, %tmp9 + %tmp11 = fadd <4 x float> %tmp10, undef + %tmp12 = fadd <4 x float> undef, %tmp11 + %tmp13 = bitcast <4 x float> %tmp12 to i128 + %tmp14 = bitcast i128 %tmp13 to <4 x float> + %tmp15 = bitcast <4 x float> %tmp14 to i128 + %tmp16 = bitcast i128 %tmp15 to <4 x float> + %tmp17 = bitcast <4 x float> %tmp16 to i128 + %tmp18 = bitcast i128 %tmp17 to <4 x float> + %tmp19 = bitcast <4 x float> %tmp18 to i128 + %tmp20 = bitcast i128 %tmp19 to <4 x float> + store <4 x float> %tmp20, <4 x float>* undef, align 16 + ret void +} + +; PR10520, second bug. NEONMoveFixPass needs to preserve implicit operands. 
+define arm_aapcs_vfpcc void @pr10520_2() nounwind align 2 { +bb: + %tmp76 = shufflevector <2 x i64> zeroinitializer, <2 x i64> zeroinitializer, <1 x i32> <i32 1> + %tmp77 = bitcast <1 x i64> %tmp76 to <2 x float> + %tmp78 = shufflevector <2 x float> %tmp77, <2 x float> %tmp77, <4 x i32> zeroinitializer + %tmp81 = fmul <4 x float> undef, %tmp78 + %tmp82 = fadd <4 x float> %tmp81, undef + %tmp85 = fadd <4 x float> %tmp82, undef + %tmp86 = bitcast <4 x float> %tmp85 to i128 + %tmp136 = bitcast i128 %tmp86 to <4 x float> + %tmp137 = bitcast <4 x float> %tmp136 to i128 + %tmp138 = bitcast i128 %tmp137 to <4 x float> + %tmp139 = bitcast <4 x float> %tmp138 to i128 + %tmp152 = bitcast i128 %tmp139 to <4 x float> + %tmp153 = bitcast <4 x float> %tmp152 to i128 + %tmp154 = bitcast i128 %tmp153 to <4 x float> + store <4 x float> %tmp154, <4 x float>* undef, align 16 + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/cse-libcalls.ll b/src/LLVM/test/CodeGen/ARM/cse-libcalls.ll new file mode 100644 index 0000000..0dcf9dd --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/cse-libcalls.ll
@@ -0,0 +1,30 @@ +; RUN: llc < %s -march=arm | grep {bl.\*__ltdf} | count 1 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin8" + +; Without CSE of libcalls, there are two calls in the output instead of one. + +define i32 @u_f_nonbon(double %lambda) nounwind { +entry: + %tmp19.i.i = load double* null, align 4 ; <double> [#uses=2] + %tmp6.i = fcmp olt double %tmp19.i.i, 1.000000e+00 ; <i1> [#uses=1] + %dielectric.0.i = select i1 %tmp6.i, double 1.000000e+00, double %tmp19.i.i ; <double> [#uses=1] + %tmp10.i4 = fdiv double 0x4074C2D71F36262D, %dielectric.0.i ; <double> [#uses=1] + br i1 false, label %bb28.i, label %bb508.i + +bb28.i: ; preds = %bb28.i, %entry + br i1 false, label %bb502.loopexit.i, label %bb28.i + +bb.nph53.i: ; preds = %bb502.loopexit.i + %tmp354.i = fsub double -0.000000e+00, %tmp10.i4 ; <double> [#uses=0] + br label %bb244.i + +bb244.i: ; preds = %bb244.i, %bb.nph53.i + br label %bb244.i + +bb502.loopexit.i: ; preds = %bb28.i + br i1 false, label %bb.nph53.i, label %bb508.i + +bb508.i: ; preds = %bb502.loopexit.i, %entry + ret i32 1 +}
diff --git a/src/LLVM/test/CodeGen/ARM/ctors_dtors.ll b/src/LLVM/test/CodeGen/ARM/ctors_dtors.ll new file mode 100644 index 0000000..34ccd8e --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ctors_dtors.ll
@@ -0,0 +1,25 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=DARWIN +; RUN: llc < %s -mtriple=arm-linux-gnu | FileCheck %s -check-prefix=ELF +; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=GNUEABI + +; DARWIN: .section __DATA,__mod_init_func,mod_init_funcs +; DARWIN: .section __DATA,__mod_term_func,mod_term_funcs + +; ELF: .section .ctors,"aw",%progbits +; ELF: .section .dtors,"aw",%progbits + +; GNUEABI: .section .init_array,"aw",%init_array +; GNUEABI: .section .fini_array,"aw",%fini_array + +@llvm.global_ctors = appending global [1 x { i32, void ()* }] [ { i32, void ()* } { i32 65535, void ()* @__mf_init } ] ; <[1 x { i32, void ()* }]*> [#uses=0] +@llvm.global_dtors = appending global [1 x { i32, void ()* }] [ { i32, void ()* } { i32 65535, void ()* @__mf_fini } ] ; <[1 x { i32, void ()* }]*> [#uses=0] + +define void @__mf_init() { +entry: + ret void +} + +define void @__mf_fini() { +entry: + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/ctz.ll b/src/LLVM/test/CodeGen/ARM/ctz.ll new file mode 100644 index 0000000..1d2ced3 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ctz.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=arm -mattr=+v6t2 | FileCheck %s + +declare i32 @llvm.cttz.i32(i32) + +define i32 @f1(i32 %a) { +; CHECK: f1: +; CHECK: rbit +; CHECK: clz + %tmp = call i32 @llvm.cttz.i32( i32 %a ) + ret i32 %tmp +}
diff --git a/src/LLVM/test/CodeGen/ARM/debug-info-arg.ll b/src/LLVM/test/CodeGen/ARM/debug-info-arg.ll new file mode 100644 index 0000000..b0270f9 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/debug-info-arg.ll
@@ -0,0 +1,65 @@ +; RUN: llc < %s | FileCheck %s +; Test to check argument y's debug info uses FI +; Radar 10048772 +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32" +target triple = "thumbv7-apple-macosx10.7.0" + +%struct.tag_s = type { i32, i32, i32 } + +define void @foo(%struct.tag_s* nocapture %this, %struct.tag_s* %c, i64 %x, i64 %y, %struct.tag_s* nocapture %ptr1, %struct.tag_s* nocapture %ptr2) nounwind ssp { + tail call void @llvm.dbg.value(metadata !{%struct.tag_s* %this}, i64 0, metadata !5), !dbg !20 + tail call void @llvm.dbg.value(metadata !{%struct.tag_s* %c}, i64 0, metadata !13), !dbg !21 + tail call void @llvm.dbg.value(metadata !{i64 %x}, i64 0, metadata !14), !dbg !22 + tail call void @llvm.dbg.value(metadata !{i64 %y}, i64 0, metadata !17), !dbg !23 +;CHECK: @DEBUG_VALUE: foo:y <- R7+4294967295 + tail call void @llvm.dbg.value(metadata !{%struct.tag_s* %ptr1}, i64 0, metadata !18), !dbg !24 + tail call void @llvm.dbg.value(metadata !{%struct.tag_s* %ptr2}, i64 0, metadata !19), !dbg !25 + %1 = icmp eq %struct.tag_s* %c, null, !dbg !26 + br i1 %1, label %3, label %2, !dbg !26 + +; <label>:2 ; preds = %0 + tail call void @foobar(i64 %x, i64 %y) nounwind, !dbg !28 + br label %3, !dbg !28 + +; <label>:3 ; preds = %0, %2 + ret void, !dbg !29 +} + +declare void @foobar(i64, i64) + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.cu = !{!0} +!llvm.dbg.sp = !{!1} +!llvm.dbg.lv.foo = !{!5, !13, !14, !17, !18, !19} + +!0 = metadata !{i32 589841, i32 0, i32 12, metadata !"one.c", metadata !"/Volumes/Athwagate/R10048772", metadata !"Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!1 = metadata !{i32 589870, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 11, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 
0, i32 256, i1 true, void (%struct.tag_s*, %struct.tag_s*, i64, i64, %struct.tag_s*, %struct.tag_s*)* @foo, null, null} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 589865, metadata !"one.c", metadata !"/Volumes/Athwagate/R10048772", metadata !0} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 589845, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{null} +!5 = metadata !{i32 590081, metadata !1, metadata !"this", metadata !2, i32 16777227, metadata !6, i32 0} ; [ DW_TAG_arg_variable ] +!6 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ] +!7 = metadata !{i32 589843, metadata !0, metadata !"tag_s", metadata !2, i32 5, i64 96, i64 32, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!8 = metadata !{metadata !9, metadata !11, metadata !12} +!9 = metadata !{i32 589837, metadata !7, metadata !"x", metadata !2, i32 6, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ] +!10 = metadata !{i32 589860, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!11 = metadata !{i32 589837, metadata !7, metadata !"y", metadata !2, i32 7, i64 32, i64 32, i64 32, i32 0, metadata !10} ; [ DW_TAG_member ] +!12 = metadata !{i32 589837, metadata !7, metadata !"z", metadata !2, i32 8, i64 32, i64 32, i64 64, i32 0, metadata !10} ; [ DW_TAG_member ] +!13 = metadata !{i32 590081, metadata !1, metadata !"c", metadata !2, i32 33554443, metadata !6, i32 0} ; [ DW_TAG_arg_variable ] +!14 = metadata !{i32 590081, metadata !1, metadata !"x", metadata !2, i32 50331659, metadata !15, i32 0} ; [ DW_TAG_arg_variable ] +!15 = metadata !{i32 589846, metadata !0, metadata !"UInt64", metadata !2, i32 1, i64 0, i64 0, i64 0, i32 0, metadata !16} ; [ DW_TAG_typedef ] +!16 = metadata !{i32 589860, metadata !0, metadata !"long long 
unsigned int", null, i32 0, i64 64, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!17 = metadata !{i32 590081, metadata !1, metadata !"y", metadata !2, i32 67108875, metadata !15, i32 0} ; [ DW_TAG_arg_variable ] +!18 = metadata !{i32 590081, metadata !1, metadata !"ptr1", metadata !2, i32 83886091, metadata !6, i32 0} ; [ DW_TAG_arg_variable ] +!19 = metadata !{i32 590081, metadata !1, metadata !"ptr2", metadata !2, i32 100663307, metadata !6, i32 0} ; [ DW_TAG_arg_variable ] +!20 = metadata !{i32 11, i32 24, metadata !1, null} +!21 = metadata !{i32 11, i32 44, metadata !1, null} +!22 = metadata !{i32 11, i32 54, metadata !1, null} +!23 = metadata !{i32 11, i32 64, metadata !1, null} +!24 = metadata !{i32 11, i32 81, metadata !1, null} +!25 = metadata !{i32 11, i32 101, metadata !1, null} +!26 = metadata !{i32 12, i32 3, metadata !27, null} +!27 = metadata !{i32 589835, metadata !1, i32 11, i32 107, metadata !2, i32 0} ; [ DW_TAG_lexical_block ] +!28 = metadata !{i32 13, i32 5, metadata !27, null} +!29 = metadata !{i32 14, i32 1, metadata !27, null}
diff --git a/src/LLVM/test/CodeGen/ARM/debug-info-blocks.ll b/src/LLVM/test/CodeGen/ARM/debug-info-blocks.ll new file mode 100644 index 0000000..2c59316 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/debug-info-blocks.ll
@@ -0,0 +1,245 @@ +; RUN: llc -O0 < %s | FileCheck %s +; CHECK: @DEBUG_VALUE: mydata <- [sp+#4]+#0 +; Radar 9331779 +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" +target triple = "thumbv7-apple-macosx10.7.0" + +%0 = type opaque +%1 = type { [4 x i32] } +%2 = type <{ i8*, i32, i32, i8*, %struct.Re*, i8*, %3*, %struct.my_struct* }> +%3 = type opaque +%struct.CP = type { float, float } +%struct.CR = type { %struct.CP, %struct.CP } +%struct.Re = type { i32, i32 } +%struct.__block_byref_mydata = type { i8*, %struct.__block_byref_mydata*, i32, i32, i8*, i8*, %0* } +%struct.my_struct = type opaque + +@"\01L_OBJC_SELECTOR_REFERENCES_13" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip" +@"OBJC_IVAR_$_MyWork._bounds" = external hidden global i32, section "__DATA, __objc_const", align 4 +@"OBJC_IVAR_$_MyWork._data" = external hidden global i32, section "__DATA, __objc_const", align 4 +@"\01L_OBJC_SELECTOR_REFERENCES_222" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip" + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +declare i8* @objc_msgSend(i8*, i8*, ...) 
+ +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind + +define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %loadedMydata, [4 x i32] %bounds.coerce0, [4 x i32] %data.coerce0) ssp { + %1 = alloca %0*, align 4 + %bounds = alloca %struct.CR, align 4 + %data = alloca %struct.CR, align 4 + call void @llvm.dbg.value(metadata !{i8* %.block_descriptor}, i64 0, metadata !27), !dbg !129 + store %0* %loadedMydata, %0** %1, align 4 + call void @llvm.dbg.declare(metadata !{%0** %1}, metadata !130), !dbg !131 + %2 = bitcast %struct.CR* %bounds to %1* + %3 = getelementptr %1* %2, i32 0, i32 0 + store [4 x i32] %bounds.coerce0, [4 x i32]* %3 + call void @llvm.dbg.declare(metadata !{%struct.CR* %bounds}, metadata !132), !dbg !133 + %4 = bitcast %struct.CR* %data to %1* + %5 = getelementptr %1* %4, i32 0, i32 0 + store [4 x i32] %data.coerce0, [4 x i32]* %5 + call void @llvm.dbg.declare(metadata !{%struct.CR* %data}, metadata !134), !dbg !135 + %6 = bitcast i8* %.block_descriptor to %2* + %7 = getelementptr inbounds %2* %6, i32 0, i32 6 + call void @llvm.dbg.declare(metadata !{%2* %6}, metadata !136), !dbg !137 + call void @llvm.dbg.declare(metadata !{%2* %6}, metadata !138), !dbg !137 + call void @llvm.dbg.declare(metadata !{%2* %6}, metadata !139), !dbg !140 + %8 = load %0** %1, align 4, !dbg !141 + %9 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_13", !dbg !141 + %10 = bitcast %0* %8 to i8*, !dbg !141 + %11 = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %10, i8* %9), !dbg !141 + %12 = bitcast i8* %11 to %0*, !dbg !141 + %13 = getelementptr inbounds %2* %6, i32 0, i32 5, !dbg !141 + %14 = load i8** %13, !dbg !141 + %15 = bitcast i8* %14 to %struct.__block_byref_mydata*, !dbg !141 + %16 = getelementptr inbounds %struct.__block_byref_mydata* %15, i32 0, i32 1, !dbg !141 + %17 = load %struct.__block_byref_mydata** 
%16, !dbg !141 + %18 = getelementptr inbounds %struct.__block_byref_mydata* %17, i32 0, i32 6, !dbg !141 + store %0* %12, %0** %18, align 4, !dbg !141 + %19 = getelementptr inbounds %2* %6, i32 0, i32 6, !dbg !143 + %20 = load %3** %19, align 4, !dbg !143 + %21 = load i32* @"OBJC_IVAR_$_MyWork._data", !dbg !143 + %22 = bitcast %3* %20 to i8*, !dbg !143 + %23 = getelementptr inbounds i8* %22, i32 %21, !dbg !143 + %24 = bitcast i8* %23 to %struct.CR*, !dbg !143 + %25 = bitcast %struct.CR* %24 to i8*, !dbg !143 + %26 = bitcast %struct.CR* %data to i8*, !dbg !143 + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %25, i8* %26, i32 16, i32 4, i1 false), !dbg !143 + %27 = getelementptr inbounds %2* %6, i32 0, i32 6, !dbg !144 + %28 = load %3** %27, align 4, !dbg !144 + %29 = load i32* @"OBJC_IVAR_$_MyWork._bounds", !dbg !144 + %30 = bitcast %3* %28 to i8*, !dbg !144 + %31 = getelementptr inbounds i8* %30, i32 %29, !dbg !144 + %32 = bitcast i8* %31 to %struct.CR*, !dbg !144 + %33 = bitcast %struct.CR* %32 to i8*, !dbg !144 + %34 = bitcast %struct.CR* %bounds to i8*, !dbg !144 + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %33, i8* %34, i32 16, i32 4, i1 false), !dbg !144 + %35 = getelementptr inbounds %2* %6, i32 0, i32 6, !dbg !145 + %36 = load %3** %35, align 4, !dbg !145 + %37 = getelementptr inbounds %2* %6, i32 0, i32 5, !dbg !145 + %38 = load i8** %37, !dbg !145 + %39 = bitcast i8* %38 to %struct.__block_byref_mydata*, !dbg !145 + %40 = getelementptr inbounds %struct.__block_byref_mydata* %39, i32 0, i32 1, !dbg !145 + %41 = load %struct.__block_byref_mydata** %40, !dbg !145 + %42 = getelementptr inbounds %struct.__block_byref_mydata* %41, i32 0, i32 6, !dbg !145 + %43 = load %0** %42, align 4, !dbg !145 + %44 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_222", !dbg !145 + %45 = bitcast %3* %36 to i8*, !dbg !145 + call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, %0*)*)(i8* %45, i8* %44, %0* %43), !dbg !145 + ret void, !dbg !146 +} + +!llvm.dbg.cu = !{!0} 
+!llvm.dbg.enum = !{!1, !1, !5, !5, !9, !14, !19, !19, !14, !14, !14, !19, !19, !19} +!llvm.dbg.sp = !{!23} + +!0 = metadata !{i32 589841, i32 0, i32 16, metadata !"MyLibrary.i", metadata !"/Volumes/Sandbox/llvm", metadata !"Apple clang version 2.1", i1 true, i1 false, metadata !"", i32 2} ; [ DW_TAG_compile_unit ] +!1 = metadata !{i32 589828, metadata !0, metadata !"", metadata !2, i32 248, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !3, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] +!2 = metadata !{i32 589865, metadata !"header.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] +!3 = metadata !{metadata !4} +!4 = metadata !{i32 589864, metadata !"Ver1", i64 0} ; [ DW_TAG_enumerator ] +!5 = metadata !{i32 589828, metadata !0, metadata !"Mode", metadata !6, i32 79, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !7, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] +!6 = metadata !{i32 589865, metadata !"header2.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] +!7 = metadata !{metadata !8} +!8 = metadata !{i32 589864, metadata !"One", i64 0} ; [ DW_TAG_enumerator ] +!9 = metadata !{i32 589828, metadata !0, metadata !"", metadata !10, i32 15, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !11, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] +!10 = metadata !{i32 589865, metadata !"header3.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] +!11 = metadata !{metadata !12, metadata !13} +!12 = metadata !{i32 589864, metadata !"Unknown", i64 0} ; [ DW_TAG_enumerator ] +!13 = metadata !{i32 589864, metadata !"Known", i64 1} ; [ DW_TAG_enumerator ] +!14 = metadata !{i32 589828, metadata !0, metadata !"", metadata !15, i32 20, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !16, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] +!15 = metadata !{i32 589865, metadata !"Private.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] +!16 = metadata !{metadata !17, metadata !18} +!17 = metadata !{i32 
589864, metadata !"Single", i64 0} ; [ DW_TAG_enumerator ] +!18 = metadata !{i32 589864, metadata !"Double", i64 1} ; [ DW_TAG_enumerator ] +!19 = metadata !{i32 589828, metadata !0, metadata !"", metadata !20, i32 14, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !21, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] +!20 = metadata !{i32 589865, metadata !"header4.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] +!21 = metadata !{metadata !22} +!22 = metadata !{i32 589864, metadata !"Eleven", i64 0} ; [ DW_TAG_enumerator ] +!23 = metadata !{i32 589870, i32 0, metadata !24, metadata !"foobar_func_block_invoke_0", metadata !"foobar_func_block_invoke_0", metadata !"", metadata !24, i32 609, metadata !25, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (i8*, %0*, [4 x i32], [4 x i32])* @foobar_func_block_invoke_0, null, null} ; [ DW_TAG_subprogram ] +!24 = metadata !{i32 589865, metadata !"MyLibrary.m", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] +!25 = metadata !{i32 589845, metadata !24, metadata !"", metadata !24, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !26, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!26 = metadata !{null} +!27 = metadata !{i32 590081, metadata !23, metadata !".block_descriptor", metadata !24, i32 16777825, metadata !28, i32 64} ; [ DW_TAG_arg_variable ] +!28 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 0, i64 0, i32 0, metadata !29} ; [ DW_TAG_pointer_type ] +!29 = metadata !{i32 589843, metadata !24, metadata !"__block_literal_14", metadata !24, i32 609, i64 256, i64 32, i32 0, i32 0, i32 0, metadata !30, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!30 = metadata !{metadata !31, metadata !33, metadata !35, metadata !36, metadata !37, metadata !48, metadata !89, metadata !124} +!31 = metadata !{i32 589837, metadata !24, metadata !"__isa", metadata !24, i32 609, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_member ] +!32 = metadata 
!{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] +!33 = metadata !{i32 589837, metadata !24, metadata !"__flags", metadata !24, i32 609, i64 32, i64 32, i64 32, i32 0, metadata !34} ; [ DW_TAG_member ] +!34 = metadata !{i32 589860, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!35 = metadata !{i32 589837, metadata !24, metadata !"__reserved", metadata !24, i32 609, i64 32, i64 32, i64 64, i32 0, metadata !34} ; [ DW_TAG_member ] +!36 = metadata !{i32 589837, metadata !24, metadata !"__FuncPtr", metadata !24, i32 609, i64 32, i64 32, i64 96, i32 0, metadata !32} ; [ DW_TAG_member ] +!37 = metadata !{i32 589837, metadata !24, metadata !"__descriptor", metadata !24, i32 609, i64 32, i64 32, i64 128, i32 0, metadata !38} ; [ DW_TAG_member ] +!38 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !39} ; [ DW_TAG_pointer_type ] +!39 = metadata !{i32 589843, metadata !0, metadata !"__block_descriptor_withcopydispose", metadata !40, i32 307, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !41, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!40 = metadata !{i32 589865, metadata !"MyLibrary.i", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] +!41 = metadata !{metadata !42, metadata !44, metadata !45, metadata !47} +!42 = metadata !{i32 589837, metadata !40, metadata !"reserved", metadata !40, i32 307, i64 32, i64 32, i64 0, i32 0, metadata !43} ; [ DW_TAG_member ] +!43 = metadata !{i32 589860, metadata !0, metadata !"long unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!44 = metadata !{i32 589837, metadata !40, metadata !"Size", metadata !40, i32 307, i64 32, i64 32, i64 32, i32 0, metadata !43} ; [ DW_TAG_member ] +!45 = metadata !{i32 589837, metadata !40, metadata !"CopyFuncPtr", metadata !40, i32 307, i64 32, i64 32, i64 64, i32 0, 
metadata !46} ; [ DW_TAG_member ] +!46 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ] +!47 = metadata !{i32 589837, metadata !40, metadata !"DestroyFuncPtr", metadata !40, i32 307, i64 32, i64 32, i64 96, i32 0, metadata !46} ; [ DW_TAG_member ] +!48 = metadata !{i32 589837, metadata !24, metadata !"mydata", metadata !24, i32 609, i64 32, i64 32, i64 160, i32 0, metadata !49} ; [ DW_TAG_member ] +!49 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 0, i64 0, i32 0, metadata !50} ; [ DW_TAG_pointer_type ] +!50 = metadata !{i32 589843, metadata !24, metadata !"", metadata !24, i32 0, i64 224, i64 0, i32 0, i32 16, i32 0, metadata !51, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!51 = metadata !{metadata !52, metadata !53, metadata !54, metadata !55, metadata !56, metadata !57, metadata !58} +!52 = metadata !{i32 589837, metadata !24, metadata !"__isa", metadata !24, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_member ] +!53 = metadata !{i32 589837, metadata !24, metadata !"__forwarding", metadata !24, i32 0, i64 32, i64 32, i64 32, i32 0, metadata !32} ; [ DW_TAG_member ] +!54 = metadata !{i32 589837, metadata !24, metadata !"__flags", metadata !24, i32 0, i64 32, i64 32, i64 64, i32 0, metadata !34} ; [ DW_TAG_member ] +!55 = metadata !{i32 589837, metadata !24, metadata !"__size", metadata !24, i32 0, i64 32, i64 32, i64 96, i32 0, metadata !34} ; [ DW_TAG_member ] +!56 = metadata !{i32 589837, metadata !24, metadata !"__copy_helper", metadata !24, i32 0, i64 32, i64 32, i64 128, i32 0, metadata !32} ; [ DW_TAG_member ] +!57 = metadata !{i32 589837, metadata !24, metadata !"__destroy_helper", metadata !24, i32 0, i64 32, i64 32, i64 160, i32 0, metadata !32} ; [ DW_TAG_member ] +!58 = metadata !{i32 589837, metadata !24, metadata !"mydata", metadata !24, i32 0, i64 32, i64 32, i64 192, i32 0, metadata !59} ; [ DW_TAG_member ] +!59 = 
metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !60} ; [ DW_TAG_pointer_type ] +!60 = metadata !{i32 589843, metadata !24, metadata !"UIMydata", metadata !61, i32 26, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !62, i32 16, i32 0} ; [ DW_TAG_structure_type ] +!61 = metadata !{i32 589865, metadata !"header11.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] +!62 = metadata !{metadata !63, metadata !71, metadata !75, metadata !79} +!63 = metadata !{i32 589852, metadata !60, null, metadata !61, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !64} ; [ DW_TAG_inheritance ] +!64 = metadata !{i32 589843, metadata !40, metadata !"NSO", metadata !65, i32 66, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !66, i32 16, i32 0} ; [ DW_TAG_structure_type ] +!65 = metadata !{i32 589865, metadata !"NSO.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] +!66 = metadata !{metadata !67} +!67 = metadata !{i32 589837, metadata !65, metadata !"isa", metadata !65, i32 67, i64 32, i64 32, i64 0, i32 2, metadata !68, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] +!68 = metadata !{i32 589846, metadata !0, metadata !"Class", metadata !40, i32 197, i64 0, i64 0, i64 0, i32 0, metadata !69} ; [ DW_TAG_typedef ] +!69 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !70} ; [ DW_TAG_pointer_type ] +!70 = metadata !{i32 589843, metadata !0, metadata !"objc_class", metadata !40, i32 0, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!71 = metadata !{i32 589837, metadata !61, metadata !"_mydataRef", metadata !61, i32 28, i64 32, i64 32, i64 32, i32 0, metadata !72, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] +!72 = metadata !{i32 589846, metadata !0, metadata !"CFTypeRef", metadata !24, i32 313, i64 0, i64 0, i64 0, i32 0, metadata !73} ; [ DW_TAG_typedef ] +!73 = 
metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !74} ; [ DW_TAG_pointer_type ] +!74 = metadata !{i32 589862, metadata !0, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, null} ; [ DW_TAG_const_type ] +!75 = metadata !{i32 589837, metadata !61, metadata !"_scale", metadata !61, i32 29, i64 32, i64 32, i64 64, i32 0, metadata !76, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] +!76 = metadata !{i32 589846, metadata !0, metadata !"Float", metadata !77, i32 89, i64 0, i64 0, i64 0, i32 0, metadata !78} ; [ DW_TAG_typedef ] +!77 = metadata !{i32 589865, metadata !"header12.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] +!78 = metadata !{i32 589860, metadata !0, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] +!79 = metadata !{i32 589837, metadata !61, metadata !"_mydataFlags", metadata !61, i32 37, i64 8, i64 8, i64 96, i32 0, metadata !80, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] +!80 = metadata !{i32 589843, metadata !0, metadata !"", metadata !61, i32 30, i64 8, i64 8, i32 0, i32 0, i32 0, metadata !81, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!81 = metadata !{metadata !82, metadata !84, metadata !85, metadata !86, metadata !87, metadata !88} +!82 = metadata !{i32 589837, metadata !61, metadata !"named", metadata !61, i32 31, i64 1, i64 32, i64 0, i32 0, metadata !83} ; [ DW_TAG_member ] +!83 = metadata !{i32 589860, metadata !0, metadata !"unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!84 = metadata !{i32 589837, metadata !61, metadata !"mydataO", metadata !61, i32 32, i64 3, i64 32, i64 1, i32 0, metadata !83} ; [ DW_TAG_member ] +!85 = metadata !{i32 589837, metadata !61, metadata !"cached", metadata !61, i32 33, i64 1, i64 32, i64 4, i32 0, metadata !83} ; [ DW_TAG_member ] +!86 = metadata !{i32 589837, metadata !61, metadata 
!"hasBeenCached", metadata !61, i32 34, i64 1, i64 32, i64 5, i32 0, metadata !83} ; [ DW_TAG_member ] +!87 = metadata !{i32 589837, metadata !61, metadata !"hasPattern", metadata !61, i32 35, i64 1, i64 32, i64 6, i32 0, metadata !83} ; [ DW_TAG_member ] +!88 = metadata !{i32 589837, metadata !61, metadata !"isCIMydata", metadata !61, i32 36, i64 1, i64 32, i64 7, i32 0, metadata !83} ; [ DW_TAG_member ] +!89 = metadata !{i32 589837, metadata !24, metadata !"self", metadata !24, i32 609, i64 32, i64 32, i64 192, i32 0, metadata !90} ; [ DW_TAG_member ] +!90 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !91} ; [ DW_TAG_pointer_type ] +!91 = metadata !{i32 589843, metadata !40, metadata !"MyWork", metadata !24, i32 36, i64 384, i64 32, i32 0, i32 0, i32 0, metadata !92, i32 16, i32 0} ; [ DW_TAG_structure_type ] +!92 = metadata !{metadata !93, metadata !98, metadata !101, metadata !107, metadata !123} +!93 = metadata !{i32 589852, metadata !91, null, metadata !24, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !94} ; [ DW_TAG_inheritance ] +!94 = metadata !{i32 589843, metadata !40, metadata !"twork", metadata !95, i32 43, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !96, i32 16, i32 0} ; [ DW_TAG_structure_type ] +!95 = metadata !{i32 589865, metadata !"header13.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] +!96 = metadata !{metadata !97} +!97 = metadata !{i32 589852, metadata !94, null, metadata !95, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !64} ; [ DW_TAG_inheritance ] +!98 = metadata !{i32 589837, metadata !24, metadata !"_itemID", metadata !24, i32 38, i64 64, i64 32, i64 32, i32 1, metadata !99, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] +!99 = metadata !{i32 589846, metadata !0, metadata !"uint64_t", metadata !40, i32 55, i64 0, i64 0, i64 0, i32 0, metadata !100} ; [ DW_TAG_typedef ] +!100 = metadata !{i32 589860, metadata !0, metadata !"long 
long unsigned int", null, i32 0, i64 64, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!101 = metadata !{i32 589837, metadata !24, metadata !"_library", metadata !24, i32 39, i64 32, i64 32, i64 96, i32 1, metadata !102, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] +!102 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !103} ; [ DW_TAG_pointer_type ] +!103 = metadata !{i32 589843, metadata !40, metadata !"MyLibrary2", metadata !104, i32 22, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !105, i32 16, i32 0} ; [ DW_TAG_structure_type ] +!104 = metadata !{i32 589865, metadata !"header14.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] +!105 = metadata !{metadata !106} +!106 = metadata !{i32 589852, metadata !103, null, metadata !104, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !64} ; [ DW_TAG_inheritance ] +!107 = metadata !{i32 589837, metadata !24, metadata !"_bounds", metadata !24, i32 40, i64 128, i64 32, i64 128, i32 1, metadata !108, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] +!108 = metadata !{i32 589846, metadata !0, metadata !"CR", metadata !40, i32 33, i64 0, i64 0, i64 0, i32 0, metadata !109} ; [ DW_TAG_typedef ] +!109 = metadata !{i32 589843, metadata !0, metadata !"CR", metadata !77, i32 29, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !110, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!110 = metadata !{metadata !111, metadata !117} +!111 = metadata !{i32 589837, metadata !77, metadata !"origin", metadata !77, i32 30, i64 64, i64 32, i64 0, i32 0, metadata !112} ; [ DW_TAG_member ] +!112 = metadata !{i32 589846, metadata !0, metadata !"CP", metadata !77, i32 17, i64 0, i64 0, i64 0, i32 0, metadata !113} ; [ DW_TAG_typedef ] +!113 = metadata !{i32 589843, metadata !0, metadata !"CP", metadata !77, i32 13, i64 64, i64 32, i32 0, i32 0, i32 0, metadata !114, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!114 = metadata 
!{metadata !115, metadata !116} +!115 = metadata !{i32 589837, metadata !77, metadata !"x", metadata !77, i32 14, i64 32, i64 32, i64 0, i32 0, metadata !76} ; [ DW_TAG_member ] +!116 = metadata !{i32 589837, metadata !77, metadata !"y", metadata !77, i32 15, i64 32, i64 32, i64 32, i32 0, metadata !76} ; [ DW_TAG_member ] +!117 = metadata !{i32 589837, metadata !77, metadata !"size", metadata !77, i32 31, i64 64, i64 32, i64 64, i32 0, metadata !118} ; [ DW_TAG_member ] +!118 = metadata !{i32 589846, metadata !0, metadata !"Size", metadata !77, i32 25, i64 0, i64 0, i64 0, i32 0, metadata !119} ; [ DW_TAG_typedef ] +!119 = metadata !{i32 589843, metadata !0, metadata !"Size", metadata !77, i32 21, i64 64, i64 32, i32 0, i32 0, i32 0, metadata !120, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!120 = metadata !{metadata !121, metadata !122} +!121 = metadata !{i32 589837, metadata !77, metadata !"width", metadata !77, i32 22, i64 32, i64 32, i64 0, i32 0, metadata !76} ; [ DW_TAG_member ] +!122 = metadata !{i32 589837, metadata !77, metadata !"height", metadata !77, i32 23, i64 32, i64 32, i64 32, i32 0, metadata !76} ; [ DW_TAG_member ] +!123 = metadata !{i32 589837, metadata !24, metadata !"_data", metadata !24, i32 40, i64 128, i64 32, i64 256, i32 1, metadata !108, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] +!124 = metadata !{i32 589837, metadata !24, metadata !"semi", metadata !24, i32 609, i64 32, i64 32, i64 224, i32 0, metadata !125} ; [ DW_TAG_member ] +!125 = metadata !{i32 589846, metadata !0, metadata !"d_t", metadata !24, i32 35, i64 0, i64 0, i64 0, i32 0, metadata !126} ; [ DW_TAG_typedef ] +!126 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !127} ; [ DW_TAG_pointer_type ] +!127 = metadata !{i32 589843, metadata !0, metadata !"my_struct", metadata !128, i32 49, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!128 = metadata !{i32 
589865, metadata !"header15.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] +!129 = metadata !{i32 609, i32 144, metadata !23, null} +!130 = metadata !{i32 590081, metadata !23, metadata !"loadedMydata", metadata !24, i32 33555041, metadata !59, i32 0} ; [ DW_TAG_arg_variable ] +!131 = metadata !{i32 609, i32 155, metadata !23, null} +!132 = metadata !{i32 590081, metadata !23, metadata !"bounds", metadata !24, i32 50332257, metadata !108, i32 0} ; [ DW_TAG_arg_variable ] +!133 = metadata !{i32 609, i32 175, metadata !23, null} +!134 = metadata !{i32 590081, metadata !23, metadata !"data", metadata !24, i32 67109473, metadata !108, i32 0} ; [ DW_TAG_arg_variable ] +!135 = metadata !{i32 609, i32 190, metadata !23, null} +!136 = metadata !{i32 590080, metadata !23, metadata !"mydata", metadata !24, i32 604, metadata !50, i32 0, i64 1, i64 20, i64 2, i64 1, i64 4, i64 2, i64 1, i64 24} ; [ DW_TAG_auto_variable ] +!137 = metadata !{i32 604, i32 49, metadata !23, null} +!138 = metadata !{i32 590080, metadata !23, metadata !"self", metadata !40, i32 604, metadata !90, i32 0, i64 1, i64 24} ; [ DW_TAG_auto_variable ] +!139 = metadata !{i32 590080, metadata !23, metadata !"semi", metadata !24, i32 607, metadata !125, i32 0, i64 1, i64 28} ; [ DW_TAG_auto_variable ] +!140 = metadata !{i32 607, i32 30, metadata !23, null} +!141 = metadata !{i32 610, i32 17, metadata !142, null} +!142 = metadata !{i32 589835, metadata !23, i32 609, i32 200, metadata !24, i32 94} ; [ DW_TAG_lexical_block ] +!143 = metadata !{i32 611, i32 17, metadata !142, null} +!144 = metadata !{i32 612, i32 17, metadata !142, null} +!145 = metadata !{i32 613, i32 17, metadata !142, null} +!146 = metadata !{i32 615, i32 13, metadata !142, null}
diff --git a/src/LLVM/test/CodeGen/ARM/debug-info-branch-folding.ll b/src/LLVM/test/CodeGen/ARM/debug-info-branch-folding.ll new file mode 100644 index 0000000..9bdae43 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/debug-info-branch-folding.ll
@@ -0,0 +1,94 @@ +; RUN: llc < %s - | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" +target triple = "thumbv7-apple-macosx10.6.7" + +;CHECK: vadd.f32 q4, q8, q8 +;CHECK-NEXT: Ltmp +;CHECK-NEXT: @DEBUG_VALUE: y <- Q4+0 +;CHECK-NEXT: @DEBUG_VALUE: x <- Q4+0 + + +@.str = external constant [13 x i8] + +declare <4 x float> @test0001(float) nounwind readnone ssp + +define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp { +entry: + br label %for.body9 + +for.body9: ; preds = %for.body9, %entry + %add19 = fadd <4 x float> undef, <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00>, !dbg !39 + tail call void @llvm.dbg.value(metadata !{<4 x float> %add19}, i64 0, metadata !27), !dbg !39 + %add20 = fadd <4 x float> undef, <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00>, !dbg !39 + tail call void @llvm.dbg.value(metadata !{<4 x float> %add20}, i64 0, metadata !28), !dbg !39 + br i1 undef, label %for.end54, label %for.body9, !dbg !44 + +for.end54: ; preds = %for.body9 + %tmp115 = extractelement <4 x float> %add19, i32 1 + %conv6.i75 = fpext float %tmp115 to double, !dbg !45 + %call.i82 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0), double undef, double %conv6.i75, double undef, double undef) nounwind, !dbg !45 + %tmp116 = extractelement <4 x float> %add20, i32 1 + %conv6.i76 = fpext float %tmp116 to double, !dbg !45 + %call.i83 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0), double undef, double %conv6.i76, double undef, double undef) nounwind, !dbg !45 + ret i32 0, !dbg !49 +} + +declare i32 @printf(i8* nocapture, ...) 
nounwind + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.sp = !{!0, !10, !14} +!llvm.dbg.lv.test0001 = !{!18} +!llvm.dbg.lv.main = !{!19, !20, !24, !26, !27, !28, !29} +!llvm.dbg.lv.printFV = !{!30} + +!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"test0001", metadata !"test0001", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, <4 x float> (float)* @test0001, null} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 589865, metadata !"build2.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"build2.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 129915)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5} +!5 = metadata !{i32 589846, metadata !2, metadata !"v4f32", metadata !1, i32 14, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ] +!6 = metadata !{i32 590083, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 128, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_vector_type ] +!7 = metadata !{i32 589860, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] +!8 = metadata !{metadata !9} +!9 = metadata !{i32 589857, i64 0, i64 3} ; [ DW_TAG_subrange_type ] +!10 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main, null} ; [ DW_TAG_subprogram ] +!11 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!12 = metadata 
!{metadata !13} +!13 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!14 = metadata !{i32 589870, i32 0, metadata !15, metadata !"printFV", metadata !"printFV", metadata !"", metadata !15, i32 41, metadata !16, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null} ; [ DW_TAG_subprogram ] +!15 = metadata !{i32 589865, metadata !"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/UnitTests/Vector/helpers.h", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] +!16 = metadata !{i32 589845, metadata !15, metadata !"", metadata !15, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !17, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!17 = metadata !{null} +!18 = metadata !{i32 590081, metadata !0, metadata !"a", metadata !1, i32 16777219, metadata !7, i32 0} ; [ DW_TAG_arg_variable ] +!19 = metadata !{i32 590081, metadata !10, metadata !"argc", metadata !1, i32 16777275, metadata !13, i32 0} ; [ DW_TAG_arg_variable ] +!20 = metadata !{i32 590081, metadata !10, metadata !"argv", metadata !1, i32 33554491, metadata !21, i32 0} ; [ DW_TAG_arg_variable ] +!21 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ] +!22 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ] +!23 = metadata !{i32 589860, metadata !2, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] +!24 = metadata !{i32 590080, metadata !25, metadata !"i", metadata !1, i32 60, metadata !13, i32 0} ; [ DW_TAG_auto_variable ] +!25 = metadata !{i32 589835, metadata !10, i32 59, i32 33, metadata !1, i32 14} ; [ DW_TAG_lexical_block ] +!26 = metadata !{i32 590080, metadata !25, metadata !"j", metadata !1, i32 60, metadata !13, i32 0} ; [ DW_TAG_auto_variable ] +!27 = metadata !{i32 590080, metadata !25, 
metadata !"x", metadata !1, i32 61, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] +!28 = metadata !{i32 590080, metadata !25, metadata !"y", metadata !1, i32 62, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] +!29 = metadata !{i32 590080, metadata !25, metadata !"z", metadata !1, i32 63, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] +!30 = metadata !{i32 590081, metadata !14, metadata !"F", metadata !15, i32 16777257, metadata !31, i32 0} ; [ DW_TAG_arg_variable ] +!31 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ] +!32 = metadata !{i32 589846, metadata !2, metadata !"FV", metadata !15, i32 25, i64 0, i64 0, i64 0, i32 0, metadata !33} ; [ DW_TAG_typedef ] +!33 = metadata !{i32 589847, metadata !2, metadata !"", metadata !15, i32 22, i64 128, i64 128, i64 0, i32 0, i32 0, metadata !34, i32 0, i32 0} ; [ DW_TAG_union_type ] +!34 = metadata !{metadata !35, metadata !37} +!35 = metadata !{i32 589837, metadata !15, metadata !"V", metadata !15, i32 23, i64 128, i64 128, i64 0, i32 0, metadata !36} ; [ DW_TAG_member ] +!36 = metadata !{i32 589846, metadata !2, metadata !"v4sf", metadata !15, i32 3, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ] +!37 = metadata !{i32 589837, metadata !15, metadata !"A", metadata !15, i32 24, i64 128, i64 32, i64 0, i32 0, metadata !38} ; [ DW_TAG_member ] +!38 = metadata !{i32 589825, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 32, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_array_type ] +!39 = metadata !{i32 79, i32 7, metadata !40, null} +!40 = metadata !{i32 589835, metadata !41, i32 75, i32 35, metadata !1, i32 18} ; [ DW_TAG_lexical_block ] +!41 = metadata !{i32 589835, metadata !42, i32 75, i32 5, metadata !1, i32 17} ; [ DW_TAG_lexical_block ] +!42 = metadata !{i32 589835, metadata !43, i32 71, i32 32, metadata !1, i32 16} ; [ DW_TAG_lexical_block ] +!43 = metadata !{i32 589835, metadata !25, 
i32 71, i32 3, metadata !1, i32 15} ; [ DW_TAG_lexical_block ] +!44 = metadata !{i32 75, i32 5, metadata !42, null} +!45 = metadata !{i32 42, i32 2, metadata !46, metadata !48} +!46 = metadata !{i32 589835, metadata !47, i32 42, i32 2, metadata !15, i32 20} ; [ DW_TAG_lexical_block ] +!47 = metadata !{i32 589835, metadata !14, i32 41, i32 28, metadata !15, i32 19} ; [ DW_TAG_lexical_block ] +!48 = metadata !{i32 95, i32 3, metadata !25, null} +!49 = metadata !{i32 99, i32 3, metadata !25, null}
diff --git a/src/LLVM/test/CodeGen/ARM/debug-info-d16-reg.ll b/src/LLVM/test/CodeGen/ARM/debug-info-d16-reg.ll new file mode 100644 index 0000000..8c9095e --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/debug-info-d16-reg.ll
@@ -0,0 +1,105 @@ +; RUN: llc < %s - | FileCheck %s +; Radar 9309221 +; Test dwarf reg no for d16 +;CHECK: DW_OP_regx +;CHECK-NEXT: 272 + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin10" + +@.str = private unnamed_addr constant [11 x i8] c"%p %lf %c\0A\00", align 4 +@.str1 = private unnamed_addr constant [6 x i8] c"point\00", align 4 + +define i32 @inlineprinter(i8* %ptr, double %val, i8 zeroext %c) nounwind optsize { +entry: + tail call void @llvm.dbg.value(metadata !{i8* %ptr}, i64 0, metadata !19), !dbg !26 + tail call void @llvm.dbg.value(metadata !{double %val}, i64 0, metadata !20), !dbg !26 + tail call void @llvm.dbg.value(metadata !{i8 %c}, i64 0, metadata !21), !dbg !26 + %0 = zext i8 %c to i32, !dbg !27 + %1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %val, i32 %0) nounwind, !dbg !27 + ret i32 0, !dbg !29 +} + +define i32 @printer(i8* %ptr, double %val, i8 zeroext %c) nounwind optsize noinline { +entry: + tail call void @llvm.dbg.value(metadata !{i8* %ptr}, i64 0, metadata !16), !dbg !30 + tail call void @llvm.dbg.value(metadata !{double %val}, i64 0, metadata !17), !dbg !30 + tail call void @llvm.dbg.value(metadata !{i8 %c}, i64 0, metadata !18), !dbg !30 + %0 = zext i8 %c to i32, !dbg !31 + %1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %val, i32 %0) nounwind, !dbg !31 + ret i32 0, !dbg !33 +} + +declare i32 @printf(i8* nocapture, ...) 
nounwind + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +define i32 @main(i32 %argc, i8** nocapture %argv) nounwind optsize { +entry: + tail call void @llvm.dbg.value(metadata !{i32 %argc}, i64 0, metadata !22), !dbg !34 + tail call void @llvm.dbg.value(metadata !{i8** %argv}, i64 0, metadata !23), !dbg !34 + %0 = sitofp i32 %argc to double, !dbg !35 + %1 = fadd double %0, 5.555552e+05, !dbg !35 + tail call void @llvm.dbg.value(metadata !{double %1}, i64 0, metadata !24), !dbg !35 + %2 = tail call i32 @puts(i8* getelementptr inbounds ([6 x i8]* @.str1, i32 0, i32 0)) nounwind, !dbg !36 + %3 = getelementptr inbounds i8* bitcast (i32 (i32, i8**)* @main to i8*), i32 %argc, !dbg !37 + %4 = trunc i32 %argc to i8, !dbg !37 + %5 = add i8 %4, 97, !dbg !37 + tail call void @llvm.dbg.value(metadata !{i8* %3}, i64 0, metadata !19) nounwind, !dbg !38 + tail call void @llvm.dbg.value(metadata !{double %1}, i64 0, metadata !20) nounwind, !dbg !38 + tail call void @llvm.dbg.value(metadata !{i8 %5}, i64 0, metadata !21) nounwind, !dbg !38 + %6 = zext i8 %5 to i32, !dbg !39 + %7 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i8* %3, double %1, i32 %6) nounwind, !dbg !39 + %8 = tail call i32 @printer(i8* %3, double %1, i8 zeroext %5) nounwind, !dbg !40 + ret i32 0, !dbg !41 +} + +declare i32 @puts(i8* nocapture) nounwind + +!llvm.dbg.sp = !{!0, !9, !10} +!llvm.dbg.lv.printer = !{!16, !17, !18} +!llvm.dbg.lv.inlineprinter = !{!19, !20, !21} +!llvm.dbg.lv.main = !{!22, !23, !24} + +!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"printer", metadata !"printer", metadata !"printer", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, double, i8)* @printer} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 589865, metadata !"a.c", metadata !"/tmp/", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 589841, i32 0, i32 1, metadata !"/tmp/a.c", 
metadata !"/tmp", metadata !"(LLVM build 00)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5, metadata !6, metadata !7, metadata !8} +!5 = metadata !{i32 589860, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 589839, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] +!7 = metadata !{i32 589860, metadata !1, metadata !"double", metadata !1, i32 0, i64 64, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] +!8 = metadata !{i32 589860, metadata !1, metadata !"unsigned char", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ] +!9 = metadata !{i32 589870, i32 0, metadata !1, metadata !"inlineprinter", metadata !"inlineprinter", metadata !"inlineprinter", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, double, i8)* @inlineprinter} ; [ DW_TAG_subprogram ] +!10 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 18, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main} ; [ DW_TAG_subprogram ] +!11 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ] +!12 = metadata !{metadata !5, metadata !5, metadata !13} +!13 = metadata !{i32 589839, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ] +!14 = metadata !{i32 589839, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !15} ; [ DW_TAG_pointer_type ] +!15 = metadata !{i32 589860, metadata 
!1, metadata !"char", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] +!16 = metadata !{i32 590081, metadata !0, metadata !"ptr", metadata !1, i32 11, metadata !6, i32 0} ; [ DW_TAG_arg_variable ] +!17 = metadata !{i32 590081, metadata !0, metadata !"val", metadata !1, i32 11, metadata !7, i32 0} ; [ DW_TAG_arg_variable ] +!18 = metadata !{i32 590081, metadata !0, metadata !"c", metadata !1, i32 11, metadata !8, i32 0} ; [ DW_TAG_arg_variable ] +!19 = metadata !{i32 590081, metadata !9, metadata !"ptr", metadata !1, i32 4, metadata !6, i32 0} ; [ DW_TAG_arg_variable ] +!20 = metadata !{i32 590081, metadata !9, metadata !"val", metadata !1, i32 4, metadata !7, i32 0} ; [ DW_TAG_arg_variable ] +!21 = metadata !{i32 590081, metadata !9, metadata !"c", metadata !1, i32 4, metadata !8, i32 0} ; [ DW_TAG_arg_variable ] +!22 = metadata !{i32 590081, metadata !10, metadata !"argc", metadata !1, i32 17, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] +!23 = metadata !{i32 590081, metadata !10, metadata !"argv", metadata !1, i32 17, metadata !13, i32 0} ; [ DW_TAG_arg_variable ] +!24 = metadata !{i32 590080, metadata !25, metadata !"dval", metadata !1, i32 19, metadata !7, i32 0} ; [ DW_TAG_auto_variable ] +!25 = metadata !{i32 589835, metadata !10, i32 18, i32 0, metadata !1, i32 2} ; [ DW_TAG_lexical_block ] +!26 = metadata !{i32 4, i32 0, metadata !9, null} +!27 = metadata !{i32 6, i32 0, metadata !28, null} +!28 = metadata !{i32 589835, metadata !9, i32 5, i32 0, metadata !1, i32 1} ; [ DW_TAG_lexical_block ] +!29 = metadata !{i32 7, i32 0, metadata !28, null} +!30 = metadata !{i32 11, i32 0, metadata !0, null} +!31 = metadata !{i32 13, i32 0, metadata !32, null} +!32 = metadata !{i32 589835, metadata !0, i32 12, i32 0, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!33 = metadata !{i32 14, i32 0, metadata !32, null} +!34 = metadata !{i32 17, i32 0, metadata !10, null} +!35 = metadata !{i32 19, i32 0, metadata !25, null} +!36 = metadata !{i32 
20, i32 0, metadata !25, null} +!37 = metadata !{i32 21, i32 0, metadata !25, null} +!38 = metadata !{i32 4, i32 0, metadata !9, metadata !37} +!39 = metadata !{i32 6, i32 0, metadata !28, metadata !37} +!40 = metadata !{i32 22, i32 0, metadata !25, null} +!41 = metadata !{i32 23, i32 0, metadata !25, null}
diff --git a/src/LLVM/test/CodeGen/ARM/debug-info-qreg.ll b/src/LLVM/test/CodeGen/ARM/debug-info-qreg.ll new file mode 100644 index 0000000..e83a83d --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/debug-info-qreg.ll
@@ -0,0 +1,94 @@ +; RUN: llc < %s - | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" +target triple = "thumbv7-apple-macosx10.6.7" + +;CHECK: DW_OP_regx for Q register: D1 +;CHECK-NEXT: byte +;CHECK-NEXT: byte +;CHECK-NEXT: DW_OP_piece 8 +;CHECK-NEXT: byte 8 +;CHECK-NEXT: DW_OP_regx for Q register: D2 +;CHECK-NEXT: byte +;CHECK-NEXT: byte +;CHECK-NEXT: DW_OP_piece 8 +;CHECK-NEXT: byte 8 + +@.str = external constant [13 x i8] + +declare <4 x float> @test0001(float) nounwind readnone ssp + +define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp { +entry: + br label %for.body9 + +for.body9: ; preds = %for.body9, %entry + %add19 = fadd <4 x float> undef, <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00>, !dbg !39 + br i1 undef, label %for.end54, label %for.body9, !dbg !44 + +for.end54: ; preds = %for.body9 + tail call void @llvm.dbg.value(metadata !{<4 x float> %add19}, i64 0, metadata !27), !dbg !39 + %tmp115 = extractelement <4 x float> %add19, i32 1 + %conv6.i75 = fpext float %tmp115 to double, !dbg !45 + %call.i82 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0), double undef, double %conv6.i75, double undef, double undef) nounwind, !dbg !45 + ret i32 0, !dbg !49 +} + +declare i32 @printf(i8* nocapture, ...) 
nounwind + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.sp = !{!0, !10, !14} +!llvm.dbg.lv.test0001 = !{!18} +!llvm.dbg.lv.main = !{!19, !20, !24, !26, !27, !28, !29} +!llvm.dbg.lv.printFV = !{!30} + +!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"test0001", metadata !"test0001", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, <4 x float> (float)* @test0001, null} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 589865, metadata !"build2.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"build2.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 129915)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5} +!5 = metadata !{i32 589846, metadata !2, metadata !"v4f32", metadata !1, i32 14, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ] +!6 = metadata !{i32 590083, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 128, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_vector_type ] +!7 = metadata !{i32 589860, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] +!8 = metadata !{metadata !9} +!9 = metadata !{i32 589857, i64 0, i64 3} ; [ DW_TAG_subrange_type ] +!10 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main, null} ; [ DW_TAG_subprogram ] +!11 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!12 = metadata 
!{metadata !13} +!13 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!14 = metadata !{i32 589870, i32 0, metadata !15, metadata !"printFV", metadata !"printFV", metadata !"", metadata !15, i32 41, metadata !16, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null} ; [ DW_TAG_subprogram ] +!15 = metadata !{i32 589865, metadata !"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/UnitTests/Vector/helpers.h", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] +!16 = metadata !{i32 589845, metadata !15, metadata !"", metadata !15, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !17, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!17 = metadata !{null} +!18 = metadata !{i32 590081, metadata !0, metadata !"a", metadata !1, i32 16777219, metadata !7, i32 0} ; [ DW_TAG_arg_variable ] +!19 = metadata !{i32 590081, metadata !10, metadata !"argc", metadata !1, i32 16777275, metadata !13, i32 0} ; [ DW_TAG_arg_variable ] +!20 = metadata !{i32 590081, metadata !10, metadata !"argv", metadata !1, i32 33554491, metadata !21, i32 0} ; [ DW_TAG_arg_variable ] +!21 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ] +!22 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ] +!23 = metadata !{i32 589860, metadata !2, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] +!24 = metadata !{i32 590080, metadata !25, metadata !"i", metadata !1, i32 60, metadata !13, i32 0} ; [ DW_TAG_auto_variable ] +!25 = metadata !{i32 589835, metadata !10, i32 59, i32 33, metadata !1, i32 14} ; [ DW_TAG_lexical_block ] +!26 = metadata !{i32 590080, metadata !25, metadata !"j", metadata !1, i32 60, metadata !13, i32 0} ; [ DW_TAG_auto_variable ] +!27 = metadata !{i32 590080, metadata !25, 
metadata !"x", metadata !1, i32 61, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] +!28 = metadata !{i32 590080, metadata !25, metadata !"y", metadata !1, i32 62, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] +!29 = metadata !{i32 590080, metadata !25, metadata !"z", metadata !1, i32 63, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] +!30 = metadata !{i32 590081, metadata !14, metadata !"F", metadata !15, i32 16777257, metadata !31, i32 0} ; [ DW_TAG_arg_variable ] +!31 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ] +!32 = metadata !{i32 589846, metadata !2, metadata !"FV", metadata !15, i32 25, i64 0, i64 0, i64 0, i32 0, metadata !33} ; [ DW_TAG_typedef ] +!33 = metadata !{i32 589847, metadata !2, metadata !"", metadata !15, i32 22, i64 128, i64 128, i64 0, i32 0, i32 0, metadata !34, i32 0, i32 0} ; [ DW_TAG_union_type ] +!34 = metadata !{metadata !35, metadata !37} +!35 = metadata !{i32 589837, metadata !15, metadata !"V", metadata !15, i32 23, i64 128, i64 128, i64 0, i32 0, metadata !36} ; [ DW_TAG_member ] +!36 = metadata !{i32 589846, metadata !2, metadata !"v4sf", metadata !15, i32 3, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ] +!37 = metadata !{i32 589837, metadata !15, metadata !"A", metadata !15, i32 24, i64 128, i64 32, i64 0, i32 0, metadata !38} ; [ DW_TAG_member ] +!38 = metadata !{i32 589825, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 32, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_array_type ] +!39 = metadata !{i32 79, i32 7, metadata !40, null} +!40 = metadata !{i32 589835, metadata !41, i32 75, i32 35, metadata !1, i32 18} ; [ DW_TAG_lexical_block ] +!41 = metadata !{i32 589835, metadata !42, i32 75, i32 5, metadata !1, i32 17} ; [ DW_TAG_lexical_block ] +!42 = metadata !{i32 589835, metadata !43, i32 71, i32 32, metadata !1, i32 16} ; [ DW_TAG_lexical_block ] +!43 = metadata !{i32 589835, metadata !25, 
i32 71, i32 3, metadata !1, i32 15} ; [ DW_TAG_lexical_block ] +!44 = metadata !{i32 75, i32 5, metadata !42, null} +!45 = metadata !{i32 42, i32 2, metadata !46, metadata !48} +!46 = metadata !{i32 589835, metadata !47, i32 42, i32 2, metadata !15, i32 20} ; [ DW_TAG_lexical_block ] +!47 = metadata !{i32 589835, metadata !14, i32 41, i32 28, metadata !15, i32 19} ; [ DW_TAG_lexical_block ] +!48 = metadata !{i32 95, i32 3, metadata !25, null} +!49 = metadata !{i32 99, i32 3, metadata !25, null}
diff --git a/src/LLVM/test/CodeGen/ARM/debug-info-s16-reg.ll b/src/LLVM/test/CodeGen/ARM/debug-info-s16-reg.ll new file mode 100644 index 0000000..548c9bd --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/debug-info-s16-reg.ll
@@ -0,0 +1,116 @@ +; RUN: llc < %s - | FileCheck %s +; Radar 9309221 +; Test dwarf reg no for s16 +;CHECK: DW_OP_regx for S register +;CHECK-NEXT: byte +;CHECK-NEXT: byte +;CHECK-NEXT: DW_OP_bit_piece 32 0 + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" +target triple = "thumbv7-apple-macosx10.6.7" + +@.str = private unnamed_addr constant [11 x i8] c"%p %lf %c\0A\00" +@.str1 = private unnamed_addr constant [6 x i8] c"point\00" + +define i32 @inlineprinter(i8* %ptr, float %val, i8 zeroext %c) nounwind optsize ssp { +entry: + tail call void @llvm.dbg.value(metadata !{i8* %ptr}, i64 0, metadata !8), !dbg !24 + tail call void @llvm.dbg.value(metadata !{float %val}, i64 0, metadata !10), !dbg !25 + tail call void @llvm.dbg.value(metadata !{i8 %c}, i64 0, metadata !12), !dbg !26 + %conv = fpext float %val to double, !dbg !27 + %conv3 = zext i8 %c to i32, !dbg !27 + %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %conv, i32 %conv3) nounwind optsize, !dbg !27 + ret i32 0, !dbg !29 +} + +declare i32 @printf(i8* nocapture, ...) 
nounwind optsize + +define i32 @printer(i8* %ptr, float %val, i8 zeroext %c) nounwind optsize noinline ssp { +entry: + tail call void @llvm.dbg.value(metadata !{i8* %ptr}, i64 0, metadata !14), !dbg !30 + tail call void @llvm.dbg.value(metadata !{float %val}, i64 0, metadata !15), !dbg !31 + tail call void @llvm.dbg.value(metadata !{i8 %c}, i64 0, metadata !16), !dbg !32 + %conv = fpext float %val to double, !dbg !33 + %conv3 = zext i8 %c to i32, !dbg !33 + %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %conv, i32 %conv3) nounwind optsize, !dbg !33 + ret i32 0, !dbg !35 +} + +define i32 @main(i32 %argc, i8** nocapture %argv) nounwind optsize ssp { +entry: + tail call void @llvm.dbg.value(metadata !{i32 %argc}, i64 0, metadata !17), !dbg !36 + tail call void @llvm.dbg.value(metadata !{i8** %argv}, i64 0, metadata !18), !dbg !37 + %conv = sitofp i32 %argc to double, !dbg !38 + %add = fadd double %conv, 5.555552e+05, !dbg !38 + %conv1 = fptrunc double %add to float, !dbg !38 + tail call void @llvm.dbg.value(metadata !{float %conv1}, i64 0, metadata !22), !dbg !38 + %call = tail call i32 @puts(i8* getelementptr inbounds ([6 x i8]* @.str1, i32 0, i32 0)) nounwind optsize, !dbg !39 + %add.ptr = getelementptr i8* bitcast (i32 (i32, i8**)* @main to i8*), i32 %argc, !dbg !40 + %add5 = add nsw i32 %argc, 97, !dbg !40 + %conv6 = trunc i32 %add5 to i8, !dbg !40 + tail call void @llvm.dbg.value(metadata !{i8* %add.ptr}, i64 0, metadata !8) nounwind, !dbg !41 + tail call void @llvm.dbg.value(metadata !{float %conv1}, i64 0, metadata !10) nounwind, !dbg !42 + tail call void @llvm.dbg.value(metadata !{i8 %conv6}, i64 0, metadata !12) nounwind, !dbg !43 + %conv.i = fpext float %conv1 to double, !dbg !44 + %conv3.i = and i32 %add5, 255, !dbg !44 + %call.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i8* %add.ptr, double %conv.i, i32 %conv3.i) nounwind optsize, 
!dbg !44 + %call14 = tail call i32 @printer(i8* %add.ptr, float %conv1, i8 zeroext %conv6) optsize, !dbg !45 + ret i32 0, !dbg !46 +} + +declare i32 @puts(i8* nocapture) nounwind optsize + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.sp = !{!0, !6, !7} +!llvm.dbg.lv.inlineprinter = !{!8, !10, !12} +!llvm.dbg.lv.printer = !{!14, !15, !16} +!llvm.dbg.lv.main = !{!17, !18, !22} + +!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"inlineprinter", metadata !"inlineprinter", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i8*, float, i8)* @inlineprinter, null} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 589865, metadata !"a.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"a.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 129915)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5} +!5 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 589870, i32 0, metadata !1, metadata !"printer", metadata !"printer", metadata !"", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i8*, float, i8)* @printer, null} ; [ DW_TAG_subprogram ] +!7 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 18, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main, null} ; [ DW_TAG_subprogram ] +!8 = metadata !{i32 590081, metadata !0, metadata !"ptr", metadata !1, i32 16777220, metadata !9, i32 0} ; [ DW_TAG_arg_variable ] +!9 = metadata !{i32 589839, 
metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] +!10 = metadata !{i32 590081, metadata !0, metadata !"val", metadata !1, i32 33554436, metadata !11, i32 0} ; [ DW_TAG_arg_variable ] +!11 = metadata !{i32 589860, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] +!12 = metadata !{i32 590081, metadata !0, metadata !"c", metadata !1, i32 50331652, metadata !13, i32 0} ; [ DW_TAG_arg_variable ] +!13 = metadata !{i32 589860, metadata !2, metadata !"unsigned char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ] +!14 = metadata !{i32 590081, metadata !6, metadata !"ptr", metadata !1, i32 16777227, metadata !9, i32 0} ; [ DW_TAG_arg_variable ] +!15 = metadata !{i32 590081, metadata !6, metadata !"val", metadata !1, i32 33554443, metadata !11, i32 0} ; [ DW_TAG_arg_variable ] +!16 = metadata !{i32 590081, metadata !6, metadata !"c", metadata !1, i32 50331659, metadata !13, i32 0} ; [ DW_TAG_arg_variable ] +!17 = metadata !{i32 590081, metadata !7, metadata !"argc", metadata !1, i32 16777233, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] +!18 = metadata !{i32 590081, metadata !7, metadata !"argv", metadata !1, i32 33554449, metadata !19, i32 0} ; [ DW_TAG_arg_variable ] +!19 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ] +!20 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !21} ; [ DW_TAG_pointer_type ] +!21 = metadata !{i32 589860, metadata !2, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] +!22 = metadata !{i32 590080, metadata !23, metadata !"dval", metadata !1, i32 19, metadata !11, i32 0} ; [ DW_TAG_auto_variable ] +!23 = metadata !{i32 589835, metadata !7, i32 18, i32 1, metadata !1, i32 2} ; [ DW_TAG_lexical_block ] +!24 = metadata !{i32 4, i32 22, 
metadata !0, null} +!25 = metadata !{i32 4, i32 33, metadata !0, null} +!26 = metadata !{i32 4, i32 52, metadata !0, null} +!27 = metadata !{i32 6, i32 3, metadata !28, null} +!28 = metadata !{i32 589835, metadata !0, i32 5, i32 1, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!29 = metadata !{i32 7, i32 3, metadata !28, null} +!30 = metadata !{i32 11, i32 42, metadata !6, null} +!31 = metadata !{i32 11, i32 53, metadata !6, null} +!32 = metadata !{i32 11, i32 72, metadata !6, null} +!33 = metadata !{i32 13, i32 3, metadata !34, null} +!34 = metadata !{i32 589835, metadata !6, i32 12, i32 1, metadata !1, i32 1} ; [ DW_TAG_lexical_block ] +!35 = metadata !{i32 14, i32 3, metadata !34, null} +!36 = metadata !{i32 17, i32 15, metadata !7, null} +!37 = metadata !{i32 17, i32 28, metadata !7, null} +!38 = metadata !{i32 19, i32 31, metadata !23, null} +!39 = metadata !{i32 20, i32 3, metadata !23, null} +!40 = metadata !{i32 21, i32 3, metadata !23, null} +!41 = metadata !{i32 4, i32 22, metadata !0, metadata !40} +!42 = metadata !{i32 4, i32 33, metadata !0, metadata !40} +!43 = metadata !{i32 4, i32 52, metadata !0, metadata !40} +!44 = metadata !{i32 6, i32 3, metadata !28, metadata !40} +!45 = metadata !{i32 22, i32 3, metadata !23, null} +!46 = metadata !{i32 23, i32 1, metadata !23, null}
diff --git a/src/LLVM/test/CodeGen/ARM/debug-info-sreg2.ll b/src/LLVM/test/CodeGen/ARM/debug-info-sreg2.ll new file mode 100644 index 0000000..ee777ce --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/debug-info-sreg2.ll
@@ -0,0 +1,61 @@ +; RUN: llc < %s - | FileCheck %s +; Radar 9376013 +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" +target triple = "thumbv7-apple-macosx10.6.7" + +;CHECK: Ldebug_loc0: +;CHECK-NEXT: .long Ltmp1 +;CHECK-NEXT: .long Ltmp2 +;CHECK-NEXT: Lset8 = Ltmp10-Ltmp9 @ Loc expr size +;CHECK-NEXT: .short Lset8 +;CHECK-NEXT: Ltmp9: +;CHECK-NEXT: .byte 144 @ DW_OP_regx for S register + +define void @_Z3foov() optsize ssp { +entry: + %call = tail call float @_Z3barv() optsize, !dbg !11 + tail call void @llvm.dbg.value(metadata !{float %call}, i64 0, metadata !5), !dbg !11 + %call16 = tail call float @_Z2f2v() optsize, !dbg !12 + %cmp7 = fcmp olt float %call, %call16, !dbg !12 + br i1 %cmp7, label %for.body, label %for.end, !dbg !12 + +for.body: ; preds = %entry, %for.body + %k.08 = phi float [ %inc, %for.body ], [ %call, %entry ] + %call4 = tail call float @_Z2f3f(float %k.08) optsize, !dbg !13 + %inc = fadd float %k.08, 1.000000e+00, !dbg !14 + %call1 = tail call float @_Z2f2v() optsize, !dbg !12 + %cmp = fcmp olt float %inc, %call1, !dbg !12 + br i1 %cmp, label %for.body, label %for.end, !dbg !12 + +for.end: ; preds = %for.body, %entry + ret void, !dbg !15 +} + +declare float @_Z3barv() optsize + +declare float @_Z2f2v() optsize + +declare float @_Z2f3f(float) optsize + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.cu = !{!0} +!llvm.dbg.sp = !{!1} +!llvm.dbg.lv._Z3foov = !{!5, !8} + +!0 = metadata !{i32 589841, i32 0, i32 4, metadata !"k.cc", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 130845)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!1 = metadata !{i32 589870, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3foov", metadata !2, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void ()* @_Z3foov, null, null} ; [ DW_TAG_subprogram ] +!2 = metadata 
!{i32 589865, metadata !"k.cc", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 589845, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{null} +!5 = metadata !{i32 590080, metadata !6, metadata !"k", metadata !2, i32 6, metadata !7, i32 0} ; [ DW_TAG_auto_variable ] +!6 = metadata !{i32 589835, metadata !1, i32 5, i32 12, metadata !2, i32 0} ; [ DW_TAG_lexical_block ] +!7 = metadata !{i32 589860, metadata !0, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] +!8 = metadata !{i32 590080, metadata !9, metadata !"y", metadata !2, i32 8, metadata !7, i32 0} ; [ DW_TAG_auto_variable ] +!9 = metadata !{i32 589835, metadata !10, i32 7, i32 25, metadata !2, i32 2} ; [ DW_TAG_lexical_block ] +!10 = metadata !{i32 589835, metadata !6, i32 7, i32 3, metadata !2, i32 1} ; [ DW_TAG_lexical_block ] +!11 = metadata !{i32 6, i32 18, metadata !6, null} +!12 = metadata !{i32 7, i32 3, metadata !6, null} +!13 = metadata !{i32 8, i32 20, metadata !9, null} +!14 = metadata !{i32 7, i32 20, metadata !10, null} +!15 = metadata !{i32 10, i32 1, metadata !6, null}
diff --git a/src/LLVM/test/CodeGen/ARM/dg.exp b/src/LLVM/test/CodeGen/ARM/dg.exp new file mode 100644 index 0000000..ea4f76e --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/dg.exp
@@ -0,0 +1,5 @@ +load_lib llvm.exp + +if { [llvm_supports_target ARM] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] +}
diff --git a/src/LLVM/test/CodeGen/ARM/div.ll b/src/LLVM/test/CodeGen/ARM/div.ll new file mode 100644 index 0000000..4ab13a7 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/div.ll
@@ -0,0 +1,34 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=CHECK-ARM + +define i32 @f1(i32 %a, i32 %b) { +entry: +; CHECK-ARM: f1 +; CHECK-ARM: __divsi3 + %tmp1 = sdiv i32 %a, %b ; <i32> [#uses=1] + ret i32 %tmp1 +} + +define i32 @f2(i32 %a, i32 %b) { +entry: +; CHECK-ARM: f2 +; CHECK-ARM: __udivsi3 + %tmp1 = udiv i32 %a, %b ; <i32> [#uses=1] + ret i32 %tmp1 +} + +define i32 @f3(i32 %a, i32 %b) { +entry: +; CHECK-ARM: f3 +; CHECK-ARM: __modsi3 + %tmp1 = srem i32 %a, %b ; <i32> [#uses=1] + ret i32 %tmp1 +} + +define i32 @f4(i32 %a, i32 %b) { +entry: +; CHECK-ARM: f4 +; CHECK-ARM: __umodsi3 + %tmp1 = urem i32 %a, %b ; <i32> [#uses=1] + ret i32 %tmp1 +} +
diff --git a/src/LLVM/test/CodeGen/ARM/divmod.ll b/src/LLVM/test/CodeGen/ARM/divmod.ll new file mode 100644 index 0000000..49c4103 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/divmod.ll
@@ -0,0 +1,58 @@ +; RUN: llc < %s -mtriple=arm-apple-ios5.0 | FileCheck %s + +define void @foo(i32 %x, i32 %y, i32* nocapture %P) nounwind ssp { +entry: +; CHECK: foo: +; CHECK: bl ___divmodsi4 +; CHECK-NOT: bl ___divmodsi4 + %div = sdiv i32 %x, %y + store i32 %div, i32* %P, align 4 + %rem = srem i32 %x, %y + %arrayidx6 = getelementptr inbounds i32* %P, i32 1 + store i32 %rem, i32* %arrayidx6, align 4 + ret void +} + +define void @bar(i32 %x, i32 %y, i32* nocapture %P) nounwind ssp { +entry: +; CHECK: bar: +; CHECK: bl ___udivmodsi4 +; CHECK-NOT: bl ___udivmodsi4 + %div = udiv i32 %x, %y + store i32 %div, i32* %P, align 4 + %rem = urem i32 %x, %y + %arrayidx6 = getelementptr inbounds i32* %P, i32 1 + store i32 %rem, i32* %arrayidx6, align 4 + ret void +} + +; rdar://9280991 +@flags = external unnamed_addr global i32 +@tabsize = external unnamed_addr global i32 + +define void @do_indent(i32 %cols) nounwind { +entry: +; CHECK: do_indent: + %0 = load i32* @flags, align 4 + %1 = and i32 %0, 67108864 + %2 = icmp eq i32 %1, 0 + br i1 %2, label %bb1, label %bb + +bb: +; CHECK: bl ___divmodsi4 + %3 = load i32* @tabsize, align 4 + %4 = srem i32 %cols, %3 + %5 = sdiv i32 %cols, %3 + %6 = tail call i32 @llvm.objectsize.i32(i8* null, i1 false) + %7 = tail call i8* @__memset_chk(i8* null, i32 9, i32 %5, i32 %6) nounwind + br label %bb1 + +bb1: + %line_indent_len.0 = phi i32 [ %4, %bb ], [ 0, %entry ] + %8 = getelementptr inbounds i8* null, i32 %line_indent_len.0 + store i8 0, i8* %8, align 1 + ret void +} + +declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readnone +declare i8* @__memset_chk(i8*, i32, i32, i32) nounwind
diff --git a/src/LLVM/test/CodeGen/ARM/dyn-stackalloc.ll b/src/LLVM/test/CodeGen/ARM/dyn-stackalloc.ll new file mode 100644 index 0000000..84a9c4d --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/dyn-stackalloc.ll
@@ -0,0 +1,56 @@ +; RUN: llc < %s -march=arm + +%struct.comment = type { i8**, i32*, i32, i8* } +%struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* } +%struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* } + +@str215 = external global [2 x i8] + +define void @t1(%struct.state* %v) { + %tmp6 = load i32* null + %tmp8 = alloca float, i32 %tmp6 + store i32 1, i32* null + br i1 false, label %bb123.preheader, label %return + +bb123.preheader: ; preds = %0 + br i1 false, label %bb43, label %return + +bb43: ; preds = %bb123.preheader + call fastcc void @f1(float* %tmp8, float* null, i32 0) + %tmp70 = load i32* null + %tmp85 = getelementptr float* %tmp8, i32 0 + call fastcc void @f2(float* null, float* null, float* %tmp85, i32 %tmp70) + ret void + +return: ; preds = %bb123.preheader, %0 + ret void +} + +declare fastcc void @f1(float*, float*, i32) + +declare fastcc void @f2(float*, float*, float*, i32) + +define void @t2(%struct.comment* %vc, i8* %tag, i8* %contents) { + %tmp1 = call i32 @strlen(i8* %tag) + %tmp3 = call i32 @strlen(i8* %contents) + %tmp4 = add i32 %tmp1, 2 + %tmp5 = add i32 %tmp4, %tmp3 + %tmp6 = alloca i8, i32 %tmp5 + %tmp9 = call i8* @strcpy(i8* %tmp6, i8* %tag) + %tmp6.len = call i32 @strlen(i8* %tmp6) + %tmp6.indexed = getelementptr i8* %tmp6, i32 %tmp6.len + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp6.indexed, i8* getelementptr inbounds ([2 x i8]* @str215, i32 0, i32 0), i32 2, i32 1, i1 false) + %tmp15 = call i8* @strcat(i8* %tmp6, i8* %contents) + call fastcc void @comment_add(%struct.comment* %vc, i8* %tmp6) + ret void +} + +declare i32 @strlen(i8*) + +declare i8* @strcat(i8*, i8*) + +declare fastcc void @comment_add(%struct.comment*, i8*) + +declare i8* @strcpy(i8*, i8*) + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/src/LLVM/test/CodeGen/ARM/eh-resume-darwin.ll b/src/LLVM/test/CodeGen/ARM/eh-resume-darwin.ll new file mode 100644 index 0000000..e475508 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/eh-resume-darwin.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s -march=arm | FileCheck %s +target triple = "armv6-apple-macosx10.6" + +declare void @func() + +declare i8* @llvm.eh.exception() nounwind readonly + +declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind + +declare void @llvm.eh.resume(i8*, i32) + +declare i32 @__gxx_personality_sj0(...) + +define void @test0() { +entry: + invoke void @func() + to label %cont unwind label %lpad + +cont: + ret void + +lpad: + %exn = call i8* @llvm.eh.exception() + %sel = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i32 0) + call void @llvm.eh.resume(i8* %exn, i32 %sel) noreturn + unreachable +} + +; CHECK: __Unwind_SjLj_Resume
diff --git a/src/LLVM/test/CodeGen/ARM/elf-lcomm-align.ll b/src/LLVM/test/CodeGen/ARM/elf-lcomm-align.ll new file mode 100644 index 0000000..4679299 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/elf-lcomm-align.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -mtriple=arm-linux-gnueabi -O0 | FileCheck %s +; run with -O0 to avoid arm global merging. + +@c = internal global i8 0, align 1 +@x = internal global i32 0, align 4 + +; CHECK: .lcomm c,1 +; .lcomm doesn't support alignment. +; CHECK: .local x +; CHECK-NEXT: .comm x,4,4 + +define i32 @foo() nounwind { + ret i32 sub (i32 ptrtoint (i8* @c to i32), i32 ptrtoint (i32* @x to i32)) +}
diff --git a/src/LLVM/test/CodeGen/ARM/extloadi1.ll b/src/LLVM/test/CodeGen/ARM/extloadi1.ll new file mode 100644 index 0000000..624b0b2 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/extloadi1.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -march=arm +@handler_installed.6144.b = external global i1 ; <i1*> [#uses=1] + +define void @__mf_sigusr1_respond() { +entry: + %tmp8.b = load i1* @handler_installed.6144.b ; <i1> [#uses=1] + br i1 false, label %cond_true7, label %cond_next + +cond_next: ; preds = %entry + br i1 %tmp8.b, label %bb, label %cond_next3 + +cond_next3: ; preds = %cond_next + ret void + +bb: ; preds = %cond_next + ret void + +cond_true7: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/fabss.ll b/src/LLVM/test/CodeGen/ARM/fabss.ll new file mode 100644 index 0000000..45c322d --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/fabss.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0 +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 +; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9 + +define float @test(float %a, float %b) { +entry: + %dum = fadd float %a, %b + %0 = tail call float @fabsf(float %dum) + %dum1 = fadd float %0, %b + ret float %dum1 +} + +declare float @fabsf(float) + +; VFP2: test: +; VFP2: vabs.f32 s1, s1 + +; NFP1: test: +; NFP1: vabs.f32 d1, d1 +; NFP0: test: +; NFP0: vabs.f32 s1, s1 + +; CORTEXA8: test: +; CORTEXA8: vadd.f32 [[D1:d[0-9]+]] +; CORTEXA8: vabs.f32 {{d[0-9]+}}, [[D1]] + +; CORTEXA9: test: +; CORTEXA9: vabs.f32 s{{.}}, s{{.}}
diff --git a/src/LLVM/test/CodeGen/ARM/fadds.ll b/src/LLVM/test/CodeGen/ARM/fadds.ll new file mode 100644 index 0000000..e35103c --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/fadds.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0 +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 +; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9 + +define float @test(float %a, float %b) { +entry: + %0 = fadd float %a, %b + ret float %0 +} + +; VFP2: test: +; VFP2: vadd.f32 s0, s1, s0 + +; NFP1: test: +; NFP1: vadd.f32 d0, d1, d0 +; NFP0: test: +; NFP0: vadd.f32 s0, s1, s0 + +; CORTEXA8: test: +; CORTEXA8: vadd.f32 d0, d1, d0 +; CORTEXA9: test: +; CORTEXA9: vadd.f32 s{{.}}, s{{.}}, s{{.}}
diff --git a/src/LLVM/test/CodeGen/ARM/fast-isel-crash.ll b/src/LLVM/test/CodeGen/ARM/fast-isel-crash.ll new file mode 100644 index 0000000..370c70f --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/fast-isel-crash.ll
@@ -0,0 +1,21 @@ +; RUN: llc < %s -O0 -mtriple=thumbv7-apple-darwin + +%union.anon = type { <16 x i32> } + +@__md0 = external global [137 x i8] + +define internal void @stretch(<4 x i8> addrspace(1)* %src, <4 x i8> addrspace(1)* %dst, i32 %width, i32 %height, i32 %iLS, i32 %oLS, <2 x float> %c, <4 x float> %param) nounwind { +entry: + ret void +} + +define internal i32 @_Z13get_global_idj(i32 %dim) nounwind ssp { +entry: + ret i32 undef +} + +define void @wrap(i8 addrspace(1)* addrspace(1)* %arglist, i32 addrspace(1)* %gtid) nounwind ssp { +entry: + call void @stretch(<4 x i8> addrspace(1)* undef, <4 x i8> addrspace(1)* undef, i32 undef, i32 undef, i32 undef, i32 undef, <2 x float> undef, <4 x float> undef) + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/fast-isel-crash2.ll b/src/LLVM/test/CodeGen/ARM/fast-isel-crash2.ll new file mode 100644 index 0000000..aa06299 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/fast-isel-crash2.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -O0 -mtriple=thumbv7-apple-darwin +; rdar://9515076 +; (Make sure this doesn't crash.) + +define i32 @test(i32 %i) { + %t = trunc i32 %i to i4 + %r = sext i4 %t to i32 + ret i32 %r +}
diff --git a/src/LLVM/test/CodeGen/ARM/fast-isel-pred.ll b/src/LLVM/test/CodeGen/ARM/fast-isel-pred.ll new file mode 100644 index 0000000..8de54ad --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/fast-isel-pred.ll
@@ -0,0 +1,58 @@ +; RUN: llc -O0 -mtriple=armv7-apple-darwin < %s + +define i32 @main() nounwind ssp { +entry: + %retval = alloca i32, align 4 + %X = alloca <4 x i32>, align 16 + %Y = alloca <4 x float>, align 16 + store i32 0, i32* %retval + %tmp = load <4 x i32>* %X, align 16 + call void @__aa(<4 x i32> %tmp, i8* null, i32 3, <4 x float>* %Y) + %0 = load i32* %retval + ret i32 %0 +} + +define internal void @__aa(<4 x i32> %v, i8* %p, i32 %offset, <4 x float>* %constants) nounwind inlinehint ssp { +entry: + %__a.addr.i = alloca <4 x i32>, align 16 + %v.addr = alloca <4 x i32>, align 16 + %p.addr = alloca i8*, align 4 + %offset.addr = alloca i32, align 4 + %constants.addr = alloca <4 x float>*, align 4 + store <4 x i32> %v, <4 x i32>* %v.addr, align 16 + store i8* %p, i8** %p.addr, align 4 + store i32 %offset, i32* %offset.addr, align 4 + store <4 x float>* %constants, <4 x float>** %constants.addr, align 4 + %tmp = load <4 x i32>* %v.addr, align 16 + store <4 x i32> %tmp, <4 x i32>* %__a.addr.i, align 16 + %tmp.i = load <4 x i32>* %__a.addr.i, align 16 + %0 = bitcast <4 x i32> %tmp.i to <16 x i8> + %1 = bitcast <16 x i8> %0 to <4 x i32> + %vcvt.i = sitofp <4 x i32> %1 to <4 x float> + %tmp1 = load i8** %p.addr, align 4 + %tmp2 = load i32* %offset.addr, align 4 + %tmp3 = load <4 x float>** %constants.addr, align 4 + call void @__bb(<4 x float> %vcvt.i, i8* %tmp1, i32 %tmp2, <4 x float>* %tmp3) + ret void +} + +define internal void @__bb(<4 x float> %v, i8* %p, i32 %offset, <4 x float>* %constants) nounwind inlinehint ssp { +entry: + %v.addr = alloca <4 x float>, align 16 + %p.addr = alloca i8*, align 4 + %offset.addr = alloca i32, align 4 + %constants.addr = alloca <4 x float>*, align 4 + %data = alloca i64, align 4 + store <4 x float> %v, <4 x float>* %v.addr, align 16 + store i8* %p, i8** %p.addr, align 4 + store i32 %offset, i32* %offset.addr, align 4 + store <4 x float>* %constants, <4 x float>** %constants.addr, align 4 + %tmp = load i64* %data, align 4 + 
%tmp1 = load i8** %p.addr, align 4 + %tmp2 = load i32* %offset.addr, align 4 + %add.ptr = getelementptr i8* %tmp1, i32 %tmp2 + %0 = bitcast i8* %add.ptr to i64* + %arrayidx = getelementptr inbounds i64* %0, i32 0 + store i64 %tmp, i64* %arrayidx + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/fast-isel-redefinition.ll b/src/LLVM/test/CodeGen/ARM/fast-isel-redefinition.ll new file mode 100644 index 0000000..08dcc64 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/fast-isel-redefinition.ll
@@ -0,0 +1,11 @@ +; RUN: llc -O0 -regalloc=linearscan < %s +; This isn't exactly a useful set of command-line options, but check that it +; doesn't crash. (It was crashing because a register was getting redefined.) + +target triple = "thumbv7-apple-macosx10.6.7" + +define i32 @f(i32* %x) nounwind ssp { + %y = getelementptr inbounds i32* %x, i32 5000 + %tmp103 = load i32* %y, align 4 + ret i32 %tmp103 +}
diff --git a/src/LLVM/test/CodeGen/ARM/fast-isel-static.ll b/src/LLVM/test/CodeGen/ARM/fast-isel-static.ll new file mode 100644 index 0000000..a86e325 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/fast-isel-static.ll
@@ -0,0 +1,30 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -relocation-model=static -arm-long-calls | FileCheck -check-prefix=LONG %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -relocation-model=static | FileCheck -check-prefix=NORM %s + +define void @myadd(float* %sum, float* %addend) nounwind { +entry: + %sum.addr = alloca float*, align 4 + %addend.addr = alloca float*, align 4 + store float* %sum, float** %sum.addr, align 4 + store float* %addend, float** %addend.addr, align 4 + %tmp = load float** %sum.addr, align 4 + %tmp1 = load float* %tmp + %tmp2 = load float** %addend.addr, align 4 + %tmp3 = load float* %tmp2 + %add = fadd float %tmp1, %tmp3 + %tmp4 = load float** %sum.addr, align 4 + store float %add, float* %tmp4 + ret void +} + +define i32 @main(i32 %argc, i8** %argv) nounwind { +entry: + %ztot = alloca float, align 4 + %z = alloca float, align 4 + store float 0.000000e+00, float* %ztot, align 4 + store float 1.000000e+00, float* %z, align 4 +; CHECK-LONG: blx r +; CHECK-NORM: bl _myadd + call void @myadd(float* %ztot, float* %z) + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/ARM/fast-isel.ll b/src/LLVM/test/CodeGen/ARM/fast-isel.ll new file mode 100644 index 0000000..465e85f --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/fast-isel.ll
@@ -0,0 +1,160 @@ +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB + +; Very basic fast-isel functionality. +define i32 @add(i32 %a, i32 %b) nounwind { +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca i32, align 4 + store i32 %a, i32* %a.addr + store i32 %b, i32* %b.addr + %tmp = load i32* %a.addr + %tmp1 = load i32* %b.addr + %add = add nsw i32 %tmp, %tmp1 + ret i32 %add +} + +; Check truncate to bool +define void @test1(i32 %tmp) nounwind { +entry: +%tobool = trunc i32 %tmp to i1 +br i1 %tobool, label %if.then, label %if.end + +if.then: ; preds = %entry +call void @test1(i32 0) +br label %if.end + +if.end: ; preds = %if.then, %entry +ret void +; ARM: test1: +; ARM: tst r0, #1 +; THUMB: test1: +; THUMB: tst.w r0, #1 +} + +; Check some simple operations with immediates +define void @test2(i32 %tmp, i32* %ptr) nounwind { +; THUMB: test2: +; ARM: test2: + +b1: + %a = add i32 %tmp, 4096 + store i32 %a, i32* %ptr + br label %b2 + +; THUMB: add.w {{.*}} #4096 +; ARM: add {{.*}} #4096 + +b2: + %b = add i32 %tmp, 4095 + store i32 %b, i32* %ptr + br label %b3 +; THUMB: addw {{.*}} #4095 +; ARM: movw {{.*}} #4095 +; ARM: add + +b3: + %c = or i32 %tmp, 4 + store i32 %c, i32* %ptr + ret void + +; THUMB: orr {{.*}} #4 +; ARM: orr {{.*}} #4 +} + +define void @test3(i32 %tmp, i32* %ptr1, i16* %ptr2, i8* %ptr3) nounwind { +; THUMB: test3: +; ARM: test3: + +bb1: + %a1 = trunc i32 %tmp to i16 + %a2 = trunc i16 %a1 to i8 + %a3 = trunc i8 %a2 to i1 + %a4 = zext i1 %a3 to i8 + store i8 %a4, i8* %ptr3 + %a5 = zext i8 %a4 to i16 + store i16 %a5, i16* %ptr2 + %a6 = zext i16 %a5 to i32 + store i32 %a6, i32* %ptr1 + br label %bb2 + +; THUMB: and +; THUMB: strb +; THUMB: uxtb +; THUMB: strh +; THUMB: uxth +; ARM: and +; ARM: strb +; ARM: uxtb +; ARM: strh +; ARM: 
uxth + +bb2: + %b1 = trunc i32 %tmp to i16 + %b2 = trunc i16 %b1 to i8 + store i8 %b2, i8* %ptr3 + %b3 = sext i8 %b2 to i16 + store i16 %b3, i16* %ptr2 + %b4 = sext i16 %b3 to i32 + store i32 %b4, i32* %ptr1 + br label %bb3 + +; THUMB: strb +; THUMB: sxtb +; THUMB: strh +; THUMB: sxth +; ARM: strb +; ARM: sxtb +; ARM: strh +; ARM: sxth + +bb3: + %c1 = load i8* %ptr3 + %c2 = load i16* %ptr2 + %c3 = load i32* %ptr1 + %c4 = zext i8 %c1 to i32 + %c5 = sext i16 %c2 to i32 + %c6 = add i32 %c4, %c5 + %c7 = sub i32 %c3, %c6 + store i32 %c7, i32* %ptr1 + ret void + +; THUMB: ldrb +; THUMB: ldrh +; THUMB: uxtb +; THUMB: sxth +; THUMB: add +; THUMB: sub +; ARM: ldrb +; ARM: ldrh +; ARM: uxtb +; ARM: sxth +; ARM: add +; ARM: sub +} + +; Check loads/stores with globals +@test4g = external global i32 + +define void @test4() { + %a = load i32* @test4g + %b = add i32 %a, 1 + store i32 %b, i32* @test4g + ret void + +; THUMB: ldr.n r0, LCPI4_1 +; THUMB: ldr r0, [r0] +; THUMB: ldr r0, [r0] +; THUMB: adds r0, #1 +; THUMB: ldr.n r1, LCPI4_0 +; THUMB: ldr r1, [r1] +; THUMB: str r0, [r1] + +; ARM: ldr r0, LCPI4_1 +; ARM: ldr r0, [r0] +; ARM: ldr r0, [r0] +; ARM: add r0, r0, #1 +; ARM: ldr r1, LCPI4_0 +; ARM: ldr r1, [r1] +; ARM: str r0, [r1] +}
diff --git a/src/LLVM/test/CodeGen/ARM/fcopysign.ll b/src/LLVM/test/CodeGen/ARM/fcopysign.ll new file mode 100644 index 0000000..4018f51 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/fcopysign.ll
@@ -0,0 +1,76 @@ +; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=SOFT +; RUN: llc < %s -mtriple=armv7-gnueabi -float-abi=hard -mcpu=cortex-a8 | FileCheck %s -check-prefix=HARD + +; rdar://8984306 +define float @test1(float %x, float %y) nounwind { +entry: +; SOFT: test1: +; SOFT: lsr r1, r1, #31 +; SOFT: bfi r0, r1, #31, #1 + +; HARD: test1: +; HARD: vmov.i32 [[REG1:(d[0-9]+)]], #0x80000000 +; HARD: vbsl [[REG1]], d + %0 = tail call float @copysignf(float %x, float %y) nounwind + ret float %0 +} + +define double @test2(double %x, double %y) nounwind { +entry: +; SOFT: test2: +; SOFT: lsr r2, r3, #31 +; SOFT: bfi r1, r2, #31, #1 + +; HARD: test2: +; HARD: vmov.i32 [[REG2:(d[0-9]+)]], #0x80000000 +; HARD: vshl.i64 [[REG2]], [[REG2]], #32 +; HARD: vbsl [[REG2]], d1, d0 + %0 = tail call double @copysign(double %x, double %y) nounwind + ret double %0 +} + +define double @test3(double %x, double %y, double %z) nounwind { +entry: +; SOFT: test3: +; SOFT: vmov.i32 [[REG3:(d[0-9]+)]], #0x80000000 +; SOFT: vshl.i64 [[REG3]], [[REG3]], #32 +; SOFT: vbsl [[REG3]], + %0 = fmul double %x, %y + %1 = tail call double @copysign(double %0, double %z) nounwind + ret double %1 +} + +; rdar://9059537 +define i32 @test4() ssp { +entry: +; SOFT: test4: +; SOFT: vmov.f64 [[REG4:(d[0-9]+)]], #1.000000e+00 +; This S-reg must be the first sub-reg of the last D-reg on vbsl. 
+; SOFT: vcvt.f32.f64 {{s1?[02468]}}, [[REG4]] +; SOFT: vshr.u64 [[REG4]], [[REG4]], #32 +; SOFT: vmov.i32 [[REG5:(d[0-9]+)]], #0x80000000 +; SOFT: vbsl [[REG5]], [[REG4]], {{d[0-9]+}} + %call80 = tail call double @copysign(double 1.000000e+00, double undef) + %conv81 = fptrunc double %call80 to float + %tmp88 = bitcast float %conv81 to i32 + ret i32 %tmp88 +} + +; rdar://9287902 +define float @test5() nounwind { +entry: +; SOFT: test5: +; SOFT: vmov.i32 [[REG6:(d[0-9]+)]], #0x80000000 +; SOFT: vmov [[REG7:(d[0-9]+)]], r0, r1 +; SOFT: vshr.u64 [[REG7]], [[REG7]], #32 +; SOFT: vbsl [[REG6]], [[REG7]], + %0 = tail call double (...)* @bar() nounwind + %1 = fptrunc double %0 to float + %2 = tail call float @copysignf(float 5.000000e-01, float %1) nounwind readnone + %3 = fadd float %1, %2 + ret float %3 +} + +declare double @bar(...) +declare double @copysign(double, double) nounwind +declare float @copysignf(float, float) nounwind
diff --git a/src/LLVM/test/CodeGen/ARM/fdivs.ll b/src/LLVM/test/CodeGen/ARM/fdivs.ll new file mode 100644 index 0000000..31c1ca9 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/fdivs.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0 +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 +; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9 + +define float @test(float %a, float %b) { +entry: + %0 = fdiv float %a, %b + ret float %0 +} + +; VFP2: test: +; VFP2: vdiv.f32 s0, s1, s0 + +; NFP1: test: +; NFP1: vdiv.f32 s0, s1, s0 +; NFP0: test: +; NFP0: vdiv.f32 s0, s1, s0 + +; CORTEXA8: test: +; CORTEXA8: vdiv.f32 s0, s1, s0 +; CORTEXA9: test: +; CORTEXA9: vdiv.f32 s{{.}}, s{{.}}, s{{.}}
diff --git a/src/LLVM/test/CodeGen/ARM/fixunsdfdi.ll b/src/LLVM/test/CodeGen/ARM/fixunsdfdi.ll new file mode 100644 index 0000000..6db2385 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/fixunsdfdi.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 +; RUN: llc < %s -march=arm -mattr=vfp2 | not grep vstr.64 + +define hidden i64 @__fixunsdfdi(double %x) nounwind readnone { +entry: + %x14 = bitcast double %x to i64 ; <i64> [#uses=1] + br i1 true, label %bb3, label %bb10 + +bb3: ; preds = %entry + br i1 true, label %bb5, label %bb7 + +bb5: ; preds = %bb3 + %u.in.mask = and i64 %x14, -4294967296 ; <i64> [#uses=1] + %.ins = or i64 0, %u.in.mask ; <i64> [#uses=1] + %0 = bitcast i64 %.ins to double ; <double> [#uses=1] + %1 = fsub double %x, %0 ; <double> [#uses=1] + %2 = fptosi double %1 to i32 ; <i32> [#uses=1] + %3 = add i32 %2, 0 ; <i32> [#uses=1] + %4 = zext i32 %3 to i64 ; <i64> [#uses=1] + %5 = shl i64 %4, 32 ; <i64> [#uses=1] + %6 = or i64 %5, 0 ; <i64> [#uses=1] + ret i64 %6 + +bb7: ; preds = %bb3 + ret i64 0 + +bb10: ; preds = %entry + ret i64 0 +}
diff --git a/src/LLVM/test/CodeGen/ARM/flag-crash.ll b/src/LLVM/test/CodeGen/ARM/flag-crash.ll new file mode 100644 index 0000000..9c61944 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/flag-crash.ll
@@ -0,0 +1,27 @@ +; RUN: llc < %s -O3 -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8 -relocation-model=pic +; PR7484 + +%struct.gs_matrix = type { float, i32, float, i32, float, i32, float, i32, float, i32, float, i32 } + +define fastcc void @func(%struct.gs_matrix* nocapture %pm1) nounwind { +entry: + %0 = getelementptr inbounds %struct.gs_matrix* %pm1, i32 0, i32 6 + %1 = load float* %0, align 4 + %2 = getelementptr inbounds %struct.gs_matrix* %pm1, i32 0, i32 8 + %3 = load float* %2, align 4 + %4 = getelementptr inbounds %struct.gs_matrix* %pm1, i32 0, i32 2 + %5 = bitcast float* %4 to i32* + %6 = load i32* %5, align 4 + %7 = or i32 0, %6 + %.mask = and i32 %7, 2147483647 + %8 = icmp eq i32 %.mask, 0 + br i1 %8, label %bb, label %bb11 + +bb: + ret void + +bb11: + %9 = fmul float %1, undef + %10 = fmul float %3, undef + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/fmacs.ll b/src/LLVM/test/CodeGen/ARM/fmacs.ll new file mode 100644 index 0000000..b63f609 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/fmacs.ll
@@ -0,0 +1,104 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8 +; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=A9 +; RUN: llc < %s -mtriple=arm-linux-gnueabi -mcpu=cortex-a9 -float-abi=hard | FileCheck %s -check-prefix=HARD + +define float @t1(float %acc, float %a, float %b) { +entry: +; VFP2: t1: +; VFP2: vmla.f32 + +; NEON: t1: +; NEON: vmla.f32 + +; A8: t1: +; A8: vmul.f32 +; A8: vadd.f32 + %0 = fmul float %a, %b + %1 = fadd float %acc, %0 + ret float %1 +} + +define double @t2(double %acc, double %a, double %b) { +entry: +; VFP2: t2: +; VFP2: vmla.f64 + +; NEON: t2: +; NEON: vmla.f64 + +; A8: t2: +; A8: vmul.f64 +; A8: vadd.f64 + %0 = fmul double %a, %b + %1 = fadd double %acc, %0 + ret double %1 +} + +define float @t3(float %acc, float %a, float %b) { +entry: +; VFP2: t3: +; VFP2: vmla.f32 + +; NEON: t3: +; NEON: vmla.f32 + +; A8: t3: +; A8: vmul.f32 +; A8: vadd.f32 + %0 = fmul float %a, %b + %1 = fadd float %0, %acc + ret float %1 +} + +; It's possible to make use of fp vmla / vmls on Cortex-A9. 
+; rdar://8659675 +define void @t4(float %acc1, float %a, float %b, float %acc2, float %c, float* %P1, float* %P2) { +entry: +; A8: t4: +; A8: vmul.f32 +; A8: vmul.f32 +; A8: vadd.f32 +; A8: vadd.f32 + +; Two vmla with now RAW hazard +; A9: t4: +; A9: vmla.f32 +; A9: vmla.f32 + +; HARD: t4: +; HARD: vmla.f32 s0, s1, s2 +; HARD: vmla.f32 s3, s1, s4 + %0 = fmul float %a, %b + %1 = fadd float %acc1, %0 + %2 = fmul float %a, %c + %3 = fadd float %acc2, %2 + store float %1, float* %P1 + store float %3, float* %P2 + ret void +} + +define float @t5(float %a, float %b, float %c, float %d, float %e) { +entry: +; A8: t5: +; A8: vmul.f32 +; A8: vmul.f32 +; A8: vadd.f32 +; A8: vadd.f32 + +; A9: t5: +; A9: vmla.f32 +; A9: vmul.f32 +; A9: vadd.f32 + +; HARD: t5: +; HARD: vmla.f32 s4, s0, s1 +; HARD: vmul.f32 s0, s2, s3 +; HARD: vadd.f32 s0, s4, s0 + %0 = fmul float %a, %b + %1 = fadd float %e, %0 + %2 = fmul float %c, %d + %3 = fadd float %1, %2 + ret float %3 +}
diff --git a/src/LLVM/test/CodeGen/ARM/fmdrr-fmrrd.ll b/src/LLVM/test/CodeGen/ARM/fmdrr-fmrrd.ll new file mode 100644 index 0000000..eb72faf --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/fmdrr-fmrrd.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=arm -mattr=vfp2 | not grep fmdrr +; RUN: llc < %s -march=arm -mattr=vfp2 | not grep fmrrd + +; naive codegen for this is: +; _i: +; fmdrr d0, r0, r1 +; fmrrd r0, r1, d0 +; bx lr + +define i64 @test(double %X) { + %Y = bitcast double %X to i64 + ret i64 %Y +}
diff --git a/src/LLVM/test/CodeGen/ARM/fmscs.ll b/src/LLVM/test/CodeGen/ARM/fmscs.ll new file mode 100644 index 0000000..a182833 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/fmscs.ll
@@ -0,0 +1,35 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8 + +define float @t1(float %acc, float %a, float %b) { +entry: +; VFP2: t1: +; VFP2: vnmls.f32 + +; NEON: t1: +; NEON: vnmls.f32 + +; A8: t1: +; A8: vmul.f32 +; A8: vsub.f32 + %0 = fmul float %a, %b + %1 = fsub float %0, %acc + ret float %1 +} + +define double @t2(double %acc, double %a, double %b) { +entry: +; VFP2: t2: +; VFP2: vnmls.f64 + +; NEON: t2: +; NEON: vnmls.f64 + +; A8: t2: +; A8: vmul.f64 +; A8: vsub.f64 + %0 = fmul double %a, %b + %1 = fsub double %0, %acc + ret double %1 +}
diff --git a/src/LLVM/test/CodeGen/ARM/fmuls.ll b/src/LLVM/test/CodeGen/ARM/fmuls.ll new file mode 100644 index 0000000..bc118b8 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/fmuls.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0 +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 +; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9 + +define float @test(float %a, float %b) { +entry: + %0 = fmul float %a, %b + ret float %0 +} + +; VFP2: test: +; VFP2: vmul.f32 s0, s1, s0 + +; NFP1: test: +; NFP1: vmul.f32 d0, d1, d0 +; NFP0: test: +; NFP0: vmul.f32 s0, s1, s0 + +; CORTEXA8: test: +; CORTEXA8: vmul.f32 d0, d1, d0 +; CORTEXA9: test: +; CORTEXA9: vmul.f32 s{{.}}, s{{.}}, s{{.}}
diff --git a/src/LLVM/test/CodeGen/ARM/fnegs.ll b/src/LLVM/test/CodeGen/ARM/fnegs.ll new file mode 100644 index 0000000..418b598 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/fnegs.ll
@@ -0,0 +1,53 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0 +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 +; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9 + +define float @test1(float* %a) { +entry: + %0 = load float* %a, align 4 ; <float> [#uses=2] + %1 = fsub float -0.000000e+00, %0 ; <float> [#uses=2] + %2 = fpext float %1 to double ; <double> [#uses=1] + %3 = fcmp olt double %2, 1.234000e+00 ; <i1> [#uses=1] + %retval = select i1 %3, float %1, float %0 ; <float> [#uses=1] + ret float %retval +} +; VFP2: test1: +; VFP2: vneg.f32 s{{.*}}, s{{.*}} + +; NFP1: test1: +; NFP1: vneg.f32 d{{.*}}, d{{.*}} + +; NFP0: test1: +; NFP0: vneg.f32 s{{.*}}, s{{.*}} + +; CORTEXA8: test1: +; CORTEXA8: vneg.f32 d{{.*}}, d{{.*}} + +; CORTEXA9: test1: +; CORTEXA9: vneg.f32 s{{.*}}, s{{.*}} + +define float @test2(float* %a) { +entry: + %0 = load float* %a, align 4 ; <float> [#uses=2] + %1 = fmul float -1.000000e+00, %0 ; <float> [#uses=2] + %2 = fpext float %1 to double ; <double> [#uses=1] + %3 = fcmp olt double %2, 1.234000e+00 ; <i1> [#uses=1] + %retval = select i1 %3, float %1, float %0 ; <float> [#uses=1] + ret float %retval +} +; VFP2: test2: +; VFP2: vneg.f32 s{{.*}}, s{{.*}} + +; NFP1: test2: +; NFP1: vneg.f32 d{{.*}}, d{{.*}} + +; NFP0: test2: +; NFP0: vneg.f32 s{{.*}}, s{{.*}} + +; CORTEXA8: test2: +; CORTEXA8: vneg.f32 d{{.*}}, d{{.*}} + +; CORTEXA9: test2: +; CORTEXA9: vneg.f32 s{{.*}}, s{{.*}} +
diff --git a/src/LLVM/test/CodeGen/ARM/fnmacs.ll b/src/LLVM/test/CodeGen/ARM/fnmacs.ll new file mode 100644 index 0000000..1763d46 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/fnmacs.ll
@@ -0,0 +1,35 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8 + +define float @t1(float %acc, float %a, float %b) { +entry: +; VFP2: t1: +; VFP2: vmls.f32 + +; NEON: t1: +; NEON: vmls.f32 + +; A8: t1: +; A8: vmul.f32 +; A8: vsub.f32 + %0 = fmul float %a, %b + %1 = fsub float %acc, %0 + ret float %1 +} + +define double @t2(double %acc, double %a, double %b) { +entry: +; VFP2: t2: +; VFP2: vmls.f64 + +; NEON: t2: +; NEON: vmls.f64 + +; A8: t2: +; A8: vmul.f64 +; A8: vsub.f64 + %0 = fmul double %a, %b + %1 = fsub double %acc, %0 + ret double %1 +}
diff --git a/src/LLVM/test/CodeGen/ARM/fnmscs.ll b/src/LLVM/test/CodeGen/ARM/fnmscs.ll new file mode 100644 index 0000000..6081712 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/fnmscs.ll
@@ -0,0 +1,72 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8 +; RUN: llc < %s -march=arm -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=A8 + +define float @t1(float %acc, float %a, float %b) nounwind { +entry: +; VFP2: t1: +; VFP2: vnmla.f32 + +; NEON: t1: +; NEON: vnmla.f32 + +; A8: t1: +; A8: vnmul.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}} +; A8: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}} + %0 = fmul float %a, %b + %1 = fsub float -0.0, %0 + %2 = fsub float %1, %acc + ret float %2 +} + +define float @t2(float %acc, float %a, float %b) nounwind { +entry: +; VFP2: t2: +; VFP2: vnmla.f32 + +; NEON: t2: +; NEON: vnmla.f32 + +; A8: t2: +; A8: vnmul.f32 s{{[01234]}}, s{{[01234]}}, s{{[01234]}} +; A8: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}} + %0 = fmul float %a, %b + %1 = fmul float -1.0, %0 + %2 = fsub float %1, %acc + ret float %2 +} + +define double @t3(double %acc, double %a, double %b) nounwind { +entry: +; VFP2: t3: +; VFP2: vnmla.f64 + +; NEON: t3: +; NEON: vnmla.f64 + +; A8: t3: +; A8: vnmul.f64 d1{{[67]}}, d1{{[67]}}, d1{{[67]}} +; A8: vsub.f64 d1{{[67]}}, d1{{[67]}}, d1{{[67]}} + %0 = fmul double %a, %b + %1 = fsub double -0.0, %0 + %2 = fsub double %1, %acc + ret double %2 +} + +define double @t4(double %acc, double %a, double %b) nounwind { +entry: +; VFP2: t4: +; VFP2: vnmla.f64 + +; NEON: t4: +; NEON: vnmla.f64 + +; A8: t4: +; A8: vnmul.f64 d1{{[67]}}, d1{{[67]}}, d1{{[67]}} +; A8: vsub.f64 d1{{[67]}}, d1{{[67]}}, d1{{[67]}} + %0 = fmul double %a, %b + %1 = fmul double -1.0, %0 + %2 = fsub double %1, %acc + ret double %2 +}
diff --git a/src/LLVM/test/CodeGen/ARM/fnmul.ll b/src/LLVM/test/CodeGen/ARM/fnmul.ll new file mode 100644 index 0000000..13b802a --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/fnmul.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | grep vnmul.f64 +; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 -enable-sign-dependent-rounding-fp-math | grep vmul.f64 + + +define double @t1(double %a, double %b) { +entry: + %tmp2 = fsub double -0.000000e+00, %a ; <double> [#uses=1] + %tmp4 = fmul double %tmp2, %b ; <double> [#uses=1] + ret double %tmp4 +} +
diff --git a/src/LLVM/test/CodeGen/ARM/fnmuls.ll b/src/LLVM/test/CodeGen/ARM/fnmuls.ll new file mode 100644 index 0000000..3223885 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/fnmuls.ll
@@ -0,0 +1,21 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s +; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s + +define arm_aapcs_vfpcc float @test1(float %a, float %b) nounwind { +; CHECK: vnmul.f32 s0, s0, s1 +entry: + %0 = fmul float %a, %b + %1 = fsub float -0.0, %0 + ret float %1 +} + +define arm_aapcs_vfpcc float @test2(float %a, float %b) nounwind { +; CHECK: vnmul.f32 s0, s0, s1 +entry: + %0 = fmul float %a, %b + %1 = fmul float -1.0, %0 + ret float %1 +} +
diff --git a/src/LLVM/test/CodeGen/ARM/fold-const.ll b/src/LLVM/test/CodeGen/ARM/fold-const.ll new file mode 100644 index 0000000..227e4e8 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/fold-const.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=arm -mattr=+v7 | FileCheck %s + +define i32 @f(i32 %a) nounwind readnone optsize ssp { +entry: + %conv = zext i32 %a to i64 + %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %conv) +; CHECK: clz +; CHECK-NOT: adds + %cast = trunc i64 %tmp1 to i32 + %sub = sub nsw i32 63, %cast + ret i32 %sub +} + +declare i64 @llvm.ctlz.i64(i64) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/ARM/formal.ll b/src/LLVM/test/CodeGen/ARM/formal.ll new file mode 100644 index 0000000..4ac10ba --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/formal.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 + +declare void @bar(i64 %x, i64 %y) + +define void @foo() { + call void @bar(i64 2, i64 3) + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/fp-arg-shuffle.ll b/src/LLVM/test/CodeGen/ARM/fp-arg-shuffle.ll new file mode 100644 index 0000000..ae02b79 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/fp-arg-shuffle.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=arm -mattr=+neon -float-abi=soft | FileCheck %s + +; CHECK: function1 +; CHECK-NOT: vmov +define double @function1(double %a, double %b, double %c, double %d, double %e, double %f) nounwind noinline ssp { +entry: + %call = tail call double @function2(double %f, double %e, double %d, double %c, double %b, double %a) nounwind + ret double %call +} + +declare double @function2(double, double, double, double, double, double)
diff --git a/src/LLVM/test/CodeGen/ARM/fp.ll b/src/LLVM/test/CodeGen/ARM/fp.ll new file mode 100644 index 0000000..b339ec9 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/fp.ll
@@ -0,0 +1,78 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s + +define float @f(i32 %a) { +;CHECK: f: +;CHECK: vmov +;CHECK-NEXT: vcvt.f32.s32 +;CHECK-NEXT: vmov +entry: + %tmp = sitofp i32 %a to float ; <float> [#uses=1] + ret float %tmp +} + +define double @g(i32 %a) { +;CHECK: g: +;CHECK: vmov +;CHECK-NEXT: vcvt.f64.s32 +;CHECK-NEXT: vmov +entry: + %tmp = sitofp i32 %a to double ; <double> [#uses=1] + ret double %tmp +} + +define double @uint_to_double(i32 %a) { +;CHECK: uint_to_double: +;CHECK: vmov +;CHECK-NEXT: vcvt.f64.u32 +;CHECK-NEXT: vmov +entry: + %tmp = uitofp i32 %a to double ; <double> [#uses=1] + ret double %tmp +} + +define float @uint_to_float(i32 %a) { +;CHECK: uint_to_float: +;CHECK: vmov +;CHECK-NEXT: vcvt.f32.u32 +;CHECK-NEXT: vmov +entry: + %tmp = uitofp i32 %a to float ; <float> [#uses=1] + ret float %tmp +} + +define double @h(double* %v) { +;CHECK: h: +;CHECK: vldr.64 +;CHECK-NEXT: vmov +entry: + %tmp = load double* %v ; <double> [#uses=1] + ret double %tmp +} + +define float @h2() { +;CHECK: h2: +;CHECK: mov r0, #1065353216 +entry: + ret float 1.000000e+00 +} + +define double @f2(double %a) { +;CHECK: f2: +;CHECK-NOT: vmov + ret double %a +} + +define void @f3() { +;CHECK: f3: +;CHECK-NOT: vmov +;CHECK: f4 +entry: + %tmp = call double @f5( ) ; <double> [#uses=1] + call void @f4( double %tmp ) + ret void +} + +declare void @f4(double) + +declare double @f5() +
diff --git a/src/LLVM/test/CodeGen/ARM/fp16.ll b/src/LLVM/test/CodeGen/ARM/fp16.ll new file mode 100644 index 0000000..c5583b9 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/fp16.ll
@@ -0,0 +1,32 @@ +; RUN: llc < %s | FileCheck %s +; RUN: llc -mattr=+vfp3,+fp16 < %s | FileCheck --check-prefix=CHECK-FP16 %s +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32" +target triple = "armv7-eabi" + +@x = global i16 12902 +@y = global i16 0 +@z = common global i16 0 + +define arm_aapcs_vfpcc void @foo() nounwind { +; CHECK: foo: +; CHECK-FP6: foo: +entry: + %0 = load i16* @x, align 2 + %1 = load i16* @y, align 2 + %2 = tail call float @llvm.convert.from.fp16(i16 %0) +; CHECK: __gnu_h2f_ieee +; CHECK-FP16: vcvtb.f16.f32 + %3 = tail call float @llvm.convert.from.fp16(i16 %1) +; CHECK: __gnu_h2f_ieee +; CHECK-FP16: vcvtb.f16.f32 + %4 = fadd float %2, %3 + %5 = tail call i16 @llvm.convert.to.fp16(float %4) +; CHECK: __gnu_f2h_ieee +; CHECK-FP16: vcvtb.f32.f16 + store i16 %5, i16* @x, align 2 + ret void +} + +declare float @llvm.convert.from.fp16(i16) nounwind readnone + +declare i16 @llvm.convert.to.fp16(float) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/ARM/fp_convert.ll b/src/LLVM/test/CodeGen/ARM/fp_convert.ll new file mode 100644 index 0000000..7002cec --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/fp_convert.ll
@@ -0,0 +1,50 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=VFP2 +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=NEON +; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=VFP2 + +define i32 @test1(float %a, float %b) { +; VFP2: test1: +; VFP2: vcvt.s32.f32 s{{.}}, s{{.}} +; NEON: test1: +; NEON: vadd.f32 [[D0:d[0-9]+]] +; NEON: vcvt.s32.f32 d0, [[D0]] +entry: + %0 = fadd float %a, %b + %1 = fptosi float %0 to i32 + ret i32 %1 +} + +define i32 @test2(float %a, float %b) { +; VFP2: test2: +; VFP2: vcvt.u32.f32 s{{.}}, s{{.}} +; NEON: test2: +; NEON: vadd.f32 [[D0:d[0-9]+]] +; NEON: vcvt.u32.f32 d0, [[D0]] +entry: + %0 = fadd float %a, %b + %1 = fptoui float %0 to i32 + ret i32 %1 +} + +define float @test3(i32 %a, i32 %b) { +; VFP2: test3: +; VFP2: vcvt.f32.u32 s{{.}}, s{{.}} +; NEON: test3: +; NEON: vcvt.f32.u32 d0, d0 +entry: + %0 = add i32 %a, %b + %1 = uitofp i32 %0 to float + ret float %1 +} + +define float @test4(i32 %a, i32 %b) { +; VFP2: test4: +; VFP2: vcvt.f32.s32 s{{.}}, s{{.}} +; NEON: test4: +; NEON: vcvt.f32.s32 d0, d0 +entry: + %0 = add i32 %a, %b + %1 = sitofp i32 %0 to float + ret float %1 +}
diff --git a/src/LLVM/test/CodeGen/ARM/fparith.ll b/src/LLVM/test/CodeGen/ARM/fparith.ll new file mode 100644 index 0000000..aefa492 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/fparith.ll
@@ -0,0 +1,101 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s + +define float @f1(float %a, float %b) { +;CHECK: f1: +;CHECK: vadd.f32 +entry: + %tmp = fadd float %a, %b ; <float> [#uses=1] + ret float %tmp +} + +define double @f2(double %a, double %b) { +;CHECK: f2: +;CHECK: vadd.f64 +entry: + %tmp = fadd double %a, %b ; <double> [#uses=1] + ret double %tmp +} + +define float @f3(float %a, float %b) { +;CHECK: f3: +;CHECK: vmul.f32 +entry: + %tmp = fmul float %a, %b ; <float> [#uses=1] + ret float %tmp +} + +define double @f4(double %a, double %b) { +;CHECK: f4: +;CHECK: vmul.f64 +entry: + %tmp = fmul double %a, %b ; <double> [#uses=1] + ret double %tmp +} + +define float @f5(float %a, float %b) { +;CHECK: f5: +;CHECK: vsub.f32 +entry: + %tmp = fsub float %a, %b ; <float> [#uses=1] + ret float %tmp +} + +define double @f6(double %a, double %b) { +;CHECK: f6: +;CHECK: vsub.f64 +entry: + %tmp = fsub double %a, %b ; <double> [#uses=1] + ret double %tmp +} + +define float @f7(float %a) { +;CHECK: f7: +;CHECK: eor +entry: + %tmp1 = fsub float -0.000000e+00, %a ; <float> [#uses=1] + ret float %tmp1 +} + +define double @f8(double %a) { +;CHECK: f8: +;CHECK: vneg.f64 +entry: + %tmp1 = fsub double -0.000000e+00, %a ; <double> [#uses=1] + ret double %tmp1 +} + +define float @f9(float %a, float %b) { +;CHECK: f9: +;CHECK: vdiv.f32 +entry: + %tmp1 = fdiv float %a, %b ; <float> [#uses=1] + ret float %tmp1 +} + +define double @f10(double %a, double %b) { +;CHECK: f10: +;CHECK: vdiv.f64 +entry: + %tmp1 = fdiv double %a, %b ; <double> [#uses=1] + ret double %tmp1 +} + +define float @f11(float %a) { +;CHECK: f11: +;CHECK: bic +entry: + %tmp1 = call float @fabsf( float %a ) ; <float> [#uses=1] + ret float %tmp1 +} + +declare float @fabsf(float) + +define double @f12(double %a) { +;CHECK: f12: +;CHECK: vabs.f64 +entry: + %tmp1 = call double @fabs( double %a ) ; <double> [#uses=1] + ret double %tmp1 +} + +declare double @fabs(double)
diff --git a/src/LLVM/test/CodeGen/ARM/fpcmp-opt.ll b/src/LLVM/test/CodeGen/ARM/fpcmp-opt.ll new file mode 100644 index 0000000..7c0dd0e --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/fpcmp-opt.ll
@@ -0,0 +1,83 @@ +; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=FINITE %s +; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math | FileCheck -check-prefix=NAN %s +; rdar://7461510 + +define arm_apcscc i32 @t1(float* %a, float* %b) nounwind { +entry: +; FINITE: t1: +; FINITE-NOT: vldr +; FINITE: ldr +; FINITE: ldr +; FINITE: cmp r0, r1 +; FINITE-NOT: vcmpe.f32 +; FINITE-NOT: vmrs +; FINITE: beq + +; NAN: t1: +; NAN: vldr.32 s0, +; NAN: vldr.32 s1, +; NAN: vcmpe.f32 s1, s0 +; NAN: vmrs apsr_nzcv, fpscr +; NAN: beq + %0 = load float* %a + %1 = load float* %b + %2 = fcmp une float %0, %1 + br i1 %2, label %bb1, label %bb2 + +bb1: + %3 = call i32 @bar() + ret i32 %3 + +bb2: + %4 = call i32 @foo() + ret i32 %4 +} + +define arm_apcscc i32 @t2(double* %a, double* %b) nounwind { +entry: +; FINITE: t2: +; FINITE-NOT: vldr +; FINITE: ldrd r0, r1, [r0] +; FINITE-NOT: b LBB +; FINITE: cmp r0, #0 +; FINITE: cmpeq r1, #0 +; FINITE-NOT: vcmpe.f32 +; FINITE-NOT: vmrs +; FINITE: bne + %0 = load double* %a + %1 = fcmp oeq double %0, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 + +bb1: + %2 = call i32 @bar() + ret i32 %2 + +bb2: + %3 = call i32 @foo() + ret i32 %3 +} + +define arm_apcscc i32 @t3(float* %a, float* %b) nounwind { +entry: +; FINITE: t3: +; FINITE-NOT: vldr +; FINITE: ldr r0, [r0] +; FINITE: cmp r0, #0 +; FINITE-NOT: vcmpe.f32 +; FINITE-NOT: vmrs +; FINITE: bne + %0 = load float* %a + %1 = fcmp oeq float %0, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 + +bb1: + %2 = call i32 @bar() + ret i32 %2 + +bb2: + %3 = call i32 @foo() + ret i32 %3 +} + +declare i32 @bar() +declare i32 @foo()
diff --git a/src/LLVM/test/CodeGen/ARM/fpcmp.ll b/src/LLVM/test/CodeGen/ARM/fpcmp.ll new file mode 100644 index 0000000..9ce48ab --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/fpcmp.ll
@@ -0,0 +1,71 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s + +define i32 @f1(float %a) { +;CHECK: f1: +;CHECK: vcmpe.f32 +;CHECK: movmi +entry: + %tmp = fcmp olt float %a, 1.000000e+00 ; <i1> [#uses=1] + %tmp1 = zext i1 %tmp to i32 ; <i32> [#uses=1] + ret i32 %tmp1 +} + +define i32 @f2(float %a) { +;CHECK: f2: +;CHECK: vcmpe.f32 +;CHECK: moveq +entry: + %tmp = fcmp oeq float %a, 1.000000e+00 ; <i1> [#uses=1] + %tmp2 = zext i1 %tmp to i32 ; <i32> [#uses=1] + ret i32 %tmp2 +} + +define i32 @f3(float %a) { +;CHECK: f3: +;CHECK: vcmpe.f32 +;CHECK: movgt +entry: + %tmp = fcmp ogt float %a, 1.000000e+00 ; <i1> [#uses=1] + %tmp3 = zext i1 %tmp to i32 ; <i32> [#uses=1] + ret i32 %tmp3 +} + +define i32 @f4(float %a) { +;CHECK: f4: +;CHECK: vcmpe.f32 +;CHECK: movge +entry: + %tmp = fcmp oge float %a, 1.000000e+00 ; <i1> [#uses=1] + %tmp4 = zext i1 %tmp to i32 ; <i32> [#uses=1] + ret i32 %tmp4 +} + +define i32 @f5(float %a) { +;CHECK: f5: +;CHECK: vcmpe.f32 +;CHECK: movls +entry: + %tmp = fcmp ole float %a, 1.000000e+00 ; <i1> [#uses=1] + %tmp5 = zext i1 %tmp to i32 ; <i32> [#uses=1] + ret i32 %tmp5 +} + +define i32 @f6(float %a) { +;CHECK: f6: +;CHECK: vcmpe.f32 +;CHECK: movne +entry: + %tmp = fcmp une float %a, 1.000000e+00 ; <i1> [#uses=1] + %tmp6 = zext i1 %tmp to i32 ; <i32> [#uses=1] + ret i32 %tmp6 +} + +define i32 @g1(double %a) { +;CHECK: g1: +;CHECK: vcmpe.f64 +;CHECK: movmi +entry: + %tmp = fcmp olt double %a, 1.000000e+00 ; <i1> [#uses=1] + %tmp7 = zext i1 %tmp to i32 ; <i32> [#uses=1] + ret i32 %tmp7 +}
diff --git a/src/LLVM/test/CodeGen/ARM/fpcmp_ueq.ll b/src/LLVM/test/CodeGen/ARM/fpcmp_ueq.ll new file mode 100644 index 0000000..bacf6e1 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/fpcmp_ueq.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin | grep moveq +; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s + +define i32 @f7(float %a, float %b) { +entry: +; CHECK: f7: +; CHECK: vcmpe.f32 +; CHECK: vmrs apsr_nzcv, fpscr +; CHECK: movweq +; CHECK-NOT: vmrs +; CHECK: movwvs + %tmp = fcmp ueq float %a,%b + %retval = select i1 %tmp, i32 666, i32 42 + ret i32 %retval +} +
diff --git a/src/LLVM/test/CodeGen/ARM/fpconsts.ll b/src/LLVM/test/CodeGen/ARM/fpconsts.ll new file mode 100644 index 0000000..638dde9 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/fpconsts.ll
@@ -0,0 +1,33 @@ +; RUN: llc < %s -march=arm -mattr=+vfp3 | FileCheck %s + +define float @t1(float %x) nounwind readnone optsize { +entry: +; CHECK: t1: +; CHECK: vmov.f32 s{{.*}}, #4.000000e+00 + %0 = fadd float %x, 4.000000e+00 + ret float %0 +} + +define double @t2(double %x) nounwind readnone optsize { +entry: +; CHECK: t2: +; CHECK: vmov.f64 d{{.*}}, #3.000000e+00 + %0 = fadd double %x, 3.000000e+00 + ret double %0 +} + +define double @t3(double %x) nounwind readnone optsize { +entry: +; CHECK: t3: +; CHECK: vmov.f64 d{{.*}}, #-1.300000e+01 + %0 = fmul double %x, -1.300000e+01 + ret double %0 +} + +define float @t4(float %x) nounwind readnone optsize { +entry: +; CHECK: t4: +; CHECK: vmov.f32 s{{.*}}, #-2.400000e+01 + %0 = fmul float %x, -2.400000e+01 + ret float %0 +}
diff --git a/src/LLVM/test/CodeGen/ARM/fpconv.ll b/src/LLVM/test/CodeGen/ARM/fpconv.ll new file mode 100644 index 0000000..e3fd1f8 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/fpconv.ll
@@ -0,0 +1,102 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s --check-prefix=CHECK-VFP +; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s + +define float @f1(double %x) { +;CHECK-VFP: f1: +;CHECK-VFP: vcvt.f32.f64 +;CHECK: f1: +;CHECK: truncdfsf2 +entry: + %tmp1 = fptrunc double %x to float ; <float> [#uses=1] + ret float %tmp1 +} + +define double @f2(float %x) { +;CHECK-VFP: f2: +;CHECK-VFP: vcvt.f64.f32 +;CHECK: f2: +;CHECK: extendsfdf2 +entry: + %tmp1 = fpext float %x to double ; <double> [#uses=1] + ret double %tmp1 +} + +define i32 @f3(float %x) { +;CHECK-VFP: f3: +;CHECK-VFP: vcvt.s32.f32 +;CHECK: f3: +;CHECK: fixsfsi +entry: + %tmp = fptosi float %x to i32 ; <i32> [#uses=1] + ret i32 %tmp +} + +define i32 @f4(float %x) { +;CHECK-VFP: f4: +;CHECK-VFP: vcvt.u32.f32 +;CHECK: f4: +;CHECK: fixunssfsi +entry: + %tmp = fptoui float %x to i32 ; <i32> [#uses=1] + ret i32 %tmp +} + +define i32 @f5(double %x) { +;CHECK-VFP: f5: +;CHECK-VFP: vcvt.s32.f64 +;CHECK: f5: +;CHECK: fixdfsi +entry: + %tmp = fptosi double %x to i32 ; <i32> [#uses=1] + ret i32 %tmp +} + +define i32 @f6(double %x) { +;CHECK-VFP: f6: +;CHECK-VFP: vcvt.u32.f64 +;CHECK: f6: +;CHECK: fixunsdfsi +entry: + %tmp = fptoui double %x to i32 ; <i32> [#uses=1] + ret i32 %tmp +} + +define float @f7(i32 %a) { +;CHECK-VFP: f7: +;CHECK-VFP: vcvt.f32.s32 +;CHECK: f7: +;CHECK: floatsisf +entry: + %tmp = sitofp i32 %a to float ; <float> [#uses=1] + ret float %tmp +} + +define double @f8(i32 %a) { +;CHECK-VFP: f8: +;CHECK-VFP: vcvt.f64.s32 +;CHECK: f8: +;CHECK: floatsidf +entry: + %tmp = sitofp i32 %a to double ; <double> [#uses=1] + ret double %tmp +} + +define float @f9(i32 %a) { +;CHECK-VFP: f9: +;CHECK-VFP: vcvt.f32.u32 +;CHECK: f9: +;CHECK: floatunsisf +entry: + %tmp = uitofp i32 %a to float ; <float> [#uses=1] + ret float %tmp +} + +define double @f10(i32 %a) { +;CHECK-VFP: f10: +;CHECK-VFP: vcvt.f64.u32 +;CHECK: f10: +;CHECK: floatunsidf +entry: + %tmp = uitofp i32 %a to double ; <double> 
[#uses=1] + ret double %tmp +}
diff --git a/src/LLVM/test/CodeGen/ARM/fpmem.ll b/src/LLVM/test/CodeGen/ARM/fpmem.ll new file mode 100644 index 0000000..1d23cbf --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/fpmem.ll
@@ -0,0 +1,41 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s + +define float @f1(float %a) { +; CHECK: f1: +; CHECK: mov r0, #0 + ret float 0.000000e+00 +} + +define float @f2(float* %v, float %u) { +; CHECK: f2: +; CHECK: vldr.32{{.*}}[ + %tmp = load float* %v ; <float> [#uses=1] + %tmp1 = fadd float %tmp, %u ; <float> [#uses=1] + ret float %tmp1 +} + +define float @f2offset(float* %v, float %u) { +; CHECK: f2offset: +; CHECK: vldr.32{{.*}}, #4] + %addr = getelementptr float* %v, i32 1 + %tmp = load float* %addr + %tmp1 = fadd float %tmp, %u + ret float %tmp1 +} + +define float @f2noffset(float* %v, float %u) { +; CHECK: f2noffset: +; CHECK: vldr.32{{.*}}, #-4] + %addr = getelementptr float* %v, i32 -1 + %tmp = load float* %addr + %tmp1 = fadd float %tmp, %u + ret float %tmp1 +} + +define void @f3(float %a, float %b, float* %v) { +; CHECK: f3: +; CHECK: vstr.32{{.*}}[ + %tmp = fadd float %a, %b ; <float> [#uses=1] + store float %tmp, float* %v + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/fpow.ll b/src/LLVM/test/CodeGen/ARM/fpow.ll new file mode 100644 index 0000000..6d48792 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/fpow.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=arm + +define double @t(double %x, double %y) nounwind optsize { +entry: + %0 = tail call double @llvm.pow.f64( double %x, double %y ) ; <double> [#uses=1] + ret double %0 +} + +declare double @llvm.pow.f64(double, double) nounwind readonly
diff --git a/src/LLVM/test/CodeGen/ARM/fpowi.ll b/src/LLVM/test/CodeGen/ARM/fpowi.ll new file mode 100644 index 0000000..5614637 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/fpowi.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=arm-linux-gnueabi | grep powidf2 +; PR1287 + +; ModuleID = '<stdin>' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "arm-unknown-linux-gnueabi" + +define double @_ZSt3powdi(double %__x, i32 %__i) { +entry: + %tmp3 = call double @llvm.powi.f64( double %__x, i32 %__i ) + ret double %tmp3 +} + +declare double @llvm.powi.f64(double, i32) +
diff --git a/src/LLVM/test/CodeGen/ARM/fptoint.ll b/src/LLVM/test/CodeGen/ARM/fptoint.ll new file mode 100644 index 0000000..8406382 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/fptoint.ll
@@ -0,0 +1,49 @@ +; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | FileCheck %s + +@i = weak global i32 0 ; <i32*> [#uses=2] +@u = weak global i32 0 ; <i32*> [#uses=2] + +define i32 @foo1(float *%x) { + %tmp1 = load float* %x + %tmp2 = bitcast float %tmp1 to i32 + ret i32 %tmp2 +} + +define i64 @foo2(double *%x) { + %tmp1 = load double* %x + %tmp2 = bitcast double %tmp1 to i64 + ret i64 %tmp2 +} + +define void @foo5(float %x) { + %tmp1 = fptosi float %x to i32 + store i32 %tmp1, i32* @i + ret void +} + +define void @foo6(float %x) { + %tmp1 = fptoui float %x to i32 + store i32 %tmp1, i32* @u + ret void +} + +define void @foo7(double %x) { + %tmp1 = fptosi double %x to i32 + store i32 %tmp1, i32* @i + ret void +} + +define void @foo8(double %x) { + %tmp1 = fptoui double %x to i32 + store i32 %tmp1, i32* @u + ret void +} + +define void @foo9(double %x) { + %tmp = fptoui double %x to i16 + store i16 %tmp, i16* null + ret void +} +; CHECK: foo9: +; CHECK: vmov r0, s0 +
diff --git a/src/LLVM/test/CodeGen/ARM/fsubs.ll b/src/LLVM/test/CodeGen/ARM/fsubs.ll new file mode 100644 index 0000000..bea8d5f --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/fsubs.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=NFP1 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0 + +define float @test(float %a, float %b) { +entry: + %0 = fsub float %a, %b + ret float %0 +} + +; VFP2: vsub.f32 s0, s1, s0 +; NFP1: vsub.f32 d0, d1, d0 +; NFP0: vsub.f32 s0, s1, s0
diff --git a/src/LLVM/test/CodeGen/ARM/global-merge.ll b/src/LLVM/test/CodeGen/ARM/global-merge.ll new file mode 100644 index 0000000..28bf221 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/global-merge.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -mtriple=thumb-apple-darwin | FileCheck %s +; Test the ARMGlobalMerge pass. Use -march=thumb because it has a small +; value for the maximum offset (127). + +; A local array that exceeds the maximum offset should not be merged. +; CHECK: g0: +@g0 = internal global [32 x i32] [ i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2 ] + +; CHECK: _MergedGlobals: +@g1 = internal global i32 1 +@g2 = internal global i32 2 + +; Make sure that the complete variable fits within the range of the maximum +; offset. Having the starting offset in range is not sufficient. +; When this works properly, @g3 is placed in a separate chunk of merged globals. +; CHECK: _MergedGlobals1: +@g3 = internal global [30 x i32] [ i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10 ] + +; Global variables that can be placed in BSS should be kept together in a +; separate pool of merged globals. +; CHECK: _MergedGlobals2 +@g4 = internal global i32 0 +@g5 = internal global i32 0
diff --git a/src/LLVM/test/CodeGen/ARM/globals.ll b/src/LLVM/test/CodeGen/ARM/globals.ll new file mode 100644 index 0000000..06db74c --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/globals.ll
@@ -0,0 +1,75 @@ +; RUN: llc < %s -mtriple=armv6-apple-darwin -relocation-model=static | FileCheck %s -check-prefix=DarwinStatic +; RUN: llc < %s -mtriple=armv6-apple-darwin -relocation-model=dynamic-no-pic | FileCheck %s -check-prefix=DarwinDynamic +; RUN: llc < %s -mtriple=armv6-apple-darwin -relocation-model=pic | FileCheck %s -check-prefix=DarwinPIC +; RUN: llc < %s -mtriple=armv6-linux-gnueabi -relocation-model=pic | FileCheck %s -check-prefix=LinuxPIC + +@G = external global i32 + +define i32 @test1() { + %tmp = load i32* @G + ret i32 %tmp +} + +; DarwinStatic: _test1: +; DarwinStatic: ldr r0, LCPI0_0 +; DarwinStatic: ldr r0, [r0] +; DarwinStatic: bx lr + +; DarwinStatic: .align 2 +; DarwinStatic: LCPI0_0: +; DarwinStatic: .long {{_G$}} + + +; DarwinDynamic: _test1: +; DarwinDynamic: ldr r0, LCPI0_0 +; DarwinDynamic: ldr r0, [r0] +; DarwinDynamic: ldr r0, [r0] +; DarwinDynamic: bx lr + +; DarwinDynamic: .align 2 +; DarwinDynamic: LCPI0_0: +; DarwinDynamic: .long L_G$non_lazy_ptr + +; DarwinDynamic: .section __DATA,__nl_symbol_ptr,non_lazy_symbol_pointers +; DarwinDynamic: .align 2 +; DarwinDynamic: L_G$non_lazy_ptr: +; DarwinDynamic: .indirect_symbol _G +; DarwinDynamic: .long 0 + + + +; DarwinPIC: _test1: +; DarwinPIC: ldr r0, LCPI0_0 +; DarwinPIC: LPC0_0: +; DarwinPIC: ldr r0, [pc, r0] +; DarwinPIC: ldr r0, [r0] +; DarwinPIC: bx lr + +; DarwinPIC: .align 2 +; DarwinPIC: LCPI0_0: +; DarwinPIC: .long L_G$non_lazy_ptr-(LPC0_0+8) + +; DarwinPIC: .section __DATA,__nl_symbol_ptr,non_lazy_symbol_pointers +; DarwinPIC: .align 2 +; DarwinPIC: L_G$non_lazy_ptr: +; DarwinPIC: .indirect_symbol _G +; DarwinPIC: .long 0 + + + +; LinuxPIC: test1: +; LinuxPIC: ldr r0, .LCPI0_0 +; LinuxPIC: ldr r1, .LCPI0_1 + +; LinuxPIC: .LPC0_0: +; LinuxPIC: add r0, pc, r0 +; LinuxPIC: ldr r0, [r1, r0] +; LinuxPIC: ldr r0, [r0] +; LinuxPIC: bx lr + +; LinuxPIC: .align 2 +; LinuxPIC: .LCPI0_0: +; LinuxPIC: .long _GLOBAL_OFFSET_TABLE_-(.LPC0_0+8) +; LinuxPIC: .align 2 +; LinuxPIC: .LCPI0_1: 
+; LinuxPIC: .long G(GOT)
diff --git a/src/LLVM/test/CodeGen/ARM/hardfloat_neon.ll b/src/LLVM/test/CodeGen/ARM/hardfloat_neon.ll new file mode 100644 index 0000000..4abf04b --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/hardfloat_neon.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -mtriple=arm-linux-gnueabi -mattr=+neon -float-abi=hard + +define <16 x i8> @vmulQi8_reg(<16 x i8> %A, <16 x i8> %B) nounwind { + %tmp1 = mul <16 x i8> %A, %B + ret <16 x i8> %tmp1 +} + +define <16 x i8> @f(<16 x i8> %a, <16 x i8> %b) { + %tmp = call <16 x i8> @g(<16 x i8> %b) + ret <16 x i8> %tmp +} + +declare <16 x i8> @g(<16 x i8>)
diff --git a/src/LLVM/test/CodeGen/ARM/hello.ll b/src/LLVM/test/CodeGen/ARM/hello.ll new file mode 100644 index 0000000..3d0958e --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/hello.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=arm +; RUN: llc < %s -mtriple=armv6-linux-gnueabi | grep mov | count 1 +; RUN: llc < %s -mtriple=armv6-linux-gnu --disable-fp-elim | \ +; RUN: grep mov | count 2 +; RUN: llc < %s -mtriple=armv6-apple-darwin | grep mov | count 2 + +@str = internal constant [12 x i8] c"Hello World\00" + +define i32 @main() { + %tmp = call i32 @puts( i8* getelementptr ([12 x i8]* @str, i32 0, i64 0) ) ; <i32> [#uses=0] + ret i32 0 +} + +declare i32 @puts(i8*)
diff --git a/src/LLVM/test/CodeGen/ARM/hidden-vis-2.ll b/src/LLVM/test/CodeGen/ARM/hidden-vis-2.ll new file mode 100644 index 0000000..8bb2c6e --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/hidden-vis-2.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -relocation-model=dynamic-no-pic -mtriple=arm-apple-darwin | FileCheck %s + +@x = weak hidden global i32 0 ; <i32*> [#uses=1] + +define i32 @t() nounwind readonly { +entry: +; CHECK: t: +; CHECK: ldr +; CHECK-NEXT: ldr + %0 = load i32* @x, align 4 ; <i32> [#uses=1] + ret i32 %0 +}
diff --git a/src/LLVM/test/CodeGen/ARM/hidden-vis-3.ll b/src/LLVM/test/CodeGen/ARM/hidden-vis-3.ll new file mode 100644 index 0000000..3bc3312 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/hidden-vis-3.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -relocation-model=dynamic-no-pic -mtriple=arm-apple-darwin9 | FileCheck %s + +@x = external hidden global i32 ; <i32*> [#uses=1] +@y = extern_weak hidden global i32 ; <i32*> [#uses=1] + +define i32 @t() nounwind readonly { +entry: +; CHECK: LCPI0_0: +; CHECK-NEXT: .long _x +; CHECK: LCPI0_1: +; CHECK-NEXT: .long _y + + %0 = load i32* @x, align 4 ; <i32> [#uses=1] + %1 = load i32* @y, align 4 ; <i32> [#uses=1] + %2 = add i32 %1, %0 ; <i32> [#uses=1] + ret i32 %2 +}
diff --git a/src/LLVM/test/CodeGen/ARM/hidden-vis.ll b/src/LLVM/test/CodeGen/ARM/hidden-vis.ll new file mode 100644 index 0000000..3544ae8 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/hidden-vis.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -mtriple=arm-linux | FileCheck %s -check-prefix=LINUX +; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=DARWIN + +@a = hidden global i32 0 +@b = external global i32 + +define weak hidden void @t1() nounwind { +; LINUX: .hidden t1 +; LINUX: t1: + +; DARWIN: .private_extern _t1 +; DARWIN: t1: + ret void +} + +define weak void @t2() nounwind { +; LINUX: t2: +; LINUX: .hidden a + +; DARWIN: t2: +; DARWIN: .private_extern _a + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/iabs.ll b/src/LLVM/test/CodeGen/ARM/iabs.ll new file mode 100644 index 0000000..82f45ab --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/iabs.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=arm -mattr=+v4t | FileCheck %s + +;; Integer absolute value, should produce something as good as: ARM: +;; movs r0, r0 +;; rsbmi r0, r0, #0 +;; bx lr + +define i32 @test(i32 %a) { + %tmp1neg = sub i32 0, %a + %b = icmp sgt i32 %a, -1 + %abs = select i1 %b, i32 %a, i32 %tmp1neg + ret i32 %abs +; CHECK: movs r0, r0 +; CHECK: rsbmi r0, r0, #0 +; CHECK: bx lr +}
diff --git a/src/LLVM/test/CodeGen/ARM/ifcvt1.ll b/src/LLVM/test/CodeGen/ARM/ifcvt1.ll new file mode 100644 index 0000000..99447cf --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ifcvt1.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=arm -mattr=+v4t +; RUN: llc < %s -march=arm -mattr=+v4t | grep bx | count 1 + +define i32 @t1(i32 %a, i32 %b) { + %tmp2 = icmp eq i32 %a, 0 + br i1 %tmp2, label %cond_false, label %cond_true + +cond_true: + %tmp5 = add i32 %b, 1 + ret i32 %tmp5 + +cond_false: + %tmp7 = add i32 %b, -1 + ret i32 %tmp7 +}
diff --git a/src/LLVM/test/CodeGen/ARM/ifcvt10.ll b/src/LLVM/test/CodeGen/ARM/ifcvt10.ll new file mode 100644 index 0000000..18f87bf --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ifcvt10.ll
@@ -0,0 +1,43 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin -mcpu=cortex-a9 | FileCheck %s +; rdar://8402126 +; Make sure if-converter is not predicating vldmia and ldmia. These are +; micro-coded and would have long issue latency even if predicated on +; false predicate. + +define void @t(double %a, double %b, double %c, double %d, i32* nocapture %solutions, double* nocapture %x) nounwind { +entry: +; CHECK: t: +; CHECK: vpop {d8} +; CHECK-NOT: vpopne +; CHECK: pop {r7, pc} +; CHECK: vpop {d8} +; CHECK: pop {r7, pc} + br i1 undef, label %if.else, label %if.then + +if.then: ; preds = %entry + %mul73 = fmul double undef, 0.000000e+00 + %sub76 = fsub double %mul73, undef + store double %sub76, double* undef, align 4 + %call88 = tail call double @cos(double 0.000000e+00) nounwind + %mul89 = fmul double undef, %call88 + %sub92 = fsub double %mul89, undef + store double %sub92, double* undef, align 4 + ret void + +if.else: ; preds = %entry + %tmp101 = tail call double @llvm.pow.f64(double undef, double 0x3FD5555555555555) + %add112 = fadd double %tmp101, undef + %mul118 = fmul double %add112, undef + store double 0.000000e+00, double* %x, align 4 + ret void +} + +declare double @acos(double) + +declare double @sqrt(double) readnone + +declare double @cos(double) readnone + +declare double @fabs(double) + +declare double @llvm.pow.f64(double, double) nounwind readonly
diff --git a/src/LLVM/test/CodeGen/ARM/ifcvt11.ll b/src/LLVM/test/CodeGen/ARM/ifcvt11.ll new file mode 100644 index 0000000..63f8557 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ifcvt11.ll
@@ -0,0 +1,59 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin -mcpu=cortex-a8 | FileCheck %s +; rdar://8598427 +; Adjust if-converter heuristics to avoid predicating vmrs which can cause +; significant regression. + +%struct.xyz_t = type { double, double, double } + +define i32 @effie(i32 %tsets, %struct.xyz_t* nocapture %p, i32 %a, i32 %b, i32 %c) nounwind readonly noinline { +; CHECK: effie: +entry: + %0 = icmp sgt i32 %tsets, 0 + br i1 %0, label %bb.nph, label %bb6 + +bb.nph: ; preds = %entry + %1 = add nsw i32 %b, %a + %2 = add nsw i32 %1, %c + br label %bb + +bb: ; preds = %bb4, %bb.nph +; CHECK: vcmpe.f64 +; CHECK: vmrs apsr_nzcv, fpscr + %r.19 = phi i32 [ 0, %bb.nph ], [ %r.0, %bb4 ] + %n.08 = phi i32 [ 0, %bb.nph ], [ %10, %bb4 ] + %scevgep10 = getelementptr inbounds %struct.xyz_t* %p, i32 %n.08, i32 0 + %scevgep11 = getelementptr %struct.xyz_t* %p, i32 %n.08, i32 1 + %3 = load double* %scevgep10, align 4 + %4 = load double* %scevgep11, align 4 + %5 = fcmp uge double %3, %4 + br i1 %5, label %bb3, label %bb1 + +bb1: ; preds = %bb +; CHECK-NOT: it +; CHECK-NOT: vcmpemi +; CHECK-NOT: vmrsmi +; CHECK: vcmpe.f64 +; CHECK: vmrs apsr_nzcv, fpscr + %scevgep12 = getelementptr %struct.xyz_t* %p, i32 %n.08, i32 2 + %6 = load double* %scevgep12, align 4 + %7 = fcmp uge double %3, %6 + br i1 %7, label %bb3, label %bb2 + +bb2: ; preds = %bb1 + %8 = add nsw i32 %2, %r.19 + br label %bb4 + +bb3: ; preds = %bb1, %bb + %9 = add nsw i32 %r.19, 1 + br label %bb4 + +bb4: ; preds = %bb3, %bb2 + %r.0 = phi i32 [ %9, %bb3 ], [ %8, %bb2 ] + %10 = add nsw i32 %n.08, 1 + %exitcond = icmp eq i32 %10, %tsets + br i1 %exitcond, label %bb6, label %bb + +bb6: ; preds = %bb4, %entry + %r.1.lcssa = phi i32 [ 0, %entry ], [ %r.0, %bb4 ] + ret i32 %r.1.lcssa +}
diff --git a/src/LLVM/test/CodeGen/ARM/ifcvt2.ll b/src/LLVM/test/CodeGen/ARM/ifcvt2.ll new file mode 100644 index 0000000..5d741a3 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ifcvt2.ll
@@ -0,0 +1,42 @@ +; RUN: llc < %s -march=arm -mattr=+v4t | FileCheck %s + +define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) { +; CHECK: t1: +; CHECK: bxlt lr + %tmp2 = icmp sgt i32 %c, 10 + %tmp5 = icmp slt i32 %d, 4 + %tmp8 = or i1 %tmp5, %tmp2 + %tmp13 = add i32 %b, %a + br i1 %tmp8, label %cond_true, label %UnifiedReturnBlock + +cond_true: + %tmp15 = add i32 %tmp13, %c + %tmp1821 = sub i32 %tmp15, %d + ret i32 %tmp1821 + +UnifiedReturnBlock: + ret i32 %tmp13 +} + +define i32 @t2(i32 %a, i32 %b, i32 %c, i32 %d) { +; CHECK: t2: +; CHECK: bxgt lr +; CHECK: cmp +; CHECK: addge +; CHECK: subge +; CHECK-NOT: bxge lr +; CHECK: bx lr + %tmp2 = icmp sgt i32 %c, 10 + %tmp5 = icmp slt i32 %d, 4 + %tmp8 = and i1 %tmp5, %tmp2 + %tmp13 = add i32 %b, %a + br i1 %tmp8, label %cond_true, label %UnifiedReturnBlock + +cond_true: + %tmp15 = add i32 %tmp13, %c + %tmp1821 = sub i32 %tmp15, %d + ret i32 %tmp1821 + +UnifiedReturnBlock: + ret i32 %tmp13 +}
diff --git a/src/LLVM/test/CodeGen/ARM/ifcvt3.ll b/src/LLVM/test/CodeGen/ARM/ifcvt3.ll new file mode 100644 index 0000000..5744db8 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ifcvt3.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -march=arm -mattr=+v4t +; RUN: llc < %s -march=arm -mattr=+v4t | grep cmpne | count 1 +; RUN: llc < %s -march=arm -mattr=+v4t | grep bx | count 2 + +define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) { + switch i32 %c, label %cond_next [ + i32 1, label %cond_true + i32 7, label %cond_true + ] + +cond_true: + %tmp12 = add i32 %a, 1 + %tmp1518 = add i32 %tmp12, %b + ret i32 %tmp1518 + +cond_next: + %tmp15 = add i32 %b, %a + ret i32 %tmp15 +}
diff --git a/src/LLVM/test/CodeGen/ARM/ifcvt4.ll b/src/LLVM/test/CodeGen/ARM/ifcvt4.ll new file mode 100644 index 0000000..6d75d27 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ifcvt4.ll
@@ -0,0 +1,40 @@ +; RUN: llc < %s -march=arm | FileCheck %s + +; Do not if-convert when branches go to the different loops. +; CHECK: t: +; CHECK-NOT: subgt +; CHECK-NOT: suble +; Don't use +define i32 @t(i32 %a, i32 %b) { +entry: + %tmp1434 = icmp eq i32 %a, %b ; <i1> [#uses=1] + br i1 %tmp1434, label %bb17, label %bb.outer + +bb.outer: ; preds = %cond_false, %entry + %b_addr.021.0.ph = phi i32 [ %b, %entry ], [ %tmp10, %cond_false ] ; <i32> [#uses=5] + %a_addr.026.0.ph = phi i32 [ %a, %entry ], [ %a_addr.026.0, %cond_false ] ; <i32> [#uses=1] + br label %bb + +bb: ; preds = %cond_true, %bb.outer + %indvar = phi i32 [ 0, %bb.outer ], [ %indvar.next, %cond_true ] ; <i32> [#uses=2] + %tmp. = sub i32 0, %b_addr.021.0.ph ; <i32> [#uses=1] + %tmp.40 = mul i32 %indvar, %tmp. ; <i32> [#uses=1] + %a_addr.026.0 = add i32 %tmp.40, %a_addr.026.0.ph ; <i32> [#uses=6] + %tmp3 = icmp sgt i32 %a_addr.026.0, %b_addr.021.0.ph ; <i1> [#uses=1] + br i1 %tmp3, label %cond_true, label %cond_false + +cond_true: ; preds = %bb + %tmp7 = sub i32 %a_addr.026.0, %b_addr.021.0.ph ; <i32> [#uses=2] + %tmp1437 = icmp eq i32 %tmp7, %b_addr.021.0.ph ; <i1> [#uses=1] + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1] + br i1 %tmp1437, label %bb17, label %bb + +cond_false: ; preds = %bb + %tmp10 = sub i32 %b_addr.021.0.ph, %a_addr.026.0 ; <i32> [#uses=2] + %tmp14 = icmp eq i32 %a_addr.026.0, %tmp10 ; <i1> [#uses=1] + br i1 %tmp14, label %bb17, label %bb.outer + +bb17: ; preds = %cond_false, %cond_true, %entry + %a_addr.026.1 = phi i32 [ %a, %entry ], [ %tmp7, %cond_true ], [ %a_addr.026.0, %cond_false ] ; <i32> [#uses=1] + ret i32 %a_addr.026.1 +}
diff --git a/src/LLVM/test/CodeGen/ARM/ifcvt5.ll b/src/LLVM/test/CodeGen/ARM/ifcvt5.ll new file mode 100644 index 0000000..6c02f64 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ifcvt5.ll
@@ -0,0 +1,25 @@ +; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s + +@x = external global i32* ; <i32**> [#uses=1] + +define void @foo(i32 %a) { +entry: + %tmp = load i32** @x ; <i32*> [#uses=1] + store i32 %a, i32* %tmp + ret void +} + +define i32 @t1(i32 %a, i32 %b) { +; CHECK: t1: +; CHECK: poplt {r7, pc} +entry: + %tmp1 = icmp sgt i32 %a, 10 ; <i1> [#uses=1] + br i1 %tmp1, label %cond_true, label %UnifiedReturnBlock + +cond_true: ; preds = %entry + tail call void @foo( i32 %b ) + ret i32 0 + +UnifiedReturnBlock: ; preds = %entry + ret i32 1 +}
diff --git a/src/LLVM/test/CodeGen/ARM/ifcvt6.ll b/src/LLVM/test/CodeGen/ARM/ifcvt6.ll new file mode 100644 index 0000000..2327657 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ifcvt6.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s + +define void @foo(i32 %X, i32 %Y) { +entry: +; CHECK: cmpne +; CHECK: pophi + %tmp1 = icmp ult i32 %X, 4 ; <i1> [#uses=1] + %tmp4 = icmp eq i32 %Y, 0 ; <i1> [#uses=1] + %tmp7 = or i1 %tmp4, %tmp1 ; <i1> [#uses=1] + br i1 %tmp7, label %cond_true, label %UnifiedReturnBlock + +cond_true: ; preds = %entry + %tmp10 = call i32 (...)* @bar( ) ; <i32> [#uses=0] + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +declare i32 @bar(...)
diff --git a/src/LLVM/test/CodeGen/ARM/ifcvt7.ll b/src/LLVM/test/CodeGen/ARM/ifcvt7.ll new file mode 100644 index 0000000..81a161f --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ifcvt7.ll
@@ -0,0 +1,32 @@ +; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s +; FIXME: Need post-ifcvt branch folding to get rid of the extra br at end of BB1. + + %struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* } + +define fastcc i32 @CountTree(%struct.quad_struct* %tree) { +; CHECK: cmpeq +; CHECK: moveq +; CHECK: popeq +entry: + br label %tailrecurse + +tailrecurse: ; preds = %bb, %entry + %tmp6 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1] + %tmp9 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=2] + %tmp12 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1] + %tmp14 = icmp eq %struct.quad_struct* null, null ; <i1> [#uses=1] + %tmp17 = icmp eq %struct.quad_struct* %tmp6, null ; <i1> [#uses=1] + %tmp23 = icmp eq %struct.quad_struct* %tmp9, null ; <i1> [#uses=1] + %tmp29 = icmp eq %struct.quad_struct* %tmp12, null ; <i1> [#uses=1] + %bothcond = and i1 %tmp17, %tmp14 ; <i1> [#uses=1] + %bothcond1 = and i1 %bothcond, %tmp23 ; <i1> [#uses=1] + %bothcond2 = and i1 %bothcond1, %tmp29 ; <i1> [#uses=1] + br i1 %bothcond2, label %return, label %bb + +bb: ; preds = %tailrecurse + %tmp41 = tail call fastcc i32 @CountTree( %struct.quad_struct* %tmp9 ) ; <i32> [#uses=0] + br label %tailrecurse + +return: ; preds = %tailrecurse + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/ARM/ifcvt8.ll b/src/LLVM/test/CodeGen/ARM/ifcvt8.ll new file mode 100644 index 0000000..a1a1a3c --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ifcvt8.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s + + %struct.SString = type { i8*, i32, i32 } + +declare void @abort() + +define fastcc void @t(%struct.SString* %word, i8 signext %c) { +; CHECK: popne +entry: + %tmp1 = icmp eq %struct.SString* %word, null ; <i1> [#uses=1] + br i1 %tmp1, label %cond_true, label %cond_false + +cond_true: ; preds = %entry + tail call void @abort( ) + unreachable + +cond_false: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/ifcvt9.ll b/src/LLVM/test/CodeGen/ARM/ifcvt9.ll new file mode 100644 index 0000000..05bdc45 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ifcvt9.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=arm + +define fastcc void @t() nounwind { +entry: + br i1 undef, label %bb.i.i3, label %growMapping.exit + +bb.i.i3: ; preds = %entry + unreachable + +growMapping.exit: ; preds = %entry + unreachable +}
diff --git a/src/LLVM/test/CodeGen/ARM/illegal-vector-bitcast.ll b/src/LLVM/test/CodeGen/ARM/illegal-vector-bitcast.ll new file mode 100644 index 0000000..f784b8b --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/illegal-vector-bitcast.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=arm +; RUN: llc < %s -mtriple=arm-linux + +define void @foo(<8 x float>* %f, <8 x float>* %g, <4 x i64>* %y) +{ + %h = load <8 x float>* %f + %i = fmul <8 x float> %h, <float 0x3FF19999A0000000, float 0x400A666660000000, float 0x40119999A0000000, float 0x40159999A0000000, float 0.5, float 0x3FE3333340000000, float 0x3FE6666660000000, float 0x3FE99999A0000000> + %m = bitcast <8 x float> %i to <4 x i64> + %z = load <4 x i64>* %y + %n = mul <4 x i64> %z, %m + %p = bitcast <4 x i64> %n to <8 x float> + store <8 x float> %p, <8 x float>* %g + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/imm.ll b/src/LLVM/test/CodeGen/ARM/imm.ll new file mode 100644 index 0000000..5be7435 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/imm.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=arm | not grep CPI + +define i32 @test1(i32 %A) { + %B = add i32 %A, -268435441 ; <i32> [#uses=1] + ret i32 %B +} + +define i32 @test2() { + ret i32 65533 +} + +define i32 @test3(i32 %A) { + %B = or i32 %A, 65533 ; <i32> [#uses=1] + ret i32 %B +} +
diff --git a/src/LLVM/test/CodeGen/ARM/indirectbr.ll b/src/LLVM/test/CodeGen/ARM/indirectbr.ll new file mode 100644 index 0000000..341c33f --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/indirectbr.ll
@@ -0,0 +1,66 @@ +; RUN: llc < %s -relocation-model=pic -mtriple=armv6-apple-darwin | FileCheck %s -check-prefix=ARM +; RUN: llc < %s -relocation-model=pic -mtriple=thumbv6-apple-darwin | FileCheck %s -check-prefix=THUMB +; RUN: llc < %s -relocation-model=static -mtriple=thumbv7-apple-darwin | FileCheck %s -check-prefix=THUMB2 + +@nextaddr = global i8* null ; <i8**> [#uses=2] +@C.0.2070 = private constant [5 x i8*] [i8* blockaddress(@foo, %L1), i8* blockaddress(@foo, %L2), i8* blockaddress(@foo, %L3), i8* blockaddress(@foo, %L4), i8* blockaddress(@foo, %L5)] ; <[5 x i8*]*> [#uses=1] + +define internal i32 @foo(i32 %i) nounwind { +; ARM: foo: +; THUMB: foo: +; THUMB2: foo: +entry: + %0 = load i8** @nextaddr, align 4 ; <i8*> [#uses=2] + %1 = icmp eq i8* %0, null ; <i1> [#uses=1] +; indirect branch gets duplicated here +; ARM: bx +; THUMB: mov pc, +; THUMB2: mov pc, + br i1 %1, label %bb3, label %bb2 + +bb2: ; preds = %entry, %bb3 + %gotovar.4.0 = phi i8* [ %gotovar.4.0.pre, %bb3 ], [ %0, %entry ] ; <i8*> [#uses=1] +; ARM: bx +; THUMB: mov pc, + indirectbr i8* %gotovar.4.0, [label %L5, label %L4, label %L3, label %L2, label %L1] + +bb3: ; preds = %entry + %2 = getelementptr inbounds [5 x i8*]* @C.0.2070, i32 0, i32 %i ; <i8**> [#uses=1] + %gotovar.4.0.pre = load i8** %2, align 4 ; <i8*> [#uses=1] + br label %bb2 + +L5: ; preds = %bb2 + br label %L4 + +L4: ; preds = %L5, %bb2 + %res.0 = phi i32 [ 385, %L5 ], [ 35, %bb2 ] ; <i32> [#uses=1] + br label %L3 + +L3: ; preds = %L4, %bb2 + %res.1 = phi i32 [ %res.0, %L4 ], [ 5, %bb2 ] ; <i32> [#uses=1] + br label %L2 + +L2: ; preds = %L3, %bb2 +; THUMB: muls + %res.2 = phi i32 [ %res.1, %L3 ], [ 1, %bb2 ] ; <i32> [#uses=1] + %phitmp = mul i32 %res.2, 6 ; <i32> [#uses=1] + br label %L1 + +L1: ; preds = %L2, %bb2 + %res.3 = phi i32 [ %phitmp, %L2 ], [ 2, %bb2 ] ; <i32> [#uses=1] +; ARM: ldr [[R1:r[0-9]+]], LCPI +; ARM: add [[R1b:r[0-9]+]], pc, [[R1]] +; ARM: str [[R1b]] +; THUMB: ldr.n +; THUMB: add +; THUMB: ldr.n 
[[R2:r[0-9]+]], LCPI +; THUMB: add [[R2]], pc +; THUMB: str [[R2]] +; THUMB2: ldr.n [[R2:r[0-9]+]], LCPI +; THUMB2-NEXT: str{{(.w)?}} [[R2]] + store i8* blockaddress(@foo, %L5), i8** @nextaddr, align 4 + ret i32 %res.3 +} +; ARM: .long Ltmp0-(LPC{{.*}}+8) +; THUMB: .long Ltmp0-(LPC{{.*}}+4) +; THUMB2: .long Ltmp0
diff --git a/src/LLVM/test/CodeGen/ARM/inlineasm-imm-arm.ll b/src/LLVM/test/CodeGen/ARM/inlineasm-imm-arm.ll new file mode 100644 index 0000000..45dfcf0 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/inlineasm-imm-arm.ll
@@ -0,0 +1,31 @@ +; RUN: llc < %s -march=arm + +; Test ARM-mode "I" constraint, for any Data Processing immediate. +define i32 @testI(i32 %x) { + %y = call i32 asm "add $0, $1, $2", "=r,r,I"( i32 %x, i32 65280 ) nounwind + ret i32 %y +} + +; Test ARM-mode "J" constraint, for compatibility with unknown use in GCC. +define void @testJ() { + tail call void asm sideeffect ".word $0", "J"( i32 4080 ) nounwind + ret void +} + +; Test ARM-mode "K" constraint, for bitwise inverted Data Processing immediates. +define void @testK() { + tail call void asm sideeffect ".word $0", "K"( i32 16777215 ) nounwind + ret void +} + +; Test ARM-mode "L" constraint, for negated Data Processing immediates. +define void @testL() { + tail call void asm sideeffect ".word $0", "L"( i32 -65280 ) nounwind + ret void +} + +; Test ARM-mode "M" constraint, for value between 0 and 32. +define i32 @testM(i32 %x) { + %y = call i32 asm "lsl $0, $1, $2", "=r,r,M"( i32 %x, i32 31 ) nounwind + ret i32 %y +}
diff --git a/src/LLVM/test/CodeGen/ARM/inlineasm.ll b/src/LLVM/test/CodeGen/ARM/inlineasm.ll new file mode 100644 index 0000000..a72c9d9 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/inlineasm.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=arm -mattr=+v6 + +define i32 @test1(i32 %tmp54) { + %tmp56 = tail call i32 asm "uxtb16 $0,$1", "=r,r"( i32 %tmp54 ) ; <i32> [#uses=1] + ret i32 %tmp56 +} + +define void @test2() { + tail call void asm sideeffect "/* number: ${0:c} */", "i"( i32 1 ) + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/inlineasm2.ll b/src/LLVM/test/CodeGen/ARM/inlineasm2.ll new file mode 100644 index 0000000..0aa8a96 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/inlineasm2.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 + +define double @__ieee754_sqrt(double %x) { + %tmp2 = tail call double asm "fsqrtd ${0:P}, ${1:P}", "=w,w"( double %x ) + ret double %tmp2 +} + +define float @__ieee754_sqrtf(float %x) { + %tmp2 = tail call float asm "fsqrts $0, $1", "=w,w"( float %x ) + ret float %tmp2 +}
diff --git a/src/LLVM/test/CodeGen/ARM/inlineasm3.ll b/src/LLVM/test/CodeGen/ARM/inlineasm3.ll new file mode 100644 index 0000000..cb5243c --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/inlineasm3.ll
@@ -0,0 +1,112 @@ +; RUN: llc < %s -march=arm -mattr=+neon,+v6t2 | FileCheck %s + +; Radar 7449043 +%struct.int32x4_t = type { <4 x i32> } + +define void @t() nounwind { +entry: +; CHECK: vmov.I64 q15, #0 +; CHECK: vmov.32 d30[0], +; CHECK: vmov q8, q15 + %tmp = alloca %struct.int32x4_t, align 16 + call void asm sideeffect "vmov.I64 q15, #0\0Avmov.32 d30[0], $1\0Avmov ${0:q}, q15\0A", "=*w,r,~{d31},~{d30}"(%struct.int32x4_t* %tmp, i32 8192) nounwind + ret void +} + +; Radar 7457110 +%struct.int32x2_t = type { <4 x i32> } + +define void @t2() nounwind { +entry: +; CHECK: vmov d30, d16 +; CHECK: vmov.32 r0, d30[0] + %asmtmp2 = tail call i32 asm sideeffect "vmov d30, $1\0Avmov.32 $0, d30[0]\0A", "=r,w,~{d30}"(<2 x i32> undef) nounwind + ret void +} + +; Radar 9306086 + +%0 = type { <8 x i8>, <16 x i8>* } + +define hidden void @conv4_8_E() nounwind { +entry: +%asmtmp31 = call %0 asm "vld1.u8 {$0}, [$1, :128]!\0A", "=w,=r,1"(<16 x i8>* undef) nounwind +unreachable +} + +; Radar 9037836 & 9119939 + +define i32 @t3() nounwind { +entry: +tail call void asm sideeffect "flds s15, $0 \0A", "^Uv|m,~{s15}"(float 1.000000e+00) nounwind +ret i32 0 +} + +; Radar 9037836 & 9119939 + +@k.2126 = internal unnamed_addr global float 1.000000e+00 +define i32 @t4() nounwind { +entry: +call void asm sideeffect "flds s15, $0 \0A", "*^Uv,~{s15}"(float* @k.2126) nounwind +ret i32 0 +} + +; Radar 9037836 & 9119939 + +define i32 @t5() nounwind { +entry: +call void asm sideeffect "flds s15, $0 \0A", "*^Uvm,~{s15}"(float* @k.2126) nounwind +ret i32 0 +} + +; Radar 9307836 & 9119939 + +define float @t6(float %y) nounwind { +entry: +; CHECK: t6 +; CHECK: flds s15, s0 + %0 = tail call float asm "flds s15, $0", "=x"() nounwind + ret float %0 +} + +; Radar 9307836 & 9119939 + +define double @t7(double %y) nounwind { +entry: +; CHECK: t7 +; CHECK: flds s15, d0 + %0 = tail call double asm "flds s15, $0", "=x"() nounwind + ret double %0 +} + +; Radar 9307836 & 9119939 + +define float @t8(float %y) 
nounwind { +entry: +; CHECK: t8 +; CHECK: flds s15, s0 + %0 = tail call float asm "flds s15, $0", "=t"() nounwind + ret float %0 +} + +; Radar 9307836 & 9119939 + +define i32 @t9(i32 %r0) nounwind { +entry: +; CHECK: t9 +; CHECK: movw r0, #27182 + %0 = tail call i32 asm "movw $0, $1", "=r,j"(i32 27182) nounwind + ret i32 %0 +} + +; Radar 9866494 + +define void @t10(i8* %f, i32 %g) nounwind { +entry: +; CHECK: t10 +; CHECK: str r1, [r0] + %f.addr = alloca i8*, align 4 + store i8* %f, i8** %f.addr, align 4 + call void asm "str $1, $0", "=*Q,r"(i8** %f.addr, i32 %g) nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/inlineasm4.ll b/src/LLVM/test/CodeGen/ARM/inlineasm4.ll new file mode 100644 index 0000000..9ed4b99 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/inlineasm4.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=arm | FileCheck %s + +define double @f(double %x) { +entry: + %0 = tail call double asm "mov ${0:R}, #4\0A", "=&r"() + ret double %0 +; CHECK: f: +; CHECK: mov r1, #4 +} + +define double @g(double %x) { +entry: + %0 = tail call double asm "mov ${0:Q}, #4\0A", "=&r"() + ret double %0 +; CHECK: g: +; CHECK: mov r0, #4 +}
diff --git a/src/LLVM/test/CodeGen/ARM/insn-sched1.ll b/src/LLVM/test/CodeGen/ARM/insn-sched1.ll new file mode 100644 index 0000000..80844a0 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/insn-sched1.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=arm -mattr=+v6 +; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6 |\ +; RUN: grep mov | count 3 + +define i32 @test(i32 %x) { + %tmp = trunc i32 %x to i16 ; <i16> [#uses=1] + %tmp2 = call i32 @f( i32 1, i16 %tmp ) ; <i32> [#uses=1] + ret i32 %tmp2 +} + +declare i32 @f(i32, i16)
diff --git a/src/LLVM/test/CodeGen/ARM/int-to-fp.ll b/src/LLVM/test/CodeGen/ARM/int-to-fp.ll new file mode 100644 index 0000000..889b149 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/int-to-fp.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin10.0.0" + +; CHECK: sint_to_fp +; CHECK: vmovl.s16 +; CHECK: vcvt.f32.s32 +define <4 x float> @sint_to_fp(<4 x i16> %x) nounwind ssp { + %a = sitofp <4 x i16> %x to <4 x float> + ret <4 x float> %a +} + +; CHECK: uint_to_fp +; CHECK: vmovl.u16 +; CHECK: vcvt.f32.u32 +define <4 x float> @uint_to_fp(<4 x i16> %x) nounwind ssp { + %a = uitofp <4 x i16> %x to <4 x float> + ret <4 x float> %a +}
diff --git a/src/LLVM/test/CodeGen/ARM/intrinsics.ll b/src/LLVM/test/CodeGen/ARM/intrinsics.ll new file mode 100644 index 0000000..54cc3e0 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/intrinsics.ll
@@ -0,0 +1,39 @@ +; RUN: llc < %s -mtriple=armv7-eabi -mcpu=cortex-a8 | FileCheck %s +; RUN: llc < %s -march=thumb -mtriple=thumbv7-eabi -mcpu=cortex-a8 | FileCheck %s + +define void @coproc() nounwind { +entry: + ; CHECK: mrc + %0 = tail call i32 @llvm.arm.mrc(i32 7, i32 1, i32 1, i32 1, i32 4) nounwind + ; CHECK: mcr + tail call void @llvm.arm.mcr(i32 7, i32 1, i32 %0, i32 1, i32 1, i32 4) nounwind + ; CHECK: mrc2 + %1 = tail call i32 @llvm.arm.mrc2(i32 7, i32 1, i32 1, i32 1, i32 4) nounwind + ; CHECK: mcr2 + tail call void @llvm.arm.mcr2(i32 7, i32 1, i32 %1, i32 1, i32 1, i32 4) nounwind + ; CHECK: mcrr + tail call void @llvm.arm.mcrr(i32 7, i32 1, i32 %0, i32 %1, i32 1) nounwind + ; CHECK: mcrr2 + tail call void @llvm.arm.mcrr2(i32 7, i32 1, i32 %0, i32 %1, i32 1) nounwind + ; CHECK: cdp + tail call void @llvm.arm.cdp(i32 7, i32 3, i32 1, i32 1, i32 1, i32 5) nounwind + ; CHECK: cdp2 + tail call void @llvm.arm.cdp2(i32 7, i32 3, i32 1, i32 1, i32 1, i32 5) nounwind + ret void +} + +declare void @llvm.arm.cdp2(i32, i32, i32, i32, i32, i32) nounwind + +declare void @llvm.arm.cdp(i32, i32, i32, i32, i32, i32) nounwind + +declare void @llvm.arm.mcrr2(i32, i32, i32, i32, i32) nounwind + +declare void @llvm.arm.mcrr(i32, i32, i32, i32, i32) nounwind + +declare void @llvm.arm.mcr2(i32, i32, i32, i32, i32, i32) nounwind + +declare i32 @llvm.arm.mrc2(i32, i32, i32, i32, i32) nounwind + +declare void @llvm.arm.mcr(i32, i32, i32, i32, i32, i32) nounwind + +declare i32 @llvm.arm.mrc(i32, i32, i32, i32, i32) nounwind
diff --git a/src/LLVM/test/CodeGen/ARM/ispositive.ll b/src/LLVM/test/CodeGen/ARM/ispositive.ll new file mode 100644 index 0000000..22ec2a5 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ispositive.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=arm | FileCheck %s + +define i32 @test1(i32 %X) { +; CHECK: lsr{{.*}}#31 +entry: + icmp slt i32 %X, 0 ; <i1>:0 [#uses=1] + zext i1 %0 to i32 ; <i32>:1 [#uses=1] + ret i32 %1 +} +
diff --git a/src/LLVM/test/CodeGen/ARM/jumptable-label.ll b/src/LLVM/test/CodeGen/ARM/jumptable-label.ll new file mode 100644 index 0000000..49d6986 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/jumptable-label.ll
@@ -0,0 +1,33 @@ +; RUN: llc < %s -mtriple thumbv6-apple-macosx10.6.0 | FileCheck %s + +; test that we print the label of a bb that is only used in a jump table. + +; CHECK: .long LBB0_2 +; CHECK: LBB0_2: + +define i32 @calculate() { +entry: + switch i32 undef, label %return [ + i32 1, label %sw.bb + i32 2, label %sw.bb6 + i32 3, label %sw.bb13 + i32 4, label %sw.bb20 + ] + +sw.bb: ; preds = %entry + br label %return + +sw.bb6: ; preds = %entry + br label %return + +sw.bb13: ; preds = %entry + br label %return + +sw.bb20: ; preds = %entry + %div = sdiv i32 undef, undef + br label %return + +return: ; preds = %sw.bb20, %sw.bb13, %sw.bb6, %sw.bb, %entry + %retval.0 = phi i32 [ %div, %sw.bb20 ], [ undef, %sw.bb13 ], [ undef, %sw.bb6 ], [ undef, %sw.bb ], [ 0, %entry ] + ret i32 %retval.0 +}
diff --git a/src/LLVM/test/CodeGen/ARM/large-stack.ll b/src/LLVM/test/CodeGen/ARM/large-stack.ll new file mode 100644 index 0000000..97e330b --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/large-stack.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -march=arm + +define void @test1() { + %tmp = alloca [ 64 x i32 ] , align 4 + ret void +} + +define void @test2() { + %tmp = alloca [ 4168 x i8 ] , align 4 + ret void +} + +define i32 @test3() { + %retval = alloca i32, align 4 + %tmp = alloca i32, align 4 + %a = alloca [805306369 x i8], align 16 + store i32 0, i32* %tmp + %tmp1 = load i32* %tmp + ret i32 %tmp1 +}
diff --git a/src/LLVM/test/CodeGen/ARM/ldm.ll b/src/LLVM/test/CodeGen/ARM/ldm.ll new file mode 100644 index 0000000..14aec01 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ldm.ll
@@ -0,0 +1,46 @@ +; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=armv4t-apple-darwin | FileCheck %s -check-prefix=V4T + +@X = external global [0 x i32] ; <[0 x i32]*> [#uses=5] + +define i32 @t1() { +; CHECK: t1: +; CHECK: pop +; V4T: t1: +; V4T: pop + %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0) ; <i32> [#uses=1] + %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1] + %tmp4 = tail call i32 @f1( i32 %tmp, i32 %tmp3 ) ; <i32> [#uses=1] + ret i32 %tmp4 +} + +define i32 @t2() { +; CHECK: t2: +; CHECK: pop +; V4T: t2: +; V4T: pop + %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1] + %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1] + %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 4) ; <i32> [#uses=1] + %tmp6 = tail call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; <i32> [#uses=1] + ret i32 %tmp6 +} + +define i32 @t3() { +; CHECK: t3: +; CHECK: ldmib +; CHECK: pop +; V4T: t3: +; V4T: ldmib +; V4T: pop +; V4T-NEXT: bx lr + %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1] + %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1] + %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1] + %tmp6 = call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; <i32> [#uses=1] + ret i32 %tmp6 +} + +declare i32 @f1(i32, i32) + +declare i32 @f2(i32, i32, i32)
diff --git a/src/LLVM/test/CodeGen/ARM/ldr.ll b/src/LLVM/test/CodeGen/ARM/ldr.ll new file mode 100644 index 0000000..25e296b --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ldr.ll
@@ -0,0 +1,71 @@ +; RUN: llc < %s -march=arm | FileCheck %s + +define i32 @f1(i32* %v) { +; CHECK: f1: +; CHECK: ldr r0 +entry: + %tmp = load i32* %v + ret i32 %tmp +} + +define i32 @f2(i32* %v) { +; CHECK: f2: +; CHECK: ldr r0 +entry: + %tmp2 = getelementptr i32* %v, i32 1023 + %tmp = load i32* %tmp2 + ret i32 %tmp +} + +define i32 @f3(i32* %v) { +; CHECK: f3: +; CHECK: mov +; CHECK: ldr r0 +entry: + %tmp2 = getelementptr i32* %v, i32 1024 + %tmp = load i32* %tmp2 + ret i32 %tmp +} + +define i32 @f4(i32 %base) { +; CHECK: f4: +; CHECK-NOT: mvn +; CHECK: ldr r0 +entry: + %tmp1 = sub i32 %base, 128 + %tmp2 = inttoptr i32 %tmp1 to i32* + %tmp3 = load i32* %tmp2 + ret i32 %tmp3 +} + +define i32 @f5(i32 %base, i32 %offset) { +; CHECK: f5: +; CHECK: ldr r0 +entry: + %tmp1 = add i32 %base, %offset + %tmp2 = inttoptr i32 %tmp1 to i32* + %tmp3 = load i32* %tmp2 + ret i32 %tmp3 +} + +define i32 @f6(i32 %base, i32 %offset) { +; CHECK: f6: +; CHECK: ldr r0{{.*}}lsl{{.*}} +entry: + %tmp1 = shl i32 %offset, 2 + %tmp2 = add i32 %base, %tmp1 + %tmp3 = inttoptr i32 %tmp2 to i32* + %tmp4 = load i32* %tmp3 + ret i32 %tmp4 +} + +define i32 @f7(i32 %base, i32 %offset) { +; CHECK: f7: +; CHECK: ldr r0{{.*}}lsr{{.*}} +entry: + %tmp1 = lshr i32 %offset, 2 + %tmp2 = add i32 %base, %tmp1 + %tmp3 = inttoptr i32 %tmp2 to i32* + %tmp4 = load i32* %tmp3 + ret i32 %tmp4 +}
diff --git a/src/LLVM/test/CodeGen/ARM/ldr_ext.ll b/src/LLVM/test/CodeGen/ARM/ldr_ext.ll new file mode 100644 index 0000000..c37f962 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ldr_ext.ll
@@ -0,0 +1,37 @@ +; RUN: llc < %s -march=arm | FileCheck %s + +define i32 @test1(i8* %t1) nounwind { +; CHECK: ldrb + %tmp.u = load i8* %t1 + %tmp1.s = zext i8 %tmp.u to i32 + ret i32 %tmp1.s +} + +define i32 @test2(i16* %t1) nounwind { +; CHECK: ldrh + %tmp.u = load i16* %t1 + %tmp1.s = zext i16 %tmp.u to i32 + ret i32 %tmp1.s +} + +define i32 @test3(i8* %t0) nounwind { +; CHECK: ldrsb + %tmp.s = load i8* %t0 + %tmp1.s = sext i8 %tmp.s to i32 + ret i32 %tmp1.s +} + +define i32 @test4(i16* %t0) nounwind { +; CHECK: ldrsh + %tmp.s = load i16* %t0 + %tmp1.s = sext i16 %tmp.s to i32 + ret i32 %tmp1.s +} + +define i32 @test5() nounwind { +; CHECK: mov r0, #0 +; CHECK: ldrsh + %tmp.s = load i16* null + %tmp1.s = sext i16 %tmp.s to i32 + ret i32 %tmp1.s +}
diff --git a/src/LLVM/test/CodeGen/ARM/ldr_frame.ll b/src/LLVM/test/CodeGen/ARM/ldr_frame.ll new file mode 100644 index 0000000..9810041 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ldr_frame.ll
@@ -0,0 +1,31 @@ +; RUN: llc < %s -march=arm -mattr=+v4t | not grep mov + +define i32 @f1() { + %buf = alloca [32 x i32], align 4 + %tmp = getelementptr [32 x i32]* %buf, i32 0, i32 0 + %tmp1 = load i32* %tmp + ret i32 %tmp1 +} + +define i32 @f2() { + %buf = alloca [32 x i8], align 4 + %tmp = getelementptr [32 x i8]* %buf, i32 0, i32 0 + %tmp1 = load i8* %tmp + %tmp2 = zext i8 %tmp1 to i32 + ret i32 %tmp2 +} + +define i32 @f3() { + %buf = alloca [32 x i32], align 4 + %tmp = getelementptr [32 x i32]* %buf, i32 0, i32 32 + %tmp1 = load i32* %tmp + ret i32 %tmp1 +} + +define i32 @f4() { + %buf = alloca [32 x i8], align 4 + %tmp = getelementptr [32 x i8]* %buf, i32 0, i32 2 + %tmp1 = load i8* %tmp + %tmp2 = zext i8 %tmp1 to i32 + ret i32 %tmp2 +}
diff --git a/src/LLVM/test/CodeGen/ARM/ldr_post.ll b/src/LLVM/test/CodeGen/ARM/ldr_post.ll new file mode 100644 index 0000000..8abb20e --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ldr_post.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=arm | \ +; RUN: grep {ldr.*\\\[.*\],} | count 1 + +define i32 @test(i32 %a, i32 %b, i32 %c) { + %tmp1 = mul i32 %a, %b ; <i32> [#uses=2] + %tmp2 = inttoptr i32 %tmp1 to i32* ; <i32*> [#uses=1] + %tmp3 = load i32* %tmp2 ; <i32> [#uses=1] + %tmp4 = sub i32 %tmp1, %c ; <i32> [#uses=1] + %tmp5 = mul i32 %tmp4, %tmp3 ; <i32> [#uses=1] + ret i32 %tmp5 +} +
diff --git a/src/LLVM/test/CodeGen/ARM/ldr_pre.ll b/src/LLVM/test/CodeGen/ARM/ldr_pre.ll new file mode 100644 index 0000000..57edef3 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ldr_pre.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -march=arm | \ +; RUN: grep {ldr.*\\!} | count 2 + +define i32* @test1(i32* %X, i32* %dest) { + %Y = getelementptr i32* %X, i32 4 ; <i32*> [#uses=2] + %A = load i32* %Y ; <i32> [#uses=1] + store i32 %A, i32* %dest + ret i32* %Y +} + +define i32 @test2(i32 %a, i32 %b, i32 %c) { + %tmp1 = sub i32 %a, %b ; <i32> [#uses=2] + %tmp2 = inttoptr i32 %tmp1 to i32* ; <i32*> [#uses=1] + %tmp3 = load i32* %tmp2 ; <i32> [#uses=1] + %tmp4 = sub i32 %tmp1, %c ; <i32> [#uses=1] + %tmp5 = add i32 %tmp4, %tmp3 ; <i32> [#uses=1] + ret i32 %tmp5 +} +
diff --git a/src/LLVM/test/CodeGen/ARM/ldrd.ll b/src/LLVM/test/CodeGen/ARM/ldrd.ll new file mode 100644 index 0000000..8010f20 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ldrd.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -mtriple=armv6-apple-darwin -regalloc=linearscan | FileCheck %s -check-prefix=V6 +; RUN: llc < %s -mtriple=armv5-apple-darwin -regalloc=linearscan | FileCheck %s -check-prefix=V5 +; RUN: llc < %s -mtriple=armv6-eabi -regalloc=linearscan | FileCheck %s -check-prefix=EABI +; rdar://r6949835 + +; Magic ARM pair hints works best with linearscan. + +@b = external global i64* + +define i64 @t(i64 %a) nounwind readonly { +entry: +;V6: ldrd r2, r3, [r2] + +;V5: ldr r{{[0-9]+}}, [r2] +;V5: ldr r{{[0-9]+}}, [r2, #4] + +;EABI: ldr r{{[0-9]+}}, [r2] +;EABI: ldr r{{[0-9]+}}, [r2, #4] + + %0 = load i64** @b, align 4 + %1 = load i64* %0, align 4 + %2 = mul i64 %1, %a + ret i64 %2 +}
diff --git a/src/LLVM/test/CodeGen/ARM/ldst-f32-2-i32.ll b/src/LLVM/test/CodeGen/ARM/ldst-f32-2-i32.ll new file mode 100644 index 0000000..1c69e15 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ldst-f32-2-i32.ll
@@ -0,0 +1,28 @@ +; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s +; Check if the f32 load / store pair are optimized to i32 load / store. +; rdar://8944252 + +define void @t(i32 %width, float* nocapture %src, float* nocapture %dst, i32 %index) nounwind { +; CHECK: t: +entry: + %src6 = bitcast float* %src to i8* + %0 = icmp eq i32 %width, 0 + br i1 %0, label %return, label %bb + +bb: +; CHECK: ldr [[REGISTER:(r[0-9]+)]], [{{r[0-9]+}}], {{r[0-9]+}} +; CHECK: str [[REGISTER]], [{{r[0-9]+}}], #4 + %j.05 = phi i32 [ %2, %bb ], [ 0, %entry ] + %tmp = mul i32 %j.05, %index + %uglygep = getelementptr i8* %src6, i32 %tmp + %src_addr.04 = bitcast i8* %uglygep to float* + %dst_addr.03 = getelementptr float* %dst, i32 %j.05 + %1 = load float* %src_addr.04, align 4 + store float %1, float* %dst_addr.03, align 4 + %2 = add i32 %j.05, 1 + %exitcond = icmp eq i32 %2, %width + br i1 %exitcond, label %return, label %bb + +return: + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/ldstrexd.ll b/src/LLVM/test/CodeGen/ARM/ldstrexd.ll new file mode 100644 index 0000000..0c0911a --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ldstrexd.ll
@@ -0,0 +1,33 @@ +; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s + +%0 = type { i32, i32 } + +; CHECK: f0: +; CHECK: ldrexd +define i64 @f0(i8* %p) nounwind readonly { +entry: + %ldrexd = tail call %0 @llvm.arm.ldrexd(i8* %p) + %0 = extractvalue %0 %ldrexd, 1 + %1 = extractvalue %0 %ldrexd, 0 + %2 = zext i32 %0 to i64 + %3 = zext i32 %1 to i64 + %shl = shl nuw i64 %2, 32 + %4 = or i64 %shl, %3 + ret i64 %4 +} + +; CHECK: f1: +; CHECK: strexd +define i32 @f1(i8* %ptr, i64 %val) nounwind { +entry: + %tmp4 = trunc i64 %val to i32 + %tmp6 = lshr i64 %val, 32 + %tmp7 = trunc i64 %tmp6 to i32 + %strexd = tail call i32 @llvm.arm.strexd(i32 %tmp4, i32 %tmp7, i8* %ptr) + ret i32 %strexd +} + +declare %0 @llvm.arm.ldrexd(i8*) nounwind readonly +declare i32 @llvm.arm.strexd(i32, i32, i8*) nounwind +
diff --git a/src/LLVM/test/CodeGen/ARM/load-global.ll b/src/LLVM/test/CodeGen/ARM/load-global.ll new file mode 100644 index 0000000..15a415d --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/load-global.ll
@@ -0,0 +1,50 @@ +; RUN: llc < %s -mtriple=armv6-apple-darwin -relocation-model=static | FileCheck %s -check-prefix=STATIC +; RUN: llc < %s -mtriple=armv6-apple-darwin -relocation-model=dynamic-no-pic | FileCheck %s -check-prefix=DYNAMIC +; RUN: llc < %s -mtriple=armv6-apple-darwin -relocation-model=pic | FileCheck %s -check-prefix=PIC +; RUN: llc < %s -mtriple=thumbv6-apple-darwin -relocation-model=pic | FileCheck %s -check-prefix=PIC_T +; RUN: llc < %s -mtriple=armv7-apple-darwin -relocation-model=pic | FileCheck %s -check-prefix=PIC_V7 +; RUN: llc < %s -mtriple=armv6-linux-gnueabi -relocation-model=pic | FileCheck %s -check-prefix=LINUX + +@G = external global i32 + +define i32 @test1() { +; STATIC: _test1: +; STATIC: ldr r0, LCPI0_0 +; STATIC: ldr r0, [r0] +; STATIC: .long _G + +; DYNAMIC: _test1: +; DYNAMIC: ldr r0, LCPI0_0 +; DYNAMIC: ldr r0, [r0] +; DYNAMIC: ldr r0, [r0] +; DYNAMIC: .long L_G$non_lazy_ptr + +; PIC: _test1 +; PIC: ldr r0, LCPI0_0 +; PIC: ldr r0, [pc, r0] +; PIC: ldr r0, [r0] +; PIC: .long L_G$non_lazy_ptr-(LPC0_0+8) + +; PIC_T: _test1 +; PIC_T: ldr.n r0, LCPI0_0 +; PIC_T: add r0, pc +; PIC_T: ldr r0, [r0] +; PIC_T: ldr r0, [r0] +; PIC_T: .long L_G$non_lazy_ptr-(LPC0_0+4) + +; PIC_V7: _test1 +; PIC_V7: movw r0, :lower16:(L_G$non_lazy_ptr-(LPC0_0+8)) +; PIC_V7: movt r0, :upper16:(L_G$non_lazy_ptr-(LPC0_0+8)) +; PIC_V7: ldr r0, [pc, r0] +; PIC_V7: ldr r0, [r0] + +; LINUX: test1 +; LINUX: ldr r0, .LCPI0_0 +; LINUX: ldr r1, .LCPI0_1 +; LINUX: add r0, pc, r0 +; LINUX: ldr r0, [r1, r0] +; LINUX: ldr r0, [r0] +; LINUX: .long G(GOT) + %tmp = load i32* @G + ret i32 %tmp +}
diff --git a/src/LLVM/test/CodeGen/ARM/load.ll b/src/LLVM/test/CodeGen/ARM/load.ll new file mode 100644 index 0000000..1f7e7d5 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/load.ll
@@ -0,0 +1,34 @@ +; RUN: llc < %s -march=arm > %t +; RUN: grep ldrsb %t +; RUN: grep ldrb %t +; RUN: grep ldrsh %t +; RUN: grep ldrh %t + + +define i32 @f1(i8* %p) { +entry: + %tmp = load i8* %p ; <i8> [#uses=1] + %tmp1 = sext i8 %tmp to i32 ; <i32> [#uses=1] + ret i32 %tmp1 +} + +define i32 @f2(i8* %p) { +entry: + %tmp = load i8* %p ; <i8> [#uses=1] + %tmp2 = zext i8 %tmp to i32 ; <i32> [#uses=1] + ret i32 %tmp2 +} + +define i32 @f3(i16* %p) { +entry: + %tmp = load i16* %p ; <i16> [#uses=1] + %tmp3 = sext i16 %tmp to i32 ; <i32> [#uses=1] + ret i32 %tmp3 +} + +define i32 @f4(i16* %p) { +entry: + %tmp = load i16* %p ; <i16> [#uses=1] + %tmp4 = zext i16 %tmp to i32 ; <i32> [#uses=1] + ret i32 %tmp4 +}
diff --git a/src/LLVM/test/CodeGen/ARM/long-setcc.ll b/src/LLVM/test/CodeGen/ARM/long-setcc.ll new file mode 100644 index 0000000..265ec09 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/long-setcc.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=arm | grep cmp | count 1 + + +define i1 @t1(i64 %x) { + %B = icmp slt i64 %x, 0 + ret i1 %B +} + +define i1 @t2(i64 %x) { + %tmp = icmp ult i64 %x, 4294967296 + ret i1 %tmp +} + +define i1 @t3(i32 %x) { + %tmp = icmp ugt i32 %x, -1 + ret i1 %tmp +}
diff --git a/src/LLVM/test/CodeGen/ARM/long.ll b/src/LLVM/test/CodeGen/ARM/long.ll new file mode 100644 index 0000000..532fb89 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/long.ll
@@ -0,0 +1,90 @@ +; RUN: llc < %s -march=arm | FileCheck %s + +define i64 @f1() { +; CHECK: f1: +entry: + ret i64 0 +} + +define i64 @f2() { +; CHECK: f2: +entry: + ret i64 1 +} + +define i64 @f3() { +; CHECK: f3: +; CHECK: mvn r0, #-2147483648 +entry: + ret i64 2147483647 +} + +define i64 @f4() { +; CHECK: f4: +; CHECK: mov r0, #-2147483648 +entry: + ret i64 2147483648 +} + +define i64 @f5() { +; CHECK: f5: +; CHECK: mvn r0, #0 +; CHECK: mvn r1, #-2147483648 +entry: + ret i64 9223372036854775807 +} + +define i64 @f6(i64 %x, i64 %y) { +; CHECK: f6: +; CHECK: adds +; CHECK: adc +entry: + %tmp1 = add i64 %y, 1 ; <i64> [#uses=1] + ret i64 %tmp1 +} + +define void @f7() { +; CHECK: f7: +entry: + %tmp = call i64 @f8( ) ; <i64> [#uses=0] + ret void +} + +declare i64 @f8() + +define i64 @f9(i64 %a, i64 %b) { +; CHECK: f9: +; CHECK: subs r +; CHECK: sbc +entry: + %tmp = sub i64 %a, %b ; <i64> [#uses=1] + ret i64 %tmp +} + +define i64 @f(i32 %a, i32 %b) { +; CHECK: f: +; CHECK: smull +entry: + %tmp = sext i32 %a to i64 ; <i64> [#uses=1] + %tmp1 = sext i32 %b to i64 ; <i64> [#uses=1] + %tmp2 = mul i64 %tmp1, %tmp ; <i64> [#uses=1] + ret i64 %tmp2 +} + +define i64 @g(i32 %a, i32 %b) { +; CHECK: g: +; CHECK: umull +entry: + %tmp = zext i32 %a to i64 ; <i64> [#uses=1] + %tmp1 = zext i32 %b to i64 ; <i64> [#uses=1] + %tmp2 = mul i64 %tmp1, %tmp ; <i64> [#uses=1] + ret i64 %tmp2 +} + +define i64 @f10() { +; CHECK: f10: +entry: + %a = alloca i64, align 8 ; <i64*> [#uses=1] + %retval = load i64* %a ; <i64> [#uses=1] + ret i64 %retval +}
diff --git a/src/LLVM/test/CodeGen/ARM/long_shift.ll b/src/LLVM/test/CodeGen/ARM/long_shift.ll new file mode 100644 index 0000000..792ce8a --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/long_shift.ll
@@ -0,0 +1,47 @@ +; RUN: llc < %s -march=arm | FileCheck %s + +define i64 @f0(i64 %A, i64 %B) { +; CHECK: f0 +; CHECK: lsrs r3, r3, #1 +; CHECK-NEXT: rrx r2, r2 +; CHECK-NEXT: subs r0, r0, r2 +; CHECK-NEXT: sbc r1, r1, r3 + %tmp = bitcast i64 %A to i64 + %tmp2 = lshr i64 %B, 1 + %tmp3 = sub i64 %tmp, %tmp2 + ret i64 %tmp3 +} + +define i32 @f1(i64 %x, i64 %y) { +; CHECK: f1 +; CHECK: lsl{{.*}}r2 + %a = shl i64 %x, %y + %b = trunc i64 %a to i32 + ret i32 %b +} + +define i32 @f2(i64 %x, i64 %y) { +; CHECK: f2 +; CHECK: lsr{{.*}}r2 +; CHECK-NEXT: rsb r3, r2, #32 +; CHECK-NEXT: sub r2, r2, #32 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: orr r0, r0, r1, lsl r3 +; CHECK-NEXT: asrge r0, r1, r2 + %a = ashr i64 %x, %y + %b = trunc i64 %a to i32 + ret i32 %b +} + +define i32 @f3(i64 %x, i64 %y) { +; CHECK: f3 +; CHECK: lsr{{.*}}r2 +; CHECK-NEXT: rsb r3, r2, #32 +; CHECK-NEXT: sub r2, r2, #32 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: orr r0, r0, r1, lsl r3 +; CHECK-NEXT: lsrge r0, r1, r2 + %a = lshr i64 %x, %y + %b = trunc i64 %a to i32 + ret i32 %b +}
diff --git a/src/LLVM/test/CodeGen/ARM/lsr-code-insertion.ll b/src/LLVM/test/CodeGen/ARM/lsr-code-insertion.ll new file mode 100644 index 0000000..a8097b2 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/lsr-code-insertion.ll
@@ -0,0 +1,64 @@ +; RUN: llc < %s | FileCheck %s +; This test really wants to check that the resultant "cond_true" block only +; has a single store in it, and that cond_true55 only has code to materialize +; the constant and do a store. We do *not* want something like this: +; +;LBB1_3: @cond_true +; add r8, r0, r6 +; str r10, [r8, #+4] +; +; CHECK: ldr [[R6:r[0-9*]+]], LCP +; CHECK: cmp {{.*}}, [[R6]] +; CHECK: ldrle +; CHECK-NEXT: strle + +target triple = "arm-apple-darwin8" + +define void @foo(i32* %mc, i32* %mpp, i32* %ip, i32* %dpp, i32* %tpmm, i32 %M, i32* %tpim, i32* %tpdm, i32* %bp, i32* %ms, i32 %xmb) { +entry: + %tmp6584 = icmp slt i32 %M, 1 ; <i1> [#uses=1] + br i1 %tmp6584, label %return, label %bb + +bb: ; preds = %cond_next59, %entry + %indvar = phi i32 [ 0, %entry ], [ %k.069.0, %cond_next59 ] ; <i32> [#uses=6] + %k.069.0 = add i32 %indvar, 1 ; <i32> [#uses=3] + %tmp3 = getelementptr i32* %mpp, i32 %indvar ; <i32*> [#uses=1] + %tmp4 = load i32* %tmp3 ; <i32> [#uses=1] + %tmp8 = getelementptr i32* %tpmm, i32 %indvar ; <i32*> [#uses=1] + %tmp9 = load i32* %tmp8 ; <i32> [#uses=1] + %tmp10 = add i32 %tmp9, %tmp4 ; <i32> [#uses=2] + %tmp13 = getelementptr i32* %mc, i32 %k.069.0 ; <i32*> [#uses=5] + store i32 %tmp10, i32* %tmp13 + %tmp17 = getelementptr i32* %ip, i32 %indvar ; <i32*> [#uses=1] + %tmp18 = load i32* %tmp17 ; <i32> [#uses=1] + %tmp22 = getelementptr i32* %tpim, i32 %indvar ; <i32*> [#uses=1] + %tmp23 = load i32* %tmp22 ; <i32> [#uses=1] + %tmp24 = add i32 %tmp23, %tmp18 ; <i32> [#uses=2] + %tmp30 = icmp sgt i32 %tmp24, %tmp10 ; <i1> [#uses=1] + br i1 %tmp30, label %cond_true, label %cond_next + +cond_true: ; preds = %bb + store i32 %tmp24, i32* %tmp13 + br label %cond_next + +cond_next: ; preds = %cond_true, %bb + %tmp39 = load i32* %tmp13 ; <i32> [#uses=1] + %tmp42 = getelementptr i32* %ms, i32 %k.069.0 ; <i32*> [#uses=1] + %tmp43 = load i32* %tmp42 ; <i32> [#uses=1] + %tmp44 = add i32 %tmp43, %tmp39 ; <i32> [#uses=2] + store i32 %tmp44, 
i32* %tmp13 + %tmp52 = icmp slt i32 %tmp44, -987654321 ; <i1> [#uses=1] + br i1 %tmp52, label %cond_true55, label %cond_next59 + +cond_true55: ; preds = %cond_next + store i32 -987654321, i32* %tmp13 + br label %cond_next59 + +cond_next59: ; preds = %cond_true55, %cond_next + %tmp61 = add i32 %indvar, 2 ; <i32> [#uses=1] + %tmp65 = icmp sgt i32 %tmp61, %M ; <i1> [#uses=1] + br i1 %tmp65, label %return, label %bb + +return: ; preds = %cond_next59, %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/lsr-on-unrolled-loops.ll b/src/LLVM/test/CodeGen/ARM/lsr-on-unrolled-loops.ll new file mode 100644 index 0000000..4737901 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
@@ -0,0 +1,640 @@ +; RUN: llc -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8 -enable-lsr-nested < %s | FileCheck %s + +; LSR should recognize that this is an unrolled loop which can use +; constant offset addressing, so that each of the following stores +; uses the same register. + +; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #32] +; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #64] +; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #96] + +; We can also save a register in the outer loop, but that requires +; performing LSR on the outer loop. + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" + +%0 = type { %1*, %3*, %6*, i8*, i32, i32, %8*, i32, i32, i32, i32, i32, i32, i32, double, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8**, i32, i32, i32, i32, i32, [64 x i32]*, [4 x %9*], [4 x %10*], [4 x %10*], i32, %11*, i32, i32, [16 x i8], [16 x i8], [16 x i8], i32, i32, i8, i8, i8, i16, i16, i32, i8, i32, %12*, i32, i32, i32, i32, i8*, i32, [4 x %11*], i32, i32, i32, [10 x i32], i32, i32, i32, i32, i32, %13*, %14*, %15*, %16*, %17*, %18*, %19*, %20*, %21*, %22*, %23* } +%1 = type { void (%2*)*, void (%2*, i32)*, void (%2*)*, void (%2*, i8*)*, void (%2*)*, i32, %7, i32, i32, i8**, i32, i8**, i32, i32 } +%2 = type { %1*, %3*, %6*, i8*, i32, i32 } +%3 = type { i8* (%2*, i32, i32)*, i8* (%2*, i32, i32)*, i8** (%2*, i32, i32, i32)*, [64 x i16]** (%2*, i32, i32, i32)*, %4* (%2*, i32, i32, i32, i32, i32)*, %5* (%2*, i32, i32, i32, i32, i32)*, void (%2*)*, i8** (%2*, %4*, i32, i32, i32)*, [64 x i16]** (%2*, %5*, i32, i32, i32)*, void (%2*, i32)*, void (%2*)*, i32, i32 } +%4 = type opaque +%5 = type opaque +%6 = type { void (%2*)*, i32, i32, i32, i32 } +%7 = type { [8 x i32], [12 x i32] } +%8 = type { i8*, i32, void (%0*)*, i32 (%0*)*, void (%0*, i32)*, i32 (%0*, i32)*, void (%0*)* } +%9 = type { [64 x i16], i32 } +%10 = type { [17 x i8], [256 
x i8], i32 } +%11 = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %9*, i8* } +%12 = type { %12*, i8, i32, i32, i8* } +%13 = type { void (%0*)*, void (%0*)*, i32 } +%14 = type { void (%0*, i32)*, void (%0*, i8**, i32*, i32)* } +%15 = type { void (%0*)*, i32 (%0*)*, void (%0*)*, i32 (%0*, i8***)*, %5** } +%16 = type { void (%0*, i32)*, void (%0*, i8***, i32*, i32, i8**, i32*, i32)* } +%17 = type { i32 (%0*)*, void (%0*)*, void (%0*)*, void (%0*)*, i32, i32 } +%18 = type { void (%0*)*, i32 (%0*)*, i32 (%0*)*, i32, i32, i32, i32 } +%19 = type { void (%0*)*, i32 (%0*, [64 x i16]**)*, i32 } +%20 = type { void (%0*)*, [10 x void (%0*, %11*, i16*, i8**, i32)*] } +%21 = type { void (%0*)*, void (%0*, i8***, i32*, i32, i8**, i32*, i32)*, i32 } +%22 = type { void (%0*)*, void (%0*, i8***, i32, i8**, i32)* } +%23 = type { void (%0*, i32)*, void (%0*, i8**, i8**, i32)*, void (%0*)*, void (%0*)* } + +define void @test(%0* nocapture %a0, %11* nocapture %a1, i16* nocapture %a2, i8** nocapture %a3, i32 %a4) nounwind { +bb: + %t = alloca [64 x float], align 4 + %t5 = getelementptr inbounds %0* %a0, i32 0, i32 65 + %t6 = load i8** %t5, align 4 + %t7 = getelementptr inbounds %11* %a1, i32 0, i32 20 + %t8 = load i8** %t7, align 4 + br label %bb9 + +bb9: + %t10 = phi i32 [ 0, %bb ], [ %t157, %bb156 ] + %t11 = add i32 %t10, 8 + %t12 = getelementptr [64 x float]* %t, i32 0, i32 %t11 + %t13 = add i32 %t10, 16 + %t14 = getelementptr [64 x float]* %t, i32 0, i32 %t13 + %t15 = add i32 %t10, 24 + %t16 = getelementptr [64 x float]* %t, i32 0, i32 %t15 + %t17 = add i32 %t10, 32 + %t18 = getelementptr [64 x float]* %t, i32 0, i32 %t17 + %t19 = add i32 %t10, 40 + %t20 = getelementptr [64 x float]* %t, i32 0, i32 %t19 + %t21 = add i32 %t10, 48 + %t22 = getelementptr [64 x float]* %t, i32 0, i32 %t21 + %t23 = add i32 %t10, 56 + %t24 = getelementptr [64 x float]* %t, i32 0, i32 %t23 + %t25 = getelementptr [64 x float]* %t, i32 0, i32 %t10 + %t26 
= shl i32 %t10, 5 + %t27 = or i32 %t26, 8 + %t28 = getelementptr i8* %t8, i32 %t27 + %t29 = bitcast i8* %t28 to float* + %t30 = or i32 %t26, 16 + %t31 = getelementptr i8* %t8, i32 %t30 + %t32 = bitcast i8* %t31 to float* + %t33 = or i32 %t26, 24 + %t34 = getelementptr i8* %t8, i32 %t33 + %t35 = bitcast i8* %t34 to float* + %t36 = or i32 %t26, 4 + %t37 = getelementptr i8* %t8, i32 %t36 + %t38 = bitcast i8* %t37 to float* + %t39 = or i32 %t26, 12 + %t40 = getelementptr i8* %t8, i32 %t39 + %t41 = bitcast i8* %t40 to float* + %t42 = or i32 %t26, 20 + %t43 = getelementptr i8* %t8, i32 %t42 + %t44 = bitcast i8* %t43 to float* + %t45 = or i32 %t26, 28 + %t46 = getelementptr i8* %t8, i32 %t45 + %t47 = bitcast i8* %t46 to float* + %t48 = getelementptr i8* %t8, i32 %t26 + %t49 = bitcast i8* %t48 to float* + %t50 = shl i32 %t10, 3 + %t51 = or i32 %t50, 1 + %t52 = getelementptr i16* %a2, i32 %t51 + %t53 = or i32 %t50, 2 + %t54 = getelementptr i16* %a2, i32 %t53 + %t55 = or i32 %t50, 3 + %t56 = getelementptr i16* %a2, i32 %t55 + %t57 = or i32 %t50, 4 + %t58 = getelementptr i16* %a2, i32 %t57 + %t59 = or i32 %t50, 5 + %t60 = getelementptr i16* %a2, i32 %t59 + %t61 = or i32 %t50, 6 + %t62 = getelementptr i16* %a2, i32 %t61 + %t63 = or i32 %t50, 7 + %t64 = getelementptr i16* %a2, i32 %t63 + %t65 = getelementptr i16* %a2, i32 %t50 + %t66 = load i16* %t52, align 2 + %t67 = icmp eq i16 %t66, 0 + %t68 = load i16* %t54, align 2 + %t69 = icmp eq i16 %t68, 0 + %t70 = and i1 %t67, %t69 + br i1 %t70, label %bb71, label %bb91 + +bb71: + %t72 = load i16* %t56, align 2 + %t73 = icmp eq i16 %t72, 0 + br i1 %t73, label %bb74, label %bb91 + +bb74: + %t75 = load i16* %t58, align 2 + %t76 = icmp eq i16 %t75, 0 + br i1 %t76, label %bb77, label %bb91 + +bb77: + %t78 = load i16* %t60, align 2 + %t79 = icmp eq i16 %t78, 0 + br i1 %t79, label %bb80, label %bb91 + +bb80: + %t81 = load i16* %t62, align 2 + %t82 = icmp eq i16 %t81, 0 + br i1 %t82, label %bb83, label %bb91 + +bb83: + %t84 = load i16* %t64, 
align 2 + %t85 = icmp eq i16 %t84, 0 + br i1 %t85, label %bb86, label %bb91 + +bb86: + %t87 = load i16* %t65, align 2 + %t88 = sitofp i16 %t87 to float + %t89 = load float* %t49, align 4 + %t90 = fmul float %t88, %t89 + store float %t90, float* %t25, align 4 + store float %t90, float* %t12, align 4 + store float %t90, float* %t14, align 4 + store float %t90, float* %t16, align 4 + store float %t90, float* %t18, align 4 + store float %t90, float* %t20, align 4 + store float %t90, float* %t22, align 4 + store float %t90, float* %t24, align 4 + br label %bb156 + +bb91: + %t92 = load i16* %t65, align 2 + %t93 = sitofp i16 %t92 to float + %t94 = load float* %t49, align 4 + %t95 = fmul float %t93, %t94 + %t96 = sitofp i16 %t68 to float + %t97 = load float* %t29, align 4 + %t98 = fmul float %t96, %t97 + %t99 = load i16* %t58, align 2 + %t100 = sitofp i16 %t99 to float + %t101 = load float* %t32, align 4 + %t102 = fmul float %t100, %t101 + %t103 = load i16* %t62, align 2 + %t104 = sitofp i16 %t103 to float + %t105 = load float* %t35, align 4 + %t106 = fmul float %t104, %t105 + %t107 = fadd float %t95, %t102 + %t108 = fsub float %t95, %t102 + %t109 = fadd float %t98, %t106 + %t110 = fsub float %t98, %t106 + %t111 = fmul float %t110, 0x3FF6A09E60000000 + %t112 = fsub float %t111, %t109 + %t113 = fadd float %t107, %t109 + %t114 = fsub float %t107, %t109 + %t115 = fadd float %t108, %t112 + %t116 = fsub float %t108, %t112 + %t117 = sitofp i16 %t66 to float + %t118 = load float* %t38, align 4 + %t119 = fmul float %t117, %t118 + %t120 = load i16* %t56, align 2 + %t121 = sitofp i16 %t120 to float + %t122 = load float* %t41, align 4 + %t123 = fmul float %t121, %t122 + %t124 = load i16* %t60, align 2 + %t125 = sitofp i16 %t124 to float + %t126 = load float* %t44, align 4 + %t127 = fmul float %t125, %t126 + %t128 = load i16* %t64, align 2 + %t129 = sitofp i16 %t128 to float + %t130 = load float* %t47, align 4 + %t131 = fmul float %t129, %t130 + %t132 = fadd float %t127, %t123 + %t133 
= fsub float %t127, %t123 + %t134 = fadd float %t119, %t131 + %t135 = fsub float %t119, %t131 + %t136 = fadd float %t134, %t132 + %t137 = fsub float %t134, %t132 + %t138 = fmul float %t137, 0x3FF6A09E60000000 + %t139 = fadd float %t133, %t135 + %t140 = fmul float %t139, 0x3FFD906BC0000000 + %t141 = fmul float %t135, 0x3FF1517A80000000 + %t142 = fsub float %t141, %t140 + %t143 = fmul float %t133, 0xC004E7AEA0000000 + %t144 = fadd float %t143, %t140 + %t145 = fsub float %t144, %t136 + %t146 = fsub float %t138, %t145 + %t147 = fadd float %t142, %t146 + %t148 = fadd float %t113, %t136 + store float %t148, float* %t25, align 4 + %t149 = fsub float %t113, %t136 + store float %t149, float* %t24, align 4 + %t150 = fadd float %t115, %t145 + store float %t150, float* %t12, align 4 + %t151 = fsub float %t115, %t145 + store float %t151, float* %t22, align 4 + %t152 = fadd float %t116, %t146 + store float %t152, float* %t14, align 4 + %t153 = fsub float %t116, %t146 + store float %t153, float* %t20, align 4 + %t154 = fadd float %t114, %t147 + store float %t154, float* %t18, align 4 + %t155 = fsub float %t114, %t147 + store float %t155, float* %t16, align 4 + br label %bb156 + +bb156: + %t157 = add i32 %t10, 1 + %t158 = icmp eq i32 %t157, 8 + br i1 %t158, label %bb159, label %bb9 + +bb159: + %t160 = add i32 %a4, 7 + %t161 = add i32 %a4, 1 + %t162 = add i32 %a4, 6 + %t163 = add i32 %a4, 2 + %t164 = add i32 %a4, 5 + %t165 = add i32 %a4, 4 + %t166 = add i32 %a4, 3 + br label %bb167 + +bb167: + %t168 = phi i32 [ 0, %bb159 ], [ %t293, %bb167 ] + %t169 = getelementptr i8** %a3, i32 %t168 + %t170 = shl i32 %t168, 3 + %t171 = or i32 %t170, 4 + %t172 = getelementptr [64 x float]* %t, i32 0, i32 %t171 + %t173 = or i32 %t170, 2 + %t174 = getelementptr [64 x float]* %t, i32 0, i32 %t173 + %t175 = or i32 %t170, 6 + %t176 = getelementptr [64 x float]* %t, i32 0, i32 %t175 + %t177 = or i32 %t170, 5 + %t178 = getelementptr [64 x float]* %t, i32 0, i32 %t177 + %t179 = or i32 %t170, 3 + %t180 = 
getelementptr [64 x float]* %t, i32 0, i32 %t179 + %t181 = or i32 %t170, 1 + %t182 = getelementptr [64 x float]* %t, i32 0, i32 %t181 + %t183 = or i32 %t170, 7 + %t184 = getelementptr [64 x float]* %t, i32 0, i32 %t183 + %t185 = getelementptr [64 x float]* %t, i32 0, i32 %t170 + %t186 = load i8** %t169, align 4 + %t187 = getelementptr inbounds i8* %t186, i32 %a4 + %t188 = load float* %t185, align 4 + %t189 = load float* %t172, align 4 + %t190 = fadd float %t188, %t189 + %t191 = fsub float %t188, %t189 + %t192 = load float* %t174, align 4 + %t193 = load float* %t176, align 4 + %t194 = fadd float %t192, %t193 + %t195 = fsub float %t192, %t193 + %t196 = fmul float %t195, 0x3FF6A09E60000000 + %t197 = fsub float %t196, %t194 + %t198 = fadd float %t190, %t194 + %t199 = fsub float %t190, %t194 + %t200 = fadd float %t191, %t197 + %t201 = fsub float %t191, %t197 + %t202 = load float* %t178, align 4 + %t203 = load float* %t180, align 4 + %t204 = fadd float %t202, %t203 + %t205 = fsub float %t202, %t203 + %t206 = load float* %t182, align 4 + %t207 = load float* %t184, align 4 + %t208 = fadd float %t206, %t207 + %t209 = fsub float %t206, %t207 + %t210 = fadd float %t208, %t204 + %t211 = fsub float %t208, %t204 + %t212 = fmul float %t211, 0x3FF6A09E60000000 + %t213 = fadd float %t205, %t209 + %t214 = fmul float %t213, 0x3FFD906BC0000000 + %t215 = fmul float %t209, 0x3FF1517A80000000 + %t216 = fsub float %t215, %t214 + %t217 = fmul float %t205, 0xC004E7AEA0000000 + %t218 = fadd float %t217, %t214 + %t219 = fsub float %t218, %t210 + %t220 = fsub float %t212, %t219 + %t221 = fadd float %t216, %t220 + %t222 = fadd float %t198, %t210 + %t223 = fptosi float %t222 to i32 + %t224 = add nsw i32 %t223, 4 + %t225 = lshr i32 %t224, 3 + %t226 = and i32 %t225, 1023 + %t227 = add i32 %t226, 128 + %t228 = getelementptr inbounds i8* %t6, i32 %t227 + %t229 = load i8* %t228, align 1 + store i8 %t229, i8* %t187, align 1 + %t230 = fsub float %t198, %t210 + %t231 = fptosi float %t230 to i32 + %t232 
= add nsw i32 %t231, 4 + %t233 = lshr i32 %t232, 3 + %t234 = and i32 %t233, 1023 + %t235 = add i32 %t234, 128 + %t236 = getelementptr inbounds i8* %t6, i32 %t235 + %t237 = load i8* %t236, align 1 + %t238 = getelementptr inbounds i8* %t186, i32 %t160 + store i8 %t237, i8* %t238, align 1 + %t239 = fadd float %t200, %t219 + %t240 = fptosi float %t239 to i32 + %t241 = add nsw i32 %t240, 4 + %t242 = lshr i32 %t241, 3 + %t243 = and i32 %t242, 1023 + %t244 = add i32 %t243, 128 + %t245 = getelementptr inbounds i8* %t6, i32 %t244 + %t246 = load i8* %t245, align 1 + %t247 = getelementptr inbounds i8* %t186, i32 %t161 + store i8 %t246, i8* %t247, align 1 + %t248 = fsub float %t200, %t219 + %t249 = fptosi float %t248 to i32 + %t250 = add nsw i32 %t249, 4 + %t251 = lshr i32 %t250, 3 + %t252 = and i32 %t251, 1023 + %t253 = add i32 %t252, 128 + %t254 = getelementptr inbounds i8* %t6, i32 %t253 + %t255 = load i8* %t254, align 1 + %t256 = getelementptr inbounds i8* %t186, i32 %t162 + store i8 %t255, i8* %t256, align 1 + %t257 = fadd float %t201, %t220 + %t258 = fptosi float %t257 to i32 + %t259 = add nsw i32 %t258, 4 + %t260 = lshr i32 %t259, 3 + %t261 = and i32 %t260, 1023 + %t262 = add i32 %t261, 128 + %t263 = getelementptr inbounds i8* %t6, i32 %t262 + %t264 = load i8* %t263, align 1 + %t265 = getelementptr inbounds i8* %t186, i32 %t163 + store i8 %t264, i8* %t265, align 1 + %t266 = fsub float %t201, %t220 + %t267 = fptosi float %t266 to i32 + %t268 = add nsw i32 %t267, 4 + %t269 = lshr i32 %t268, 3 + %t270 = and i32 %t269, 1023 + %t271 = add i32 %t270, 128 + %t272 = getelementptr inbounds i8* %t6, i32 %t271 + %t273 = load i8* %t272, align 1 + %t274 = getelementptr inbounds i8* %t186, i32 %t164 + store i8 %t273, i8* %t274, align 1 + %t275 = fadd float %t199, %t221 + %t276 = fptosi float %t275 to i32 + %t277 = add nsw i32 %t276, 4 + %t278 = lshr i32 %t277, 3 + %t279 = and i32 %t278, 1023 + %t280 = add i32 %t279, 128 + %t281 = getelementptr inbounds i8* %t6, i32 %t280 + %t282 = 
load i8* %t281, align 1 + %t283 = getelementptr inbounds i8* %t186, i32 %t165 + store i8 %t282, i8* %t283, align 1 + %t284 = fsub float %t199, %t221 + %t285 = fptosi float %t284 to i32 + %t286 = add nsw i32 %t285, 4 + %t287 = lshr i32 %t286, 3 + %t288 = and i32 %t287, 1023 + %t289 = add i32 %t288, 128 + %t290 = getelementptr inbounds i8* %t6, i32 %t289 + %t291 = load i8* %t290, align 1 + %t292 = getelementptr inbounds i8* %t186, i32 %t166 + store i8 %t291, i8* %t292, align 1 + %t293 = add nsw i32 %t168, 1 + %t294 = icmp eq i32 %t293, 8 + br i1 %t294, label %bb295, label %bb167 + +bb295: + ret void +} + +%struct.ct_data_s = type { %union.anon, %union.anon } +%struct.gz_header = type { i32, i32, i32, i32, i8*, i32, i32, i8*, i32, i8*, i32, i32, i32 } +%struct.internal_state = type { %struct.z_stream*, i32, i8*, i32, i8*, i32, i32, %struct.gz_header*, i32, i8, i32, i32, i32, i32, i8*, i32, i16*, i16*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [573 x %struct.ct_data_s], [61 x %struct.ct_data_s], [39 x %struct.ct_data_s], %struct.tree_desc_s, %struct.tree_desc_s, %struct.tree_desc_s, [16 x i16], [573 x i32], i32, i32, [573 x i8], i8*, i32, i32, i16*, i32, i32, i32, i32, i16, i32 } +%struct.static_tree_desc = type { i32 } +%struct.tree_desc_s = type { %struct.ct_data_s*, i32, %struct.static_tree_desc* } +%struct.z_stream = type { i8*, i32, i32, i8*, i32, i32, i8*, %struct.internal_state*, i8* (i8*, i32, i32)*, void (i8*, i8*)*, i8*, i32, i32, i32 } +%union.anon = type { i16 } + +define i32 @longest_match(%struct.internal_state* %s, i32 %cur_match) nounwind optsize { +entry: + %0 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 31 ; <i32*> [#uses=1] + %1 = load i32* %0, align 4 ; <i32> [#uses=2] + %2 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 14 ; <i8**> [#uses=1] + %3 = load i8** %2, align 4 ; <i8*> [#uses=27] + %4 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 27 ; 
<i32*> [#uses=1] + %5 = load i32* %4, align 4 ; <i32> [#uses=17] + %6 = getelementptr inbounds i8* %3, i32 %5 ; <i8*> [#uses=1] + %7 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 30 ; <i32*> [#uses=1] + %8 = load i32* %7, align 4 ; <i32> [#uses=4] + %9 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 36 ; <i32*> [#uses=1] + %10 = load i32* %9, align 4 ; <i32> [#uses=2] + %11 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 11 ; <i32*> [#uses=1] + %12 = load i32* %11, align 4 ; <i32> [#uses=2] + %13 = add i32 %12, -262 ; <i32> [#uses=1] + %14 = icmp ugt i32 %5, %13 ; <i1> [#uses=1] + br i1 %14, label %bb, label %bb2 + +bb: ; preds = %entry + %15 = add i32 %5, 262 ; <i32> [#uses=1] + %16 = sub i32 %15, %12 ; <i32> [#uses=1] + br label %bb2 + +bb2: ; preds = %bb, %entry + %iftmp.48.0 = phi i32 [ %16, %bb ], [ 0, %entry ] ; <i32> [#uses=1] + %17 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 16 ; <i16**> [#uses=1] + %18 = load i16** %17, align 4 ; <i16*> [#uses=1] + %19 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 13 ; <i32*> [#uses=1] + %20 = load i32* %19, align 4 ; <i32> [#uses=1] + %.sum = add i32 %5, 258 ; <i32> [#uses=2] + %21 = getelementptr inbounds i8* %3, i32 %.sum ; <i8*> [#uses=1] + %22 = add nsw i32 %5, -1 ; <i32> [#uses=1] + %.sum30 = add i32 %22, %8 ; <i32> [#uses=1] + %23 = getelementptr inbounds i8* %3, i32 %.sum30 ; <i8*> [#uses=1] + %24 = load i8* %23, align 1 ; <i8> [#uses=1] + %.sum31 = add i32 %8, %5 ; <i32> [#uses=1] + %25 = getelementptr inbounds i8* %3, i32 %.sum31 ; <i8*> [#uses=1] + %26 = load i8* %25, align 1 ; <i8> [#uses=1] + %27 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 35 ; <i32*> [#uses=1] + %28 = load i32* %27, align 4 ; <i32> [#uses=1] + %29 = lshr i32 %1, 2 ; <i32> [#uses=1] + %30 = icmp ult i32 %8, %28 ; <i1> [#uses=1] + %. 
= select i1 %30, i32 %1, i32 %29 ; <i32> [#uses=1] + %31 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 29 ; <i32*> [#uses=1] + %32 = load i32* %31, align 4 ; <i32> [#uses=4] + %33 = icmp ugt i32 %10, %32 ; <i1> [#uses=1] + %nice_match.0.ph = select i1 %33, i32 %32, i32 %10 ; <i32> [#uses=1] + %34 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 28 ; <i32*> [#uses=1] + %35 = ptrtoint i8* %21 to i32 ; <i32> [#uses=1] + %36 = add nsw i32 %5, 257 ; <i32> [#uses=1] + %tmp81 = add i32 %., -1 ; <i32> [#uses=1] + br label %bb6 + +bb6: ; preds = %bb24, %bb2 + %indvar78 = phi i32 [ 0, %bb2 ], [ %indvar.next79, %bb24 ] ; <i32> [#uses=2] + %best_len.2 = phi i32 [ %8, %bb2 ], [ %best_len.0, %bb24 ] ; <i32> [#uses=8] + %scan_end1.1 = phi i8 [ %24, %bb2 ], [ %scan_end1.0, %bb24 ] ; <i8> [#uses=6] + %cur_match_addr.0 = phi i32 [ %cur_match, %bb2 ], [ %90, %bb24 ] ; <i32> [#uses=14] + %scan_end.1 = phi i8 [ %26, %bb2 ], [ %scan_end.0, %bb24 ] ; <i8> [#uses=6] + %37 = getelementptr inbounds i8* %3, i32 %cur_match_addr.0 ; <i8*> [#uses=1] + %.sum32 = add i32 %cur_match_addr.0, %best_len.2 ; <i32> [#uses=1] + %38 = getelementptr inbounds i8* %3, i32 %.sum32 ; <i8*> [#uses=1] + %39 = load i8* %38, align 1 ; <i8> [#uses=1] + %40 = icmp eq i8 %39, %scan_end.1 ; <i1> [#uses=1] + br i1 %40, label %bb7, label %bb23 + +bb7: ; preds = %bb6 + %41 = add nsw i32 %best_len.2, -1 ; <i32> [#uses=1] + %.sum33 = add i32 %41, %cur_match_addr.0 ; <i32> [#uses=1] + %42 = getelementptr inbounds i8* %3, i32 %.sum33 ; <i8*> [#uses=1] + %43 = load i8* %42, align 1 ; <i8> [#uses=1] + %44 = icmp eq i8 %43, %scan_end1.1 ; <i1> [#uses=1] + br i1 %44, label %bb8, label %bb23 + +bb8: ; preds = %bb7 + %45 = load i8* %37, align 1 ; <i8> [#uses=1] + %46 = load i8* %6, align 1 ; <i8> [#uses=1] + %47 = icmp eq i8 %45, %46 ; <i1> [#uses=1] + br i1 %47, label %bb9, label %bb23 + +bb9: ; preds = %bb8 + %.sum34 = add i32 %cur_match_addr.0, 1 ; <i32> [#uses=1] + %48 = getelementptr inbounds 
i8* %3, i32 %.sum34 ; <i8*> [#uses=1] + %49 = load i8* %48, align 1 ; <i8> [#uses=1] + %.sum88 = add i32 %5, 1 ; <i32> [#uses=1] + %50 = getelementptr inbounds i8* %3, i32 %.sum88 ; <i8*> [#uses=1] + %51 = load i8* %50, align 1 ; <i8> [#uses=1] + %52 = icmp eq i8 %49, %51 ; <i1> [#uses=1] + br i1 %52, label %bb10, label %bb23 + +bb10: ; preds = %bb9 + %tmp39 = add i32 %cur_match_addr.0, 10 ; <i32> [#uses=1] + %tmp41 = add i32 %cur_match_addr.0, 9 ; <i32> [#uses=1] + %tmp44 = add i32 %cur_match_addr.0, 8 ; <i32> [#uses=1] + %tmp47 = add i32 %cur_match_addr.0, 7 ; <i32> [#uses=1] + %tmp50 = add i32 %cur_match_addr.0, 6 ; <i32> [#uses=1] + %tmp53 = add i32 %cur_match_addr.0, 5 ; <i32> [#uses=1] + %tmp56 = add i32 %cur_match_addr.0, 4 ; <i32> [#uses=1] + %tmp59 = add i32 %cur_match_addr.0, 3 ; <i32> [#uses=1] + br label %bb11 + +bb11: ; preds = %bb18, %bb10 + %indvar = phi i32 [ %indvar.next, %bb18 ], [ 0, %bb10 ] ; <i32> [#uses=2] + %tmp = shl i32 %indvar, 3 ; <i32> [#uses=16] + %tmp40 = add i32 %tmp39, %tmp ; <i32> [#uses=1] + %scevgep = getelementptr i8* %3, i32 %tmp40 ; <i8*> [#uses=1] + %tmp42 = add i32 %tmp41, %tmp ; <i32> [#uses=1] + %scevgep43 = getelementptr i8* %3, i32 %tmp42 ; <i8*> [#uses=1] + %tmp45 = add i32 %tmp44, %tmp ; <i32> [#uses=1] + %scevgep46 = getelementptr i8* %3, i32 %tmp45 ; <i8*> [#uses=1] + %tmp48 = add i32 %tmp47, %tmp ; <i32> [#uses=1] + %scevgep49 = getelementptr i8* %3, i32 %tmp48 ; <i8*> [#uses=1] + %tmp51 = add i32 %tmp50, %tmp ; <i32> [#uses=1] + %scevgep52 = getelementptr i8* %3, i32 %tmp51 ; <i8*> [#uses=1] + %tmp54 = add i32 %tmp53, %tmp ; <i32> [#uses=1] + %scevgep55 = getelementptr i8* %3, i32 %tmp54 ; <i8*> [#uses=1] + %tmp60 = add i32 %tmp59, %tmp ; <i32> [#uses=1] + %scevgep61 = getelementptr i8* %3, i32 %tmp60 ; <i8*> [#uses=1] + %tmp62 = add i32 %tmp, 10 ; <i32> [#uses=1] + %.sum89 = add i32 %5, %tmp62 ; <i32> [#uses=2] + %scevgep63 = getelementptr i8* %3, i32 %.sum89 ; <i8*> [#uses=2] + %tmp64 = add i32 %tmp, 9 ; <i32> 
[#uses=1] + %.sum90 = add i32 %5, %tmp64 ; <i32> [#uses=1] + %scevgep65 = getelementptr i8* %3, i32 %.sum90 ; <i8*> [#uses=2] + %tmp66 = add i32 %tmp, 8 ; <i32> [#uses=1] + %.sum91 = add i32 %5, %tmp66 ; <i32> [#uses=1] + %scevgep67 = getelementptr i8* %3, i32 %.sum91 ; <i8*> [#uses=2] + %tmp6883 = or i32 %tmp, 7 ; <i32> [#uses=1] + %.sum92 = add i32 %5, %tmp6883 ; <i32> [#uses=1] + %scevgep69 = getelementptr i8* %3, i32 %.sum92 ; <i8*> [#uses=2] + %tmp7084 = or i32 %tmp, 6 ; <i32> [#uses=1] + %.sum93 = add i32 %5, %tmp7084 ; <i32> [#uses=1] + %scevgep71 = getelementptr i8* %3, i32 %.sum93 ; <i8*> [#uses=2] + %tmp7285 = or i32 %tmp, 5 ; <i32> [#uses=1] + %.sum94 = add i32 %5, %tmp7285 ; <i32> [#uses=1] + %scevgep73 = getelementptr i8* %3, i32 %.sum94 ; <i8*> [#uses=2] + %tmp7486 = or i32 %tmp, 4 ; <i32> [#uses=1] + %.sum95 = add i32 %5, %tmp7486 ; <i32> [#uses=1] + %scevgep75 = getelementptr i8* %3, i32 %.sum95 ; <i8*> [#uses=2] + %tmp7687 = or i32 %tmp, 3 ; <i32> [#uses=1] + %.sum96 = add i32 %5, %tmp7687 ; <i32> [#uses=1] + %scevgep77 = getelementptr i8* %3, i32 %.sum96 ; <i8*> [#uses=2] + %53 = load i8* %scevgep77, align 1 ; <i8> [#uses=1] + %54 = load i8* %scevgep61, align 1 ; <i8> [#uses=1] + %55 = icmp eq i8 %53, %54 ; <i1> [#uses=1] + br i1 %55, label %bb12, label %bb20 + +bb12: ; preds = %bb11 + %tmp57 = add i32 %tmp56, %tmp ; <i32> [#uses=1] + %scevgep58 = getelementptr i8* %3, i32 %tmp57 ; <i8*> [#uses=1] + %56 = load i8* %scevgep75, align 1 ; <i8> [#uses=1] + %57 = load i8* %scevgep58, align 1 ; <i8> [#uses=1] + %58 = icmp eq i8 %56, %57 ; <i1> [#uses=1] + br i1 %58, label %bb13, label %bb20 + +bb13: ; preds = %bb12 + %59 = load i8* %scevgep73, align 1 ; <i8> [#uses=1] + %60 = load i8* %scevgep55, align 1 ; <i8> [#uses=1] + %61 = icmp eq i8 %59, %60 ; <i1> [#uses=1] + br i1 %61, label %bb14, label %bb20 + +bb14: ; preds = %bb13 + %62 = load i8* %scevgep71, align 1 ; <i8> [#uses=1] + %63 = load i8* %scevgep52, align 1 ; <i8> [#uses=1] + %64 = icmp eq i8 
%62, %63 ; <i1> [#uses=1] + br i1 %64, label %bb15, label %bb20 + +bb15: ; preds = %bb14 + %65 = load i8* %scevgep69, align 1 ; <i8> [#uses=1] + %66 = load i8* %scevgep49, align 1 ; <i8> [#uses=1] + %67 = icmp eq i8 %65, %66 ; <i1> [#uses=1] + br i1 %67, label %bb16, label %bb20 + +bb16: ; preds = %bb15 + %68 = load i8* %scevgep67, align 1 ; <i8> [#uses=1] + %69 = load i8* %scevgep46, align 1 ; <i8> [#uses=1] + %70 = icmp eq i8 %68, %69 ; <i1> [#uses=1] + br i1 %70, label %bb17, label %bb20 + +bb17: ; preds = %bb16 + %71 = load i8* %scevgep65, align 1 ; <i8> [#uses=1] + %72 = load i8* %scevgep43, align 1 ; <i8> [#uses=1] + %73 = icmp eq i8 %71, %72 ; <i1> [#uses=1] + br i1 %73, label %bb18, label %bb20 + +bb18: ; preds = %bb17 + %74 = load i8* %scevgep63, align 1 ; <i8> [#uses=1] + %75 = load i8* %scevgep, align 1 ; <i8> [#uses=1] + %76 = icmp eq i8 %74, %75 ; <i1> [#uses=1] + %77 = icmp slt i32 %.sum89, %.sum ; <i1> [#uses=1] + %or.cond = and i1 %76, %77 ; <i1> [#uses=1] + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1] + br i1 %or.cond, label %bb11, label %bb20 + +bb20: ; preds = %bb18, %bb17, %bb16, %bb15, %bb14, %bb13, %bb12, %bb11 + %scan.3 = phi i8* [ %scevgep77, %bb11 ], [ %scevgep75, %bb12 ], [ %scevgep73, %bb13 ], [ %scevgep71, %bb14 ], [ %scevgep69, %bb15 ], [ %scevgep67, %bb16 ], [ %scevgep65, %bb17 ], [ %scevgep63, %bb18 ] ; <i8*> [#uses=1] + %78 = ptrtoint i8* %scan.3 to i32 ; <i32> [#uses=1] + %79 = sub nsw i32 %78, %35 ; <i32> [#uses=2] + %80 = add i32 %79, 258 ; <i32> [#uses=5] + %81 = icmp sgt i32 %80, %best_len.2 ; <i1> [#uses=1] + br i1 %81, label %bb21, label %bb23 + +bb21: ; preds = %bb20 + store i32 %cur_match_addr.0, i32* %34, align 4 + %82 = icmp slt i32 %80, %nice_match.0.ph ; <i1> [#uses=1] + br i1 %82, label %bb22, label %bb25 + +bb22: ; preds = %bb21 + %.sum37 = add i32 %36, %79 ; <i32> [#uses=1] + %83 = getelementptr inbounds i8* %3, i32 %.sum37 ; <i8*> [#uses=1] + %84 = load i8* %83, align 1 ; <i8> [#uses=1] + %.sum38 = add i32 
%80, %5 ; <i32> [#uses=1] + %85 = getelementptr inbounds i8* %3, i32 %.sum38 ; <i8*> [#uses=1] + %86 = load i8* %85, align 1 ; <i8> [#uses=1] + br label %bb23 + +bb23: ; preds = %bb22, %bb20, %bb9, %bb8, %bb7, %bb6 + %best_len.0 = phi i32 [ %best_len.2, %bb6 ], [ %best_len.2, %bb7 ], [ %best_len.2, %bb8 ], [ %best_len.2, %bb9 ], [ %80, %bb22 ], [ %best_len.2, %bb20 ] ; <i32> [#uses=3] + %scan_end1.0 = phi i8 [ %scan_end1.1, %bb6 ], [ %scan_end1.1, %bb7 ], [ %scan_end1.1, %bb8 ], [ %scan_end1.1, %bb9 ], [ %84, %bb22 ], [ %scan_end1.1, %bb20 ] ; <i8> [#uses=1] + %scan_end.0 = phi i8 [ %scan_end.1, %bb6 ], [ %scan_end.1, %bb7 ], [ %scan_end.1, %bb8 ], [ %scan_end.1, %bb9 ], [ %86, %bb22 ], [ %scan_end.1, %bb20 ] ; <i8> [#uses=1] + %87 = and i32 %cur_match_addr.0, %20 ; <i32> [#uses=1] + %88 = getelementptr inbounds i16* %18, i32 %87 ; <i16*> [#uses=1] + %89 = load i16* %88, align 2 ; <i16> [#uses=1] + %90 = zext i16 %89 to i32 ; <i32> [#uses=2] + %91 = icmp ugt i32 %90, %iftmp.48.0 ; <i1> [#uses=1] + br i1 %91, label %bb24, label %bb25 + +bb24: ; preds = %bb23 + +; LSR should use count-down iteration to avoid requiring the trip count +; in a register. + +; CHECK: @ %bb24 +; CHECK: subs{{.*}} {{(r[0-9]+)|(lr)}}, #1 +; CHECK: bne.w + + %92 = icmp eq i32 %tmp81, %indvar78 ; <i1> [#uses=1] + %indvar.next79 = add i32 %indvar78, 1 ; <i32> [#uses=1] + br i1 %92, label %bb25, label %bb6 + +bb25: ; preds = %bb24, %bb23, %bb21 + %best_len.1 = phi i32 [ %best_len.0, %bb23 ], [ %best_len.0, %bb24 ], [ %80, %bb21 ] ; <i32> [#uses=2] + %93 = icmp ugt i32 %best_len.1, %32 ; <i1> [#uses=1] + %merge = select i1 %93, i32 %32, i32 %best_len.1 ; <i32> [#uses=1] + ret i32 %merge +}
diff --git a/src/LLVM/test/CodeGen/ARM/lsr-scale-addr-mode.ll b/src/LLVM/test/CodeGen/ARM/lsr-scale-addr-mode.ll new file mode 100644 index 0000000..7abe1d2 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/lsr-scale-addr-mode.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -march=arm | grep lsl | grep -F {lsl #2\]} +; Should use scaled addressing mode. + +define void @sintzero(i32* %a) nounwind { +entry: + store i32 0, i32* %a + br label %cond_next + +cond_next: ; preds = %cond_next, %entry + %indvar = phi i32 [ 0, %entry ], [ %tmp25, %cond_next ] ; <i32> [#uses=1] + %tmp25 = add i32 %indvar, 1 ; <i32> [#uses=3] + %tmp36 = getelementptr i32* %a, i32 %tmp25 ; <i32*> [#uses=1] + store i32 0, i32* %tmp36 + icmp eq i32 %tmp25, -1 ; <i1>:0 [#uses=1] + br i1 %0, label %return, label %cond_next + +return: ; preds = %cond_next + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/lsr-unfolded-offset.ll b/src/LLVM/test/CodeGen/ARM/lsr-unfolded-offset.ll new file mode 100644 index 0000000..bf26a96 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/lsr-unfolded-offset.ll
@@ -0,0 +1,80 @@ +; RUN: llc -regalloc=greedy < %s | FileCheck %s + +; LSR shouldn't introduce more induction variables than needed, increasing +; register pressure and therefore spilling. There is more room for improvement +; here. + +; CHECK: sub sp, #{{40|32|28|24}} + +; CHECK: %for.inc +; CHECK: ldr{{(.w)?}} r{{.*}}, [sp, # +; CHECK: ldr{{(.w)?}} r{{.*}}, [sp, # +; CHECK: add + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" +target triple = "thumbv7-apple-macosx10.7.0" + +%struct.partition_entry = type { i32, i32, i64, i64 } + +define i32 @partition_overlap_check(%struct.partition_entry* nocapture %part, i32 %num_entries) nounwind readonly optsize ssp { +entry: + %cmp79 = icmp sgt i32 %num_entries, 0 + br i1 %cmp79, label %outer.loop, label %for.end72 + +outer.loop: ; preds = %for.inc69, %entry + %overlap.081 = phi i32 [ %overlap.4, %for.inc69 ], [ 0, %entry ] + %0 = phi i32 [ %inc71, %for.inc69 ], [ 0, %entry ] + %offset = getelementptr %struct.partition_entry* %part, i32 %0, i32 2 + %len = getelementptr %struct.partition_entry* %part, i32 %0, i32 3 + %tmp5 = load i64* %offset, align 4, !tbaa !0 + %tmp15 = load i64* %len, align 4, !tbaa !0 + %add = add nsw i64 %tmp15, %tmp5 + br label %inner.loop + +inner.loop: ; preds = %for.inc, %outer.loop + %overlap.178 = phi i32 [ %overlap.081, %outer.loop ], [ %overlap.4, %for.inc ] + %1 = phi i32 [ 0, %outer.loop ], [ %inc, %for.inc ] + %cmp23 = icmp eq i32 %0, %1 + br i1 %cmp23, label %for.inc, label %if.end + +if.end: ; preds = %inner.loop + %len39 = getelementptr %struct.partition_entry* %part, i32 %1, i32 3 + %offset28 = getelementptr %struct.partition_entry* %part, i32 %1, i32 2 + %tmp29 = load i64* %offset28, align 4, !tbaa !0 + %tmp40 = load i64* %len39, align 4, !tbaa !0 + %add41 = add nsw i64 %tmp40, %tmp29 + %cmp44 = icmp sge i64 %tmp29, %tmp5 + %cmp47 = icmp slt i64 %tmp29, %add + %or.cond = and i1 %cmp44, %cmp47 + 
%overlap.2 = select i1 %or.cond, i32 1, i32 %overlap.178 + %cmp52 = icmp sle i64 %add41, %add + %cmp56 = icmp sgt i64 %add41, %tmp5 + %or.cond74 = and i1 %cmp52, %cmp56 + %overlap.3 = select i1 %or.cond74, i32 1, i32 %overlap.2 + %cmp61 = icmp sgt i64 %tmp29, %tmp5 + %cmp65 = icmp slt i64 %add41, %add + %or.cond75 = or i1 %cmp61, %cmp65 + br i1 %or.cond75, label %for.inc, label %if.then66 + +if.then66: ; preds = %if.end + br label %for.inc + +for.inc: ; preds = %if.end, %if.then66, %inner.loop + %overlap.4 = phi i32 [ %overlap.178, %inner.loop ], [ 1, %if.then66 ], [ %overlap.3, %if.end ] + %inc = add nsw i32 %1, 1 + %exitcond = icmp eq i32 %inc, %num_entries + br i1 %exitcond, label %for.inc69, label %inner.loop + +for.inc69: ; preds = %for.inc + %inc71 = add nsw i32 %0, 1 + %exitcond83 = icmp eq i32 %inc71, %num_entries + br i1 %exitcond83, label %for.end72, label %outer.loop + +for.end72: ; preds = %for.inc69, %entry + %overlap.0.lcssa = phi i32 [ 0, %entry ], [ %overlap.4, %for.inc69 ] + ret i32 %overlap.0.lcssa +} + +!0 = metadata !{metadata !"long long", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/src/LLVM/test/CodeGen/ARM/machine-cse-cmp.ll b/src/LLVM/test/CodeGen/ARM/machine-cse-cmp.ll new file mode 100644 index 0000000..c77402f --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/machine-cse-cmp.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=arm | FileCheck %s +;rdar://8003725 + +@G1 = external global i32 +@G2 = external global i32 + +define i32 @f1(i32 %cond1, i32 %x1, i32 %x2, i32 %x3) { +entry: +; CHECK: cmp +; CHECK: moveq +; CHECK-NOT: cmp +; CHECK: moveq + %tmp1 = icmp eq i32 %cond1, 0 + %tmp2 = select i1 %tmp1, i32 %x1, i32 %x2 + %tmp3 = select i1 %tmp1, i32 %x2, i32 %x3 + %tmp4 = add i32 %tmp2, %tmp3 + ret i32 %tmp4 +}
diff --git a/src/LLVM/test/CodeGen/ARM/machine-licm.ll b/src/LLVM/test/CodeGen/ARM/machine-licm.ll new file mode 100644 index 0000000..8656c5b --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/machine-licm.ll
@@ -0,0 +1,66 @@ +; RUN: llc < %s -mtriple=thumb-apple-darwin -relocation-model=pic -disable-fp-elim | FileCheck %s -check-prefix=THUMB +; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic -disable-fp-elim | FileCheck %s -check-prefix=ARM +; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic -disable-fp-elim -mattr=+v6t2 | FileCheck %s -check-prefix=MOVT +; rdar://7353541 +; rdar://7354376 +; rdar://8887598 + +; The generated code is no where near ideal. It's not recognizing the two +; constantpool entries being loaded can be merged into one. + +@GV = external global i32 ; <i32*> [#uses=2] + +define void @t(i32* nocapture %vals, i32 %c) nounwind { +entry: +; ARM: t: +; ARM: ldr [[REGISTER_1:r[0-9]+]], LCPI0_0 +; Unfortunately currently ARM codegen doesn't cse the ldr from constantpool. +; The issue is it can be read by an "add pc" or a "ldr [pc]" so it's messy +; to add the pseudo instructions to make sure they are CSE'ed at the same +; time as the "ldr cp". +; ARM: ldr r{{[0-9]+}}, LCPI0_1 +; ARM: LPC0_0: +; ARM: ldr r{{[0-9]+}}, [pc, [[REGISTER_1]]] +; ARM: ldr r{{[0-9]+}}, [r{{[0-9]+}}] + +; MOVT: t: +; MOVT: movw [[REGISTER_2:r[0-9]+]], :lower16:(L_GV$non_lazy_ptr-(LPC0_0+8)) +; MOVT: movt [[REGISTER_2]], :upper16:(L_GV$non_lazy_ptr-(LPC0_0+8)) +; MOVT: LPC0_0: +; MOVT: ldr r{{[0-9]+}}, [pc, [[REGISTER_2]]] +; MOVT: ldr r{{[0-9]+}}, [r{{[0-9]+}}] + +; THUMB: t: + %0 = icmp eq i32 %c, 0 ; <i1> [#uses=1] + br i1 %0, label %return, label %bb.nph + +bb.nph: ; preds = %entry +; ARM: LCPI0_0: +; ARM: LCPI0_1: +; ARM: .section + +; THUMB: BB#1 +; THUMB: ldr.n r2, LCPI0_0 +; THUMB: add r2, pc +; THUMB: ldr r{{[0-9]+}}, [r2] +; THUMB: LBB0_2 +; THUMB: LCPI0_0: +; THUMB-NOT: LCPI0_1: +; THUMB: .section + %.pre = load i32* @GV, align 4 ; <i32> [#uses=1] + br label %bb + +bb: ; preds = %bb, %bb.nph + %1 = phi i32 [ %.pre, %bb.nph ], [ %3, %bb ] ; <i32> [#uses=1] + %i.03 = phi i32 [ 0, %bb.nph ], [ %4, %bb ] ; <i32> [#uses=2] + %scevgep = 
getelementptr i32* %vals, i32 %i.03 ; <i32*> [#uses=1] + %2 = load i32* %scevgep, align 4 ; <i32> [#uses=1] + %3 = add nsw i32 %1, %2 ; <i32> [#uses=2] + store i32 %3, i32* @GV, align 4 + %4 = add i32 %i.03, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %4, %c ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/mem.ll b/src/LLVM/test/CodeGen/ARM/mem.ll new file mode 100644 index 0000000..13dbbe7 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/mem.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=arm | grep strb +; RUN: llc < %s -march=arm | grep strh + +define void @f1() { +entry: + store i8 0, i8* null + ret void +} + +define void @f2() { +entry: + store i16 0, i16* null + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/memcpy-inline.ll b/src/LLVM/test/CodeGen/ARM/memcpy-inline.ll new file mode 100644 index 0000000..30b9f59 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/memcpy-inline.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -regalloc=linearscan -disable-post-ra | FileCheck %s + +; The ARM magic hinting works best with linear scan. +; CHECK: ldrd +; CHECK: strd +; CHECK: ldrb + +%struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 } + +@src = external global %struct.x +@dst = external global %struct.x + +define i32 @t() { +entry: + call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds (%struct.x* @dst, i32 0, i32 0), i8* getelementptr inbounds (%struct.x* @src, i32 0, i32 0), i32 11, i32 8, i1 false) + ret i32 0 +} + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/src/LLVM/test/CodeGen/ARM/memfunc.ll b/src/LLVM/test/CodeGen/ARM/memfunc.ll new file mode 100644 index 0000000..d03d6a9 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/memfunc.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s -mtriple=armv7-apple-ios -o - | FileCheck %s +; RUN: llc < %s -mtriple=arm-none-eabi -o - | FileCheck --check-prefix=EABI %s + +@from = common global [500 x i32] zeroinitializer, align 4 +@to = common global [500 x i32] zeroinitializer, align 4 + +define void @f() { +entry: + + ; CHECK: memmove + ; EABI: __aeabi_memmove + call void @llvm.memmove.p0i8.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8* bitcast ([500 x i32]* @to to i8*), i32 500, i32 0, i1 false) + + ; CHECK: memcpy + ; EABI: __aeabi_memcpy + call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8* bitcast ([500 x i32]* @to to i8*), i32 500, i32 0, i1 false) + + ; EABI memset swaps arguments + ; CHECK: mov r1, #0 + ; CHECK: memset + ; EABI: mov r2, #0 + ; EABI: __aeabi_memset + call void @llvm.memset.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8 0, i32 500, i32 0, i1 false) + unreachable +} + +declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/src/LLVM/test/CodeGen/ARM/mls.ll b/src/LLVM/test/CodeGen/ARM/mls.ll new file mode 100644 index 0000000..a6cdba4 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/mls.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=arm -mattr=+v6t2 | FileCheck %s + +define i32 @f1(i32 %a, i32 %b, i32 %c) { + %tmp1 = mul i32 %a, %b + %tmp2 = sub i32 %c, %tmp1 + ret i32 %tmp2 +} + +; sub doesn't commute, so no mls for this one +define i32 @f2(i32 %a, i32 %b, i32 %c) { + %tmp1 = mul i32 %a, %b + %tmp2 = sub i32 %tmp1, %c + ret i32 %tmp2 +} + +; CHECK: mls r0, r0, r1, r2
diff --git a/src/LLVM/test/CodeGen/ARM/movt-movw-global.ll b/src/LLVM/test/CodeGen/ARM/movt-movw-global.ll new file mode 100644 index 0000000..991d728 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/movt-movw-global.ll
@@ -0,0 +1,39 @@ +; RUN: llc < %s -mtriple=armv7-eabi | FileCheck %s -check-prefix=EABI +; RUN: llc < %s -mtriple=armv7-apple-ios -relocation-model=dynamic-no-pic | FileCheck %s -check-prefix=IOS +; RUN: llc < %s -mtriple=armv7-apple-ios -relocation-model=pic | FileCheck %s -check-prefix=IOS-PIC +; RUN: llc < %s -mtriple=armv7-apple-ios -relocation-model=static | FileCheck %s -check-prefix=IOS-STATIC + +@foo = common global i32 0 + +define i32* @bar1() nounwind readnone { +entry: +; EABI: movw r0, :lower16:foo +; EABI-NEXT: movt r0, :upper16:foo + +; IOS: movw r0, :lower16:L_foo$non_lazy_ptr +; IOS-NEXT: movt r0, :upper16:L_foo$non_lazy_ptr + +; IOS-PIC: movw r0, :lower16:(L_foo$non_lazy_ptr-(LPC0_0+8)) +; IOS-PIC-NEXT: movt r0, :upper16:(L_foo$non_lazy_ptr-(LPC0_0+8)) + +; IOS-STATIC-NOT: movw r0, :lower16:_foo +; IOS-STATIC-NOT: movt r0, :upper16:_foo + ret i32* @foo +} + +define void @bar2(i32 %baz) nounwind { +entry: +; EABI: movw r1, :lower16:foo +; EABI-NEXT: movt r1, :upper16:foo + +; IOS: movw r1, :lower16:L_foo$non_lazy_ptr +; IOS-NEXT: movt r1, :upper16:L_foo$non_lazy_ptr + +; IOS-PIC: movw r1, :lower16:(L_foo$non_lazy_ptr-(LPC1_0+8)) +; IOS-PIC-NEXT: movt r1, :upper16:(L_foo$non_lazy_ptr-(LPC1_0+8)) + +; IOS-STATIC-NOT: movw r1, :lower16:_foo +; IOS-STATIC-NOT: movt r1, :upper16:_foo + store i32 %baz, i32* @foo, align 4 + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/movt.ll b/src/LLVM/test/CodeGen/ARM/movt.ll new file mode 100644 index 0000000..e82aca0 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/movt.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -march=arm -mattr=+thumb2 | FileCheck %s +; rdar://7317664 + +define i32 @t(i32 %X) nounwind { +; CHECK: t: +; CHECK: movt r0, #65535 +entry: + %0 = or i32 %X, -65536 + ret i32 %0 +} + +define i32 @t2(i32 %X) nounwind { +; CHECK: t2: +; CHECK: movt r0, #65534 +entry: + %0 = or i32 %X, -131072 + %1 = and i32 %0, -65537 + ret i32 %1 +}
diff --git a/src/LLVM/test/CodeGen/ARM/mul.ll b/src/LLVM/test/CodeGen/ARM/mul.ll new file mode 100644 index 0000000..f886f10 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/mul.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -march=arm | grep mul | count 2 +; RUN: llc < %s -march=arm | grep lsl | count 2 + +define i32 @f1(i32 %u) { + %tmp = mul i32 %u, %u + ret i32 %tmp +} + +define i32 @f2(i32 %u, i32 %v) { + %tmp = mul i32 %u, %v + ret i32 %tmp +} + +define i32 @f3(i32 %u) { + %tmp = mul i32 %u, 5 + ret i32 %tmp +} + +define i32 @f4(i32 %u) { + %tmp = mul i32 %u, 4 + ret i32 %tmp +}
diff --git a/src/LLVM/test/CodeGen/ARM/mul_const.ll b/src/LLVM/test/CodeGen/ARM/mul_const.ll new file mode 100644 index 0000000..3cb8a8e --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/mul_const.ll
@@ -0,0 +1,43 @@ +; RUN: llc < %s -march=arm | FileCheck %s + +define i32 @t9(i32 %v) nounwind readnone { +entry: +; CHECK: t9: +; CHECK: add r0, r0, r0, lsl #3 + %0 = mul i32 %v, 9 + ret i32 %0 +} + +define i32 @t7(i32 %v) nounwind readnone { +entry: +; CHECK: t7: +; CHECK: rsb r0, r0, r0, lsl #3 + %0 = mul i32 %v, 7 + ret i32 %0 +} + +define i32 @t5(i32 %v) nounwind readnone { +entry: +; CHECK: t5: +; CHECK: add r0, r0, r0, lsl #2 + %0 = mul i32 %v, 5 + ret i32 %0 +} + +define i32 @t3(i32 %v) nounwind readnone { +entry: +; CHECK: t3: +; CHECK: add r0, r0, r0, lsl #1 + %0 = mul i32 %v, 3 + ret i32 %0 +} + +define i32 @t12288(i32 %v) nounwind readnone { +entry: +; CHECK: t12288: +; CHECK: add r0, r0, r0, lsl #1 +; CHECK: lsl{{.*}}#12 + %0 = mul i32 %v, 12288 + ret i32 %0 +} +
diff --git a/src/LLVM/test/CodeGen/ARM/mulhi.ll b/src/LLVM/test/CodeGen/ARM/mulhi.ll new file mode 100644 index 0000000..5735f8e --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/mulhi.ll
@@ -0,0 +1,54 @@ +; RUN: llc < %s -march=arm -mattr=+v6 | FileCheck %s -check-prefix=V6 +; RUN: llc < %s -march=arm | FileCheck %s -check-prefix=V4 +; RUN: llc < %s -march=thumb -mcpu=cortex-m3 | FileCheck %s -check-prefix=M3 + +define i32 @smulhi(i32 %x, i32 %y) nounwind { +; V6: smulhi: +; V6: smmul + +; V4: smulhi: +; V4: smull + +; M3: smulhi: +; M3: smull + %tmp = sext i32 %x to i64 ; <i64> [#uses=1] + %tmp1 = sext i32 %y to i64 ; <i64> [#uses=1] + %tmp2 = mul i64 %tmp1, %tmp ; <i64> [#uses=1] + %tmp3 = lshr i64 %tmp2, 32 ; <i64> [#uses=1] + %tmp3.upgrd.1 = trunc i64 %tmp3 to i32 ; <i32> [#uses=1] + ret i32 %tmp3.upgrd.1 +} + +define i32 @umulhi(i32 %x, i32 %y) nounwind { +; V6: umulhi: +; V6: umull + +; V4: umulhi: +; V4: umull + +; M3: umulhi: +; M3: umull + %tmp = zext i32 %x to i64 ; <i64> [#uses=1] + %tmp1 = zext i32 %y to i64 ; <i64> [#uses=1] + %tmp2 = mul i64 %tmp1, %tmp ; <i64> [#uses=1] + %tmp3 = lshr i64 %tmp2, 32 ; <i64> [#uses=1] + %tmp3.upgrd.2 = trunc i64 %tmp3 to i32 ; <i32> [#uses=1] + ret i32 %tmp3.upgrd.2 +} + +; rdar://r10152911 +define i32 @t3(i32 %a) nounwind { +; V6: t3: +; V6: smmla + +; V4: t3: +; V4: smull + +; M3: t3: +; M3-NOT: smmla +; M3: smull +entry: + %tmp1 = mul nsw i32 %a, 3 + %tmp2 = sdiv i32 %tmp1, 23 + ret i32 %tmp2 +}
diff --git a/src/LLVM/test/CodeGen/ARM/mult-alt-generic-arm.ll b/src/LLVM/test/CodeGen/ARM/mult-alt-generic-arm.ll new file mode 100644 index 0000000..a8104db --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/mult-alt-generic-arm.ll
@@ -0,0 +1,323 @@ +; RUN: llc < %s -march=arm +; ModuleID = 'mult-alt-generic.c' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32" +target triple = "arm" + +@mout0 = common global i32 0, align 4 +@min1 = common global i32 0, align 4 +@marray = common global [2 x i32] zeroinitializer, align 4 + +define arm_aapcscc void @single_m() nounwind { +entry: + call void asm "foo $1,$0", "=*m,*m"(i32* @mout0, i32* @min1) nounwind + ret void +} + +define arm_aapcscc void @single_o() nounwind { +entry: + %out0 = alloca i32, align 4 + %index = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %index, align 4 + ret void +} + +define arm_aapcscc void @single_V() nounwind { +entry: + ret void +} + +define arm_aapcscc void @single_lt() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r,<r"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* %in1, align 4 + %1 = call i32 asm "foo $1,$0", "=r,r<"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + ret void +} + +define arm_aapcscc void @single_gt() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r,>r"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* %in1, align 4 + %1 = call i32 asm "foo $1,$0", "=r,r>"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + ret void +} + +define arm_aapcscc void @single_r() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r,r"(i32 %tmp) nounwind + 
store i32 %0, i32* %out0, align 4 + ret void +} + +define arm_aapcscc void @single_i() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + %0 = call i32 asm "foo $1,$0", "=r,i"(i32 1) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define arm_aapcscc void @single_n() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + %0 = call i32 asm "foo $1,$0", "=r,n"(i32 1) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define arm_aapcscc void @single_E() nounwind { +entry: + %out0 = alloca double, align 8 + store double 0.000000e+000, double* %out0, align 8 +; No lowering support. +; %0 = call double asm "foo $1,$0", "=r,E"(double 1.000000e+001) nounwind +; store double %0, double* %out0, align 8 + ret void +} + +define arm_aapcscc void @single_F() nounwind { +entry: + %out0 = alloca double, align 8 + store double 0.000000e+000, double* %out0, align 8 +; No lowering support. +; %0 = call double asm "foo $1,$0", "=r,F"(double 1.000000e+000) nounwind +; store double %0, double* %out0, align 8 + ret void +} + +define arm_aapcscc void @single_s() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + ret void +} + +define arm_aapcscc void @single_g() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* @min1, align 4 + %1 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + %2 = call i32 asm "foo $1,$0", "=r,imr"(i32 1) nounwind + store i32 %2, i32* %out0, align 4 + ret void +} + +define arm_aapcscc void @single_X() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load 
i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* @min1, align 4 + %1 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + %2 = call i32 asm "foo $1,$0", "=r,X"(i32 1) nounwind + store i32 %2, i32* %out0, align 4 + %3 = call i32 asm "foo $1,$0", "=r,X"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind + store i32 %3, i32* %out0, align 4 +; No lowering support. +; %4 = call i32 asm "foo $1,$0", "=r,X"(double 1.000000e+001) nounwind +; store i32 %4, i32* %out0, align 4 +; %5 = call i32 asm "foo $1,$0", "=r,X"(double 1.000000e+000) nounwind +; store i32 %5, i32* %out0, align 4 + ret void +} + +define arm_aapcscc void @single_p() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + %0 = call i32 asm "foo $1,$0", "=r,r"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define arm_aapcscc void @multi_m() nounwind { +entry: + %tmp = load i32* @min1, align 4 + call void asm "foo $1,$0", "=*m|r,m|r"(i32* @mout0, i32 %tmp) nounwind + ret void +} + +define arm_aapcscc void @multi_o() nounwind { +entry: + %out0 = alloca i32, align 4 + %index = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %index, align 4 + ret void +} + +define arm_aapcscc void @multi_V() nounwind { +entry: + ret void +} + +define arm_aapcscc void @multi_lt() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|<r"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* %in1, align 4 + %1 = call i32 asm "foo $1,$0", "=r|r,r|r<"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + ret void +} + +define arm_aapcscc void @multi_gt() nounwind { 
+entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|>r"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* %in1, align 4 + %1 = call i32 asm "foo $1,$0", "=r|r,r|r>"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + ret void +} + +define arm_aapcscc void @multi_r() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|m"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define arm_aapcscc void @multi_i() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|i"(i32 1) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define arm_aapcscc void @multi_n() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|n"(i32 1) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define arm_aapcscc void @multi_E() nounwind { +entry: + %out0 = alloca double, align 8 + store double 0.000000e+000, double* %out0, align 8 +; No lowering support. +; %0 = call double asm "foo $1,$0", "=r|r,r|E"(double 1.000000e+001) nounwind +; store double %0, double* %out0, align 8 + ret void +} + +define arm_aapcscc void @multi_F() nounwind { +entry: + %out0 = alloca double, align 8 + store double 0.000000e+000, double* %out0, align 8 +; No lowering support. 
+; %0 = call double asm "foo $1,$0", "=r|r,r|F"(double 1.000000e+000) nounwind +; store double %0, double* %out0, align 8 + ret void +} + +define arm_aapcscc void @multi_s() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + ret void +} + +define arm_aapcscc void @multi_g() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* @min1, align 4 + %1 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + %2 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 1) nounwind + store i32 %2, i32* %out0, align 4 + ret void +} + +define arm_aapcscc void @multi_X() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* @min1, align 4 + %1 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + %2 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 1) nounwind + store i32 %2, i32* %out0, align 4 + %3 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind + store i32 %3, i32* %out0, align 4 +; No lowering support. 
+; %4 = call i32 asm "foo $1,$0", "=r|r,r|X"(double 1.000000e+001) nounwind +; store i32 %4, i32* %out0, align 4 +; %5 = call i32 asm "foo $1,$0", "=r|r,r|X"(double 1.000000e+000) nounwind +; store i32 %5, i32* %out0, align 4 + ret void +} + +define arm_aapcscc void @multi_p() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|r"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind + store i32 %0, i32* %out0, align 4 + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/mvn.ll b/src/LLVM/test/CodeGen/ARM/mvn.ll new file mode 100644 index 0000000..2c62bbb --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/mvn.ll
@@ -0,0 +1,74 @@ +; RUN: llc < %s -march=arm | grep mvn | count 8 + +define i32 @f1() { +entry: + ret i32 -1 +} + +define i32 @f2(i32 %a) { +entry: + %tmpnot = xor i32 %a, -1 ; <i32> [#uses=1] + ret i32 %tmpnot +} + +define i32 @f3(i32 %a) { +entry: + %tmp1 = shl i32 %a, 2 ; <i32> [#uses=1] + %tmp1not = xor i32 %tmp1, -1 ; <i32> [#uses=1] + ret i32 %tmp1not +} + +define i32 @f4(i32 %a, i8 %b) { +entry: + %shift.upgrd.1 = zext i8 %b to i32 ; <i32> [#uses=1] + %tmp3 = shl i32 %a, %shift.upgrd.1 ; <i32> [#uses=1] + %tmp3not = xor i32 %tmp3, -1 ; <i32> [#uses=1] + ret i32 %tmp3not +} + +define i32 @f5(i32 %a) { +entry: + %tmp1 = lshr i32 %a, 2 ; <i32> [#uses=1] + %tmp1not = xor i32 %tmp1, -1 ; <i32> [#uses=1] + ret i32 %tmp1not +} + +define i32 @f6(i32 %a, i8 %b) { +entry: + %shift.upgrd.2 = zext i8 %b to i32 ; <i32> [#uses=1] + %tmp2 = lshr i32 %a, %shift.upgrd.2 ; <i32> [#uses=1] + %tmp2not = xor i32 %tmp2, -1 ; <i32> [#uses=1] + ret i32 %tmp2not +} + +define i32 @f7(i32 %a) { +entry: + %tmp1 = ashr i32 %a, 2 ; <i32> [#uses=1] + %tmp1not = xor i32 %tmp1, -1 ; <i32> [#uses=1] + ret i32 %tmp1not +} + +define i32 @f8(i32 %a, i8 %b) { +entry: + %shift.upgrd.3 = zext i8 %b to i32 ; <i32> [#uses=1] + %tmp3 = ashr i32 %a, %shift.upgrd.3 ; <i32> [#uses=1] + %tmp3not = xor i32 %tmp3, -1 ; <i32> [#uses=1] + ret i32 %tmp3not +} + +define i32 @f9() { +entry: + %tmp4845 = add i32 0, 0 ; <i32> [#uses=1] + br label %cond_true4848 + +cond_true4848: ; preds = %entry + %tmp4851 = sub i32 -3, 0 ; <i32> [#uses=1] + %abc = add i32 %tmp4851, %tmp4845 ; <i32> [#uses=1] + ret i32 %abc +} + +define i1 @f10(i32 %a) { +entry: + %tmp102 = icmp eq i32 -2, %a ; <i1> [#uses=1] + ret i1 %tmp102 +}
diff --git a/src/LLVM/test/CodeGen/ARM/neon_arith1.ll b/src/LLVM/test/CodeGen/ARM/neon_arith1.ll new file mode 100644 index 0000000..5892737 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/neon_arith1.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=arm -mattr=+neon | grep vadd + +define <8 x i8> @t_i8x8(<8 x i8> %a, <8 x i8> %b) nounwind { +entry: + %0 = add <8 x i8> %a, %b + ret <8 x i8> %0 +}
diff --git a/src/LLVM/test/CodeGen/ARM/neon_div.ll b/src/LLVM/test/CodeGen/ARM/neon_div.ll new file mode 100644 index 0000000..de48fee --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/neon_div.ll
@@ -0,0 +1,48 @@ +; RUN: llc < %s -march=arm -mattr=+neon -pre-RA-sched=source | FileCheck %s + +define <8 x i8> @sdivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vrecpe.f32 +;CHECK: vrecpe.f32 +;CHECK: vmovn.i32 +;CHECK: vmovn.i32 +;CHECK: vmovn.i16 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = sdiv <8 x i8> %tmp1, %tmp2 + ret <8 x i8> %tmp3 +} + +define <8 x i8> @udivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vrecpe.f32 +;CHECK: vrecps.f32 +;CHECK: vrecpe.f32 +;CHECK: vrecps.f32 +;CHECK: vmovn.i32 +;CHECK: vmovn.i32 +;CHECK: vqmovun.s16 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = udiv <8 x i8> %tmp1, %tmp2 + ret <8 x i8> %tmp3 +} + +define <4 x i16> @sdivi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vrecpe.f32 +;CHECK: vrecps.f32 +;CHECK: vmovn.i32 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = sdiv <4 x i16> %tmp1, %tmp2 + ret <4 x i16> %tmp3 +} + +define <4 x i16> @udivi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vrecpe.f32 +;CHECK: vrecps.f32 +;CHECK: vrecps.f32 +;CHECK: vmovn.i32 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = udiv <4 x i16> %tmp1, %tmp2 + ret <4 x i16> %tmp3 +}
diff --git a/src/LLVM/test/CodeGen/ARM/neon_ld1.ll b/src/LLVM/test/CodeGen/ARM/neon_ld1.ll new file mode 100644 index 0000000..c78872a --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/neon_ld1.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -march=arm -mattr=+neon | grep vldr.64 | count 4 +; RUN: llc < %s -march=arm -mattr=+neon | grep vstr.64 +; RUN: llc < %s -march=arm -mattr=+neon | grep vmov + +define void @t1(<2 x i32>* %r, <4 x i16>* %a, <4 x i16>* %b) nounwind { +entry: + %0 = load <4 x i16>* %a, align 8 ; <<4 x i16>> [#uses=1] + %1 = load <4 x i16>* %b, align 8 ; <<4 x i16>> [#uses=1] + %2 = add <4 x i16> %0, %1 ; <<4 x i16>> [#uses=1] + %3 = bitcast <4 x i16> %2 to <2 x i32> ; <<2 x i32>> [#uses=1] + store <2 x i32> %3, <2 x i32>* %r, align 8 + ret void +} + +define <2 x i32> @t2(<4 x i16>* %a, <4 x i16>* %b) nounwind readonly { +entry: + %0 = load <4 x i16>* %a, align 8 ; <<4 x i16>> [#uses=1] + %1 = load <4 x i16>* %b, align 8 ; <<4 x i16>> [#uses=1] + %2 = sub <4 x i16> %0, %1 ; <<4 x i16>> [#uses=1] + %3 = bitcast <4 x i16> %2 to <2 x i32> ; <<2 x i32>> [#uses=1] + ret <2 x i32> %3 +}
diff --git a/src/LLVM/test/CodeGen/ARM/neon_ld2.ll b/src/LLVM/test/CodeGen/ARM/neon_ld2.ll new file mode 100644 index 0000000..130277b --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/neon_ld2.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -march=arm -mattr=+neon | grep vldmia | count 4 +; RUN: llc < %s -march=arm -mattr=+neon | grep vstmia | count 1 +; RUN: llc < %s -march=arm -mattr=+neon | grep vmov | count 2 + +define void @t1(<4 x i32>* %r, <2 x i64>* %a, <2 x i64>* %b) nounwind { +entry: + %0 = load <2 x i64>* %a, align 16 ; <<2 x i64>> [#uses=1] + %1 = load <2 x i64>* %b, align 16 ; <<2 x i64>> [#uses=1] + %2 = add <2 x i64> %0, %1 ; <<2 x i64>> [#uses=1] + %3 = bitcast <2 x i64> %2 to <4 x i32> ; <<4 x i32>> [#uses=1] + store <4 x i32> %3, <4 x i32>* %r, align 16 + ret void +} + +define <4 x i32> @t2(<2 x i64>* %a, <2 x i64>* %b) nounwind readonly { +entry: + %0 = load <2 x i64>* %a, align 16 ; <<2 x i64>> [#uses=1] + %1 = load <2 x i64>* %b, align 16 ; <<2 x i64>> [#uses=1] + %2 = sub <2 x i64> %0, %1 ; <<2 x i64>> [#uses=1] + %3 = bitcast <2 x i64> %2 to <4 x i32> ; <<4 x i32>> [#uses=1] + ret <4 x i32> %3 +} +
diff --git a/src/LLVM/test/CodeGen/ARM/neon_minmax.ll b/src/LLVM/test/CodeGen/ARM/neon_minmax.ll new file mode 100644 index 0000000..d301c6a --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/neon_minmax.ll
@@ -0,0 +1,81 @@ +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s + +define float @fmin_ole(float %x) nounwind { +;CHECK: fmin_ole: +;CHECK: vmin.f32 + %cond = fcmp ole float 1.0, %x + %min1 = select i1 %cond, float 1.0, float %x + ret float %min1 +} + +define float @fmin_ole_zero(float %x) nounwind { +;CHECK: fmin_ole_zero: +;CHECK-NOT: vmin.f32 + %cond = fcmp ole float 0.0, %x + %min1 = select i1 %cond, float 0.0, float %x + ret float %min1 +} + +define float @fmin_ult(float %x) nounwind { +;CHECK: fmin_ult: +;CHECK: vmin.f32 + %cond = fcmp ult float %x, 1.0 + %min1 = select i1 %cond, float %x, float 1.0 + ret float %min1 +} + +define float @fmax_ogt(float %x) nounwind { +;CHECK: fmax_ogt: +;CHECK: vmax.f32 + %cond = fcmp ogt float 1.0, %x + %max1 = select i1 %cond, float 1.0, float %x + ret float %max1 +} + +define float @fmax_uge(float %x) nounwind { +;CHECK: fmax_uge: +;CHECK: vmax.f32 + %cond = fcmp uge float %x, 1.0 + %max1 = select i1 %cond, float %x, float 1.0 + ret float %max1 +} + +define float @fmax_uge_zero(float %x) nounwind { +;CHECK: fmax_uge_zero: +;CHECK-NOT: vmax.f32 + %cond = fcmp uge float %x, 0.0 + %max1 = select i1 %cond, float %x, float 0.0 + ret float %max1 +} + +define float @fmax_olt_reverse(float %x) nounwind { +;CHECK: fmax_olt_reverse: +;CHECK: vmax.f32 + %cond = fcmp olt float %x, 1.0 + %max1 = select i1 %cond, float 1.0, float %x + ret float %max1 +} + +define float @fmax_ule_reverse(float %x) nounwind { +;CHECK: fmax_ule_reverse: +;CHECK: vmax.f32 + %cond = fcmp ult float 1.0, %x + %max1 = select i1 %cond, float %x, float 1.0 + ret float %max1 +} + +define float @fmin_oge_reverse(float %x) nounwind { +;CHECK: fmin_oge_reverse: +;CHECK: vmin.f32 + %cond = fcmp oge float %x, 1.0 + %min1 = select i1 %cond, float 1.0, float %x + ret float %min1 +} + +define float @fmin_ugt_reverse(float %x) nounwind { +;CHECK: fmin_ugt_reverse: +;CHECK: vmin.f32 + %cond = fcmp ugt float 1.0, %x + %min1 = select i1 %cond, float %x, float 1.0 + 
ret float %min1 +}
diff --git a/src/LLVM/test/CodeGen/ARM/neon_shift.ll b/src/LLVM/test/CodeGen/ARM/neon_shift.ll new file mode 100644 index 0000000..340f220 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/neon_shift.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +; <rdar://problem/9055897> +define <4 x i16> @t1(<4 x i32> %a) nounwind { +entry: +; CHECK: vqrshrn.s32 d{{[0-9]+}}, q{{[0-9]*}}, #13 + %x = tail call <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32> %a, <4 x i32> <i32 -13, i32 -13, i32 -13, i32 -13>) + ret <4 x i16> %x +} + +declare <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/ARM/pack.ll b/src/LLVM/test/CodeGen/ARM/pack.ll new file mode 100644 index 0000000..c4035da --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/pack.ll
@@ -0,0 +1,88 @@ +; RUN: llc < %s -march=arm -mattr=+v6 | FileCheck %s + +; CHECK: test1 +; CHECK: pkhbt r0, r0, r1, lsl #16 +define i32 @test1(i32 %X, i32 %Y) { + %tmp1 = and i32 %X, 65535 + %tmp4 = shl i32 %Y, 16 + %tmp5 = or i32 %tmp4, %tmp1 + ret i32 %tmp5 +} + +; CHECK: test2 +; CHECK: pkhbt r0, r0, r1, lsl #12 +define i32 @test2(i32 %X, i32 %Y) { + %tmp1 = and i32 %X, 65535 + %tmp3 = shl i32 %Y, 12 + %tmp4 = and i32 %tmp3, -65536 + %tmp57 = or i32 %tmp4, %tmp1 + ret i32 %tmp57 +} + +; CHECK: test3 +; CHECK: pkhbt r0, r0, r1, lsl #18 +define i32 @test3(i32 %X, i32 %Y) { + %tmp19 = and i32 %X, 65535 + %tmp37 = shl i32 %Y, 18 + %tmp5 = or i32 %tmp37, %tmp19 + ret i32 %tmp5 +} + +; CHECK: test4 +; CHECK: pkhbt r0, r0, r1 +define i32 @test4(i32 %X, i32 %Y) { + %tmp1 = and i32 %X, 65535 + %tmp3 = and i32 %Y, -65536 + %tmp46 = or i32 %tmp3, %tmp1 + ret i32 %tmp46 +} + +; CHECK: test5 +; CHECK: pkhtb r0, r0, r1, asr #16 +define i32 @test5(i32 %X, i32 %Y) { + %tmp17 = and i32 %X, -65536 + %tmp2 = bitcast i32 %Y to i32 + %tmp4 = lshr i32 %tmp2, 16 + %tmp5 = or i32 %tmp4, %tmp17 + ret i32 %tmp5 +} + +; CHECK: test5a +; CHECK: pkhtb r0, r0, r1, asr #16 +define i32 @test5a(i32 %X, i32 %Y) { + %tmp110 = and i32 %X, -65536 + %tmp37 = lshr i32 %Y, 16 + %tmp39 = bitcast i32 %tmp37 to i32 + %tmp5 = or i32 %tmp39, %tmp110 + ret i32 %tmp5 +} + +; CHECK: test6 +; CHECK: pkhtb r0, r0, r1, asr #12 +define i32 @test6(i32 %X, i32 %Y) { + %tmp1 = and i32 %X, -65536 + %tmp37 = lshr i32 %Y, 12 + %tmp38 = bitcast i32 %tmp37 to i32 + %tmp4 = and i32 %tmp38, 65535 + %tmp59 = or i32 %tmp4, %tmp1 + ret i32 %tmp59 +} + +; CHECK: test7 +; CHECK: pkhtb r0, r0, r1, asr #18 +define i32 @test7(i32 %X, i32 %Y) { + %tmp1 = and i32 %X, -65536 + %tmp3 = ashr i32 %Y, 18 + %tmp4 = and i32 %tmp3, 65535 + %tmp57 = or i32 %tmp4, %tmp1 + ret i32 %tmp57 +} + +; CHECK: test8 +; CHECK: pkhtb r0, r0, r1, asr #22 +define i32 @test8(i32 %X, i32 %Y) { + %tmp1 = and i32 %X, -65536 + %tmp3 = lshr i32 %Y, 22 + %tmp57 
= or i32 %tmp3, %tmp1 + ret i32 %tmp57 +}
diff --git a/src/LLVM/test/CodeGen/ARM/peephole-bitcast.ll b/src/LLVM/test/CodeGen/ARM/peephole-bitcast.ll new file mode 100644 index 0000000..e670a5b --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/peephole-bitcast.ll
@@ -0,0 +1,26 @@ +; RUN: llc < %s -march=arm -mcpu=cortex-a8 -regalloc=linearscan | FileCheck %s + +; vmov s0, r0 + vmov r0, s0 should have been optimized away. +; rdar://9104514 + +; Peephole leaves a dead vmovsr instruction behind, and depends on linear scan +; to remove it. + +define void @t(float %x) nounwind ssp { +entry: +; CHECK: t: +; CHECK-NOT: vmov +; CHECK: bl + %0 = bitcast float %x to i32 + %cmp = icmp ult i32 %0, 2139095039 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @doSomething(float %x) nounwind + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +declare void @doSomething(float)
diff --git a/src/LLVM/test/CodeGen/ARM/phi.ll b/src/LLVM/test/CodeGen/ARM/phi.ll new file mode 100644 index 0000000..dc1a95b --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/phi.ll
@@ -0,0 +1,23 @@ +; RUN: llc -march=arm -mattr=+v4t < %s | FileCheck %s +; <rdar://problem/8686347> + +define i32 @test1(i1 %a, i32* %b) { +; CHECK: test1 +entry: + br i1 %a, label %lblock, label %rblock + +lblock: + %lbranch = getelementptr i32* %b, i32 1 + br label %end + +rblock: + %rbranch = getelementptr i32* %b, i32 1 + br label %end + +end: +; CHECK: ldr r0, [r1, #4] + %gep = phi i32* [%lbranch, %lblock], [%rbranch, %rblock] + %r = load i32* %gep +; CHECK-NEXT: bx lr + ret i32 %r +}
diff --git a/src/LLVM/test/CodeGen/ARM/pr3502.ll b/src/LLVM/test/CodeGen/ARM/pr3502.ll new file mode 100644 index 0000000..606d969 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/pr3502.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -mtriple=arm-none-linux-gnueabi +;pr3502 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" + %struct.ArmPTD = type { i32 } + %struct.RegisterSave = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } + %struct.SHARED_AREA = type { i32, %struct.SHARED_AREA*, %struct.SHARED_AREA*, %struct.SHARED_AREA*, %struct.ArmPTD, void (%struct.RegisterSave*)*, void (%struct.RegisterSave*)*, i32, [1024 x i8], i32, i32, i32, i32, i32, i8, i8, i16, i32, i32, i32, i32, [16 x i8], i32, i32, i32, i8, i8, i8, i32, i16, i32, i64, i32, i32, i32, i32, i32, i32, i8*, i32, [256 x i8], i32, i32, i32, [20 x i8], %struct.RegisterSave, { %struct.WorldSwitchV5 }, [4 x i32] } + %struct.WorldSwitchV5 = type { i32, i32, i32, i32, i32, i32, i32 } + +define void @SomeCall(i32 %num) nounwind { +entry: + tail call void asm sideeffect "mcr p15, 0, $0, c7, c10, 4 \0A\09", "r,~{memory}"(i32 0) nounwind + tail call void asm sideeffect "mcr p15,0,$0,c7,c14,0", "r,~{memory}"(i32 0) nounwind + %0 = load %struct.SHARED_AREA** null, align 4 ; <%struct.SHARED_AREA*> [#uses=1] + %1 = ptrtoint %struct.SHARED_AREA* %0 to i32 ; <i32> [#uses=1] + %2 = lshr i32 %1, 20 ; <i32> [#uses=1] + %3 = tail call i32 @SetCurrEntry(i32 %2, i32 0) nounwind ; <i32> [#uses=0] + tail call void @ClearStuff(i32 0) nounwind + ret void +} + +declare i32 @SetCurrEntry(i32, i32) + +declare void @ClearStuff(i32)
diff --git a/src/LLVM/test/CodeGen/ARM/prefetch.ll b/src/LLVM/test/CodeGen/ARM/prefetch.ll new file mode 100644 index 0000000..9c8ff2b --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/prefetch.ll
@@ -0,0 +1,77 @@ +; RUN: llc < %s -march=thumb -mattr=-thumb2 | not grep pld +; RUN: llc < %s -march=thumb -mattr=+v7 | FileCheck %s -check-prefix=THUMB2 +; RUN: llc < %s -march=arm -mattr=+v7 | FileCheck %s -check-prefix=ARM +; RUN: llc < %s -march=arm -mcpu=cortex-a9-mp | FileCheck %s -check-prefix=ARM-MP +; rdar://8601536 + +define void @t1(i8* %ptr) nounwind { +entry: +; ARM: t1: +; ARM-NOT: pldw [r0] +; ARM: pld [r0] + +; ARM-MP: t1: +; ARM-MP: pldw [r0] +; ARM-MP: pld [r0] + +; THUMB2: t1: +; THUMB2-NOT: pldw [r0] +; THUMB2: pld [r0] + tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 3, i32 1 ) + tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 1 ) + ret void +} + +define void @t2(i8* %ptr) nounwind { +entry: +; ARM: t2: +; ARM: pld [r0, #1023] + +; THUMB2: t2: +; THUMB2: pld [r0, #1023] + %tmp = getelementptr i8* %ptr, i32 1023 + tail call void @llvm.prefetch( i8* %tmp, i32 0, i32 3, i32 1 ) + ret void +} + +define void @t3(i32 %base, i32 %offset) nounwind { +entry: +; ARM: t3: +; ARM: pld [r0, r1, lsr #2] + +; THUMB2: t3: +; THUMB2: lsrs r1, r1, #2 +; THUMB2: pld [r0, r1] + %tmp1 = lshr i32 %offset, 2 + %tmp2 = add i32 %base, %tmp1 + %tmp3 = inttoptr i32 %tmp2 to i8* + tail call void @llvm.prefetch( i8* %tmp3, i32 0, i32 3, i32 1 ) + ret void +} + +define void @t4(i32 %base, i32 %offset) nounwind { +entry: +; ARM: t4: +; ARM: pld [r0, r1, lsl #2] + +; THUMB2: t4: +; THUMB2: pld [r0, r1, lsl #2] + %tmp1 = shl i32 %offset, 2 + %tmp2 = add i32 %base, %tmp1 + %tmp3 = inttoptr i32 %tmp2 to i8* + tail call void @llvm.prefetch( i8* %tmp3, i32 0, i32 3, i32 1 ) + ret void +} + +declare void @llvm.prefetch(i8*, i32, i32, i32) nounwind + +define void @t5(i8* %ptr) nounwind { +entry: +; ARM: t5: +; ARM: pli [r0] + +; THUMB2: t5: +; THUMB2: pli [r0] + tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 0 ) + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/private.ll b/src/LLVM/test/CodeGen/ARM/private.ll new file mode 100644 index 0000000..f93ffe7 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/private.ll
@@ -0,0 +1,20 @@ +; Test to make sure that the 'private' is used correctly. +; +; RUN: llc < %s -mtriple=arm-linux-gnueabi > %t +; RUN: grep .Lfoo: %t +; RUN: egrep bl.*\.Lfoo %t +; RUN: grep .Lbaz: %t +; RUN: grep long.*\.Lbaz %t + +define private void @foo() { + ret void +} + +@baz = private global i32 4 + +define i32 @bar() { + call void @foo() + %1 = load i32* @baz, align 4 + ret i32 %1 +} +
diff --git a/src/LLVM/test/CodeGen/ARM/reg_sequence.ll b/src/LLVM/test/CodeGen/ARM/reg_sequence.ll new file mode 100644 index 0000000..3a19211 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/reg_sequence.ll
@@ -0,0 +1,350 @@ +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s +; RUN: llc < %s -march=arm -mcpu=cortex-a8 -regalloc=basic | FileCheck %s +; Implementing vld / vst as REG_SEQUENCE eliminates the extra vmov's. + +%struct.int16x8_t = type { <8 x i16> } +%struct.int32x4_t = type { <4 x i32> } +%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> } +%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> } +%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> } +%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> } + +define void @t1(i16* %i_ptr, i16* %o_ptr, %struct.int32x4_t* nocapture %vT0ptr, %struct.int32x4_t* nocapture %vT1ptr) nounwind { +entry: +; CHECK: t1: +; CHECK: vld1.16 +; CHECK-NOT: vmov d +; CHECK: vmovl.s16 +; CHECK: vshrn.i32 +; CHECK: vshrn.i32 +; CHECK-NOT: vmov d +; CHECK-NEXT: vst1.16 + %0 = getelementptr inbounds %struct.int32x4_t* %vT0ptr, i32 0, i32 0 ; <<4 x i32>*> [#uses=1] + %1 = load <4 x i32>* %0, align 16 ; <<4 x i32>> [#uses=1] + %2 = getelementptr inbounds %struct.int32x4_t* %vT1ptr, i32 0, i32 0 ; <<4 x i32>*> [#uses=1] + %3 = load <4 x i32>* %2, align 16 ; <<4 x i32>> [#uses=1] + %4 = bitcast i16* %i_ptr to i8* ; <i8*> [#uses=1] + %5 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %4, i32 1) ; <<8 x i16>> [#uses=1] + %6 = bitcast <8 x i16> %5 to <2 x double> ; <<2 x double>> [#uses=2] + %7 = extractelement <2 x double> %6, i32 0 ; <double> [#uses=1] + %8 = bitcast double %7 to <4 x i16> ; <<4 x i16>> [#uses=1] + %9 = sext <4 x i16> %8 to <4 x i32> ; <<4 x i32>> [#uses=1] + %10 = extractelement <2 x double> %6, i32 1 ; <double> [#uses=1] + %11 = bitcast double %10 to <4 x i16> ; <<4 x i16>> [#uses=1] + %12 = sext <4 x i16> %11 to <4 x i32> ; <<4 x i32>> [#uses=1] + %13 = mul <4 x i32> %1, %9 ; <<4 x i32>> [#uses=1] + %14 = mul <4 x i32> %3, %12 ; <<4 x i32>> [#uses=1] + %15 = tail call <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32> %13, <4 x i32> <i32 -12, i32 -12, i32 -12, i32 -12>) ; <<4 x i16>> 
[#uses=1] + %16 = tail call <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32> %14, <4 x i32> <i32 -12, i32 -12, i32 -12, i32 -12>) ; <<4 x i16>> [#uses=1] + %17 = shufflevector <4 x i16> %15, <4 x i16> %16, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; <<8 x i16>> [#uses=1] + %18 = bitcast i16* %o_ptr to i8* ; <i8*> [#uses=1] + tail call void @llvm.arm.neon.vst1.v8i16(i8* %18, <8 x i16> %17, i32 1) + ret void +} + +define void @t2(i16* %i_ptr, i16* %o_ptr, %struct.int16x8_t* nocapture %vT0ptr, %struct.int16x8_t* nocapture %vT1ptr) nounwind { +entry: +; CHECK: t2: +; CHECK: vld1.16 +; CHECK-NOT: vmov +; CHECK: vmul.i16 +; CHECK: vld1.16 +; CHECK: vmul.i16 +; CHECK-NOT: vmov +; CHECK: vst1.16 +; CHECK: vst1.16 + %0 = getelementptr inbounds %struct.int16x8_t* %vT0ptr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] + %1 = load <8 x i16>* %0, align 16 ; <<8 x i16>> [#uses=1] + %2 = getelementptr inbounds %struct.int16x8_t* %vT1ptr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] + %3 = load <8 x i16>* %2, align 16 ; <<8 x i16>> [#uses=1] + %4 = bitcast i16* %i_ptr to i8* ; <i8*> [#uses=1] + %5 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %4, i32 1) ; <<8 x i16>> [#uses=1] + %6 = getelementptr inbounds i16* %i_ptr, i32 8 ; <i16*> [#uses=1] + %7 = bitcast i16* %6 to i8* ; <i8*> [#uses=1] + %8 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %7, i32 1) ; <<8 x i16>> [#uses=1] + %9 = mul <8 x i16> %1, %5 ; <<8 x i16>> [#uses=1] + %10 = mul <8 x i16> %3, %8 ; <<8 x i16>> [#uses=1] + %11 = bitcast i16* %o_ptr to i8* ; <i8*> [#uses=1] + tail call void @llvm.arm.neon.vst1.v8i16(i8* %11, <8 x i16> %9, i32 1) + %12 = getelementptr inbounds i16* %o_ptr, i32 8 ; <i16*> [#uses=1] + %13 = bitcast i16* %12 to i8* ; <i8*> [#uses=1] + tail call void @llvm.arm.neon.vst1.v8i16(i8* %13, <8 x i16> %10, i32 1) + ret void +} + +define <8 x i8> @t3(i8* %A, i8* %B) nounwind { +; CHECK: t3: +; CHECK: vld3.8 +; CHECK: vmul.i8 +; CHECK: vmov r +; CHECK-NOT: vmov d +; CHECK: vst3.8 + 
%tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2] + %tmp2 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 0 ; <<8 x i8>> [#uses=1] + %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 2 ; <<8 x i8>> [#uses=1] + %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 1 ; <<8 x i8>> [#uses=1] + %tmp5 = sub <8 x i8> %tmp3, %tmp4 + %tmp6 = add <8 x i8> %tmp2, %tmp3 ; <<8 x i8>> [#uses=1] + %tmp7 = mul <8 x i8> %tmp4, %tmp2 + tail call void @llvm.arm.neon.vst3.v8i8(i8* %B, <8 x i8> %tmp5, <8 x i8> %tmp6, <8 x i8> %tmp7, i32 1) + ret <8 x i8> %tmp4 +} + +define void @t4(i32* %in, i32* %out) nounwind { +entry: +; CHECK: t4: +; CHECK: vld2.32 +; CHECK-NOT: vmov +; CHECK: vld2.32 +; CHECK-NOT: vmov +; CHECK: bne + %tmp1 = bitcast i32* %in to i8* ; <i8*> [#uses=1] + %tmp2 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp1, i32 1) ; <%struct.__neon_int32x4x2_t> [#uses=2] + %tmp3 = getelementptr inbounds i32* %in, i32 8 ; <i32*> [#uses=1] + %tmp4 = bitcast i32* %tmp3 to i8* ; <i8*> [#uses=1] + %tmp5 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp4, i32 1) ; <%struct.__neon_int32x4x2_t> [#uses=2] + %tmp8 = bitcast i32* %out to i8* ; <i8*> [#uses=1] + br i1 undef, label %return1, label %return2 + +return1: +; CHECK: %return1 +; CHECK-NOT: vmov +; CHECK-NEXT: vadd.i32 +; CHECK-NEXT: vadd.i32 +; CHECK-NEXT: vst2.32 + %tmp52 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0 ; <<4 x i32>> [#uses=1] + %tmp57 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 1 ; <<4 x i32>> [#uses=1] + %tmp = extractvalue %struct.__neon_int32x4x2_t %tmp5, 0 ; <<4 x i32>> [#uses=1] + %tmp39 = extractvalue %struct.__neon_int32x4x2_t %tmp5, 1 ; <<4 x i32>> [#uses=1] + %tmp6 = add <4 x i32> %tmp52, %tmp ; <<4 x i32>> [#uses=1] + %tmp7 = add <4 x i32> %tmp57, %tmp39 ; <<4 x i32>> [#uses=1] + tail call void @llvm.arm.neon.vst2.v4i32(i8* %tmp8, <4 x i32> %tmp6, <4 x i32> %tmp7, i32 1) + ret 
void + +return2: +; CHECK: %return2 +; CHECK: vadd.i32 +; CHECK: vorr {{q[0-9]+}}, {{q[0-9]+}} +; CHECK-NOT: vmov +; CHECK: vst2.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} + %tmp100 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0 ; <<4 x i32>> [#uses=1] + %tmp101 = extractvalue %struct.__neon_int32x4x2_t %tmp5, 1 ; <<4 x i32>> [#uses=1] + %tmp102 = add <4 x i32> %tmp100, %tmp101 ; <<4 x i32>> [#uses=1] + tail call void @llvm.arm.neon.vst2.v4i32(i8* %tmp8, <4 x i32> %tmp102, <4 x i32> %tmp101, i32 1) + call void @llvm.trap() + unreachable +} + +define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind { +; CHECK: t5: +; CHECK: vldmia +; How can FileCheck match Q and D registers? We need a lisp interpreter. +; CHECK: vorr {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}} +; CHECK-NOT: vmov +; CHECK: vld2.16 {d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r0] +; CHECK-NOT: vmov +; CHECK: vadd.i16 + %tmp0 = bitcast i16* %A to i8* ; <i8*> [#uses=1] + %tmp1 = load <8 x i16>* %B ; <<8 x i16>> [#uses=2] + %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 1) ; <%struct.__neon_int16x8x2_t> [#uses=2] + %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0 ; <<8 x i16>> [#uses=1] + %tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1 ; <<8 x i16>> [#uses=1] + %tmp5 = add <8 x i16> %tmp3, %tmp4 ; <<8 x i16>> [#uses=1] + ret <8 x i16> %tmp5 +} + +define <8 x i8> @t6(i8* %A, <8 x i8>* %B) nounwind { +; CHECK: t6: +; CHECK: vldr.64 +; CHECK: vorr d[[D0:[0-9]+]], d[[D1:[0-9]+]] +; CHECK-NEXT: vld2.8 {d[[D1]][1], d[[D0]][1]} + %tmp1 = load <8 x i8>* %B ; <<8 x i8>> [#uses=2] + %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) ; <%struct.__neon_int8x8x2_t> [#uses=2] + %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 ; <<8 x i8>> [#uses=1] + %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1 ; <<8 x i8>> [#uses=1] + %tmp5 = add <8 x 
i8> %tmp3, %tmp4 ; <<8 x i8>> [#uses=1] + ret <8 x i8> %tmp5 +} + +define void @t7(i32* %iptr, i32* %optr) nounwind { +entry: +; CHECK: t7: +; CHECK: vld2.32 +; CHECK: vst2.32 +; CHECK: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, +; CHECK: vorr q[[Q0:[0-9]+]], q[[Q1:[0-9]+]], q[[Q1:[0-9]+]] +; CHECK-NOT: vmov +; CHECK: vuzp.32 q[[Q1]], q[[Q0]] +; CHECK: vst1.32 + %0 = bitcast i32* %iptr to i8* ; <i8*> [#uses=2] + %1 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %0, i32 1) ; <%struct.__neon_int32x4x2_t> [#uses=2] + %tmp57 = extractvalue %struct.__neon_int32x4x2_t %1, 0 ; <<4 x i32>> [#uses=1] + %tmp60 = extractvalue %struct.__neon_int32x4x2_t %1, 1 ; <<4 x i32>> [#uses=1] + %2 = bitcast i32* %optr to i8* ; <i8*> [#uses=2] + tail call void @llvm.arm.neon.vst2.v4i32(i8* %2, <4 x i32> %tmp57, <4 x i32> %tmp60, i32 1) + %3 = tail call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %0, i32 1) ; <<4 x i32>> [#uses=1] + %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 0, i32 2> ; <<4 x i32>> [#uses=1] + tail call void @llvm.arm.neon.vst1.v4i32(i8* %2, <4 x i32> %4, i32 1) + ret void +} + +; PR7156 +define arm_aapcs_vfpcc i32 @t8() nounwind { +; CHECK: t8: +; CHECK: vrsqrte.f32 q8, q8 +bb.nph55.bb.nph55.split_crit_edge: + br label %bb3 + +bb3: ; preds = %bb3, %bb.nph55.bb.nph55.split_crit_edge + br i1 undef, label %bb5, label %bb3 + +bb5: ; preds = %bb3 + br label %bb.i25 + +bb.i25: ; preds = %bb.i25, %bb5 + %0 = shufflevector <2 x float> undef, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1] + %1 = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %0) nounwind ; <<4 x float>> [#uses=1] + %2 = fmul <4 x float> %1, undef ; <<4 x float>> [#uses=1] + %3 = fmul <4 x float> undef, %2 ; <<4 x float>> [#uses=1] + %tmp26.i = bitcast <4 x float> %3 to <2 x double> ; <<2 x double>> [#uses=1] + %4 = extractelement <2 x double> %tmp26.i, i32 0 ; <double> [#uses=1] + %5 = bitcast double %4 to <2 x float> ; 
<<2 x float>> [#uses=1] + %6 = extractelement <2 x float> %5, i32 1 ; <float> [#uses=1] + store float %6, float* undef, align 4 + br i1 undef, label %bb6, label %bb.i25 + +bb6: ; preds = %bb.i25 + br i1 undef, label %bb7, label %bb14 + +bb7: ; preds = %bb6 + br label %bb.i49 + +bb.i49: ; preds = %bb.i49, %bb7 + br i1 undef, label %bb.i19, label %bb.i49 + +bb.i19: ; preds = %bb.i19, %bb.i49 + br i1 undef, label %exit, label %bb.i19 + +exit: ; preds = %bb.i19 + unreachable + +bb14: ; preds = %bb6 + ret i32 0 +} + +%0 = type { %1, %1, %1, %1 } +%1 = type { %2 } +%2 = type { <4 x float> } +%3 = type { %0, %1 } + +; PR7157 +define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind { +; CHECK: t9: +; CHECK: vldr.64 +; CHECK-NOT: vmov d{{.*}}, d16 +; CHECK: vmov.i32 d17 +; CHECK-NEXT: vstmia r0, {d16, d17} +; CHECK-NEXT: vstmia r0, {d16, d17} + %3 = bitcast double 0.000000e+00 to <2 x float> ; <<2 x float>> [#uses=2] + %4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1] + store <4 x float> %4, <4 x float>* undef, align 16 + %5 = shufflevector <2 x float> %3, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1] + store <4 x float> %5, <4 x float>* undef, align 16 + br label %8 + +; <label>:6 ; preds = %8 + br label %7 + +; <label>:7 ; preds = %6 + br label %8 + +; <label>:8 ; preds = %7, %2 + br label %6 + +; <label>:9 ; preds = %8 + ret float undef + +; <label>:10 ; preds = %6 + ret float 9.990000e+02 +} + +; PR7162 +define arm_aapcs_vfpcc i32 @t10() nounwind { +entry: +; CHECK: t10: +; CHECK: vmov.i32 q[[Q0:[0-9]+]], #0x3F000000 +; CHECK: vmul.f32 q8, q8, d0[0] +; CHECK: vadd.f32 q8, q8, q8 + %0 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] + %1 = insertelement <4 x float> %0, float undef, i32 1 ; <<4 x float>> [#uses=1] + %2 = insertelement <4 x float> %1, float undef, i32 2 ; <<4 
x float>> [#uses=1] + %3 = insertelement <4 x float> %2, float undef, i32 3 ; <<4 x float>> [#uses=1] + %tmp54.i = bitcast <4 x float> %3 to <2 x double> ; <<2 x double>> [#uses=1] + %4 = extractelement <2 x double> %tmp54.i, i32 1 ; <double> [#uses=1] + %5 = bitcast double %4 to <2 x float> ; <<2 x float>> [#uses=1] + %6 = shufflevector <2 x float> %5, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] + %7 = fmul <4 x float> undef, %6 ; <<4 x float>> [#uses=1] + %8 = fadd <4 x float> %7, undef ; <<4 x float>> [#uses=1] + %9 = fadd <4 x float> %8, undef ; <<4 x float>> [#uses=1] + %10 = shufflevector <4 x float> undef, <4 x float> %9, <4 x i32> <i32 0, i32 1, i32 2, i32 7> ; <<4 x float>> [#uses=1] + %11 = fmul <4 x float> %10, <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01> ; <<4 x float>> [#uses=1] + %12 = shufflevector <4 x float> %11, <4 x float> undef, <4 x i32> <i32 3, i32 undef, i32 undef, i32 undef> ; <<4 x float>> [#uses=1] + %13 = shufflevector <4 x float> %12, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] + %14 = fmul <4 x float> %13, undef ; <<4 x float>> [#uses=1] + %15 = fadd <4 x float> undef, %14 ; <<4 x float>> [#uses=1] + %16 = shufflevector <4 x float> undef, <4 x float> %15, <4 x i32> <i32 0, i32 1, i32 6, i32 3> ; <<4 x float>> [#uses=1] + %17 = fmul <4 x float> %16, undef ; <<4 x float>> [#uses=1] + %18 = extractelement <4 x float> %17, i32 2 ; <float> [#uses=1] + store float %18, float* undef, align 4 + br i1 undef, label %exit, label %bb14 + +exit: ; preds = %bb.i19 + unreachable + +bb14: ; preds = %bb6 + ret i32 0 +} + +; This test crashes the coalescer because live variables were not updated properly. 
+define <8 x i8> @t11(i8* %A1, i8* %A2, i8* %A3, i8* %A4, i8* %A5, i8* %A6, i8* %A7, i8* %A8, i8* %B) nounwind { + %tmp1d = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A4, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=1] + %tmp2d = extractvalue %struct.__neon_int8x8x3_t %tmp1d, 0 ; <<8 x i8>> [#uses=1] + %tmp1f = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A6, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=1] + %tmp2f = extractvalue %struct.__neon_int8x8x3_t %tmp1f, 0 ; <<8 x i8>> [#uses=1] + %tmp2bd = add <8 x i8> zeroinitializer, %tmp2d ; <<8 x i8>> [#uses=1] + %tmp2abcd = mul <8 x i8> zeroinitializer, %tmp2bd ; <<8 x i8>> [#uses=1] + %tmp2ef = sub <8 x i8> zeroinitializer, %tmp2f ; <<8 x i8>> [#uses=1] + %tmp2efgh = mul <8 x i8> %tmp2ef, undef ; <<8 x i8>> [#uses=2] + call void @llvm.arm.neon.vst3.v8i8(i8* %A2, <8 x i8> undef, <8 x i8> undef, <8 x i8> %tmp2efgh, i32 1) + %tmp2 = sub <8 x i8> %tmp2efgh, %tmp2abcd ; <<8 x i8>> [#uses=1] + %tmp7 = mul <8 x i8> undef, %tmp2 ; <<8 x i8>> [#uses=1] + tail call void @llvm.arm.neon.vst3.v8i8(i8* %B, <8 x i8> undef, <8 x i8> undef, <8 x i8> %tmp7, i32 1) + ret <8 x i8> undef +} + +declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*, i32) nounwind readonly + +declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) nounwind readonly + +declare <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone + +declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>, i32) nounwind + +declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind + +declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) +nounwind + +declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*, i32) nounwind readonly + +declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8*, i32) nounwind readonly + +declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly + +declare %struct.__neon_int16x8x2_t 
@llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly + +declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>, i32) nounwind + +declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone + +declare void @llvm.trap() nounwind
diff --git a/src/LLVM/test/CodeGen/ARM/ret0.ll b/src/LLVM/test/CodeGen/ARM/ret0.ll new file mode 100644 index 0000000..212cc8f --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ret0.ll
@@ -0,0 +1,5 @@ +; RUN: llc < %s -march=arm + +define i32 @test() { + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/ARM/ret_arg1.ll b/src/LLVM/test/CodeGen/ARM/ret_arg1.ll new file mode 100644 index 0000000..5a35d8c --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ret_arg1.ll
@@ -0,0 +1,5 @@ +; RUN: llc < %s -march=arm + +define i32 @test(i32 %a1) { + ret i32 %a1 +}
diff --git a/src/LLVM/test/CodeGen/ARM/ret_arg2.ll b/src/LLVM/test/CodeGen/ARM/ret_arg2.ll new file mode 100644 index 0000000..db28c5f --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ret_arg2.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=arm + +define i32 @test(i32 %a1, i32 %a2) { + ret i32 %a2 +} +
diff --git a/src/LLVM/test/CodeGen/ARM/ret_arg3.ll b/src/LLVM/test/CodeGen/ARM/ret_arg3.ll new file mode 100644 index 0000000..c4da1f9 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ret_arg3.ll
@@ -0,0 +1,5 @@ +; RUN: llc < %s -march=arm +define i32 @test(i32 %a1, i32 %a2, i32 %a3) { + ret i32 %a3 +} +
diff --git a/src/LLVM/test/CodeGen/ARM/ret_arg4.ll b/src/LLVM/test/CodeGen/ARM/ret_arg4.ll new file mode 100644 index 0000000..bdb1cd9 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ret_arg4.ll
@@ -0,0 +1,5 @@ +; RUN: llc < %s -march=arm + +define i32 @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { + ret i32 %a4 +}
diff --git a/src/LLVM/test/CodeGen/ARM/ret_arg5.ll b/src/LLVM/test/CodeGen/ARM/ret_arg5.ll new file mode 100644 index 0000000..992c54e --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ret_arg5.ll
@@ -0,0 +1,5 @@ +; RUN: llc < %s -march=arm + +define i32 @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5) { + ret i32 %a5 +}
diff --git a/src/LLVM/test/CodeGen/ARM/ret_f32_arg2.ll b/src/LLVM/test/CodeGen/ARM/ret_f32_arg2.ll new file mode 100644 index 0000000..2bafea6 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ret_f32_arg2.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 + +define float @test_f32(float %a1, float %a2) { + ret float %a2 +} +
diff --git a/src/LLVM/test/CodeGen/ARM/ret_f32_arg5.ll b/src/LLVM/test/CodeGen/ARM/ret_f32_arg5.ll new file mode 100644 index 0000000..c6ce60e --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ret_f32_arg5.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 + +define float @test_f32_arg5(float %a1, float %a2, float %a3, float %a4, float %a5) { + ret float %a5 +} +
diff --git a/src/LLVM/test/CodeGen/ARM/ret_f64_arg2.ll b/src/LLVM/test/CodeGen/ARM/ret_f64_arg2.ll new file mode 100644 index 0000000..386e85f --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ret_f64_arg2.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 + +define double @test_f64(double %a1, double %a2) { + ret double %a2 +} +
diff --git a/src/LLVM/test/CodeGen/ARM/ret_f64_arg_reg_split.ll b/src/LLVM/test/CodeGen/ARM/ret_f64_arg_reg_split.ll new file mode 100644 index 0000000..bdb0a60 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ret_f64_arg_reg_split.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=arm -mcpu=arm8 -mattr=+vfp2 + +define double @test_double_arg_reg_split(i32 %a1, double %a2) { + ret double %a2 +} +
diff --git a/src/LLVM/test/CodeGen/ARM/ret_f64_arg_split.ll b/src/LLVM/test/CodeGen/ARM/ret_f64_arg_split.ll new file mode 100644 index 0000000..4f841a3 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ret_f64_arg_split.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 + +define double @test_double_arg_split(i64 %a1, i32 %a2, double %a3) { + ret double %a3 +} +
diff --git a/src/LLVM/test/CodeGen/ARM/ret_f64_arg_stack.ll b/src/LLVM/test/CodeGen/ARM/ret_f64_arg_stack.ll new file mode 100644 index 0000000..2144317 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ret_f64_arg_stack.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 + +define double @test_double_arg_stack(i64 %a1, i32 %a2, i32 %a3, double %a4) { + ret double %a4 +} +
diff --git a/src/LLVM/test/CodeGen/ARM/ret_i128_arg2.ll b/src/LLVM/test/CodeGen/ARM/ret_i128_arg2.ll new file mode 100644 index 0000000..908c34f --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ret_i128_arg2.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 + +define i128 @test_i128(i128 %a1, i128 %a2, i128 %a3) { + ret i128 %a3 +} +
diff --git a/src/LLVM/test/CodeGen/ARM/ret_i64_arg2.ll b/src/LLVM/test/CodeGen/ARM/ret_i64_arg2.ll new file mode 100644 index 0000000..b1a1024 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ret_i64_arg2.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 + +define i64 @test_i64(i64 %a1, i64 %a2) { + ret i64 %a2 +} +
diff --git a/src/LLVM/test/CodeGen/ARM/ret_i64_arg3.ll b/src/LLVM/test/CodeGen/ARM/ret_i64_arg3.ll new file mode 100644 index 0000000..ffc1d2f --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ret_i64_arg3.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 + +define i64 @test_i64_arg3(i64 %a1, i64 %a2, i64 %a3) { + ret i64 %a3 +} +
diff --git a/src/LLVM/test/CodeGen/ARM/ret_i64_arg_split.ll b/src/LLVM/test/CodeGen/ARM/ret_i64_arg_split.ll new file mode 100644 index 0000000..956bce5 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ret_i64_arg_split.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 + +define i64 @test_i64_arg_split(i64 %a1, i32 %a2, i64 %a3) { + ret i64 %a3 +} +
diff --git a/src/LLVM/test/CodeGen/ARM/ret_void.ll b/src/LLVM/test/CodeGen/ARM/ret_void.ll new file mode 100644 index 0000000..6bd179f --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/ret_void.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=arm + +define void @test() { + ret void +} +
diff --git a/src/LLVM/test/CodeGen/ARM/rev.ll b/src/LLVM/test/CodeGen/ARM/rev.ll new file mode 100644 index 0000000..180c5c6 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/rev.ll
@@ -0,0 +1,126 @@ +; RUN: llc < %s -march=arm -mattr=+v6 | FileCheck %s + +define i32 @test1(i32 %X) nounwind { +; CHECK: test1 +; CHECK: rev16 r0, r0 + %tmp1 = lshr i32 %X, 8 + %X15 = bitcast i32 %X to i32 + %tmp4 = shl i32 %X15, 8 + %tmp2 = and i32 %tmp1, 16711680 + %tmp5 = and i32 %tmp4, -16777216 + %tmp9 = and i32 %tmp1, 255 + %tmp13 = and i32 %tmp4, 65280 + %tmp6 = or i32 %tmp5, %tmp2 + %tmp10 = or i32 %tmp6, %tmp13 + %tmp14 = or i32 %tmp10, %tmp9 + ret i32 %tmp14 +} + +define i32 @test2(i32 %X) nounwind { +; CHECK: test2 +; CHECK: revsh r0, r0 + %tmp1 = lshr i32 %X, 8 + %tmp1.upgrd.1 = trunc i32 %tmp1 to i16 + %tmp3 = trunc i32 %X to i16 + %tmp2 = and i16 %tmp1.upgrd.1, 255 + %tmp4 = shl i16 %tmp3, 8 + %tmp5 = or i16 %tmp2, %tmp4 + %tmp5.upgrd.2 = sext i16 %tmp5 to i32 + ret i32 %tmp5.upgrd.2 +} + +; rdar://9147637 +define i32 @test3(i16 zeroext %a) nounwind { +entry: +; CHECK: test3: +; CHECK: revsh r0, r0 + %0 = tail call i16 @llvm.bswap.i16(i16 %a) + %1 = sext i16 %0 to i32 + ret i32 %1 +} + +declare i16 @llvm.bswap.i16(i16) nounwind readnone + +define i32 @test4(i16 zeroext %a) nounwind { +entry: +; CHECK: test4: +; CHECK: revsh r0, r0 + %conv = zext i16 %a to i32 + %shr9 = lshr i16 %a, 8 + %conv2 = zext i16 %shr9 to i32 + %shl = shl nuw nsw i32 %conv, 8 + %or = or i32 %conv2, %shl + %sext = shl i32 %or, 16 + %conv8 = ashr exact i32 %sext, 16 + ret i32 %conv8 +} + +; rdar://9609059 +define i32 @test5(i32 %i) nounwind readnone { +entry: +; CHECK: test5 +; CHECK: revsh r0, r0 + %shl = shl i32 %i, 24 + %shr = ashr exact i32 %shl, 16 + %shr23 = lshr i32 %i, 8 + %and = and i32 %shr23, 255 + %or = or i32 %shr, %and + ret i32 %or +} + +; rdar://9609108 +define i32 @test6(i32 %x) nounwind readnone { +entry: +; CHECK: test6 +; CHECK: rev16 r0, r0 + %and = shl i32 %x, 8 + %shl = and i32 %and, 65280 + %and2 = lshr i32 %x, 8 + %shr11 = and i32 %and2, 255 + %shr5 = and i32 %and2, 16711680 + %shl9 = and i32 %and, -16777216 + %or = or i32 %shr5, %shl9 + %or6 = or i32 
%or, %shr11 + %or10 = or i32 %or6, %shl + ret i32 %or10 +} + +; rdar://9164521 +define i32 @test7(i32 %a) nounwind readnone { +entry: +; CHECK: test7 +; CHECK: rev r0, r0 +; CHECK: lsr r0, r0, #16 + %and = lshr i32 %a, 8 + %shr3 = and i32 %and, 255 + %and2 = shl i32 %a, 8 + %shl = and i32 %and2, 65280 + %or = or i32 %shr3, %shl + ret i32 %or +} + +define i32 @test8(i32 %a) nounwind readnone { +entry: +; CHECK: test8 +; CHECK: revsh r0, r0 + %and = lshr i32 %a, 8 + %shr4 = and i32 %and, 255 + %and2 = shl i32 %a, 8 + %or = or i32 %shr4, %and2 + %sext = shl i32 %or, 16 + %conv3 = ashr exact i32 %sext, 16 + ret i32 %conv3 +} + +define zeroext i16 @test9(i16 zeroext %v) nounwind readnone { +entry: +; CHECK: test9 +; CHECK: rev r0, r0 +; CHECK: lsr r0, r0, #16 + %conv = zext i16 %v to i32 + %shr4 = lshr i32 %conv, 8 + %shl = shl nuw nsw i32 %conv, 8 + %or = or i32 %shr4, %shl + %conv3 = trunc i32 %or to i16 + ret i16 %conv3 +}
diff --git a/src/LLVM/test/CodeGen/ARM/sbfx.ll b/src/LLVM/test/CodeGen/ARM/sbfx.ll new file mode 100644 index 0000000..d29693e --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/sbfx.ll
@@ -0,0 +1,47 @@ +; RUN: llc < %s -march=arm -mattr=+v6t2 | FileCheck %s + +define i32 @f1(i32 %a) { +entry: +; CHECK: f1: +; CHECK: sbfx r0, r0, #0, #20 + %tmp = shl i32 %a, 12 + %tmp2 = ashr i32 %tmp, 12 + ret i32 %tmp2 +} + +define i32 @f2(i32 %a) { +entry: +; CHECK: f2: +; CHECK: bfc r0, #20, #12 + %tmp = shl i32 %a, 12 + %tmp2 = lshr i32 %tmp, 12 + ret i32 %tmp2 +} + +define i32 @f3(i32 %a) { +entry: +; CHECK: f3: +; CHECK: sbfx r0, r0, #5, #3 + %tmp = shl i32 %a, 24 + %tmp2 = ashr i32 %tmp, 29 + ret i32 %tmp2 +} + +define i32 @f4(i32 %a) { +entry: +; CHECK: f4: +; CHECK: ubfx r0, r0, #5, #3 + %tmp = shl i32 %a, 24 + %tmp2 = lshr i32 %tmp, 29 + ret i32 %tmp2 +} + +define i32 @f5(i32 %a) { +entry: +; CHECK: f5: +; CHECK-NOT: sbfx +; CHECK: bx + %tmp = shl i32 %a, 3 + %tmp2 = ashr i32 %tmp, 1 + ret i32 %tmp2 +}
diff --git a/src/LLVM/test/CodeGen/ARM/section.ll b/src/LLVM/test/CodeGen/ARM/section.ll new file mode 100644 index 0000000..3e9955d --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/section.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -mtriple=arm-linux | FileCheck %s + +; CHECK: .section .dtors,"aw",%progbits +; CHECK: __DTOR_END__: +@__DTOR_END__ = internal global [1 x i32] zeroinitializer, section ".dtors" ; <[1 x i32]*> [#uses=0] +
diff --git a/src/LLVM/test/CodeGen/ARM/select-imm.ll b/src/LLVM/test/CodeGen/ARM/select-imm.ll new file mode 100644 index 0000000..f43dde5 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/select-imm.ll
@@ -0,0 +1,114 @@ +; RUN: llc < %s -march=arm | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -march=arm -mattr=+thumb2 | FileCheck %s --check-prefix=ARMT2 +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s --check-prefix=THUMB2 + +define i32 @t1(i32 %c) nounwind readnone { +entry: +; ARM: t1: +; ARM: mov [[R1:r[0-9]+]], #101 +; ARM: orr [[R1b:r[0-9]+]], [[R1]], #256 +; ARM: movgt r0, #123 + +; ARMT2: t1: +; ARMT2: movw r0, #357 +; ARMT2: movgt r0, #123 + +; THUMB2: t1: +; THUMB2: movw r0, #357 +; THUMB2: movgt r0, #123 + + %0 = icmp sgt i32 %c, 1 + %1 = select i1 %0, i32 123, i32 357 + ret i32 %1 +} + +define i32 @t2(i32 %c) nounwind readnone { +entry: +; ARM: t2: +; ARM: mov r0, #123 +; ARM: movgt r0, #101 +; ARM: orrgt r0, r0, #256 + +; ARMT2: t2: +; ARMT2: mov r0, #123 +; ARMT2: movwgt r0, #357 + +; THUMB2: t2: +; THUMB2: mov{{(s|\.w)}} r0, #123 +; THUMB2: movwgt r0, #357 + + %0 = icmp sgt i32 %c, 1 + %1 = select i1 %0, i32 357, i32 123 + ret i32 %1 +} + +define i32 @t3(i32 %a) nounwind readnone { +entry: +; ARM: t3: +; ARM: mov r0, #0 +; ARM: moveq r0, #1 + +; ARMT2: t3: +; ARMT2: mov r0, #0 +; ARMT2: moveq r0, #1 + +; THUMB2: t3: +; THUMB2: mov{{(s|\.w)}} r0, #0 +; THUMB2: moveq r0, #1 + %0 = icmp eq i32 %a, 160 + %1 = zext i1 %0 to i32 + ret i32 %1 +} + +define i32 @t4(i32 %a, i32 %b, i32 %x) nounwind { +entry: +; ARM: t4: +; ARM: ldr +; ARM: movlt + +; ARMT2: t4: +; ARMT2: movwlt [[R0:r[0-9]+]], #65365 +; ARMT2: movtlt [[R0]], #65365 + +; THUMB2: t4: +; THUMB2: mvnlt.w [[R0:r[0-9]+]], #11141290 + %0 = icmp slt i32 %a, %b + %1 = select i1 %0, i32 4283826005, i32 %x + ret i32 %1 +} + +; rdar://9758317 +define i32 @t5(i32 %a) nounwind { +entry: +; ARM: t5: +; ARM-NOT: mov +; ARM: cmp r0, #1 +; ARM-NOT: mov +; ARM: movne r0, #0 + +; THUMB2: t5: +; THUMB2-NOT: mov +; THUMB2: cmp r0, #1 +; THUMB2: it ne +; THUMB2: movne r0, #0 + %cmp = icmp eq i32 %a, 1 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @t6(i32 %a) nounwind { +entry: +; 
ARM: t6: +; ARM-NOT: mov +; ARM: cmp r0, #0 +; ARM: movne r0, #1 + +; THUMB2: t6: +; THUMB2-NOT: mov +; THUMB2: cmp r0, #0 +; THUMB2: it ne +; THUMB2: movne r0, #1 + %tobool = icmp ne i32 %a, 0 + %lnot.ext = zext i1 %tobool to i32 + ret i32 %lnot.ext +}
diff --git a/src/LLVM/test/CodeGen/ARM/select.ll b/src/LLVM/test/CodeGen/ARM/select.ll new file mode 100644 index 0000000..3baf944 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/select.ll
@@ -0,0 +1,115 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s +; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s --check-prefix=CHECK-VFP +; RUN: llc < %s -mattr=+neon,+thumb2 -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=CHECK-NEON + +define i32 @f1(i32 %a.s) { +;CHECK: f1: +;CHECK: moveq +entry: + %tmp = icmp eq i32 %a.s, 4 + %tmp1.s = select i1 %tmp, i32 2, i32 3 + ret i32 %tmp1.s +} + +define i32 @f2(i32 %a.s) { +;CHECK: f2: +;CHECK: movgt +entry: + %tmp = icmp sgt i32 %a.s, 4 + %tmp1.s = select i1 %tmp, i32 2, i32 3 + ret i32 %tmp1.s +} + +define i32 @f3(i32 %a.s, i32 %b.s) { +;CHECK: f3: +;CHECK: movlt +entry: + %tmp = icmp slt i32 %a.s, %b.s + %tmp1.s = select i1 %tmp, i32 2, i32 3 + ret i32 %tmp1.s +} + +define i32 @f4(i32 %a.s, i32 %b.s) { +;CHECK: f4: +;CHECK: movle +entry: + %tmp = icmp sle i32 %a.s, %b.s + %tmp1.s = select i1 %tmp, i32 2, i32 3 + ret i32 %tmp1.s +} + +define i32 @f5(i32 %a.u, i32 %b.u) { +;CHECK: f5: +;CHECK: movls +entry: + %tmp = icmp ule i32 %a.u, %b.u + %tmp1.s = select i1 %tmp, i32 2, i32 3 + ret i32 %tmp1.s +} + +define i32 @f6(i32 %a.u, i32 %b.u) { +;CHECK: f6: +;CHECK: movhi +entry: + %tmp = icmp ugt i32 %a.u, %b.u + %tmp1.s = select i1 %tmp, i32 2, i32 3 + ret i32 %tmp1.s +} + +define double @f7(double %a, double %b) { +;CHECK: f7: +;CHECK: movlt +;CHECK: movlt +;CHECK-VFP: f7: +;CHECK-VFP: vmovmi + %tmp = fcmp olt double %a, 1.234e+00 + %tmp1 = select i1 %tmp, double -1.000e+00, double %b + ret double %tmp1 +} + +; <rdar://problem/7260094> +; +; We used to generate really horrible code for this function. The main cause was +; a lack of a custom lowering routine for an ISD::SELECT. This would result in +; two "it" blocks in the code: one for the "icmp" and another to move the index +; into the constant pool based on the value of the "icmp". 
If we have one "it" +; block generated, odds are good that we have close to the ideal code for this: +; +; CHECK-NEON: _f8: +; CHECK-NEON: adr r2, LCPI7_0 +; CHECK-NEON-NEXT: movw r3, #1123 +; CHECK-NEON-NEXT: adds r1, r2, #4 +; CHECK-NEON-NEXT: cmp r0, r3 +; CHECK-NEON-NEXT: it ne +; CHECK-NEON-NEXT: movne r1, r2 +; CHECK-NEON-NEXT: ldr +; CHECK-NEON: bx + +define arm_apcscc float @f8(i32 %a) nounwind { + %tmp = icmp eq i32 %a, 1123 + %tmp1 = select i1 %tmp, float 0x3FF3BE76C0000000, float 0x40030E9A20000000 + ret float %tmp1 +} + +; <rdar://problem/9049552> +; Glue values can only have a single use, but the following test exposed a +; case where a SELECT was lowered with 2 uses of a comparison, causing the +; scheduler to assert. +; CHECK-VFP: f9: + +declare i8* @objc_msgSend(i8*, i8*, ...) +define void @f9() optsize { +entry: + %cmp = icmp eq i8* undef, inttoptr (i32 4 to i8*) + %conv191 = select i1 %cmp, float -3.000000e+00, float 0.000000e+00 + %conv195 = select i1 %cmp, double -1.000000e+00, double 0.000000e+00 + %add = fadd double %conv195, 1.100000e+01 + %conv196 = fptrunc double %add to float + %add201 = fadd float undef, %conv191 + %tmp484 = bitcast float %conv196 to i32 + %tmp478 = bitcast float %add201 to i32 + %tmp490 = insertvalue [2 x i32] undef, i32 %tmp484, 0 + %tmp493 = insertvalue [2 x i32] %tmp490, i32 %tmp478, 1 + call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, [2 x i32], i32, float)*)(i8* undef, i8* undef, [2 x i32] %tmp493, i32 0, float 1.000000e+00) optsize + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/select_xform.ll b/src/LLVM/test/CodeGen/ARM/select_xform.ll new file mode 100644 index 0000000..a2317cd --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/select_xform.ll
@@ -0,0 +1,60 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=ARM +; RUN: llc < %s -mtriple=thumb-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=T2 +; rdar://8662825 + +define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind { +; ARM: t1: +; ARM: sub r0, r1, #-2147483647 +; ARM: movgt r0, r1 + +; T2: t1: +; T2: mvn r0, #-2147483648 +; T2: add r0, r1 +; T2: movgt r0, r1 + %tmp1 = icmp sgt i32 %c, 10 + %tmp2 = select i1 %tmp1, i32 0, i32 2147483647 + %tmp3 = add i32 %tmp2, %b + ret i32 %tmp3 +} + +define i32 @t2(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { +; ARM: t2: +; ARM: sub r0, r1, #10 +; ARM: movgt r0, r1 + +; T2: t2: +; T2: sub.w r0, r1, #10 +; T2: movgt r0, r1 + %tmp1 = icmp sgt i32 %c, 10 + %tmp2 = select i1 %tmp1, i32 0, i32 10 + %tmp3 = sub i32 %b, %tmp2 + ret i32 %tmp3 +} + +define i32 @t3(i32 %a, i32 %b, i32 %x, i32 %y) nounwind { +; ARM: t3: +; ARM: mvnlt r2, #0 +; ARM: and r0, r2, r3 + +; T2: t3: +; T2: movlt.w r2, #-1 +; T2: and.w r0, r2, r3 + %cond = icmp slt i32 %a, %b + %z = select i1 %cond, i32 -1, i32 %x + %s = and i32 %z, %y + ret i32 %s +} + +define i32 @t4(i32 %a, i32 %b, i32 %x, i32 %y) nounwind { +; ARM: t4: +; ARM: movlt r2, #0 +; ARM: orr r0, r2, r3 + +; T2: t4: +; T2: movlt r2, #0 +; T2: orr.w r0, r2, r3 + %cond = icmp slt i32 %a, %b + %z = select i1 %cond, i32 0, i32 %x + %s = or i32 %z, %y + ret i32 %s +}
diff --git a/src/LLVM/test/CodeGen/ARM/shifter_operand.ll b/src/LLVM/test/CodeGen/ARM/shifter_operand.ll new file mode 100644 index 0000000..0d69e19 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/shifter_operand.ll
@@ -0,0 +1,71 @@ +; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8 +; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s -check-prefix=A9 +; rdar://8576755 + + +define i32 @test1(i32 %X, i32 %Y, i8 %sh) { +; A8: test1: +; A8: add r0, r0, r1, lsl r2 + +; A9: test1: +; A9: add r0, r0, r1, lsl r2 + %shift.upgrd.1 = zext i8 %sh to i32 + %A = shl i32 %Y, %shift.upgrd.1 + %B = add i32 %X, %A + ret i32 %B +} + +define i32 @test2(i32 %X, i32 %Y, i8 %sh) { +; A8: test2: +; A8: bic r0, r0, r1, asr r2 + +; A9: test2: +; A9: bic r0, r0, r1, asr r2 + %shift.upgrd.2 = zext i8 %sh to i32 + %A = ashr i32 %Y, %shift.upgrd.2 + %B = xor i32 %A, -1 + %C = and i32 %X, %B + ret i32 %C +} + +define i32 @test3(i32 %base, i32 %base2, i32 %offset) { +entry: +; A8: test3: +; A8: ldr r0, [r0, r2, lsl #2] +; A8: ldr r1, [r1, r2, lsl #2] + +; lsl #2 is free +; A9: test3: +; A9: ldr r0, [r0, r2, lsl #2] +; A9: ldr r1, [r1, r2, lsl #2] + %tmp1 = shl i32 %offset, 2 + %tmp2 = add i32 %base, %tmp1 + %tmp3 = inttoptr i32 %tmp2 to i32* + %tmp4 = add i32 %base2, %tmp1 + %tmp5 = inttoptr i32 %tmp4 to i32* + %tmp6 = load i32* %tmp3 + %tmp7 = load i32* %tmp5 + %tmp8 = add i32 %tmp7, %tmp6 + ret i32 %tmp8 +} + +declare i8* @malloc(...) + +define fastcc void @test4(i16 %addr) nounwind { +entry: +; A8: test4: +; A8: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2] +; A8: str [[REG]], [r0, r1, lsl #2] + +; A9: test4: +; A9: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2] +; A9: str [[REG]], [r0, r1, lsl #2] + %0 = tail call i8* (...)* @malloc(i32 undef) nounwind + %1 = bitcast i8* %0 to i32* + %2 = sext i16 %addr to i32 + %3 = getelementptr inbounds i32* %1, i32 %2 + %4 = load i32* %3, align 4 + %5 = add nsw i32 %4, 1 + store i32 %5, i32* %3, align 4 + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/shuffle.ll b/src/LLVM/test/CodeGen/ARM/shuffle.ll new file mode 100644 index 0000000..7d6be4f --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/shuffle.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=pic -disable-fp-elim | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin" + +define <8 x i8> @shuf(<8 x i8> %a) nounwind readnone optsize ssp { +entry: +; CHECK: vtbl + %shuffle = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 4, i32 4, i32 5, i32 0> + ret <8 x i8> %shuffle +} + +define <8 x i8> @shuf2(<8 x i8> %a, <8 x i8> %b) nounwind readnone optsize ssp { +entry: +; CHECK: vtbl + %shuffle = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 4, i32 4, i32 5, i32 8> + ret <8 x i8> %shuffle +}
diff --git a/src/LLVM/test/CodeGen/ARM/smul.ll b/src/LLVM/test/CodeGen/ARM/smul.ll new file mode 100644 index 0000000..6ae1236 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/smul.ll
@@ -0,0 +1,36 @@ +; RUN: llc < %s -march=arm -mcpu=generic +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s + +@x = weak global i16 0 ; <i16*> [#uses=1] +@y = weak global i16 0 ; <i16*> [#uses=0] + +define i32 @f1(i32 %y) { +; CHECK: f1 +; CHECK: smulbt + %tmp = load i16* @x ; <i16> [#uses=1] + %tmp1 = add i16 %tmp, 2 ; <i16> [#uses=1] + %tmp2 = sext i16 %tmp1 to i32 ; <i32> [#uses=1] + %tmp3 = ashr i32 %y, 16 ; <i32> [#uses=1] + %tmp4 = mul i32 %tmp2, %tmp3 ; <i32> [#uses=1] + ret i32 %tmp4 +} + +define i32 @f2(i32 %x, i32 %y) { +; CHECK: f2 +; CHECK: smultt + %tmp1 = ashr i32 %x, 16 ; <i32> [#uses=1] + %tmp3 = ashr i32 %y, 16 ; <i32> [#uses=1] + %tmp4 = mul i32 %tmp3, %tmp1 ; <i32> [#uses=1] + ret i32 %tmp4 +} + +define i32 @f3(i32 %a, i16 %x, i32 %y) { +; CHECK: f3 +; CHECK: smlabt + %tmp = sext i16 %x to i32 ; <i32> [#uses=1] + %tmp2 = ashr i32 %y, 16 ; <i32> [#uses=1] + %tmp3 = mul i32 %tmp2, %tmp ; <i32> [#uses=1] + %tmp5 = add i32 %tmp3, %a ; <i32> [#uses=1] + ret i32 %tmp5 +} +
diff --git a/src/LLVM/test/CodeGen/ARM/spill-q.ll b/src/LLVM/test/CodeGen/ARM/spill-q.ll new file mode 100644 index 0000000..bf4e55c --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/spill-q.ll
@@ -0,0 +1,91 @@ +; RUN: llc < %s -mtriple=armv7-elf -mattr=+neon | FileCheck %s +; PR4789 + +%bar = type { float, float, float } +%baz = type { i32, [16 x %bar], [16 x float], [16 x i32], i8 } +%foo = type { <4 x float> } +%quux = type { i32 (...)**, %baz*, i32 } +%quuz = type { %quux, i32, %bar, [128 x i8], [16 x %foo], %foo, %foo, %foo } + +declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly + +define void @aaa(%quuz* %this, i8* %block) { +; CHECK: aaa: +; CHECK: bic sp, sp, #15 +; CHECK: vst1.64 {{.*}}sp, :128 +; CHECK: vld1.64 {{.*}}sp, :128 +entry: + %aligned_vec = alloca <4 x float>, align 16 + %"alloca point" = bitcast i32 0 to i32 + %vecptr = bitcast <4 x float>* %aligned_vec to i8* + %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %vecptr, i32 1) nounwind ; <<4 x float>> [#uses=1] + store float 6.300000e+01, float* undef, align 4 + %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1] + store float 0.000000e+00, float* undef, align 4 + %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1] + %ld3 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld4 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld5 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld6 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld7 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld8 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld9 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store 
float 0.000000e+00, float* undef, align 4 + %ld10 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld11 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld12 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %val173 = load <4 x float>* undef ; <<4 x float>> [#uses=1] + br label %bb4 + +bb4: ; preds = %bb193, %entry + %besterror.0.2264 = phi <4 x float> [ undef, %entry ], [ %besterror.0.0, %bb193 ] ; <<4 x float>> [#uses=2] + %part0.0.0261 = phi <4 x float> [ zeroinitializer, %entry ], [ %23, %bb193 ] ; <<4 x float>> [#uses=2] + %3 = fmul <4 x float> zeroinitializer, %0 ; <<4 x float>> [#uses=2] + %4 = fadd <4 x float> %3, %part0.0.0261 ; <<4 x float>> [#uses=1] + %5 = shufflevector <4 x float> %3, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1] + %6 = shufflevector <2 x float> %5, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>> [#uses=1] + %7 = fmul <4 x float> %1, undef ; <<4 x float>> [#uses=1] + %8 = fadd <4 x float> %7, <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01> ; <<4 x float>> [#uses=1] + %9 = fptosi <4 x float> %8 to <4 x i32> ; <<4 x i32>> [#uses=1] + %10 = sitofp <4 x i32> %9 to <4 x float> ; <<4 x float>> [#uses=1] + %11 = fmul <4 x float> %10, %2 ; <<4 x float>> [#uses=1] + %12 = fmul <4 x float> undef, %6 ; <<4 x float>> [#uses=1] + %13 = fmul <4 x float> %11, %4 ; <<4 x float>> [#uses=1] + %14 = fsub <4 x float> %12, %13 ; <<4 x float>> [#uses=1] + %15 = fsub <4 x float> %14, undef ; <<4 x float>> [#uses=1] + %16 = fmul <4 x float> %15, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00> ; <<4 x float>> [#uses=1] + %17 = fadd <4 x float> %16, undef ; <<4 x float>> [#uses=1] + %18 = fmul <4 x float> %17, %val173 ; <<4 x 
float>> [#uses=1] + %19 = shufflevector <4 x float> %18, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1] + %20 = shufflevector <2 x float> %19, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] + %tmp1 = fadd <4 x float> %20, %ld3 + %tmp2 = fadd <4 x float> %tmp1, %ld4 + %tmp3 = fadd <4 x float> %tmp2, %ld5 + %tmp4 = fadd <4 x float> %tmp3, %ld6 + %tmp5 = fadd <4 x float> %tmp4, %ld7 + %tmp6 = fadd <4 x float> %tmp5, %ld8 + %tmp7 = fadd <4 x float> %tmp6, %ld9 + %tmp8 = fadd <4 x float> %tmp7, %ld10 + %tmp9 = fadd <4 x float> %tmp8, %ld11 + %21 = fadd <4 x float> %tmp9, %ld12 + %22 = fcmp ogt <4 x float> %besterror.0.2264, %21 ; <<4 x i1>> [#uses=0] + %tmp = extractelement <4 x i1> %22, i32 0 + br i1 %tmp, label %bb193, label %bb186 + +bb186: ; preds = %bb4 + br label %bb193 + +bb193: ; preds = %bb186, %bb4 + %besterror.0.0 = phi <4 x float> [ %21, %bb186 ], [ %besterror.0.2264, %bb4 ] ; <<4 x float>> [#uses=1] + %23 = fadd <4 x float> %part0.0.0261, zeroinitializer ; <<4 x float>> [#uses=1] + br label %bb4 +}
diff --git a/src/LLVM/test/CodeGen/ARM/stack-frame.ll b/src/LLVM/test/CodeGen/ARM/stack-frame.ll new file mode 100644 index 0000000..86cad93 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/stack-frame.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=arm +; RUN: llc < %s -march=arm | grep add | count 1 + +define void @f1() { + %c = alloca i8, align 1 + ret void +} + +define i32 @f2() { + ret i32 1 +} + +
diff --git a/src/LLVM/test/CodeGen/ARM/stm.ll b/src/LLVM/test/CodeGen/ARM/stm.ll new file mode 100644 index 0000000..82dc14d --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/stm.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 | FileCheck %s + +@"\01LC" = internal constant [32 x i8] c"Boolean Not: %d %d %d %d %d %d\0A\00", section "__TEXT,__cstring,cstring_literals" ; <[32 x i8]*> [#uses=1] +@"\01LC1" = internal constant [26 x i8] c"Bitwise Not: %d %d %d %d\0A\00", section "__TEXT,__cstring,cstring_literals" ; <[26 x i8]*> [#uses=1] + +declare i32 @printf(i8* nocapture, ...) nounwind + +define i32 @main() nounwind { +entry: +; CHECK: main +; CHECK: push +; CHECK: stm + %0 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([26 x i8]* @"\01LC1", i32 0, i32 0), i32 -2, i32 -3, i32 2, i32 -6) nounwind ; <i32> [#uses=0] + %1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([32 x i8]* @"\01LC", i32 0, i32 0), i32 0, i32 1, i32 0, i32 1, i32 0, i32 1) nounwind ; <i32> [#uses=0] + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/ARM/str_post.ll b/src/LLVM/test/CodeGen/ARM/str_post.ll new file mode 100644 index 0000000..a76c3c6 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/str_post.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -march=arm | FileCheck %s + +define i16 @test1(i32* %X, i16* %A) { +; CHECK: test1: +; CHECK: strh {{.*}}[{{.*}}], #-4 + %Y = load i32* %X ; <i32> [#uses=1] + %tmp1 = trunc i32 %Y to i16 ; <i16> [#uses=1] + store i16 %tmp1, i16* %A + %tmp2 = ptrtoint i16* %A to i16 ; <i16> [#uses=1] + %tmp3 = sub i16 %tmp2, 4 ; <i16> [#uses=1] + ret i16 %tmp3 +} + +define i32 @test2(i32* %X, i32* %A) { +; CHECK: test2: +; CHECK: str {{.*}}[{{.*}}], + %Y = load i32* %X ; <i32> [#uses=1] + store i32 %Y, i32* %A + %tmp1 = ptrtoint i32* %A to i32 ; <i32> [#uses=1] + %tmp2 = sub i32 %tmp1, 4 ; <i32> [#uses=1] + ret i32 %tmp2 +}
diff --git a/src/LLVM/test/CodeGen/ARM/str_pre-2.ll b/src/LLVM/test/CodeGen/ARM/str_pre-2.ll new file mode 100644 index 0000000..f4e3a44 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/str_pre-2.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -mtriple=armv6-linux-gnu -regalloc=linearscan | FileCheck %s +; RUN: llc < %s -mtriple=armv6-linux-gnu -regalloc=basic | FileCheck %s + +; The greedy register allocator uses a single CSR here, invalidating the test. + +@b = external global i64* + +define i64 @t(i64 %a) nounwind readonly { +entry: +; CHECK: push {lr} +; CHECK: pop {lr} + %0 = load i64** @b, align 4 + %1 = load i64* %0, align 4 + %2 = mul i64 %1, %a + ret i64 %2 +}
diff --git a/src/LLVM/test/CodeGen/ARM/str_pre.ll b/src/LLVM/test/CodeGen/ARM/str_pre.ll new file mode 100644 index 0000000..3eda1f1 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/str_pre.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=arm | \ +; RUN: grep {str.*\\!} | count 2 + +define void @test1(i32* %X, i32* %A, i32** %dest) { + %B = load i32* %A ; <i32> [#uses=1] + %Y = getelementptr i32* %X, i32 4 ; <i32*> [#uses=2] + store i32 %B, i32* %Y + store i32* %Y, i32** %dest + ret void +} + +define i16* @test2(i16* %X, i32* %A) { + %B = load i32* %A ; <i32> [#uses=1] + %Y = getelementptr i16* %X, i32 4 ; <i16*> [#uses=2] + %tmp = trunc i32 %B to i16 ; <i16> [#uses=1] + store i16 %tmp, i16* %Y + ret i16* %Y +}
diff --git a/src/LLVM/test/CodeGen/ARM/str_trunc.ll b/src/LLVM/test/CodeGen/ARM/str_trunc.ll new file mode 100644 index 0000000..52652ab --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/str_trunc.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=arm | \ +; RUN: grep strb | count 1 +; RUN: llc < %s -march=arm | \ +; RUN: grep strh | count 1 + +define void @test1(i32 %v, i16* %ptr) { + %tmp = trunc i32 %v to i16 ; <i16> [#uses=1] + store i16 %tmp, i16* %ptr + ret void +} + +define void @test2(i32 %v, i8* %ptr) { + %tmp = trunc i32 %v to i8 ; <i8> [#uses=1] + store i8 %tmp, i8* %ptr + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/sub.ll b/src/LLVM/test/CodeGen/ARM/sub.ll new file mode 100644 index 0000000..06ea703 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/sub.ll
@@ -0,0 +1,38 @@ +; RUN: llc -march=arm < %s | FileCheck %s + +; 171 = 0x000000ab +define i64 @f1(i64 %a) { +; CHECK: f1 +; CHECK: subs r0, r0, #171 +; CHECK: sbc r1, r1, #0 + %tmp = sub i64 %a, 171 + ret i64 %tmp +} + +; 66846720 = 0x03fc0000 +define i64 @f2(i64 %a) { +; CHECK: f2 +; CHECK: subs r0, r0, #66846720 +; CHECK: sbc r1, r1, #0 + %tmp = sub i64 %a, 66846720 + ret i64 %tmp +} + +; 734439407618 = 0x000000ab00000002 +define i64 @f3(i64 %a) { +; CHECK: f3 +; CHECK: subs r0, r0, #2 +; CHECK: sbc r1, r1, #171 + %tmp = sub i64 %a, 734439407618 + ret i64 %tmp +} + +define i32 @f4(i32 %x) { +entry: +; CHECK: f4 +; CHECK: rsbs + %sub = sub i32 1, %x + %cmp = icmp ugt i32 %sub, 0 + %sel = select i1 %cmp, i32 1, i32 %sub + ret i32 %sel +}
diff --git a/src/LLVM/test/CodeGen/ARM/subreg-remat.ll b/src/LLVM/test/CodeGen/ARM/subreg-remat.ll new file mode 100644 index 0000000..993d7ec --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/subreg-remat.ll
@@ -0,0 +1,52 @@ +; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 -pre-RA-sched=source | FileCheck %s +target triple = "thumbv7-apple-ios" +; <rdar://problem/10032939> +; +; The vector %v2 is built like this: +; +; %vreg6:ssub_1<def> = VMOVSR %vreg0<kill>, pred:14, pred:%noreg, %vreg6<imp-def>; DPR_VFP2:%vreg6 GPR:%vreg0 +; %vreg6:ssub_0<def> = VLDRS <cp#0>, 0, pred:14, pred:%noreg; mem:LD4[ConstantPool] DPR_VFP2:%vreg6 +; +; When %vreg6 spills, the VLDRS constant pool load cannot be rematerialized +; since it implicitly reads the ssub_1 sub-register. +; +; CHECK: f1 +; CHECK: vmov s1, r0 +; CHECK: vldr.32 s0, LCPI +; The vector must be spilled: +; CHECK: vstr.64 d0, +; CHECK: asm clobber d0 +; And reloaded after the asm: +; CHECK: vldr.64 [[D16:d[0-9]+]], +; CHECK: vstr.64 [[D16]], [r1] +define void @f1(float %x, <2 x float>* %p) { + %v1 = insertelement <2 x float> undef, float %x, i32 1 + %v2 = insertelement <2 x float> %v1, float 0x400921FB60000000, i32 0 + %y = call double asm sideeffect "asm clobber $0", "=w,0,~{d1},~{d2},~{d3},~{d4},~{d5},~{d6},~{d7},~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15},~{d16},~{d17},~{d18},~{d19},~{d20},~{d21},~{d22},~{d23},~{d24},~{d25},~{d26},~{d27},~{d28},~{d29},~{d30},~{d31}"(<2 x float> %v2) nounwind + store <2 x float> %v2, <2 x float>* %p, align 8 + ret void +} + +; On the other hand, when the partial redef doesn't read the full register +; because the bits are undef, we should rematerialize. The vector is now built +; like this: +; +; %vreg2:ssub_0<def> = VLDRS <cp#0>, 0, pred:14, pred:%noreg, %vreg2<imp-def>; mem:LD4[ConstantPool] +; +; The extra <imp-def> operand indicates that the instruction fully defines the +; virtual register. It doesn't read the old value. 
+; +; CHECK: f2 +; CHECK: vldr.32 s0, LCPI +; The vector must not be spilled: +; CHECK-NOT: vstr.64 +; CHECK: asm clobber d0 +; But instead rematerialize after the asm: +; CHECK: vldr.32 [[S0:s[0-9]+]], LCPI +; CHECK: vstr.64 [[D0:d[0-9]+]], [r0] +define void @f2(<2 x float>* %p) { + %v2 = insertelement <2 x float> undef, float 0x400921FB60000000, i32 0 + %y = call double asm sideeffect "asm clobber $0", "=w,0,~{d1},~{d2},~{d3},~{d4},~{d5},~{d6},~{d7},~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15},~{d16},~{d17},~{d18},~{d19},~{d20},~{d21},~{d22},~{d23},~{d24},~{d25},~{d26},~{d27},~{d28},~{d29},~{d30},~{d31}"(<2 x float> %v2) nounwind + store <2 x float> %v2, <2 x float>* %p, align 8 + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/sxt_rot.ll b/src/LLVM/test/CodeGen/ARM/sxt_rot.ll new file mode 100644 index 0000000..550c420 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/sxt_rot.ll
@@ -0,0 +1,30 @@ +; RUN: llc < %s -march=arm -mattr=+v6 | FileCheck %s + +define i32 @test0(i8 %A) { +; CHECK: test0 +; CHECK: sxtb r0, r0 + %B = sext i8 %A to i32 + ret i32 %B +} + +define signext i8 @test1(i32 %A) { +; CHECK: test1 +; CHECK: sxtb r0, r0, ror #8 + %B = lshr i32 %A, 8 + %C = shl i32 %A, 24 + %D = or i32 %B, %C + %E = trunc i32 %D to i8 + ret i8 %E +} + +define signext i32 @test2(i32 %A, i32 %X) { +; CHECK: test2 +; CHECK: sxtab r0, r1, r0 + %B = lshr i32 %A, 8 + %C = shl i32 %A, 24 + %D = or i32 %B, %C + %E = trunc i32 %D to i8 + %F = sext i8 %E to i32 + %G = add i32 %F, %X + ret i32 %G +}
diff --git a/src/LLVM/test/CodeGen/ARM/t2-imm.ll b/src/LLVM/test/CodeGen/ARM/t2-imm.ll new file mode 100644 index 0000000..8b41459 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/t2-imm.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +define i32 @f6(i32 %a) { +; CHECK:f6 +; CHECK: movw r0, #1123 +; CHECK: movt r0, #1000 + %tmp = add i32 0, 65537123 + ret i32 %tmp +}
diff --git a/src/LLVM/test/CodeGen/ARM/tail-opts.ll b/src/LLVM/test/CodeGen/ARM/tail-opts.ll new file mode 100644 index 0000000..3dc77e2 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/tail-opts.ll
@@ -0,0 +1,67 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=dynamic-no-pic -mcpu=cortex-a8 -asm-verbose=false | FileCheck %s + +declare void @bar(i32) +declare void @car(i32) +declare void @dar(i32) +declare void @ear(i32) +declare void @far(i32) +declare i1 @qux() + +@GHJK = global i32 0 + +declare i8* @choose(i8*, i8*) + +; BranchFolding should tail-duplicate the indirect jump to avoid +; redundant branching. + +; CHECK: tail_duplicate_me: +; CHECK: qux +; CHECK: qux +; CHECK: movw r{{[0-9]+}}, :lower16:_GHJK +; CHECK: movt r{{[0-9]+}}, :upper16:_GHJK +; CHECK: str r +; CHECK-NEXT: bx r +; CHECK: movw r{{[0-9]+}}, :lower16:_GHJK +; CHECK: movt r{{[0-9]+}}, :upper16:_GHJK +; CHECK: str r +; CHECK-NEXT: bx r +; CHECK: movw r{{[0-9]+}}, :lower16:_GHJK +; CHECK: movt r{{[0-9]+}}, :upper16:_GHJK +; CHECK: str r +; CHECK-NEXT: bx r + +define void @tail_duplicate_me() nounwind { +entry: + %a = call i1 @qux() + %c = call i8* @choose(i8* blockaddress(@tail_duplicate_me, %return), + i8* blockaddress(@tail_duplicate_me, %altret)) + br i1 %a, label %A, label %next +next: + %b = call i1 @qux() + br i1 %b, label %B, label %C + +A: + call void @bar(i32 0) + store i32 0, i32* @GHJK + br label %M + +B: + call void @car(i32 1) + store i32 0, i32* @GHJK + br label %M + +C: + call void @dar(i32 2) + store i32 0, i32* @GHJK + br label %M + +M: + indirectbr i8* %c, [label %return, label %altret] + +return: + call void @ear(i32 1000) + ret void +altret: + call void @far(i32 1001) + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/thread_pointer.ll b/src/LLVM/test/CodeGen/ARM/thread_pointer.ll new file mode 100644 index 0000000..3143387 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/thread_pointer.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \ +; RUN: grep {__aeabi_read_tp} + +define i8* @test() { +entry: + %tmp1 = call i8* @llvm.arm.thread.pointer( ) ; <i8*> [#uses=0] + ret i8* %tmp1 +} + +declare i8* @llvm.arm.thread.pointer()
diff --git a/src/LLVM/test/CodeGen/ARM/thumb1-varalloc.ll b/src/LLVM/test/CodeGen/ARM/thumb1-varalloc.ll new file mode 100644 index 0000000..aa88ae0 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/thumb1-varalloc.ll
@@ -0,0 +1,42 @@ +; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=thumbv6-apple-darwin -regalloc=basic | FileCheck %s +; rdar://8819685 + +@__bar = external hidden global i8* +@__baz = external hidden global i8* + +define i8* @_foo() { +entry: +; CHECK: foo: + + %size = alloca i32, align 4 + %0 = load i8** @__bar, align 4 + %1 = icmp eq i8* %0, null + br i1 %1, label %bb1, label %bb3 +; CHECK: bne + +bb1: + store i32 1026, i32* %size, align 4 + %2 = alloca [1026 x i8], align 1 +; CHECK: mov [[R0:r[0-9]+]], sp +; CHECK: adds {{r[0-9]+}}, [[R0]], {{r[0-9]+}} + %3 = getelementptr inbounds [1026 x i8]* %2, i32 0, i32 0 + %4 = call i32 @_called_func(i8* %3, i32* %size) nounwind + %5 = icmp eq i32 %4, 0 + br i1 %5, label %bb2, label %bb3 + +bb2: + %6 = call i8* @strdup(i8* %3) nounwind + store i8* %6, i8** @__baz, align 4 + br label %bb3 + +bb3: + %.0 = phi i8* [ %0, %entry ], [ %6, %bb2 ], [ %3, %bb1 ] +; CHECK: subs r4, #5 +; CHECK-NEXT: mov sp, r4 +; CHECK-NEXT: pop {r4, r5, r6, r7, pc} + ret i8* %.0 +} + +declare noalias i8* @strdup(i8* nocapture) nounwind +declare i32 @_called_func(i8*, i32*) nounwind \ No newline at end of file
diff --git a/src/LLVM/test/CodeGen/ARM/thumb2-it-block.ll b/src/LLVM/test/CodeGen/ARM/thumb2-it-block.ll new file mode 100644 index 0000000..28fd469 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/thumb2-it-block.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s +; PR11107 + +define i32 @test(i32 %a, i32 %b) { +entry: +; CHECK: movs.w +; CHECK-NEXT: it mi +; CHECK-NEXT: rsbmi +; CHECK-NEXT: movs.w +; CHECK-NEXT: it mi +; CHECK-NEXT: rsbmi + %cmp1 = icmp slt i32 %a, 0 + %sub1 = sub nsw i32 0, %a + %abs1 = select i1 %cmp1, i32 %sub1, i32 %a + %cmp2 = icmp slt i32 %b, 0 + %sub2 = sub nsw i32 0, %b + %abs2 = select i1 %cmp2, i32 %sub2, i32 %b + %add = add nsw i32 %abs1, %abs2 + ret i32 %add +}
diff --git a/src/LLVM/test/CodeGen/ARM/tls1.ll b/src/LLVM/test/CodeGen/ARM/tls1.ll new file mode 100644 index 0000000..842a90b --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/tls1.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \ +; RUN: grep {i(tpoff)} +; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \ +; RUN: grep {__aeabi_read_tp} +; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi \ +; RUN: -relocation-model=pic | grep {__tls_get_addr} + + +@i = thread_local global i32 15 ; <i32*> [#uses=2] + +define i32 @f() { +entry: + %tmp1 = load i32* @i ; <i32> [#uses=1] + ret i32 %tmp1 +} + +define i32* @g() { +entry: + ret i32* @i +}
diff --git a/src/LLVM/test/CodeGen/ARM/tls2.ll b/src/LLVM/test/CodeGen/ARM/tls2.ll new file mode 100644 index 0000000..51474eb --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/tls2.ll
@@ -0,0 +1,27 @@ +; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi \ +; RUN: | FileCheck %s -check-prefix=CHECK-NONPIC +; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi \ +; RUN: -relocation-model=pic | FileCheck %s -check-prefix=CHECK-PIC + +@i = external thread_local global i32 ; <i32*> [#uses=2] + +define i32 @f() { +; CHECK-NONPIC: f: +; CHECK-NONPIC: ldr {{r.}}, [pc, {{r.}}] +; CHECK-NONPIC: i(gottpoff) +; CHECK-PIC: f: +; CHECK-PIC: __tls_get_addr +entry: + %tmp1 = load i32* @i ; <i32> [#uses=1] + ret i32 %tmp1 +} + +define i32* @g() { +; CHECK-NONPIC: g: +; CHECK-NONPIC: ldr {{r.}}, [pc, {{r.}}] +; CHECK-NONPIC: i(gottpoff) +; CHECK-PIC: g: +; CHECK-PIC: __tls_get_addr +entry: + ret i32* @i +}
diff --git a/src/LLVM/test/CodeGen/ARM/tls3.ll b/src/LLVM/test/CodeGen/ARM/tls3.ll new file mode 100644 index 0000000..df7a4ca --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/tls3.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \ +; RUN: grep {tbss} + +%struct.anon = type { i32, i32 } +@teste = internal thread_local global %struct.anon zeroinitializer ; <%struct.anon*> [#uses=1] + +define i32 @main() { +entry: + %tmp2 = load i32* getelementptr (%struct.anon* @teste, i32 0, i32 0), align 8 ; <i32> [#uses=1] + ret i32 %tmp2 +}
diff --git a/src/LLVM/test/CodeGen/ARM/trap.ll b/src/LLVM/test/CodeGen/ARM/trap.ll new file mode 100644 index 0000000..38842a9 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/trap.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=INSTR +; RUN: llc < %s -mtriple=arm-apple-darwin -trap-func=_trap | FileCheck %s -check-prefix=FUNC +; rdar://7961298 +; rdar://9249183 + +define void @t() nounwind { +entry: +; INSTR: t: +; INSTR: trap + +; FUNC: t: +; FUNC: bl __trap + call void @llvm.trap() + unreachable +} + +declare void @llvm.trap() nounwind
diff --git a/src/LLVM/test/CodeGen/ARM/trunc_ldr.ll b/src/LLVM/test/CodeGen/ARM/trunc_ldr.ll new file mode 100644 index 0000000..b950c57 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/trunc_ldr.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -march=arm | grep ldrb.*7 | count 1 +; RUN: llc < %s -march=arm | grep ldrsb.*7 | count 1 + + %struct.A = type { i8, i8, i8, i8, i16, i8, i8, %struct.B** } + %struct.B = type { float, float, i32, i32, i32, [0 x i8] } + +define i8 @f1(%struct.A* %d) { + %tmp2 = getelementptr %struct.A* %d, i32 0, i32 4 + %tmp23 = bitcast i16* %tmp2 to i32* + %tmp4 = load i32* %tmp23 + %tmp512 = lshr i32 %tmp4, 24 + %tmp56 = trunc i32 %tmp512 to i8 + ret i8 %tmp56 +} + +define i32 @f2(%struct.A* %d) { + %tmp2 = getelementptr %struct.A* %d, i32 0, i32 4 + %tmp23 = bitcast i16* %tmp2 to i32* + %tmp4 = load i32* %tmp23 + %tmp512 = lshr i32 %tmp4, 24 + %tmp56 = trunc i32 %tmp512 to i8 + %tmp57 = sext i8 %tmp56 to i32 + ret i32 %tmp57 +}
diff --git a/src/LLVM/test/CodeGen/ARM/truncstore-dag-combine.ll b/src/LLVM/test/CodeGen/ARM/truncstore-dag-combine.ll new file mode 100644 index 0000000..5665440 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/truncstore-dag-combine.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=arm -mattr=+v4t | not grep orr +; RUN: llc < %s -march=arm -mattr=+v4t | not grep mov + +define void @bar(i8* %P, i16* %Q) { +entry: + %P1 = bitcast i8* %P to i16* ; <i16*> [#uses=1] + %tmp = load i16* %Q, align 1 ; <i16> [#uses=1] + store i16 %tmp, i16* %P1, align 1 + ret void +} + +define void @foo(i8* %P, i32* %Q) { +entry: + %P1 = bitcast i8* %P to i32* ; <i32*> [#uses=1] + %tmp = load i32* %Q, align 1 ; <i32> [#uses=1] + store i32 %tmp, i32* %P1, align 1 + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/tst_teq.ll b/src/LLVM/test/CodeGen/ARM/tst_teq.ll new file mode 100644 index 0000000..e5bc695 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/tst_teq.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=arm | grep tst +; RUN: llc < %s -march=arm | grep teq + +define i32 @f(i32 %a) { +entry: + %tmp2 = and i32 %a, 255 ; <i32> [#uses=1] + icmp eq i32 %tmp2, 0 ; <i1>:0 [#uses=1] + %retval = select i1 %0, i32 20, i32 10 ; <i32> [#uses=1] + ret i32 %retval +} + +define i32 @g(i32 %a) { +entry: + %tmp2 = xor i32 %a, 255 + icmp eq i32 %tmp2, 0 ; <i1>:0 [#uses=1] + %retval = select i1 %0, i32 20, i32 10 ; <i32> [#uses=1] + ret i32 %retval +}
diff --git a/src/LLVM/test/CodeGen/ARM/uint64tof64.ll b/src/LLVM/test/CodeGen/ARM/uint64tof64.ll new file mode 100644 index 0000000..32eb225 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/uint64tof64.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+vfp2 + + %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } + %struct.__sFILEX = type opaque + %struct.__sbuf = type { i8*, i32 } +@"\01LC10" = external constant [54 x i8] ; <[54 x i8]*> [#uses=1] + +define fastcc void @t() { +entry: + %0 = load i64* null, align 4 ; <i64> [#uses=1] + %1 = uitofp i64 %0 to double ; <double> [#uses=1] + %2 = fdiv double 0.000000e+00, %1 ; <double> [#uses=1] + %3 = call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* null, i8* getelementptr ([54 x i8]* @"\01LC10", i32 0, i32 0), i64 0, double %2) ; <i32> [#uses=0] + ret void +} + +declare i32 @fprintf(%struct.FILE*, i8*, ...)
diff --git a/src/LLVM/test/CodeGen/ARM/umulo-32.ll b/src/LLVM/test/CodeGen/ARM/umulo-32.ll new file mode 100644 index 0000000..fa5c016 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/umulo-32.ll
@@ -0,0 +1,41 @@ +; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s + +%umul.ty = type { i32, i1 } + +define i32 @func(i32 %a) nounwind { +; CHECK: func +; CHECK: muldi3 + %tmp0 = tail call %umul.ty @llvm.umul.with.overflow.i32(i32 %a, i32 37) + %tmp1 = extractvalue %umul.ty %tmp0, 0 + %tmp2 = select i1 undef, i32 -1, i32 %tmp1 + ret i32 %tmp2 +} + +declare %umul.ty @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone + +define i32 @f(i32 %argc, i8** %argv) ssp { +; CHECK: func +; CHECK: str r0 +; CHECK: movs r2 +; CHECK: mov r1 +; CHECK: mov r3 +; CHECK: muldi3 +%1 = alloca i32, align 4 +%2 = alloca i32, align 4 +%3 = alloca i8**, align 4 +%m_degree = alloca i32, align 4 +store i32 0, i32* %1 +store i32 %argc, i32* %2, align 4 +store i8** %argv, i8*** %3, align 4 +store i32 10, i32* %m_degree, align 4 +%4 = load i32* %m_degree, align 4 +%5 = call %umul.ty @llvm.umul.with.overflow.i32(i32 %4, i32 8) +%6 = extractvalue %umul.ty %5, 1 +%7 = extractvalue %umul.ty %5, 0 +%8 = select i1 %6, i32 -1, i32 %7 +%9 = call noalias i8* @_Znam(i32 %8) +%10 = bitcast i8* %9 to double* +ret i32 0 +} + +declare noalias i8* @_Znam(i32)
diff --git a/src/LLVM/test/CodeGen/ARM/unaligned_load_store.ll b/src/LLVM/test/CodeGen/ARM/unaligned_load_store.ll new file mode 100644 index 0000000..a8237c6 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/unaligned_load_store.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s -march=arm -pre-RA-sched=source | FileCheck %s -check-prefix=GENERIC +; RUN: llc < %s -mtriple=armv6-apple-darwin | FileCheck %s -check-prefix=DARWIN_V6 +; RUN: llc < %s -mtriple=armv6-apple-darwin -arm-strict-align | FileCheck %s -check-prefix=GENERIC +; RUN: llc < %s -mtriple=armv6-linux | FileCheck %s -check-prefix=GENERIC + +; rdar://7113725 + +define void @t(i8* nocapture %a, i8* nocapture %b) nounwind { +entry: +; GENERIC: t: +; GENERIC: ldrb [[R2:r[0-9]+]] +; GENERIC: ldrb [[R3:r[0-9]+]] +; GENERIC: ldrb [[R12:r[0-9]+]] +; GENERIC: ldrb [[R1:r[0-9]+]] +; GENERIC: strb [[R1]] +; GENERIC: strb [[R12]] +; GENERIC: strb [[R3]] +; GENERIC: strb [[R2]] + +; DARWIN_V6: t: +; DARWIN_V6: ldr r1 +; DARWIN_V6: str r1 + + %__src1.i = bitcast i8* %b to i32* ; <i32*> [#uses=1] + %__dest2.i = bitcast i8* %a to i32* ; <i32*> [#uses=1] + %tmp.i = load i32* %__src1.i, align 1 ; <i32> [#uses=1] + store i32 %tmp.i, i32* %__dest2.i, align 1 + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/undef-sext.ll b/src/LLVM/test/CodeGen/ARM/undef-sext.ll new file mode 100644 index 0000000..2c28da3 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/undef-sext.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s + +; No need to sign-extend undef. + +define i32 @t(i32* %a) nounwind { +entry: +; CHECK: t: +; CHECK: ldr r0, [r0] +; CHECK: bx lr + %0 = sext i16 undef to i32 + %1 = getelementptr inbounds i32* %a, i32 %0 + %2 = load i32* %1, align 4 + ret i32 %2 +}
diff --git a/src/LLVM/test/CodeGen/ARM/unord.ll b/src/LLVM/test/CodeGen/ARM/unord.ll new file mode 100644 index 0000000..75ecb94 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/unord.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=arm | grep movne | count 1 +; RUN: llc < %s -march=arm | grep moveq | count 1 + +define i32 @f1(float %X, float %Y) { + %tmp = fcmp uno float %X, %Y + %retval = select i1 %tmp, i32 1, i32 -1 + ret i32 %retval +} + +define i32 @f2(float %X, float %Y) { + %tmp = fcmp ord float %X, %Y + %retval = select i1 %tmp, i32 1, i32 -1 + ret i32 %retval +}
diff --git a/src/LLVM/test/CodeGen/ARM/uxt_rot.ll b/src/LLVM/test/CodeGen/ARM/uxt_rot.ll new file mode 100644 index 0000000..1a22032 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/uxt_rot.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -march=arm -mattr=+v6 | grep uxtb | count 1 +; RUN: llc < %s -march=arm -mattr=+v6 | grep uxtab | count 1 +; RUN: llc < %s -march=arm -mattr=+v6 | grep uxth | count 1 + +define zeroext i8 @test1(i32 %A.u) { + %B.u = trunc i32 %A.u to i8 + ret i8 %B.u +} + +define zeroext i32 @test2(i32 %A.u, i32 %B.u) { + %C.u = trunc i32 %B.u to i8 + %D.u = zext i8 %C.u to i32 + %E.u = add i32 %A.u, %D.u + ret i32 %E.u +} + +define zeroext i32 @test3(i32 %A.u) { + %B.u = lshr i32 %A.u, 8 + %C.u = shl i32 %A.u, 24 + %D.u = or i32 %B.u, %C.u + %E.u = trunc i32 %D.u to i16 + %F.u = zext i16 %E.u to i32 + ret i32 %F.u +}
diff --git a/src/LLVM/test/CodeGen/ARM/uxtb.ll b/src/LLVM/test/CodeGen/ARM/uxtb.ll new file mode 100644 index 0000000..49070df --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/uxtb.ll
@@ -0,0 +1,74 @@ +; RUN: llc < %s -mtriple=armv6-apple-darwin | \ +; RUN: grep uxt | count 10 + +define i32 @test1(i32 %x) { + %tmp1 = and i32 %x, 16711935 ; <i32> [#uses=1] + ret i32 %tmp1 +} + +define i32 @test2(i32 %x) { + %tmp1 = lshr i32 %x, 8 ; <i32> [#uses=1] + %tmp2 = and i32 %tmp1, 16711935 ; <i32> [#uses=1] + ret i32 %tmp2 +} + +define i32 @test3(i32 %x) { + %tmp1 = lshr i32 %x, 8 ; <i32> [#uses=1] + %tmp2 = and i32 %tmp1, 16711935 ; <i32> [#uses=1] + ret i32 %tmp2 +} + +define i32 @test4(i32 %x) { + %tmp1 = lshr i32 %x, 8 ; <i32> [#uses=1] + %tmp6 = and i32 %tmp1, 16711935 ; <i32> [#uses=1] + ret i32 %tmp6 +} + +define i32 @test5(i32 %x) { + %tmp1 = lshr i32 %x, 8 ; <i32> [#uses=1] + %tmp2 = and i32 %tmp1, 16711935 ; <i32> [#uses=1] + ret i32 %tmp2 +} + +define i32 @test6(i32 %x) { + %tmp1 = lshr i32 %x, 16 ; <i32> [#uses=1] + %tmp2 = and i32 %tmp1, 255 ; <i32> [#uses=1] + %tmp4 = shl i32 %x, 16 ; <i32> [#uses=1] + %tmp5 = and i32 %tmp4, 16711680 ; <i32> [#uses=1] + %tmp6 = or i32 %tmp2, %tmp5 ; <i32> [#uses=1] + ret i32 %tmp6 +} + +define i32 @test7(i32 %x) { + %tmp1 = lshr i32 %x, 16 ; <i32> [#uses=1] + %tmp2 = and i32 %tmp1, 255 ; <i32> [#uses=1] + %tmp4 = shl i32 %x, 16 ; <i32> [#uses=1] + %tmp5 = and i32 %tmp4, 16711680 ; <i32> [#uses=1] + %tmp6 = or i32 %tmp2, %tmp5 ; <i32> [#uses=1] + ret i32 %tmp6 +} + +define i32 @test8(i32 %x) { + %tmp1 = shl i32 %x, 8 ; <i32> [#uses=1] + %tmp2 = and i32 %tmp1, 16711680 ; <i32> [#uses=1] + %tmp5 = lshr i32 %x, 24 ; <i32> [#uses=1] + %tmp6 = or i32 %tmp2, %tmp5 ; <i32> [#uses=1] + ret i32 %tmp6 +} + +define i32 @test9(i32 %x) { + %tmp1 = lshr i32 %x, 24 ; <i32> [#uses=1] + %tmp4 = shl i32 %x, 8 ; <i32> [#uses=1] + %tmp5 = and i32 %tmp4, 16711680 ; <i32> [#uses=1] + %tmp6 = or i32 %tmp5, %tmp1 ; <i32> [#uses=1] + ret i32 %tmp6 +} + +define i32 @test10(i32 %p0) { + %tmp1 = lshr i32 %p0, 7 ; <i32> [#uses=1] + %tmp2 = and i32 %tmp1, 16253176 ; <i32> [#uses=2] + %tmp4 = lshr i32 %tmp2, 5 ; <i32> [#uses=1] + %tmp5 = 
and i32 %tmp4, 458759 ; <i32> [#uses=1] + %tmp7 = or i32 %tmp5, %tmp2 ; <i32> [#uses=1] + ret i32 %tmp7 +}
diff --git a/src/LLVM/test/CodeGen/ARM/va_arg.ll b/src/LLVM/test/CodeGen/ARM/va_arg.ll new file mode 100644 index 0000000..af477b4 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/va_arg.ll
@@ -0,0 +1,42 @@ +; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -pre-RA-sched=source | FileCheck %s +; Test that we correctly align elements when using va_arg + +; CHECK: test1: +; CHECK-NOT: bfc +; CHECK: add [[REG:(r[0-9]+)|(lr)]], {{(r[0-9]+)|(lr)}}, #7 +; CHECK: bfc [[REG]], #0, #3 +; CHECK-NOT: bfc + +define i64 @test1(i32 %i, ...) nounwind optsize { +entry: + %g = alloca i8*, align 4 + %g1 = bitcast i8** %g to i8* + call void @llvm.va_start(i8* %g1) + %0 = va_arg i8** %g, i64 + call void @llvm.va_end(i8* %g1) + ret i64 %0 +} + +; CHECK: test2: +; CHECK-NOT: bfc +; CHECK: add [[REG:(r[0-9]+)|(lr)]], {{(r[0-9]+)|(lr)}}, #7 +; CHECK: bfc [[REG]], #0, #3 +; CHECK-NOT: bfc +; CHECK: bx lr + +define double @test2(i32 %a, i32 %b, ...) nounwind optsize { +entry: + %ap = alloca i8*, align 4 ; <i8**> [#uses=3] + %ap1 = bitcast i8** %ap to i8* ; <i8*> [#uses=2] + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, i32 ; <i32> [#uses=0] + store i32 %0, i32* undef + %1 = va_arg i8** %ap, double ; <double> [#uses=1] + call void @llvm.va_end(i8* %ap1) + ret double %1 +} + + +declare void @llvm.va_start(i8*) nounwind + +declare void @llvm.va_end(i8*) nounwind
diff --git a/src/LLVM/test/CodeGen/ARM/vaba.ll b/src/LLVM/test/CodeGen/ARM/vaba.ll new file mode 100644 index 0000000..4fe1c43 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vaba.ll
@@ -0,0 +1,221 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @vabas8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { +;CHECK: vabas8: +;CHECK: vaba.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = load <8 x i8>* %C + %tmp4 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3) + %tmp5 = add <8 x i8> %tmp1, %tmp4 + ret <8 x i8> %tmp5 +} + +define <4 x i16> @vabas16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { +;CHECK: vabas16: +;CHECK: vaba.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = load <4 x i16>* %C + %tmp4 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3) + %tmp5 = add <4 x i16> %tmp1, %tmp4 + ret <4 x i16> %tmp5 +} + +define <2 x i32> @vabas32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { +;CHECK: vabas32: +;CHECK: vaba.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = load <2 x i32>* %C + %tmp4 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3) + %tmp5 = add <2 x i32> %tmp1, %tmp4 + ret <2 x i32> %tmp5 +} + +define <8 x i8> @vabau8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { +;CHECK: vabau8: +;CHECK: vaba.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = load <8 x i8>* %C + %tmp4 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3) + %tmp5 = add <8 x i8> %tmp1, %tmp4 + ret <8 x i8> %tmp5 +} + +define <4 x i16> @vabau16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { +;CHECK: vabau16: +;CHECK: vaba.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = load <4 x i16>* %C + %tmp4 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3) + %tmp5 = add <4 x i16> %tmp1, %tmp4 + ret <4 x i16> %tmp5 +} + +define <2 x i32> @vabau32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { +;CHECK: vabau32: +;CHECK: vaba.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* 
%B + %tmp3 = load <2 x i32>* %C + %tmp4 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3) + %tmp5 = add <2 x i32> %tmp1, %tmp4 + ret <2 x i32> %tmp5 +} + +define <16 x i8> @vabaQs8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind { +;CHECK: vabaQs8: +;CHECK: vaba.s8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = load <16 x i8>* %C + %tmp4 = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %tmp2, <16 x i8> %tmp3) + %tmp5 = add <16 x i8> %tmp1, %tmp4 + ret <16 x i8> %tmp5 +} + +define <8 x i16> @vabaQs16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind { +;CHECK: vabaQs16: +;CHECK: vaba.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = load <8 x i16>* %C + %tmp4 = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp3) + %tmp5 = add <8 x i16> %tmp1, %tmp4 + ret <8 x i16> %tmp5 +} + +define <4 x i32> @vabaQs32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind { +;CHECK: vabaQs32: +;CHECK: vaba.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = load <4 x i32>* %C + %tmp4 = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %tmp2, <4 x i32> %tmp3) + %tmp5 = add <4 x i32> %tmp1, %tmp4 + ret <4 x i32> %tmp5 +} + +define <16 x i8> @vabaQu8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind { +;CHECK: vabaQu8: +;CHECK: vaba.u8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = load <16 x i8>* %C + %tmp4 = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %tmp2, <16 x i8> %tmp3) + %tmp5 = add <16 x i8> %tmp1, %tmp4 + ret <16 x i8> %tmp5 +} + +define <8 x i16> @vabaQu16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind { +;CHECK: vabaQu16: +;CHECK: vaba.u16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = load <8 x i16>* %C + %tmp4 = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp3) + %tmp5 = add <8 x i16> %tmp1, %tmp4 + ret <8 x i16> %tmp5 +} + +define <4 x i32> @vabaQu32(<4 x 
i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind { +;CHECK: vabaQu32: +;CHECK: vaba.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = load <4 x i32>* %C + %tmp4 = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %tmp2, <4 x i32> %tmp3) + %tmp5 = add <4 x i32> %tmp1, %tmp4 + ret <4 x i32> %tmp5 +} + +declare <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32>, <2 x i32>) nounwind readnone + +declare <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone + +define <8 x i16> @vabals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { +;CHECK: vabals8: +;CHECK: vabal.s8 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = load <8 x i8>* %C + %tmp4 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3) + %tmp5 = zext <8 x i8> %tmp4 to <8 x i16> + %tmp6 = add <8 x i16> %tmp1, %tmp5 + ret <8 x i16> %tmp6 +} + +define <4 x i32> @vabals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { +;CHECK: vabals16: +;CHECK: vabal.s16 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = load <4 x i16>* %C + %tmp4 = call <4 x i16> 
@llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3) + %tmp5 = zext <4 x i16> %tmp4 to <4 x i32> + %tmp6 = add <4 x i32> %tmp1, %tmp5 + ret <4 x i32> %tmp6 +} + +define <2 x i64> @vabals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { +;CHECK: vabals32: +;CHECK: vabal.s32 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = load <2 x i32>* %C + %tmp4 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3) + %tmp5 = zext <2 x i32> %tmp4 to <2 x i64> + %tmp6 = add <2 x i64> %tmp1, %tmp5 + ret <2 x i64> %tmp6 +} + +define <8 x i16> @vabalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { +;CHECK: vabalu8: +;CHECK: vabal.u8 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = load <8 x i8>* %C + %tmp4 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3) + %tmp5 = zext <8 x i8> %tmp4 to <8 x i16> + %tmp6 = add <8 x i16> %tmp1, %tmp5 + ret <8 x i16> %tmp6 +} + +define <4 x i32> @vabalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { +;CHECK: vabalu16: +;CHECK: vabal.u16 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = load <4 x i16>* %C + %tmp4 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3) + %tmp5 = zext <4 x i16> %tmp4 to <4 x i32> + %tmp6 = add <4 x i32> %tmp1, %tmp5 + ret <4 x i32> %tmp6 +} + +define <2 x i64> @vabalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { +;CHECK: vabalu32: +;CHECK: vabal.u32 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = load <2 x i32>* %C + %tmp4 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3) + %tmp5 = zext <2 x i32> %tmp4 to <2 x i64> + %tmp6 = add <2 x i64> %tmp1, %tmp5 + ret <2 x i64> %tmp6 +}
diff --git a/src/LLVM/test/CodeGen/ARM/vabd.ll b/src/LLVM/test/CodeGen/ARM/vabd.ll new file mode 100644 index 0000000..9ec734f --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vabd.ll
@@ -0,0 +1,207 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @vabds8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vabds8: +;CHECK: vabd.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vabds16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vabds16: +;CHECK: vabd.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vabds32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vabds32: +;CHECK: vabd.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <8 x i8> @vabdu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vabdu8: +;CHECK: vabd.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vabdu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vabdu16: +;CHECK: vabd.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vabdu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vabdu32: +;CHECK: vabd.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <2 x float> @vabdf32(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK: vabdf32: +;CHECK: vabd.f32 + %tmp1 = load <2 x float>* %A + %tmp2 = load <2 x float>* %B + %tmp3 = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) + ret <2 x float> %tmp3 +} + 
+define <16 x i8> @vabdQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vabdQs8: +;CHECK: vabd.s8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vabdQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vabdQs16: +;CHECK: vabd.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vabdQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vabdQs32: +;CHECK: vabd.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <16 x i8> @vabdQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vabdQu8: +;CHECK: vabd.u8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vabdQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vabdQu16: +;CHECK: vabd.u16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vabdQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vabdQu32: +;CHECK: vabd.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <4 x float> @vabdQf32(<4 x float>* %A, <4 x float>* %B) nounwind { +;CHECK: vabdQf32: +;CHECK: vabd.f32 + %tmp1 = load <4 x float>* %A + %tmp2 = load <4 x float>* %B + %tmp3 = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) + ret <4 x float> %tmp3 +} + +declare <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x 
i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32>, <2 x i32>) nounwind readnone + +declare <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone + +declare <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float>, <2 x float>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone + +declare <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float>, <4 x float>) nounwind readnone + +define <8 x i16> @vabdls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vabdls8: +;CHECK: vabdl.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp4 = zext <8 x i8> %tmp3 to <8 x i16> + ret <8 x i16> %tmp4 +} + +define <4 x i32> @vabdls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vabdls16: +;CHECK: vabdl.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp4 = zext <4 x i16> %tmp3 to <4 x i32> + ret <4 x i32> %tmp4 +} + +define <2 x i64> @vabdls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vabdls32: +;CHECK: vabdl.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> 
@llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp4 = zext <2 x i32> %tmp3 to <2 x i64> + ret <2 x i64> %tmp4 +} + +define <8 x i16> @vabdlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vabdlu8: +;CHECK: vabdl.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp4 = zext <8 x i8> %tmp3 to <8 x i16> + ret <8 x i16> %tmp4 +} + +define <4 x i32> @vabdlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vabdlu16: +;CHECK: vabdl.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp4 = zext <4 x i16> %tmp3 to <4 x i32> + ret <4 x i32> %tmp4 +} + +define <2 x i64> @vabdlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vabdlu32: +;CHECK: vabdl.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp4 = zext <2 x i32> %tmp3 to <2 x i64> + ret <2 x i64> %tmp4 +}
diff --git a/src/LLVM/test/CodeGen/ARM/vabs.ll b/src/LLVM/test/CodeGen/ARM/vabs.ll new file mode 100644 index 0000000..18ba61f --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vabs.ll
@@ -0,0 +1,131 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @vabss8(<8 x i8>* %A) nounwind { +;CHECK: vabss8: +;CHECK: vabs.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = call <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8> %tmp1) + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vabss16(<4 x i16>* %A) nounwind { +;CHECK: vabss16: +;CHECK: vabs.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = call <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16> %tmp1) + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vabss32(<2 x i32>* %A) nounwind { +;CHECK: vabss32: +;CHECK: vabs.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = call <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32> %tmp1) + ret <2 x i32> %tmp2 +} + +define <2 x float> @vabsf32(<2 x float>* %A) nounwind { +;CHECK: vabsf32: +;CHECK: vabs.f32 + %tmp1 = load <2 x float>* %A + %tmp2 = call <2 x float> @llvm.arm.neon.vabs.v2f32(<2 x float> %tmp1) + ret <2 x float> %tmp2 +} + +define <16 x i8> @vabsQs8(<16 x i8>* %A) nounwind { +;CHECK: vabsQs8: +;CHECK: vabs.s8 + %tmp1 = load <16 x i8>* %A + %tmp2 = call <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8> %tmp1) + ret <16 x i8> %tmp2 +} + +define <8 x i16> @vabsQs16(<8 x i16>* %A) nounwind { +;CHECK: vabsQs16: +;CHECK: vabs.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %tmp1) + ret <8 x i16> %tmp2 +} + +define <4 x i32> @vabsQs32(<4 x i32>* %A) nounwind { +;CHECK: vabsQs32: +;CHECK: vabs.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32> %tmp1) + ret <4 x i32> %tmp2 +} + +define <4 x float> @vabsQf32(<4 x float>* %A) nounwind { +;CHECK: vabsQf32: +;CHECK: vabs.f32 + %tmp1 = load <4 x float>* %A + %tmp2 = call <4 x float> @llvm.arm.neon.vabs.v4f32(<4 x float> %tmp1) + ret <4 x float> %tmp2 +} + +declare <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32>) 
nounwind readnone +declare <2 x float> @llvm.arm.neon.vabs.v2f32(<2 x float>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32>) nounwind readnone +declare <4 x float> @llvm.arm.neon.vabs.v4f32(<4 x float>) nounwind readnone + +define <8 x i8> @vqabss8(<8 x i8>* %A) nounwind { +;CHECK: vqabss8: +;CHECK: vqabs.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = call <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8> %tmp1) + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vqabss16(<4 x i16>* %A) nounwind { +;CHECK: vqabss16: +;CHECK: vqabs.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = call <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16> %tmp1) + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vqabss32(<2 x i32>* %A) nounwind { +;CHECK: vqabss32: +;CHECK: vqabs.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = call <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32> %tmp1) + ret <2 x i32> %tmp2 +} + +define <16 x i8> @vqabsQs8(<16 x i8>* %A) nounwind { +;CHECK: vqabsQs8: +;CHECK: vqabs.s8 + %tmp1 = load <16 x i8>* %A + %tmp2 = call <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8> %tmp1) + ret <16 x i8> %tmp2 +} + +define <8 x i16> @vqabsQs16(<8 x i16>* %A) nounwind { +;CHECK: vqabsQs16: +;CHECK: vqabs.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = call <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16> %tmp1) + ret <8 x i16> %tmp2 +} + +define <4 x i32> @vqabsQs32(<4 x i32>* %A) nounwind { +;CHECK: vqabsQs32: +;CHECK: vqabs.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32> %tmp1) + ret <4 x i32> %tmp2 +} + +declare <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8>) nounwind readnone 
+declare <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/ARM/vadd.ll b/src/LLVM/test/CodeGen/ARM/vadd.ll new file mode 100644 index 0000000..a830e96 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vadd.ll
@@ -0,0 +1,279 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @vaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vaddi8: +;CHECK: vadd.i8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = add <8 x i8> %tmp1, %tmp2 + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vaddi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vaddi16: +;CHECK: vadd.i16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = add <4 x i16> %tmp1, %tmp2 + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vaddi32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vaddi32: +;CHECK: vadd.i32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = add <2 x i32> %tmp1, %tmp2 + ret <2 x i32> %tmp3 +} + +define <1 x i64> @vaddi64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vaddi64: +;CHECK: vadd.i64 + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = add <1 x i64> %tmp1, %tmp2 + ret <1 x i64> %tmp3 +} + +define <2 x float> @vaddf32(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK: vaddf32: +;CHECK: vadd.f32 + %tmp1 = load <2 x float>* %A + %tmp2 = load <2 x float>* %B + %tmp3 = fadd <2 x float> %tmp1, %tmp2 + ret <2 x float> %tmp3 +} + +define <16 x i8> @vaddQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vaddQi8: +;CHECK: vadd.i8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = add <16 x i8> %tmp1, %tmp2 + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vaddQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vaddQi16: +;CHECK: vadd.i16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = add <8 x i16> %tmp1, %tmp2 + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vaddQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vaddQi32: +;CHECK: vadd.i32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = add <4 x i32> %tmp1, %tmp2 + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vaddQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vaddQi64: +;CHECK: vadd.i64 + 
%tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = add <2 x i64> %tmp1, %tmp2 + ret <2 x i64> %tmp3 +} + +define <4 x float> @vaddQf32(<4 x float>* %A, <4 x float>* %B) nounwind { +;CHECK: vaddQf32: +;CHECK: vadd.f32 + %tmp1 = load <4 x float>* %A + %tmp2 = load <4 x float>* %B + %tmp3 = fadd <4 x float> %tmp1, %tmp2 + ret <4 x float> %tmp3 +} + +define <8 x i8> @vaddhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vaddhni16: +;CHECK: vaddhn.i16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vaddhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vaddhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vaddhni32: +;CHECK: vaddhn.i32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vaddhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vaddhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vaddhni64: +;CHECK: vaddhn.i64 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vaddhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i32> %tmp3 +} + +declare <8 x i8> @llvm.arm.neon.vaddhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vaddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vaddhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone + +define <8 x i8> @vraddhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vraddhni16: +;CHECK: vraddhn.i16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vraddhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vraddhni32: +;CHECK: vraddhn.i32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %tmp1, <4 x i32> 
%tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vraddhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vraddhni64: +;CHECK: vraddhn.i64 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i32> %tmp3 +} + +declare <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone + +define <8 x i16> @vaddls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vaddls8: +;CHECK: vaddl.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = sext <8 x i8> %tmp1 to <8 x i16> + %tmp4 = sext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = add <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 +} + +define <4 x i32> @vaddls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vaddls16: +;CHECK: vaddl.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = sext <4 x i16> %tmp1 to <4 x i32> + %tmp4 = sext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = add <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 +} + +define <2 x i64> @vaddls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vaddls32: +;CHECK: vaddl.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = sext <2 x i32> %tmp1 to <2 x i64> + %tmp4 = sext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = add <2 x i64> %tmp3, %tmp4 + ret <2 x i64> %tmp5 +} + +define <8 x i16> @vaddlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vaddlu8: +;CHECK: vaddl.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = zext <8 x i8> %tmp1 to <8 x i16> + %tmp4 = zext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = add <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 +} + +define <4 x i32> @vaddlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vaddlu16: +;CHECK: vaddl.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B 
+ %tmp3 = zext <4 x i16> %tmp1 to <4 x i32> + %tmp4 = zext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = add <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 +} + +define <2 x i64> @vaddlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vaddlu32: +;CHECK: vaddl.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = zext <2 x i32> %tmp1 to <2 x i64> + %tmp4 = zext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = add <2 x i64> %tmp3, %tmp4 + ret <2 x i64> %tmp5 +} + +define <8 x i16> @vaddws8(<8 x i16>* %A, <8 x i8>* %B) nounwind { +;CHECK: vaddws8: +;CHECK: vaddw.s8 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = sext <8 x i8> %tmp2 to <8 x i16> + %tmp4 = add <8 x i16> %tmp1, %tmp3 + ret <8 x i16> %tmp4 +} + +define <4 x i32> @vaddws16(<4 x i32>* %A, <4 x i16>* %B) nounwind { +;CHECK: vaddws16: +;CHECK: vaddw.s16 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = sext <4 x i16> %tmp2 to <4 x i32> + %tmp4 = add <4 x i32> %tmp1, %tmp3 + ret <4 x i32> %tmp4 +} + +define <2 x i64> @vaddws32(<2 x i64>* %A, <2 x i32>* %B) nounwind { +;CHECK: vaddws32: +;CHECK: vaddw.s32 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = sext <2 x i32> %tmp2 to <2 x i64> + %tmp4 = add <2 x i64> %tmp1, %tmp3 + ret <2 x i64> %tmp4 +} + +define <8 x i16> @vaddwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind { +;CHECK: vaddwu8: +;CHECK: vaddw.u8 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = zext <8 x i8> %tmp2 to <8 x i16> + %tmp4 = add <8 x i16> %tmp1, %tmp3 + ret <8 x i16> %tmp4 +} + +define <4 x i32> @vaddwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind { +;CHECK: vaddwu16: +;CHECK: vaddw.u16 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = zext <4 x i16> %tmp2 to <4 x i32> + %tmp4 = add <4 x i32> %tmp1, %tmp3 + ret <4 x i32> %tmp4 +} + +define <2 x i64> @vaddwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind { +;CHECK: vaddwu32: +;CHECK: vaddw.u32 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = 
zext <2 x i32> %tmp2 to <2 x i64> + %tmp4 = add <2 x i64> %tmp1, %tmp3 + ret <2 x i64> %tmp4 +}
diff --git a/src/LLVM/test/CodeGen/ARM/vargs.ll b/src/LLVM/test/CodeGen/ARM/vargs.ll new file mode 100644 index 0000000..3d5b0bd --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vargs.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=arm +@str = internal constant [43 x i8] c"Hello World %d %d %d %d %d %d %d %d %d %d\0A\00" ; <[43 x i8]*> [#uses=1] + +define i32 @main() { +entry: + %tmp = call i32 (i8*, ...)* @printf( i8* getelementptr ([43 x i8]* @str, i32 0, i64 0), i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10 ) ; <i32> [#uses=0] + %tmp2 = call i32 (i8*, ...)* @printf( i8* getelementptr ([43 x i8]* @str, i32 0, i64 0), i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1 ) ; <i32> [#uses=0] + ret i32 11 +} + +declare i32 @printf(i8*, ...) +
diff --git a/src/LLVM/test/CodeGen/ARM/vargs_align.ll b/src/LLVM/test/CodeGen/ARM/vargs_align.ll new file mode 100644 index 0000000..2f27529 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vargs_align.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=EABI +; RUN: llc < %s -march=arm -mtriple=arm-linux-gnu | FileCheck %s -check-prefix=OABI + +define i32 @f(i32 %a, ...) { +entry: + %a_addr = alloca i32 ; <i32*> [#uses=1] + %retval = alloca i32, align 4 ; <i32*> [#uses=2] + %tmp = alloca i32, align 4 ; <i32*> [#uses=2] + store i32 %a, i32* %a_addr + store i32 0, i32* %tmp + %tmp1 = load i32* %tmp ; <i32> [#uses=1] + store i32 %tmp1, i32* %retval + br label %return + +return: ; preds = %entry + %retval2 = load i32* %retval ; <i32> [#uses=1] + ret i32 %retval2 +; EABI: add sp, sp, #12 +; EABI: add sp, sp, #16 +; OABI: add sp, sp, #12 +; OABI: add sp, sp, #12 +}
diff --git a/src/LLVM/test/CodeGen/ARM/vbits.ll b/src/LLVM/test/CodeGen/ARM/vbits.ll new file mode 100644 index 0000000..51f9bdf --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vbits.ll
@@ -0,0 +1,547 @@ +; RUN: llc < %s -march=arm -mattr=+neon -mcpu=cortex-a8 | FileCheck %s + +define <8 x i8> @v_andi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: v_andi8: +;CHECK: vand + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = and <8 x i8> %tmp1, %tmp2 + ret <8 x i8> %tmp3 +} + +define <4 x i16> @v_andi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: v_andi16: +;CHECK: vand + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = and <4 x i16> %tmp1, %tmp2 + ret <4 x i16> %tmp3 +} + +define <2 x i32> @v_andi32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: v_andi32: +;CHECK: vand + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = and <2 x i32> %tmp1, %tmp2 + ret <2 x i32> %tmp3 +} + +define <1 x i64> @v_andi64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: v_andi64: +;CHECK: vand + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = and <1 x i64> %tmp1, %tmp2 + ret <1 x i64> %tmp3 +} + +define <16 x i8> @v_andQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: v_andQi8: +;CHECK: vand + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = and <16 x i8> %tmp1, %tmp2 + ret <16 x i8> %tmp3 +} + +define <8 x i16> @v_andQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: v_andQi16: +;CHECK: vand + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = and <8 x i16> %tmp1, %tmp2 + ret <8 x i16> %tmp3 +} + +define <4 x i32> @v_andQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: v_andQi32: +;CHECK: vand + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = and <4 x i32> %tmp1, %tmp2 + ret <4 x i32> %tmp3 +} + +define <2 x i64> @v_andQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: v_andQi64: +;CHECK: vand + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = and <2 x i64> %tmp1, %tmp2 + ret <2 x i64> %tmp3 +} + +define <8 x i8> @v_bici8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: v_bici8: +;CHECK: vbic + %tmp1 = load <8 x 
i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = xor <8 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > + %tmp4 = and <8 x i8> %tmp1, %tmp3 + ret <8 x i8> %tmp4 +} + +define <4 x i16> @v_bici16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: v_bici16: +;CHECK: vbic + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = xor <4 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1 > + %tmp4 = and <4 x i16> %tmp1, %tmp3 + ret <4 x i16> %tmp4 +} + +define <2 x i32> @v_bici32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: v_bici32: +;CHECK: vbic + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = xor <2 x i32> %tmp2, < i32 -1, i32 -1 > + %tmp4 = and <2 x i32> %tmp1, %tmp3 + ret <2 x i32> %tmp4 +} + +define <1 x i64> @v_bici64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: v_bici64: +;CHECK: vbic + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = xor <1 x i64> %tmp2, < i64 -1 > + %tmp4 = and <1 x i64> %tmp1, %tmp3 + ret <1 x i64> %tmp4 +} + +define <16 x i8> @v_bicQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: v_bicQi8: +;CHECK: vbic + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = xor <16 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > + %tmp4 = and <16 x i8> %tmp1, %tmp3 + ret <16 x i8> %tmp4 +} + +define <8 x i16> @v_bicQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: v_bicQi16: +;CHECK: vbic + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = xor <8 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 > + %tmp4 = and <8 x i16> %tmp1, %tmp3 + ret <8 x i16> %tmp4 +} + +define <4 x i32> @v_bicQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: v_bicQi32: +;CHECK: vbic + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = xor <4 x i32> %tmp2, < i32 -1, i32 -1, i32 -1, i32 -1 > + %tmp4 = and <4 x i32> %tmp1, %tmp3 + ret <4 x i32> %tmp4 
+} + +define <2 x i64> @v_bicQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: v_bicQi64: +;CHECK: vbic + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = xor <2 x i64> %tmp2, < i64 -1, i64 -1 > + %tmp4 = and <2 x i64> %tmp1, %tmp3 + ret <2 x i64> %tmp4 +} + +define <8 x i8> @v_eori8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: v_eori8: +;CHECK: veor + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = xor <8 x i8> %tmp1, %tmp2 + ret <8 x i8> %tmp3 +} + +define <4 x i16> @v_eori16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: v_eori16: +;CHECK: veor + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = xor <4 x i16> %tmp1, %tmp2 + ret <4 x i16> %tmp3 +} + +define <2 x i32> @v_eori32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: v_eori32: +;CHECK: veor + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = xor <2 x i32> %tmp1, %tmp2 + ret <2 x i32> %tmp3 +} + +define <1 x i64> @v_eori64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: v_eori64: +;CHECK: veor + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = xor <1 x i64> %tmp1, %tmp2 + ret <1 x i64> %tmp3 +} + +define <16 x i8> @v_eorQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: v_eorQi8: +;CHECK: veor + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = xor <16 x i8> %tmp1, %tmp2 + ret <16 x i8> %tmp3 +} + +define <8 x i16> @v_eorQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: v_eorQi16: +;CHECK: veor + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = xor <8 x i16> %tmp1, %tmp2 + ret <8 x i16> %tmp3 +} + +define <4 x i32> @v_eorQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: v_eorQi32: +;CHECK: veor + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = xor <4 x i32> %tmp1, %tmp2 + ret <4 x i32> %tmp3 +} + +define <2 x i64> @v_eorQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: v_eorQi64: +;CHECK: veor + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x 
i64>* %B + %tmp3 = xor <2 x i64> %tmp1, %tmp2 + ret <2 x i64> %tmp3 +} + +define <8 x i8> @v_mvni8(<8 x i8>* %A) nounwind { +;CHECK: v_mvni8: +;CHECK: vmvn + %tmp1 = load <8 x i8>* %A + %tmp2 = xor <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > + ret <8 x i8> %tmp2 +} + +define <4 x i16> @v_mvni16(<4 x i16>* %A) nounwind { +;CHECK: v_mvni16: +;CHECK: vmvn + %tmp1 = load <4 x i16>* %A + %tmp2 = xor <4 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1 > + ret <4 x i16> %tmp2 +} + +define <2 x i32> @v_mvni32(<2 x i32>* %A) nounwind { +;CHECK: v_mvni32: +;CHECK: vmvn + %tmp1 = load <2 x i32>* %A + %tmp2 = xor <2 x i32> %tmp1, < i32 -1, i32 -1 > + ret <2 x i32> %tmp2 +} + +define <1 x i64> @v_mvni64(<1 x i64>* %A) nounwind { +;CHECK: v_mvni64: +;CHECK: vmvn + %tmp1 = load <1 x i64>* %A + %tmp2 = xor <1 x i64> %tmp1, < i64 -1 > + ret <1 x i64> %tmp2 +} + +define <16 x i8> @v_mvnQi8(<16 x i8>* %A) nounwind { +;CHECK: v_mvnQi8: +;CHECK: vmvn + %tmp1 = load <16 x i8>* %A + %tmp2 = xor <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > + ret <16 x i8> %tmp2 +} + +define <8 x i16> @v_mvnQi16(<8 x i16>* %A) nounwind { +;CHECK: v_mvnQi16: +;CHECK: vmvn + %tmp1 = load <8 x i16>* %A + %tmp2 = xor <8 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 > + ret <8 x i16> %tmp2 +} + +define <4 x i32> @v_mvnQi32(<4 x i32>* %A) nounwind { +;CHECK: v_mvnQi32: +;CHECK: vmvn + %tmp1 = load <4 x i32>* %A + %tmp2 = xor <4 x i32> %tmp1, < i32 -1, i32 -1, i32 -1, i32 -1 > + ret <4 x i32> %tmp2 +} + +define <2 x i64> @v_mvnQi64(<2 x i64>* %A) nounwind { +;CHECK: v_mvnQi64: +;CHECK: vmvn + %tmp1 = load <2 x i64>* %A + %tmp2 = xor <2 x i64> %tmp1, < i64 -1, i64 -1 > + ret <2 x i64> %tmp2 +} + +define <8 x i8> @v_orri8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: v_orri8: +;CHECK: vorr + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = or <8 x i8> 
%tmp1, %tmp2 + ret <8 x i8> %tmp3 +} + +define <4 x i16> @v_orri16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: v_orri16: +;CHECK: vorr + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = or <4 x i16> %tmp1, %tmp2 + ret <4 x i16> %tmp3 +} + +define <2 x i32> @v_orri32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: v_orri32: +;CHECK: vorr + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = or <2 x i32> %tmp1, %tmp2 + ret <2 x i32> %tmp3 +} + +define <1 x i64> @v_orri64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: v_orri64: +;CHECK: vorr + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = or <1 x i64> %tmp1, %tmp2 + ret <1 x i64> %tmp3 +} + +define <16 x i8> @v_orrQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: v_orrQi8: +;CHECK: vorr + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = or <16 x i8> %tmp1, %tmp2 + ret <16 x i8> %tmp3 +} + +define <8 x i16> @v_orrQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: v_orrQi16: +;CHECK: vorr + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = or <8 x i16> %tmp1, %tmp2 + ret <8 x i16> %tmp3 +} + +define <4 x i32> @v_orrQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: v_orrQi32: +;CHECK: vorr + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = or <4 x i32> %tmp1, %tmp2 + ret <4 x i32> %tmp3 +} + +define <2 x i64> @v_orrQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: v_orrQi64: +;CHECK: vorr + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = or <2 x i64> %tmp1, %tmp2 + ret <2 x i64> %tmp3 +} + +define <8 x i8> @v_orni8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: v_orni8: +;CHECK: vorn + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = xor <8 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > + %tmp4 = or <8 x i8> %tmp1, %tmp3 + ret <8 x i8> %tmp4 +} + +define <4 x i16> @v_orni16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: v_orni16: 
+;CHECK: vorn + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = xor <4 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1 > + %tmp4 = or <4 x i16> %tmp1, %tmp3 + ret <4 x i16> %tmp4 +} + +define <2 x i32> @v_orni32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: v_orni32: +;CHECK: vorn + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = xor <2 x i32> %tmp2, < i32 -1, i32 -1 > + %tmp4 = or <2 x i32> %tmp1, %tmp3 + ret <2 x i32> %tmp4 +} + +define <1 x i64> @v_orni64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: v_orni64: +;CHECK: vorn + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = xor <1 x i64> %tmp2, < i64 -1 > + %tmp4 = or <1 x i64> %tmp1, %tmp3 + ret <1 x i64> %tmp4 +} + +define <16 x i8> @v_ornQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: v_ornQi8: +;CHECK: vorn + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = xor <16 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > + %tmp4 = or <16 x i8> %tmp1, %tmp3 + ret <16 x i8> %tmp4 +} + +define <8 x i16> @v_ornQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: v_ornQi16: +;CHECK: vorn + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = xor <8 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 > + %tmp4 = or <8 x i16> %tmp1, %tmp3 + ret <8 x i16> %tmp4 +} + +define <4 x i32> @v_ornQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: v_ornQi32: +;CHECK: vorn + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = xor <4 x i32> %tmp2, < i32 -1, i32 -1, i32 -1, i32 -1 > + %tmp4 = or <4 x i32> %tmp1, %tmp3 + ret <4 x i32> %tmp4 +} + +define <2 x i64> @v_ornQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: v_ornQi64: +;CHECK: vorn + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = xor <2 x i64> %tmp2, < i64 -1, i64 -1 > + %tmp4 = or <2 x i64> %tmp1, %tmp3 + ret <2 x i64> %tmp4 +} + 
+define <8 x i8> @vtsti8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vtsti8: +;CHECK: vtst.8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = and <8 x i8> %tmp1, %tmp2 + %tmp4 = icmp ne <8 x i8> %tmp3, zeroinitializer + %tmp5 = sext <8 x i1> %tmp4 to <8 x i8> + ret <8 x i8> %tmp5 +} + +define <4 x i16> @vtsti16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vtsti16: +;CHECK: vtst.16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = and <4 x i16> %tmp1, %tmp2 + %tmp4 = icmp ne <4 x i16> %tmp3, zeroinitializer + %tmp5 = sext <4 x i1> %tmp4 to <4 x i16> + ret <4 x i16> %tmp5 +} + +define <2 x i32> @vtsti32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vtsti32: +;CHECK: vtst.32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = and <2 x i32> %tmp1, %tmp2 + %tmp4 = icmp ne <2 x i32> %tmp3, zeroinitializer + %tmp5 = sext <2 x i1> %tmp4 to <2 x i32> + ret <2 x i32> %tmp5 +} + +define <16 x i8> @vtstQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vtstQi8: +;CHECK: vtst.8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = and <16 x i8> %tmp1, %tmp2 + %tmp4 = icmp ne <16 x i8> %tmp3, zeroinitializer + %tmp5 = sext <16 x i1> %tmp4 to <16 x i8> + ret <16 x i8> %tmp5 +} + +define <8 x i16> @vtstQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vtstQi16: +;CHECK: vtst.16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = and <8 x i16> %tmp1, %tmp2 + %tmp4 = icmp ne <8 x i16> %tmp3, zeroinitializer + %tmp5 = sext <8 x i1> %tmp4 to <8 x i16> + ret <8 x i16> %tmp5 +} + +define <4 x i32> @vtstQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vtstQi32: +;CHECK: vtst.32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = and <4 x i32> %tmp1, %tmp2 + %tmp4 = icmp ne <4 x i32> %tmp3, zeroinitializer + %tmp5 = sext <4 x i1> %tmp4 to <4 x i32> + ret <4 x i32> %tmp5 +} + +define <8 x i8> @v_orrimm(<8 x i8>* %A) nounwind { +; CHECK: v_orrimm: +; CHECK-NOT: vmov +; 
CHECK-NOT: vmvn +; CHECK: vorr + %tmp1 = load <8 x i8>* %A + %tmp3 = or <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1> + ret <8 x i8> %tmp3 +} + +define <16 x i8> @v_orrimmQ(<16 x i8>* %A) nounwind { +; CHECK: v_orrimmQ +; CHECK-NOT: vmov +; CHECK-NOT: vmvn +; CHECK: vorr + %tmp1 = load <16 x i8>* %A + %tmp3 = or <16 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1> + ret <16 x i8> %tmp3 +} + +define <8 x i8> @v_bicimm(<8 x i8>* %A) nounwind { +; CHECK: v_bicimm: +; CHECK-NOT: vmov +; CHECK-NOT: vmvn +; CHECK: vbic + %tmp1 = load <8 x i8>* %A + %tmp3 = and <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0 > + ret <8 x i8> %tmp3 +} + +define <16 x i8> @v_bicimmQ(<16 x i8>* %A) nounwind { +; CHECK: v_bicimmQ: +; CHECK-NOT: vmov +; CHECK-NOT: vmvn +; CHECK: vbic + %tmp1 = load <16 x i8>* %A + %tmp3 = and <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0 > + ret <16 x i8> %tmp3 +}
diff --git a/src/LLVM/test/CodeGen/ARM/vbsl-constant.ll b/src/LLVM/test/CodeGen/ARM/vbsl-constant.ll new file mode 100644 index 0000000..14e668e --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vbsl-constant.ll
@@ -0,0 +1,115 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { +;CHECK: v_bsli8: +;CHECK: vldr.64 +;CHECK: vldr.64 +;CHECK: vbsl + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = load <8 x i8>* %C + %tmp4 = and <8 x i8> %tmp1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> + %tmp6 = and <8 x i8> %tmp3, <i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4> + %tmp7 = or <8 x i8> %tmp4, %tmp6 + ret <8 x i8> %tmp7 +} + +define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { +;CHECK: v_bsli16: +;CHECK: vldr.64 +;CHECK: vldr.64 +;CHECK: vbsl + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = load <4 x i16>* %C + %tmp4 = and <4 x i16> %tmp1, <i16 3, i16 3, i16 3, i16 3> + %tmp6 = and <4 x i16> %tmp3, <i16 -4, i16 -4, i16 -4, i16 -4> + %tmp7 = or <4 x i16> %tmp4, %tmp6 + ret <4 x i16> %tmp7 +} + +define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { +;CHECK: v_bsli32: +;CHECK: vldr.64 +;CHECK: vldr.64 +;CHECK: vbsl + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = load <2 x i32>* %C + %tmp4 = and <2 x i32> %tmp1, <i32 3, i32 3> + %tmp6 = and <2 x i32> %tmp3, <i32 -4, i32 -4> + %tmp7 = or <2 x i32> %tmp4, %tmp6 + ret <2 x i32> %tmp7 +} + +define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind { +;CHECK: v_bsli64: +;CHECK: vldr.64 +;CHECK: vldr.64 +;CHECK: vldr.64 +;CHECK: vbsl + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = load <1 x i64>* %C + %tmp4 = and <1 x i64> %tmp1, <i64 3> + %tmp6 = and <1 x i64> %tmp3, <i64 -4> + %tmp7 = or <1 x i64> %tmp4, %tmp6 + ret <1 x i64> %tmp7 +} + +define <16 x i8> @v_bslQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind { +;CHECK: v_bslQi8: +;CHECK: vldmia +;CHECK: vldmia +;CHECK: vbsl + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = load <16 x i8>* %C + %tmp4 = and 
<16 x i8> %tmp1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> + %tmp6 = and <16 x i8> %tmp3, <i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4> + %tmp7 = or <16 x i8> %tmp4, %tmp6 + ret <16 x i8> %tmp7 +} + +define <8 x i16> @v_bslQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind { +;CHECK: v_bslQi16: +;CHECK: vldmia +;CHECK: vldmia +;CHECK: vbsl + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = load <8 x i16>* %C + %tmp4 = and <8 x i16> %tmp1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> + %tmp6 = and <8 x i16> %tmp3, <i16 -4, i16 -4, i16 -4, i16 -4, i16 -4, i16 -4, i16 -4, i16 -4> + %tmp7 = or <8 x i16> %tmp4, %tmp6 + ret <8 x i16> %tmp7 +} + +define <4 x i32> @v_bslQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind { +;CHECK: v_bslQi32: +;CHECK: vldmia +;CHECK: vldmia +;CHECK: vbsl + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = load <4 x i32>* %C + %tmp4 = and <4 x i32> %tmp1, <i32 3, i32 3, i32 3, i32 3> + %tmp6 = and <4 x i32> %tmp3, <i32 -4, i32 -4, i32 -4, i32 -4> + %tmp7 = or <4 x i32> %tmp4, %tmp6 + ret <4 x i32> %tmp7 +} + +define <2 x i64> @v_bslQi64(<2 x i64>* %A, <2 x i64>* %B, <2 x i64>* %C) nounwind { +;CHECK: v_bslQi64: +;CHECK: vldmia +;CHECK: vldmia +;CHECK: vldmia +;CHECK: vbsl + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = load <2 x i64>* %C + %tmp4 = and <2 x i64> %tmp1, <i64 3, i64 3> + %tmp6 = and <2 x i64> %tmp3, <i64 -4, i64 -4> + %tmp7 = or <2 x i64> %tmp4, %tmp6 + ret <2 x i64> %tmp7 +}
diff --git a/src/LLVM/test/CodeGen/ARM/vbsl.ll b/src/LLVM/test/CodeGen/ARM/vbsl.ll new file mode 100644 index 0000000..9f3bb4e --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vbsl.ll
@@ -0,0 +1,105 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { +;CHECK: v_bsli8: +;CHECK: vbsl + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = load <8 x i8>* %C + %tmp4 = and <8 x i8> %tmp1, %tmp2 + %tmp5 = xor <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > + %tmp6 = and <8 x i8> %tmp5, %tmp3 + %tmp7 = or <8 x i8> %tmp4, %tmp6 + ret <8 x i8> %tmp7 +} + +define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { +;CHECK: v_bsli16: +;CHECK: vbsl + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = load <4 x i16>* %C + %tmp4 = and <4 x i16> %tmp1, %tmp2 + %tmp5 = xor <4 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1 > + %tmp6 = and <4 x i16> %tmp5, %tmp3 + %tmp7 = or <4 x i16> %tmp4, %tmp6 + ret <4 x i16> %tmp7 +} + +define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { +;CHECK: v_bsli32: +;CHECK: vbsl + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = load <2 x i32>* %C + %tmp4 = and <2 x i32> %tmp1, %tmp2 + %tmp5 = xor <2 x i32> %tmp1, < i32 -1, i32 -1 > + %tmp6 = and <2 x i32> %tmp5, %tmp3 + %tmp7 = or <2 x i32> %tmp4, %tmp6 + ret <2 x i32> %tmp7 +} + +define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind { +;CHECK: v_bsli64: +;CHECK: vbsl + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = load <1 x i64>* %C + %tmp4 = and <1 x i64> %tmp1, %tmp2 + %tmp5 = xor <1 x i64> %tmp1, < i64 -1 > + %tmp6 = and <1 x i64> %tmp5, %tmp3 + %tmp7 = or <1 x i64> %tmp4, %tmp6 + ret <1 x i64> %tmp7 +} + +define <16 x i8> @v_bslQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind { +;CHECK: v_bslQi8: +;CHECK: vbsl + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = load <16 x i8>* %C + %tmp4 = and <16 x i8> %tmp1, %tmp2 + %tmp5 = xor <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 
-1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > + %tmp6 = and <16 x i8> %tmp5, %tmp3 + %tmp7 = or <16 x i8> %tmp4, %tmp6 + ret <16 x i8> %tmp7 +} + +define <8 x i16> @v_bslQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind { +;CHECK: v_bslQi16: +;CHECK: vbsl + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = load <8 x i16>* %C + %tmp4 = and <8 x i16> %tmp1, %tmp2 + %tmp5 = xor <8 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 > + %tmp6 = and <8 x i16> %tmp5, %tmp3 + %tmp7 = or <8 x i16> %tmp4, %tmp6 + ret <8 x i16> %tmp7 +} + +define <4 x i32> @v_bslQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind { +;CHECK: v_bslQi32: +;CHECK: vbsl + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = load <4 x i32>* %C + %tmp4 = and <4 x i32> %tmp1, %tmp2 + %tmp5 = xor <4 x i32> %tmp1, < i32 -1, i32 -1, i32 -1, i32 -1 > + %tmp6 = and <4 x i32> %tmp5, %tmp3 + %tmp7 = or <4 x i32> %tmp4, %tmp6 + ret <4 x i32> %tmp7 +} + +define <2 x i64> @v_bslQi64(<2 x i64>* %A, <2 x i64>* %B, <2 x i64>* %C) nounwind { +;CHECK: v_bslQi64: +;CHECK: vbsl + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = load <2 x i64>* %C + %tmp4 = and <2 x i64> %tmp1, %tmp2 + %tmp5 = xor <2 x i64> %tmp1, < i64 -1, i64 -1 > + %tmp6 = and <2 x i64> %tmp5, %tmp3 + %tmp7 = or <2 x i64> %tmp4, %tmp6 + ret <2 x i64> %tmp7 +}
diff --git a/src/LLVM/test/CodeGen/ARM/vceq.ll b/src/LLVM/test/CodeGen/ARM/vceq.ll new file mode 100644 index 0000000..051c349 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vceq.ll
@@ -0,0 +1,92 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @vceqi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vceqi8: +;CHECK: vceq.i8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = icmp eq <8 x i8> %tmp1, %tmp2 + %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> + ret <8 x i8> %tmp4 +} + +define <4 x i16> @vceqi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vceqi16: +;CHECK: vceq.i16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = icmp eq <4 x i16> %tmp1, %tmp2 + %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> + ret <4 x i16> %tmp4 +} + +define <2 x i32> @vceqi32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vceqi32: +;CHECK: vceq.i32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = icmp eq <2 x i32> %tmp1, %tmp2 + %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> + ret <2 x i32> %tmp4 +} + +define <2 x i32> @vceqf32(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK: vceqf32: +;CHECK: vceq.f32 + %tmp1 = load <2 x float>* %A + %tmp2 = load <2 x float>* %B + %tmp3 = fcmp oeq <2 x float> %tmp1, %tmp2 + %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> + ret <2 x i32> %tmp4 +} + +define <16 x i8> @vceqQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vceqQi8: +;CHECK: vceq.i8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = icmp eq <16 x i8> %tmp1, %tmp2 + %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> + ret <16 x i8> %tmp4 +} + +define <8 x i16> @vceqQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vceqQi16: +;CHECK: vceq.i16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = icmp eq <8 x i16> %tmp1, %tmp2 + %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> + ret <8 x i16> %tmp4 +} + +define <4 x i32> @vceqQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vceqQi32: +;CHECK: vceq.i32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = icmp eq <4 x i32> %tmp1, %tmp2 + %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> + ret <4 x i32> %tmp4 +} + +define 
<4 x i32> @vceqQf32(<4 x float>* %A, <4 x float>* %B) nounwind { +;CHECK: vceqQf32: +;CHECK: vceq.f32 + %tmp1 = load <4 x float>* %A + %tmp2 = load <4 x float>* %B + %tmp3 = fcmp oeq <4 x float> %tmp1, %tmp2 + %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> + ret <4 x i32> %tmp4 +} + +define <8 x i8> @vceqi8Z(<8 x i8>* %A) nounwind { +;CHECK: vceqi8Z: +;CHECK-NOT: vmov +;CHECK-NOT: vmvn +;CHECK: vceq.i8 + %tmp1 = load <8 x i8>* %A + %tmp3 = icmp eq <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0> + %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> + ret <8 x i8> %tmp4 +}
diff --git a/src/LLVM/test/CodeGen/ARM/vcge.ll b/src/LLVM/test/CodeGen/ARM/vcge.ll new file mode 100644 index 0000000..bf5f0b9 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vcge.ll
@@ -0,0 +1,203 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @vcges8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vcges8: +;CHECK: vcge.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = icmp sge <8 x i8> %tmp1, %tmp2 + %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> + ret <8 x i8> %tmp4 +} + +define <4 x i16> @vcges16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vcges16: +;CHECK: vcge.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = icmp sge <4 x i16> %tmp1, %tmp2 + %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> + ret <4 x i16> %tmp4 +} + +define <2 x i32> @vcges32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vcges32: +;CHECK: vcge.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = icmp sge <2 x i32> %tmp1, %tmp2 + %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> + ret <2 x i32> %tmp4 +} + +define <8 x i8> @vcgeu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vcgeu8: +;CHECK: vcge.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = icmp uge <8 x i8> %tmp1, %tmp2 + %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> + ret <8 x i8> %tmp4 +} + +define <4 x i16> @vcgeu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vcgeu16: +;CHECK: vcge.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = icmp uge <4 x i16> %tmp1, %tmp2 + %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> + ret <4 x i16> %tmp4 +} + +define <2 x i32> @vcgeu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vcgeu32: +;CHECK: vcge.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = icmp uge <2 x i32> %tmp1, %tmp2 + %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> + ret <2 x i32> %tmp4 +} + +define <2 x i32> @vcgef32(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK: vcgef32: +;CHECK: vcge.f32 + %tmp1 = load <2 x float>* %A + %tmp2 = load <2 x float>* %B + %tmp3 = fcmp oge <2 x float> %tmp1, %tmp2 + %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> + ret <2 x i32> %tmp4 +} + +define <16 x i8> 
@vcgeQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vcgeQs8: +;CHECK: vcge.s8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = icmp sge <16 x i8> %tmp1, %tmp2 + %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> + ret <16 x i8> %tmp4 +} + +define <8 x i16> @vcgeQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vcgeQs16: +;CHECK: vcge.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = icmp sge <8 x i16> %tmp1, %tmp2 + %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> + ret <8 x i16> %tmp4 +} + +define <4 x i32> @vcgeQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vcgeQs32: +;CHECK: vcge.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = icmp sge <4 x i32> %tmp1, %tmp2 + %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> + ret <4 x i32> %tmp4 +} + +define <16 x i8> @vcgeQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vcgeQu8: +;CHECK: vcge.u8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = icmp uge <16 x i8> %tmp1, %tmp2 + %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> + ret <16 x i8> %tmp4 +} + +define <8 x i16> @vcgeQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vcgeQu16: +;CHECK: vcge.u16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = icmp uge <8 x i16> %tmp1, %tmp2 + %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> + ret <8 x i16> %tmp4 +} + +define <4 x i32> @vcgeQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vcgeQu32: +;CHECK: vcge.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = icmp uge <4 x i32> %tmp1, %tmp2 + %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> + ret <4 x i32> %tmp4 +} + +define <4 x i32> @vcgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind { +;CHECK: vcgeQf32: +;CHECK: vcge.f32 + %tmp1 = load <4 x float>* %A + %tmp2 = load <4 x float>* %B + %tmp3 = fcmp oge <4 x float> %tmp1, %tmp2 + %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> + ret <4 x i32> %tmp4 +} + +define <2 x i32> @vacgef32(<2 x float>* %A, <2 x float>* %B) nounwind { 
+;CHECK: vacgef32: +;CHECK: vacge.f32 + %tmp1 = load <2 x float>* %A + %tmp2 = load <2 x float>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vacged(<2 x float> %tmp1, <2 x float> %tmp2) + ret <2 x i32> %tmp3 +} + +define <4 x i32> @vacgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind { +;CHECK: vacgeQf32: +;CHECK: vacge.f32 + %tmp1 = load <4 x float>* %A + %tmp2 = load <4 x float>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vacgeq(<4 x float> %tmp1, <4 x float> %tmp2) + ret <4 x i32> %tmp3 +} + +declare <2 x i32> @llvm.arm.neon.vacged(<2 x float>, <2 x float>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vacgeq(<4 x float>, <4 x float>) nounwind readnone + +define <8 x i8> @vcgei8Z(<8 x i8>* %A) nounwind { +;CHECK: vcgei8Z: +;CHECK-NOT: vmov +;CHECK-NOT: vmvn +;CHECK: vcge.s8 + %tmp1 = load <8 x i8>* %A + %tmp3 = icmp sge <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0> + %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> + ret <8 x i8> %tmp4 +} + +define <8 x i8> @vclei8Z(<8 x i8>* %A) nounwind { +;CHECK: vclei8Z: +;CHECK-NOT: vmov +;CHECK-NOT: vmvn +;CHECK: vcle.s8 + %tmp1 = load <8 x i8>* %A + %tmp3 = icmp sle <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0> + %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> + ret <8 x i8> %tmp4 +} + +; Radar 8782191 +; Floating-point comparisons against zero produce results with integer +; elements, not floating-point elements. 
+define void @test_vclez_fp() nounwind optsize { +;CHECK: test_vclez_fp +;CHECK: vcle.f32 +entry: + %0 = fcmp ole <4 x float> undef, zeroinitializer + %1 = sext <4 x i1> %0 to <4 x i16> + %2 = add <4 x i16> %1, zeroinitializer + %3 = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %4 = add <8 x i16> %3, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + %5 = trunc <8 x i16> %4 to <8 x i8> + tail call void @llvm.arm.neon.vst1.v8i8(i8* undef, <8 x i8> %5, i32 1) + unreachable +} + +declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) nounwind
diff --git a/src/LLVM/test/CodeGen/ARM/vcgt.ll b/src/LLVM/test/CodeGen/ARM/vcgt.ll new file mode 100644 index 0000000..2243bac --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vcgt.ll
@@ -0,0 +1,198 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s + +define <8 x i8> @vcgts8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vcgts8: +;CHECK: vcgt.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = icmp sgt <8 x i8> %tmp1, %tmp2 + %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> + ret <8 x i8> %tmp4 +} + +define <4 x i16> @vcgts16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vcgts16: +;CHECK: vcgt.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = icmp sgt <4 x i16> %tmp1, %tmp2 + %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> + ret <4 x i16> %tmp4 +} + +define <2 x i32> @vcgts32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vcgts32: +;CHECK: vcgt.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = icmp sgt <2 x i32> %tmp1, %tmp2 + %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> + ret <2 x i32> %tmp4 +} + +define <8 x i8> @vcgtu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vcgtu8: +;CHECK: vcgt.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = icmp ugt <8 x i8> %tmp1, %tmp2 + %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> + ret <8 x i8> %tmp4 +} + +define <4 x i16> @vcgtu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vcgtu16: +;CHECK: vcgt.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = icmp ugt <4 x i16> %tmp1, %tmp2 + %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> + ret <4 x i16> %tmp4 +} + +define <2 x i32> @vcgtu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vcgtu32: +;CHECK: vcgt.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = icmp ugt <2 x i32> %tmp1, %tmp2 + %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> + ret <2 x i32> %tmp4 +} + +define <2 x i32> @vcgtf32(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK: vcgtf32: +;CHECK: vcgt.f32 + %tmp1 = load <2 x float>* %A + %tmp2 = load <2 x float>* %B + %tmp3 = fcmp ogt <2 x float> %tmp1, %tmp2 + %tmp4 = sext 
<2 x i1> %tmp3 to <2 x i32> + ret <2 x i32> %tmp4 +} + +define <16 x i8> @vcgtQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vcgtQs8: +;CHECK: vcgt.s8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = icmp sgt <16 x i8> %tmp1, %tmp2 + %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> + ret <16 x i8> %tmp4 +} + +define <8 x i16> @vcgtQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vcgtQs16: +;CHECK: vcgt.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = icmp sgt <8 x i16> %tmp1, %tmp2 + %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> + ret <8 x i16> %tmp4 +} + +define <4 x i32> @vcgtQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vcgtQs32: +;CHECK: vcgt.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = icmp sgt <4 x i32> %tmp1, %tmp2 + %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> + ret <4 x i32> %tmp4 +} + +define <16 x i8> @vcgtQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vcgtQu8: +;CHECK: vcgt.u8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = icmp ugt <16 x i8> %tmp1, %tmp2 + %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> + ret <16 x i8> %tmp4 +} + +define <8 x i16> @vcgtQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vcgtQu16: +;CHECK: vcgt.u16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = icmp ugt <8 x i16> %tmp1, %tmp2 + %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> + ret <8 x i16> %tmp4 +} + +define <4 x i32> @vcgtQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vcgtQu32: +;CHECK: vcgt.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = icmp ugt <4 x i32> %tmp1, %tmp2 + %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> + ret <4 x i32> %tmp4 +} + +define <4 x i32> @vcgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind { +;CHECK: vcgtQf32: +;CHECK: vcgt.f32 + %tmp1 = load <4 x float>* %A + %tmp2 = load <4 x float>* %B + %tmp3 = fcmp ogt <4 x float> %tmp1, %tmp2 + %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> + ret <4 x i32> %tmp4 +} + 
+define <2 x i32> @vacgtf32(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK: vacgtf32: +;CHECK: vacgt.f32 + %tmp1 = load <2 x float>* %A + %tmp2 = load <2 x float>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vacgtd(<2 x float> %tmp1, <2 x float> %tmp2) + ret <2 x i32> %tmp3 +} + +define <4 x i32> @vacgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind { +;CHECK: vacgtQf32: +;CHECK: vacgt.f32 + %tmp1 = load <4 x float>* %A + %tmp2 = load <4 x float>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vacgtq(<4 x float> %tmp1, <4 x float> %tmp2) + ret <4 x i32> %tmp3 +} + +; rdar://7923010 +define <4 x i32> @vcgt_zext(<4 x float>* %A, <4 x float>* %B) nounwind { +;CHECK: vcgt_zext: +;CHECK: vmov.i32 [[Q0:q[0-9]+]], #0x1 +;CHECK: vcgt.f32 [[Q1:q[0-9]+]] +;CHECK: vand [[Q2:q[0-9]+]], [[Q1]], [[Q0]] + %tmp1 = load <4 x float>* %A + %tmp2 = load <4 x float>* %B + %tmp3 = fcmp ogt <4 x float> %tmp1, %tmp2 + %tmp4 = zext <4 x i1> %tmp3 to <4 x i32> + ret <4 x i32> %tmp4 +} + +declare <2 x i32> @llvm.arm.neon.vacgtd(<2 x float>, <2 x float>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vacgtq(<4 x float>, <4 x float>) nounwind readnone + +define <8 x i8> @vcgti8Z(<8 x i8>* %A) nounwind { +;CHECK: vcgti8Z: +;CHECK-NOT: vmov +;CHECK-NOT: vmvn +;CHECK: vcgt.s8 + %tmp1 = load <8 x i8>* %A + %tmp3 = icmp sgt <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0> + %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> + ret <8 x i8> %tmp4 +} + +define <8 x i8> @vclti8Z(<8 x i8>* %A) nounwind { +;CHECK: vclti8Z: +;CHECK-NOT: vmov +;CHECK-NOT: vmvn +;CHECK: vclt.s8 + %tmp1 = load <8 x i8>* %A + %tmp3 = icmp slt <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0> + %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> + ret <8 x i8> %tmp4 +}
diff --git a/src/LLVM/test/CodeGen/ARM/vcnt.ll b/src/LLVM/test/CodeGen/ARM/vcnt.ll new file mode 100644 index 0000000..450f90d --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vcnt.ll
@@ -0,0 +1,132 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind { +;CHECK: vcnt8: +;CHECK: vcnt.8 + %tmp1 = load <8 x i8>* %A + %tmp2 = call <8 x i8> @llvm.arm.neon.vcnt.v8i8(<8 x i8> %tmp1) + ret <8 x i8> %tmp2 +} + +define <16 x i8> @vcntQ8(<16 x i8>* %A) nounwind { +;CHECK: vcntQ8: +;CHECK: vcnt.8 + %tmp1 = load <16 x i8>* %A + %tmp2 = call <16 x i8> @llvm.arm.neon.vcnt.v16i8(<16 x i8> %tmp1) + ret <16 x i8> %tmp2 +} + +declare <8 x i8> @llvm.arm.neon.vcnt.v8i8(<8 x i8>) nounwind readnone +declare <16 x i8> @llvm.arm.neon.vcnt.v16i8(<16 x i8>) nounwind readnone + +define <8 x i8> @vclz8(<8 x i8>* %A) nounwind { +;CHECK: vclz8: +;CHECK: vclz.i8 + %tmp1 = load <8 x i8>* %A + %tmp2 = call <8 x i8> @llvm.arm.neon.vclz.v8i8(<8 x i8> %tmp1) + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vclz16(<4 x i16>* %A) nounwind { +;CHECK: vclz16: +;CHECK: vclz.i16 + %tmp1 = load <4 x i16>* %A + %tmp2 = call <4 x i16> @llvm.arm.neon.vclz.v4i16(<4 x i16> %tmp1) + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vclz32(<2 x i32>* %A) nounwind { +;CHECK: vclz32: +;CHECK: vclz.i32 + %tmp1 = load <2 x i32>* %A + %tmp2 = call <2 x i32> @llvm.arm.neon.vclz.v2i32(<2 x i32> %tmp1) + ret <2 x i32> %tmp2 +} + +define <16 x i8> @vclzQ8(<16 x i8>* %A) nounwind { +;CHECK: vclzQ8: +;CHECK: vclz.i8 + %tmp1 = load <16 x i8>* %A + %tmp2 = call <16 x i8> @llvm.arm.neon.vclz.v16i8(<16 x i8> %tmp1) + ret <16 x i8> %tmp2 +} + +define <8 x i16> @vclzQ16(<8 x i16>* %A) nounwind { +;CHECK: vclzQ16: +;CHECK: vclz.i16 + %tmp1 = load <8 x i16>* %A + %tmp2 = call <8 x i16> @llvm.arm.neon.vclz.v8i16(<8 x i16> %tmp1) + ret <8 x i16> %tmp2 +} + +define <4 x i32> @vclzQ32(<4 x i32>* %A) nounwind { +;CHECK: vclzQ32: +;CHECK: vclz.i32 + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x i32> @llvm.arm.neon.vclz.v4i32(<4 x i32> %tmp1) + ret <4 x i32> %tmp2 +} + +declare <8 x i8> @llvm.arm.neon.vclz.v8i8(<8 x i8>) nounwind readnone +declare <4 x i16> 
@llvm.arm.neon.vclz.v4i16(<4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vclz.v2i32(<2 x i32>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vclz.v16i8(<16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vclz.v8i16(<8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vclz.v4i32(<4 x i32>) nounwind readnone + +define <8 x i8> @vclss8(<8 x i8>* %A) nounwind { +;CHECK: vclss8: +;CHECK: vcls.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %tmp1) + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vclss16(<4 x i16>* %A) nounwind { +;CHECK: vclss16: +;CHECK: vcls.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %tmp1) + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vclss32(<2 x i32>* %A) nounwind { +;CHECK: vclss32: +;CHECK: vcls.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %tmp1) + ret <2 x i32> %tmp2 +} + +define <16 x i8> @vclsQs8(<16 x i8>* %A) nounwind { +;CHECK: vclsQs8: +;CHECK: vcls.s8 + %tmp1 = load <16 x i8>* %A + %tmp2 = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %tmp1) + ret <16 x i8> %tmp2 +} + +define <8 x i16> @vclsQs16(<8 x i16>* %A) nounwind { +;CHECK: vclsQs16: +;CHECK: vcls.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %tmp1) + ret <8 x i16> %tmp2 +} + +define <4 x i32> @vclsQs32(<4 x i32>* %A) nounwind { +;CHECK: vclsQs32: +;CHECK: vcls.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %tmp1) + ret <4 x i32> %tmp2 +} + +declare <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16>) nounwind readnone 
+declare <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/ARM/vcombine.ll b/src/LLVM/test/CodeGen/ARM/vcombine.ll new file mode 100644 index 0000000..527f93b --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vcombine.ll
@@ -0,0 +1,72 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <16 x i8> @vcombine8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +; CHECK: vcombine8 +; CHECK: vmov r0, r1, d16 +; CHECK: vmov r2, r3, d17 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vcombine16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +; CHECK: vcombine16 +; CHECK: vmov r0, r1, d16 +; CHECK: vmov r2, r3, d17 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vcombine32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +; CHECK: vcombine32 +; CHECK: vmov r0, r1, d16 +; CHECK: vmov r2, r3, d17 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + ret <4 x i32> %tmp3 +} + +define <4 x float> @vcombinefloat(<2 x float>* %A, <2 x float>* %B) nounwind { +; CHECK: vcombinefloat +; CHECK: vmov r0, r1, d16 +; CHECK: vmov r2, r3, d17 + %tmp1 = load <2 x float>* %A + %tmp2 = load <2 x float>* %B + %tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + ret <4 x float> %tmp3 +} + +define <2 x i64> @vcombine64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +; CHECK: vcombine64 +; CHECK: vmov r0, r1, d16 +; CHECK: vmov r2, r3, d17 + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = shufflevector <1 x i64> %tmp1, <1 x i64> %tmp2, <2 x i32> <i32 0, i32 1> + ret <2 x i64> %tmp3 +} + +; Check for vget_low and vget_high implemented with shufflevector. PR8411. +; They should not require storing to the stack. 
+ +define <4 x i16> @vget_low16(<8 x i16>* %A) nounwind { +; CHECK: vget_low16 +; CHECK-NOT: vst +; CHECK: vmov r0, r1, d16 + %tmp1 = load <8 x i16>* %A + %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + ret <4 x i16> %tmp2 +} + +define <8 x i8> @vget_high8(<16 x i8>* %A) nounwind { +; CHECK: vget_high8 +; CHECK-NOT: vst +; CHECK: vmov r0, r1, d17 + %tmp1 = load <16 x i8>* %A + %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + ret <8 x i8> %tmp2 +}
diff --git a/src/LLVM/test/CodeGen/ARM/vcvt.ll b/src/LLVM/test/CodeGen/ARM/vcvt.ll new file mode 100644 index 0000000..c078f49 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vcvt.ll
@@ -0,0 +1,158 @@ +; RUN: llc < %s -march=arm -mattr=+neon,+fp16 | FileCheck %s + +define <2 x i32> @vcvt_f32tos32(<2 x float>* %A) nounwind { +;CHECK: vcvt_f32tos32: +;CHECK: vcvt.s32.f32 + %tmp1 = load <2 x float>* %A + %tmp2 = fptosi <2 x float> %tmp1 to <2 x i32> + ret <2 x i32> %tmp2 +} + +define <2 x i32> @vcvt_f32tou32(<2 x float>* %A) nounwind { +;CHECK: vcvt_f32tou32: +;CHECK: vcvt.u32.f32 + %tmp1 = load <2 x float>* %A + %tmp2 = fptoui <2 x float> %tmp1 to <2 x i32> + ret <2 x i32> %tmp2 +} + +define <2 x float> @vcvt_s32tof32(<2 x i32>* %A) nounwind { +;CHECK: vcvt_s32tof32: +;CHECK: vcvt.f32.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = sitofp <2 x i32> %tmp1 to <2 x float> + ret <2 x float> %tmp2 +} + +define <2 x float> @vcvt_u32tof32(<2 x i32>* %A) nounwind { +;CHECK: vcvt_u32tof32: +;CHECK: vcvt.f32.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = uitofp <2 x i32> %tmp1 to <2 x float> + ret <2 x float> %tmp2 +} + +define <4 x i32> @vcvtQ_f32tos32(<4 x float>* %A) nounwind { +;CHECK: vcvtQ_f32tos32: +;CHECK: vcvt.s32.f32 + %tmp1 = load <4 x float>* %A + %tmp2 = fptosi <4 x float> %tmp1 to <4 x i32> + ret <4 x i32> %tmp2 +} + +define <4 x i32> @vcvtQ_f32tou32(<4 x float>* %A) nounwind { +;CHECK: vcvtQ_f32tou32: +;CHECK: vcvt.u32.f32 + %tmp1 = load <4 x float>* %A + %tmp2 = fptoui <4 x float> %tmp1 to <4 x i32> + ret <4 x i32> %tmp2 +} + +define <4 x float> @vcvtQ_s32tof32(<4 x i32>* %A) nounwind { +;CHECK: vcvtQ_s32tof32: +;CHECK: vcvt.f32.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = sitofp <4 x i32> %tmp1 to <4 x float> + ret <4 x float> %tmp2 +} + +define <4 x float> @vcvtQ_u32tof32(<4 x i32>* %A) nounwind { +;CHECK: vcvtQ_u32tof32: +;CHECK: vcvt.f32.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = uitofp <4 x i32> %tmp1 to <4 x float> + ret <4 x float> %tmp2 +} + +define <2 x i32> @vcvt_n_f32tos32(<2 x float>* %A) nounwind { +;CHECK: vcvt_n_f32tos32: +;CHECK: vcvt.s32.f32 + %tmp1 = load <2 x float>* %A + %tmp2 = call <2 x i32> 
@llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> %tmp1, i32 1) + ret <2 x i32> %tmp2 +} + +define <2 x i32> @vcvt_n_f32tou32(<2 x float>* %A) nounwind { +;CHECK: vcvt_n_f32tou32: +;CHECK: vcvt.u32.f32 + %tmp1 = load <2 x float>* %A + %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> %tmp1, i32 1) + ret <2 x i32> %tmp2 +} + +define <2 x float> @vcvt_n_s32tof32(<2 x i32>* %A) nounwind { +;CHECK: vcvt_n_s32tof32: +;CHECK: vcvt.f32.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %tmp1, i32 1) + ret <2 x float> %tmp2 +} + +define <2 x float> @vcvt_n_u32tof32(<2 x i32>* %A) nounwind { +;CHECK: vcvt_n_u32tof32: +;CHECK: vcvt.f32.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %tmp1, i32 1) + ret <2 x float> %tmp2 +} + +declare <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32) nounwind readnone +declare <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone +declare <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone + +define <4 x i32> @vcvtQ_n_f32tos32(<4 x float>* %A) nounwind { +;CHECK: vcvtQ_n_f32tos32: +;CHECK: vcvt.s32.f32 + %tmp1 = load <4 x float>* %A + %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> %tmp1, i32 1) + ret <4 x i32> %tmp2 +} + +define <4 x i32> @vcvtQ_n_f32tou32(<4 x float>* %A) nounwind { +;CHECK: vcvtQ_n_f32tou32: +;CHECK: vcvt.u32.f32 + %tmp1 = load <4 x float>* %A + %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> %tmp1, i32 1) + ret <4 x i32> %tmp2 +} + +define <4 x float> @vcvtQ_n_s32tof32(<4 x i32>* %A) nounwind { +;CHECK: vcvtQ_n_s32tof32: +;CHECK: vcvt.f32.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %tmp1, i32 1) + ret <4 
x float> %tmp2 +} + +define <4 x float> @vcvtQ_n_u32tof32(<4 x i32>* %A) nounwind { +;CHECK: vcvtQ_n_u32tof32: +;CHECK: vcvt.f32.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %tmp1, i32 1) + ret <4 x float> %tmp2 +} + +declare <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32) nounwind readnone +declare <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone +declare <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone + +define <4 x float> @vcvt_f16tof32(<4 x i16>* %A) nounwind { +;CHECK: vcvt_f16tof32: +;CHECK: vcvt.f32.f16 + %tmp1 = load <4 x i16>* %A + %tmp2 = call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> %tmp1) + ret <4 x float> %tmp2 +} + +define <4 x i16> @vcvt_f32tof16(<4 x float>* %A) nounwind { +;CHECK: vcvt_f32tof16: +;CHECK: vcvt.f16.f32 + %tmp1 = load <4 x float>* %A + %tmp2 = call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> %tmp1) + ret <4 x i16> %tmp2 +} + +declare <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/ARM/vcvt_combine.ll b/src/LLVM/test/CodeGen/ARM/vcvt_combine.ll new file mode 100644 index 0000000..3009e50 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vcvt_combine.ll
@@ -0,0 +1,99 @@ +; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s + +@in = global float 0x400921FA00000000, align 4 + +; Test signed conversion. +; CHECK: t0 +; CHECK-NOT: vmul +define void @t0() nounwind { +entry: + %tmp = load float* @in, align 4, !tbaa !0 + %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0 + %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1 + %mul.i = fmul <2 x float> %vecinit2.i, <float 8.000000e+00, float 8.000000e+00> + %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32> + tail call void @foo_int32x2_t(<2 x i32> %vcvt.i) nounwind + ret void +} + +declare void @foo_int32x2_t(<2 x i32>) + +; Test unsigned conversion. +; CHECK: t1 +; CHECK-NOT: vmul +define void @t1() nounwind { +entry: + %tmp = load float* @in, align 4, !tbaa !0 + %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0 + %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1 + %mul.i = fmul <2 x float> %vecinit2.i, <float 8.000000e+00, float 8.000000e+00> + %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32> + tail call void @foo_uint32x2_t(<2 x i32> %vcvt.i) nounwind + ret void +} + +declare void @foo_uint32x2_t(<2 x i32>) + +; Test which should not fold due to non-power of 2. +; CHECK: t2 +; CHECK: vmul +define void @t2() nounwind { +entry: + %tmp = load float* @in, align 4, !tbaa !0 + %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0 + %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1 + %mul.i = fmul <2 x float> %vecinit2.i, <float 0x401B333340000000, float 0x401B333340000000> + %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32> + tail call void @foo_int32x2_t(<2 x i32> %vcvt.i) nounwind + ret void +} + +; Test which should not fold due to power of 2 out of range. 
+; CHECK: t3 +; CHECK: vmul +define void @t3() nounwind { +entry: + %tmp = load float* @in, align 4, !tbaa !0 + %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0 + %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1 + %mul.i = fmul <2 x float> %vecinit2.i, <float 0x4200000000000000, float 0x4200000000000000> + %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32> + tail call void @foo_int32x2_t(<2 x i32> %vcvt.i) nounwind + ret void +} + +; Test which case where const is max power of 2 (i.e., 2^32). +; CHECK: t4 +; CHECK-NOT: vmul +define void @t4() nounwind { +entry: + %tmp = load float* @in, align 4, !tbaa !0 + %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0 + %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1 + %mul.i = fmul <2 x float> %vecinit2.i, <float 0x41F0000000000000, float 0x41F0000000000000> + %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32> + tail call void @foo_int32x2_t(<2 x i32> %vcvt.i) nounwind + ret void +} + +; Test quadword. +; CHECK: t5 +; CHECK-NOT: vmul +define void @t5() nounwind { +entry: + %tmp = load float* @in, align 4, !tbaa !0 + %vecinit.i = insertelement <4 x float> undef, float %tmp, i32 0 + %vecinit2.i = insertelement <4 x float> %vecinit.i, float %tmp, i32 1 + %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %tmp, i32 2 + %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %tmp, i32 3 + %mul.i = fmul <4 x float> %vecinit6.i, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00> + %vcvt.i = fptosi <4 x float> %mul.i to <4 x i32> + tail call void @foo_int32x4_t(<4 x i32> %vcvt.i) nounwind + ret void +} + +declare void @foo_int32x4_t(<4 x i32>) + +!0 = metadata !{metadata !"float", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/src/LLVM/test/CodeGen/ARM/vdiv_combine.ll b/src/LLVM/test/CodeGen/ARM/vdiv_combine.ll new file mode 100644 index 0000000..1387393 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vdiv_combine.ll
@@ -0,0 +1,102 @@ +; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s + +@in = global float 0x400921FA00000000, align 4 +@iin = global i32 -1023, align 4 +@uin = global i32 1023, align 4 + +declare void @foo_int32x4_t(<4 x i32>) + +; Test signed conversion. +; CHECK: t1 +; CHECK-NOT: vdiv +define void @t1() nounwind { +entry: + %tmp = load i32* @iin, align 4, !tbaa !3 + %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0 + %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1 + %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float> + %div.i = fdiv <2 x float> %vcvt.i, <float 8.000000e+00, float 8.000000e+00> + tail call void @foo_float32x2_t(<2 x float> %div.i) nounwind + ret void +} + +declare void @foo_float32x2_t(<2 x float>) + +; Test unsigned conversion. +; CHECK: t2 +; CHECK-NOT: vdiv +define void @t2() nounwind { +entry: + %tmp = load i32* @uin, align 4, !tbaa !3 + %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0 + %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1 + %vcvt.i = uitofp <2 x i32> %vecinit2.i to <2 x float> + %div.i = fdiv <2 x float> %vcvt.i, <float 8.000000e+00, float 8.000000e+00> + tail call void @foo_float32x2_t(<2 x float> %div.i) nounwind + ret void +} + +; Test which should not fold due to non-power of 2. +; CHECK: t3 +; CHECK: vdiv +define void @t3() nounwind { +entry: + %tmp = load i32* @iin, align 4, !tbaa !3 + %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0 + %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1 + %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float> + %div.i = fdiv <2 x float> %vcvt.i, <float 0x401B333340000000, float 0x401B333340000000> + tail call void @foo_float32x2_t(<2 x float> %div.i) nounwind + ret void +} + +; Test which should not fold due to power of 2 out of range. 
+; CHECK: t4 +; CHECK: vdiv +define void @t4() nounwind { +entry: + %tmp = load i32* @iin, align 4, !tbaa !3 + %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0 + %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1 + %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float> + %div.i = fdiv <2 x float> %vcvt.i, <float 0x4200000000000000, float 0x4200000000000000> + tail call void @foo_float32x2_t(<2 x float> %div.i) nounwind + ret void +} + +; Test case where const is max power of 2 (i.e., 2^32). +; CHECK: t5 +; CHECK-NOT: vdiv +define void @t5() nounwind { +entry: + %tmp = load i32* @iin, align 4, !tbaa !3 + %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0 + %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1 + %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float> + %div.i = fdiv <2 x float> %vcvt.i, <float 0x41F0000000000000, float 0x41F0000000000000> + tail call void @foo_float32x2_t(<2 x float> %div.i) nounwind + ret void +} + +; Test quadword. +; CHECK: t6 +; CHECK-NOT: vdiv +define void @t6() nounwind { +entry: + %tmp = load i32* @iin, align 4, !tbaa !3 + %vecinit.i = insertelement <4 x i32> undef, i32 %tmp, i32 0 + %vecinit2.i = insertelement <4 x i32> %vecinit.i, i32 %tmp, i32 1 + %vecinit4.i = insertelement <4 x i32> %vecinit2.i, i32 %tmp, i32 2 + %vecinit6.i = insertelement <4 x i32> %vecinit4.i, i32 %tmp, i32 3 + %vcvt.i = sitofp <4 x i32> %vecinit6.i to <4 x float> + %div.i = fdiv <4 x float> %vcvt.i, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00> + tail call void @foo_float32x4_t(<4 x float> %div.i) nounwind + ret void +} + +declare void @foo_float32x4_t(<4 x float>) + +!0 = metadata !{metadata !"float", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null} +!3 = metadata !{metadata !"int", metadata !1}
diff --git a/src/LLVM/test/CodeGen/ARM/vdup.ll b/src/LLVM/test/CodeGen/ARM/vdup.ll new file mode 100644 index 0000000..e99fac1 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vdup.ll
@@ -0,0 +1,263 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @v_dup8(i8 %A) nounwind { +;CHECK: v_dup8: +;CHECK: vdup.8 + %tmp1 = insertelement <8 x i8> zeroinitializer, i8 %A, i32 0 + %tmp2 = insertelement <8 x i8> %tmp1, i8 %A, i32 1 + %tmp3 = insertelement <8 x i8> %tmp2, i8 %A, i32 2 + %tmp4 = insertelement <8 x i8> %tmp3, i8 %A, i32 3 + %tmp5 = insertelement <8 x i8> %tmp4, i8 %A, i32 4 + %tmp6 = insertelement <8 x i8> %tmp5, i8 %A, i32 5 + %tmp7 = insertelement <8 x i8> %tmp6, i8 %A, i32 6 + %tmp8 = insertelement <8 x i8> %tmp7, i8 %A, i32 7 + ret <8 x i8> %tmp8 +} + +define <4 x i16> @v_dup16(i16 %A) nounwind { +;CHECK: v_dup16: +;CHECK: vdup.16 + %tmp1 = insertelement <4 x i16> zeroinitializer, i16 %A, i32 0 + %tmp2 = insertelement <4 x i16> %tmp1, i16 %A, i32 1 + %tmp3 = insertelement <4 x i16> %tmp2, i16 %A, i32 2 + %tmp4 = insertelement <4 x i16> %tmp3, i16 %A, i32 3 + ret <4 x i16> %tmp4 +} + +define <2 x i32> @v_dup32(i32 %A) nounwind { +;CHECK: v_dup32: +;CHECK: vdup.32 + %tmp1 = insertelement <2 x i32> zeroinitializer, i32 %A, i32 0 + %tmp2 = insertelement <2 x i32> %tmp1, i32 %A, i32 1 + ret <2 x i32> %tmp2 +} + +define <2 x float> @v_dupfloat(float %A) nounwind { +;CHECK: v_dupfloat: +;CHECK: vdup.32 + %tmp1 = insertelement <2 x float> zeroinitializer, float %A, i32 0 + %tmp2 = insertelement <2 x float> %tmp1, float %A, i32 1 + ret <2 x float> %tmp2 +} + +define <16 x i8> @v_dupQ8(i8 %A) nounwind { +;CHECK: v_dupQ8: +;CHECK: vdup.8 + %tmp1 = insertelement <16 x i8> zeroinitializer, i8 %A, i32 0 + %tmp2 = insertelement <16 x i8> %tmp1, i8 %A, i32 1 + %tmp3 = insertelement <16 x i8> %tmp2, i8 %A, i32 2 + %tmp4 = insertelement <16 x i8> %tmp3, i8 %A, i32 3 + %tmp5 = insertelement <16 x i8> %tmp4, i8 %A, i32 4 + %tmp6 = insertelement <16 x i8> %tmp5, i8 %A, i32 5 + %tmp7 = insertelement <16 x i8> %tmp6, i8 %A, i32 6 + %tmp8 = insertelement <16 x i8> %tmp7, i8 %A, i32 7 + %tmp9 = insertelement <16 x i8> %tmp8, i8 %A, i32 
8 + %tmp10 = insertelement <16 x i8> %tmp9, i8 %A, i32 9 + %tmp11 = insertelement <16 x i8> %tmp10, i8 %A, i32 10 + %tmp12 = insertelement <16 x i8> %tmp11, i8 %A, i32 11 + %tmp13 = insertelement <16 x i8> %tmp12, i8 %A, i32 12 + %tmp14 = insertelement <16 x i8> %tmp13, i8 %A, i32 13 + %tmp15 = insertelement <16 x i8> %tmp14, i8 %A, i32 14 + %tmp16 = insertelement <16 x i8> %tmp15, i8 %A, i32 15 + ret <16 x i8> %tmp16 +} + +define <8 x i16> @v_dupQ16(i16 %A) nounwind { +;CHECK: v_dupQ16: +;CHECK: vdup.16 + %tmp1 = insertelement <8 x i16> zeroinitializer, i16 %A, i32 0 + %tmp2 = insertelement <8 x i16> %tmp1, i16 %A, i32 1 + %tmp3 = insertelement <8 x i16> %tmp2, i16 %A, i32 2 + %tmp4 = insertelement <8 x i16> %tmp3, i16 %A, i32 3 + %tmp5 = insertelement <8 x i16> %tmp4, i16 %A, i32 4 + %tmp6 = insertelement <8 x i16> %tmp5, i16 %A, i32 5 + %tmp7 = insertelement <8 x i16> %tmp6, i16 %A, i32 6 + %tmp8 = insertelement <8 x i16> %tmp7, i16 %A, i32 7 + ret <8 x i16> %tmp8 +} + +define <4 x i32> @v_dupQ32(i32 %A) nounwind { +;CHECK: v_dupQ32: +;CHECK: vdup.32 + %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %A, i32 0 + %tmp2 = insertelement <4 x i32> %tmp1, i32 %A, i32 1 + %tmp3 = insertelement <4 x i32> %tmp2, i32 %A, i32 2 + %tmp4 = insertelement <4 x i32> %tmp3, i32 %A, i32 3 + ret <4 x i32> %tmp4 +} + +define <4 x float> @v_dupQfloat(float %A) nounwind { +;CHECK: v_dupQfloat: +;CHECK: vdup.32 + %tmp1 = insertelement <4 x float> zeroinitializer, float %A, i32 0 + %tmp2 = insertelement <4 x float> %tmp1, float %A, i32 1 + %tmp3 = insertelement <4 x float> %tmp2, float %A, i32 2 + %tmp4 = insertelement <4 x float> %tmp3, float %A, i32 3 + ret <4 x float> %tmp4 +} + +; Check to make sure it works with shuffles, too. 
+ +define <8 x i8> @v_shuffledup8(i8 %A) nounwind { +;CHECK: v_shuffledup8: +;CHECK: vdup.8 + %tmp1 = insertelement <8 x i8> undef, i8 %A, i32 0 + %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer + ret <8 x i8> %tmp2 +} + +define <4 x i16> @v_shuffledup16(i16 %A) nounwind { +;CHECK: v_shuffledup16: +;CHECK: vdup.16 + %tmp1 = insertelement <4 x i16> undef, i16 %A, i32 0 + %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer + ret <4 x i16> %tmp2 +} + +define <2 x i32> @v_shuffledup32(i32 %A) nounwind { +;CHECK: v_shuffledup32: +;CHECK: vdup.32 + %tmp1 = insertelement <2 x i32> undef, i32 %A, i32 0 + %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer + ret <2 x i32> %tmp2 +} + +define <2 x float> @v_shuffledupfloat(float %A) nounwind { +;CHECK: v_shuffledupfloat: +;CHECK: vdup.32 + %tmp1 = insertelement <2 x float> undef, float %A, i32 0 + %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer + ret <2 x float> %tmp2 +} + +define <16 x i8> @v_shuffledupQ8(i8 %A) nounwind { +;CHECK: v_shuffledupQ8: +;CHECK: vdup.8 + %tmp1 = insertelement <16 x i8> undef, i8 %A, i32 0 + %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> zeroinitializer + ret <16 x i8> %tmp2 +} + +define <8 x i16> @v_shuffledupQ16(i16 %A) nounwind { +;CHECK: v_shuffledupQ16: +;CHECK: vdup.16 + %tmp1 = insertelement <8 x i16> undef, i16 %A, i32 0 + %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> zeroinitializer + ret <8 x i16> %tmp2 +} + +define <4 x i32> @v_shuffledupQ32(i32 %A) nounwind { +;CHECK: v_shuffledupQ32: +;CHECK: vdup.32 + %tmp1 = insertelement <4 x i32> undef, i32 %A, i32 0 + %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> zeroinitializer + ret <4 x i32> %tmp2 +} + +define <4 x float> @v_shuffledupQfloat(float %A) nounwind { +;CHECK: v_shuffledupQfloat: +;CHECK: vdup.32 + %tmp1 = insertelement <4 x float> undef, float 
%A, i32 0 + %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer + ret <4 x float> %tmp2 +} + +define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind { +;CHECK: vduplane8: +;CHECK: vdup.8 + %tmp1 = load <8 x i8>* %A + %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 > + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vduplane16(<4 x i16>* %A) nounwind { +;CHECK: vduplane16: +;CHECK: vdup.16 + %tmp1 = load <4 x i16>* %A + %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vduplane32(<2 x i32>* %A) nounwind { +;CHECK: vduplane32: +;CHECK: vdup.32 + %tmp1 = load <2 x i32>* %A + %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> < i32 1, i32 1 > + ret <2 x i32> %tmp2 +} + +define <2 x float> @vduplanefloat(<2 x float>* %A) nounwind { +;CHECK: vduplanefloat: +;CHECK: vdup.32 + %tmp1 = load <2 x float>* %A + %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> < i32 1, i32 1 > + ret <2 x float> %tmp2 +} + +define <16 x i8> @vduplaneQ8(<8 x i8>* %A) nounwind { +;CHECK: vduplaneQ8: +;CHECK: vdup.8 + %tmp1 = load <8 x i8>* %A + %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 > + ret <16 x i8> %tmp2 +} + +define <8 x i16> @vduplaneQ16(<4 x i16>* %A) nounwind { +;CHECK: vduplaneQ16: +;CHECK: vdup.16 + %tmp1 = load <4 x i16>* %A + %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 > + ret <8 x i16> %tmp2 +} + +define <4 x i32> @vduplaneQ32(<2 x i32>* %A) nounwind { +;CHECK: vduplaneQ32: +;CHECK: vdup.32 + %tmp1 = load <2 x i32>* %A + %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > + ret <4 x i32> %tmp2 +} + +define 
<4 x float> @vduplaneQfloat(<2 x float>* %A) nounwind { +;CHECK: vduplaneQfloat: +;CHECK: vdup.32 + %tmp1 = load <2 x float>* %A + %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > + ret <4 x float> %tmp2 +} + +define <2 x i64> @foo(<2 x i64> %arg0_int64x1_t) nounwind readnone { +entry: + %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> <i32 1, i32 1> + ret <2 x i64> %0 +} + +define <2 x i64> @bar(<2 x i64> %arg0_int64x1_t) nounwind readnone { +entry: + %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> <i32 0, i32 0> + ret <2 x i64> %0 +} + +define <2 x double> @baz(<2 x double> %arg0_int64x1_t) nounwind readnone { +entry: + %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 1, i32 1> + ret <2 x double> %0 +} + +define <2 x double> @qux(<2 x double> %arg0_int64x1_t) nounwind readnone { +entry: + %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 0, i32 0> + ret <2 x double> %0 +} + +; Radar 7373643 +;CHECK: redundantVdup: +;CHECK: vmov.i8 +;CHECK-NOT: vdup.8 +;CHECK: vstr.64 +define void @redundantVdup(<8 x i8>* %ptr) nounwind { + %1 = insertelement <8 x i8> undef, i8 -128, i32 0 + %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer + store <8 x i8> %2, <8 x i8>* %ptr, align 8 + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/vector-DAGCombine.ll b/src/LLVM/test/CodeGen/ARM/vector-DAGCombine.ll new file mode 100644 index 0000000..81bdc44 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vector-DAGCombine.ll
@@ -0,0 +1,125 @@ +; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s + +; PR7158 +define i32 @test_pr7158() nounwind { +bb.nph55.bb.nph55.split_crit_edge: + br label %bb3 + +bb3: ; preds = %bb3, %bb.nph55.bb.nph55.split_crit_edge + br i1 undef, label %bb.i19, label %bb3 + +bb.i19: ; preds = %bb.i19, %bb3 + %0 = insertelement <4 x float> undef, float undef, i32 3 ; <<4 x float>> [#uses=3] + %1 = fmul <4 x float> %0, %0 ; <<4 x float>> [#uses=1] + %2 = bitcast <4 x float> %1 to <2 x double> ; <<2 x double>> [#uses=0] + %3 = fmul <4 x float> %0, undef ; <<4 x float>> [#uses=0] + br label %bb.i19 +} + +; Check that the DAG combiner does not arbitrarily modify BUILD_VECTORs +; after legalization. +define void @test_illegal_build_vector() nounwind { +entry: + store <2 x i64> undef, <2 x i64>* undef, align 16 + %0 = load <16 x i8>* undef, align 16 ; <<16 x i8>> [#uses=1] + %1 = or <16 x i8> zeroinitializer, %0 ; <<16 x i8>> [#uses=1] + store <16 x i8> %1, <16 x i8>* undef, align 16 + ret void +} + +; Radar 8407927: Make sure that VMOVRRD gets optimized away when the result is +; converted back to be used as a vector type. 
+; CHECK: test_vmovrrd_combine +define <4 x i32> @test_vmovrrd_combine() nounwind { +entry: + br i1 undef, label %bb1, label %bb2 + +bb1: + %0 = bitcast <2 x i64> zeroinitializer to <2 x double> + %1 = extractelement <2 x double> %0, i32 0 + %2 = bitcast double %1 to i64 + %3 = insertelement <1 x i64> undef, i64 %2, i32 0 +; CHECK-NOT: vmov s +; CHECK: vext.8 + %4 = shufflevector <1 x i64> %3, <1 x i64> undef, <2 x i32> <i32 0, i32 1> + %tmp2006.3 = bitcast <2 x i64> %4 to <16 x i8> + %5 = shufflevector <16 x i8> %tmp2006.3, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19> + %tmp2004.3 = bitcast <16 x i8> %5 to <4 x i32> + br i1 undef, label %bb2, label %bb1 + +bb2: + %result = phi <4 x i32> [ undef, %entry ], [ %tmp2004.3, %bb1 ] + ret <4 x i32> %result +} + +; Test trying to do a ShiftCombine on illegal types. +; The vector should be split first. +define void @lshrIllegalType(<8 x i32>* %A) nounwind { + %tmp1 = load <8 x i32>* %A + %tmp2 = lshr <8 x i32> %tmp1, < i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> + store <8 x i32> %tmp2, <8 x i32>* %A + ret void +} + +; Test folding a binary vector operation with constant BUILD_VECTOR +; operands with i16 elements. +define void @test_i16_constant_fold() nounwind optsize { +entry: + %0 = sext <4 x i1> zeroinitializer to <4 x i16> + %1 = add <4 x i16> %0, zeroinitializer + %2 = shufflevector <4 x i16> %1, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %3 = add <8 x i16> %2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + %4 = trunc <8 x i16> %3 to <8 x i8> + tail call void @llvm.arm.neon.vst1.v8i8(i8* undef, <8 x i8> %4, i32 1) + unreachable +} + +declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) nounwind + +; Test that loads and stores of i64 vector elements are handled as f64 values +; so they are not split up into i32 values. Radar 8755338. 
+define void @i64_buildvector(i64* %ptr, <2 x i64>* %vp) nounwind { +; CHECK: i64_buildvector +; CHECK: vldr.64 + %t0 = load i64* %ptr, align 4 + %t1 = insertelement <2 x i64> undef, i64 %t0, i32 0 + store <2 x i64> %t1, <2 x i64>* %vp + ret void +} + +define void @i64_insertelement(i64* %ptr, <2 x i64>* %vp) nounwind { +; CHECK: i64_insertelement +; CHECK: vldr.64 + %t0 = load i64* %ptr, align 4 + %vec = load <2 x i64>* %vp + %t1 = insertelement <2 x i64> %vec, i64 %t0, i32 0 + store <2 x i64> %t1, <2 x i64>* %vp + ret void +} + +define void @i64_extractelement(i64* %ptr, <2 x i64>* %vp) nounwind { +; CHECK: i64_extractelement +; CHECK: vstr.64 + %vec = load <2 x i64>* %vp + %t1 = extractelement <2 x i64> %vec, i32 0 + store i64 %t1, i64* %ptr + ret void +} + +; Test trying to do a AND Combine on illegal types. +define void @andVec(<3 x i8>* %A) nounwind { + %tmp = load <3 x i8>* %A, align 4 + %and = and <3 x i8> %tmp, <i8 7, i8 7, i8 7> + store <3 x i8> %and, <3 x i8>* %A + ret void +} + + +; Test trying to do an OR Combine on illegal types. +define void @orVec(<3 x i8>* %A) nounwind { + %tmp = load <3 x i8>* %A, align 4 + %or = or <3 x i8> %tmp, <i8 7, i8 7, i8 7> + store <3 x i8> %or, <3 x i8>* %A + ret void +} +
diff --git a/src/LLVM/test/CodeGen/ARM/vext.ll b/src/LLVM/test/CodeGen/ARM/vext.ll new file mode 100644 index 0000000..65b5913 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vext.ll
@@ -0,0 +1,152 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @test_vextd(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: test_vextd: +;CHECK: vext + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10> + ret <8 x i8> %tmp3 +} + +define <8 x i8> @test_vextRd(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: test_vextRd: +;CHECK: vext + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4> + ret <8 x i8> %tmp3 +} + +define <16 x i8> @test_vextq(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: test_vextq: +;CHECK: vext + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18> + ret <16 x i8> %tmp3 +} + +define <16 x i8> @test_vextRq(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: test_vextRq: +;CHECK: vext + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6> + ret <16 x i8> %tmp3 +} + +define <4 x i16> @test_vextd16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: test_vextd16: +;CHECK: vext + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 3, i32 4, i32 5, i32 6> + ret <4 x i16> %tmp3 +} + +define <4 x i32> @test_vextq32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: test_vextq32: +;CHECK: vext + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 3, 
i32 4, i32 5, i32 6> + ret <4 x i32> %tmp3 +} + +; Undef shuffle indices should not prevent matching to VEXT: + +define <8 x i8> @test_vextd_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: test_vextd_undef: +;CHECK: vext + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10> + ret <8 x i8> %tmp3 +} + +define <16 x i8> @test_vextRq_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: test_vextRq_undef: +;CHECK: vext + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 23, i32 24, i32 25, i32 26, i32 undef, i32 undef, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 undef, i32 6> + ret <16 x i8> %tmp3 +} + +; Tests for ReconstructShuffle function. Indices have to be carefully +; chosen to reach lowering phase as a BUILD_VECTOR. + +; One vector needs vext, the other can be handled by extract_subvector +; Also checks interleaving of sources is handled correctly. +; Essence: a vext is used on %A and something saner than stack load/store for final result. +define <4 x i16> @test_interleaved(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: test_interleaved: +;CHECK: vext.16 +;CHECK-NOT: vext.16 +;CHECK: vzip.16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <4 x i32> <i32 3, i32 8, i32 5, i32 9> + ret <4 x i16> %tmp3 +} + +; An undef in the shuffle list should still be optimizable +define <4 x i16> @test_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: test_undef: +;CHECK: vzip.16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <4 x i32> <i32 undef, i32 8, i32 5, i32 9> + ret <4 x i16> %tmp3 +} + +; We should ignore a build_vector with more than two sources. 
+; Use illegal <32 x i16> type to produce such a shuffle after legalizing types. +; Try to look for fallback to stack expansion. +define <4 x i16> @test_multisource(<32 x i16>* %B) nounwind { +;CHECK: test_multisource: +;CHECK: vst1.16 + %tmp1 = load <32 x i16>* %B + %tmp2 = shufflevector <32 x i16> %tmp1, <32 x i16> undef, <4 x i32> <i32 0, i32 8, i32 16, i32 24> + ret <4 x i16> %tmp2 +} + +; We don't handle shuffles using more than half of a 128-bit vector. +; Again, test for fallback to stack expansion +define <4 x i16> @test_largespan(<8 x i16>* %B) nounwind { +;CHECK: test_largespan: +;CHECK: vst1.16 + %tmp1 = load <8 x i16>* %B + %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + ret <4 x i16> %tmp2 +} + +; The actual shuffle code only handles some cases, make sure we check +; this rather than blindly emitting a VECTOR_SHUFFLE (infinite +; lowering loop can result otherwise). +define <8 x i16> @test_illegal(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: test_illegal: +;CHECK: vst1.16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 7, i32 5, i32 13, i32 3, i32 2, i32 2, i32 9> + ret <8 x i16> %tmp3 +} + +; PR11129 +; Make sure this doesn't crash +define arm_aapcscc void @test_elem_mismatch(<2 x i64>* nocapture %src, <4 x i16>* nocapture %dest) nounwind { +; CHECK: test_elem_mismatch: +; CHECK: vstr.64 + %tmp0 = load <2 x i64>* %src, align 16 + %tmp1 = bitcast <2 x i64> %tmp0 to <4 x i32> + %tmp2 = extractelement <4 x i32> %tmp1, i32 0 + %tmp3 = extractelement <4 x i32> %tmp1, i32 2 + %tmp4 = trunc i32 %tmp2 to i16 + %tmp5 = trunc i32 %tmp3 to i16 + %tmp6 = insertelement <4 x i16> undef, i16 %tmp4, i32 0 + %tmp7 = insertelement <4 x i16> %tmp6, i16 %tmp5, i32 1 + store <4 x i16> %tmp7, <4 x i16>* %dest, align 4 + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/vfcmp.ll b/src/LLVM/test/CodeGen/ARM/vfcmp.ll new file mode 100644 index 0000000..6946d02 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vfcmp.ll
@@ -0,0 +1,139 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +; This tests fcmp operations that do not map directly to NEON instructions. + +; une is implemented with VCEQ/VMVN +define <2 x i32> @vcunef32(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK: vcunef32: +;CHECK: vceq.f32 +;CHECK-NEXT: vmvn + %tmp1 = load <2 x float>* %A + %tmp2 = load <2 x float>* %B + %tmp3 = fcmp une <2 x float> %tmp1, %tmp2 + %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> + ret <2 x i32> %tmp4 +} + +; olt is implemented with VCGT +define <2 x i32> @vcoltf32(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK: vcoltf32: +;CHECK: vcgt.f32 + %tmp1 = load <2 x float>* %A + %tmp2 = load <2 x float>* %B + %tmp3 = fcmp olt <2 x float> %tmp1, %tmp2 + %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> + ret <2 x i32> %tmp4 +} + +; ole is implemented with VCGE +define <2 x i32> @vcolef32(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK: vcolef32: +;CHECK: vcge.f32 + %tmp1 = load <2 x float>* %A + %tmp2 = load <2 x float>* %B + %tmp3 = fcmp ole <2 x float> %tmp1, %tmp2 + %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> + ret <2 x i32> %tmp4 +} + +; uge is implemented with VCGT/VMVN +define <2 x i32> @vcugef32(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK: vcugef32: +;CHECK: vcgt.f32 +;CHECK-NEXT: vmvn + %tmp1 = load <2 x float>* %A + %tmp2 = load <2 x float>* %B + %tmp3 = fcmp uge <2 x float> %tmp1, %tmp2 + %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> + ret <2 x i32> %tmp4 +} + +; ule is implemented with VCGT/VMVN +define <2 x i32> @vculef32(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK: vculef32: +;CHECK: vcgt.f32 +;CHECK-NEXT: vmvn + %tmp1 = load <2 x float>* %A + %tmp2 = load <2 x float>* %B + %tmp3 = fcmp ule <2 x float> %tmp1, %tmp2 + %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> + ret <2 x i32> %tmp4 +} + +; ugt is implemented with VCGE/VMVN +define <2 x i32> @vcugtf32(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK: vcugtf32: +;CHECK: vcge.f32 +;CHECK-NEXT: vmvn + %tmp1 = load 
<2 x float>* %A + %tmp2 = load <2 x float>* %B + %tmp3 = fcmp ugt <2 x float> %tmp1, %tmp2 + %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> + ret <2 x i32> %tmp4 +} + +; ult is implemented with VCGE/VMVN +define <2 x i32> @vcultf32(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK: vcultf32: +;CHECK: vcge.f32 +;CHECK-NEXT: vmvn + %tmp1 = load <2 x float>* %A + %tmp2 = load <2 x float>* %B + %tmp3 = fcmp ult <2 x float> %tmp1, %tmp2 + %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> + ret <2 x i32> %tmp4 +} + +; ueq is implemented with VCGT/VCGT/VORR/VMVN +define <2 x i32> @vcueqf32(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK: vcueqf32: +;CHECK: vcgt.f32 +;CHECK-NEXT: vcgt.f32 +;CHECK-NEXT: vorr +;CHECK-NEXT: vmvn + %tmp1 = load <2 x float>* %A + %tmp2 = load <2 x float>* %B + %tmp3 = fcmp ueq <2 x float> %tmp1, %tmp2 + %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> + ret <2 x i32> %tmp4 +} + +; one is implemented with VCGT/VCGT/VORR +define <2 x i32> @vconef32(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK: vconef32: +;CHECK: vcgt.f32 +;CHECK-NEXT: vcgt.f32 +;CHECK-NEXT: vorr + %tmp1 = load <2 x float>* %A + %tmp2 = load <2 x float>* %B + %tmp3 = fcmp one <2 x float> %tmp1, %tmp2 + %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> + ret <2 x i32> %tmp4 +} + +; uno is implemented with VCGT/VCGE/VORR/VMVN +define <2 x i32> @vcunof32(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK: vcunof32: +;CHECK: vcge.f32 +;CHECK-NEXT: vcgt.f32 +;CHECK-NEXT: vorr +;CHECK-NEXT: vmvn + %tmp1 = load <2 x float>* %A + %tmp2 = load <2 x float>* %B + %tmp3 = fcmp uno <2 x float> %tmp1, %tmp2 + %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> + ret <2 x i32> %tmp4 +} + +; ord is implemented with VCGT/VCGE/VORR +define <2 x i32> @vcordf32(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK: vcordf32: +;CHECK: vcge.f32 +;CHECK-NEXT: vcgt.f32 +;CHECK-NEXT: vorr + %tmp1 = load <2 x float>* %A + %tmp2 = load <2 x float>* %B + %tmp3 = fcmp ord <2 x float> %tmp1, %tmp2 + %tmp4 = sext <2 x i1> %tmp3 to <2 x 
i32> + ret <2 x i32> %tmp4 +}
diff --git a/src/LLVM/test/CodeGen/ARM/vfp.ll b/src/LLVM/test/CodeGen/ARM/vfp.ll new file mode 100644 index 0000000..f10e4b3 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vfp.ll
@@ -0,0 +1,156 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 -disable-post-ra | FileCheck %s +; RUN: llc < %s -march=arm -mattr=+vfp2 -disable-post-ra -regalloc=basic | FileCheck %s + +define void @test(float* %P, double* %D) { + %A = load float* %P ; <float> [#uses=1] + %B = load double* %D ; <double> [#uses=1] + store float %A, float* %P + store double %B, double* %D + ret void +} + +declare float @fabsf(float) + +declare double @fabs(double) + +define void @test_abs(float* %P, double* %D) { +;CHECK: test_abs: + %a = load float* %P ; <float> [#uses=1] +;CHECK: vabs.f32 + %b = call float @fabsf( float %a ) ; <float> [#uses=1] + store float %b, float* %P + %A = load double* %D ; <double> [#uses=1] +;CHECK: vabs.f64 + %B = call double @fabs( double %A ) ; <double> [#uses=1] + store double %B, double* %D + ret void +} + +define void @test_add(float* %P, double* %D) { +;CHECK: test_add: + %a = load float* %P ; <float> [#uses=2] + %b = fadd float %a, %a ; <float> [#uses=1] + store float %b, float* %P + %A = load double* %D ; <double> [#uses=2] + %B = fadd double %A, %A ; <double> [#uses=1] + store double %B, double* %D + ret void +} + +define void @test_ext_round(float* %P, double* %D) { +;CHECK: test_ext_round: + %a = load float* %P ; <float> [#uses=1] +;CHECK: vcvt.f64.f32 +;CHECK: vcvt.f32.f64 + %b = fpext float %a to double ; <double> [#uses=1] + %A = load double* %D ; <double> [#uses=1] + %B = fptrunc double %A to float ; <float> [#uses=1] + store double %b, double* %D + store float %B, float* %P + ret void +} + +define void @test_fma(float* %P1, float* %P2, float* %P3) { +;CHECK: test_fma: + %a1 = load float* %P1 ; <float> [#uses=1] + %a2 = load float* %P2 ; <float> [#uses=1] + %a3 = load float* %P3 ; <float> [#uses=1] +;CHECK: vnmls.f32 + %X = fmul float %a1, %a2 ; <float> [#uses=1] + %Y = fsub float %X, %a3 ; <float> [#uses=1] + store float %Y, float* %P1 + ret void +} + +define i32 @test_ftoi(float* %P1) { +;CHECK: test_ftoi: + %a1 = load float* %P1 ; <float> 
[#uses=1] +;CHECK: vcvt.s32.f32 + %b1 = fptosi float %a1 to i32 ; <i32> [#uses=1] + ret i32 %b1 +} + +define i32 @test_ftou(float* %P1) { +;CHECK: test_ftou: + %a1 = load float* %P1 ; <float> [#uses=1] +;CHECK: vcvt.u32.f32 + %b1 = fptoui float %a1 to i32 ; <i32> [#uses=1] + ret i32 %b1 +} + +define i32 @test_dtoi(double* %P1) { +;CHECK: test_dtoi: + %a1 = load double* %P1 ; <double> [#uses=1] +;CHECK: vcvt.s32.f64 + %b1 = fptosi double %a1 to i32 ; <i32> [#uses=1] + ret i32 %b1 +} + +define i32 @test_dtou(double* %P1) { +;CHECK: test_dtou: + %a1 = load double* %P1 ; <double> [#uses=1] +;CHECK: vcvt.u32.f64 + %b1 = fptoui double %a1 to i32 ; <i32> [#uses=1] + ret i32 %b1 +} + +define void @test_utod(double* %P1, i32 %X) { +;CHECK: test_utod: +;CHECK: vcvt.f64.u32 + %b1 = uitofp i32 %X to double ; <double> [#uses=1] + store double %b1, double* %P1 + ret void +} + +define void @test_utod2(double* %P1, i8 %X) { +;CHECK: test_utod2: +;CHECK: vcvt.f64.u32 + %b1 = uitofp i8 %X to double ; <double> [#uses=1] + store double %b1, double* %P1 + ret void +} + +define void @test_cmp(float* %glob, i32 %X) { +;CHECK: test_cmp: +entry: + %tmp = load float* %glob ; <float> [#uses=2] + %tmp3 = getelementptr float* %glob, i32 2 ; <float*> [#uses=1] + %tmp4 = load float* %tmp3 ; <float> [#uses=2] + %tmp.upgrd.1 = fcmp oeq float %tmp, %tmp4 ; <i1> [#uses=1] + %tmp5 = fcmp uno float %tmp, %tmp4 ; <i1> [#uses=1] + %tmp6 = or i1 %tmp.upgrd.1, %tmp5 ; <i1> [#uses=1] +;CHECK: bmi +;CHECK-NEXT: bgt + br i1 %tmp6, label %cond_true, label %cond_false + +cond_true: ; preds = %entry + %tmp.upgrd.2 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0] + ret void + +cond_false: ; preds = %entry + %tmp7 = tail call i32 (...)* @baz( ) ; <i32> [#uses=0] + ret void +} + +declare i1 @llvm.isunordered.f32(float, float) + +declare i32 @bar(...) + +declare i32 @baz(...) 
+ +define void @test_cmpfp0(float* %glob, i32 %X) { +;CHECK: test_cmpfp0: +entry: + %tmp = load float* %glob ; <float> [#uses=1] +;CHECK: vcmpe.f32 + %tmp.upgrd.3 = fcmp ogt float %tmp, 0.000000e+00 ; <i1> [#uses=1] + br i1 %tmp.upgrd.3, label %cond_true, label %cond_false + +cond_true: ; preds = %entry + %tmp.upgrd.4 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0] + ret void + +cond_false: ; preds = %entry + %tmp1 = tail call i32 (...)* @baz( ) ; <i32> [#uses=0] + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/vget_lane.ll b/src/LLVM/test/CodeGen/ARM/vget_lane.ll new file mode 100644 index 0000000..1fc885d --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vget_lane.ll
@@ -0,0 +1,233 @@ +; RUN: llc < %s -mattr=+neon | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32" +target triple = "thumbv7-elf" + +define i32 @vget_lanes8(<8 x i8>* %A) nounwind { +;CHECK: vget_lanes8: +;CHECK: vmov.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = extractelement <8 x i8> %tmp1, i32 1 + %tmp3 = sext i8 %tmp2 to i32 + ret i32 %tmp3 +} + +define i32 @vget_lanes16(<4 x i16>* %A) nounwind { +;CHECK: vget_lanes16: +;CHECK: vmov.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = extractelement <4 x i16> %tmp1, i32 1 + %tmp3 = sext i16 %tmp2 to i32 + ret i32 %tmp3 +} + +define i32 @vget_laneu8(<8 x i8>* %A) nounwind { +;CHECK: vget_laneu8: +;CHECK: vmov.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = extractelement <8 x i8> %tmp1, i32 1 + %tmp3 = zext i8 %tmp2 to i32 + ret i32 %tmp3 +} + +define i32 @vget_laneu16(<4 x i16>* %A) nounwind { +;CHECK: vget_laneu16: +;CHECK: vmov.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = extractelement <4 x i16> %tmp1, i32 1 + %tmp3 = zext i16 %tmp2 to i32 + ret i32 %tmp3 +} + +; Do a vector add to keep the extraction from being done directly from memory. 
+define i32 @vget_lanei32(<2 x i32>* %A) nounwind { +;CHECK: vget_lanei32: +;CHECK: vmov.32 + %tmp1 = load <2 x i32>* %A + %tmp2 = add <2 x i32> %tmp1, %tmp1 + %tmp3 = extractelement <2 x i32> %tmp2, i32 1 + ret i32 %tmp3 +} + +define i32 @vgetQ_lanes8(<16 x i8>* %A) nounwind { +;CHECK: vgetQ_lanes8: +;CHECK: vmov.s8 + %tmp1 = load <16 x i8>* %A + %tmp2 = extractelement <16 x i8> %tmp1, i32 1 + %tmp3 = sext i8 %tmp2 to i32 + ret i32 %tmp3 +} + +define i32 @vgetQ_lanes16(<8 x i16>* %A) nounwind { +;CHECK: vgetQ_lanes16: +;CHECK: vmov.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = extractelement <8 x i16> %tmp1, i32 1 + %tmp3 = sext i16 %tmp2 to i32 + ret i32 %tmp3 +} + +define i32 @vgetQ_laneu8(<16 x i8>* %A) nounwind { +;CHECK: vgetQ_laneu8: +;CHECK: vmov.u8 + %tmp1 = load <16 x i8>* %A + %tmp2 = extractelement <16 x i8> %tmp1, i32 1 + %tmp3 = zext i8 %tmp2 to i32 + ret i32 %tmp3 +} + +define i32 @vgetQ_laneu16(<8 x i16>* %A) nounwind { +;CHECK: vgetQ_laneu16: +;CHECK: vmov.u16 + %tmp1 = load <8 x i16>* %A + %tmp2 = extractelement <8 x i16> %tmp1, i32 1 + %tmp3 = zext i16 %tmp2 to i32 + ret i32 %tmp3 +} + +; Do a vector add to keep the extraction from being done directly from memory. 
+define i32 @vgetQ_lanei32(<4 x i32>* %A) nounwind { +;CHECK: vgetQ_lanei32: +;CHECK: vmov.32 + %tmp1 = load <4 x i32>* %A + %tmp2 = add <4 x i32> %tmp1, %tmp1 + %tmp3 = extractelement <4 x i32> %tmp2, i32 1 + ret i32 %tmp3 +} + +define arm_aapcs_vfpcc void @test_vget_laneu16() nounwind { +entry: +; CHECK: vmov.u16 r0, d{{.*}}[1] + %arg0_uint16x4_t = alloca <4 x i16> ; <<4 x i16>*> [#uses=1] + %out_uint16_t = alloca i16 ; <i16*> [#uses=1] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + %0 = load <4 x i16>* %arg0_uint16x4_t, align 8 ; <<4 x i16>> [#uses=1] + %1 = extractelement <4 x i16> %0, i32 1 ; <i16> [#uses=1] + %2 = add i16 %1, %1 + store i16 %2, i16* %out_uint16_t, align 2 + br label %return + +return: ; preds = %entry + ret void +} + +define arm_aapcs_vfpcc void @test_vget_laneu8() nounwind { +entry: +; CHECK: vmov.u8 r0, d{{.*}}[1] + %arg0_uint8x8_t = alloca <8 x i8> ; <<8 x i8>*> [#uses=1] + %out_uint8_t = alloca i8 ; <i8*> [#uses=1] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + %0 = load <8 x i8>* %arg0_uint8x8_t, align 8 ; <<8 x i8>> [#uses=1] + %1 = extractelement <8 x i8> %0, i32 1 ; <i8> [#uses=1] + %2 = add i8 %1, %1 + store i8 %2, i8* %out_uint8_t, align 1 + br label %return + +return: ; preds = %entry + ret void +} + +define arm_aapcs_vfpcc void @test_vgetQ_laneu16() nounwind { +entry: +; CHECK: vmov.u16 r0, d{{.*}}[1] + %arg0_uint16x8_t = alloca <8 x i16> ; <<8 x i16>*> [#uses=1] + %out_uint16_t = alloca i16 ; <i16*> [#uses=1] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + %0 = load <8 x i16>* %arg0_uint16x8_t, align 16 ; <<8 x i16>> [#uses=1] + %1 = extractelement <8 x i16> %0, i32 1 ; <i16> [#uses=1] + %2 = add i16 %1, %1 + store i16 %2, i16* %out_uint16_t, align 2 + br label %return + +return: ; preds = %entry + ret void +} + +define arm_aapcs_vfpcc void @test_vgetQ_laneu8() nounwind { +entry: +; CHECK: vmov.u8 r0, d{{.*}}[1] + %arg0_uint8x16_t = alloca <16 x i8> ; <<16 x i8>*> [#uses=1] + %out_uint8_t 
= alloca i8 ; <i8*> [#uses=1] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + %0 = load <16 x i8>* %arg0_uint8x16_t, align 16 ; <<16 x i8>> [#uses=1] + %1 = extractelement <16 x i8> %0, i32 1 ; <i8> [#uses=1] + %2 = add i8 %1, %1 + store i8 %2, i8* %out_uint8_t, align 1 + br label %return + +return: ; preds = %entry + ret void +} + +define <8 x i8> @vset_lane8(<8 x i8>* %A, i8 %B) nounwind { +;CHECK: vset_lane8: +;CHECK: vmov.8 + %tmp1 = load <8 x i8>* %A + %tmp2 = insertelement <8 x i8> %tmp1, i8 %B, i32 1 + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vset_lane16(<4 x i16>* %A, i16 %B) nounwind { +;CHECK: vset_lane16: +;CHECK: vmov.16 + %tmp1 = load <4 x i16>* %A + %tmp2 = insertelement <4 x i16> %tmp1, i16 %B, i32 1 + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vset_lane32(<2 x i32>* %A, i32 %B) nounwind { +;CHECK: vset_lane32: +;CHECK: vmov.32 + %tmp1 = load <2 x i32>* %A + %tmp2 = insertelement <2 x i32> %tmp1, i32 %B, i32 1 + ret <2 x i32> %tmp2 +} + +define <16 x i8> @vsetQ_lane8(<16 x i8>* %A, i8 %B) nounwind { +;CHECK: vsetQ_lane8: +;CHECK: vmov.8 + %tmp1 = load <16 x i8>* %A + %tmp2 = insertelement <16 x i8> %tmp1, i8 %B, i32 1 + ret <16 x i8> %tmp2 +} + +define <8 x i16> @vsetQ_lane16(<8 x i16>* %A, i16 %B) nounwind { +;CHECK: vsetQ_lane16: +;CHECK: vmov.16 + %tmp1 = load <8 x i16>* %A + %tmp2 = insertelement <8 x i16> %tmp1, i16 %B, i32 1 + ret <8 x i16> %tmp2 +} + +define <4 x i32> @vsetQ_lane32(<4 x i32>* %A, i32 %B) nounwind { +;CHECK: vsetQ_lane32: +;CHECK: vmov.32 + %tmp1 = load <4 x i32>* %A + %tmp2 = insertelement <4 x i32> %tmp1, i32 %B, i32 1 + ret <4 x i32> %tmp2 +} + +define arm_aapcs_vfpcc <2 x float> @test_vset_lanef32(float %arg0_float32_t, <2 x float> %arg1_float32x2_t) nounwind { +;CHECK: test_vset_lanef32: +;CHECK: vmov.f32 s3, s0 +;CHECK: vmov.f64 d0, d1 +entry: + %0 = insertelement <2 x float> %arg1_float32x2_t, float %arg0_float32_t, i32 1 ; <<2 x float>> [#uses=1] + ret <2 x float> %0 +} + +; The llvm extractelement 
instruction does not require that the lane number +; be an immediate constant. Make sure a variable lane number is handled. + +define i32 @vget_variable_lanes8(<8 x i8>* %A, i32 %B) nounwind { + %tmp1 = load <8 x i8>* %A + %tmp2 = extractelement <8 x i8> %tmp1, i32 %B + %tmp3 = sext i8 %tmp2 to i32 + ret i32 %tmp3 +} + +define i32 @vgetQ_variable_lanei32(<4 x i32>* %A, i32 %B) nounwind { + %tmp1 = load <4 x i32>* %A + %tmp2 = add <4 x i32> %tmp1, %tmp1 + %tmp3 = extractelement <4 x i32> %tmp2, i32 %B + ret i32 %tmp3 +}
diff --git a/src/LLVM/test/CodeGen/ARM/vhadd.ll b/src/LLVM/test/CodeGen/ARM/vhadd.ll new file mode 100644 index 0000000..379e062 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vhadd.ll
@@ -0,0 +1,249 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @vhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vhadds8: +;CHECK: vhadd.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vhadds16: +;CHECK: vhadd.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vhadds32: +;CHECK: vhadd.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <8 x i8> @vhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vhaddu8: +;CHECK: vhadd.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vhaddu16: +;CHECK: vhadd.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vhaddu32: +;CHECK: vhadd.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <16 x i8> @vhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vhaddQs8: +;CHECK: vhadd.s8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} 
+ +define <8 x i16> @vhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vhaddQs16: +;CHECK: vhadd.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vhaddQs32: +;CHECK: vhadd.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <16 x i8> @vhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vhaddQu8: +;CHECK: vhadd.u8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vhaddQu16: +;CHECK: vhadd.u16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vhaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vhaddQu32: +;CHECK: vhadd.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +declare <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32>, <2 x i32>) nounwind readnone + +declare <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8>, <16 x i8>) nounwind 
readnone +declare <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone + +define <8 x i8> @vrhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vrhadds8: +;CHECK: vrhadd.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vrhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vrhadds16: +;CHECK: vrhadd.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vrhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vrhadds32: +;CHECK: vrhadd.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <8 x i8> @vrhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vrhaddu8: +;CHECK: vrhadd.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vrhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vrhaddu16: +;CHECK: vrhadd.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vrhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vrhaddu32: +;CHECK: vrhadd.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x 
i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <16 x i8> @vrhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vrhaddQs8: +;CHECK: vrhadd.s8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vrhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vrhaddQs16: +;CHECK: vrhadd.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vrhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vrhaddQs32: +;CHECK: vrhadd.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <16 x i8> @vrhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vrhaddQu8: +;CHECK: vrhadd.u8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vrhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vrhaddQu16: +;CHECK: vrhadd.u16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vrhaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vrhaddQu32: +;CHECK: vrhadd.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +declare <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone 
+declare <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32>, <2 x i32>) nounwind readnone + +declare <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/ARM/vhsub.ll b/src/LLVM/test/CodeGen/ARM/vhsub.ll new file mode 100644 index 0000000..0f0d027 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vhsub.ll
@@ -0,0 +1,125 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @vhsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vhsubs8: +;CHECK: vhsub.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vhsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vhsubs16: +;CHECK: vhsub.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vhsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vhsubs32: +;CHECK: vhsub.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <8 x i8> @vhsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vhsubu8: +;CHECK: vhsub.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vhsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vhsubu16: +;CHECK: vhsub.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vhsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vhsubu32: +;CHECK: vhsub.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <16 x i8> @vhsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vhsubQs8: +;CHECK: vhsub.s8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} 
+ +define <8 x i16> @vhsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vhsubQs16: +;CHECK: vhsub.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vhsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vhsubQs32: +;CHECK: vhsub.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <16 x i8> @vhsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vhsubQu8: +;CHECK: vhsub.u8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vhsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vhsubQu16: +;CHECK: vhsub.u16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vhsubQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vhsubQu32: +;CHECK: vhsub.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +declare <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone + +declare <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8>, <16 x i8>) nounwind 
readnone +declare <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/ARM/vicmp.ll b/src/LLVM/test/CodeGen/ARM/vicmp.ll new file mode 100644 index 0000000..2d8cb89 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vicmp.ll
@@ -0,0 +1,113 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +; This tests icmp operations that do not map directly to NEON instructions. +; Not-equal (ne) operations are implemented by VCEQ/VMVN. Less-than (lt/ult) +; and less-than-or-equal (le/ule) are implemented by swapping the arguments +; to VCGT and VCGE. Test all the operand types for not-equal but only sample +; the other operations. + +define <8 x i8> @vcnei8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vcnei8: +;CHECK: vceq.i8 +;CHECK-NEXT: vmvn + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = icmp ne <8 x i8> %tmp1, %tmp2 + %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> + ret <8 x i8> %tmp4 +} + +define <4 x i16> @vcnei16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vcnei16: +;CHECK: vceq.i16 +;CHECK-NEXT: vmvn + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = icmp ne <4 x i16> %tmp1, %tmp2 + %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> + ret <4 x i16> %tmp4 +} + +define <2 x i32> @vcnei32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vcnei32: +;CHECK: vceq.i32 +;CHECK-NEXT: vmvn + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = icmp ne <2 x i32> %tmp1, %tmp2 + %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> + ret <2 x i32> %tmp4 +} + +define <16 x i8> @vcneQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vcneQi8: +;CHECK: vceq.i8 +;CHECK-NEXT: vmvn + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = icmp ne <16 x i8> %tmp1, %tmp2 + %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> + ret <16 x i8> %tmp4 +} + +define <8 x i16> @vcneQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vcneQi16: +;CHECK: vceq.i16 +;CHECK-NEXT: vmvn + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = icmp ne <8 x i16> %tmp1, %tmp2 + %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> + ret <8 x i16> %tmp4 +} + +define <4 x i32> @vcneQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vcneQi32: +;CHECK: vceq.i32 +;CHECK-NEXT: vmvn + %tmp1 = 
load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = icmp ne <4 x i32> %tmp1, %tmp2 + %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> + ret <4 x i32> %tmp4 +} + +define <16 x i8> @vcltQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vcltQs8: +;CHECK: vcgt.s8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = icmp slt <16 x i8> %tmp1, %tmp2 + %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> + ret <16 x i8> %tmp4 +} + +define <4 x i16> @vcles16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vcles16: +;CHECK: vcge.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = icmp sle <4 x i16> %tmp1, %tmp2 + %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> + ret <4 x i16> %tmp4 +} + +define <4 x i16> @vcltu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vcltu16: +;CHECK: vcgt.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = icmp ult <4 x i16> %tmp1, %tmp2 + %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> + ret <4 x i16> %tmp4 +} + +define <4 x i32> @vcleQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vcleQu32: +;CHECK: vcge.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = icmp ule <4 x i32> %tmp1, %tmp2 + %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> + ret <4 x i32> %tmp4 +}
diff --git a/src/LLVM/test/CodeGen/ARM/vld1.ll b/src/LLVM/test/CodeGen/ARM/vld1.ll new file mode 100644 index 0000000..e524395 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vld1.ll
@@ -0,0 +1,141 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s + +define <8 x i8> @vld1i8(i8* %A) nounwind { +;CHECK: vld1i8: +;Check the alignment value. Max for this instruction is 64 bits: +;CHECK: vld1.8 {d16}, [r0, :64] + %tmp1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %A, i32 16) + ret <8 x i8> %tmp1 +} + +define <4 x i16> @vld1i16(i16* %A) nounwind { +;CHECK: vld1i16: +;CHECK: vld1.16 + %tmp0 = bitcast i16* %A to i8* + %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0, i32 1) + ret <4 x i16> %tmp1 +} + +;Check for a post-increment updating load. +define <4 x i16> @vld1i16_update(i16** %ptr) nounwind { +;CHECK: vld1i16_update: +;CHECK: vld1.16 {d16}, [{{r[0-9]+}}]! + %A = load i16** %ptr + %tmp0 = bitcast i16* %A to i8* + %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0, i32 1) + %tmp2 = getelementptr i16* %A, i32 4 + store i16* %tmp2, i16** %ptr + ret <4 x i16> %tmp1 +} + +define <2 x i32> @vld1i32(i32* %A) nounwind { +;CHECK: vld1i32: +;CHECK: vld1.32 + %tmp0 = bitcast i32* %A to i8* + %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %tmp0, i32 1) + ret <2 x i32> %tmp1 +} + +;Check for a post-increment updating load with register increment. 
+define <2 x i32> @vld1i32_update(i32** %ptr, i32 %inc) nounwind { +;CHECK: vld1i32_update: +;CHECK: vld1.32 {d16}, [{{r[0-9]+}}], {{r[0-9]+}} + %A = load i32** %ptr + %tmp0 = bitcast i32* %A to i8* + %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %tmp0, i32 1) + %tmp2 = getelementptr i32* %A, i32 %inc + store i32* %tmp2, i32** %ptr + ret <2 x i32> %tmp1 +} + +define <2 x float> @vld1f(float* %A) nounwind { +;CHECK: vld1f: +;CHECK: vld1.32 + %tmp0 = bitcast float* %A to i8* + %tmp1 = call <2 x float> @llvm.arm.neon.vld1.v2f32(i8* %tmp0, i32 1) + ret <2 x float> %tmp1 +} + +define <1 x i64> @vld1i64(i64* %A) nounwind { +;CHECK: vld1i64: +;CHECK: vld1.64 + %tmp0 = bitcast i64* %A to i8* + %tmp1 = call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %tmp0, i32 1) + ret <1 x i64> %tmp1 +} + +define <16 x i8> @vld1Qi8(i8* %A) nounwind { +;CHECK: vld1Qi8: +;Check the alignment value. Max for this instruction is 128 bits: +;CHECK: vld1.8 {d16, d17}, [r0, :64] + %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 8) + ret <16 x i8> %tmp1 +} + +;Check for a post-increment updating load. +define <16 x i8> @vld1Qi8_update(i8** %ptr) nounwind { +;CHECK: vld1Qi8_update: +;CHECK: vld1.8 {d16, d17}, [{{r[0-9]+}}, :64]! + %A = load i8** %ptr + %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 8) + %tmp2 = getelementptr i8* %A, i32 16 + store i8* %tmp2, i8** %ptr + ret <16 x i8> %tmp1 +} + +define <8 x i16> @vld1Qi16(i16* %A) nounwind { +;CHECK: vld1Qi16: +;Check the alignment value. 
Max for this instruction is 128 bits: +;CHECK: vld1.16 {d16, d17}, [r0, :128] + %tmp0 = bitcast i16* %A to i8* + %tmp1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %tmp0, i32 32) + ret <8 x i16> %tmp1 +} + +define <4 x i32> @vld1Qi32(i32* %A) nounwind { +;CHECK: vld1Qi32: +;CHECK: vld1.32 + %tmp0 = bitcast i32* %A to i8* + %tmp1 = call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %tmp0, i32 1) + ret <4 x i32> %tmp1 +} + +define <4 x float> @vld1Qf(float* %A) nounwind { +;CHECK: vld1Qf: +;CHECK: vld1.32 + %tmp0 = bitcast float* %A to i8* + %tmp1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %tmp0, i32 1) + ret <4 x float> %tmp1 +} + +define <2 x i64> @vld1Qi64(i64* %A) nounwind { +;CHECK: vld1Qi64: +;CHECK: vld1.64 + %tmp0 = bitcast i64* %A to i8* + %tmp1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %tmp0, i32 1) + ret <2 x i64> %tmp1 +} + +declare <8 x i8> @llvm.arm.neon.vld1.v8i8(i8*, i32) nounwind readonly +declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*, i32) nounwind readonly +declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*, i32) nounwind readonly +declare <2 x float> @llvm.arm.neon.vld1.v2f32(i8*, i32) nounwind readonly +declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*, i32) nounwind readonly + +declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*, i32) nounwind readonly +declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) nounwind readonly +declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*, i32) nounwind readonly +declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly +declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*, i32) nounwind readonly + +; Radar 8355607 +; Do not crash if the vld1 result is not used. +define void @unused_vld1_result() { +entry: + %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) + call void @llvm.trap() + unreachable +} + +declare void @llvm.trap() nounwind
diff --git a/src/LLVM/test/CodeGen/ARM/vld2.ll b/src/LLVM/test/CodeGen/ARM/vld2.ll new file mode 100644 index 0000000..29b3794 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vld2.ll
@@ -0,0 +1,155 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> } +%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> } +%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> } +%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> } +%struct.__neon_int64x1x2_t = type { <1 x i64>, <1 x i64> } + +%struct.__neon_int8x16x2_t = type { <16 x i8>, <16 x i8> } +%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> } +%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> } +%struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> } + +define <8 x i8> @vld2i8(i8* %A) nounwind { +;CHECK: vld2i8: +;Check the alignment value. Max for this instruction is 128 bits: +;CHECK: vld2.8 {d16, d17}, [r0, :64] + %tmp1 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8* %A, i32 8) + %tmp2 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 1 + %tmp4 = add <8 x i8> %tmp2, %tmp3 + ret <8 x i8> %tmp4 +} + +define <4 x i16> @vld2i16(i16* %A) nounwind { +;CHECK: vld2i16: +;Check the alignment value. 
Max for this instruction is 128 bits: +;CHECK: vld2.16 {d16, d17}, [r0, :128] + %tmp0 = bitcast i16* %A to i8* + %tmp1 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8* %tmp0, i32 32) + %tmp2 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 1 + %tmp4 = add <4 x i16> %tmp2, %tmp3 + ret <4 x i16> %tmp4 +} + +define <2 x i32> @vld2i32(i32* %A) nounwind { +;CHECK: vld2i32: +;CHECK: vld2.32 + %tmp0 = bitcast i32* %A to i8* + %tmp1 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8* %tmp0, i32 1) + %tmp2 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 1 + %tmp4 = add <2 x i32> %tmp2, %tmp3 + ret <2 x i32> %tmp4 +} + +define <2 x float> @vld2f(float* %A) nounwind { +;CHECK: vld2f: +;CHECK: vld2.32 + %tmp0 = bitcast float* %A to i8* + %tmp1 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8* %tmp0, i32 1) + %tmp2 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 1 + %tmp4 = fadd <2 x float> %tmp2, %tmp3 + ret <2 x float> %tmp4 +} + +;Check for a post-increment updating load. +define <2 x float> @vld2f_update(float** %ptr) nounwind { +;CHECK: vld2f_update: +;CHECK: vld2.32 {d16, d17}, [r1]! + %A = load float** %ptr + %tmp0 = bitcast float* %A to i8* + %tmp1 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8* %tmp0, i32 1) + %tmp2 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 1 + %tmp4 = fadd <2 x float> %tmp2, %tmp3 + %tmp5 = getelementptr float* %A, i32 4 + store float* %tmp5, float** %ptr + ret <2 x float> %tmp4 +} + +define <1 x i64> @vld2i64(i64* %A) nounwind { +;CHECK: vld2i64: +;Check the alignment value. 
Max for this instruction is 128 bits: +;CHECK: vld1.64 {d16, d17}, [r0, :128] + %tmp0 = bitcast i64* %A to i8* + %tmp1 = call %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8* %tmp0, i32 32) + %tmp2 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 1 + %tmp4 = add <1 x i64> %tmp2, %tmp3 + ret <1 x i64> %tmp4 +} + +define <16 x i8> @vld2Qi8(i8* %A) nounwind { +;CHECK: vld2Qi8: +;Check the alignment value. Max for this instruction is 256 bits: +;CHECK: vld2.8 {d16, d17, d18, d19}, [r0, :64] + %tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 8) + %tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1 + %tmp4 = add <16 x i8> %tmp2, %tmp3 + ret <16 x i8> %tmp4 +} + +;Check for a post-increment updating load with register increment. +define <16 x i8> @vld2Qi8_update(i8** %ptr, i32 %inc) nounwind { +;CHECK: vld2Qi8_update: +;CHECK: vld2.8 {d16, d17, d18, d19}, [r2, :128], r1 + %A = load i8** %ptr + %tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 16) + %tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1 + %tmp4 = add <16 x i8> %tmp2, %tmp3 + %tmp5 = getelementptr i8* %A, i32 %inc + store i8* %tmp5, i8** %ptr + ret <16 x i8> %tmp4 +} + +define <8 x i16> @vld2Qi16(i16* %A) nounwind { +;CHECK: vld2Qi16: +;Check the alignment value. Max for this instruction is 256 bits: +;CHECK: vld2.16 {d16, d17, d18, d19}, [r0, :128] + %tmp0 = bitcast i16* %A to i8* + %tmp1 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8* %tmp0, i32 16) + %tmp2 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 1 + %tmp4 = add <8 x i16> %tmp2, %tmp3 + ret <8 x i16> %tmp4 +} + +define <4 x i32> @vld2Qi32(i32* %A) nounwind { +;CHECK: vld2Qi32: +;Check the alignment value. 
Max for this instruction is 256 bits: +;CHECK: vld2.32 {d16, d17, d18, d19}, [r0, :256] + %tmp0 = bitcast i32* %A to i8* + %tmp1 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp0, i32 64) + %tmp2 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 1 + %tmp4 = add <4 x i32> %tmp2, %tmp3 + ret <4 x i32> %tmp4 +} + +define <4 x float> @vld2Qf(float* %A) nounwind { +;CHECK: vld2Qf: +;CHECK: vld2.32 + %tmp0 = bitcast float* %A to i8* + %tmp1 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(i8* %tmp0, i32 1) + %tmp2 = extractvalue %struct.__neon_float32x4x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp1, 1 + %tmp4 = fadd <4 x float> %tmp2, %tmp3 + ret <4 x float> %tmp4 +} + +declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8*, i32) nounwind readonly +declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8*, i32) nounwind readonly +declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8*, i32) nounwind readonly +declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8*, i32) nounwind readonly +declare %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8*, i32) nounwind readonly + +declare %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8*, i32) nounwind readonly +declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8*, i32) nounwind readonly +declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8*, i32) nounwind readonly +declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(i8*, i32) nounwind readonly
diff --git a/src/LLVM/test/CodeGen/ARM/vld3.ll b/src/LLVM/test/CodeGen/ARM/vld3.ll new file mode 100644 index 0000000..b495319 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vld3.ll
@@ -0,0 +1,159 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s + +%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> } +%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> } +%struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> } +%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> } +%struct.__neon_int64x1x3_t = type { <1 x i64>, <1 x i64>, <1 x i64> } + +%struct.__neon_int8x16x3_t = type { <16 x i8>, <16 x i8>, <16 x i8> } +%struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> } +%struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> } +%struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> } + +define <8 x i8> @vld3i8(i8* %A) nounwind { +;CHECK: vld3i8: +;Check the alignment value. Max for this instruction is 64 bits: +;CHECK: vld3.8 {d16, d17, d18}, [r0, :64] + %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A, i32 32) + %tmp2 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 2 + %tmp4 = add <8 x i8> %tmp2, %tmp3 + ret <8 x i8> %tmp4 +} + +define <4 x i16> @vld3i16(i16* %A) nounwind { +;CHECK: vld3i16: +;CHECK: vld3.16 + %tmp0 = bitcast i16* %A to i8* + %tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8* %tmp0, i32 1) + %tmp2 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 2 + %tmp4 = add <4 x i16> %tmp2, %tmp3 + ret <4 x i16> %tmp4 +} + +;Check for a post-increment updating load with register increment. 
+define <4 x i16> @vld3i16_update(i16** %ptr, i32 %inc) nounwind { +;CHECK: vld3i16_update: +;CHECK: vld3.16 {d16, d17, d18}, [{{r[0-9]+}}], {{r[0-9]+}} + %A = load i16** %ptr + %tmp0 = bitcast i16* %A to i8* + %tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8* %tmp0, i32 1) + %tmp2 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 2 + %tmp4 = add <4 x i16> %tmp2, %tmp3 + %tmp5 = getelementptr i16* %A, i32 %inc + store i16* %tmp5, i16** %ptr + ret <4 x i16> %tmp4 +} + +define <2 x i32> @vld3i32(i32* %A) nounwind { +;CHECK: vld3i32: +;CHECK: vld3.32 + %tmp0 = bitcast i32* %A to i8* + %tmp1 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8* %tmp0, i32 1) + %tmp2 = extractvalue %struct.__neon_int32x2x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp1, 2 + %tmp4 = add <2 x i32> %tmp2, %tmp3 + ret <2 x i32> %tmp4 +} + +define <2 x float> @vld3f(float* %A) nounwind { +;CHECK: vld3f: +;CHECK: vld3.32 + %tmp0 = bitcast float* %A to i8* + %tmp1 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(i8* %tmp0, i32 1) + %tmp2 = extractvalue %struct.__neon_float32x2x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp1, 2 + %tmp4 = fadd <2 x float> %tmp2, %tmp3 + ret <2 x float> %tmp4 +} + +define <1 x i64> @vld3i64(i64* %A) nounwind { +;CHECK: vld3i64: +;Check the alignment value. Max for this instruction is 64 bits: +;CHECK: vld1.64 {d16, d17, d18}, [r0, :64] + %tmp0 = bitcast i64* %A to i8* + %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8* %tmp0, i32 16) + %tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 2 + %tmp4 = add <1 x i64> %tmp2, %tmp3 + ret <1 x i64> %tmp4 +} + +define <16 x i8> @vld3Qi8(i8* %A) nounwind { +;CHECK: vld3Qi8: +;Check the alignment value. Max for this instruction is 64 bits: +;CHECK: vld3.8 {d16, d18, d20}, [r0, :64]! 
+;CHECK: vld3.8 {d17, d19, d21}, [r0, :64] + %tmp1 = call %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8* %A, i32 32) + %tmp2 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 2 + %tmp4 = add <16 x i8> %tmp2, %tmp3 + ret <16 x i8> %tmp4 +} + +define <8 x i16> @vld3Qi16(i16* %A) nounwind { +;CHECK: vld3Qi16: +;CHECK: vld3.16 +;CHECK: vld3.16 + %tmp0 = bitcast i16* %A to i8* + %tmp1 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i8* %tmp0, i32 1) + %tmp2 = extractvalue %struct.__neon_int16x8x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp1, 2 + %tmp4 = add <8 x i16> %tmp2, %tmp3 + ret <8 x i16> %tmp4 +} + +define <4 x i32> @vld3Qi32(i32* %A) nounwind { +;CHECK: vld3Qi32: +;CHECK: vld3.32 +;CHECK: vld3.32 + %tmp0 = bitcast i32* %A to i8* + %tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8* %tmp0, i32 1) + %tmp2 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 2 + %tmp4 = add <4 x i32> %tmp2, %tmp3 + ret <4 x i32> %tmp4 +} + +;Check for a post-increment updating load. +define <4 x i32> @vld3Qi32_update(i32** %ptr) nounwind { +;CHECK: vld3Qi32_update: +;CHECK: vld3.32 {d16, d18, d20}, [r[[R:[0-9]+]]]! +;CHECK: vld3.32 {d17, d19, d21}, [r[[R]]]! 
+ %A = load i32** %ptr + %tmp0 = bitcast i32* %A to i8* + %tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8* %tmp0, i32 1) + %tmp2 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 2 + %tmp4 = add <4 x i32> %tmp2, %tmp3 + %tmp5 = getelementptr i32* %A, i32 12 + store i32* %tmp5, i32** %ptr + ret <4 x i32> %tmp4 +} + +define <4 x float> @vld3Qf(float* %A) nounwind { +;CHECK: vld3Qf: +;CHECK: vld3.32 +;CHECK: vld3.32 + %tmp0 = bitcast float* %A to i8* + %tmp1 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(i8* %tmp0, i32 1) + %tmp2 = extractvalue %struct.__neon_float32x4x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp1, 2 + %tmp4 = fadd <4 x float> %tmp2, %tmp3 + ret <4 x float> %tmp4 +} + +declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*, i32) nounwind readonly +declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8*, i32) nounwind readonly +declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8*, i32) nounwind readonly +declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(i8*, i32) nounwind readonly +declare %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8*, i32) nounwind readonly + +declare %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8*, i32) nounwind readonly +declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i8*, i32) nounwind readonly +declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8*, i32) nounwind readonly +declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(i8*, i32) nounwind readonly
diff --git a/src/LLVM/test/CodeGen/ARM/vld4.ll b/src/LLVM/test/CodeGen/ARM/vld4.ll new file mode 100644 index 0000000..59a73db --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vld4.ll
@@ -0,0 +1,160 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } +%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } +%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } +%struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> } +%struct.__neon_int64x1x4_t = type { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } + +%struct.__neon_int8x16x4_t = type { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } +%struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } +%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } +%struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> } + +define <8 x i8> @vld4i8(i8* %A) nounwind { +;CHECK: vld4i8: +;Check the alignment value. Max for this instruction is 256 bits: +;CHECK: vld4.8 {d16, d17, d18, d19}, [r0, :64] + %tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A, i32 8) + %tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 2 + %tmp4 = add <8 x i8> %tmp2, %tmp3 + ret <8 x i8> %tmp4 +} + +;Check for a post-increment updating load with register increment. +define <8 x i8> @vld4i8_update(i8** %ptr, i32 %inc) nounwind { +;CHECK: vld4i8_update: +;CHECK: vld4.8 {d16, d17, d18, d19}, [r2, :128], r1 + %A = load i8** %ptr + %tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A, i32 16) + %tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 2 + %tmp4 = add <8 x i8> %tmp2, %tmp3 + %tmp5 = getelementptr i8* %A, i32 %inc + store i8* %tmp5, i8** %ptr + ret <8 x i8> %tmp4 +} + +define <4 x i16> @vld4i16(i16* %A) nounwind { +;CHECK: vld4i16: +;Check the alignment value. 
Max for this instruction is 256 bits: +;CHECK: vld4.16 {d16, d17, d18, d19}, [r0, :128] + %tmp0 = bitcast i16* %A to i8* + %tmp1 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8* %tmp0, i32 16) + %tmp2 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 2 + %tmp4 = add <4 x i16> %tmp2, %tmp3 + ret <4 x i16> %tmp4 +} + +define <2 x i32> @vld4i32(i32* %A) nounwind { +;CHECK: vld4i32: +;Check the alignment value. Max for this instruction is 256 bits: +;CHECK: vld4.32 {d16, d17, d18, d19}, [r0, :256] + %tmp0 = bitcast i32* %A to i8* + %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8* %tmp0, i32 32) + %tmp2 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 2 + %tmp4 = add <2 x i32> %tmp2, %tmp3 + ret <2 x i32> %tmp4 +} + +define <2 x float> @vld4f(float* %A) nounwind { +;CHECK: vld4f: +;CHECK: vld4.32 + %tmp0 = bitcast float* %A to i8* + %tmp1 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(i8* %tmp0, i32 1) + %tmp2 = extractvalue %struct.__neon_float32x2x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp1, 2 + %tmp4 = fadd <2 x float> %tmp2, %tmp3 + ret <2 x float> %tmp4 +} + +define <1 x i64> @vld4i64(i64* %A) nounwind { +;CHECK: vld4i64: +;Check the alignment value. Max for this instruction is 256 bits: +;CHECK: vld1.64 {d16, d17, d18, d19}, [r0, :256] + %tmp0 = bitcast i64* %A to i8* + %tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8* %tmp0, i32 64) + %tmp2 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 2 + %tmp4 = add <1 x i64> %tmp2, %tmp3 + ret <1 x i64> %tmp4 +} + +define <16 x i8> @vld4Qi8(i8* %A) nounwind { +;CHECK: vld4Qi8: +;Check the alignment value. Max for this instruction is 256 bits: +;CHECK: vld4.8 {d16, d18, d20, d22}, [r0, :256]! 
+;CHECK: vld4.8 {d17, d19, d21, d23}, [r0, :256] + %tmp1 = call %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8* %A, i32 64) + %tmp2 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 2 + %tmp4 = add <16 x i8> %tmp2, %tmp3 + ret <16 x i8> %tmp4 +} + +define <8 x i16> @vld4Qi16(i16* %A) nounwind { +;CHECK: vld4Qi16: +;Check for no alignment specifier. +;CHECK: vld4.16 {d16, d18, d20, d22}, [r0]! +;CHECK: vld4.16 {d17, d19, d21, d23}, [r0] + %tmp0 = bitcast i16* %A to i8* + %tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8* %tmp0, i32 1) + %tmp2 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 2 + %tmp4 = add <8 x i16> %tmp2, %tmp3 + ret <8 x i16> %tmp4 +} + +;Check for a post-increment updating load. +define <8 x i16> @vld4Qi16_update(i16** %ptr) nounwind { +;CHECK: vld4Qi16_update: +;CHECK: vld4.16 {d16, d18, d20, d22}, [r1, :64]! +;CHECK: vld4.16 {d17, d19, d21, d23}, [r1, :64]! 
+ %A = load i16** %ptr + %tmp0 = bitcast i16* %A to i8* + %tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8* %tmp0, i32 8) + %tmp2 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 2 + %tmp4 = add <8 x i16> %tmp2, %tmp3 + %tmp5 = getelementptr i16* %A, i32 32 + store i16* %tmp5, i16** %ptr + ret <8 x i16> %tmp4 +} + +define <4 x i32> @vld4Qi32(i32* %A) nounwind { +;CHECK: vld4Qi32: +;CHECK: vld4.32 +;CHECK: vld4.32 + %tmp0 = bitcast i32* %A to i8* + %tmp1 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32(i8* %tmp0, i32 1) + %tmp2 = extractvalue %struct.__neon_int32x4x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp1, 2 + %tmp4 = add <4 x i32> %tmp2, %tmp3 + ret <4 x i32> %tmp4 +} + +define <4 x float> @vld4Qf(float* %A) nounwind { +;CHECK: vld4Qf: +;CHECK: vld4.32 +;CHECK: vld4.32 + %tmp0 = bitcast float* %A to i8* + %tmp1 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32(i8* %tmp0, i32 1) + %tmp2 = extractvalue %struct.__neon_float32x4x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp1, 2 + %tmp4 = fadd <4 x float> %tmp2, %tmp3 + ret <4 x float> %tmp4 +} + +declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8*, i32) nounwind readonly +declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8*, i32) nounwind readonly +declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8*, i32) nounwind readonly +declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(i8*, i32) nounwind readonly +declare %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8*, i32) nounwind readonly + +declare %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8*, i32) nounwind readonly +declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8*, i32) nounwind readonly +declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32(i8*, i32) nounwind readonly +declare %struct.__neon_float32x4x4_t 
@llvm.arm.neon.vld4.v4f32(i8*, i32) nounwind readonly
diff --git a/src/LLVM/test/CodeGen/ARM/vlddup.ll b/src/LLVM/test/CodeGen/ARM/vlddup.ll new file mode 100644 index 0000000..d0e9ac3 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vlddup.ll
@@ -0,0 +1,212 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @vld1dupi8(i8* %A) nounwind { +;CHECK: vld1dupi8: +;Check the (default) alignment value. +;CHECK: vld1.8 {d16[]}, [r0] + %tmp1 = load i8* %A, align 8 + %tmp2 = insertelement <8 x i8> undef, i8 %tmp1, i32 0 + %tmp3 = shufflevector <8 x i8> %tmp2, <8 x i8> undef, <8 x i32> zeroinitializer + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vld1dupi16(i16* %A) nounwind { +;CHECK: vld1dupi16: +;Check the alignment value. Max for this instruction is 16 bits: +;CHECK: vld1.16 {d16[]}, [r0, :16] + %tmp1 = load i16* %A, align 8 + %tmp2 = insertelement <4 x i16> undef, i16 %tmp1, i32 0 + %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> undef, <4 x i32> zeroinitializer + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vld1dupi32(i32* %A) nounwind { +;CHECK: vld1dupi32: +;Check the alignment value. Max for this instruction is 32 bits: +;CHECK: vld1.32 {d16[]}, [r0, :32] + %tmp1 = load i32* %A, align 8 + %tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i32 0 + %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> undef, <2 x i32> zeroinitializer + ret <2 x i32> %tmp3 +} + +define <2 x float> @vld1dupf(float* %A) nounwind { +;CHECK: vld1dupf: +;CHECK: vld1.32 {d16[]}, [r0] + %tmp0 = load float* %A + %tmp1 = insertelement <2 x float> undef, float %tmp0, i32 0 + %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer + ret <2 x float> %tmp2 +} + +define <16 x i8> @vld1dupQi8(i8* %A) nounwind { +;CHECK: vld1dupQi8: +;Check the (default) alignment value. 
+;CHECK: vld1.8 {d16[], d17[]}, [r0] + %tmp1 = load i8* %A, align 8 + %tmp2 = insertelement <16 x i8> undef, i8 %tmp1, i32 0 + %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <16 x i32> zeroinitializer + ret <16 x i8> %tmp3 +} + +define <4 x float> @vld1dupQf(float* %A) nounwind { +;CHECK: vld1dupQf: +;CHECK: vld1.32 {d16[], d17[]}, [r0] + %tmp0 = load float* %A + %tmp1 = insertelement <4 x float> undef, float %tmp0, i32 0 + %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer + ret <4 x float> %tmp2 +} + +%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> } +%struct.__neon_int4x16x2_t = type { <4 x i16>, <4 x i16> } +%struct.__neon_int2x32x2_t = type { <2 x i32>, <2 x i32> } + +define <8 x i8> @vld2dupi8(i8* %A) nounwind { +;CHECK: vld2dupi8: +;Check the (default) alignment value. +;CHECK: vld2.8 {d16[], d17[]}, [r0] + %tmp0 = tail call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1) + %tmp1 = extractvalue %struct.__neon_int8x8x2_t %tmp0, 0 + %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer + %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp0, 1 + %tmp4 = shufflevector <8 x i8> %tmp3, <8 x i8> undef, <8 x i32> zeroinitializer + %tmp5 = add <8 x i8> %tmp2, %tmp4 + ret <8 x i8> %tmp5 +} + +define <4 x i16> @vld2dupi16(i16* %A) nounwind { +;CHECK: vld2dupi16: +;Check that a power-of-two alignment smaller than the total size of the memory +;being loaded is ignored. 
+;CHECK: vld2.16 {d16[], d17[]}, [r0] + %tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i16* %A, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) + %tmp1 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 0 + %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer + %tmp3 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 1 + %tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer + %tmp5 = add <4 x i16> %tmp2, %tmp4 + ret <4 x i16> %tmp5 +} + +;Check for a post-increment updating load. +define <4 x i16> @vld2dupi16_update(i16** %ptr) nounwind { +;CHECK: vld2dupi16_update: +;CHECK: vld2.16 {d16[], d17[]}, [r1]! + %A = load i16** %ptr + %tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i16* %A, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) + %tmp1 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 0 + %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer + %tmp3 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 1 + %tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer + %tmp5 = add <4 x i16> %tmp2, %tmp4 + %tmp6 = getelementptr i16* %A, i32 2 + store i16* %tmp6, i16** %ptr + ret <4 x i16> %tmp5 +} + +define <2 x i32> @vld2dupi32(i32* %A) nounwind { +;CHECK: vld2dupi32: +;Check the alignment value. 
Max for this instruction is 64 bits: +;CHECK: vld2.32 {d16[], d17[]}, [r0, :64] + %tmp0 = tail call %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i32* %A, <2 x i32> undef, <2 x i32> undef, i32 0, i32 16) + %tmp1 = extractvalue %struct.__neon_int2x32x2_t %tmp0, 0 + %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer + %tmp3 = extractvalue %struct.__neon_int2x32x2_t %tmp0, 1 + %tmp4 = shufflevector <2 x i32> %tmp3, <2 x i32> undef, <2 x i32> zeroinitializer + %tmp5 = add <2 x i32> %tmp2, %tmp4 + ret <2 x i32> %tmp5 +} + +declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly +declare %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i16*, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i32*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly + +%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> } +%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> } + +;Check for a post-increment updating load with register increment. 
+define <8 x i8> @vld3dupi8_update(i8** %ptr, i32 %inc) nounwind { +;CHECK: vld3dupi8_update: +;CHECK: vld3.8 {d16[], d17[], d18[]}, [r2], r1 + %A = load i8** %ptr + %tmp0 = tail call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 8) + %tmp1 = extractvalue %struct.__neon_int8x8x3_t %tmp0, 0 + %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer + %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp0, 1 + %tmp4 = shufflevector <8 x i8> %tmp3, <8 x i8> undef, <8 x i32> zeroinitializer + %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp0, 2 + %tmp6 = shufflevector <8 x i8> %tmp5, <8 x i8> undef, <8 x i32> zeroinitializer + %tmp7 = add <8 x i8> %tmp2, %tmp4 + %tmp8 = add <8 x i8> %tmp7, %tmp6 + %tmp9 = getelementptr i8* %A, i32 %inc + store i8* %tmp9, i8** %ptr + ret <8 x i8> %tmp8 +} + +define <4 x i16> @vld3dupi16(i16* %A) nounwind { +;CHECK: vld3dupi16: +;Check the (default) alignment value. VLD3 does not support alignment. 
+;CHECK: vld3.16 {d16[], d17[], d18[]}, [r0] + %tmp0 = tail call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i16* %A, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 8) + %tmp1 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 0 + %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer + %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 1 + %tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer + %tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 2 + %tmp6 = shufflevector <4 x i16> %tmp5, <4 x i16> undef, <4 x i32> zeroinitializer + %tmp7 = add <4 x i16> %tmp2, %tmp4 + %tmp8 = add <4 x i16> %tmp7, %tmp6 + ret <4 x i16> %tmp8 +} + +declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly +declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i16*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly + +%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } +%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } + +;Check for a post-increment updating load. +define <4 x i16> @vld4dupi16_update(i16** %ptr) nounwind { +;CHECK: vld4dupi16_update: +;CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r1]! 
+ %A = load i16** %ptr + %tmp0 = tail call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i16* %A, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 1) + %tmp1 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 0 + %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer + %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 1 + %tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer + %tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 2 + %tmp6 = shufflevector <4 x i16> %tmp5, <4 x i16> undef, <4 x i32> zeroinitializer + %tmp7 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 3 + %tmp8 = shufflevector <4 x i16> %tmp7, <4 x i16> undef, <4 x i32> zeroinitializer + %tmp9 = add <4 x i16> %tmp2, %tmp4 + %tmp10 = add <4 x i16> %tmp6, %tmp8 + %tmp11 = add <4 x i16> %tmp9, %tmp10 + %tmp12 = getelementptr i16* %A, i32 4 + store i16* %tmp12, i16** %ptr + ret <4 x i16> %tmp11 +} + +define <2 x i32> @vld4dupi32(i32* %A) nounwind { +;CHECK: vld4dupi32: +;Check the alignment value. An 8-byte alignment is allowed here even though +;it is smaller than the total size of the memory being loaded. 
+;CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r0, :64] + %tmp0 = tail call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i32* %A, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 8) + %tmp1 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 0 + %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer + %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 1 + %tmp4 = shufflevector <2 x i32> %tmp3, <2 x i32> undef, <2 x i32> zeroinitializer + %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 2 + %tmp6 = shufflevector <2 x i32> %tmp5, <2 x i32> undef, <2 x i32> zeroinitializer + %tmp7 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 3 + %tmp8 = shufflevector <2 x i32> %tmp7, <2 x i32> undef, <2 x i32> zeroinitializer + %tmp9 = add <2 x i32> %tmp2, %tmp4 + %tmp10 = add <2 x i32> %tmp6, %tmp8 + %tmp11 = add <2 x i32> %tmp9, %tmp10 + ret <2 x i32> %tmp11 +} + +declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i16*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i32*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
diff --git a/src/LLVM/test/CodeGen/ARM/vldlane.ll b/src/LLVM/test/CodeGen/ARM/vldlane.ll new file mode 100644 index 0000000..0d7d4ec --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vldlane.ll
@@ -0,0 +1,511 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s + +define <8 x i8> @vld1lanei8(i8* %A, <8 x i8>* %B) nounwind { +;CHECK: vld1lanei8: +;Check the (default) alignment value. +;CHECK: vld1.8 {d16[3]}, [r0] + %tmp1 = load <8 x i8>* %B + %tmp2 = load i8* %A, align 8 + %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 3 + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vld1lanei16(i16* %A, <4 x i16>* %B) nounwind { +;CHECK: vld1lanei16: +;Check the alignment value. Max for this instruction is 16 bits: +;CHECK: vld1.16 {d16[2]}, [r0, :16] + %tmp1 = load <4 x i16>* %B + %tmp2 = load i16* %A, align 8 + %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 2 + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind { +;CHECK: vld1lanei32: +;Check the alignment value. Max for this instruction is 32 bits: +;CHECK: vld1.32 {d16[1]}, [r0, :32] + %tmp1 = load <2 x i32>* %B + %tmp2 = load i32* %A, align 8 + %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1 + ret <2 x i32> %tmp3 +} + +define <2 x float> @vld1lanef(float* %A, <2 x float>* %B) nounwind { +;CHECK: vld1lanef: +;CHECK: vld1.32 {d16[1]}, [r0] + %tmp1 = load <2 x float>* %B + %tmp2 = load float* %A, align 4 + %tmp3 = insertelement <2 x float> %tmp1, float %tmp2, i32 1 + ret <2 x float> %tmp3 +} + +define <16 x i8> @vld1laneQi8(i8* %A, <16 x i8>* %B) nounwind { +;CHECK: vld1laneQi8: +;CHECK: vld1.8 {d17[1]}, [r0] + %tmp1 = load <16 x i8>* %B + %tmp2 = load i8* %A, align 8 + %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 9 + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vld1laneQi16(i16* %A, <8 x i16>* %B) nounwind { +;CHECK: vld1laneQi16: +;CHECK: vld1.16 {d17[1]}, [r0, :16] + %tmp1 = load <8 x i16>* %B + %tmp2 = load i16* %A, align 8 + %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 5 + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vld1laneQi32(i32* %A, <4 x i32>* %B) nounwind { 
+;CHECK: vld1laneQi32: +;CHECK: vld1.32 {d17[1]}, [r0, :32] + %tmp1 = load <4 x i32>* %B + %tmp2 = load i32* %A, align 8 + %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 3 + ret <4 x i32> %tmp3 +} + +define <4 x float> @vld1laneQf(float* %A, <4 x float>* %B) nounwind { +;CHECK: vld1laneQf: +;CHECK: vld1.32 {d16[0]}, [r0] + %tmp1 = load <4 x float>* %B + %tmp2 = load float* %A + %tmp3 = insertelement <4 x float> %tmp1, float %tmp2, i32 0 + ret <4 x float> %tmp3 +} + +%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> } +%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> } +%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> } +%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> } + +%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> } +%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> } +%struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> } + +define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind { +;CHECK: vld2lanei8: +;Check the alignment value. Max for this instruction is 16 bits: +;CHECK: vld2.8 {d16[1], d17[1]}, [r0, :16] + %tmp1 = load <8 x i8>* %B + %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4) + %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1 + %tmp5 = add <8 x i8> %tmp3, %tmp4 + ret <8 x i8> %tmp5 +} + +define <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind { +;CHECK: vld2lanei16: +;Check the alignment value. 
Max for this instruction is 32 bits: +;CHECK: vld2.16 {d16[1], d17[1]}, [r0, :32] + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <4 x i16>* %B + %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8) + %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 1 + %tmp5 = add <4 x i16> %tmp3, %tmp4 + ret <4 x i16> %tmp5 +} + +define <2 x i32> @vld2lanei32(i32* %A, <2 x i32>* %B) nounwind { +;CHECK: vld2lanei32: +;CHECK: vld2.32 + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>* %B + %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) + %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1 + %tmp5 = add <2 x i32> %tmp3, %tmp4 + ret <2 x i32> %tmp5 +} + +;Check for a post-increment updating load. +define <2 x i32> @vld2lanei32_update(i32** %ptr, <2 x i32>* %B) nounwind { +;CHECK: vld2lanei32_update: +;CHECK: vld2.32 {d16[1], d17[1]}, [{{r[0-9]+}}]! 
+ %A = load i32** %ptr + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>* %B + %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) + %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1 + %tmp5 = add <2 x i32> %tmp3, %tmp4 + %tmp6 = getelementptr i32* %A, i32 2 + store i32* %tmp6, i32** %ptr + ret <2 x i32> %tmp5 +} + +define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind { +;CHECK: vld2lanef: +;CHECK: vld2.32 + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <2 x float>* %B + %tmp2 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1) + %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 1 + %tmp5 = fadd <2 x float> %tmp3, %tmp4 + ret <2 x float> %tmp5 +} + +define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind { +;CHECK: vld2laneQi16: +;Check the (default) alignment. +;CHECK: vld2.16 {d17[1], d19[1]}, [{{r[0-9]+}}] + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <8 x i16>* %B + %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1) + %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1 + %tmp5 = add <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 +} + +define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind { +;CHECK: vld2laneQi32: +;Check the alignment value. 
Max for this instruction is 64 bits: +;CHECK: vld2.32 {d17[0], d19[0]}, [{{r[0-9]+}}, :64] + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <4 x i32>* %B + %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16) + %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 1 + %tmp5 = add <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 +} + +define <4 x float> @vld2laneQf(float* %A, <4 x float>* %B) nounwind { +;CHECK: vld2laneQf: +;CHECK: vld2.32 + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <4 x float>* %B + %tmp2 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1) + %tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 1 + %tmp5 = fadd <4 x float> %tmp3, %tmp4 + ret <4 x float> %tmp5 +} + +declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly +declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly +declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32) nounwind readonly + +declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly +declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32) nounwind readonly + +%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> } +%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> } +%struct.__neon_int32x2x3_t = 
type { <2 x i32>, <2 x i32>, <2 x i32> } +%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> } + +%struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> } +%struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> } +%struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> } + +define <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind { +;CHECK: vld3lanei8: +;CHECK: vld3.8 + %tmp1 = load <8 x i8>* %B + %tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) + %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2 + %tmp6 = add <8 x i8> %tmp3, %tmp4 + %tmp7 = add <8 x i8> %tmp5, %tmp6 + ret <8 x i8> %tmp7 +} + +define <4 x i16> @vld3lanei16(i16* %A, <4 x i16>* %B) nounwind { +;CHECK: vld3lanei16: +;Check the (default) alignment value. VLD3 does not support alignment. 
+;CHECK: vld3.16 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}] + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <4 x i16>* %B + %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8) + %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 2 + %tmp6 = add <4 x i16> %tmp3, %tmp4 + %tmp7 = add <4 x i16> %tmp5, %tmp6 + ret <4 x i16> %tmp7 +} + +define <2 x i32> @vld3lanei32(i32* %A, <2 x i32>* %B) nounwind { +;CHECK: vld3lanei32: +;CHECK: vld3.32 + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>* %B + %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) + %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 2 + %tmp6 = add <2 x i32> %tmp3, %tmp4 + %tmp7 = add <2 x i32> %tmp5, %tmp6 + ret <2 x i32> %tmp7 +} + +define <2 x float> @vld3lanef(float* %A, <2 x float>* %B) nounwind { +;CHECK: vld3lanef: +;CHECK: vld3.32 + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <2 x float>* %B + %tmp2 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1) + %tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 2 + %tmp6 = fadd <2 x float> %tmp3, %tmp4 + %tmp7 = fadd <2 x float> %tmp5, %tmp6 + ret <2 x float> %tmp7 +} + +define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind { +;CHECK: vld3laneQi16: +;Check the (default) alignment value. VLD3 does not support alignment. 
+;CHECK: vld3.16 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}] + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <8 x i16>* %B + %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 8) + %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2 + %tmp6 = add <8 x i16> %tmp3, %tmp4 + %tmp7 = add <8 x i16> %tmp5, %tmp6 + ret <8 x i16> %tmp7 +} + +;Check for a post-increment updating load with register increment. +define <8 x i16> @vld3laneQi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounwind { +;CHECK: vld3laneQi16_update: +;CHECK: vld3.16 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}], {{r[0-9]+}} + %A = load i16** %ptr + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <8 x i16>* %B + %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 8) + %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2 + %tmp6 = add <8 x i16> %tmp3, %tmp4 + %tmp7 = add <8 x i16> %tmp5, %tmp6 + %tmp8 = getelementptr i16* %A, i32 %inc + store i16* %tmp8, i16** %ptr + ret <8 x i16> %tmp7 +} + +define <4 x i32> @vld3laneQi32(i32* %A, <4 x i32>* %B) nounwind { +;CHECK: vld3laneQi32: +;CHECK: vld3.32 + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <4 x i32>* %B + %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 3, i32 1) + %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 2 + %tmp6 = add <4 x i32> %tmp3, %tmp4 + %tmp7 = add <4 x i32> %tmp5, %tmp6 + ret <4 x i32> %tmp7 
+} + +define <4 x float> @vld3laneQf(float* %A, <4 x float>* %B) nounwind { +;CHECK: vld3laneQf: +;CHECK: vld3.32 + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <4 x float>* %B + %tmp2 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1) + %tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 2 + %tmp6 = fadd <4 x float> %tmp3, %tmp4 + %tmp7 = fadd <4 x float> %tmp5, %tmp6 + ret <4 x float> %tmp7 +} + +declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly +declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly +declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind readonly + +declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly +declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly + +%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } +%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } +%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } +%struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> } + +%struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } 
+%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } +%struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> } + +define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind { +;CHECK: vld4lanei8: +;Check the alignment value. Max for this instruction is 32 bits: +;CHECK: vld4.8 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}, :32] + %tmp1 = load <8 x i8>* %B + %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8) + %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3 + %tmp7 = add <8 x i8> %tmp3, %tmp4 + %tmp8 = add <8 x i8> %tmp5, %tmp6 + %tmp9 = add <8 x i8> %tmp7, %tmp8 + ret <8 x i8> %tmp9 +} + +;Check for a post-increment updating load. +define <8 x i8> @vld4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind { +;CHECK: vld4lanei8_update: +;CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}, :32]! 
+ %A = load i8** %ptr + %tmp1 = load <8 x i8>* %B + %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8) + %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3 + %tmp7 = add <8 x i8> %tmp3, %tmp4 + %tmp8 = add <8 x i8> %tmp5, %tmp6 + %tmp9 = add <8 x i8> %tmp7, %tmp8 + %tmp10 = getelementptr i8* %A, i32 4 + store i8* %tmp10, i8** %ptr + ret <8 x i8> %tmp9 +} + +define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind { +;CHECK: vld4lanei16: +;Check that a power-of-two alignment smaller than the total size of the memory +;being loaded is ignored. +;CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}] + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <4 x i16>* %B + %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 4) + %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 3 + %tmp7 = add <4 x i16> %tmp3, %tmp4 + %tmp8 = add <4 x i16> %tmp5, %tmp6 + %tmp9 = add <4 x i16> %tmp7, %tmp8 + ret <4 x i16> %tmp9 +} + +define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind { +;CHECK: vld4lanei32: +;Check the alignment value. An 8-byte alignment is allowed here even though +;it is smaller than the total size of the memory being loaded. 
+;CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}, :64] + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>* %B + %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 8) + %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 3 + %tmp7 = add <2 x i32> %tmp3, %tmp4 + %tmp8 = add <2 x i32> %tmp5, %tmp6 + %tmp9 = add <2 x i32> %tmp7, %tmp8 + ret <2 x i32> %tmp9 +} + +define <2 x float> @vld4lanef(float* %A, <2 x float>* %B) nounwind { +;CHECK: vld4lanef: +;CHECK: vld4.32 + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <2 x float>* %B + %tmp2 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1) + %tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 3 + %tmp7 = fadd <2 x float> %tmp3, %tmp4 + %tmp8 = fadd <2 x float> %tmp5, %tmp6 + %tmp9 = fadd <2 x float> %tmp7, %tmp8 + ret <2 x float> %tmp9 +} + +define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind { +;CHECK: vld4laneQi16: +;Check the alignment value. 
Max for this instruction is 64 bits: +;CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [{{r[0-9]+}}, :64] + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <8 x i16>* %B + %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 16) + %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 3 + %tmp7 = add <8 x i16> %tmp3, %tmp4 + %tmp8 = add <8 x i16> %tmp5, %tmp6 + %tmp9 = add <8 x i16> %tmp7, %tmp8 + ret <8 x i16> %tmp9 +} + +define <4 x i32> @vld4laneQi32(i32* %A, <4 x i32>* %B) nounwind { +;CHECK: vld4laneQi32: +;Check the (default) alignment. +;CHECK: vld4.32 {d17[0], d19[0], d21[0], d23[0]}, [{{r[0-9]+}}] + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <4 x i32>* %B + %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1) + %tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 3 + %tmp7 = add <4 x i32> %tmp3, %tmp4 + %tmp8 = add <4 x i32> %tmp5, %tmp6 + %tmp9 = add <4 x i32> %tmp7, %tmp8 + ret <4 x i32> %tmp9 +} + +define <4 x float> @vld4laneQf(float* %A, <4 x float>* %B) nounwind { +;CHECK: vld4laneQf: +;CHECK: vld4.32 + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <4 x float>* %B + %tmp2 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1) + %tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 1 + %tmp5 = extractvalue 
%struct.__neon_float32x4x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 3 + %tmp7 = fadd <4 x float> %tmp3, %tmp4 + %tmp8 = fadd <4 x float> %tmp5, %tmp6 + %tmp9 = fadd <4 x float> %tmp7, %tmp8 + ret <4 x float> %tmp9 +} + +declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly +declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly +declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind readonly + +declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly +declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly + +; Radar 8776599: If one of the operands to a QQQQ REG_SEQUENCE is a register +; in the QPR_VFP2 regclass, it needs to be copied to a QPR regclass because +; we don't currently have a QQQQ_VFP2 super-regclass. (The "0" for the low +; part of %ins67 is supposed to be loaded by a VLDRS instruction in this test.) 
+define <8 x i16> @test_qqqq_regsequence_subreg([6 x i64] %b) nounwind { +;CHECK: test_qqqq_regsequence_subreg +;CHECK: vld3.16 + %tmp63 = extractvalue [6 x i64] %b, 5 + %tmp64 = zext i64 %tmp63 to i128 + %tmp65 = shl i128 %tmp64, 64 + %ins67 = or i128 %tmp65, 0 + %tmp78 = bitcast i128 %ins67 to <8 x i16> + %vld3_lane = tail call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* undef, <8 x i16> undef, <8 x i16> undef, <8 x i16> %tmp78, i32 1, i32 2) + %tmp3 = extractvalue %struct.__neon_int16x8x3_t %vld3_lane, 0 + %tmp4 = extractvalue %struct.__neon_int16x8x3_t %vld3_lane, 1 + %tmp5 = extractvalue %struct.__neon_int16x8x3_t %vld3_lane, 2 + %tmp6 = add <8 x i16> %tmp3, %tmp4 + %tmp7 = add <8 x i16> %tmp5, %tmp6 + ret <8 x i16> %tmp7 +} + +declare void @llvm.trap() nounwind
diff --git a/src/LLVM/test/CodeGen/ARM/vminmax.ll b/src/LLVM/test/CodeGen/ARM/vminmax.ll new file mode 100644 index 0000000..e3527c1 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vminmax.ll
@@ -0,0 +1,293 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @vmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vmins8: +;CHECK: vmin.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vmins16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vmins16: +;CHECK: vmin.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vmins32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vmins32: +;CHECK: vmin.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <8 x i8> @vminu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vminu8: +;CHECK: vmin.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vminu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vminu16: +;CHECK: vmin.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vminu32: +;CHECK: vmin.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <2 x float> @vminf32(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK: vminf32: +;CHECK: vmin.f32 + %tmp1 = load <2 x float>* %A + %tmp2 = load <2 x float>* %B + %tmp3 = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) + ret <2 x float> %tmp3 +} + 
+define <16 x i8> @vminQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vminQs8: +;CHECK: vmin.s8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vminQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vminQs16: +;CHECK: vmin.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vminQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vminQs32: +;CHECK: vmin.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <16 x i8> @vminQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vminQu8: +;CHECK: vmin.u8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vminQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vminQu16: +;CHECK: vmin.u16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vminQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vminQu32: +;CHECK: vmin.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <4 x float> @vminQf32(<4 x float>* %A, <4 x float>* %B) nounwind { +;CHECK: vminQf32: +;CHECK: vmin.f32 + %tmp1 = load <4 x float>* %A + %tmp2 = load <4 x float>* %B + %tmp3 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) + ret <4 x float> %tmp3 +} + +declare <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x 
i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32>, <2 x i32>) nounwind readnone + +declare <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone + +declare <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float>, <2 x float>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32>, <4 x i32>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone + +declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone + +define <8 x i8> @vmaxs8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vmaxs8: +;CHECK: vmax.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vmaxs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vmaxs16: +;CHECK: vmax.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vmaxs32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vmaxs32: +;CHECK: vmax.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <8 x i8> 
@vmaxu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vmaxu8: +;CHECK: vmax.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vmaxu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vmaxu16: +;CHECK: vmax.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vmaxu32: +;CHECK: vmax.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <2 x float> @vmaxf32(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK: vmaxf32: +;CHECK: vmax.f32 + %tmp1 = load <2 x float>* %A + %tmp2 = load <2 x float>* %B + %tmp3 = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) + ret <2 x float> %tmp3 +} + +define <16 x i8> @vmaxQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vmaxQs8: +;CHECK: vmax.s8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vmaxQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vmaxQs16: +;CHECK: vmax.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vmaxQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vmaxQs32: +;CHECK: vmax.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <16 x i8> @vmaxQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: 
vmaxQu8: +;CHECK: vmax.u8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vmaxQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vmaxQu16: +;CHECK: vmax.u16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vmaxQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vmaxQu32: +;CHECK: vmax.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <4 x float> @vmaxQf32(<4 x float>* %A, <4 x float>* %B) nounwind { +;CHECK: vmaxQf32: +;CHECK: vmax.f32 + %tmp1 = load <4 x float>* %A + %tmp2 = load <4 x float>* %B + %tmp3 = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) + ret <4 x float> %tmp3 +} + +declare <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone + +declare <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone + +declare <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone 
+declare <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone + +declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/ARM/vmla.ll b/src/LLVM/test/CodeGen/ARM/vmla.ll new file mode 100644 index 0000000..9c6b210 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vmla.ll
@@ -0,0 +1,215 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @vmlai8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind { +;CHECK: vmlai8: +;CHECK: vmla.i8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = load <8 x i8>* %C + %tmp4 = mul <8 x i8> %tmp2, %tmp3 + %tmp5 = add <8 x i8> %tmp1, %tmp4 + ret <8 x i8> %tmp5 +} + +define <4 x i16> @vmlai16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { +;CHECK: vmlai16: +;CHECK: vmla.i16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = load <4 x i16>* %C + %tmp4 = mul <4 x i16> %tmp2, %tmp3 + %tmp5 = add <4 x i16> %tmp1, %tmp4 + ret <4 x i16> %tmp5 +} + +define <2 x i32> @vmlai32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { +;CHECK: vmlai32: +;CHECK: vmla.i32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = load <2 x i32>* %C + %tmp4 = mul <2 x i32> %tmp2, %tmp3 + %tmp5 = add <2 x i32> %tmp1, %tmp4 + ret <2 x i32> %tmp5 +} + +define <2 x float> @vmlaf32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind { +;CHECK: vmlaf32: +;CHECK: vmla.f32 + %tmp1 = load <2 x float>* %A + %tmp2 = load <2 x float>* %B + %tmp3 = load <2 x float>* %C + %tmp4 = fmul <2 x float> %tmp2, %tmp3 + %tmp5 = fadd <2 x float> %tmp1, %tmp4 + ret <2 x float> %tmp5 +} + +define <16 x i8> @vmlaQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8> * %C) nounwind { +;CHECK: vmlaQi8: +;CHECK: vmla.i8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = load <16 x i8>* %C + %tmp4 = mul <16 x i8> %tmp2, %tmp3 + %tmp5 = add <16 x i8> %tmp1, %tmp4 + ret <16 x i8> %tmp5 +} + +define <8 x i16> @vmlaQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind { +;CHECK: vmlaQi16: +;CHECK: vmla.i16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = load <8 x i16>* %C + %tmp4 = mul <8 x i16> %tmp2, %tmp3 + %tmp5 = add <8 x i16> %tmp1, %tmp4 + ret <8 x i16> %tmp5 +} + +define <4 x i32> @vmlaQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* 
%C) nounwind { +;CHECK: vmlaQi32: +;CHECK: vmla.i32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = load <4 x i32>* %C + %tmp4 = mul <4 x i32> %tmp2, %tmp3 + %tmp5 = add <4 x i32> %tmp1, %tmp4 + ret <4 x i32> %tmp5 +} + +define <4 x float> @vmlaQf32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind { +;CHECK: vmlaQf32: +;CHECK: vmla.f32 + %tmp1 = load <4 x float>* %A + %tmp2 = load <4 x float>* %B + %tmp3 = load <4 x float>* %C + %tmp4 = fmul <4 x float> %tmp2, %tmp3 + %tmp5 = fadd <4 x float> %tmp1, %tmp4 + ret <4 x float> %tmp5 +} + +define <8 x i16> @vmlals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { +;CHECK: vmlals8: +;CHECK: vmlal.s8 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = load <8 x i8>* %C + %tmp4 = sext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = sext <8 x i8> %tmp3 to <8 x i16> + %tmp6 = mul <8 x i16> %tmp4, %tmp5 + %tmp7 = add <8 x i16> %tmp1, %tmp6 + ret <8 x i16> %tmp7 +} + +define <4 x i32> @vmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { +;CHECK: vmlals16: +;CHECK: vmlal.s16 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = load <4 x i16>* %C + %tmp4 = sext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = sext <4 x i16> %tmp3 to <4 x i32> + %tmp6 = mul <4 x i32> %tmp4, %tmp5 + %tmp7 = add <4 x i32> %tmp1, %tmp6 + ret <4 x i32> %tmp7 +} + +define <2 x i64> @vmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { +;CHECK: vmlals32: +;CHECK: vmlal.s32 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = load <2 x i32>* %C + %tmp4 = sext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = sext <2 x i32> %tmp3 to <2 x i64> + %tmp6 = mul <2 x i64> %tmp4, %tmp5 + %tmp7 = add <2 x i64> %tmp1, %tmp6 + ret <2 x i64> %tmp7 +} + +define <8 x i16> @vmlalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { +;CHECK: vmlalu8: +;CHECK: vmlal.u8 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = load <8 x i8>* %C + %tmp4 = zext <8 x i8> %tmp2 to <8 x 
i16> + %tmp5 = zext <8 x i8> %tmp3 to <8 x i16> + %tmp6 = mul <8 x i16> %tmp4, %tmp5 + %tmp7 = add <8 x i16> %tmp1, %tmp6 + ret <8 x i16> %tmp7 +} + +define <4 x i32> @vmlalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { +;CHECK: vmlalu16: +;CHECK: vmlal.u16 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = load <4 x i16>* %C + %tmp4 = zext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = zext <4 x i16> %tmp3 to <4 x i32> + %tmp6 = mul <4 x i32> %tmp4, %tmp5 + %tmp7 = add <4 x i32> %tmp1, %tmp6 + ret <4 x i32> %tmp7 +} + +define <2 x i64> @vmlalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { +;CHECK: vmlalu32: +;CHECK: vmlal.u32 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = load <2 x i32>* %C + %tmp4 = zext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = zext <2 x i32> %tmp3 to <2 x i64> + %tmp6 = mul <2 x i64> %tmp4, %tmp5 + %tmp7 = add <2 x i64> %tmp1, %tmp6 + ret <2 x i64> %tmp7 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vmlal_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone { +entry: +; CHECK: test_vmlal_lanes16 +; CHECK: vmlal.s16 q0, d2, d3[1] + %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1] + %1 = sext <4 x i16> %arg1_int16x4_t to <4 x i32> + %2 = sext <4 x i16> %0 to <4 x i32> + %3 = mul <4 x i32> %1, %2 + %4 = add <4 x i32> %arg0_int32x4_t, %3 + ret <4 x i32> %4 +} + +define arm_aapcs_vfpcc <2 x i64> @test_vmlal_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone { +entry: +; CHECK: test_vmlal_lanes32 +; CHECK: vmlal.s32 q0, d2, d3[1] + %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1] + %1 = sext <2 x i32> %arg1_int32x2_t to <2 x i64> + %2 = sext <2 x i32> %0 to <2 x i64> + %3 = mul <2 x i64> %1, %2 + %4 = add <2 x i64> %arg0_int64x2_t, %3 + ret <2 x i64> %4 
+} + +define arm_aapcs_vfpcc <4 x i32> @test_vmlal_laneu16(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %arg2_uint16x4_t) nounwind readnone { +entry: +; CHECK: test_vmlal_laneu16 +; CHECK: vmlal.u16 q0, d2, d3[1] + %0 = shufflevector <4 x i16> %arg2_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1] + %1 = zext <4 x i16> %arg1_uint16x4_t to <4 x i32> + %2 = zext <4 x i16> %0 to <4 x i32> + %3 = mul <4 x i32> %1, %2 + %4 = add <4 x i32> %arg0_uint32x4_t, %3 + ret <4 x i32> %4 +} + +define arm_aapcs_vfpcc <2 x i64> @test_vmlal_laneu32(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %arg2_uint32x2_t) nounwind readnone { +entry: +; CHECK: test_vmlal_laneu32 +; CHECK: vmlal.u32 q0, d2, d3[1] + %0 = shufflevector <2 x i32> %arg2_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1] + %1 = zext <2 x i32> %arg1_uint32x2_t to <2 x i64> + %2 = zext <2 x i32> %0 to <2 x i64> + %3 = mul <2 x i64> %1, %2 + %4 = add <2 x i64> %arg0_uint64x2_t, %3 + ret <2 x i64> %4 +}
diff --git a/src/LLVM/test/CodeGen/ARM/vmls.ll b/src/LLVM/test/CodeGen/ARM/vmls.ll new file mode 100644 index 0000000..65e7fe4 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vmls.ll
@@ -0,0 +1,215 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @vmlsi8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind { +;CHECK: vmlsi8: +;CHECK: vmls.i8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = load <8 x i8>* %C + %tmp4 = mul <8 x i8> %tmp2, %tmp3 + %tmp5 = sub <8 x i8> %tmp1, %tmp4 + ret <8 x i8> %tmp5 +} + +define <4 x i16> @vmlsi16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { +;CHECK: vmlsi16: +;CHECK: vmls.i16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = load <4 x i16>* %C + %tmp4 = mul <4 x i16> %tmp2, %tmp3 + %tmp5 = sub <4 x i16> %tmp1, %tmp4 + ret <4 x i16> %tmp5 +} + +define <2 x i32> @vmlsi32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { +;CHECK: vmlsi32: +;CHECK: vmls.i32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = load <2 x i32>* %C + %tmp4 = mul <2 x i32> %tmp2, %tmp3 + %tmp5 = sub <2 x i32> %tmp1, %tmp4 + ret <2 x i32> %tmp5 +} + +define <2 x float> @vmlsf32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind { +;CHECK: vmlsf32: +;CHECK: vmls.f32 + %tmp1 = load <2 x float>* %A + %tmp2 = load <2 x float>* %B + %tmp3 = load <2 x float>* %C + %tmp4 = fmul <2 x float> %tmp2, %tmp3 + %tmp5 = fsub <2 x float> %tmp1, %tmp4 + ret <2 x float> %tmp5 +} + +define <16 x i8> @vmlsQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8> * %C) nounwind { +;CHECK: vmlsQi8: +;CHECK: vmls.i8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = load <16 x i8>* %C + %tmp4 = mul <16 x i8> %tmp2, %tmp3 + %tmp5 = sub <16 x i8> %tmp1, %tmp4 + ret <16 x i8> %tmp5 +} + +define <8 x i16> @vmlsQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind { +;CHECK: vmlsQi16: +;CHECK: vmls.i16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = load <8 x i16>* %C + %tmp4 = mul <8 x i16> %tmp2, %tmp3 + %tmp5 = sub <8 x i16> %tmp1, %tmp4 + ret <8 x i16> %tmp5 +} + +define <4 x i32> @vmlsQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* 
%C) nounwind { +;CHECK: vmlsQi32: +;CHECK: vmls.i32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = load <4 x i32>* %C + %tmp4 = mul <4 x i32> %tmp2, %tmp3 + %tmp5 = sub <4 x i32> %tmp1, %tmp4 + ret <4 x i32> %tmp5 +} + +define <4 x float> @vmlsQf32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind { +;CHECK: vmlsQf32: +;CHECK: vmls.f32 + %tmp1 = load <4 x float>* %A + %tmp2 = load <4 x float>* %B + %tmp3 = load <4 x float>* %C + %tmp4 = fmul <4 x float> %tmp2, %tmp3 + %tmp5 = fsub <4 x float> %tmp1, %tmp4 + ret <4 x float> %tmp5 +} + +define <8 x i16> @vmlsls8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { +;CHECK: vmlsls8: +;CHECK: vmlsl.s8 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = load <8 x i8>* %C + %tmp4 = sext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = sext <8 x i8> %tmp3 to <8 x i16> + %tmp6 = mul <8 x i16> %tmp4, %tmp5 + %tmp7 = sub <8 x i16> %tmp1, %tmp6 + ret <8 x i16> %tmp7 +} + +define <4 x i32> @vmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { +;CHECK: vmlsls16: +;CHECK: vmlsl.s16 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = load <4 x i16>* %C + %tmp4 = sext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = sext <4 x i16> %tmp3 to <4 x i32> + %tmp6 = mul <4 x i32> %tmp4, %tmp5 + %tmp7 = sub <4 x i32> %tmp1, %tmp6 + ret <4 x i32> %tmp7 +} + +define <2 x i64> @vmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { +;CHECK: vmlsls32: +;CHECK: vmlsl.s32 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = load <2 x i32>* %C + %tmp4 = sext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = sext <2 x i32> %tmp3 to <2 x i64> + %tmp6 = mul <2 x i64> %tmp4, %tmp5 + %tmp7 = sub <2 x i64> %tmp1, %tmp6 + ret <2 x i64> %tmp7 +} + +define <8 x i16> @vmlslu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { +;CHECK: vmlslu8: +;CHECK: vmlsl.u8 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = load <8 x i8>* %C + %tmp4 = zext <8 x i8> %tmp2 to <8 x 
i16> + %tmp5 = zext <8 x i8> %tmp3 to <8 x i16> + %tmp6 = mul <8 x i16> %tmp4, %tmp5 + %tmp7 = sub <8 x i16> %tmp1, %tmp6 + ret <8 x i16> %tmp7 +} + +define <4 x i32> @vmlslu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { +;CHECK: vmlslu16: +;CHECK: vmlsl.u16 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = load <4 x i16>* %C + %tmp4 = zext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = zext <4 x i16> %tmp3 to <4 x i32> + %tmp6 = mul <4 x i32> %tmp4, %tmp5 + %tmp7 = sub <4 x i32> %tmp1, %tmp6 + ret <4 x i32> %tmp7 +} + +define <2 x i64> @vmlslu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { +;CHECK: vmlslu32: +;CHECK: vmlsl.u32 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = load <2 x i32>* %C + %tmp4 = zext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = zext <2 x i32> %tmp3 to <2 x i64> + %tmp6 = mul <2 x i64> %tmp4, %tmp5 + %tmp7 = sub <2 x i64> %tmp1, %tmp6 + ret <2 x i64> %tmp7 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vmlsl_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone { +entry: +; CHECK: test_vmlsl_lanes16 +; CHECK: vmlsl.s16 q0, d2, d3[1] + %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1] + %1 = sext <4 x i16> %arg1_int16x4_t to <4 x i32> + %2 = sext <4 x i16> %0 to <4 x i32> + %3 = mul <4 x i32> %1, %2 + %4 = sub <4 x i32> %arg0_int32x4_t, %3 + ret <4 x i32> %4 +} + +define arm_aapcs_vfpcc <2 x i64> @test_vmlsl_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone { +entry: +; CHECK: test_vmlsl_lanes32 +; CHECK: vmlsl.s32 q0, d2, d3[1] + %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1] + %1 = sext <2 x i32> %arg1_int32x2_t to <2 x i64> + %2 = sext <2 x i32> %0 to <2 x i64> + %3 = mul <2 x i64> %1, %2 + %4 = sub <2 x i64> %arg0_int64x2_t, %3 + ret <2 x i64> %4 
+} + +define arm_aapcs_vfpcc <4 x i32> @test_vmlsl_laneu16(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %arg2_uint16x4_t) nounwind readnone { +entry: +; CHECK: test_vmlsl_laneu16 +; CHECK: vmlsl.u16 q0, d2, d3[1] + %0 = shufflevector <4 x i16> %arg2_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1] + %1 = zext <4 x i16> %arg1_uint16x4_t to <4 x i32> + %2 = zext <4 x i16> %0 to <4 x i32> + %3 = mul <4 x i32> %1, %2 + %4 = sub <4 x i32> %arg0_uint32x4_t, %3 + ret <4 x i32> %4 +} + +define arm_aapcs_vfpcc <2 x i64> @test_vmlsl_laneu32(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %arg2_uint32x2_t) nounwind readnone { +entry: +; CHECK: test_vmlsl_laneu32 +; CHECK: vmlsl.u32 q0, d2, d3[1] + %0 = shufflevector <2 x i32> %arg2_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1] + %1 = zext <2 x i32> %arg1_uint32x2_t to <2 x i64> + %2 = zext <2 x i32> %0 to <2 x i64> + %3 = mul <2 x i64> %1, %2 + %4 = sub <2 x i64> %arg0_uint64x2_t, %3 + ret <2 x i64> %4 +}
diff --git a/src/LLVM/test/CodeGen/ARM/vmov.ll b/src/LLVM/test/CodeGen/ARM/vmov.ll new file mode 100644 index 0000000..a86be32 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vmov.ll
@@ -0,0 +1,355 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @v_movi8() nounwind { +;CHECK: v_movi8: +;CHECK: vmov.i8 d{{.*}}, #0x8 + ret <8 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > +} + +define <4 x i16> @v_movi16a() nounwind { +;CHECK: v_movi16a: +;CHECK: vmov.i16 d{{.*}}, #0x10 + ret <4 x i16> < i16 16, i16 16, i16 16, i16 16 > +} + +define <4 x i16> @v_movi16b() nounwind { +;CHECK: v_movi16b: +;CHECK: vmov.i16 d{{.*}}, #0x1000 + ret <4 x i16> < i16 4096, i16 4096, i16 4096, i16 4096 > +} + +define <4 x i16> @v_mvni16a() nounwind { +;CHECK: v_mvni16a: +;CHECK: vmvn.i16 d{{.*}}, #0x10 + ret <4 x i16> < i16 65519, i16 65519, i16 65519, i16 65519 > +} + +define <4 x i16> @v_mvni16b() nounwind { +;CHECK: v_mvni16b: +;CHECK: vmvn.i16 d{{.*}}, #0x1000 + ret <4 x i16> < i16 61439, i16 61439, i16 61439, i16 61439 > +} + +define <2 x i32> @v_movi32a() nounwind { +;CHECK: v_movi32a: +;CHECK: vmov.i32 d{{.*}}, #0x20 + ret <2 x i32> < i32 32, i32 32 > +} + +define <2 x i32> @v_movi32b() nounwind { +;CHECK: v_movi32b: +;CHECK: vmov.i32 d{{.*}}, #0x2000 + ret <2 x i32> < i32 8192, i32 8192 > +} + +define <2 x i32> @v_movi32c() nounwind { +;CHECK: v_movi32c: +;CHECK: vmov.i32 d{{.*}}, #0x200000 + ret <2 x i32> < i32 2097152, i32 2097152 > +} + +define <2 x i32> @v_movi32d() nounwind { +;CHECK: v_movi32d: +;CHECK: vmov.i32 d{{.*}}, #0x20000000 + ret <2 x i32> < i32 536870912, i32 536870912 > +} + +define <2 x i32> @v_movi32e() nounwind { +;CHECK: v_movi32e: +;CHECK: vmov.i32 d{{.*}}, #0x20FF + ret <2 x i32> < i32 8447, i32 8447 > +} + +define <2 x i32> @v_movi32f() nounwind { +;CHECK: v_movi32f: +;CHECK: vmov.i32 d{{.*}}, #0x20FFFF + ret <2 x i32> < i32 2162687, i32 2162687 > +} + +define <2 x i32> @v_mvni32a() nounwind { +;CHECK: v_mvni32a: +;CHECK: vmvn.i32 d{{.*}}, #0x20 + ret <2 x i32> < i32 4294967263, i32 4294967263 > +} + +define <2 x i32> @v_mvni32b() nounwind { +;CHECK: v_mvni32b: +;CHECK: vmvn.i32 d{{.*}}, #0x2000 + ret 
<2 x i32> < i32 4294959103, i32 4294959103 > +} + +define <2 x i32> @v_mvni32c() nounwind { +;CHECK: v_mvni32c: +;CHECK: vmvn.i32 d{{.*}}, #0x200000 + ret <2 x i32> < i32 4292870143, i32 4292870143 > +} + +define <2 x i32> @v_mvni32d() nounwind { +;CHECK: v_mvni32d: +;CHECK: vmvn.i32 d{{.*}}, #0x20000000 + ret <2 x i32> < i32 3758096383, i32 3758096383 > +} + +define <2 x i32> @v_mvni32e() nounwind { +;CHECK: v_mvni32e: +;CHECK: vmvn.i32 d{{.*}}, #0x20FF + ret <2 x i32> < i32 4294958848, i32 4294958848 > +} + +define <2 x i32> @v_mvni32f() nounwind { +;CHECK: v_mvni32f: +;CHECK: vmvn.i32 d{{.*}}, #0x20FFFF + ret <2 x i32> < i32 4292804608, i32 4292804608 > +} + +define <1 x i64> @v_movi64() nounwind { +;CHECK: v_movi64: +;CHECK: vmov.i64 d{{.*}}, #0xFF0000FF0000FFFF + ret <1 x i64> < i64 18374687574888349695 > +} + +define <16 x i8> @v_movQi8() nounwind { +;CHECK: v_movQi8: +;CHECK: vmov.i8 q{{.*}}, #0x8 + ret <16 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > +} + +define <8 x i16> @v_movQi16a() nounwind { +;CHECK: v_movQi16a: +;CHECK: vmov.i16 q{{.*}}, #0x10 + ret <8 x i16> < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 > +} + +define <8 x i16> @v_movQi16b() nounwind { +;CHECK: v_movQi16b: +;CHECK: vmov.i16 q{{.*}}, #0x1000 + ret <8 x i16> < i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096 > +} + +define <4 x i32> @v_movQi32a() nounwind { +;CHECK: v_movQi32a: +;CHECK: vmov.i32 q{{.*}}, #0x20 + ret <4 x i32> < i32 32, i32 32, i32 32, i32 32 > +} + +define <4 x i32> @v_movQi32b() nounwind { +;CHECK: v_movQi32b: +;CHECK: vmov.i32 q{{.*}}, #0x2000 + ret <4 x i32> < i32 8192, i32 8192, i32 8192, i32 8192 > +} + +define <4 x i32> @v_movQi32c() nounwind { +;CHECK: v_movQi32c: +;CHECK: vmov.i32 q{{.*}}, #0x200000 + ret <4 x i32> < i32 2097152, i32 2097152, i32 2097152, i32 2097152 > +} + +define <4 x i32> @v_movQi32d() nounwind { +;CHECK: v_movQi32d: +;CHECK: 
vmov.i32 q{{.*}}, #0x20000000 + ret <4 x i32> < i32 536870912, i32 536870912, i32 536870912, i32 536870912 > +} + +define <4 x i32> @v_movQi32e() nounwind { +;CHECK: v_movQi32e: +;CHECK: vmov.i32 q{{.*}}, #0x20FF + ret <4 x i32> < i32 8447, i32 8447, i32 8447, i32 8447 > +} + +define <4 x i32> @v_movQi32f() nounwind { +;CHECK: v_movQi32f: +;CHECK: vmov.i32 q{{.*}}, #0x20FFFF + ret <4 x i32> < i32 2162687, i32 2162687, i32 2162687, i32 2162687 > +} + +define <2 x i64> @v_movQi64() nounwind { +;CHECK: v_movQi64: +;CHECK: vmov.i64 q{{.*}}, #0xFF0000FF0000FFFF + ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 > +} + +; Check for correct assembler printing for immediate values. +%struct.int8x8_t = type { <8 x i8> } +define void @vdupn128(%struct.int8x8_t* noalias nocapture sret %agg.result) nounwind { +entry: +;CHECK: vdupn128: +;CHECK: vmov.i8 d{{.*}}, #0x80 + %0 = getelementptr inbounds %struct.int8x8_t* %agg.result, i32 0, i32 0 ; <<8 x i8>*> [#uses=1] + store <8 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>, <8 x i8>* %0, align 8 + ret void +} + +define void @vdupnneg75(%struct.int8x8_t* noalias nocapture sret %agg.result) nounwind { +entry: +;CHECK: vdupnneg75: +;CHECK: vmov.i8 d{{.*}}, #0xB5 + %0 = getelementptr inbounds %struct.int8x8_t* %agg.result, i32 0, i32 0 ; <<8 x i8>*> [#uses=1] + store <8 x i8> <i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75>, <8 x i8>* %0, align 8 + ret void +} + +define <8 x i16> @vmovls8(<8 x i8>* %A) nounwind { +;CHECK: vmovls8: +;CHECK: vmovl.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = sext <8 x i8> %tmp1 to <8 x i16> + ret <8 x i16> %tmp2 +} + +define <4 x i32> @vmovls16(<4 x i16>* %A) nounwind { +;CHECK: vmovls16: +;CHECK: vmovl.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = sext <4 x i16> %tmp1 to <4 x i32> + ret <4 x i32> %tmp2 +} + +define <2 x i64> @vmovls32(<2 x i32>* %A) nounwind { +;CHECK: vmovls32: +;CHECK: vmovl.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = sext <2 
x i32> %tmp1 to <2 x i64> + ret <2 x i64> %tmp2 +} + +define <8 x i16> @vmovlu8(<8 x i8>* %A) nounwind { +;CHECK: vmovlu8: +;CHECK: vmovl.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = zext <8 x i8> %tmp1 to <8 x i16> + ret <8 x i16> %tmp2 +} + +define <4 x i32> @vmovlu16(<4 x i16>* %A) nounwind { +;CHECK: vmovlu16: +;CHECK: vmovl.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = zext <4 x i16> %tmp1 to <4 x i32> + ret <4 x i32> %tmp2 +} + +define <2 x i64> @vmovlu32(<2 x i32>* %A) nounwind { +;CHECK: vmovlu32: +;CHECK: vmovl.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = zext <2 x i32> %tmp1 to <2 x i64> + ret <2 x i64> %tmp2 +} + +define <8 x i8> @vmovni16(<8 x i16>* %A) nounwind { +;CHECK: vmovni16: +;CHECK: vmovn.i16 + %tmp1 = load <8 x i16>* %A + %tmp2 = trunc <8 x i16> %tmp1 to <8 x i8> + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vmovni32(<4 x i32>* %A) nounwind { +;CHECK: vmovni32: +;CHECK: vmovn.i32 + %tmp1 = load <4 x i32>* %A + %tmp2 = trunc <4 x i32> %tmp1 to <4 x i16> + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vmovni64(<2 x i64>* %A) nounwind { +;CHECK: vmovni64: +;CHECK: vmovn.i64 + %tmp1 = load <2 x i64>* %A + %tmp2 = trunc <2 x i64> %tmp1 to <2 x i32> + ret <2 x i32> %tmp2 +} + +define <8 x i8> @vqmovns16(<8 x i16>* %A) nounwind { +;CHECK: vqmovns16: +;CHECK: vqmovn.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %tmp1) + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vqmovns32(<4 x i32>* %A) nounwind { +;CHECK: vqmovns32: +;CHECK: vqmovn.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %tmp1) + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vqmovns64(<2 x i64>* %A) nounwind { +;CHECK: vqmovns64: +;CHECK: vqmovn.s64 + %tmp1 = load <2 x i64>* %A + %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %tmp1) + ret <2 x i32> %tmp2 +} + +define <8 x i8> @vqmovnu16(<8 x i16>* %A) nounwind { +;CHECK: vqmovnu16: +;CHECK: vqmovn.u16 + %tmp1 = load <8 x i16>* %A + %tmp2 = call 
<8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %tmp1) + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vqmovnu32(<4 x i32>* %A) nounwind { +;CHECK: vqmovnu32: +;CHECK: vqmovn.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %tmp1) + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vqmovnu64(<2 x i64>* %A) nounwind { +;CHECK: vqmovnu64: +;CHECK: vqmovn.u64 + %tmp1 = load <2 x i64>* %A + %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %tmp1) + ret <2 x i32> %tmp2 +} + +define <8 x i8> @vqmovuns16(<8 x i16>* %A) nounwind { +;CHECK: vqmovuns16: +;CHECK: vqmovun.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %tmp1) + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vqmovuns32(<4 x i32>* %A) nounwind { +;CHECK: vqmovuns32: +;CHECK: vqmovun.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %tmp1) + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vqmovuns64(<2 x i64>* %A) nounwind { +;CHECK: vqmovuns64: +;CHECK: vqmovun.s64 + %tmp1 = load <2 x i64>* %A + %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %tmp1) + ret <2 x i32> %tmp2 +} + +declare <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64>) nounwind readnone + +declare <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64>) nounwind readnone + +declare <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64>) nounwind readnone + +; Truncating vector stores are not supported. The following should not crash. +; Radar 8598391. 
+define void @noTruncStore(<4 x i32>* %a, <4 x i16>* %b) nounwind { +;CHECK: vmovn + %tmp1 = load <4 x i32>* %a, align 16 + %tmp2 = trunc <4 x i32> %tmp1 to <4 x i16> + store <4 x i16> %tmp2, <4 x i16>* %b, align 8 + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/vmul.ll b/src/LLVM/test/CodeGen/ARM/vmul.ll new file mode 100644 index 0000000..1780d6e --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vmul.ll
@@ -0,0 +1,516 @@ +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s + +define <8 x i8> @vmuli8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vmuli8: +;CHECK: vmul.i8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = mul <8 x i8> %tmp1, %tmp2 + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vmuli16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vmuli16: +;CHECK: vmul.i16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = mul <4 x i16> %tmp1, %tmp2 + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vmuli32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vmuli32: +;CHECK: vmul.i32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = mul <2 x i32> %tmp1, %tmp2 + ret <2 x i32> %tmp3 +} + +define <2 x float> @vmulf32(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK: vmulf32: +;CHECK: vmul.f32 + %tmp1 = load <2 x float>* %A + %tmp2 = load <2 x float>* %B + %tmp3 = fmul <2 x float> %tmp1, %tmp2 + ret <2 x float> %tmp3 +} + +define <8 x i8> @vmulp8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vmulp8: +;CHECK: vmul.p8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <16 x i8> @vmulQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vmulQi8: +;CHECK: vmul.i8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = mul <16 x i8> %tmp1, %tmp2 + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vmulQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vmulQi16: +;CHECK: vmul.i16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = mul <8 x i16> %tmp1, %tmp2 + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vmulQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vmulQi32: +;CHECK: vmul.i32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = mul <4 x i32> %tmp1, %tmp2 + ret <4 x i32> %tmp3 +} + +define <4 x float> @vmulQf32(<4 x float>* %A, <4 x float>* %B) 
nounwind { +;CHECK: vmulQf32: +;CHECK: vmul.f32 + %tmp1 = load <4 x float>* %A + %tmp2 = load <4 x float>* %B + %tmp3 = fmul <4 x float> %tmp1, %tmp2 + ret <4 x float> %tmp3 +} + +define <16 x i8> @vmulQp8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vmulQp8: +;CHECK: vmul.p8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +declare <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone + +define arm_aapcs_vfpcc <2 x float> @test_vmul_lanef32(<2 x float> %arg0_float32x2_t, <2 x float> %arg1_float32x2_t) nounwind readnone { +entry: +; CHECK: test_vmul_lanef32: +; CHECK: vmul.f32 d0, d0, d1[0] + %0 = shufflevector <2 x float> %arg1_float32x2_t, <2 x float> undef, <2 x i32> zeroinitializer ; <<2 x float>> [#uses=1] + %1 = fmul <2 x float> %0, %arg0_float32x2_t ; <<2 x float>> [#uses=1] + ret <2 x float> %1 +} + +define arm_aapcs_vfpcc <4 x i16> @test_vmul_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone { +entry: +; CHECK: test_vmul_lanes16: +; CHECK: vmul.i16 d0, d0, d1[1] + %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses$ + %1 = mul <4 x i16> %0, %arg0_int16x4_t ; <<4 x i16>> [#uses=1] + ret <4 x i16> %1 +} + +define arm_aapcs_vfpcc <2 x i32> @test_vmul_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone { +entry: +; CHECK: test_vmul_lanes32: +; CHECK: vmul.i32 d0, d0, d1[1] + %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1] + %1 = mul <2 x i32> %0, %arg0_int32x2_t ; <<2 x i32>> [#uses=1] + ret <2 x i32> %1 +} + +define arm_aapcs_vfpcc <4 x float> @test_vmulQ_lanef32(<4 x float> %arg0_float32x4_t, <2 x float> %arg1_float32x2_t) nounwind readnone 
{ +entry: +; CHECK: test_vmulQ_lanef32: +; CHECK: vmul.f32 q0, q0, d2[1] + %0 = shufflevector <2 x float> %arg1_float32x2_t, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>$ + %1 = fmul <4 x float> %0, %arg0_float32x4_t ; <<4 x float>> [#uses=1] + ret <4 x float> %1 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vmulQ_lanes16(<8 x i16> %arg0_int16x8_t, <4 x i16> %arg1_int16x4_t) nounwind readnone { +entry: +; CHECK: test_vmulQ_lanes16: +; CHECK: vmul.i16 q0, q0, d2[1] + %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> + %1 = mul <8 x i16> %0, %arg0_int16x8_t ; <<8 x i16>> [#uses=1] + ret <8 x i16> %1 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vmulQ_lanes32(<4 x i32> %arg0_int32x4_t, <2 x i32> %arg1_int32x2_t) nounwind readnone { +entry: +; CHECK: test_vmulQ_lanes32: +; CHECK: vmul.i32 q0, q0, d2[1] + %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i32>> [#uses$ + %1 = mul <4 x i32> %0, %arg0_int32x4_t ; <<4 x i32>> [#uses=1] + ret <4 x i32> %1 +} + +define <8 x i16> @vmulls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vmulls8: +;CHECK: vmull.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = sext <8 x i8> %tmp1 to <8 x i16> + %tmp4 = sext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = mul <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 +} + +define <8 x i16> @vmulls8_int(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vmulls8_int: +;CHECK: vmull.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vmulls16: +;CHECK: vmull.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = sext <4 x i16> %tmp1 to <4 x i32> + %tmp4 = sext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = mul <4 x i32> 
%tmp3, %tmp4 + ret <4 x i32> %tmp5 +} + +define <4 x i32> @vmulls16_int(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vmulls16_int: +;CHECK: vmull.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vmulls32: +;CHECK: vmull.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = sext <2 x i32> %tmp1 to <2 x i64> + %tmp4 = sext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = mul <2 x i64> %tmp3, %tmp4 + ret <2 x i64> %tmp5 +} + +define <2 x i64> @vmulls32_int(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vmulls32_int: +;CHECK: vmull.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i64> %tmp3 +} + +define <8 x i16> @vmullu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vmullu8: +;CHECK: vmull.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = zext <8 x i8> %tmp1 to <8 x i16> + %tmp4 = zext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = mul <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 +} + +define <8 x i16> @vmullu8_int(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vmullu8_int: +;CHECK: vmull.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vmullu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vmullu16: +;CHECK: vmull.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = zext <4 x i16> %tmp1 to <4 x i32> + %tmp4 = zext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = mul <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 +} + +define <4 x i32> @vmullu16_int(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vmullu16_int: +;CHECK: vmull.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x 
i16>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vmullu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vmullu32: +;CHECK: vmull.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = zext <2 x i32> %tmp1 to <2 x i64> + %tmp4 = zext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = mul <2 x i64> %tmp3, %tmp4 + ret <2 x i64> %tmp5 +} + +define <2 x i64> @vmullu32_int(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vmullu32_int: +;CHECK: vmull.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i64> %tmp3 +} + +define <8 x i16> @vmullp8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vmullp8: +;CHECK: vmull.p8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i16> %tmp3 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vmull_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone { +entry: +; CHECK: test_vmull_lanes16 +; CHECK: vmull.s16 q0, d0, d1[1] + %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1] + %1 = sext <4 x i16> %arg0_int16x4_t to <4 x i32> + %2 = sext <4 x i16> %0 to <4 x i32> + %3 = mul <4 x i32> %1, %2 + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vmull_lanes16_int(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone { +entry: +; CHECK: test_vmull_lanes16_int +; CHECK: vmull.s16 q0, d0, d1[1] + %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1] + %1 = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %1 +} + +define arm_aapcs_vfpcc <2 x i64> 
@test_vmull_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone { +entry: +; CHECK: test_vmull_lanes32 +; CHECK: vmull.s32 q0, d0, d1[1] + %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1] + %1 = sext <2 x i32> %arg0_int32x2_t to <2 x i64> + %2 = sext <2 x i32> %0 to <2 x i64> + %3 = mul <2 x i64> %1, %2 + ret <2 x i64> %3 +} + +define arm_aapcs_vfpcc <2 x i64> @test_vmull_lanes32_int(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone { +entry: +; CHECK: test_vmull_lanes32_int +; CHECK: vmull.s32 q0, d0, d1[1] + %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1] + %1 = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %1 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vmull_laneu16(<4 x i16> %arg0_uint16x4_t, <4 x i16> %arg1_uint16x4_t) nounwind readnone { +entry: +; CHECK: test_vmull_laneu16 +; CHECK: vmull.u16 q0, d0, d1[1] + %0 = shufflevector <4 x i16> %arg1_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1] + %1 = zext <4 x i16> %arg0_uint16x4_t to <4 x i32> + %2 = zext <4 x i16> %0 to <4 x i32> + %3 = mul <4 x i32> %1, %2 + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vmull_laneu16_int(<4 x i16> %arg0_uint16x4_t, <4 x i16> %arg1_uint16x4_t) nounwind readnone { +entry: +; CHECK: test_vmull_laneu16_int +; CHECK: vmull.u16 q0, d0, d1[1] + %0 = shufflevector <4 x i16> %arg1_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1] + %1 = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %arg0_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %1 +} + +define arm_aapcs_vfpcc <2 x i64> @test_vmull_laneu32(<2 x i32> %arg0_uint32x2_t, <2 x i32> %arg1_uint32x2_t) nounwind readnone { +entry: +; CHECK: 
test_vmull_laneu32 +; CHECK: vmull.u32 q0, d0, d1[1] + %0 = shufflevector <2 x i32> %arg1_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1] + %1 = zext <2 x i32> %arg0_uint32x2_t to <2 x i64> + %2 = zext <2 x i32> %0 to <2 x i64> + %3 = mul <2 x i64> %1, %2 + ret <2 x i64> %3 +} + +define arm_aapcs_vfpcc <2 x i64> @test_vmull_laneu32_int(<2 x i32> %arg0_uint32x2_t, <2 x i32> %arg1_uint32x2_t) nounwind readnone { +entry: +; CHECK: test_vmull_laneu32_int +; CHECK: vmull.u32 q0, d0, d1[1] + %0 = shufflevector <2 x i32> %arg1_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1] + %1 = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %arg0_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %1 +} + +declare <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone + +declare <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone + +declare <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8>, <8 x i8>) nounwind readnone + + +; Radar 8687140 +; VMULL needs to recognize BUILD_VECTORs with sign/zero-extended elements. 
+ +define <8 x i16> @vmull_extvec_s8(<8 x i8> %arg) nounwind { +; CHECK: vmull_extvec_s8 +; CHECK: vmull.s8 + %tmp3 = sext <8 x i8> %arg to <8 x i16> + %tmp4 = mul <8 x i16> %tmp3, <i16 -12, i16 -12, i16 -12, i16 -12, i16 -12, i16 -12, i16 -12, i16 -12> + ret <8 x i16> %tmp4 +} + +define <8 x i16> @vmull_extvec_u8(<8 x i8> %arg) nounwind { +; CHECK: vmull_extvec_u8 +; CHECK: vmull.u8 + %tmp3 = zext <8 x i8> %arg to <8 x i16> + %tmp4 = mul <8 x i16> %tmp3, <i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12> + ret <8 x i16> %tmp4 +} + +define <8 x i16> @vmull_noextvec_s8(<8 x i8> %arg) nounwind { +; Do not use VMULL if the BUILD_VECTOR element values are too big. +; CHECK: vmull_noextvec_s8 +; CHECK: vmovl.s8 +; CHECK: vmul.i16 + %tmp3 = sext <8 x i8> %arg to <8 x i16> + %tmp4 = mul <8 x i16> %tmp3, <i16 -999, i16 -999, i16 -999, i16 -999, i16 -999, i16 -999, i16 -999, i16 -999> + ret <8 x i16> %tmp4 +} + +define <8 x i16> @vmull_noextvec_u8(<8 x i8> %arg) nounwind { +; Do not use VMULL if the BUILD_VECTOR element values are too big. 
+; CHECK: vmull_noextvec_u8 +; CHECK: vmovl.u8 +; CHECK: vmul.i16 + %tmp3 = zext <8 x i8> %arg to <8 x i16> + %tmp4 = mul <8 x i16> %tmp3, <i16 999, i16 999, i16 999, i16 999, i16 999, i16 999, i16 999, i16 999> + ret <8 x i16> %tmp4 +} + +define <4 x i32> @vmull_extvec_s16(<4 x i16> %arg) nounwind { +; CHECK: vmull_extvec_s16 +; CHECK: vmull.s16 + %tmp3 = sext <4 x i16> %arg to <4 x i32> + %tmp4 = mul <4 x i32> %tmp3, <i32 -12, i32 -12, i32 -12, i32 -12> + ret <4 x i32> %tmp4 +} + +define <4 x i32> @vmull_extvec_u16(<4 x i16> %arg) nounwind { +; CHECK: vmull_extvec_u16 +; CHECK: vmull.u16 + %tmp3 = zext <4 x i16> %arg to <4 x i32> + %tmp4 = mul <4 x i32> %tmp3, <i32 1234, i32 1234, i32 1234, i32 1234> + ret <4 x i32> %tmp4 +} + +define <2 x i64> @vmull_extvec_s32(<2 x i32> %arg) nounwind { +; CHECK: vmull_extvec_s32 +; CHECK: vmull.s32 + %tmp3 = sext <2 x i32> %arg to <2 x i64> + %tmp4 = mul <2 x i64> %tmp3, <i64 -1234, i64 -1234> + ret <2 x i64> %tmp4 +} + +define <2 x i64> @vmull_extvec_u32(<2 x i32> %arg) nounwind { +; CHECK: vmull_extvec_u32 +; CHECK: vmull.u32 + %tmp3 = zext <2 x i32> %arg to <2 x i64> + %tmp4 = mul <2 x i64> %tmp3, <i64 1234, i64 1234> + ret <2 x i64> %tmp4 +} + +; rdar://9197392 +define void @distribute(i16* %dst, i8* %src, i32 %mul) nounwind { +entry: +; CHECK: distribute: +; CHECK: vmull.u8 [[REG1:(q[0-9]+)]], d{{.*}}, [[REG2:(d[0-9]+)]] +; CHECK: vmlal.u8 [[REG1]], d{{.*}}, [[REG2]] + %0 = trunc i32 %mul to i8 + %1 = insertelement <8 x i8> undef, i8 %0, i32 0 + %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer + %3 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %src, i32 1) + %4 = bitcast <16 x i8> %3 to <2 x double> + %5 = extractelement <2 x double> %4, i32 1 + %6 = bitcast double %5 to <8 x i8> + %7 = zext <8 x i8> %6 to <8 x i16> + %8 = zext <8 x i8> %2 to <8 x i16> + %9 = extractelement <2 x double> %4, i32 0 + %10 = bitcast double %9 to <8 x i8> + %11 = zext <8 x i8> %10 to <8 x i16> + %12 = add <8 x 
i16> %7, %11 + %13 = mul <8 x i16> %12, %8 + %14 = bitcast i16* %dst to i8* + tail call void @llvm.arm.neon.vst1.v8i16(i8* %14, <8 x i16> %13, i32 2) + ret void +} + +declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*, i32) nounwind readonly + +declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind + +; Take advantage of the Cortex-A8 multiplier accumulator forward. + +%struct.uint8x8_t = type { <8 x i8> } + +define void @distribute2(%struct.uint8x8_t* nocapture %dst, i8* %src, i32 %mul) nounwind { +entry: +; CHECK: distribute2 +; CHECK-NOT: vadd.i8 +; CHECK: vmul.i8 +; CHECK: vmla.i8 + %0 = trunc i32 %mul to i8 + %1 = insertelement <8 x i8> undef, i8 %0, i32 0 + %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer + %3 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %src, i32 1) + %4 = bitcast <16 x i8> %3 to <2 x double> + %5 = extractelement <2 x double> %4, i32 1 + %6 = bitcast double %5 to <8 x i8> + %7 = extractelement <2 x double> %4, i32 0 + %8 = bitcast double %7 to <8 x i8> + %9 = add <8 x i8> %6, %8 + %10 = mul <8 x i8> %9, %2 + %11 = getelementptr inbounds %struct.uint8x8_t* %dst, i32 0, i32 0 + store <8 x i8> %10, <8 x i8>* %11, align 8 + ret void +} + +define void @distribute2_commutative(%struct.uint8x8_t* nocapture %dst, i8* %src, i32 %mul) nounwind { +entry: +; CHECK: distribute2_commutative +; CHECK-NOT: vadd.i8 +; CHECK: vmul.i8 +; CHECK: vmla.i8 + %0 = trunc i32 %mul to i8 + %1 = insertelement <8 x i8> undef, i8 %0, i32 0 + %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer + %3 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %src, i32 1) + %4 = bitcast <16 x i8> %3 to <2 x double> + %5 = extractelement <2 x double> %4, i32 1 + %6 = bitcast double %5 to <8 x i8> + %7 = extractelement <2 x double> %4, i32 0 + %8 = bitcast double %7 to <8 x i8> + %9 = add <8 x i8> %6, %8 + %10 = mul <8 x i8> %2, %9 + %11 = getelementptr inbounds %struct.uint8x8_t* %dst, i32 0, i32 0 + store <8 x i8> 
%10, <8 x i8>* %11, align 8 + ret void +}
diff --git a/src/LLVM/test/CodeGen/ARM/vneg.ll b/src/LLVM/test/CodeGen/ARM/vneg.ll new file mode 100644 index 0000000..4a10732 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vneg.ll
@@ -0,0 +1,121 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @vnegs8(<8 x i8>* %A) nounwind { +;CHECK: vnegs8: +;CHECK: vneg.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = sub <8 x i8> zeroinitializer, %tmp1 + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vnegs16(<4 x i16>* %A) nounwind { +;CHECK: vnegs16: +;CHECK: vneg.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = sub <4 x i16> zeroinitializer, %tmp1 + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vnegs32(<2 x i32>* %A) nounwind { +;CHECK: vnegs32: +;CHECK: vneg.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = sub <2 x i32> zeroinitializer, %tmp1 + ret <2 x i32> %tmp2 +} + +define <2 x float> @vnegf32(<2 x float>* %A) nounwind { +;CHECK: vnegf32: +;CHECK: vneg.f32 + %tmp1 = load <2 x float>* %A + %tmp2 = fsub <2 x float> < float -0.000000e+00, float -0.000000e+00 >, %tmp1 + ret <2 x float> %tmp2 +} + +define <16 x i8> @vnegQs8(<16 x i8>* %A) nounwind { +;CHECK: vnegQs8: +;CHECK: vneg.s8 + %tmp1 = load <16 x i8>* %A + %tmp2 = sub <16 x i8> zeroinitializer, %tmp1 + ret <16 x i8> %tmp2 +} + +define <8 x i16> @vnegQs16(<8 x i16>* %A) nounwind { +;CHECK: vnegQs16: +;CHECK: vneg.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = sub <8 x i16> zeroinitializer, %tmp1 + ret <8 x i16> %tmp2 +} + +define <4 x i32> @vnegQs32(<4 x i32>* %A) nounwind { +;CHECK: vnegQs32: +;CHECK: vneg.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = sub <4 x i32> zeroinitializer, %tmp1 + ret <4 x i32> %tmp2 +} + +define <4 x float> @vnegQf32(<4 x float>* %A) nounwind { +;CHECK: vnegQf32: +;CHECK: vneg.f32 + %tmp1 = load <4 x float>* %A + %tmp2 = fsub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %tmp1 + ret <4 x float> %tmp2 +} + +define <8 x i8> @vqnegs8(<8 x i8>* %A) nounwind { +;CHECK: vqnegs8: +;CHECK: vqneg.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = call <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8> %tmp1) + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vqnegs16(<4 x i16>* %A) nounwind { 
+;CHECK: vqnegs16: +;CHECK: vqneg.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = call <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16> %tmp1) + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vqnegs32(<2 x i32>* %A) nounwind { +;CHECK: vqnegs32: +;CHECK: vqneg.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = call <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32> %tmp1) + ret <2 x i32> %tmp2 +} + +define <16 x i8> @vqnegQs8(<16 x i8>* %A) nounwind { +;CHECK: vqnegQs8: +;CHECK: vqneg.s8 + %tmp1 = load <16 x i8>* %A + %tmp2 = call <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8> %tmp1) + ret <16 x i8> %tmp2 +} + +define <8 x i16> @vqnegQs16(<8 x i16>* %A) nounwind { +;CHECK: vqnegQs16: +;CHECK: vqneg.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = call <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16> %tmp1) + ret <8 x i16> %tmp2 +} + +define <4 x i32> @vqnegQs32(<4 x i32>* %A) nounwind { +;CHECK: vqnegQs32: +;CHECK: vqneg.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32> %tmp1) + ret <4 x i32> %tmp2 +} + +declare <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/ARM/vpadal.ll b/src/LLVM/test/CodeGen/ARM/vpadal.ll new file mode 100644 index 0000000..7296e93 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vpadal.ll
@@ -0,0 +1,125 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <4 x i16> @vpadals8(<4 x i16>* %A, <8 x i8>* %B) nounwind { +;CHECK: vpadals8: +;CHECK: vpadal.s8 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16> %tmp1, <8 x i8> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vpadals16(<2 x i32>* %A, <4 x i16>* %B) nounwind { +;CHECK: vpadals16: +;CHECK: vpadal.s16 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32> %tmp1, <4 x i16> %tmp2) + ret <2 x i32> %tmp3 +} + +define <1 x i64> @vpadals32(<1 x i64>* %A, <2 x i32>* %B) nounwind { +;CHECK: vpadals32: +;CHECK: vpadal.s32 + %tmp1 = load <1 x i64>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64> %tmp1, <2 x i32> %tmp2) + ret <1 x i64> %tmp3 +} + +define <4 x i16> @vpadalu8(<4 x i16>* %A, <8 x i8>* %B) nounwind { +;CHECK: vpadalu8: +;CHECK: vpadal.u8 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16> %tmp1, <8 x i8> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vpadalu16(<2 x i32>* %A, <4 x i16>* %B) nounwind { +;CHECK: vpadalu16: +;CHECK: vpadal.u16 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32> %tmp1, <4 x i16> %tmp2) + ret <2 x i32> %tmp3 +} + +define <1 x i64> @vpadalu32(<1 x i64>* %A, <2 x i32>* %B) nounwind { +;CHECK: vpadalu32: +;CHECK: vpadal.u32 + %tmp1 = load <1 x i64>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64> %tmp1, <2 x i32> %tmp2) + ret <1 x i64> %tmp3 +} + +define <8 x i16> @vpadalQs8(<8 x i16>* %A, <16 x i8>* %B) nounwind { +;CHECK: vpadalQs8: +;CHECK: vpadal.s8 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <8 x i16> 
@llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16> %tmp1, <16 x i8> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vpadalQs16(<4 x i32>* %A, <8 x i16>* %B) nounwind { +;CHECK: vpadalQs16: +;CHECK: vpadal.s16 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32> %tmp1, <8 x i16> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vpadalQs32(<2 x i64>* %A, <4 x i32>* %B) nounwind { +;CHECK: vpadalQs32: +;CHECK: vpadal.s32 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64> %tmp1, <4 x i32> %tmp2) + ret <2 x i64> %tmp3 +} + +define <8 x i16> @vpadalQu8(<8 x i16>* %A, <16 x i8>* %B) nounwind { +;CHECK: vpadalQu8: +;CHECK: vpadal.u8 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16> %tmp1, <16 x i8> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vpadalQu16(<4 x i32>* %A, <8 x i16>* %B) nounwind { +;CHECK: vpadalQu16: +;CHECK: vpadal.u16 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32> %tmp1, <8 x i16> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vpadalQu32(<2 x i64>* %A, <4 x i32>* %B) nounwind { +;CHECK: vpadalQu32: +;CHECK: vpadal.u32 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64> %tmp1, <4 x i32> %tmp2) + ret <2 x i64> %tmp3 +} + +declare <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16>, <8 x i8>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32>, <4 x i16>) nounwind readnone +declare <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64>, <2 x i32>) nounwind readnone + +declare <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16>, <8 x i8>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32>, <4 x i16>) 
nounwind readnone +declare <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64>, <2 x i32>) nounwind readnone + +declare <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16>, <16 x i8>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32>, <8 x i16>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64>, <4 x i32>) nounwind readnone + +declare <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16>, <16 x i8>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32>, <8 x i16>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64>, <4 x i32>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/ARM/vpadd.ll b/src/LLVM/test/CodeGen/ARM/vpadd.ll new file mode 100644 index 0000000..1ba68f5 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vpadd.ll
@@ -0,0 +1,169 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @vpaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vpaddi8: +;CHECK: vpadd.i8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vpaddi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vpaddi16: +;CHECK: vpadd.i16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vpaddi32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vpaddi32: +;CHECK: vpadd.i32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <2 x float> @vpaddf32(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK: vpaddf32: +;CHECK: vpadd.f32 + %tmp1 = load <2 x float>* %A + %tmp2 = load <2 x float>* %B + %tmp3 = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) + ret <2 x float> %tmp3 +} + +declare <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone + +declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwind readnone + +define <4 x i16> @vpaddls8(<8 x i8>* %A) nounwind { +;CHECK: vpaddls8: +;CHECK: vpaddl.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %tmp1) + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vpaddls16(<4 x i16>* %A) nounwind { +;CHECK: vpaddls16: +;CHECK: vpaddl.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = call <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16> %tmp1) + ret <2 x i32> %tmp2 +} + +define 
<1 x i64> @vpaddls32(<2 x i32>* %A) nounwind { +;CHECK: vpaddls32: +;CHECK: vpaddl.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = call <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32> %tmp1) + ret <1 x i64> %tmp2 +} + +define <4 x i16> @vpaddlu8(<8 x i8>* %A) nounwind { +;CHECK: vpaddlu8: +;CHECK: vpaddl.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = call <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8> %tmp1) + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vpaddlu16(<4 x i16>* %A) nounwind { +;CHECK: vpaddlu16: +;CHECK: vpaddl.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> %tmp1) + ret <2 x i32> %tmp2 +} + +define <1 x i64> @vpaddlu32(<2 x i32>* %A) nounwind { +;CHECK: vpaddlu32: +;CHECK: vpaddl.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = call <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32> %tmp1) + ret <1 x i64> %tmp2 +} + +define <8 x i16> @vpaddlQs8(<16 x i8>* %A) nounwind { +;CHECK: vpaddlQs8: +;CHECK: vpaddl.s8 + %tmp1 = load <16 x i8>* %A + %tmp2 = call <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8> %tmp1) + ret <8 x i16> %tmp2 +} + +define <4 x i32> @vpaddlQs16(<8 x i16>* %A) nounwind { +;CHECK: vpaddlQs16: +;CHECK: vpaddl.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = call <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16> %tmp1) + ret <4 x i32> %tmp2 +} + +define <2 x i64> @vpaddlQs32(<4 x i32>* %A) nounwind { +;CHECK: vpaddlQs32: +;CHECK: vpaddl.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = call <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32> %tmp1) + ret <2 x i64> %tmp2 +} + +define <8 x i16> @vpaddlQu8(<16 x i8>* %A) nounwind { +;CHECK: vpaddlQu8: +;CHECK: vpaddl.u8 + %tmp1 = load <16 x i8>* %A + %tmp2 = call <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8> %tmp1) + ret <8 x i16> %tmp2 +} + +define <4 x i32> @vpaddlQu16(<8 x i16>* %A) nounwind { +;CHECK: vpaddlQu16: +;CHECK: vpaddl.u16 + %tmp1 = load <8 x i16>* %A + %tmp2 = call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 
x i16> %tmp1) + ret <4 x i32> %tmp2 +} + +define <2 x i64> @vpaddlQu32(<4 x i32>* %A) nounwind { +;CHECK: vpaddlQu32: +;CHECK: vpaddl.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %tmp1) + ret <2 x i64> %tmp2 +} + +; Test AddCombine optimization that generates a vpaddl.s +define void @addCombineToVPADDL() nounwind ssp { +; CHECK: vpaddl.s8 + %cbcr = alloca <16 x i8>, align 16 + %X = alloca <8 x i8>, align 8 + %tmp = load <16 x i8>* %cbcr + %tmp1 = shufflevector <16 x i8> %tmp, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> + %tmp2 = load <16 x i8>* %cbcr + %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> + %add = add <8 x i8> %tmp3, %tmp1 + store <8 x i8> %add, <8 x i8>* %X, align 8 + ret void +} + +declare <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16>) nounwind readnone +declare <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32>) nounwind readnone + +declare <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16>) nounwind readnone +declare <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32>) nounwind readnone + +declare <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32>) nounwind readnone + +declare <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/ARM/vpminmax.ll b/src/LLVM/test/CodeGen/ARM/vpminmax.ll new file mode 100644 index 0000000..b75bcc9 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vpminmax.ll
@@ -0,0 +1,147 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @vpmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vpmins8: +;CHECK: vpmin.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vpmins16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vpmins16: +;CHECK: vpmin.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vpmins32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vpmins32: +;CHECK: vpmin.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <8 x i8> @vpminu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vpminu8: +;CHECK: vpmin.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vpminu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vpminu16: +;CHECK: vpmin.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vpminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vpminu32: +;CHECK: vpmin.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <2 x float> @vpminf32(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK: vpminf32: +;CHECK: vpmin.f32 + %tmp1 = load <2 x float>* %A + %tmp2 = load <2 x float>* %B + %tmp3 = call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) + ret 
<2 x float> %tmp3 +} + +declare <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32>, <2 x i32>) nounwind readnone + +declare <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone + +declare <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float>, <2 x float>) nounwind readnone + +define <8 x i8> @vpmaxs8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vpmaxs8: +;CHECK: vpmax.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vpmaxs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vpmaxs16: +;CHECK: vpmax.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vpmaxs32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vpmaxs32: +;CHECK: vpmax.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <8 x i8> @vpmaxu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vpmaxu8: +;CHECK: vpmax.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vpmaxu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vpmaxu16: +;CHECK: vpmax.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 
+} + +define <2 x i32> @vpmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vpmaxu32: +;CHECK: vpmax.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <2 x float> @vpmaxf32(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK: vpmaxf32: +;CHECK: vpmax.f32 + %tmp1 = load <2 x float>* %A + %tmp2 = load <2 x float>* %B + %tmp3 = call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) + ret <2 x float> %tmp3 +} + +declare <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone + +declare <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone + +declare <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/ARM/vqadd.ll b/src/LLVM/test/CodeGen/ARM/vqadd.ll new file mode 100644 index 0000000..a1669b6 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vqadd.ll
@@ -0,0 +1,165 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @vqadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vqadds8: +;CHECK: vqadd.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vqadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vqadds16: +;CHECK: vqadd.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vqadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vqadds32: +;CHECK: vqadd.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <1 x i64> @vqadds64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vqadds64: +;CHECK: vqadd.s64 + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + ret <1 x i64> %tmp3 +} + +define <8 x i8> @vqaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vqaddu8: +;CHECK: vqadd.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vqaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vqaddu16: +;CHECK: vqadd.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vqaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vqaddu32: +;CHECK: vqadd.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 
+} + +define <1 x i64> @vqaddu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vqaddu64: +;CHECK: vqadd.u64 + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + ret <1 x i64> %tmp3 +} + +define <16 x i8> @vqaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vqaddQs8: +;CHECK: vqadd.s8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vqaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vqaddQs16: +;CHECK: vqadd.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vqaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vqaddQs32: +;CHECK: vqadd.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vqaddQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vqaddQs64: +;CHECK: vqadd.s64 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i64> %tmp3 +} + +define <16 x i8> @vqaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vqaddQu8: +;CHECK: vqadd.u8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vqaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vqaddQu16: +;CHECK: vqadd.u16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vqaddQu32(<4 x 
i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vqaddQu32: +;CHECK: vqadd.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vqaddQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vqaddQu64: +;CHECK: vqadd.u64 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i64> %tmp3 +} + +declare <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64>, <1 x i64>) nounwind readnone + +declare <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/ARM/vqdmul.ll b/src/LLVM/test/CodeGen/ARM/vqdmul.ll new file mode 100644 index 0000000..08e7d2b --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vqdmul.ll
@@ -0,0 +1,280 @@ +; RUN: llc -mattr=+neon < %s | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32" +target triple = "thumbv7-elf" + +define <4 x i16> @vqdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vqdmulhs16: +;CHECK: vqdmulh.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vqdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vqdmulhs32: +;CHECK: vqdmulh.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <8 x i16> @vqdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vqdmulhQs16: +;CHECK: vqdmulh.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vqdmulhQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vqdmulhQs32: +;CHECK: vqdmulh.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vqdmulhQ_lanes16(<8 x i16> %arg0_int16x8_t, <4 x i16> %arg1_int16x4_t) nounwind readnone { +entry: +; CHECK: test_vqdmulhQ_lanes16 +; CHECK: vqdmulh.s16 q0, q0, d2[1] + %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> ; <<8 x i16>> [#uses=1] + %1 = tail call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %arg0_int16x8_t, <8 x i16> %0) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %1 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vqdmulhQ_lanes32(<4 x i32> %arg0_int32x4_t, <2 x i32> %arg1_int32x2_t) 
nounwind readnone { +entry: +; CHECK: test_vqdmulhQ_lanes32 +; CHECK: vqdmulh.s32 q0, q0, d2[1] + %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i32>> [#uses=1] + %1 = tail call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i32> %0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %1 +} + +define arm_aapcs_vfpcc <4 x i16> @test_vqdmulh_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone { +entry: +; CHECK: test_vqdmulh_lanes16 +; CHECK: vqdmulh.s16 d0, d0, d1[1] + %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1] + %1 = tail call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i16>> [#uses=1] + ret <4 x i16> %1 +} + +define arm_aapcs_vfpcc <2 x i32> @test_vqdmulh_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone { +entry: +; CHECK: test_vqdmulh_lanes32 +; CHECK: vqdmulh.s32 d0, d0, d1[1] + %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1] + %1 = tail call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i32>> [#uses=1] + ret <2 x i32> %1 +} + +declare <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone + +declare <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone + +define <4 x i16> @vqrdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vqrdmulhs16: +;CHECK: vqrdmulh.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vqrdmulhs32(<2 x i32>* 
%A, <2 x i32>* %B) nounwind { +;CHECK: vqrdmulhs32: +;CHECK: vqrdmulh.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <8 x i16> @vqrdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vqrdmulhQs16: +;CHECK: vqrdmulh.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vqrdmulhQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vqrdmulhQs32: +;CHECK: vqrdmulh.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vqRdmulhQ_lanes16(<8 x i16> %arg0_int16x8_t, <4 x i16> %arg1_int16x4_t) nounwind readnone { +entry: +; CHECK: test_vqRdmulhQ_lanes16 +; CHECK: vqrdmulh.s16 q0, q0, d2[1] + %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> ; <<8 x i16>> [#uses=1] + %1 = tail call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %arg0_int16x8_t, <8 x i16> %0) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %1 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vqRdmulhQ_lanes32(<4 x i32> %arg0_int32x4_t, <2 x i32> %arg1_int32x2_t) nounwind readnone { +entry: +; CHECK: test_vqRdmulhQ_lanes32 +; CHECK: vqrdmulh.s32 q0, q0, d2[1] + %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i32>> [#uses=1] + %1 = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i32> %0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %1 +} + +define arm_aapcs_vfpcc <4 x i16> @test_vqRdmulh_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone { +entry: +; CHECK: test_vqRdmulh_lanes16 
+; CHECK: vqrdmulh.s16 d0, d0, d1[1] + %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1] + %1 = tail call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i16>> [#uses=1] + ret <4 x i16> %1 +} + +define arm_aapcs_vfpcc <2 x i32> @test_vqRdmulh_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone { +entry: +; CHECK: test_vqRdmulh_lanes32 +; CHECK: vqrdmulh.s32 d0, d0, d1[1] + %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1] + %1 = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i32>> [#uses=1] + ret <2 x i32> %1 +} + +declare <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone + +declare <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone + +define <4 x i32> @vqdmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vqdmulls16: +;CHECK: vqdmull.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vqdmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vqdmulls32: +;CHECK: vqdmull.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i64> %tmp3 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vqdmull_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone { +entry: +; CHECK: test_vqdmull_lanes16 +; CHECK: vqdmull.s16 q0, d0, d1[1] + %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, 
i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1] + %1 = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %1 +} + +define arm_aapcs_vfpcc <2 x i64> @test_vqdmull_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone { +entry: +; CHECK: test_vqdmull_lanes32 +; CHECK: vqdmull.s32 q0, d0, d1[1] + %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1] + %1 = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %1 +} + +declare <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone + +define <4 x i32> @vqdmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { +;CHECK: vqdmlals16: +;CHECK: vqdmlal.s16 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = load <4 x i16>* %C + %tmp4 = call <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3) + ret <4 x i32> %tmp4 +} + +define <2 x i64> @vqdmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { +;CHECK: vqdmlals32: +;CHECK: vqdmlal.s32 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = load <2 x i32>* %C + %tmp4 = call <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3) + ret <2 x i64> %tmp4 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vqdmlal_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone { +entry: +; CHECK: test_vqdmlal_lanes16 +; CHECK: vqdmlal.s16 q0, d2, d3[1] + %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1] + %1 = tail call <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 
x i16> %0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %1 +} + +define arm_aapcs_vfpcc <2 x i64> @test_vqdmlal_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone { +entry: +; CHECK: test_vqdmlal_lanes32 +; CHECK: vqdmlal.s32 q0, d2, d3[1] + %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1] + %1 = tail call <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %1 +} + +declare <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone + +define <4 x i32> @vqdmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { +;CHECK: vqdmlsls16: +;CHECK: vqdmlsl.s16 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = load <4 x i16>* %C + %tmp4 = call <4 x i32> @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3) + ret <4 x i32> %tmp4 +} + +define <2 x i64> @vqdmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { +;CHECK: vqdmlsls32: +;CHECK: vqdmlsl.s32 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = load <2 x i32>* %C + %tmp4 = call <2 x i64> @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3) + ret <2 x i64> %tmp4 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vqdmlsl_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone { +entry: +; CHECK: test_vqdmlsl_lanes16 +; CHECK: vqdmlsl.s16 q0, d2, d3[1] + %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1] + %1 = tail call <4 x i32> @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %1 
+} + +define arm_aapcs_vfpcc <2 x i64> @test_vqdmlsl_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone { +entry: +; CHECK: test_vqdmlsl_lanes32 +; CHECK: vqdmlsl.s32 q0, d2, d3[1] + %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1] + %1 = tail call <2 x i64> @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %1 +} + +declare <4 x i32> @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/ARM/vqshl.ll b/src/LLVM/test/CodeGen/ARM/vqshl.ll new file mode 100644 index 0000000..e4d29a3 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vqshl.ll
@@ -0,0 +1,531 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @vqshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vqshls8: +;CHECK: vqshl.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vqshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vqshls16: +;CHECK: vqshl.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vqshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vqshls32: +;CHECK: vqshl.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <1 x i64> @vqshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vqshls64: +;CHECK: vqshl.s64 + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + ret <1 x i64> %tmp3 +} + +define <8 x i8> @vqshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vqshlu8: +;CHECK: vqshl.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vqshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vqshlu16: +;CHECK: vqshl.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vqshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vqshlu32: +;CHECK: vqshl.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 
x i32> %tmp3 +} + +define <1 x i64> @vqshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vqshlu64: +;CHECK: vqshl.u64 + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + ret <1 x i64> %tmp3 +} + +define <16 x i8> @vqshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vqshlQs8: +;CHECK: vqshl.s8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vqshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vqshlQs16: +;CHECK: vqshl.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vqshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vqshlQs32: +;CHECK: vqshl.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vqshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vqshlQs64: +;CHECK: vqshl.s64 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i64> %tmp3 +} + +define <16 x i8> @vqshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vqshlQu8: +;CHECK: vqshl.u8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vqshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vqshlQu16: +;CHECK: vqshl.u16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define 
<4 x i32> @vqshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vqshlQu32: +;CHECK: vqshl.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vqshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vqshlQu64: +;CHECK: vqshl.u64 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i64> %tmp3 +} + +define <8 x i8> @vqshls_n8(<8 x i8>* %A) nounwind { +;CHECK: vqshls_n8: +;CHECK: vqshl.s8{{.*#7}} + %tmp1 = load <8 x i8>* %A + %tmp2 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vqshls_n16(<4 x i16>* %A) nounwind { +;CHECK: vqshls_n16: +;CHECK: vqshl.s16{{.*#15}} + %tmp1 = load <4 x i16>* %A + %tmp2 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >) + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vqshls_n32(<2 x i32>* %A) nounwind { +;CHECK: vqshls_n32: +;CHECK: vqshl.s32{{.*#31}} + %tmp1 = load <2 x i32>* %A + %tmp2 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >) + ret <2 x i32> %tmp2 +} + +define <1 x i64> @vqshls_n64(<1 x i64>* %A) nounwind { +;CHECK: vqshls_n64: +;CHECK: vqshl.s64{{.*#63}} + %tmp1 = load <1 x i64>* %A + %tmp2 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >) + ret <1 x i64> %tmp2 +} + +define <8 x i8> @vqshlu_n8(<8 x i8>* %A) nounwind { +;CHECK: vqshlu_n8: +;CHECK: vqshl.u8{{.*#7}} + %tmp1 = load <8 x i8>* %A + %tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vqshlu_n16(<4 x i16>* %A) nounwind { +;CHECK: vqshlu_n16: 
+;CHECK: vqshl.u16{{.*#15}} + %tmp1 = load <4 x i16>* %A + %tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >) + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vqshlu_n32(<2 x i32>* %A) nounwind { +;CHECK: vqshlu_n32: +;CHECK: vqshl.u32{{.*#31}} + %tmp1 = load <2 x i32>* %A + %tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >) + ret <2 x i32> %tmp2 +} + +define <1 x i64> @vqshlu_n64(<1 x i64>* %A) nounwind { +;CHECK: vqshlu_n64: +;CHECK: vqshl.u64{{.*#63}} + %tmp1 = load <1 x i64>* %A + %tmp2 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >) + ret <1 x i64> %tmp2 +} + +define <8 x i8> @vqshlsu_n8(<8 x i8>* %A) nounwind { +;CHECK: vqshlsu_n8: +;CHECK: vqshlu.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vqshlsu_n16(<4 x i16>* %A) nounwind { +;CHECK: vqshlsu_n16: +;CHECK: vqshlu.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >) + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vqshlsu_n32(<2 x i32>* %A) nounwind { +;CHECK: vqshlsu_n32: +;CHECK: vqshlu.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >) + ret <2 x i32> %tmp2 +} + +define <1 x i64> @vqshlsu_n64(<1 x i64>* %A) nounwind { +;CHECK: vqshlsu_n64: +;CHECK: vqshlu.s64 + %tmp1 = load <1 x i64>* %A + %tmp2 = call <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >) + ret <1 x i64> %tmp2 +} + +define <16 x i8> @vqshlQs_n8(<16 x i8>* %A) nounwind { +;CHECK: vqshlQs_n8: +;CHECK: vqshl.s8{{.*#7}} + %tmp1 = load <16 x i8>* %A + %tmp2 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 
7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) + ret <16 x i8> %tmp2 +} + +define <8 x i16> @vqshlQs_n16(<8 x i16>* %A) nounwind { +;CHECK: vqshlQs_n16: +;CHECK: vqshl.s16{{.*#15}} + %tmp1 = load <8 x i16>* %A + %tmp2 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >) + ret <8 x i16> %tmp2 +} + +define <4 x i32> @vqshlQs_n32(<4 x i32>* %A) nounwind { +;CHECK: vqshlQs_n32: +;CHECK: vqshl.s32{{.*#31}} + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >) + ret <4 x i32> %tmp2 +} + +define <2 x i64> @vqshlQs_n64(<2 x i64>* %A) nounwind { +;CHECK: vqshlQs_n64: +;CHECK: vqshl.s64{{.*#63}} + %tmp1 = load <2 x i64>* %A + %tmp2 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >) + ret <2 x i64> %tmp2 +} + +define <16 x i8> @vqshlQu_n8(<16 x i8>* %A) nounwind { +;CHECK: vqshlQu_n8: +;CHECK: vqshl.u8{{.*#7}} + %tmp1 = load <16 x i8>* %A + %tmp2 = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) + ret <16 x i8> %tmp2 +} + +define <8 x i16> @vqshlQu_n16(<8 x i16>* %A) nounwind { +;CHECK: vqshlQu_n16: +;CHECK: vqshl.u16{{.*#15}} + %tmp1 = load <8 x i16>* %A + %tmp2 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >) + ret <8 x i16> %tmp2 +} + +define <4 x i32> @vqshlQu_n32(<4 x i32>* %A) nounwind { +;CHECK: vqshlQu_n32: +;CHECK: vqshl.u32{{.*#31}} + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >) + ret <4 x i32> %tmp2 +} + +define <2 x i64> @vqshlQu_n64(<2 x i64>* %A) nounwind { +;CHECK: vqshlQu_n64: 
+;CHECK: vqshl.u64{{.*#63}} + %tmp1 = load <2 x i64>* %A + %tmp2 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >) + ret <2 x i64> %tmp2 +} + +define <16 x i8> @vqshlQsu_n8(<16 x i8>* %A) nounwind { +;CHECK: vqshlQsu_n8: +;CHECK: vqshlu.s8 + %tmp1 = load <16 x i8>* %A + %tmp2 = call <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) + ret <16 x i8> %tmp2 +} + +define <8 x i16> @vqshlQsu_n16(<8 x i16>* %A) nounwind { +;CHECK: vqshlQsu_n16: +;CHECK: vqshlu.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = call <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >) + ret <8 x i16> %tmp2 +} + +define <4 x i32> @vqshlQsu_n32(<4 x i32>* %A) nounwind { +;CHECK: vqshlQsu_n32: +;CHECK: vqshlu.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >) + ret <4 x i32> %tmp2 +} + +define <2 x i64> @vqshlQsu_n64(<2 x i64>* %A) nounwind { +;CHECK: vqshlQsu_n64: +;CHECK: vqshlu.s64 + %tmp1 = load <2 x i64>* %A + %tmp2 = call <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >) + ret <2 x i64> %tmp2 +} + +declare <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64>, <1 x i64>) nounwind readnone + +declare <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x 
i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone + +declare <8 x i8> @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64>, <2 x i64>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone + +define <8 x i8> @vqrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vqrshls8: +;CHECK: vqrshl.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vqrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vqrshls16: +;CHECK: vqrshl.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> 
@llvm.arm.neon.vqrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vqrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vqrshls32: +;CHECK: vqrshl.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <1 x i64> @vqrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vqrshls64: +;CHECK: vqrshl.s64 + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + ret <1 x i64> %tmp3 +} + +define <8 x i8> @vqrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vqrshlu8: +;CHECK: vqrshl.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vqrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vqrshlu16: +;CHECK: vqrshl.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vqrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vqrshlu32: +;CHECK: vqrshl.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <1 x i64> @vqrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vqrshlu64: +;CHECK: vqrshl.u64 + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + ret <1 x i64> %tmp3 +} + +define <16 x i8> @vqrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vqrshlQs8: +;CHECK: vqrshl.s8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> 
@llvm.arm.neon.vqrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vqrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vqrshlQs16: +;CHECK: vqrshl.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vqrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vqrshlQs32: +;CHECK: vqrshl.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vqrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vqrshlQs64: +;CHECK: vqrshl.s64 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i64> %tmp3 +} + +define <16 x i8> @vqrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vqrshlQu8: +;CHECK: vqrshl.u8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vqrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vqrshlQu16: +;CHECK: vqrshl.u16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vqrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vqrshlQu32: +;CHECK: vqrshl.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vqrshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vqrshlQu64: +;CHECK: vqrshl.u64 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = call <2 
x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i64> %tmp3 +} + +declare <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64>, <1 x i64>) nounwind readnone + +declare <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64>, <2 x i64>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/ARM/vqshrn.ll b/src/LLVM/test/CodeGen/ARM/vqshrn.ll new file mode 100644 index 0000000..5da7943 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vqshrn.ll
@@ -0,0 +1,169 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @vqshrns8(<8 x i16>* %A) nounwind { +;CHECK: vqshrns8: +;CHECK: vqshrn.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftns.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >) + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vqshrns16(<4 x i32>* %A) nounwind { +;CHECK: vqshrns16: +;CHECK: vqshrn.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >) + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vqshrns32(<2 x i64>* %A) nounwind { +;CHECK: vqshrns32: +;CHECK: vqshrn.s64 + %tmp1 = load <2 x i64>* %A + %tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >) + ret <2 x i32> %tmp2 +} + +define <8 x i8> @vqshrnu8(<8 x i16>* %A) nounwind { +;CHECK: vqshrnu8: +;CHECK: vqshrn.u16 + %tmp1 = load <8 x i16>* %A + %tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >) + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vqshrnu16(<4 x i32>* %A) nounwind { +;CHECK: vqshrnu16: +;CHECK: vqshrn.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >) + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vqshrnu32(<2 x i64>* %A) nounwind { +;CHECK: vqshrnu32: +;CHECK: vqshrn.u64 + %tmp1 = load <2 x i64>* %A + %tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >) + ret <2 x i32> %tmp2 +} + +define <8 x i8> @vqshruns8(<8 x i16>* %A) nounwind { +;CHECK: vqshruns8: +;CHECK: vqshrun.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 
>) + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vqshruns16(<4 x i32>* %A) nounwind { +;CHECK: vqshruns16: +;CHECK: vqshrun.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >) + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vqshruns32(<2 x i64>* %A) nounwind { +;CHECK: vqshruns32: +;CHECK: vqshrun.s64 + %tmp1 = load <2 x i64>* %A + %tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >) + ret <2 x i32> %tmp2 +} + +declare <8 x i8> @llvm.arm.neon.vqshiftns.v8i8(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64>, <2 x i64>) nounwind readnone + +declare <8 x i8> @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone + +declare <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone + +define <8 x i8> @vqrshrns8(<8 x i16>* %A) nounwind { +;CHECK: vqrshrns8: +;CHECK: vqrshrn.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >) + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vqrshrns16(<4 x i32>* %A) nounwind { +;CHECK: vqrshrns16: +;CHECK: vqrshrn.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >) + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vqrshrns32(<2 x i64>* 
%A) nounwind { +;CHECK: vqrshrns32: +;CHECK: vqrshrn.s64 + %tmp1 = load <2 x i64>* %A + %tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >) + ret <2 x i32> %tmp2 +} + +define <8 x i8> @vqrshrnu8(<8 x i16>* %A) nounwind { +;CHECK: vqrshrnu8: +;CHECK: vqrshrn.u16 + %tmp1 = load <8 x i16>* %A + %tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >) + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vqrshrnu16(<4 x i32>* %A) nounwind { +;CHECK: vqrshrnu16: +;CHECK: vqrshrn.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >) + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vqrshrnu32(<2 x i64>* %A) nounwind { +;CHECK: vqrshrnu32: +;CHECK: vqrshrn.u64 + %tmp1 = load <2 x i64>* %A + %tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >) + ret <2 x i32> %tmp2 +} + +define <8 x i8> @vqrshruns8(<8 x i16>* %A) nounwind { +;CHECK: vqrshruns8: +;CHECK: vqrshrun.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >) + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vqrshruns16(<4 x i32>* %A) nounwind { +;CHECK: vqrshruns16: +;CHECK: vqrshrun.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >) + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vqrshruns32(<2 x i64>* %A) nounwind { +;CHECK: vqrshruns32: +;CHECK: vqrshrun.s64 + %tmp1 = load <2 x i64>* %A + %tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >) + ret <2 x i32> %tmp2 +} + +declare <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16>, <8 x i16>) nounwind readnone 
+declare <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64>, <2 x i64>) nounwind readnone + +declare <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone + +declare <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/ARM/vqsub.ll b/src/LLVM/test/CodeGen/ARM/vqsub.ll new file mode 100644 index 0000000..4231fca --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vqsub.ll
@@ -0,0 +1,165 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @vqsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vqsubs8: +;CHECK: vqsub.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vqsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vqsubs16: +;CHECK: vqsub.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vqsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vqsubs32: +;CHECK: vqsub.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <1 x i64> @vqsubs64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vqsubs64: +;CHECK: vqsub.s64 + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + ret <1 x i64> %tmp3 +} + +define <8 x i8> @vqsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vqsubu8: +;CHECK: vqsub.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vqsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vqsubu16: +;CHECK: vqsub.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vqsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vqsubu32: +;CHECK: vqsub.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 
+} + +define <1 x i64> @vqsubu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vqsubu64: +;CHECK: vqsub.u64 + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + ret <1 x i64> %tmp3 +} + +define <16 x i8> @vqsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vqsubQs8: +;CHECK: vqsub.s8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vqsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vqsubQs16: +;CHECK: vqsub.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vqsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vqsubQs32: +;CHECK: vqsub.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vqsubQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vqsubQs64: +;CHECK: vqsub.s64 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i64> %tmp3 +} + +define <16 x i8> @vqsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vqsubQu8: +;CHECK: vqsub.u8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vqsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vqsubQu16: +;CHECK: vqsub.u16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vqsubQu32(<4 x 
i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vqsubQu32: +;CHECK: vqsub.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vqsubQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vqsubQu64: +;CHECK: vqsub.u64 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i64> %tmp3 +} + +declare <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>) nounwind readnone + +declare <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/ARM/vrec.ll b/src/LLVM/test/CodeGen/ARM/vrec.ll new file mode 100644 index 0000000..99989e9 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vrec.ll
@@ -0,0 +1,119 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <2 x i32> @vrecpei32(<2 x i32>* %A) nounwind { +;CHECK: vrecpei32: +;CHECK: vrecpe.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = call <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32> %tmp1) + ret <2 x i32> %tmp2 +} + +define <4 x i32> @vrecpeQi32(<4 x i32>* %A) nounwind { +;CHECK: vrecpeQi32: +;CHECK: vrecpe.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32> %tmp1) + ret <4 x i32> %tmp2 +} + +define <2 x float> @vrecpef32(<2 x float>* %A) nounwind { +;CHECK: vrecpef32: +;CHECK: vrecpe.f32 + %tmp1 = load <2 x float>* %A + %tmp2 = call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %tmp1) + ret <2 x float> %tmp2 +} + +define <4 x float> @vrecpeQf32(<4 x float>* %A) nounwind { +;CHECK: vrecpeQf32: +;CHECK: vrecpe.f32 + %tmp1 = load <4 x float>* %A + %tmp2 = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %tmp1) + ret <4 x float> %tmp2 +} + +declare <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32>) nounwind readnone + +declare <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float>) nounwind readnone +declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone + +define <2 x float> @vrecpsf32(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK: vrecpsf32: +;CHECK: vrecps.f32 + %tmp1 = load <2 x float>* %A + %tmp2 = load <2 x float>* %B + %tmp3 = call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) + ret <2 x float> %tmp3 +} + +define <4 x float> @vrecpsQf32(<4 x float>* %A, <4 x float>* %B) nounwind { +;CHECK: vrecpsQf32: +;CHECK: vrecps.f32 + %tmp1 = load <4 x float>* %A + %tmp2 = load <4 x float>* %B + %tmp3 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) + ret <4 x float> %tmp3 +} + +declare <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float>, <2 x float>) nounwind readnone 
+declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone + +define <2 x i32> @vrsqrtei32(<2 x i32>* %A) nounwind { +;CHECK: vrsqrtei32: +;CHECK: vrsqrte.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = call <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32> %tmp1) + ret <2 x i32> %tmp2 +} + +define <4 x i32> @vrsqrteQi32(<4 x i32>* %A) nounwind { +;CHECK: vrsqrteQi32: +;CHECK: vrsqrte.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32> %tmp1) + ret <4 x i32> %tmp2 +} + +define <2 x float> @vrsqrtef32(<2 x float>* %A) nounwind { +;CHECK: vrsqrtef32: +;CHECK: vrsqrte.f32 + %tmp1 = load <2 x float>* %A + %tmp2 = call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %tmp1) + ret <2 x float> %tmp2 +} + +define <4 x float> @vrsqrteQf32(<4 x float>* %A) nounwind { +;CHECK: vrsqrteQf32: +;CHECK: vrsqrte.f32 + %tmp1 = load <4 x float>* %A + %tmp2 = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %tmp1) + ret <4 x float> %tmp2 +} + +declare <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32>) nounwind readnone + +declare <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float>) nounwind readnone +declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone + +define <2 x float> @vrsqrtsf32(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK: vrsqrtsf32: +;CHECK: vrsqrts.f32 + %tmp1 = load <2 x float>* %A + %tmp2 = load <2 x float>* %B + %tmp3 = call <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) + ret <2 x float> %tmp3 +} + +define <4 x float> @vrsqrtsQf32(<4 x float>* %A, <4 x float>* %B) nounwind { +;CHECK: vrsqrtsQf32: +;CHECK: vrsqrts.f32 + %tmp1 = load <4 x float>* %A + %tmp2 = load <4 x float>* %B + %tmp3 = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) + ret <4 x float> %tmp3 +} + +declare <2 x float> 
@llvm.arm.neon.vrsqrts.v2f32(<2 x float>, <2 x float>) nounwind readnone +declare <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float>, <4 x float>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/ARM/vrev.ll b/src/LLVM/test/CodeGen/ARM/vrev.ll new file mode 100644 index 0000000..34acd16 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vrev.ll
@@ -0,0 +1,180 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @test_vrev64D8(<8 x i8>* %A) nounwind { +;CHECK: test_vrev64D8: +;CHECK: vrev64.8 + %tmp1 = load <8 x i8>* %A + %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + ret <8 x i8> %tmp2 +} + +define <4 x i16> @test_vrev64D16(<4 x i16>* %A) nounwind { +;CHECK: test_vrev64D16: +;CHECK: vrev64.16 + %tmp1 = load <4 x i16>* %A + %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + ret <4 x i16> %tmp2 +} + +define <2 x i32> @test_vrev64D32(<2 x i32>* %A) nounwind { +;CHECK: test_vrev64D32: +;CHECK: vrev64.32 + %tmp1 = load <2 x i32>* %A + %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> <i32 1, i32 0> + ret <2 x i32> %tmp2 +} + +define <2 x float> @test_vrev64Df(<2 x float>* %A) nounwind { +;CHECK: test_vrev64Df: +;CHECK: vrev64.32 + %tmp1 = load <2 x float>* %A + %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> <i32 1, i32 0> + ret <2 x float> %tmp2 +} + +define <16 x i8> @test_vrev64Q8(<16 x i8>* %A) nounwind { +;CHECK: test_vrev64Q8: +;CHECK: vrev64.8 + %tmp1 = load <16 x i8>* %A + %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8> + ret <16 x i8> %tmp2 +} + +define <8 x i16> @test_vrev64Q16(<8 x i16>* %A) nounwind { +;CHECK: test_vrev64Q16: +;CHECK: vrev64.16 + %tmp1 = load <8 x i16>* %A + %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> + ret <8 x i16> %tmp2 +} + +define <4 x i32> @test_vrev64Q32(<4 x i32>* %A) nounwind { +;CHECK: test_vrev64Q32: +;CHECK: vrev64.32 + %tmp1 = load <4 x i32>* %A + %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> + ret <4 x i32> %tmp2 +} + +define 
<4 x float> @test_vrev64Qf(<4 x float>* %A) nounwind { +;CHECK: test_vrev64Qf: +;CHECK: vrev64.32 + %tmp1 = load <4 x float>* %A + %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> + ret <4 x float> %tmp2 +} + +define <8 x i8> @test_vrev32D8(<8 x i8>* %A) nounwind { +;CHECK: test_vrev32D8: +;CHECK: vrev32.8 + %tmp1 = load <8 x i8>* %A + %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> + ret <8 x i8> %tmp2 +} + +define <4 x i16> @test_vrev32D16(<4 x i16>* %A) nounwind { +;CHECK: test_vrev32D16: +;CHECK: vrev32.16 + %tmp1 = load <4 x i16>* %A + %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> + ret <4 x i16> %tmp2 +} + +define <16 x i8> @test_vrev32Q8(<16 x i8>* %A) nounwind { +;CHECK: test_vrev32Q8: +;CHECK: vrev32.8 + %tmp1 = load <16 x i8>* %A + %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12> + ret <16 x i8> %tmp2 +} + +define <8 x i16> @test_vrev32Q16(<8 x i16>* %A) nounwind { +;CHECK: test_vrev32Q16: +;CHECK: vrev32.16 + %tmp1 = load <8 x i16>* %A + %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> + ret <8 x i16> %tmp2 +} + +define <8 x i8> @test_vrev16D8(<8 x i8>* %A) nounwind { +;CHECK: test_vrev16D8: +;CHECK: vrev16.8 + %tmp1 = load <8 x i8>* %A + %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> + ret <8 x i8> %tmp2 +} + +define <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind { +;CHECK: test_vrev16Q8: +;CHECK: vrev16.8 + %tmp1 = load <16 x i8>* %A + %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, 
i32 15, i32 14> + ret <16 x i8> %tmp2 +} + +; Undef shuffle indices should not prevent matching to VREV: + +define <8 x i8> @test_vrev64D8_undef(<8 x i8>* %A) nounwind { +;CHECK: test_vrev64D8_undef: +;CHECK: vrev64.8 + %tmp1 = load <8 x i8>* %A + %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 undef, i32 undef, i32 4, i32 3, i32 2, i32 1, i32 0> + ret <8 x i8> %tmp2 +} + +define <8 x i16> @test_vrev32Q16_undef(<8 x i16>* %A) nounwind { +;CHECK: test_vrev32Q16_undef: +;CHECK: vrev32.16 + %tmp1 = load <8 x i16>* %A + %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef> + ret <8 x i16> %tmp2 +} + +; A vcombine feeding a VREV should not obscure things. Radar 8597007. + +define void @test_with_vcombine(<4 x float>* %v) nounwind { +;CHECK: test_with_vcombine: +;CHECK-NOT: vext +;CHECK: vrev64.32 + %tmp1 = load <4 x float>* %v, align 16 + %tmp2 = bitcast <4 x float> %tmp1 to <2 x double> + %tmp3 = extractelement <2 x double> %tmp2, i32 0 + %tmp4 = bitcast double %tmp3 to <2 x float> + %tmp5 = extractelement <2 x double> %tmp2, i32 1 + %tmp6 = bitcast double %tmp5 to <2 x float> + %tmp7 = fadd <2 x float> %tmp6, %tmp6 + %tmp8 = shufflevector <2 x float> %tmp4, <2 x float> %tmp7, <4 x i32> <i32 1, i32 0, i32 3, i32 2> + store <4 x float> %tmp8, <4 x float>* %v, align 16 + ret void +} + +; vrev <4 x i16> should use VREV32 and not VREV64 +define void @test_vrev64(<4 x i16>* nocapture %source, <2 x i16>* nocapture %dst) nounwind ssp { +; CHECK: test_vrev64: +; CHECK: vext.16 +; CHECK: vrev32.16 +entry: + %0 = bitcast <4 x i16>* %source to <8 x i16>* + %tmp2 = load <8 x i16>* %0, align 4 + %tmp3 = extractelement <8 x i16> %tmp2, i32 6 + %tmp5 = insertelement <2 x i16> undef, i16 %tmp3, i32 0 + %tmp9 = extractelement <8 x i16> %tmp2, i32 5 + %tmp11 = insertelement <2 x i16> %tmp5, i16 %tmp9, i32 1 + store <2 x i16> %tmp11, <2 x i16>* %dst, align 4 + ret void +} + +; 
Test vrev of float4 +define void @float_vrev64(float* nocapture %source, <4 x float>* nocapture %dest) nounwind noinline ssp { +; CHECK: float_vrev64 +; CHECK: vext.32 +; CHECK: vrev64.32 +entry: + %0 = bitcast float* %source to <4 x float>* + %tmp2 = load <4 x float>* %0, align 4 + %tmp5 = shufflevector <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x float> %tmp2, <4 x i32> <i32 0, i32 7, i32 0, i32 0> + %arrayidx8 = getelementptr inbounds <4 x float>* %dest, i32 11 + store <4 x float> %tmp5, <4 x float>* %arrayidx8, align 4 + ret void +} +
diff --git a/src/LLVM/test/CodeGen/ARM/vshift.ll b/src/LLVM/test/CodeGen/ARM/vshift.ll new file mode 100644 index 0000000..f3cbec7 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vshift.ll
@@ -0,0 +1,432 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vshls8: +;CHECK: vshl.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = shl <8 x i8> %tmp1, %tmp2 + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vshls16: +;CHECK: vshl.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = shl <4 x i16> %tmp1, %tmp2 + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vshls32: +;CHECK: vshl.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = shl <2 x i32> %tmp1, %tmp2 + ret <2 x i32> %tmp3 +} + +define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vshls64: +;CHECK: vshl.u64 + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = shl <1 x i64> %tmp1, %tmp2 + ret <1 x i64> %tmp3 +} + +define <8 x i8> @vshli8(<8 x i8>* %A) nounwind { +;CHECK: vshli8: +;CHECK: vshl.i8 + %tmp1 = load <8 x i8>* %A + %tmp2 = shl <8 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 > + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vshli16(<4 x i16>* %A) nounwind { +;CHECK: vshli16: +;CHECK: vshl.i16 + %tmp1 = load <4 x i16>* %A + %tmp2 = shl <4 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15 > + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vshli32(<2 x i32>* %A) nounwind { +;CHECK: vshli32: +;CHECK: vshl.i32 + %tmp1 = load <2 x i32>* %A + %tmp2 = shl <2 x i32> %tmp1, < i32 31, i32 31 > + ret <2 x i32> %tmp2 +} + +define <1 x i64> @vshli64(<1 x i64>* %A) nounwind { +;CHECK: vshli64: +;CHECK: vshl.i64 + %tmp1 = load <1 x i64>* %A + %tmp2 = shl <1 x i64> %tmp1, < i64 63 > + ret <1 x i64> %tmp2 +} + +define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vshlQs8: +;CHECK: vshl.u8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = shl <16 x i8> %tmp1, %tmp2 + ret <16 x i8> 
%tmp3 +} + +define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vshlQs16: +;CHECK: vshl.u16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = shl <8 x i16> %tmp1, %tmp2 + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vshlQs32: +;CHECK: vshl.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = shl <4 x i32> %tmp1, %tmp2 + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vshlQs64: +;CHECK: vshl.u64 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = shl <2 x i64> %tmp1, %tmp2 + ret <2 x i64> %tmp3 +} + +define <16 x i8> @vshlQi8(<16 x i8>* %A) nounwind { +;CHECK: vshlQi8: +;CHECK: vshl.i8 + %tmp1 = load <16 x i8>* %A + %tmp2 = shl <16 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 > + ret <16 x i8> %tmp2 +} + +define <8 x i16> @vshlQi16(<8 x i16>* %A) nounwind { +;CHECK: vshlQi16: +;CHECK: vshl.i16 + %tmp1 = load <8 x i16>* %A + %tmp2 = shl <8 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 > + ret <8 x i16> %tmp2 +} + +define <4 x i32> @vshlQi32(<4 x i32>* %A) nounwind { +;CHECK: vshlQi32: +;CHECK: vshl.i32 + %tmp1 = load <4 x i32>* %A + %tmp2 = shl <4 x i32> %tmp1, < i32 31, i32 31, i32 31, i32 31 > + ret <4 x i32> %tmp2 +} + +define <2 x i64> @vshlQi64(<2 x i64>* %A) nounwind { +;CHECK: vshlQi64: +;CHECK: vshl.i64 + %tmp1 = load <2 x i64>* %A + %tmp2 = shl <2 x i64> %tmp1, < i64 63, i64 63 > + ret <2 x i64> %tmp2 +} + +define <8 x i8> @vlshru8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vlshru8: +;CHECK: vneg.s8 +;CHECK: vshl.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = lshr <8 x i8> %tmp1, %tmp2 + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vlshru16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vlshru16: +;CHECK: vneg.s16 +;CHECK: vshl.u16 + %tmp1 = load 
<4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = lshr <4 x i16> %tmp1, %tmp2 + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vlshru32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vlshru32: +;CHECK: vneg.s32 +;CHECK: vshl.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = lshr <2 x i32> %tmp1, %tmp2 + ret <2 x i32> %tmp3 +} + +define <1 x i64> @vlshru64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vlshru64: +;CHECK: vsub.i64 +;CHECK: vshl.u64 + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = lshr <1 x i64> %tmp1, %tmp2 + ret <1 x i64> %tmp3 +} + +define <8 x i8> @vlshri8(<8 x i8>* %A) nounwind { +;CHECK: vlshri8: +;CHECK: vshr.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = lshr <8 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vlshri16(<4 x i16>* %A) nounwind { +;CHECK: vlshri16: +;CHECK: vshr.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = lshr <4 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16 > + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vlshri32(<2 x i32>* %A) nounwind { +;CHECK: vlshri32: +;CHECK: vshr.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = lshr <2 x i32> %tmp1, < i32 32, i32 32 > + ret <2 x i32> %tmp2 +} + +define <1 x i64> @vlshri64(<1 x i64>* %A) nounwind { +;CHECK: vlshri64: +;CHECK: vshr.u64 + %tmp1 = load <1 x i64>* %A + %tmp2 = lshr <1 x i64> %tmp1, < i64 64 > + ret <1 x i64> %tmp2 +} + +define <16 x i8> @vlshrQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vlshrQu8: +;CHECK: vneg.s8 +;CHECK: vshl.u8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = lshr <16 x i8> %tmp1, %tmp2 + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vlshrQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vlshrQu16: +;CHECK: vneg.s16 +;CHECK: vshl.u16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = lshr <8 x i16> %tmp1, %tmp2 + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vlshrQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: 
vlshrQu32: +;CHECK: vneg.s32 +;CHECK: vshl.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = lshr <4 x i32> %tmp1, %tmp2 + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vlshrQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vlshrQu64: +;CHECK: vsub.i64 +;CHECK: vshl.u64 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = lshr <2 x i64> %tmp1, %tmp2 + ret <2 x i64> %tmp3 +} + +define <16 x i8> @vlshrQi8(<16 x i8>* %A) nounwind { +;CHECK: vlshrQi8: +;CHECK: vshr.u8 + %tmp1 = load <16 x i8>* %A + %tmp2 = lshr <16 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > + ret <16 x i8> %tmp2 +} + +define <8 x i16> @vlshrQi16(<8 x i16>* %A) nounwind { +;CHECK: vlshrQi16: +;CHECK: vshr.u16 + %tmp1 = load <8 x i16>* %A + %tmp2 = lshr <8 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 > + ret <8 x i16> %tmp2 +} + +define <4 x i32> @vlshrQi32(<4 x i32>* %A) nounwind { +;CHECK: vlshrQi32: +;CHECK: vshr.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = lshr <4 x i32> %tmp1, < i32 32, i32 32, i32 32, i32 32 > + ret <4 x i32> %tmp2 +} + +define <2 x i64> @vlshrQi64(<2 x i64>* %A) nounwind { +;CHECK: vlshrQi64: +;CHECK: vshr.u64 + %tmp1 = load <2 x i64>* %A + %tmp2 = lshr <2 x i64> %tmp1, < i64 64, i64 64 > + ret <2 x i64> %tmp2 +} + +; Example that requires splitting and expanding a vector shift. 
+define <2 x i64> @update(<2 x i64> %val) nounwind readnone { +entry: + %shr = lshr <2 x i64> %val, < i64 2, i64 2 > ; <<2 x i64>> [#uses=1] + ret <2 x i64> %shr +} + +define <8 x i8> @vashrs8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vashrs8: +;CHECK: vneg.s8 +;CHECK: vshl.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = ashr <8 x i8> %tmp1, %tmp2 + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vashrs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vashrs16: +;CHECK: vneg.s16 +;CHECK: vshl.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = ashr <4 x i16> %tmp1, %tmp2 + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vashrs32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vashrs32: +;CHECK: vneg.s32 +;CHECK: vshl.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = ashr <2 x i32> %tmp1, %tmp2 + ret <2 x i32> %tmp3 +} + +define <1 x i64> @vashrs64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vashrs64: +;CHECK: vsub.i64 +;CHECK: vshl.s64 + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = ashr <1 x i64> %tmp1, %tmp2 + ret <1 x i64> %tmp3 +} + +define <8 x i8> @vashri8(<8 x i8>* %A) nounwind { +;CHECK: vashri8: +;CHECK: vshr.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = ashr <8 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vashri16(<4 x i16>* %A) nounwind { +;CHECK: vashri16: +;CHECK: vshr.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = ashr <4 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16 > + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vashri32(<2 x i32>* %A) nounwind { +;CHECK: vashri32: +;CHECK: vshr.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = ashr <2 x i32> %tmp1, < i32 32, i32 32 > + ret <2 x i32> %tmp2 +} + +define <1 x i64> @vashri64(<1 x i64>* %A) nounwind { +;CHECK: vashri64: +;CHECK: vshr.s64 + %tmp1 = load <1 x i64>* %A + %tmp2 = ashr <1 x i64> %tmp1, < i64 64 > + ret <1 x i64> %tmp2 +} + +define <16 x i8> @vashrQs8(<16 
x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vashrQs8: +;CHECK: vneg.s8 +;CHECK: vshl.s8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = ashr <16 x i8> %tmp1, %tmp2 + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vashrQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vashrQs16: +;CHECK: vneg.s16 +;CHECK: vshl.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = ashr <8 x i16> %tmp1, %tmp2 + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vashrQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vashrQs32: +;CHECK: vneg.s32 +;CHECK: vshl.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = ashr <4 x i32> %tmp1, %tmp2 + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vashrQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vashrQs64: +;CHECK: vsub.i64 +;CHECK: vshl.s64 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = ashr <2 x i64> %tmp1, %tmp2 + ret <2 x i64> %tmp3 +} + +define <16 x i8> @vashrQi8(<16 x i8>* %A) nounwind { +;CHECK: vashrQi8: +;CHECK: vshr.s8 + %tmp1 = load <16 x i8>* %A + %tmp2 = ashr <16 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > + ret <16 x i8> %tmp2 +} + +define <8 x i16> @vashrQi16(<8 x i16>* %A) nounwind { +;CHECK: vashrQi16: +;CHECK: vshr.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = ashr <8 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 > + ret <8 x i16> %tmp2 +} + +define <4 x i32> @vashrQi32(<4 x i32>* %A) nounwind { +;CHECK: vashrQi32: +;CHECK: vshr.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = ashr <4 x i32> %tmp1, < i32 32, i32 32, i32 32, i32 32 > + ret <4 x i32> %tmp2 +} + +define <2 x i64> @vashrQi64(<2 x i64>* %A) nounwind { +;CHECK: vashrQi64: +;CHECK: vshr.s64 + %tmp1 = load <2 x i64>* %A + %tmp2 = ashr <2 x i64> %tmp1, < i64 64, i64 64 > + ret <2 x i64> %tmp2 +}
diff --git a/src/LLVM/test/CodeGen/ARM/vshiftins.ll b/src/LLVM/test/CodeGen/ARM/vshiftins.ll new file mode 100644 index 0000000..3a4f857 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vshiftins.ll
@@ -0,0 +1,155 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @vsli8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vsli8: +;CHECK: vsli.8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vsli16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vsli16: +;CHECK: vsli.16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vsli32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vsli32: +;CHECK: vsli.32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> < i32 31, i32 31 >) + ret <2 x i32> %tmp3 +} + +define <1 x i64> @vsli64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vsli64: +;CHECK: vsli.64 + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, <1 x i64> < i64 63 >) + ret <1 x i64> %tmp3 +} + +define <16 x i8> @vsliQ8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vsliQ8: +;CHECK: vsli.8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vsliQ16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vsliQ16: +;CHECK: vsli.16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i16> < i16 15, i16 15, i16 15, 
i16 15, i16 15, i16 15, i16 15, i16 15 >) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vsliQ32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vsliQ32: +;CHECK: vsli.32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vsliQ64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vsliQ64: +;CHECK: vsli.64 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, <2 x i64> < i64 63, i64 63 >) + ret <2 x i64> %tmp3 +} + +define <8 x i8> @vsri8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vsri8: +;CHECK: vsri.8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vsri16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vsri16: +;CHECK: vsri.16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vsri32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vsri32: +;CHECK: vsri.32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> < i32 -32, i32 -32 >) + ret <2 x i32> %tmp3 +} + +define <1 x i64> @vsri64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vsri64: +;CHECK: vsri.64 + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, <1 x i64> < i64 -64 >) + ret <1 x i64> %tmp3 +} + +define <16 x i8> @vsriQ8(<16 x 
i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vsriQ8: +;CHECK: vsri.8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vsriQ16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vsriQ16: +;CHECK: vsri.16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vsriQ32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vsriQ32: +;CHECK: vsri.32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vsriQ64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vsriQ64: +;CHECK: vsri.64 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, <2 x i64> < i64 -64, i64 -64 >) + ret <2 x i64> %tmp3 +} + +declare <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64>, <1 x i64>, <1 x i64>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> 
@llvm.arm.neon.vshiftins.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/ARM/vshl.ll b/src/LLVM/test/CodeGen/ARM/vshl.ll new file mode 100644 index 0000000..818e71b --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vshl.ll
@@ -0,0 +1,654 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vshls8: +;CHECK: vshl.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vshls16: +;CHECK: vshl.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vshls32: +;CHECK: vshl.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vshls64: +;CHECK: vshl.s64 + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + ret <1 x i64> %tmp3 +} + +define <8 x i8> @vshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vshlu8: +;CHECK: vshl.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vshlu16: +;CHECK: vshl.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vshlu32: +;CHECK: vshl.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <1 
x i64> @vshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vshlu64: +;CHECK: vshl.u64 + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + ret <1 x i64> %tmp3 +} + +define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vshlQs8: +;CHECK: vshl.s8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vshlQs16: +;CHECK: vshl.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vshlQs32: +;CHECK: vshl.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vshlQs64: +;CHECK: vshl.s64 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i64> %tmp3 +} + +define <16 x i8> @vshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vshlQu8: +;CHECK: vshl.u8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vshlQu16: +;CHECK: vshl.u16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vshlQu32(<4 x i32>* %A, <4 x i32>* %B) 
nounwind { +;CHECK: vshlQu32: +;CHECK: vshl.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vshlQu64: +;CHECK: vshl.u64 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i64> %tmp3 +} + +; For left shifts by immediates, the signedness is irrelevant. +; Test a mix of both signed and unsigned intrinsics. + +define <8 x i8> @vshli8(<8 x i8>* %A) nounwind { +;CHECK: vshli8: +;CHECK: vshl.i8 + %tmp1 = load <8 x i8>* %A + %tmp2 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vshli16(<4 x i16>* %A) nounwind { +;CHECK: vshli16: +;CHECK: vshl.i16 + %tmp1 = load <4 x i16>* %A + %tmp2 = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >) + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vshli32(<2 x i32>* %A) nounwind { +;CHECK: vshli32: +;CHECK: vshl.i32 + %tmp1 = load <2 x i32>* %A + %tmp2 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >) + ret <2 x i32> %tmp2 +} + +define <1 x i64> @vshli64(<1 x i64>* %A) nounwind { +;CHECK: vshli64: +;CHECK: vshl.i64 + %tmp1 = load <1 x i64>* %A + %tmp2 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >) + ret <1 x i64> %tmp2 +} + +define <16 x i8> @vshlQi8(<16 x i8>* %A) nounwind { +;CHECK: vshlQi8: +;CHECK: vshl.i8 + %tmp1 = load <16 x i8>* %A + %tmp2 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) + ret <16 x i8> %tmp2 +} + +define <8 x i16> @vshlQi16(<8 x i16>* 
%A) nounwind { +;CHECK: vshlQi16: +;CHECK: vshl.i16 + %tmp1 = load <8 x i16>* %A + %tmp2 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >) + ret <8 x i16> %tmp2 +} + +define <4 x i32> @vshlQi32(<4 x i32>* %A) nounwind { +;CHECK: vshlQi32: +;CHECK: vshl.i32 + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >) + ret <4 x i32> %tmp2 +} + +define <2 x i64> @vshlQi64(<2 x i64>* %A) nounwind { +;CHECK: vshlQi64: +;CHECK: vshl.i64 + %tmp1 = load <2 x i64>* %A + %tmp2 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >) + ret <2 x i64> %tmp2 +} + +; Right shift by immediate: + +define <8 x i8> @vshrs8(<8 x i8>* %A) nounwind { +;CHECK: vshrs8: +;CHECK: vshr.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vshrs16(<4 x i16>* %A) nounwind { +;CHECK: vshrs16: +;CHECK: vshr.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >) + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vshrs32(<2 x i32>* %A) nounwind { +;CHECK: vshrs32: +;CHECK: vshr.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >) + ret <2 x i32> %tmp2 +} + +define <1 x i64> @vshrs64(<1 x i64>* %A) nounwind { +;CHECK: vshrs64: +;CHECK: vshr.s64 + %tmp1 = load <1 x i64>* %A + %tmp2 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >) + ret <1 x i64> %tmp2 +} + +define <8 x i8> @vshru8(<8 x i8>* %A) nounwind { +;CHECK: vshru8: +;CHECK: vshr.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x 
i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vshru16(<4 x i16>* %A) nounwind { +;CHECK: vshru16: +;CHECK: vshr.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >) + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vshru32(<2 x i32>* %A) nounwind { +;CHECK: vshru32: +;CHECK: vshr.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >) + ret <2 x i32> %tmp2 +} + +define <1 x i64> @vshru64(<1 x i64>* %A) nounwind { +;CHECK: vshru64: +;CHECK: vshr.u64 + %tmp1 = load <1 x i64>* %A + %tmp2 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >) + ret <1 x i64> %tmp2 +} + +define <16 x i8> @vshrQs8(<16 x i8>* %A) nounwind { +;CHECK: vshrQs8: +;CHECK: vshr.s8 + %tmp1 = load <16 x i8>* %A + %tmp2 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) + ret <16 x i8> %tmp2 +} + +define <8 x i16> @vshrQs16(<8 x i16>* %A) nounwind { +;CHECK: vshrQs16: +;CHECK: vshr.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >) + ret <8 x i16> %tmp2 +} + +define <4 x i32> @vshrQs32(<4 x i32>* %A) nounwind { +;CHECK: vshrQs32: +;CHECK: vshr.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >) + ret <4 x i32> %tmp2 +} + +define <2 x i64> @vshrQs64(<2 x i64>* %A) nounwind { +;CHECK: vshrQs64: +;CHECK: vshr.s64 + %tmp1 = load <2 x i64>* %A + %tmp2 = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >) + 
ret <2 x i64> %tmp2 +} + +define <16 x i8> @vshrQu8(<16 x i8>* %A) nounwind { +;CHECK: vshrQu8: +;CHECK: vshr.u8 + %tmp1 = load <16 x i8>* %A + %tmp2 = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) + ret <16 x i8> %tmp2 +} + +define <8 x i16> @vshrQu16(<8 x i16>* %A) nounwind { +;CHECK: vshrQu16: +;CHECK: vshr.u16 + %tmp1 = load <8 x i16>* %A + %tmp2 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >) + ret <8 x i16> %tmp2 +} + +define <4 x i32> @vshrQu32(<4 x i32>* %A) nounwind { +;CHECK: vshrQu32: +;CHECK: vshr.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >) + ret <4 x i32> %tmp2 +} + +define <2 x i64> @vshrQu64(<2 x i64>* %A) nounwind { +;CHECK: vshrQu64: +;CHECK: vshr.u64 + %tmp1 = load <2 x i64>* %A + %tmp2 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >) + ret <2 x i64> %tmp2 +} + +declare <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64>, <1 x i64>) nounwind readnone + +declare <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> 
@llvm.arm.neon.vshifts.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64>, <2 x i64>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone + +define <8 x i8> @vrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vrshls8: +;CHECK: vrshl.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vrshls16: +;CHECK: vrshl.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vrshls32: +;CHECK: vrshl.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <1 x i64> @vrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vrshls64: +;CHECK: vrshl.s64 + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + ret <1 x i64> %tmp3 +} + +define <8 x i8> @vrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vrshlu8: +;CHECK: vrshl.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> 
@vrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vrshlu16: +;CHECK: vrshl.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vrshlu32: +;CHECK: vrshl.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <1 x i64> @vrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vrshlu64: +;CHECK: vrshl.u64 + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + ret <1 x i64> %tmp3 +} + +define <16 x i8> @vrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vrshlQs8: +;CHECK: vrshl.s8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vrshlQs16: +;CHECK: vrshl.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vrshlQs32: +;CHECK: vrshl.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vrshlQs64: +;CHECK: vrshl.s64 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i64> %tmp3 +} + +define <16 x i8> @vrshlQu8(<16 x i8>* %A, 
<16 x i8>* %B) nounwind { +;CHECK: vrshlQu8: +;CHECK: vrshl.u8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vrshlQu16: +;CHECK: vrshl.u16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vrshlQu32: +;CHECK: vrshl.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vrshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vrshlQu64: +;CHECK: vrshl.u64 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i64> %tmp3 +} + +define <8 x i8> @vrshrs8(<8 x i8>* %A) nounwind { +;CHECK: vrshrs8: +;CHECK: vrshr.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vrshrs16(<4 x i16>* %A) nounwind { +;CHECK: vrshrs16: +;CHECK: vrshr.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >) + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vrshrs32(<2 x i32>* %A) nounwind { +;CHECK: vrshrs32: +;CHECK: vrshr.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >) + ret <2 x i32> %tmp2 +} + +define <1 x i64> @vrshrs64(<1 x i64>* %A) nounwind { +;CHECK: vrshrs64: +;CHECK: vrshr.s64 + %tmp1 = load <1 x 
i64>* %A + %tmp2 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >) + ret <1 x i64> %tmp2 +} + +define <8 x i8> @vrshru8(<8 x i8>* %A) nounwind { +;CHECK: vrshru8: +;CHECK: vrshr.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vrshru16(<4 x i16>* %A) nounwind { +;CHECK: vrshru16: +;CHECK: vrshr.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >) + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vrshru32(<2 x i32>* %A) nounwind { +;CHECK: vrshru32: +;CHECK: vrshr.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >) + ret <2 x i32> %tmp2 +} + +define <1 x i64> @vrshru64(<1 x i64>* %A) nounwind { +;CHECK: vrshru64: +;CHECK: vrshr.u64 + %tmp1 = load <1 x i64>* %A + %tmp2 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >) + ret <1 x i64> %tmp2 +} + +define <16 x i8> @vrshrQs8(<16 x i8>* %A) nounwind { +;CHECK: vrshrQs8: +;CHECK: vrshr.s8 + %tmp1 = load <16 x i8>* %A + %tmp2 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) + ret <16 x i8> %tmp2 +} + +define <8 x i16> @vrshrQs16(<8 x i16>* %A) nounwind { +;CHECK: vrshrQs16: +;CHECK: vrshr.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >) + ret <8 x i16> %tmp2 +} + +define <4 x i32> @vrshrQs32(<4 x i32>* %A) nounwind { +;CHECK: vrshrQs32: +;CHECK: vrshr.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x i32> 
@llvm.arm.neon.vrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >) + ret <4 x i32> %tmp2 +} + +define <2 x i64> @vrshrQs64(<2 x i64>* %A) nounwind { +;CHECK: vrshrQs64: +;CHECK: vrshr.s64 + %tmp1 = load <2 x i64>* %A + %tmp2 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >) + ret <2 x i64> %tmp2 +} + +define <16 x i8> @vrshrQu8(<16 x i8>* %A) nounwind { +;CHECK: vrshrQu8: +;CHECK: vrshr.u8 + %tmp1 = load <16 x i8>* %A + %tmp2 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) + ret <16 x i8> %tmp2 +} + +define <8 x i16> @vrshrQu16(<8 x i16>* %A) nounwind { +;CHECK: vrshrQu16: +;CHECK: vrshr.u16 + %tmp1 = load <8 x i16>* %A + %tmp2 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >) + ret <8 x i16> %tmp2 +} + +define <4 x i32> @vrshrQu32(<4 x i32>* %A) nounwind { +;CHECK: vrshrQu32: +;CHECK: vrshr.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >) + ret <4 x i32> %tmp2 +} + +define <2 x i64> @vrshrQu64(<2 x i64>* %A) nounwind { +;CHECK: vrshrQu64: +;CHECK: vrshr.u64 + %tmp1 = load <2 x i64>* %A + %tmp2 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >) + ret <2 x i64> %tmp2 +} + +declare <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64>, <1 x i64>) nounwind readnone + +declare <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone 
+declare <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64>, <2 x i64>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/ARM/vshll.ll b/src/LLVM/test/CodeGen/ARM/vshll.ll new file mode 100644 index 0000000..8e85b98 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vshll.ll
@@ -0,0 +1,83 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i16> @vshlls8(<8 x i8>* %A) nounwind { +;CHECK: vshlls8: +;CHECK: vshll.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = call <8 x i16> @llvm.arm.neon.vshiftls.v8i16(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) + ret <8 x i16> %tmp2 +} + +define <4 x i32> @vshlls16(<4 x i16>* %A) nounwind { +;CHECK: vshlls16: +;CHECK: vshll.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = call <4 x i32> @llvm.arm.neon.vshiftls.v4i32(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >) + ret <4 x i32> %tmp2 +} + +define <2 x i64> @vshlls32(<2 x i32>* %A) nounwind { +;CHECK: vshlls32: +;CHECK: vshll.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = call <2 x i64> @llvm.arm.neon.vshiftls.v2i64(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >) + ret <2 x i64> %tmp2 +} + +define <8 x i16> @vshllu8(<8 x i8>* %A) nounwind { +;CHECK: vshllu8: +;CHECK: vshll.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = call <8 x i16> @llvm.arm.neon.vshiftlu.v8i16(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) + ret <8 x i16> %tmp2 +} + +define <4 x i32> @vshllu16(<4 x i16>* %A) nounwind { +;CHECK: vshllu16: +;CHECK: vshll.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = call <4 x i32> @llvm.arm.neon.vshiftlu.v4i32(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >) + ret <4 x i32> %tmp2 +} + +define <2 x i64> @vshllu32(<2 x i32>* %A) nounwind { +;CHECK: vshllu32: +;CHECK: vshll.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = call <2 x i64> @llvm.arm.neon.vshiftlu.v2i64(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >) + ret <2 x i64> %tmp2 +} + +; The following tests use the maximum shift count, so the signedness is +; irrelevant. Test both signed and unsigned versions. 
+define <8 x i16> @vshlli8(<8 x i8>* %A) nounwind { +;CHECK: vshlli8: +;CHECK: vshll.i8 + %tmp1 = load <8 x i8>* %A + %tmp2 = call <8 x i16> @llvm.arm.neon.vshiftls.v8i16(<8 x i8> %tmp1, <8 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >) + ret <8 x i16> %tmp2 +} + +define <4 x i32> @vshlli16(<4 x i16>* %A) nounwind { +;CHECK: vshlli16: +;CHECK: vshll.i16 + %tmp1 = load <4 x i16>* %A + %tmp2 = call <4 x i32> @llvm.arm.neon.vshiftlu.v4i32(<4 x i16> %tmp1, <4 x i16> < i16 16, i16 16, i16 16, i16 16 >) + ret <4 x i32> %tmp2 +} + +define <2 x i64> @vshlli32(<2 x i32>* %A) nounwind { +;CHECK: vshlli32: +;CHECK: vshll.i32 + %tmp1 = load <2 x i32>* %A + %tmp2 = call <2 x i64> @llvm.arm.neon.vshiftls.v2i64(<2 x i32> %tmp1, <2 x i32> < i32 32, i32 32 >) + ret <2 x i64> %tmp2 +} + +declare <8 x i16> @llvm.arm.neon.vshiftls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vshiftls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vshiftls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone + +declare <8 x i16> @llvm.arm.neon.vshiftlu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vshiftlu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vshiftlu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/ARM/vshrn.ll b/src/LLVM/test/CodeGen/ARM/vshrn.ll new file mode 100644 index 0000000..e2544f4 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vshrn.ll
@@ -0,0 +1,57 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @vshrns8(<8 x i16>* %A) nounwind { +;CHECK: vshrns8: +;CHECK: vshrn.i16 + %tmp1 = load <8 x i16>* %A + %tmp2 = call <8 x i8> @llvm.arm.neon.vshiftn.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >) + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vshrns16(<4 x i32>* %A) nounwind { +;CHECK: vshrns16: +;CHECK: vshrn.i32 + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >) + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vshrns32(<2 x i64>* %A) nounwind { +;CHECK: vshrns32: +;CHECK: vshrn.i64 + %tmp1 = load <2 x i64>* %A + %tmp2 = call <2 x i32> @llvm.arm.neon.vshiftn.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >) + ret <2 x i32> %tmp2 +} + +declare <8 x i8> @llvm.arm.neon.vshiftn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vshiftn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone + +define <8 x i8> @vrshrns8(<8 x i16>* %A) nounwind { +;CHECK: vrshrns8: +;CHECK: vrshrn.i16 + %tmp1 = load <8 x i16>* %A + %tmp2 = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >) + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vrshrns16(<4 x i32>* %A) nounwind { +;CHECK: vrshrns16: +;CHECK: vrshrn.i32 + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >) + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vrshrns32(<2 x i64>* %A) nounwind { +;CHECK: vrshrns32: +;CHECK: vrshrn.i64 + %tmp1 = load <2 x i64>* %A + %tmp2 = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >) + ret <2 x i32> %tmp2 +} + +declare <8 x i8> 
@llvm.arm.neon.vrshiftn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/ARM/vsra.ll b/src/LLVM/test/CodeGen/ARM/vsra.ll new file mode 100644 index 0000000..acb672d --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vsra.ll
@@ -0,0 +1,341 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @vsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vsras8: +;CHECK: vsra.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = ashr <8 x i8> %tmp2, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > + %tmp4 = add <8 x i8> %tmp1, %tmp3 + ret <8 x i8> %tmp4 +} + +define <4 x i16> @vsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vsras16: +;CHECK: vsra.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = ashr <4 x i16> %tmp2, < i16 16, i16 16, i16 16, i16 16 > + %tmp4 = add <4 x i16> %tmp1, %tmp3 + ret <4 x i16> %tmp4 +} + +define <2 x i32> @vsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vsras32: +;CHECK: vsra.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = ashr <2 x i32> %tmp2, < i32 32, i32 32 > + %tmp4 = add <2 x i32> %tmp1, %tmp3 + ret <2 x i32> %tmp4 +} + +define <1 x i64> @vsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vsras64: +;CHECK: vsra.s64 + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = ashr <1 x i64> %tmp2, < i64 64 > + %tmp4 = add <1 x i64> %tmp1, %tmp3 + ret <1 x i64> %tmp4 +} + +define <16 x i8> @vsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vsraQs8: +;CHECK: vsra.s8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = ashr <16 x i8> %tmp2, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > + %tmp4 = add <16 x i8> %tmp1, %tmp3 + ret <16 x i8> %tmp4 +} + +define <8 x i16> @vsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vsraQs16: +;CHECK: vsra.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = ashr <8 x i16> %tmp2, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 > + %tmp4 = add <8 x i16> %tmp1, %tmp3 + ret <8 x i16> %tmp4 +} + +define <4 x i32> @vsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vsraQs32: +;CHECK: 
vsra.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = ashr <4 x i32> %tmp2, < i32 32, i32 32, i32 32, i32 32 > + %tmp4 = add <4 x i32> %tmp1, %tmp3 + ret <4 x i32> %tmp4 +} + +define <2 x i64> @vsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vsraQs64: +;CHECK: vsra.s64 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = ashr <2 x i64> %tmp2, < i64 64, i64 64 > + %tmp4 = add <2 x i64> %tmp1, %tmp3 + ret <2 x i64> %tmp4 +} + +define <8 x i8> @vsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vsrau8: +;CHECK: vsra.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = lshr <8 x i8> %tmp2, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > + %tmp4 = add <8 x i8> %tmp1, %tmp3 + ret <8 x i8> %tmp4 +} + +define <4 x i16> @vsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vsrau16: +;CHECK: vsra.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = lshr <4 x i16> %tmp2, < i16 16, i16 16, i16 16, i16 16 > + %tmp4 = add <4 x i16> %tmp1, %tmp3 + ret <4 x i16> %tmp4 +} + +define <2 x i32> @vsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vsrau32: +;CHECK: vsra.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = lshr <2 x i32> %tmp2, < i32 32, i32 32 > + %tmp4 = add <2 x i32> %tmp1, %tmp3 + ret <2 x i32> %tmp4 +} + +define <1 x i64> @vsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vsrau64: +;CHECK: vsra.u64 + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = lshr <1 x i64> %tmp2, < i64 64 > + %tmp4 = add <1 x i64> %tmp1, %tmp3 + ret <1 x i64> %tmp4 +} + +define <16 x i8> @vsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vsraQu8: +;CHECK: vsra.u8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = lshr <16 x i8> %tmp2, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > + %tmp4 = add <16 x i8> %tmp1, %tmp3 + ret <16 x i8> %tmp4 +} + +define <8 x i16> 
@vsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vsraQu16: +;CHECK: vsra.u16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = lshr <8 x i16> %tmp2, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 > + %tmp4 = add <8 x i16> %tmp1, %tmp3 + ret <8 x i16> %tmp4 +} + +define <4 x i32> @vsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vsraQu32: +;CHECK: vsra.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = lshr <4 x i32> %tmp2, < i32 32, i32 32, i32 32, i32 32 > + %tmp4 = add <4 x i32> %tmp1, %tmp3 + ret <4 x i32> %tmp4 +} + +define <2 x i64> @vsraQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vsraQu64: +;CHECK: vsra.u64 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = lshr <2 x i64> %tmp2, < i64 64, i64 64 > + %tmp4 = add <2 x i64> %tmp1, %tmp3 + ret <2 x i64> %tmp4 +} + +define <8 x i8> @vrsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vrsras8: +;CHECK: vrsra.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %tmp2, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) + %tmp4 = add <8 x i8> %tmp1, %tmp3 + ret <8 x i8> %tmp4 +} + +define <4 x i16> @vrsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vrsras16: +;CHECK: vrsra.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %tmp2, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >) + %tmp4 = add <4 x i16> %tmp1, %tmp3 + ret <4 x i16> %tmp4 +} + +define <2 x i32> @vrsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vrsras32: +;CHECK: vrsra.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %tmp2, <2 x i32> < i32 -32, i32 -32 >) + %tmp4 = add <2 x i32> %tmp1, %tmp3 + ret <2 x i32> %tmp4 +} + +define <1 x i64> @vrsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind { 
+;CHECK: vrsras64: +;CHECK: vrsra.s64 + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %tmp2, <1 x i64> < i64 -64 >) + %tmp4 = add <1 x i64> %tmp1, %tmp3 + ret <1 x i64> %tmp4 +} + +define <8 x i8> @vrsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vrsrau8: +;CHECK: vrsra.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %tmp2, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) + %tmp4 = add <8 x i8> %tmp1, %tmp3 + ret <8 x i8> %tmp4 +} + +define <4 x i16> @vrsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vrsrau16: +;CHECK: vrsra.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %tmp2, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >) + %tmp4 = add <4 x i16> %tmp1, %tmp3 + ret <4 x i16> %tmp4 +} + +define <2 x i32> @vrsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vrsrau32: +;CHECK: vrsra.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %tmp2, <2 x i32> < i32 -32, i32 -32 >) + %tmp4 = add <2 x i32> %tmp1, %tmp3 + ret <2 x i32> %tmp4 +} + +define <1 x i64> @vrsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vrsrau64: +;CHECK: vrsra.u64 + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %tmp2, <1 x i64> < i64 -64 >) + %tmp4 = add <1 x i64> %tmp1, %tmp3 + ret <1 x i64> %tmp4 +} + +define <16 x i8> @vrsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vrsraQs8: +;CHECK: vrsra.s8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %tmp2, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) + %tmp4 = add <16 
x i8> %tmp1, %tmp3 + ret <16 x i8> %tmp4 +} + +define <8 x i16> @vrsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vrsraQs16: +;CHECK: vrsra.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %tmp2, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >) + %tmp4 = add <8 x i16> %tmp1, %tmp3 + ret <8 x i16> %tmp4 +} + +define <4 x i32> @vrsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vrsraQs32: +;CHECK: vrsra.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %tmp2, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >) + %tmp4 = add <4 x i32> %tmp1, %tmp3 + ret <4 x i32> %tmp4 +} + +define <2 x i64> @vrsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vrsraQs64: +;CHECK: vrsra.s64 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %tmp2, <2 x i64> < i64 -64, i64 -64 >) + %tmp4 = add <2 x i64> %tmp1, %tmp3 + ret <2 x i64> %tmp4 +} + +define <16 x i8> @vrsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vrsraQu8: +;CHECK: vrsra.u8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %tmp2, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) + %tmp4 = add <16 x i8> %tmp1, %tmp3 + ret <16 x i8> %tmp4 +} + +define <8 x i16> @vrsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vrsraQu16: +;CHECK: vrsra.u16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %tmp2, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >) + %tmp4 = add <8 x i16> %tmp1, %tmp3 + ret <8 x i16> %tmp4 +} + +define <4 x i32> @vrsraQu32(<4 x i32>* %A, <4 x i32>* %B) 
nounwind { +;CHECK: vrsraQu32: +;CHECK: vrsra.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %tmp2, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >) + %tmp4 = add <4 x i32> %tmp1, %tmp3 + ret <4 x i32> %tmp4 +} + +define <2 x i64> @vrsraQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vrsraQu64: +;CHECK: vrsra.u64 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %tmp2, <2 x i64> < i64 -64, i64 -64 >) + %tmp4 = add <2 x i64> %tmp1, %tmp3 + ret <2 x i64> %tmp4 +} + +declare <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64>, <1 x i64>) nounwind readnone + +declare <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64>, <2 x i64>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> 
@llvm.arm.neon.vrshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/ARM/vst1.ll b/src/LLVM/test/CodeGen/ARM/vst1.ll new file mode 100644 index 0000000..364d44b --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vst1.ll
@@ -0,0 +1,130 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define void @vst1i8(i8* %A, <8 x i8>* %B) nounwind { +;CHECK: vst1i8: +;Check the alignment value. Max for this instruction is 64 bits: +;CHECK: vst1.8 {d16}, [r0, :64] + %tmp1 = load <8 x i8>* %B + call void @llvm.arm.neon.vst1.v8i8(i8* %A, <8 x i8> %tmp1, i32 16) + ret void +} + +define void @vst1i16(i16* %A, <4 x i16>* %B) nounwind { +;CHECK: vst1i16: +;CHECK: vst1.16 + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <4 x i16>* %B + call void @llvm.arm.neon.vst1.v4i16(i8* %tmp0, <4 x i16> %tmp1, i32 1) + ret void +} + +define void @vst1i32(i32* %A, <2 x i32>* %B) nounwind { +;CHECK: vst1i32: +;CHECK: vst1.32 + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>* %B + call void @llvm.arm.neon.vst1.v2i32(i8* %tmp0, <2 x i32> %tmp1, i32 1) + ret void +} + +define void @vst1f(float* %A, <2 x float>* %B) nounwind { +;CHECK: vst1f: +;CHECK: vst1.32 + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <2 x float>* %B + call void @llvm.arm.neon.vst1.v2f32(i8* %tmp0, <2 x float> %tmp1, i32 1) + ret void +} + +;Check for a post-increment updating store. +define void @vst1f_update(float** %ptr, <2 x float>* %B) nounwind { +;CHECK: vst1f_update: +;CHECK: vst1.32 {d16}, [r1]! + %A = load float** %ptr + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <2 x float>* %B + call void @llvm.arm.neon.vst1.v2f32(i8* %tmp0, <2 x float> %tmp1, i32 1) + %tmp2 = getelementptr float* %A, i32 2 + store float* %tmp2, float** %ptr + ret void +} + +define void @vst1i64(i64* %A, <1 x i64>* %B) nounwind { +;CHECK: vst1i64: +;CHECK: vst1.64 + %tmp0 = bitcast i64* %A to i8* + %tmp1 = load <1 x i64>* %B + call void @llvm.arm.neon.vst1.v1i64(i8* %tmp0, <1 x i64> %tmp1, i32 1) + ret void +} + +define void @vst1Qi8(i8* %A, <16 x i8>* %B) nounwind { +;CHECK: vst1Qi8: +;Check the alignment value. 
Max for this instruction is 128 bits: +;CHECK: vst1.8 {d16, d17}, [r0, :64] + %tmp1 = load <16 x i8>* %B + call void @llvm.arm.neon.vst1.v16i8(i8* %A, <16 x i8> %tmp1, i32 8) + ret void +} + +define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind { +;CHECK: vst1Qi16: +;Check the alignment value. Max for this instruction is 128 bits: +;CHECK: vst1.16 {d16, d17}, [r0, :128] + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <8 x i16>* %B + call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %tmp1, i32 32) + ret void +} + +;Check for a post-increment updating store with register increment. +define void @vst1Qi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounwind { +;CHECK: vst1Qi16_update: +;CHECK: vst1.16 {d16, d17}, [r1, :64], r2 + %A = load i16** %ptr + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <8 x i16>* %B + call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %tmp1, i32 8) + %tmp2 = getelementptr i16* %A, i32 %inc + store i16* %tmp2, i16** %ptr + ret void +} + +define void @vst1Qi32(i32* %A, <4 x i32>* %B) nounwind { +;CHECK: vst1Qi32: +;CHECK: vst1.32 + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <4 x i32>* %B + call void @llvm.arm.neon.vst1.v4i32(i8* %tmp0, <4 x i32> %tmp1, i32 1) + ret void +} + +define void @vst1Qf(float* %A, <4 x float>* %B) nounwind { +;CHECK: vst1Qf: +;CHECK: vst1.32 + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <4 x float>* %B + call void @llvm.arm.neon.vst1.v4f32(i8* %tmp0, <4 x float> %tmp1, i32 1) + ret void +} + +define void @vst1Qi64(i64* %A, <2 x i64>* %B) nounwind { +;CHECK: vst1Qi64: +;CHECK: vst1.64 + %tmp0 = bitcast i64* %A to i8* + %tmp1 = load <2 x i64>* %B + call void @llvm.arm.neon.vst1.v2i64(i8* %tmp0, <2 x i64> %tmp1, i32 1) + ret void +} + +declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst1.v2i32(i8*, <2 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>, 
i32) nounwind +declare void @llvm.arm.neon.vst1.v1i64(i8*, <1 x i64>, i32) nounwind + +declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind +declare void @llvm.arm.neon.vst1.v2i64(i8*, <2 x i64>, i32) nounwind
diff --git a/src/LLVM/test/CodeGen/ARM/vst2.ll b/src/LLVM/test/CodeGen/ARM/vst2.ll new file mode 100644 index 0000000..915a84b --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vst2.ll
@@ -0,0 +1,122 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define void @vst2i8(i8* %A, <8 x i8>* %B) nounwind { +;CHECK: vst2i8: +;Check the alignment value. Max for this instruction is 128 bits: +;CHECK: vst2.8 {d16, d17}, [r0, :64] + %tmp1 = load <8 x i8>* %B + call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 8) + ret void +} + +;Check for a post-increment updating store with register increment. +define void @vst2i8_update(i8** %ptr, <8 x i8>* %B, i32 %inc) nounwind { +;CHECK: vst2i8_update: +;CHECK: vst2.8 {d16, d17}, [r1], r2 + %A = load i8** %ptr + %tmp1 = load <8 x i8>* %B + call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 4) + %tmp2 = getelementptr i8* %A, i32 %inc + store i8* %tmp2, i8** %ptr + ret void +} + +define void @vst2i16(i16* %A, <4 x i16>* %B) nounwind { +;CHECK: vst2i16: +;Check the alignment value. Max for this instruction is 128 bits: +;CHECK: vst2.16 {d16, d17}, [r0, :128] + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <4 x i16>* %B + call void @llvm.arm.neon.vst2.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 32) + ret void +} + +define void @vst2i32(i32* %A, <2 x i32>* %B) nounwind { +;CHECK: vst2i32: +;CHECK: vst2.32 + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>* %B + call void @llvm.arm.neon.vst2.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) + ret void +} + +define void @vst2f(float* %A, <2 x float>* %B) nounwind { +;CHECK: vst2f: +;CHECK: vst2.32 + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <2 x float>* %B + call void @llvm.arm.neon.vst2.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) + ret void +} + +define void @vst2i64(i64* %A, <1 x i64>* %B) nounwind { +;CHECK: vst2i64: +;Check the alignment value. 
Max for this instruction is 128 bits: +;CHECK: vst1.64 {d16, d17}, [r0, :128] + %tmp0 = bitcast i64* %A to i8* + %tmp1 = load <1 x i64>* %B + call void @llvm.arm.neon.vst2.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 32) + ret void +} + +;Check for a post-increment updating store. +define void @vst2i64_update(i64** %ptr, <1 x i64>* %B) nounwind { +;CHECK: vst2i64_update: +;CHECK: vst1.64 {d16, d17}, [r1, :64]! + %A = load i64** %ptr + %tmp0 = bitcast i64* %A to i8* + %tmp1 = load <1 x i64>* %B + call void @llvm.arm.neon.vst2.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 8) + %tmp2 = getelementptr i64* %A, i32 2 + store i64* %tmp2, i64** %ptr + ret void +} + +define void @vst2Qi8(i8* %A, <16 x i8>* %B) nounwind { +;CHECK: vst2Qi8: +;Check the alignment value. Max for this instruction is 256 bits: +;CHECK: vst2.8 {d16, d17, d18, d19}, [r0, :64] + %tmp1 = load <16 x i8>* %B + call void @llvm.arm.neon.vst2.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 8) + ret void +} + +define void @vst2Qi16(i16* %A, <8 x i16>* %B) nounwind { +;CHECK: vst2Qi16: +;Check the alignment value. Max for this instruction is 256 bits: +;CHECK: vst2.16 {d16, d17, d18, d19}, [r0, :128] + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <8 x i16>* %B + call void @llvm.arm.neon.vst2.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 16) + ret void +} + +define void @vst2Qi32(i32* %A, <4 x i32>* %B) nounwind { +;CHECK: vst2Qi32: +;Check the alignment value. 
Max for this instruction is 256 bits: +;CHECK: vst2.32 {d16, d17, d18, d19}, [r0, :256] + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <4 x i32>* %B + call void @llvm.arm.neon.vst2.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 64) + ret void +} + +define void @vst2Qf(float* %A, <4 x float>* %B) nounwind { +;CHECK: vst2Qf: +;CHECK: vst2.32 + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <4 x float>* %B + call void @llvm.arm.neon.vst2.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) + ret void +} + +declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst2.v2f32(i8*, <2 x float>, <2 x float>, i32) nounwind +declare void @llvm.arm.neon.vst2.v1i64(i8*, <1 x i64>, <1 x i64>, i32) nounwind + +declare void @llvm.arm.neon.vst2.v16i8(i8*, <16 x i8>, <16 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst2.v8i16(i8*, <8 x i16>, <8 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind
diff --git a/src/LLVM/test/CodeGen/ARM/vst3.ll b/src/LLVM/test/CodeGen/ARM/vst3.ll new file mode 100644 index 0000000..e3372a0 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vst3.ll
@@ -0,0 +1,128 @@ +; RUN: llc < %s -march=arm -mattr=+neon -disable-arm-fast-isel -O0 | FileCheck %s + +define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind { +;CHECK: vst3i8: +;Check the alignment value. Max for this instruction is 64 bits: +;This test runs at -O0 so do not check for specific register numbers. +;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}, :64] + %tmp1 = load <8 x i8>* %B + call void @llvm.arm.neon.vst3.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 32) + ret void +} + +define void @vst3i16(i16* %A, <4 x i16>* %B) nounwind { +;CHECK: vst3i16: +;CHECK: vst3.16 + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <4 x i16>* %B + call void @llvm.arm.neon.vst3.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1) + ret void +} + +define void @vst3i32(i32* %A, <2 x i32>* %B) nounwind { +;CHECK: vst3i32: +;CHECK: vst3.32 + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>* %B + call void @llvm.arm.neon.vst3.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) + ret void +} + +;Check for a post-increment updating store. +define void @vst3i32_update(i32** %ptr, <2 x i32>* %B) nounwind { +;CHECK: vst3i32_update: +;CHECK: vst3.32 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]! + %A = load i32** %ptr + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>* %B + call void @llvm.arm.neon.vst3.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) + %tmp2 = getelementptr i32* %A, i32 6 + store i32* %tmp2, i32** %ptr + ret void +} + +define void @vst3f(float* %A, <2 x float>* %B) nounwind { +;CHECK: vst3f: +;CHECK: vst3.32 + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <2 x float>* %B + call void @llvm.arm.neon.vst3.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) + ret void +} + +define void @vst3i64(i64* %A, <1 x i64>* %B) nounwind { +;CHECK: vst3i64: +;Check the alignment value. 
Max for this instruction is 64 bits: +;This test runs at -O0 so do not check for specific register numbers. +;CHECK: vst1.64 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}, :64] + %tmp0 = bitcast i64* %A to i8* + %tmp1 = load <1 x i64>* %B + call void @llvm.arm.neon.vst3.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 16) + ret void +} + +define void @vst3Qi8(i8* %A, <16 x i8>* %B) nounwind { +;CHECK: vst3Qi8: +;Check the alignment value. Max for this instruction is 64 bits: +;This test runs at -O0 so do not check for specific register numbers. +;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}, :64]! +;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}, :64] + %tmp1 = load <16 x i8>* %B + call void @llvm.arm.neon.vst3.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 32) + ret void +} + +define void @vst3Qi16(i16* %A, <8 x i16>* %B) nounwind { +;CHECK: vst3Qi16: +;CHECK: vst3.16 +;CHECK: vst3.16 + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <8 x i16>* %B + call void @llvm.arm.neon.vst3.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1) + ret void +} + +;Check for a post-increment updating store. +define void @vst3Qi16_update(i16** %ptr, <8 x i16>* %B) nounwind { +;CHECK: vst3Qi16_update: +;CHECK: vst3.16 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]! +;CHECK: vst3.16 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]! 
+ %A = load i16** %ptr + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <8 x i16>* %B + call void @llvm.arm.neon.vst3.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1) + %tmp2 = getelementptr i16* %A, i32 24 + store i16* %tmp2, i16** %ptr + ret void +} + +define void @vst3Qi32(i32* %A, <4 x i32>* %B) nounwind { +;CHECK: vst3Qi32: +;CHECK: vst3.32 +;CHECK: vst3.32 + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <4 x i32>* %B + call void @llvm.arm.neon.vst3.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1) + ret void +} + +define void @vst3Qf(float* %A, <4 x float>* %B) nounwind { +;CHECK: vst3Qf: +;CHECK: vst3.32 +;CHECK: vst3.32 + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <4 x float>* %B + call void @llvm.arm.neon.vst3.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) + ret void +} + +declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32) nounwind +declare void @llvm.arm.neon.vst3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32) nounwind + +declare void @llvm.arm.neon.vst3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst3.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst3.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32) nounwind
diff --git a/src/LLVM/test/CodeGen/ARM/vst4.ll b/src/LLVM/test/CodeGen/ARM/vst4.ll new file mode 100644 index 0000000..e94acb6 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vst4.ll
@@ -0,0 +1,127 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind { +;CHECK: vst4i8: +;Check the alignment value. Max for this instruction is 256 bits: +;CHECK: vst4.8 {d16, d17, d18, d19}, [r0, :64] + %tmp1 = load <8 x i8>* %B + call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 8) + ret void +} + +;Check for a post-increment updating store with register increment. +define void @vst4i8_update(i8** %ptr, <8 x i8>* %B, i32 %inc) nounwind { +;CHECK: vst4i8_update: +;CHECK: vst4.8 {d16, d17, d18, d19}, [r1, :128], r2 + %A = load i8** %ptr + %tmp1 = load <8 x i8>* %B + call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 16) + %tmp2 = getelementptr i8* %A, i32 %inc + store i8* %tmp2, i8** %ptr + ret void +} + +define void @vst4i16(i16* %A, <4 x i16>* %B) nounwind { +;CHECK: vst4i16: +;Check the alignment value. Max for this instruction is 256 bits: +;CHECK: vst4.16 {d16, d17, d18, d19}, [r0, :128] + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <4 x i16>* %B + call void @llvm.arm.neon.vst4.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 16) + ret void +} + +define void @vst4i32(i32* %A, <2 x i32>* %B) nounwind { +;CHECK: vst4i32: +;Check the alignment value. 
Max for this instruction is 256 bits: +;CHECK: vst4.32 {d16, d17, d18, d19}, [r0, :256] + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>* %B + call void @llvm.arm.neon.vst4.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 32) + ret void +} + +define void @vst4f(float* %A, <2 x float>* %B) nounwind { +;CHECK: vst4f: +;CHECK: vst4.32 + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <2 x float>* %B + call void @llvm.arm.neon.vst4.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) + ret void +} + +define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind { +;CHECK: vst4i64: +;Check the alignment value. Max for this instruction is 256 bits: +;CHECK: vst1.64 {d16, d17, d18, d19}, [r0, :256] + %tmp0 = bitcast i64* %A to i8* + %tmp1 = load <1 x i64>* %B + call void @llvm.arm.neon.vst4.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 64) + ret void +} + +define void @vst4Qi8(i8* %A, <16 x i8>* %B) nounwind { +;CHECK: vst4Qi8: +;Check the alignment value. Max for this instruction is 256 bits: +;CHECK: vst4.8 {d16, d18, d20, d22}, [r0, :256]! +;CHECK: vst4.8 {d17, d19, d21, d23}, [r0, :256] + %tmp1 = load <16 x i8>* %B + call void @llvm.arm.neon.vst4.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 64) + ret void +} + +define void @vst4Qi16(i16* %A, <8 x i16>* %B) nounwind { +;CHECK: vst4Qi16: +;Check for no alignment specifier. +;CHECK: vst4.16 {d16, d18, d20, d22}, [r0]! 
+;CHECK: vst4.16 {d17, d19, d21, d23}, [r0] + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <8 x i16>* %B + call void @llvm.arm.neon.vst4.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1) + ret void +} + +define void @vst4Qi32(i32* %A, <4 x i32>* %B) nounwind { +;CHECK: vst4Qi32: +;CHECK: vst4.32 +;CHECK: vst4.32 + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <4 x i32>* %B + call void @llvm.arm.neon.vst4.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1) + ret void +} + +define void @vst4Qf(float* %A, <4 x float>* %B) nounwind { +;CHECK: vst4Qf: +;CHECK: vst4.32 +;CHECK: vst4.32 + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <4 x float>* %B + call void @llvm.arm.neon.vst4.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) + ret void +} + +;Check for a post-increment updating store. +define void @vst4Qf_update(float** %ptr, <4 x float>* %B) nounwind { +;CHECK: vst4Qf_update: +;CHECK: vst4.32 {d16, d18, d20, d22}, [r1]! +;CHECK: vst4.32 {d17, d19, d21, d23}, [r1]! 
+ %A = load float** %ptr + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <4 x float>* %B + call void @llvm.arm.neon.vst4.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) + %tmp2 = getelementptr float* %A, i32 16 + store float* %tmp2, float** %ptr + ret void +} + +declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) nounwind +declare void @llvm.arm.neon.vst4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32) nounwind + +declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) nounwind
diff --git a/src/LLVM/test/CodeGen/ARM/vstlane.ll b/src/LLVM/test/CodeGen/ARM/vstlane.ll new file mode 100644 index 0000000..08b7232 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vstlane.ll
@@ -0,0 +1,368 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define void @vst1lanei8(i8* %A, <8 x i8>* %B) nounwind { +;CHECK: vst1lanei8: +;Check the (default) alignment. +;CHECK: vst1.8 {d16[3]}, [r0] + %tmp1 = load <8 x i8>* %B + %tmp2 = extractelement <8 x i8> %tmp1, i32 3 + store i8 %tmp2, i8* %A, align 8 + ret void +} + +;Check for a post-increment updating store. +define void @vst1lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind { +;CHECK: vst1lanei8_update: +;CHECK: vst1.8 {d16[3]}, [r2]! + %A = load i8** %ptr + %tmp1 = load <8 x i8>* %B + %tmp2 = extractelement <8 x i8> %tmp1, i32 3 + store i8 %tmp2, i8* %A, align 8 + %tmp3 = getelementptr i8* %A, i32 1 + store i8* %tmp3, i8** %ptr + ret void +} + +define void @vst1lanei16(i16* %A, <4 x i16>* %B) nounwind { +;CHECK: vst1lanei16: +;Check the alignment value. Max for this instruction is 16 bits: +;CHECK: vst1.16 {d16[2]}, [r0, :16] + %tmp1 = load <4 x i16>* %B + %tmp2 = extractelement <4 x i16> %tmp1, i32 2 + store i16 %tmp2, i16* %A, align 8 + ret void +} + +define void @vst1lanei32(i32* %A, <2 x i32>* %B) nounwind { +;CHECK: vst1lanei32: +;Check the alignment value. Max for this instruction is 32 bits: +;CHECK: vst1.32 {d16[1]}, [r0, :32] + %tmp1 = load <2 x i32>* %B + %tmp2 = extractelement <2 x i32> %tmp1, i32 1 + store i32 %tmp2, i32* %A, align 8 + ret void +} + +define void @vst1lanef(float* %A, <2 x float>* %B) nounwind { +;CHECK: vst1lanef: +;CHECK: vst1.32 {d16[1]}, [r0] + %tmp1 = load <2 x float>* %B + %tmp2 = extractelement <2 x float> %tmp1, i32 1 + store float %tmp2, float* %A + ret void +} + +define void @vst1laneQi8(i8* %A, <16 x i8>* %B) nounwind { +;CHECK: vst1laneQi8: +; // Can use scalar load. No need to use vectors. 
+; // CHE-CK: vst1.8 {d17[1]}, [r0] + %tmp1 = load <16 x i8>* %B + %tmp2 = extractelement <16 x i8> %tmp1, i32 9 + store i8 %tmp2, i8* %A, align 8 + ret void +} + +define void @vst1laneQi16(i16* %A, <8 x i16>* %B) nounwind { +;CHECK: vst1laneQi16: +;CHECK: vst1.16 {d17[1]}, [r0, :16] + %tmp1 = load <8 x i16>* %B + %tmp2 = extractelement <8 x i16> %tmp1, i32 5 + store i16 %tmp2, i16* %A, align 8 + ret void +} + +define void @vst1laneQi32(i32* %A, <4 x i32>* %B) nounwind { +;CHECK: vst1laneQi32: +; // Can use scalar load. No need to use vectors. +; // CHE-CK: vst1.32 {d17[1]}, [r0, :32] + %tmp1 = load <4 x i32>* %B + %tmp2 = extractelement <4 x i32> %tmp1, i32 3 + store i32 %tmp2, i32* %A, align 8 + ret void +} + +;Check for a post-increment updating store. +define void @vst1laneQi32_update(i32** %ptr, <4 x i32>* %B) nounwind { +;CHECK: vst1laneQi32_update: +; // Can use scalar load. No need to use vectors. +; // CHE-CK: vst1.32 {d17[1]}, [r1, :32]! + %A = load i32** %ptr + %tmp1 = load <4 x i32>* %B + %tmp2 = extractelement <4 x i32> %tmp1, i32 3 + store i32 %tmp2, i32* %A, align 8 + %tmp3 = getelementptr i32* %A, i32 1 + store i32* %tmp3, i32** %ptr + ret void +} + +define void @vst1laneQf(float* %A, <4 x float>* %B) nounwind { +;CHECK: vst1laneQf: +; // Can use scalar load. No need to use vectors. +; // CHE-CK: vst1.32 {d17[1]}, [r0] + %tmp1 = load <4 x float>* %B + %tmp2 = extractelement <4 x float> %tmp1, i32 3 + store float %tmp2, float* %A + ret void +} + +define void @vst2lanei8(i8* %A, <8 x i8>* %B) nounwind { +;CHECK: vst2lanei8: +;Check the alignment value. Max for this instruction is 16 bits: +;CHECK: vst2.8 {d16[1], d17[1]}, [r0, :16] + %tmp1 = load <8 x i8>* %B + call void @llvm.arm.neon.vst2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4) + ret void +} + +define void @vst2lanei16(i16* %A, <4 x i16>* %B) nounwind { +;CHECK: vst2lanei16: +;Check the alignment value. 
Max for this instruction is 32 bits: +;CHECK: vst2.16 {d16[1], d17[1]}, [r0, :32] + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <4 x i16>* %B + call void @llvm.arm.neon.vst2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8) + ret void +} + +;Check for a post-increment updating store with register increment. +define void @vst2lanei16_update(i16** %ptr, <4 x i16>* %B, i32 %inc) nounwind { +;CHECK: vst2lanei16_update: +;CHECK: vst2.16 {d16[1], d17[1]}, [r1], r2 + %A = load i16** %ptr + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <4 x i16>* %B + call void @llvm.arm.neon.vst2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 2) + %tmp2 = getelementptr i16* %A, i32 %inc + store i16* %tmp2, i16** %ptr + ret void +} + +define void @vst2lanei32(i32* %A, <2 x i32>* %B) nounwind { +;CHECK: vst2lanei32: +;CHECK: vst2.32 + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>* %B + call void @llvm.arm.neon.vst2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) + ret void +} + +define void @vst2lanef(float* %A, <2 x float>* %B) nounwind { +;CHECK: vst2lanef: +;CHECK: vst2.32 + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <2 x float>* %B + call void @llvm.arm.neon.vst2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1) + ret void +} + +define void @vst2laneQi16(i16* %A, <8 x i16>* %B) nounwind { +;CHECK: vst2laneQi16: +;Check the (default) alignment. +;CHECK: vst2.16 {d17[1], d19[1]}, [r0] + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <8 x i16>* %B + call void @llvm.arm.neon.vst2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1) + ret void +} + +define void @vst2laneQi32(i32* %A, <4 x i32>* %B) nounwind { +;CHECK: vst2laneQi32: +;Check the alignment value. 
Max for this instruction is 64 bits: +;CHECK: vst2.32 {d17[0], d19[0]}, [r0, :64] + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <4 x i32>* %B + call void @llvm.arm.neon.vst2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16) + ret void +} + +define void @vst2laneQf(float* %A, <4 x float>* %B) nounwind { +;CHECK: vst2laneQf: +;CHECK: vst2.32 + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <4 x float>* %B + call void @llvm.arm.neon.vst2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 3, i32 1) + ret void +} + +declare void @llvm.arm.neon.vst2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind +declare void @llvm.arm.neon.vst2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind +declare void @llvm.arm.neon.vst2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind +declare void @llvm.arm.neon.vst2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32) nounwind + +declare void @llvm.arm.neon.vst2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) nounwind +declare void @llvm.arm.neon.vst2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) nounwind +declare void @llvm.arm.neon.vst2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32) nounwind + +define void @vst3lanei8(i8* %A, <8 x i8>* %B) nounwind { +;CHECK: vst3lanei8: +;CHECK: vst3.8 + %tmp1 = load <8 x i8>* %B + call void @llvm.arm.neon.vst3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) + ret void +} + +define void @vst3lanei16(i16* %A, <4 x i16>* %B) nounwind { +;CHECK: vst3lanei16: +;Check the (default) alignment value. VST3 does not support alignment. 
+;CHECK: vst3.16 {d16[1], d17[1], d18[1]}, [r0] + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <4 x i16>* %B + call void @llvm.arm.neon.vst3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8) + ret void +} + +define void @vst3lanei32(i32* %A, <2 x i32>* %B) nounwind { +;CHECK: vst3lanei32: +;CHECK: vst3.32 + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>* %B + call void @llvm.arm.neon.vst3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) + ret void +} + +define void @vst3lanef(float* %A, <2 x float>* %B) nounwind { +;CHECK: vst3lanef: +;CHECK: vst3.32 + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <2 x float>* %B + call void @llvm.arm.neon.vst3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1) + ret void +} + +define void @vst3laneQi16(i16* %A, <8 x i16>* %B) nounwind { +;CHECK: vst3laneQi16: +;Check the (default) alignment value. VST3 does not support alignment. +;CHECK: vst3.16 {d17[2], d19[2], d21[2]}, [r0] + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <8 x i16>* %B + call void @llvm.arm.neon.vst3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 6, i32 8) + ret void +} + +define void @vst3laneQi32(i32* %A, <4 x i32>* %B) nounwind { +;CHECK: vst3laneQi32: +;CHECK: vst3.32 + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <4 x i32>* %B + call void @llvm.arm.neon.vst3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 0, i32 1) + ret void +} + +;Check for a post-increment updating store. +define void @vst3laneQi32_update(i32** %ptr, <4 x i32>* %B) nounwind { +;CHECK: vst3laneQi32_update: +;CHECK: vst3.32 {d16[0], d18[0], d20[0]}, [r1]! 
+ %A = load i32** %ptr + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <4 x i32>* %B + call void @llvm.arm.neon.vst3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 0, i32 1) + %tmp2 = getelementptr i32* %A, i32 3 + store i32* %tmp2, i32** %ptr + ret void +} + +define void @vst3laneQf(float* %A, <4 x float>* %B) nounwind { +;CHECK: vst3laneQf: +;CHECK: vst3.32 + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <4 x float>* %B + call void @llvm.arm.neon.vst3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1) + ret void +} + +declare void @llvm.arm.neon.vst3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind + +declare void @llvm.arm.neon.vst3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind + + +define void @vst4lanei8(i8* %A, <8 x i8>* %B) nounwind { +;CHECK: vst4lanei8: +;Check the alignment value. Max for this instruction is 32 bits: +;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32] + %tmp1 = load <8 x i8>* %B + call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8) + ret void +} + +;Check for a post-increment updating store. +define void @vst4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind { +;CHECK: vst4lanei8_update: +;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32]! 
+ %A = load i8** %ptr + %tmp1 = load <8 x i8>* %B + call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8) + %tmp2 = getelementptr i8* %A, i32 4 + store i8* %tmp2, i8** %ptr + ret void +} + +define void @vst4lanei16(i16* %A, <4 x i16>* %B) nounwind { +;CHECK: vst4lanei16: +;CHECK: vst4.16 + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <4 x i16>* %B + call void @llvm.arm.neon.vst4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1) + ret void +} + +define void @vst4lanei32(i32* %A, <2 x i32>* %B) nounwind { +;CHECK: vst4lanei32: +;Check the alignment value. Max for this instruction is 128 bits: +;CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128] + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>* %B + call void @llvm.arm.neon.vst4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 16) + ret void +} + +define void @vst4lanef(float* %A, <2 x float>* %B) nounwind { +;CHECK: vst4lanef: +;CHECK: vst4.32 + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <2 x float>* %B + call void @llvm.arm.neon.vst4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1) + ret void +} + +define void @vst4laneQi16(i16* %A, <8 x i16>* %B) nounwind { +;CHECK: vst4laneQi16: +;Check the alignment value. Max for this instruction is 64 bits: +;CHECK: vst4.16 {d17[3], d19[3], d21[3], d23[3]}, [r0, :64] + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <8 x i16>* %B + call void @llvm.arm.neon.vst4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 7, i32 16) + ret void +} + +define void @vst4laneQi32(i32* %A, <4 x i32>* %B) nounwind { +;CHECK: vst4laneQi32: +;Check the (default) alignment. 
+;CHECK: vst4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0] + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <4 x i32>* %B + call void @llvm.arm.neon.vst4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1) + ret void +} + +define void @vst4laneQf(float* %A, <4 x float>* %B) nounwind { +;CHECK: vst4laneQf: +;CHECK: vst4.32 + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <4 x float>* %B + call void @llvm.arm.neon.vst4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1) + ret void +} + +declare void @llvm.arm.neon.vst4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind + +declare void @llvm.arm.neon.vst4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind
diff --git a/src/LLVM/test/CodeGen/ARM/vsub.ll b/src/LLVM/test/CodeGen/ARM/vsub.ll new file mode 100644 index 0000000..df77bb3 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vsub.ll
@@ -0,0 +1,279 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @vsubi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vsubi8: +;CHECK: vsub.i8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = sub <8 x i8> %tmp1, %tmp2 + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vsubi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vsubi16: +;CHECK: vsub.i16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = sub <4 x i16> %tmp1, %tmp2 + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vsubi32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vsubi32: +;CHECK: vsub.i32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = sub <2 x i32> %tmp1, %tmp2 + ret <2 x i32> %tmp3 +} + +define <1 x i64> @vsubi64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vsubi64: +;CHECK: vsub.i64 + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = sub <1 x i64> %tmp1, %tmp2 + ret <1 x i64> %tmp3 +} + +define <2 x float> @vsubf32(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK: vsubf32: +;CHECK: vsub.f32 + %tmp1 = load <2 x float>* %A + %tmp2 = load <2 x float>* %B + %tmp3 = fsub <2 x float> %tmp1, %tmp2 + ret <2 x float> %tmp3 +} + +define <16 x i8> @vsubQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vsubQi8: +;CHECK: vsub.i8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = sub <16 x i8> %tmp1, %tmp2 + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vsubQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vsubQi16: +;CHECK: vsub.i16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = sub <8 x i16> %tmp1, %tmp2 + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vsubQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vsubQi32: +;CHECK: vsub.i32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = sub <4 x i32> %tmp1, %tmp2 + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vsubQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vsubQi64: +;CHECK: vsub.i64 + 
%tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = sub <2 x i64> %tmp1, %tmp2 + ret <2 x i64> %tmp3 +} + +define <4 x float> @vsubQf32(<4 x float>* %A, <4 x float>* %B) nounwind { +;CHECK: vsubQf32: +;CHECK: vsub.f32 + %tmp1 = load <4 x float>* %A + %tmp2 = load <4 x float>* %B + %tmp3 = fsub <4 x float> %tmp1, %tmp2 + ret <4 x float> %tmp3 +} + +define <8 x i8> @vsubhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vsubhni16: +;CHECK: vsubhn.i16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vsubhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vsubhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vsubhni32: +;CHECK: vsubhn.i32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vsubhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vsubhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vsubhni64: +;CHECK: vsubhn.i64 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vsubhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i32> %tmp3 +} + +declare <8 x i8> @llvm.arm.neon.vsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vsubhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vsubhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone + +define <8 x i8> @vrsubhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vrsubhni16: +;CHECK: vrsubhn.i16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vrsubhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vrsubhni32: +;CHECK: vrsubhn.i32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %tmp1, <4 x i32> 
%tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vrsubhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vrsubhni64: +;CHECK: vrsubhn.i64 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i32> %tmp3 +} + +declare <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone + +define <8 x i16> @vsubls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vsubls8: +;CHECK: vsubl.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = sext <8 x i8> %tmp1 to <8 x i16> + %tmp4 = sext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = sub <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 +} + +define <4 x i32> @vsubls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vsubls16: +;CHECK: vsubl.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = sext <4 x i16> %tmp1 to <4 x i32> + %tmp4 = sext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = sub <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 +} + +define <2 x i64> @vsubls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vsubls32: +;CHECK: vsubl.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = sext <2 x i32> %tmp1 to <2 x i64> + %tmp4 = sext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = sub <2 x i64> %tmp3, %tmp4 + ret <2 x i64> %tmp5 +} + +define <8 x i16> @vsublu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vsublu8: +;CHECK: vsubl.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = zext <8 x i8> %tmp1 to <8 x i16> + %tmp4 = zext <8 x i8> %tmp2 to <8 x i16> + %tmp5 = sub <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 +} + +define <4 x i32> @vsublu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vsublu16: +;CHECK: vsubl.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B 
+ %tmp3 = zext <4 x i16> %tmp1 to <4 x i32> + %tmp4 = zext <4 x i16> %tmp2 to <4 x i32> + %tmp5 = sub <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 +} + +define <2 x i64> @vsublu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vsublu32: +;CHECK: vsubl.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = zext <2 x i32> %tmp1 to <2 x i64> + %tmp4 = zext <2 x i32> %tmp2 to <2 x i64> + %tmp5 = sub <2 x i64> %tmp3, %tmp4 + ret <2 x i64> %tmp5 +} + +define <8 x i16> @vsubws8(<8 x i16>* %A, <8 x i8>* %B) nounwind { +;CHECK: vsubws8: +;CHECK: vsubw.s8 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = sext <8 x i8> %tmp2 to <8 x i16> + %tmp4 = sub <8 x i16> %tmp1, %tmp3 + ret <8 x i16> %tmp4 +} + +define <4 x i32> @vsubws16(<4 x i32>* %A, <4 x i16>* %B) nounwind { +;CHECK: vsubws16: +;CHECK: vsubw.s16 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = sext <4 x i16> %tmp2 to <4 x i32> + %tmp4 = sub <4 x i32> %tmp1, %tmp3 + ret <4 x i32> %tmp4 +} + +define <2 x i64> @vsubws32(<2 x i64>* %A, <2 x i32>* %B) nounwind { +;CHECK: vsubws32: +;CHECK: vsubw.s32 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = sext <2 x i32> %tmp2 to <2 x i64> + %tmp4 = sub <2 x i64> %tmp1, %tmp3 + ret <2 x i64> %tmp4 +} + +define <8 x i16> @vsubwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind { +;CHECK: vsubwu8: +;CHECK: vsubw.u8 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = zext <8 x i8> %tmp2 to <8 x i16> + %tmp4 = sub <8 x i16> %tmp1, %tmp3 + ret <8 x i16> %tmp4 +} + +define <4 x i32> @vsubwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind { +;CHECK: vsubwu16: +;CHECK: vsubw.u16 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = zext <4 x i16> %tmp2 to <4 x i32> + %tmp4 = sub <4 x i32> %tmp1, %tmp3 + ret <4 x i32> %tmp4 +} + +define <2 x i64> @vsubwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind { +;CHECK: vsubwu32: +;CHECK: vsubw.u32 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = 
zext <2 x i32> %tmp2 to <2 x i64> + %tmp4 = sub <2 x i64> %tmp1, %tmp3 + ret <2 x i64> %tmp4 +}
diff --git a/src/LLVM/test/CodeGen/ARM/vtbl.ll b/src/LLVM/test/CodeGen/ARM/vtbl.ll new file mode 100644 index 0000000..9264987 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vtbl.ll
@@ -0,0 +1,109 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> } +%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> } +%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } + +define <8 x i8> @vtbl1(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vtbl1: +;CHECK: vtbl.8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <8 x i8> @vtbl2(<8 x i8>* %A, %struct.__neon_int8x8x2_t* %B) nounwind { +;CHECK: vtbl2: +;CHECK: vtbl.8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load %struct.__neon_int8x8x2_t* %B + %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1 + %tmp5 = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4) + ret <8 x i8> %tmp5 +} + +define <8 x i8> @vtbl3(<8 x i8>* %A, %struct.__neon_int8x8x3_t* %B) nounwind { +;CHECK: vtbl3: +;CHECK: vtbl.8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load %struct.__neon_int8x8x3_t* %B + %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2 + %tmp6 = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5) + ret <8 x i8> %tmp6 +} + +define <8 x i8> @vtbl4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B) nounwind { +;CHECK: vtbl4: +;CHECK: vtbl.8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load %struct.__neon_int8x8x4_t* %B + %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3 + %tmp7 = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6) + ret <8 x 
i8> %tmp7 +} + +define <8 x i8> @vtbx1(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { +;CHECK: vtbx1: +;CHECK: vtbx.8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = load <8 x i8>* %C + %tmp4 = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3) + ret <8 x i8> %tmp4 +} + +define <8 x i8> @vtbx2(<8 x i8>* %A, %struct.__neon_int8x8x2_t* %B, <8 x i8>* %C) nounwind { +;CHECK: vtbx2: +;CHECK: vtbx.8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load %struct.__neon_int8x8x2_t* %B + %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1 + %tmp5 = load <8 x i8>* %C + %tmp6 = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5) + ret <8 x i8> %tmp6 +} + +define <8 x i8> @vtbx3(<8 x i8>* %A, %struct.__neon_int8x8x3_t* %B, <8 x i8>* %C) nounwind { +;CHECK: vtbx3: +;CHECK: vtbx.8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load %struct.__neon_int8x8x3_t* %B + %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2 + %tmp6 = load <8 x i8>* %C + %tmp7 = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6) + ret <8 x i8> %tmp7 +} + +define <8 x i8> @vtbx4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B, <8 x i8>* %C) nounwind { +;CHECK: vtbx4: +;CHECK: vtbx.8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load %struct.__neon_int8x8x4_t* %B + %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3 + %tmp7 = load <8 x i8>* %C + %tmp8 = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6, <8 x i8> %tmp7) + ret <8 x i8> %tmp8 +} + +declare <8 x 
i8> @llvm.arm.neon.vtbl1(<8 x i8>, <8 x i8>) nounwind readnone +declare <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone +declare <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone +declare <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone + +declare <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone +declare <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone +declare <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone +declare <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/ARM/vtrn.ll b/src/LLVM/test/CodeGen/ARM/vtrn.ll new file mode 100644 index 0000000..b1c2f93 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vtrn.ll
@@ -0,0 +1,124 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vtrni8: +;CHECK: vtrn.8 +;CHECK-NEXT: vadd.i8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> + %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> + %tmp5 = add <8 x i8> %tmp3, %tmp4 + ret <8 x i8> %tmp5 +} + +define <4 x i16> @vtrni16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vtrni16: +;CHECK: vtrn.16 +;CHECK-NEXT: vadd.i16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> + %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> + %tmp5 = add <4 x i16> %tmp3, %tmp4 + ret <4 x i16> %tmp5 +} + +define <2 x i32> @vtrni32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vtrni32: +;CHECK: vtrn.32 +;CHECK-NEXT: vadd.i32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 0, i32 2> + %tmp4 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 3> + %tmp5 = add <2 x i32> %tmp3, %tmp4 + ret <2 x i32> %tmp5 +} + +define <2 x float> @vtrnf(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK: vtrnf: +;CHECK: vtrn.32 +;CHECK-NEXT: vadd.f32 + %tmp1 = load <2 x float>* %A + %tmp2 = load <2 x float>* %B + %tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 0, i32 2> + %tmp4 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 1, i32 3> + %tmp5 = fadd <2 x float> %tmp3, %tmp4 + ret <2 x float> %tmp5 +} + +define <16 x i8> @vtrnQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vtrnQi8: +;CHECK: vtrn.8 +;CHECK-NEXT: vadd.i8 + %tmp1 = load <16 x 
i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30> + %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31> + %tmp5 = add <16 x i8> %tmp3, %tmp4 + ret <16 x i8> %tmp5 +} + +define <8 x i16> @vtrnQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vtrnQi16: +;CHECK: vtrn.16 +;CHECK-NEXT: vadd.i16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> + %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> + %tmp5 = add <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 +} + +define <4 x i32> @vtrnQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vtrnQi32: +;CHECK: vtrn.32 +;CHECK-NEXT: vadd.i32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> + %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> + %tmp5 = add <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 +} + +define <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind { +;CHECK: vtrnQf: +;CHECK: vtrn.32 +;CHECK-NEXT: vadd.f32 + %tmp1 = load <4 x float>* %A + %tmp2 = load <4 x float>* %B + %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> + %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> + %tmp5 = fadd <4 x float> %tmp3, %tmp4 + ret <4 x float> %tmp5 +} + +; Undef shuffle indices should not prevent matching to VTRN: + +define <8 x i8> @vtrni8_undef(<8 x 
i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vtrni8_undef: +;CHECK: vtrn.8 +;CHECK-NEXT: vadd.i8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 2, i32 10, i32 undef, i32 12, i32 6, i32 14> + %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 undef, i32 undef, i32 15> + %tmp5 = add <8 x i8> %tmp3, %tmp4 + ret <8 x i8> %tmp5 +} + +define <8 x i16> @vtrnQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vtrnQi16_undef: +;CHECK: vtrn.16 +;CHECK-NEXT: vadd.i16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14> + %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 undef, i32 3, i32 11, i32 5, i32 13, i32 undef, i32 undef> + %tmp5 = add <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 +} +
diff --git a/src/LLVM/test/CodeGen/ARM/vuzp.ll b/src/LLVM/test/CodeGen/ARM/vuzp.ll new file mode 100644 index 0000000..9130f62 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vuzp.ll
@@ -0,0 +1,102 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @vuzpi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vuzpi8: +;CHECK: vuzp.8 +;CHECK-NEXT: vadd.i8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> + %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> + %tmp5 = add <8 x i8> %tmp3, %tmp4 + ret <8 x i8> %tmp5 +} + +define <4 x i16> @vuzpi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vuzpi16: +;CHECK: vuzp.16 +;CHECK-NEXT: vadd.i16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %tmp5 = add <4 x i16> %tmp3, %tmp4 + ret <4 x i16> %tmp5 +} + +; VUZP.32 is equivalent to VTRN.32 for 64-bit vectors. 
+ +define <16 x i8> @vuzpQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vuzpQi8: +;CHECK: vuzp.8 +;CHECK-NEXT: vadd.i8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> + %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> + %tmp5 = add <16 x i8> %tmp3, %tmp4 + ret <16 x i8> %tmp5 +} + +define <8 x i16> @vuzpQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vuzpQi16: +;CHECK: vuzp.16 +;CHECK-NEXT: vadd.i16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> + %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> + %tmp5 = add <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 +} + +define <4 x i32> @vuzpQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vuzpQi32: +;CHECK: vuzp.32 +;CHECK-NEXT: vadd.i32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %tmp5 = add <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 +} + +define <4 x float> @vuzpQf(<4 x float>* %A, <4 x float>* %B) nounwind { +;CHECK: vuzpQf: +;CHECK: vuzp.32 +;CHECK-NEXT: vadd.f32 + %tmp1 = load <4 x float>* %A + %tmp2 = load <4 x float>* %B + %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %tmp5 = fadd <4 x 
float> %tmp3, %tmp4 + ret <4 x float> %tmp5 +} + +; Undef shuffle indices should not prevent matching to VUZP: + +define <8 x i8> @vuzpi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vuzpi8_undef: +;CHECK: vuzp.8 +;CHECK-NEXT: vadd.i8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 8, i32 10, i32 12, i32 14> + %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 13, i32 15> + %tmp5 = add <8 x i8> %tmp3, %tmp4 + ret <8 x i8> %tmp5 +} + +define <8 x i16> @vuzpQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vuzpQi16_undef: +;CHECK: vuzp.16 +;CHECK-NEXT: vadd.i16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14> + %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 undef, i32 undef, i32 11, i32 13, i32 15> + %tmp5 = add <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 +} +
diff --git a/src/LLVM/test/CodeGen/ARM/vzip.ll b/src/LLVM/test/CodeGen/ARM/vzip.ll new file mode 100644 index 0000000..926970a --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/vzip.ll
@@ -0,0 +1,102 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @vzipi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vzipi8: +;CHECK: vzip.8 +;CHECK-NEXT: vadd.i8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> + %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> + %tmp5 = add <8 x i8> %tmp3, %tmp4 + ret <8 x i8> %tmp5 +} + +define <4 x i16> @vzipi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vzipi16: +;CHECK: vzip.16 +;CHECK-NEXT: vadd.i16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> + %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> + %tmp5 = add <4 x i16> %tmp3, %tmp4 + ret <4 x i16> %tmp5 +} + +; VZIP.32 is equivalent to VTRN.32 for 64-bit vectors. 
+ +define <16 x i8> @vzipQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vzipQi8: +;CHECK: vzip.8 +;CHECK-NEXT: vadd.i8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> + %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> + %tmp5 = add <16 x i8> %tmp3, %tmp4 + ret <16 x i8> %tmp5 +} + +define <8 x i16> @vzipQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vzipQi16: +;CHECK: vzip.16 +;CHECK-NEXT: vadd.i16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> + %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> + %tmp5 = add <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 +} + +define <4 x i32> @vzipQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vzipQi32: +;CHECK: vzip.32 +;CHECK-NEXT: vadd.i32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> + %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> + %tmp5 = add <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 +} + +define <4 x float> @vzipQf(<4 x float>* %A, <4 x float>* %B) nounwind { +;CHECK: vzipQf: +;CHECK: vzip.32 +;CHECK-NEXT: vadd.f32 + %tmp1 = load <4 x float>* %A + %tmp2 = load <4 x float>* %B + %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> + %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> + %tmp5 = fadd <4 x 
float> %tmp3, %tmp4 + ret <4 x float> %tmp5 +} + +; Undef shuffle indices should not prevent matching to VZIP: + +define <8 x i8> @vzipi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vzipi8_undef: +;CHECK: vzip.8 +;CHECK-NEXT: vadd.i8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 undef, i32 10, i32 3, i32 11> + %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 undef, i32 undef, i32 15> + %tmp5 = add <8 x i8> %tmp3, %tmp4 + ret <8 x i8> %tmp5 +} + +define <16 x i8> @vzipQi8_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vzipQi8_undef: +;CHECK: vzip.8 +;CHECK-NEXT: vadd.i8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 undef, i32 undef, i32 undef, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> + %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 undef, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 undef, i32 14, i32 30, i32 undef, i32 31> + %tmp5 = add <16 x i8> %tmp3, %tmp4 + ret <16 x i8> %tmp5 +} +
diff --git a/src/LLVM/test/CodeGen/ARM/weak.ll b/src/LLVM/test/CodeGen/ARM/weak.ll new file mode 100644 index 0000000..0ff2fb4 --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/weak.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=arm | grep .weak.*f +; RUN: llc < %s -march=arm | grep .weak.*h + +define weak i32 @f() { +entry: + unreachable +} + +define void @g() { +entry: + tail call void @h( ) + ret void +} + +declare extern_weak void @h() +
diff --git a/src/LLVM/test/CodeGen/ARM/weak2.ll b/src/LLVM/test/CodeGen/ARM/weak2.ll new file mode 100644 index 0000000..e96a44c --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/weak2.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=arm | grep .weak + +define i32 @f(i32 %a) { +entry: + %tmp2 = icmp eq i32 %a, 0 ; <i1> [#uses=1] + %t.0 = select i1 %tmp2, i32 (...)* null, i32 (...)* @test_weak ; <i32 (...)*> [#uses=2] + %tmp5 = icmp eq i32 (...)* %t.0, null ; <i1> [#uses=1] + br i1 %tmp5, label %UnifiedReturnBlock, label %cond_true8 + +cond_true8: ; preds = %entry + %tmp10 = tail call i32 (...)* %t.0( ) ; <i32> [#uses=1] + ret i32 %tmp10 + +UnifiedReturnBlock: ; preds = %entry + ret i32 250 +} + +declare extern_weak i32 @test_weak(...)
diff --git a/src/LLVM/test/CodeGen/ARM/widen-vmovs.ll b/src/LLVM/test/CodeGen/ARM/widen-vmovs.ll new file mode 100644 index 0000000..8fd99ba --- /dev/null +++ b/src/LLVM/test/CodeGen/ARM/widen-vmovs.ll
@@ -0,0 +1,35 @@ +; RUN: llc < %s -widen-vmovs -mcpu=cortex-a8 -verify-machineinstrs | FileCheck %s +target triple = "thumbv7-apple-ios" + +; The 0.0 constant is loaded from the constant pool and kept in a register. +; CHECK: %entry +; CHECK: vldr.32 s +; The float loop variable is initialized with a vmovs from the constant register. +; The vmovs is first widened to a vmovd, and then converted to a vorr because of the v2f32 vadd.f32. +; CHECK: vorr [[DL:d[0-9]+]], [[DN:d[0-9]+]] +; CHECK: , [[DN]] +; CHECK: %for.body.i +; CHECK: vadd.f32 [[DL]], [[DL]], [[DN]] +; +; This test is verifying: +; - The VMOVS widening is happening. +; - Register liveness is verified. +; - The execution domain switch to vorr works across basic blocks. + +define void @Mm() nounwind { +entry: + br label %for.body4 + +for.body4: + br label %for.body.i + +for.body.i: + %tmp3.i = phi float [ 0.000000e+00, %for.body4 ], [ %add.i, %for.body.i ] + %add.i = fadd float %tmp3.i, 0.000000e+00 + %exitcond.i = icmp eq i32 undef, 41 + br i1 %exitcond.i, label %rInnerproduct.exit, label %for.body.i + +rInnerproduct.exit: + store float %add.i, float* undef, align 4 + br label %for.body4 +}
diff --git a/src/LLVM/test/CodeGen/Alpha/2005-12-12-MissingFCMov.ll b/src/LLVM/test/CodeGen/Alpha/2005-12-12-MissingFCMov.ll new file mode 100644 index 0000000..9482f9b --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/2005-12-12-MissingFCMov.ll
@@ -0,0 +1,40 @@ +; This shouldn't crash +; RUN: llc < %s -march=alpha + +@.str_4 = external global [44 x i8] ; <[44 x i8]*> [#uses=0] + +declare void @printf(i32, ...) + +define void @main() { +entry: + %tmp.11861 = icmp slt i64 0, 1 ; <i1> [#uses=1] + %tmp.19466 = icmp slt i64 0, 1 ; <i1> [#uses=1] + %tmp.21571 = icmp slt i64 0, 1 ; <i1> [#uses=1] + %tmp.36796 = icmp slt i64 0, 1 ; <i1> [#uses=1] + br i1 %tmp.11861, label %loopexit.2, label %no_exit.2 + +no_exit.2: ; preds = %entry + ret void + +loopexit.2: ; preds = %entry + br i1 %tmp.19466, label %loopexit.3, label %no_exit.3.preheader + +no_exit.3.preheader: ; preds = %loopexit.2 + ret void + +loopexit.3: ; preds = %loopexit.2 + br i1 %tmp.21571, label %no_exit.6, label %no_exit.4 + +no_exit.4: ; preds = %loopexit.3 + ret void + +no_exit.6: ; preds = %no_exit.6, %loopexit.3 + %tmp.30793 = icmp sgt i64 0, 0 ; <i1> [#uses=1] + br i1 %tmp.30793, label %loopexit.6, label %no_exit.6 + +loopexit.6: ; preds = %no_exit.6 + %Z.1 = select i1 %tmp.36796, double 1.000000e+00, double 0x3FEFFF7CEDE74EAE; <double> [#uses=2] + tail call void (i32, ...)* @printf( i32 0, i64 0, i64 0, i64 0, double 1.000000e+00, double 1.000000e+00, double %Z.1, double %Z.1 ) + ret void +} +
diff --git a/src/LLVM/test/CodeGen/Alpha/2006-01-18-MissedGlobal.ll b/src/LLVM/test/CodeGen/Alpha/2006-01-18-MissedGlobal.ll new file mode 100644 index 0000000..f9967a1 --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/2006-01-18-MissedGlobal.ll
@@ -0,0 +1,27 @@ +; The global symbol should be legalized +; RUN: llc < %s -march=alpha + +target datalayout = "e-p:64:64" + %struct.LIST_HELP = type { %struct.LIST_HELP*, i8* } + %struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i32, [44 x i8] } + %struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } +@clause_SORT = external global [21 x %struct.LIST_HELP*] ; <[21 x %struct.LIST_HELP*]*> [#uses=0] +@ia_in = external global %struct._IO_FILE* ; <%struct._IO_FILE**> [#uses=1] +@multvec_j = external global [100 x i32] ; <[100 x i32]*> [#uses=0] + +define void @main(i32 %argc) { +clock_Init.exit: + %tmp.5.i575 = load i32* null ; <i32> [#uses=1] + %tmp.309 = icmp eq i32 %tmp.5.i575, 0 ; <i1> [#uses=1] + br i1 %tmp.309, label %UnifiedReturnBlock, label %then.17 + +then.17: ; preds = %clock_Init.exit + store %struct._IO_FILE* null, %struct._IO_FILE** @ia_in + %savedstack = call i8* @llvm.stacksave( ) ; <i8*> [#uses=0] + ret void + +UnifiedReturnBlock: ; preds = %clock_Init.exit + ret void +} + +declare i8* @llvm.stacksave()
diff --git a/src/LLVM/test/CodeGen/Alpha/2006-01-26-VaargBreak.ll b/src/LLVM/test/CodeGen/Alpha/2006-01-26-VaargBreak.ll new file mode 100644 index 0000000..0a75961 --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/2006-01-26-VaargBreak.ll
@@ -0,0 +1,14 @@ +; This shouldn't crash +; RUN: llc < %s -march=alpha + +target datalayout = "e-p:64:64" +target triple = "alphaev6-unknown-linux-gnu" +deplibs = [ "c", "crtend", "stdc++" ] + %struct.__va_list_tag = type { i8*, i32 } + +define i32 @emit_library_call_value(i32 %nargs, ...) { +entry: + %tmp.223 = va_arg %struct.__va_list_tag* null, i32 ; <i32> [#uses=1] + ret i32 %tmp.223 +} +
diff --git a/src/LLVM/test/CodeGen/Alpha/2006-04-04-zextload.ll b/src/LLVM/test/CodeGen/Alpha/2006-04-04-zextload.ll new file mode 100644 index 0000000..dd47430 --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/2006-04-04-zextload.ll
@@ -0,0 +1,30 @@ +; RUN: llc < %s -march=alpha + +target datalayout = "e-p:64:64" +target triple = "alphaev67-unknown-linux-gnu" + %struct._Callback_list = type { %struct._Callback_list*, void (i32, %struct.ios_base*, i32)*, i32, i32 } + %struct._Impl = type { i32, %struct.facet**, i64, %struct.facet**, i8** } + %struct._Words = type { i8*, i64 } + %"struct.__codecvt_abstract_base<char,char,__mbstate_t>" = type { %struct.facet } + %"struct.basic_streambuf<char,std::char_traits<char> >" = type { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, %struct.locale } + %struct.facet = type { i32 (...)**, i32 } + %struct.ios_base = type { i32 (...)**, i64, i64, i32, i32, i32, %struct._Callback_list*, %struct._Words, [8 x %struct._Words], i32, %struct._Words*, %struct.locale } + %struct.locale = type { %struct._Impl* } + %"struct.ostreambuf_iterator<char,std::char_traits<char> >" = type { %"struct.basic_streambuf<char,std::char_traits<char> >"*, i1 } + +define void @_ZNKSt7num_putIcSt19ostreambuf_iteratorIcSt11char_traitsIcEEE15_M_insert_floatIdEES3_S3_RSt8ios_baseccT_() { +entry: + %tmp234 = icmp eq i8 0, 0 ; <i1> [#uses=1] + br i1 %tmp234, label %cond_next243, label %cond_true235 + +cond_true235: ; preds = %entry + ret void + +cond_next243: ; preds = %entry + %tmp428 = load i64* null ; <i64> [#uses=1] + %tmp428.upgrd.1 = trunc i64 %tmp428 to i32 ; <i32> [#uses=1] + %tmp429 = alloca i8, i32 %tmp428.upgrd.1 ; <i8*> [#uses=0] + unreachable +} + +
diff --git a/src/LLVM/test/CodeGen/Alpha/2006-07-03-ASMFormalLowering.ll b/src/LLVM/test/CodeGen/Alpha/2006-07-03-ASMFormalLowering.ll new file mode 100644 index 0000000..6a12ced --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/2006-07-03-ASMFormalLowering.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=alpha + +target datalayout = "e-p:64:64" +target triple = "alphaev67-unknown-linux-gnu" + +define i32 @_ZN9__gnu_cxx18__exchange_and_addEPVii(i32* %__mem, i32 %__val) { +entry: + %__tmp = alloca i32, align 4 ; <i32*> [#uses=1] + %tmp3 = call i32 asm sideeffect "\0A$$Lxadd_0:\0A\09ldl_l $0,$3\0A\09addl $0,$4,$1\0A\09stl_c $1,$2\0A\09beq $1,$$Lxadd_0\0A\09mb", "=&r,=*&r,=*m,m,r"( i32* %__tmp, i32* %__mem, i32* %__mem, i32 %__val ) ; <i32> [#uses=1] + ret i32 %tmp3 +} + +define void @_ZN9__gnu_cxx12__atomic_addEPVii(i32* %__mem, i32 %__val) { +entry: + %tmp2 = call i32 asm sideeffect "\0A$$Ladd_1:\0A\09ldl_l $0,$2\0A\09addl $0,$3,$0\0A\09stl_c $0,$1\0A\09beq $0,$$Ladd_1\0A\09mb", "=&r,=*m,m,r"( i32* %__mem, i32* %__mem, i32 %__val ) ; <i32> [#uses=0] + ret void +} +
diff --git a/src/LLVM/test/CodeGen/Alpha/2006-11-01-vastart.ll b/src/LLVM/test/CodeGen/Alpha/2006-11-01-vastart.ll new file mode 100644 index 0000000..6c64e4b --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/2006-11-01-vastart.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=alpha + +target datalayout = "e-p:64:64" +target triple = "alphaev67-unknown-linux-gnu" + %struct.va_list = type { i8*, i32, i32 } + +define void @yyerror(i32, ...) { +entry: + %va.upgrd.1 = bitcast %struct.va_list* null to i8* ; <i8*> [#uses=1] + call void @llvm.va_start( i8* %va.upgrd.1 ) + ret void +} + +declare void @llvm.va_start(i8*) +
diff --git a/src/LLVM/test/CodeGen/Alpha/2007-11-27-mulneg3.ll b/src/LLVM/test/CodeGen/Alpha/2007-11-27-mulneg3.ll new file mode 100644 index 0000000..b537e25 --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/2007-11-27-mulneg3.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=alpha + +;FIXME: this should produce no mul inst. But not crashing will have to do for now + +; ModuleID = 'Output/bugpoint-train/bugpoint-reduced-simplified.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f128:128:128" +target triple = "alphaev6-unknown-linux-gnu" + +define fastcc i32 @getcount(i32 %s) { +cond_next43: ; preds = %bb27 + %tmp431 = mul i32 %s, -3 + ret i32 %tmp431 +}
diff --git a/src/LLVM/test/CodeGen/Alpha/2008-11-10-smul_lohi.ll b/src/LLVM/test/CodeGen/Alpha/2008-11-10-smul_lohi.ll new file mode 100644 index 0000000..1a4b40e --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/2008-11-10-smul_lohi.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -march=alpha + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f128:128:128" +target triple = "alphaev6-unknown-linux-gnu" + +define i64 @__mulvdi3(i64 %a, i64 %b) nounwind { +entry: + %0 = sext i64 %a to i128 ; <i128> [#uses=1] + %1 = sext i64 %b to i128 ; <i128> [#uses=1] + %2 = mul i128 %1, %0 ; <i128> [#uses=2] + %3 = lshr i128 %2, 64 ; <i128> [#uses=1] + %4 = trunc i128 %3 to i64 ; <i64> [#uses=1] + %5 = trunc i128 %2 to i64 ; <i64> [#uses=1] + %6 = icmp eq i64 %4, 0 ; <i1> [#uses=1] + br i1 %6, label %bb1, label %bb + +bb: ; preds = %entry + unreachable + +bb1: ; preds = %entry + ret i64 %5 +}
diff --git a/src/LLVM/test/CodeGen/Alpha/2008-11-12-Add128.ll b/src/LLVM/test/CodeGen/Alpha/2008-11-12-Add128.ll new file mode 100644 index 0000000..8b9b603 --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/2008-11-12-Add128.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s +; PR3044 +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f128:128:128" +target triple = "alphaev6-unknown-linux-gnu" + +define i128 @__mulvti3(i128 %u, i128 %v) nounwind { +entry: + %0 = load i128* null, align 16 ; <i128> [#uses=1] + %1 = load i64* null, align 8 ; <i64> [#uses=1] + %2 = zext i64 %1 to i128 ; <i128> [#uses=1] + %3 = add i128 %2, %0 ; <i128> [#uses=1] + store i128 %3, i128* null, align 16 + unreachable +}
diff --git a/src/LLVM/test/CodeGen/Alpha/2009-07-16-PromoteFloatCompare.ll b/src/LLVM/test/CodeGen/Alpha/2009-07-16-PromoteFloatCompare.ll new file mode 100644 index 0000000..cfbf7fc --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/2009-07-16-PromoteFloatCompare.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=alpha + +define i1 @a(float %x) { + %r = fcmp ult float %x, 1.0 + ret i1 %r +}
diff --git a/src/LLVM/test/CodeGen/Alpha/2010-04-07-DbgValueOtherTargets.ll b/src/LLVM/test/CodeGen/Alpha/2010-04-07-DbgValueOtherTargets.ll new file mode 100644 index 0000000..4590f12 --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/2010-04-07-DbgValueOtherTargets.ll
@@ -0,0 +1,28 @@ +; RUN: llc -O0 -march=alpha -asm-verbose < %s | FileCheck %s +; Check that DEBUG_VALUE comments come through on a variety of targets. + +define i32 @main() nounwind ssp { +entry: +; CHECK: DEBUG_VALUE + call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9 + ret i32 0, !dbg !10 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.sp = !{!0} + +!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5} +!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 0} +!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] +!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!9 = metadata !{i32 3, i32 11, metadata !8, null} +!10 = metadata !{i32 4, i32 2, metadata !8, null} +
diff --git a/src/LLVM/test/CodeGen/Alpha/2010-08-01-mulreduce64.ll b/src/LLVM/test/CodeGen/Alpha/2010-08-01-mulreduce64.ll new file mode 100644 index 0000000..b838ec9 --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/2010-08-01-mulreduce64.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=alpha | FileCheck %s + +define fastcc i64 @getcount(i64 %s) { + %tmp431 = mul i64 %s, 12884901888 + ret i64 %tmp431 +} + +; CHECK: sll $16,33,$0 +; CHECK-NEXT: sll $16,32,$1 +; CHECK-NEXT: addq $0,$1,$0 +
diff --git a/src/LLVM/test/CodeGen/Alpha/add.ll b/src/LLVM/test/CodeGen/Alpha/add.ll new file mode 100644 index 0000000..0cde263 --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/add.ll
@@ -0,0 +1,178 @@ +;test all the shifted and signextending adds and subs with and without consts +; +; RUN: llc < %s -march=alpha -o %t.s +; RUN: grep { addl} %t.s | count 2 +; RUN: grep { addq} %t.s | count 2 +; RUN: grep { subl} %t.s | count 2 +; RUN: grep { subq} %t.s | count 2 +; +; RUN: grep {s4addl} %t.s | count 2 +; RUN: grep {s8addl} %t.s | count 2 +; RUN: grep {s4addq} %t.s | count 2 +; RUN: grep {s8addq} %t.s | count 2 +; +; RUN: grep {s4subl} %t.s | count 2 +; RUN: grep {s8subl} %t.s | count 2 +; RUN: grep {s4subq} %t.s | count 2 +; RUN: grep {s8subq} %t.s | count 2 + + +define signext i32 @al(i32 signext %x.s, i32 signext %y.s) { +entry: + %tmp.3.s = add i32 %y.s, %x.s ; <i32> [#uses=1] + ret i32 %tmp.3.s +} + +define signext i32 @ali(i32 signext %x.s) { +entry: + %tmp.3.s = add i32 100, %x.s ; <i32> [#uses=1] + ret i32 %tmp.3.s +} + +define signext i64 @aq(i64 signext %x.s, i64 signext %y.s) { +entry: + %tmp.3.s = add i64 %y.s, %x.s ; <i64> [#uses=1] + ret i64 %tmp.3.s +} + +define i64 @aqi(i64 %x.s) { +entry: + %tmp.3.s = add i64 100, %x.s ; <i64> [#uses=1] + ret i64 %tmp.3.s +} + +define signext i32 @sl(i32 signext %x.s, i32 signext %y.s) { +entry: + %tmp.3.s = sub i32 %y.s, %x.s ; <i32> [#uses=1] + ret i32 %tmp.3.s +} + +define signext i32 @sli(i32 signext %x.s) { +entry: + %tmp.3.s = sub i32 %x.s, 100 ; <i32> [#uses=1] + ret i32 %tmp.3.s +} + +define i64 @sq(i64 %x.s, i64 %y.s) { +entry: + %tmp.3.s = sub i64 %y.s, %x.s ; <i64> [#uses=1] + ret i64 %tmp.3.s +} + +define i64 @sqi(i64 %x.s) { +entry: + %tmp.3.s = sub i64 %x.s, 100 ; <i64> [#uses=1] + ret i64 %tmp.3.s +} + +define signext i32 @a4l(i32 signext %x.s, i32 signext %y.s) { +entry: + %tmp.1.s = shl i32 %y.s, 2 ; <i32> [#uses=1] + %tmp.3.s = add i32 %tmp.1.s, %x.s ; <i32> [#uses=1] + ret i32 %tmp.3.s +} + +define signext i32 @a8l(i32 signext %x.s, i32 signext %y.s) { +entry: + %tmp.1.s = shl i32 %y.s, 3 ; <i32> [#uses=1] + %tmp.3.s = add i32 %tmp.1.s, %x.s ; <i32> [#uses=1] + ret i32 %tmp.3.s 
+} + +define i64 @a4q(i64 %x.s, i64 %y.s) { +entry: + %tmp.1.s = shl i64 %y.s, 2 ; <i64> [#uses=1] + %tmp.3.s = add i64 %tmp.1.s, %x.s ; <i64> [#uses=1] + ret i64 %tmp.3.s +} + +define i64 @a8q(i64 %x.s, i64 %y.s) { +entry: + %tmp.1.s = shl i64 %y.s, 3 ; <i64> [#uses=1] + %tmp.3.s = add i64 %tmp.1.s, %x.s ; <i64> [#uses=1] + ret i64 %tmp.3.s +} + +define signext i32 @a4li(i32 signext %y.s) { +entry: + %tmp.1.s = shl i32 %y.s, 2 ; <i32> [#uses=1] + %tmp.3.s = add i32 100, %tmp.1.s ; <i32> [#uses=1] + ret i32 %tmp.3.s +} + +define signext i32 @a8li(i32 signext %y.s) { +entry: + %tmp.1.s = shl i32 %y.s, 3 ; <i32> [#uses=1] + %tmp.3.s = add i32 100, %tmp.1.s ; <i32> [#uses=1] + ret i32 %tmp.3.s +} + +define i64 @a4qi(i64 %y.s) { +entry: + %tmp.1.s = shl i64 %y.s, 2 ; <i64> [#uses=1] + %tmp.3.s = add i64 100, %tmp.1.s ; <i64> [#uses=1] + ret i64 %tmp.3.s +} + +define i64 @a8qi(i64 %y.s) { +entry: + %tmp.1.s = shl i64 %y.s, 3 ; <i64> [#uses=1] + %tmp.3.s = add i64 100, %tmp.1.s ; <i64> [#uses=1] + ret i64 %tmp.3.s +} + +define signext i32 @s4l(i32 signext %x.s, i32 signext %y.s) { +entry: + %tmp.1.s = shl i32 %y.s, 2 ; <i32> [#uses=1] + %tmp.3.s = sub i32 %tmp.1.s, %x.s ; <i32> [#uses=1] + ret i32 %tmp.3.s +} + +define signext i32 @s8l(i32 signext %x.s, i32 signext %y.s) { +entry: + %tmp.1.s = shl i32 %y.s, 3 ; <i32> [#uses=1] + %tmp.3.s = sub i32 %tmp.1.s, %x.s ; <i32> [#uses=1] + ret i32 %tmp.3.s +} + +define i64 @s4q(i64 %x.s, i64 %y.s) { +entry: + %tmp.1.s = shl i64 %y.s, 2 ; <i64> [#uses=1] + %tmp.3.s = sub i64 %tmp.1.s, %x.s ; <i64> [#uses=1] + ret i64 %tmp.3.s +} + +define i64 @s8q(i64 %x.s, i64 %y.s) { +entry: + %tmp.1.s = shl i64 %y.s, 3 ; <i64> [#uses=1] + %tmp.3.s = sub i64 %tmp.1.s, %x.s ; <i64> [#uses=1] + ret i64 %tmp.3.s +} + +define signext i32 @s4li(i32 signext %y.s) { +entry: + %tmp.1.s = shl i32 %y.s, 2 ; <i32> [#uses=1] + %tmp.3.s = sub i32 %tmp.1.s, 100 ; <i32> [#uses=1] + ret i32 %tmp.3.s +} + +define signext i32 @s8li(i32 signext %y.s) { +entry: + 
%tmp.1.s = shl i32 %y.s, 3 ; <i32> [#uses=1] + %tmp.3.s = sub i32 %tmp.1.s, 100 ; <i32> [#uses=1] + ret i32 %tmp.3.s +} + +define i64 @s4qi(i64 %y.s) { +entry: + %tmp.1.s = shl i64 %y.s, 2 ; <i64> [#uses=1] + %tmp.3.s = sub i64 %tmp.1.s, 100 ; <i64> [#uses=1] + ret i64 %tmp.3.s +} + +define i64 @s8qi(i64 %y.s) { +entry: + %tmp.1.s = shl i64 %y.s, 3 ; <i64> [#uses=1] + %tmp.3.s = sub i64 %tmp.1.s, 100 ; <i64> [#uses=1] + ret i64 %tmp.3.s +}
diff --git a/src/LLVM/test/CodeGen/Alpha/add128.ll b/src/LLVM/test/CodeGen/Alpha/add128.ll new file mode 100644 index 0000000..fa3b949 --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/add128.ll
@@ -0,0 +1,9 @@ +;test for ADDC and ADDE expansion +; +; RUN: llc < %s -march=alpha + +define i128 @add128(i128 %x, i128 %y) { +entry: + %tmp = add i128 %y, %x + ret i128 %tmp +}
diff --git a/src/LLVM/test/CodeGen/Alpha/bic.ll b/src/LLVM/test/CodeGen/Alpha/bic.ll new file mode 100644 index 0000000..49a83f1 --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/bic.ll
@@ -0,0 +1,9 @@ +; Make sure this testcase codegens to the bic instruction +; RUN: llc < %s -march=alpha | grep {bic} + +define i64 @bar(i64 %x, i64 %y) { +entry: + %tmp.1 = xor i64 %x, -1 ; <i64> [#uses=1] + %tmp.2 = and i64 %y, %tmp.1 ; <i64> [#uses=1] + ret i64 %tmp.2 +}
diff --git a/src/LLVM/test/CodeGen/Alpha/bsr.ll b/src/LLVM/test/CodeGen/Alpha/bsr.ll new file mode 100644 index 0000000..be28016 --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/bsr.ll
@@ -0,0 +1,12 @@ +; Make sure this testcase codegens the bsr instruction +; RUN: llc < %s -march=alpha | grep bsr + +define internal i64 @abc(i32 %x) { + %tmp.2 = add i32 %x, -1 ; <i32> [#uses=1] + %tmp.0 = call i64 @abc( i32 %tmp.2 ) ; <i64> [#uses=1] + %tmp.5 = add i32 %x, -2 ; <i32> [#uses=1] + %tmp.3 = call i64 @abc( i32 %tmp.5 ) ; <i64> [#uses=1] + %tmp.6 = add i64 %tmp.0, %tmp.3 ; <i64> [#uses=1] + ret i64 %tmp.6 +} +
diff --git a/src/LLVM/test/CodeGen/Alpha/call_adj.ll b/src/LLVM/test/CodeGen/Alpha/call_adj.ll new file mode 100644 index 0000000..83ec752 --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/call_adj.ll
@@ -0,0 +1,13 @@ +;All this should do is not crash +;RUN: llc < %s -march=alpha + +target datalayout = "e-p:64:64" +target triple = "alphaev67-unknown-linux-gnu" + +define void @_ZNSt13basic_filebufIcSt11char_traitsIcEE22_M_convert_to_externalEPcl(i32 %f) { +entry: + %tmp49 = alloca i8, i32 %f ; <i8*> [#uses=0] + %tmp = call i32 null( i8* null, i8* null, i8* null, i8* null, i8* null, i8* null, i8* null ) ; <i32> [#uses=0] + ret void +} +
diff --git a/src/LLVM/test/CodeGen/Alpha/cmov.ll b/src/LLVM/test/CodeGen/Alpha/cmov.ll new file mode 100644 index 0000000..041b612 --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/cmov.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -march=alpha | not grep cmovlt +; RUN: llc < %s -march=alpha | grep cmoveq + +define i64 @cmov_lt(i64 %a, i64 %c) { +entry: + %tmp.1 = icmp slt i64 %c, 0 ; <i1> [#uses=1] + %retval = select i1 %tmp.1, i64 %a, i64 10 ; <i64> [#uses=1] + ret i64 %retval +} + +define i64 @cmov_const(i64 %a, i64 %b, i64 %c) { +entry: + %tmp.1 = icmp slt i64 %a, %b ; <i1> [#uses=1] + %retval = select i1 %tmp.1, i64 %c, i64 10 ; <i64> [#uses=1] + ret i64 %retval +} + +define i64 @cmov_lt2(i64 %a, i64 %c) { +entry: + %tmp.1 = icmp sgt i64 %c, 0 ; <i1> [#uses=1] + %retval = select i1 %tmp.1, i64 10, i64 %a ; <i64> [#uses=1] + ret i64 %retval +}
diff --git a/src/LLVM/test/CodeGen/Alpha/cmpbge.ll b/src/LLVM/test/CodeGen/Alpha/cmpbge.ll new file mode 100644 index 0000000..6e4c26a --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/cmpbge.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=alpha | grep cmpbge | count 2 + +define i1 @test1(i64 %A, i64 %B) { + %C = and i64 %A, 255 ; <i64> [#uses=1] + %D = and i64 %B, 255 ; <i64> [#uses=1] + %E = icmp uge i64 %C, %D ; <i1> [#uses=1] + ret i1 %E +} + +define i1 @test2(i64 %a, i64 %B) { + %A = shl i64 %a, 1 ; <i64> [#uses=1] + %C = and i64 %A, 254 ; <i64> [#uses=1] + %D = and i64 %B, 255 ; <i64> [#uses=1] + %E = icmp uge i64 %C, %D ; <i1> [#uses=1] + ret i1 %E +}
diff --git a/src/LLVM/test/CodeGen/Alpha/ctlz.ll b/src/LLVM/test/CodeGen/Alpha/ctlz.ll new file mode 100644 index 0000000..c176e25 --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/ctlz.ll
@@ -0,0 +1,14 @@ +; Make sure this testcase codegens to the ctlz instruction +; RUN: llc < %s -march=alpha -mcpu=ev67 | grep -i ctlz +; RUN: llc < %s -march=alpha -mattr=+CIX | grep -i ctlz +; RUN: llc < %s -march=alpha -mcpu=ev6 | not grep -i ctlz +; RUN: llc < %s -march=alpha -mattr=-CIX | not grep -i ctlz + +declare i8 @llvm.ctlz.i8(i8) + +define i32 @bar(i8 %x) { +entry: + %tmp.1 = call i8 @llvm.ctlz.i8( i8 %x ) + %tmp.2 = sext i8 %tmp.1 to i32 + ret i32 %tmp.2 +}
diff --git a/src/LLVM/test/CodeGen/Alpha/ctlz_e.ll b/src/LLVM/test/CodeGen/Alpha/ctlz_e.ll new file mode 100644 index 0000000..675ca9b --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/ctlz_e.ll
@@ -0,0 +1,11 @@ +; Make sure this testcase does not use ctpop +; RUN: llc < %s -march=alpha | not grep -i ctpop + +declare i64 @llvm.ctlz.i64(i64) + +define i64 @bar(i64 %x) { +entry: + %tmp.1 = call i64 @llvm.ctlz.i64( i64 %x ) ; <i64> [#uses=1] + ret i64 %tmp.1 +} +
diff --git a/src/LLVM/test/CodeGen/Alpha/ctpop.ll b/src/LLVM/test/CodeGen/Alpha/ctpop.ll new file mode 100644 index 0000000..38fd27e --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/ctpop.ll
@@ -0,0 +1,17 @@ +; Make sure this testcase codegens to the ctpop instruction +; RUN: llc < %s -march=alpha -mcpu=ev67 | grep -i ctpop +; RUN: llc < %s -march=alpha -mattr=+CIX | \ +; RUN: grep -i ctpop +; RUN: llc < %s -march=alpha -mcpu=ev6 | \ +; RUN: not grep -i ctpop +; RUN: llc < %s -march=alpha -mattr=-CIX | \ +; RUN: not grep -i ctpop + +declare i64 @llvm.ctpop.i64(i64) + +define i64 @bar(i64 %x) { +entry: + %tmp.1 = call i64 @llvm.ctpop.i64( i64 %x ) ; <i64> [#uses=1] + ret i64 %tmp.1 +} +
diff --git a/src/LLVM/test/CodeGen/Alpha/dg.exp b/src/LLVM/test/CodeGen/Alpha/dg.exp new file mode 100644 index 0000000..6b6dea4 --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/dg.exp
@@ -0,0 +1,5 @@ +load_lib llvm.exp + +if { [llvm_supports_target Alpha] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]] +}
diff --git a/src/LLVM/test/CodeGen/Alpha/eqv.ll b/src/LLVM/test/CodeGen/Alpha/eqv.ll new file mode 100644 index 0000000..6aa443b --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/eqv.ll
@@ -0,0 +1,10 @@ +; Make sure this testcase codegens to the eqv instruction +; RUN: llc < %s -march=alpha | grep eqv + +define i64 @bar(i64 %x, i64 %y) { +entry: + %tmp.1 = xor i64 %x, -1 ; <i64> [#uses=1] + %tmp.2 = xor i64 %y, %tmp.1 ; <i64> [#uses=1] + ret i64 %tmp.2 +} +
diff --git a/src/LLVM/test/CodeGen/Alpha/i32_sub_1.ll b/src/LLVM/test/CodeGen/Alpha/i32_sub_1.ll new file mode 100644 index 0000000..74156bb --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/i32_sub_1.ll
@@ -0,0 +1,9 @@ +; Make sure this testcase codegens to the ctpop instruction +; RUN: llc < %s -march=alpha | grep -i {subl \$16,1,\$0} + + +define signext i32 @foo(i32 signext %x) { +entry: + %tmp.1 = add i32 %x, -1 ; <int> [#uses=1] + ret i32 %tmp.1 +}
diff --git a/src/LLVM/test/CodeGen/Alpha/illegal-element-type.ll b/src/LLVM/test/CodeGen/Alpha/illegal-element-type.ll new file mode 100644 index 0000000..4cf80de --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/illegal-element-type.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -mtriple=alphaev6-unknown-linux-gnu + +define void @foo() { +entry: + br label %bb + +bb: ; preds = %bb, %entry + br i1 false, label %bb26, label %bb + +bb19: ; preds = %bb26 + ret void + +bb26: ; preds = %bb + br i1 false, label %bb30, label %bb19 + +bb30: ; preds = %bb26 + br label %bb45 + +bb45: ; preds = %bb45, %bb30 + %V.0 = phi <8 x i16> [ %tmp42, %bb45 ], [ zeroinitializer, %bb30 ] ; <<8 x i16>> [#uses=1] + %tmp42 = mul <8 x i16> zeroinitializer, %V.0 ; <<8 x i16>> [#uses=1] + br label %bb45 +}
diff --git a/src/LLVM/test/CodeGen/Alpha/jmp_table.ll b/src/LLVM/test/CodeGen/Alpha/jmp_table.ll new file mode 100644 index 0000000..2bda4f7 --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/jmp_table.ll
@@ -0,0 +1,99 @@ +; try to check that we have the most important instructions, which shouldn't +; appear otherwise +; RUN: llc < %s -march=alpha | grep jmp +; RUN: llc < %s -march=alpha | grep gprel32 +; RUN: llc < %s -march=alpha | grep ldl +; RUN: llc < %s -march=alpha | grep rodata +; END. + +target datalayout = "e-p:64:64" +target triple = "alphaev67-unknown-linux-gnu" +@str = internal constant [2 x i8] c"1\00" ; <[2 x i8]*> [#uses=1] +@str1 = internal constant [2 x i8] c"2\00" ; <[2 x i8]*> [#uses=1] +@str2 = internal constant [2 x i8] c"3\00" ; <[2 x i8]*> [#uses=1] +@str3 = internal constant [2 x i8] c"4\00" ; <[2 x i8]*> [#uses=1] +@str4 = internal constant [2 x i8] c"5\00" ; <[2 x i8]*> [#uses=1] +@str5 = internal constant [2 x i8] c"6\00" ; <[2 x i8]*> [#uses=1] +@str6 = internal constant [2 x i8] c"7\00" ; <[2 x i8]*> [#uses=1] +@str7 = internal constant [2 x i8] c"8\00" ; <[2 x i8]*> [#uses=1] + +define i32 @main(i32 %x, i8** %y) { +entry: + %x_addr = alloca i32 ; <i32*> [#uses=2] + %y_addr = alloca i8** ; <i8***> [#uses=1] + %retval = alloca i32, align 4 ; <i32*> [#uses=2] + %tmp = alloca i32, align 4 ; <i32*> [#uses=2] + %foo = alloca i8*, align 8 ; <i8**> [#uses=9] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store i32 %x, i32* %x_addr + store i8** %y, i8*** %y_addr + %tmp.upgrd.1 = load i32* %x_addr ; <i32> [#uses=1] + switch i32 %tmp.upgrd.1, label %bb15 [ + i32 1, label %bb + i32 2, label %bb1 + i32 3, label %bb3 + i32 4, label %bb5 + i32 5, label %bb7 + i32 6, label %bb9 + i32 7, label %bb11 + i32 8, label %bb13 + ] + +bb: ; preds = %entry + %tmp.upgrd.2 = getelementptr [2 x i8]* @str, i32 0, i64 0 ; <i8*> [#uses=1] + store i8* %tmp.upgrd.2, i8** %foo + br label %bb16 + +bb1: ; preds = %entry + %tmp2 = getelementptr [2 x i8]* @str1, i32 0, i64 0 ; <i8*> [#uses=1] + store i8* %tmp2, i8** %foo + br label %bb16 + +bb3: ; preds = %entry + %tmp4 = getelementptr [2 x i8]* @str2, i32 0, i64 0 ; <i8*> [#uses=1] + store i8* %tmp4, i8** 
%foo + br label %bb16 + +bb5: ; preds = %entry + %tmp6 = getelementptr [2 x i8]* @str3, i32 0, i64 0 ; <i8*> [#uses=1] + store i8* %tmp6, i8** %foo + br label %bb16 + +bb7: ; preds = %entry + %tmp8 = getelementptr [2 x i8]* @str4, i32 0, i64 0 ; <i8*> [#uses=1] + store i8* %tmp8, i8** %foo + br label %bb16 + +bb9: ; preds = %entry + %tmp10 = getelementptr [2 x i8]* @str5, i32 0, i64 0 ; <i8*> [#uses=1] + store i8* %tmp10, i8** %foo + br label %bb16 + +bb11: ; preds = %entry + %tmp12 = getelementptr [2 x i8]* @str6, i32 0, i64 0 ; <i8*> [#uses=1] + store i8* %tmp12, i8** %foo + br label %bb16 + +bb13: ; preds = %entry + %tmp14 = getelementptr [2 x i8]* @str7, i32 0, i64 0 ; <i8*> [#uses=1] + store i8* %tmp14, i8** %foo + br label %bb16 + +bb15: ; preds = %entry + br label %bb16 + +bb16: ; preds = %bb15, %bb13, %bb11, %bb9, %bb7, %bb5, %bb3, %bb1, %bb + %tmp17 = load i8** %foo ; <i8*> [#uses=1] + %tmp18 = call i32 (...)* @print( i8* %tmp17 ) ; <i32> [#uses=0] + store i32 0, i32* %tmp + %tmp19 = load i32* %tmp ; <i32> [#uses=1] + store i32 %tmp19, i32* %retval + br label %return + +return: ; preds = %bb16 + %retval.upgrd.3 = load i32* %retval ; <i32> [#uses=1] + ret i32 %retval.upgrd.3 +} + +declare i32 @print(...) +
diff --git a/src/LLVM/test/CodeGen/Alpha/mb.ll b/src/LLVM/test/CodeGen/Alpha/mb.ll new file mode 100644 index 0000000..3268c54 --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/mb.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=alpha | grep mb + +define void @test() { + fence seq_cst + ret void +}
diff --git a/src/LLVM/test/CodeGen/Alpha/mul128.ll b/src/LLVM/test/CodeGen/Alpha/mul128.ll new file mode 100644 index 0000000..daf8409 --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/mul128.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=alpha + +define i128 @__mulvdi3(i128 %a, i128 %b) nounwind { +entry: + %r = mul i128 %a, %b + ret i128 %r +}
diff --git a/src/LLVM/test/CodeGen/Alpha/mul5.ll b/src/LLVM/test/CodeGen/Alpha/mul5.ll new file mode 100644 index 0000000..19e1e62 --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/mul5.ll
@@ -0,0 +1,33 @@ +; Make sure this testcase does not use mulq +; RUN: llc < %s -march=alpha | not grep -i mul + +define i64 @foo1(i64 %x) { +entry: + %tmp.1 = mul i64 %x, 9 ; <i64> [#uses=1] + ret i64 %tmp.1 +} + +define i64 @foo3(i64 %x) { +entry: + %tmp.1 = mul i64 %x, 259 ; <i64> [#uses=1] + ret i64 %tmp.1 +} + +define i64 @foo4l(i64 %x) { +entry: + %tmp.1 = mul i64 %x, 260 ; <i64> [#uses=1] + ret i64 %tmp.1 +} + +define i64 @foo8l(i64 %x) { +entry: + %tmp.1 = mul i64 %x, 768 ; <i64> [#uses=1] + ret i64 %tmp.1 +} + +define i64 @bar(i64 %x) { +entry: + %tmp.1 = mul i64 %x, 5 ; <i64> [#uses=1] + ret i64 %tmp.1 +} +
diff --git a/src/LLVM/test/CodeGen/Alpha/neg1.ll b/src/LLVM/test/CodeGen/Alpha/neg1.ll new file mode 100644 index 0000000..bef2f99 --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/neg1.ll
@@ -0,0 +1,7 @@ +; Make sure this testcase codegens to the lda -1 instruction +; RUN: llc < %s -march=alpha | grep {\\-1} + +define i64 @bar() { +entry: + ret i64 -1 +}
diff --git a/src/LLVM/test/CodeGen/Alpha/not.ll b/src/LLVM/test/CodeGen/Alpha/not.ll new file mode 100644 index 0000000..ddbf481 --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/not.ll
@@ -0,0 +1,8 @@ +; Make sure this testcase codegens to the ornot instruction +; RUN: llc < %s -march=alpha | grep eqv + +define i64 @bar(i64 %x) { +entry: + %tmp.1 = xor i64 %x, -1 ; <i64> [#uses=1] + ret i64 %tmp.1 +}
diff --git a/src/LLVM/test/CodeGen/Alpha/ornot.ll b/src/LLVM/test/CodeGen/Alpha/ornot.ll new file mode 100644 index 0000000..d4c2e7f --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/ornot.ll
@@ -0,0 +1,10 @@ +; Make sure this testcase codegens to the ornot instruction +; RUN: llc < %s -march=alpha | grep ornot + +define i64 @bar(i64 %x, i64 %y) { +entry: + %tmp.1 = xor i64 %x, -1 ; <i64> [#uses=1] + %tmp.2 = or i64 %y, %tmp.1 ; <i64> [#uses=1] + ret i64 %tmp.2 +} +
diff --git a/src/LLVM/test/CodeGen/Alpha/private.ll b/src/LLVM/test/CodeGen/Alpha/private.ll new file mode 100644 index 0000000..f8d3094 --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/private.ll
@@ -0,0 +1,19 @@ +; Test to make sure that the 'private' is used correctly. +; +; RUN: llc < %s -march=alpha > %t +; RUN: grep \\\$foo: %t +; RUN: grep bsr.*\\\$\\\$foo %t +; RUN: grep \\\$baz: %t +; RUN: grep ldah.*\\\$baz %t + +define private void @foo() { + ret void +} + +@baz = private global i32 4 + +define i32 @bar() { + call void @foo() + %1 = load i32* @baz, align 4 + ret i32 %1 +}
diff --git a/src/LLVM/test/CodeGen/Alpha/rpcc.ll b/src/LLVM/test/CodeGen/Alpha/rpcc.ll new file mode 100644 index 0000000..471c204 --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/rpcc.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=alpha | grep rpcc + +declare i64 @llvm.readcyclecounter() + +define i64 @foo() { +entry: + %tmp.1 = call i64 @llvm.readcyclecounter( ) ; <i64> [#uses=1] + ret i64 %tmp.1 +}
diff --git a/src/LLVM/test/CodeGen/Alpha/srl_and.ll b/src/LLVM/test/CodeGen/Alpha/srl_and.ll new file mode 100644 index 0000000..b9b4500 --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/srl_and.ll
@@ -0,0 +1,10 @@ +; Make sure this testcase codegens to the zapnot instruction +; RUN: llc < %s -march=alpha | grep zapnot + +define i64 @foo(i64 %y) { +entry: + %tmp = lshr i64 %y, 3 ; <i64> [#uses=1] + %tmp2 = and i64 %tmp, 8191 ; <i64> [#uses=1] + ret i64 %tmp2 +} +
diff --git a/src/LLVM/test/CodeGen/Alpha/sub128.ll b/src/LLVM/test/CodeGen/Alpha/sub128.ll new file mode 100644 index 0000000..d26404b --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/sub128.ll
@@ -0,0 +1,9 @@ +;test for SUBC and SUBE expansion +; +; RUN: llc < %s -march=alpha + +define i128 @sub128(i128 %x, i128 %y) { +entry: + %tmp = sub i128 %y, %x + ret i128 %tmp +}
diff --git a/src/LLVM/test/CodeGen/Alpha/weak.ll b/src/LLVM/test/CodeGen/Alpha/weak.ll new file mode 100644 index 0000000..1db146b --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/weak.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=alpha | grep .weak.*f +; RUN: llc < %s -march=alpha | grep .weak.*h + +define weak i32 @f() { +entry: + unreachable +} + +define void @g() { +entry: + tail call void @h( ) + ret void +} + +declare extern_weak void @h() +
diff --git a/src/LLVM/test/CodeGen/Alpha/zapnot.ll b/src/LLVM/test/CodeGen/Alpha/zapnot.ll new file mode 100644 index 0000000..1bd26c3 --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/zapnot.ll
@@ -0,0 +1,9 @@ +; Make sure this testcase codegens to the bic instruction +; RUN: llc < %s -march=alpha | grep zapnot + + +define zeroext i16 @foo(i64 %y) { +entry: + %tmp.1 = trunc i64 %y to i16 ; <ushort> [#uses=1] + ret i16 %tmp.1 +}
diff --git a/src/LLVM/test/CodeGen/Alpha/zapnot2.ll b/src/LLVM/test/CodeGen/Alpha/zapnot2.ll new file mode 100644 index 0000000..35f7602 --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/zapnot2.ll
@@ -0,0 +1,9 @@ +; Make sure this testcase codegens to the zapnot instruction +; RUN: llc < %s -march=alpha | grep zapnot + +define i64 @bar(i64 %x) { +entry: + %tmp.1 = and i64 %x, 16711935 ; <i64> [#uses=1] + ret i64 %tmp.1 +} +
diff --git a/src/LLVM/test/CodeGen/Alpha/zapnot3.ll b/src/LLVM/test/CodeGen/Alpha/zapnot3.ll new file mode 100644 index 0000000..278dda0 --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/zapnot3.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=alpha | grep zapnot + +;demanded bits mess up this mask in a hard to fix way +;define i64 @foo(i64 %y) { +; %tmp = and i64 %y, 65535 +; %tmp2 = shr i64 %tmp, i8 3 +; ret i64 %tmp2 +;} + +define i64 @foo2(i64 %y) { + %tmp = lshr i64 %y, 3 ; <i64> [#uses=1] + %tmp2 = and i64 %tmp, 8191 ; <i64> [#uses=1] + ret i64 %tmp2 +} +
diff --git a/src/LLVM/test/CodeGen/Alpha/zapnot4.ll b/src/LLVM/test/CodeGen/Alpha/zapnot4.ll new file mode 100644 index 0000000..2e32fca --- /dev/null +++ b/src/LLVM/test/CodeGen/Alpha/zapnot4.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=alpha | grep zapnot + +define i64 @foo(i64 %y) { + %tmp = shl i64 %y, 3 ; <i64> [#uses=1] + %tmp2 = and i64 %tmp, 65535 ; <i64> [#uses=1] + ret i64 %tmp2 +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll b/src/LLVM/test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll new file mode 100644 index 0000000..50fccb4 --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=bfin -join-liveintervals=0 -verify-machineinstrs +; RUN: llc < %s -march=bfin -join-liveintervals=0 -verify-machineinstrs -regalloc=greedy + +; Provoke an error in LowerSubregsPass::LowerExtract where the live range of a +; super-register is illegally extended. + +define i16 @f(i16 %x1, i16 %x2, i16 %x3, i16 %x4) { + %y1 = add i16 %x1, 1 + %y2 = add i16 %x2, 2 + %y3 = add i16 %x3, 3 + %y4 = add i16 %x4, 4 + %z12 = add i16 %y1, %y2 + %z34 = add i16 %y3, %y4 + %p = add i16 %z12, %z34 + ret i16 %p +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/2009-08-11-RegScavenger-CSR.ll b/src/LLVM/test/CodeGen/Blackfin/2009-08-11-RegScavenger-CSR.ll new file mode 100644 index 0000000..e5d1637 --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/2009-08-11-RegScavenger-CSR.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=bfin -verify-machineinstrs + +declare i64 @llvm.cttz.i64(i64) nounwind readnone + +declare i16 @llvm.cttz.i16(i16) nounwind readnone + +declare i8 @llvm.cttz.i8(i8) nounwind readnone + +define void @cttztest(i8 %A, i16 %B, i32 %C, i64 %D, i8* %AP, i16* %BP, i32* %CP, i64* %DP) { + %a = call i8 @llvm.cttz.i8(i8 %A) ; <i8> [#uses=1] + %b = call i16 @llvm.cttz.i16(i16 %B) ; <i16> [#uses=1] + %d = call i64 @llvm.cttz.i64(i64 %D) ; <i64> [#uses=1] + store i8 %a, i8* %AP + store i16 %b, i16* %BP + store i64 %d, i64* %DP + ret void +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/2009-08-15-LiveIn-SubReg.ll b/src/LLVM/test/CodeGen/Blackfin/2009-08-15-LiveIn-SubReg.ll new file mode 100644 index 0000000..0b731dc --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/2009-08-15-LiveIn-SubReg.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -march=bfin -verify-machineinstrs + +; When joining live intervals of sub-registers, an MBB live-in list is not +; updated properly. The register scavenger asserts on an undefined register. + +define i32 @foo(i8 %bar) { +entry: + switch i8 %bar, label %bb1203 [ + i8 117, label %bb1204 + i8 85, label %bb1204 + i8 106, label %bb1204 + ] + +bb1203: ; preds = %entry + ret i32 1 + +bb1204: ; preds = %entry, %entry, %entry + ret i32 2 +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/2009-08-15-MissingDead.ll b/src/LLVM/test/CodeGen/Blackfin/2009-08-15-MissingDead.ll new file mode 100644 index 0000000..dcc3ea0 --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/2009-08-15-MissingDead.ll
@@ -0,0 +1,25 @@ +; RUN: llc < %s -march=bfin -verify-machineinstrs + +; LocalRewriter can forget to transfer a <def,dead> flag when setting up call +; argument registers. This then causes register scavenger asserts. + +declare i32 @printf(i8*, i32, float) + +define i32 @testissue(i32 %i, float %x, float %y) { + br label %bb1 + +bb1: ; preds = %bb1, %0 + %x2 = fmul float %x, 5.000000e-01 ; <float> [#uses=1] + %y2 = fmul float %y, 0x3FECCCCCC0000000 ; <float> [#uses=1] + %z2 = fadd float %x2, %y2 ; <float> [#uses=1] + %z3 = fadd float undef, %z2 ; <float> [#uses=1] + %i1 = shl i32 %i, 3 ; <i32> [#uses=1] + %j1 = add i32 %i, 7 ; <i32> [#uses=1] + %m1 = add i32 %i1, %j1 ; <i32> [#uses=2] + %b = icmp sle i32 %m1, 6 ; <i1> [#uses=1] + br i1 %b, label %bb1, label %bb2 + +bb2: ; preds = %bb1 + %1 = call i32 @printf(i8* undef, i32 %m1, float %z3); <i32> [#uses=0] + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll b/src/LLVM/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll new file mode 100644 index 0000000..b6cd2d4 --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=bfin -verify-machineinstrs + +; An undef argument causes a setugt node to escape instruction selection. + +define void @bugt() { +cond_next305: + %tmp306307 = trunc i32 undef to i8 ; <i8> [#uses=1] + %tmp308 = icmp ugt i8 %tmp306307, 6 ; <i1> [#uses=1] + br i1 %tmp308, label %bb311, label %bb314 + +bb311: ; preds = %cond_next305 + unreachable + +bb314: ; preds = %cond_next305 + ret void +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/add-overflow.ll b/src/LLVM/test/CodeGen/Blackfin/add-overflow.ll new file mode 100644 index 0000000..8dcf3f8 --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/add-overflow.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=bfin -verify-machineinstrs > %t + + %0 = type { i24, i1 } ; type %0 + +define i1 @func2(i24 zeroext %v1, i24 zeroext %v2) nounwind { +entry: + %t = call %0 @llvm.uadd.with.overflow.i24(i24 %v1, i24 %v2) ; <%0> [#uses=1] + %obit = extractvalue %0 %t, 1 ; <i1> [#uses=1] + br i1 %obit, label %carry, label %normal + +normal: ; preds = %entry + ret i1 true + +carry: ; preds = %entry + ret i1 false +} + +declare %0 @llvm.uadd.with.overflow.i24(i24, i24) nounwind
diff --git a/src/LLVM/test/CodeGen/Blackfin/add.ll b/src/LLVM/test/CodeGen/Blackfin/add.ll new file mode 100644 index 0000000..3311c03 --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/add.ll
@@ -0,0 +1,5 @@ +; RUN: llc < %s -march=bfin -verify-machineinstrs +define i32 @add(i32 %A, i32 %B) { + %R = add i32 %A, %B ; <i32> [#uses=1] + ret i32 %R +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/addsub-i128.ll b/src/LLVM/test/CodeGen/Blackfin/addsub-i128.ll new file mode 100644 index 0000000..dd56101 --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/addsub-i128.ll
@@ -0,0 +1,42 @@ +; RUN: llc < %s -march=bfin -verify-machineinstrs + +; These functions have just the right size to annoy the register scavenger: They +; use all the scratch registers, but not all the callee-saved registers. + +define void @test_add(i64 %AL, i64 %AH, i64 %BL, i64 %BH, i64* %RL, i64* %RH) { +entry: + %tmp1 = zext i64 %AL to i128 ; <i128> [#uses=1] + %tmp23 = zext i64 %AH to i128 ; <i128> [#uses=1] + %tmp4 = shl i128 %tmp23, 64 ; <i128> [#uses=1] + %tmp5 = or i128 %tmp4, %tmp1 ; <i128> [#uses=1] + %tmp67 = zext i64 %BL to i128 ; <i128> [#uses=1] + %tmp89 = zext i64 %BH to i128 ; <i128> [#uses=1] + %tmp11 = shl i128 %tmp89, 64 ; <i128> [#uses=1] + %tmp12 = or i128 %tmp11, %tmp67 ; <i128> [#uses=1] + %tmp15 = add i128 %tmp12, %tmp5 ; <i128> [#uses=2] + %tmp1617 = trunc i128 %tmp15 to i64 ; <i64> [#uses=1] + store i64 %tmp1617, i64* %RL + %tmp21 = lshr i128 %tmp15, 64 ; <i128> [#uses=1] + %tmp2122 = trunc i128 %tmp21 to i64 ; <i64> [#uses=1] + store i64 %tmp2122, i64* %RH + ret void +} + +define void @test_sub(i64 %AL, i64 %AH, i64 %BL, i64 %BH, i64* %RL, i64* %RH) { +entry: + %tmp1 = zext i64 %AL to i128 ; <i128> [#uses=1] + %tmp23 = zext i64 %AH to i128 ; <i128> [#uses=1] + %tmp4 = shl i128 %tmp23, 64 ; <i128> [#uses=1] + %tmp5 = or i128 %tmp4, %tmp1 ; <i128> [#uses=1] + %tmp67 = zext i64 %BL to i128 ; <i128> [#uses=1] + %tmp89 = zext i64 %BH to i128 ; <i128> [#uses=1] + %tmp11 = shl i128 %tmp89, 64 ; <i128> [#uses=1] + %tmp12 = or i128 %tmp11, %tmp67 ; <i128> [#uses=1] + %tmp15 = sub i128 %tmp5, %tmp12 ; <i128> [#uses=2] + %tmp1617 = trunc i128 %tmp15 to i64 ; <i64> [#uses=1] + store i64 %tmp1617, i64* %RL + %tmp21 = lshr i128 %tmp15, 64 ; <i128> [#uses=1] + %tmp2122 = trunc i128 %tmp21 to i64 ; <i64> [#uses=1] + store i64 %tmp2122, i64* %RH + ret void +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/basic-i1.ll b/src/LLVM/test/CodeGen/Blackfin/basic-i1.ll new file mode 100644 index 0000000..c63adab --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/basic-i1.ll
@@ -0,0 +1,51 @@ +; RUN: llc < %s -march=bfin > %t + +define i1 @add(i1 %A, i1 %B) { + %R = add i1 %A, %B ; <i1> [#uses=1] + ret i1 %R +} + +define i1 @sub(i1 %A, i1 %B) { + %R = sub i1 %A, %B ; <i1> [#uses=1] + ret i1 %R +} + +define i1 @mul(i1 %A, i1 %B) { + %R = mul i1 %A, %B ; <i1> [#uses=1] + ret i1 %R +} + +define i1 @sdiv(i1 %A, i1 %B) { + %R = sdiv i1 %A, %B ; <i1> [#uses=1] + ret i1 %R +} + +define i1 @udiv(i1 %A, i1 %B) { + %R = udiv i1 %A, %B ; <i1> [#uses=1] + ret i1 %R +} + +define i1 @srem(i1 %A, i1 %B) { + %R = srem i1 %A, %B ; <i1> [#uses=1] + ret i1 %R +} + +define i1 @urem(i1 %A, i1 %B) { + %R = urem i1 %A, %B ; <i1> [#uses=1] + ret i1 %R +} + +define i1 @and(i1 %A, i1 %B) { + %R = and i1 %A, %B ; <i1> [#uses=1] + ret i1 %R +} + +define i1 @or(i1 %A, i1 %B) { + %R = or i1 %A, %B ; <i1> [#uses=1] + ret i1 %R +} + +define i1 @xor(i1 %A, i1 %B) { + %R = xor i1 %A, %B ; <i1> [#uses=1] + ret i1 %R +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/basic-i16.ll b/src/LLVM/test/CodeGen/Blackfin/basic-i16.ll new file mode 100644 index 0000000..541e9a8 --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/basic-i16.ll
@@ -0,0 +1,36 @@ +; RUN: llc < %s -march=bfin + +define i16 @add(i16 %A, i16 %B) { + %R = add i16 %A, %B ; <i16> [#uses=1] + ret i16 %R +} + +define i16 @sub(i16 %A, i16 %B) { + %R = sub i16 %A, %B ; <i16> [#uses=1] + ret i16 %R +} + +define i16 @mul(i16 %A, i16 %B) { + %R = mul i16 %A, %B ; <i16> [#uses=1] + ret i16 %R +} + +define i16 @sdiv(i16 %A, i16 %B) { + %R = sdiv i16 %A, %B ; <i16> [#uses=1] + ret i16 %R +} + +define i16 @udiv(i16 %A, i16 %B) { + %R = udiv i16 %A, %B ; <i16> [#uses=1] + ret i16 %R +} + +define i16 @srem(i16 %A, i16 %B) { + %R = srem i16 %A, %B ; <i16> [#uses=1] + ret i16 %R +} + +define i16 @urem(i16 %A, i16 %B) { + %R = urem i16 %A, %B ; <i16> [#uses=1] + ret i16 %R +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/basic-i32.ll b/src/LLVM/test/CodeGen/Blackfin/basic-i32.ll new file mode 100644 index 0000000..4b5dbfc --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/basic-i32.ll
@@ -0,0 +1,51 @@ +; RUN: llc < %s -march=bfin -verify-machineinstrs + +define i32 @add(i32 %A, i32 %B) { + %R = add i32 %A, %B ; <i32> [#uses=1] + ret i32 %R +} + +define i32 @sub(i32 %A, i32 %B) { + %R = sub i32 %A, %B ; <i32> [#uses=1] + ret i32 %R +} + +define i32 @mul(i32 %A, i32 %B) { + %R = mul i32 %A, %B ; <i32> [#uses=1] + ret i32 %R +} + +define i32 @sdiv(i32 %A, i32 %B) { + %R = sdiv i32 %A, %B ; <i32> [#uses=1] + ret i32 %R +} + +define i32 @udiv(i32 %A, i32 %B) { + %R = udiv i32 %A, %B ; <i32> [#uses=1] + ret i32 %R +} + +define i32 @srem(i32 %A, i32 %B) { + %R = srem i32 %A, %B ; <i32> [#uses=1] + ret i32 %R +} + +define i32 @urem(i32 %A, i32 %B) { + %R = urem i32 %A, %B ; <i32> [#uses=1] + ret i32 %R +} + +define i32 @and(i32 %A, i32 %B) { + %R = and i32 %A, %B ; <i32> [#uses=1] + ret i32 %R +} + +define i32 @or(i32 %A, i32 %B) { + %R = or i32 %A, %B ; <i32> [#uses=1] + ret i32 %R +} + +define i32 @xor(i32 %A, i32 %B) { + %R = xor i32 %A, %B ; <i32> [#uses=1] + ret i32 %R +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/basic-i64.ll b/src/LLVM/test/CodeGen/Blackfin/basic-i64.ll new file mode 100644 index 0000000..d4dd8e2 --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/basic-i64.ll
@@ -0,0 +1,51 @@ +; RUN: llc < %s -march=bfin -verify-machineinstrs + +define i64 @add(i64 %A, i64 %B) { + %R = add i64 %A, %B ; <i64> [#uses=1] + ret i64 %R +} + +define i64 @sub(i64 %A, i64 %B) { + %R = sub i64 %A, %B ; <i64> [#uses=1] + ret i64 %R +} + +define i64 @mul(i64 %A, i64 %B) { + %R = mul i64 %A, %B ; <i64> [#uses=1] + ret i64 %R +} + +define i64 @sdiv(i64 %A, i64 %B) { + %R = sdiv i64 %A, %B ; <i64> [#uses=1] + ret i64 %R +} + +define i64 @udiv(i64 %A, i64 %B) { + %R = udiv i64 %A, %B ; <i64> [#uses=1] + ret i64 %R +} + +define i64 @srem(i64 %A, i64 %B) { + %R = srem i64 %A, %B ; <i64> [#uses=1] + ret i64 %R +} + +define i64 @urem(i64 %A, i64 %B) { + %R = urem i64 %A, %B ; <i64> [#uses=1] + ret i64 %R +} + +define i64 @and(i64 %A, i64 %B) { + %R = and i64 %A, %B ; <i64> [#uses=1] + ret i64 %R +} + +define i64 @or(i64 %A, i64 %B) { + %R = or i64 %A, %B ; <i64> [#uses=1] + ret i64 %R +} + +define i64 @xor(i64 %A, i64 %B) { + %R = xor i64 %A, %B ; <i64> [#uses=1] + ret i64 %R +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/basic-i8.ll b/src/LLVM/test/CodeGen/Blackfin/basic-i8.ll new file mode 100644 index 0000000..2c7ce9d --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/basic-i8.ll
@@ -0,0 +1,51 @@ +; RUN: llc < %s -march=bfin + +define i8 @add(i8 %A, i8 %B) { + %R = add i8 %A, %B ; <i8> [#uses=1] + ret i8 %R +} + +define i8 @sub(i8 %A, i8 %B) { + %R = sub i8 %A, %B ; <i8> [#uses=1] + ret i8 %R +} + +define i8 @mul(i8 %A, i8 %B) { + %R = mul i8 %A, %B ; <i8> [#uses=1] + ret i8 %R +} + +define i8 @sdiv(i8 %A, i8 %B) { + %R = sdiv i8 %A, %B ; <i8> [#uses=1] + ret i8 %R +} + +define i8 @udiv(i8 %A, i8 %B) { + %R = udiv i8 %A, %B ; <i8> [#uses=1] + ret i8 %R +} + +define i8 @srem(i8 %A, i8 %B) { + %R = srem i8 %A, %B ; <i8> [#uses=1] + ret i8 %R +} + +define i8 @urem(i8 %A, i8 %B) { + %R = urem i8 %A, %B ; <i8> [#uses=1] + ret i8 %R +} + +define i8 @and(i8 %A, i8 %B) { + %R = and i8 %A, %B ; <i8> [#uses=1] + ret i8 %R +} + +define i8 @or(i8 %A, i8 %B) { + %R = or i8 %A, %B ; <i8> [#uses=1] + ret i8 %R +} + +define i8 @xor(i8 %A, i8 %B) { + %R = xor i8 %A, %B ; <i8> [#uses=1] + ret i8 %R +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/basictest.ll b/src/LLVM/test/CodeGen/Blackfin/basictest.ll new file mode 100644 index 0000000..85040df --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/basictest.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -march=bfin -verify-machineinstrs + +define void @void(i32, i32) { + add i32 0, 0 ; <i32>:3 [#uses=2] + sub i32 0, 4 ; <i32>:4 [#uses=2] + br label %5 + +; <label>:5 ; preds = %5, %2 + add i32 %0, %1 ; <i32>:6 [#uses=2] + sub i32 %6, %4 ; <i32>:7 [#uses=1] + icmp sle i32 %7, %3 ; <i1>:8 [#uses=1] + br i1 %8, label %9, label %5 + +; <label>:9 ; preds = %5 + add i32 %0, %1 ; <i32>:10 [#uses=0] + sub i32 %6, %4 ; <i32>:11 [#uses=1] + icmp sle i32 %11, %3 ; <i1>:12 [#uses=0] + ret void +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/cmp-small-imm.ll b/src/LLVM/test/CodeGen/Blackfin/cmp-small-imm.ll new file mode 100644 index 0000000..e1732a8 --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/cmp-small-imm.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=bfin > %t + +define i1 @cmp3(i32 %A) { + %R = icmp uge i32 %A, 2 + ret i1 %R +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/cmp64.ll b/src/LLVM/test/CodeGen/Blackfin/cmp64.ll new file mode 100644 index 0000000..6c4f9c5 --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/cmp64.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=bfin + +; This test tries to use a JustCC register as a data operand for MOVEcc. It +; copies (JustCC -> DP), failing because JustCC can only be copied to D. +; The proper solution would be to restrict the virtual register to D only. + +define i32 @main() { +entry: + br label %loopentry + +loopentry: + %done = icmp sle i64 undef, 5 + br i1 %done, label %loopentry, label %exit.1 + +exit.1: + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/ct32.ll b/src/LLVM/test/CodeGen/Blackfin/ct32.ll new file mode 100644 index 0000000..363286d --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/ct32.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -march=bfin + +declare i32 @llvm.ctlz.i32(i32) +declare i32 @llvm.cttz.i32(i32) +declare i32 @llvm.ctpop.i32(i32) + +define i32 @ctlztest(i32 %B) { + %b = call i32 @llvm.ctlz.i32( i32 %B ) + ret i32 %b +} + +define i32 @cttztest(i32 %B) { + %b = call i32 @llvm.cttz.i32( i32 %B ) + ret i32 %b +} + +define i32 @ctpoptest(i32 %B) { + %b = call i32 @llvm.ctpop.i32( i32 %B ) + ret i32 %b +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/ct64.ll b/src/LLVM/test/CodeGen/Blackfin/ct64.ll new file mode 100644 index 0000000..7502434 --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/ct64.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -march=bfin + +declare i64 @llvm.ctlz.i64(i64) +declare i64 @llvm.cttz.i64(i64) +declare i64 @llvm.ctpop.i64(i64) + +define i64 @ctlztest(i64 %B) { + %b = call i64 @llvm.ctlz.i64( i64 %B ) + ret i64 %b +} + +define i64 @cttztest(i64 %B) { + %b = call i64 @llvm.cttz.i64( i64 %B ) + ret i64 %b +} + +define i64 @ctpoptest(i64 %B) { + %b = call i64 @llvm.ctpop.i64( i64 %B ) + ret i64 %b +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/ctlz16.ll b/src/LLVM/test/CodeGen/Blackfin/ctlz16.ll new file mode 100644 index 0000000..eb4af23 --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/ctlz16.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=bfin + +declare i16 @llvm.ctlz.i16(i16) + +define i16 @ctlztest(i16 %B) { + %b = call i16 @llvm.ctlz.i16( i16 %B ) ; <i16> [#uses=1] + ret i16 %b +} +define i16 @ctlztest_z(i16 zeroext %B) { + %b = call i16 @llvm.ctlz.i16( i16 %B ) ; <i16> [#uses=1] + ret i16 %b +} + +define i16 @ctlztest_s(i16 signext %B) { + %b = call i16 @llvm.ctlz.i16( i16 %B ) ; <i16> [#uses=1] + ret i16 %b +} +
diff --git a/src/LLVM/test/CodeGen/Blackfin/ctlz64.ll b/src/LLVM/test/CodeGen/Blackfin/ctlz64.ll new file mode 100644 index 0000000..3e22f88 --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/ctlz64.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=bfin -verify-machineinstrs > %t + +@.str = external constant [14 x i8] ; <[14 x i8]*> [#uses=1] + +define i32 @main(i64 %arg) nounwind { +entry: + %tmp47 = tail call i64 @llvm.cttz.i64(i64 %arg) ; <i64> [#uses=1] + %tmp48 = trunc i64 %tmp47 to i32 ; <i32> [#uses=1] + %tmp40 = tail call i32 (i8*, ...)* @printf(i8* noalias getelementptr ([14 x i8]* @.str, i32 0, i32 0), i64 %arg, i32 0, i32 %tmp48, i32 0) nounwind ; <i32> [#uses=0] + ret i32 0 +} + +declare i32 @printf(i8* noalias, ...) nounwind + +declare i64 @llvm.cttz.i64(i64) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/Blackfin/ctpop16.ll b/src/LLVM/test/CodeGen/Blackfin/ctpop16.ll new file mode 100644 index 0000000..8b6c07e --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/ctpop16.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=bfin + +declare i16 @llvm.ctpop.i16(i16) + +define i16 @ctpoptest(i16 %B) { + %b = call i16 @llvm.ctpop.i16( i16 %B ) ; <i16> [#uses=1] + ret i16 %b +} +define i16 @ctpoptest_z(i16 zeroext %B) { + %b = call i16 @llvm.ctpop.i16( i16 %B ) ; <i16> [#uses=1] + ret i16 %b +} + +define i16 @ctpoptest_s(i16 signext %B) { + %b = call i16 @llvm.ctpop.i16( i16 %B ) ; <i16> [#uses=1] + ret i16 %b +} +
diff --git a/src/LLVM/test/CodeGen/Blackfin/cttz16.ll b/src/LLVM/test/CodeGen/Blackfin/cttz16.ll new file mode 100644 index 0000000..510882a --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/cttz16.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=bfin + +declare i16 @llvm.cttz.i16(i16) + +define i16 @cttztest(i16 %B) { + %b = call i16 @llvm.cttz.i16( i16 %B ) ; <i16> [#uses=1] + ret i16 %b +} +define i16 @cttztest_z(i16 zeroext %B) { + %b = call i16 @llvm.cttz.i16( i16 %B ) ; <i16> [#uses=1] + ret i16 %b +} + +define i16 @cttztest_s(i16 signext %B) { + %b = call i16 @llvm.cttz.i16( i16 %B ) ; <i16> [#uses=1] + ret i16 %b +} +
diff --git a/src/LLVM/test/CodeGen/Blackfin/cycles.ll b/src/LLVM/test/CodeGen/Blackfin/cycles.ll new file mode 100644 index 0000000..6451c74 --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/cycles.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=bfin | FileCheck %s + +declare i64 @llvm.readcyclecounter() + +; CHECK: cycles +; CHECK: cycles2 +define i64 @cyc64() { + %tmp.1 = call i64 @llvm.readcyclecounter() + ret i64 %tmp.1 +} + +; CHECK: cycles +define i32@cyc32() { + %tmp.1 = call i64 @llvm.readcyclecounter() + %s = trunc i64 %tmp.1 to i32 + ret i32 %s +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/dg.exp b/src/LLVM/test/CodeGen/Blackfin/dg.exp new file mode 100644 index 0000000..5fdbe5f --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/dg.exp
@@ -0,0 +1,5 @@ +load_lib llvm.exp + +if { [llvm_supports_target Blackfin] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/double-cast.ll b/src/LLVM/test/CodeGen/Blackfin/double-cast.ll new file mode 100644 index 0000000..815ca79 --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/double-cast.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=bfin + +declare i32 @printf(i8*, ...) + +define i32 @main() { + %1 = call i32 (i8*, ...)* @printf(i8* undef, double undef) + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/frameindex.ll b/src/LLVM/test/CodeGen/Blackfin/frameindex.ll new file mode 100644 index 0000000..7e677fb --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/frameindex.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=bfin -verify-machineinstrs + +declare i32 @SIM(i8*, i8*, i32, i32, i32, [256 x i32]*, i32, i32, i32) + +define void @foo() { +bb0: + %V = alloca [256 x i32], i32 256 ; <[256 x i32]*> [#uses=1] + %0 = call i32 @SIM(i8* null, i8* null, i32 0, i32 0, i32 0, [256 x i32]* %V, i32 0, i32 0, i32 2) ; <i32> [#uses=0] + ret void +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/i17mem.ll b/src/LLVM/test/CodeGen/Blackfin/i17mem.ll new file mode 100644 index 0000000..bc5ade7 --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/i17mem.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=bfin -verify-machineinstrs +@i17_l = external global i17 ; <i17*> [#uses=1] +@i17_s = external global i17 ; <i17*> [#uses=1] + +define void @i17_ls() nounwind { + %tmp = load i17* @i17_l ; <i17> [#uses=1] + store i17 %tmp, i17* @i17_s + ret void +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/i1mem.ll b/src/LLVM/test/CodeGen/Blackfin/i1mem.ll new file mode 100644 index 0000000..cb03e3d --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/i1mem.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=bfin -verify-machineinstrs +@i1_l = external global i1 ; <i1*> [#uses=1] +@i1_s = external global i1 ; <i1*> [#uses=1] + +define void @i1_ls() nounwind { + %tmp = load i1* @i1_l ; <i1> [#uses=1] + store i1 %tmp, i1* @i1_s + ret void +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/i1ops.ll b/src/LLVM/test/CodeGen/Blackfin/i1ops.ll new file mode 100644 index 0000000..6b5612c --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/i1ops.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=bfin -verify-machineinstrs + +define i32 @adj(i32 %d.1, i32 %ct.1) { +entry: + %tmp.22.not = trunc i32 %ct.1 to i1 ; <i1> [#uses=1] + %tmp.221 = xor i1 %tmp.22.not, true ; <i1> [#uses=1] + %tmp.26 = or i1 false, %tmp.221 ; <i1> [#uses=1] + %tmp.27 = zext i1 %tmp.26 to i32 ; <i32> [#uses=1] + ret i32 %tmp.27 +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/i216mem.ll b/src/LLVM/test/CodeGen/Blackfin/i216mem.ll new file mode 100644 index 0000000..9f8cf48 --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/i216mem.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=bfin -verify-machineinstrs +@i216_l = external global i216 ; <i216*> [#uses=1] +@i216_s = external global i216 ; <i216*> [#uses=1] + +define void @i216_ls() nounwind { + %tmp = load i216* @i216_l ; <i216> [#uses=1] + store i216 %tmp, i216* @i216_s + ret void +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/i248mem.ll b/src/LLVM/test/CodeGen/Blackfin/i248mem.ll new file mode 100644 index 0000000..db23f54 --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/i248mem.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=bfin +@i248_l = external global i248 ; <i248*> [#uses=1] +@i248_s = external global i248 ; <i248*> [#uses=1] + +define void @i248_ls() nounwind { + %tmp = load i248* @i248_l ; <i248> [#uses=1] + store i248 %tmp, i248* @i248_s + ret void +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/i256mem.ll b/src/LLVM/test/CodeGen/Blackfin/i256mem.ll new file mode 100644 index 0000000..bc5ade7 --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/i256mem.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=bfin -verify-machineinstrs +@i17_l = external global i17 ; <i17*> [#uses=1] +@i17_s = external global i17 ; <i17*> [#uses=1] + +define void @i17_ls() nounwind { + %tmp = load i17* @i17_l ; <i17> [#uses=1] + store i17 %tmp, i17* @i17_s + ret void +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/i256param.ll b/src/LLVM/test/CodeGen/Blackfin/i256param.ll new file mode 100644 index 0000000..df74c9a --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/i256param.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=bfin -verify-machineinstrs +@i256_s = external global i256 ; <i256*> [#uses=1] + +define void @i256_ls(i256 %x) nounwind { + store i256 %x, i256* @i256_s + ret void +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/i56param.ll b/src/LLVM/test/CodeGen/Blackfin/i56param.ll new file mode 100644 index 0000000..ca02563 --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/i56param.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=bfin -verify-machineinstrs +@i56_l = external global i56 ; <i56*> [#uses=1] +@i56_s = external global i56 ; <i56*> [#uses=1] + +define void @i56_ls(i56 %x) nounwind { + store i56 %x, i56* @i56_s + ret void +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/i8mem.ll b/src/LLVM/test/CodeGen/Blackfin/i8mem.ll new file mode 100644 index 0000000..ea3a67e --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/i8mem.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=bfin + +@i8_l = external global i8 ; <i8*> [#uses=1] +@i8_s = external global i8 ; <i8*> [#uses=1] + +define void @i8_ls() nounwind { + %tmp = load i8* @i8_l ; <i8> [#uses=1] + store i8 %tmp, i8* @i8_s + ret void +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/inline-asm.ll b/src/LLVM/test/CodeGen/Blackfin/inline-asm.ll new file mode 100644 index 0000000..d623f6b --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/inline-asm.ll
@@ -0,0 +1,38 @@ +; RUN: llc < %s -march=bfin | FileCheck %s + +; Standard "r" +; CHECK: r0 = r0 + r1; +define i32 @add_r(i32 %A, i32 %B) { + %R = call i32 asm "$0 = $1 + $2;", "=r,r,r"( i32 %A, i32 %B ) nounwind + ret i32 %R +} + +; Target "d" +; CHECK: r0 = r0 - r1; +define i32 @add_d(i32 %A, i32 %B) { + %R = call i32 asm "$0 = $1 - $2;", "=d,d,d"( i32 %A, i32 %B ) nounwind + ret i32 %R +} + +; Target "a" for P-regs +; CHECK: p0 = (p0 + p1) << 1; +define i32 @add_a(i32 %A, i32 %B) { + %R = call i32 asm "$0 = ($1 + $2) << 1;", "=a,a,a"( i32 %A, i32 %B ) nounwind + ret i32 %R +} + +; Target "z" for P0, P1, P2. This is not a real regclass +; CHECK: p0 = (p0 + p1) << 2; +define i32 @add_Z(i32 %A, i32 %B) { + %R = call i32 asm "$0 = ($1 + $2) << 2;", "=z,z,z"( i32 %A, i32 %B ) nounwind + ret i32 %R +} + +; Target "C" for CC. This is a single register +; CHECK: cc = p0 < p1; +; CHECK: r0 = cc; +define i32 @add_C(i32 %A, i32 %B) { + %R = call i32 asm "$0 = $1 < $2;", "=C,z,z"( i32 %A, i32 %B ) nounwind + ret i32 %R +} +
diff --git a/src/LLVM/test/CodeGen/Blackfin/int-setcc.ll b/src/LLVM/test/CodeGen/Blackfin/int-setcc.ll new file mode 100644 index 0000000..6bd9f86 --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/int-setcc.ll
@@ -0,0 +1,80 @@ +; RUN: llc < %s -march=bfin -verify-machineinstrs > %t + +define fastcc void @Evaluate() { +entry: + br i1 false, label %cond_false186, label %cond_true + +cond_true: ; preds = %entry + ret void + +cond_false186: ; preds = %entry + br i1 false, label %cond_true293, label %bb203 + +bb203: ; preds = %cond_false186 + ret void + +cond_true293: ; preds = %cond_false186 + br i1 false, label %cond_true298, label %cond_next317 + +cond_true298: ; preds = %cond_true293 + br i1 false, label %cond_next518, label %cond_true397.preheader + +cond_next317: ; preds = %cond_true293 + ret void + +cond_true397.preheader: ; preds = %cond_true298 + ret void + +cond_next518: ; preds = %cond_true298 + br i1 false, label %bb1069, label %cond_true522 + +cond_true522: ; preds = %cond_next518 + ret void + +bb1069: ; preds = %cond_next518 + br i1 false, label %cond_next1131, label %bb1096 + +bb1096: ; preds = %bb1069 + ret void + +cond_next1131: ; preds = %bb1069 + br i1 false, label %cond_next1207, label %cond_true1150 + +cond_true1150: ; preds = %cond_next1131 + ret void + +cond_next1207: ; preds = %cond_next1131 + br i1 false, label %cond_next1219, label %cond_true1211 + +cond_true1211: ; preds = %cond_next1207 + ret void + +cond_next1219: ; preds = %cond_next1207 + br i1 false, label %cond_true1223, label %cond_next1283 + +cond_true1223: ; preds = %cond_next1219 + br i1 false, label %cond_true1254, label %cond_true1264 + +cond_true1254: ; preds = %cond_true1223 + br i1 false, label %bb1567, label %cond_true1369.preheader + +cond_true1264: ; preds = %cond_true1223 + ret void + +cond_next1283: ; preds = %cond_next1219 + ret void + +cond_true1369.preheader: ; preds = %cond_true1254 + ret void + +bb1567: ; preds = %cond_true1254 + %tmp1605 = load i8* null ; <i8> [#uses=1] + %tmp1606 = icmp eq i8 %tmp1605, 0 ; <i1> [#uses=1] + br i1 %tmp1606, label %cond_next1637, label %cond_true1607 + +cond_true1607: ; preds = %bb1567 + ret void + +cond_next1637: ; preds = %bb1567 + ret void 
+}
diff --git a/src/LLVM/test/CodeGen/Blackfin/invalid-apint.ll b/src/LLVM/test/CodeGen/Blackfin/invalid-apint.ll new file mode 100644 index 0000000..a8c01ba --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/invalid-apint.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=bfin + +; Assertion failed: (width < BitWidth && "Invalid APInt Truncate request"), +; function trunc, file APInt.cpp, line 956. + +@str2 = external global [29 x i8] + +define void @printArgsNoRet(i32 %a1, float %a2, i8 %a3, double %a4, i8* %a5, i32 %a6, float %a7, i8 %a8, double %a9, i8* %a10, i32 %a11, float %a12, i8 %a13, double %a14, i8* %a15) { +entry: + %tmp17 = sext i8 %a13 to i32 + %tmp23 = call i32 (i8*, ...)* @printf(i8* getelementptr ([29 x i8]* @str2, i32 0, i64 0), i32 %a11, double 0.000000e+00, i32 %tmp17, double %a14, i32 0) + ret void +} + +declare i32 @printf(i8*, ...)
diff --git a/src/LLVM/test/CodeGen/Blackfin/jumptable.ll b/src/LLVM/test/CodeGen/Blackfin/jumptable.ll new file mode 100644 index 0000000..263533c --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/jumptable.ll
@@ -0,0 +1,53 @@ +; RUN: llc < %s -march=bfin -verify-machineinstrs | FileCheck %s + +; CHECK: .section .rodata +; CHECK: JTI0_0: +; CHECK: .long .BB0_1 + +define i32 @oper(i32 %op, i32 %A, i32 %B) { +entry: + switch i32 %op, label %bbx [ + i32 1 , label %bb1 + i32 2 , label %bb2 + i32 3 , label %bb3 + i32 4 , label %bb4 + i32 5 , label %bb5 + i32 6 , label %bb6 + i32 7 , label %bb7 + i32 8 , label %bb8 + i32 9 , label %bb9 + i32 10, label %bb10 + ] +bb1: + %R1 = add i32 %A, %B ; <i32> [#uses=1] + ret i32 %R1 +bb2: + %R2 = sub i32 %A, %B ; <i32> [#uses=1] + ret i32 %R2 +bb3: + %R3 = mul i32 %A, %B ; <i32> [#uses=1] + ret i32 %R3 +bb4: + %R4 = sdiv i32 %A, %B ; <i32> [#uses=1] + ret i32 %R4 +bb5: + %R5 = udiv i32 %A, %B ; <i32> [#uses=1] + ret i32 %R5 +bb6: + %R6 = srem i32 %A, %B ; <i32> [#uses=1] + ret i32 %R6 +bb7: + %R7 = urem i32 %A, %B ; <i32> [#uses=1] + ret i32 %R7 +bb8: + %R8 = and i32 %A, %B ; <i32> [#uses=1] + ret i32 %R8 +bb9: + %R9 = or i32 %A, %B ; <i32> [#uses=1] + ret i32 %R9 +bb10: + %R10 = xor i32 %A, %B ; <i32> [#uses=1] + ret i32 %R10 +bbx: + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/large-switch.ll b/src/LLVM/test/CodeGen/Blackfin/large-switch.ll new file mode 100644 index 0000000..02d32ef --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/large-switch.ll
@@ -0,0 +1,187 @@ +; RUN: llc < %s -march=bfin + +; The switch expansion uses a dynamic shl, and it produces a jumptable + +define void @athlon_fp_unit_ready_cost() { +entry: + switch i32 0, label %UnifiedReturnBlock [ + i32 -1, label %bb2063 + i32 19, label %bb2035 + i32 20, label %bb2035 + i32 21, label %bb2035 + i32 23, label %bb2035 + i32 24, label %bb2035 + i32 27, label %bb2035 + i32 32, label %bb2035 + i32 33, label %bb1994 + i32 35, label %bb2035 + i32 36, label %bb1994 + i32 90, label %bb1948 + i32 94, label %bb1948 + i32 95, label %bb1948 + i32 133, label %bb1419 + i32 135, label %bb1238 + i32 136, label %bb1238 + i32 137, label %bb1238 + i32 138, label %bb1238 + i32 139, label %bb1201 + i32 140, label %bb1201 + i32 141, label %bb1154 + i32 142, label %bb1126 + i32 144, label %bb1201 + i32 145, label %bb1126 + i32 146, label %bb1201 + i32 147, label %bb1126 + i32 148, label %bb1201 + i32 149, label %bb1126 + i32 150, label %bb1201 + i32 151, label %bb1126 + i32 152, label %bb1096 + i32 153, label %bb1096 + i32 154, label %bb1096 + i32 157, label %bb1096 + i32 158, label %bb1096 + i32 159, label %bb1096 + i32 162, label %bb1096 + i32 163, label %bb1096 + i32 164, label %bb1096 + i32 167, label %bb1201 + i32 168, label %bb1201 + i32 170, label %bb1201 + i32 171, label %bb1201 + i32 173, label %bb1201 + i32 174, label %bb1201 + i32 176, label %bb1201 + i32 177, label %bb1201 + i32 179, label %bb993 + i32 180, label %bb993 + i32 181, label %bb993 + i32 182, label %bb993 + i32 183, label %bb993 + i32 184, label %bb993 + i32 365, label %bb1126 + i32 366, label %bb1126 + i32 367, label %bb1126 + i32 368, label %bb1126 + i32 369, label %bb1126 + i32 370, label %bb1126 + i32 371, label %bb1126 + i32 372, label %bb1126 + i32 373, label %bb1126 + i32 384, label %bb1126 + i32 385, label %bb1126 + i32 386, label %bb1126 + i32 387, label %bb1126 + i32 388, label %bb1126 + i32 389, label %bb1126 + i32 390, label %bb1126 + i32 391, label %bb1126 + i32 392, label %bb1126 
+ i32 525, label %bb919 + i32 526, label %bb839 + i32 528, label %bb919 + i32 529, label %bb839 + i32 532, label %cond_next6.i97 + i32 533, label %cond_next6.i81 + i32 534, label %bb495 + i32 536, label %cond_next6.i81 + i32 537, label %cond_next6.i81 + i32 538, label %bb396 + i32 539, label %bb288 + i32 541, label %bb396 + i32 542, label %bb396 + i32 543, label %bb396 + i32 544, label %bb396 + i32 545, label %bb189 + i32 546, label %cond_next6.i + i32 547, label %bb189 + i32 548, label %cond_next6.i + i32 549, label %bb189 + i32 550, label %cond_next6.i + i32 551, label %bb189 + i32 552, label %cond_next6.i + i32 553, label %bb189 + i32 554, label %cond_next6.i + i32 555, label %bb189 + i32 556, label %cond_next6.i + i32 557, label %bb189 + i32 558, label %cond_next6.i + i32 618, label %bb40 + i32 619, label %bb18 + i32 620, label %bb40 + i32 621, label %bb10 + i32 622, label %bb10 + ] + +bb10: + ret void + +bb18: + ret void + +bb40: + ret void + +cond_next6.i: + ret void + +bb189: + ret void + +bb288: + ret void + +bb396: + ret void + +bb495: + ret void + +cond_next6.i81: + ret void + +cond_next6.i97: + ret void + +bb839: + ret void + +bb919: + ret void + +bb993: + ret void + +bb1096: + ret void + +bb1126: + ret void + +bb1154: + ret void + +bb1201: + ret void + +bb1238: + ret void + +bb1419: + ret void + +bb1948: + ret void + +bb1994: + ret void + +bb2035: + ret void + +bb2063: + ret void + +UnifiedReturnBlock: + ret void +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/load-i16.ll b/src/LLVM/test/CodeGen/Blackfin/load-i16.ll new file mode 100644 index 0000000..eb18d41 --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/load-i16.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=bfin -verify-machineinstrs + +; This somewhat contrived function heavily exercises register classes +; It can trick -join-cross-class-copies into making illegal joins + +define void @f(i16** nocapture %p) nounwind readonly { +entry: + %tmp1 = load i16** %p ; <i16*> [#uses=1] + %tmp2 = load i16* %tmp1 ; <i16> [#uses=1] + %ptr = getelementptr i16* %tmp1, i16 %tmp2 + store i16 %tmp2, i16* %ptr + ret void +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/logic-i16.ll b/src/LLVM/test/CodeGen/Blackfin/logic-i16.ll new file mode 100644 index 0000000..e44672f --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/logic-i16.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=bfin + +define i16 @and(i16 %A, i16 %B) { + %R = and i16 %A, %B ; <i16> [#uses=1] + ret i16 %R +} + +define i16 @or(i16 %A, i16 %B) { + %R = or i16 %A, %B ; <i16> [#uses=1] + ret i16 %R +} + +define i16 @xor(i16 %A, i16 %B) { + %R = xor i16 %A, %B ; <i16> [#uses=1] + ret i16 %R +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/many-args.ll b/src/LLVM/test/CodeGen/Blackfin/many-args.ll new file mode 100644 index 0000000..2df32ca --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/many-args.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -march=bfin -verify-machineinstrs + + %0 = type { i32, float, float, float, float, float, float, float, float, float, float } ; type %0 + %struct..s_segment_inf = type { float, i32, i16, i16, float, float, i32, float, float } + +define i32 @main(i32 %argc.1, i8** %argv.1) { +entry: + %tmp.218 = load float* null ; <float> [#uses=1] + %tmp.219 = getelementptr %0* null, i64 0, i32 6 ; <float*> [#uses=1] + %tmp.220 = load float* %tmp.219 ; <float> [#uses=1] + %tmp.221 = getelementptr %0* null, i64 0, i32 7 ; <float*> [#uses=1] + %tmp.222 = load float* %tmp.221 ; <float> [#uses=1] + %tmp.223 = getelementptr %0* null, i64 0, i32 8 ; <float*> [#uses=1] + %tmp.224 = load float* %tmp.223 ; <float> [#uses=1] + %tmp.225 = getelementptr %0* null, i64 0, i32 9 ; <float*> [#uses=1] + %tmp.226 = load float* %tmp.225 ; <float> [#uses=1] + %tmp.227 = getelementptr %0* null, i64 0, i32 10 ; <float*> [#uses=1] + %tmp.228 = load float* %tmp.227 ; <float> [#uses=1] + call void @place_and_route(i32 0, i32 0, float 0.000000e+00, i32 0, i32 0, i8* null, i32 0, i32 0, i8* null, i8* null, i8* null, i8* null, i32 0, i32 0, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, i32 0, i32 0, i32 0, i32 0, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, i32 0, i32 0, i16 0, i16 0, i16 0, float 0.000000e+00, float 0.000000e+00, %struct..s_segment_inf* null, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float %tmp.218, float %tmp.220, float %tmp.222, float %tmp.224, float %tmp.226, float %tmp.228) + ret i32 0 +} + +declare void @place_and_route(i32, i32, float, i32, i32, i8*, i32, i32, i8*, i8*, i8*, i8*, i32, i32, i32, float, float, float, float, float, float, float, float, float, i32, i32, i32, i32, i32, float, float, float, i32, i32, i16, i16, i16, float, float, 
%struct..s_segment_inf*, i32, float, float, float, float, float, float, float, float, float, float)
diff --git a/src/LLVM/test/CodeGen/Blackfin/mulhu.ll b/src/LLVM/test/CodeGen/Blackfin/mulhu.ll new file mode 100644 index 0000000..72bacee --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/mulhu.ll
@@ -0,0 +1,106 @@ +; RUN: llc < %s -march=bfin -verify-machineinstrs > %t + + %struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } + %struct.VEC_edge = type { i32, i32, [1 x %struct.edge_def*] } + %struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] } + %struct.basic_block_def = type { %struct.rtx_def*, %struct.rtx_def*, %struct.tree_node*, %struct.VEC_edge*, %struct.VEC_edge*, %struct.bitmap_head_def*, %struct.bitmap_head_def*, i8*, %struct.loop*, [2 x %struct.et_node*], %struct.basic_block_def*, %struct.basic_block_def*, %struct.reorder_block_def*, %struct.bb_ann_d*, i64, i32, i32, i32, i32 } + %struct.bb_ann_d = type { %struct.tree_node*, i8, %struct.edge_prediction* } + %struct.bitmap_element_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, [4 x i32] } + %struct.bitmap_head_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, %struct.bitmap_obstack* } + %struct.bitmap_obstack = type { %struct.bitmap_element_def*, %struct.bitmap_head_def*, %struct.obstack } + %struct.cost_pair = type { %struct.iv_cand*, i32, %struct.bitmap_head_def* } + %struct.dataflow_d = type { %struct.varray_head_tag*, [2 x %struct.tree_node*] } + %struct.def_operand_ptr = type { %struct.tree_node** } + %struct.def_optype_d = type { i32, [1 x %struct.def_operand_ptr] } + %struct.edge_def = type { %struct.basic_block_def*, %struct.basic_block_def*, %struct.edge_def_insns, i8*, %struct.location_t*, i32, i32, i64, i32 } + %struct.edge_def_insns = type { %struct.rtx_def* } + %struct.edge_prediction = type { %struct.edge_prediction*, %struct.edge_def*, i32, i32 } + %struct.eh_status = type opaque + %struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack*, i32, %struct.location_t, i32, i8*, %struct.rtx_def** } + %struct.et_node = type opaque + %struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* 
} + %struct.function = type { %struct.eh_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.CUMULATIVE_ARGS, %struct.rtx_def*, %struct.rtx_def*, %struct.initial_value_struct*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i8, i32, i64, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.varray_head_tag*, %struct.temp_slot*, i32, %struct.var_refs_queue*, i32, i32, %struct.rtvec_def*, %struct.tree_node*, i32, i32, i32, %struct.machine_function*, i32, i32, i1, i1, %struct.language_function*, %struct.rtx_def*, i32, i32, i32, i32, %struct.location_t, %struct.varray_head_tag*, %struct.tree_node*, i8, i8, i8 } + %struct.htab = type { i32 (i8*)*, i32 (i8*, i8*)*, void (i8*)*, i8**, i32, i32, i32, i32, i32, i8* (i32, i32)*, void (i8*)*, i8*, i8* (i8*, i32, i32)*, void (i8*, i8*)*, i32 } + %struct.initial_value_struct = type opaque + %struct.iv = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i1, i1, i32 } + %struct.iv_cand = type { i32, i1, i32, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.iv*, i32 } + %struct.iv_use = type { i32, i32, %struct.iv*, %struct.tree_node*, %struct.tree_node**, %struct.bitmap_head_def*, i32, %struct.cost_pair*, %struct.iv_cand* } + %struct.ivopts_data = type { %struct.loop*, %struct.htab*, i32, %struct.version_info*, %struct.bitmap_head_def*, i32, %struct.varray_head_tag*, %struct.varray_head_tag*, %struct.bitmap_head_def*, i1 } + %struct.lang_decl = type opaque + %struct.language_function = type opaque + %struct.location_t = type { i8*, i32 } + %struct.loop = type { i32, %struct.basic_block_def*, %struct.basic_block_def*, %struct.basic_block_def*, %struct.lpt_decision, i32, i32, %struct.edge_def**, i32, %struct.basic_block_def*, 
%struct.basic_block_def*, i32, %struct.edge_def**, i32, %struct.edge_def**, i32, %struct.simple_bitmap_def*, i32, %struct.loop**, i32, %struct.loop*, %struct.loop*, %struct.loop*, %struct.loop*, i32, i8*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i32, %struct.tree_node*, %struct.tree_node*, %struct.nb_iter_bound*, %struct.edge_def*, i1 } + %struct.lpt_decision = type { i32, i32 } + %struct.machine_function = type { %struct.stack_local_entry*, i8*, %struct.rtx_def*, i32, i32, i32, i32, i32 } + %struct.nb_iter_bound = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.nb_iter_bound* } + %struct.obstack = type { i32, %struct._obstack_chunk*, i8*, i8*, i8*, i32, i32, %struct._obstack_chunk* (i8*, i32)*, void (i8*, %struct._obstack_chunk*)*, i8*, i8 } + %struct.reorder_block_def = type { %struct.rtx_def*, %struct.rtx_def*, %struct.basic_block_def*, %struct.basic_block_def*, %struct.basic_block_def*, i32, i32, i32 } + %struct.rtvec_def = type { i32, [1 x %struct.rtx_def*] } + %struct.rtx_def = type { i16, i8, i8, %struct.u } + %struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack* } + %struct.simple_bitmap_def = type { i32, i32, i32, [1 x i64] } + %struct.stack_local_entry = type opaque + %struct.stmt_ann_d = type { %struct.tree_ann_common_d, i8, %struct.basic_block_def*, %struct.stmt_operands_d, %struct.dataflow_d*, %struct.bitmap_head_def*, i32 } + %struct.stmt_operands_d = type { %struct.def_optype_d*, %struct.def_optype_d*, %struct.v_may_def_optype_d*, %struct.vuse_optype_d*, %struct.v_may_def_optype_d* } + %struct.temp_slot = type opaque + %struct.tree_ann_common_d = type { i32, i8*, %struct.tree_node* } + %struct.tree_ann_d = type { %struct.stmt_ann_d } + %struct.tree_common = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_ann_d*, i8, i8, i8, i8, i8 } + %struct.tree_decl = type { %struct.tree_common, %struct.location_t, i32, 
%struct.tree_node*, i8, i8, i8, i8, i8, i8, i8, i32, %struct.tree_decl_u1, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, i32, %struct.tree_decl_u2, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_decl* } + %struct.tree_decl_u1 = type { i64 } + %struct.tree_decl_u2 = type { %struct.function* } + %struct.tree_node = type { %struct.tree_decl } + %struct.u = type { [1 x i64] } + %struct.v_def_use_operand_type_t = type { %struct.tree_node*, %struct.tree_node* } + %struct.v_may_def_optype_d = type { i32, [1 x %struct.v_def_use_operand_type_t] } + %struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* } + %struct.varasm_status = type opaque + %struct.varray_head_tag = type { i32, i32, i32, i8*, %struct.u } + %struct.version_info = type { %struct.tree_node*, %struct.iv*, i1, i32, i1 } + %struct.vuse_optype_d = type { i32, [1 x %struct.tree_node*] } + +define i1 @determine_use_iv_cost(%struct.ivopts_data* %data, %struct.iv_use* %use, %struct.iv_cand* %cand) { +entry: + switch i32 0, label %bb91 [ + i32 0, label %bb + i32 1, label %bb6 + i32 3, label %cond_next135 + ] + +bb: ; preds = %entry + ret i1 false + +bb6: ; preds = %entry + br i1 false, label %bb87, label %cond_next27 + +cond_next27: ; preds = %bb6 + br i1 false, label %cond_true30, label %cond_next55 + +cond_true30: ; preds = %cond_next27 + br i1 false, label %cond_next41, label %cond_true35 + +cond_true35: ; preds = %cond_true30 + ret i1 false + +cond_next41: ; preds = %cond_true30 + %tmp44 = call i32 @force_var_cost(%struct.ivopts_data* %data, %struct.tree_node* null, %struct.bitmap_head_def** null) ; <i32> [#uses=1] + %tmp46 = udiv i32 %tmp44, 5 ; <i32> [#uses=1] + call void @set_use_iv_cost(%struct.ivopts_data* %data, %struct.iv_use* %use, %struct.iv_cand* %cand, i32 %tmp46, %struct.bitmap_head_def* 
null) + br label %bb87 + +cond_next55: ; preds = %cond_next27 + ret i1 false + +bb87: ; preds = %cond_next41, %bb6 + ret i1 false + +bb91: ; preds = %entry + ret i1 false + +cond_next135: ; preds = %entry + ret i1 false +} + +declare void @set_use_iv_cost(%struct.ivopts_data*, %struct.iv_use*, %struct.iv_cand*, i32, %struct.bitmap_head_def*) + +declare i32 @force_var_cost(%struct.ivopts_data*, %struct.tree_node*, %struct.bitmap_head_def**)
diff --git a/src/LLVM/test/CodeGen/Blackfin/printf.ll b/src/LLVM/test/CodeGen/Blackfin/printf.ll new file mode 100644 index 0000000..9e54b73 --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/printf.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=bfin -verify-machineinstrs +@.str_1 = external constant [42 x i8] ; <[42 x i8]*> [#uses=1] + +declare i32 @printf(i8*, ...) + +define i32 @main(i32 %argc.1, i8** %argv.1) { +entry: + %tmp.16 = call i32 (i8*, ...)* @printf(i8* getelementptr ([42 x i8]* @.str_1, i64 0, i64 0), i32 0, i32 0, i64 0, i64 0) + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/printf2.ll b/src/LLVM/test/CodeGen/Blackfin/printf2.ll new file mode 100644 index 0000000..7ac7e80 --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/printf2.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=bfin + +declare i32 @printf(i8*, ...) + +define i32 @main() { + %1 = call i32 (i8*, ...)* @printf(i8* undef, i1 undef) + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/promote-logic.ll b/src/LLVM/test/CodeGen/Blackfin/promote-logic.ll new file mode 100644 index 0000000..1ac1408 --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/promote-logic.ll
@@ -0,0 +1,42 @@ +; RUN: llc < %s -march=bfin + +; DAGCombiner::SimplifyBinOpWithSameOpcodeHands can produce an illegal i16 OR +; operation after LegalizeOps. + +define void @mng_display_bgr565() { +entry: + br i1 false, label %bb.preheader, label %return + +bb.preheader: + br i1 false, label %cond_true48, label %cond_next80 + +cond_true48: + %tmp = load i8* null + %tmp51 = zext i8 %tmp to i16 + %tmp99 = load i8* null + %tmp54 = bitcast i8 %tmp99 to i8 + %tmp54.upgrd.1 = zext i8 %tmp54 to i32 + %tmp55 = lshr i32 %tmp54.upgrd.1, 3 + %tmp55.upgrd.2 = trunc i32 %tmp55 to i16 + %tmp52 = shl i16 %tmp51, 5 + %tmp56 = and i16 %tmp55.upgrd.2, 28 + %tmp57 = or i16 %tmp56, %tmp52 + %tmp60 = zext i16 %tmp57 to i32 + %tmp62 = xor i32 0, 65535 + %tmp63 = mul i32 %tmp60, %tmp62 + %tmp65 = add i32 0, %tmp63 + %tmp69 = add i32 0, %tmp65 + %tmp70 = lshr i32 %tmp69, 16 + %tmp70.upgrd.3 = trunc i32 %tmp70 to i16 + %tmp75 = lshr i16 %tmp70.upgrd.3, 8 + %tmp75.upgrd.4 = trunc i16 %tmp75 to i8 + %tmp76 = lshr i8 %tmp75.upgrd.4, 5 + store i8 %tmp76, i8* null + ret void + +cond_next80: + ret void + +return: + ret void +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/promote-setcc.ll b/src/LLVM/test/CodeGen/Blackfin/promote-setcc.ll new file mode 100644 index 0000000..d344fad --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/promote-setcc.ll
@@ -0,0 +1,37 @@ +; RUN: llc < %s -march=bfin > %t + +; The DAG combiner may sometimes create illegal i16 SETCC operations when run +; after LegalizeOps. Try to tease out all the optimizations in +; TargetLowering::SimplifySetCC. + +@x = external global i16 +@y = external global i16 + +declare i16 @llvm.ctlz.i16(i16) + +; Case (srl (ctlz x), 5) == const +; Note: ctlz is promoted, so this test does not catch the DAG combiner +define i1 @srl_ctlz_const() { + %x = load i16* @x + %c = call i16 @llvm.ctlz.i16(i16 %x) + %s = lshr i16 %c, 4 + %r = icmp eq i16 %s, 1 + ret i1 %r +} + +; Case (zext x) == const +define i1 @zext_const() { + %x = load i16* @x + %r = icmp ugt i16 %x, 1 + ret i1 %r +} + +; Case (sext x) == const +define i1 @sext_const() { + %x = load i16* @x + %y = add i16 %x, 1 + %x2 = sext i16 %y to i32 + %r = icmp ne i32 %x2, -1 + ret i1 %r +} +
diff --git a/src/LLVM/test/CodeGen/Blackfin/sdiv.ll b/src/LLVM/test/CodeGen/Blackfin/sdiv.ll new file mode 100644 index 0000000..1426655 --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/sdiv.ll
@@ -0,0 +1,5 @@ +; RUN: llc < %s -march=bfin -verify-machineinstrs +define i32 @sdiv(i32 %A, i32 %B) { + %R = sdiv i32 %A, %B ; <i32> [#uses=1] + ret i32 %R +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/simple-select.ll b/src/LLVM/test/CodeGen/Blackfin/simple-select.ll new file mode 100644 index 0000000..0f7f270 --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/simple-select.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=bfin -verify-machineinstrs > %t + +declare i1 @foo() + +define i32 @test(i32* %A, i32* %B) { + %a = load i32* %A + %b = load i32* %B + %cond = call i1 @foo() + %c = select i1 %cond, i32 %a, i32 %b + ret i32 %c +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/switch.ll b/src/LLVM/test/CodeGen/Blackfin/switch.ll new file mode 100644 index 0000000..3680ec6 --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/switch.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=bfin -verify-machineinstrs > %t + +define i32 @foo(i32 %A, i32 %B, i32 %C) { +entry: + switch i32 %A, label %out [ + i32 1, label %bb + i32 0, label %bb13 + ] + +bb: ; preds = %entry + ret i32 1 + +bb13: ; preds = %entry + ret i32 1 + +out: ; preds = %entry + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/switch2.ll b/src/LLVM/test/CodeGen/Blackfin/switch2.ll new file mode 100644 index 0000000..7877bce --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/switch2.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=bfin -verify-machineinstrs > %t + +define i8* @FindChar(i8* %CurPtr) { +entry: + br label %bb + +bb: ; preds = %bb, %entry + %tmp = load i8* null ; <i8> [#uses=1] + switch i8 %tmp, label %bb [ + i8 0, label %bb7 + i8 120, label %bb7 + ] + +bb7: ; preds = %bb, %bb + ret i8* null +}
diff --git a/src/LLVM/test/CodeGen/Blackfin/sync-intr.ll b/src/LLVM/test/CodeGen/Blackfin/sync-intr.ll new file mode 100644 index 0000000..0b103a3 --- /dev/null +++ b/src/LLVM/test/CodeGen/Blackfin/sync-intr.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=bfin -verify-machineinstrs | FileCheck %s + +define void @f() nounwind { +entry: + ; CHECK-NOT: llvm.bfin + ; CHECK: csync; + call void @llvm.bfin.csync() + + ; CHECK-NOT: llvm.bfin + ; CHECK: ssync; + call void @llvm.bfin.ssync() + ret void +} + +declare void @llvm.bfin.csync() nounwind +declare void @llvm.bfin.ssync() nounwind
diff --git a/src/LLVM/test/CodeGen/CBackend/2002-05-16-NameCollide.ll b/src/LLVM/test/CodeGen/CBackend/2002-05-16-NameCollide.ll new file mode 100644 index 0000000..c8d88b2 --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2002-05-16-NameCollide.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=c + +; Make sure that global variables do not collide if they have the same name, +; but different types. + +@X = global i32 5 ; <i32*> [#uses=0] +@X.upgrd.1 = global i64 7 ; <i64*> [#uses=0] +
diff --git a/src/LLVM/test/CodeGen/CBackend/2002-05-21-MissingReturn.ll b/src/LLVM/test/CodeGen/CBackend/2002-05-21-MissingReturn.ll new file mode 100644 index 0000000..9c8a0d5 --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2002-05-21-MissingReturn.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -march=c + +; This case was emitting code that looked like this: +; ... +; llvm_BB1: /* no statement here */ +; } +; +; Which the Sun C compiler rejected, so now we are sure to put a return +; instruction in there if the basic block is otherwise empty. +; +define void @test() { + br label %BB1 + +BB2: ; preds = %BB2 + br label %BB2 + +BB1: ; preds = %0 + ret void +} +
diff --git a/src/LLVM/test/CodeGen/CBackend/2002-08-19-ConstPointerRef.ll b/src/LLVM/test/CodeGen/CBackend/2002-08-19-ConstPointerRef.ll new file mode 100644 index 0000000..5782e64 --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2002-08-19-ConstPointerRef.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=c + +; Test const pointer refs & forward references + +@t3 = global i32* @t1 ; <i32**> [#uses=0] +@t1 = global i32 4 ; <i32*> [#uses=1] +
diff --git a/src/LLVM/test/CodeGen/CBackend/2002-08-19-ConstantExpr.ll b/src/LLVM/test/CodeGen/CBackend/2002-08-19-ConstantExpr.ll new file mode 100644 index 0000000..f81a275 --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2002-08-19-ConstantExpr.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=c + +global i32* bitcast (float* @2 to i32*) ;; Forward numeric reference +global float* @2 ;; Duplicate forward numeric reference +global float 0.0 + +@array = constant [2 x i32] [ i32 12, i32 52 ] +@arrayPtr = global i32* getelementptr ([2 x i32]* @array, i64 0, i64 0)
diff --git a/src/LLVM/test/CodeGen/CBackend/2002-08-19-DataPointer.ll b/src/LLVM/test/CodeGen/CBackend/2002-08-19-DataPointer.ll new file mode 100644 index 0000000..f14e12d --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2002-08-19-DataPointer.ll
@@ -0,0 +1,4 @@ +; RUN: llc < %s -march=c + +@sptr1 = global [11 x i8]* @somestr ;; Forward ref to a constant +@somestr = constant [11 x i8] c"hello world"
diff --git a/src/LLVM/test/CodeGen/CBackend/2002-08-19-FunctionPointer.ll b/src/LLVM/test/CodeGen/CBackend/2002-08-19-FunctionPointer.ll new file mode 100644 index 0000000..16935b0 --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2002-08-19-FunctionPointer.ll
@@ -0,0 +1,5 @@ +; RUN: llc < %s -march=c + +@fptr = global void ()* @f ;; Forward ref method defn +declare void @f() ;; External method +
diff --git a/src/LLVM/test/CodeGen/CBackend/2002-08-19-HardConstantExpr.ll b/src/LLVM/test/CodeGen/CBackend/2002-08-19-HardConstantExpr.ll new file mode 100644 index 0000000..369707a --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2002-08-19-HardConstantExpr.ll
@@ -0,0 +1,5 @@ +; RUN: llc < %s -march=c + +@array = constant [2 x i32] [ i32 12, i32 52 ] ; <[2 x i32]*> [#uses=1] +@arrayPtr = global i32* getelementptr ([2 x i32]* @array, i64 0, i64 0) ; <i32**> [#uses=0] +
diff --git a/src/LLVM/test/CodeGen/CBackend/2002-08-20-UnnamedArgument.ll b/src/LLVM/test/CodeGen/CBackend/2002-08-20-UnnamedArgument.ll new file mode 100644 index 0000000..83fe9ed --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2002-08-20-UnnamedArgument.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=c + +; The C Writer bombs on this testcase because it tries the print the prototype +; for the test function, which tries to print the argument name. The function +; has not been incorporated into the slot calculator, so after it does the name +; lookup, it tries a slot calculator lookup, which fails. + +define i32 @test(i32) { + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/CBackend/2002-08-26-IndirectCallTest.ll b/src/LLVM/test/CodeGen/CBackend/2002-08-26-IndirectCallTest.ll new file mode 100644 index 0000000..4fc0852 --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2002-08-26-IndirectCallTest.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=c + +; Indirect function call test... found by Joel & Brian +; + +@taskArray = external global i32* ; <i32**> [#uses=1] + +define void @test(i32 %X) { + %Y = add i32 %X, -1 ; <i32> [#uses=1] + %cast100 = sext i32 %Y to i64 ; <i64> [#uses=1] + %gep100 = getelementptr i32** @taskArray, i64 %cast100 ; <i32**> [#uses=1] + %fooPtr = load i32** %gep100 ; <i32*> [#uses=1] + %cast101 = bitcast i32* %fooPtr to void (i32)* ; <void (i32)*> [#uses=1] + call void %cast101( i32 1000 ) + ret void +} +
diff --git a/src/LLVM/test/CodeGen/CBackend/2002-08-30-StructureOrderingTest.ll b/src/LLVM/test/CodeGen/CBackend/2002-08-30-StructureOrderingTest.ll new file mode 100644 index 0000000..67df1a6 --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2002-08-30-StructureOrderingTest.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=c + +; This testcase fails because the C backend does not arrange to output the +; contents of a structure type before it outputs the structure type itself. + +@Y = external global { { i32 } } ; <{ { i32 } }*> [#uses=0] +@X = external global { float } ; <{ float }*> [#uses=0] +
diff --git a/src/LLVM/test/CodeGen/CBackend/2002-09-20-ArrayTypeFailure.ll b/src/LLVM/test/CodeGen/CBackend/2002-09-20-ArrayTypeFailure.ll new file mode 100644 index 0000000..adb5440 --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2002-09-20-ArrayTypeFailure.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=c + +define void @test() { + %X = alloca [4 x i32] ; <[4 x i32]*> [#uses=0] + ret void +} +
diff --git a/src/LLVM/test/CodeGen/CBackend/2002-09-20-VarArgPrototypes.ll b/src/LLVM/test/CodeGen/CBackend/2002-09-20-VarArgPrototypes.ll new file mode 100644 index 0000000..2c3f597 --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2002-09-20-VarArgPrototypes.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=c + + +declare void @foo(...) + +
diff --git a/src/LLVM/test/CodeGen/CBackend/2002-10-16-External.ll b/src/LLVM/test/CodeGen/CBackend/2002-10-16-External.ll new file mode 100644 index 0000000..adafd23 --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2002-10-16-External.ll
@@ -0,0 +1,4 @@ +; RUN: llc < %s -march=c + +@bob = external global i32 ; <i32*> [#uses=0] +
diff --git a/src/LLVM/test/CodeGen/CBackend/2002-11-06-PrintEscaped.ll b/src/LLVM/test/CodeGen/CBackend/2002-11-06-PrintEscaped.ll new file mode 100644 index 0000000..74b3bc5 --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2002-11-06-PrintEscaped.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=c + +@testString = internal constant [18 x i8] c"Escaped newline\5Cn\00" ; <[18 x i8]*> [#uses=1] + +declare i32 @printf(i8*, ...) + +define i32 @main() { + call i32 (i8*, ...)* @printf( i8* getelementptr ([18 x i8]* @testString, i64 0, i64 0) ) ; <i32>:1 [#uses=0] + ret i32 0 +} +
diff --git a/src/LLVM/test/CodeGen/CBackend/2003-05-12-IntegerSizeWarning.ll b/src/LLVM/test/CodeGen/CBackend/2003-05-12-IntegerSizeWarning.ll new file mode 100644 index 0000000..7d4457a --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2003-05-12-IntegerSizeWarning.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=c + +; Apparently this constant was unsigned in ISO C 90, but not in C 99. + +define i32 @foo() { + ret i32 -2147483648 +} +
diff --git a/src/LLVM/test/CodeGen/CBackend/2003-05-13-VarArgFunction.ll b/src/LLVM/test/CodeGen/CBackend/2003-05-13-VarArgFunction.ll new file mode 100644 index 0000000..a00ef9c --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2003-05-13-VarArgFunction.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=c + +; This testcase breaks the C backend, because gcc doesn't like (...) functions +; with no arguments at all. + +define void @test(i64 %Ptr) { + %P = inttoptr i64 %Ptr to void (...)* ; <void (...)*> [#uses=1] + call void (...)* %P( i64 %Ptr ) + ret void +} +
diff --git a/src/LLVM/test/CodeGen/CBackend/2003-05-31-MissingStructName.ll b/src/LLVM/test/CodeGen/CBackend/2003-05-31-MissingStructName.ll new file mode 100644 index 0000000..bf198d9 --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2003-05-31-MissingStructName.ll
@@ -0,0 +1,5 @@ +; RUN: llc < %s -march=c + +; The C backend was dying when there was no typename for a struct type! + +declare i32 @test(i32, { [32 x i32] }*)
diff --git a/src/LLVM/test/CodeGen/CBackend/2003-06-01-NullPointerType.ll b/src/LLVM/test/CodeGen/CBackend/2003-06-01-NullPointerType.ll new file mode 100644 index 0000000..f6b4b7e --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2003-06-01-NullPointerType.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=c + +%X = type { i32, float } + +define void @test() { + getelementptr %X* null, i64 0, i32 1 ; <float*>:1 [#uses=0] + ret void +} +
diff --git a/src/LLVM/test/CodeGen/CBackend/2003-06-11-HexConstant.ll b/src/LLVM/test/CodeGen/CBackend/2003-06-11-HexConstant.ll new file mode 100644 index 0000000..0472d0c --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2003-06-11-HexConstant.ll
@@ -0,0 +1,4 @@ +; RUN: llc < %s -march=c + +; Make sure hex constant does not continue into a valid hexadecimal letter/number +@version = global [3 x i8] c"\001\00"
diff --git a/src/LLVM/test/CodeGen/CBackend/2003-06-11-LiteralStringProblem.ll b/src/LLVM/test/CodeGen/CBackend/2003-06-11-LiteralStringProblem.ll new file mode 100644 index 0000000..84d298d --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2003-06-11-LiteralStringProblem.ll
@@ -0,0 +1,3 @@ +; RUN: llc < %s -march=c + +@version = global [3 x i8] c"1\00\00"
diff --git a/src/LLVM/test/CodeGen/CBackend/2003-06-28-InvokeSupport.ll b/src/LLVM/test/CodeGen/CBackend/2003-06-28-InvokeSupport.ll new file mode 100644 index 0000000..850239d --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2003-06-28-InvokeSupport.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=c + +declare i32 @callee(i32, i32) + +define i32 @test(i32 %X) { +; <label>:0 + %A = invoke i32 @callee( i32 %X, i32 5 ) + to label %Ok unwind label %Threw ; <i32> [#uses=1] + +Ok: ; preds = %Threw, %0 + %B = phi i32 [ %A, %0 ], [ -1, %Threw ] ; <i32> [#uses=1] + ret i32 %B + +Threw: ; preds = %0 + br label %Ok +} +
diff --git a/src/LLVM/test/CodeGen/CBackend/2003-06-28-LinkOnceGlobalVars.ll b/src/LLVM/test/CodeGen/CBackend/2003-06-28-LinkOnceGlobalVars.ll new file mode 100644 index 0000000..bd8394e --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2003-06-28-LinkOnceGlobalVars.ll
@@ -0,0 +1,3 @@ +; RUN: llc < %s -march=c | grep common | grep X + +@X = linkonce global i32 5
diff --git a/src/LLVM/test/CodeGen/CBackend/2003-10-12-NANGlobalInits.ll b/src/LLVM/test/CodeGen/CBackend/2003-10-12-NANGlobalInits.ll new file mode 100644 index 0000000..74477ea --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2003-10-12-NANGlobalInits.ll
@@ -0,0 +1,5 @@ +; RUN: llc < %s -march=c + +; This is a non-normal FP value: it's a nan. +@NAN = global { float } { float 0x7FF8000000000000 } ; <{ float }*> [#uses=0] +@NANs = global { float } { float 0x7FFC000000000000 } ; <{ float }*> [#uses=0]
diff --git a/src/LLVM/test/CodeGen/CBackend/2003-10-23-UnusedType.ll b/src/LLVM/test/CodeGen/CBackend/2003-10-23-UnusedType.ll new file mode 100644 index 0000000..9d66c15 --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2003-10-23-UnusedType.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=c + +%A = type { i32, i8*, { i32, i32, i32, i32, i32, i32, i32, i32 }*, i16 } + +define void @test(%A*) { + ret void +} +
diff --git a/src/LLVM/test/CodeGen/CBackend/2003-10-28-CastToPtrToStruct.ll b/src/LLVM/test/CodeGen/CBackend/2003-10-28-CastToPtrToStruct.ll new file mode 100644 index 0000000..fa30462 --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2003-10-28-CastToPtrToStruct.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=c + +; reduced from DOOM. + %union._XEvent = type { i32 } +@.X_event_9 = global %union._XEvent zeroinitializer ; <%union._XEvent*> [#uses=1] + +define void @I_InitGraphics() { +shortcirc_next.3: + %tmp.319 = load i32* getelementptr ({ i32, i32 }* bitcast (%union._XEvent* @.X_event_9 to { i32, i32 }*), i64 0, i32 1) ; <i32> [#uses=0] + ret void +} +
diff --git a/src/LLVM/test/CodeGen/CBackend/2003-11-21-ConstantShiftExpr.ll b/src/LLVM/test/CodeGen/CBackend/2003-11-21-ConstantShiftExpr.ll new file mode 100644 index 0000000..41fd187 --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2003-11-21-ConstantShiftExpr.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=c +@y = weak global i8 0 ; <i8*> [#uses=1] + +define i32 @testcaseshr() { +entry: + ret i32 lshr (i32 ptrtoint (i8* @y to i32), i32 4) +} + +define i32 @testcaseshl() { +entry: + ret i32 shl (i32 ptrtoint (i8* @y to i32), i32 4) +} +
diff --git a/src/LLVM/test/CodeGen/CBackend/2004-02-13-FrameReturnAddress.ll b/src/LLVM/test/CodeGen/CBackend/2004-02-13-FrameReturnAddress.ll new file mode 100644 index 0000000..c214d97 --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2004-02-13-FrameReturnAddress.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=c | grep builtin_return_address + +declare i8* @llvm.returnaddress(i32) + +declare i8* @llvm.frameaddress(i32) + +define i8* @test1() { + %X = call i8* @llvm.returnaddress( i32 0 ) ; <i8*> [#uses=1] + ret i8* %X +} + +define i8* @test2() { + %X = call i8* @llvm.frameaddress( i32 0 ) ; <i8*> [#uses=1] + ret i8* %X +} +
diff --git a/src/LLVM/test/CodeGen/CBackend/2004-02-15-PreexistingExternals.ll b/src/LLVM/test/CodeGen/CBackend/2004-02-15-PreexistingExternals.ll new file mode 100644 index 0000000..dff3325 --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2004-02-15-PreexistingExternals.ll
@@ -0,0 +1,18 @@ +; The intrinsic lowering pass was lowering intrinsics like llvm.memcpy to +; explicitly specified prototypes, inserting a new function if the old one +; didn't exist. This caused there to be two external memcpy functions in +; this testcase for example, which caused the CBE to mangle one, screwing +; everything up. :( Test that this does not happen anymore. +; +; RUN: llc < %s -march=c | not grep _memcpy + +declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) + +declare float* @memcpy(i32*, i32, i32) + +define i32 @test(i8* %A, i8* %B, i32* %C) { + call float* @memcpy( i32* %C, i32 4, i32 17 ) ; <float*>:1 [#uses=0] + call void @llvm.memcpy.i32( i8* %A, i8* %B, i32 123, i32 14 ) + ret i32 7 +} +
diff --git a/src/LLVM/test/CodeGen/CBackend/2004-02-26-FPNotPrintableConstants.ll b/src/LLVM/test/CodeGen/CBackend/2004-02-26-FPNotPrintableConstants.ll new file mode 100644 index 0000000..4b8f573 --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2004-02-26-FPNotPrintableConstants.ll
@@ -0,0 +1,11 @@ +; This is a non-normal FP value +; RUN: llc < %s -march=c | grep FPConstant | grep static + +define float @func() { + ret float 0xFFF0000000000000 +} + +define double @func2() { + ret double 0xFF20000000000000 +} +
diff --git a/src/LLVM/test/CodeGen/CBackend/2004-02-26-LinkOnceFunctions.ll b/src/LLVM/test/CodeGen/CBackend/2004-02-26-LinkOnceFunctions.ll new file mode 100644 index 0000000..4e4a7f1 --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2004-02-26-LinkOnceFunctions.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=c | grep func1 | grep WEAK + +define linkonce i32 @func1() { + ret i32 5 +} +
diff --git a/src/LLVM/test/CodeGen/CBackend/2004-08-09-va-end-null.ll b/src/LLVM/test/CodeGen/CBackend/2004-08-09-va-end-null.ll new file mode 100644 index 0000000..647789d --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2004-08-09-va-end-null.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=c + +declare void @llvm.va_end(i8*) + +define void @test() { + %va.upgrd.1 = bitcast i8* null to i8* ; <i8*> [#uses=1] + call void @llvm.va_end( i8* %va.upgrd.1 ) + ret void +} +
diff --git a/src/LLVM/test/CodeGen/CBackend/2004-11-13-FunctionPointerCast.ll b/src/LLVM/test/CodeGen/CBackend/2004-11-13-FunctionPointerCast.ll new file mode 100644 index 0000000..5868967 --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2004-11-13-FunctionPointerCast.ll
@@ -0,0 +1,12 @@ +; The CBE should not emit code that casts the function pointer. This causes +; GCC to get testy and insert trap instructions instead of doing the right +; thing. :( +; RUN: llc < %s -march=c + +declare void @external(i8*) + +define i32 @test(i32* %X) { + %RV = call i32 bitcast (void (i8*)* @external to i32 (i32*)*)( i32* %X ) ; <i32> [#uses=1] + ret i32 %RV +} +
diff --git a/src/LLVM/test/CodeGen/CBackend/2004-12-03-ExternStatics.ll b/src/LLVM/test/CodeGen/CBackend/2004-12-03-ExternStatics.ll new file mode 100644 index 0000000..168f12a --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2004-12-03-ExternStatics.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=c | not grep extern.*msg +; PR472 + +@msg = internal global [6 x i8] c"hello\00" ; <[6 x i8]*> [#uses=1] + +define i8* @foo() { +entry: + ret i8* getelementptr ([6 x i8]* @msg, i32 0, i32 0) +} +
diff --git a/src/LLVM/test/CodeGen/CBackend/2004-12-28-LogicalConstantExprs.ll b/src/LLVM/test/CodeGen/CBackend/2004-12-28-LogicalConstantExprs.ll new file mode 100644 index 0000000..629a3e4 --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2004-12-28-LogicalConstantExprs.ll
@@ -0,0 +1,5 @@ +; RUN: llc < %s -march=c + +define i32 @foo() { + ret i32 and (i32 123456, i32 ptrtoint (i32 ()* @foo to i32)) +}
diff --git a/src/LLVM/test/CodeGen/CBackend/2005-02-14-VolatileOperations.ll b/src/LLVM/test/CodeGen/CBackend/2005-02-14-VolatileOperations.ll new file mode 100644 index 0000000..ff7ef94 --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2005-02-14-VolatileOperations.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=c | grep volatile + +define void @test(i32* %P) { + %X = volatile load i32* %P ; <i32> [#uses=1] + volatile store i32 %X, i32* %P + ret void +} +
diff --git a/src/LLVM/test/CodeGen/CBackend/2005-07-14-NegationToMinusMinus.ll b/src/LLVM/test/CodeGen/CBackend/2005-07-14-NegationToMinusMinus.ll new file mode 100644 index 0000000..e2426a5 --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2005-07-14-NegationToMinusMinus.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=c | not grep -- --65535 +; PR596 + +target datalayout = "e-p:32:32" +target triple = "i686-pc-linux-gnu" + +declare void @func(i32) + +define void @funcb() { +entry: + %tmp.1 = sub i32 0, -65535 ; <i32> [#uses=1] + call void @func( i32 %tmp.1 ) + br label %return + +return: ; preds = %entry + ret void +} +
diff --git a/src/LLVM/test/CodeGen/CBackend/2005-08-23-Fmod.ll b/src/LLVM/test/CodeGen/CBackend/2005-08-23-Fmod.ll new file mode 100644 index 0000000..5d42724 --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2005-08-23-Fmod.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=c | grep fmod + +define double @test(double %A, double %B) { + %C = frem double %A, %B ; <double> [#uses=1] + ret double %C +} +
diff --git a/src/LLVM/test/CodeGen/CBackend/2005-09-27-VolatileFuncPtr.ll b/src/LLVM/test/CodeGen/CBackend/2005-09-27-VolatileFuncPtr.ll new file mode 100644 index 0000000..0fb4b1a --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2005-09-27-VolatileFuncPtr.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=c | grep {\\* *volatile *\\*} + +@G = external global void ()* ; <void ()**> [#uses=2] + +define void @test() { + volatile store void ()* @test, void ()** @G + volatile load void ()** @G ; <void ()*>:1 [#uses=0] + ret void +} +
diff --git a/src/LLVM/test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll b/src/LLVM/test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll new file mode 100644 index 0000000..8ff2aaa --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll
@@ -0,0 +1,49 @@ +; RUN: llc < %s -march=c | \ +; RUN: grep __BITCAST | count 14 + +define i32 @test1(float %F) { + %X = bitcast float %F to i32 ; <i32> [#uses=1] + ret i32 %X +} + +define float @test2(i32 %I) { + %X = bitcast i32 %I to float ; <float> [#uses=1] + ret float %X +} + +define i64 @test3(double %D) { + %X = bitcast double %D to i64 ; <i64> [#uses=1] + ret i64 %X +} + +define double @test4(i64 %L) { + %X = bitcast i64 %L to double ; <double> [#uses=1] + ret double %X +} + +define double @test5(double %D) { + %X = bitcast double %D to double ; <double> [#uses=1] + %Y = fadd double %X, 2.000000e+00 ; <double> [#uses=1] + %Z = bitcast double %Y to i64 ; <i64> [#uses=1] + %res = bitcast i64 %Z to double ; <double> [#uses=1] + ret double %res +} + +define float @test6(float %F) { + %X = bitcast float %F to float ; <float> [#uses=1] + %Y = fadd float %X, 2.000000e+00 ; <float> [#uses=1] + %Z = bitcast float %Y to i32 ; <i32> [#uses=1] + %res = bitcast i32 %Z to float ; <float> [#uses=1] + ret float %res +} + +define i32 @main(i32 %argc, i8** %argv) { + %a = call i32 @test1( float 0x400921FB40000000 ) ; <i32> [#uses=2] + %b = call float @test2( i32 %a ) ; <float> [#uses=0] + %c = call i64 @test3( double 0x400921FB4D12D84A ) ; <i64> [#uses=1] + %d = call double @test4( i64 %c ) ; <double> [#uses=0] + %e = call double @test5( double 7.000000e+00 ) ; <double> [#uses=0] + %f = call float @test6( float 7.000000e+00 ) ; <float> [#uses=0] + ret i32 %a +} +
diff --git a/src/LLVM/test/CodeGen/CBackend/2007-01-08-ParamAttr-ICmp.ll b/src/LLVM/test/CodeGen/CBackend/2007-01-08-ParamAttr-ICmp.ll new file mode 100644 index 0000000..1445a18 --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2007-01-08-ParamAttr-ICmp.ll
@@ -0,0 +1,26 @@ +; For PR1099 +; RUN: llc < %s -march=c | grep {(llvm_cbe_tmp2 == llvm_cbe_b_2e_0_2e_0_2e_val)} + +target datalayout = "e-p:32:32" +target triple = "i686-apple-darwin8" + %struct.Connector = type { i16, i16, i8, i8, %struct.Connector*, i8* } + + +define i1 @prune_match_entry_2E_ce(%struct.Connector* %a, i16 %b.0.0.val) { +newFuncRoot: + br label %entry.ce + +cond_next.exitStub: ; preds = %entry.ce + ret i1 true + +entry.return_crit_edge.exitStub: ; preds = %entry.ce + ret i1 false + +entry.ce: ; preds = %newFuncRoot + %tmp1 = getelementptr %struct.Connector* %a, i32 0, i32 0 ; <i16*> [#uses=1] + %tmp2 = load i16* %tmp1 ; <i16> [#uses=1] + %tmp3 = icmp eq i16 %tmp2, %b.0.0.val ; <i1> [#uses=1] + br i1 %tmp3, label %cond_next.exitStub, label %entry.return_crit_edge.exitStub +} + +
diff --git a/src/LLVM/test/CodeGen/CBackend/2007-01-17-StackSaveNRestore.ll b/src/LLVM/test/CodeGen/CBackend/2007-01-17-StackSaveNRestore.ll new file mode 100644 index 0000000..55ffb80 --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2007-01-17-StackSaveNRestore.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=c | grep __builtin_stack_save +; RUN: llc < %s -march=c | grep __builtin_stack_restore +; PR1028 + +declare i8* @llvm.stacksave() +declare void @llvm.stackrestore(i8*) + +define i8* @test() { + %s = call i8* @llvm.stacksave() + call void @llvm.stackrestore(i8* %s) + ret i8* %s +}
diff --git a/src/LLVM/test/CodeGen/CBackend/2007-02-05-memset.ll b/src/LLVM/test/CodeGen/CBackend/2007-02-05-memset.ll new file mode 100644 index 0000000..55e1654 --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2007-02-05-memset.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=c +; PR1181 +target datalayout = "e-p:64:64" +target triple = "x86_64-apple-darwin8" + + +declare void @llvm.memset.i64(i8*, i8, i64, i32) + +define fastcc void @InitUser_data_unregistered() { +entry: + tail call void @llvm.memset.i64( i8* null, i8 0, i64 65496, i32 1 ) + ret void +}
diff --git a/src/LLVM/test/CodeGen/CBackend/2007-02-23-NameConflicts.ll b/src/LLVM/test/CodeGen/CBackend/2007-02-23-NameConflicts.ll new file mode 100644 index 0000000..d15688e --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2007-02-23-NameConflicts.ll
@@ -0,0 +1,14 @@ +; PR1164 +; RUN: llc < %s -march=c | grep {llvm_cbe_A = \\*llvm_cbe_G;} +; RUN: llc < %s -march=c | grep {llvm_cbe_B = \\*(<mp_0_1);} +; RUN: llc < %s -march=c | grep {return (((unsigned int )(((unsigned int )llvm_cbe_A) + ((unsigned int )llvm_cbe_B))));} + +@G = global i32 123 +@ltmp_0_1 = global i32 123 + +define i32 @test(i32 *%G) { + %A = load i32* %G + %B = load i32* @ltmp_0_1 + %C = add i32 %A, %B + ret i32 %C +}
diff --git a/src/LLVM/test/CodeGen/CBackend/2007-07-11-PackedStruct.ll b/src/LLVM/test/CodeGen/CBackend/2007-07-11-PackedStruct.ll new file mode 100644 index 0000000..c8bfdd6 --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2007-07-11-PackedStruct.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=c | grep {packed} + + %struct.p = type <{ i16 }> + +define i32 @main() { +entry: + %t = alloca %struct.p, align 2 + ret i32 5 +}
diff --git a/src/LLVM/test/CodeGen/CBackend/2008-02-01-UnalignedLoadStore.ll b/src/LLVM/test/CodeGen/CBackend/2008-02-01-UnalignedLoadStore.ll new file mode 100644 index 0000000..6e0cf68 --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2008-02-01-UnalignedLoadStore.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=c | \ +; RUN: grep {struct __attribute__ ((packed, aligned(} | count 4 + +define void @test(i32* %P) { + %X = load i32* %P, align 1 + store i32 %X, i32* %P, align 1 + ret void +} + +define void @test2(i32* %P) { + %X = volatile load i32* %P, align 2 + volatile store i32 %X, i32* %P, align 2 + ret void +} +
diff --git a/src/LLVM/test/CodeGen/CBackend/2008-05-31-BoolOverflow.ll b/src/LLVM/test/CodeGen/CBackend/2008-05-31-BoolOverflow.ll new file mode 100644 index 0000000..e9fa552 --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2008-05-31-BoolOverflow.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=c | grep {llvm_cbe_t.*&1} +define i32 @test(i32 %r) { + %s = icmp eq i32 %r, 0 + %t = add i1 %s, %s + %u = zext i1 %t to i32 + br i1 %t, label %A, label %B +A: + + ret i32 %u +B: + + %v = select i1 %t, i32 %r, i32 %u + ret i32 %v +}
diff --git a/src/LLVM/test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll b/src/LLVM/test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll new file mode 100644 index 0000000..b72b573 --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s -march=c +; PR2907 +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128" +target triple = "powerpc-apple-darwin9.5" + %"struct.Point<0>" = type { %"struct.Tensor<1,0>" } + %"struct.QGauss2<1>" = type { %"struct.Quadrature<0>" } + %"struct.Quadrature<0>" = type { %struct.Subscriptor, i32, %"struct.std::vector<Point<0>,std::allocator<Point<0> > >", %"struct.std::vector<double,std::allocator<double> >" } + %struct.Subscriptor = type { i32 (...)**, i32, %"struct.std::type_info"* } + %"struct.Tensor<1,0>" = type { [1 x double] } + %"struct.std::_Vector_base<Point<0>,std::allocator<Point<0> > >" = type { %"struct.std::_Vector_base<Point<0>,std::allocator<Point<0> > >::_Vector_impl" } + %"struct.std::_Vector_base<Point<0>,std::allocator<Point<0> > >::_Vector_impl" = type { %"struct.Point<0>"*, %"struct.Point<0>"*, %"struct.Point<0>"* } + %"struct.std::_Vector_base<double,std::allocator<double> >" = type { %"struct.std::_Vector_base<double,std::allocator<double> >::_Vector_impl" } + %"struct.std::_Vector_base<double,std::allocator<double> >::_Vector_impl" = type { double*, double*, double* } + %"struct.std::type_info" = type { i32 (...)**, i8* } + %"struct.std::vector<Point<0>,std::allocator<Point<0> > >" = type { %"struct.std::_Vector_base<Point<0>,std::allocator<Point<0> > >" } + %"struct.std::vector<double,std::allocator<double> >" = type { %"struct.std::_Vector_base<double,std::allocator<double> >" } + +define fastcc void @_ZN6QGaussILi1EEC1Ej(%"struct.QGauss2<1>"* %this, i32 %n) { +entry: + br label %bb4 + +bb4: ; preds = %bb5.split, %bb4, %entry + %0 = fcmp ogt ppc_fp128 0xM00000000000000000000000000000000, select (i1 fcmp olt (ppc_fp128 fpext (double 0x3C447AE147AE147B to ppc_fp128), ppc_fp128 fmul (ppc_fp128 0xM00000000000000010000000000000000, ppc_fp128 0xM40140000000000000000000000000000)), ppc_fp128 fmul (ppc_fp128 
0xM00000000000000010000000000000000, ppc_fp128 0xM40140000000000000000000000000000), ppc_fp128 fpext (double 0x3C447AE147AE147B to ppc_fp128)) ; <i1> [#uses=1] + br i1 %0, label %bb4, label %bb5.split + +bb5.split: ; preds = %bb4 + %1 = getelementptr double* null, i32 0 ; <double*> [#uses=0] + br label %bb4 +}
diff --git a/src/LLVM/test/CodeGen/CBackend/2011-06-08-addWithOverflow.ll b/src/LLVM/test/CodeGen/CBackend/2011-06-08-addWithOverflow.ll new file mode 100644 index 0000000..0ae480d --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/2011-06-08-addWithOverflow.ll
@@ -0,0 +1,35 @@ +; RUN: llc < %s -march=c +; Check that uadd and sadd with overflow are handled by C Backend. + +%0 = type { i32, i1 } ; type %0 + +define i1 @func1(i32 zeroext %v1, i32 zeroext %v2) nounwind { +entry: + %t = call %0 @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2) ; <%0> [#uses=1] + %obit = extractvalue %0 %t, 1 ; <i1> [#uses=1] + br i1 %obit, label %carry, label %normal + +normal: ; preds = %entry + ret i1 true + +carry: ; preds = %entry + ret i1 false +} + +define i1 @func2(i32 signext %v1, i32 signext %v2) nounwind { +entry: + %t = call %0 @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2) ; <%0> [#uses=1] + %obit = extractvalue %0 %t, 1 ; <i1> [#uses=1] + br i1 %obit, label %carry, label %normal + +normal: ; preds = %entry + ret i1 true + +carry: ; preds = %entry + ret i1 false +} + +declare %0 @llvm.sadd.with.overflow.i32(i32, i32) nounwind + +declare %0 @llvm.uadd.with.overflow.i32(i32, i32) nounwind +
diff --git a/src/LLVM/test/CodeGen/CBackend/X86/2008-06-04-IndirectMem.ll b/src/LLVM/test/CodeGen/CBackend/X86/2008-06-04-IndirectMem.ll new file mode 100644 index 0000000..054a3ca --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/X86/2008-06-04-IndirectMem.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=c | grep {"m"(llvm_cbe_newcw))} +; PR2407 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" +target triple = "i386-pc-linux-gnu" + +define void @foo() { + %newcw = alloca i16 ; <i16*> [#uses=2] + call void asm sideeffect "fldcw $0", "*m,~{dirflag},~{fpsr},~{flags}"( i16* +%newcw ) nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/CBackend/X86/dg.exp b/src/LLVM/test/CodeGen/CBackend/X86/dg.exp new file mode 100644 index 0000000..44e3a5e --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/X86/dg.exp
@@ -0,0 +1,5 @@ +load_lib llvm.exp + +if { [llvm_supports_target X86] && [llvm_supports_target CBackend] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp,s}]] +}
diff --git a/src/LLVM/test/CodeGen/CBackend/dg.exp b/src/LLVM/test/CodeGen/CBackend/dg.exp new file mode 100644 index 0000000..05ecf9c --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/dg.exp
@@ -0,0 +1,5 @@ +load_lib llvm.exp + +if { [llvm_supports_target CBackend] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] +}
diff --git a/src/LLVM/test/CodeGen/CBackend/fneg.ll b/src/LLVM/test/CodeGen/CBackend/fneg.ll new file mode 100644 index 0000000..7dec3d9 --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/fneg.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=c + +define void @func() nounwind { + entry: + %0 = fsub double -0.0, undef + ret void +}
diff --git a/src/LLVM/test/CodeGen/CBackend/pr2408.ll b/src/LLVM/test/CodeGen/CBackend/pr2408.ll new file mode 100644 index 0000000..bf8477b --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/pr2408.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=c | grep {\\* ((unsigned int )} +; PR2408 + +define i32 @a(i32 %a) { +entry: + %shr = ashr i32 %a, 0 ; <i32> [#uses=1] + %shr2 = ashr i32 2, 0 ; <i32> [#uses=1] + %mul = mul i32 %shr, %shr2 ; <i32> [#uses=1] + %shr4 = ashr i32 2, 0 ; <i32> [#uses=1] + %div = sdiv i32 %mul, %shr4 ; <i32> [#uses=1] + ret i32 %div +}
diff --git a/src/LLVM/test/CodeGen/CBackend/vectors.ll b/src/LLVM/test/CodeGen/CBackend/vectors.ll new file mode 100644 index 0000000..b7b7677 --- /dev/null +++ b/src/LLVM/test/CodeGen/CBackend/vectors.ll
@@ -0,0 +1,37 @@ +; RUN: llc < %s -march=c +@.str15 = external global [2 x i8] + +define <4 x i32> @foo(<4 x i32> %a, i32 %b) { + %c = insertelement <4 x i32> %a, i32 1, i32 %b + + ret <4 x i32> %c +} + +define i32 @test2(<4 x i32> %a, i32 %b) { + %c = extractelement <4 x i32> %a, i32 1 + + ret i32 %c +} + +define <4 x float> @test3(<4 x float> %Y) { + %Z = fadd <4 x float> %Y, %Y + %X = shufflevector <4 x float> zeroinitializer, <4 x float> %Z, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > + ret <4 x float> %X +} + +define void @test4() { + %x = alloca <4 x float> + %tmp3.i16 = getelementptr <4 x float>* %x, i32 0, i32 0 + store float 1.0, float* %tmp3.i16 + ret void +} + +define i32* @test5({i32, i32} * %P) { + %x = getelementptr {i32, i32} * %P, i32 0, i32 1 + ret i32* %x +} + +define i8* @test6() { + ret i8* getelementptr ([2 x i8]* @.str15, i32 0, i32 0) +} +
diff --git a/src/LLVM/test/CodeGen/CPP/2007-06-16-Funcname.ll b/src/LLVM/test/CodeGen/CPP/2007-06-16-Funcname.ll new file mode 100644 index 0000000..0da1dbd --- /dev/null +++ b/src/LLVM/test/CodeGen/CPP/2007-06-16-Funcname.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=cpp -cppfname=WAKKA | not grep makeLLVMModule +; PR1515 + +define void @foo() { + ret void +} +
diff --git a/src/LLVM/test/CodeGen/CPP/2009-05-01-Long-Double.ll b/src/LLVM/test/CodeGen/CPP/2009-05-01-Long-Double.ll new file mode 100644 index 0000000..0b2d882 --- /dev/null +++ b/src/LLVM/test/CodeGen/CPP/2009-05-01-Long-Double.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=cpp -cppgen=program -o %t + +define x86_fp80 @some_func() nounwind { +entry: + %retval = alloca x86_fp80 ; <x86_fp80*> [#uses=2] + %call = call i32 (...)* @other_func() ; <i32> [#uses=1] + %conv = sitofp i32 %call to x86_fp80 ; <x86_fp80> [#uses=1] + store x86_fp80 %conv, x86_fp80* %retval + %0 = load x86_fp80* %retval ; <x86_fp80> [#uses=1] + ret x86_fp80 %0 +} + +declare i32 @other_func(...)
diff --git a/src/LLVM/test/CodeGen/CPP/2009-05-04-CondBr.ll b/src/LLVM/test/CodeGen/CPP/2009-05-04-CondBr.ll new file mode 100644 index 0000000..feb2cf7 --- /dev/null +++ b/src/LLVM/test/CodeGen/CPP/2009-05-04-CondBr.ll
@@ -0,0 +1,28 @@ +; RUN: llc < %s -march=cpp -cppgen=program -o %t +; RUN: grep "BranchInst::Create(label_if_then, label_if_end, int1_cmp, label_entry);" %t + +define i32 @some_func(i32 %a) nounwind { +entry: + %retval = alloca i32 ; <i32*> [#uses=2] + %a.addr = alloca i32 ; <i32*> [#uses=8] + store i32 %a, i32* %a.addr + %tmp = load i32* %a.addr ; <i32> [#uses=1] + %inc = add i32 %tmp, 1 ; <i32> [#uses=1] + store i32 %inc, i32* %a.addr + %tmp1 = load i32* %a.addr ; <i32> [#uses=1] + %cmp = icmp slt i32 %tmp1, 3 ; <i1> [#uses=1] + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + store i32 7, i32* %a.addr + br label %if.end + +if.end: ; preds = %if.then, %entry + %tmp2 = load i32* %a.addr ; <i32> [#uses=1] + %inc3 = add i32 %tmp2, 1 ; <i32> [#uses=1] + store i32 %inc3, i32* %a.addr + %tmp4 = load i32* %a.addr ; <i32> [#uses=1] + store i32 %tmp4, i32* %retval + %0 = load i32* %retval ; <i32> [#uses=1] + ret i32 %0 +}
diff --git a/src/LLVM/test/CodeGen/CPP/dg.exp b/src/LLVM/test/CodeGen/CPP/dg.exp new file mode 100644 index 0000000..3276dcc --- /dev/null +++ b/src/LLVM/test/CodeGen/CPP/dg.exp
@@ -0,0 +1,5 @@ +load_lib llvm.exp + +if { [llvm_supports_target CppBackend] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/2009-01-01-BrCond.ll b/src/LLVM/test/CodeGen/CellSPU/2009-01-01-BrCond.ll new file mode 100644 index 0000000..58e3190 --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/2009-01-01-BrCond.ll
@@ -0,0 +1,31 @@ +; RUN: llc < %s -march=cellspu -o - | grep brnz +; PR3274 + +target datalayout = "E-p:32:32:128-i1:8:128-i8:8:128-i16:16:128-i32:32:128-i64:32:128-f32:32:128-f64:64:128-v64:64:64-v128:128:128-a0:0:128-s0:128:128" +target triple = "spu" + %struct.anon = type { i64 } + %struct.fp_number_type = type { i32, i32, i32, [4 x i8], %struct.anon } + +define double @__floatunsidf(i32 %arg_a) nounwind { +entry: + %in = alloca %struct.fp_number_type, align 16 + %0 = getelementptr %struct.fp_number_type* %in, i32 0, i32 1 + store i32 0, i32* %0, align 4 + %1 = icmp eq i32 %arg_a, 0 + %2 = getelementptr %struct.fp_number_type* %in, i32 0, i32 0 + br i1 %1, label %bb, label %bb1 + +bb: ; preds = %entry + store i32 2, i32* %2, align 8 + br label %bb7 + +bb1: ; preds = %entry + ret double 0.0 + +bb7: ; preds = %bb5, %bb1, %bb + ret double 1.0 +} + +; declare i32 @llvm.ctlz.i32(i32) nounwind readnone + +declare double @__pack_d(%struct.fp_number_type*)
diff --git a/src/LLVM/test/CodeGen/CellSPU/2010-04-07-DbgValueOtherTargets.ll b/src/LLVM/test/CodeGen/CellSPU/2010-04-07-DbgValueOtherTargets.ll new file mode 100644 index 0000000..401399f --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/2010-04-07-DbgValueOtherTargets.ll
@@ -0,0 +1,28 @@ +; RUN: llc -O0 -march=cellspu -asm-verbose < %s | FileCheck %s +; Check that DEBUG_VALUE comments come through on a variety of targets. + +define i32 @main() nounwind ssp { +entry: +; CHECK: DEBUG_VALUE + call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9 + ret i32 0, !dbg !10 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.sp = !{!0} + +!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5} +!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 0} +!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] +!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!9 = metadata !{i32 3, i32 11, metadata !8, null} +!10 = metadata !{i32 4, i32 2, metadata !8, null} +
diff --git a/src/LLVM/test/CodeGen/CellSPU/and_ops.ll b/src/LLVM/test/CodeGen/CellSPU/and_ops.ll new file mode 100644 index 0000000..72478a1 --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/and_ops.ll
@@ -0,0 +1,279 @@ +; RUN: llc < %s -march=cellspu > %t1.s +; RUN: grep and %t1.s | count 234 +; RUN: grep andc %t1.s | count 85 +; RUN: grep andi %t1.s | count 37 +; RUN: grep andhi %t1.s | count 30 +; RUN: grep andbi %t1.s | count 4 + +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +; AND instruction generation: +define <4 x i32> @and_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { + %A = and <4 x i32> %arg1, %arg2 + ret <4 x i32> %A +} + +define <4 x i32> @and_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) { + %A = and <4 x i32> %arg2, %arg1 + ret <4 x i32> %A +} + +define <8 x i16> @and_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) { + %A = and <8 x i16> %arg1, %arg2 + ret <8 x i16> %A +} + +define <8 x i16> @and_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) { + %A = and <8 x i16> %arg2, %arg1 + ret <8 x i16> %A +} + +define <16 x i8> @and_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) { + %A = and <16 x i8> %arg2, %arg1 + ret <16 x i8> %A +} + +define <16 x i8> @and_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) { + %A = and <16 x i8> %arg1, %arg2 + ret <16 x i8> %A +} + +define i32 @and_i32_1(i32 %arg1, i32 %arg2) { + %A = and i32 %arg2, %arg1 + ret i32 %A +} + +define i32 @and_i32_2(i32 %arg1, i32 %arg2) { + %A = and i32 %arg1, %arg2 + ret i32 %A +} + +define i16 @and_i16_1(i16 %arg1, i16 %arg2) { + %A = and i16 %arg2, %arg1 + ret i16 %A +} + +define i16 @and_i16_2(i16 %arg1, i16 %arg2) { + %A = and i16 %arg1, %arg2 + ret i16 %A +} + +define i8 @and_i8_1(i8 %arg1, i8 %arg2) { + %A = and i8 %arg2, %arg1 + ret i8 %A +} + +define i8 @and_i8_2(i8 %arg1, i8 %arg2) { + %A = and i8 %arg1, %arg2 + ret i8 %A +} + +; ANDC instruction generation: +define <4 x i32> @andc_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { + %A = xor <4 x i32> %arg2, < i32 -1, i32 -1, i32 -1, i32 -1 > + %B = and <4 x i32> %arg1, %A + ret <4 x i32> %B +} + +define <4 x i32> @andc_v4i32_2(<4 x i32> %arg1, 
<4 x i32> %arg2) { + %A = xor <4 x i32> %arg1, < i32 -1, i32 -1, i32 -1, i32 -1 > + %B = and <4 x i32> %arg2, %A + ret <4 x i32> %B +} + +define <4 x i32> @andc_v4i32_3(<4 x i32> %arg1, <4 x i32> %arg2) { + %A = xor <4 x i32> %arg1, < i32 -1, i32 -1, i32 -1, i32 -1 > + %B = and <4 x i32> %A, %arg2 + ret <4 x i32> %B +} + +define <8 x i16> @andc_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) { + %A = xor <8 x i16> %arg2, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + %B = and <8 x i16> %arg1, %A + ret <8 x i16> %B +} + +define <8 x i16> @andc_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) { + %A = xor <8 x i16> %arg1, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + %B = and <8 x i16> %arg2, %A + ret <8 x i16> %B +} + +define <16 x i8> @andc_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) { + %A = xor <16 x i8> %arg1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %arg2, %A + ret <16 x i8> %B +} + +define <16 x i8> @andc_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) { + %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %arg1, %A + ret <16 x i8> %B +} + +define <16 x i8> @andc_v16i8_3(<16 x i8> %arg1, <16 x i8> %arg2) { + %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %A, %arg1 + ret <16 x i8> %B +} + +define i32 @andc_i32_1(i32 %arg1, i32 %arg2) { + %A = xor i32 %arg2, -1 + %B = and i32 %A, %arg1 + ret i32 %B +} + +define i32 @andc_i32_2(i32 %arg1, i32 %arg2) { + %A = xor i32 %arg1, -1 + %B = and i32 %A, %arg2 + ret i32 %B +} + +define i32 @andc_i32_3(i32 %arg1, i32 %arg2) { + %A = xor i32 %arg2, -1 + %B = and i32 %arg1, %A + ret i32 %B +} + +define i16 @andc_i16_1(i16 %arg1, i16 %arg2) { + %A = xor i16 %arg2, -1 + %B 
= and i16 %A, %arg1 + ret i16 %B +} + +define i16 @andc_i16_2(i16 %arg1, i16 %arg2) { + %A = xor i16 %arg1, -1 + %B = and i16 %A, %arg2 + ret i16 %B +} + +define i16 @andc_i16_3(i16 %arg1, i16 %arg2) { + %A = xor i16 %arg2, -1 + %B = and i16 %arg1, %A + ret i16 %B +} + +define i8 @andc_i8_1(i8 %arg1, i8 %arg2) { + %A = xor i8 %arg2, -1 + %B = and i8 %A, %arg1 + ret i8 %B +} + +define i8 @andc_i8_2(i8 %arg1, i8 %arg2) { + %A = xor i8 %arg1, -1 + %B = and i8 %A, %arg2 + ret i8 %B +} + +define i8 @andc_i8_3(i8 %arg1, i8 %arg2) { + %A = xor i8 %arg2, -1 + %B = and i8 %arg1, %A + ret i8 %B +} + +; ANDI instruction generation (i32 data type): +define <4 x i32> @andi_v4i32_1(<4 x i32> %in) { + %tmp2 = and <4 x i32> %in, < i32 511, i32 511, i32 511, i32 511 > + ret <4 x i32> %tmp2 +} + +define <4 x i32> @andi_v4i32_2(<4 x i32> %in) { + %tmp2 = and <4 x i32> %in, < i32 510, i32 510, i32 510, i32 510 > + ret <4 x i32> %tmp2 +} + +define <4 x i32> @andi_v4i32_3(<4 x i32> %in) { + %tmp2 = and <4 x i32> %in, < i32 -1, i32 -1, i32 -1, i32 -1 > + ret <4 x i32> %tmp2 +} + +define <4 x i32> @andi_v4i32_4(<4 x i32> %in) { + %tmp2 = and <4 x i32> %in, < i32 -512, i32 -512, i32 -512, i32 -512 > + ret <4 x i32> %tmp2 +} + +define zeroext i32 @andi_u32(i32 zeroext %in) { + %tmp37 = and i32 %in, 37 + ret i32 %tmp37 +} + +define signext i32 @andi_i32(i32 signext %in) { + %tmp38 = and i32 %in, 37 + ret i32 %tmp38 +} + +define i32 @andi_i32_1(i32 %in) { + %tmp37 = and i32 %in, 37 + ret i32 %tmp37 +} + +; ANDHI instruction generation (i16 data type): +define <8 x i16> @andhi_v8i16_1(<8 x i16> %in) { + %tmp2 = and <8 x i16> %in, < i16 511, i16 511, i16 511, i16 511, + i16 511, i16 511, i16 511, i16 511 > + ret <8 x i16> %tmp2 +} + +define <8 x i16> @andhi_v8i16_2(<8 x i16> %in) { + %tmp2 = and <8 x i16> %in, < i16 510, i16 510, i16 510, i16 510, + i16 510, i16 510, i16 510, i16 510 > + ret <8 x i16> %tmp2 +} + +define <8 x i16> @andhi_v8i16_3(<8 x i16> %in) { + %tmp2 = and <8 x i16> %in, < 
i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1 > + ret <8 x i16> %tmp2 +} + +define <8 x i16> @andhi_v8i16_4(<8 x i16> %in) { + %tmp2 = and <8 x i16> %in, < i16 -512, i16 -512, i16 -512, i16 -512, + i16 -512, i16 -512, i16 -512, i16 -512 > + ret <8 x i16> %tmp2 +} + +define zeroext i16 @andhi_u16(i16 zeroext %in) { + %tmp37 = and i16 %in, 37 ; <i16> [#uses=1] + ret i16 %tmp37 +} + +define signext i16 @andhi_i16(i16 signext %in) { + %tmp38 = and i16 %in, 37 ; <i16> [#uses=1] + ret i16 %tmp38 +} + +; i8 data type (s/b ANDBI if 8-bit registers were supported): +define <16 x i8> @and_v16i8(<16 x i8> %in) { + ; ANDBI generated for vector types + %tmp2 = and <16 x i8> %in, < i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, + i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, + i8 42, i8 42, i8 42, i8 42 > + ret <16 x i8> %tmp2 +} + +define zeroext i8 @and_u8(i8 zeroext %in) { + ; ANDBI generated: + %tmp37 = and i8 %in, 37 + ret i8 %tmp37 +} + +define signext i8 @and_sext8(i8 signext %in) { + ; ANDBI generated + %tmp38 = and i8 %in, 37 + ret i8 %tmp38 +} + +define i8 @and_i8(i8 %in) { + ; ANDBI generated + %tmp38 = and i8 %in, 205 + ret i8 %tmp38 +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/arg_ret.ll b/src/LLVM/test/CodeGen/CellSPU/arg_ret.ll new file mode 100644 index 0000000..7410b72 --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/arg_ret.ll
@@ -0,0 +1,34 @@ +; Test parameter passing and return values +;RUN: llc --march=cellspu %s -o - | FileCheck %s + +; this fits into registers r3-r74 +%paramstruct = type { i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32, + i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32, + i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32, + i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32, + i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32, + i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32} +define ccc i32 @test_regs( %paramstruct %prm ) +{ +;CHECK: lr $3, $74 +;CHECK: bi $lr + %1 = extractvalue %paramstruct %prm, 71 + ret i32 %1 +} + +define ccc i32 @test_regs_and_stack( %paramstruct %prm, i32 %stackprm ) +{ +;CHECK-NOT: a $3, $74, $75 + %1 = extractvalue %paramstruct %prm, 71 + %2 = add i32 %1, %stackprm + ret i32 %2 +} + +define ccc %paramstruct @test_return( i32 %param, %paramstruct %prm ) +{ +;CHECK: lqd {{\$[0-9]+}}, 80($sp) +;CHECK-NOT: ori {{\$[0-9]+, \$[0-9]+, 0}} +;CHECK: lr $3, $4 + ret %paramstruct %prm +} +
diff --git a/src/LLVM/test/CodeGen/CellSPU/bigstack.ll b/src/LLVM/test/CodeGen/CellSPU/bigstack.ll new file mode 100644 index 0000000..63293e2 --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/bigstack.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=cellspu -o %t1.s +; RUN: grep lqx %t1.s | count 3 +; RUN: grep il %t1.s | grep -v file | count 5 +; RUN: grep stqx %t1.s | count 1 + +define i32 @bigstack() nounwind { +entry: + %avar = alloca i32 + %big_data = alloca [2048 x i32] + store i32 3840, i32* %avar, align 4 + br label %return + +return: + %retval = load i32* %avar + ret i32 %retval +} +
diff --git a/src/LLVM/test/CodeGen/CellSPU/bss.ll b/src/LLVM/test/CodeGen/CellSPU/bss.ll new file mode 100644 index 0000000..327800d --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/bss.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=cellspu | FileCheck %s + +@bssVar = global i32 zeroinitializer +; CHECK: .section .bss +; CHECK-NEXT: .globl + +@localVar= internal global i32 zeroinitializer +; CHECK-NOT: .lcomm +; CHECK: .local +; CHECK-NEXT: .comm +
diff --git a/src/LLVM/test/CodeGen/CellSPU/call.ll b/src/LLVM/test/CodeGen/CellSPU/call.ll new file mode 100644 index 0000000..559b266 --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/call.ll
@@ -0,0 +1,53 @@ +; RUN: llc < %s -march=cellspu -regalloc=linearscan > %t1.s +; RUN: grep brsl %t1.s | count 1 +; RUN: grep brasl %t1.s | count 2 +; RUN: grep stqd %t1.s | count 82 +; RUN: llc < %s -march=cellspu | FileCheck %s + +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +define i32 @main() { +entry: + %a = call i32 @stub_1(i32 1, float 0x400921FA00000000) + call void @extern_stub_1(i32 %a, i32 4) + ret i32 %a +} + +declare void @extern_stub_1(i32, i32) + +define i32 @stub_1(i32 %x, float %y) { + ; CHECK: il $3, 0 + ; CHECK: bi $lr +entry: + ret i32 0 +} + +; vararg call: ensure that all caller-saved registers are spilled to the +; stack: +define i32 @stub_2(...) { +entry: + ret i32 0 +} + +; check that struct is passed in r3-> +; assert this by changing the second field in the struct +%0 = type { i32, i32, i32 } +declare %0 @callee() +define %0 @test_structret() +{ +;CHECK: stqd $lr, 16($sp) +;CHECK: stqd $sp, -48($sp) +;CHECK: ai $sp, $sp, -48 +;CHECK: brasl $lr, callee + %rv = call %0 @callee() +;CHECK: ai $4, $4, 1 +;CHECK: lqd $lr, 64($sp) +;CHECK: ai $sp, $sp, 48 +;CHECK: bi $lr + %oldval = extractvalue %0 %rv, 1 + %newval = add i32 %oldval,1 + %newrv = insertvalue %0 %rv, i32 %newval, 1 + ret %0 %newrv +} +
diff --git a/src/LLVM/test/CodeGen/CellSPU/call_indirect.ll b/src/LLVM/test/CodeGen/CellSPU/call_indirect.ll new file mode 100644 index 0000000..141361d --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/call_indirect.ll
@@ -0,0 +1,49 @@ +; RUN: llc < %s -march=cellspu -asm-verbose=0 -regalloc=linearscan > %t1.s +; RUN: llc < %s -march=cellspu -mattr=large_mem -asm-verbose=0 -regalloc=linearscan > %t2.s +; RUN: grep bisl %t1.s | count 7 +; RUN: grep ila %t1.s | count 1 +; RUN: grep rotqby %t1.s | count 5 +; RUN: grep lqa %t1.s | count 1 +; RUN: grep lqd %t1.s | count 12 +; RUN: grep dispatch_tab %t1.s | count 5 +; RUN: grep bisl %t2.s | count 7 +; RUN: grep ilhu %t2.s | count 2 +; RUN: grep iohl %t2.s | count 2 +; RUN: grep rotqby %t2.s | count 5 +; RUN: grep lqd %t2.s | count 13 +; RUN: grep ilhu %t2.s | count 2 +; RUN: grep ai %t2.s | count 9 +; RUN: grep dispatch_tab %t2.s | count 6 + +; ModuleID = 'call_indirect.bc' +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128" +target triple = "spu-unknown-elf" + +@dispatch_tab = global [6 x void (i32, float)*] zeroinitializer, align 16 + +define void @dispatcher(i32 %i_arg, float %f_arg) { +entry: + %tmp2 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 0), align 16 + tail call void %tmp2( i32 %i_arg, float %f_arg ) + %tmp2.1 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 1), align 4 + tail call void %tmp2.1( i32 %i_arg, float %f_arg ) + %tmp2.2 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 2), align 4 + tail call void %tmp2.2( i32 %i_arg, float %f_arg ) + %tmp2.3 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 3), align 4 + tail call void %tmp2.3( i32 %i_arg, float %f_arg ) + %tmp2.4 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 4), align 4 + tail call void %tmp2.4( i32 %i_arg, float %f_arg ) + %tmp2.5 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 5), align 4 + tail call void 
%tmp2.5( i32 %i_arg, float %f_arg ) + ret void +} + +@ptr_list = internal global [1 x void ()*] [ void ()* inttoptr (i64 4294967295 to void ()*) ], align 4 +@ptr.a = internal global void ()** getelementptr ([1 x void ()*]* @ptr_list, i32 0, i32 1), align 16 + +define void @double_indirect_call() { + %a = load void ()*** @ptr.a, align 16 + %b = load void ()** %a, align 4 + tail call void %b() + ret void +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/crash.ll b/src/LLVM/test/CodeGen/CellSPU/crash.ll new file mode 100644 index 0000000..cc2ab71 --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/crash.ll
@@ -0,0 +1,8 @@ +; RUN: llc %s -march=cellspu -o - +declare i8 @return_i8() +declare i16 @return_i16() +define void @testfunc() { + %rv1 = call i8 @return_i8() + %rv2 = call i16 @return_i16() + ret void +} \ No newline at end of file
diff --git a/src/LLVM/test/CodeGen/CellSPU/ctpop.ll b/src/LLVM/test/CodeGen/CellSPU/ctpop.ll new file mode 100644 index 0000000..e1a6cd8 --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/ctpop.ll
@@ -0,0 +1,30 @@ +; RUN: llc < %s -march=cellspu > %t1.s +; RUN: grep cntb %t1.s | count 3 +; RUN: grep andi %t1.s | count 3 +; RUN: grep rotmi %t1.s | count 2 +; RUN: grep rothmi %t1.s | count 1 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +declare i8 @llvm.ctpop.i8(i8) +declare i16 @llvm.ctpop.i16(i16) +declare i32 @llvm.ctpop.i32(i32) + +define i32 @test_i8(i8 %X) { + call i8 @llvm.ctpop.i8(i8 %X) + %Y = zext i8 %1 to i32 + ret i32 %Y +} + +define i32 @test_i16(i16 %X) { + call i16 @llvm.ctpop.i16(i16 %X) + %Y = zext i16 %1 to i32 + ret i32 %Y +} + +define i32 @test_i32(i32 %X) { + call i32 @llvm.ctpop.i32(i32 %X) + %Y = bitcast i32 %1 to i32 + ret i32 %Y +} +
diff --git a/src/LLVM/test/CodeGen/CellSPU/dg.exp b/src/LLVM/test/CodeGen/CellSPU/dg.exp new file mode 100644 index 0000000..d416479 --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/dg.exp
@@ -0,0 +1,5 @@ +load_lib llvm.exp + +if { [llvm_supports_target CellSPU] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/div_ops.ll b/src/LLVM/test/CodeGen/CellSPU/div_ops.ll new file mode 100644 index 0000000..0c93d83 --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/div_ops.ll
@@ -0,0 +1,22 @@ +; RUN: llc --march=cellspu %s -o - | FileCheck %s + +; signed division rounds towards zero, rotma don't. +define i32 @sdivide (i32 %val ) +{ +; CHECK: rotmai +; CHECK: rotmi +; CHECK: a +; CHECK: rotmai +; CHECK: bi $lr + %rv = sdiv i32 %val, 4 + ret i32 %rv +} + +define i32 @udivide (i32 %val ) +{ +; CHECK: rotmi +; CHECK: bi $lr + %rv = udiv i32 %val, 4 + ret i32 %rv +} +
diff --git a/src/LLVM/test/CodeGen/CellSPU/dp_farith.ll b/src/LLVM/test/CodeGen/CellSPU/dp_farith.ll new file mode 100644 index 0000000..66bff3e --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/dp_farith.ll
@@ -0,0 +1,102 @@ +; RUN: llc < %s -march=cellspu > %t1.s +; RUN: grep dfa %t1.s | count 2 +; RUN: grep dfs %t1.s | count 2 +; RUN: grep dfm %t1.s | count 6 +; RUN: grep dfma %t1.s | count 2 +; RUN: grep dfms %t1.s | count 2 +; RUN: grep dfnms %t1.s | count 4 +; +; This file includes double precision floating point arithmetic instructions +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +define double @fadd(double %arg1, double %arg2) { + %A = fadd double %arg1, %arg2 + ret double %A +} + +define <2 x double> @fadd_vec(<2 x double> %arg1, <2 x double> %arg2) { + %A = fadd <2 x double> %arg1, %arg2 + ret <2 x double> %A +} + +define double @fsub(double %arg1, double %arg2) { + %A = fsub double %arg1, %arg2 + ret double %A +} + +define <2 x double> @fsub_vec(<2 x double> %arg1, <2 x double> %arg2) { + %A = fsub <2 x double> %arg1, %arg2 + ret <2 x double> %A +} + +define double @fmul(double %arg1, double %arg2) { + %A = fmul double %arg1, %arg2 + ret double %A +} + +define <2 x double> @fmul_vec(<2 x double> %arg1, <2 x double> %arg2) { + %A = fmul <2 x double> %arg1, %arg2 + ret <2 x double> %A +} + +define double @fma(double %arg1, double %arg2, double %arg3) { + %A = fmul double %arg1, %arg2 + %B = fadd double %A, %arg3 + ret double %B +} + +define <2 x double> @fma_vec(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) { + %A = fmul <2 x double> %arg1, %arg2 + %B = fadd <2 x double> %A, %arg3 + ret <2 x double> %B +} + +define double @fms(double %arg1, double %arg2, double %arg3) { + %A = fmul double %arg1, %arg2 + %B = fsub double %A, %arg3 + ret double %B +} + +define <2 x double> @fms_vec(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) { + %A = fmul <2 x double> %arg1, %arg2 + %B = fsub <2 x double> %A, %arg3 + ret <2 x double> %B +} + +; - (a * b - c) +define double @d_fnms_1(double %arg1, double %arg2, double %arg3) { + 
%A = fmul double %arg1, %arg2 + %B = fsub double %A, %arg3 + %C = fsub double -0.000000e+00, %B ; <double> [#uses=1] + ret double %C +} + +; Annother way of getting fnms +; - ( a * b ) + c => c - (a * b) +define double @d_fnms_2(double %arg1, double %arg2, double %arg3) { + %A = fmul double %arg1, %arg2 + %B = fsub double %arg3, %A + ret double %B +} + +; FNMS: - (a * b - c) => c - (a * b) +define <2 x double> @d_fnms_vec_1(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) { + %A = fmul <2 x double> %arg1, %arg2 + %B = fsub <2 x double> %arg3, %A + ret <2 x double> %B +} + +; Another way to get fnms using a constant vector +; - ( a * b - c) +define <2 x double> @d_fnms_vec_2(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) { + %A = fmul <2 x double> %arg1, %arg2 ; <<2 x double>> [#uses=1] + %B = fsub <2 x double> %A, %arg3 ; <<2 x double>> [#uses=1] + %C = fsub <2 x double> < double -0.00000e+00, double -0.00000e+00 >, %B + ret <2 x double> %C +} + +;define double @fdiv_1(double %arg1, double %arg2) { +; %A = fdiv double %arg1, %arg2 ; <double> [#uses=1] +; ret double %A +;}
diff --git a/src/LLVM/test/CodeGen/CellSPU/eqv.ll b/src/LLVM/test/CodeGen/CellSPU/eqv.ll new file mode 100644 index 0000000..7967681 --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/eqv.ll
@@ -0,0 +1,152 @@ +; RUN: llc < %s -march=cellspu > %t1.s +; RUN: grep eqv %t1.s | count 18 +; RUN: grep xshw %t1.s | count 6 +; RUN: grep xsbh %t1.s | count 3 +; RUN: grep andi %t1.s | count 3 + +; Test the 'eqv' instruction, whose boolean expression is: +; (a & b) | (~a & ~b), which simplifies to +; (a & b) | ~(a | b) +; Alternatively, a ^ ~b, which the compiler will also match. + +; ModuleID = 'eqv.bc' +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +define <4 x i32> @equiv_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { + %A = and <4 x i32> %arg1, %arg2 + %B = or <4 x i32> %arg1, %arg2 + %Bnot = xor <4 x i32> %B, < i32 -1, i32 -1, i32 -1, i32 -1 > + %C = or <4 x i32> %A, %Bnot + ret <4 x i32> %C +} + +define <4 x i32> @equiv_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) { + %B = or <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] + %Bnot = xor <4 x i32> %B, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] + %A = and <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] + %C = or <4 x i32> %A, %Bnot ; <<4 x i32>> [#uses=1] + ret <4 x i32> %C +} + +define <4 x i32> @equiv_v4i32_3(<4 x i32> %arg1, <4 x i32> %arg2) { + %B = or <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] + %A = and <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] + %Bnot = xor <4 x i32> %B, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] + %C = or <4 x i32> %A, %Bnot ; <<4 x i32>> [#uses=1] + ret <4 x i32> %C +} + +define <4 x i32> @equiv_v4i32_4(<4 x i32> %arg1, <4 x i32> %arg2) { + %arg2not = xor <4 x i32> %arg2, < i32 -1, i32 -1, i32 -1, i32 -1 > + %C = xor <4 x i32> %arg1, %arg2not + ret <4 x i32> %C +} + +define i32 @equiv_i32_1(i32 %arg1, i32 %arg2) { + %A = and i32 %arg1, %arg2 ; <i32> [#uses=1] + %B = or i32 %arg1, %arg2 ; <i32> [#uses=1] + %Bnot = xor i32 %B, -1 ; <i32> [#uses=1] + %C = or i32 %A, %Bnot ; <i32> [#uses=1] + ret i32 %C +} + +define i32 @equiv_i32_2(i32 
%arg1, i32 %arg2) { + %B = or i32 %arg1, %arg2 ; <i32> [#uses=1] + %Bnot = xor i32 %B, -1 ; <i32> [#uses=1] + %A = and i32 %arg1, %arg2 ; <i32> [#uses=1] + %C = or i32 %A, %Bnot ; <i32> [#uses=1] + ret i32 %C +} + +define i32 @equiv_i32_3(i32 %arg1, i32 %arg2) { + %B = or i32 %arg1, %arg2 ; <i32> [#uses=1] + %A = and i32 %arg1, %arg2 ; <i32> [#uses=1] + %Bnot = xor i32 %B, -1 ; <i32> [#uses=1] + %C = or i32 %A, %Bnot ; <i32> [#uses=1] + ret i32 %C +} + +define i32 @equiv_i32_4(i32 %arg1, i32 %arg2) { + %arg2not = xor i32 %arg2, -1 + %C = xor i32 %arg1, %arg2not + ret i32 %C +} + +define i32 @equiv_i32_5(i32 %arg1, i32 %arg2) { + %arg1not = xor i32 %arg1, -1 + %C = xor i32 %arg2, %arg1not + ret i32 %C +} + +define signext i16 @equiv_i16_1(i16 signext %arg1, i16 signext %arg2) { + %A = and i16 %arg1, %arg2 ; <i16> [#uses=1] + %B = or i16 %arg1, %arg2 ; <i16> [#uses=1] + %Bnot = xor i16 %B, -1 ; <i16> [#uses=1] + %C = or i16 %A, %Bnot ; <i16> [#uses=1] + ret i16 %C +} + +define signext i16 @equiv_i16_2(i16 signext %arg1, i16 signext %arg2) { + %B = or i16 %arg1, %arg2 ; <i16> [#uses=1] + %Bnot = xor i16 %B, -1 ; <i16> [#uses=1] + %A = and i16 %arg1, %arg2 ; <i16> [#uses=1] + %C = or i16 %A, %Bnot ; <i16> [#uses=1] + ret i16 %C +} + +define signext i16 @equiv_i16_3(i16 signext %arg1, i16 signext %arg2) { + %B = or i16 %arg1, %arg2 ; <i16> [#uses=1] + %A = and i16 %arg1, %arg2 ; <i16> [#uses=1] + %Bnot = xor i16 %B, -1 ; <i16> [#uses=1] + %C = or i16 %A, %Bnot ; <i16> [#uses=1] + ret i16 %C +} + +define signext i8 @equiv_i8_1(i8 signext %arg1, i8 signext %arg2) { + %A = and i8 %arg1, %arg2 ; <i8> [#uses=1] + %B = or i8 %arg1, %arg2 ; <i8> [#uses=1] + %Bnot = xor i8 %B, -1 ; <i8> [#uses=1] + %C = or i8 %A, %Bnot ; <i8> [#uses=1] + ret i8 %C +} + +define signext i8 @equiv_i8_2(i8 signext %arg1, i8 signext %arg2) { + %B = or i8 %arg1, %arg2 ; <i8> [#uses=1] + %Bnot = xor i8 %B, -1 ; <i8> [#uses=1] + %A = and i8 %arg1, %arg2 ; <i8> [#uses=1] + %C = or i8 %A, %Bnot ; <i8> 
[#uses=1] + ret i8 %C +} + +define signext i8 @equiv_i8_3(i8 signext %arg1, i8 signext %arg2) { + %B = or i8 %arg1, %arg2 ; <i8> [#uses=1] + %A = and i8 %arg1, %arg2 ; <i8> [#uses=1] + %Bnot = xor i8 %B, -1 ; <i8> [#uses=1] + %C = or i8 %A, %Bnot ; <i8> [#uses=1] + ret i8 %C +} + +define zeroext i8 @equiv_u8_1(i8 zeroext %arg1, i8 zeroext %arg2) { + %A = and i8 %arg1, %arg2 ; <i8> [#uses=1] + %B = or i8 %arg1, %arg2 ; <i8> [#uses=1] + %Bnot = xor i8 %B, -1 ; <i8> [#uses=1] + %C = or i8 %A, %Bnot ; <i8> [#uses=1] + ret i8 %C +} + +define zeroext i8 @equiv_u8_2(i8 zeroext %arg1, i8 zeroext %arg2) { + %B = or i8 %arg1, %arg2 ; <i8> [#uses=1] + %Bnot = xor i8 %B, -1 ; <i8> [#uses=1] + %A = and i8 %arg1, %arg2 ; <i8> [#uses=1] + %C = or i8 %A, %Bnot ; <i8> [#uses=1] + ret i8 %C +} + +define zeroext i8 @equiv_u8_3(i8 zeroext %arg1, i8 zeroext %arg2) { + %B = or i8 %arg1, %arg2 ; <i8> [#uses=1] + %A = and i8 %arg1, %arg2 ; <i8> [#uses=1] + %Bnot = xor i8 %B, -1 ; <i8> [#uses=1] + %C = or i8 %A, %Bnot ; <i8> [#uses=1] + ret i8 %C +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/extract_elt.ll b/src/LLVM/test/CodeGen/CellSPU/extract_elt.ll new file mode 100644 index 0000000..0ac971c --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/extract_elt.ll
@@ -0,0 +1,277 @@ +; RUN: llc < %s -march=cellspu > %t1.s +; RUN: grep shufb %t1.s | count 39 +; RUN: grep ilhu %t1.s | count 27 +; RUN: grep iohl %t1.s | count 27 +; RUN: grep lqa %t1.s | count 10 +; RUN: grep shlqby %t1.s | count 12 +; RUN: grep 515 %t1.s | count 1 +; RUN: grep 1029 %t1.s | count 2 +; RUN: grep 1543 %t1.s | count 2 +; RUN: grep 2057 %t1.s | count 2 +; RUN: grep 2571 %t1.s | count 2 +; RUN: grep 3085 %t1.s | count 2 +; RUN: grep 3599 %t1.s | count 2 +; RUN: grep 32768 %t1.s | count 1 +; RUN: grep 32769 %t1.s | count 1 +; RUN: grep 32770 %t1.s | count 1 +; RUN: grep 32771 %t1.s | count 1 +; RUN: grep 32772 %t1.s | count 1 +; RUN: grep 32773 %t1.s | count 1 +; RUN: grep 32774 %t1.s | count 1 +; RUN: grep 32775 %t1.s | count 1 +; RUN: grep 32776 %t1.s | count 1 +; RUN: grep 32777 %t1.s | count 1 +; RUN: grep 32778 %t1.s | count 1 +; RUN: grep 32779 %t1.s | count 1 +; RUN: grep 32780 %t1.s | count 1 +; RUN: grep 32781 %t1.s | count 1 +; RUN: grep 32782 %t1.s | count 1 +; RUN: grep 32783 %t1.s | count 1 +; RUN: grep 32896 %t1.s | count 24 + +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +define i32 @i32_extract_0(<4 x i32> %v) { +entry: + %a = extractelement <4 x i32> %v, i32 0 + ret i32 %a +} + +define i32 @i32_extract_1(<4 x i32> %v) { +entry: + %a = extractelement <4 x i32> %v, i32 1 + ret i32 %a +} + +define i32 @i32_extract_2(<4 x i32> %v) { +entry: + %a = extractelement <4 x i32> %v, i32 2 + ret i32 %a +} + +define i32 @i32_extract_3(<4 x i32> %v) { +entry: + %a = extractelement <4 x i32> %v, i32 3 + ret i32 %a +} + +define i16 @i16_extract_0(<8 x i16> %v) { +entry: + %a = extractelement <8 x i16> %v, i32 0 + ret i16 %a +} + +define i16 @i16_extract_1(<8 x i16> %v) { +entry: + %a = extractelement <8 x i16> %v, i32 1 + ret i16 %a +} + +define i16 @i16_extract_2(<8 x i16> %v) { +entry: + %a = extractelement <8 x i16> %v, i32 2 + 
ret i16 %a +} + +define i16 @i16_extract_3(<8 x i16> %v) { +entry: + %a = extractelement <8 x i16> %v, i32 3 + ret i16 %a +} + +define i16 @i16_extract_4(<8 x i16> %v) { +entry: + %a = extractelement <8 x i16> %v, i32 4 + ret i16 %a +} + +define i16 @i16_extract_5(<8 x i16> %v) { +entry: + %a = extractelement <8 x i16> %v, i32 5 + ret i16 %a +} + +define i16 @i16_extract_6(<8 x i16> %v) { +entry: + %a = extractelement <8 x i16> %v, i32 6 + ret i16 %a +} + +define i16 @i16_extract_7(<8 x i16> %v) { +entry: + %a = extractelement <8 x i16> %v, i32 7 + ret i16 %a +} + +define i8 @i8_extract_0(<16 x i8> %v) { +entry: + %a = extractelement <16 x i8> %v, i32 0 + ret i8 %a +} + +define i8 @i8_extract_1(<16 x i8> %v) { +entry: + %a = extractelement <16 x i8> %v, i32 1 + ret i8 %a +} + +define i8 @i8_extract_2(<16 x i8> %v) { +entry: + %a = extractelement <16 x i8> %v, i32 2 + ret i8 %a +} + +define i8 @i8_extract_3(<16 x i8> %v) { +entry: + %a = extractelement <16 x i8> %v, i32 3 + ret i8 %a +} + +define i8 @i8_extract_4(<16 x i8> %v) { +entry: + %a = extractelement <16 x i8> %v, i32 4 + ret i8 %a +} + +define i8 @i8_extract_5(<16 x i8> %v) { +entry: + %a = extractelement <16 x i8> %v, i32 5 + ret i8 %a +} + +define i8 @i8_extract_6(<16 x i8> %v) { +entry: + %a = extractelement <16 x i8> %v, i32 6 + ret i8 %a +} + +define i8 @i8_extract_7(<16 x i8> %v) { +entry: + %a = extractelement <16 x i8> %v, i32 7 + ret i8 %a +} + +define i8 @i8_extract_8(<16 x i8> %v) { +entry: + %a = extractelement <16 x i8> %v, i32 8 + ret i8 %a +} + +define i8 @i8_extract_9(<16 x i8> %v) { +entry: + %a = extractelement <16 x i8> %v, i32 9 + ret i8 %a +} + +define i8 @i8_extract_10(<16 x i8> %v) { +entry: + %a = extractelement <16 x i8> %v, i32 10 + ret i8 %a +} + +define i8 @i8_extract_11(<16 x i8> %v) { +entry: + %a = extractelement <16 x i8> %v, i32 11 + ret i8 %a +} + +define i8 @i8_extract_12(<16 x i8> %v) { +entry: + %a = extractelement <16 x i8> %v, i32 12 + ret i8 %a +} + +define i8 
@i8_extract_13(<16 x i8> %v) { +entry: + %a = extractelement <16 x i8> %v, i32 13 + ret i8 %a +} + +define i8 @i8_extract_14(<16 x i8> %v) { +entry: + %a = extractelement <16 x i8> %v, i32 14 + ret i8 %a +} + +define i8 @i8_extract_15(<16 x i8> %v) { +entry: + %a = extractelement <16 x i8> %v, i32 15 + ret i8 %a +} + +;;-------------------------------------------------------------------------- +;; extract element, variable index: +;;-------------------------------------------------------------------------- + +define i8 @extract_varadic_i8(i32 %i) nounwind readnone { +entry: + %0 = extractelement <16 x i8> < i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, i32 %i + ret i8 %0 +} + +define i8 @extract_varadic_i8_1(<16 x i8> %v, i32 %i) nounwind readnone { +entry: + %0 = extractelement <16 x i8> %v, i32 %i + ret i8 %0 +} + +define i16 @extract_varadic_i16(i32 %i) nounwind readnone { +entry: + %0 = extractelement <8 x i16> < i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, i32 %i + ret i16 %0 +} + +define i16 @extract_varadic_i16_1(<8 x i16> %v, i32 %i) nounwind readnone { +entry: + %0 = extractelement <8 x i16> %v, i32 %i + ret i16 %0 +} + +define i32 @extract_varadic_i32(i32 %i) nounwind readnone { +entry: + %0 = extractelement <4 x i32> < i32 0, i32 1, i32 2, i32 3>, i32 %i + ret i32 %0 +} + +define i32 @extract_varadic_i32_1(<4 x i32> %v, i32 %i) nounwind readnone { +entry: + %0 = extractelement <4 x i32> %v, i32 %i + ret i32 %0 +} + +define float @extract_varadic_f32(i32 %i) nounwind readnone { +entry: + %0 = extractelement <4 x float> < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >, i32 %i + ret float %0 +} + +define float @extract_varadic_f32_1(<4 x float> %v, i32 %i) nounwind readnone { +entry: + %0 = extractelement <4 x float> %v, i32 %i + ret float %0 +} + +define i64 @extract_varadic_i64(i32 %i) nounwind readnone { +entry: + %0 = extractelement <2 x i64> < i64 0, 
i64 1>, i32 %i + ret i64 %0 +} + +define i64 @extract_varadic_i64_1(<2 x i64> %v, i32 %i) nounwind readnone { +entry: + %0 = extractelement <2 x i64> %v, i32 %i + ret i64 %0 +} + +define double @extract_varadic_f64(i32 %i) nounwind readnone { +entry: + %0 = extractelement <2 x double> < double 1.000000e+00, double 2.000000e+00>, i32 %i + ret double %0 +} + +define double @extract_varadic_f64_1(<2 x double> %v, i32 %i) nounwind readnone { +entry: + %0 = extractelement <2 x double> %v, i32 %i + ret double %0 +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/fcmp32.ll b/src/LLVM/test/CodeGen/CellSPU/fcmp32.ll new file mode 100644 index 0000000..c14fd7b --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/fcmp32.ll
@@ -0,0 +1,36 @@ +; RUN: llc --march=cellspu %s -o - | FileCheck %s + +; Exercise the floating point comparison operators for f32: + +declare double @fabs(double) +declare float @fabsf(float) + +define i1 @fcmp_eq(float %arg1, float %arg2) { +; CHECK: fceq +; CHECK: bi $lr + %A = fcmp oeq float %arg1, %arg2 + ret i1 %A +} + +define i1 @fcmp_mag_eq(float %arg1, float %arg2) { +; CHECK: fcmeq +; CHECK: bi $lr + %1 = call float @fabsf(float %arg1) + %2 = call float @fabsf(float %arg2) + %3 = fcmp oeq float %1, %2 + ret i1 %3 +} + +define i1 @test_ogt(float %a, float %b) { +; CHECK: fcgt +; CHECK: bi $lr + %cmp = fcmp ogt float %a, %b + ret i1 %cmp +} + +define i1 @test_ugt(float %a, float %b) { +; CHECK: fcgt +; CHECK: bi $lr + %cmp = fcmp ugt float %a, %b + ret i1 %cmp +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/fcmp64.ll b/src/LLVM/test/CodeGen/CellSPU/fcmp64.ll new file mode 100644 index 0000000..2b61fa6 --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/fcmp64.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=cellspu > %t1.s + +define i1 @fcmp_eq_setcc_f64(double %arg1, double %arg2) nounwind { +entry: + %A = fcmp oeq double %arg1, %arg2 + ret i1 %A +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/fdiv.ll b/src/LLVM/test/CodeGen/CellSPU/fdiv.ll new file mode 100644 index 0000000..9921626 --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/fdiv.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -march=cellspu > %t1.s +; RUN: grep frest %t1.s | count 2 +; RUN: grep -w fi %t1.s | count 2 +; RUN: grep -w fm %t1.s | count 2 +; RUN: grep fma %t1.s | count 2 +; RUN: grep fnms %t1.s | count 4 +; RUN: grep cgti %t1.s | count 2 +; RUN: grep selb %t1.s | count 2 +; +; This file includes standard floating point arithmetic instructions +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +define float @fdiv32(float %arg1, float %arg2) { + %A = fdiv float %arg1, %arg2 + ret float %A +} + +define <4 x float> @fdiv_v4f32(<4 x float> %arg1, <4 x float> %arg2) { + %A = fdiv <4 x float> %arg1, %arg2 + ret <4 x float> %A +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/fneg-fabs.ll b/src/LLVM/test/CodeGen/CellSPU/fneg-fabs.ll new file mode 100644 index 0000000..1e5e3b3 --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/fneg-fabs.ll
@@ -0,0 +1,42 @@ +; RUN: llc < %s -march=cellspu > %t1.s +; RUN: grep 32768 %t1.s | count 2 +; RUN: grep xor %t1.s | count 4 +; RUN: grep and %t1.s | count 2 + +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +define double @fneg_dp(double %X) { + %Y = fsub double -0.000000e+00, %X + ret double %Y +} + +define <2 x double> @fneg_dp_vec(<2 x double> %X) { + %Y = fsub <2 x double> < double -0.0000e+00, double -0.0000e+00 >, %X + ret <2 x double> %Y +} + +define float @fneg_sp(float %X) { + %Y = fsub float -0.000000e+00, %X + ret float %Y +} + +define <4 x float> @fneg_sp_vec(<4 x float> %X) { + %Y = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, + float -0.000000e+00, float -0.000000e+00>, %X + ret <4 x float> %Y +} + +declare double @fabs(double) + +declare float @fabsf(float) + +define double @fabs_dp(double %X) { + %Y = call double @fabs( double %X ) + ret double %Y +} + +define float @fabs_sp(float %X) { + %Y = call float @fabsf( float %X ) + ret float %Y +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/i64ops.ll b/src/LLVM/test/CodeGen/CellSPU/i64ops.ll new file mode 100644 index 0000000..3553cbb --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/i64ops.ll
@@ -0,0 +1,57 @@ +; RUN: llc < %s -march=cellspu > %t1.s +; RUN: grep xswd %t1.s | count 3 +; RUN: grep xsbh %t1.s | count 1 +; RUN: grep xshw %t1.s | count 2 +; RUN: grep shufb %t1.s | count 7 +; RUN: grep cg %t1.s | count 4 +; RUN: grep addx %t1.s | count 4 +; RUN: grep fsmbi %t1.s | count 3 +; RUN: grep il %t1.s | count 2 +; RUN: grep mpy %t1.s | count 10 +; RUN: grep mpyh %t1.s | count 6 +; RUN: grep mpyhhu %t1.s | count 2 +; RUN: grep mpyu %t1.s | count 4 + +; ModuleID = 'stores.bc' +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +define i64 @sext_i64_i8(i8 %a) nounwind { + %1 = sext i8 %a to i64 + ret i64 %1 +} + +define i64 @sext_i64_i16(i16 %a) nounwind { + %1 = sext i16 %a to i64 + ret i64 %1 +} + +define i64 @sext_i64_i32(i32 %a) nounwind { + %1 = sext i32 %a to i64 + ret i64 %1 +} + +define i64 @zext_i64_i8(i8 %a) nounwind { + %1 = zext i8 %a to i64 + ret i64 %1 +} + +define i64 @zext_i64_i16(i16 %a) nounwind { + %1 = zext i16 %a to i64 + ret i64 %1 +} + +define i64 @zext_i64_i32(i32 %a) nounwind { + %1 = zext i32 %a to i64 + ret i64 %1 +} + +define i64 @add_i64(i64 %a, i64 %b) nounwind { + %1 = add i64 %a, %b + ret i64 %1 +} + +define i64 @mul_i64(i64 %a, i64 %b) nounwind { + %1 = mul i64 %a, %b + ret i64 %1 +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/i8ops.ll b/src/LLVM/test/CodeGen/CellSPU/i8ops.ll new file mode 100644 index 0000000..57a2aa8 --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/i8ops.ll
@@ -0,0 +1,25 @@ +; RUN: llc < %s -march=cellspu > %t1.s + +; ModuleID = 'i8ops.bc' +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +define i8 @add_i8(i8 %a, i8 %b) nounwind { + %1 = add i8 %a, %b + ret i8 %1 +} + +define i8 @add_i8_imm(i8 %a, i8 %b) nounwind { + %1 = add i8 %a, 15 + ret i8 %1 +} + +define i8 @sub_i8(i8 %a, i8 %b) nounwind { + %1 = sub i8 %a, %b + ret i8 %1 +} + +define i8 @sub_i8_imm(i8 %a, i8 %b) nounwind { + %1 = sub i8 %a, 15 + ret i8 %1 +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/icmp16.ll b/src/LLVM/test/CodeGen/CellSPU/icmp16.ll new file mode 100644 index 0000000..32b1261 --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/icmp16.ll
@@ -0,0 +1,350 @@ +; RUN: llc < %s -march=cellspu > %t1.s +; RUN: grep ilh %t1.s | count 15 +; RUN: grep ceqh %t1.s | count 29 +; RUN: grep ceqhi %t1.s | count 13 +; RUN: grep clgth %t1.s | count 15 +; RUN: grep cgth %t1.s | count 14 +; RUN: grep cgthi %t1.s | count 6 +; RUN: grep {selb\t\\\$3, \\\$6, \\\$5, \\\$3} %t1.s | count 7 +; RUN: grep {selb\t\\\$3, \\\$5, \\\$6, \\\$3} %t1.s | count 3 +; RUN: grep {selb\t\\\$3, \\\$5, \\\$4, \\\$3} %t1.s | count 17 +; RUN: grep {selb\t\\\$3, \\\$4, \\\$5, \\\$3} %t1.s | count 6 + +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +; $3 = %arg1, $4 = %arg2, $5 = %val1, $6 = %val2 +; $3 = %arg1, $4 = %val1, $5 = %val2 +; +; For "positive" comparisons: +; selb $3, $6, $5, <i1> +; selb $3, $5, $4, <i1> +; +; For "negative" comparisons, i.e., those where the result of the comparison +; must be inverted (setne, for example): +; selb $3, $5, $6, <i1> +; selb $3, $4, $5, <i1> + +; i16 integer comparisons: +define i16 @icmp_eq_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp eq i16 %arg1, %arg2 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i1 @icmp_eq_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp eq i16 %arg1, %arg2 + ret i1 %A +} + +define i16 @icmp_eq_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp eq i16 %arg1, 511 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_eq_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp eq i16 %arg1, -512 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_eq_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp eq i16 %arg1, -1 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_eq_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind 
{ +entry: + %A = icmp eq i16 %arg1, 32768 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_ne_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ne i16 %arg1, %arg2 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i1 @icmp_ne_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ne i16 %arg1, %arg2 + ret i1 %A +} + +define i16 @icmp_ne_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ne i16 %arg1, 511 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_ne_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ne i16 %arg1, -512 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_ne_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ne i16 %arg1, -1 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_ne_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ne i16 %arg1, 32768 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_ugt_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ugt i16 %arg1, %arg2 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i1 @icmp_ugt_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ugt i16 %arg1, %arg2 + ret i1 %A +} + +define i16 @icmp_ugt_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ugt i16 %arg1, 500 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_ugt_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ugt i16 %arg1, 0 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_ugt_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ugt i16 %arg1, 65024 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + 
+define i16 @icmp_ugt_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ugt i16 %arg1, 32768 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_uge_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp uge i16 %arg1, %arg2 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i1 @icmp_uge_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp uge i16 %arg1, %arg2 + ret i1 %A +} + +;; Note: icmp uge i16 %arg1, <immed> can always be transformed into +;; icmp ugt i16 %arg1, <immed>-1 +;; +;; Consequently, even though the patterns exist to match, it's unlikely +;; they'll ever be generated. + +define i16 @icmp_ult_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ult i16 %arg1, %arg2 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i1 @icmp_ult_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ult i16 %arg1, %arg2 + ret i1 %A +} + +define i16 @icmp_ult_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ult i16 %arg1, 511 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_ult_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ult i16 %arg1, 65534 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_ult_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ult i16 %arg1, 65024 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_ult_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ult i16 %arg1, 32769 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_ule_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ule i16 %arg1, %arg2 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i1 @icmp_ule_setcc_i16(i16 
%arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp ule i16 %arg1, %arg2 + ret i1 %A +} + +;; Note: icmp ule i16 %arg1, <immed> can always be transformed into +;; icmp ult i16 %arg1, <immed>+1 +;; +;; Consequently, even though the patterns exist to match, it's unlikely +;; they'll ever be generated. + +define i16 @icmp_sgt_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp sgt i16 %arg1, %arg2 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i1 @icmp_sgt_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp sgt i16 %arg1, %arg2 + ret i1 %A +} + +define i16 @icmp_sgt_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp sgt i16 %arg1, 511 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_sgt_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp sgt i16 %arg1, -1 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_sgt_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp sgt i16 %arg1, -512 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_sgt_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp sgt i16 %arg1, 32768 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_sge_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp sge i16 %arg1, %arg2 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i1 @icmp_sge_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp sge i16 %arg1, %arg2 + ret i1 %A +} + +;; Note: icmp sge i16 %arg1, <immed> can always be transformed into +;; icmp sgt i16 %arg1, <immed>-1 +;; +;; Consequently, even though the patterns exist to match, it's unlikely +;; they'll ever be generated. 
+ +define i16 @icmp_slt_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp slt i16 %arg1, %arg2 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i1 @icmp_slt_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp slt i16 %arg1, %arg2 + ret i1 %A +} + +define i16 @icmp_slt_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp slt i16 %arg1, 511 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_slt_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp slt i16 %arg1, -512 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_slt_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp slt i16 %arg1, -1 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_slt_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp slt i16 %arg1, 32768 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i16 @icmp_sle_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp sle i16 %arg1, %arg2 + %B = select i1 %A, i16 %val1, i16 %val2 + ret i16 %B +} + +define i1 @icmp_sle_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +entry: + %A = icmp sle i16 %arg1, %arg2 + ret i1 %A +} + +;; Note: icmp sle i16 %arg1, <immed> can always be transformed into +;; icmp slt i16 %arg1, <immed>+1 +;; +;; Consequently, even though the patterns exist to match, it's unlikely +;; they'll ever be generated. +
diff --git a/src/LLVM/test/CodeGen/CellSPU/icmp32.ll b/src/LLVM/test/CodeGen/CellSPU/icmp32.ll new file mode 100644 index 0000000..ccbb5f7 --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/icmp32.ll
@@ -0,0 +1,350 @@ +; RUN: llc < %s -march=cellspu > %t1.s +; RUN: grep ila %t1.s | count 6 +; RUN: grep ceq %t1.s | count 28 +; RUN: grep ceqi %t1.s | count 12 +; RUN: grep clgt %t1.s | count 16 +; RUN: grep clgti %t1.s | count 6 +; RUN: grep cgt %t1.s | count 16 +; RUN: grep cgti %t1.s | count 6 +; RUN: grep {selb\t\\\$3, \\\$6, \\\$5, \\\$3} %t1.s | count 7 +; RUN: grep {selb\t\\\$3, \\\$5, \\\$6, \\\$3} %t1.s | count 3 +; RUN: grep {selb\t\\\$3, \\\$5, \\\$4, \\\$3} %t1.s | count 20 + +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +; $3 = %arg1, $4 = %arg2, $5 = %val1, $6 = %val2 +; $3 = %arg1, $4 = %val1, $5 = %val2 +; +; For "positive" comparisons: +; selb $3, $6, $5, <i1> +; selb $3, $5, $4, <i1> +; +; For "negative" comparisons, i.e., those where the result of the comparison +; must be inverted (setne, for example): +; selb $3, $5, $6, <i1> +; selb $3, $4, $5, <i1> + +; i32 integer comparisons: +define i32 @icmp_eq_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp eq i32 %arg1, %arg2 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i1 @icmp_eq_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp eq i32 %arg1, %arg2 + ret i1 %A +} + +define i32 @icmp_eq_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp eq i32 %arg1, 511 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_eq_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp eq i32 %arg1, -512 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_eq_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp eq i32 %arg1, -1 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_eq_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp eq i32 
%arg1, 32768 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_ne_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ne i32 %arg1, %arg2 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i1 @icmp_ne_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ne i32 %arg1, %arg2 + ret i1 %A +} + +define i32 @icmp_ne_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ne i32 %arg1, 511 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_ne_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ne i32 %arg1, -512 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_ne_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ne i32 %arg1, -1 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_ne_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ne i32 %arg1, 32768 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_ugt_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ugt i32 %arg1, %arg2 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i1 @icmp_ugt_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ugt i32 %arg1, %arg2 + ret i1 %A +} + +define i32 @icmp_ugt_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ugt i32 %arg1, 511 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_ugt_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ugt i32 %arg1, 4294966784 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_ugt_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ugt i32 %arg1, 4294967293 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 
@icmp_ugt_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ugt i32 %arg1, 32768 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_uge_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp uge i32 %arg1, %arg2 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i1 @icmp_uge_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp uge i32 %arg1, %arg2 + ret i1 %A +} + +;; Note: icmp uge i32 %arg1, <immed> can always be transformed into +;; icmp ugt i32 %arg1, <immed>-1 +;; +;; Consequently, even though the patterns exist to match, it's unlikely +;; they'll ever be generated. + +define i32 @icmp_ult_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ult i32 %arg1, %arg2 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i1 @icmp_ult_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ult i32 %arg1, %arg2 + ret i1 %A +} + +define i32 @icmp_ult_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ult i32 %arg1, 511 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_ult_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ult i32 %arg1, 4294966784 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_ult_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ult i32 %arg1, 4294967293 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_ult_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ult i32 %arg1, 32768 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_ule_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ule i32 %arg1, %arg2 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i1 @icmp_ule_setcc_i32(i32 
%arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp ule i32 %arg1, %arg2 + ret i1 %A +} + +;; Note: icmp ule i32 %arg1, <immed> can always be transformed into +;; icmp ult i32 %arg1, <immed>+1 +;; +;; Consequently, even though the patterns exist to match, it's unlikely +;; they'll ever be generated. + +define i32 @icmp_sgt_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp sgt i32 %arg1, %arg2 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i1 @icmp_sgt_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp sgt i32 %arg1, %arg2 + ret i1 %A +} + +define i32 @icmp_sgt_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp sgt i32 %arg1, 511 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_sgt_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp sgt i32 %arg1, 4294966784 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_sgt_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp sgt i32 %arg1, 4294967293 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_sgt_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp sgt i32 %arg1, 32768 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_sge_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp sge i32 %arg1, %arg2 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i1 @icmp_sge_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp sge i32 %arg1, %arg2 + ret i1 %A +} + +;; Note: icmp sge i32 %arg1, <immed> can always be transformed into +;; icmp sgt i32 %arg1, <immed>-1 +;; +;; Consequently, even though the patterns exist to match, it's unlikely +;; they'll ever be generated. 
+ +define i32 @icmp_slt_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp slt i32 %arg1, %arg2 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i1 @icmp_slt_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp slt i32 %arg1, %arg2 + ret i1 %A +} + +define i32 @icmp_slt_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp slt i32 %arg1, 511 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_slt_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp slt i32 %arg1, -512 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_slt_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp slt i32 %arg1, -1 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_slt_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp slt i32 %arg1, 32768 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i32 @icmp_sle_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp sle i32 %arg1, %arg2 + %B = select i1 %A, i32 %val1, i32 %val2 + ret i32 %B +} + +define i1 @icmp_sle_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +entry: + %A = icmp sle i32 %arg1, %arg2 + ret i1 %A +} + +;; Note: icmp sle i32 %arg1, <immed> can always be transformed into +;; icmp slt i32 %arg1, <immed>+1 +;; +;; Consequently, even though the patterns exist to match, it's unlikely +;; they'll ever be generated. +
diff --git a/src/LLVM/test/CodeGen/CellSPU/icmp64.ll b/src/LLVM/test/CodeGen/CellSPU/icmp64.ll new file mode 100644 index 0000000..9dd2cdc --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/icmp64.ll
@@ -0,0 +1,146 @@ +; RUN: llc < %s -march=cellspu > %t1.s +; RUN: grep ceq %t1.s | count 20 +; RUN: grep cgti %t1.s | count 12 +; RUN: grep cgt %t1.s | count 16 +; RUN: grep clgt %t1.s | count 12 +; RUN: grep gb %t1.s | count 12 +; RUN: grep fsm %t1.s | count 10 +; RUN: grep xori %t1.s | count 5 +; RUN: grep selb %t1.s | count 18 + +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +; $3 = %arg1, $4 = %arg2, $5 = %val1, $6 = %val2 +; $3 = %arg1, $4 = %val1, $5 = %val2 +; +; i64 integer comparisons: +define i64 @icmp_eq_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp eq i64 %arg1, %arg2 + %B = select i1 %A, i64 %val1, i64 %val2 + ret i64 %B +} + +define i1 @icmp_eq_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp eq i64 %arg1, %arg2 + ret i1 %A +} + +define i64 @icmp_ne_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp ne i64 %arg1, %arg2 + %B = select i1 %A, i64 %val1, i64 %val2 + ret i64 %B +} + +define i1 @icmp_ne_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp ne i64 %arg1, %arg2 + ret i1 %A +} + +define i64 @icmp_ugt_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp ugt i64 %arg1, %arg2 + %B = select i1 %A, i64 %val1, i64 %val2 + ret i64 %B +} + +define i1 @icmp_ugt_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp ugt i64 %arg1, %arg2 + ret i1 %A +} + +define i64 @icmp_uge_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp uge i64 %arg1, %arg2 + %B = select i1 %A, i64 %val1, i64 %val2 + ret i64 %B +} + +define i1 @icmp_uge_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp uge i64 %arg1, %arg2 + ret i1 %A +} + +define i64 @icmp_ult_select_i64(i64 %arg1, i64 %arg2, 
i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp ult i64 %arg1, %arg2 + %B = select i1 %A, i64 %val1, i64 %val2 + ret i64 %B +} + +define i1 @icmp_ult_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp ult i64 %arg1, %arg2 + ret i1 %A +} + +define i64 @icmp_ule_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp ule i64 %arg1, %arg2 + %B = select i1 %A, i64 %val1, i64 %val2 + ret i64 %B +} + +define i1 @icmp_ule_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp ule i64 %arg1, %arg2 + ret i1 %A +} + +define i64 @icmp_sgt_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp sgt i64 %arg1, %arg2 + %B = select i1 %A, i64 %val1, i64 %val2 + ret i64 %B +} + +define i1 @icmp_sgt_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp sgt i64 %arg1, %arg2 + ret i1 %A +} + +define i64 @icmp_sge_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp sge i64 %arg1, %arg2 + %B = select i1 %A, i64 %val1, i64 %val2 + ret i64 %B +} + +define i1 @icmp_sge_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp sge i64 %arg1, %arg2 + ret i1 %A +} + +define i64 @icmp_slt_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp slt i64 %arg1, %arg2 + %B = select i1 %A, i64 %val1, i64 %val2 + ret i64 %B +} + +define i1 @icmp_slt_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp slt i64 %arg1, %arg2 + ret i1 %A +} + +define i64 @icmp_sle_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp sle i64 %arg1, %arg2 + %B = select i1 %A, i64 %val1, i64 %val2 + ret i64 %B +} + +define i1 @icmp_sle_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp sle i64 %arg1, %arg2 + ret i1 %A +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/icmp8.ll b/src/LLVM/test/CodeGen/CellSPU/icmp8.ll new file mode 100644 index 0000000..5517d10 --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/icmp8.ll
@@ -0,0 +1,286 @@ +; RUN: llc < %s -march=cellspu > %t1.s +; RUN: grep ceqb %t1.s | count 24 +; RUN: grep ceqbi %t1.s | count 12 +; RUN: grep clgtb %t1.s | count 11 +; RUN: grep cgtb %t1.s | count 13 +; RUN: grep cgtbi %t1.s | count 5 +; RUN: grep {selb\t\\\$3, \\\$6, \\\$5, \\\$3} %t1.s | count 7 +; RUN: grep {selb\t\\\$3, \\\$5, \\\$6, \\\$3} %t1.s | count 3 +; RUN: grep {selb\t\\\$3, \\\$5, \\\$4, \\\$3} %t1.s | count 11 +; RUN: grep {selb\t\\\$3, \\\$4, \\\$5, \\\$3} %t1.s | count 4 + +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +; $3 = %arg1, $4 = %arg2, $5 = %val1, $6 = %val2 +; $3 = %arg1, $4 = %val1, $5 = %val2 +; +; For "positive" comparisons: +; selb $3, $6, $5, <i1> +; selb $3, $5, $4, <i1> +; +; For "negative" comparisons, i.e., those where the result of the comparison +; must be inverted (setne, for example): +; selb $3, $5, $6, <i1> +; selb $3, $4, $5, <i1> + +; i8 integer comparisons: +define i8 @icmp_eq_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp eq i8 %arg1, %arg2 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i1 @icmp_eq_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp eq i8 %arg1, %arg2 + ret i1 %A +} + +define i8 @icmp_eq_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp eq i8 %arg1, 127 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i8 @icmp_eq_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp eq i8 %arg1, -128 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i8 @icmp_eq_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp eq i8 %arg1, -1 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i8 @icmp_ne_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp ne i8 %arg1, %arg2 + %B = select i1 %A, i8 %val1, 
i8 %val2 + ret i8 %B +} + +define i1 @icmp_ne_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp ne i8 %arg1, %arg2 + ret i1 %A +} + +define i8 @icmp_ne_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp ne i8 %arg1, 127 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i8 @icmp_ne_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp ne i8 %arg1, -128 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i8 @icmp_ne_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp ne i8 %arg1, -1 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i8 @icmp_ugt_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp ugt i8 %arg1, %arg2 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i1 @icmp_ugt_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp ugt i8 %arg1, %arg2 + ret i1 %A +} + +define i8 @icmp_ugt_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp ugt i8 %arg1, 126 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i8 @icmp_uge_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp uge i8 %arg1, %arg2 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i1 @icmp_uge_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp uge i8 %arg1, %arg2 + ret i1 %A +} + +;; Note: icmp uge i8 %arg1, <immed> can always be transformed into +;; icmp ugt i8 %arg1, <immed>-1 +;; +;; Consequently, even though the patterns exist to match, it's unlikely +;; they'll ever be generated. 
+ +define i8 @icmp_ult_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp ult i8 %arg1, %arg2 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i1 @icmp_ult_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp ult i8 %arg1, %arg2 + ret i1 %A +} + +define i8 @icmp_ult_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp ult i8 %arg1, 253 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i8 @icmp_ult_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp ult i8 %arg1, 129 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i8 @icmp_ule_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp ule i8 %arg1, %arg2 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i1 @icmp_ule_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp ule i8 %arg1, %arg2 + ret i1 %A +} + +;; Note: icmp ule i8 %arg1, <immed> can always be transformed into +;; icmp ult i8 %arg1, <immed>+1 +;; +;; Consequently, even though the patterns exist to match, it's unlikely +;; they'll ever be generated. 
+ +define i8 @icmp_sgt_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp sgt i8 %arg1, %arg2 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i1 @icmp_sgt_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp sgt i8 %arg1, %arg2 + ret i1 %A +} + +define i8 @icmp_sgt_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp sgt i8 %arg1, 96 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i8 @icmp_sgt_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp sgt i8 %arg1, -1 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i8 @icmp_sgt_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp sgt i8 %arg1, -128 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i8 @icmp_sge_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp sge i8 %arg1, %arg2 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i1 @icmp_sge_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp sge i8 %arg1, %arg2 + ret i1 %A +} + +;; Note: icmp sge i8 %arg1, <immed> can always be transformed into +;; icmp sgt i8 %arg1, <immed>-1 +;; +;; Consequently, even though the patterns exist to match, it's unlikely +;; they'll ever be generated. 
+ +define i8 @icmp_slt_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp slt i8 %arg1, %arg2 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i1 @icmp_slt_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp slt i8 %arg1, %arg2 + ret i1 %A +} + +define i8 @icmp_slt_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp slt i8 %arg1, 96 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i8 @icmp_slt_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp slt i8 %arg1, -120 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i8 @icmp_slt_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp slt i8 %arg1, -1 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i8 @icmp_sle_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp sle i8 %arg1, %arg2 + %B = select i1 %A, i8 %val1, i8 %val2 + ret i8 %B +} + +define i1 @icmp_sle_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +entry: + %A = icmp sle i8 %arg1, %arg2 + ret i1 %A +} + +;; Note: icmp sle i8 %arg1, <immed> can always be transformed into +;; icmp slt i8 %arg1, <immed>+1 +;; +;; Consequently, even though the patterns exist to match, it's unlikely +;; they'll ever be generated. +
diff --git a/src/LLVM/test/CodeGen/CellSPU/immed16.ll b/src/LLVM/test/CodeGen/CellSPU/immed16.ll new file mode 100644 index 0000000..077d071 --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/immed16.ll
@@ -0,0 +1,40 @@ +; RUN: llc < %s -march=cellspu > %t1.s +; RUN: grep "ilh" %t1.s | count 11 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +define i16 @test_1() { + %x = alloca i16, align 16 + store i16 419, i16* %x ;; ILH via pattern + ret i16 0 +} + +define i16 @test_2() { + %x = alloca i16, align 16 + store i16 1023, i16* %x ;; ILH via pattern + ret i16 0 +} + +define i16 @test_3() { + %x = alloca i16, align 16 + store i16 -1023, i16* %x ;; ILH via pattern + ret i16 0 +} + +define i16 @test_4() { + %x = alloca i16, align 16 + store i16 32767, i16* %x ;; ILH via pattern + ret i16 0 +} + +define i16 @test_5() { + %x = alloca i16, align 16 + store i16 -32768, i16* %x ;; ILH via pattern + ret i16 0 +} + +define i16 @test_6() { + ret i16 0 +} + +
diff --git a/src/LLVM/test/CodeGen/CellSPU/immed32.ll b/src/LLVM/test/CodeGen/CellSPU/immed32.ll new file mode 100644 index 0000000..8e48f0b --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/immed32.ll
@@ -0,0 +1,83 @@ +; RUN: llc < %s -march=cellspu > %t1.s +; RUN: grep ilhu %t1.s | count 9 +; RUN: grep iohl %t1.s | count 7 +; RUN: grep -w il %t1.s | count 3 +; RUN: grep 16429 %t1.s | count 1 +; RUN: grep 63572 %t1.s | count 1 +; RUN: grep 128 %t1.s | count 1 +; RUN: grep 32639 %t1.s | count 1 +; RUN: grep 65535 %t1.s | count 1 +; RUN: grep 16457 %t1.s | count 1 +; RUN: grep 4059 %t1.s | count 1 +; RUN: grep 49077 %t1.s | count 1 +; RUN: grep 1267 %t1.s | count 2 +; RUN: grep 16309 %t1.s | count 1 +; RUN: cat %t1.s | FileCheck %s +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +define i32 @test_1() { + ret i32 4784128 ;; ILHU via pattern (0x49000) +} + +define i32 @test_2() { + ret i32 5308431 ;; ILHU/IOHL via pattern (0x5100f) +} + +define i32 @test_3() { + ret i32 511 ;; IL via pattern +} + +define i32 @test_4() { + ret i32 -512 ;; IL via pattern +} + +define i32 @test_5() +{ +;CHECK: test_5: +;CHECK-NOT: ila $3, 40000 +;CHECK: ilhu +;CHECK: iohl +;CHECK: bi $lr + ret i32 400000 +} + +;; double float floatval +;; 0x4005bf0a80000000 0x402d|f854 2.718282 +define float @float_const_1() { + ret float 0x4005BF0A80000000 ;; ILHU/IOHL +} + +;; double float floatval +;; 0x3810000000000000 0x0080|0000 0.000000 +define float @float_const_2() { + ret float 0x3810000000000000 ;; IL 128 +} + +;; double float floatval +;; 0x47efffffe0000000 0x7f7f|ffff NaN +define float @float_const_3() { + ret float 0x47EFFFFFE0000000 ;; ILHU/IOHL via pattern +} + +;; double float floatval +;; 0x400921fb60000000 0x4049|0fdb 3.141593 +define float @float_const_4() { + ret float 0x400921FB60000000 ;; ILHU/IOHL via pattern +} + +;; double float floatval +;; 0xbff6a09e60000000 0xbfb5|04f3 -1.414214 +define float @float_const_5() { + ret float 0xBFF6A09E60000000 ;; ILHU/IOHL via pattern +} + +;; double float floatval +;; 0x3ff6a09e60000000 0x3fb5|04f3 1.414214 +define float 
@float_const_6() { + ret float 0x3FF6A09E60000000 ;; ILHU/IOHL via pattern +} + +define float @float_const_7() { + ret float 0.000000e+00 ;; IL 0 via pattern +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/immed64.ll b/src/LLVM/test/CodeGen/CellSPU/immed64.ll new file mode 100644 index 0000000..fd48365 --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/immed64.ll
@@ -0,0 +1,95 @@ +; RUN: llc < %s -march=cellspu > %t1.s +; RUN: grep lqa %t1.s | count 13 +; RUN: grep ilhu %t1.s | count 15 +; RUN: grep ila %t1.s | count 1 +; RUN: grep -w il %t1.s | count 6 +; RUN: grep shufb %t1.s | count 13 +; RUN: grep 65520 %t1.s | count 1 +; RUN: grep 43981 %t1.s | count 1 +; RUN: grep 13702 %t1.s | count 1 +; RUN: grep 28225 %t1.s | count 1 +; RUN: grep 30720 %t1.s | count 1 +; RUN: grep 3233857728 %t1.s | count 8 +; RUN: grep 2155905152 %t1.s | count 6 +; RUN: grep 66051 %t1.s | count 7 +; RUN: grep 471670303 %t1.s | count 11 + +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +; 1311768467750121234 => 0x 12345678 abcdef12 (4660,22136/43981,61202) +; 18446744073709551591 => 0x ffffffff ffffffe7 (-25) +; 18446744073708516742 => 0x ffffffff fff03586 (-1034874) +; 5308431 => 0x 00000000 0051000F +; 9223372038704560128 => 0x 80000000 6e417800 + +define i64 @i64_const_1() { + ret i64 1311768467750121234 ;; Constant pool spill +} + +define i64 @i64_const_2() { + ret i64 18446744073709551591 ;; IL/SHUFB +} + +define i64 @i64_const_3() { + ret i64 18446744073708516742 ;; IHLU/IOHL/SHUFB +} + +define i64 @i64_const_4() { + ret i64 5308431 ;; ILHU/IOHL/SHUFB +} + +define i64 @i64_const_5() { + ret i64 511 ;; IL/SHUFB +} + +define i64 @i64_const_6() { + ret i64 -512 ;; IL/SHUFB +} + +define i64 @i64_const_7() { + ret i64 9223372038704560128 ;; IHLU/IOHL/SHUFB +} + +define i64 @i64_const_8() { + ret i64 0 ;; IL +} + +define i64 @i64_const_9() { + ret i64 -1 ;; IL +} + +define i64 @i64_const_10() { + ret i64 281470681808895 ;; IL 65535 +} + +; 0x4005bf0a8b145769 -> +; (ILHU 0x4005 [16389]/IOHL 0xbf0a [48906]) +; (ILHU 0x8b14 [35604]/IOHL 0x5769 [22377]) +define double @f64_const_1() { + ret double 0x4005bf0a8b145769 ;; ILHU/IOHL via pattern +} + +define double @f64_const_2() { + ret double 0x0010000000000000 +} + +define double 
@f64_const_3() { + ret double 0x7fefffffffffffff +} + +define double @f64_const_4() { + ret double 0x400921fb54442d18 +} + +define double @f64_const_5() { + ret double 0xbff6a09e667f3bcd ;; ILHU/IOHL via pattern +} + +define double @f64_const_6() { + ret double 0x3ff6a09e667f3bcd +} + +define double @f64_const_7() { + ret double 0.000000e+00 +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/int2fp.ll b/src/LLVM/test/CodeGen/CellSPU/int2fp.ll new file mode 100644 index 0000000..984c017 --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/int2fp.ll
@@ -0,0 +1,41 @@ +; RUN: llc < %s -march=cellspu > %t1.s +; RUN: grep csflt %t1.s | count 5 +; RUN: grep cuflt %t1.s | count 1 +; RUN: grep xshw %t1.s | count 2 +; RUN: grep xsbh %t1.s | count 1 +; RUN: grep and %t1.s | count 2 +; RUN: grep andi %t1.s | count 1 +; RUN: grep ila %t1.s | count 1 + +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +define float @sitofp_i32(i32 %arg1) { + %A = sitofp i32 %arg1 to float ; <float> [#uses=1] + ret float %A +} + +define float @uitofp_u32(i32 %arg1) { + %A = uitofp i32 %arg1 to float ; <float> [#uses=1] + ret float %A +} + +define float @sitofp_i16(i16 %arg1) { + %A = sitofp i16 %arg1 to float ; <float> [#uses=1] + ret float %A +} + +define float @uitofp_i16(i16 %arg1) { + %A = uitofp i16 %arg1 to float ; <float> [#uses=1] + ret float %A +} + +define float @sitofp_i8(i8 %arg1) { + %A = sitofp i8 %arg1 to float ; <float> [#uses=1] + ret float %A +} + +define float @uitofp_i8(i8 %arg1) { + %A = uitofp i8 %arg1 to float ; <float> [#uses=1] + ret float %A +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/intrinsics_branch.ll b/src/LLVM/test/CodeGen/CellSPU/intrinsics_branch.ll new file mode 100644 index 0000000..b0f6a62 --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/intrinsics_branch.ll
@@ -0,0 +1,150 @@ +; RUN: llc < %s -march=cellspu > %t1.s +; RUN: grep ceq %t1.s | count 30 +; RUN: grep ceqb %t1.s | count 10 +; RUN: grep ceqhi %t1.s | count 5 +; RUN: grep ceqi %t1.s | count 5 +; RUN: grep cgt %t1.s | count 30 +; RUN: grep cgtb %t1.s | count 10 +; RUN: grep cgthi %t1.s | count 5 +; RUN: grep cgti %t1.s | count 5 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +declare <4 x i32> @llvm.spu.si.shli(<4 x i32>, i8) + +declare <4 x i32> @llvm.spu.si.ceq(<4 x i32>, <4 x i32>) +declare <16 x i8> @llvm.spu.si.ceqb(<16 x i8>, <16 x i8>) +declare <8 x i16> @llvm.spu.si.ceqh(<8 x i16>, <8 x i16>) +declare <4 x i32> @llvm.spu.si.ceqi(<4 x i32>, i16) +declare <8 x i16> @llvm.spu.si.ceqhi(<8 x i16>, i16) +declare <16 x i8> @llvm.spu.si.ceqbi(<16 x i8>, i8) + +declare <4 x i32> @llvm.spu.si.cgt(<4 x i32>, <4 x i32>) +declare <16 x i8> @llvm.spu.si.cgtb(<16 x i8>, <16 x i8>) +declare <8 x i16> @llvm.spu.si.cgth(<8 x i16>, <8 x i16>) +declare <4 x i32> @llvm.spu.si.cgti(<4 x i32>, i16) +declare <8 x i16> @llvm.spu.si.cgthi(<8 x i16>, i16) +declare <16 x i8> @llvm.spu.si.cgtbi(<16 x i8>, i8) + +declare <4 x i32> @llvm.spu.si.clgt(<4 x i32>, <4 x i32>) +declare <16 x i8> @llvm.spu.si.clgtb(<16 x i8>, <16 x i8>) +declare <8 x i16> @llvm.spu.si.clgth(<8 x i16>, <8 x i16>) +declare <4 x i32> @llvm.spu.si.clgti(<4 x i32>, i16) +declare <8 x i16> @llvm.spu.si.clgthi(<8 x i16>, i16) +declare <16 x i8> @llvm.spu.si.clgtbi(<16 x i8>, i8) + + + +define <4 x i32> @test(<4 x i32> %A) { + call <4 x i32> @llvm.spu.si.shli(<4 x i32> %A, i8 3) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <4 x i32> @ceqtest(<4 x i32> %A, <4 x i32> %B) { + call <4 x i32> @llvm.spu.si.ceq(<4 x i32> %A, <4 x i32> %B) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <8 x i16> @ceqhtest(<8 x i16> %A, <8 x i16> %B) { + call <8 x i16> 
@llvm.spu.si.ceqh(<8 x i16> %A, <8 x i16> %B) + %Y = bitcast <8 x i16> %1 to <8 x i16> + ret <8 x i16> %Y +} + +define <16 x i8> @ceqbtest(<16 x i8> %A, <16 x i8> %B) { + call <16 x i8> @llvm.spu.si.ceqb(<16 x i8> %A, <16 x i8> %B) + %Y = bitcast <16 x i8> %1 to <16 x i8> + ret <16 x i8> %Y +} + +define <4 x i32> @ceqitest(<4 x i32> %A) { + call <4 x i32> @llvm.spu.si.ceqi(<4 x i32> %A, i16 65) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <8 x i16> @ceqhitest(<8 x i16> %A) { + call <8 x i16> @llvm.spu.si.ceqhi(<8 x i16> %A, i16 65) + %Y = bitcast <8 x i16> %1 to <8 x i16> + ret <8 x i16> %Y +} + +define <16 x i8> @ceqbitest(<16 x i8> %A) { + call <16 x i8> @llvm.spu.si.ceqbi(<16 x i8> %A, i8 65) + %Y = bitcast <16 x i8> %1 to <16 x i8> + ret <16 x i8> %Y +} + +define <4 x i32> @cgttest(<4 x i32> %A, <4 x i32> %B) { + call <4 x i32> @llvm.spu.si.cgt(<4 x i32> %A, <4 x i32> %B) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <8 x i16> @cgthtest(<8 x i16> %A, <8 x i16> %B) { + call <8 x i16> @llvm.spu.si.cgth(<8 x i16> %A, <8 x i16> %B) + %Y = bitcast <8 x i16> %1 to <8 x i16> + ret <8 x i16> %Y +} + +define <16 x i8> @cgtbtest(<16 x i8> %A, <16 x i8> %B) { + call <16 x i8> @llvm.spu.si.cgtb(<16 x i8> %A, <16 x i8> %B) + %Y = bitcast <16 x i8> %1 to <16 x i8> + ret <16 x i8> %Y +} + +define <4 x i32> @cgtitest(<4 x i32> %A) { + call <4 x i32> @llvm.spu.si.cgti(<4 x i32> %A, i16 65) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <8 x i16> @cgthitest(<8 x i16> %A) { + call <8 x i16> @llvm.spu.si.cgthi(<8 x i16> %A, i16 65) + %Y = bitcast <8 x i16> %1 to <8 x i16> + ret <8 x i16> %Y +} + +define <16 x i8> @cgtbitest(<16 x i8> %A) { + call <16 x i8> @llvm.spu.si.cgtbi(<16 x i8> %A, i8 65) + %Y = bitcast <16 x i8> %1 to <16 x i8> + ret <16 x i8> %Y +} + +define <4 x i32> @clgttest(<4 x i32> %A, <4 x i32> %B) { + call <4 x i32> @llvm.spu.si.clgt(<4 x i32> %A, <4 x i32> %B) + %Y = bitcast <4 x 
i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <8 x i16> @clgthtest(<8 x i16> %A, <8 x i16> %B) { + call <8 x i16> @llvm.spu.si.clgth(<8 x i16> %A, <8 x i16> %B) + %Y = bitcast <8 x i16> %1 to <8 x i16> + ret <8 x i16> %Y +} + +define <16 x i8> @clgtbtest(<16 x i8> %A, <16 x i8> %B) { + call <16 x i8> @llvm.spu.si.clgtb(<16 x i8> %A, <16 x i8> %B) + %Y = bitcast <16 x i8> %1 to <16 x i8> + ret <16 x i8> %Y +} + +define <4 x i32> @clgtitest(<4 x i32> %A) { + call <4 x i32> @llvm.spu.si.clgti(<4 x i32> %A, i16 65) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <8 x i16> @clgthitest(<8 x i16> %A) { + call <8 x i16> @llvm.spu.si.clgthi(<8 x i16> %A, i16 65) + %Y = bitcast <8 x i16> %1 to <8 x i16> + ret <8 x i16> %Y +} + +define <16 x i8> @clgtbitest(<16 x i8> %A) { + call <16 x i8> @llvm.spu.si.clgtbi(<16 x i8> %A, i8 65) + %Y = bitcast <16 x i8> %1 to <16 x i8> + ret <16 x i8> %Y +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/intrinsics_float.ll b/src/LLVM/test/CodeGen/CellSPU/intrinsics_float.ll new file mode 100644 index 0000000..8137347 --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/intrinsics_float.ll
@@ -0,0 +1,94 @@ +; RUN: llc < %s -march=cellspu > %t1.s +; RUN: grep fa %t1.s | count 5 +; RUN: grep fs %t1.s | count 5 +; RUN: grep fm %t1.s | count 15 +; RUN: grep fceq %t1.s | count 5 +; RUN: grep fcmeq %t1.s | count 5 +; RUN: grep fcgt %t1.s | count 5 +; RUN: grep fcmgt %t1.s | count 5 +; RUN: grep fma %t1.s | count 5 +; RUN: grep fnms %t1.s | count 5 +; RUN: grep fms %t1.s | count 5 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +declare <4 x i32> @llvm.spu.si.shli(<4 x i32>, i8) + +declare <4 x float> @llvm.spu.si.fa(<4 x float>, <4 x float>) +declare <4 x float> @llvm.spu.si.fs(<4 x float>, <4 x float>) +declare <4 x float> @llvm.spu.si.fm(<4 x float>, <4 x float>) + +declare <4 x float> @llvm.spu.si.fceq(<4 x float>, <4 x float>) +declare <4 x float> @llvm.spu.si.fcmeq(<4 x float>, <4 x float>) +declare <4 x float> @llvm.spu.si.fcgt(<4 x float>, <4 x float>) +declare <4 x float> @llvm.spu.si.fcmgt(<4 x float>, <4 x float>) + +declare <4 x float> @llvm.spu.si.fma(<4 x float>, <4 x float>, <4 x float>) +declare <4 x float> @llvm.spu.si.fnms(<4 x float>, <4 x float>, <4 x float>) +declare <4 x float> @llvm.spu.si.fms(<4 x float>, <4 x float>, <4 x float>) + +define <4 x i32> @test(<4 x i32> %A) { + call <4 x i32> @llvm.spu.si.shli(<4 x i32> %A, i8 3) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <4 x float> @fatest(<4 x float> %A, <4 x float> %B) { + call <4 x float> @llvm.spu.si.fa(<4 x float> %A, <4 x float> %B) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +} + +define <4 x float> @fstest(<4 x float> %A, <4 x float> %B) { + call <4 x float> @llvm.spu.si.fs(<4 x float> %A, <4 x float> %B) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +} + +define <4 x float> @fmtest(<4 x float> %A, <4 x float> %B) { + call <4 x float> @llvm.spu.si.fm(<4 x float> %A, <4 x float> %B) + %Y = 
bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +} + +define <4 x float> @fceqtest(<4 x float> %A, <4 x float> %B) { + call <4 x float> @llvm.spu.si.fceq(<4 x float> %A, <4 x float> %B) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +} + +define <4 x float> @fcmeqtest(<4 x float> %A, <4 x float> %B) { + call <4 x float> @llvm.spu.si.fcmeq(<4 x float> %A, <4 x float> %B) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +} + +define <4 x float> @fcgttest(<4 x float> %A, <4 x float> %B) { + call <4 x float> @llvm.spu.si.fcgt(<4 x float> %A, <4 x float> %B) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +} + +define <4 x float> @fcmgttest(<4 x float> %A, <4 x float> %B) { + call <4 x float> @llvm.spu.si.fcmgt(<4 x float> %A, <4 x float> %B) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +} + +define <4 x float> @fmatest(<4 x float> %A, <4 x float> %B, <4 x float> %C) { + call <4 x float> @llvm.spu.si.fma(<4 x float> %A, <4 x float> %B, <4 x float> %C) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +} + +define <4 x float> @fnmstest(<4 x float> %A, <4 x float> %B, <4 x float> %C) { + call <4 x float> @llvm.spu.si.fnms(<4 x float> %A, <4 x float> %B, <4 x float> %C) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +} + +define <4 x float> @fmstest(<4 x float> %A, <4 x float> %B, <4 x float> %C) { + call <4 x float> @llvm.spu.si.fms(<4 x float> %A, <4 x float> %B, <4 x float> %C) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/intrinsics_logical.ll b/src/LLVM/test/CodeGen/CellSPU/intrinsics_logical.ll new file mode 100644 index 0000000..a29ee4c --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/intrinsics_logical.ll
@@ -0,0 +1,49 @@ +; RUN: llc < %s -march=cellspu > %t1.s +; RUN: grep and %t1.s | count 20 +; RUN: grep andc %t1.s | count 5 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +declare <4 x i32> @llvm.spu.si.and(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.spu.si.andc(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.spu.si.andi(<4 x i32>, i16) +declare <8 x i16> @llvm.spu.si.andhi(<8 x i16>, i16) +declare <16 x i8> @llvm.spu.si.andbi(<16 x i8>, i8) + +declare <4 x i32> @llvm.spu.si.or(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.spu.si.orc(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.spu.si.ori(<4 x i32>, i16) +declare <8 x i16> @llvm.spu.si.orhi(<8 x i16>, i16) +declare <16 x i8> @llvm.spu.si.orbi(<16 x i8>, i8) + +declare <4 x i32> @llvm.spu.si.xor(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.spu.si.xori(<4 x i32>, i16) +declare <8 x i16> @llvm.spu.si.xorhi(<8 x i16>, i16) +declare <16 x i8> @llvm.spu.si.xorbi(<16 x i8>, i8) + +declare <4 x i32> @llvm.spu.si.nand(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.spu.si.nor(<4 x i32>, <4 x i32>) + +define <4 x i32> @andtest(<4 x i32> %A, <4 x i32> %B) { + call <4 x i32> @llvm.spu.si.and(<4 x i32> %A, <4 x i32> %B) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <4 x i32> @andctest(<4 x i32> %A, <4 x i32> %B) { + call <4 x i32> @llvm.spu.si.andc(<4 x i32> %A, <4 x i32> %B) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <4 x i32> @anditest(<4 x i32> %A) { + call <4 x i32> @llvm.spu.si.andi(<4 x i32> %A, i16 65) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <8 x i16> @andhitest(<8 x i16> %A) { + call <8 x i16> @llvm.spu.si.andhi(<8 x i16> %A, i16 65) + %Y = bitcast <8 x i16> %1 to <8 x i16> + ret <8 x i16> %Y +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/jumptable.ll b/src/LLVM/test/CodeGen/CellSPU/jumptable.ll new file mode 100644 index 0000000..66c2fde --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/jumptable.ll
@@ -0,0 +1,21 @@ +;RUN: llc --march=cellspu -disable-cgp-branch-opts %s -o - | FileCheck %s +; This is to check that emitting jumptables doesn't crash llc +define i32 @test(i32 %param) { +entry: +;CHECK: ai {{\$.}}, $3, -1 +;CHECK: clgti {{\$., \$.}}, 3 +;CHECK: brnz {{\$.}},.LBB0_ + switch i32 %param, label %bb2 [ + i32 1, label %bb1 + i32 2, label %bb2 + i32 3, label %bb3 + i32 4, label %bb2 + ] +;CHECK-NOT: # BB#2 +bb1: + ret i32 1 +bb2: + ret i32 2 +bb3: + ret i32 %param +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/loads.ll b/src/LLVM/test/CodeGen/CellSPU/loads.ll new file mode 100644 index 0000000..4771752 --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/loads.ll
@@ -0,0 +1,59 @@ +; RUN: llc < %s -march=cellspu | FileCheck %s + +; ModuleID = 'loads.bc' +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +define <4 x float> @load_v4f32_1(<4 x float>* %a) nounwind readonly { +entry: + %tmp1 = load <4 x float>* %a + ret <4 x float> %tmp1 +; CHECK: lqd $3, 0($3) +} + +define <4 x float> @load_v4f32_2(<4 x float>* %a) nounwind readonly { +entry: + %arrayidx = getelementptr <4 x float>* %a, i32 1 + %tmp1 = load <4 x float>* %arrayidx + ret <4 x float> %tmp1 +; CHECK: lqd $3, 16($3) +} + + +declare <4 x i32>* @getv4f32ptr() +define <4 x i32> @func() { + ;CHECK: brasl + ; we need to have some instruction to move the result to safety. + ; which instruction (lr, stqd...) depends on the regalloc + ;CHECK: {{.*}} + ;CHECK: brasl + %rv1 = call <4 x i32>* @getv4f32ptr() + %rv2 = call <4 x i32>* @getv4f32ptr() + %rv3 = load <4 x i32>* %rv1 + ret <4 x i32> %rv3 +} + +define <4 x float> @load_undef(){ + ; CHECK: lqd $3, 0($3) + %val = load <4 x float>* undef + ret <4 x float> %val +} + +;check that 'misaligned' loads that may span two memory chunks +;have two loads. Don't check for the bitmanipulation, as that +;might change with improved algorithms or scheduling +define i32 @load_misaligned( i32* %ptr ){ +;CHECK: load_misaligned +;CHECK: lqd +;CHECK: lqd +;CHECK: bi $lr + %rv = load i32* %ptr, align 2 + ret i32 %rv +} + +define <4 x i32> @load_null_vec( ) { +;CHECK: lqa +;CHECK: bi $lr + %rv = load <4 x i32>* null + ret <4 x i32> %rv +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/mul-with-overflow.ll b/src/LLVM/test/CodeGen/CellSPU/mul-with-overflow.ll new file mode 100644 index 0000000..c04e69e --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/mul-with-overflow.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=cellspu + +declare {i16, i1} @llvm.smul.with.overflow.i16(i16 %a, i16 %b) +define zeroext i1 @a(i16 %x) nounwind { + %res = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %x, i16 3) + %obil = extractvalue {i16, i1} %res, 1 + ret i1 %obil +} + +declare {i16, i1} @llvm.umul.with.overflow.i16(i16 %a, i16 %b) +define zeroext i1 @b(i16 %x) nounwind { + %res = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %x, i16 3) + %obil = extractvalue {i16, i1} %res, 1 + ret i1 %obil +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/mul_ops.ll b/src/LLVM/test/CodeGen/CellSPU/mul_ops.ll new file mode 100644 index 0000000..1e28fc7 --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/mul_ops.ll
@@ -0,0 +1,88 @@ +; RUN: llc < %s -march=cellspu > %t1.s +; RUN: grep mpy %t1.s | count 44 +; RUN: grep mpyu %t1.s | count 4 +; RUN: grep mpyh %t1.s | count 10 +; RUN: grep mpyhh %t1.s | count 2 +; RUN: grep rotma %t1.s | count 12 +; RUN: grep rotmahi %t1.s | count 4 +; RUN: grep and %t1.s | count 2 +; RUN: grep selb %t1.s | count 6 +; RUN: grep fsmbi %t1.s | count 4 +; RUN: grep shli %t1.s | count 4 +; RUN: grep shlhi %t1.s | count 4 +; RUN: grep ila %t1.s | count 2 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +; 32-bit multiply instruction generation: +define <4 x i32> @mpy_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { +entry: + %A = mul <4 x i32> %arg1, %arg2 + ret <4 x i32> %A +} + +define <4 x i32> @mpy_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) { +entry: + %A = mul <4 x i32> %arg2, %arg1 + ret <4 x i32> %A +} + +define <8 x i16> @mpy_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) { +entry: + %A = mul <8 x i16> %arg1, %arg2 + ret <8 x i16> %A +} + +define <8 x i16> @mpy_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) { +entry: + %A = mul <8 x i16> %arg2, %arg1 + ret <8 x i16> %A +} + +define <16 x i8> @mul_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) { +entry: + %A = mul <16 x i8> %arg2, %arg1 + ret <16 x i8> %A +} + +define <16 x i8> @mul_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) { +entry: + %A = mul <16 x i8> %arg1, %arg2 + ret <16 x i8> %A +} + +define i32 @mul_i32_1(i32 %arg1, i32 %arg2) { +entry: + %A = mul i32 %arg2, %arg1 + ret i32 %A +} + +define i32 @mul_i32_2(i32 %arg1, i32 %arg2) { +entry: + %A = mul i32 %arg1, %arg2 + ret i32 %A +} + +define i16 @mul_i16_1(i16 %arg1, i16 %arg2) { +entry: + %A = mul i16 %arg2, %arg1 + ret i16 %A +} + +define i16 @mul_i16_2(i16 %arg1, i16 %arg2) { +entry: + %A = mul i16 %arg1, %arg2 + ret i16 %A +} + +define i8 @mul_i8_1(i8 %arg1, i8 %arg2) { +entry: + %A = mul i8 %arg2, %arg1 + ret i8 %A +} + +define i8 
@mul_i8_2(i8 %arg1, i8 %arg2) { +entry: + %A = mul i8 %arg1, %arg2 + ret i8 %A +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/nand.ll b/src/LLVM/test/CodeGen/CellSPU/nand.ll new file mode 100644 index 0000000..b770cad --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/nand.ll
@@ -0,0 +1,121 @@ +; RUN: llc < %s -march=cellspu > %t1.s +; RUN: grep nand %t1.s | count 90 +; RUN: grep and %t1.s | count 94 +; RUN: grep xsbh %t1.s | count 2 +; RUN: grep xshw %t1.s | count 4 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +define <4 x i32> @nand_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { + %A = and <4 x i32> %arg2, %arg1 ; <<4 x i32>> [#uses=1] + %B = xor <4 x i32> %A, < i32 -1, i32 -1, i32 -1, i32 -1 > + ret <4 x i32> %B +} + +define <4 x i32> @nand_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) { + %A = and <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] + %B = xor <4 x i32> %A, < i32 -1, i32 -1, i32 -1, i32 -1 > + ret <4 x i32> %B +} + +define <8 x i16> @nand_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) { + %A = and <8 x i16> %arg2, %arg1 ; <<8 x i16>> [#uses=1] + %B = xor <8 x i16> %A, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + ret <8 x i16> %B +} + +define <8 x i16> @nand_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) { + %A = and <8 x i16> %arg1, %arg2 ; <<8 x i16>> [#uses=1] + %B = xor <8 x i16> %A, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + ret <8 x i16> %B +} + +define <16 x i8> @nand_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) { + %A = and <16 x i8> %arg2, %arg1 ; <<16 x i8>> [#uses=1] + %B = xor <16 x i8> %A, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + ret <16 x i8> %B +} + +define <16 x i8> @nand_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) { + %A = and <16 x i8> %arg1, %arg2 ; <<16 x i8>> [#uses=1] + %B = xor <16 x i8> %A, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + ret <16 x i8> %B +} + +define i32 @nand_i32_1(i32 %arg1, i32 %arg2) { + %A = and i32 %arg2, %arg1 ; <i32> [#uses=1] + %B = xor i32 %A, -1 ; <i32> [#uses=1] + ret 
i32 %B +} + +define i32 @nand_i32_2(i32 %arg1, i32 %arg2) { + %A = and i32 %arg1, %arg2 ; <i32> [#uses=1] + %B = xor i32 %A, -1 ; <i32> [#uses=1] + ret i32 %B +} + +define signext i16 @nand_i16_1(i16 signext %arg1, i16 signext %arg2) { + %A = and i16 %arg2, %arg1 ; <i16> [#uses=1] + %B = xor i16 %A, -1 ; <i16> [#uses=1] + ret i16 %B +} + +define signext i16 @nand_i16_2(i16 signext %arg1, i16 signext %arg2) { + %A = and i16 %arg1, %arg2 ; <i16> [#uses=1] + %B = xor i16 %A, -1 ; <i16> [#uses=1] + ret i16 %B +} + +define zeroext i16 @nand_i16u_1(i16 zeroext %arg1, i16 zeroext %arg2) { + %A = and i16 %arg2, %arg1 ; <i16> [#uses=1] + %B = xor i16 %A, -1 ; <i16> [#uses=1] + ret i16 %B +} + +define zeroext i16 @nand_i16u_2(i16 zeroext %arg1, i16 zeroext %arg2) { + %A = and i16 %arg1, %arg2 ; <i16> [#uses=1] + %B = xor i16 %A, -1 ; <i16> [#uses=1] + ret i16 %B +} + +define zeroext i8 @nand_i8u_1(i8 zeroext %arg1, i8 zeroext %arg2) { + %A = and i8 %arg2, %arg1 ; <i8> [#uses=1] + %B = xor i8 %A, -1 ; <i8> [#uses=1] + ret i8 %B +} + +define zeroext i8 @nand_i8u_2(i8 zeroext %arg1, i8 zeroext %arg2) { + %A = and i8 %arg1, %arg2 ; <i8> [#uses=1] + %B = xor i8 %A, -1 ; <i8> [#uses=1] + ret i8 %B +} + +define signext i8 @nand_i8_1(i8 signext %arg1, i8 signext %arg2) { + %A = and i8 %arg2, %arg1 ; <i8> [#uses=1] + %B = xor i8 %A, -1 ; <i8> [#uses=1] + ret i8 %B +} + +define signext i8 @nand_i8_2(i8 signext %arg1, i8 signext %arg2) { + %A = and i8 %arg1, %arg2 ; <i8> [#uses=1] + %B = xor i8 %A, -1 ; <i8> [#uses=1] + ret i8 %B +} + +define i8 @nand_i8_3(i8 %arg1, i8 %arg2) { + %A = and i8 %arg2, %arg1 ; <i8> [#uses=1] + %B = xor i8 %A, -1 ; <i8> [#uses=1] + ret i8 %B +} + +define i8 @nand_i8_4(i8 %arg1, i8 %arg2) { + %A = and i8 %arg1, %arg2 ; <i8> [#uses=1] + %B = xor i8 %A, -1 ; <i8> [#uses=1] + ret i8 %B +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/or_ops.ll b/src/LLVM/test/CodeGen/CellSPU/or_ops.ll new file mode 100644 index 0000000..4f1febb --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/or_ops.ll
@@ -0,0 +1,275 @@ +; RUN: llc < %s -march=cellspu > %t1.s +; RUN: grep and %t1.s | count 2 +; RUN: grep orc %t1.s | count 85 +; RUN: grep ori %t1.s | count 34 +; RUN: grep orhi %t1.s | count 30 +; RUN: grep orbi %t1.s | count 15 +; RUN: FileCheck %s < %t1.s + +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +; OR instruction generation: +define <4 x i32> @or_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { + %A = or <4 x i32> %arg1, %arg2 + ret <4 x i32> %A +} + +define <4 x i32> @or_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) { + %A = or <4 x i32> %arg2, %arg1 + ret <4 x i32> %A +} + +define <8 x i16> @or_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) { + %A = or <8 x i16> %arg1, %arg2 + ret <8 x i16> %A +} + +define <8 x i16> @or_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) { + %A = or <8 x i16> %arg2, %arg1 + ret <8 x i16> %A +} + +define <16 x i8> @or_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) { + %A = or <16 x i8> %arg2, %arg1 + ret <16 x i8> %A +} + +define <16 x i8> @or_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) { + %A = or <16 x i8> %arg1, %arg2 + ret <16 x i8> %A +} + +define i32 @or_i32_1(i32 %arg1, i32 %arg2) { + %A = or i32 %arg2, %arg1 + ret i32 %A +} + +define i32 @or_i32_2(i32 %arg1, i32 %arg2) { + %A = or i32 %arg1, %arg2 + ret i32 %A +} + +define i16 @or_i16_1(i16 %arg1, i16 %arg2) { + %A = or i16 %arg2, %arg1 + ret i16 %A +} + +define i16 @or_i16_2(i16 %arg1, i16 %arg2) { + %A = or i16 %arg1, %arg2 + ret i16 %A +} + +define i8 @or_i8_1(i8 %arg1, i8 %arg2) { + %A = or i8 %arg2, %arg1 + ret i8 %A +} + +define i8 @or_i8_2(i8 %arg1, i8 %arg2) { + %A = or i8 %arg1, %arg2 + ret i8 %A +} + +; ORC instruction generation: +define <4 x i32> @orc_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { + %A = xor <4 x i32> %arg2, < i32 -1, i32 -1, i32 -1, i32 -1 > + %B = or <4 x i32> %arg1, %A + ret <4 x i32> %B +} + +define <4 x i32> @orc_v4i32_2(<4 x i32> %arg1, <4 x 
i32> %arg2) { + %A = xor <4 x i32> %arg1, < i32 -1, i32 -1, i32 -1, i32 -1 > + %B = or <4 x i32> %arg2, %A + ret <4 x i32> %B +} + +define <4 x i32> @orc_v4i32_3(<4 x i32> %arg1, <4 x i32> %arg2) { + %A = xor <4 x i32> %arg1, < i32 -1, i32 -1, i32 -1, i32 -1 > + %B = or <4 x i32> %A, %arg2 + ret <4 x i32> %B +} + +define <8 x i16> @orc_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) { + %A = xor <8 x i16> %arg2, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + %B = or <8 x i16> %arg1, %A + ret <8 x i16> %B +} + +define <8 x i16> @orc_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) { + %A = xor <8 x i16> %arg1, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + %B = or <8 x i16> %arg2, %A + ret <8 x i16> %B +} + +define <16 x i8> @orc_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) { + %A = xor <16 x i8> %arg1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = or <16 x i8> %arg2, %A + ret <16 x i8> %B +} + +define <16 x i8> @orc_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) { + %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = or <16 x i8> %arg1, %A + ret <16 x i8> %B +} + +define <16 x i8> @orc_v16i8_3(<16 x i8> %arg1, <16 x i8> %arg2) { + %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = or <16 x i8> %A, %arg1 + ret <16 x i8> %B +} + +define i32 @orc_i32_1(i32 %arg1, i32 %arg2) { + %A = xor i32 %arg2, -1 + %B = or i32 %A, %arg1 + ret i32 %B +} + +define i32 @orc_i32_2(i32 %arg1, i32 %arg2) { + %A = xor i32 %arg1, -1 + %B = or i32 %A, %arg2 + ret i32 %B +} + +define i32 @orc_i32_3(i32 %arg1, i32 %arg2) { + %A = xor i32 %arg2, -1 + %B = or i32 %arg1, %A + ret i32 %B +} + +define i16 @orc_i16_1(i16 %arg1, i16 %arg2) { + %A = xor i16 %arg2, -1 + %B = or i16 %A, %arg1 + ret 
i16 %B +} + +define i16 @orc_i16_2(i16 %arg1, i16 %arg2) { + %A = xor i16 %arg1, -1 + %B = or i16 %A, %arg2 + ret i16 %B +} + +define i16 @orc_i16_3(i16 %arg1, i16 %arg2) { + %A = xor i16 %arg2, -1 + %B = or i16 %arg1, %A + ret i16 %B +} + +define i8 @orc_i8_1(i8 %arg1, i8 %arg2) { + %A = xor i8 %arg2, -1 + %B = or i8 %A, %arg1 + ret i8 %B +} + +define i8 @orc_i8_2(i8 %arg1, i8 %arg2) { + %A = xor i8 %arg1, -1 + %B = or i8 %A, %arg2 + ret i8 %B +} + +define i8 @orc_i8_3(i8 %arg1, i8 %arg2) { + %A = xor i8 %arg2, -1 + %B = or i8 %arg1, %A + ret i8 %B +} + +; ORI instruction generation (i32 data type): +define <4 x i32> @ori_v4i32_1(<4 x i32> %in) { + %tmp2 = or <4 x i32> %in, < i32 511, i32 511, i32 511, i32 511 > + ret <4 x i32> %tmp2 +} + +define <4 x i32> @ori_v4i32_2(<4 x i32> %in) { + %tmp2 = or <4 x i32> %in, < i32 510, i32 510, i32 510, i32 510 > + ret <4 x i32> %tmp2 +} + +define <4 x i32> @ori_v4i32_3(<4 x i32> %in) { + %tmp2 = or <4 x i32> %in, < i32 -1, i32 -1, i32 -1, i32 -1 > + ret <4 x i32> %tmp2 +} + +define <4 x i32> @ori_v4i32_4(<4 x i32> %in) { + %tmp2 = or <4 x i32> %in, < i32 -512, i32 -512, i32 -512, i32 -512 > + ret <4 x i32> %tmp2 +} + +define zeroext i32 @ori_u32(i32 zeroext %in) { + %tmp37 = or i32 %in, 37 ; <i32> [#uses=1] + ret i32 %tmp37 +} + +define signext i32 @ori_i32(i32 signext %in) { + %tmp38 = or i32 %in, 37 ; <i32> [#uses=1] + ret i32 %tmp38 +} + +define i32 @ori_i32_600(i32 %in) { + ;600 does not fit into 'ori' immediate field + ;CHECK: ori_i32_600 + ;CHECK: il + ;CHECK: ori + %tmp = or i32 %in, 600 + ret i32 %tmp +} + +; ORHI instruction generation (i16 data type): +define <8 x i16> @orhi_v8i16_1(<8 x i16> %in) { + %tmp2 = or <8 x i16> %in, < i16 511, i16 511, i16 511, i16 511, + i16 511, i16 511, i16 511, i16 511 > + ret <8 x i16> %tmp2 +} + +define <8 x i16> @orhi_v8i16_2(<8 x i16> %in) { + %tmp2 = or <8 x i16> %in, < i16 510, i16 510, i16 510, i16 510, + i16 510, i16 510, i16 510, i16 510 > + ret <8 x i16> %tmp2 +} + +define 
<8 x i16> @orhi_v8i16_3(<8 x i16> %in) { + %tmp2 = or <8 x i16> %in, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1 > + ret <8 x i16> %tmp2 +} + +define <8 x i16> @orhi_v8i16_4(<8 x i16> %in) { + %tmp2 = or <8 x i16> %in, < i16 -512, i16 -512, i16 -512, i16 -512, + i16 -512, i16 -512, i16 -512, i16 -512 > + ret <8 x i16> %tmp2 +} + +define zeroext i16 @orhi_u16(i16 zeroext %in) { + %tmp37 = or i16 %in, 37 ; <i16> [#uses=1] + ret i16 %tmp37 +} + +define signext i16 @orhi_i16(i16 signext %in) { + %tmp38 = or i16 %in, 37 ; <i16> [#uses=1] + ret i16 %tmp38 +} + +; ORBI instruction generation (i8 data type): +define <16 x i8> @orbi_v16i8(<16 x i8> %in) { + %tmp2 = or <16 x i8> %in, < i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, + i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, + i8 42, i8 42, i8 42, i8 42 > + ret <16 x i8> %tmp2 +} + +define zeroext i8 @orbi_u8(i8 zeroext %in) { + %tmp37 = or i8 %in, 37 ; <i8> [#uses=1] + ret i8 %tmp37 +} + +define signext i8 @orbi_i8(i8 signext %in) { + %tmp38 = or i8 %in, 37 ; <i8> [#uses=1] + ret i8 %tmp38 +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/private.ll b/src/LLVM/test/CodeGen/CellSPU/private.ll new file mode 100644 index 0000000..1d933ad --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/private.ll
@@ -0,0 +1,19 @@ +; Test to make sure that the 'private' is used correctly. +; +; RUN: llc < %s -march=cellspu > %t +; RUN: grep .Lfoo: %t +; RUN: grep brsl.*\.Lfoo %t +; RUN: grep .Lbaz: %t +; RUN: grep ila.*\.Lbaz %t + +define private void @foo() { + ret void +} + +@baz = private global i32 4 + +define i32 @bar() { + call void @foo() + %1 = load i32* @baz, align 4 + ret i32 %1 +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/rotate_ops.ll b/src/LLVM/test/CodeGen/CellSPU/rotate_ops.ll new file mode 100644 index 0000000..b1219e6 --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/rotate_ops.ll
@@ -0,0 +1,172 @@ +; RUN: llc < %s -march=cellspu -o %t1.s +; RUN: grep rot %t1.s | count 86 +; RUN: grep roth %t1.s | count 8 +; RUN: grep roti.*5 %t1.s | count 1 +; RUN: grep roti.*27 %t1.s | count 1 +; RUN: grep rothi.*5 %t1.s | count 2 +; RUN: grep rothi.*11 %t1.s | count 1 +; RUN: grep rothi.*,.3 %t1.s | count 1 +; RUN: grep andhi %t1.s | count 4 +; RUN: grep shlhi %t1.s | count 4 +; RUN: cat %t1.s | FileCheck %s + +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +; Vector rotates are not currently supported in gcc or llvm assembly. These are +; not tested. + +; 32-bit rotates: +define i32 @rotl32_1a(i32 %arg1, i8 %arg2) { + %tmp1 = zext i8 %arg2 to i32 ; <i32> [#uses=1] + %B = shl i32 %arg1, %tmp1 ; <i32> [#uses=1] + %arg22 = sub i8 32, %arg2 ; <i8> [#uses=1] + %tmp2 = zext i8 %arg22 to i32 ; <i32> [#uses=1] + %C = lshr i32 %arg1, %tmp2 ; <i32> [#uses=1] + %D = or i32 %B, %C ; <i32> [#uses=1] + ret i32 %D +} + +define i32 @rotl32_1b(i32 %arg1, i16 %arg2) { + %tmp1 = zext i16 %arg2 to i32 ; <i32> [#uses=1] + %B = shl i32 %arg1, %tmp1 ; <i32> [#uses=1] + %arg22 = sub i16 32, %arg2 ; <i8> [#uses=1] + %tmp2 = zext i16 %arg22 to i32 ; <i32> [#uses=1] + %C = lshr i32 %arg1, %tmp2 ; <i32> [#uses=1] + %D = or i32 %B, %C ; <i32> [#uses=1] + ret i32 %D +} + +define i32 @rotl32_2(i32 %arg1, i32 %arg2) { + %B = shl i32 %arg1, %arg2 ; <i32> [#uses=1] + %tmp1 = sub i32 32, %arg2 ; <i32> [#uses=1] + %C = lshr i32 %arg1, %tmp1 ; <i32> [#uses=1] + %D = or i32 %B, %C ; <i32> [#uses=1] + ret i32 %D +} + +define i32 @rotl32_3(i32 %arg1, i32 %arg2) { + %tmp1 = sub i32 32, %arg2 ; <i32> [#uses=1] + %B = shl i32 %arg1, %arg2 ; <i32> [#uses=1] + %C = lshr i32 %arg1, %tmp1 ; <i32> [#uses=1] + %D = or i32 %B, %C ; <i32> [#uses=1] + ret i32 %D +} + +define i32 @rotl32_4(i32 %arg1, i32 %arg2) { + %tmp1 = sub i32 32, %arg2 ; <i32> [#uses=1] + %C = lshr i32 %arg1, %tmp1 ; 
<i32> [#uses=1] + %B = shl i32 %arg1, %arg2 ; <i32> [#uses=1] + %D = or i32 %B, %C ; <i32> [#uses=1] + ret i32 %D +} + +define i32 @rotr32_1(i32 %A, i8 %Amt) { + %tmp1 = zext i8 %Amt to i32 ; <i32> [#uses=1] + %B = lshr i32 %A, %tmp1 ; <i32> [#uses=1] + %Amt2 = sub i8 32, %Amt ; <i8> [#uses=1] + %tmp2 = zext i8 %Amt2 to i32 ; <i32> [#uses=1] + %C = shl i32 %A, %tmp2 ; <i32> [#uses=1] + %D = or i32 %B, %C ; <i32> [#uses=1] + ret i32 %D +} + +define i32 @rotr32_2(i32 %A, i8 %Amt) { + %Amt2 = sub i8 32, %Amt ; <i8> [#uses=1] + %tmp1 = zext i8 %Amt to i32 ; <i32> [#uses=1] + %B = lshr i32 %A, %tmp1 ; <i32> [#uses=1] + %tmp2 = zext i8 %Amt2 to i32 ; <i32> [#uses=1] + %C = shl i32 %A, %tmp2 ; <i32> [#uses=1] + %D = or i32 %B, %C ; <i32> [#uses=1] + ret i32 %D +} + +; Rotate left with immediate +define i32 @rotli32(i32 %A) { + %B = shl i32 %A, 5 ; <i32> [#uses=1] + %C = lshr i32 %A, 27 ; <i32> [#uses=1] + %D = or i32 %B, %C ; <i32> [#uses=1] + ret i32 %D +} + +; Rotate right with immediate +define i32 @rotri32(i32 %A) { + %B = lshr i32 %A, 5 ; <i32> [#uses=1] + %C = shl i32 %A, 27 ; <i32> [#uses=1] + %D = or i32 %B, %C ; <i32> [#uses=1] + ret i32 %D +} + +; 16-bit rotates: +define i16 @rotr16_1(i16 %arg1, i8 %arg) { + %tmp1 = zext i8 %arg to i16 ; <i16> [#uses=1] + %B = lshr i16 %arg1, %tmp1 ; <i16> [#uses=1] + %arg2 = sub i8 16, %arg ; <i8> [#uses=1] + %tmp2 = zext i8 %arg2 to i16 ; <i16> [#uses=1] + %C = shl i16 %arg1, %tmp2 ; <i16> [#uses=1] + %D = or i16 %B, %C ; <i16> [#uses=1] + ret i16 %D +} + +define i16 @rotr16_2(i16 %arg1, i16 %arg) { + %B = lshr i16 %arg1, %arg ; <i16> [#uses=1] + %tmp1 = sub i16 16, %arg ; <i16> [#uses=1] + %C = shl i16 %arg1, %tmp1 ; <i16> [#uses=1] + %D = or i16 %B, %C ; <i16> [#uses=1] + ret i16 %D +} + +define i16 @rotli16(i16 %A) { + %B = shl i16 %A, 5 ; <i16> [#uses=1] + %C = lshr i16 %A, 11 ; <i16> [#uses=1] + %D = or i16 %B, %C ; <i16> [#uses=1] + ret i16 %D +} + +define i16 @rotri16(i16 %A) { + %B = lshr i16 %A, 5 ; <i16> [#uses=1] + 
%C = shl i16 %A, 11 ; <i16> [#uses=1] + %D = or i16 %B, %C ; <i16> [#uses=1] + ret i16 %D +} + +define i8 @rotl8(i8 %A, i8 %Amt) { + %B = shl i8 %A, %Amt ; <i8> [#uses=1] + %Amt2 = sub i8 8, %Amt ; <i8> [#uses=1] + %C = lshr i8 %A, %Amt2 ; <i8> [#uses=1] + %D = or i8 %B, %C ; <i8> [#uses=1] + ret i8 %D +} + +define i8 @rotr8(i8 %A, i8 %Amt) { + %B = lshr i8 %A, %Amt ; <i8> [#uses=1] + %Amt2 = sub i8 8, %Amt ; <i8> [#uses=1] + %C = shl i8 %A, %Amt2 ; <i8> [#uses=1] + %D = or i8 %B, %C ; <i8> [#uses=1] + ret i8 %D +} + +define i8 @rotli8(i8 %A) { + %B = shl i8 %A, 5 ; <i8> [#uses=1] + %C = lshr i8 %A, 3 ; <i8> [#uses=1] + %D = or i8 %B, %C ; <i8> [#uses=1] + ret i8 %D +} + +define i8 @rotri8(i8 %A) { + %B = lshr i8 %A, 5 ; <i8> [#uses=1] + %C = shl i8 %A, 3 ; <i8> [#uses=1] + %D = or i8 %B, %C ; <i8> [#uses=1] + ret i8 %D +} + +define <2 x float> @test1(<4 x float> %param ) +{ +; CHECK: test1 +; CHECK: rotqbyi + %el = extractelement <4 x float> %param, i32 1 + %vec1 = insertelement <1 x float> undef, float %el, i32 0 + %rv = shufflevector <1 x float> %vec1, <1 x float> undef, <2 x i32><i32 0,i32 0> +; CHECK: bi $lr + ret <2 x float> %rv +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/select_bits.ll b/src/LLVM/test/CodeGen/CellSPU/select_bits.ll new file mode 100644 index 0000000..c804256 --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/select_bits.ll
@@ -0,0 +1,569 @@ +; RUN: llc < %s -march=cellspu > %t1.s +; RUN: grep selb %t1.s | count 56 + +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +; v2i64 +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +; (or (and rC, rB), (and (not rC), rA)) +define <2 x i64> @selectbits_v2i64_01(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { + %C = and <2 x i64> %rC, %rB + %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > + %B = and <2 x i64> %A, %rA + %D = or <2 x i64> %C, %B + ret <2 x i64> %D +} + +; (or (and rB, rC), (and (not rC), rA)) +define <2 x i64> @selectbits_v2i64_02(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { + %C = and <2 x i64> %rB, %rC + %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > + %B = and <2 x i64> %A, %rA + %D = or <2 x i64> %C, %B + ret <2 x i64> %D +} + +; (or (and (not rC), rA), (and rB, rC)) +define <2 x i64> @selectbits_v2i64_03(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { + %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > + %B = and <2 x i64> %A, %rA + %C = and <2 x i64> %rB, %rC + %D = or <2 x i64> %C, %B + ret <2 x i64> %D +} + +; (or (and (not rC), rA), (and rC, rB)) +define <2 x i64> @selectbits_v2i64_04(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { + %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > + %B = and <2 x i64> %A, %rA + %C = and <2 x i64> %rC, %rB + %D = or <2 x i64> %C, %B + ret <2 x i64> %D +} + +; (or (and rC, rB), (and rA, (not rC))) +define <2 x i64> @selectbits_v2i64_05(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { + %C = and <2 x i64> %rC, %rB + %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > + %B = and <2 x i64> %rA, %A + %D = or <2 x i64> %C, %B + ret <2 x i64> %D +} + +; (or (and rB, rC), (and rA, (not rC))) +define <2 x i64> @selectbits_v2i64_06(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { + %C = and <2 x i64> %rB, %rC 
+ %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > + %B = and <2 x i64> %rA, %A + %D = or <2 x i64> %C, %B + ret <2 x i64> %D +} + +; (or (and rA, (not rC)), (and rB, rC)) +define <2 x i64> @selectbits_v2i64_07(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { + %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > + %B = and <2 x i64> %rA, %A + %C = and <2 x i64> %rB, %rC + %D = or <2 x i64> %C, %B + ret <2 x i64> %D +} + +; (or (and rA, (not rC)), (and rC, rB)) +define <2 x i64> @selectbits_v2i64_08(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { + %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > + %B = and <2 x i64> %rA, %A + %C = and <2 x i64> %rC, %rB + %D = or <2 x i64> %C, %B + ret <2 x i64> %D +} + +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +; v4i32 +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +; (or (and rC, rB), (and (not rC), rA)) +define <4 x i32> @selectbits_v4i32_01(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { + %C = and <4 x i32> %rC, %rB + %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1 > + %B = and <4 x i32> %A, %rA + %D = or <4 x i32> %C, %B + ret <4 x i32> %D +} + +; (or (and rB, rC), (and (not rC), rA)) +define <4 x i32> @selectbits_v4i32_02(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { + %C = and <4 x i32> %rB, %rC + %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1 > + %B = and <4 x i32> %A, %rA + %D = or <4 x i32> %C, %B + ret <4 x i32> %D +} + +; (or (and (not rC), rA), (and rB, rC)) +define <4 x i32> @selectbits_v4i32_03(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { + %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1 > + %B = and <4 x i32> %A, %rA + %C = and <4 x i32> %rB, %rC + %D = or <4 x i32> %C, %B + ret <4 x i32> %D +} + +; (or (and (not rC), rA), (and rC, rB)) +define <4 x i32> @selectbits_v4i32_04(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { + %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1> + %B = and <4 x i32> %A, %rA + %C = and <4 x i32> %rC, %rB + 
%D = or <4 x i32> %C, %B + ret <4 x i32> %D +} + +; (or (and rC, rB), (and rA, (not rC))) +define <4 x i32> @selectbits_v4i32_05(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { + %C = and <4 x i32> %rC, %rB + %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1> + %B = and <4 x i32> %rA, %A + %D = or <4 x i32> %C, %B + ret <4 x i32> %D +} + +; (or (and rB, rC), (and rA, (not rC))) +define <4 x i32> @selectbits_v4i32_06(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { + %C = and <4 x i32> %rB, %rC + %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1> + %B = and <4 x i32> %rA, %A + %D = or <4 x i32> %C, %B + ret <4 x i32> %D +} + +; (or (and rA, (not rC)), (and rB, rC)) +define <4 x i32> @selectbits_v4i32_07(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { + %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1> + %B = and <4 x i32> %rA, %A + %C = and <4 x i32> %rB, %rC + %D = or <4 x i32> %C, %B + ret <4 x i32> %D +} + +; (or (and rA, (not rC)), (and rC, rB)) +define <4 x i32> @selectbits_v4i32_08(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { + %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1> + %B = and <4 x i32> %rA, %A + %C = and <4 x i32> %rC, %rB + %D = or <4 x i32> %C, %B + ret <4 x i32> %D +} + +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +; v8i16 +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +; (or (and rC, rB), (and (not rC), rA)) +define <8 x i16> @selectbits_v8i16_01(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { + %C = and <8 x i16> %rC, %rB + %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + %B = and <8 x i16> %A, %rA + %D = or <8 x i16> %C, %B + ret <8 x i16> %D +} + +; (or (and rB, rC), (and (not rC), rA)) +define <8 x i16> @selectbits_v8i16_02(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { + %C = and <8 x i16> %rB, %rC + %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + %B = 
and <8 x i16> %A, %rA + %D = or <8 x i16> %C, %B + ret <8 x i16> %D +} + +; (or (and (not rC), rA), (and rB, rC)) +define <8 x i16> @selectbits_v8i16_03(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { + %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + %B = and <8 x i16> %A, %rA + %C = and <8 x i16> %rB, %rC + %D = or <8 x i16> %C, %B + ret <8 x i16> %D +} + +; (or (and (not rC), rA), (and rC, rB)) +define <8 x i16> @selectbits_v8i16_04(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { + %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + %B = and <8 x i16> %A, %rA + %C = and <8 x i16> %rC, %rB + %D = or <8 x i16> %C, %B + ret <8 x i16> %D +} + +; (or (and rC, rB), (and rA, (not rC))) +define <8 x i16> @selectbits_v8i16_05(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { + %C = and <8 x i16> %rC, %rB + %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + %B = and <8 x i16> %rA, %A + %D = or <8 x i16> %C, %B + ret <8 x i16> %D +} + +; (or (and rB, rC), (and rA, (not rC))) +define <8 x i16> @selectbits_v8i16_06(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { + %C = and <8 x i16> %rB, %rC + %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + %B = and <8 x i16> %rA, %A + %D = or <8 x i16> %C, %B + ret <8 x i16> %D +} + +; (or (and rA, (not rC)), (and rB, rC)) +define <8 x i16> @selectbits_v8i16_07(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { + %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + %B = and <8 x i16> %rA, %A + %C = and <8 x i16> %rB, %rC + %D = or <8 x i16> %C, %B + ret <8 x i16> %D +} + +; (or (and rA, (not rC)), (and rC, rB)) +define <8 x i16> @selectbits_v8i16_08(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { + %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + %B = and <8 x i16> %rA, %A + %C = and <8 x 
i16> %rC, %rB + %D = or <8 x i16> %C, %B + ret <8 x i16> %D +} + +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +; v16i8 +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +; (or (and rC, rB), (and (not rC), rA)) +define <16 x i8> @selectbits_v16i8_01(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { + %C = and <16 x i8> %rC, %rB + %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %A, %rA + %D = or <16 x i8> %C, %B + ret <16 x i8> %D +} + +; (or (and rB, rC), (and (not rC), rA)) +define <16 x i8> @selectbits_v16i8_02(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { + %C = and <16 x i8> %rB, %rC + %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %A, %rA + %D = or <16 x i8> %C, %B + ret <16 x i8> %D +} + +; (or (and (not rC), rA), (and rB, rC)) +define <16 x i8> @selectbits_v16i8_03(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { + %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %A, %rA + %C = and <16 x i8> %rB, %rC + %D = or <16 x i8> %C, %B + ret <16 x i8> %D +} + +; (or (and (not rC), rA), (and rC, rB)) +define <16 x i8> @selectbits_v16i8_04(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { + %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %A, %rA + %C = and <16 x i8> %rC, %rB + %D = or <16 x i8> %C, %B + ret <16 x i8> %D +} + +; (or (and rC, rB), (and rA, (not rC))) +define <16 x i8> @selectbits_v16i8_05(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { + %C = and <16 x i8> %rC, %rB + %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, 
i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %rA, %A + %D = or <16 x i8> %C, %B + ret <16 x i8> %D +} + +; (or (and rB, rC), (and rA, (not rC))) +define <16 x i8> @selectbits_v16i8_06(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { + %C = and <16 x i8> %rB, %rC + %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %rA, %A + %D = or <16 x i8> %C, %B + ret <16 x i8> %D +} + +; (or (and rA, (not rC)), (and rB, rC)) +define <16 x i8> @selectbits_v16i8_07(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { + %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %rA, %A + %C = and <16 x i8> %rB, %rC + %D = or <16 x i8> %C, %B + ret <16 x i8> %D +} + +; (or (and rA, (not rC)), (and rC, rB)) +define <16 x i8> @selectbits_v16i8_08(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { + %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %rA, %A + %C = and <16 x i8> %rC, %rB + %D = or <16 x i8> %C, %B + ret <16 x i8> %D +} + +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +; i32 +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +; (or (and rC, rB), (and (not rC), rA)) +define i32 @selectbits_i32_01(i32 %rA, i32 %rB, i32 %rC) { + %C = and i32 %rC, %rB + %A = xor i32 %rC, -1 + %B = and i32 %A, %rA + %D = or i32 %C, %B + ret i32 %D +} + +; (or (and rB, rC), (and (not rC), rA)) +define i32 @selectbits_i32_02(i32 %rA, i32 %rB, i32 %rC) { + %C = and i32 %rB, %rC + %A = xor i32 %rC, -1 + %B = and i32 %A, %rA + %D = or i32 %C, %B + ret i32 %D +} + +; (or (and (not rC), rA), (and rB, rC)) +define i32 @selectbits_i32_03(i32 %rA, i32 %rB, i32 %rC) { + %A 
= xor i32 %rC, -1 + %B = and i32 %A, %rA + %C = and i32 %rB, %rC + %D = or i32 %C, %B + ret i32 %D +} + +; (or (and (not rC), rA), (and rC, rB)) +define i32 @selectbits_i32_04(i32 %rA, i32 %rB, i32 %rC) { + %A = xor i32 %rC, -1 + %B = and i32 %A, %rA + %C = and i32 %rC, %rB + %D = or i32 %C, %B + ret i32 %D +} + +; (or (and rC, rB), (and rA, (not rC))) +define i32 @selectbits_i32_05(i32 %rA, i32 %rB, i32 %rC) { + %C = and i32 %rC, %rB + %A = xor i32 %rC, -1 + %B = and i32 %rA, %A + %D = or i32 %C, %B + ret i32 %D +} + +; (or (and rB, rC), (and rA, (not rC))) +define i32 @selectbits_i32_06(i32 %rA, i32 %rB, i32 %rC) { + %C = and i32 %rB, %rC + %A = xor i32 %rC, -1 + %B = and i32 %rA, %A + %D = or i32 %C, %B + ret i32 %D +} + +; (or (and rA, (not rC)), (and rB, rC)) +define i32 @selectbits_i32_07(i32 %rA, i32 %rB, i32 %rC) { + %A = xor i32 %rC, -1 + %B = and i32 %rA, %A + %C = and i32 %rB, %rC + %D = or i32 %C, %B + ret i32 %D +} + +; (or (and rA, (not rC)), (and rC, rB)) +define i32 @selectbits_i32_08(i32 %rA, i32 %rB, i32 %rC) { + %A = xor i32 %rC, -1 + %B = and i32 %rA, %A + %C = and i32 %rC, %rB + %D = or i32 %C, %B + ret i32 %D +} + +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +; i16 +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +; (or (and rC, rB), (and (not rC), rA)) +define i16 @selectbits_i16_01(i16 %rA, i16 %rB, i16 %rC) { + %C = and i16 %rC, %rB + %A = xor i16 %rC, -1 + %B = and i16 %A, %rA + %D = or i16 %C, %B + ret i16 %D +} + +; (or (and rB, rC), (and (not rC), rA)) +define i16 @selectbits_i16_02(i16 %rA, i16 %rB, i16 %rC) { + %C = and i16 %rB, %rC + %A = xor i16 %rC, -1 + %B = and i16 %A, %rA + %D = or i16 %C, %B + ret i16 %D +} + +; (or (and (not rC), rA), (and rB, rC)) +define i16 @selectbits_i16_03(i16 %rA, i16 %rB, i16 %rC) { + %A = xor i16 %rC, -1 + %B = and i16 %A, %rA + %C = and i16 %rB, %rC + %D = or i16 %C, %B + ret i16 %D +} + +; (or (and (not rC), rA), (and rC, rB)) +define 
i16 @selectbits_i16_04(i16 %rA, i16 %rB, i16 %rC) { + %A = xor i16 %rC, -1 + %B = and i16 %A, %rA + %C = and i16 %rC, %rB + %D = or i16 %C, %B + ret i16 %D +} + +; (or (and rC, rB), (and rA, (not rC))) +define i16 @selectbits_i16_05(i16 %rA, i16 %rB, i16 %rC) { + %C = and i16 %rC, %rB + %A = xor i16 %rC, -1 + %B = and i16 %rA, %A + %D = or i16 %C, %B + ret i16 %D +} + +; (or (and rB, rC), (and rA, (not rC))) +define i16 @selectbits_i16_06(i16 %rA, i16 %rB, i16 %rC) { + %C = and i16 %rB, %rC + %A = xor i16 %rC, -1 + %B = and i16 %rA, %A + %D = or i16 %C, %B + ret i16 %D +} + +; (or (and rA, (not rC)), (and rB, rC)) +define i16 @selectbits_i16_07(i16 %rA, i16 %rB, i16 %rC) { + %A = xor i16 %rC, -1 + %B = and i16 %rA, %A + %C = and i16 %rB, %rC + %D = or i16 %C, %B + ret i16 %D +} + +; (or (and rA, (not rC)), (and rC, rB)) +define i16 @selectbits_i16_08(i16 %rA, i16 %rB, i16 %rC) { + %A = xor i16 %rC, -1 + %B = and i16 %rA, %A + %C = and i16 %rC, %rB + %D = or i16 %C, %B + ret i16 %D +} + +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +; i8 +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +; (or (and rC, rB), (and (not rC), rA)) +define i8 @selectbits_i8_01(i8 %rA, i8 %rB, i8 %rC) { + %C = and i8 %rC, %rB + %A = xor i8 %rC, -1 + %B = and i8 %A, %rA + %D = or i8 %C, %B + ret i8 %D +} + +; (or (and rB, rC), (and (not rC), rA)) +define i8 @selectbits_i8_02(i8 %rA, i8 %rB, i8 %rC) { + %C = and i8 %rB, %rC + %A = xor i8 %rC, -1 + %B = and i8 %A, %rA + %D = or i8 %C, %B + ret i8 %D +} + +; (or (and (not rC), rA), (and rB, rC)) +define i8 @selectbits_i8_03(i8 %rA, i8 %rB, i8 %rC) { + %A = xor i8 %rC, -1 + %B = and i8 %A, %rA + %C = and i8 %rB, %rC + %D = or i8 %C, %B + ret i8 %D +} + +; (or (and (not rC), rA), (and rC, rB)) +define i8 @selectbits_i8_04(i8 %rA, i8 %rB, i8 %rC) { + %A = xor i8 %rC, -1 + %B = and i8 %A, %rA + %C = and i8 %rC, %rB + %D = or i8 %C, %B + ret i8 %D +} + +; (or (and rC, rB), (and rA, 
(not rC))) +define i8 @selectbits_i8_05(i8 %rA, i8 %rB, i8 %rC) { + %C = and i8 %rC, %rB + %A = xor i8 %rC, -1 + %B = and i8 %rA, %A + %D = or i8 %C, %B + ret i8 %D +} + +; (or (and rB, rC), (and rA, (not rC))) +define i8 @selectbits_i8_06(i8 %rA, i8 %rB, i8 %rC) { + %C = and i8 %rB, %rC + %A = xor i8 %rC, -1 + %B = and i8 %rA, %A + %D = or i8 %C, %B + ret i8 %D +} + +; (or (and rA, (not rC)), (and rB, rC)) +define i8 @selectbits_i8_07(i8 %rA, i8 %rB, i8 %rC) { + %A = xor i8 %rC, -1 + %B = and i8 %rA, %A + %C = and i8 %rB, %rC + %D = or i8 %C, %B + ret i8 %D +} + +; (or (and rA, (not rC)), (and rC, rB)) +define i8 @selectbits_i8_08(i8 %rA, i8 %rB, i8 %rC) { + %A = xor i8 %rC, -1 + %B = and i8 %rA, %A + %C = and i8 %rC, %rB + %D = or i8 %C, %B + ret i8 %D +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/sext128.ll b/src/LLVM/test/CodeGen/CellSPU/sext128.ll new file mode 100644 index 0000000..6ae9aa5 --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/sext128.ll
@@ -0,0 +1,71 @@ +; RUN: llc < %s -march=cellspu | FileCheck %s + +; ModuleID = 'sext128.bc' +target datalayout = "E-p:32:32:128-i1:8:128-i8:8:128-i16:16:128-i32:32:128-i64:32:128-f32:32:128-f64:64:128-v64:128:128-v128:128:128-a0:0:128-s0:128:128" +target triple = "spu" + +define i128 @sext_i64_i128(i64 %a) { +entry: + %0 = sext i64 %a to i128 + ret i128 %0 +; CHECK: long 269488144 +; CHECK: long 269488144 +; CHECK: long 66051 +; CHECK: long 67438087 +; CHECK-NOT: rotqmbyi +; CHECK: lqa +; CHECK: rotmai +; CHECK: shufb +} + +define i128 @sext_i32_i128(i32 %a) { +entry: + %0 = sext i32 %a to i128 + ret i128 %0 +; CHECK: long 269488144 +; CHECK: long 269488144 +; CHECK: long 269488144 +; CHECK: long 66051 +; CHECK-NOT: rotqmbyi +; CHECK: lqa +; CHECK: rotmai +; CHECK: shufb +} + +define i128 @sext_i32_i128a(float %a) { +entry: + %0 = call i32 @myfunc(float %a) + %1 = sext i32 %0 to i128 + ret i128 %1 +; CHECK: long 269488144 +; CHECK: long 269488144 +; CHECK: long 269488144 +; CHECK: long 66051 +; CHECK-NOT: rotqmbyi +; CHECK: lqa +; CHECK: rotmai +; CHECK: shufb +} + +declare i32 @myfunc(float) + +define i128 @func1(i8 %u) { +entry: +; CHECK: xsbh +; CHECK: xshw +; CHECK: rotmai +; CHECK: shufb +; CHECK: bi $lr + %0 = sext i8 %u to i128 + ret i128 %0 +} + +define i128 @func2(i16 %u) { +entry: +; CHECK: xshw +; CHECK: rotmai +; CHECK: shufb +; CHECK: bi $lr + %0 = sext i16 %u to i128 + ret i128 %0 +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/shift_ops.ll b/src/LLVM/test/CodeGen/CellSPU/shift_ops.ll new file mode 100644 index 0000000..3252c77 --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/shift_ops.ll
@@ -0,0 +1,344 @@ +; RUN: llc < %s -march=cellspu > %t1.s +; RUN: grep {shlh } %t1.s | count 10 +; RUN: grep {shlhi } %t1.s | count 3 +; RUN: grep {shl } %t1.s | count 11 +; RUN: grep {shli } %t1.s | count 3 +; RUN: grep {xshw } %t1.s | count 5 +; RUN: grep {and } %t1.s | count 14 +; RUN: grep {andi } %t1.s | count 2 +; RUN: grep {rotmi } %t1.s | count 2 +; RUN: grep {rotqmbyi } %t1.s | count 1 +; RUN: grep {rotqmbii } %t1.s | count 2 +; RUN: grep {rotqmby } %t1.s | count 1 +; RUN: grep {rotqmbi } %t1.s | count 2 +; RUN: grep {rotqbyi } %t1.s | count 1 +; RUN: grep {rotqbii } %t1.s | count 2 +; RUN: grep {rotqbybi } %t1.s | count 1 +; RUN: grep {sfi } %t1.s | count 6 +; RUN: cat %t1.s | FileCheck %s + +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +; Shift left i16 via register, note that the second operand to shl is promoted +; to a 32-bit type: + +define i16 @shlh_i16_1(i16 %arg1, i16 %arg2) { + %A = shl i16 %arg1, %arg2 + ret i16 %A +} + +define i16 @shlh_i16_2(i16 %arg1, i16 %arg2) { + %A = shl i16 %arg2, %arg1 + ret i16 %A +} + +define signext i16 @shlh_i16_3(i16 signext %arg1, i16 signext %arg2) { + %A = shl i16 %arg1, %arg2 + ret i16 %A +} + +define signext i16 @shlh_i16_4(i16 signext %arg1, i16 signext %arg2) { + %A = shl i16 %arg2, %arg1 + ret i16 %A +} + +define zeroext i16 @shlh_i16_5(i16 zeroext %arg1, i16 zeroext %arg2) { + %A = shl i16 %arg1, %arg2 + ret i16 %A +} + +define zeroext i16 @shlh_i16_6(i16 zeroext %arg1, i16 zeroext %arg2) { + %A = shl i16 %arg2, %arg1 + ret i16 %A +} + +; Shift left i16 with immediate: +define i16 @shlhi_i16_1(i16 %arg1) { + %A = shl i16 %arg1, 12 + ret i16 %A +} + +; Should not generate anything other than the return, arg1 << 0 = arg1 +define i16 @shlhi_i16_2(i16 %arg1) { + %A = shl i16 %arg1, 0 + ret i16 %A +} + +define i16 @shlhi_i16_3(i16 %arg1) { + %A = shl i16 16383, %arg1 + ret i16 %A +} + +; 
Should generate 0, 0 << arg1 = 0 +define i16 @shlhi_i16_4(i16 %arg1) { + %A = shl i16 0, %arg1 + ret i16 %A +} + +define signext i16 @shlhi_i16_5(i16 signext %arg1) { + %A = shl i16 %arg1, 12 + ret i16 %A +} + +; Should not generate anything other than the return, arg1 << 0 = arg1 +define signext i16 @shlhi_i16_6(i16 signext %arg1) { + %A = shl i16 %arg1, 0 + ret i16 %A +} + +define signext i16 @shlhi_i16_7(i16 signext %arg1) { + %A = shl i16 16383, %arg1 + ret i16 %A +} + +; Should generate 0, 0 << arg1 = 0 +define signext i16 @shlhi_i16_8(i16 signext %arg1) { + %A = shl i16 0, %arg1 + ret i16 %A +} + +define zeroext i16 @shlhi_i16_9(i16 zeroext %arg1) { + %A = shl i16 %arg1, 12 + ret i16 %A +} + +; Should not generate anything other than the return, arg1 << 0 = arg1 +define zeroext i16 @shlhi_i16_10(i16 zeroext %arg1) { + %A = shl i16 %arg1, 0 + ret i16 %A +} + +define zeroext i16 @shlhi_i16_11(i16 zeroext %arg1) { + %A = shl i16 16383, %arg1 + ret i16 %A +} + +; Should generate 0, 0 << arg1 = 0 +define zeroext i16 @shlhi_i16_12(i16 zeroext %arg1) { + %A = shl i16 0, %arg1 + ret i16 %A +} + +; Shift left i32 via register, note that the second operand to shl is promoted +; to a 32-bit type: + +define i32 @shl_i32_1(i32 %arg1, i32 %arg2) { + %A = shl i32 %arg1, %arg2 + ret i32 %A +} + +define i32 @shl_i32_2(i32 %arg1, i32 %arg2) { + %A = shl i32 %arg2, %arg1 + ret i32 %A +} + +define signext i32 @shl_i32_3(i32 signext %arg1, i32 signext %arg2) { + %A = shl i32 %arg1, %arg2 + ret i32 %A +} + +define signext i32 @shl_i32_4(i32 signext %arg1, i32 signext %arg2) { + %A = shl i32 %arg2, %arg1 + ret i32 %A +} + +define zeroext i32 @shl_i32_5(i32 zeroext %arg1, i32 zeroext %arg2) { + %A = shl i32 %arg1, %arg2 + ret i32 %A +} + +define zeroext i32 @shl_i32_6(i32 zeroext %arg1, i32 zeroext %arg2) { + %A = shl i32 %arg2, %arg1 + ret i32 %A +} + +; Shift left i32 with immediate: +define i32 @shli_i32_1(i32 %arg1) { + %A = shl i32 %arg1, 12 + ret i32 %A +} + +; Should not 
generate anything other than the return, arg1 << 0 = arg1 +define i32 @shli_i32_2(i32 %arg1) { + %A = shl i32 %arg1, 0 + ret i32 %A +} + +define i32 @shli_i32_3(i32 %arg1) { + %A = shl i32 16383, %arg1 + ret i32 %A +} + +; Should generate 0, 0 << arg1 = 0 +define i32 @shli_i32_4(i32 %arg1) { + %A = shl i32 0, %arg1 + ret i32 %A +} + +define signext i32 @shli_i32_5(i32 signext %arg1) { + %A = shl i32 %arg1, 12 + ret i32 %A +} + +; Should not generate anything other than the return, arg1 << 0 = arg1 +define signext i32 @shli_i32_6(i32 signext %arg1) { + %A = shl i32 %arg1, 0 + ret i32 %A +} + +define signext i32 @shli_i32_7(i32 signext %arg1) { + %A = shl i32 16383, %arg1 + ret i32 %A +} + +; Should generate 0, 0 << arg1 = 0 +define signext i32 @shli_i32_8(i32 signext %arg1) { + %A = shl i32 0, %arg1 + ret i32 %A +} + +define zeroext i32 @shli_i32_9(i32 zeroext %arg1) { + %A = shl i32 %arg1, 12 + ret i32 %A +} + +; Should not generate anything other than the return, arg1 << 0 = arg1 +define zeroext i32 @shli_i32_10(i32 zeroext %arg1) { + %A = shl i32 %arg1, 0 + ret i32 %A +} + +define zeroext i32 @shli_i32_11(i32 zeroext %arg1) { + %A = shl i32 16383, %arg1 + ret i32 %A +} + +; Should generate 0, 0 << arg1 = 0 +define zeroext i32 @shli_i32_12(i32 zeroext %arg1) { + %A = shl i32 0, %arg1 + ret i32 %A +} + +;; i64 shift left + +define i64 @shl_i64_1(i64 %arg1) { + %A = shl i64 %arg1, 9 + ret i64 %A +} + +define i64 @shl_i64_2(i64 %arg1) { + %A = shl i64 %arg1, 3 + ret i64 %A +} + +define i64 @shl_i64_3(i64 %arg1, i32 %shift) { + %1 = zext i32 %shift to i64 + %2 = shl i64 %arg1, %1 + ret i64 %2 +} + +;; i64 shift right logical (shift 0s from the right) + +define i64 @lshr_i64_1(i64 %arg1) { + %1 = lshr i64 %arg1, 9 + ret i64 %1 +} + +define i64 @lshr_i64_2(i64 %arg1) { + %1 = lshr i64 %arg1, 3 + ret i64 %1 +} + +define i64 @lshr_i64_3(i64 %arg1, i32 %shift) { + %1 = zext i32 %shift to i64 + %2 = lshr i64 %arg1, %1 + ret i64 %2 +} + +;; i64 shift right arithmetic (shift 
1s from the right) + +define i64 @ashr_i64_1(i64 %arg) { + %1 = ashr i64 %arg, 9 + ret i64 %1 +} + +define i64 @ashr_i64_2(i64 %arg) { + %1 = ashr i64 %arg, 3 + ret i64 %1 +} + +define i64 @ashr_i64_3(i64 %arg1, i32 %shift) { + %1 = zext i32 %shift to i64 + %2 = ashr i64 %arg1, %1 + ret i64 %2 +} + +define i32 @hi32_i64(i64 %arg) { + %1 = lshr i64 %arg, 32 + %2 = trunc i64 %1 to i32 + ret i32 %2 +} + +; some random tests +define i128 @test_lshr_i128( i128 %val ) { + ;CHECK: test_lshr_i128 + ;CHECK: sfi + ;CHECK: rotqmbi + ;CHECK: rotqmbybi + ;CHECK: bi $lr + %rv = lshr i128 %val, 64 + ret i128 %rv +} + +;Vector shifts +define <2 x i32> @shl_v2i32(<2 x i32> %val, <2 x i32> %sh) { +;CHECK: shl +;CHECK: bi $lr + %rv = shl <2 x i32> %val, %sh + ret <2 x i32> %rv +} + +define <4 x i32> @shl_v4i32(<4 x i32> %val, <4 x i32> %sh) { +;CHECK: shl +;CHECK: bi $lr + %rv = shl <4 x i32> %val, %sh + ret <4 x i32> %rv +} + +define <8 x i16> @shl_v8i16(<8 x i16> %val, <8 x i16> %sh) { +;CHECK: shlh +;CHECK: bi $lr + %rv = shl <8 x i16> %val, %sh + ret <8 x i16> %rv +} + +define <4 x i32> @lshr_v4i32(<4 x i32> %val, <4 x i32> %sh) { +;CHECK: rotm +;CHECK: bi $lr + %rv = lshr <4 x i32> %val, %sh + ret <4 x i32> %rv +} + +define <8 x i16> @lshr_v8i16(<8 x i16> %val, <8 x i16> %sh) { +;CHECK: sfhi +;CHECK: rothm +;CHECK: bi $lr + %rv = lshr <8 x i16> %val, %sh + ret <8 x i16> %rv +} + +define <4 x i32> @ashr_v4i32(<4 x i32> %val, <4 x i32> %sh) { +;CHECK: rotma +;CHECK: bi $lr + %rv = ashr <4 x i32> %val, %sh + ret <4 x i32> %rv +} + +define <8 x i16> @ashr_v8i16(<8 x i16> %val, <8 x i16> %sh) { +;CHECK: sfhi +;CHECK: rotmah +;CHECK: bi $lr + %rv = ashr <8 x i16> %val, %sh + ret <8 x i16> %rv +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/shuffles.ll b/src/LLVM/test/CodeGen/CellSPU/shuffles.ll new file mode 100644 index 0000000..c88a258 --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/shuffles.ll
@@ -0,0 +1,67 @@ +; RUN: llc -O1 --march=cellspu < %s | FileCheck %s + +define <4 x float> @shuffle(<4 x float> %param1, <4 x float> %param2) { + ; CHECK: cwd {{\$.}}, 0($sp) + ; CHECK: shufb {{\$., \$4, \$3, \$.}} + %val= shufflevector <4 x float> %param1, <4 x float> %param2, <4 x i32> <i32 4,i32 1,i32 2,i32 3> + ret <4 x float> %val +} + +define <4 x float> @splat(float %param1) { + ; CHECK: lqa + ; CHECK: shufb $3 + ; CHECK: bi + %vec = insertelement <1 x float> undef, float %param1, i32 0 + %val= shufflevector <1 x float> %vec, <1 x float> undef, <4 x i32> <i32 0,i32 0,i32 0,i32 0> + ret <4 x float> %val +} + +define void @test_insert( <2 x float>* %ptr, float %val1, float %val2 ) { + %sl2_17_tmp1 = insertelement <2 x float> zeroinitializer, float %val1, i32 0 +;CHECK: lqa $6, +;CHECK: shufb $4, $4, $5, $6 + %sl2_17 = insertelement <2 x float> %sl2_17_tmp1, float %val2, i32 1 + +;CHECK: cdd $5, 0($3) +;CHECK: lqd $6, 0($3) +;CHECK: shufb $4, $4, $6, $5 +;CHECK: stqd $4, 0($3) +;CHECK: bi $lr + store <2 x float> %sl2_17, <2 x float>* %ptr + ret void +} + +define <4 x float> @test_insert_1(<4 x float> %vparam, float %eltparam) { +;CHECK: cwd $5, 4($sp) +;CHECK: shufb $3, $4, $3, $5 +;CHECK: bi $lr + %rv = insertelement <4 x float> %vparam, float %eltparam, i32 1 + ret <4 x float> %rv +} + +define <2 x i32> @test_v2i32(<4 x i32>%vec) +{ +;CHECK: rotqbyi $3, $3, 4 +;CHECK: bi $lr + %rv = shufflevector <4 x i32> %vec, <4 x i32> undef, <2 x i32><i32 1,i32 2> + ret <2 x i32> %rv +} + +define <4 x i32> @test_v4i32_rot8(<4 x i32>%vec) +{ +;CHECK: rotqbyi $3, $3, 8 +;CHECK: bi $lr + %rv = shufflevector <4 x i32> %vec, <4 x i32> undef, + <4 x i32> <i32 2,i32 3,i32 0, i32 1> + ret <4 x i32> %rv +} + +define <4 x i32> @test_v4i32_rot4(<4 x i32>%vec) +{ +;CHECK: rotqbyi $3, $3, 4 +;CHECK: bi $lr + %rv = shufflevector <4 x i32> %vec, <4 x i32> undef, + <4 x i32> <i32 1,i32 2,i32 3, i32 0> + ret <4 x i32> %rv +} +
diff --git a/src/LLVM/test/CodeGen/CellSPU/sp_farith.ll b/src/LLVM/test/CodeGen/CellSPU/sp_farith.ll new file mode 100644 index 0000000..80bf47c --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/sp_farith.ll
@@ -0,0 +1,90 @@ +; RUN: llc < %s -march=cellspu -enable-unsafe-fp-math > %t1.s +; RUN: grep fa %t1.s | count 2 +; RUN: grep fs %t1.s | count 2 +; RUN: grep fm %t1.s | count 6 +; RUN: grep fma %t1.s | count 2 +; RUN: grep fms %t1.s | count 2 +; RUN: grep fnms %t1.s | count 3 +; +; This file includes standard floating point arithmetic instructions +; NOTE fdiv is tested separately since it is a compound operation +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +define float @fp_add(float %arg1, float %arg2) { + %A = fadd float %arg1, %arg2 ; <float> [#uses=1] + ret float %A +} + +define <4 x float> @fp_add_vec(<4 x float> %arg1, <4 x float> %arg2) { + %A = fadd <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1] + ret <4 x float> %A +} + +define float @fp_sub(float %arg1, float %arg2) { + %A = fsub float %arg1, %arg2 ; <float> [#uses=1] + ret float %A +} + +define <4 x float> @fp_sub_vec(<4 x float> %arg1, <4 x float> %arg2) { + %A = fsub <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1] + ret <4 x float> %A +} + +define float @fp_mul(float %arg1, float %arg2) { + %A = fmul float %arg1, %arg2 ; <float> [#uses=1] + ret float %A +} + +define <4 x float> @fp_mul_vec(<4 x float> %arg1, <4 x float> %arg2) { + %A = fmul <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1] + ret <4 x float> %A +} + +define float @fp_mul_add(float %arg1, float %arg2, float %arg3) { + %A = fmul float %arg1, %arg2 ; <float> [#uses=1] + %B = fadd float %A, %arg3 ; <float> [#uses=1] + ret float %B +} + +define <4 x float> @fp_mul_add_vec(<4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3) { + %A = fmul <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1] + %B = fadd <4 x float> %A, %arg3 ; <<4 x float>> [#uses=1] + ret <4 x float> %B +} + +define float @fp_mul_sub(float %arg1, float %arg2, float %arg3) { + %A = fmul float %arg1, %arg2 ; <float> [#uses=1] + %B = fsub float %A, 
%arg3 ; <float> [#uses=1] + ret float %B +} + +define <4 x float> @fp_mul_sub_vec(<4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3) { + %A = fmul <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1] + %B = fsub <4 x float> %A, %arg3 ; <<4 x float>> [#uses=1] + ret <4 x float> %B +} + +; Test the straightforward way of getting fnms +; c - a * b +define float @fp_neg_mul_sub_1(float %arg1, float %arg2, float %arg3) { + %A = fmul float %arg1, %arg2 + %B = fsub float %arg3, %A + ret float %B +} + +; Test another way of getting fnms +; - ( a *b -c ) = c - a * b +define float @fp_neg_mul_sub_2(float %arg1, float %arg2, float %arg3) { + %A = fmul float %arg1, %arg2 + %B = fsub float %A, %arg3 + %C = fsub float -0.0, %B + ret float %C +} + +define <4 x float> @fp_neg_mul_sub_vec(<4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3) { + %A = fmul <4 x float> %arg1, %arg2 + %B = fsub <4 x float> %A, %arg3 + %D = fsub <4 x float> < float -0.0, float -0.0, float -0.0, float -0.0 >, %B + ret <4 x float> %D +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/stores.ll b/src/LLVM/test/CodeGen/CellSPU/stores.ll new file mode 100644 index 0000000..6ca5b08 --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/stores.ll
@@ -0,0 +1,181 @@ +; RUN: llc < %s -march=cellspu > %t1.s +; RUN: grep {stqd.*0(\$3)} %t1.s | count 4 +; RUN: grep {stqd.*16(\$3)} %t1.s | count 4 +; RUN: grep 16256 %t1.s | count 2 +; RUN: grep 16384 %t1.s | count 1 +; RUN: grep 771 %t1.s | count 4 +; RUN: grep 515 %t1.s | count 2 +; RUN: grep 1799 %t1.s | count 2 +; RUN: grep 1543 %t1.s | count 5 +; RUN: grep 1029 %t1.s | count 3 +; RUN: grep {shli.*, 4} %t1.s | count 4 +; RUN: grep stqx %t1.s | count 4 +; RUN: grep ilhu %t1.s | count 11 +; RUN: grep iohl %t1.s | count 8 +; RUN: grep shufb %t1.s | count 15 +; RUN: grep frds %t1.s | count 1 +; RUN: llc < %s -march=cellspu | FileCheck %s + +; ModuleID = 'stores.bc' +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +define void @store_v16i8_1(<16 x i8>* %a) nounwind { +entry: + store <16 x i8> < i8 1, i8 2, i8 1, i8 1, i8 1, i8 2, i8 1, i8 1, i8 1, i8 2, i8 1, i8 1, i8 1, i8 2, i8 1, i8 1 >, <16 x i8>* %a + ret void +} + +define void @store_v16i8_2(<16 x i8>* %a) nounwind { +entry: + %arrayidx = getelementptr <16 x i8>* %a, i32 1 + store <16 x i8> < i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2 >, <16 x i8>* %arrayidx + ret void +} + +define void @store_v16i8_3(<16 x i8>* %a, i32 %i) nounwind { +entry: + %arrayidx = getelementptr <16 x i8>* %a, i32 %i + store <16 x i8> < i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1 >, <16 x i8>* %arrayidx + ret void +} + +define void @store_v8i16_1(<8 x i16>* %a) nounwind { +entry: + store <8 x i16> < i16 1, i16 2, i16 1, i16 1, i16 1, i16 2, i16 1, i16 1 >, <8 x i16>* %a + ret void +} + +define void @store_v8i16_2(<8 x i16>* %a) nounwind { +entry: + %arrayidx = getelementptr <8 x i16>* %a, i16 1 + store <8 x i16> < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2 >, <8 x i16>* %arrayidx + ret void +} + +define 
void @store_v8i16_3(<8 x i16>* %a, i32 %i) nounwind { +entry: + %arrayidx = getelementptr <8 x i16>* %a, i32 %i + store <8 x i16> < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1 >, <8 x i16>* %arrayidx + ret void +} + +define void @store_v4i32_1(<4 x i32>* %a) nounwind { +entry: + store <4 x i32> < i32 1, i32 2, i32 1, i32 1 >, <4 x i32>* %a + ret void +} + +define void @store_v4i32_2(<4 x i32>* %a) nounwind { +entry: + %arrayidx = getelementptr <4 x i32>* %a, i32 1 + store <4 x i32> < i32 2, i32 2, i32 2, i32 2 >, <4 x i32>* %arrayidx + ret void +} + +define void @store_v4i32_3(<4 x i32>* %a, i32 %i) nounwind { +entry: + %arrayidx = getelementptr <4 x i32>* %a, i32 %i + store <4 x i32> < i32 1, i32 1, i32 1, i32 1 >, <4 x i32>* %arrayidx + ret void +} + +define void @store_v4f32_1(<4 x float>* %a) nounwind { +entry: + store <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x float>* %a + ret void +} + +define void @store_v4f32_2(<4 x float>* %a) nounwind { +entry: + %arrayidx = getelementptr <4 x float>* %a, i32 1 + store <4 x float> < float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00 >, <4 x float>* %arrayidx + ret void +} + +define void @store_v4f32_3(<4 x float>* %a, i32 %i) nounwind { +entry: + %arrayidx = getelementptr <4 x float>* %a, i32 %i + store <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x float>* %arrayidx + ret void +} + +; Test truncating stores: + +define zeroext i8 @tstore_i16_i8(i16 signext %val, i8* %dest) nounwind { +entry: + %conv = trunc i16 %val to i8 + store i8 %conv, i8* %dest + ret i8 %conv +} + +define zeroext i8 @tstore_i32_i8(i32 %val, i8* %dest) nounwind { +entry: + %conv = trunc i32 %val to i8 + store i8 %conv, i8* %dest + ret i8 %conv +} + +define signext i16 @tstore_i32_i16(i32 %val, i16* %dest) nounwind { +entry: + %conv = trunc i32 %val to i16 + store i16 %conv, i16* %dest + ret i16 %conv +} 
+ +define zeroext i8 @tstore_i64_i8(i64 %val, i8* %dest) nounwind { +entry: + %conv = trunc i64 %val to i8 + store i8 %conv, i8* %dest + ret i8 %conv +} + +define signext i16 @tstore_i64_i16(i64 %val, i16* %dest) nounwind { +entry: + %conv = trunc i64 %val to i16 + store i16 %conv, i16* %dest + ret i16 %conv +} + +define i32 @tstore_i64_i32(i64 %val, i32* %dest) nounwind { +entry: + %conv = trunc i64 %val to i32 + store i32 %conv, i32* %dest + ret i32 %conv +} + +define float @tstore_f64_f32(double %val, float* %dest) nounwind { +entry: + %conv = fptrunc double %val to float + store float %conv, float* %dest + ret float %conv +} + +;Check stores that might span two 16 byte memory blocks +define void @store_misaligned( i32 %val, i32* %ptr) { +;CHECK: store_misaligned +;CHECK: lqd +;CHECK: lqd +;CHECK: stqd +;CHECK: stqd +;CHECK: bi $lr + store i32 %val, i32*%ptr, align 2 + ret void +} + +define void @store_v8( <8 x float> %val, <8 x float>* %ptr ) +{ +;CHECK: stq +;CHECK: stq +;CHECK: bi $lr + store <8 x float> %val, <8 x float>* %ptr + ret void +} + +define void @store_null_vec( <4 x i32> %val ) { +; FIXME - this is for some reason compiled into a il+stqd, not a sta. +;CHECK: stqd +;CHECK: bi $lr + store <4 x i32> %val, <4 x i32>* null + ret void +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/storestruct.ll b/src/LLVM/test/CodeGen/CellSPU/storestruct.ll new file mode 100644 index 0000000..47185e8 --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/storestruct.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=cellspu | FileCheck %s + +%0 = type {i32, i32} +@buffer = global [ 72 x %0 ] zeroinitializer + +define void@test( ) { +; Check that there is no illegal "a rt, ra, imm" instruction +; CHECK-NOT: a {{\$., \$., 5..}} +; CHECK: a {{\$., \$., \$.}} + store %0 {i32 1, i32 2} , + %0* getelementptr ([72 x %0]* @buffer, i32 0, i32 71) + ret void +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/struct_1.ll b/src/LLVM/test/CodeGen/CellSPU/struct_1.ll new file mode 100644 index 0000000..adbb5ef --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/struct_1.ll
@@ -0,0 +1,144 @@ +; RUN: llc < %s -march=cellspu > %t1.s +; RUN: llc < %s -march=cellspu -mattr=large_mem > %t2.s +; RUN: grep lqa %t1.s | count 5 +; RUN: grep lqd %t1.s | count 11 +; RUN: grep rotqbyi %t1.s | count 7 +; RUN: grep xshw %t1.s | count 1 +; RUN: grep andi %t1.s | count 5 +; RUN: grep cbd %t1.s | count 3 +; RUN: grep chd %t1.s | count 1 +; RUN: grep cwd %t1.s | count 3 +; RUN: grep shufb %t1.s | count 7 +; RUN: grep stqd %t1.s | count 7 +; RUN: grep iohl %t2.s | count 16 +; RUN: grep ilhu %t2.s | count 16 +; RUN: grep lqd %t2.s | count 16 +; RUN: grep rotqbyi %t2.s | count 7 +; RUN: grep xshw %t2.s | count 1 +; RUN: grep andi %t2.s | count 5 +; RUN: grep cbd %t2.s | count 3 +; RUN: grep chd %t2.s | count 1 +; RUN: grep cwd %t2.s | count 3 +; RUN: grep shufb %t2.s | count 7 +; RUN: grep stqd %t2.s | count 7 + +; ModuleID = 'struct_1.bc' +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +; struct hackstate { +; unsigned char c1; // offset 0 (rotate left by 13 bytes to byte 3) +; unsigned char c2; // offset 1 (rotate left by 14 bytes to byte 3) +; unsigned char c3; // offset 2 (rotate left by 15 bytes to byte 3) +; int i1; // offset 4 (rotate left by 4 bytes to byte 0) +; short s1; // offset 8 (rotate left by 6 bytes to byte 2) +; int i2; // offset 12 [ignored] +; unsigned char c4; // offset 16 [ignored] +; unsigned char c5; // offset 17 [ignored] +; unsigned char c6; // offset 18 (rotate left by 14 bytes to byte 3) +; unsigned char c7; // offset 19 (no rotate, in preferred slot) +; int i3; // offset 20 [ignored] +; int i4; // offset 24 [ignored] +; int i5; // offset 28 [ignored] +; int i6; // offset 32 (no rotate, in preferred slot) +; } +%struct.hackstate = type { i8, i8, i8, i32, i16, i32, i8, i8, i8, i8, i32, i32, i32, i32 } + +; struct hackstate state = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } +@state = global %struct.hackstate 
zeroinitializer, align 16 + +define zeroext i8 @get_hackstate_c1() nounwind { +entry: + %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16 + ret i8 %tmp2 +} + +define zeroext i8 @get_hackstate_c2() nounwind { +entry: + %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16 + ret i8 %tmp2 +} + +define zeroext i8 @get_hackstate_c3() nounwind { +entry: + %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16 + ret i8 %tmp2 +} + +define i32 @get_hackstate_i1() nounwind { +entry: + %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 16 + ret i32 %tmp2 +} + +define signext i16 @get_hackstate_s1() nounwind { +entry: + %tmp2 = load i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16 + ret i16 %tmp2 +} + +define zeroext i8 @get_hackstate_c6() nounwind { +entry: + %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 8), align 16 + ret i8 %tmp2 +} + +define zeroext i8 @get_hackstate_c7() nounwind { +entry: + %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 9), align 16 + ret i8 %tmp2 +} + +define i32 @get_hackstate_i3() nounwind { +entry: + %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 10), align 16 + ret i32 %tmp2 +} + +define i32 @get_hackstate_i6() nounwind { +entry: + %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 13), align 16 + ret i32 %tmp2 +} + +define void @set_hackstate_c1(i8 zeroext %c) nounwind { +entry: + store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16 + ret void +} + +define void @set_hackstate_c2(i8 zeroext %c) nounwind { +entry: + store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16 + ret void +} + +define void @set_hackstate_c3(i8 zeroext %c) nounwind { +entry: + store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16 + ret void +} + +define void 
@set_hackstate_i1(i32 %i) nounwind { +entry: + store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 16 + ret void +} + +define void @set_hackstate_s1(i16 signext %s) nounwind { +entry: + store i16 %s, i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16 + ret void +} + +define void @set_hackstate_i3(i32 %i) nounwind { +entry: + store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 10), align 16 + ret void +} + +define void @set_hackstate_i6(i32 %i) nounwind { +entry: + store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 13), align 16 + ret void +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/sub_ops.ll b/src/LLVM/test/CodeGen/CellSPU/sub_ops.ll new file mode 100644 index 0000000..f0c40d3 --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/sub_ops.ll
@@ -0,0 +1,26 @@ +; RUN: llc < %s -march=cellspu | FileCheck %s + +define i32 @subword( i32 %param1, i32 %param2) { +; Check ordering of registers ret=param1-param2 -> rt=rb-ra +; CHECK-NOT: sf $3, $3, $4 +; CHECK: sf $3, $4, $3 + %1 = sub i32 %param1, %param2 + ret i32 %1 +} + +define i16 @subhword( i16 %param1, i16 %param2) { +; Check ordering of registers ret=param1-param2 -> rt=rb-ra +; CHECK-NOT: sfh $3, $3, $4 +; CHECK: sfh $3, $4, $3 + %1 = sub i16 %param1, %param2 + ret i16 %1 +} + +define float @subfloat( float %param1, float %param2) { +; Check ordering of registers ret=param1-param2 -> rt=ra-rb +; (yes this is reverse of i32 instruction) +; CHECK-NOT: fs $3, $4, $3 +; CHECK: fs $3, $3, $4 + %1 = fsub float %param1, %param2 + ret float %1 +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/trunc.ll b/src/LLVM/test/CodeGen/CellSPU/trunc.ll new file mode 100644 index 0000000..d161852 --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/trunc.ll
@@ -0,0 +1,94 @@ +; RUN: llc < %s -march=cellspu > %t1.s +; RUN: grep shufb %t1.s | count 19 +; RUN: grep {ilhu.*1799} %t1.s | count 1 +; RUN: grep {ilhu.*771} %t1.s | count 2 +; RUN: grep {ilhu.*1543} %t1.s | count 1 +; RUN: grep {ilhu.*1029} %t1.s | count 1 +; RUN: grep {ilhu.*515} %t1.s | count 1 +; RUN: grep {ilhu.*3855} %t1.s | count 1 +; RUN: grep {ilhu.*3599} %t1.s | count 1 +; RUN: grep {ilhu.*3085} %t1.s | count 1 +; RUN: grep {iohl.*3855} %t1.s | count 1 +; RUN: grep {iohl.*3599} %t1.s | count 2 +; RUN: grep {iohl.*1543} %t1.s | count 2 +; RUN: grep {iohl.*771} %t1.s | count 2 +; RUN: grep {iohl.*515} %t1.s | count 1 +; RUN: grep {iohl.*1799} %t1.s | count 1 +; RUN: grep lqa %t1.s | count 1 +; RUN: grep cbd %t1.s | count 4 +; RUN: grep chd %t1.s | count 3 +; RUN: grep cwd %t1.s | count 1 +; RUN: grep cdd %t1.s | count 1 + +; ModuleID = 'trunc.bc' +target datalayout = "E-p:32:32:128-i1:8:128-i8:8:128-i16:16:128-i32:32:128-i64:32:128-f32:32:128-f64:64:128-v64:64:64-v128:128:128-a0:0:128-s0:128:128" +target triple = "spu" + +define <16 x i8> @trunc_i128_i8(i128 %u, <16 x i8> %v) { +entry: + %0 = trunc i128 %u to i8 + %tmp1 = insertelement <16 x i8> %v, i8 %0, i32 15 + ret <16 x i8> %tmp1 +} + +define <8 x i16> @trunc_i128_i16(i128 %u, <8 x i16> %v) { +entry: + %0 = trunc i128 %u to i16 + %tmp1 = insertelement <8 x i16> %v, i16 %0, i32 8 + ret <8 x i16> %tmp1 +} + +define <4 x i32> @trunc_i128_i32(i128 %u, <4 x i32> %v) { +entry: + %0 = trunc i128 %u to i32 + %tmp1 = insertelement <4 x i32> %v, i32 %0, i32 2 + ret <4 x i32> %tmp1 +} + +define <2 x i64> @trunc_i128_i64(i128 %u, <2 x i64> %v) { +entry: + %0 = trunc i128 %u to i64 + %tmp1 = insertelement <2 x i64> %v, i64 %0, i32 1 + ret <2 x i64> %tmp1 +} + +define <16 x i8> @trunc_i64_i8(i64 %u, <16 x i8> %v) { +entry: + %0 = trunc i64 %u to i8 + %tmp1 = insertelement <16 x i8> %v, i8 %0, i32 10 + ret <16 x i8> %tmp1 +} + +define <8 x i16> @trunc_i64_i16(i64 %u, <8 x i16> %v) { +entry: + %0 = trunc i64 %u to 
i16 + %tmp1 = insertelement <8 x i16> %v, i16 %0, i32 6 + ret <8 x i16> %tmp1 +} + +define i32 @trunc_i64_i32(i64 %u) { +entry: + %0 = trunc i64 %u to i32 + ret i32 %0 +} + +define <16 x i8> @trunc_i32_i8(i32 %u, <16 x i8> %v) { +entry: + %0 = trunc i32 %u to i8 + %tmp1 = insertelement <16 x i8> %v, i8 %0, i32 7 + ret <16 x i8> %tmp1 +} + +define <8 x i16> @trunc_i32_i16(i32 %u, <8 x i16> %v) { +entry: + %0 = trunc i32 %u to i16 + %tmp1 = insertelement <8 x i16> %v, i16 %0, i32 3 + ret <8 x i16> %tmp1 +} + +define <16 x i8> @trunc_i16_i8(i16 %u, <16 x i8> %v) { +entry: + %0 = trunc i16 %u to i8 + %tmp1 = insertelement <16 x i8> %v, i8 %0, i32 5 + ret <16 x i8> %tmp1 +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/useful-harnesses/README.txt b/src/LLVM/test/CodeGen/CellSPU/useful-harnesses/README.txt new file mode 100644 index 0000000..d87b398 --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/useful-harnesses/README.txt
@@ -0,0 +1,5 @@ +This directory contains code that's not part of the DejaGNU test suite, +but is generally useful as various test harnesses. + +vecoperations.c: Various vector operation sanity checks, e.g., shuffles, + 8-bit vector add and multiply.
diff --git a/src/LLVM/test/CodeGen/CellSPU/useful-harnesses/i32operations.c b/src/LLVM/test/CodeGen/CellSPU/useful-harnesses/i32operations.c new file mode 100644 index 0000000..12fc30b --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/useful-harnesses/i32operations.c
@@ -0,0 +1,69 @@ +#include <stdio.h> + +typedef unsigned int uint32_t; +typedef int int32_t; + +const char *boolstring(int val) { + return val ? "true" : "false"; +} + +int i32_eq(int32_t a, int32_t b) { + return (a == b); +} + +int i32_neq(int32_t a, int32_t b) { + return (a != b); +} + +int32_t i32_eq_select(int32_t a, int32_t b, int32_t c, int32_t d) { + return ((a == b) ? c : d); +} + +int32_t i32_neq_select(int32_t a, int32_t b, int32_t c, int32_t d) { + return ((a != b) ? c : d); +} + +struct pred_s { + const char *name; + int (*predfunc)(int32_t, int32_t); + int (*selfunc)(int32_t, int32_t, int32_t, int32_t); +}; + +struct pred_s preds[] = { + { "eq", i32_eq, i32_eq_select }, + { "neq", i32_neq, i32_neq_select } +}; + +int main(void) { + int i; + int32_t a = 1234567890; + int32_t b = 345678901; + int32_t c = 1234500000; + int32_t d = 10001; + int32_t e = 10000; + + printf("a = %12d (0x%08x)\n", a, a); + printf("b = %12d (0x%08x)\n", b, b); + printf("c = %12d (0x%08x)\n", c, c); + printf("d = %12d (0x%08x)\n", d, d); + printf("e = %12d (0x%08x)\n", e, e); + printf("----------------------------------------\n"); + + for (i = 0; i < sizeof(preds)/sizeof(preds[0]); ++i) { + printf("a %s a = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, a))); + printf("a %s a = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, a))); + printf("a %s b = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, b))); + printf("a %s c = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, c))); + printf("d %s e = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(d, e))); + printf("e %s e = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(e, e))); + + printf("a %s a ? c : d = %d\n", preds[i].name, (*preds[i].selfunc)(a, a, c, d)); + printf("a %s a ? c : d == c (%s)\n", preds[i].name, boolstring((*preds[i].selfunc)(a, a, c, d) == c)); + printf("a %s b ? c : d = %d\n", preds[i].name, (*preds[i].selfunc)(a, b, c, d)); + printf("a %s b ? 
c : d == d (%s)\n", preds[i].name, boolstring((*preds[i].selfunc)(a, b, c, d) == d)); + + printf("----------------------------------------\n"); + } + + return 0; +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/useful-harnesses/i64operations.c b/src/LLVM/test/CodeGen/CellSPU/useful-harnesses/i64operations.c new file mode 100644 index 0000000..b613bd8 --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/useful-harnesses/i64operations.c
@@ -0,0 +1,673 @@ +#include <stdio.h> +#include "i64operations.h" + +int64_t tval_a = 1234567890003LL; +int64_t tval_b = 2345678901235LL; +int64_t tval_c = 1234567890001LL; +int64_t tval_d = 10001LL; +int64_t tval_e = 10000LL; +uint64_t tval_f = 0xffffff0750135eb9; +int64_t tval_g = -1; + +/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */ + +int +i64_eq(int64_t a, int64_t b) +{ + return (a == b); +} + +int +i64_neq(int64_t a, int64_t b) +{ + return (a != b); +} + +int +i64_gt(int64_t a, int64_t b) +{ + return (a > b); +} + +int +i64_le(int64_t a, int64_t b) +{ + return (a <= b); +} + +int +i64_ge(int64_t a, int64_t b) { + return (a >= b); +} + +int +i64_lt(int64_t a, int64_t b) { + return (a < b); +} + +int +i64_uge(uint64_t a, uint64_t b) +{ + return (a >= b); +} + +int +i64_ult(uint64_t a, uint64_t b) +{ + return (a < b); +} + +int +i64_ugt(uint64_t a, uint64_t b) +{ + return (a > b); +} + +int +i64_ule(uint64_t a, uint64_t b) +{ + return (a <= b); +} + +int64_t +i64_eq_select(int64_t a, int64_t b, int64_t c, int64_t d) +{ + return ((a == b) ? c : d); +} + +int64_t +i64_neq_select(int64_t a, int64_t b, int64_t c, int64_t d) +{ + return ((a != b) ? c : d); +} + +int64_t +i64_gt_select(int64_t a, int64_t b, int64_t c, int64_t d) { + return ((a > b) ? c : d); +} + +int64_t +i64_le_select(int64_t a, int64_t b, int64_t c, int64_t d) { + return ((a <= b) ? c : d); +} + +int64_t +i64_ge_select(int64_t a, int64_t b, int64_t c, int64_t d) { + return ((a >= b) ? c : d); +} + +int64_t +i64_lt_select(int64_t a, int64_t b, int64_t c, int64_t d) { + return ((a < b) ? c : d); +} + +uint64_t +i64_ugt_select(uint64_t a, uint64_t b, uint64_t c, uint64_t d) +{ + return ((a > b) ? c : d); +} + +uint64_t +i64_ule_select(uint64_t a, uint64_t b, uint64_t c, uint64_t d) +{ + return ((a <= b) ? c : d); +} + +uint64_t +i64_uge_select(uint64_t a, uint64_t b, uint64_t c, uint64_t d) { + return ((a >= b) ? 
c : d); +} + +uint64_t +i64_ult_select(uint64_t a, uint64_t b, uint64_t c, uint64_t d) { + return ((a < b) ? c : d); +} + +/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */ + +struct harness_int64_pred int64_tests_eq[] = { + {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, TRUE_VAL, &tval_c}, + {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, FALSE_VAL, &tval_d}, + {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, FALSE_VAL, &tval_d}, + {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d}, + {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c} +}; + +struct harness_int64_pred int64_tests_neq[] = { + {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, FALSE_VAL, &tval_d}, + {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, TRUE_VAL, &tval_c}, + {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, TRUE_VAL, &tval_c}, + {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c}, + {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d} +}; + +struct harness_int64_pred int64_tests_sgt[] = { + {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, FALSE_VAL, &tval_d}, + {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, FALSE_VAL, &tval_d}, + {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, TRUE_VAL, &tval_c}, + {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c}, + {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d} +}; + +struct harness_int64_pred int64_tests_sle[] = { + {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, TRUE_VAL, &tval_c}, + {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, TRUE_VAL, &tval_c}, + {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, FALSE_VAL, &tval_d}, + {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d}, + {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c} +}; + +struct harness_int64_pred int64_tests_sge[] = { + {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, TRUE_VAL, &tval_c}, + {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, FALSE_VAL, &tval_d}, 
+ {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, TRUE_VAL, &tval_c}, + {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c}, + {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c} +}; + +struct harness_int64_pred int64_tests_slt[] = { + {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, FALSE_VAL, &tval_d}, + {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, TRUE_VAL, &tval_c}, + {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, FALSE_VAL, &tval_d}, + {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d}, + {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d} +}; + +struct int64_pred_s int64_preds[] = { + {"eq", i64_eq, i64_eq_select, + int64_tests_eq, ARR_SIZE(int64_tests_eq)}, + {"neq", i64_neq, i64_neq_select, + int64_tests_neq, ARR_SIZE(int64_tests_neq)}, + {"gt", i64_gt, i64_gt_select, + int64_tests_sgt, ARR_SIZE(int64_tests_sgt)}, + {"le", i64_le, i64_le_select, + int64_tests_sle, ARR_SIZE(int64_tests_sle)}, + {"ge", i64_ge, i64_ge_select, + int64_tests_sge, ARR_SIZE(int64_tests_sge)}, + {"lt", i64_lt, i64_lt_select, + int64_tests_slt, ARR_SIZE(int64_tests_slt)} +}; + +/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */ + +struct harness_uint64_pred uint64_tests_ugt[] = { + {"a %s a", (uint64_t *) &tval_a, (uint64_t *) &tval_a, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d}, + {"a %s b", (uint64_t *) &tval_a, (uint64_t *) &tval_b, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d }, + {"a %s c", (uint64_t *) &tval_a, (uint64_t *) &tval_c, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c }, + {"d %s e", (uint64_t *) &tval_d, (uint64_t *) &tval_e, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c }, + {"e %s e", (uint64_t *) &tval_e, (uint64_t *) &tval_e, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d } +}; + +struct harness_uint64_pred 
uint64_tests_ule[] = { + {"a %s a", (uint64_t *) &tval_a, (uint64_t *) &tval_a, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c}, + {"a %s b", (uint64_t *) &tval_a, (uint64_t *) &tval_b, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c}, + {"a %s c", (uint64_t *) &tval_a, (uint64_t *) &tval_c, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d}, + {"d %s e", (uint64_t *) &tval_d, (uint64_t *) &tval_e, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d}, + {"e %s e", (uint64_t *) &tval_e, (uint64_t *) &tval_e, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c} +}; + +struct harness_uint64_pred uint64_tests_uge[] = { + {"a %s a", (uint64_t *) &tval_a, (uint64_t *) &tval_a, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c}, + {"a %s b", (uint64_t *) &tval_a, (uint64_t *) &tval_b, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d}, + {"a %s c", (uint64_t *) &tval_a, (uint64_t *) &tval_c, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c}, + {"d %s e", (uint64_t *) &tval_d, (uint64_t *) &tval_e, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c}, + {"e %s e", (uint64_t *) &tval_e, (uint64_t *) &tval_e, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c} +}; + +struct harness_uint64_pred uint64_tests_ult[] = { + {"a %s a", (uint64_t *) &tval_a, (uint64_t *) &tval_a, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d}, + {"a %s b", (uint64_t *) &tval_a, (uint64_t *) &tval_b, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c}, + {"a %s c", (uint64_t *) &tval_a, (uint64_t *) &tval_c, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d}, + {"d %s e", (uint64_t *) &tval_d, (uint64_t *) &tval_e, (uint64_t *) &tval_c, + (uint64_t 
*) &tval_d, FALSE_VAL, (uint64_t *) &tval_d}, + {"e %s e", (uint64_t *) &tval_e, (uint64_t *) &tval_e, (uint64_t *) &tval_c, + (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d} +}; + +struct uint64_pred_s uint64_preds[] = { + {"ugt", i64_ugt, i64_ugt_select, + uint64_tests_ugt, ARR_SIZE(uint64_tests_ugt)}, + {"ule", i64_ule, i64_ule_select, + uint64_tests_ule, ARR_SIZE(uint64_tests_ule)}, + {"uge", i64_uge, i64_uge_select, + uint64_tests_uge, ARR_SIZE(uint64_tests_uge)}, + {"ult", i64_ult, i64_ult_select, + uint64_tests_ult, ARR_SIZE(uint64_tests_ult)} +}; + +int +compare_expect_int64(const struct int64_pred_s * pred) +{ + int j, failed = 0; + + for (j = 0; j < pred->n_tests; ++j) { + int pred_result; + + pred_result = (*pred->predfunc) (*pred->tests[j].lhs, *pred->tests[j].rhs); + + if (pred_result != pred->tests[j].expected) { + char str[64]; + + sprintf(str, pred->tests[j].fmt_string, pred->name); + printf("%s: returned value is %d, expecting %d\n", str, + pred_result, pred->tests[j].expected); + printf(" lhs = %19lld (0x%016llx)\n", *pred->tests[j].lhs, + *pred->tests[j].lhs); + printf(" rhs = %19lld (0x%016llx)\n", *pred->tests[j].rhs, + *pred->tests[j].rhs); + ++failed; + } else { + int64_t selresult; + + selresult = (pred->selfunc) (*pred->tests[j].lhs, *pred->tests[j].rhs, + *pred->tests[j].select_a, + *pred->tests[j].select_b); + + if (selresult != *pred->tests[j].select_expected) { + char str[64]; + + sprintf(str, pred->tests[j].fmt_string, pred->name); + printf("%s select: returned value is %d, expecting %d\n", str, + pred_result, pred->tests[j].expected); + printf(" lhs = %19lld (0x%016llx)\n", *pred->tests[j].lhs, + *pred->tests[j].lhs); + printf(" rhs = %19lld (0x%016llx)\n", *pred->tests[j].rhs, + *pred->tests[j].rhs); + printf(" true = %19lld (0x%016llx)\n", *pred->tests[j].select_a, + *pred->tests[j].select_a); + printf(" false = %19lld (0x%016llx)\n", *pred->tests[j].select_b, + *pred->tests[j].select_b); + ++failed; + } + } + } + + printf(" 
%d tests performed, should be %d.\n", j, pred->n_tests); + + return failed; +} + +int +compare_expect_uint64(const struct uint64_pred_s * pred) +{ + int j, failed = 0; + + for (j = 0; j < pred->n_tests; ++j) { + int pred_result; + + pred_result = (*pred->predfunc) (*pred->tests[j].lhs, *pred->tests[j].rhs); + if (pred_result != pred->tests[j].expected) { + char str[64]; + + sprintf(str, pred->tests[j].fmt_string, pred->name); + printf("%s: returned value is %d, expecting %d\n", str, + pred_result, pred->tests[j].expected); + printf(" lhs = %19llu (0x%016llx)\n", *pred->tests[j].lhs, + *pred->tests[j].lhs); + printf(" rhs = %19llu (0x%016llx)\n", *pred->tests[j].rhs, + *pred->tests[j].rhs); + ++failed; + } else { + uint64_t selresult; + + selresult = (pred->selfunc) (*pred->tests[j].lhs, *pred->tests[j].rhs, + *pred->tests[j].select_a, + *pred->tests[j].select_b); + if (selresult != *pred->tests[j].select_expected) { + char str[64]; + + sprintf(str, pred->tests[j].fmt_string, pred->name); + printf("%s select: returned value is %d, expecting %d\n", str, + pred_result, pred->tests[j].expected); + printf(" lhs = %19llu (0x%016llx)\n", *pred->tests[j].lhs, + *pred->tests[j].lhs); + printf(" rhs = %19llu (0x%016llx)\n", *pred->tests[j].rhs, + *pred->tests[j].rhs); + printf(" true = %19llu (0x%016llx)\n", *pred->tests[j].select_a, + *pred->tests[j].select_a); + printf(" false = %19llu (0x%016llx)\n", *pred->tests[j].select_b, + *pred->tests[j].select_b); + ++failed; + } + } + } + + printf(" %d tests performed, should be %d.\n", j, pred->n_tests); + + return failed; +} + +/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */ + +int +test_i64_sext_i32(int in, int64_t expected) { + int64_t result = (int64_t) in; + + if (result != expected) { + char str[64]; + sprintf(str, "i64_sext_i32(%d) returns %lld\n", in, result); + return 1; + } + + return 0; +} + +int +test_i64_sext_i16(short in, int64_t expected) { + int64_t result = (int64_t) in; + + if 
(result != expected) { + char str[64]; + sprintf(str, "i64_sext_i16(%hd) returns %lld\n", in, result); + return 1; + } + + return 0; +} + +int +test_i64_sext_i8(signed char in, int64_t expected) { + int64_t result = (int64_t) in; + + if (result != expected) { + char str[64]; + sprintf(str, "i64_sext_i8(%d) returns %lld\n", in, result); + return 1; + } + + return 0; +} + +int +test_i64_zext_i32(unsigned int in, uint64_t expected) { + uint64_t result = (uint64_t) in; + + if (result != expected) { + char str[64]; + sprintf(str, "i64_zext_i32(%u) returns %llu\n", in, result); + return 1; + } + + return 0; +} + +int +test_i64_zext_i16(unsigned short in, uint64_t expected) { + uint64_t result = (uint64_t) in; + + if (result != expected) { + char str[64]; + sprintf(str, "i64_zext_i16(%hu) returns %llu\n", in, result); + return 1; + } + + return 0; +} + +int +test_i64_zext_i8(unsigned char in, uint64_t expected) { + uint64_t result = (uint64_t) in; + + if (result != expected) { + char str[64]; + sprintf(str, "i64_zext_i8(%u) returns %llu\n", in, result); + return 1; + } + + return 0; +} + +/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */ + +int64_t +i64_shl_const(int64_t a) { + return a << 10; +} + +int64_t +i64_shl(int64_t a, int amt) { + return a << amt; +} + +uint64_t +u64_shl_const(uint64_t a) { + return a << 10; +} + +uint64_t +u64_shl(uint64_t a, int amt) { + return a << amt; +} + +int64_t +i64_srl_const(int64_t a) { + return a >> 10; +} + +int64_t +i64_srl(int64_t a, int amt) { + return a >> amt; +} + +uint64_t +u64_srl_const(uint64_t a) { + return a >> 10; +} + +uint64_t +u64_srl(uint64_t a, int amt) { + return a >> amt; +} + +int64_t +i64_sra_const(int64_t a) { + return a >> 10; +} + +int64_t +i64_sra(int64_t a, int amt) { + return a >> amt; +} + +uint64_t +u64_sra_const(uint64_t a) { + return a >> 10; +} + +uint64_t +u64_sra(uint64_t a, int amt) { + return a >> amt; +} + +int +test_u64_constant_shift(const char *func_name, uint64_t 
(*func)(uint64_t), uint64_t a, uint64_t expected) { + uint64_t result = (*func)(a); + + if (result != expected) { + printf("%s(0x%016llx) returns 0x%016llx, expected 0x%016llx\n", func_name, a, result, expected); + return 1; + } + + return 0; +} + +int +test_i64_constant_shift(const char *func_name, int64_t (*func)(int64_t), int64_t a, int64_t expected) { + int64_t result = (*func)(a); + + if (result != expected) { + printf("%s(0x%016llx) returns 0x%016llx, expected 0x%016llx\n", func_name, a, result, expected); + return 1; + } + + return 0; +} + +int +test_u64_variable_shift(const char *func_name, uint64_t (*func)(uint64_t, int), uint64_t a, unsigned int b, uint64_t expected) { + uint64_t result = (*func)(a, b); + + if (result != expected) { + printf("%s(0x%016llx, %d) returns 0x%016llx, expected 0x%016llx\n", func_name, a, b, result, expected); + return 1; + } + + return 0; +} + +int +test_i64_variable_shift(const char *func_name, int64_t (*func)(int64_t, int), int64_t a, unsigned int b, int64_t expected) { + int64_t result = (*func)(a, b); + + if (result != expected) { + printf("%s(0x%016llx, %d) returns 0x%016llx, expected 0x%016llx\n", func_name, a, b, result, expected); + return 1; + } + + return 0; +} + +/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */ + +int64_t i64_mul(int64_t a, int64_t b) { + return a * b; +} + +/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */ + +int +main(void) +{ + int i, j, failed = 0; + const char *something_failed = " %d tests failed.\n"; + const char *all_tests_passed = " All tests passed.\n"; + + printf("tval_a = %20lld (0x%016llx)\n", tval_a, tval_a); + printf("tval_b = %20lld (0x%016llx)\n", tval_b, tval_b); + printf("tval_c = %20lld (0x%016llx)\n", tval_c, tval_c); + printf("tval_d = %20lld (0x%016llx)\n", tval_d, tval_d); + printf("tval_e = %20lld (0x%016llx)\n", tval_e, tval_e); + printf("tval_f = %20llu (0x%016llx)\n", tval_f, tval_f); + printf("tval_g = %20llu 
(0x%016llx)\n", tval_g, tval_g); + printf("----------------------------------------\n"); + + for (i = 0; i < ARR_SIZE(int64_preds); ++i) { + printf("%s series:\n", int64_preds[i].name); + if ((failed = compare_expect_int64(int64_preds + i)) > 0) { + printf(something_failed, failed); + } else { + printf(all_tests_passed); + } + + printf("----------------------------------------\n"); + } + + for (i = 0; i < ARR_SIZE(uint64_preds); ++i) { + printf("%s series:\n", uint64_preds[i].name); + if ((failed = compare_expect_uint64(uint64_preds + i)) > 0) { + printf(something_failed, failed); + } else { + printf(all_tests_passed); + } + + printf("----------------------------------------\n"); + } + + /*----------------------------------------------------------------------*/ + + puts("signed/zero-extend tests:"); + + failed = 0; + failed += test_i64_sext_i32(-1, -1LL); + failed += test_i64_sext_i32(10, 10LL); + failed += test_i64_sext_i32(0x7fffffff, 0x7fffffffLL); + failed += test_i64_sext_i16(-1, -1LL); + failed += test_i64_sext_i16(10, 10LL); + failed += test_i64_sext_i16(0x7fff, 0x7fffLL); + failed += test_i64_sext_i8(-1, -1LL); + failed += test_i64_sext_i8(10, 10LL); + failed += test_i64_sext_i8(0x7f, 0x7fLL); + + failed += test_i64_zext_i32(0xffffffff, 0x00000000ffffffffLLU); + failed += test_i64_zext_i32(0x01234567, 0x0000000001234567LLU); + failed += test_i64_zext_i16(0xffff, 0x000000000000ffffLLU); + failed += test_i64_zext_i16(0x569a, 0x000000000000569aLLU); + failed += test_i64_zext_i8(0xff, 0x00000000000000ffLLU); + failed += test_i64_zext_i8(0xa0, 0x00000000000000a0LLU); + + if (failed > 0) { + printf(" %d tests failed.\n", failed); + } else { + printf(" All tests passed.\n"); + } + + printf("----------------------------------------\n"); + + failed = 0; + puts("signed left/right shift tests:"); + failed += test_i64_constant_shift("i64_shl_const", i64_shl_const, tval_a, 0x00047dc7ec114c00LL); + failed += test_i64_variable_shift("i64_shl", i64_shl, tval_a, 10, 
0x00047dc7ec114c00LL); + failed += test_i64_constant_shift("i64_srl_const", i64_srl_const, tval_a, 0x0000000047dc7ec1LL); + failed += test_i64_variable_shift("i64_srl", i64_srl, tval_a, 10, 0x0000000047dc7ec1LL); + failed += test_i64_constant_shift("i64_sra_const", i64_sra_const, tval_a, 0x0000000047dc7ec1LL); + failed += test_i64_variable_shift("i64_sra", i64_sra, tval_a, 10, 0x0000000047dc7ec1LL); + + if (failed > 0) { + printf(" %d tests ailed.\n", failed); + } else { + printf(" All tests passed.\n"); + } + + printf("----------------------------------------\n"); + + failed = 0; + puts("unsigned left/right shift tests:"); + failed += test_u64_constant_shift("u64_shl_const", u64_shl_const, tval_f, 0xfffc1d404d7ae400LL); + failed += test_u64_variable_shift("u64_shl", u64_shl, tval_f, 10, 0xfffc1d404d7ae400LL); + failed += test_u64_constant_shift("u64_srl_const", u64_srl_const, tval_f, 0x003fffffc1d404d7LL); + failed += test_u64_variable_shift("u64_srl", u64_srl, tval_f, 10, 0x003fffffc1d404d7LL); + failed += test_i64_constant_shift("i64_sra_const", i64_sra_const, tval_f, 0xffffffffc1d404d7LL); + failed += test_i64_variable_shift("i64_sra", i64_sra, tval_f, 10, 0xffffffffc1d404d7LL); + failed += test_u64_constant_shift("u64_sra_const", u64_sra_const, tval_f, 0x003fffffc1d404d7LL); + failed += test_u64_variable_shift("u64_sra", u64_sra, tval_f, 10, 0x003fffffc1d404d7LL); + + if (failed > 0) { + printf(" %d tests ailed.\n", failed); + } else { + printf(" All tests passed.\n"); + } + + printf("----------------------------------------\n"); + + int64_t result; + + result = i64_mul(tval_g, tval_g); + printf("%20lld * %20lld = %20lld (0x%016llx)\n", tval_g, tval_g, result, result); + result = i64_mul(tval_d, tval_e); + printf("%20lld * %20lld = %20lld (0x%016llx)\n", tval_d, tval_e, result, result); + /* 0xba7a664f13077c9 */ + result = i64_mul(tval_a, tval_b); + printf("%20lld * %20lld = %20lld (0x%016llx)\n", tval_a, tval_b, result, result); + + 
printf("----------------------------------------\n"); + + return 0; +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/useful-harnesses/i64operations.h b/src/LLVM/test/CodeGen/CellSPU/useful-harnesses/i64operations.h new file mode 100644 index 0000000..7a02794 --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/useful-harnesses/i64operations.h
@@ -0,0 +1,43 @@ +#define TRUE_VAL (!0) +#define FALSE_VAL 0 +#define ARR_SIZE(arr) (sizeof(arr)/sizeof(arr[0])) + +typedef unsigned long long int uint64_t; +typedef long long int int64_t; + +/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */ +struct harness_int64_pred { + const char *fmt_string; + int64_t *lhs; + int64_t *rhs; + int64_t *select_a; + int64_t *select_b; + int expected; + int64_t *select_expected; +}; + +struct harness_uint64_pred { + const char *fmt_string; + uint64_t *lhs; + uint64_t *rhs; + uint64_t *select_a; + uint64_t *select_b; + int expected; + uint64_t *select_expected; +}; + +struct int64_pred_s { + const char *name; + int (*predfunc) (int64_t, int64_t); + int64_t (*selfunc) (int64_t, int64_t, int64_t, int64_t); + struct harness_int64_pred *tests; + int n_tests; +}; + +struct uint64_pred_s { + const char *name; + int (*predfunc) (uint64_t, uint64_t); + uint64_t (*selfunc) (uint64_t, uint64_t, uint64_t, uint64_t); + struct harness_uint64_pred *tests; + int n_tests; +};
diff --git a/src/LLVM/test/CodeGen/CellSPU/useful-harnesses/lit.local.cfg b/src/LLVM/test/CodeGen/CellSPU/useful-harnesses/lit.local.cfg new file mode 100644 index 0000000..e6f55ee --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/useful-harnesses/lit.local.cfg
@@ -0,0 +1 @@ +config.suffixes = []
diff --git a/src/LLVM/test/CodeGen/CellSPU/useful-harnesses/vecoperations.c b/src/LLVM/test/CodeGen/CellSPU/useful-harnesses/vecoperations.c new file mode 100644 index 0000000..c4c86e3 --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/useful-harnesses/vecoperations.c
@@ -0,0 +1,179 @@ +#include <stdio.h> + +typedef unsigned char v16i8 __attribute__((ext_vector_type(16))); +typedef short v8i16 __attribute__((ext_vector_type(16))); +typedef int v4i32 __attribute__((ext_vector_type(4))); +typedef float v4f32 __attribute__((ext_vector_type(4))); +typedef long long v2i64 __attribute__((ext_vector_type(2))); +typedef double v2f64 __attribute__((ext_vector_type(2))); + +void print_v16i8(const char *str, const v16i8 v) { + union { + unsigned char elts[16]; + v16i8 vec; + } tv; + tv.vec = v; + printf("%s = { %hhu, %hhu, %hhu, %hhu, %hhu, %hhu, %hhu, " + "%hhu, %hhu, %hhu, %hhu, %hhu, %hhu, %hhu, " + "%hhu, %hhu }\n", + str, tv.elts[0], tv.elts[1], tv.elts[2], tv.elts[3], tv.elts[4], tv.elts[5], + tv.elts[6], tv.elts[7], tv.elts[8], tv.elts[9], tv.elts[10], tv.elts[11], + tv.elts[12], tv.elts[13], tv.elts[14], tv.elts[15]); +} + +void print_v16i8_hex(const char *str, const v16i8 v) { + union { + unsigned char elts[16]; + v16i8 vec; + } tv; + tv.vec = v; + printf("%s = { 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, " + "0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, " + "0x%02hhx, 0x%02hhx }\n", + str, tv.elts[0], tv.elts[1], tv.elts[2], tv.elts[3], tv.elts[4], tv.elts[5], + tv.elts[6], tv.elts[7], tv.elts[8], tv.elts[9], tv.elts[10], tv.elts[11], + tv.elts[12], tv.elts[13], tv.elts[14], tv.elts[15]); +} + +void print_v8i16_hex(const char *str, v8i16 v) { + union { + short elts[8]; + v8i16 vec; + } tv; + tv.vec = v; + printf("%s = { 0x%04hx, 0x%04hx, 0x%04hx, 0x%04hx, 0x%04hx, " + "0x%04hx, 0x%04hx, 0x%04hx }\n", + str, tv.elts[0], tv.elts[1], tv.elts[2], tv.elts[3], tv.elts[4], + tv.elts[5], tv.elts[6], tv.elts[7]); +} + +void print_v4i32(const char *str, v4i32 v) { + printf("%s = { %d, %d, %d, %d }\n", str, v.x, v.y, v.z, v.w); +} + +void print_v4f32(const char *str, v4f32 v) { + printf("%s = { %f, %f, %f, %f }\n", str, v.x, v.y, v.z, v.w); +} + +void print_v2i64(const char *str, v2i64 v) 
{ + printf("%s = { %lld, %lld }\n", str, v.x, v.y); +} + +void print_v2f64(const char *str, v2f64 v) { + printf("%s = { %g, %g }\n", str, v.x, v.y); +} + +/*----------------------------------------------------------------------*/ + +v16i8 v16i8_mpy(v16i8 v1, v16i8 v2) { + return v1 * v2; +} + +v16i8 v16i8_add(v16i8 v1, v16i8 v2) { + return v1 + v2; +} + +v4i32 v4i32_shuffle_1(v4i32 a) { + v4i32 c2 = a.yzwx; + return c2; +} + +v4i32 v4i32_shuffle_2(v4i32 a) { + v4i32 c2 = a.zwxy; + return c2; +} + +v4i32 v4i32_shuffle_3(v4i32 a) { + v4i32 c2 = a.wxyz; + return c2; +} + +v4i32 v4i32_shuffle_4(v4i32 a) { + v4i32 c2 = a.xyzw; + return c2; +} + +v4i32 v4i32_shuffle_5(v4i32 a) { + v4i32 c2 = a.xwzy; + return c2; +} + +v4f32 v4f32_shuffle_1(v4f32 a) { + v4f32 c2 = a.yzwx; + return c2; +} + +v4f32 v4f32_shuffle_2(v4f32 a) { + v4f32 c2 = a.zwxy; + return c2; +} + +v4f32 v4f32_shuffle_3(v4f32 a) { + v4f32 c2 = a.wxyz; + return c2; +} + +v4f32 v4f32_shuffle_4(v4f32 a) { + v4f32 c2 = a.xyzw; + return c2; +} + +v4f32 v4f32_shuffle_5(v4f32 a) { + v4f32 c2 = a.xwzy; + return c2; +} + +v2i64 v2i64_shuffle(v2i64 a) { + v2i64 c2 = a.yx; + return c2; +} + +v2f64 v2f64_shuffle(v2f64 a) { + v2f64 c2 = a.yx; + return c2; +} + +int main(void) { + v16i8 v00 = { 0xf4, 0xad, 0x01, 0xe9, 0x51, 0x78, 0xc1, 0x8a, + 0x94, 0x7c, 0x49, 0x6c, 0x21, 0x32, 0xb2, 0x04 }; + v16i8 va0 = { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, + 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10 }; + v16i8 va1 = { 0x11, 0x83, 0x4b, 0x63, 0xff, 0x90, 0x32, 0xe5, + 0x5a, 0xaa, 0x20, 0x01, 0x0d, 0x15, 0x77, 0x05 }; + v8i16 v01 = { 0x1a87, 0x0a14, 0x5014, 0xfff0, + 0xe194, 0x0184, 0x801e, 0x5940 }; + v4i32 v1 = { 1, 2, 3, 4 }; + v4f32 v2 = { 1.0, 2.0, 3.0, 4.0 }; + v2i64 v3 = { 691043ll, 910301513ll }; + v2f64 v4 = { 5.8e56, 9.103e-62 }; + + puts("---- vector tests start ----"); + + print_v16i8_hex("v00 ", v00); + print_v16i8_hex("va0 ", va0); + print_v16i8_hex("va1 ", va1); + print_v16i8_hex("va0 x va1 ", 
v16i8_mpy(va0, va1)); + print_v16i8_hex("va0 + va1 ", v16i8_add(va0, va1)); + print_v8i16_hex("v01 ", v01); + + print_v4i32("v4i32_shuffle_1(1, 2, 3, 4)", v4i32_shuffle_1(v1)); + print_v4i32("v4i32_shuffle_2(1, 2, 3, 4)", v4i32_shuffle_2(v1)); + print_v4i32("v4i32_shuffle_3(1, 2, 3, 4)", v4i32_shuffle_3(v1)); + print_v4i32("v4i32_shuffle_4(1, 2, 3, 4)", v4i32_shuffle_4(v1)); + print_v4i32("v4i32_shuffle_5(1, 2, 3, 4)", v4i32_shuffle_5(v1)); + + print_v4f32("v4f32_shuffle_1(1, 2, 3, 4)", v4f32_shuffle_1(v2)); + print_v4f32("v4f32_shuffle_2(1, 2, 3, 4)", v4f32_shuffle_2(v2)); + print_v4f32("v4f32_shuffle_3(1, 2, 3, 4)", v4f32_shuffle_3(v2)); + print_v4f32("v4f32_shuffle_4(1, 2, 3, 4)", v4f32_shuffle_4(v2)); + print_v4f32("v4f32_shuffle_5(1, 2, 3, 4)", v4f32_shuffle_5(v2)); + + print_v2i64("v3 ", v3); + print_v2i64("v2i64_shuffle ", v2i64_shuffle(v3)); + print_v2f64("v4 ", v4); + print_v2f64("v2f64_shuffle ", v2f64_shuffle(v4)); + + puts("---- vector tests end ----"); + + return 0; +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/v2f32.ll b/src/LLVM/test/CodeGen/CellSPU/v2f32.ll new file mode 100644 index 0000000..09e15ff --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/v2f32.ll
@@ -0,0 +1,78 @@ +;RUN: llc --march=cellspu %s -o - | FileCheck %s +%vec = type <2 x float> + +define %vec @test_ret(%vec %param) +{ +;CHECK: bi $lr + ret %vec %param +} + +define %vec @test_add(%vec %param) +{ +;CHECK: fa {{\$.}}, $3, $3 + %1 = fadd %vec %param, %param +;CHECK: bi $lr + ret %vec %1 +} + +define %vec @test_sub(%vec %param) +{ +;CHECK: fs {{\$.}}, $3, $3 + %1 = fsub %vec %param, %param + +;CHECK: bi $lr + ret %vec %1 +} + +define %vec @test_mul(%vec %param) +{ +;CHECK: fm {{\$.}}, $3, $3 + %1 = fmul %vec %param, %param + +;CHECK: bi $lr + ret %vec %1 +} + +; CHECK: test_splat: +define %vec @test_splat(float %param ) { +;CHECK: lqa +;CHECK: shufb + %sv = insertelement <1 x float> undef, float %param, i32 0 + %rv = shufflevector <1 x float> %sv, <1 x float> undef, <2 x i32> zeroinitializer +;CHECK: bi $lr + ret %vec %rv +} + +define void @test_store(%vec %val, %vec* %ptr){ +; CHECK: test_store: +;CHECK: stqd + store %vec zeroinitializer, %vec* null + +;CHECK: stqd $3, 0(${{.*}}) +;CHECK: bi $lr + store %vec %val, %vec* %ptr + ret void +} + +; CHECK: test_insert: +define %vec @test_insert(){ +;CHECK: cwd +;CHECK: shufb $3 + %rv = insertelement %vec undef, float 0.0e+00, i32 undef +;CHECK: bi $lr + ret %vec %rv +} + +; CHECK: test_unaligned_store: + +define void @test_unaligned_store() { +;CHECK: cdd +;CHECK: shufb +;CHECK: stqd + %data = alloca [4 x float], align 16 ; <[4 x float]*> [#uses=1] + %ptr = getelementptr [4 x float]* %data, i32 0, i32 2 ; <float*> [#uses=1] + %vptr = bitcast float* %ptr to <2 x float>* ; <[1 x <2 x float>]*> [#uses=1] + store <2 x float> zeroinitializer, <2 x float>* %vptr + ret void +} +
diff --git a/src/LLVM/test/CodeGen/CellSPU/v2i32.ll b/src/LLVM/test/CodeGen/CellSPU/v2i32.ll new file mode 100644 index 0000000..71d4aba --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/v2i32.ll
@@ -0,0 +1,77 @@ +;RUN: llc --march=cellspu %s -o - | FileCheck %s +%vec = type <2 x i32> + +define %vec @test_ret(%vec %param) +{ +;CHECK: bi $lr + ret %vec %param +} + +define %vec @test_add(%vec %param) +{ +;CHECK: a {{\$.}}, $3, $3 + %1 = add %vec %param, %param +;CHECK: bi $lr + ret %vec %1 +} + +define %vec @test_sub(%vec %param) +{ +;CHECK: sf {{\$.}}, $4, $3 + %1 = sub %vec %param, <i32 1, i32 1> + +;CHECK: bi $lr + ret %vec %1 +} + +define %vec @test_mul(%vec %param) +{ +;CHECK: mpyu +;CHECK: mpyh +;CHECK: a {{\$., \$., \$.}} +;CHECK: a {{\$., \$., \$.}} + %1 = mul %vec %param, %param + +;CHECK: bi $lr + ret %vec %1 +} + +define <2 x i32> @test_splat(i32 %param ) { +;see svn log for why this is here... +;CHECK-NOT: or $3, $3, $3 +;CHECK: lqa +;CHECK: shufb + %sv = insertelement <1 x i32> undef, i32 %param, i32 0 + %rv = shufflevector <1 x i32> %sv, <1 x i32> undef, <2 x i32> zeroinitializer +;CHECK: bi $lr + ret <2 x i32> %rv +} + +define i32 @test_extract() { +;CHECK: shufb $3 + %rv = extractelement <2 x i32> zeroinitializer, i32 undef ; <i32> [#uses=1] +;CHECK: bi $lr + ret i32 %rv +} + +define void @test_store( %vec %val, %vec* %ptr) +{ +;CHECK: stqd $3, 0(${{.}}) +;CHECK: bi $lr + store %vec %val, %vec* %ptr + ret void +} + +;Alignment of <2 x i32> is not *directly* defined in the ABI +;It probably is safe to interpret it as an array, thus having 8 byte +;alignment (according to ABI). This tests that the size of +;[2 x <2 x i32>] is 16 bytes, i.e. there is no padding between the +;two arrays +define <2 x i32>* @test_alignment( [2 x <2 x i32>]* %ptr) +{ +; CHECK-NOT: ai $3, $3, 16 +; CHECK: ai $3, $3, 8 +; CHECK: bi $lr + %rv = getelementptr [2 x <2 x i32>]* %ptr, i32 0, i32 1 + ret <2 x i32>* %rv +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/vec_const.ll b/src/LLVM/test/CodeGen/CellSPU/vec_const.ll new file mode 100644 index 0000000..24c05c6 --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/vec_const.ll
@@ -0,0 +1,154 @@ +; RUN: llc < %s -march=cellspu > %t1.s +; RUN: llc < %s -march=cellspu -mattr=large_mem > %t2.s +; RUN: grep -w il %t1.s | count 3 +; RUN: grep ilhu %t1.s | count 8 +; RUN: grep -w ilh %t1.s | count 5 +; RUN: grep iohl %t1.s | count 7 +; RUN: grep lqa %t1.s | count 6 +; RUN: grep 24672 %t1.s | count 2 +; RUN: grep 16429 %t1.s | count 1 +; RUN: grep 63572 %t1.s | count 1 +; RUN: grep 4660 %t1.s | count 1 +; RUN: grep 22136 %t1.s | count 1 +; RUN: grep 43981 %t1.s | count 1 +; RUN: grep 61202 %t1.s | count 1 +; RUN: grep 16393 %t1.s | count 1 +; RUN: grep 8699 %t1.s | count 1 +; RUN: grep 21572 %t1.s | count 1 +; RUN: grep 11544 %t1.s | count 1 +; RUN: grep 1311768467750121234 %t1.s | count 1 +; RUN: grep lqd %t2.s | count 6 + +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128" +target triple = "spu-unknown-elf" + +; Vector constant load tests: + +; IL <reg>, 2 +define <4 x i32> @v4i32_constvec() { + ret <4 x i32> < i32 2, i32 2, i32 2, i32 2 > +} + +; Spill to constant pool +define <4 x i32> @v4i32_constpool() { + ret <4 x i32> < i32 2, i32 1, i32 1, i32 2 > +} + +; Max negative range for IL +define <4 x i32> @v4i32_constvec_2() { + ret <4 x i32> < i32 -32768, i32 -32768, i32 -32768, i32 -32768 > +} + +; ILHU <reg>, 73 (0x49) +; 4784128 = 0x490000 +define <4 x i32> @v4i32_constvec_3() { + ret <4 x i32> < i32 4784128, i32 4784128, + i32 4784128, i32 4784128 > +} + +; ILHU <reg>, 61 (0x3d) +; IOHL <reg>, 15395 (0x3c23) +define <4 x i32> @v4i32_constvec_4() { + ret <4 x i32> < i32 4013091, i32 4013091, + i32 4013091, i32 4013091 > +} + +; ILHU <reg>, 0x5050 (20560) +; IOHL <reg>, 0x5050 (20560) +; Tests for whether we expand the size of the bit pattern properly, because +; this could be interpreted as an i8 pattern (0x50) +define <4 x i32> @v4i32_constvec_5() { + ret <4 x i32> < i32 1347440720, i32 1347440720, + i32 1347440720, i32 1347440720 > +} + +; ILH +define <8 x 
i16> @v8i16_constvec_1() { + ret <8 x i16> < i16 32767, i16 32767, i16 32767, i16 32767, + i16 32767, i16 32767, i16 32767, i16 32767 > +} + +; ILH +define <8 x i16> @v8i16_constvec_2() { + ret <8 x i16> < i16 511, i16 511, i16 511, i16 511, i16 511, + i16 511, i16 511, i16 511 > +} + +; ILH +define <8 x i16> @v8i16_constvec_3() { + ret <8 x i16> < i16 -512, i16 -512, i16 -512, i16 -512, i16 -512, + i16 -512, i16 -512, i16 -512 > +} + +; ILH <reg>, 24672 (0x6060) +; Tests whether we expand the size of the bit pattern properly, because +; this could be interpreted as an i8 pattern (0x60) +define <8 x i16> @v8i16_constvec_4() { + ret <8 x i16> < i16 24672, i16 24672, i16 24672, i16 24672, i16 24672, + i16 24672, i16 24672, i16 24672 > +} + +; ILH <reg>, 24672 (0x6060) +; Tests whether we expand the size of the bit pattern properly, because +; this is an i8 pattern but has to be expanded out to i16 to load it +; properly into the vector register. +define <16 x i8> @v16i8_constvec_1() { + ret <16 x i8> < i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, + i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96 > +} + +define <4 x float> @v4f32_constvec_1() { +entry: + ret <4 x float> < float 0x4005BF0A80000000, + float 0x4005BF0A80000000, + float 0x4005BF0A80000000, + float 0x4005BF0A80000000 > +} + +define <4 x float> @v4f32_constvec_2() { +entry: + ret <4 x float> < float 0.000000e+00, + float 0.000000e+00, + float 0.000000e+00, + float 0.000000e+00 > +} + + +define <4 x float> @v4f32_constvec_3() { +entry: + ret <4 x float> < float 0x4005BF0A80000000, + float 0x3810000000000000, + float 0x47EFFFFFE0000000, + float 0x400921FB60000000 > +} + +; 1311768467750121234 => 0x 12345678 abcdef12 +; HI32_hi: 4660 +; HI32_lo: 22136 +; LO32_hi: 43981 +; LO32_lo: 61202 +define <2 x i64> @i64_constvec_1() { +entry: + ret <2 x i64> < i64 1311768467750121234, + i64 1311768467750121234 > +} + +define <2 x i64> @i64_constvec_2() { +entry: + ret <2 x i64> < i64 1, i64 
1311768467750121234 > +} + +define <2 x double> @f64_constvec_1() { +entry: + ret <2 x double> < double 0x400921fb54442d18, + double 0xbff6a09e667f3bcd > +} + +; 0x400921fb 54442d18 -> +; (ILHU 0x4009 [16393]/IOHL 0x21fb [ 8699]) +; (ILHU 0x5444 [21572]/IOHL 0x2d18 [11544]) +define <2 x double> @f64_constvec_2() { +entry: + ret <2 x double> < double 0x400921fb54442d18, + double 0x400921fb54442d18 > +}
diff --git a/src/LLVM/test/CodeGen/CellSPU/vecinsert.ll b/src/LLVM/test/CodeGen/CellSPU/vecinsert.ll new file mode 100644 index 0000000..8dcab1d --- /dev/null +++ b/src/LLVM/test/CodeGen/CellSPU/vecinsert.ll
@@ -0,0 +1,131 @@ +; RUN: llc < %s -march=cellspu > %t1.s +; RUN: grep cbd %t1.s | count 5 +; RUN: grep chd %t1.s | count 5 +; RUN: grep cwd %t1.s | count 11 +; RUN: grep -w il %t1.s | count 5 +; RUN: grep -w ilh %t1.s | count 6 +; RUN: grep iohl %t1.s | count 1 +; RUN: grep ilhu %t1.s | count 4 +; RUN: grep shufb %t1.s | count 27 +; RUN: grep 17219 %t1.s | count 1 +; RUN: grep 22598 %t1.s | count 1 +; RUN: grep -- -39 %t1.s | count 1 +; RUN: grep 24 %t1.s | count 1 +; RUN: grep 1159 %t1.s | count 1 +; RUN: FileCheck %s < %t1.s + +; ModuleID = 'vecinsert.bc' +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128" +target triple = "spu-unknown-elf" + +; 67 -> 0x43, as 8-bit vector constant load = 0x4343 (17219)0x4343 +define <16 x i8> @test_v16i8(<16 x i8> %P, i8 %x) { +entry: + %tmp1 = insertelement <16 x i8> %P, i8 %x, i32 10 + %tmp1.1 = insertelement <16 x i8> %tmp1, i8 67, i32 7 + %tmp1.2 = insertelement <16 x i8> %tmp1.1, i8 %x, i32 15 + ret <16 x i8> %tmp1.2 +} + +; 22598 -> 0x5846 +define <8 x i16> @test_v8i16(<8 x i16> %P, i16 %x) { +entry: + %tmp1 = insertelement <8 x i16> %P, i16 %x, i32 5 + %tmp1.1 = insertelement <8 x i16> %tmp1, i16 22598, i32 7 + %tmp1.2 = insertelement <8 x i16> %tmp1.1, i16 %x, i32 2 + ret <8 x i16> %tmp1.2 +} + +; 1574023 -> 0x180487 (ILHU 24/IOHL 1159) +define <4 x i32> @test_v4i32_1(<4 x i32> %P, i32 %x) { +entry: + %tmp1 = insertelement <4 x i32> %P, i32 %x, i32 2 + %tmp1.1 = insertelement <4 x i32> %tmp1, i32 1574023, i32 1 + %tmp1.2 = insertelement <4 x i32> %tmp1.1, i32 %x, i32 3 + ret <4 x i32> %tmp1.2 +} + +; Should generate IL for the load +define <4 x i32> @test_v4i32_2(<4 x i32> %P, i32 %x) { +entry: + %tmp1 = insertelement <4 x i32> %P, i32 %x, i32 2 + %tmp1.1 = insertelement <4 x i32> %tmp1, i32 -39, i32 1 + %tmp1.2 = insertelement <4 x i32> %tmp1.1, i32 %x, i32 3 + ret <4 x i32> %tmp1.2 +} + +define void @variable_v16i8_1(<16 x i8>* %a, i32 %i) 
nounwind { +entry: + %arrayidx = getelementptr <16 x i8>* %a, i32 %i + %tmp2 = load <16 x i8>* %arrayidx + %tmp3 = insertelement <16 x i8> %tmp2, i8 1, i32 1 + %tmp8 = insertelement <16 x i8> %tmp3, i8 2, i32 11 + store <16 x i8> %tmp8, <16 x i8>* %arrayidx + ret void +} + +define void @variable_v8i16_1(<8 x i16>* %a, i32 %i) nounwind { +entry: + %arrayidx = getelementptr <8 x i16>* %a, i32 %i + %tmp2 = load <8 x i16>* %arrayidx + %tmp3 = insertelement <8 x i16> %tmp2, i16 1, i32 1 + %tmp8 = insertelement <8 x i16> %tmp3, i16 2, i32 6 + store <8 x i16> %tmp8, <8 x i16>* %arrayidx + ret void +} + +define void @variable_v4i32_1(<4 x i32>* %a, i32 %i) nounwind { +entry: + %arrayidx = getelementptr <4 x i32>* %a, i32 %i + %tmp2 = load <4 x i32>* %arrayidx + %tmp3 = insertelement <4 x i32> %tmp2, i32 1, i32 1 + %tmp8 = insertelement <4 x i32> %tmp3, i32 2, i32 2 + store <4 x i32> %tmp8, <4 x i32>* %arrayidx + ret void +} + +define void @variable_v4f32_1(<4 x float>* %a, i32 %i) nounwind { +entry: + %arrayidx = getelementptr <4 x float>* %a, i32 %i + %tmp2 = load <4 x float>* %arrayidx + %tmp3 = insertelement <4 x float> %tmp2, float 1.000000e+00, i32 1 + %tmp8 = insertelement <4 x float> %tmp3, float 2.000000e+00, i32 2 + store <4 x float> %tmp8, <4 x float>* %arrayidx + ret void +} + +define void @variable_v2i64_1(<2 x i64>* %a, i32 %i) nounwind { +entry: + %arrayidx = getelementptr <2 x i64>* %a, i32 %i + %tmp2 = load <2 x i64>* %arrayidx + %tmp3 = insertelement <2 x i64> %tmp2, i64 615, i32 0 + store <2 x i64> %tmp3, <2 x i64>* %arrayidx + ret void +} + +define void @variable_v2i64_2(<2 x i64>* %a, i32 %i) nounwind { +entry: + %arrayidx = getelementptr <2 x i64>* %a, i32 %i + %tmp2 = load <2 x i64>* %arrayidx + %tmp3 = insertelement <2 x i64> %tmp2, i64 615, i32 1 + store <2 x i64> %tmp3, <2 x i64>* %arrayidx + ret void +} + +define void @variable_v2f64_1(<2 x double>* %a, i32 %i) nounwind { +entry: + %arrayidx = getelementptr <2 x double>* %a, i32 %i + %tmp2 = load 
<2 x double>* %arrayidx + %tmp3 = insertelement <2 x double> %tmp2, double 1.000000e+00, i32 1 + store <2 x double> %tmp3, <2 x double>* %arrayidx + ret void +} + +define <4 x i32> @undef_v4i32( i32 %param ) { + ;CHECK: cwd + ;CHECK: lqa + ;CHECK: shufb + %val = insertelement <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 %param, i32 undef + ret <4 x i32> %val +} +
diff --git a/src/LLVM/test/CodeGen/Generic/2002-04-14-UnexpectedUnsignedType.ll b/src/LLVM/test/CodeGen/Generic/2002-04-14-UnexpectedUnsignedType.ll new file mode 100644 index 0000000..fbe572d --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2002-04-14-UnexpectedUnsignedType.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s + +; This caused the backend to assert out with: +; SparcInstrInfo.cpp:103: failed assertion `0 && "Unexpected unsigned type"' +; + +declare void @bar(i8*) + +define void @foo() { + %cast225 = inttoptr i64 123456 to i8* ; <i8*> [#uses=1] + call void @bar( i8* %cast225 ) + ret void +}
diff --git a/src/LLVM/test/CodeGen/Generic/2002-04-16-StackFrameSizeAlignment.ll b/src/LLVM/test/CodeGen/Generic/2002-04-16-StackFrameSizeAlignment.ll new file mode 100644 index 0000000..eab2c5f --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2002-04-16-StackFrameSizeAlignment.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s + +; Compiling this file produces: +; Sparc.cpp:91: failed assertion `(offset - OFFSET) % getStackFrameSizeAlignment() == 0' +; +declare i32 @SIM(i8*, i8*, i32, i32, i32, [256 x i32]*, i32, i32, i32) + +define void @foo() { +bb0: + %V = alloca [256 x i32], i32 256 ; <[256 x i32]*> [#uses=1] + call i32 @SIM( i8* null, i8* null, i32 0, i32 0, i32 0, [256 x i32]* %V, i32 0, i32 0, i32 2 ) ; <i32>:0 [#uses=0] + ret void +} +
diff --git a/src/LLVM/test/CodeGen/Generic/2003-05-27-phifcmpd.ll b/src/LLVM/test/CodeGen/Generic/2003-05-27-phifcmpd.ll new file mode 100644 index 0000000..d128990 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2003-05-27-phifcmpd.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s + +define void @QRiterate(i32 %p.1, double %tmp.212) { +entry: + %tmp.184 = icmp sgt i32 %p.1, 0 ; <i1> [#uses=1] + br i1 %tmp.184, label %shortcirc_next.1, label %shortcirc_done.1 + +shortcirc_next.1: ; preds = %shortcirc_done.1, %entry + %tmp.213 = fcmp une double %tmp.212, 0.000000e+00 ; <i1> [#uses=1] + br label %shortcirc_done.1 + +shortcirc_done.1: ; preds = %shortcirc_next.1, %entry + %val.1 = phi i1 [ false, %entry ], [ %tmp.213, %shortcirc_next.1 ] ; <i1> [#uses=1] + br i1 %val.1, label %shortcirc_next.1, label %exit.1 + +exit.1: ; preds = %shortcirc_done.1 + ret void +} +
diff --git a/src/LLVM/test/CodeGen/Generic/2003-05-27-useboolinotherbb.ll b/src/LLVM/test/CodeGen/Generic/2003-05-27-useboolinotherbb.ll new file mode 100644 index 0000000..b73d89f --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2003-05-27-useboolinotherbb.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s + +define void @QRiterate(double %tmp.212) { + %tmp.213 = fcmp une double %tmp.212, 0.000000e+00 ; <i1> [#uses=1] + br label %shortcirc_next.1 + +shortcirc_next.1: ; preds = %shortcirc_next.1, %0 + br i1 %tmp.213, label %shortcirc_next.1, label %exit.1 + +exit.1: ; preds = %shortcirc_next.1 + ret void +} +
diff --git a/src/LLVM/test/CodeGen/Generic/2003-05-27-usefsubasbool.ll b/src/LLVM/test/CodeGen/Generic/2003-05-27-usefsubasbool.ll new file mode 100644 index 0000000..2404b3e --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2003-05-27-usefsubasbool.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s + +define void @QRiterate(double %tmp.212) { +entry: + br label %shortcirc_next.1 + +shortcirc_next.1: ; preds = %shortcirc_next.1, %entry + %tmp.213 = fcmp une double %tmp.212, 0.000000e+00 ; <i1> [#uses=1] + br i1 %tmp.213, label %shortcirc_next.1, label %exit.1 + +exit.1: ; preds = %shortcirc_next.1 + ret void +} +
diff --git a/src/LLVM/test/CodeGen/Generic/2003-05-28-ManyArgs.ll b/src/LLVM/test/CodeGen/Generic/2003-05-28-ManyArgs.ll new file mode 100644 index 0000000..5591806 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2003-05-28-ManyArgs.ll
@@ -0,0 +1,153 @@ +; RUN: llc < %s + +;; Date: May 28, 2003. +;; From: test/Programs/External/SPEC/CINT2000/175.vpr.llvm.bc +;; Function: int %main(int %argc.1, sbyte** %argv.1) +;; +;; Error: A function call with about 56 arguments causes an assertion failure +;; in llc because the register allocator cannot find a register +;; not used explicitly by the call instruction. +;; +;; Cause: Regalloc was not keeping track of free registers correctly. +;; It was counting the registers allocated to all outgoing arguments, +;; even though most of those are copied to the stack (so those +;; registers are not actually used by the call instruction). +;; +;; Fixed: By rewriting selection and allocation so that selection explicitly +;; inserts all copy operations required for passing arguments and +;; for the return value of a call, copying to/from registers +;; and/or to stack locations as needed. +;; + %struct..s_annealing_sched = type { i32, float, float, float, float } + %struct..s_chan = type { i32, float, float, float, float } + %struct..s_det_routing_arch = type { i32, float, float, float, i32, i32, i16, i16, i16, float, float } + %struct..s_placer_opts = type { i32, float, i32, i32, i8*, i32, i32 } + %struct..s_router_opts = type { float, float, float, float, float, i32, i32, i32, i32 } + %struct..s_segment_inf = type { float, i32, i16, i16, float, float, i32, float, float } + %struct..s_switch_inf = type { i32, float, float, float, float } + +define i32 @main(i32 %argc.1, i8** %argv.1) { +entry: + %net_file = alloca [300 x i8] ; <[300 x i8]*> [#uses=1] + %place_file = alloca [300 x i8] ; <[300 x i8]*> [#uses=1] + %arch_file = alloca [300 x i8] ; <[300 x i8]*> [#uses=1] + %route_file = alloca [300 x i8] ; <[300 x i8]*> [#uses=1] + %full_stats = alloca i32 ; <i32*> [#uses=1] + %operation = alloca i32 ; <i32*> [#uses=1] + %verify_binary_search = alloca i32 ; <i32*> [#uses=1] + %show_graphics = alloca i32 ; <i32*> [#uses=1] + %annealing_sched = alloca 
%struct..s_annealing_sched ; <%struct..s_annealing_sched*> [#uses=5] + %placer_opts = alloca %struct..s_placer_opts ; <%struct..s_placer_opts*> [#uses=7] + %router_opts = alloca %struct..s_router_opts ; <%struct..s_router_opts*> [#uses=9] + %det_routing_arch = alloca %struct..s_det_routing_arch ; <%struct..s_det_routing_arch*> [#uses=11] + %segment_inf = alloca %struct..s_segment_inf* ; <%struct..s_segment_inf**> [#uses=1] + %timing_inf = alloca { i32, float, float, float, float, float, float, float, float, float, float } ; <{ i32, float, float, float, float, float, float, float, float, float, float }*> [#uses=11] + %tmp.101 = getelementptr %struct..s_placer_opts* %placer_opts, i64 0, i32 4 ; <i8**> [#uses=1] + %tmp.105 = getelementptr [300 x i8]* %net_file, i64 0, i64 0 ; <i8*> [#uses=1] + %tmp.106 = getelementptr [300 x i8]* %arch_file, i64 0, i64 0 ; <i8*> [#uses=1] + %tmp.107 = getelementptr [300 x i8]* %place_file, i64 0, i64 0 ; <i8*> [#uses=1] + %tmp.108 = getelementptr [300 x i8]* %route_file, i64 0, i64 0 ; <i8*> [#uses=1] + %tmp.109 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 0 ; <i32*> [#uses=1] + %tmp.112 = getelementptr %struct..s_placer_opts* %placer_opts, i64 0, i32 0 ; <i32*> [#uses=1] + %tmp.114 = getelementptr %struct..s_placer_opts* %placer_opts, i64 0, i32 6 ; <i32*> [#uses=1] + %tmp.118 = getelementptr %struct..s_router_opts* %router_opts, i64 0, i32 7 ; <i32*> [#uses=1] + %tmp.135 = load i32* %operation ; <i32> [#uses=1] + %tmp.137 = load i32* %tmp.112 ; <i32> [#uses=1] + %tmp.138 = getelementptr %struct..s_placer_opts* %placer_opts, i64 0, i32 1 ; <float*> [#uses=1] + %tmp.139 = load float* %tmp.138 ; <float> [#uses=1] + %tmp.140 = getelementptr %struct..s_placer_opts* %placer_opts, i64 0, i32 2 ; <i32*> [#uses=1] + %tmp.141 = load i32* %tmp.140 ; <i32> [#uses=1] + %tmp.142 = getelementptr %struct..s_placer_opts* %placer_opts, i64 0, i32 3 ; <i32*> [#uses=1] + %tmp.143 
= load i32* %tmp.142 ; <i32> [#uses=1] + %tmp.145 = load i8** %tmp.101 ; <i8*> [#uses=1] + %tmp.146 = getelementptr %struct..s_placer_opts* %placer_opts, i64 0, i32 5 ; <i32*> [#uses=1] + %tmp.147 = load i32* %tmp.146 ; <i32> [#uses=1] + %tmp.149 = load i32* %tmp.114 ; <i32> [#uses=1] + %tmp.154 = load i32* %full_stats ; <i32> [#uses=1] + %tmp.155 = load i32* %verify_binary_search ; <i32> [#uses=1] + %tmp.156 = getelementptr %struct..s_annealing_sched* %annealing_sched, i64 0, i32 0 ; <i32*> [#uses=1] + %tmp.157 = load i32* %tmp.156 ; <i32> [#uses=1] + %tmp.158 = getelementptr %struct..s_annealing_sched* %annealing_sched, i64 0, i32 1 ; <float*> [#uses=1] + %tmp.159 = load float* %tmp.158 ; <float> [#uses=1] + %tmp.160 = getelementptr %struct..s_annealing_sched* %annealing_sched, i64 0, i32 2 ; <float*> [#uses=1] + %tmp.161 = load float* %tmp.160 ; <float> [#uses=1] + %tmp.162 = getelementptr %struct..s_annealing_sched* %annealing_sched, i64 0, i32 3 ; <float*> [#uses=1] + %tmp.163 = load float* %tmp.162 ; <float> [#uses=1] + %tmp.164 = getelementptr %struct..s_annealing_sched* %annealing_sched, i64 0, i32 4 ; <float*> [#uses=1] + %tmp.165 = load float* %tmp.164 ; <float> [#uses=1] + %tmp.166 = getelementptr %struct..s_router_opts* %router_opts, i64 0, i32 0 ; <float*> [#uses=1] + %tmp.167 = load float* %tmp.166 ; <float> [#uses=1] + %tmp.168 = getelementptr %struct..s_router_opts* %router_opts, i64 0, i32 1 ; <float*> [#uses=1] + %tmp.169 = load float* %tmp.168 ; <float> [#uses=1] + %tmp.170 = getelementptr %struct..s_router_opts* %router_opts, i64 0, i32 2 ; <float*> [#uses=1] + %tmp.171 = load float* %tmp.170 ; <float> [#uses=1] + %tmp.172 = getelementptr %struct..s_router_opts* %router_opts, i64 0, i32 3 ; <float*> [#uses=1] + %tmp.173 = load float* %tmp.172 ; <float> [#uses=1] + %tmp.174 = getelementptr %struct..s_router_opts* %router_opts, i64 0, i32 4 ; <float*> [#uses=1] + %tmp.175 = load float* %tmp.174 ; <float> [#uses=1] + %tmp.176 = getelementptr 
%struct..s_router_opts* %router_opts, i64 0, i32 5 ; <i32*> [#uses=1] + %tmp.177 = load i32* %tmp.176 ; <i32> [#uses=1] + %tmp.178 = getelementptr %struct..s_router_opts* %router_opts, i64 0, i32 6 ; <i32*> [#uses=1] + %tmp.179 = load i32* %tmp.178 ; <i32> [#uses=1] + %tmp.181 = load i32* %tmp.118 ; <i32> [#uses=1] + %tmp.182 = getelementptr %struct..s_router_opts* %router_opts, i64 0, i32 8 ; <i32*> [#uses=1] + %tmp.183 = load i32* %tmp.182 ; <i32> [#uses=1] + %tmp.184 = getelementptr %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 0 ; <i32*> [#uses=1] + %tmp.185 = load i32* %tmp.184 ; <i32> [#uses=1] + %tmp.186 = getelementptr %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 1 ; <float*> [#uses=1] + %tmp.187 = load float* %tmp.186 ; <float> [#uses=1] + %tmp.188 = getelementptr %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 2 ; <float*> [#uses=1] + %tmp.189 = load float* %tmp.188 ; <float> [#uses=1] + %tmp.190 = getelementptr %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 3 ; <float*> [#uses=1] + %tmp.191 = load float* %tmp.190 ; <float> [#uses=1] + %tmp.192 = getelementptr %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 4 ; <i32*> [#uses=1] + %tmp.193 = load i32* %tmp.192 ; <i32> [#uses=1] + %tmp.194 = getelementptr %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 5 ; <i32*> [#uses=1] + %tmp.195 = load i32* %tmp.194 ; <i32> [#uses=1] + %tmp.196 = getelementptr %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 6 ; <i16*> [#uses=1] + %tmp.197 = load i16* %tmp.196 ; <i16> [#uses=1] + %tmp.198 = getelementptr %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 7 ; <i16*> [#uses=1] + %tmp.199 = load i16* %tmp.198 ; <i16> [#uses=1] + %tmp.200 = getelementptr %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 8 ; <i16*> [#uses=1] + %tmp.201 = load i16* %tmp.200 ; <i16> [#uses=1] + %tmp.202 = getelementptr %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 9 ; <float*> 
[#uses=1] + %tmp.203 = load float* %tmp.202 ; <float> [#uses=1] + %tmp.204 = getelementptr %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 10 ; <float*> [#uses=1] + %tmp.205 = load float* %tmp.204 ; <float> [#uses=1] + %tmp.206 = load %struct..s_segment_inf** %segment_inf ; <%struct..s_segment_inf*> [#uses=1] + %tmp.208 = load i32* %tmp.109 ; <i32> [#uses=1] + %tmp.209 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 1 ; <float*> [#uses=1] + %tmp.210 = load float* %tmp.209 ; <float> [#uses=1] + %tmp.211 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 2 ; <float*> [#uses=1] + %tmp.212 = load float* %tmp.211 ; <float> [#uses=1] + %tmp.213 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 3 ; <float*> [#uses=1] + %tmp.214 = load float* %tmp.213 ; <float> [#uses=1] + %tmp.215 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 4 ; <float*> [#uses=1] + %tmp.216 = load float* %tmp.215 ; <float> [#uses=1] + %tmp.217 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 5 ; <float*> [#uses=1] + %tmp.218 = load float* %tmp.217 ; <float> [#uses=1] + %tmp.219 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 6 ; <float*> [#uses=1] + %tmp.220 = load float* %tmp.219 ; <float> [#uses=1] + %tmp.221 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 7 ; <float*> [#uses=1] + %tmp.222 = load float* %tmp.221 ; <float> [#uses=1] + %tmp.223 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 8 ; <float*> [#uses=1] + %tmp.224 = load float* 
%tmp.223 ; <float> [#uses=1] + %tmp.225 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 9 ; <float*> [#uses=1] + %tmp.226 = load float* %tmp.225 ; <float> [#uses=1] + %tmp.227 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 10 ; <float*> [#uses=1] + %tmp.228 = load float* %tmp.227 ; <float> [#uses=1] + call void @place_and_route( i32 %tmp.135, i32 %tmp.137, float %tmp.139, i32 %tmp.141, i32 %tmp.143, i8* %tmp.145, i32 %tmp.147, i32 %tmp.149, i8* %tmp.107, i8* %tmp.105, i8* %tmp.106, i8* %tmp.108, i32 %tmp.154, i32 %tmp.155, i32 %tmp.157, float %tmp.159, float %tmp.161, float %tmp.163, float %tmp.165, float %tmp.167, float %tmp.169, float %tmp.171, float %tmp.173, float %tmp.175, i32 %tmp.177, i32 %tmp.179, i32 %tmp.181, i32 %tmp.183, i32 %tmp.185, float %tmp.187, float %tmp.189, float %tmp.191, i32 %tmp.193, i32 %tmp.195, i16 %tmp.197, i16 %tmp.199, i16 %tmp.201, float %tmp.203, float %tmp.205, %struct..s_segment_inf* %tmp.206, i32 %tmp.208, float %tmp.210, float %tmp.212, float %tmp.214, float %tmp.216, float %tmp.218, float %tmp.220, float %tmp.222, float %tmp.224, float %tmp.226, float %tmp.228 ) + %tmp.231 = load i32* %show_graphics ; <i32> [#uses=1] + %tmp.232 = icmp ne i32 %tmp.231, 0 ; <i1> [#uses=1] + br i1 %tmp.232, label %then.2, label %endif.2 + +then.2: ; preds = %entry + br label %endif.2 + +endif.2: ; preds = %then.2, %entry + ret i32 0 +} + +declare i32 @printf(i8*, ...) + +declare void @place_and_route(i32, i32, float, i32, i32, i8*, i32, i32, i8*, i8*, i8*, i8*, i32, i32, i32, float, float, float, float, float, float, float, float, float, i32, i32, i32, i32, i32, float, float, float, i32, i32, i16, i16, i16, float, float, %struct..s_segment_inf*, i32, float, float, float, float, float, float, float, float, float, float)
diff --git a/src/LLVM/test/CodeGen/Generic/2003-05-30-BadFoldGEP.ll b/src/LLVM/test/CodeGen/Generic/2003-05-30-BadFoldGEP.ll new file mode 100644 index 0000000..e9b20de --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2003-05-30-BadFoldGEP.ll
@@ -0,0 +1,39 @@ +; RUN: llc < %s + +;; Date: May 28, 2003. +;; From: test/Programs/External/SPEC/CINT2000/254.gap.llvm.bc +;; Function: int %OpenOutput(sbyte* %filename.1) +;; +;; Error: A sequence of GEPs is folded incorrectly by llc during selection +;; causing an assertion about a dynamic casting error. +;; This code sequence was produced (correctly) by preselection +;; from a nested pair of ConstantExpr getelementptrs. +;; The code below is the output of preselection. +;; The original ConstantExprs are included in a comment. +;; +;; Cause: FoldGetElemChain() was inserting an extra leading 0 even though +;; the first instruction in the sequence contributes no indices. +;; The next instruction contributes a leading non-zero so another +;; zero should not be added before it! +;; + %FileType = type { i32, [256 x i8], i32, i32, i32, i32 } +@OutputFiles = external global [16 x %FileType] ; <[16 x %FileType]*> [#uses=1] +@Output = internal global %FileType* null ; <%FileType**> [#uses=1] + +define internal i32 @OpenOutput(i8* %filename.1) { +entry: + %tmp.0 = load %FileType** @Output ; <%FileType*> [#uses=1] + %tmp.4 = getelementptr %FileType* %tmp.0, i64 1 ; <%FileType*> [#uses=1] + %addrOfGlobal = getelementptr [16 x %FileType]* @OutputFiles, i64 0 ; <[16 x %FileType]*> [#uses=1] + %constantGEP = getelementptr [16 x %FileType]* %addrOfGlobal, i64 1 ; <[16 x %FileType]*> [#uses=1] + %constantGEP.upgrd.1 = getelementptr [16 x %FileType]* %constantGEP, i64 0, i64 0 ; <%FileType*> [#uses=1] + %tmp.10 = icmp eq %FileType* %tmp.4, %constantGEP.upgrd.1 ; <i1> [#uses=1] + br i1 %tmp.10, label %return, label %endif.0 + +endif.0: ; preds = %entry + ret i32 0 + +return: ; preds = %entry + ret i32 1 +} +
diff --git a/src/LLVM/test/CodeGen/Generic/2003-05-30-BadPreselectPhi.ll b/src/LLVM/test/CodeGen/Generic/2003-05-30-BadPreselectPhi.ll new file mode 100644 index 0000000..13e53b9 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2003-05-30-BadPreselectPhi.ll
@@ -0,0 +1,33 @@ +; RUN: llc < %s + +;; Date: May 28, 2003. +;; From: test/Programs/SingleSource/richards_benchmark.c +;; Function: struct task *handlerfn(struct packet *pkt) +;; +;; Error: PreSelection puts the arguments of the Phi just before +;; the Phi instead of in predecessor blocks. This later +;; causes llc to produces an invalid register <NULL VALUE> +;; for the phi arguments. + + %struct..packet = type { %struct..packet*, i32, i32, i32, [4 x i8] } + %struct..task = type { %struct..task*, i32, i32, %struct..packet*, i32, %struct..task* (%struct..packet*)*, i32, i32 } +@v1 = external global i32 ; <i32*> [#uses=1] +@v2 = external global i32 ; <i32*> [#uses=1] + +define %struct..task* @handlerfn(%struct..packet* %pkt.2) { +entry: + %tmp.1 = icmp ne %struct..packet* %pkt.2, null ; <i1> [#uses=1] + br i1 %tmp.1, label %cond_false, label %cond_continue + +cond_false: ; preds = %entry + br label %cond_continue + +cond_continue: ; preds = %cond_false, %entry + %mem_tmp.0 = phi i32* [ @v2, %cond_false ], [ @v1, %entry ] ; <i32*> [#uses=1] + %tmp.12 = bitcast i32* %mem_tmp.0 to %struct..packet* ; <%struct..packet*> [#uses=1] + call void @append( %struct..packet* %pkt.2, %struct..packet* %tmp.12 ) + ret %struct..task* null +} + +declare void @append(%struct..packet*, %struct..packet*) +
diff --git a/src/LLVM/test/CodeGen/Generic/2003-07-06-BadIntCmp.ll b/src/LLVM/test/CodeGen/Generic/2003-07-06-BadIntCmp.ll new file mode 100644 index 0000000..aea41c9 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2003-07-06-BadIntCmp.ll
@@ -0,0 +1,51 @@ +; RUN: llc < %s + +;; Date: May 28, 2003. +;; From: test/Programs/MultiSource/Olden-perimeter/maketree.c +;; Function: int CheckOutside(int x, int y) +;; +;; Note: The .ll code below for this regression test has identical +;; behavior to the above function up to the error, but then prints +;; true/false on the two branches. +;; +;; Error: llc generates a branch-on-xcc instead of branch-on-icc, which +;; is wrong because the value being compared (int euclid = x*x + y*y) +;; overflows, so that the 64-bit and 32-bit compares are not equal. + +@.str_1 = internal constant [6 x i8] c"true\0A\00" ; <[6 x i8]*> [#uses=1] +@.str_2 = internal constant [7 x i8] c"false\0A\00" ; <[7 x i8]*> [#uses=1] + +declare i32 @printf(i8*, ...) + +define internal void @__main() { +entry: + ret void +} + +define internal void @CheckOutside(i32 %x.1, i32 %y.1) { +entry: + %tmp.2 = mul i32 %x.1, %x.1 ; <i32> [#uses=1] + %tmp.5 = mul i32 %y.1, %y.1 ; <i32> [#uses=1] + %tmp.6 = add i32 %tmp.2, %tmp.5 ; <i32> [#uses=1] + %tmp.8 = icmp sle i32 %tmp.6, 4194304 ; <i1> [#uses=1] + br i1 %tmp.8, label %then, label %else + +then: ; preds = %entry + %tmp.11 = call i32 (i8*, ...)* @printf( i8* getelementptr ([6 x i8]* @.str_1, i64 0, i64 0) ) ; <i32> [#uses=0] + br label %UnifiedExitNode + +else: ; preds = %entry + %tmp.13 = call i32 (i8*, ...)* @printf( i8* getelementptr ([7 x i8]* @.str_2, i64 0, i64 0) ) ; <i32> [#uses=0] + br label %UnifiedExitNode + +UnifiedExitNode: ; preds = %else, %then + ret void +} + +define i32 @main() { +entry: + call void @__main( ) + call void @CheckOutside( i32 2097152, i32 2097152 ) + ret i32 0 +} +
diff --git a/src/LLVM/test/CodeGen/Generic/2003-07-07-BadLongConst.ll b/src/LLVM/test/CodeGen/Generic/2003-07-07-BadLongConst.ll new file mode 100644 index 0000000..98b5c0f --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2003-07-07-BadLongConst.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s + +@.str_1 = internal constant [42 x i8] c" ui = %u (0x%x)\09\09UL-ui = %lld (0x%llx)\0A\00" ; <[42 x i8]*> [#uses=1] + +declare i32 @printf(i8*, ...) + +define internal i64 @getL() { +entry: + ret i64 -5787213826675591005 +} + +define i32 @main(i32 %argc.1, i8** %argv.1) { +entry: + %tmp.11 = call i64 @getL( ) ; <i64> [#uses=2] + %tmp.5 = trunc i64 %tmp.11 to i32 ; <i32> [#uses=2] + %tmp.23 = and i64 %tmp.11, -4294967296 ; <i64> [#uses=2] + %tmp.16 = call i32 (i8*, ...)* @printf( i8* getelementptr ([42 x i8]* @.str_1, i64 0, i64 0), i32 %tmp.5, i32 %tmp.5, i64 %tmp.23, i64 %tmp.23 ) ; <i32> [#uses=0] + ret i32 0 +} +
diff --git a/src/LLVM/test/CodeGen/Generic/2003-07-08-BadCastToBool.ll b/src/LLVM/test/CodeGen/Generic/2003-07-08-BadCastToBool.ll new file mode 100644 index 0000000..5557123 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2003-07-08-BadCastToBool.ll
@@ -0,0 +1,34 @@ +; RUN: llc < %s + +;; Date: Jul 8, 2003. +;; From: test/Programs/MultiSource/Olden-perimeter +;; Function: int %adj(uint %d.1, uint %ct.1) +;; +;; Errors: (1) cast-int-to-bool was being treated as a NOP (i.e., the int +;; register was treated as effectively true if non-zero). +;; This cannot be used for later boolean operations. +;; (2) (A or NOT(B)) was being folded into A orn B, which is ok +;; for bitwise operations but not booleans! For booleans, +;; the result has to be compared with 0. + +@.str_1 = internal constant [30 x i8] c"d = %d, ct = %d, d ^ ct = %d\0A\00" + +declare i32 @printf(i8*, ...) + +define i32 @adj(i32 %d.1, i32 %ct.1) { +entry: + %tmp.19 = icmp eq i32 %ct.1, 2 ; <i1> [#uses=1] + %tmp.22.not = trunc i32 %ct.1 to i1 ; <i1> [#uses=1] + %tmp.221 = xor i1 %tmp.22.not, true ; <i1> [#uses=1] + %tmp.26 = or i1 %tmp.19, %tmp.221 ; <i1> [#uses=1] + %tmp.27 = zext i1 %tmp.26 to i32 ; <i32> [#uses=1] + ret i32 %tmp.27 +} + +define i32 @main() { +entry: + %result = call i32 @adj( i32 3, i32 2 ) ; <i32> [#uses=1] + %tmp.0 = call i32 (i8*, ...)* @printf( i8* getelementptr ([30 x i8]* @.str_1, i64 0, i64 0), i32 3, i32 2, i32 %result ) ; <i32> [#uses=0] + ret i32 0 +} +
diff --git a/src/LLVM/test/CodeGen/Generic/2003-07-29-BadConstSbyte.ll b/src/LLVM/test/CodeGen/Generic/2003-07-29-BadConstSbyte.ll new file mode 100644 index 0000000..91ec967 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2003-07-29-BadConstSbyte.ll
@@ -0,0 +1,40 @@ +; RUN: llc < %s + +;; Date: Jul 29, 2003. +;; From: test/Programs/MultiSource/Ptrdist-bc +;; Function: --- +;; Global: %yy_ec = internal constant [256 x sbyte] ... +;; A subset of this array is used in the test below. +;; +;; Error: Character '\07' was being emitted as '\a', at yy_ec[38]. +;; When loaded, this returned the value 97 ('a'), instead of 7. +;; +;; Incorrect LLC Output for the array yy_ec was: +;; yy_ec_1094: +;; .ascii "\000\001\001\001\001\001\001\001\001\002\003\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\002\004\005\001\001\006\a\001\b\t\n\v\f\r\016\017\020\020\020\020\020\020\020\020\020\020\001\021\022\023\024\001\001\025\025\025\025\025\025\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\026\027\030\031\032\001\033\034\035\036\037 !\"#$%&'()*+,-./$0$1$234\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001" +;; + +@yy_ec = internal constant [6 x i8] c"\06\07\01\08\01\09" ; <[6 x i8]*> [#uses=1] +@.str_3 = internal constant [8 x i8] c"[%d] = \00" ; <[8 x i8]*> [#uses=1] +@.str_4 = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1] + +declare i32 @printf(i8*, ...) 
+ +define i32 @main() { +entry: + br label %loopentry + +loopentry: ; preds = %loopentry, %entry + %i = phi i64 [ 0, %entry ], [ %inc.i, %loopentry ] ; <i64> [#uses=3] + %cptr = getelementptr [6 x i8]* @yy_ec, i64 0, i64 %i ; <i8*> [#uses=1] + %c = load i8* %cptr ; <i8> [#uses=1] + %ignore = call i32 (i8*, ...)* @printf( i8* getelementptr ([8 x i8]* @.str_3, i64 0, i64 0), i64 %i ) ; <i32> [#uses=0] + %ignore2 = call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @.str_4, i64 0, i64 0), i8 %c ) ; <i32> [#uses=0] + %inc.i = add i64 %i, 1 ; <i64> [#uses=2] + %done = icmp sle i64 %inc.i, 5 ; <i1> [#uses=1] + br i1 %done, label %loopentry, label %exit.1 + +exit.1: ; preds = %loopentry + ret i32 0 +} +
diff --git a/src/LLVM/test/CodeGen/Generic/2004-05-09-LiveVarPartialRegister.ll b/src/LLVM/test/CodeGen/Generic/2004-05-09-LiveVarPartialRegister.ll new file mode 100644 index 0000000..49e3e8b --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2004-05-09-LiveVarPartialRegister.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s +@global_long_1 = linkonce global i64 7 ; <i64*> [#uses=1] +@global_long_2 = linkonce global i64 49 ; <i64*> [#uses=1] + +define i32 @main() { + %l1 = load i64* @global_long_1 ; <i64> [#uses=1] + %l2 = load i64* @global_long_2 ; <i64> [#uses=1] + %cond = icmp sle i64 %l1, %l2 ; <i1> [#uses=1] + %cast2 = zext i1 %cond to i32 ; <i32> [#uses=1] + %RV = sub i32 1, %cast2 ; <i32> [#uses=1] + ret i32 %RV +} +
diff --git a/src/LLVM/test/CodeGen/Generic/2005-01-18-SetUO-InfLoop.ll b/src/LLVM/test/CodeGen/Generic/2005-01-18-SetUO-InfLoop.ll new file mode 100644 index 0000000..97645fb --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2005-01-18-SetUO-InfLoop.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s + +define void @intersect_pixel() { +entry: + %tmp125 = fcmp uno double 0.000000e+00, 0.000000e+00 ; <i1> [#uses=1] + %tmp126 = or i1 %tmp125, false ; <i1> [#uses=1] + %tmp126.not = xor i1 %tmp126, true ; <i1> [#uses=1] + %brmerge1 = or i1 %tmp126.not, false ; <i1> [#uses=1] + br i1 %brmerge1, label %bb154, label %cond_false133 + +cond_false133: ; preds = %entry + ret void + +bb154: ; preds = %entry + %tmp164 = icmp eq i32 0, 0 ; <i1> [#uses=0] + ret void +} + +declare i1 @llvm.isunordered.f64(double, double) +
diff --git a/src/LLVM/test/CodeGen/Generic/2005-04-09-GlobalInPHI.ll b/src/LLVM/test/CodeGen/Generic/2005-04-09-GlobalInPHI.ll new file mode 100644 index 0000000..a2847cf --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2005-04-09-GlobalInPHI.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s + %struct.TypHeader = type { i32, %struct.TypHeader**, [3 x i8], i8 } +@.str_67 = external global [4 x i8] ; <[4 x i8]*> [#uses=1] +@.str_87 = external global [17 x i8] ; <[17 x i8]*> [#uses=1] + +define void @PrBinop() { +entry: + br i1 false, label %cond_true, label %else.0 + +cond_true: ; preds = %entry + br label %else.0 + +else.0: ; preds = %cond_true, %entry + %tmp.167.1 = phi i32 [ ptrtoint ([17 x i8]* @.str_87 to i32), %entry ], [ 0, %cond_true ] ; <i32> [#uses=0] + call void @Pr( i8* getelementptr ([4 x i8]* @.str_67, i32 0, i32 0), i32 0, i32 0 ) + ret void +} + +declare void @Pr(i8*, i32, i32) +
diff --git a/src/LLVM/test/CodeGen/Generic/2005-10-18-ZeroSizeStackObject.ll b/src/LLVM/test/CodeGen/Generic/2005-10-18-ZeroSizeStackObject.ll new file mode 100644 index 0000000..bc41ee5 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2005-10-18-ZeroSizeStackObject.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s + +define void @test() { + %X = alloca { } ; <{ }*> [#uses=0] + ret void +}
diff --git a/src/LLVM/test/CodeGen/Generic/2005-10-21-longlonggtu.ll b/src/LLVM/test/CodeGen/Generic/2005-10-21-longlonggtu.ll new file mode 100644 index 0000000..ca5b7b5 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2005-10-21-longlonggtu.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s + +define float @t(i64 %u_arg) { + %u = bitcast i64 %u_arg to i64 ; <i64> [#uses=1] + %tmp5 = add i64 %u, 9007199254740991 ; <i64> [#uses=1] + %tmp = icmp ugt i64 %tmp5, 18014398509481982 ; <i1> [#uses=1] + br i1 %tmp, label %T, label %F + +T: ; preds = %0 + ret float 1.000000e+00 + +F: ; preds = %0 + call float @t( i64 0 ) ; <float>:1 [#uses=0] + ret float 0.000000e+00 +} +
diff --git a/src/LLVM/test/CodeGen/Generic/2005-12-01-Crash.ll b/src/LLVM/test/CodeGen/Generic/2005-12-01-Crash.ll new file mode 100644 index 0000000..d4000bd --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2005-12-01-Crash.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s +@str = external global [36 x i8] ; <[36 x i8]*> [#uses=0] +@str.upgrd.1 = external global [29 x i8] ; <[29 x i8]*> [#uses=0] +@str1 = external global [29 x i8] ; <[29 x i8]*> [#uses=0] +@str2 = external global [29 x i8] ; <[29 x i8]*> [#uses=1] +@str.upgrd.2 = external global [2 x i8] ; <[2 x i8]*> [#uses=0] +@str3 = external global [2 x i8] ; <[2 x i8]*> [#uses=0] +@str4 = external global [2 x i8] ; <[2 x i8]*> [#uses=0] +@str5 = external global [2 x i8] ; <[2 x i8]*> [#uses=0] + +define void @printArgsNoRet(i32 %a1, float %a2, i8 %a3, double %a4, i8* %a5, i32 %a6, float %a7, i8 %a8, double %a9, i8* %a10, i32 %a11, float %a12, i8 %a13, double %a14, i8* %a15) { +entry: + %tmp17 = sext i8 %a13 to i32 ; <i32> [#uses=1] + %tmp23 = call i32 (i8*, ...)* @printf( i8* getelementptr ([29 x i8]* @str2, i32 0, i64 0), i32 %a11, double 0.000000e+00, i32 %tmp17, double %a14, i32 0 ) ; <i32> [#uses=0] + ret void +} + +declare i32 @printf(i8*, ...) + +declare i32 @main(i32, i8**)
diff --git a/src/LLVM/test/CodeGen/Generic/2005-12-12-ExpandSextInreg.ll b/src/LLVM/test/CodeGen/Generic/2005-12-12-ExpandSextInreg.ll new file mode 100644 index 0000000..f783650 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2005-12-12-ExpandSextInreg.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s + +define i64 @test(i64 %A) { + %B = trunc i64 %A to i8 ; <i8> [#uses=1] + %C = sext i8 %B to i64 ; <i64> [#uses=1] + ret i64 %C +}
diff --git a/src/LLVM/test/CodeGen/Generic/2006-01-12-BadSetCCFold.ll b/src/LLVM/test/CodeGen/Generic/2006-01-12-BadSetCCFold.ll new file mode 100644 index 0000000..f41923f --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2006-01-12-BadSetCCFold.ll
@@ -0,0 +1,35 @@ +; RUN: llc < %s +; ModuleID = '2006-01-12-BadSetCCFold.ll' + %struct.node_t = type { double*, %struct.node_t*, %struct.node_t**, double**, double*, i32, i32 } + +define void @main() { +entry: + br i1 false, label %then.2.i, label %endif.2.i + +then.2.i: ; preds = %entry + br label %dealwithargs.exit + +endif.2.i: ; preds = %entry + br i1 false, label %then.3.i, label %dealwithargs.exit + +then.3.i: ; preds = %endif.2.i + br label %dealwithargs.exit + +dealwithargs.exit: ; preds = %then.3.i, %endif.2.i, %then.2.i + %n_nodes.4 = phi i32 [ 64, %then.3.i ], [ 64, %then.2.i ], [ 64, %endif.2.i ] ; <i32> [#uses=1] + %tmp.14.i1134.i.i = icmp sgt i32 %n_nodes.4, 1 ; <i1> [#uses=2] + br i1 %tmp.14.i1134.i.i, label %no_exit.i12.i.i, label %fill_table.exit22.i.i + +no_exit.i12.i.i: ; preds = %no_exit.i12.i.i, %dealwithargs.exit + br i1 false, label %fill_table.exit22.i.i, label %no_exit.i12.i.i + +fill_table.exit22.i.i: ; preds = %no_exit.i12.i.i, %dealwithargs.exit + %cur_node.0.i8.1.i.i = phi %struct.node_t* [ undef, %dealwithargs.exit ], [ null, %no_exit.i12.i.i ] ; <%struct.node_t*> [#uses=0] + br i1 %tmp.14.i1134.i.i, label %no_exit.i.preheader.i.i, label %make_tables.exit.i + +no_exit.i.preheader.i.i: ; preds = %fill_table.exit22.i.i + ret void + +make_tables.exit.i: ; preds = %fill_table.exit22.i.i + ret void +}
diff --git a/src/LLVM/test/CodeGen/Generic/2006-01-18-InvalidBranchOpcodeAssert.ll b/src/LLVM/test/CodeGen/Generic/2006-01-18-InvalidBranchOpcodeAssert.ll new file mode 100644 index 0000000..687e290 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2006-01-18-InvalidBranchOpcodeAssert.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s +; This crashed the PPC backend. + +define void @test() { + %tmp125 = fcmp uno double 0.000000e+00, 0.000000e+00 ; <i1> [#uses=1] + br i1 %tmp125, label %bb154, label %cond_false133 + +cond_false133: ; preds = %0 + ret void + +bb154: ; preds = %0 + %tmp164 = icmp eq i32 0, 0 ; <i1> [#uses=0] + ret void +} +
diff --git a/src/LLVM/test/CodeGen/Generic/2006-02-12-InsertLibcall.ll b/src/LLVM/test/CodeGen/Generic/2006-02-12-InsertLibcall.ll new file mode 100644 index 0000000..67817e5 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2006-02-12-InsertLibcall.ll
@@ -0,0 +1,60 @@ +; RUN: llc < %s +@G = external global i32 ; <i32*> [#uses=1] + +define void @encode_one_frame(i64 %tmp.2i) { +entry: + %tmp.9 = icmp eq i32 0, 0 ; <i1> [#uses=1] + br i1 %tmp.9, label %endif.0, label %shortcirc_next.0 + +then.5.i: ; preds = %shortcirc_next.i + %tmp.114.i = sdiv i64 %tmp.2i, 3 ; <i64> [#uses=1] + %tmp.111.i = call i64 @lseek( i32 0, i64 %tmp.114.i, i32 1 ) ; <i64> [#uses=0] + ret void + +shortcirc_next.0: ; preds = %entry + ret void + +endif.0: ; preds = %entry + %tmp.324.i = icmp eq i32 0, 0 ; <i1> [#uses=2] + %tmp.362.i = icmp slt i32 0, 0 ; <i1> [#uses=1] + br i1 %tmp.324.i, label %else.4.i, label %then.11.i37 + +then.11.i37: ; preds = %endif.0 + ret void + +else.4.i: ; preds = %endif.0 + br i1 %tmp.362.i, label %else.5.i, label %then.12.i + +then.12.i: ; preds = %else.4.i + ret void + +else.5.i: ; preds = %else.4.i + br i1 %tmp.324.i, label %then.0.i40, label %then.17.i + +then.17.i: ; preds = %else.5.i + ret void + +then.0.i40: ; preds = %else.5.i + %tmp.8.i42 = icmp eq i32 0, 0 ; <i1> [#uses=1] + br i1 %tmp.8.i42, label %else.1.i56, label %then.1.i52 + +then.1.i52: ; preds = %then.0.i40 + ret void + +else.1.i56: ; preds = %then.0.i40 + %tmp.28.i = load i32* @G ; <i32> [#uses=1] + %tmp.29.i = icmp eq i32 %tmp.28.i, 1 ; <i1> [#uses=1] + br i1 %tmp.29.i, label %shortcirc_next.i, label %shortcirc_done.i + +shortcirc_next.i: ; preds = %else.1.i56 + %tmp.34.i = icmp eq i32 0, 3 ; <i1> [#uses=1] + br i1 %tmp.34.i, label %then.5.i, label %endif.5.i + +shortcirc_done.i: ; preds = %else.1.i56 + ret void + +endif.5.i: ; preds = %shortcirc_next.i + ret void +} + +declare i64 @lseek(i32, i64, i32)
diff --git a/src/LLVM/test/CodeGen/Generic/2006-03-01-dagcombineinfloop.ll b/src/LLVM/test/CodeGen/Generic/2006-03-01-dagcombineinfloop.ll new file mode 100644 index 0000000..ed53266 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2006-03-01-dagcombineinfloop.ll
@@ -0,0 +1,95 @@ +; RUN: llc < %s +; Infinite loop in the dag combiner, reduced from 176.gcc. +%struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] } + %struct.anon = type { i32 } + %struct.lang_decl = type opaque + %struct.lang_type = type { i32, [1 x %struct.tree_node*] } + %struct.obstack = type { i32, %struct._obstack_chunk*, i8*, i8*, i8*, i32, i32, %struct._obstack_chunk* (...)*, void (...)*, i8*, i8 } + %struct.rtx_def = type { i16, i8, i8, [1 x %struct.anon] } + %struct.tree_common = type { %struct.tree_node*, %struct.tree_node*, i8, i8, i8, i8 } + %struct.tree_decl = type { [12 x i8], i8*, i32, %struct.tree_node*, i32, i8, i8, i8, i8, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.anon, { %struct.rtx_def* }, %struct.tree_node*, %struct.lang_decl* } + %struct.tree_list = type { [12 x i8], %struct.tree_node*, %struct.tree_node* } + %struct.tree_node = type { %struct.tree_decl } + %struct.tree_type = type { [12 x i8], %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i32, i8, i8, i8, i8, i32, %struct.tree_node*, %struct.tree_node*, %struct.anon, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.obstack*, %struct.lang_type* } +@void_type_node = external global %struct.tree_node* ; <%struct.tree_node**> [#uses=1] +@char_type_node = external global %struct.tree_node* ; <%struct.tree_node**> [#uses=1] +@short_integer_type_node = external global %struct.tree_node* ; <%struct.tree_node**> [#uses=1] +@short_unsigned_type_node = external global %struct.tree_node* ; <%struct.tree_node**> [#uses=1] +@float_type_node = external global %struct.tree_node* ; <%struct.tree_node**> [#uses=1] +@signed_char_type_node = external global %struct.tree_node* ; <%struct.tree_node**> 
[#uses=1] +@unsigned_char_type_node = external global %struct.tree_node* ; <%struct.tree_node**> [#uses=1] + +define fastcc i32 @self_promoting_args_p(%struct.tree_node* %parms) { +entry: + %tmp915 = icmp eq %struct.tree_node* %parms, null ; <i1> [#uses=1] + br i1 %tmp915, label %return, label %cond_true92.preheader + +cond_true: ; preds = %cond_true92 + %tmp9.not = icmp ne %struct.tree_node* %tmp2, %tmp7 ; <i1> [#uses=1] + %tmp14 = icmp eq %struct.tree_node* %tmp2, null ; <i1> [#uses=1] + %bothcond = or i1 %tmp9.not, %tmp14 ; <i1> [#uses=1] + br i1 %bothcond, label %return, label %cond_next18 + +cond_next12: ; preds = %cond_true92 + %tmp14.old = icmp eq %struct.tree_node* %tmp2, null ; <i1> [#uses=1] + br i1 %tmp14.old, label %return, label %cond_next18 + +cond_next18: ; preds = %cond_next12, %cond_true + %tmp20 = bitcast %struct.tree_node* %tmp2 to %struct.tree_type* ; <%struct.tree_type*> [#uses=1] + %tmp21 = getelementptr %struct.tree_type* %tmp20, i32 0, i32 17 ; <%struct.tree_node**> [#uses=1] + %tmp22 = load %struct.tree_node** %tmp21 ; <%struct.tree_node*> [#uses=6] + %tmp24 = icmp eq %struct.tree_node* %tmp22, %tmp23 ; <i1> [#uses=1] + br i1 %tmp24, label %return, label %cond_next28 + +cond_next28: ; preds = %cond_next18 + %tmp30 = bitcast %struct.tree_node* %tmp2 to %struct.tree_common* ; <%struct.tree_common*> [#uses=1] + %tmp = getelementptr %struct.tree_common* %tmp30, i32 0, i32 2 ; <i8*> [#uses=1] + %tmp.upgrd.1 = bitcast i8* %tmp to i32* ; <i32*> [#uses=1] + %tmp.upgrd.2 = load i32* %tmp.upgrd.1 ; <i32> [#uses=1] + %tmp32 = trunc i32 %tmp.upgrd.2 to i8 ; <i8> [#uses=1] + %tmp33 = icmp eq i8 %tmp32, 7 ; <i1> [#uses=1] + br i1 %tmp33, label %cond_true34, label %cond_next84 + +cond_true34: ; preds = %cond_next28 + %tmp40 = icmp eq %struct.tree_node* %tmp22, %tmp39 ; <i1> [#uses=1] + %tmp49 = icmp eq %struct.tree_node* %tmp22, %tmp48 ; <i1> [#uses=1] + %bothcond6 = or i1 %tmp40, %tmp49 ; <i1> [#uses=1] + %tmp58 = icmp eq %struct.tree_node* %tmp22, 
%tmp57 ; <i1> [#uses=1] + %bothcond7 = or i1 %bothcond6, %tmp58 ; <i1> [#uses=1] + %tmp67 = icmp eq %struct.tree_node* %tmp22, %tmp66 ; <i1> [#uses=1] + %bothcond8 = or i1 %bothcond7, %tmp67 ; <i1> [#uses=1] + %tmp76 = icmp eq %struct.tree_node* %tmp22, %tmp75 ; <i1> [#uses=1] + %bothcond9 = or i1 %bothcond8, %tmp76 ; <i1> [#uses=2] + %brmerge = or i1 %bothcond9, %tmp.upgrd.6 ; <i1> [#uses=1] + %bothcond9.upgrd.3 = zext i1 %bothcond9 to i32 ; <i32> [#uses=1] + %.mux = xor i32 %bothcond9.upgrd.3, 1 ; <i32> [#uses=1] + br i1 %brmerge, label %return, label %cond_true92 + +cond_next84: ; preds = %cond_next28 + br i1 %tmp.upgrd.6, label %return, label %cond_true92 + +cond_true92.preheader: ; preds = %entry + %tmp7 = load %struct.tree_node** @void_type_node ; <%struct.tree_node*> [#uses=1] + %tmp23 = load %struct.tree_node** @float_type_node ; <%struct.tree_node*> [#uses=1] + %tmp39 = load %struct.tree_node** @char_type_node ; <%struct.tree_node*> [#uses=1] + %tmp48 = load %struct.tree_node** @signed_char_type_node ; <%struct.tree_node*> [#uses=1] + %tmp57 = load %struct.tree_node** @unsigned_char_type_node ; <%struct.tree_node*> [#uses=1] + %tmp66 = load %struct.tree_node** @short_integer_type_node ; <%struct.tree_node*> [#uses=1] + %tmp75 = load %struct.tree_node** @short_unsigned_type_node ; <%struct.tree_node*> [#uses=1] + br label %cond_true92 + +cond_true92: ; preds = %cond_true92.preheader, %cond_next84, %cond_true34 + %t.0.0 = phi %struct.tree_node* [ %parms, %cond_true92.preheader ], [ %tmp6, %cond_true34 ], [ %tmp6, %cond_next84 ] ; <%struct.tree_node*> [#uses=2] + %tmp.upgrd.4 = bitcast %struct.tree_node* %t.0.0 to %struct.tree_list* ; <%struct.tree_list*> [#uses=1] + %tmp.upgrd.5 = getelementptr %struct.tree_list* %tmp.upgrd.4, i32 0, i32 2 ; <%struct.tree_node**> [#uses=1] + %tmp2 = load %struct.tree_node** %tmp.upgrd.5 ; <%struct.tree_node*> [#uses=5] + %tmp4 = bitcast %struct.tree_node* %t.0.0 to %struct.tree_common* ; <%struct.tree_common*> [#uses=1] + 
%tmp5 = getelementptr %struct.tree_common* %tmp4, i32 0, i32 0 ; <%struct.tree_node**> [#uses=1] + %tmp6 = load %struct.tree_node** %tmp5 ; <%struct.tree_node*> [#uses=3] + %tmp.upgrd.6 = icmp eq %struct.tree_node* %tmp6, null ; <i1> [#uses=3] + br i1 %tmp.upgrd.6, label %cond_true, label %cond_next12 + +return: ; preds = %cond_next84, %cond_true34, %cond_next18, %cond_next12, %cond_true, %entry + %retval.0 = phi i32 [ 1, %entry ], [ 1, %cond_next84 ], [ %.mux, %cond_true34 ], [ 0, %cond_next18 ], [ 0, %cond_next12 ], [ 0, %cond_true ] ; <i32> [#uses=1] + ret i32 %retval.0 +}
diff --git a/src/LLVM/test/CodeGen/Generic/2006-04-26-SetCCAnd.ll b/src/LLVM/test/CodeGen/Generic/2006-04-26-SetCCAnd.ll new file mode 100644 index 0000000..141084b --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2006-04-26-SetCCAnd.ll
@@ -0,0 +1,40 @@ +; RUN: llc < %s +; PR748 +@G = external global i16 ; <i16*> [#uses=1] + +define void @OmNewObjHdr() { +entry: + br i1 false, label %endif.4, label %then.0 + +then.0: ; preds = %entry + ret void + +endif.4: ; preds = %entry + br i1 false, label %else.3, label %shortcirc_next.3 + +shortcirc_next.3: ; preds = %endif.4 + ret void + +else.3: ; preds = %endif.4 + switch i32 0, label %endif.10 [ + i32 5001, label %then.10 + i32 -5008, label %then.10 + ] + +then.10: ; preds = %else.3, %else.3 + %tmp.112 = load i16* null ; <i16> [#uses=2] + %tmp.113 = load i16* @G ; <i16> [#uses=2] + %tmp.114 = icmp ugt i16 %tmp.112, %tmp.113 ; <i1> [#uses=1] + %tmp.120 = icmp ult i16 %tmp.112, %tmp.113 ; <i1> [#uses=1] + %bothcond = and i1 %tmp.114, %tmp.120 ; <i1> [#uses=1] + br i1 %bothcond, label %else.4, label %then.11 + +then.11: ; preds = %then.10 + ret void + +else.4: ; preds = %then.10 + ret void + +endif.10: ; preds = %else.3 + ret void +}
diff --git a/src/LLVM/test/CodeGen/Generic/2006-04-28-Sign-extend-bool.ll b/src/LLVM/test/CodeGen/Generic/2006-04-28-Sign-extend-bool.ll new file mode 100644 index 0000000..78d74ba --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2006-04-28-Sign-extend-bool.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s + +define i32 @test(i32 %tmp93) { + %tmp98 = shl i32 %tmp93, 31 ; <i32> [#uses=1] + %tmp99 = ashr i32 %tmp98, 31 ; <i32> [#uses=1] + %tmp99.upgrd.1 = trunc i32 %tmp99 to i8 ; <i8> [#uses=1] + %tmp99100 = sext i8 %tmp99.upgrd.1 to i32 ; <i32> [#uses=1] + ret i32 %tmp99100 +}
diff --git a/src/LLVM/test/CodeGen/Generic/2006-05-06-GEP-Cast-Sink-Crash.ll b/src/LLVM/test/CodeGen/Generic/2006-05-06-GEP-Cast-Sink-Crash.ll new file mode 100644 index 0000000..7a542b6 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2006-05-06-GEP-Cast-Sink-Crash.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s +%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } + %struct.SYMBOL_TABLE_ENTRY = type { [9 x i8], [9 x i8], i32, i32, i32, %struct.SYMBOL_TABLE_ENTRY* } + %struct.__sFILEX = type opaque + %struct.__sbuf = type { i8*, i32 } +@str14 = external global [6 x i8] ; <[6 x i8]*> [#uses=0] + +declare void @fprintf(i32, ...) + +define void @OUTPUT_TABLE(%struct.SYMBOL_TABLE_ENTRY* %SYM_TAB) { +entry: + %tmp11 = getelementptr %struct.SYMBOL_TABLE_ENTRY* %SYM_TAB, i32 0, i32 1, i32 0 ; <i8*> [#uses=2] + %tmp.i = bitcast i8* %tmp11 to i8* ; <i8*> [#uses=1] + br label %bb.i + +bb.i: ; preds = %cond_next.i, %entry + %s1.0.i = phi i8* [ %tmp.i, %entry ], [ null, %cond_next.i ] ; <i8*> [#uses=0] + br i1 false, label %cond_true.i31, label %cond_next.i + +cond_true.i31: ; preds = %bb.i + call void (i32, ...)* @fprintf( i32 0, i8* %tmp11, i8* null ) + ret void + +cond_next.i: ; preds = %bb.i + br i1 false, label %bb.i, label %bb19.i + +bb19.i: ; preds = %cond_next.i + ret void +}
diff --git a/src/LLVM/test/CodeGen/Generic/2006-06-12-LowerSwitchCrash.ll b/src/LLVM/test/CodeGen/Generic/2006-06-12-LowerSwitchCrash.ll new file mode 100644 index 0000000..605a0a9 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2006-06-12-LowerSwitchCrash.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -O0 + +define float @test(i32 %tmp12771278) { + switch i32 %tmp12771278, label %bb1279 [ + ] + +bb1279: ; preds = %0 + ret float 1.000000e+00 +} +
diff --git a/src/LLVM/test/CodeGen/Generic/2006-06-13-ComputeMaskedBitsCrash.ll b/src/LLVM/test/CodeGen/Generic/2006-06-13-ComputeMaskedBitsCrash.ll new file mode 100644 index 0000000..b8efadf --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2006-06-13-ComputeMaskedBitsCrash.ll
@@ -0,0 +1,35 @@ +; RUN: llc < %s -O0 + +%struct.cl_perfunc_opts = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i32, i32, i32, i32, i32, i32 } +@cl_pf_opts = external global %struct.cl_perfunc_opts ; <%struct.cl_perfunc_opts*> [#uses=2] + +define void @set_flags_from_O() { +entry: + %tmp22 = icmp sgt i32 0, 0 ; <i1> [#uses=1] + br i1 %tmp22, label %cond_true23, label %cond_next159 + +cond_true23: ; preds = %entry + %tmp138 = getelementptr %struct.cl_perfunc_opts* @cl_pf_opts, i32 0, i32 8 ; <i8*> [#uses=1] + %tmp138.upgrd.1 = bitcast i8* %tmp138 to i32* ; <i32*> [#uses=2] + %tmp139 = load i32* %tmp138.upgrd.1 ; <i32> [#uses=1] + %tmp140 = shl i32 1, 27 ; <i32> [#uses=1] + %tmp141 = and i32 %tmp140, 134217728 ; <i32> [#uses=1] + %tmp142 = and i32 %tmp139, -134217729 ; <i32> [#uses=1] + %tmp143 = or i32 %tmp142, %tmp141 ; <i32> [#uses=1] + store i32 %tmp143, i32* %tmp138.upgrd.1 + %tmp144 = getelementptr %struct.cl_perfunc_opts* @cl_pf_opts, i32 0, i32 8 ; <i8*> [#uses=1] + %tmp144.upgrd.2 = bitcast i8* %tmp144 to i32* ; <i32*> [#uses=1] + %tmp145 = load i32* %tmp144.upgrd.2 ; <i32> [#uses=1] + %tmp146 = shl i32 %tmp145, 22 ; <i32> [#uses=1] + %tmp147 = lshr i32 %tmp146, 31 ; <i32> [#uses=1] + %tmp147.upgrd.3 = trunc i32 %tmp147 to i8 ; <i8> [#uses=1] + %tmp148 = icmp eq i8 %tmp147.upgrd.3, 0 ; <i1> [#uses=1] + br i1 %tmp148, label %cond_true149, label %cond_next159 + +cond_true149: ; preds = %cond_true23 + %tmp150 = bitcast i8* null to i32* ; <i32*> [#uses=0] + ret void + +cond_next159: ; preds = %cond_true23, %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/Generic/2006-06-28-SimplifySetCCCrash.ll b/src/LLVM/test/CodeGen/Generic/2006-06-28-SimplifySetCCCrash.ll new file mode 100644 index 0000000..30b3e14 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2006-06-28-SimplifySetCCCrash.ll
@@ -0,0 +1,279 @@ +; RUN: llc < %s +%struct.rtunion = type { i64 } + %struct.rtx_def = type { i16, i8, i8, [1 x %struct.rtunion] } +@ix86_cpu = external global i32 ; <i32*> [#uses=1] +@which_alternative = external global i32 ; <i32*> [#uses=3] + +declare fastcc i32 @recog() + +define void @athlon_fp_unit_ready_cost() { +entry: + %tmp = icmp slt i32 0, 0 ; <i1> [#uses=1] + br i1 %tmp, label %cond_true.i, label %cond_true + +cond_true: ; preds = %entry + ret void + +cond_true.i: ; preds = %entry + %tmp8.i = tail call fastcc i32 @recog( ) ; <i32> [#uses=1] + switch i32 %tmp8.i, label %UnifiedReturnBlock [ + i32 -1, label %bb2063 + i32 19, label %bb2035 + i32 20, label %bb2035 + i32 21, label %bb2035 + i32 23, label %bb2035 + i32 24, label %bb2035 + i32 27, label %bb2035 + i32 32, label %bb2035 + i32 33, label %bb1994 + i32 35, label %bb2035 + i32 36, label %bb1994 + i32 90, label %bb1948 + i32 94, label %bb1948 + i32 95, label %bb1948 + i32 101, label %bb1648 + i32 102, label %bb1648 + i32 103, label %bb1648 + i32 104, label %bb1648 + i32 133, label %bb1419 + i32 135, label %bb1238 + i32 136, label %bb1238 + i32 137, label %bb1238 + i32 138, label %bb1238 + i32 139, label %bb1201 + i32 140, label %bb1201 + i32 141, label %bb1154 + i32 142, label %bb1126 + i32 144, label %bb1201 + i32 145, label %bb1126 + i32 146, label %bb1201 + i32 147, label %bb1126 + i32 148, label %bb1201 + i32 149, label %bb1126 + i32 150, label %bb1201 + i32 151, label %bb1126 + i32 152, label %bb1096 + i32 153, label %bb1096 + i32 154, label %bb1096 + i32 157, label %bb1096 + i32 158, label %bb1096 + i32 159, label %bb1096 + i32 162, label %bb1096 + i32 163, label %bb1096 + i32 164, label %bb1096 + i32 167, label %bb1201 + i32 168, label %bb1201 + i32 170, label %bb1201 + i32 171, label %bb1201 + i32 173, label %bb1201 + i32 174, label %bb1201 + i32 176, label %bb1201 + i32 177, label %bb1201 + i32 179, label %bb993 + i32 180, label %bb993 + i32 181, label %bb993 + i32 182, label %bb993 + i32 
183, label %bb993 + i32 184, label %bb993 + i32 365, label %bb1126 + i32 366, label %bb1126 + i32 367, label %bb1126 + i32 368, label %bb1126 + i32 369, label %bb1126 + i32 370, label %bb1126 + i32 371, label %bb1126 + i32 372, label %bb1126 + i32 373, label %bb1126 + i32 384, label %bb1126 + i32 385, label %bb1126 + i32 386, label %bb1126 + i32 387, label %bb1126 + i32 388, label %bb1126 + i32 389, label %bb1126 + i32 390, label %bb1126 + i32 391, label %bb1126 + i32 392, label %bb1126 + i32 525, label %bb919 + i32 526, label %bb839 + i32 528, label %bb919 + i32 529, label %bb839 + i32 531, label %cond_next6.i119 + i32 532, label %cond_next6.i97 + i32 533, label %cond_next6.i81 + i32 534, label %bb495 + i32 536, label %cond_next6.i81 + i32 537, label %cond_next6.i81 + i32 538, label %bb396 + i32 539, label %bb288 + i32 541, label %bb396 + i32 542, label %bb396 + i32 543, label %bb396 + i32 544, label %bb396 + i32 545, label %bb189 + i32 546, label %cond_next6.i + i32 547, label %bb189 + i32 548, label %cond_next6.i + i32 549, label %bb189 + i32 550, label %cond_next6.i + i32 551, label %bb189 + i32 552, label %cond_next6.i + i32 553, label %bb189 + i32 554, label %cond_next6.i + i32 555, label %bb189 + i32 556, label %cond_next6.i + i32 557, label %bb189 + i32 558, label %cond_next6.i + i32 618, label %bb40 + i32 619, label %bb18 + i32 620, label %bb40 + i32 621, label %bb10 + i32 622, label %bb10 + ] + +bb10: ; preds = %cond_true.i, %cond_true.i + ret void + +bb18: ; preds = %cond_true.i + ret void + +bb40: ; preds = %cond_true.i, %cond_true.i + ret void + +cond_next6.i: ; preds = %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i + ret void + +bb189: ; preds = %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i + ret void + +bb288: ; preds = %cond_true.i + ret void + +bb396: ; preds = %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i + ret void + +bb495: 
; preds = %cond_true.i + ret void + +cond_next6.i81: ; preds = %cond_true.i, %cond_true.i, %cond_true.i + ret void + +cond_next6.i97: ; preds = %cond_true.i + ret void + +cond_next6.i119: ; preds = %cond_true.i + %tmp.i126 = icmp eq i16 0, 78 ; <i1> [#uses=1] + br i1 %tmp.i126, label %cond_next778, label %bb802 + +cond_next778: ; preds = %cond_next6.i119 + %tmp781 = icmp eq i32 0, 1 ; <i1> [#uses=1] + br i1 %tmp781, label %cond_next784, label %bb790 + +cond_next784: ; preds = %cond_next778 + %tmp785 = load i32* @ix86_cpu ; <i32> [#uses=1] + %tmp786 = icmp eq i32 %tmp785, 5 ; <i1> [#uses=1] + br i1 %tmp786, label %UnifiedReturnBlock, label %bb790 + +bb790: ; preds = %cond_next784, %cond_next778 + %tmp793 = icmp eq i32 0, 1 ; <i1> [#uses=0] + ret void + +bb802: ; preds = %cond_next6.i119 + ret void + +bb839: ; preds = %cond_true.i, %cond_true.i + ret void + +bb919: ; preds = %cond_true.i, %cond_true.i + ret void + +bb993: ; preds = %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i + ret void + +bb1096: ; preds = %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i + ret void + +bb1126: ; preds = %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i + ret void + +bb1154: ; preds = %cond_true.i + ret void + +bb1201: ; preds = %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i + ret void + +bb1238: ; preds = %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i + ret void + +bb1419: ; preds = %cond_true.i + ret void + +bb1648: ; preds = %cond_true.i, 
%cond_true.i, %cond_true.i, %cond_true.i + %tmp1650 = load i32* @which_alternative ; <i32> [#uses=1] + switch i32 %tmp1650, label %bb1701 [ + i32 0, label %cond_next1675 + i32 1, label %cond_next1675 + i32 2, label %cond_next1675 + ] + +cond_next1675: ; preds = %bb1648, %bb1648, %bb1648 + ret void + +bb1701: ; preds = %bb1648 + %tmp1702 = load i32* @which_alternative ; <i32> [#uses=1] + switch i32 %tmp1702, label %bb1808 [ + i32 0, label %cond_next1727 + i32 1, label %cond_next1727 + i32 2, label %cond_next1727 + ] + +cond_next1727: ; preds = %bb1701, %bb1701, %bb1701 + ret void + +bb1808: ; preds = %bb1701 + %bothcond696 = or i1 false, false ; <i1> [#uses=1] + br i1 %bothcond696, label %bb1876, label %cond_next1834 + +cond_next1834: ; preds = %bb1808 + ret void + +bb1876: ; preds = %bb1808 + %tmp1877signed = load i32* @which_alternative ; <i32> [#uses=4] + %tmp1877 = bitcast i32 %tmp1877signed to i32 ; <i32> [#uses=1] + %bothcond699 = icmp ult i32 %tmp1877, 2 ; <i1> [#uses=1] + %tmp1888 = icmp eq i32 %tmp1877signed, 2 ; <i1> [#uses=1] + %bothcond700 = or i1 %bothcond699, %tmp1888 ; <i1> [#uses=1] + %bothcond700.not = xor i1 %bothcond700, true ; <i1> [#uses=1] + %tmp1894 = icmp eq i32 %tmp1877signed, 3 ; <i1> [#uses=1] + %bothcond701 = or i1 %tmp1894, %bothcond700.not ; <i1> [#uses=1] + %bothcond702 = or i1 %bothcond701, false ; <i1> [#uses=1] + br i1 %bothcond702, label %UnifiedReturnBlock, label %cond_next1902 + +cond_next1902: ; preds = %bb1876 + switch i32 %tmp1877signed, label %cond_next1937 [ + i32 0, label %bb1918 + i32 1, label %bb1918 + i32 2, label %bb1918 + ] + +bb1918: ; preds = %cond_next1902, %cond_next1902, %cond_next1902 + ret void + +cond_next1937: ; preds = %cond_next1902 + ret void + +bb1948: ; preds = %cond_true.i, %cond_true.i, %cond_true.i + ret void + +bb1994: ; preds = %cond_true.i, %cond_true.i + ret void + +bb2035: ; preds = %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i + ret 
void + +bb2063: ; preds = %cond_true.i + ret void + +UnifiedReturnBlock: ; preds = %bb1876, %cond_next784, %cond_true.i + %UnifiedRetVal = phi i32 [ 100, %bb1876 ], [ 100, %cond_true.i ], [ 4, %cond_next784 ] ; <i32> [#uses=0] + ret void +}
diff --git a/src/LLVM/test/CodeGen/Generic/2006-07-03-schedulers.ll b/src/LLVM/test/CodeGen/Generic/2006-07-03-schedulers.ll new file mode 100644 index 0000000..8b5aa43 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2006-07-03-schedulers.ll
@@ -0,0 +1,32 @@ +; RUN: llc < %s -pre-RA-sched=default +; RUN: llc < %s -pre-RA-sched=list-burr +; RUN: llc < %s -pre-RA-sched=fast +; PR859 + +; The top-down schedulers are excluded here because they don't yet support +; targets that use physreg defs. + +declare i32 @printf(i8*, i32, float) + +define i32 @testissue(i32 %i, float %x, float %y) { + br label %bb1 + +bb1: ; preds = %bb1, %0 + %x1 = fmul float %x, %y ; <float> [#uses=1] + %y1 = fmul float %y, 7.500000e-01 ; <float> [#uses=1] + %z1 = fadd float %x1, %y1 ; <float> [#uses=1] + %x2 = fmul float %x, 5.000000e-01 ; <float> [#uses=1] + %y2 = fmul float %y, 0x3FECCCCCC0000000 ; <float> [#uses=1] + %z2 = fadd float %x2, %y2 ; <float> [#uses=1] + %z3 = fadd float %z1, %z2 ; <float> [#uses=1] + %i1 = shl i32 %i, 3 ; <i32> [#uses=1] + %j1 = add i32 %i, 7 ; <i32> [#uses=1] + %m1 = add i32 %i1, %j1 ; <i32> [#uses=2] + %b = icmp sle i32 %m1, 6 ; <i1> [#uses=1] + br i1 %b, label %bb1, label %bb2 + +bb2: ; preds = %bb1 + %Msg = inttoptr i64 0 to i8* ; <i8*> [#uses=1] + call i32 @printf( i8* %Msg, i32 %m1, float %z3 ) ; <i32>:1 [#uses=0] + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/Generic/2006-08-30-CoalescerCrash.ll b/src/LLVM/test/CodeGen/Generic/2006-08-30-CoalescerCrash.ll new file mode 100644 index 0000000..a80b5b5 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2006-08-30-CoalescerCrash.ll
@@ -0,0 +1,112 @@ +; RUN: llc < %s +%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } + %struct.VEC_edge = type { i32, i32, [1 x %struct.edge_def*] } + %struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] } + %struct.basic_block_def = type { %struct.rtx_def*, %struct.rtx_def*, %struct.tree_node*, %struct.VEC_edge*, %struct.VEC_edge*, %struct.bitmap_head_def*, %struct.bitmap_head_def*, i8*, %struct.loop*, [2 x %struct.et_node*], %struct.basic_block_def*, %struct.basic_block_def*, %struct.reorder_block_def*, %struct.bb_ann_d*, i64, i32, i32, i32, i32 } + %struct.bb_ann_d = type { %struct.tree_node*, i8, %struct.edge_prediction* } + %struct.bitmap_element_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, [4 x i32] } + %struct.bitmap_head_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, %struct.bitmap_obstack* } + %struct.bitmap_obstack = type { %struct.bitmap_element_def*, %struct.bitmap_head_def*, %struct.obstack } + %struct.cost_pair = type { %struct.iv_cand*, i32, %struct.bitmap_head_def* } + %struct.dataflow_d = type { %struct.varray_head_tag*, [2 x %struct.tree_node*] } + %struct.def_operand_ptr = type { %struct.tree_node** } + %struct.def_optype_d = type { i32, [1 x %struct.def_operand_ptr] } + %struct.edge_def = type { %struct.basic_block_def*, %struct.basic_block_def*, %struct.edge_def_insns, i8*, %struct.location_t*, i32, i32, i64, i32 } + %struct.edge_def_insns = type { %struct.rtx_def* } + %struct.edge_prediction = type { %struct.edge_prediction*, %struct.edge_def*, i32, i32 } + %struct.eh_status = type opaque + %struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack*, i32, %struct.location_t, i32, i8*, %struct.rtx_def** } + %struct.et_node = type opaque + %struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* } + %struct.function = type { 
%struct.eh_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.CUMULATIVE_ARGS, %struct.rtx_def*, %struct.rtx_def*, %struct.initial_value_struct*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i8, i32, i64, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.varray_head_tag*, %struct.temp_slot*, i32, %struct.var_refs_queue*, i32, i32, %struct.rtvec_def*, %struct.tree_node*, i32, i32, i32, %struct.machine_function*, i32, i32, i1, i1, %struct.language_function*, %struct.rtx_def*, i32, i32, i32, i32, %struct.location_t, %struct.varray_head_tag*, %struct.tree_node*, i8, i8, i8 } + %struct.htab = type { i32 (i8*)*, i32 (i8*, i8*)*, void (i8*)*, i8**, i32, i32, i32, i32, i32, i8* (i32, i32)*, void (i8*)*, i8*, i8* (i8*, i32, i32)*, void (i8*, i8*)*, i32 } + %struct.initial_value_struct = type opaque + %struct.iv = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i1, i1, i32 } + %struct.iv_cand = type { i32, i1, i32, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.iv*, i32 } + %struct.iv_use = type { i32, i32, %struct.iv*, %struct.tree_node*, %struct.tree_node**, %struct.bitmap_head_def*, i32, %struct.cost_pair*, %struct.iv_cand* } + %struct.ivopts_data = type { %struct.loop*, %struct.htab*, i32, %struct.version_info*, %struct.bitmap_head_def*, i32, %struct.varray_head_tag*, %struct.varray_head_tag*, %struct.bitmap_head_def*, i1 } + %struct.lang_decl = type opaque + %struct.language_function = type opaque + %struct.location_t = type { i8*, i32 } + %struct.loop = type { i32, %struct.basic_block_def*, %struct.basic_block_def*, %struct.basic_block_def*, %struct.lpt_decision, i32, i32, %struct.edge_def**, i32, %struct.basic_block_def*, %struct.basic_block_def*, i32, 
%struct.edge_def**, i32, %struct.edge_def**, i32, %struct.simple_bitmap_def*, i32, %struct.loop**, i32, %struct.loop*, %struct.loop*, %struct.loop*, %struct.loop*, i32, i8*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i32, %struct.tree_node*, %struct.tree_node*, %struct.nb_iter_bound*, %struct.edge_def*, i1 } + %struct.lpt_decision = type { i32, i32 } + %struct.machine_function = type { %struct.stack_local_entry*, i8*, %struct.rtx_def*, i32, i32, i32, i32, i32 } + %struct.nb_iter_bound = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.nb_iter_bound* } + %struct.obstack = type { i32, %struct._obstack_chunk*, i8*, i8*, i8*, i32, i32, %struct._obstack_chunk* (i8*, i32)*, void (i8*, %struct._obstack_chunk*)*, i8*, i8 } + %struct.reorder_block_def = type { %struct.rtx_def*, %struct.rtx_def*, %struct.basic_block_def*, %struct.basic_block_def*, %struct.basic_block_def*, i32, i32, i32 } + %struct.rtvec_def = type { i32, [1 x %struct.rtx_def*] } + %struct.rtx_def = type { i16, i8, i8, %struct.u } + %struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack* } + %struct.simple_bitmap_def = type { i32, i32, i32, [1 x i64] } + %struct.stack_local_entry = type opaque + %struct.stmt_ann_d = type { %struct.tree_ann_common_d, i8, %struct.basic_block_def*, %struct.stmt_operands_d, %struct.dataflow_d*, %struct.bitmap_head_def*, i32 } + %struct.stmt_operands_d = type { %struct.def_optype_d*, %struct.def_optype_d*, %struct.v_may_def_optype_d*, %struct.vuse_optype_d*, %struct.v_may_def_optype_d* } + %struct.temp_slot = type opaque + %struct.tree_ann_common_d = type { i32, i8*, %struct.tree_node* } + %struct.tree_ann_d = type { %struct.stmt_ann_d } + %struct.tree_common = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_ann_d*, i8, i8, i8, i8, i8 } + %struct.tree_decl = type { %struct.tree_common, %struct.location_t, i32, %struct.tree_node*, i8, i8, i8, 
i8, i8, i8, i8, i32, %struct.tree_decl_u1, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, i32, %struct.tree_decl_u2, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_decl* } + %struct.tree_decl_u1 = type { i64 } + %struct.tree_decl_u2 = type { %struct.function* } + %struct.tree_node = type { %struct.tree_decl } + %struct.u = type { [1 x i64] } + %struct.v_def_use_operand_type_t = type { %struct.tree_node*, %struct.tree_node* } + %struct.v_may_def_optype_d = type { i32, [1 x %struct.v_def_use_operand_type_t] } + %struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* } + %struct.varasm_status = type opaque + %struct.varray_head_tag = type { i32, i32, i32, i8*, %struct.u } + %struct.version_info = type { %struct.tree_node*, %struct.iv*, i1, i32, i1 } + %struct.vuse_optype_d = type { i32, [1 x %struct.tree_node*] } + +define i1 @determine_use_iv_cost(%struct.ivopts_data* %data, %struct.iv_use* %use, %struct.iv_cand* %cand) { +entry: + switch i32 0, label %bb91 [ + i32 0, label %bb + i32 1, label %bb6 + i32 3, label %cond_next135 + ] + +bb: ; preds = %entry + ret i1 false + +bb6: ; preds = %entry + br i1 false, label %bb87, label %cond_next27 + +cond_next27: ; preds = %bb6 + br i1 false, label %cond_true30, label %cond_next55 + +cond_true30: ; preds = %cond_next27 + br i1 false, label %cond_next41, label %cond_true35 + +cond_true35: ; preds = %cond_true30 + ret i1 false + +cond_next41: ; preds = %cond_true30 + %tmp44 = call i32 @force_var_cost( %struct.ivopts_data* %data, %struct.tree_node* null, %struct.bitmap_head_def** null ) ; <i32> [#uses=2] + %tmp46 = udiv i32 %tmp44, 5 ; <i32> [#uses=1] + call void @set_use_iv_cost( %struct.ivopts_data* %data, %struct.iv_use* %use, %struct.iv_cand* %cand, i32 %tmp46, %struct.bitmap_head_def* null ) + %tmp44.off = add i32 
%tmp44, -50000000 ; <i32> [#uses=1] + %tmp52 = icmp ugt i32 %tmp44.off, 4 ; <i1> [#uses=1] + %tmp52.upgrd.1 = zext i1 %tmp52 to i32 ; <i32> [#uses=1] + br label %bb87 + +cond_next55: ; preds = %cond_next27 + ret i1 false + +bb87: ; preds = %cond_next41, %bb6 + %tmp2.0 = phi i32 [ %tmp52.upgrd.1, %cond_next41 ], [ 1, %bb6 ] ; <i32> [#uses=0] + ret i1 false + +bb91: ; preds = %entry + ret i1 false + +cond_next135: ; preds = %entry + %tmp193 = call i1 @determine_use_iv_cost_generic( %struct.ivopts_data* %data, %struct.iv_use* %use, %struct.iv_cand* %cand ) ; <i1> [#uses=0] + ret i1 false +} + +declare void @set_use_iv_cost(%struct.ivopts_data*, %struct.iv_use*, %struct.iv_cand*, i32, %struct.bitmap_head_def*) + +declare i32 @force_var_cost(%struct.ivopts_data*, %struct.tree_node*, %struct.bitmap_head_def**) + +declare i1 @determine_use_iv_cost_generic(%struct.ivopts_data*, %struct.iv_use*, %struct.iv_cand*)
diff --git a/src/LLVM/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll b/src/LLVM/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll new file mode 100644 index 0000000..13c0628 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll
@@ -0,0 +1,117 @@ +; RUN: llc < %s -regalloc=fast + +%struct.CHESS_POSITION = type { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i32, i32, i8, i8, [64 x i8], i8, i8, i8, i8, i8 } +@search = external global %struct.CHESS_POSITION ; <%struct.CHESS_POSITION*> [#uses=2] +@bishop_shift_rl45 = external global [64 x i32] ; <[64 x i32]*> [#uses=1] +@bishop_shift_rr45 = external global [64 x i32] ; <[64 x i32]*> [#uses=1] +@black_outpost = external global [64 x i8] ; <[64 x i8]*> [#uses=1] +@bishop_mobility_rl45 = external global [64 x [256 x i32]] ; <[64 x [256 x i32]]*> [#uses=1] +@bishop_mobility_rr45 = external global [64 x [256 x i32]] ; <[64 x [256 x i32]]*> [#uses=1] + +declare fastcc i32 @FirstOne() + +define fastcc void @Evaluate() { +entry: + br i1 false, label %cond_false186, label %cond_true + +cond_true: ; preds = %entry + ret void + +cond_false186: ; preds = %entry + br i1 false, label %cond_true293, label %bb203 + +bb203: ; preds = %cond_false186 + ret void + +cond_true293: ; preds = %cond_false186 + br i1 false, label %cond_true298, label %cond_next317 + +cond_true298: ; preds = %cond_true293 + br i1 false, label %cond_next518, label %cond_true397.preheader + +cond_next317: ; preds = %cond_true293 + ret void + +cond_true397.preheader: ; preds = %cond_true298 + ret void + +cond_next518: ; preds = %cond_true298 + br i1 false, label %bb1069, label %cond_true522 + +cond_true522: ; preds = %cond_next518 + ret void + +bb1069: ; preds = %cond_next518 + br i1 false, label %cond_next1131, label %bb1096 + +bb1096: ; preds = %bb1069 + ret void + +cond_next1131: ; preds = %bb1069 + br i1 false, label %cond_next1207, label %cond_true1150 + +cond_true1150: ; preds = %cond_next1131 + ret void + +cond_next1207: ; preds = %cond_next1131 + br i1 false, label %cond_next1219, label %cond_true1211 + +cond_true1211: ; preds = %cond_next1207 + ret void + +cond_next1219: ; preds = %cond_next1207 + br i1 false, label %cond_true1223, 
label %cond_next1283 + +cond_true1223: ; preds = %cond_next1219 + br i1 false, label %cond_true1254, label %cond_true1264 + +cond_true1254: ; preds = %cond_true1223 + br i1 false, label %bb1567, label %cond_true1369.preheader + +cond_true1264: ; preds = %cond_true1223 + ret void + +cond_next1283: ; preds = %cond_next1219 + ret void + +cond_true1369.preheader: ; preds = %cond_true1254 + ret void + +bb1567: ; preds = %cond_true1254 + %tmp1580 = load i64* getelementptr (%struct.CHESS_POSITION* @search, i32 0, i32 3) ; <i64> [#uses=1] + %tmp1591 = load i64* getelementptr (%struct.CHESS_POSITION* @search, i32 0, i32 4) ; <i64> [#uses=1] + %tmp1572 = tail call fastcc i32 @FirstOne( ) ; <i32> [#uses=5] + %tmp1582 = getelementptr [64 x i32]* @bishop_shift_rl45, i32 0, i32 %tmp1572 ; <i32*> [#uses=1] + %tmp1583 = load i32* %tmp1582 ; <i32> [#uses=1] + %tmp1583.upgrd.1 = trunc i32 %tmp1583 to i8 ; <i8> [#uses=1] + %shift.upgrd.2 = zext i8 %tmp1583.upgrd.1 to i64 ; <i64> [#uses=1] + %tmp1584 = lshr i64 %tmp1580, %shift.upgrd.2 ; <i64> [#uses=1] + %tmp1584.upgrd.3 = trunc i64 %tmp1584 to i32 ; <i32> [#uses=1] + %tmp1585 = and i32 %tmp1584.upgrd.3, 255 ; <i32> [#uses=1] + %gep.upgrd.4 = zext i32 %tmp1585 to i64 ; <i64> [#uses=1] + %tmp1587 = getelementptr [64 x [256 x i32]]* @bishop_mobility_rl45, i32 0, i32 %tmp1572, i64 %gep.upgrd.4 ; <i32*> [#uses=1] + %tmp1588 = load i32* %tmp1587 ; <i32> [#uses=1] + %tmp1593 = getelementptr [64 x i32]* @bishop_shift_rr45, i32 0, i32 %tmp1572 ; <i32*> [#uses=1] + %tmp1594 = load i32* %tmp1593 ; <i32> [#uses=1] + %tmp1594.upgrd.5 = trunc i32 %tmp1594 to i8 ; <i8> [#uses=1] + %shift.upgrd.6 = zext i8 %tmp1594.upgrd.5 to i64 ; <i64> [#uses=1] + %tmp1595 = lshr i64 %tmp1591, %shift.upgrd.6 ; <i64> [#uses=1] + %tmp1595.upgrd.7 = trunc i64 %tmp1595 to i32 ; <i32> [#uses=1] + %tmp1596 = and i32 %tmp1595.upgrd.7, 255 ; <i32> [#uses=1] + %gep.upgrd.8 = zext i32 %tmp1596 to i64 ; <i64> [#uses=1] + %tmp1598 = getelementptr [64 x [256 x i32]]* 
@bishop_mobility_rr45, i32 0, i32 %tmp1572, i64 %gep.upgrd.8 ; <i32*> [#uses=1] + %tmp1599 = load i32* %tmp1598 ; <i32> [#uses=1] + %tmp1600.neg = sub i32 0, %tmp1588 ; <i32> [#uses=1] + %tmp1602 = sub i32 %tmp1600.neg, %tmp1599 ; <i32> [#uses=1] + %tmp1604 = getelementptr [64 x i8]* @black_outpost, i32 0, i32 %tmp1572 ; <i8*> [#uses=1] + %tmp1605 = load i8* %tmp1604 ; <i8> [#uses=1] + %tmp1606 = icmp eq i8 %tmp1605, 0 ; <i1> [#uses=1] + br i1 %tmp1606, label %cond_next1637, label %cond_true1607 + +cond_true1607: ; preds = %bb1567 + ret void + +cond_next1637: ; preds = %bb1567 + %tmp1662 = sub i32 %tmp1602, 0 ; <i32> [#uses=0] + ret void +}
diff --git a/src/LLVM/test/CodeGen/Generic/2006-09-06-SwitchLowering.ll b/src/LLVM/test/CodeGen/Generic/2006-09-06-SwitchLowering.ll new file mode 100644 index 0000000..50a7fd0 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2006-09-06-SwitchLowering.ll
@@ -0,0 +1,96 @@ +; RUN: llc < %s + +define void @foo() { + br label %cond_true813.i + +cond_true813.i: ; preds = %0 + br i1 false, label %cond_true818.i, label %cond_next1146.i + +cond_true818.i: ; preds = %cond_true813.i + br i1 false, label %recog_memoized.exit52, label %cond_next1146.i + +recog_memoized.exit52: ; preds = %cond_true818.i + switch i32 0, label %bb886.i.preheader [ + i32 0, label %bb907.i + i32 44, label %bb866.i + i32 103, label %bb874.i + i32 114, label %bb874.i + ] + +bb857.i: ; preds = %bb886.i, %bb866.i + %tmp862.i494.24 = phi i8* [ null, %bb866.i ], [ %tmp862.i494.26, %bb886.i ] ; <i8*> [#uses=4] + switch i32 0, label %bb886.i.preheader [ + i32 0, label %bb907.i + i32 44, label %bb866.i + i32 103, label %bb874.i + i32 114, label %bb874.i + ] + +bb866.i.loopexit: ; preds = %bb874.i + br label %bb866.i + +bb866.i.loopexit31: ; preds = %cond_true903.i + br label %bb866.i + +bb866.i: ; preds = %bb866.i.loopexit31, %bb866.i.loopexit, %bb857.i, %recog_memoized.exit52 + br i1 false, label %bb907.i, label %bb857.i + +bb874.i.preheader.loopexit: ; preds = %cond_true903.i, %cond_true903.i + ret void + +bb874.i: ; preds = %bb857.i, %bb857.i, %recog_memoized.exit52, %recog_memoized.exit52 + %tmp862.i494.25 = phi i8* [ %tmp862.i494.24, %bb857.i ], [ %tmp862.i494.24, %bb857.i ], [ undef, %recog_memoized.exit52 ], [ undef, %recog_memoized.exit52 ] ; <i8*> [#uses=1] + switch i32 0, label %bb886.i.preheader.loopexit [ + i32 0, label %bb907.i + i32 44, label %bb866.i.loopexit + i32 103, label %bb874.i.backedge + i32 114, label %bb874.i.backedge + ] + +bb874.i.backedge: ; preds = %bb874.i, %bb874.i + ret void + +bb886.i.preheader.loopexit: ; preds = %bb874.i + ret void + +bb886.i.preheader: ; preds = %bb857.i, %recog_memoized.exit52 + %tmp862.i494.26 = phi i8* [ undef, %recog_memoized.exit52 ], [ %tmp862.i494.24, %bb857.i ] ; <i8*> [#uses=1] + br label %bb886.i + +bb886.i: ; preds = %cond_true903.i, %bb886.i.preheader + br i1 false, label %bb857.i, label 
%cond_true903.i + +cond_true903.i: ; preds = %bb886.i + switch i32 0, label %bb886.i [ + i32 0, label %bb907.i + i32 44, label %bb866.i.loopexit31 + i32 103, label %bb874.i.preheader.loopexit + i32 114, label %bb874.i.preheader.loopexit + ] + +bb907.i: ; preds = %cond_true903.i, %bb874.i, %bb866.i, %bb857.i, %recog_memoized.exit52 + %tmp862.i494.0 = phi i8* [ %tmp862.i494.24, %bb857.i ], [ null, %bb866.i ], [ undef, %recog_memoized.exit52 ], [ %tmp862.i494.25, %bb874.i ], [ null, %cond_true903.i ] ; <i8*> [#uses=1] + br i1 false, label %cond_next1146.i, label %cond_true910.i + +cond_true910.i: ; preds = %bb907.i + ret void + +cond_next1146.i: ; preds = %bb907.i, %cond_true818.i, %cond_true813.i + %tmp862.i494.1 = phi i8* [ %tmp862.i494.0, %bb907.i ], [ undef, %cond_true818.i ], [ undef, %cond_true813.i ] ; <i8*> [#uses=0] + ret void + +bb2060.i: ; No predecessors! + br i1 false, label %cond_true2064.i, label %bb2067.i + +cond_true2064.i: ; preds = %bb2060.i + unreachable + +bb2067.i: ; preds = %bb2060.i + ret void + +cond_next3473: ; No predecessors! + ret void + +cond_next3521: ; No predecessors! + ret void +}
diff --git a/src/LLVM/test/CodeGen/Generic/2006-10-27-CondFolding.ll b/src/LLVM/test/CodeGen/Generic/2006-10-27-CondFolding.ll new file mode 100644 index 0000000..d467d5b --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2006-10-27-CondFolding.ll
@@ -0,0 +1,21 @@ +; RUN: llc < %s + +define void @start_pass_huff(i32 %gather_statistics) { +entry: + %tmp = icmp eq i32 %gather_statistics, 0 ; <i1> [#uses=1] + br i1 false, label %cond_next22, label %bb166 + +cond_next22: ; preds = %entry + %bothcond = and i1 false, %tmp ; <i1> [#uses=1] + br i1 %bothcond, label %bb34, label %bb46 + +bb34: ; preds = %cond_next22 + ret void + +bb46: ; preds = %cond_next22 + ret void + +bb166: ; preds = %entry + ret void +} +
diff --git a/src/LLVM/test/CodeGen/Generic/2006-10-29-Crash.ll b/src/LLVM/test/CodeGen/Generic/2006-10-29-Crash.ll new file mode 100644 index 0000000..d19b1e8 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2006-10-29-Crash.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s + +define void @form_component_prediction(i32 %dy) { +entry: + %tmp7 = and i32 %dy, 1 ; <i32> [#uses=1] + %tmp27 = icmp eq i32 %tmp7, 0 ; <i1> [#uses=1] + br i1 false, label %cond_next30, label %bb115 + +cond_next30: ; preds = %entry + ret void + +bb115: ; preds = %entry + %bothcond1 = or i1 %tmp27, false ; <i1> [#uses=1] + br i1 %bothcond1, label %bb228, label %cond_next125 + +cond_next125: ; preds = %bb115 + ret void + +bb228: ; preds = %bb115 + ret void +} +
diff --git a/src/LLVM/test/CodeGen/Generic/2006-11-20-DAGCombineCrash.ll b/src/LLVM/test/CodeGen/Generic/2006-11-20-DAGCombineCrash.ll new file mode 100644 index 0000000..9bd3783 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2006-11-20-DAGCombineCrash.ll
@@ -0,0 +1,41 @@ +; RUN: llc < %s +; PR1011 +%struct.mng_data = type { i8* (%struct.mng_data*, i32)*, i32, i32, i32, i8, i8, i32, i32, i32, i32, i32 } + +define void @mng_display_bgr565() { +entry: + br i1 false, label %bb.preheader, label %return + +bb.preheader: ; preds = %entry + br i1 false, label %cond_true48, label %cond_next80 + +cond_true48: ; preds = %bb.preheader + %tmp = load i8* null ; <i8> [#uses=1] + %tmp51 = zext i8 %tmp to i16 ; <i16> [#uses=1] + %tmp99 = load i8* null ; <i8> [#uses=1] + %tmp54 = bitcast i8 %tmp99 to i8 ; <i8> [#uses=1] + %tmp54.upgrd.1 = zext i8 %tmp54 to i32 ; <i32> [#uses=1] + %tmp55 = lshr i32 %tmp54.upgrd.1, 3 ; <i32> [#uses=1] + %tmp55.upgrd.2 = trunc i32 %tmp55 to i16 ; <i16> [#uses=1] + %tmp52 = shl i16 %tmp51, 5 ; <i16> [#uses=1] + %tmp56 = and i16 %tmp55.upgrd.2, 28 ; <i16> [#uses=1] + %tmp57 = or i16 %tmp56, %tmp52 ; <i16> [#uses=1] + %tmp60 = zext i16 %tmp57 to i32 ; <i32> [#uses=1] + %tmp62 = xor i32 0, 65535 ; <i32> [#uses=1] + %tmp63 = mul i32 %tmp60, %tmp62 ; <i32> [#uses=1] + %tmp65 = add i32 0, %tmp63 ; <i32> [#uses=1] + %tmp69 = add i32 0, %tmp65 ; <i32> [#uses=1] + %tmp70 = lshr i32 %tmp69, 16 ; <i32> [#uses=1] + %tmp70.upgrd.3 = trunc i32 %tmp70 to i16 ; <i16> [#uses=1] + %tmp75 = lshr i16 %tmp70.upgrd.3, 8 ; <i16> [#uses=1] + %tmp75.upgrd.4 = trunc i16 %tmp75 to i8 ; <i8> [#uses=1] + %tmp76 = lshr i8 %tmp75.upgrd.4, 5 ; <i8> [#uses=1] + store i8 %tmp76, i8* null + ret void + +cond_next80: ; preds = %bb.preheader + ret void + +return: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/Generic/2007-01-15-LoadSelectCycle.ll b/src/LLVM/test/CodeGen/Generic/2007-01-15-LoadSelectCycle.ll new file mode 100644 index 0000000..39e8dd5 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2007-01-15-LoadSelectCycle.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s +; PR1114 + +declare i1 @foo() + +define i32 @test(i32* %A, i32* %B) { + %a = load i32* %A + %b = load i32* %B + %cond = call i1 @foo() + %c = select i1 %cond, i32 %a, i32 %b + ret i32 %c +}
diff --git a/src/LLVM/test/CodeGen/Generic/2007-02-25-invoke.ll b/src/LLVM/test/CodeGen/Generic/2007-02-25-invoke.ll new file mode 100644 index 0000000..32d2b4d --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2007-02-25-invoke.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s + +; PR1224 + +declare i32 @test() +define i32 @test2() { + %A = invoke i32 @test() to label %invcont unwind label %blat +invcont: + ret i32 %A +blat: + %lpad = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0 + cleanup + ret i32 0 +} + +declare i32 @__gxx_personality_v0(...)
diff --git a/src/LLVM/test/CodeGen/Generic/2007-04-08-MultipleFrameIndices.ll b/src/LLVM/test/CodeGen/Generic/2007-04-08-MultipleFrameIndices.ll new file mode 100644 index 0000000..e50320d --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2007-04-08-MultipleFrameIndices.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s +; XFAIL: sparc-sun-solaris2 +; PR1308 +; PR1557 + +define i32 @stuff(i32, ...) { + %foo = alloca i8* + %bar = alloca i32* + %A = call i32 asm sideeffect "inline asm $0 $2 $3 $4", "=r,0,i,m,m"( i32 0, i32 1, i8** %foo, i32** %bar ) + ret i32 %A +}
diff --git a/src/LLVM/test/CodeGen/Generic/2007-04-13-SwitchLowerBadPhi.ll b/src/LLVM/test/CodeGen/Generic/2007-04-13-SwitchLowerBadPhi.ll new file mode 100644 index 0000000..50ff36b --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2007-04-13-SwitchLowerBadPhi.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -O0 +; PR 1323 + + %struct.comp = type { i8*, i32, i8*, [3 x i8], i32 } + +define void @regbranch() { +cond_next240.i: + br i1 false, label %cond_true251.i, label %cond_next272.i + +cond_true251.i: ; preds = %cond_next240.i + switch i8 0, label %cond_next272.i [ + i8 42, label %bb268.i + i8 43, label %bb268.i + i8 63, label %bb268.i + ] + +bb268.i: ; preds = %cond_true251.i, %cond_true251.i, %cond_true251.i + br label %cond_next272.i + +cond_next272.i: ; preds = %bb268.i, %cond_true251.i, %cond_next240.i + %len.2.i = phi i32 [ 0, %bb268.i ], [ 0, %cond_next240.i ], [ 0, %cond_true251.i ] ; <i32> [#uses=1] + %tmp278.i = icmp eq i32 %len.2.i, 1 ; <i1> [#uses=0] + ret void +}
diff --git a/src/LLVM/test/CodeGen/Generic/2007-04-17-lsr-crash.ll b/src/LLVM/test/CodeGen/Generic/2007-04-17-lsr-crash.ll new file mode 100644 index 0000000..b5fbdd6 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2007-04-17-lsr-crash.ll
@@ -0,0 +1,35 @@ +; RUN: llc < %s + +define void @foo(i32 %inTextSize) { +entry: + br label %bb236.outer + +cond_next193: ; preds = %bb236 + %tmp211 = add i32 %inTextSize_addr.1.ph17, -2 ; <i32> [#uses=1] + br i1 false, label %cond_next232, label %cond_true227 + +cond_true227: ; preds = %cond_next193 + ret void + +cond_next232: ; preds = %cond_next193 + %indvar.next49 = add i32 %indvar48, 1 ; <i32> [#uses=1] + br label %bb236.outer + +bb236.outer: ; preds = %cond_next232, %entry + %indvar48 = phi i32 [ %indvar.next49, %cond_next232 ], [ 0, %entry ] ; <i32> [#uses=2] + %inTextSize_addr.1.ph17 = phi i32 [ %tmp211, %cond_next232 ], [ %inTextSize, %entry ] ; <i32> [#uses=3] + %tmp.50 = sub i32 0, %indvar48 ; <i32> [#uses=1] + %tmp219 = icmp eq i32 %tmp.50, 0 ; <i1> [#uses=1] + br i1 %tmp219, label %bb236.us, label %bb236 + +bb236.us: ; preds = %bb236.outer + %inTextSize_addr.1.us = add i32 0, %inTextSize_addr.1.ph17 ; <i32> [#uses=0] + ret void + +bb236: ; preds = %bb236.outer + %tmp238 = icmp eq i32 %inTextSize_addr.1.ph17, 0 ; <i1> [#uses=1] + br i1 %tmp238, label %exit, label %cond_next193 + +exit: ; preds = %bb236 + ret void +}
diff --git a/src/LLVM/test/CodeGen/Generic/2007-04-27-InlineAsm-X-Dest.ll b/src/LLVM/test/CodeGen/Generic/2007-04-27-InlineAsm-X-Dest.ll new file mode 100644 index 0000000..c7dacc4 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2007-04-27-InlineAsm-X-Dest.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s + +; Test that we can have an "X" output constraint. + +define void @test(i16 * %t) { + call void asm sideeffect "foo $0", "=*X,~{dirflag},~{fpsr},~{flags},~{memory}"( i16* %t ) + ret void +}
diff --git a/src/LLVM/test/CodeGen/Generic/2007-04-27-LargeMemObject.ll b/src/LLVM/test/CodeGen/Generic/2007-04-27-LargeMemObject.ll new file mode 100644 index 0000000..d71671c --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2007-04-27-LargeMemObject.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s + + %struct..0anon = type { [100 x i32] } + +define void @test() { +entry: + %currfpu = alloca %struct..0anon, align 16 ; <%struct..0anon*> [#uses=2] + %mxcsr = alloca %struct..0anon, align 16 ; <%struct..0anon*> [#uses=1] + call void asm sideeffect "fnstenv $0", "=*m,~{dirflag},~{fpsr},~{flags}"( %struct..0anon* %currfpu ) + call void asm sideeffect "$0 $1", "=*m,*m,~{dirflag},~{fpsr},~{flags}"( %struct..0anon* %mxcsr, %struct..0anon* %currfpu ) + ret void +} +
diff --git a/src/LLVM/test/CodeGen/Generic/2007-04-30-LandingPadBranchFolding.ll b/src/LLVM/test/CodeGen/Generic/2007-04-30-LandingPadBranchFolding.ll new file mode 100644 index 0000000..2b0a25c --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2007-04-30-LandingPadBranchFolding.ll
@@ -0,0 +1,63 @@ +; RUN: llc < %s +; PR1228 + + %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Alloc_hider" = type { i8* } + %"struct.std::locale" = type { %"struct.std::locale::_Impl"* } + %"struct.std::locale::_Impl" = type { i32, %"struct.std::locale::facet"**, i32, %"struct.std::locale::facet"**, i8** } + %"struct.std::locale::facet" = type { i32 (...)**, i32 } + %"struct.std::string" = type { %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Alloc_hider" } + +define void @_ZNKSt6locale4nameEv(%"struct.std::string"* %agg.result) { +entry: + %tmp105 = icmp eq i8* null, null ; <i1> [#uses=1] + br i1 %tmp105, label %cond_true, label %cond_true222 + +cond_true: ; preds = %entry + invoke void @_ZNSs14_M_replace_auxEjjjc( ) + to label %cond_next1328 unwind label %cond_true1402 + +cond_true222: ; preds = %cond_true222, %entry + %tmp207 = call i32 @strcmp( ) ; <i32> [#uses=1] + %tmp208 = icmp eq i32 %tmp207, 0 ; <i1> [#uses=2] + %bothcond1480 = and i1 %tmp208, false ; <i1> [#uses=1] + br i1 %bothcond1480, label %cond_true222, label %cond_next226.loopexit + +cond_next226.loopexit: ; preds = %cond_true222 + %phitmp = xor i1 %tmp208, true ; <i1> [#uses=1] + br i1 %phitmp, label %cond_false280, label %cond_true235 + +cond_true235: ; preds = %cond_next226.loopexit + invoke void @_ZNSs6assignEPKcj( ) + to label %cond_next1328 unwind label %cond_true1402 + +cond_false280: ; preds = %cond_next226.loopexit + invoke void @_ZNSs7reserveEj( ) + to label %invcont282 unwind label %cond_true1402 + +invcont282: ; preds = %cond_false280 + invoke void @_ZNSs6appendEPKcj( ) + to label %invcont317 unwind label %cond_true1402 + +invcont317: ; preds = %invcont282 + ret void + +cond_next1328: ; preds = %cond_true235, %cond_true + ret void + +cond_true1402: ; preds = %invcont282, %cond_false280, %cond_true235, %cond_true + %lpad = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0 + cleanup + ret void +} + 
+declare void @_ZNSs14_M_replace_auxEjjjc() + +declare i32 @strcmp() + +declare void @_ZNSs6assignEPKcj() + +declare void @_ZNSs7reserveEj() + +declare void @_ZNSs6appendEPKcj() + +declare i32 @__gxx_personality_v0(...)
diff --git a/src/LLVM/test/CodeGen/Generic/2007-05-03-EHTypeInfo.ll b/src/LLVM/test/CodeGen/Generic/2007-05-03-EHTypeInfo.ll new file mode 100644 index 0000000..2880691 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2007-05-03-EHTypeInfo.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s + + %struct.exception = type { i8, i8, i32, i8*, i8*, i32, i8* } +@program_error = external global %struct.exception ; <%struct.exception*> [#uses=1] + +define void @typeinfo() { +entry: + %eh_typeid = tail call i32 @llvm.eh.typeid.for.i32( i8* getelementptr (%struct.exception* @program_error, i32 0, i32 0) ) ; <i32> [#uses=0] + ret void +} + +declare i32 @llvm.eh.typeid.for.i32(i8*)
diff --git a/src/LLVM/test/CodeGen/Generic/2007-05-15-InfiniteRecursion.ll b/src/LLVM/test/CodeGen/Generic/2007-05-15-InfiniteRecursion.ll new file mode 100644 index 0000000..ddfa15b --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2007-05-15-InfiniteRecursion.ll
@@ -0,0 +1,90 @@ +; RUN: llc < %s + + %struct.AVClass = type { i8*, i8* (i8*)*, %struct.AVOption* } + %struct.AVCodec = type { i8*, i32, i32, i32, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32, i8*)*, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32*, i8*, i32)*, i32, %struct.AVCodec*, void (%struct.AVCodecContext*)*, %struct.AVRational*, i32* } + %struct.AVCodecContext = type { %struct.AVClass*, i32, i32, i32, i32, i32, i8*, i32, %struct.AVRational, i32, i32, i32, i32, i32, void (%struct.AVCodecContext*, %struct.AVFrame*, i32*, i32, i32, i32)*, i32, i32, i32, i32, i32, i32, i32, float, float, i32, i32, i32, i32, float, i32, i32, i32, %struct.AVCodec*, i8*, i32, i32, void (%struct.AVCodecContext*, i8*, i32, i32)*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, [32 x i8], i32, i32, i32, i32, i32, i32, i32, float, i32, i32 (%struct.AVCodecContext*, %struct.AVFrame*)*, void (%struct.AVCodecContext*, %struct.AVFrame*)*, i32, i32, i32, i32, i8*, i8*, float, float, i32, %struct.RcOverride*, i32, i8*, i32, i32, i32, float, float, float, float, i32, float, float, float, float, float, i32, i32, i32, i32*, i32, i32, i32, i32, %struct.AVRational, %struct.AVFrame*, i32, i32, [4 x i64], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 (%struct.AVCodecContext*, i32*)*, i32, i32, i32, i32, i32, i32, i8*, i32, i32, i32, i32, i32, i32, i16*, i16*, i32, i32, i32, i32, %struct.AVPaletteControl*, i32, i32 (%struct.AVCodecContext*, %struct.AVFrame*)*, i32, i32, i32, i32, i32, i32, i32, i32 (%struct.AVCodecContext*, i32 (%struct.AVCodecContext*, i8*)*, i8**, i32*, i32)*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64 } + %struct.AVEvalExpr = type opaque + %struct.AVFrame = type { [4 x i8*], [4 x i32], [4 x i8*], i32, 
i32, i64, i32, i32, i32, i32, i32, i8*, i32, i8*, [2 x [2 x i16]*], i32*, i8, i8*, [4 x i64], i32, i32, i32, i32, i32, %struct.AVPanScan*, i32, i32, i16*, [2 x i8*] } + %struct.AVOption = type opaque + %struct.AVPaletteControl = type { i32, [256 x i32] } + %struct.AVPanScan = type { i32, i32, i32, [3 x [2 x i16]] } + %struct.AVRational = type { i32, i32 } + %struct.DSPContext = type { void (i16*, i8*, i32)*, void (i16*, i8*, i8*, i32)*, void (i16*, i8*, i32)*, void (i16*, i8*, i32)*, void (i16*, i8*, i32)*, void (i8*, i16*, i32)*, void (i8*, i16*, i32)*, void (i8*, i8*, i32, i32, i32, i32, i32)*, void (i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)*, void (i16*)*, i32 (i8*, i32)*, i32 (i8*, i32)*, [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], i32 (i8*, i16*, i32)*, [4 x [4 x void (i8*, i8*, i32, i32)*]], [4 x [4 x void (i8*, i8*, i32, i32)*]], [4 x [4 x void (i8*, i8*, i32, i32)*]], [4 x [4 x void (i8*, i8*, i32, i32)*]], [2 x void (i8*, i8*, i8*, i32, i32)*], [11 x void (i8*, i8*, i32, i32, i32)*], [11 x void (i8*, i8*, i32, i32, i32)*], [2 x [16 x void (i8*, i8*, i32)*]], [2 x [16 x void (i8*, i8*, i32)*]], [2 x [16 x void (i8*, i8*, i32)*]], [2 x [16 x void (i8*, i8*, i32)*]], [8 x void (i8*, i8*, i32)*], [3 x void (i8*, i8*, i32, i32, 
i32, i32)*], [3 x void (i8*, i8*, i32, i32, i32, i32)*], [3 x void (i8*, i8*, i32, i32, i32, i32)*], [4 x [16 x void (i8*, i8*, i32)*]], [4 x [16 x void (i8*, i8*, i32)*]], [4 x [16 x void (i8*, i8*, i32)*]], [4 x [16 x void (i8*, i8*, i32)*]], [10 x void (i8*, i32, i32, i32, i32)*], [10 x void (i8*, i8*, i32, i32, i32, i32, i32)*], [2 x [16 x void (i8*, i8*, i32)*]], [2 x [16 x void (i8*, i8*, i32)*]], void (i8*, i32, i32, i32, i32, i32, i32)*, void (i8*, i32, i32, i32, i32, i32, i32)*, void (i8*, i32, i32, i32, i32, i32, i32)*, void (i8*, i32, i32, i32, i32, i32, i32)*, void (i8*, i16*, i32)*, [2 x [4 x i32 (i8*, i8*, i8*, i32, i32)*]], void (i8*, i8*, i32)*, void (i8*, i8*, i8*, i32)*, void (i8*, i8*, i8*, i32, i32*, i32*)*, void (i32*, i32*, i32)*, void (i8*, i32, i32, i32, i8*)*, void (i8*, i32, i32, i32, i8*)*, void (i8*, i32, i32, i32, i8*)*, void (i8*, i32, i32, i32, i8*)*, void (i8*, i32, i32, i32)*, void (i8*, i32, i32, i32)*, void ([4 x [4 x i16]]*, i8*, [40 x i8]*, [40 x [2 x i16]]*, i32, i32, i32, i32, i32)*, void (i8*, i32, i32)*, void (i8*, i32, i32)*, void (i8*, i32)*, void (float*, float*, i32)*, void (float*, float*, i32)*, void (float*, float*, float*, i32)*, void (float*, float*, float*, float*, i32, i32, i32)*, void (i16*, float*, i32)*, void (i16*)*, void (i16*)*, void (i16*)*, void (i8*, i32, i16*)*, void (i8*, i32, i16*)*, [64 x i8], i32, i32 (i16*, i16*, i16*, i32)*, void (i16*, i16*, i32)*, void (i8*, i16*, i32)*, void (i8*, i16*, i32)*, void (i8*, i16*, i32)*, void (i8*, i16*, i32)*, void ([4 x i16]*)*, void (i32*, i32*, i32*, i32*, i32*, i32*, i32)*, void (i32*, i32)*, void (i8*, i32, i8**, i32, i32, i32, i32, i32, %struct.slice_buffer*, i32, i8*)*, void (i8*, i32, i32)*, [4 x void (i8*, i32, i8*, i32, i32, i32)*], void (i16*)*, void (i16*, i32)*, void (i16*, i32)*, void (i16*, i32)*, void (i8*, i32)*, void (i8*, i32)*, [16 x void (i8*, i8*, i32, i32)*] } + %struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, 
%struct._IO_marker*, %struct.FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i32, i32, [40 x i8] } + %struct.GetBitContext = type { i8*, i8*, i32*, i32, i32, i32, i32 } + %struct.MJpegContext = type opaque + %struct.MotionEstContext = type { %struct.AVCodecContext*, i32, [4 x [2 x i32]], [4 x [2 x i32]], i8*, i8*, [2 x i8*], i8*, i32, i32*, i32*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [4 x [4 x i8*]], [4 x [4 x i8*]], i32, i32, i32, i32, i32, [4 x void (i8*, i8*, i32, i32)*]*, [4 x void (i8*, i8*, i32, i32)*]*, [16 x void (i8*, i8*, i32)*]*, [16 x void (i8*, i8*, i32)*]*, [4097 x i8]*, i8*, i32 (%struct.MpegEncContext*, i32*, i32*, i32, i32, i32, i32, i32)* } + %struct.MpegEncContext = type { %struct.AVCodecContext*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.PutBitContext, i32, i32, i32, i32, i32, i32, i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.Picture*, %struct.Picture**, %struct.Picture**, i32, i32, [8 x %struct.MpegEncContext*], %struct.Picture, %struct.Picture, %struct.Picture, %struct.Picture, %struct.Picture*, %struct.Picture*, %struct.Picture*, [3 x i8*], [3 x i32], i16*, [3 x i16*], [20 x i16], i32, i32, i8*, i8*, i8*, i8*, i8*, [16 x i16]*, [3 x [16 x i16]*], i32, i8*, i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i32, i32, i32, i32, i32*, i32, i32, i32, i32, i32, i32, i32, [5 x i32], i32, i32, i32, i32, %struct.DSPContext, i32, i32, [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x [2 x [2 x i16]*]], [2 x [2 x [2 x [2 x i16]*]]], [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x [2 x [2 x i16]*]], [2 x [2 x [2 x [2 x i16]*]]], [2 x i8*], [2 x [2 x i8*]], i32, i32, i32, [2 x [4 x [2 x i32]]], [2 x [2 x i32]], [2 x [2 x [2 x i32]]], i8*, [2 x [64 x i16]], %struct.MotionEstContext, i32, i32, i32, i32, i32, i32, i16*, [6 x i32], [6 x i32], [3 x 
i8*], i32*, [64 x i16], [64 x i16], [64 x i16], [64 x i16], i32, i32, i32, i32, i32, i8*, i8*, i8*, i8*, i8*, i8*, [8 x i32], [64 x i32]*, [64 x i32]*, [2 x [64 x i16]]*, [2 x [64 x i16]]*, [12 x i32], %struct.ScanTable, %struct.ScanTable, %struct.ScanTable, %struct.ScanTable, [64 x i32]*, [2 x i32], [64 x i16]*, i8*, i64, i64, i32, i32, %struct.RateControlContext, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i32, i32, %struct.GetBitContext, i32, i32, i32, %struct.ParseContext, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i16, i16, i16, i16, i32, i32, i32, i32, i32, i32, i32, i32, i32, [2 x [2 x i32]], [2 x [2 x i32]], [2 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.PutBitContext, %struct.PutBitContext, i32, i32, i32, i32, i32, i32, i8*, i32, i32, i32, i32, i32, [3 x i32], %struct.MJpegContext*, [3 x i32], [3 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [2 x [65 x [65 x [2 x i32]]]]*, i32, i32, %struct.GetBitContext, i32, i32, i32, i8*, i32, [2 x [2 x i32]], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [2 x i32], i32, i32, i32, i32, i8*, i32, [12 x i16*], [64 x i16]*, [8 x [64 x i16]]*, i32 (%struct.MpegEncContext*, [64 x i16]*)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, i32 (%struct.MpegEncContext*, i16*, i32, i32, i32*)*, i32 (%struct.MpegEncContext*, i16*, i32, i32, i32*)*, void (%struct.MpegEncContext*, i16*)* } + %struct.ParseContext = type { i8*, 
i32, i32, i32, i32, i32, i32, i32 } + %struct.Picture = type { [4 x i8*], [4 x i32], [4 x i8*], i32, i32, i64, i32, i32, i32, i32, i32, i8*, i32, i8*, [2 x [2 x i16]*], i32*, i8, i8*, [4 x i64], i32, i32, i32, i32, i32, %struct.AVPanScan*, i32, i32, i16*, [2 x i8*], [3 x i8*], [2 x [2 x i16]*], i32*, [2 x i32], i32, i32, i32, i32, [2 x [16 x i32]], [2 x i32], i32, i32, i16*, i16*, i8*, i32*, i32 } + %struct.Predictor = type { double, double, double } + %struct.PutBitContext = type { i32, i32, i8*, i8*, i8* } + %struct.RateControlContext = type { %struct.FILE*, i32, %struct.RateControlEntry*, double, [5 x %struct.Predictor], double, double, double, double, double, [5 x double], i32, i32, [5 x i64], [5 x i64], [5 x i64], [5 x i64], [5 x i32], i32, i8*, float, i32, %struct.AVEvalExpr* } + %struct.RateControlEntry = type { i32, float, i32, i32, i32, i32, i32, i64, i32, float, i32, i32, i32, i32, i32, i32 } + %struct.RcOverride = type { i32, i32, i32, float } + %struct.ScanTable = type { i8*, [64 x i8], [64 x i8] } + %struct._IO_marker = type { %struct._IO_marker*, %struct.FILE*, i32 } + %struct.slice_buffer = type opaque + +define float @ff_rate_estimate_qscale(%struct.MpegEncContext* %s, i32 %dry_run) { +entry: + br i1 false, label %cond_false163, label %cond_true135 + +cond_true135: ; preds = %entry + ret float 0.000000e+00 + +cond_false163: ; preds = %entry + br i1 false, label %cond_true203, label %cond_next211 + +cond_true203: ; preds = %cond_false163 + ret float 0.000000e+00 + +cond_next211: ; preds = %cond_false163 + br i1 false, label %cond_false243, label %cond_true220 + +cond_true220: ; preds = %cond_next211 + br i1 false, label %cond_next237, label %cond_true225 + +cond_true225: ; preds = %cond_true220 + ret float 0.000000e+00 + +cond_next237: ; preds = %cond_true220 + br i1 false, label %cond_false785, label %cond_true735 + +cond_false243: ; preds = %cond_next211 + ret float 0.000000e+00 + +cond_true735: ; preds = %cond_next237 + ret float 0.000000e+00 + 
+cond_false785: ; preds = %cond_next237 + br i1 false, label %cond_true356.i.preheader, label %bb359.i + +cond_true356.i.preheader: ; preds = %cond_false785 + %tmp116117.i = zext i8 0 to i32 ; <i32> [#uses=1] + br i1 false, label %cond_false.i, label %cond_next159.i + +cond_false.i: ; preds = %cond_true356.i.preheader + ret float 0.000000e+00 + +cond_next159.i: ; preds = %cond_true356.i.preheader + %tmp178.i = add i32 %tmp116117.i, -128 ; <i32> [#uses=2] + %tmp181.i = mul i32 %tmp178.i, %tmp178.i ; <i32> [#uses=1] + %tmp181182.i = sitofp i32 %tmp181.i to float ; <float> [#uses=1] + %tmp199200.pn.in.i = fmul float %tmp181182.i, 0.000000e+00 ; <float> [#uses=1] + %tmp199200.pn.i = fpext float %tmp199200.pn.in.i to double ; <double> [#uses=1] + %tmp201.pn.i = fsub double 1.000000e+00, %tmp199200.pn.i ; <double> [#uses=1] + %factor.2.in.i = fmul double 0.000000e+00, %tmp201.pn.i ; <double> [#uses=1] + %factor.2.i = fptrunc double %factor.2.in.i to float ; <float> [#uses=1] + br i1 false, label %cond_next312.i, label %cond_false222.i + +cond_false222.i: ; preds = %cond_next159.i + ret float 0.000000e+00 + +cond_next312.i: ; preds = %cond_next159.i + %tmp313314.i = fpext float %factor.2.i to double ; <double> [#uses=0] + ret float 0.000000e+00 + +bb359.i: ; preds = %cond_false785 + ret float 0.000000e+00 +}
diff --git a/src/LLVM/test/CodeGen/Generic/2007-12-17-InvokeAsm.ll b/src/LLVM/test/CodeGen/Generic/2007-12-17-InvokeAsm.ll new file mode 100644 index 0000000..27c7162 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2007-12-17-InvokeAsm.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s + +define fastcc void @bc__support__high_resolution_time__initialize_clock_rate() { +entry: + invoke void asm "rdtsc\0A\09movl %eax, $0\0A\09movl %edx, $1", "=*imr,=*imr,~{dirflag},~{fpsr},~{flags},~{dx},~{ax}"( i32* null, i32* null ) + to label %.noexc unwind label %cleanup144 + +.noexc: ; preds = %entry + ret void + +cleanup144: ; preds = %entry + %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + resume { i8*, i32 } %exn +} + +declare i32 @__gxx_personality_v0(...)
diff --git a/src/LLVM/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll b/src/LLVM/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll new file mode 100644 index 0000000..943ed88 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll
@@ -0,0 +1,37 @@ +; RUN: llc < %s +; PR1833 + + %struct.__class_type_info_pseudo = type { %struct.__type_info_pseudo } + %struct.__type_info_pseudo = type { i8*, i8* } +@_ZTI2e1 = external constant %struct.__class_type_info_pseudo ; <%struct.__class_type_info_pseudo*> [#uses=1] + +define void @_Z7ex_testv() { +entry: + invoke void @__cxa_throw( i8* null, i8* bitcast (%struct.__class_type_info_pseudo* @_ZTI2e1 to i8*), void (i8*)* null ) noreturn + to label %UnifiedUnreachableBlock unwind label %lpad + +bb14: ; preds = %lpad + unreachable + +lpad: ; preds = %entry + %lpad1 = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0 + catch i8* null + invoke void @__cxa_end_catch( ) + to label %bb14 unwind label %lpad17 + +lpad17: ; preds = %lpad + %lpad2 = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0 + catch i8* null + unreachable + +UnifiedUnreachableBlock: ; preds = %entry + unreachable +} + +declare void @__cxa_throw(i8*, i8*, void (i8*)*) noreturn + +declare i32 @llvm.eh.selector.i32(i8*, i8*, ...) + +declare void @__cxa_end_catch() + +declare i32 @__gxx_personality_v0(...)
diff --git a/src/LLVM/test/CodeGen/Generic/2008-01-25-dag-combine-mul.ll b/src/LLVM/test/CodeGen/Generic/2008-01-25-dag-combine-mul.ll new file mode 100644 index 0000000..314bb05 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2008-01-25-dag-combine-mul.ll
@@ -0,0 +1,31 @@ +; RUN: llc < %s +; rdar://5707064 + +define i32 @f(i16* %pc) { +entry: + %acc = alloca i64, align 8 ; <i64*> [#uses=4] + %tmp97 = load i64* %acc, align 8 ; <i64> [#uses=1] + %tmp98 = and i64 %tmp97, 4294967295 ; <i64> [#uses=1] + %tmp99 = load i64* null, align 8 ; <i64> [#uses=1] + %tmp100 = and i64 %tmp99, 4294967295 ; <i64> [#uses=1] + %tmp101 = mul i64 %tmp98, %tmp100 ; <i64> [#uses=1] + %tmp103 = lshr i64 %tmp101, 0 ; <i64> [#uses=1] + %tmp104 = load i64* %acc, align 8 ; <i64> [#uses=1] + %.cast105 = zext i32 32 to i64 ; <i64> [#uses=1] + %tmp106 = lshr i64 %tmp104, %.cast105 ; <i64> [#uses=1] + %tmp107 = load i64* null, align 8 ; <i64> [#uses=1] + %tmp108 = and i64 %tmp107, 4294967295 ; <i64> [#uses=1] + %tmp109 = mul i64 %tmp106, %tmp108 ; <i64> [#uses=1] + %tmp112 = add i64 %tmp109, 0 ; <i64> [#uses=1] + %tmp116 = add i64 %tmp112, 0 ; <i64> [#uses=1] + %tmp117 = add i64 %tmp103, %tmp116 ; <i64> [#uses=1] + %tmp118 = load i64* %acc, align 8 ; <i64> [#uses=1] + %tmp120 = lshr i64 %tmp118, 0 ; <i64> [#uses=1] + %tmp121 = load i64* null, align 8 ; <i64> [#uses=1] + %tmp123 = lshr i64 %tmp121, 0 ; <i64> [#uses=1] + %tmp124 = mul i64 %tmp120, %tmp123 ; <i64> [#uses=1] + %tmp126 = shl i64 %tmp124, 0 ; <i64> [#uses=1] + %tmp127 = add i64 %tmp117, %tmp126 ; <i64> [#uses=1] + store i64 %tmp127, i64* %acc, align 8 + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/Generic/2008-01-30-LoadCrash.ll b/src/LLVM/test/CodeGen/Generic/2008-01-30-LoadCrash.ll new file mode 100644 index 0000000..70c3aaa --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2008-01-30-LoadCrash.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s + +@letters.3100 = external constant [63 x i8] ; <[63 x i8]*> [#uses=2] + +define i32 @mkstemps(i8* %pattern, i32 %suffix_len, i64 %tmp42.rle) nounwind { +bb20: + br label %bb41 + +bb41: ; preds = %bb20 + %tmp8182 = trunc i64 %tmp42.rle to i32 ; <i32> [#uses=1] + %tmp83 = getelementptr [63 x i8]* @letters.3100, i32 0, i32 %tmp8182 ; <i8*> [#uses=1] + %tmp84 = load i8* %tmp83, align 1 ; <i8> [#uses=1] + store i8 %tmp84, i8* null, align 1 + %tmp90 = urem i64 %tmp42.rle, 62 ; <i64> [#uses=1] + %tmp9091 = trunc i64 %tmp90 to i32 ; <i32> [#uses=1] + %tmp92 = getelementptr [63 x i8]* @letters.3100, i32 0, i32 %tmp9091 ; <i8*> [#uses=1] + store i8* %tmp92, i8** null, align 1 + ret i32 -1 +}
diff --git a/src/LLVM/test/CodeGen/Generic/2008-02-04-Ctlz.ll b/src/LLVM/test/CodeGen/Generic/2008-02-04-Ctlz.ll new file mode 100644 index 0000000..288bfd2 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2008-02-04-Ctlz.ll
@@ -0,0 +1,21 @@ +; RUN: llc < %s + +@.str = internal constant [14 x i8] c"%lld %d %d %d\00" + +define i32 @main(i64 %arg) nounwind { +entry: + %tmp37 = tail call i64 @llvm.ctlz.i64( i64 %arg ) ; <i64> [#uses=1] + %tmp47 = tail call i64 @llvm.cttz.i64( i64 %arg ) ; <i64> [#uses=1] + %tmp57 = tail call i64 @llvm.ctpop.i64( i64 %arg ) ; <i64> [#uses=1] + %tmp38 = trunc i64 %tmp37 to i32 ; <i32>:0 [#uses=1] + %tmp48 = trunc i64 %tmp47 to i32 ; <i32>:0 [#uses=1] + %tmp58 = trunc i64 %tmp57 to i32 ; <i32>:0 [#uses=1] + %tmp40 = tail call i32 (i8*, ...)* @printf( i8* noalias getelementptr ([14 x i8]* @.str, i32 0, i32 0), i64 %arg, i32 %tmp38, i32 %tmp48, i32 %tmp58 ) nounwind ; <i32> [#uses=0] + ret i32 0 +} + +declare i32 @printf(i8* noalias , ...) nounwind + +declare i64 @llvm.ctlz.i64(i64) nounwind readnone +declare i64 @llvm.cttz.i64(i64) nounwind readnone +declare i64 @llvm.ctpop.i64(i64) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/Generic/2008-02-04-ExtractSubvector.ll b/src/LLVM/test/CodeGen/Generic/2008-02-04-ExtractSubvector.ll new file mode 100644 index 0000000..8bf82df --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2008-02-04-ExtractSubvector.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s + +define i32 @main() nounwind { +entry: + br label %bb15 + +bb15: ; preds = %bb15, %entry + %tmp21 = fadd <8 x double> zeroinitializer, zeroinitializer ; <<8 x double>> [#uses=1] + br i1 false, label %bb30, label %bb15 + +bb30: ; preds = %bb15 + store <8 x double> %tmp21, <8 x double>* null, align 64 + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/Generic/2008-02-20-MatchingMem.ll b/src/LLVM/test/CodeGen/Generic/2008-02-20-MatchingMem.ll new file mode 100644 index 0000000..da1aeb5 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2008-02-20-MatchingMem.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s +; PR1133 +define void @test(i32* %X) nounwind { +entry: + %tmp1 = getelementptr i32* %X, i32 10 ; <i32*> [#uses=2] + tail call void asm sideeffect " $0 $1 ", "=*im,*im,~{memory}"( i32* %tmp1, i32* %tmp1 ) nounwind + ret void +} +
diff --git a/src/LLVM/test/CodeGen/Generic/2008-02-25-NegateZero.ll b/src/LLVM/test/CodeGen/Generic/2008-02-25-NegateZero.ll new file mode 100644 index 0000000..97db667 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2008-02-25-NegateZero.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s +; rdar://5763967 + +define void @test() { +entry: + %tmp98 = load float* null, align 4 ; <float> [#uses=1] + %tmp106 = load float* null, align 4 ; <float> [#uses=1] + %tmp113 = fadd float %tmp98, %tmp106 ; <float> [#uses=1] + %tmp119 = fsub float %tmp113, 0.000000e+00 ; <float> [#uses=1] + call void (i32, ...)* @foo( i32 0, float 0.000000e+00, float %tmp119 ) nounwind + ret void +} + +declare void @foo(i32, ...)
diff --git a/src/LLVM/test/CodeGen/Generic/2008-02-26-NegatableCrash.ll b/src/LLVM/test/CodeGen/Generic/2008-02-26-NegatableCrash.ll new file mode 100644 index 0000000..10b3d44 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2008-02-26-NegatableCrash.ll
@@ -0,0 +1,50 @@ +; RUN: llc < %s +; PR2096 + %struct.AVClass = type { i8*, i8* (i8*)*, %struct.AVOption* } + %struct.AVCodec = type { i8*, i32, i32, i32, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32, i8*)*, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32*, i8*, i32)*, i32, %struct.AVCodec*, void (%struct.AVCodecContext*)*, %struct.AVRational*, i32* } + %struct.AVCodecContext = type { %struct.AVClass*, i32, i32, i32, i32, i32, i8*, i32, %struct.AVRational, i32, i32, i32, i32, i32, void (%struct.AVCodecContext*, %struct.AVFrame*, i32*, i32, i32, i32)*, i32, i32, i32, i32, i32, i32, i32, float, float, i32, i32, i32, i32, float, i32, i32, i32, %struct.AVCodec*, i8*, i32, i32, void (%struct.AVCodecContext*, i8*, i32, i32)*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, [32 x i8], i32, i32, i32, i32, i32, i32, i32, float, i32, i32 (%struct.AVCodecContext*, %struct.AVFrame*)*, void (%struct.AVCodecContext*, %struct.AVFrame*)*, i32, i32, i32, i32, i8*, i8*, float, float, i32, %struct.RcOverride*, i32, i8*, i32, i32, i32, float, float, float, float, i32, float, float, float, float, float, i32, i32, i32, i32*, i32, i32, i32, i32, %struct.AVRational, %struct.AVFrame*, i32, i32, [4 x i64], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 (%struct.AVCodecContext*, i32*)*, i32, i32, i32, i32, i32, i32, i8*, i32, i32, i32, i32, i32, i32, i16*, i16*, i32, i32, i32, i32, %struct.AVPaletteControl*, i32, i32 (%struct.AVCodecContext*, %struct.AVFrame*)*, i32, i32, i32, i32, i32, i32, i32, i32 (%struct.AVCodecContext*, i32 (%struct.AVCodecContext*, i8*)*, i8**, i32*, i32)*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i32, float } + %struct.AVFrame = type { [4 x i8*], [4 x i32], [4 x i8*], i32, i32, i64, i32, 
i32, i32, i32, i32, i8*, i32, i8*, [2 x [2 x i16]*], i32*, i8, i8*, [4 x i64], i32, i32, i32, i32, i32, %struct.AVPanScan*, i32, i32, i16*, [2 x i8*] } + %struct.AVOption = type opaque + %struct.AVPaletteControl = type { i32, [256 x i32] } + %struct.AVPanScan = type { i32, i32, i32, [3 x [2 x i16]] } + %struct.AVRational = type { i32, i32 } + %struct.RcOverride = type { i32, i32, i32, float } + +define i32 @sonic_encode_frame(%struct.AVCodecContext* %avctx, i8* %buf, i32 %buf_size, i8* %data) { +entry: + switch i32 0, label %bb429 [ + i32 0, label %bb244.preheader + i32 1, label %bb279.preheader + ] + +bb279.preheader: ; preds = %entry + ret i32 0 + +bb244.preheader: ; preds = %entry + ret i32 0 + +bb429: ; preds = %entry + br i1 false, label %bb.nph1770, label %bb627 + +bb.nph1770: ; preds = %bb429 + br i1 false, label %bb471, label %bb505 + +bb471: ; preds = %bb471, %bb.nph1770 + %tmp487 = fadd double 0.000000e+00, 0.000000e+00 ; <double> [#uses=1] + br i1 false, label %bb505, label %bb471 + +bb505: ; preds = %bb471, %bb.nph1770 + %xy.0.lcssa = phi double [ 0.000000e+00, %bb.nph1770 ], [ %tmp487, %bb471 ] ; <double> [#uses=1] + %tmp507 = fsub double -0.000000e+00, %xy.0.lcssa ; <double> [#uses=1] + %tmp509 = fdiv double %tmp507, 0.000000e+00 ; <double> [#uses=1] + %tmp510 = fmul double %tmp509, 1.024000e+03 ; <double> [#uses=1] + %tmp516 = fdiv double %tmp510, 0.000000e+00 ; <double> [#uses=1] + %tmp517 = fadd double %tmp516, 5.000000e-01 ; <double> [#uses=1] + %tmp518 = tail call double @floor( double %tmp517 ) nounwind readnone ; <double> [#uses=0] + ret i32 0 + +bb627: ; preds = %bb429 + ret i32 0 +} + +declare double @floor(double) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/Generic/2008-08-07-PtrToInt-SmallerInt.ll b/src/LLVM/test/CodeGen/Generic/2008-08-07-PtrToInt-SmallerInt.ll new file mode 100644 index 0000000..00ca8c7 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2008-08-07-PtrToInt-SmallerInt.ll
@@ -0,0 +1,5 @@ +; RUN: llc < %s +; PR2603 + %struct.A = type { i8 } + %struct.B = type { i8, [1 x i8] } +@Foo = constant %struct.A { i8 ptrtoint (i8* getelementptr ([1 x i8]* inttoptr (i32 17 to [1 x i8]*), i32 0, i32 -16) to i8) } ; <%struct.A*> [#uses=0]
diff --git a/src/LLVM/test/CodeGen/Generic/2009-03-17-LSR-APInt.ll b/src/LLVM/test/CodeGen/Generic/2009-03-17-LSR-APInt.ll new file mode 100644 index 0000000..6281ada --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2009-03-17-LSR-APInt.ll
@@ -0,0 +1,92 @@ +; RUN: llc < %s +; PR3806 + + %struct..0__pthread_mutex_s = type { i32, i32, i32, i32, i32, i32, %struct.__pthread_list_t } + %struct.Alignment = type { i32 } + %struct.QDesignerFormWindowInterface = type { %struct.QWidget } + %struct.QFont = type { %struct.QFontPrivate*, i32 } + %struct.QFontPrivate = type opaque + %"struct.QHash<QString,QList<QAbstractExtensionFactory*> >" = type { %"struct.QHash<QString,QList<QAbstractExtensionFactory*> >::._120" } + %"struct.QHash<QString,QList<QAbstractExtensionFactory*> >::._120" = type { %struct.QHashData* } + %struct.QHashData = type { %"struct.QHashData::Node"*, %"struct.QHashData::Node"**, %struct.Alignment, i32, i32, i16, i16, i32, i8 } + %"struct.QHashData::Node" = type { %"struct.QHashData::Node"*, i32 } + %"struct.QList<QAbstractExtensionFactory*>" = type { %"struct.QList<QAbstractExtensionFactory*>::._101" } + %"struct.QList<QAbstractExtensionFactory*>::._101" = type { %struct.QListData } + %struct.QListData = type { %"struct.QListData::Data"* } + %"struct.QListData::Data" = type { %struct.Alignment, i32, i32, i32, i8, [1 x i8*] } + %struct.QObject = type { i32 (...)**, %struct.QObjectData* } + %struct.QObjectData = type { i32 (...)**, %struct.QObject*, %struct.QObject*, %"struct.QList<QAbstractExtensionFactory*>", i32, i32 } + %struct.QPaintDevice.base = type { i32 (...)**, i16 } + %"struct.QPair<int,int>" = type { i32, i32 } + %struct.QPalette = type { %struct.QPalettePrivate*, i32 } + %struct.QPalettePrivate = type opaque + %struct.QRect = type { i32, i32, i32, i32 } + %struct.QWidget = type { %struct.QObject, %struct.QPaintDevice.base, %struct.QWidgetData* } + %struct.QWidgetData = type { i64, i32, %struct.Alignment, i8, i8, i16, %struct.QRect, %struct.QPalette, %struct.QFont, %struct.QRect } + %struct.__pthread_list_t = type { %struct.__pthread_list_t*, %struct.__pthread_list_t* } + %struct.pthread_attr_t = type { i64, [48 x i8] } + %struct.pthread_mutex_t = type { %struct..0__pthread_mutex_s } 
+ %"struct.qdesigner_internal::Grid" = type { i32, i32, %struct.QWidget**, i8*, i8* } + %"struct.qdesigner_internal::GridLayout" = type { %"struct.qdesigner_internal::Layout", %"struct.QPair<int,int>", %"struct.qdesigner_internal::Grid"* } + %"struct.qdesigner_internal::Layout" = type { %struct.QObject, %"struct.QList<QAbstractExtensionFactory*>", %struct.QWidget*, %"struct.QHash<QString,QList<QAbstractExtensionFactory*> >", %struct.QWidget*, %struct.QDesignerFormWindowInterface*, i8, %"struct.QPair<int,int>", %struct.QRect, i8 } + +@_ZL20__gthrw_pthread_oncePiPFvvE = alias weak i32 (i32*, void ()*)* @pthread_once ; <i32 (i32*, void ()*)*> [#uses=0] +@_ZL27__gthrw_pthread_getspecificj = alias weak i8* (i32)* @pthread_getspecific ; <i8* (i32)*> [#uses=0] +@_ZL27__gthrw_pthread_setspecificjPKv = alias weak i32 (i32, i8*)* @pthread_setspecific ; <i32 (i32, i8*)*> [#uses=0] +@_ZL22__gthrw_pthread_createPmPK14pthread_attr_tPFPvS3_ES3_ = alias weak i32 (i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)* @pthread_create ; <i32 (i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)*> [#uses=0] +@_ZL22__gthrw_pthread_cancelm = alias weak i32 (i64)* @pthread_cancel ; <i32 (i64)*> [#uses=0] +@_ZL26__gthrw_pthread_mutex_lockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_lock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0] +@_ZL29__gthrw_pthread_mutex_trylockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_trylock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0] +@_ZL28__gthrw_pthread_mutex_unlockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_unlock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0] +@_ZL26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t = alias weak i32 (%struct.pthread_mutex_t*, %struct.Alignment*)* @pthread_mutex_init ; <i32 (%struct.pthread_mutex_t*, %struct.Alignment*)*> [#uses=0] +@_ZL26__gthrw_pthread_key_createPjPFvPvE = alias weak i32 (i32*, void (i8*)*)* 
@pthread_key_create ; <i32 (i32*, void (i8*)*)*> [#uses=0] +@_ZL26__gthrw_pthread_key_deletej = alias weak i32 (i32)* @pthread_key_delete ; <i32 (i32)*> [#uses=0] +@_ZL30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t = alias weak i32 (%struct.Alignment*)* @pthread_mutexattr_init ; <i32 (%struct.Alignment*)*> [#uses=0] +@_ZL33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti = alias weak i32 (%struct.Alignment*, i32)* @pthread_mutexattr_settype ; <i32 (%struct.Alignment*, i32)*> [#uses=0] +@_ZL33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t = alias weak i32 (%struct.Alignment*)* @pthread_mutexattr_destroy ; <i32 (%struct.Alignment*)*> [#uses=0] + +define void @_ZN18qdesigner_internal10GridLayout9buildGridEv(%"struct.qdesigner_internal::GridLayout"* %this) nounwind { +entry: + br label %bb44 + +bb44: ; preds = %bb47, %entry + %indvar = phi i128 [ %indvar.next144, %bb47 ], [ 0, %entry ] ; <i128> [#uses=2] + br i1 false, label %bb46, label %bb47 + +bb46: ; preds = %bb44 + %tmp = shl i128 %indvar, 64 ; <i128> [#uses=1] + %tmp96 = and i128 %tmp, 79228162495817593519834398720 ; <i128> [#uses=0] + br label %bb47 + +bb47: ; preds = %bb46, %bb44 + %indvar.next144 = add i128 %indvar, 1 ; <i128> [#uses=1] + br label %bb44 +} + +declare i32 @pthread_once(i32*, void ()*) + +declare i8* @pthread_getspecific(i32) + +declare i32 @pthread_setspecific(i32, i8*) + +declare i32 @pthread_create(i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*) + +declare i32 @pthread_cancel(i64) + +declare i32 @pthread_mutex_lock(%struct.pthread_mutex_t*) + +declare i32 @pthread_mutex_trylock(%struct.pthread_mutex_t*) + +declare i32 @pthread_mutex_unlock(%struct.pthread_mutex_t*) + +declare i32 @pthread_mutex_init(%struct.pthread_mutex_t*, %struct.Alignment*) + +declare i32 @pthread_key_create(i32*, void (i8*)*) + +declare i32 @pthread_key_delete(i32) + +declare i32 @pthread_mutexattr_init(%struct.Alignment*) + +declare i32 @pthread_mutexattr_settype(%struct.Alignment*, i32) + 
+declare i32 @pthread_mutexattr_destroy(%struct.Alignment*)
diff --git a/src/LLVM/test/CodeGen/Generic/2009-03-29-SoftFloatVectorExtract.ll b/src/LLVM/test/CodeGen/Generic/2009-03-29-SoftFloatVectorExtract.ll new file mode 100644 index 0000000..45b561a --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2009-03-29-SoftFloatVectorExtract.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -soft-float +; PR3899 + +@m = external global <2 x double> + +define double @vector_ex() nounwind { + %v = load <2 x double>* @m + %x = extractelement <2 x double> %v, i32 1 + ret double %x +}
diff --git a/src/LLVM/test/CodeGen/Generic/2009-04-10-SinkCrash.ll b/src/LLVM/test/CodeGen/Generic/2009-04-10-SinkCrash.ll new file mode 100644 index 0000000..125f875 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2009-04-10-SinkCrash.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s + +define void @QRiterate(i32 %p.1, double %tmp.212) nounwind { +entry: + br i1 false, label %shortcirc_next.1, label %exit.1.critedge + +shortcirc_next.1: ; preds = %shortcirc_next.1, %entry + %tmp.213 = fcmp une double %tmp.212, 0.000000e+00 ; <i1> [#uses=1] + br i1 %tmp.213, label %shortcirc_next.1, label %exit.1 + +exit.1.critedge: ; preds = %entry + ret void + +exit.1: ; preds = %shortcirc_next.1 + ret void +}
diff --git a/src/LLVM/test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll b/src/LLVM/test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll new file mode 100644 index 0000000..b62f811 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll
@@ -0,0 +1,32 @@ +; RUN: llc < %s +; rdar://6836460 +; rdar://7516906 +; PR5963 + +define i32 @test(i128* %P) nounwind { +entry: + %tmp48 = load i128* %P + %and49 = and i128 %tmp48, 18446744073709551616 ; <i128> [#uses=1] + %tobool = icmp ne i128 %and49, 0 ; <i1> [#uses=1] + br i1 %tobool, label %if.then50, label %if.end61 + +if.then50: ; preds = %if.then20 + ret i32 1241 + +if.end61: ; preds = %if.then50, %if.then20, %entry + ret i32 123 +} + +define i32 @test2(i320* %P) nounwind { +entry: + %tmp48 = load i320* %P + %and49 = and i320 %tmp48, 25108406941546723055343157692830665664409421777856138051584 + %tobool = icmp ne i320 %and49, 0 ; <i1> [#uses=1] + br i1 %tobool, label %if.then50, label %if.end61 + +if.then50: ; preds = %if.then20 + ret i32 1241 + +if.end61: ; preds = %if.then50, %if.then20, %entry + ret i32 123 +}
diff --git a/src/LLVM/test/CodeGen/Generic/2009-06-03-UnreachableSplitPad.ll b/src/LLVM/test/CodeGen/Generic/2009-06-03-UnreachableSplitPad.ll new file mode 100644 index 0000000..ad418f7 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2009-06-03-UnreachableSplitPad.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s +; PR4317 + +declare i32 @b() + +define void @a() { +entry: + ret void + +dummy: + invoke i32 @b() to label %reg unwind label %reg + +reg: + %lpad = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0 + catch i8* null + ret void +} + +declare i32 @__gxx_personality_v0(...)
diff --git a/src/LLVM/test/CodeGen/Generic/2009-11-16-BadKillsCrash.ll b/src/LLVM/test/CodeGen/Generic/2009-11-16-BadKillsCrash.ll new file mode 100644 index 0000000..3cbf4c5 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2009-11-16-BadKillsCrash.ll
@@ -0,0 +1,79 @@ +; RUN: llc < %s +; PR5495 + +%"struct.std::__ctype_abstract_base<wchar_t>" = type { %"struct.std::locale::facet" } +%"struct.std::basic_ios<char,std::char_traits<char> >" = type { %"struct.std::ios_base", %"struct.std::basic_ostream<char,std::char_traits<char> >"*, i8, i8, %"struct.std::basic_streambuf<char,std::char_traits<char> >"*, %"struct.std::ctype<char>"*, %"struct.std::__ctype_abstract_base<wchar_t>"*, %"struct.std::__ctype_abstract_base<wchar_t>"* } +%"struct.std::basic_istream<char,std::char_traits<char> >" = type { i32 (...)**, i32, %"struct.std::basic_ios<char,std::char_traits<char> >" } +%"struct.std::basic_ostream<char,std::char_traits<char> >" = type { i32 (...)**, %"struct.std::basic_ios<char,std::char_traits<char> >" } +%"struct.std::basic_streambuf<char,std::char_traits<char> >" = type { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, %"struct.std::locale" } +%"struct.std::ctype<char>" = type { %"struct.std::locale::facet", i32*, i8, i32*, i32*, i16*, i8, [256 x i8], [256 x i8], i8 } +%"struct.std::ios_base" = type { i32 (...)**, i32, i32, i32, i32, i32, %"struct.std::ios_base::_Callback_list"*, %"struct.std::ios_base::_Words", [8 x %"struct.std::ios_base::_Words"], i32, %"struct.std::ios_base::_Words"*, %"struct.std::locale" } +%"struct.std::ios_base::_Callback_list" = type { %"struct.std::ios_base::_Callback_list"*, void (i32, %"struct.std::ios_base"*, i32)*, i32, i32 } +%"struct.std::ios_base::_Words" = type { i8*, i32 } +%"struct.std::locale" = type { %"struct.std::locale::_Impl"* } +%"struct.std::locale::_Impl" = type { i32, %"struct.std::locale::facet"**, i32, %"struct.std::locale::facet"**, i8** } +%"struct.std::locale::facet" = type { i32 (...)**, i32 } +%union..0._15 = type { i32 } + +declare i8* @llvm.eh.exception() nounwind readonly + +declare i8* @__cxa_begin_catch(i8*) nounwind + +declare %"struct.std::ctype<char>"* @_ZSt9use_facetISt5ctypeIcEERKT_RKSt6locale(%"struct.std::locale"*) + +define 
%"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_PS3_(%"struct.std::basic_istream<char,std::char_traits<char> >"* %__in, i8* nocapture %__s) { +entry: + %0 = invoke %"struct.std::ctype<char>"* @_ZSt9use_facetISt5ctypeIcEERKT_RKSt6locale(%"struct.std::locale"* undef) + to label %invcont8 unwind label %lpad74 ; <%"struct.std::ctype<char>"*> [#uses=0] + +invcont8: ; preds = %entry + %1 = invoke i32 undef(%"struct.std::basic_streambuf<char,std::char_traits<char> >"* undef) + to label %bb26.preheader unwind label %lpad ; <i32> [#uses=0] + +bb26.preheader: ; preds = %invcont8 + br label %invcont38 + +bb1.i100: ; preds = %invcont38 + %2 = add nsw i32 1, %__extracted.0 ; <i32> [#uses=3] + br i1 undef, label %bb.i97, label %bb1.i + +bb.i97: ; preds = %bb1.i100 + br label %invcont38 + +bb1.i: ; preds = %bb1.i100 + %3 = invoke i32 undef(%"struct.std::basic_streambuf<char,std::char_traits<char> >"* undef) + to label %invcont38 unwind label %lpad ; <i32> [#uses=0] + +invcont24: ; preds = %invcont38 + %4 = invoke i32 undef(%"struct.std::basic_streambuf<char,std::char_traits<char> >"* undef) + to label %_ZNSt15basic_streambufIcSt11char_traitsIcEE6sbumpcEv.exit.i unwind label %lpad ; <i32> [#uses=0] + +_ZNSt15basic_streambufIcSt11char_traitsIcEE6sbumpcEv.exit.i: ; preds = %invcont24 + br i1 undef, label %invcont25, label %bb.i93 + +bb.i93: ; preds = %_ZNSt15basic_streambufIcSt11char_traitsIcEE6sbumpcEv.exit.i + %5 = invoke i32 undef(%"struct.std::basic_streambuf<char,std::char_traits<char> >"* undef) + to label %invcont25 unwind label %lpad ; <i32> [#uses=0] + +invcont25: ; preds = %bb.i93, %_ZNSt15basic_streambufIcSt11char_traitsIcEE6sbumpcEv.exit.i + br label %invcont38 + +invcont38: ; preds = %invcont25, %bb1.i, %bb.i97, %bb26.preheader + %__extracted.0 = phi i32 [ 0, %bb26.preheader ], [ undef, %invcont25 ], [ %2, %bb.i97 ], [ %2, %bb1.i ] ; <i32> [#uses=1] + br i1 false, label %bb1.i100, label %invcont24 + 
+lpad: ; preds = %bb.i93, %invcont24, %bb1.i, %invcont8 + %__extracted.1 = phi i32 [ 0, %invcont8 ], [ %2, %bb1.i ], [ undef, %bb.i93 ], [ undef, %invcont24 ] ; <i32> [#uses=0] + %lpad1 = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0 + catch i8* null + %eh_ptr = extractvalue { i8*, i32 } %lpad1, 0 + %6 = call i8* @__cxa_begin_catch(i8* %eh_ptr) nounwind ; <i8*> [#uses=0] + unreachable + +lpad74: ; preds = %entry + %lpad2 = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0 + cleanup + unreachable +} + +declare i32 @__gxx_personality_v0(...)
diff --git a/src/LLVM/test/CodeGen/Generic/2010-07-27-DAGCombineCrash.ll b/src/LLVM/test/CodeGen/Generic/2010-07-27-DAGCombineCrash.ll new file mode 100644 index 0000000..a2945aa --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2010-07-27-DAGCombineCrash.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s + +define float @test1() +{ + ret float extractelement (<2 x float> bitcast (<1 x double> <double 0x3f800000> to <2 x float>), i32 1); +}
diff --git a/src/LLVM/test/CodeGen/Generic/2010-11-04-BigByval.ll b/src/LLVM/test/CodeGen/Generic/2010-11-04-BigByval.ll new file mode 100644 index 0000000..df2ca4c --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2010-11-04-BigByval.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s +; PR7170 + +%big = type [131072 x i8] + +declare void @foo(%big* byval align 1) + +define void @bar(%big* byval align 1 %x) { + call void @foo(%big* byval align 1 %x) + ret void +}
diff --git a/src/LLVM/test/CodeGen/Generic/2010-ZeroSizedArg.ll b/src/LLVM/test/CodeGen/Generic/2010-ZeroSizedArg.ll new file mode 100644 index 0000000..d9d8374 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2010-ZeroSizedArg.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s +; PR4975 + +%0 = type <{ [0 x i32] }> +%union.T0 = type { } + +@.str = private constant [1 x i8] c" " + +define void @t(%0) nounwind { +entry: + %arg0 = alloca %union.T0 + %1 = bitcast %union.T0* %arg0 to %0* + store %0 %0, %0* %1, align 1 + ret void +} + +declare i32 @printf(i8*, ...)
diff --git a/src/LLVM/test/CodeGen/Generic/2011-01-06-BigNumberCrash.ll b/src/LLVM/test/CodeGen/Generic/2011-01-06-BigNumberCrash.ll new file mode 100644 index 0000000..05fdf4c --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2011-01-06-BigNumberCrash.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s +; PR8582 + +define void @uint82() nounwind { +entry: + %tmp3 = select i1 undef, i960 4872657003430991806293355221650511486142000513558154090491761976385142772940676648094983476628187266917101386048750715027104076737938178423519545241493072038894065019132638919037781494702597609951702322267198307200588774905587225212622510286498675097141625012190497682454879271766334636032, i960 0 + br i1 undef, label %for.body25.for.body25_crit_edge, label %if.end + +for.body25.for.body25_crit_edge: ; preds = %entry + %ins = or i960 %tmp3, undef + ret void + +if.end: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/Generic/2011-07-07-ScheduleDAGCrash.ll b/src/LLVM/test/CodeGen/Generic/2011-07-07-ScheduleDAGCrash.ll new file mode 100644 index 0000000..da26504 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/2011-07-07-ScheduleDAGCrash.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s +; This caused ScheduleDAG to crash in EmitPhysRegCopy when searching +; the uses of a copy to a physical register without ignoring non-data +; dependence, PR10220. + +; The ARM backend can't handle i256 math at the moment. +; XFAIL: arm + +define void @f(i256* nocapture %a, i256* nocapture %b, i256* nocapture %cc, i256* nocapture %dd) nounwind uwtable noinline ssp { +entry: + %c = load i256* %cc + %d = load i256* %dd + %add = add nsw i256 %c, %d + store i256 %add, i256* %a, align 8 + %or = or i256 %c, 1606938044258990275541962092341162602522202993782792835301376 + %add6 = add nsw i256 %or, %d + store i256 %add6, i256* %b, align 8 + ret void +}
diff --git a/src/LLVM/test/CodeGen/Generic/APIntLoadStore.ll b/src/LLVM/test/CodeGen/Generic/APIntLoadStore.ll new file mode 100644 index 0000000..7c71a33 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/APIntLoadStore.ll
@@ -0,0 +1,2049 @@ +; RUN: llc < %s > %t +@i1_l = external global i1 ; <i1*> [#uses=1] +@i1_s = external global i1 ; <i1*> [#uses=1] +@i2_l = external global i2 ; <i2*> [#uses=1] +@i2_s = external global i2 ; <i2*> [#uses=1] +@i3_l = external global i3 ; <i3*> [#uses=1] +@i3_s = external global i3 ; <i3*> [#uses=1] +@i4_l = external global i4 ; <i4*> [#uses=1] +@i4_s = external global i4 ; <i4*> [#uses=1] +@i5_l = external global i5 ; <i5*> [#uses=1] +@i5_s = external global i5 ; <i5*> [#uses=1] +@i6_l = external global i6 ; <i6*> [#uses=1] +@i6_s = external global i6 ; <i6*> [#uses=1] +@i7_l = external global i7 ; <i7*> [#uses=1] +@i7_s = external global i7 ; <i7*> [#uses=1] +@i8_l = external global i8 ; <i8*> [#uses=1] +@i8_s = external global i8 ; <i8*> [#uses=1] +@i9_l = external global i9 ; <i9*> [#uses=1] +@i9_s = external global i9 ; <i9*> [#uses=1] +@i10_l = external global i10 ; <i10*> [#uses=1] +@i10_s = external global i10 ; <i10*> [#uses=1] +@i11_l = external global i11 ; <i11*> [#uses=1] +@i11_s = external global i11 ; <i11*> [#uses=1] +@i12_l = external global i12 ; <i12*> [#uses=1] +@i12_s = external global i12 ; <i12*> [#uses=1] +@i13_l = external global i13 ; <i13*> [#uses=1] +@i13_s = external global i13 ; <i13*> [#uses=1] +@i14_l = external global i14 ; <i14*> [#uses=1] +@i14_s = external global i14 ; <i14*> [#uses=1] +@i15_l = external global i15 ; <i15*> [#uses=1] +@i15_s = external global i15 ; <i15*> [#uses=1] +@i16_l = external global i16 ; <i16*> [#uses=1] +@i16_s = external global i16 ; <i16*> [#uses=1] +@i17_l = external global i17 ; <i17*> [#uses=1] +@i17_s = external global i17 ; <i17*> [#uses=1] +@i18_l = external global i18 ; <i18*> [#uses=1] +@i18_s = external global i18 ; <i18*> [#uses=1] +@i19_l = external global i19 ; <i19*> [#uses=1] +@i19_s = external global i19 ; <i19*> [#uses=1] +@i20_l = external global i20 ; <i20*> [#uses=1] +@i20_s = external global i20 ; <i20*> [#uses=1] +@i21_l = external global i21 ; <i21*> [#uses=1] 
+@i21_s = external global i21 ; <i21*> [#uses=1] +@i22_l = external global i22 ; <i22*> [#uses=1] +@i22_s = external global i22 ; <i22*> [#uses=1] +@i23_l = external global i23 ; <i23*> [#uses=1] +@i23_s = external global i23 ; <i23*> [#uses=1] +@i24_l = external global i24 ; <i24*> [#uses=1] +@i24_s = external global i24 ; <i24*> [#uses=1] +@i25_l = external global i25 ; <i25*> [#uses=1] +@i25_s = external global i25 ; <i25*> [#uses=1] +@i26_l = external global i26 ; <i26*> [#uses=1] +@i26_s = external global i26 ; <i26*> [#uses=1] +@i27_l = external global i27 ; <i27*> [#uses=1] +@i27_s = external global i27 ; <i27*> [#uses=1] +@i28_l = external global i28 ; <i28*> [#uses=1] +@i28_s = external global i28 ; <i28*> [#uses=1] +@i29_l = external global i29 ; <i29*> [#uses=1] +@i29_s = external global i29 ; <i29*> [#uses=1] +@i30_l = external global i30 ; <i30*> [#uses=1] +@i30_s = external global i30 ; <i30*> [#uses=1] +@i31_l = external global i31 ; <i31*> [#uses=1] +@i31_s = external global i31 ; <i31*> [#uses=1] +@i32_l = external global i32 ; <i32*> [#uses=1] +@i32_s = external global i32 ; <i32*> [#uses=1] +@i33_l = external global i33 ; <i33*> [#uses=1] +@i33_s = external global i33 ; <i33*> [#uses=1] +@i34_l = external global i34 ; <i34*> [#uses=1] +@i34_s = external global i34 ; <i34*> [#uses=1] +@i35_l = external global i35 ; <i35*> [#uses=1] +@i35_s = external global i35 ; <i35*> [#uses=1] +@i36_l = external global i36 ; <i36*> [#uses=1] +@i36_s = external global i36 ; <i36*> [#uses=1] +@i37_l = external global i37 ; <i37*> [#uses=1] +@i37_s = external global i37 ; <i37*> [#uses=1] +@i38_l = external global i38 ; <i38*> [#uses=1] +@i38_s = external global i38 ; <i38*> [#uses=1] +@i39_l = external global i39 ; <i39*> [#uses=1] +@i39_s = external global i39 ; <i39*> [#uses=1] +@i40_l = external global i40 ; <i40*> [#uses=1] +@i40_s = external global i40 ; <i40*> [#uses=1] +@i41_l = external global i41 ; <i41*> [#uses=1] +@i41_s = external global i41 ; <i41*> 
[#uses=1] +@i42_l = external global i42 ; <i42*> [#uses=1] +@i42_s = external global i42 ; <i42*> [#uses=1] +@i43_l = external global i43 ; <i43*> [#uses=1] +@i43_s = external global i43 ; <i43*> [#uses=1] +@i44_l = external global i44 ; <i44*> [#uses=1] +@i44_s = external global i44 ; <i44*> [#uses=1] +@i45_l = external global i45 ; <i45*> [#uses=1] +@i45_s = external global i45 ; <i45*> [#uses=1] +@i46_l = external global i46 ; <i46*> [#uses=1] +@i46_s = external global i46 ; <i46*> [#uses=1] +@i47_l = external global i47 ; <i47*> [#uses=1] +@i47_s = external global i47 ; <i47*> [#uses=1] +@i48_l = external global i48 ; <i48*> [#uses=1] +@i48_s = external global i48 ; <i48*> [#uses=1] +@i49_l = external global i49 ; <i49*> [#uses=1] +@i49_s = external global i49 ; <i49*> [#uses=1] +@i50_l = external global i50 ; <i50*> [#uses=1] +@i50_s = external global i50 ; <i50*> [#uses=1] +@i51_l = external global i51 ; <i51*> [#uses=1] +@i51_s = external global i51 ; <i51*> [#uses=1] +@i52_l = external global i52 ; <i52*> [#uses=1] +@i52_s = external global i52 ; <i52*> [#uses=1] +@i53_l = external global i53 ; <i53*> [#uses=1] +@i53_s = external global i53 ; <i53*> [#uses=1] +@i54_l = external global i54 ; <i54*> [#uses=1] +@i54_s = external global i54 ; <i54*> [#uses=1] +@i55_l = external global i55 ; <i55*> [#uses=1] +@i55_s = external global i55 ; <i55*> [#uses=1] +@i56_l = external global i56 ; <i56*> [#uses=1] +@i56_s = external global i56 ; <i56*> [#uses=1] +@i57_l = external global i57 ; <i57*> [#uses=1] +@i57_s = external global i57 ; <i57*> [#uses=1] +@i58_l = external global i58 ; <i58*> [#uses=1] +@i58_s = external global i58 ; <i58*> [#uses=1] +@i59_l = external global i59 ; <i59*> [#uses=1] +@i59_s = external global i59 ; <i59*> [#uses=1] +@i60_l = external global i60 ; <i60*> [#uses=1] +@i60_s = external global i60 ; <i60*> [#uses=1] +@i61_l = external global i61 ; <i61*> [#uses=1] +@i61_s = external global i61 ; <i61*> [#uses=1] +@i62_l = external global i62 
; <i62*> [#uses=1] +@i62_s = external global i62 ; <i62*> [#uses=1] +@i63_l = external global i63 ; <i63*> [#uses=1] +@i63_s = external global i63 ; <i63*> [#uses=1] +@i64_l = external global i64 ; <i64*> [#uses=1] +@i64_s = external global i64 ; <i64*> [#uses=1] +@i65_l = external global i65 ; <i65*> [#uses=1] +@i65_s = external global i65 ; <i65*> [#uses=1] +@i66_l = external global i66 ; <i66*> [#uses=1] +@i66_s = external global i66 ; <i66*> [#uses=1] +@i67_l = external global i67 ; <i67*> [#uses=1] +@i67_s = external global i67 ; <i67*> [#uses=1] +@i68_l = external global i68 ; <i68*> [#uses=1] +@i68_s = external global i68 ; <i68*> [#uses=1] +@i69_l = external global i69 ; <i69*> [#uses=1] +@i69_s = external global i69 ; <i69*> [#uses=1] +@i70_l = external global i70 ; <i70*> [#uses=1] +@i70_s = external global i70 ; <i70*> [#uses=1] +@i71_l = external global i71 ; <i71*> [#uses=1] +@i71_s = external global i71 ; <i71*> [#uses=1] +@i72_l = external global i72 ; <i72*> [#uses=1] +@i72_s = external global i72 ; <i72*> [#uses=1] +@i73_l = external global i73 ; <i73*> [#uses=1] +@i73_s = external global i73 ; <i73*> [#uses=1] +@i74_l = external global i74 ; <i74*> [#uses=1] +@i74_s = external global i74 ; <i74*> [#uses=1] +@i75_l = external global i75 ; <i75*> [#uses=1] +@i75_s = external global i75 ; <i75*> [#uses=1] +@i76_l = external global i76 ; <i76*> [#uses=1] +@i76_s = external global i76 ; <i76*> [#uses=1] +@i77_l = external global i77 ; <i77*> [#uses=1] +@i77_s = external global i77 ; <i77*> [#uses=1] +@i78_l = external global i78 ; <i78*> [#uses=1] +@i78_s = external global i78 ; <i78*> [#uses=1] +@i79_l = external global i79 ; <i79*> [#uses=1] +@i79_s = external global i79 ; <i79*> [#uses=1] +@i80_l = external global i80 ; <i80*> [#uses=1] +@i80_s = external global i80 ; <i80*> [#uses=1] +@i81_l = external global i81 ; <i81*> [#uses=1] +@i81_s = external global i81 ; <i81*> [#uses=1] +@i82_l = external global i82 ; <i82*> [#uses=1] +@i82_s = external 
global i82 ; <i82*> [#uses=1] +@i83_l = external global i83 ; <i83*> [#uses=1] +@i83_s = external global i83 ; <i83*> [#uses=1] +@i84_l = external global i84 ; <i84*> [#uses=1] +@i84_s = external global i84 ; <i84*> [#uses=1] +@i85_l = external global i85 ; <i85*> [#uses=1] +@i85_s = external global i85 ; <i85*> [#uses=1] +@i86_l = external global i86 ; <i86*> [#uses=1] +@i86_s = external global i86 ; <i86*> [#uses=1] +@i87_l = external global i87 ; <i87*> [#uses=1] +@i87_s = external global i87 ; <i87*> [#uses=1] +@i88_l = external global i88 ; <i88*> [#uses=1] +@i88_s = external global i88 ; <i88*> [#uses=1] +@i89_l = external global i89 ; <i89*> [#uses=1] +@i89_s = external global i89 ; <i89*> [#uses=1] +@i90_l = external global i90 ; <i90*> [#uses=1] +@i90_s = external global i90 ; <i90*> [#uses=1] +@i91_l = external global i91 ; <i91*> [#uses=1] +@i91_s = external global i91 ; <i91*> [#uses=1] +@i92_l = external global i92 ; <i92*> [#uses=1] +@i92_s = external global i92 ; <i92*> [#uses=1] +@i93_l = external global i93 ; <i93*> [#uses=1] +@i93_s = external global i93 ; <i93*> [#uses=1] +@i94_l = external global i94 ; <i94*> [#uses=1] +@i94_s = external global i94 ; <i94*> [#uses=1] +@i95_l = external global i95 ; <i95*> [#uses=1] +@i95_s = external global i95 ; <i95*> [#uses=1] +@i96_l = external global i96 ; <i96*> [#uses=1] +@i96_s = external global i96 ; <i96*> [#uses=1] +@i97_l = external global i97 ; <i97*> [#uses=1] +@i97_s = external global i97 ; <i97*> [#uses=1] +@i98_l = external global i98 ; <i98*> [#uses=1] +@i98_s = external global i98 ; <i98*> [#uses=1] +@i99_l = external global i99 ; <i99*> [#uses=1] +@i99_s = external global i99 ; <i99*> [#uses=1] +@i100_l = external global i100 ; <i100*> [#uses=1] +@i100_s = external global i100 ; <i100*> [#uses=1] +@i101_l = external global i101 ; <i101*> [#uses=1] +@i101_s = external global i101 ; <i101*> [#uses=1] +@i102_l = external global i102 ; <i102*> [#uses=1] +@i102_s = external global i102 ; <i102*> 
[#uses=1] +@i103_l = external global i103 ; <i103*> [#uses=1] +@i103_s = external global i103 ; <i103*> [#uses=1] +@i104_l = external global i104 ; <i104*> [#uses=1] +@i104_s = external global i104 ; <i104*> [#uses=1] +@i105_l = external global i105 ; <i105*> [#uses=1] +@i105_s = external global i105 ; <i105*> [#uses=1] +@i106_l = external global i106 ; <i106*> [#uses=1] +@i106_s = external global i106 ; <i106*> [#uses=1] +@i107_l = external global i107 ; <i107*> [#uses=1] +@i107_s = external global i107 ; <i107*> [#uses=1] +@i108_l = external global i108 ; <i108*> [#uses=1] +@i108_s = external global i108 ; <i108*> [#uses=1] +@i109_l = external global i109 ; <i109*> [#uses=1] +@i109_s = external global i109 ; <i109*> [#uses=1] +@i110_l = external global i110 ; <i110*> [#uses=1] +@i110_s = external global i110 ; <i110*> [#uses=1] +@i111_l = external global i111 ; <i111*> [#uses=1] +@i111_s = external global i111 ; <i111*> [#uses=1] +@i112_l = external global i112 ; <i112*> [#uses=1] +@i112_s = external global i112 ; <i112*> [#uses=1] +@i113_l = external global i113 ; <i113*> [#uses=1] +@i113_s = external global i113 ; <i113*> [#uses=1] +@i114_l = external global i114 ; <i114*> [#uses=1] +@i114_s = external global i114 ; <i114*> [#uses=1] +@i115_l = external global i115 ; <i115*> [#uses=1] +@i115_s = external global i115 ; <i115*> [#uses=1] +@i116_l = external global i116 ; <i116*> [#uses=1] +@i116_s = external global i116 ; <i116*> [#uses=1] +@i117_l = external global i117 ; <i117*> [#uses=1] +@i117_s = external global i117 ; <i117*> [#uses=1] +@i118_l = external global i118 ; <i118*> [#uses=1] +@i118_s = external global i118 ; <i118*> [#uses=1] +@i119_l = external global i119 ; <i119*> [#uses=1] +@i119_s = external global i119 ; <i119*> [#uses=1] +@i120_l = external global i120 ; <i120*> [#uses=1] +@i120_s = external global i120 ; <i120*> [#uses=1] +@i121_l = external global i121 ; <i121*> [#uses=1] +@i121_s = external global i121 ; <i121*> [#uses=1] +@i122_l = 
external global i122 ; <i122*> [#uses=1] +@i122_s = external global i122 ; <i122*> [#uses=1] +@i123_l = external global i123 ; <i123*> [#uses=1] +@i123_s = external global i123 ; <i123*> [#uses=1] +@i124_l = external global i124 ; <i124*> [#uses=1] +@i124_s = external global i124 ; <i124*> [#uses=1] +@i125_l = external global i125 ; <i125*> [#uses=1] +@i125_s = external global i125 ; <i125*> [#uses=1] +@i126_l = external global i126 ; <i126*> [#uses=1] +@i126_s = external global i126 ; <i126*> [#uses=1] +@i127_l = external global i127 ; <i127*> [#uses=1] +@i127_s = external global i127 ; <i127*> [#uses=1] +@i128_l = external global i128 ; <i128*> [#uses=1] +@i128_s = external global i128 ; <i128*> [#uses=1] +@i129_l = external global i129 ; <i129*> [#uses=1] +@i129_s = external global i129 ; <i129*> [#uses=1] +@i130_l = external global i130 ; <i130*> [#uses=1] +@i130_s = external global i130 ; <i130*> [#uses=1] +@i131_l = external global i131 ; <i131*> [#uses=1] +@i131_s = external global i131 ; <i131*> [#uses=1] +@i132_l = external global i132 ; <i132*> [#uses=1] +@i132_s = external global i132 ; <i132*> [#uses=1] +@i133_l = external global i133 ; <i133*> [#uses=1] +@i133_s = external global i133 ; <i133*> [#uses=1] +@i134_l = external global i134 ; <i134*> [#uses=1] +@i134_s = external global i134 ; <i134*> [#uses=1] +@i135_l = external global i135 ; <i135*> [#uses=1] +@i135_s = external global i135 ; <i135*> [#uses=1] +@i136_l = external global i136 ; <i136*> [#uses=1] +@i136_s = external global i136 ; <i136*> [#uses=1] +@i137_l = external global i137 ; <i137*> [#uses=1] +@i137_s = external global i137 ; <i137*> [#uses=1] +@i138_l = external global i138 ; <i138*> [#uses=1] +@i138_s = external global i138 ; <i138*> [#uses=1] +@i139_l = external global i139 ; <i139*> [#uses=1] +@i139_s = external global i139 ; <i139*> [#uses=1] +@i140_l = external global i140 ; <i140*> [#uses=1] +@i140_s = external global i140 ; <i140*> [#uses=1] +@i141_l = external global i141 ; 
<i141*> [#uses=1] +@i141_s = external global i141 ; <i141*> [#uses=1] +@i142_l = external global i142 ; <i142*> [#uses=1] +@i142_s = external global i142 ; <i142*> [#uses=1] +@i143_l = external global i143 ; <i143*> [#uses=1] +@i143_s = external global i143 ; <i143*> [#uses=1] +@i144_l = external global i144 ; <i144*> [#uses=1] +@i144_s = external global i144 ; <i144*> [#uses=1] +@i145_l = external global i145 ; <i145*> [#uses=1] +@i145_s = external global i145 ; <i145*> [#uses=1] +@i146_l = external global i146 ; <i146*> [#uses=1] +@i146_s = external global i146 ; <i146*> [#uses=1] +@i147_l = external global i147 ; <i147*> [#uses=1] +@i147_s = external global i147 ; <i147*> [#uses=1] +@i148_l = external global i148 ; <i148*> [#uses=1] +@i148_s = external global i148 ; <i148*> [#uses=1] +@i149_l = external global i149 ; <i149*> [#uses=1] +@i149_s = external global i149 ; <i149*> [#uses=1] +@i150_l = external global i150 ; <i150*> [#uses=1] +@i150_s = external global i150 ; <i150*> [#uses=1] +@i151_l = external global i151 ; <i151*> [#uses=1] +@i151_s = external global i151 ; <i151*> [#uses=1] +@i152_l = external global i152 ; <i152*> [#uses=1] +@i152_s = external global i152 ; <i152*> [#uses=1] +@i153_l = external global i153 ; <i153*> [#uses=1] +@i153_s = external global i153 ; <i153*> [#uses=1] +@i154_l = external global i154 ; <i154*> [#uses=1] +@i154_s = external global i154 ; <i154*> [#uses=1] +@i155_l = external global i155 ; <i155*> [#uses=1] +@i155_s = external global i155 ; <i155*> [#uses=1] +@i156_l = external global i156 ; <i156*> [#uses=1] +@i156_s = external global i156 ; <i156*> [#uses=1] +@i157_l = external global i157 ; <i157*> [#uses=1] +@i157_s = external global i157 ; <i157*> [#uses=1] +@i158_l = external global i158 ; <i158*> [#uses=1] +@i158_s = external global i158 ; <i158*> [#uses=1] +@i159_l = external global i159 ; <i159*> [#uses=1] +@i159_s = external global i159 ; <i159*> [#uses=1] +@i160_l = external global i160 ; <i160*> [#uses=1] 
+@i160_s = external global i160 ; <i160*> [#uses=1] +@i161_l = external global i161 ; <i161*> [#uses=1] +@i161_s = external global i161 ; <i161*> [#uses=1] +@i162_l = external global i162 ; <i162*> [#uses=1] +@i162_s = external global i162 ; <i162*> [#uses=1] +@i163_l = external global i163 ; <i163*> [#uses=1] +@i163_s = external global i163 ; <i163*> [#uses=1] +@i164_l = external global i164 ; <i164*> [#uses=1] +@i164_s = external global i164 ; <i164*> [#uses=1] +@i165_l = external global i165 ; <i165*> [#uses=1] +@i165_s = external global i165 ; <i165*> [#uses=1] +@i166_l = external global i166 ; <i166*> [#uses=1] +@i166_s = external global i166 ; <i166*> [#uses=1] +@i167_l = external global i167 ; <i167*> [#uses=1] +@i167_s = external global i167 ; <i167*> [#uses=1] +@i168_l = external global i168 ; <i168*> [#uses=1] +@i168_s = external global i168 ; <i168*> [#uses=1] +@i169_l = external global i169 ; <i169*> [#uses=1] +@i169_s = external global i169 ; <i169*> [#uses=1] +@i170_l = external global i170 ; <i170*> [#uses=1] +@i170_s = external global i170 ; <i170*> [#uses=1] +@i171_l = external global i171 ; <i171*> [#uses=1] +@i171_s = external global i171 ; <i171*> [#uses=1] +@i172_l = external global i172 ; <i172*> [#uses=1] +@i172_s = external global i172 ; <i172*> [#uses=1] +@i173_l = external global i173 ; <i173*> [#uses=1] +@i173_s = external global i173 ; <i173*> [#uses=1] +@i174_l = external global i174 ; <i174*> [#uses=1] +@i174_s = external global i174 ; <i174*> [#uses=1] +@i175_l = external global i175 ; <i175*> [#uses=1] +@i175_s = external global i175 ; <i175*> [#uses=1] +@i176_l = external global i176 ; <i176*> [#uses=1] +@i176_s = external global i176 ; <i176*> [#uses=1] +@i177_l = external global i177 ; <i177*> [#uses=1] +@i177_s = external global i177 ; <i177*> [#uses=1] +@i178_l = external global i178 ; <i178*> [#uses=1] +@i178_s = external global i178 ; <i178*> [#uses=1] +@i179_l = external global i179 ; <i179*> [#uses=1] +@i179_s = external 
global i179 ; <i179*> [#uses=1] +@i180_l = external global i180 ; <i180*> [#uses=1] +@i180_s = external global i180 ; <i180*> [#uses=1] +@i181_l = external global i181 ; <i181*> [#uses=1] +@i181_s = external global i181 ; <i181*> [#uses=1] +@i182_l = external global i182 ; <i182*> [#uses=1] +@i182_s = external global i182 ; <i182*> [#uses=1] +@i183_l = external global i183 ; <i183*> [#uses=1] +@i183_s = external global i183 ; <i183*> [#uses=1] +@i184_l = external global i184 ; <i184*> [#uses=1] +@i184_s = external global i184 ; <i184*> [#uses=1] +@i185_l = external global i185 ; <i185*> [#uses=1] +@i185_s = external global i185 ; <i185*> [#uses=1] +@i186_l = external global i186 ; <i186*> [#uses=1] +@i186_s = external global i186 ; <i186*> [#uses=1] +@i187_l = external global i187 ; <i187*> [#uses=1] +@i187_s = external global i187 ; <i187*> [#uses=1] +@i188_l = external global i188 ; <i188*> [#uses=1] +@i188_s = external global i188 ; <i188*> [#uses=1] +@i189_l = external global i189 ; <i189*> [#uses=1] +@i189_s = external global i189 ; <i189*> [#uses=1] +@i190_l = external global i190 ; <i190*> [#uses=1] +@i190_s = external global i190 ; <i190*> [#uses=1] +@i191_l = external global i191 ; <i191*> [#uses=1] +@i191_s = external global i191 ; <i191*> [#uses=1] +@i192_l = external global i192 ; <i192*> [#uses=1] +@i192_s = external global i192 ; <i192*> [#uses=1] +@i193_l = external global i193 ; <i193*> [#uses=1] +@i193_s = external global i193 ; <i193*> [#uses=1] +@i194_l = external global i194 ; <i194*> [#uses=1] +@i194_s = external global i194 ; <i194*> [#uses=1] +@i195_l = external global i195 ; <i195*> [#uses=1] +@i195_s = external global i195 ; <i195*> [#uses=1] +@i196_l = external global i196 ; <i196*> [#uses=1] +@i196_s = external global i196 ; <i196*> [#uses=1] +@i197_l = external global i197 ; <i197*> [#uses=1] +@i197_s = external global i197 ; <i197*> [#uses=1] +@i198_l = external global i198 ; <i198*> [#uses=1] +@i198_s = external global i198 ; <i198*> 
[#uses=1] +@i199_l = external global i199 ; <i199*> [#uses=1] +@i199_s = external global i199 ; <i199*> [#uses=1] +@i200_l = external global i200 ; <i200*> [#uses=1] +@i200_s = external global i200 ; <i200*> [#uses=1] +@i201_l = external global i201 ; <i201*> [#uses=1] +@i201_s = external global i201 ; <i201*> [#uses=1] +@i202_l = external global i202 ; <i202*> [#uses=1] +@i202_s = external global i202 ; <i202*> [#uses=1] +@i203_l = external global i203 ; <i203*> [#uses=1] +@i203_s = external global i203 ; <i203*> [#uses=1] +@i204_l = external global i204 ; <i204*> [#uses=1] +@i204_s = external global i204 ; <i204*> [#uses=1] +@i205_l = external global i205 ; <i205*> [#uses=1] +@i205_s = external global i205 ; <i205*> [#uses=1] +@i206_l = external global i206 ; <i206*> [#uses=1] +@i206_s = external global i206 ; <i206*> [#uses=1] +@i207_l = external global i207 ; <i207*> [#uses=1] +@i207_s = external global i207 ; <i207*> [#uses=1] +@i208_l = external global i208 ; <i208*> [#uses=1] +@i208_s = external global i208 ; <i208*> [#uses=1] +@i209_l = external global i209 ; <i209*> [#uses=1] +@i209_s = external global i209 ; <i209*> [#uses=1] +@i210_l = external global i210 ; <i210*> [#uses=1] +@i210_s = external global i210 ; <i210*> [#uses=1] +@i211_l = external global i211 ; <i211*> [#uses=1] +@i211_s = external global i211 ; <i211*> [#uses=1] +@i212_l = external global i212 ; <i212*> [#uses=1] +@i212_s = external global i212 ; <i212*> [#uses=1] +@i213_l = external global i213 ; <i213*> [#uses=1] +@i213_s = external global i213 ; <i213*> [#uses=1] +@i214_l = external global i214 ; <i214*> [#uses=1] +@i214_s = external global i214 ; <i214*> [#uses=1] +@i215_l = external global i215 ; <i215*> [#uses=1] +@i215_s = external global i215 ; <i215*> [#uses=1] +@i216_l = external global i216 ; <i216*> [#uses=1] +@i216_s = external global i216 ; <i216*> [#uses=1] +@i217_l = external global i217 ; <i217*> [#uses=1] +@i217_s = external global i217 ; <i217*> [#uses=1] +@i218_l = 
external global i218 ; <i218*> [#uses=1] +@i218_s = external global i218 ; <i218*> [#uses=1] +@i219_l = external global i219 ; <i219*> [#uses=1] +@i219_s = external global i219 ; <i219*> [#uses=1] +@i220_l = external global i220 ; <i220*> [#uses=1] +@i220_s = external global i220 ; <i220*> [#uses=1] +@i221_l = external global i221 ; <i221*> [#uses=1] +@i221_s = external global i221 ; <i221*> [#uses=1] +@i222_l = external global i222 ; <i222*> [#uses=1] +@i222_s = external global i222 ; <i222*> [#uses=1] +@i223_l = external global i223 ; <i223*> [#uses=1] +@i223_s = external global i223 ; <i223*> [#uses=1] +@i224_l = external global i224 ; <i224*> [#uses=1] +@i224_s = external global i224 ; <i224*> [#uses=1] +@i225_l = external global i225 ; <i225*> [#uses=1] +@i225_s = external global i225 ; <i225*> [#uses=1] +@i226_l = external global i226 ; <i226*> [#uses=1] +@i226_s = external global i226 ; <i226*> [#uses=1] +@i227_l = external global i227 ; <i227*> [#uses=1] +@i227_s = external global i227 ; <i227*> [#uses=1] +@i228_l = external global i228 ; <i228*> [#uses=1] +@i228_s = external global i228 ; <i228*> [#uses=1] +@i229_l = external global i229 ; <i229*> [#uses=1] +@i229_s = external global i229 ; <i229*> [#uses=1] +@i230_l = external global i230 ; <i230*> [#uses=1] +@i230_s = external global i230 ; <i230*> [#uses=1] +@i231_l = external global i231 ; <i231*> [#uses=1] +@i231_s = external global i231 ; <i231*> [#uses=1] +@i232_l = external global i232 ; <i232*> [#uses=1] +@i232_s = external global i232 ; <i232*> [#uses=1] +@i233_l = external global i233 ; <i233*> [#uses=1] +@i233_s = external global i233 ; <i233*> [#uses=1] +@i234_l = external global i234 ; <i234*> [#uses=1] +@i234_s = external global i234 ; <i234*> [#uses=1] +@i235_l = external global i235 ; <i235*> [#uses=1] +@i235_s = external global i235 ; <i235*> [#uses=1] +@i236_l = external global i236 ; <i236*> [#uses=1] +@i236_s = external global i236 ; <i236*> [#uses=1] +@i237_l = external global i237 ; 
<i237*> [#uses=1] +@i237_s = external global i237 ; <i237*> [#uses=1] +@i238_l = external global i238 ; <i238*> [#uses=1] +@i238_s = external global i238 ; <i238*> [#uses=1] +@i239_l = external global i239 ; <i239*> [#uses=1] +@i239_s = external global i239 ; <i239*> [#uses=1] +@i240_l = external global i240 ; <i240*> [#uses=1] +@i240_s = external global i240 ; <i240*> [#uses=1] +@i241_l = external global i241 ; <i241*> [#uses=1] +@i241_s = external global i241 ; <i241*> [#uses=1] +@i242_l = external global i242 ; <i242*> [#uses=1] +@i242_s = external global i242 ; <i242*> [#uses=1] +@i243_l = external global i243 ; <i243*> [#uses=1] +@i243_s = external global i243 ; <i243*> [#uses=1] +@i244_l = external global i244 ; <i244*> [#uses=1] +@i244_s = external global i244 ; <i244*> [#uses=1] +@i245_l = external global i245 ; <i245*> [#uses=1] +@i245_s = external global i245 ; <i245*> [#uses=1] +@i246_l = external global i246 ; <i246*> [#uses=1] +@i246_s = external global i246 ; <i246*> [#uses=1] +@i247_l = external global i247 ; <i247*> [#uses=1] +@i247_s = external global i247 ; <i247*> [#uses=1] +@i248_l = external global i248 ; <i248*> [#uses=1] +@i248_s = external global i248 ; <i248*> [#uses=1] +@i249_l = external global i249 ; <i249*> [#uses=1] +@i249_s = external global i249 ; <i249*> [#uses=1] +@i250_l = external global i250 ; <i250*> [#uses=1] +@i250_s = external global i250 ; <i250*> [#uses=1] +@i251_l = external global i251 ; <i251*> [#uses=1] +@i251_s = external global i251 ; <i251*> [#uses=1] +@i252_l = external global i252 ; <i252*> [#uses=1] +@i252_s = external global i252 ; <i252*> [#uses=1] +@i253_l = external global i253 ; <i253*> [#uses=1] +@i253_s = external global i253 ; <i253*> [#uses=1] +@i254_l = external global i254 ; <i254*> [#uses=1] +@i254_s = external global i254 ; <i254*> [#uses=1] +@i255_l = external global i255 ; <i255*> [#uses=1] +@i255_s = external global i255 ; <i255*> [#uses=1] +@i256_l = external global i256 ; <i256*> [#uses=1] 
+@i256_s = external global i256 ; <i256*> [#uses=1] + +define void @i1_ls() nounwind { + %tmp = load i1* @i1_l ; <i1> [#uses=1] + store i1 %tmp, i1* @i1_s + ret void +} + +define void @i2_ls() nounwind { + %tmp = load i2* @i2_l ; <i2> [#uses=1] + store i2 %tmp, i2* @i2_s + ret void +} + +define void @i3_ls() nounwind { + %tmp = load i3* @i3_l ; <i3> [#uses=1] + store i3 %tmp, i3* @i3_s + ret void +} + +define void @i4_ls() nounwind { + %tmp = load i4* @i4_l ; <i4> [#uses=1] + store i4 %tmp, i4* @i4_s + ret void +} + +define void @i5_ls() nounwind { + %tmp = load i5* @i5_l ; <i5> [#uses=1] + store i5 %tmp, i5* @i5_s + ret void +} + +define void @i6_ls() nounwind { + %tmp = load i6* @i6_l ; <i6> [#uses=1] + store i6 %tmp, i6* @i6_s + ret void +} + +define void @i7_ls() nounwind { + %tmp = load i7* @i7_l ; <i7> [#uses=1] + store i7 %tmp, i7* @i7_s + ret void +} + +define void @i8_ls() nounwind { + %tmp = load i8* @i8_l ; <i8> [#uses=1] + store i8 %tmp, i8* @i8_s + ret void +} + +define void @i9_ls() nounwind { + %tmp = load i9* @i9_l ; <i9> [#uses=1] + store i9 %tmp, i9* @i9_s + ret void +} + +define void @i10_ls() nounwind { + %tmp = load i10* @i10_l ; <i10> [#uses=1] + store i10 %tmp, i10* @i10_s + ret void +} + +define void @i11_ls() nounwind { + %tmp = load i11* @i11_l ; <i11> [#uses=1] + store i11 %tmp, i11* @i11_s + ret void +} + +define void @i12_ls() nounwind { + %tmp = load i12* @i12_l ; <i12> [#uses=1] + store i12 %tmp, i12* @i12_s + ret void +} + +define void @i13_ls() nounwind { + %tmp = load i13* @i13_l ; <i13> [#uses=1] + store i13 %tmp, i13* @i13_s + ret void +} + +define void @i14_ls() nounwind { + %tmp = load i14* @i14_l ; <i14> [#uses=1] + store i14 %tmp, i14* @i14_s + ret void +} + +define void @i15_ls() nounwind { + %tmp = load i15* @i15_l ; <i15> [#uses=1] + store i15 %tmp, i15* @i15_s + ret void +} + +define void @i16_ls() nounwind { + %tmp = load i16* @i16_l ; <i16> [#uses=1] + store i16 %tmp, i16* @i16_s + ret void +} + +define void @i17_ls() 
nounwind { + %tmp = load i17* @i17_l ; <i17> [#uses=1] + store i17 %tmp, i17* @i17_s + ret void +} + +define void @i18_ls() nounwind { + %tmp = load i18* @i18_l ; <i18> [#uses=1] + store i18 %tmp, i18* @i18_s + ret void +} + +define void @i19_ls() nounwind { + %tmp = load i19* @i19_l ; <i19> [#uses=1] + store i19 %tmp, i19* @i19_s + ret void +} + +define void @i20_ls() nounwind { + %tmp = load i20* @i20_l ; <i20> [#uses=1] + store i20 %tmp, i20* @i20_s + ret void +} + +define void @i21_ls() nounwind { + %tmp = load i21* @i21_l ; <i21> [#uses=1] + store i21 %tmp, i21* @i21_s + ret void +} + +define void @i22_ls() nounwind { + %tmp = load i22* @i22_l ; <i22> [#uses=1] + store i22 %tmp, i22* @i22_s + ret void +} + +define void @i23_ls() nounwind { + %tmp = load i23* @i23_l ; <i23> [#uses=1] + store i23 %tmp, i23* @i23_s + ret void +} + +define void @i24_ls() nounwind { + %tmp = load i24* @i24_l ; <i24> [#uses=1] + store i24 %tmp, i24* @i24_s + ret void +} + +define void @i25_ls() nounwind { + %tmp = load i25* @i25_l ; <i25> [#uses=1] + store i25 %tmp, i25* @i25_s + ret void +} + +define void @i26_ls() nounwind { + %tmp = load i26* @i26_l ; <i26> [#uses=1] + store i26 %tmp, i26* @i26_s + ret void +} + +define void @i27_ls() nounwind { + %tmp = load i27* @i27_l ; <i27> [#uses=1] + store i27 %tmp, i27* @i27_s + ret void +} + +define void @i28_ls() nounwind { + %tmp = load i28* @i28_l ; <i28> [#uses=1] + store i28 %tmp, i28* @i28_s + ret void +} + +define void @i29_ls() nounwind { + %tmp = load i29* @i29_l ; <i29> [#uses=1] + store i29 %tmp, i29* @i29_s + ret void +} + +define void @i30_ls() nounwind { + %tmp = load i30* @i30_l ; <i30> [#uses=1] + store i30 %tmp, i30* @i30_s + ret void +} + +define void @i31_ls() nounwind { + %tmp = load i31* @i31_l ; <i31> [#uses=1] + store i31 %tmp, i31* @i31_s + ret void +} + +define void @i32_ls() nounwind { + %tmp = load i32* @i32_l ; <i32> [#uses=1] + store i32 %tmp, i32* @i32_s + ret void +} + +define void @i33_ls() nounwind { + 
%tmp = load i33* @i33_l ; <i33> [#uses=1] + store i33 %tmp, i33* @i33_s + ret void +} + +define void @i34_ls() nounwind { + %tmp = load i34* @i34_l ; <i34> [#uses=1] + store i34 %tmp, i34* @i34_s + ret void +} + +define void @i35_ls() nounwind { + %tmp = load i35* @i35_l ; <i35> [#uses=1] + store i35 %tmp, i35* @i35_s + ret void +} + +define void @i36_ls() nounwind { + %tmp = load i36* @i36_l ; <i36> [#uses=1] + store i36 %tmp, i36* @i36_s + ret void +} + +define void @i37_ls() nounwind { + %tmp = load i37* @i37_l ; <i37> [#uses=1] + store i37 %tmp, i37* @i37_s + ret void +} + +define void @i38_ls() nounwind { + %tmp = load i38* @i38_l ; <i38> [#uses=1] + store i38 %tmp, i38* @i38_s + ret void +} + +define void @i39_ls() nounwind { + %tmp = load i39* @i39_l ; <i39> [#uses=1] + store i39 %tmp, i39* @i39_s + ret void +} + +define void @i40_ls() nounwind { + %tmp = load i40* @i40_l ; <i40> [#uses=1] + store i40 %tmp, i40* @i40_s + ret void +} + +define void @i41_ls() nounwind { + %tmp = load i41* @i41_l ; <i41> [#uses=1] + store i41 %tmp, i41* @i41_s + ret void +} + +define void @i42_ls() nounwind { + %tmp = load i42* @i42_l ; <i42> [#uses=1] + store i42 %tmp, i42* @i42_s + ret void +} + +define void @i43_ls() nounwind { + %tmp = load i43* @i43_l ; <i43> [#uses=1] + store i43 %tmp, i43* @i43_s + ret void +} + +define void @i44_ls() nounwind { + %tmp = load i44* @i44_l ; <i44> [#uses=1] + store i44 %tmp, i44* @i44_s + ret void +} + +define void @i45_ls() nounwind { + %tmp = load i45* @i45_l ; <i45> [#uses=1] + store i45 %tmp, i45* @i45_s + ret void +} + +define void @i46_ls() nounwind { + %tmp = load i46* @i46_l ; <i46> [#uses=1] + store i46 %tmp, i46* @i46_s + ret void +} + +define void @i47_ls() nounwind { + %tmp = load i47* @i47_l ; <i47> [#uses=1] + store i47 %tmp, i47* @i47_s + ret void +} + +define void @i48_ls() nounwind { + %tmp = load i48* @i48_l ; <i48> [#uses=1] + store i48 %tmp, i48* @i48_s + ret void +} + +define void @i49_ls() nounwind { + %tmp = load 
i49* @i49_l ; <i49> [#uses=1] + store i49 %tmp, i49* @i49_s + ret void +} + +define void @i50_ls() nounwind { + %tmp = load i50* @i50_l ; <i50> [#uses=1] + store i50 %tmp, i50* @i50_s + ret void +} + +define void @i51_ls() nounwind { + %tmp = load i51* @i51_l ; <i51> [#uses=1] + store i51 %tmp, i51* @i51_s + ret void +} + +define void @i52_ls() nounwind { + %tmp = load i52* @i52_l ; <i52> [#uses=1] + store i52 %tmp, i52* @i52_s + ret void +} + +define void @i53_ls() nounwind { + %tmp = load i53* @i53_l ; <i53> [#uses=1] + store i53 %tmp, i53* @i53_s + ret void +} + +define void @i54_ls() nounwind { + %tmp = load i54* @i54_l ; <i54> [#uses=1] + store i54 %tmp, i54* @i54_s + ret void +} + +define void @i55_ls() nounwind { + %tmp = load i55* @i55_l ; <i55> [#uses=1] + store i55 %tmp, i55* @i55_s + ret void +} + +define void @i56_ls() nounwind { + %tmp = load i56* @i56_l ; <i56> [#uses=1] + store i56 %tmp, i56* @i56_s + ret void +} + +define void @i57_ls() nounwind { + %tmp = load i57* @i57_l ; <i57> [#uses=1] + store i57 %tmp, i57* @i57_s + ret void +} + +define void @i58_ls() nounwind { + %tmp = load i58* @i58_l ; <i58> [#uses=1] + store i58 %tmp, i58* @i58_s + ret void +} + +define void @i59_ls() nounwind { + %tmp = load i59* @i59_l ; <i59> [#uses=1] + store i59 %tmp, i59* @i59_s + ret void +} + +define void @i60_ls() nounwind { + %tmp = load i60* @i60_l ; <i60> [#uses=1] + store i60 %tmp, i60* @i60_s + ret void +} + +define void @i61_ls() nounwind { + %tmp = load i61* @i61_l ; <i61> [#uses=1] + store i61 %tmp, i61* @i61_s + ret void +} + +define void @i62_ls() nounwind { + %tmp = load i62* @i62_l ; <i62> [#uses=1] + store i62 %tmp, i62* @i62_s + ret void +} + +define void @i63_ls() nounwind { + %tmp = load i63* @i63_l ; <i63> [#uses=1] + store i63 %tmp, i63* @i63_s + ret void +} + +define void @i64_ls() nounwind { + %tmp = load i64* @i64_l ; <i64> [#uses=1] + store i64 %tmp, i64* @i64_s + ret void +} + +define void @i65_ls() nounwind { + %tmp = load i65* @i65_l ; 
<i65> [#uses=1] + store i65 %tmp, i65* @i65_s + ret void +} + +define void @i66_ls() nounwind { + %tmp = load i66* @i66_l ; <i66> [#uses=1] + store i66 %tmp, i66* @i66_s + ret void +} + +define void @i67_ls() nounwind { + %tmp = load i67* @i67_l ; <i67> [#uses=1] + store i67 %tmp, i67* @i67_s + ret void +} + +define void @i68_ls() nounwind { + %tmp = load i68* @i68_l ; <i68> [#uses=1] + store i68 %tmp, i68* @i68_s + ret void +} + +define void @i69_ls() nounwind { + %tmp = load i69* @i69_l ; <i69> [#uses=1] + store i69 %tmp, i69* @i69_s + ret void +} + +define void @i70_ls() nounwind { + %tmp = load i70* @i70_l ; <i70> [#uses=1] + store i70 %tmp, i70* @i70_s + ret void +} + +define void @i71_ls() nounwind { + %tmp = load i71* @i71_l ; <i71> [#uses=1] + store i71 %tmp, i71* @i71_s + ret void +} + +define void @i72_ls() nounwind { + %tmp = load i72* @i72_l ; <i72> [#uses=1] + store i72 %tmp, i72* @i72_s + ret void +} + +define void @i73_ls() nounwind { + %tmp = load i73* @i73_l ; <i73> [#uses=1] + store i73 %tmp, i73* @i73_s + ret void +} + +define void @i74_ls() nounwind { + %tmp = load i74* @i74_l ; <i74> [#uses=1] + store i74 %tmp, i74* @i74_s + ret void +} + +define void @i75_ls() nounwind { + %tmp = load i75* @i75_l ; <i75> [#uses=1] + store i75 %tmp, i75* @i75_s + ret void +} + +define void @i76_ls() nounwind { + %tmp = load i76* @i76_l ; <i76> [#uses=1] + store i76 %tmp, i76* @i76_s + ret void +} + +define void @i77_ls() nounwind { + %tmp = load i77* @i77_l ; <i77> [#uses=1] + store i77 %tmp, i77* @i77_s + ret void +} + +define void @i78_ls() nounwind { + %tmp = load i78* @i78_l ; <i78> [#uses=1] + store i78 %tmp, i78* @i78_s + ret void +} + +define void @i79_ls() nounwind { + %tmp = load i79* @i79_l ; <i79> [#uses=1] + store i79 %tmp, i79* @i79_s + ret void +} + +define void @i80_ls() nounwind { + %tmp = load i80* @i80_l ; <i80> [#uses=1] + store i80 %tmp, i80* @i80_s + ret void +} + +define void @i81_ls() nounwind { + %tmp = load i81* @i81_l ; <i81> [#uses=1] 
+ store i81 %tmp, i81* @i81_s + ret void +} + +define void @i82_ls() nounwind { + %tmp = load i82* @i82_l ; <i82> [#uses=1] + store i82 %tmp, i82* @i82_s + ret void +} + +define void @i83_ls() nounwind { + %tmp = load i83* @i83_l ; <i83> [#uses=1] + store i83 %tmp, i83* @i83_s + ret void +} + +define void @i84_ls() nounwind { + %tmp = load i84* @i84_l ; <i84> [#uses=1] + store i84 %tmp, i84* @i84_s + ret void +} + +define void @i85_ls() nounwind { + %tmp = load i85* @i85_l ; <i85> [#uses=1] + store i85 %tmp, i85* @i85_s + ret void +} + +define void @i86_ls() nounwind { + %tmp = load i86* @i86_l ; <i86> [#uses=1] + store i86 %tmp, i86* @i86_s + ret void +} + +define void @i87_ls() nounwind { + %tmp = load i87* @i87_l ; <i87> [#uses=1] + store i87 %tmp, i87* @i87_s + ret void +} + +define void @i88_ls() nounwind { + %tmp = load i88* @i88_l ; <i88> [#uses=1] + store i88 %tmp, i88* @i88_s + ret void +} + +define void @i89_ls() nounwind { + %tmp = load i89* @i89_l ; <i89> [#uses=1] + store i89 %tmp, i89* @i89_s + ret void +} + +define void @i90_ls() nounwind { + %tmp = load i90* @i90_l ; <i90> [#uses=1] + store i90 %tmp, i90* @i90_s + ret void +} + +define void @i91_ls() nounwind { + %tmp = load i91* @i91_l ; <i91> [#uses=1] + store i91 %tmp, i91* @i91_s + ret void +} + +define void @i92_ls() nounwind { + %tmp = load i92* @i92_l ; <i92> [#uses=1] + store i92 %tmp, i92* @i92_s + ret void +} + +define void @i93_ls() nounwind { + %tmp = load i93* @i93_l ; <i93> [#uses=1] + store i93 %tmp, i93* @i93_s + ret void +} + +define void @i94_ls() nounwind { + %tmp = load i94* @i94_l ; <i94> [#uses=1] + store i94 %tmp, i94* @i94_s + ret void +} + +define void @i95_ls() nounwind { + %tmp = load i95* @i95_l ; <i95> [#uses=1] + store i95 %tmp, i95* @i95_s + ret void +} + +define void @i96_ls() nounwind { + %tmp = load i96* @i96_l ; <i96> [#uses=1] + store i96 %tmp, i96* @i96_s + ret void +} + +define void @i97_ls() nounwind { + %tmp = load i97* @i97_l ; <i97> [#uses=1] + store i97 
%tmp, i97* @i97_s + ret void +} + +define void @i98_ls() nounwind { + %tmp = load i98* @i98_l ; <i98> [#uses=1] + store i98 %tmp, i98* @i98_s + ret void +} + +define void @i99_ls() nounwind { + %tmp = load i99* @i99_l ; <i99> [#uses=1] + store i99 %tmp, i99* @i99_s + ret void +} + +define void @i100_ls() nounwind { + %tmp = load i100* @i100_l ; <i100> [#uses=1] + store i100 %tmp, i100* @i100_s + ret void +} + +define void @i101_ls() nounwind { + %tmp = load i101* @i101_l ; <i101> [#uses=1] + store i101 %tmp, i101* @i101_s + ret void +} + +define void @i102_ls() nounwind { + %tmp = load i102* @i102_l ; <i102> [#uses=1] + store i102 %tmp, i102* @i102_s + ret void +} + +define void @i103_ls() nounwind { + %tmp = load i103* @i103_l ; <i103> [#uses=1] + store i103 %tmp, i103* @i103_s + ret void +} + +define void @i104_ls() nounwind { + %tmp = load i104* @i104_l ; <i104> [#uses=1] + store i104 %tmp, i104* @i104_s + ret void +} + +define void @i105_ls() nounwind { + %tmp = load i105* @i105_l ; <i105> [#uses=1] + store i105 %tmp, i105* @i105_s + ret void +} + +define void @i106_ls() nounwind { + %tmp = load i106* @i106_l ; <i106> [#uses=1] + store i106 %tmp, i106* @i106_s + ret void +} + +define void @i107_ls() nounwind { + %tmp = load i107* @i107_l ; <i107> [#uses=1] + store i107 %tmp, i107* @i107_s + ret void +} + +define void @i108_ls() nounwind { + %tmp = load i108* @i108_l ; <i108> [#uses=1] + store i108 %tmp, i108* @i108_s + ret void +} + +define void @i109_ls() nounwind { + %tmp = load i109* @i109_l ; <i109> [#uses=1] + store i109 %tmp, i109* @i109_s + ret void +} + +define void @i110_ls() nounwind { + %tmp = load i110* @i110_l ; <i110> [#uses=1] + store i110 %tmp, i110* @i110_s + ret void +} + +define void @i111_ls() nounwind { + %tmp = load i111* @i111_l ; <i111> [#uses=1] + store i111 %tmp, i111* @i111_s + ret void +} + +define void @i112_ls() nounwind { + %tmp = load i112* @i112_l ; <i112> [#uses=1] + store i112 %tmp, i112* @i112_s + ret void +} + +define void 
@i113_ls() nounwind { + %tmp = load i113* @i113_l ; <i113> [#uses=1] + store i113 %tmp, i113* @i113_s + ret void +} + +define void @i114_ls() nounwind { + %tmp = load i114* @i114_l ; <i114> [#uses=1] + store i114 %tmp, i114* @i114_s + ret void +} + +define void @i115_ls() nounwind { + %tmp = load i115* @i115_l ; <i115> [#uses=1] + store i115 %tmp, i115* @i115_s + ret void +} + +define void @i116_ls() nounwind { + %tmp = load i116* @i116_l ; <i116> [#uses=1] + store i116 %tmp, i116* @i116_s + ret void +} + +define void @i117_ls() nounwind { + %tmp = load i117* @i117_l ; <i117> [#uses=1] + store i117 %tmp, i117* @i117_s + ret void +} + +define void @i118_ls() nounwind { + %tmp = load i118* @i118_l ; <i118> [#uses=1] + store i118 %tmp, i118* @i118_s + ret void +} + +define void @i119_ls() nounwind { + %tmp = load i119* @i119_l ; <i119> [#uses=1] + store i119 %tmp, i119* @i119_s + ret void +} + +define void @i120_ls() nounwind { + %tmp = load i120* @i120_l ; <i120> [#uses=1] + store i120 %tmp, i120* @i120_s + ret void +} + +define void @i121_ls() nounwind { + %tmp = load i121* @i121_l ; <i121> [#uses=1] + store i121 %tmp, i121* @i121_s + ret void +} + +define void @i122_ls() nounwind { + %tmp = load i122* @i122_l ; <i122> [#uses=1] + store i122 %tmp, i122* @i122_s + ret void +} + +define void @i123_ls() nounwind { + %tmp = load i123* @i123_l ; <i123> [#uses=1] + store i123 %tmp, i123* @i123_s + ret void +} + +define void @i124_ls() nounwind { + %tmp = load i124* @i124_l ; <i124> [#uses=1] + store i124 %tmp, i124* @i124_s + ret void +} + +define void @i125_ls() nounwind { + %tmp = load i125* @i125_l ; <i125> [#uses=1] + store i125 %tmp, i125* @i125_s + ret void +} + +define void @i126_ls() nounwind { + %tmp = load i126* @i126_l ; <i126> [#uses=1] + store i126 %tmp, i126* @i126_s + ret void +} + +define void @i127_ls() nounwind { + %tmp = load i127* @i127_l ; <i127> [#uses=1] + store i127 %tmp, i127* @i127_s + ret void +} + +define void @i128_ls() nounwind { + %tmp = 
load i128* @i128_l ; <i128> [#uses=1] + store i128 %tmp, i128* @i128_s + ret void +} + +define void @i129_ls() nounwind { + %tmp = load i129* @i129_l ; <i129> [#uses=1] + store i129 %tmp, i129* @i129_s + ret void +} + +define void @i130_ls() nounwind { + %tmp = load i130* @i130_l ; <i130> [#uses=1] + store i130 %tmp, i130* @i130_s + ret void +} + +define void @i131_ls() nounwind { + %tmp = load i131* @i131_l ; <i131> [#uses=1] + store i131 %tmp, i131* @i131_s + ret void +} + +define void @i132_ls() nounwind { + %tmp = load i132* @i132_l ; <i132> [#uses=1] + store i132 %tmp, i132* @i132_s + ret void +} + +define void @i133_ls() nounwind { + %tmp = load i133* @i133_l ; <i133> [#uses=1] + store i133 %tmp, i133* @i133_s + ret void +} + +define void @i134_ls() nounwind { + %tmp = load i134* @i134_l ; <i134> [#uses=1] + store i134 %tmp, i134* @i134_s + ret void +} + +define void @i135_ls() nounwind { + %tmp = load i135* @i135_l ; <i135> [#uses=1] + store i135 %tmp, i135* @i135_s + ret void +} + +define void @i136_ls() nounwind { + %tmp = load i136* @i136_l ; <i136> [#uses=1] + store i136 %tmp, i136* @i136_s + ret void +} + +define void @i137_ls() nounwind { + %tmp = load i137* @i137_l ; <i137> [#uses=1] + store i137 %tmp, i137* @i137_s + ret void +} + +define void @i138_ls() nounwind { + %tmp = load i138* @i138_l ; <i138> [#uses=1] + store i138 %tmp, i138* @i138_s + ret void +} + +define void @i139_ls() nounwind { + %tmp = load i139* @i139_l ; <i139> [#uses=1] + store i139 %tmp, i139* @i139_s + ret void +} + +define void @i140_ls() nounwind { + %tmp = load i140* @i140_l ; <i140> [#uses=1] + store i140 %tmp, i140* @i140_s + ret void +} + +define void @i141_ls() nounwind { + %tmp = load i141* @i141_l ; <i141> [#uses=1] + store i141 %tmp, i141* @i141_s + ret void +} + +define void @i142_ls() nounwind { + %tmp = load i142* @i142_l ; <i142> [#uses=1] + store i142 %tmp, i142* @i142_s + ret void +} + +define void @i143_ls() nounwind { + %tmp = load i143* @i143_l ; <i143> 
[#uses=1] + store i143 %tmp, i143* @i143_s + ret void +} + +define void @i144_ls() nounwind { + %tmp = load i144* @i144_l ; <i144> [#uses=1] + store i144 %tmp, i144* @i144_s + ret void +} + +define void @i145_ls() nounwind { + %tmp = load i145* @i145_l ; <i145> [#uses=1] + store i145 %tmp, i145* @i145_s + ret void +} + +define void @i146_ls() nounwind { + %tmp = load i146* @i146_l ; <i146> [#uses=1] + store i146 %tmp, i146* @i146_s + ret void +} + +define void @i147_ls() nounwind { + %tmp = load i147* @i147_l ; <i147> [#uses=1] + store i147 %tmp, i147* @i147_s + ret void +} + +define void @i148_ls() nounwind { + %tmp = load i148* @i148_l ; <i148> [#uses=1] + store i148 %tmp, i148* @i148_s + ret void +} + +define void @i149_ls() nounwind { + %tmp = load i149* @i149_l ; <i149> [#uses=1] + store i149 %tmp, i149* @i149_s + ret void +} + +define void @i150_ls() nounwind { + %tmp = load i150* @i150_l ; <i150> [#uses=1] + store i150 %tmp, i150* @i150_s + ret void +} + +define void @i151_ls() nounwind { + %tmp = load i151* @i151_l ; <i151> [#uses=1] + store i151 %tmp, i151* @i151_s + ret void +} + +define void @i152_ls() nounwind { + %tmp = load i152* @i152_l ; <i152> [#uses=1] + store i152 %tmp, i152* @i152_s + ret void +} + +define void @i153_ls() nounwind { + %tmp = load i153* @i153_l ; <i153> [#uses=1] + store i153 %tmp, i153* @i153_s + ret void +} + +define void @i154_ls() nounwind { + %tmp = load i154* @i154_l ; <i154> [#uses=1] + store i154 %tmp, i154* @i154_s + ret void +} + +define void @i155_ls() nounwind { + %tmp = load i155* @i155_l ; <i155> [#uses=1] + store i155 %tmp, i155* @i155_s + ret void +} + +define void @i156_ls() nounwind { + %tmp = load i156* @i156_l ; <i156> [#uses=1] + store i156 %tmp, i156* @i156_s + ret void +} + +define void @i157_ls() nounwind { + %tmp = load i157* @i157_l ; <i157> [#uses=1] + store i157 %tmp, i157* @i157_s + ret void +} + +define void @i158_ls() nounwind { + %tmp = load i158* @i158_l ; <i158> [#uses=1] + store i158 %tmp, i158* 
@i158_s + ret void +} + +define void @i159_ls() nounwind { + %tmp = load i159* @i159_l ; <i159> [#uses=1] + store i159 %tmp, i159* @i159_s + ret void +} + +define void @i160_ls() nounwind { + %tmp = load i160* @i160_l ; <i160> [#uses=1] + store i160 %tmp, i160* @i160_s + ret void +} + +define void @i161_ls() nounwind { + %tmp = load i161* @i161_l ; <i161> [#uses=1] + store i161 %tmp, i161* @i161_s + ret void +} + +define void @i162_ls() nounwind { + %tmp = load i162* @i162_l ; <i162> [#uses=1] + store i162 %tmp, i162* @i162_s + ret void +} + +define void @i163_ls() nounwind { + %tmp = load i163* @i163_l ; <i163> [#uses=1] + store i163 %tmp, i163* @i163_s + ret void +} + +define void @i164_ls() nounwind { + %tmp = load i164* @i164_l ; <i164> [#uses=1] + store i164 %tmp, i164* @i164_s + ret void +} + +define void @i165_ls() nounwind { + %tmp = load i165* @i165_l ; <i165> [#uses=1] + store i165 %tmp, i165* @i165_s + ret void +} + +define void @i166_ls() nounwind { + %tmp = load i166* @i166_l ; <i166> [#uses=1] + store i166 %tmp, i166* @i166_s + ret void +} + +define void @i167_ls() nounwind { + %tmp = load i167* @i167_l ; <i167> [#uses=1] + store i167 %tmp, i167* @i167_s + ret void +} + +define void @i168_ls() nounwind { + %tmp = load i168* @i168_l ; <i168> [#uses=1] + store i168 %tmp, i168* @i168_s + ret void +} + +define void @i169_ls() nounwind { + %tmp = load i169* @i169_l ; <i169> [#uses=1] + store i169 %tmp, i169* @i169_s + ret void +} + +define void @i170_ls() nounwind { + %tmp = load i170* @i170_l ; <i170> [#uses=1] + store i170 %tmp, i170* @i170_s + ret void +} + +define void @i171_ls() nounwind { + %tmp = load i171* @i171_l ; <i171> [#uses=1] + store i171 %tmp, i171* @i171_s + ret void +} + +define void @i172_ls() nounwind { + %tmp = load i172* @i172_l ; <i172> [#uses=1] + store i172 %tmp, i172* @i172_s + ret void +} + +define void @i173_ls() nounwind { + %tmp = load i173* @i173_l ; <i173> [#uses=1] + store i173 %tmp, i173* @i173_s + ret void +} + +define 
void @i174_ls() nounwind { + %tmp = load i174* @i174_l ; <i174> [#uses=1] + store i174 %tmp, i174* @i174_s + ret void +} + +define void @i175_ls() nounwind { + %tmp = load i175* @i175_l ; <i175> [#uses=1] + store i175 %tmp, i175* @i175_s + ret void +} + +define void @i176_ls() nounwind { + %tmp = load i176* @i176_l ; <i176> [#uses=1] + store i176 %tmp, i176* @i176_s + ret void +} + +define void @i177_ls() nounwind { + %tmp = load i177* @i177_l ; <i177> [#uses=1] + store i177 %tmp, i177* @i177_s + ret void +} + +define void @i178_ls() nounwind { + %tmp = load i178* @i178_l ; <i178> [#uses=1] + store i178 %tmp, i178* @i178_s + ret void +} + +define void @i179_ls() nounwind { + %tmp = load i179* @i179_l ; <i179> [#uses=1] + store i179 %tmp, i179* @i179_s + ret void +} + +define void @i180_ls() nounwind { + %tmp = load i180* @i180_l ; <i180> [#uses=1] + store i180 %tmp, i180* @i180_s + ret void +} + +define void @i181_ls() nounwind { + %tmp = load i181* @i181_l ; <i181> [#uses=1] + store i181 %tmp, i181* @i181_s + ret void +} + +define void @i182_ls() nounwind { + %tmp = load i182* @i182_l ; <i182> [#uses=1] + store i182 %tmp, i182* @i182_s + ret void +} + +define void @i183_ls() nounwind { + %tmp = load i183* @i183_l ; <i183> [#uses=1] + store i183 %tmp, i183* @i183_s + ret void +} + +define void @i184_ls() nounwind { + %tmp = load i184* @i184_l ; <i184> [#uses=1] + store i184 %tmp, i184* @i184_s + ret void +} + +define void @i185_ls() nounwind { + %tmp = load i185* @i185_l ; <i185> [#uses=1] + store i185 %tmp, i185* @i185_s + ret void +} + +define void @i186_ls() nounwind { + %tmp = load i186* @i186_l ; <i186> [#uses=1] + store i186 %tmp, i186* @i186_s + ret void +} + +define void @i187_ls() nounwind { + %tmp = load i187* @i187_l ; <i187> [#uses=1] + store i187 %tmp, i187* @i187_s + ret void +} + +define void @i188_ls() nounwind { + %tmp = load i188* @i188_l ; <i188> [#uses=1] + store i188 %tmp, i188* @i188_s + ret void +} + +define void @i189_ls() nounwind { + %tmp 
= load i189* @i189_l ; <i189> [#uses=1] + store i189 %tmp, i189* @i189_s + ret void +} + +define void @i190_ls() nounwind { + %tmp = load i190* @i190_l ; <i190> [#uses=1] + store i190 %tmp, i190* @i190_s + ret void +} + +define void @i191_ls() nounwind { + %tmp = load i191* @i191_l ; <i191> [#uses=1] + store i191 %tmp, i191* @i191_s + ret void +} + +define void @i192_ls() nounwind { + %tmp = load i192* @i192_l ; <i192> [#uses=1] + store i192 %tmp, i192* @i192_s + ret void +} + +define void @i193_ls() nounwind { + %tmp = load i193* @i193_l ; <i193> [#uses=1] + store i193 %tmp, i193* @i193_s + ret void +} + +define void @i194_ls() nounwind { + %tmp = load i194* @i194_l ; <i194> [#uses=1] + store i194 %tmp, i194* @i194_s + ret void +} + +define void @i195_ls() nounwind { + %tmp = load i195* @i195_l ; <i195> [#uses=1] + store i195 %tmp, i195* @i195_s + ret void +} + +define void @i196_ls() nounwind { + %tmp = load i196* @i196_l ; <i196> [#uses=1] + store i196 %tmp, i196* @i196_s + ret void +} + +define void @i197_ls() nounwind { + %tmp = load i197* @i197_l ; <i197> [#uses=1] + store i197 %tmp, i197* @i197_s + ret void +} + +define void @i198_ls() nounwind { + %tmp = load i198* @i198_l ; <i198> [#uses=1] + store i198 %tmp, i198* @i198_s + ret void +} + +define void @i199_ls() nounwind { + %tmp = load i199* @i199_l ; <i199> [#uses=1] + store i199 %tmp, i199* @i199_s + ret void +} + +define void @i200_ls() nounwind { + %tmp = load i200* @i200_l ; <i200> [#uses=1] + store i200 %tmp, i200* @i200_s + ret void +} + +define void @i201_ls() nounwind { + %tmp = load i201* @i201_l ; <i201> [#uses=1] + store i201 %tmp, i201* @i201_s + ret void +} + +define void @i202_ls() nounwind { + %tmp = load i202* @i202_l ; <i202> [#uses=1] + store i202 %tmp, i202* @i202_s + ret void +} + +define void @i203_ls() nounwind { + %tmp = load i203* @i203_l ; <i203> [#uses=1] + store i203 %tmp, i203* @i203_s + ret void +} + +define void @i204_ls() nounwind { + %tmp = load i204* @i204_l ; <i204> 
[#uses=1] + store i204 %tmp, i204* @i204_s + ret void +} + +define void @i205_ls() nounwind { + %tmp = load i205* @i205_l ; <i205> [#uses=1] + store i205 %tmp, i205* @i205_s + ret void +} + +define void @i206_ls() nounwind { + %tmp = load i206* @i206_l ; <i206> [#uses=1] + store i206 %tmp, i206* @i206_s + ret void +} + +define void @i207_ls() nounwind { + %tmp = load i207* @i207_l ; <i207> [#uses=1] + store i207 %tmp, i207* @i207_s + ret void +} + +define void @i208_ls() nounwind { + %tmp = load i208* @i208_l ; <i208> [#uses=1] + store i208 %tmp, i208* @i208_s + ret void +} + +define void @i209_ls() nounwind { + %tmp = load i209* @i209_l ; <i209> [#uses=1] + store i209 %tmp, i209* @i209_s + ret void +} + +define void @i210_ls() nounwind { + %tmp = load i210* @i210_l ; <i210> [#uses=1] + store i210 %tmp, i210* @i210_s + ret void +} + +define void @i211_ls() nounwind { + %tmp = load i211* @i211_l ; <i211> [#uses=1] + store i211 %tmp, i211* @i211_s + ret void +} + +define void @i212_ls() nounwind { + %tmp = load i212* @i212_l ; <i212> [#uses=1] + store i212 %tmp, i212* @i212_s + ret void +} + +define void @i213_ls() nounwind { + %tmp = load i213* @i213_l ; <i213> [#uses=1] + store i213 %tmp, i213* @i213_s + ret void +} + +define void @i214_ls() nounwind { + %tmp = load i214* @i214_l ; <i214> [#uses=1] + store i214 %tmp, i214* @i214_s + ret void +} + +define void @i215_ls() nounwind { + %tmp = load i215* @i215_l ; <i215> [#uses=1] + store i215 %tmp, i215* @i215_s + ret void +} + +define void @i216_ls() nounwind { + %tmp = load i216* @i216_l ; <i216> [#uses=1] + store i216 %tmp, i216* @i216_s + ret void +} + +define void @i217_ls() nounwind { + %tmp = load i217* @i217_l ; <i217> [#uses=1] + store i217 %tmp, i217* @i217_s + ret void +} + +define void @i218_ls() nounwind { + %tmp = load i218* @i218_l ; <i218> [#uses=1] + store i218 %tmp, i218* @i218_s + ret void +} + +define void @i219_ls() nounwind { + %tmp = load i219* @i219_l ; <i219> [#uses=1] + store i219 %tmp, i219* 
@i219_s + ret void +} + +define void @i220_ls() nounwind { + %tmp = load i220* @i220_l ; <i220> [#uses=1] + store i220 %tmp, i220* @i220_s + ret void +} + +define void @i221_ls() nounwind { + %tmp = load i221* @i221_l ; <i221> [#uses=1] + store i221 %tmp, i221* @i221_s + ret void +} + +define void @i222_ls() nounwind { + %tmp = load i222* @i222_l ; <i222> [#uses=1] + store i222 %tmp, i222* @i222_s + ret void +} + +define void @i223_ls() nounwind { + %tmp = load i223* @i223_l ; <i223> [#uses=1] + store i223 %tmp, i223* @i223_s + ret void +} + +define void @i224_ls() nounwind { + %tmp = load i224* @i224_l ; <i224> [#uses=1] + store i224 %tmp, i224* @i224_s + ret void +} + +define void @i225_ls() nounwind { + %tmp = load i225* @i225_l ; <i225> [#uses=1] + store i225 %tmp, i225* @i225_s + ret void +} + +define void @i226_ls() nounwind { + %tmp = load i226* @i226_l ; <i226> [#uses=1] + store i226 %tmp, i226* @i226_s + ret void +} + +define void @i227_ls() nounwind { + %tmp = load i227* @i227_l ; <i227> [#uses=1] + store i227 %tmp, i227* @i227_s + ret void +} + +define void @i228_ls() nounwind { + %tmp = load i228* @i228_l ; <i228> [#uses=1] + store i228 %tmp, i228* @i228_s + ret void +} + +define void @i229_ls() nounwind { + %tmp = load i229* @i229_l ; <i229> [#uses=1] + store i229 %tmp, i229* @i229_s + ret void +} + +define void @i230_ls() nounwind { + %tmp = load i230* @i230_l ; <i230> [#uses=1] + store i230 %tmp, i230* @i230_s + ret void +} + +define void @i231_ls() nounwind { + %tmp = load i231* @i231_l ; <i231> [#uses=1] + store i231 %tmp, i231* @i231_s + ret void +} + +define void @i232_ls() nounwind { + %tmp = load i232* @i232_l ; <i232> [#uses=1] + store i232 %tmp, i232* @i232_s + ret void +} + +define void @i233_ls() nounwind { + %tmp = load i233* @i233_l ; <i233> [#uses=1] + store i233 %tmp, i233* @i233_s + ret void +} + +define void @i234_ls() nounwind { + %tmp = load i234* @i234_l ; <i234> [#uses=1] + store i234 %tmp, i234* @i234_s + ret void +} + +define 
void @i235_ls() nounwind { + %tmp = load i235* @i235_l ; <i235> [#uses=1] + store i235 %tmp, i235* @i235_s + ret void +} + +define void @i236_ls() nounwind { + %tmp = load i236* @i236_l ; <i236> [#uses=1] + store i236 %tmp, i236* @i236_s + ret void +} + +define void @i237_ls() nounwind { + %tmp = load i237* @i237_l ; <i237> [#uses=1] + store i237 %tmp, i237* @i237_s + ret void +} + +define void @i238_ls() nounwind { + %tmp = load i238* @i238_l ; <i238> [#uses=1] + store i238 %tmp, i238* @i238_s + ret void +} + +define void @i239_ls() nounwind { + %tmp = load i239* @i239_l ; <i239> [#uses=1] + store i239 %tmp, i239* @i239_s + ret void +} + +define void @i240_ls() nounwind { + %tmp = load i240* @i240_l ; <i240> [#uses=1] + store i240 %tmp, i240* @i240_s + ret void +} + +define void @i241_ls() nounwind { + %tmp = load i241* @i241_l ; <i241> [#uses=1] + store i241 %tmp, i241* @i241_s + ret void +} + +define void @i242_ls() nounwind { + %tmp = load i242* @i242_l ; <i242> [#uses=1] + store i242 %tmp, i242* @i242_s + ret void +} + +define void @i243_ls() nounwind { + %tmp = load i243* @i243_l ; <i243> [#uses=1] + store i243 %tmp, i243* @i243_s + ret void +} + +define void @i244_ls() nounwind { + %tmp = load i244* @i244_l ; <i244> [#uses=1] + store i244 %tmp, i244* @i244_s + ret void +} + +define void @i245_ls() nounwind { + %tmp = load i245* @i245_l ; <i245> [#uses=1] + store i245 %tmp, i245* @i245_s + ret void +} + +define void @i246_ls() nounwind { + %tmp = load i246* @i246_l ; <i246> [#uses=1] + store i246 %tmp, i246* @i246_s + ret void +} + +define void @i247_ls() nounwind { + %tmp = load i247* @i247_l ; <i247> [#uses=1] + store i247 %tmp, i247* @i247_s + ret void +} + +define void @i248_ls() nounwind { + %tmp = load i248* @i248_l ; <i248> [#uses=1] + store i248 %tmp, i248* @i248_s + ret void +} + +define void @i249_ls() nounwind { + %tmp = load i249* @i249_l ; <i249> [#uses=1] + store i249 %tmp, i249* @i249_s + ret void +} + +define void @i250_ls() nounwind { + %tmp 
= load i250* @i250_l ; <i250> [#uses=1] + store i250 %tmp, i250* @i250_s + ret void +} + +define void @i251_ls() nounwind { + %tmp = load i251* @i251_l ; <i251> [#uses=1] + store i251 %tmp, i251* @i251_s + ret void +} + +define void @i252_ls() nounwind { + %tmp = load i252* @i252_l ; <i252> [#uses=1] + store i252 %tmp, i252* @i252_s + ret void +} + +define void @i253_ls() nounwind { + %tmp = load i253* @i253_l ; <i253> [#uses=1] + store i253 %tmp, i253* @i253_s + ret void +} + +define void @i254_ls() nounwind { + %tmp = load i254* @i254_l ; <i254> [#uses=1] + store i254 %tmp, i254* @i254_s + ret void +} + +define void @i255_ls() nounwind { + %tmp = load i255* @i255_l ; <i255> [#uses=1] + store i255 %tmp, i255* @i255_s + ret void +} + +define void @i256_ls() nounwind { + %tmp = load i256* @i256_l ; <i256> [#uses=1] + store i256 %tmp, i256* @i256_s + ret void +}
diff --git a/src/LLVM/test/CodeGen/Generic/APIntParam.ll b/src/LLVM/test/CodeGen/Generic/APIntParam.ll new file mode 100644 index 0000000..8aa0b49 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/APIntParam.ll
@@ -0,0 +1,1537 @@ +; RUN: llc < %s > %t +@i1_s = external global i1 ; <i1*> [#uses=1] +@i2_s = external global i2 ; <i2*> [#uses=1] +@i3_s = external global i3 ; <i3*> [#uses=1] +@i4_s = external global i4 ; <i4*> [#uses=1] +@i5_s = external global i5 ; <i5*> [#uses=1] +@i6_s = external global i6 ; <i6*> [#uses=1] +@i7_s = external global i7 ; <i7*> [#uses=1] +@i8_s = external global i8 ; <i8*> [#uses=1] +@i9_s = external global i9 ; <i9*> [#uses=1] +@i10_s = external global i10 ; <i10*> [#uses=1] +@i11_s = external global i11 ; <i11*> [#uses=1] +@i12_s = external global i12 ; <i12*> [#uses=1] +@i13_s = external global i13 ; <i13*> [#uses=1] +@i14_s = external global i14 ; <i14*> [#uses=1] +@i15_s = external global i15 ; <i15*> [#uses=1] +@i16_s = external global i16 ; <i16*> [#uses=1] +@i17_s = external global i17 ; <i17*> [#uses=1] +@i18_s = external global i18 ; <i18*> [#uses=1] +@i19_s = external global i19 ; <i19*> [#uses=1] +@i20_s = external global i20 ; <i20*> [#uses=1] +@i21_s = external global i21 ; <i21*> [#uses=1] +@i22_s = external global i22 ; <i22*> [#uses=1] +@i23_s = external global i23 ; <i23*> [#uses=1] +@i24_s = external global i24 ; <i24*> [#uses=1] +@i25_s = external global i25 ; <i25*> [#uses=1] +@i26_s = external global i26 ; <i26*> [#uses=1] +@i27_s = external global i27 ; <i27*> [#uses=1] +@i28_s = external global i28 ; <i28*> [#uses=1] +@i29_s = external global i29 ; <i29*> [#uses=1] +@i30_s = external global i30 ; <i30*> [#uses=1] +@i31_s = external global i31 ; <i31*> [#uses=1] +@i32_s = external global i32 ; <i32*> [#uses=1] +@i33_s = external global i33 ; <i33*> [#uses=1] +@i34_s = external global i34 ; <i34*> [#uses=1] +@i35_s = external global i35 ; <i35*> [#uses=1] +@i36_s = external global i36 ; <i36*> [#uses=1] +@i37_s = external global i37 ; <i37*> [#uses=1] +@i38_s = external global i38 ; <i38*> [#uses=1] +@i39_s = external global i39 ; <i39*> [#uses=1] +@i40_s = external global i40 ; <i40*> [#uses=1] +@i41_s = external global 
i41 ; <i41*> [#uses=1] +@i42_s = external global i42 ; <i42*> [#uses=1] +@i43_s = external global i43 ; <i43*> [#uses=1] +@i44_s = external global i44 ; <i44*> [#uses=1] +@i45_s = external global i45 ; <i45*> [#uses=1] +@i46_s = external global i46 ; <i46*> [#uses=1] +@i47_s = external global i47 ; <i47*> [#uses=1] +@i48_s = external global i48 ; <i48*> [#uses=1] +@i49_s = external global i49 ; <i49*> [#uses=1] +@i50_s = external global i50 ; <i50*> [#uses=1] +@i51_s = external global i51 ; <i51*> [#uses=1] +@i52_s = external global i52 ; <i52*> [#uses=1] +@i53_s = external global i53 ; <i53*> [#uses=1] +@i54_s = external global i54 ; <i54*> [#uses=1] +@i55_s = external global i55 ; <i55*> [#uses=1] +@i56_s = external global i56 ; <i56*> [#uses=1] +@i57_s = external global i57 ; <i57*> [#uses=1] +@i58_s = external global i58 ; <i58*> [#uses=1] +@i59_s = external global i59 ; <i59*> [#uses=1] +@i60_s = external global i60 ; <i60*> [#uses=1] +@i61_s = external global i61 ; <i61*> [#uses=1] +@i62_s = external global i62 ; <i62*> [#uses=1] +@i63_s = external global i63 ; <i63*> [#uses=1] +@i64_s = external global i64 ; <i64*> [#uses=1] +@i65_s = external global i65 ; <i65*> [#uses=1] +@i66_s = external global i66 ; <i66*> [#uses=1] +@i67_s = external global i67 ; <i67*> [#uses=1] +@i68_s = external global i68 ; <i68*> [#uses=1] +@i69_s = external global i69 ; <i69*> [#uses=1] +@i70_s = external global i70 ; <i70*> [#uses=1] +@i71_s = external global i71 ; <i71*> [#uses=1] +@i72_s = external global i72 ; <i72*> [#uses=1] +@i73_s = external global i73 ; <i73*> [#uses=1] +@i74_s = external global i74 ; <i74*> [#uses=1] +@i75_s = external global i75 ; <i75*> [#uses=1] +@i76_s = external global i76 ; <i76*> [#uses=1] +@i77_s = external global i77 ; <i77*> [#uses=1] +@i78_s = external global i78 ; <i78*> [#uses=1] +@i79_s = external global i79 ; <i79*> [#uses=1] +@i80_s = external global i80 ; <i80*> [#uses=1] +@i81_s = external global i81 ; <i81*> [#uses=1] +@i82_s = 
external global i82 ; <i82*> [#uses=1] +@i83_s = external global i83 ; <i83*> [#uses=1] +@i84_s = external global i84 ; <i84*> [#uses=1] +@i85_s = external global i85 ; <i85*> [#uses=1] +@i86_s = external global i86 ; <i86*> [#uses=1] +@i87_s = external global i87 ; <i87*> [#uses=1] +@i88_s = external global i88 ; <i88*> [#uses=1] +@i89_s = external global i89 ; <i89*> [#uses=1] +@i90_s = external global i90 ; <i90*> [#uses=1] +@i91_s = external global i91 ; <i91*> [#uses=1] +@i92_s = external global i92 ; <i92*> [#uses=1] +@i93_s = external global i93 ; <i93*> [#uses=1] +@i94_s = external global i94 ; <i94*> [#uses=1] +@i95_s = external global i95 ; <i95*> [#uses=1] +@i96_s = external global i96 ; <i96*> [#uses=1] +@i97_s = external global i97 ; <i97*> [#uses=1] +@i98_s = external global i98 ; <i98*> [#uses=1] +@i99_s = external global i99 ; <i99*> [#uses=1] +@i100_s = external global i100 ; <i100*> [#uses=1] +@i101_s = external global i101 ; <i101*> [#uses=1] +@i102_s = external global i102 ; <i102*> [#uses=1] +@i103_s = external global i103 ; <i103*> [#uses=1] +@i104_s = external global i104 ; <i104*> [#uses=1] +@i105_s = external global i105 ; <i105*> [#uses=1] +@i106_s = external global i106 ; <i106*> [#uses=1] +@i107_s = external global i107 ; <i107*> [#uses=1] +@i108_s = external global i108 ; <i108*> [#uses=1] +@i109_s = external global i109 ; <i109*> [#uses=1] +@i110_s = external global i110 ; <i110*> [#uses=1] +@i111_s = external global i111 ; <i111*> [#uses=1] +@i112_s = external global i112 ; <i112*> [#uses=1] +@i113_s = external global i113 ; <i113*> [#uses=1] +@i114_s = external global i114 ; <i114*> [#uses=1] +@i115_s = external global i115 ; <i115*> [#uses=1] +@i116_s = external global i116 ; <i116*> [#uses=1] +@i117_s = external global i117 ; <i117*> [#uses=1] +@i118_s = external global i118 ; <i118*> [#uses=1] +@i119_s = external global i119 ; <i119*> [#uses=1] +@i120_s = external global i120 ; <i120*> [#uses=1] +@i121_s = external global i121 ; 
<i121*> [#uses=1] +@i122_s = external global i122 ; <i122*> [#uses=1] +@i123_s = external global i123 ; <i123*> [#uses=1] +@i124_s = external global i124 ; <i124*> [#uses=1] +@i125_s = external global i125 ; <i125*> [#uses=1] +@i126_s = external global i126 ; <i126*> [#uses=1] +@i127_s = external global i127 ; <i127*> [#uses=1] +@i128_s = external global i128 ; <i128*> [#uses=1] +@i129_s = external global i129 ; <i129*> [#uses=1] +@i130_s = external global i130 ; <i130*> [#uses=1] +@i131_s = external global i131 ; <i131*> [#uses=1] +@i132_s = external global i132 ; <i132*> [#uses=1] +@i133_s = external global i133 ; <i133*> [#uses=1] +@i134_s = external global i134 ; <i134*> [#uses=1] +@i135_s = external global i135 ; <i135*> [#uses=1] +@i136_s = external global i136 ; <i136*> [#uses=1] +@i137_s = external global i137 ; <i137*> [#uses=1] +@i138_s = external global i138 ; <i138*> [#uses=1] +@i139_s = external global i139 ; <i139*> [#uses=1] +@i140_s = external global i140 ; <i140*> [#uses=1] +@i141_s = external global i141 ; <i141*> [#uses=1] +@i142_s = external global i142 ; <i142*> [#uses=1] +@i143_s = external global i143 ; <i143*> [#uses=1] +@i144_s = external global i144 ; <i144*> [#uses=1] +@i145_s = external global i145 ; <i145*> [#uses=1] +@i146_s = external global i146 ; <i146*> [#uses=1] +@i147_s = external global i147 ; <i147*> [#uses=1] +@i148_s = external global i148 ; <i148*> [#uses=1] +@i149_s = external global i149 ; <i149*> [#uses=1] +@i150_s = external global i150 ; <i150*> [#uses=1] +@i151_s = external global i151 ; <i151*> [#uses=1] +@i152_s = external global i152 ; <i152*> [#uses=1] +@i153_s = external global i153 ; <i153*> [#uses=1] +@i154_s = external global i154 ; <i154*> [#uses=1] +@i155_s = external global i155 ; <i155*> [#uses=1] +@i156_s = external global i156 ; <i156*> [#uses=1] +@i157_s = external global i157 ; <i157*> [#uses=1] +@i158_s = external global i158 ; <i158*> [#uses=1] +@i159_s = external global i159 ; <i159*> [#uses=1] 
+@i160_s = external global i160 ; <i160*> [#uses=1] +@i161_s = external global i161 ; <i161*> [#uses=1] +@i162_s = external global i162 ; <i162*> [#uses=1] +@i163_s = external global i163 ; <i163*> [#uses=1] +@i164_s = external global i164 ; <i164*> [#uses=1] +@i165_s = external global i165 ; <i165*> [#uses=1] +@i166_s = external global i166 ; <i166*> [#uses=1] +@i167_s = external global i167 ; <i167*> [#uses=1] +@i168_s = external global i168 ; <i168*> [#uses=1] +@i169_s = external global i169 ; <i169*> [#uses=1] +@i170_s = external global i170 ; <i170*> [#uses=1] +@i171_s = external global i171 ; <i171*> [#uses=1] +@i172_s = external global i172 ; <i172*> [#uses=1] +@i173_s = external global i173 ; <i173*> [#uses=1] +@i174_s = external global i174 ; <i174*> [#uses=1] +@i175_s = external global i175 ; <i175*> [#uses=1] +@i176_s = external global i176 ; <i176*> [#uses=1] +@i177_s = external global i177 ; <i177*> [#uses=1] +@i178_s = external global i178 ; <i178*> [#uses=1] +@i179_s = external global i179 ; <i179*> [#uses=1] +@i180_s = external global i180 ; <i180*> [#uses=1] +@i181_s = external global i181 ; <i181*> [#uses=1] +@i182_s = external global i182 ; <i182*> [#uses=1] +@i183_s = external global i183 ; <i183*> [#uses=1] +@i184_s = external global i184 ; <i184*> [#uses=1] +@i185_s = external global i185 ; <i185*> [#uses=1] +@i186_s = external global i186 ; <i186*> [#uses=1] +@i187_s = external global i187 ; <i187*> [#uses=1] +@i188_s = external global i188 ; <i188*> [#uses=1] +@i189_s = external global i189 ; <i189*> [#uses=1] +@i190_s = external global i190 ; <i190*> [#uses=1] +@i191_s = external global i191 ; <i191*> [#uses=1] +@i192_s = external global i192 ; <i192*> [#uses=1] +@i193_s = external global i193 ; <i193*> [#uses=1] +@i194_s = external global i194 ; <i194*> [#uses=1] +@i195_s = external global i195 ; <i195*> [#uses=1] +@i196_s = external global i196 ; <i196*> [#uses=1] +@i197_s = external global i197 ; <i197*> [#uses=1] +@i198_s = external 
global i198 ; <i198*> [#uses=1] +@i199_s = external global i199 ; <i199*> [#uses=1] +@i200_s = external global i200 ; <i200*> [#uses=1] +@i201_s = external global i201 ; <i201*> [#uses=1] +@i202_s = external global i202 ; <i202*> [#uses=1] +@i203_s = external global i203 ; <i203*> [#uses=1] +@i204_s = external global i204 ; <i204*> [#uses=1] +@i205_s = external global i205 ; <i205*> [#uses=1] +@i206_s = external global i206 ; <i206*> [#uses=1] +@i207_s = external global i207 ; <i207*> [#uses=1] +@i208_s = external global i208 ; <i208*> [#uses=1] +@i209_s = external global i209 ; <i209*> [#uses=1] +@i210_s = external global i210 ; <i210*> [#uses=1] +@i211_s = external global i211 ; <i211*> [#uses=1] +@i212_s = external global i212 ; <i212*> [#uses=1] +@i213_s = external global i213 ; <i213*> [#uses=1] +@i214_s = external global i214 ; <i214*> [#uses=1] +@i215_s = external global i215 ; <i215*> [#uses=1] +@i216_s = external global i216 ; <i216*> [#uses=1] +@i217_s = external global i217 ; <i217*> [#uses=1] +@i218_s = external global i218 ; <i218*> [#uses=1] +@i219_s = external global i219 ; <i219*> [#uses=1] +@i220_s = external global i220 ; <i220*> [#uses=1] +@i221_s = external global i221 ; <i221*> [#uses=1] +@i222_s = external global i222 ; <i222*> [#uses=1] +@i223_s = external global i223 ; <i223*> [#uses=1] +@i224_s = external global i224 ; <i224*> [#uses=1] +@i225_s = external global i225 ; <i225*> [#uses=1] +@i226_s = external global i226 ; <i226*> [#uses=1] +@i227_s = external global i227 ; <i227*> [#uses=1] +@i228_s = external global i228 ; <i228*> [#uses=1] +@i229_s = external global i229 ; <i229*> [#uses=1] +@i230_s = external global i230 ; <i230*> [#uses=1] +@i231_s = external global i231 ; <i231*> [#uses=1] +@i232_s = external global i232 ; <i232*> [#uses=1] +@i233_s = external global i233 ; <i233*> [#uses=1] +@i234_s = external global i234 ; <i234*> [#uses=1] +@i235_s = external global i235 ; <i235*> [#uses=1] +@i236_s = external global i236 ; <i236*> 
[#uses=1] +@i237_s = external global i237 ; <i237*> [#uses=1] +@i238_s = external global i238 ; <i238*> [#uses=1] +@i239_s = external global i239 ; <i239*> [#uses=1] +@i240_s = external global i240 ; <i240*> [#uses=1] +@i241_s = external global i241 ; <i241*> [#uses=1] +@i242_s = external global i242 ; <i242*> [#uses=1] +@i243_s = external global i243 ; <i243*> [#uses=1] +@i244_s = external global i244 ; <i244*> [#uses=1] +@i245_s = external global i245 ; <i245*> [#uses=1] +@i246_s = external global i246 ; <i246*> [#uses=1] +@i247_s = external global i247 ; <i247*> [#uses=1] +@i248_s = external global i248 ; <i248*> [#uses=1] +@i249_s = external global i249 ; <i249*> [#uses=1] +@i250_s = external global i250 ; <i250*> [#uses=1] +@i251_s = external global i251 ; <i251*> [#uses=1] +@i252_s = external global i252 ; <i252*> [#uses=1] +@i253_s = external global i253 ; <i253*> [#uses=1] +@i254_s = external global i254 ; <i254*> [#uses=1] +@i255_s = external global i255 ; <i255*> [#uses=1] +@i256_s = external global i256 ; <i256*> [#uses=1] + +define void @i1_ls(i1 %x) nounwind { + store i1 %x, i1* @i1_s + ret void +} + +define void @i2_ls(i2 %x) nounwind { + store i2 %x, i2* @i2_s + ret void +} + +define void @i3_ls(i3 %x) nounwind { + store i3 %x, i3* @i3_s + ret void +} + +define void @i4_ls(i4 %x) nounwind { + store i4 %x, i4* @i4_s + ret void +} + +define void @i5_ls(i5 %x) nounwind { + store i5 %x, i5* @i5_s + ret void +} + +define void @i6_ls(i6 %x) nounwind { + store i6 %x, i6* @i6_s + ret void +} + +define void @i7_ls(i7 %x) nounwind { + store i7 %x, i7* @i7_s + ret void +} + +define void @i8_ls(i8 %x) nounwind { + store i8 %x, i8* @i8_s + ret void +} + +define void @i9_ls(i9 %x) nounwind { + store i9 %x, i9* @i9_s + ret void +} + +define void @i10_ls(i10 %x) nounwind { + store i10 %x, i10* @i10_s + ret void +} + +define void @i11_ls(i11 %x) nounwind { + store i11 %x, i11* @i11_s + ret void +} + +define void @i12_ls(i12 %x) nounwind { + store i12 %x, i12* @i12_s 
+ ret void +} + +define void @i13_ls(i13 %x) nounwind { + store i13 %x, i13* @i13_s + ret void +} + +define void @i14_ls(i14 %x) nounwind { + store i14 %x, i14* @i14_s + ret void +} + +define void @i15_ls(i15 %x) nounwind { + store i15 %x, i15* @i15_s + ret void +} + +define void @i16_ls(i16 %x) nounwind { + store i16 %x, i16* @i16_s + ret void +} + +define void @i17_ls(i17 %x) nounwind { + store i17 %x, i17* @i17_s + ret void +} + +define void @i18_ls(i18 %x) nounwind { + store i18 %x, i18* @i18_s + ret void +} + +define void @i19_ls(i19 %x) nounwind { + store i19 %x, i19* @i19_s + ret void +} + +define void @i20_ls(i20 %x) nounwind { + store i20 %x, i20* @i20_s + ret void +} + +define void @i21_ls(i21 %x) nounwind { + store i21 %x, i21* @i21_s + ret void +} + +define void @i22_ls(i22 %x) nounwind { + store i22 %x, i22* @i22_s + ret void +} + +define void @i23_ls(i23 %x) nounwind { + store i23 %x, i23* @i23_s + ret void +} + +define void @i24_ls(i24 %x) nounwind { + store i24 %x, i24* @i24_s + ret void +} + +define void @i25_ls(i25 %x) nounwind { + store i25 %x, i25* @i25_s + ret void +} + +define void @i26_ls(i26 %x) nounwind { + store i26 %x, i26* @i26_s + ret void +} + +define void @i27_ls(i27 %x) nounwind { + store i27 %x, i27* @i27_s + ret void +} + +define void @i28_ls(i28 %x) nounwind { + store i28 %x, i28* @i28_s + ret void +} + +define void @i29_ls(i29 %x) nounwind { + store i29 %x, i29* @i29_s + ret void +} + +define void @i30_ls(i30 %x) nounwind { + store i30 %x, i30* @i30_s + ret void +} + +define void @i31_ls(i31 %x) nounwind { + store i31 %x, i31* @i31_s + ret void +} + +define void @i32_ls(i32 %x) nounwind { + store i32 %x, i32* @i32_s + ret void +} + +define void @i33_ls(i33 %x) nounwind { + store i33 %x, i33* @i33_s + ret void +} + +define void @i34_ls(i34 %x) nounwind { + store i34 %x, i34* @i34_s + ret void +} + +define void @i35_ls(i35 %x) nounwind { + store i35 %x, i35* @i35_s + ret void +} + +define void @i36_ls(i36 %x) nounwind { + store i36 
%x, i36* @i36_s + ret void +} + +define void @i37_ls(i37 %x) nounwind { + store i37 %x, i37* @i37_s + ret void +} + +define void @i38_ls(i38 %x) nounwind { + store i38 %x, i38* @i38_s + ret void +} + +define void @i39_ls(i39 %x) nounwind { + store i39 %x, i39* @i39_s + ret void +} + +define void @i40_ls(i40 %x) nounwind { + store i40 %x, i40* @i40_s + ret void +} + +define void @i41_ls(i41 %x) nounwind { + store i41 %x, i41* @i41_s + ret void +} + +define void @i42_ls(i42 %x) nounwind { + store i42 %x, i42* @i42_s + ret void +} + +define void @i43_ls(i43 %x) nounwind { + store i43 %x, i43* @i43_s + ret void +} + +define void @i44_ls(i44 %x) nounwind { + store i44 %x, i44* @i44_s + ret void +} + +define void @i45_ls(i45 %x) nounwind { + store i45 %x, i45* @i45_s + ret void +} + +define void @i46_ls(i46 %x) nounwind { + store i46 %x, i46* @i46_s + ret void +} + +define void @i47_ls(i47 %x) nounwind { + store i47 %x, i47* @i47_s + ret void +} + +define void @i48_ls(i48 %x) nounwind { + store i48 %x, i48* @i48_s + ret void +} + +define void @i49_ls(i49 %x) nounwind { + store i49 %x, i49* @i49_s + ret void +} + +define void @i50_ls(i50 %x) nounwind { + store i50 %x, i50* @i50_s + ret void +} + +define void @i51_ls(i51 %x) nounwind { + store i51 %x, i51* @i51_s + ret void +} + +define void @i52_ls(i52 %x) nounwind { + store i52 %x, i52* @i52_s + ret void +} + +define void @i53_ls(i53 %x) nounwind { + store i53 %x, i53* @i53_s + ret void +} + +define void @i54_ls(i54 %x) nounwind { + store i54 %x, i54* @i54_s + ret void +} + +define void @i55_ls(i55 %x) nounwind { + store i55 %x, i55* @i55_s + ret void +} + +define void @i56_ls(i56 %x) nounwind { + store i56 %x, i56* @i56_s + ret void +} + +define void @i57_ls(i57 %x) nounwind { + store i57 %x, i57* @i57_s + ret void +} + +define void @i58_ls(i58 %x) nounwind { + store i58 %x, i58* @i58_s + ret void +} + +define void @i59_ls(i59 %x) nounwind { + store i59 %x, i59* @i59_s + ret void +} + +define void @i60_ls(i60 %x) 
nounwind { + store i60 %x, i60* @i60_s + ret void +} + +define void @i61_ls(i61 %x) nounwind { + store i61 %x, i61* @i61_s + ret void +} + +define void @i62_ls(i62 %x) nounwind { + store i62 %x, i62* @i62_s + ret void +} + +define void @i63_ls(i63 %x) nounwind { + store i63 %x, i63* @i63_s + ret void +} + +define void @i64_ls(i64 %x) nounwind { + store i64 %x, i64* @i64_s + ret void +} + +define void @i65_ls(i65 %x) nounwind { + store i65 %x, i65* @i65_s + ret void +} + +define void @i66_ls(i66 %x) nounwind { + store i66 %x, i66* @i66_s + ret void +} + +define void @i67_ls(i67 %x) nounwind { + store i67 %x, i67* @i67_s + ret void +} + +define void @i68_ls(i68 %x) nounwind { + store i68 %x, i68* @i68_s + ret void +} + +define void @i69_ls(i69 %x) nounwind { + store i69 %x, i69* @i69_s + ret void +} + +define void @i70_ls(i70 %x) nounwind { + store i70 %x, i70* @i70_s + ret void +} + +define void @i71_ls(i71 %x) nounwind { + store i71 %x, i71* @i71_s + ret void +} + +define void @i72_ls(i72 %x) nounwind { + store i72 %x, i72* @i72_s + ret void +} + +define void @i73_ls(i73 %x) nounwind { + store i73 %x, i73* @i73_s + ret void +} + +define void @i74_ls(i74 %x) nounwind { + store i74 %x, i74* @i74_s + ret void +} + +define void @i75_ls(i75 %x) nounwind { + store i75 %x, i75* @i75_s + ret void +} + +define void @i76_ls(i76 %x) nounwind { + store i76 %x, i76* @i76_s + ret void +} + +define void @i77_ls(i77 %x) nounwind { + store i77 %x, i77* @i77_s + ret void +} + +define void @i78_ls(i78 %x) nounwind { + store i78 %x, i78* @i78_s + ret void +} + +define void @i79_ls(i79 %x) nounwind { + store i79 %x, i79* @i79_s + ret void +} + +define void @i80_ls(i80 %x) nounwind { + store i80 %x, i80* @i80_s + ret void +} + +define void @i81_ls(i81 %x) nounwind { + store i81 %x, i81* @i81_s + ret void +} + +define void @i82_ls(i82 %x) nounwind { + store i82 %x, i82* @i82_s + ret void +} + +define void @i83_ls(i83 %x) nounwind { + store i83 %x, i83* @i83_s + ret void +} + +define void 
@i84_ls(i84 %x) nounwind { + store i84 %x, i84* @i84_s + ret void +} + +define void @i85_ls(i85 %x) nounwind { + store i85 %x, i85* @i85_s + ret void +} + +define void @i86_ls(i86 %x) nounwind { + store i86 %x, i86* @i86_s + ret void +} + +define void @i87_ls(i87 %x) nounwind { + store i87 %x, i87* @i87_s + ret void +} + +define void @i88_ls(i88 %x) nounwind { + store i88 %x, i88* @i88_s + ret void +} + +define void @i89_ls(i89 %x) nounwind { + store i89 %x, i89* @i89_s + ret void +} + +define void @i90_ls(i90 %x) nounwind { + store i90 %x, i90* @i90_s + ret void +} + +define void @i91_ls(i91 %x) nounwind { + store i91 %x, i91* @i91_s + ret void +} + +define void @i92_ls(i92 %x) nounwind { + store i92 %x, i92* @i92_s + ret void +} + +define void @i93_ls(i93 %x) nounwind { + store i93 %x, i93* @i93_s + ret void +} + +define void @i94_ls(i94 %x) nounwind { + store i94 %x, i94* @i94_s + ret void +} + +define void @i95_ls(i95 %x) nounwind { + store i95 %x, i95* @i95_s + ret void +} + +define void @i96_ls(i96 %x) nounwind { + store i96 %x, i96* @i96_s + ret void +} + +define void @i97_ls(i97 %x) nounwind { + store i97 %x, i97* @i97_s + ret void +} + +define void @i98_ls(i98 %x) nounwind { + store i98 %x, i98* @i98_s + ret void +} + +define void @i99_ls(i99 %x) nounwind { + store i99 %x, i99* @i99_s + ret void +} + +define void @i100_ls(i100 %x) nounwind { + store i100 %x, i100* @i100_s + ret void +} + +define void @i101_ls(i101 %x) nounwind { + store i101 %x, i101* @i101_s + ret void +} + +define void @i102_ls(i102 %x) nounwind { + store i102 %x, i102* @i102_s + ret void +} + +define void @i103_ls(i103 %x) nounwind { + store i103 %x, i103* @i103_s + ret void +} + +define void @i104_ls(i104 %x) nounwind { + store i104 %x, i104* @i104_s + ret void +} + +define void @i105_ls(i105 %x) nounwind { + store i105 %x, i105* @i105_s + ret void +} + +define void @i106_ls(i106 %x) nounwind { + store i106 %x, i106* @i106_s + ret void +} + +define void @i107_ls(i107 %x) nounwind { + 
store i107 %x, i107* @i107_s + ret void +} + +define void @i108_ls(i108 %x) nounwind { + store i108 %x, i108* @i108_s + ret void +} + +define void @i109_ls(i109 %x) nounwind { + store i109 %x, i109* @i109_s + ret void +} + +define void @i110_ls(i110 %x) nounwind { + store i110 %x, i110* @i110_s + ret void +} + +define void @i111_ls(i111 %x) nounwind { + store i111 %x, i111* @i111_s + ret void +} + +define void @i112_ls(i112 %x) nounwind { + store i112 %x, i112* @i112_s + ret void +} + +define void @i113_ls(i113 %x) nounwind { + store i113 %x, i113* @i113_s + ret void +} + +define void @i114_ls(i114 %x) nounwind { + store i114 %x, i114* @i114_s + ret void +} + +define void @i115_ls(i115 %x) nounwind { + store i115 %x, i115* @i115_s + ret void +} + +define void @i116_ls(i116 %x) nounwind { + store i116 %x, i116* @i116_s + ret void +} + +define void @i117_ls(i117 %x) nounwind { + store i117 %x, i117* @i117_s + ret void +} + +define void @i118_ls(i118 %x) nounwind { + store i118 %x, i118* @i118_s + ret void +} + +define void @i119_ls(i119 %x) nounwind { + store i119 %x, i119* @i119_s + ret void +} + +define void @i120_ls(i120 %x) nounwind { + store i120 %x, i120* @i120_s + ret void +} + +define void @i121_ls(i121 %x) nounwind { + store i121 %x, i121* @i121_s + ret void +} + +define void @i122_ls(i122 %x) nounwind { + store i122 %x, i122* @i122_s + ret void +} + +define void @i123_ls(i123 %x) nounwind { + store i123 %x, i123* @i123_s + ret void +} + +define void @i124_ls(i124 %x) nounwind { + store i124 %x, i124* @i124_s + ret void +} + +define void @i125_ls(i125 %x) nounwind { + store i125 %x, i125* @i125_s + ret void +} + +define void @i126_ls(i126 %x) nounwind { + store i126 %x, i126* @i126_s + ret void +} + +define void @i127_ls(i127 %x) nounwind { + store i127 %x, i127* @i127_s + ret void +} + +define void @i128_ls(i128 %x) nounwind { + store i128 %x, i128* @i128_s + ret void +} + +define void @i129_ls(i129 %x) nounwind { + store i129 %x, i129* @i129_s + ret void 
+} + +define void @i130_ls(i130 %x) nounwind { + store i130 %x, i130* @i130_s + ret void +} + +define void @i131_ls(i131 %x) nounwind { + store i131 %x, i131* @i131_s + ret void +} + +define void @i132_ls(i132 %x) nounwind { + store i132 %x, i132* @i132_s + ret void +} + +define void @i133_ls(i133 %x) nounwind { + store i133 %x, i133* @i133_s + ret void +} + +define void @i134_ls(i134 %x) nounwind { + store i134 %x, i134* @i134_s + ret void +} + +define void @i135_ls(i135 %x) nounwind { + store i135 %x, i135* @i135_s + ret void +} + +define void @i136_ls(i136 %x) nounwind { + store i136 %x, i136* @i136_s + ret void +} + +define void @i137_ls(i137 %x) nounwind { + store i137 %x, i137* @i137_s + ret void +} + +define void @i138_ls(i138 %x) nounwind { + store i138 %x, i138* @i138_s + ret void +} + +define void @i139_ls(i139 %x) nounwind { + store i139 %x, i139* @i139_s + ret void +} + +define void @i140_ls(i140 %x) nounwind { + store i140 %x, i140* @i140_s + ret void +} + +define void @i141_ls(i141 %x) nounwind { + store i141 %x, i141* @i141_s + ret void +} + +define void @i142_ls(i142 %x) nounwind { + store i142 %x, i142* @i142_s + ret void +} + +define void @i143_ls(i143 %x) nounwind { + store i143 %x, i143* @i143_s + ret void +} + +define void @i144_ls(i144 %x) nounwind { + store i144 %x, i144* @i144_s + ret void +} + +define void @i145_ls(i145 %x) nounwind { + store i145 %x, i145* @i145_s + ret void +} + +define void @i146_ls(i146 %x) nounwind { + store i146 %x, i146* @i146_s + ret void +} + +define void @i147_ls(i147 %x) nounwind { + store i147 %x, i147* @i147_s + ret void +} + +define void @i148_ls(i148 %x) nounwind { + store i148 %x, i148* @i148_s + ret void +} + +define void @i149_ls(i149 %x) nounwind { + store i149 %x, i149* @i149_s + ret void +} + +define void @i150_ls(i150 %x) nounwind { + store i150 %x, i150* @i150_s + ret void +} + +define void @i151_ls(i151 %x) nounwind { + store i151 %x, i151* @i151_s + ret void +} + +define void @i152_ls(i152 %x) 
nounwind { + store i152 %x, i152* @i152_s + ret void +} + +define void @i153_ls(i153 %x) nounwind { + store i153 %x, i153* @i153_s + ret void +} + +define void @i154_ls(i154 %x) nounwind { + store i154 %x, i154* @i154_s + ret void +} + +define void @i155_ls(i155 %x) nounwind { + store i155 %x, i155* @i155_s + ret void +} + +define void @i156_ls(i156 %x) nounwind { + store i156 %x, i156* @i156_s + ret void +} + +define void @i157_ls(i157 %x) nounwind { + store i157 %x, i157* @i157_s + ret void +} + +define void @i158_ls(i158 %x) nounwind { + store i158 %x, i158* @i158_s + ret void +} + +define void @i159_ls(i159 %x) nounwind { + store i159 %x, i159* @i159_s + ret void +} + +define void @i160_ls(i160 %x) nounwind { + store i160 %x, i160* @i160_s + ret void +} + +define void @i161_ls(i161 %x) nounwind { + store i161 %x, i161* @i161_s + ret void +} + +define void @i162_ls(i162 %x) nounwind { + store i162 %x, i162* @i162_s + ret void +} + +define void @i163_ls(i163 %x) nounwind { + store i163 %x, i163* @i163_s + ret void +} + +define void @i164_ls(i164 %x) nounwind { + store i164 %x, i164* @i164_s + ret void +} + +define void @i165_ls(i165 %x) nounwind { + store i165 %x, i165* @i165_s + ret void +} + +define void @i166_ls(i166 %x) nounwind { + store i166 %x, i166* @i166_s + ret void +} + +define void @i167_ls(i167 %x) nounwind { + store i167 %x, i167* @i167_s + ret void +} + +define void @i168_ls(i168 %x) nounwind { + store i168 %x, i168* @i168_s + ret void +} + +define void @i169_ls(i169 %x) nounwind { + store i169 %x, i169* @i169_s + ret void +} + +define void @i170_ls(i170 %x) nounwind { + store i170 %x, i170* @i170_s + ret void +} + +define void @i171_ls(i171 %x) nounwind { + store i171 %x, i171* @i171_s + ret void +} + +define void @i172_ls(i172 %x) nounwind { + store i172 %x, i172* @i172_s + ret void +} + +define void @i173_ls(i173 %x) nounwind { + store i173 %x, i173* @i173_s + ret void +} + +define void @i174_ls(i174 %x) nounwind { + store i174 %x, i174* @i174_s 
+ ret void +} + +define void @i175_ls(i175 %x) nounwind { + store i175 %x, i175* @i175_s + ret void +} + +define void @i176_ls(i176 %x) nounwind { + store i176 %x, i176* @i176_s + ret void +} + +define void @i177_ls(i177 %x) nounwind { + store i177 %x, i177* @i177_s + ret void +} + +define void @i178_ls(i178 %x) nounwind { + store i178 %x, i178* @i178_s + ret void +} + +define void @i179_ls(i179 %x) nounwind { + store i179 %x, i179* @i179_s + ret void +} + +define void @i180_ls(i180 %x) nounwind { + store i180 %x, i180* @i180_s + ret void +} + +define void @i181_ls(i181 %x) nounwind { + store i181 %x, i181* @i181_s + ret void +} + +define void @i182_ls(i182 %x) nounwind { + store i182 %x, i182* @i182_s + ret void +} + +define void @i183_ls(i183 %x) nounwind { + store i183 %x, i183* @i183_s + ret void +} + +define void @i184_ls(i184 %x) nounwind { + store i184 %x, i184* @i184_s + ret void +} + +define void @i185_ls(i185 %x) nounwind { + store i185 %x, i185* @i185_s + ret void +} + +define void @i186_ls(i186 %x) nounwind { + store i186 %x, i186* @i186_s + ret void +} + +define void @i187_ls(i187 %x) nounwind { + store i187 %x, i187* @i187_s + ret void +} + +define void @i188_ls(i188 %x) nounwind { + store i188 %x, i188* @i188_s + ret void +} + +define void @i189_ls(i189 %x) nounwind { + store i189 %x, i189* @i189_s + ret void +} + +define void @i190_ls(i190 %x) nounwind { + store i190 %x, i190* @i190_s + ret void +} + +define void @i191_ls(i191 %x) nounwind { + store i191 %x, i191* @i191_s + ret void +} + +define void @i192_ls(i192 %x) nounwind { + store i192 %x, i192* @i192_s + ret void +} + +define void @i193_ls(i193 %x) nounwind { + store i193 %x, i193* @i193_s + ret void +} + +define void @i194_ls(i194 %x) nounwind { + store i194 %x, i194* @i194_s + ret void +} + +define void @i195_ls(i195 %x) nounwind { + store i195 %x, i195* @i195_s + ret void +} + +define void @i196_ls(i196 %x) nounwind { + store i196 %x, i196* @i196_s + ret void +} + +define void 
@i197_ls(i197 %x) nounwind { + store i197 %x, i197* @i197_s + ret void +} + +define void @i198_ls(i198 %x) nounwind { + store i198 %x, i198* @i198_s + ret void +} + +define void @i199_ls(i199 %x) nounwind { + store i199 %x, i199* @i199_s + ret void +} + +define void @i200_ls(i200 %x) nounwind { + store i200 %x, i200* @i200_s + ret void +} + +define void @i201_ls(i201 %x) nounwind { + store i201 %x, i201* @i201_s + ret void +} + +define void @i202_ls(i202 %x) nounwind { + store i202 %x, i202* @i202_s + ret void +} + +define void @i203_ls(i203 %x) nounwind { + store i203 %x, i203* @i203_s + ret void +} + +define void @i204_ls(i204 %x) nounwind { + store i204 %x, i204* @i204_s + ret void +} + +define void @i205_ls(i205 %x) nounwind { + store i205 %x, i205* @i205_s + ret void +} + +define void @i206_ls(i206 %x) nounwind { + store i206 %x, i206* @i206_s + ret void +} + +define void @i207_ls(i207 %x) nounwind { + store i207 %x, i207* @i207_s + ret void +} + +define void @i208_ls(i208 %x) nounwind { + store i208 %x, i208* @i208_s + ret void +} + +define void @i209_ls(i209 %x) nounwind { + store i209 %x, i209* @i209_s + ret void +} + +define void @i210_ls(i210 %x) nounwind { + store i210 %x, i210* @i210_s + ret void +} + +define void @i211_ls(i211 %x) nounwind { + store i211 %x, i211* @i211_s + ret void +} + +define void @i212_ls(i212 %x) nounwind { + store i212 %x, i212* @i212_s + ret void +} + +define void @i213_ls(i213 %x) nounwind { + store i213 %x, i213* @i213_s + ret void +} + +define void @i214_ls(i214 %x) nounwind { + store i214 %x, i214* @i214_s + ret void +} + +define void @i215_ls(i215 %x) nounwind { + store i215 %x, i215* @i215_s + ret void +} + +define void @i216_ls(i216 %x) nounwind { + store i216 %x, i216* @i216_s + ret void +} + +define void @i217_ls(i217 %x) nounwind { + store i217 %x, i217* @i217_s + ret void +} + +define void @i218_ls(i218 %x) nounwind { + store i218 %x, i218* @i218_s + ret void +} + +define void @i219_ls(i219 %x) nounwind { + store i219 
%x, i219* @i219_s + ret void +} + +define void @i220_ls(i220 %x) nounwind { + store i220 %x, i220* @i220_s + ret void +} + +define void @i221_ls(i221 %x) nounwind { + store i221 %x, i221* @i221_s + ret void +} + +define void @i222_ls(i222 %x) nounwind { + store i222 %x, i222* @i222_s + ret void +} + +define void @i223_ls(i223 %x) nounwind { + store i223 %x, i223* @i223_s + ret void +} + +define void @i224_ls(i224 %x) nounwind { + store i224 %x, i224* @i224_s + ret void +} + +define void @i225_ls(i225 %x) nounwind { + store i225 %x, i225* @i225_s + ret void +} + +define void @i226_ls(i226 %x) nounwind { + store i226 %x, i226* @i226_s + ret void +} + +define void @i227_ls(i227 %x) nounwind { + store i227 %x, i227* @i227_s + ret void +} + +define void @i228_ls(i228 %x) nounwind { + store i228 %x, i228* @i228_s + ret void +} + +define void @i229_ls(i229 %x) nounwind { + store i229 %x, i229* @i229_s + ret void +} + +define void @i230_ls(i230 %x) nounwind { + store i230 %x, i230* @i230_s + ret void +} + +define void @i231_ls(i231 %x) nounwind { + store i231 %x, i231* @i231_s + ret void +} + +define void @i232_ls(i232 %x) nounwind { + store i232 %x, i232* @i232_s + ret void +} + +define void @i233_ls(i233 %x) nounwind { + store i233 %x, i233* @i233_s + ret void +} + +define void @i234_ls(i234 %x) nounwind { + store i234 %x, i234* @i234_s + ret void +} + +define void @i235_ls(i235 %x) nounwind { + store i235 %x, i235* @i235_s + ret void +} + +define void @i236_ls(i236 %x) nounwind { + store i236 %x, i236* @i236_s + ret void +} + +define void @i237_ls(i237 %x) nounwind { + store i237 %x, i237* @i237_s + ret void +} + +define void @i238_ls(i238 %x) nounwind { + store i238 %x, i238* @i238_s + ret void +} + +define void @i239_ls(i239 %x) nounwind { + store i239 %x, i239* @i239_s + ret void +} + +define void @i240_ls(i240 %x) nounwind { + store i240 %x, i240* @i240_s + ret void +} + +define void @i241_ls(i241 %x) nounwind { + store i241 %x, i241* @i241_s + ret void +} + +define 
void @i242_ls(i242 %x) nounwind { + store i242 %x, i242* @i242_s + ret void +} + +define void @i243_ls(i243 %x) nounwind { + store i243 %x, i243* @i243_s + ret void +} + +define void @i244_ls(i244 %x) nounwind { + store i244 %x, i244* @i244_s + ret void +} + +define void @i245_ls(i245 %x) nounwind { + store i245 %x, i245* @i245_s + ret void +} + +define void @i246_ls(i246 %x) nounwind { + store i246 %x, i246* @i246_s + ret void +} + +define void @i247_ls(i247 %x) nounwind { + store i247 %x, i247* @i247_s + ret void +} + +define void @i248_ls(i248 %x) nounwind { + store i248 %x, i248* @i248_s + ret void +} + +define void @i249_ls(i249 %x) nounwind { + store i249 %x, i249* @i249_s + ret void +} + +define void @i250_ls(i250 %x) nounwind { + store i250 %x, i250* @i250_s + ret void +} + +define void @i251_ls(i251 %x) nounwind { + store i251 %x, i251* @i251_s + ret void +} + +define void @i252_ls(i252 %x) nounwind { + store i252 %x, i252* @i252_s + ret void +} + +define void @i253_ls(i253 %x) nounwind { + store i253 %x, i253* @i253_s + ret void +} + +define void @i254_ls(i254 %x) nounwind { + store i254 %x, i254* @i254_s + ret void +} + +define void @i255_ls(i255 %x) nounwind { + store i255 %x, i255* @i255_s + ret void +} + +define void @i256_ls(i256 %x) nounwind { + store i256 %x, i256* @i256_s + ret void +}
diff --git a/src/LLVM/test/CodeGen/Generic/APIntSextParam.ll b/src/LLVM/test/CodeGen/Generic/APIntSextParam.ll new file mode 100644 index 0000000..acc0eeb --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/APIntSextParam.ll
@@ -0,0 +1,1537 @@ +; RUN: llc < %s > %t +@i1_s = external global i1 ; <i1*> [#uses=1] +@i2_s = external global i2 ; <i2*> [#uses=1] +@i3_s = external global i3 ; <i3*> [#uses=1] +@i4_s = external global i4 ; <i4*> [#uses=1] +@i5_s = external global i5 ; <i5*> [#uses=1] +@i6_s = external global i6 ; <i6*> [#uses=1] +@i7_s = external global i7 ; <i7*> [#uses=1] +@i8_s = external global i8 ; <i8*> [#uses=1] +@i9_s = external global i9 ; <i9*> [#uses=1] +@i10_s = external global i10 ; <i10*> [#uses=1] +@i11_s = external global i11 ; <i11*> [#uses=1] +@i12_s = external global i12 ; <i12*> [#uses=1] +@i13_s = external global i13 ; <i13*> [#uses=1] +@i14_s = external global i14 ; <i14*> [#uses=1] +@i15_s = external global i15 ; <i15*> [#uses=1] +@i16_s = external global i16 ; <i16*> [#uses=1] +@i17_s = external global i17 ; <i17*> [#uses=1] +@i18_s = external global i18 ; <i18*> [#uses=1] +@i19_s = external global i19 ; <i19*> [#uses=1] +@i20_s = external global i20 ; <i20*> [#uses=1] +@i21_s = external global i21 ; <i21*> [#uses=1] +@i22_s = external global i22 ; <i22*> [#uses=1] +@i23_s = external global i23 ; <i23*> [#uses=1] +@i24_s = external global i24 ; <i24*> [#uses=1] +@i25_s = external global i25 ; <i25*> [#uses=1] +@i26_s = external global i26 ; <i26*> [#uses=1] +@i27_s = external global i27 ; <i27*> [#uses=1] +@i28_s = external global i28 ; <i28*> [#uses=1] +@i29_s = external global i29 ; <i29*> [#uses=1] +@i30_s = external global i30 ; <i30*> [#uses=1] +@i31_s = external global i31 ; <i31*> [#uses=1] +@i32_s = external global i32 ; <i32*> [#uses=1] +@i33_s = external global i33 ; <i33*> [#uses=1] +@i34_s = external global i34 ; <i34*> [#uses=1] +@i35_s = external global i35 ; <i35*> [#uses=1] +@i36_s = external global i36 ; <i36*> [#uses=1] +@i37_s = external global i37 ; <i37*> [#uses=1] +@i38_s = external global i38 ; <i38*> [#uses=1] +@i39_s = external global i39 ; <i39*> [#uses=1] +@i40_s = external global i40 ; <i40*> [#uses=1] +@i41_s = external global 
i41 ; <i41*> [#uses=1] +@i42_s = external global i42 ; <i42*> [#uses=1] +@i43_s = external global i43 ; <i43*> [#uses=1] +@i44_s = external global i44 ; <i44*> [#uses=1] +@i45_s = external global i45 ; <i45*> [#uses=1] +@i46_s = external global i46 ; <i46*> [#uses=1] +@i47_s = external global i47 ; <i47*> [#uses=1] +@i48_s = external global i48 ; <i48*> [#uses=1] +@i49_s = external global i49 ; <i49*> [#uses=1] +@i50_s = external global i50 ; <i50*> [#uses=1] +@i51_s = external global i51 ; <i51*> [#uses=1] +@i52_s = external global i52 ; <i52*> [#uses=1] +@i53_s = external global i53 ; <i53*> [#uses=1] +@i54_s = external global i54 ; <i54*> [#uses=1] +@i55_s = external global i55 ; <i55*> [#uses=1] +@i56_s = external global i56 ; <i56*> [#uses=1] +@i57_s = external global i57 ; <i57*> [#uses=1] +@i58_s = external global i58 ; <i58*> [#uses=1] +@i59_s = external global i59 ; <i59*> [#uses=1] +@i60_s = external global i60 ; <i60*> [#uses=1] +@i61_s = external global i61 ; <i61*> [#uses=1] +@i62_s = external global i62 ; <i62*> [#uses=1] +@i63_s = external global i63 ; <i63*> [#uses=1] +@i64_s = external global i64 ; <i64*> [#uses=1] +@i65_s = external global i65 ; <i65*> [#uses=1] +@i66_s = external global i66 ; <i66*> [#uses=1] +@i67_s = external global i67 ; <i67*> [#uses=1] +@i68_s = external global i68 ; <i68*> [#uses=1] +@i69_s = external global i69 ; <i69*> [#uses=1] +@i70_s = external global i70 ; <i70*> [#uses=1] +@i71_s = external global i71 ; <i71*> [#uses=1] +@i72_s = external global i72 ; <i72*> [#uses=1] +@i73_s = external global i73 ; <i73*> [#uses=1] +@i74_s = external global i74 ; <i74*> [#uses=1] +@i75_s = external global i75 ; <i75*> [#uses=1] +@i76_s = external global i76 ; <i76*> [#uses=1] +@i77_s = external global i77 ; <i77*> [#uses=1] +@i78_s = external global i78 ; <i78*> [#uses=1] +@i79_s = external global i79 ; <i79*> [#uses=1] +@i80_s = external global i80 ; <i80*> [#uses=1] +@i81_s = external global i81 ; <i81*> [#uses=1] +@i82_s = 
external global i82 ; <i82*> [#uses=1] +@i83_s = external global i83 ; <i83*> [#uses=1] +@i84_s = external global i84 ; <i84*> [#uses=1] +@i85_s = external global i85 ; <i85*> [#uses=1] +@i86_s = external global i86 ; <i86*> [#uses=1] +@i87_s = external global i87 ; <i87*> [#uses=1] +@i88_s = external global i88 ; <i88*> [#uses=1] +@i89_s = external global i89 ; <i89*> [#uses=1] +@i90_s = external global i90 ; <i90*> [#uses=1] +@i91_s = external global i91 ; <i91*> [#uses=1] +@i92_s = external global i92 ; <i92*> [#uses=1] +@i93_s = external global i93 ; <i93*> [#uses=1] +@i94_s = external global i94 ; <i94*> [#uses=1] +@i95_s = external global i95 ; <i95*> [#uses=1] +@i96_s = external global i96 ; <i96*> [#uses=1] +@i97_s = external global i97 ; <i97*> [#uses=1] +@i98_s = external global i98 ; <i98*> [#uses=1] +@i99_s = external global i99 ; <i99*> [#uses=1] +@i100_s = external global i100 ; <i100*> [#uses=1] +@i101_s = external global i101 ; <i101*> [#uses=1] +@i102_s = external global i102 ; <i102*> [#uses=1] +@i103_s = external global i103 ; <i103*> [#uses=1] +@i104_s = external global i104 ; <i104*> [#uses=1] +@i105_s = external global i105 ; <i105*> [#uses=1] +@i106_s = external global i106 ; <i106*> [#uses=1] +@i107_s = external global i107 ; <i107*> [#uses=1] +@i108_s = external global i108 ; <i108*> [#uses=1] +@i109_s = external global i109 ; <i109*> [#uses=1] +@i110_s = external global i110 ; <i110*> [#uses=1] +@i111_s = external global i111 ; <i111*> [#uses=1] +@i112_s = external global i112 ; <i112*> [#uses=1] +@i113_s = external global i113 ; <i113*> [#uses=1] +@i114_s = external global i114 ; <i114*> [#uses=1] +@i115_s = external global i115 ; <i115*> [#uses=1] +@i116_s = external global i116 ; <i116*> [#uses=1] +@i117_s = external global i117 ; <i117*> [#uses=1] +@i118_s = external global i118 ; <i118*> [#uses=1] +@i119_s = external global i119 ; <i119*> [#uses=1] +@i120_s = external global i120 ; <i120*> [#uses=1] +@i121_s = external global i121 ; 
<i121*> [#uses=1] +@i122_s = external global i122 ; <i122*> [#uses=1] +@i123_s = external global i123 ; <i123*> [#uses=1] +@i124_s = external global i124 ; <i124*> [#uses=1] +@i125_s = external global i125 ; <i125*> [#uses=1] +@i126_s = external global i126 ; <i126*> [#uses=1] +@i127_s = external global i127 ; <i127*> [#uses=1] +@i128_s = external global i128 ; <i128*> [#uses=1] +@i129_s = external global i129 ; <i129*> [#uses=1] +@i130_s = external global i130 ; <i130*> [#uses=1] +@i131_s = external global i131 ; <i131*> [#uses=1] +@i132_s = external global i132 ; <i132*> [#uses=1] +@i133_s = external global i133 ; <i133*> [#uses=1] +@i134_s = external global i134 ; <i134*> [#uses=1] +@i135_s = external global i135 ; <i135*> [#uses=1] +@i136_s = external global i136 ; <i136*> [#uses=1] +@i137_s = external global i137 ; <i137*> [#uses=1] +@i138_s = external global i138 ; <i138*> [#uses=1] +@i139_s = external global i139 ; <i139*> [#uses=1] +@i140_s = external global i140 ; <i140*> [#uses=1] +@i141_s = external global i141 ; <i141*> [#uses=1] +@i142_s = external global i142 ; <i142*> [#uses=1] +@i143_s = external global i143 ; <i143*> [#uses=1] +@i144_s = external global i144 ; <i144*> [#uses=1] +@i145_s = external global i145 ; <i145*> [#uses=1] +@i146_s = external global i146 ; <i146*> [#uses=1] +@i147_s = external global i147 ; <i147*> [#uses=1] +@i148_s = external global i148 ; <i148*> [#uses=1] +@i149_s = external global i149 ; <i149*> [#uses=1] +@i150_s = external global i150 ; <i150*> [#uses=1] +@i151_s = external global i151 ; <i151*> [#uses=1] +@i152_s = external global i152 ; <i152*> [#uses=1] +@i153_s = external global i153 ; <i153*> [#uses=1] +@i154_s = external global i154 ; <i154*> [#uses=1] +@i155_s = external global i155 ; <i155*> [#uses=1] +@i156_s = external global i156 ; <i156*> [#uses=1] +@i157_s = external global i157 ; <i157*> [#uses=1] +@i158_s = external global i158 ; <i158*> [#uses=1] +@i159_s = external global i159 ; <i159*> [#uses=1] 
+@i160_s = external global i160 ; <i160*> [#uses=1] +@i161_s = external global i161 ; <i161*> [#uses=1] +@i162_s = external global i162 ; <i162*> [#uses=1] +@i163_s = external global i163 ; <i163*> [#uses=1] +@i164_s = external global i164 ; <i164*> [#uses=1] +@i165_s = external global i165 ; <i165*> [#uses=1] +@i166_s = external global i166 ; <i166*> [#uses=1] +@i167_s = external global i167 ; <i167*> [#uses=1] +@i168_s = external global i168 ; <i168*> [#uses=1] +@i169_s = external global i169 ; <i169*> [#uses=1] +@i170_s = external global i170 ; <i170*> [#uses=1] +@i171_s = external global i171 ; <i171*> [#uses=1] +@i172_s = external global i172 ; <i172*> [#uses=1] +@i173_s = external global i173 ; <i173*> [#uses=1] +@i174_s = external global i174 ; <i174*> [#uses=1] +@i175_s = external global i175 ; <i175*> [#uses=1] +@i176_s = external global i176 ; <i176*> [#uses=1] +@i177_s = external global i177 ; <i177*> [#uses=1] +@i178_s = external global i178 ; <i178*> [#uses=1] +@i179_s = external global i179 ; <i179*> [#uses=1] +@i180_s = external global i180 ; <i180*> [#uses=1] +@i181_s = external global i181 ; <i181*> [#uses=1] +@i182_s = external global i182 ; <i182*> [#uses=1] +@i183_s = external global i183 ; <i183*> [#uses=1] +@i184_s = external global i184 ; <i184*> [#uses=1] +@i185_s = external global i185 ; <i185*> [#uses=1] +@i186_s = external global i186 ; <i186*> [#uses=1] +@i187_s = external global i187 ; <i187*> [#uses=1] +@i188_s = external global i188 ; <i188*> [#uses=1] +@i189_s = external global i189 ; <i189*> [#uses=1] +@i190_s = external global i190 ; <i190*> [#uses=1] +@i191_s = external global i191 ; <i191*> [#uses=1] +@i192_s = external global i192 ; <i192*> [#uses=1] +@i193_s = external global i193 ; <i193*> [#uses=1] +@i194_s = external global i194 ; <i194*> [#uses=1] +@i195_s = external global i195 ; <i195*> [#uses=1] +@i196_s = external global i196 ; <i196*> [#uses=1] +@i197_s = external global i197 ; <i197*> [#uses=1] +@i198_s = external 
global i198 ; <i198*> [#uses=1] +@i199_s = external global i199 ; <i199*> [#uses=1] +@i200_s = external global i200 ; <i200*> [#uses=1] +@i201_s = external global i201 ; <i201*> [#uses=1] +@i202_s = external global i202 ; <i202*> [#uses=1] +@i203_s = external global i203 ; <i203*> [#uses=1] +@i204_s = external global i204 ; <i204*> [#uses=1] +@i205_s = external global i205 ; <i205*> [#uses=1] +@i206_s = external global i206 ; <i206*> [#uses=1] +@i207_s = external global i207 ; <i207*> [#uses=1] +@i208_s = external global i208 ; <i208*> [#uses=1] +@i209_s = external global i209 ; <i209*> [#uses=1] +@i210_s = external global i210 ; <i210*> [#uses=1] +@i211_s = external global i211 ; <i211*> [#uses=1] +@i212_s = external global i212 ; <i212*> [#uses=1] +@i213_s = external global i213 ; <i213*> [#uses=1] +@i214_s = external global i214 ; <i214*> [#uses=1] +@i215_s = external global i215 ; <i215*> [#uses=1] +@i216_s = external global i216 ; <i216*> [#uses=1] +@i217_s = external global i217 ; <i217*> [#uses=1] +@i218_s = external global i218 ; <i218*> [#uses=1] +@i219_s = external global i219 ; <i219*> [#uses=1] +@i220_s = external global i220 ; <i220*> [#uses=1] +@i221_s = external global i221 ; <i221*> [#uses=1] +@i222_s = external global i222 ; <i222*> [#uses=1] +@i223_s = external global i223 ; <i223*> [#uses=1] +@i224_s = external global i224 ; <i224*> [#uses=1] +@i225_s = external global i225 ; <i225*> [#uses=1] +@i226_s = external global i226 ; <i226*> [#uses=1] +@i227_s = external global i227 ; <i227*> [#uses=1] +@i228_s = external global i228 ; <i228*> [#uses=1] +@i229_s = external global i229 ; <i229*> [#uses=1] +@i230_s = external global i230 ; <i230*> [#uses=1] +@i231_s = external global i231 ; <i231*> [#uses=1] +@i232_s = external global i232 ; <i232*> [#uses=1] +@i233_s = external global i233 ; <i233*> [#uses=1] +@i234_s = external global i234 ; <i234*> [#uses=1] +@i235_s = external global i235 ; <i235*> [#uses=1] +@i236_s = external global i236 ; <i236*> 
[#uses=1] +@i237_s = external global i237 ; <i237*> [#uses=1] +@i238_s = external global i238 ; <i238*> [#uses=1] +@i239_s = external global i239 ; <i239*> [#uses=1] +@i240_s = external global i240 ; <i240*> [#uses=1] +@i241_s = external global i241 ; <i241*> [#uses=1] +@i242_s = external global i242 ; <i242*> [#uses=1] +@i243_s = external global i243 ; <i243*> [#uses=1] +@i244_s = external global i244 ; <i244*> [#uses=1] +@i245_s = external global i245 ; <i245*> [#uses=1] +@i246_s = external global i246 ; <i246*> [#uses=1] +@i247_s = external global i247 ; <i247*> [#uses=1] +@i248_s = external global i248 ; <i248*> [#uses=1] +@i249_s = external global i249 ; <i249*> [#uses=1] +@i250_s = external global i250 ; <i250*> [#uses=1] +@i251_s = external global i251 ; <i251*> [#uses=1] +@i252_s = external global i252 ; <i252*> [#uses=1] +@i253_s = external global i253 ; <i253*> [#uses=1] +@i254_s = external global i254 ; <i254*> [#uses=1] +@i255_s = external global i255 ; <i255*> [#uses=1] +@i256_s = external global i256 ; <i256*> [#uses=1] + +define void @i1_ls(i1 signext %x) nounwind { + store i1 %x, i1* @i1_s + ret void +} + +define void @i2_ls(i2 signext %x) nounwind { + store i2 %x, i2* @i2_s + ret void +} + +define void @i3_ls(i3 signext %x) nounwind { + store i3 %x, i3* @i3_s + ret void +} + +define void @i4_ls(i4 signext %x) nounwind { + store i4 %x, i4* @i4_s + ret void +} + +define void @i5_ls(i5 signext %x) nounwind { + store i5 %x, i5* @i5_s + ret void +} + +define void @i6_ls(i6 signext %x) nounwind { + store i6 %x, i6* @i6_s + ret void +} + +define void @i7_ls(i7 signext %x) nounwind { + store i7 %x, i7* @i7_s + ret void +} + +define void @i8_ls(i8 signext %x) nounwind { + store i8 %x, i8* @i8_s + ret void +} + +define void @i9_ls(i9 signext %x) nounwind { + store i9 %x, i9* @i9_s + ret void +} + +define void @i10_ls(i10 signext %x) nounwind { + store i10 %x, i10* @i10_s + ret void +} + +define void @i11_ls(i11 signext %x) nounwind { + store i11 %x, i11* 
@i11_s + ret void +} + +define void @i12_ls(i12 signext %x) nounwind { + store i12 %x, i12* @i12_s + ret void +} + +define void @i13_ls(i13 signext %x) nounwind { + store i13 %x, i13* @i13_s + ret void +} + +define void @i14_ls(i14 signext %x) nounwind { + store i14 %x, i14* @i14_s + ret void +} + +define void @i15_ls(i15 signext %x) nounwind { + store i15 %x, i15* @i15_s + ret void +} + +define void @i16_ls(i16 signext %x) nounwind { + store i16 %x, i16* @i16_s + ret void +} + +define void @i17_ls(i17 signext %x) nounwind { + store i17 %x, i17* @i17_s + ret void +} + +define void @i18_ls(i18 signext %x) nounwind { + store i18 %x, i18* @i18_s + ret void +} + +define void @i19_ls(i19 signext %x) nounwind { + store i19 %x, i19* @i19_s + ret void +} + +define void @i20_ls(i20 signext %x) nounwind { + store i20 %x, i20* @i20_s + ret void +} + +define void @i21_ls(i21 signext %x) nounwind { + store i21 %x, i21* @i21_s + ret void +} + +define void @i22_ls(i22 signext %x) nounwind { + store i22 %x, i22* @i22_s + ret void +} + +define void @i23_ls(i23 signext %x) nounwind { + store i23 %x, i23* @i23_s + ret void +} + +define void @i24_ls(i24 signext %x) nounwind { + store i24 %x, i24* @i24_s + ret void +} + +define void @i25_ls(i25 signext %x) nounwind { + store i25 %x, i25* @i25_s + ret void +} + +define void @i26_ls(i26 signext %x) nounwind { + store i26 %x, i26* @i26_s + ret void +} + +define void @i27_ls(i27 signext %x) nounwind { + store i27 %x, i27* @i27_s + ret void +} + +define void @i28_ls(i28 signext %x) nounwind { + store i28 %x, i28* @i28_s + ret void +} + +define void @i29_ls(i29 signext %x) nounwind { + store i29 %x, i29* @i29_s + ret void +} + +define void @i30_ls(i30 signext %x) nounwind { + store i30 %x, i30* @i30_s + ret void +} + +define void @i31_ls(i31 signext %x) nounwind { + store i31 %x, i31* @i31_s + ret void +} + +define void @i32_ls(i32 signext %x) nounwind { + store i32 %x, i32* @i32_s + ret void +} + +define void @i33_ls(i33 signext %x) 
nounwind { + store i33 %x, i33* @i33_s + ret void +} + +define void @i34_ls(i34 signext %x) nounwind { + store i34 %x, i34* @i34_s + ret void +} + +define void @i35_ls(i35 signext %x) nounwind { + store i35 %x, i35* @i35_s + ret void +} + +define void @i36_ls(i36 signext %x) nounwind { + store i36 %x, i36* @i36_s + ret void +} + +define void @i37_ls(i37 signext %x) nounwind { + store i37 %x, i37* @i37_s + ret void +} + +define void @i38_ls(i38 signext %x) nounwind { + store i38 %x, i38* @i38_s + ret void +} + +define void @i39_ls(i39 signext %x) nounwind { + store i39 %x, i39* @i39_s + ret void +} + +define void @i40_ls(i40 signext %x) nounwind { + store i40 %x, i40* @i40_s + ret void +} + +define void @i41_ls(i41 signext %x) nounwind { + store i41 %x, i41* @i41_s + ret void +} + +define void @i42_ls(i42 signext %x) nounwind { + store i42 %x, i42* @i42_s + ret void +} + +define void @i43_ls(i43 signext %x) nounwind { + store i43 %x, i43* @i43_s + ret void +} + +define void @i44_ls(i44 signext %x) nounwind { + store i44 %x, i44* @i44_s + ret void +} + +define void @i45_ls(i45 signext %x) nounwind { + store i45 %x, i45* @i45_s + ret void +} + +define void @i46_ls(i46 signext %x) nounwind { + store i46 %x, i46* @i46_s + ret void +} + +define void @i47_ls(i47 signext %x) nounwind { + store i47 %x, i47* @i47_s + ret void +} + +define void @i48_ls(i48 signext %x) nounwind { + store i48 %x, i48* @i48_s + ret void +} + +define void @i49_ls(i49 signext %x) nounwind { + store i49 %x, i49* @i49_s + ret void +} + +define void @i50_ls(i50 signext %x) nounwind { + store i50 %x, i50* @i50_s + ret void +} + +define void @i51_ls(i51 signext %x) nounwind { + store i51 %x, i51* @i51_s + ret void +} + +define void @i52_ls(i52 signext %x) nounwind { + store i52 %x, i52* @i52_s + ret void +} + +define void @i53_ls(i53 signext %x) nounwind { + store i53 %x, i53* @i53_s + ret void +} + +define void @i54_ls(i54 signext %x) nounwind { + store i54 %x, i54* @i54_s + ret void +} + +define void 
@i55_ls(i55 signext %x) nounwind { + store i55 %x, i55* @i55_s + ret void +} + +define void @i56_ls(i56 signext %x) nounwind { + store i56 %x, i56* @i56_s + ret void +} + +define void @i57_ls(i57 signext %x) nounwind { + store i57 %x, i57* @i57_s + ret void +} + +define void @i58_ls(i58 signext %x) nounwind { + store i58 %x, i58* @i58_s + ret void +} + +define void @i59_ls(i59 signext %x) nounwind { + store i59 %x, i59* @i59_s + ret void +} + +define void @i60_ls(i60 signext %x) nounwind { + store i60 %x, i60* @i60_s + ret void +} + +define void @i61_ls(i61 signext %x) nounwind { + store i61 %x, i61* @i61_s + ret void +} + +define void @i62_ls(i62 signext %x) nounwind { + store i62 %x, i62* @i62_s + ret void +} + +define void @i63_ls(i63 signext %x) nounwind { + store i63 %x, i63* @i63_s + ret void +} + +define void @i64_ls(i64 signext %x) nounwind { + store i64 %x, i64* @i64_s + ret void +} + +define void @i65_ls(i65 signext %x) nounwind { + store i65 %x, i65* @i65_s + ret void +} + +define void @i66_ls(i66 signext %x) nounwind { + store i66 %x, i66* @i66_s + ret void +} + +define void @i67_ls(i67 signext %x) nounwind { + store i67 %x, i67* @i67_s + ret void +} + +define void @i68_ls(i68 signext %x) nounwind { + store i68 %x, i68* @i68_s + ret void +} + +define void @i69_ls(i69 signext %x) nounwind { + store i69 %x, i69* @i69_s + ret void +} + +define void @i70_ls(i70 signext %x) nounwind { + store i70 %x, i70* @i70_s + ret void +} + +define void @i71_ls(i71 signext %x) nounwind { + store i71 %x, i71* @i71_s + ret void +} + +define void @i72_ls(i72 signext %x) nounwind { + store i72 %x, i72* @i72_s + ret void +} + +define void @i73_ls(i73 signext %x) nounwind { + store i73 %x, i73* @i73_s + ret void +} + +define void @i74_ls(i74 signext %x) nounwind { + store i74 %x, i74* @i74_s + ret void +} + +define void @i75_ls(i75 signext %x) nounwind { + store i75 %x, i75* @i75_s + ret void +} + +define void @i76_ls(i76 signext %x) nounwind { + store i76 %x, i76* @i76_s + 
ret void +} + +define void @i77_ls(i77 signext %x) nounwind { + store i77 %x, i77* @i77_s + ret void +} + +define void @i78_ls(i78 signext %x) nounwind { + store i78 %x, i78* @i78_s + ret void +} + +define void @i79_ls(i79 signext %x) nounwind { + store i79 %x, i79* @i79_s + ret void +} + +define void @i80_ls(i80 signext %x) nounwind { + store i80 %x, i80* @i80_s + ret void +} + +define void @i81_ls(i81 signext %x) nounwind { + store i81 %x, i81* @i81_s + ret void +} + +define void @i82_ls(i82 signext %x) nounwind { + store i82 %x, i82* @i82_s + ret void +} + +define void @i83_ls(i83 signext %x) nounwind { + store i83 %x, i83* @i83_s + ret void +} + +define void @i84_ls(i84 signext %x) nounwind { + store i84 %x, i84* @i84_s + ret void +} + +define void @i85_ls(i85 signext %x) nounwind { + store i85 %x, i85* @i85_s + ret void +} + +define void @i86_ls(i86 signext %x) nounwind { + store i86 %x, i86* @i86_s + ret void +} + +define void @i87_ls(i87 signext %x) nounwind { + store i87 %x, i87* @i87_s + ret void +} + +define void @i88_ls(i88 signext %x) nounwind { + store i88 %x, i88* @i88_s + ret void +} + +define void @i89_ls(i89 signext %x) nounwind { + store i89 %x, i89* @i89_s + ret void +} + +define void @i90_ls(i90 signext %x) nounwind { + store i90 %x, i90* @i90_s + ret void +} + +define void @i91_ls(i91 signext %x) nounwind { + store i91 %x, i91* @i91_s + ret void +} + +define void @i92_ls(i92 signext %x) nounwind { + store i92 %x, i92* @i92_s + ret void +} + +define void @i93_ls(i93 signext %x) nounwind { + store i93 %x, i93* @i93_s + ret void +} + +define void @i94_ls(i94 signext %x) nounwind { + store i94 %x, i94* @i94_s + ret void +} + +define void @i95_ls(i95 signext %x) nounwind { + store i95 %x, i95* @i95_s + ret void +} + +define void @i96_ls(i96 signext %x) nounwind { + store i96 %x, i96* @i96_s + ret void +} + +define void @i97_ls(i97 signext %x) nounwind { + store i97 %x, i97* @i97_s + ret void +} + +define void @i98_ls(i98 signext %x) nounwind { + 
store i98 %x, i98* @i98_s + ret void +} + +define void @i99_ls(i99 signext %x) nounwind { + store i99 %x, i99* @i99_s + ret void +} + +define void @i100_ls(i100 signext %x) nounwind { + store i100 %x, i100* @i100_s + ret void +} + +define void @i101_ls(i101 signext %x) nounwind { + store i101 %x, i101* @i101_s + ret void +} + +define void @i102_ls(i102 signext %x) nounwind { + store i102 %x, i102* @i102_s + ret void +} + +define void @i103_ls(i103 signext %x) nounwind { + store i103 %x, i103* @i103_s + ret void +} + +define void @i104_ls(i104 signext %x) nounwind { + store i104 %x, i104* @i104_s + ret void +} + +define void @i105_ls(i105 signext %x) nounwind { + store i105 %x, i105* @i105_s + ret void +} + +define void @i106_ls(i106 signext %x) nounwind { + store i106 %x, i106* @i106_s + ret void +} + +define void @i107_ls(i107 signext %x) nounwind { + store i107 %x, i107* @i107_s + ret void +} + +define void @i108_ls(i108 signext %x) nounwind { + store i108 %x, i108* @i108_s + ret void +} + +define void @i109_ls(i109 signext %x) nounwind { + store i109 %x, i109* @i109_s + ret void +} + +define void @i110_ls(i110 signext %x) nounwind { + store i110 %x, i110* @i110_s + ret void +} + +define void @i111_ls(i111 signext %x) nounwind { + store i111 %x, i111* @i111_s + ret void +} + +define void @i112_ls(i112 signext %x) nounwind { + store i112 %x, i112* @i112_s + ret void +} + +define void @i113_ls(i113 signext %x) nounwind { + store i113 %x, i113* @i113_s + ret void +} + +define void @i114_ls(i114 signext %x) nounwind { + store i114 %x, i114* @i114_s + ret void +} + +define void @i115_ls(i115 signext %x) nounwind { + store i115 %x, i115* @i115_s + ret void +} + +define void @i116_ls(i116 signext %x) nounwind { + store i116 %x, i116* @i116_s + ret void +} + +define void @i117_ls(i117 signext %x) nounwind { + store i117 %x, i117* @i117_s + ret void +} + +define void @i118_ls(i118 signext %x) nounwind { + store i118 %x, i118* @i118_s + ret void +} + +define void 
@i119_ls(i119 signext %x) nounwind { + store i119 %x, i119* @i119_s + ret void +} + +define void @i120_ls(i120 signext %x) nounwind { + store i120 %x, i120* @i120_s + ret void +} + +define void @i121_ls(i121 signext %x) nounwind { + store i121 %x, i121* @i121_s + ret void +} + +define void @i122_ls(i122 signext %x) nounwind { + store i122 %x, i122* @i122_s + ret void +} + +define void @i123_ls(i123 signext %x) nounwind { + store i123 %x, i123* @i123_s + ret void +} + +define void @i124_ls(i124 signext %x) nounwind { + store i124 %x, i124* @i124_s + ret void +} + +define void @i125_ls(i125 signext %x) nounwind { + store i125 %x, i125* @i125_s + ret void +} + +define void @i126_ls(i126 signext %x) nounwind { + store i126 %x, i126* @i126_s + ret void +} + +define void @i127_ls(i127 signext %x) nounwind { + store i127 %x, i127* @i127_s + ret void +} + +define void @i128_ls(i128 signext %x) nounwind { + store i128 %x, i128* @i128_s + ret void +} + +define void @i129_ls(i129 signext %x) nounwind { + store i129 %x, i129* @i129_s + ret void +} + +define void @i130_ls(i130 signext %x) nounwind { + store i130 %x, i130* @i130_s + ret void +} + +define void @i131_ls(i131 signext %x) nounwind { + store i131 %x, i131* @i131_s + ret void +} + +define void @i132_ls(i132 signext %x) nounwind { + store i132 %x, i132* @i132_s + ret void +} + +define void @i133_ls(i133 signext %x) nounwind { + store i133 %x, i133* @i133_s + ret void +} + +define void @i134_ls(i134 signext %x) nounwind { + store i134 %x, i134* @i134_s + ret void +} + +define void @i135_ls(i135 signext %x) nounwind { + store i135 %x, i135* @i135_s + ret void +} + +define void @i136_ls(i136 signext %x) nounwind { + store i136 %x, i136* @i136_s + ret void +} + +define void @i137_ls(i137 signext %x) nounwind { + store i137 %x, i137* @i137_s + ret void +} + +define void @i138_ls(i138 signext %x) nounwind { + store i138 %x, i138* @i138_s + ret void +} + +define void @i139_ls(i139 signext %x) nounwind { + store i139 %x, i139* 
@i139_s + ret void +} + +define void @i140_ls(i140 signext %x) nounwind { + store i140 %x, i140* @i140_s + ret void +} + +define void @i141_ls(i141 signext %x) nounwind { + store i141 %x, i141* @i141_s + ret void +} + +define void @i142_ls(i142 signext %x) nounwind { + store i142 %x, i142* @i142_s + ret void +} + +define void @i143_ls(i143 signext %x) nounwind { + store i143 %x, i143* @i143_s + ret void +} + +define void @i144_ls(i144 signext %x) nounwind { + store i144 %x, i144* @i144_s + ret void +} + +define void @i145_ls(i145 signext %x) nounwind { + store i145 %x, i145* @i145_s + ret void +} + +define void @i146_ls(i146 signext %x) nounwind { + store i146 %x, i146* @i146_s + ret void +} + +define void @i147_ls(i147 signext %x) nounwind { + store i147 %x, i147* @i147_s + ret void +} + +define void @i148_ls(i148 signext %x) nounwind { + store i148 %x, i148* @i148_s + ret void +} + +define void @i149_ls(i149 signext %x) nounwind { + store i149 %x, i149* @i149_s + ret void +} + +define void @i150_ls(i150 signext %x) nounwind { + store i150 %x, i150* @i150_s + ret void +} + +define void @i151_ls(i151 signext %x) nounwind { + store i151 %x, i151* @i151_s + ret void +} + +define void @i152_ls(i152 signext %x) nounwind { + store i152 %x, i152* @i152_s + ret void +} + +define void @i153_ls(i153 signext %x) nounwind { + store i153 %x, i153* @i153_s + ret void +} + +define void @i154_ls(i154 signext %x) nounwind { + store i154 %x, i154* @i154_s + ret void +} + +define void @i155_ls(i155 signext %x) nounwind { + store i155 %x, i155* @i155_s + ret void +} + +define void @i156_ls(i156 signext %x) nounwind { + store i156 %x, i156* @i156_s + ret void +} + +define void @i157_ls(i157 signext %x) nounwind { + store i157 %x, i157* @i157_s + ret void +} + +define void @i158_ls(i158 signext %x) nounwind { + store i158 %x, i158* @i158_s + ret void +} + +define void @i159_ls(i159 signext %x) nounwind { + store i159 %x, i159* @i159_s + ret void +} + +define void @i160_ls(i160 signext 
%x) nounwind { + store i160 %x, i160* @i160_s + ret void +} + +define void @i161_ls(i161 signext %x) nounwind { + store i161 %x, i161* @i161_s + ret void +} + +define void @i162_ls(i162 signext %x) nounwind { + store i162 %x, i162* @i162_s + ret void +} + +define void @i163_ls(i163 signext %x) nounwind { + store i163 %x, i163* @i163_s + ret void +} + +define void @i164_ls(i164 signext %x) nounwind { + store i164 %x, i164* @i164_s + ret void +} + +define void @i165_ls(i165 signext %x) nounwind { + store i165 %x, i165* @i165_s + ret void +} + +define void @i166_ls(i166 signext %x) nounwind { + store i166 %x, i166* @i166_s + ret void +} + +define void @i167_ls(i167 signext %x) nounwind { + store i167 %x, i167* @i167_s + ret void +} + +define void @i168_ls(i168 signext %x) nounwind { + store i168 %x, i168* @i168_s + ret void +} + +define void @i169_ls(i169 signext %x) nounwind { + store i169 %x, i169* @i169_s + ret void +} + +define void @i170_ls(i170 signext %x) nounwind { + store i170 %x, i170* @i170_s + ret void +} + +define void @i171_ls(i171 signext %x) nounwind { + store i171 %x, i171* @i171_s + ret void +} + +define void @i172_ls(i172 signext %x) nounwind { + store i172 %x, i172* @i172_s + ret void +} + +define void @i173_ls(i173 signext %x) nounwind { + store i173 %x, i173* @i173_s + ret void +} + +define void @i174_ls(i174 signext %x) nounwind { + store i174 %x, i174* @i174_s + ret void +} + +define void @i175_ls(i175 signext %x) nounwind { + store i175 %x, i175* @i175_s + ret void +} + +define void @i176_ls(i176 signext %x) nounwind { + store i176 %x, i176* @i176_s + ret void +} + +define void @i177_ls(i177 signext %x) nounwind { + store i177 %x, i177* @i177_s + ret void +} + +define void @i178_ls(i178 signext %x) nounwind { + store i178 %x, i178* @i178_s + ret void +} + +define void @i179_ls(i179 signext %x) nounwind { + store i179 %x, i179* @i179_s + ret void +} + +define void @i180_ls(i180 signext %x) nounwind { + store i180 %x, i180* @i180_s + ret void +} 
+ +define void @i181_ls(i181 signext %x) nounwind { + store i181 %x, i181* @i181_s + ret void +} + +define void @i182_ls(i182 signext %x) nounwind { + store i182 %x, i182* @i182_s + ret void +} + +define void @i183_ls(i183 signext %x) nounwind { + store i183 %x, i183* @i183_s + ret void +} + +define void @i184_ls(i184 signext %x) nounwind { + store i184 %x, i184* @i184_s + ret void +} + +define void @i185_ls(i185 signext %x) nounwind { + store i185 %x, i185* @i185_s + ret void +} + +define void @i186_ls(i186 signext %x) nounwind { + store i186 %x, i186* @i186_s + ret void +} + +define void @i187_ls(i187 signext %x) nounwind { + store i187 %x, i187* @i187_s + ret void +} + +define void @i188_ls(i188 signext %x) nounwind { + store i188 %x, i188* @i188_s + ret void +} + +define void @i189_ls(i189 signext %x) nounwind { + store i189 %x, i189* @i189_s + ret void +} + +define void @i190_ls(i190 signext %x) nounwind { + store i190 %x, i190* @i190_s + ret void +} + +define void @i191_ls(i191 signext %x) nounwind { + store i191 %x, i191* @i191_s + ret void +} + +define void @i192_ls(i192 signext %x) nounwind { + store i192 %x, i192* @i192_s + ret void +} + +define void @i193_ls(i193 signext %x) nounwind { + store i193 %x, i193* @i193_s + ret void +} + +define void @i194_ls(i194 signext %x) nounwind { + store i194 %x, i194* @i194_s + ret void +} + +define void @i195_ls(i195 signext %x) nounwind { + store i195 %x, i195* @i195_s + ret void +} + +define void @i196_ls(i196 signext %x) nounwind { + store i196 %x, i196* @i196_s + ret void +} + +define void @i197_ls(i197 signext %x) nounwind { + store i197 %x, i197* @i197_s + ret void +} + +define void @i198_ls(i198 signext %x) nounwind { + store i198 %x, i198* @i198_s + ret void +} + +define void @i199_ls(i199 signext %x) nounwind { + store i199 %x, i199* @i199_s + ret void +} + +define void @i200_ls(i200 signext %x) nounwind { + store i200 %x, i200* @i200_s + ret void +} + +define void @i201_ls(i201 signext %x) nounwind { + store 
i201 %x, i201* @i201_s + ret void +} + +define void @i202_ls(i202 signext %x) nounwind { + store i202 %x, i202* @i202_s + ret void +} + +define void @i203_ls(i203 signext %x) nounwind { + store i203 %x, i203* @i203_s + ret void +} + +define void @i204_ls(i204 signext %x) nounwind { + store i204 %x, i204* @i204_s + ret void +} + +define void @i205_ls(i205 signext %x) nounwind { + store i205 %x, i205* @i205_s + ret void +} + +define void @i206_ls(i206 signext %x) nounwind { + store i206 %x, i206* @i206_s + ret void +} + +define void @i207_ls(i207 signext %x) nounwind { + store i207 %x, i207* @i207_s + ret void +} + +define void @i208_ls(i208 signext %x) nounwind { + store i208 %x, i208* @i208_s + ret void +} + +define void @i209_ls(i209 signext %x) nounwind { + store i209 %x, i209* @i209_s + ret void +} + +define void @i210_ls(i210 signext %x) nounwind { + store i210 %x, i210* @i210_s + ret void +} + +define void @i211_ls(i211 signext %x) nounwind { + store i211 %x, i211* @i211_s + ret void +} + +define void @i212_ls(i212 signext %x) nounwind { + store i212 %x, i212* @i212_s + ret void +} + +define void @i213_ls(i213 signext %x) nounwind { + store i213 %x, i213* @i213_s + ret void +} + +define void @i214_ls(i214 signext %x) nounwind { + store i214 %x, i214* @i214_s + ret void +} + +define void @i215_ls(i215 signext %x) nounwind { + store i215 %x, i215* @i215_s + ret void +} + +define void @i216_ls(i216 signext %x) nounwind { + store i216 %x, i216* @i216_s + ret void +} + +define void @i217_ls(i217 signext %x) nounwind { + store i217 %x, i217* @i217_s + ret void +} + +define void @i218_ls(i218 signext %x) nounwind { + store i218 %x, i218* @i218_s + ret void +} + +define void @i219_ls(i219 signext %x) nounwind { + store i219 %x, i219* @i219_s + ret void +} + +define void @i220_ls(i220 signext %x) nounwind { + store i220 %x, i220* @i220_s + ret void +} + +define void @i221_ls(i221 signext %x) nounwind { + store i221 %x, i221* @i221_s + ret void +} + +define void 
@i222_ls(i222 signext %x) nounwind { + store i222 %x, i222* @i222_s + ret void +} + +define void @i223_ls(i223 signext %x) nounwind { + store i223 %x, i223* @i223_s + ret void +} + +define void @i224_ls(i224 signext %x) nounwind { + store i224 %x, i224* @i224_s + ret void +} + +define void @i225_ls(i225 signext %x) nounwind { + store i225 %x, i225* @i225_s + ret void +} + +define void @i226_ls(i226 signext %x) nounwind { + store i226 %x, i226* @i226_s + ret void +} + +define void @i227_ls(i227 signext %x) nounwind { + store i227 %x, i227* @i227_s + ret void +} + +define void @i228_ls(i228 signext %x) nounwind { + store i228 %x, i228* @i228_s + ret void +} + +define void @i229_ls(i229 signext %x) nounwind { + store i229 %x, i229* @i229_s + ret void +} + +define void @i230_ls(i230 signext %x) nounwind { + store i230 %x, i230* @i230_s + ret void +} + +define void @i231_ls(i231 signext %x) nounwind { + store i231 %x, i231* @i231_s + ret void +} + +define void @i232_ls(i232 signext %x) nounwind { + store i232 %x, i232* @i232_s + ret void +} + +define void @i233_ls(i233 signext %x) nounwind { + store i233 %x, i233* @i233_s + ret void +} + +define void @i234_ls(i234 signext %x) nounwind { + store i234 %x, i234* @i234_s + ret void +} + +define void @i235_ls(i235 signext %x) nounwind { + store i235 %x, i235* @i235_s + ret void +} + +define void @i236_ls(i236 signext %x) nounwind { + store i236 %x, i236* @i236_s + ret void +} + +define void @i237_ls(i237 signext %x) nounwind { + store i237 %x, i237* @i237_s + ret void +} + +define void @i238_ls(i238 signext %x) nounwind { + store i238 %x, i238* @i238_s + ret void +} + +define void @i239_ls(i239 signext %x) nounwind { + store i239 %x, i239* @i239_s + ret void +} + +define void @i240_ls(i240 signext %x) nounwind { + store i240 %x, i240* @i240_s + ret void +} + +define void @i241_ls(i241 signext %x) nounwind { + store i241 %x, i241* @i241_s + ret void +} + +define void @i242_ls(i242 signext %x) nounwind { + store i242 %x, i242* 
@i242_s + ret void +} + +define void @i243_ls(i243 signext %x) nounwind { + store i243 %x, i243* @i243_s + ret void +} + +define void @i244_ls(i244 signext %x) nounwind { + store i244 %x, i244* @i244_s + ret void +} + +define void @i245_ls(i245 signext %x) nounwind { + store i245 %x, i245* @i245_s + ret void +} + +define void @i246_ls(i246 signext %x) nounwind { + store i246 %x, i246* @i246_s + ret void +} + +define void @i247_ls(i247 signext %x) nounwind { + store i247 %x, i247* @i247_s + ret void +} + +define void @i248_ls(i248 signext %x) nounwind { + store i248 %x, i248* @i248_s + ret void +} + +define void @i249_ls(i249 signext %x) nounwind { + store i249 %x, i249* @i249_s + ret void +} + +define void @i250_ls(i250 signext %x) nounwind { + store i250 %x, i250* @i250_s + ret void +} + +define void @i251_ls(i251 signext %x) nounwind { + store i251 %x, i251* @i251_s + ret void +} + +define void @i252_ls(i252 signext %x) nounwind { + store i252 %x, i252* @i252_s + ret void +} + +define void @i253_ls(i253 signext %x) nounwind { + store i253 %x, i253* @i253_s + ret void +} + +define void @i254_ls(i254 signext %x) nounwind { + store i254 %x, i254* @i254_s + ret void +} + +define void @i255_ls(i255 signext %x) nounwind { + store i255 %x, i255* @i255_s + ret void +} + +define void @i256_ls(i256 signext %x) nounwind { + store i256 %x, i256* @i256_s + ret void +}
diff --git a/src/LLVM/test/CodeGen/Generic/APIntZextParam.ll b/src/LLVM/test/CodeGen/Generic/APIntZextParam.ll new file mode 100644 index 0000000..173b9fd --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/APIntZextParam.ll
@@ -0,0 +1,1537 @@ +; RUN: llc < %s > %t +@i1_s = external global i1 ; <i1*> [#uses=1] +@i2_s = external global i2 ; <i2*> [#uses=1] +@i3_s = external global i3 ; <i3*> [#uses=1] +@i4_s = external global i4 ; <i4*> [#uses=1] +@i5_s = external global i5 ; <i5*> [#uses=1] +@i6_s = external global i6 ; <i6*> [#uses=1] +@i7_s = external global i7 ; <i7*> [#uses=1] +@i8_s = external global i8 ; <i8*> [#uses=1] +@i9_s = external global i9 ; <i9*> [#uses=1] +@i10_s = external global i10 ; <i10*> [#uses=1] +@i11_s = external global i11 ; <i11*> [#uses=1] +@i12_s = external global i12 ; <i12*> [#uses=1] +@i13_s = external global i13 ; <i13*> [#uses=1] +@i14_s = external global i14 ; <i14*> [#uses=1] +@i15_s = external global i15 ; <i15*> [#uses=1] +@i16_s = external global i16 ; <i16*> [#uses=1] +@i17_s = external global i17 ; <i17*> [#uses=1] +@i18_s = external global i18 ; <i18*> [#uses=1] +@i19_s = external global i19 ; <i19*> [#uses=1] +@i20_s = external global i20 ; <i20*> [#uses=1] +@i21_s = external global i21 ; <i21*> [#uses=1] +@i22_s = external global i22 ; <i22*> [#uses=1] +@i23_s = external global i23 ; <i23*> [#uses=1] +@i24_s = external global i24 ; <i24*> [#uses=1] +@i25_s = external global i25 ; <i25*> [#uses=1] +@i26_s = external global i26 ; <i26*> [#uses=1] +@i27_s = external global i27 ; <i27*> [#uses=1] +@i28_s = external global i28 ; <i28*> [#uses=1] +@i29_s = external global i29 ; <i29*> [#uses=1] +@i30_s = external global i30 ; <i30*> [#uses=1] +@i31_s = external global i31 ; <i31*> [#uses=1] +@i32_s = external global i32 ; <i32*> [#uses=1] +@i33_s = external global i33 ; <i33*> [#uses=1] +@i34_s = external global i34 ; <i34*> [#uses=1] +@i35_s = external global i35 ; <i35*> [#uses=1] +@i36_s = external global i36 ; <i36*> [#uses=1] +@i37_s = external global i37 ; <i37*> [#uses=1] +@i38_s = external global i38 ; <i38*> [#uses=1] +@i39_s = external global i39 ; <i39*> [#uses=1] +@i40_s = external global i40 ; <i40*> [#uses=1] +@i41_s = external global 
i41 ; <i41*> [#uses=1] +@i42_s = external global i42 ; <i42*> [#uses=1] +@i43_s = external global i43 ; <i43*> [#uses=1] +@i44_s = external global i44 ; <i44*> [#uses=1] +@i45_s = external global i45 ; <i45*> [#uses=1] +@i46_s = external global i46 ; <i46*> [#uses=1] +@i47_s = external global i47 ; <i47*> [#uses=1] +@i48_s = external global i48 ; <i48*> [#uses=1] +@i49_s = external global i49 ; <i49*> [#uses=1] +@i50_s = external global i50 ; <i50*> [#uses=1] +@i51_s = external global i51 ; <i51*> [#uses=1] +@i52_s = external global i52 ; <i52*> [#uses=1] +@i53_s = external global i53 ; <i53*> [#uses=1] +@i54_s = external global i54 ; <i54*> [#uses=1] +@i55_s = external global i55 ; <i55*> [#uses=1] +@i56_s = external global i56 ; <i56*> [#uses=1] +@i57_s = external global i57 ; <i57*> [#uses=1] +@i58_s = external global i58 ; <i58*> [#uses=1] +@i59_s = external global i59 ; <i59*> [#uses=1] +@i60_s = external global i60 ; <i60*> [#uses=1] +@i61_s = external global i61 ; <i61*> [#uses=1] +@i62_s = external global i62 ; <i62*> [#uses=1] +@i63_s = external global i63 ; <i63*> [#uses=1] +@i64_s = external global i64 ; <i64*> [#uses=1] +@i65_s = external global i65 ; <i65*> [#uses=1] +@i66_s = external global i66 ; <i66*> [#uses=1] +@i67_s = external global i67 ; <i67*> [#uses=1] +@i68_s = external global i68 ; <i68*> [#uses=1] +@i69_s = external global i69 ; <i69*> [#uses=1] +@i70_s = external global i70 ; <i70*> [#uses=1] +@i71_s = external global i71 ; <i71*> [#uses=1] +@i72_s = external global i72 ; <i72*> [#uses=1] +@i73_s = external global i73 ; <i73*> [#uses=1] +@i74_s = external global i74 ; <i74*> [#uses=1] +@i75_s = external global i75 ; <i75*> [#uses=1] +@i76_s = external global i76 ; <i76*> [#uses=1] +@i77_s = external global i77 ; <i77*> [#uses=1] +@i78_s = external global i78 ; <i78*> [#uses=1] +@i79_s = external global i79 ; <i79*> [#uses=1] +@i80_s = external global i80 ; <i80*> [#uses=1] +@i81_s = external global i81 ; <i81*> [#uses=1] +@i82_s = 
external global i82 ; <i82*> [#uses=1] +@i83_s = external global i83 ; <i83*> [#uses=1] +@i84_s = external global i84 ; <i84*> [#uses=1] +@i85_s = external global i85 ; <i85*> [#uses=1] +@i86_s = external global i86 ; <i86*> [#uses=1] +@i87_s = external global i87 ; <i87*> [#uses=1] +@i88_s = external global i88 ; <i88*> [#uses=1] +@i89_s = external global i89 ; <i89*> [#uses=1] +@i90_s = external global i90 ; <i90*> [#uses=1] +@i91_s = external global i91 ; <i91*> [#uses=1] +@i92_s = external global i92 ; <i92*> [#uses=1] +@i93_s = external global i93 ; <i93*> [#uses=1] +@i94_s = external global i94 ; <i94*> [#uses=1] +@i95_s = external global i95 ; <i95*> [#uses=1] +@i96_s = external global i96 ; <i96*> [#uses=1] +@i97_s = external global i97 ; <i97*> [#uses=1] +@i98_s = external global i98 ; <i98*> [#uses=1] +@i99_s = external global i99 ; <i99*> [#uses=1] +@i100_s = external global i100 ; <i100*> [#uses=1] +@i101_s = external global i101 ; <i101*> [#uses=1] +@i102_s = external global i102 ; <i102*> [#uses=1] +@i103_s = external global i103 ; <i103*> [#uses=1] +@i104_s = external global i104 ; <i104*> [#uses=1] +@i105_s = external global i105 ; <i105*> [#uses=1] +@i106_s = external global i106 ; <i106*> [#uses=1] +@i107_s = external global i107 ; <i107*> [#uses=1] +@i108_s = external global i108 ; <i108*> [#uses=1] +@i109_s = external global i109 ; <i109*> [#uses=1] +@i110_s = external global i110 ; <i110*> [#uses=1] +@i111_s = external global i111 ; <i111*> [#uses=1] +@i112_s = external global i112 ; <i112*> [#uses=1] +@i113_s = external global i113 ; <i113*> [#uses=1] +@i114_s = external global i114 ; <i114*> [#uses=1] +@i115_s = external global i115 ; <i115*> [#uses=1] +@i116_s = external global i116 ; <i116*> [#uses=1] +@i117_s = external global i117 ; <i117*> [#uses=1] +@i118_s = external global i118 ; <i118*> [#uses=1] +@i119_s = external global i119 ; <i119*> [#uses=1] +@i120_s = external global i120 ; <i120*> [#uses=1] +@i121_s = external global i121 ; 
<i121*> [#uses=1] +@i122_s = external global i122 ; <i122*> [#uses=1] +@i123_s = external global i123 ; <i123*> [#uses=1] +@i124_s = external global i124 ; <i124*> [#uses=1] +@i125_s = external global i125 ; <i125*> [#uses=1] +@i126_s = external global i126 ; <i126*> [#uses=1] +@i127_s = external global i127 ; <i127*> [#uses=1] +@i128_s = external global i128 ; <i128*> [#uses=1] +@i129_s = external global i129 ; <i129*> [#uses=1] +@i130_s = external global i130 ; <i130*> [#uses=1] +@i131_s = external global i131 ; <i131*> [#uses=1] +@i132_s = external global i132 ; <i132*> [#uses=1] +@i133_s = external global i133 ; <i133*> [#uses=1] +@i134_s = external global i134 ; <i134*> [#uses=1] +@i135_s = external global i135 ; <i135*> [#uses=1] +@i136_s = external global i136 ; <i136*> [#uses=1] +@i137_s = external global i137 ; <i137*> [#uses=1] +@i138_s = external global i138 ; <i138*> [#uses=1] +@i139_s = external global i139 ; <i139*> [#uses=1] +@i140_s = external global i140 ; <i140*> [#uses=1] +@i141_s = external global i141 ; <i141*> [#uses=1] +@i142_s = external global i142 ; <i142*> [#uses=1] +@i143_s = external global i143 ; <i143*> [#uses=1] +@i144_s = external global i144 ; <i144*> [#uses=1] +@i145_s = external global i145 ; <i145*> [#uses=1] +@i146_s = external global i146 ; <i146*> [#uses=1] +@i147_s = external global i147 ; <i147*> [#uses=1] +@i148_s = external global i148 ; <i148*> [#uses=1] +@i149_s = external global i149 ; <i149*> [#uses=1] +@i150_s = external global i150 ; <i150*> [#uses=1] +@i151_s = external global i151 ; <i151*> [#uses=1] +@i152_s = external global i152 ; <i152*> [#uses=1] +@i153_s = external global i153 ; <i153*> [#uses=1] +@i154_s = external global i154 ; <i154*> [#uses=1] +@i155_s = external global i155 ; <i155*> [#uses=1] +@i156_s = external global i156 ; <i156*> [#uses=1] +@i157_s = external global i157 ; <i157*> [#uses=1] +@i158_s = external global i158 ; <i158*> [#uses=1] +@i159_s = external global i159 ; <i159*> [#uses=1] 
+@i160_s = external global i160 ; <i160*> [#uses=1] +@i161_s = external global i161 ; <i161*> [#uses=1] +@i162_s = external global i162 ; <i162*> [#uses=1] +@i163_s = external global i163 ; <i163*> [#uses=1] +@i164_s = external global i164 ; <i164*> [#uses=1] +@i165_s = external global i165 ; <i165*> [#uses=1] +@i166_s = external global i166 ; <i166*> [#uses=1] +@i167_s = external global i167 ; <i167*> [#uses=1] +@i168_s = external global i168 ; <i168*> [#uses=1] +@i169_s = external global i169 ; <i169*> [#uses=1] +@i170_s = external global i170 ; <i170*> [#uses=1] +@i171_s = external global i171 ; <i171*> [#uses=1] +@i172_s = external global i172 ; <i172*> [#uses=1] +@i173_s = external global i173 ; <i173*> [#uses=1] +@i174_s = external global i174 ; <i174*> [#uses=1] +@i175_s = external global i175 ; <i175*> [#uses=1] +@i176_s = external global i176 ; <i176*> [#uses=1] +@i177_s = external global i177 ; <i177*> [#uses=1] +@i178_s = external global i178 ; <i178*> [#uses=1] +@i179_s = external global i179 ; <i179*> [#uses=1] +@i180_s = external global i180 ; <i180*> [#uses=1] +@i181_s = external global i181 ; <i181*> [#uses=1] +@i182_s = external global i182 ; <i182*> [#uses=1] +@i183_s = external global i183 ; <i183*> [#uses=1] +@i184_s = external global i184 ; <i184*> [#uses=1] +@i185_s = external global i185 ; <i185*> [#uses=1] +@i186_s = external global i186 ; <i186*> [#uses=1] +@i187_s = external global i187 ; <i187*> [#uses=1] +@i188_s = external global i188 ; <i188*> [#uses=1] +@i189_s = external global i189 ; <i189*> [#uses=1] +@i190_s = external global i190 ; <i190*> [#uses=1] +@i191_s = external global i191 ; <i191*> [#uses=1] +@i192_s = external global i192 ; <i192*> [#uses=1] +@i193_s = external global i193 ; <i193*> [#uses=1] +@i194_s = external global i194 ; <i194*> [#uses=1] +@i195_s = external global i195 ; <i195*> [#uses=1] +@i196_s = external global i196 ; <i196*> [#uses=1] +@i197_s = external global i197 ; <i197*> [#uses=1] +@i198_s = external 
global i198 ; <i198*> [#uses=1] +@i199_s = external global i199 ; <i199*> [#uses=1] +@i200_s = external global i200 ; <i200*> [#uses=1] +@i201_s = external global i201 ; <i201*> [#uses=1] +@i202_s = external global i202 ; <i202*> [#uses=1] +@i203_s = external global i203 ; <i203*> [#uses=1] +@i204_s = external global i204 ; <i204*> [#uses=1] +@i205_s = external global i205 ; <i205*> [#uses=1] +@i206_s = external global i206 ; <i206*> [#uses=1] +@i207_s = external global i207 ; <i207*> [#uses=1] +@i208_s = external global i208 ; <i208*> [#uses=1] +@i209_s = external global i209 ; <i209*> [#uses=1] +@i210_s = external global i210 ; <i210*> [#uses=1] +@i211_s = external global i211 ; <i211*> [#uses=1] +@i212_s = external global i212 ; <i212*> [#uses=1] +@i213_s = external global i213 ; <i213*> [#uses=1] +@i214_s = external global i214 ; <i214*> [#uses=1] +@i215_s = external global i215 ; <i215*> [#uses=1] +@i216_s = external global i216 ; <i216*> [#uses=1] +@i217_s = external global i217 ; <i217*> [#uses=1] +@i218_s = external global i218 ; <i218*> [#uses=1] +@i219_s = external global i219 ; <i219*> [#uses=1] +@i220_s = external global i220 ; <i220*> [#uses=1] +@i221_s = external global i221 ; <i221*> [#uses=1] +@i222_s = external global i222 ; <i222*> [#uses=1] +@i223_s = external global i223 ; <i223*> [#uses=1] +@i224_s = external global i224 ; <i224*> [#uses=1] +@i225_s = external global i225 ; <i225*> [#uses=1] +@i226_s = external global i226 ; <i226*> [#uses=1] +@i227_s = external global i227 ; <i227*> [#uses=1] +@i228_s = external global i228 ; <i228*> [#uses=1] +@i229_s = external global i229 ; <i229*> [#uses=1] +@i230_s = external global i230 ; <i230*> [#uses=1] +@i231_s = external global i231 ; <i231*> [#uses=1] +@i232_s = external global i232 ; <i232*> [#uses=1] +@i233_s = external global i233 ; <i233*> [#uses=1] +@i234_s = external global i234 ; <i234*> [#uses=1] +@i235_s = external global i235 ; <i235*> [#uses=1] +@i236_s = external global i236 ; <i236*> 
[#uses=1] +@i237_s = external global i237 ; <i237*> [#uses=1] +@i238_s = external global i238 ; <i238*> [#uses=1] +@i239_s = external global i239 ; <i239*> [#uses=1] +@i240_s = external global i240 ; <i240*> [#uses=1] +@i241_s = external global i241 ; <i241*> [#uses=1] +@i242_s = external global i242 ; <i242*> [#uses=1] +@i243_s = external global i243 ; <i243*> [#uses=1] +@i244_s = external global i244 ; <i244*> [#uses=1] +@i245_s = external global i245 ; <i245*> [#uses=1] +@i246_s = external global i246 ; <i246*> [#uses=1] +@i247_s = external global i247 ; <i247*> [#uses=1] +@i248_s = external global i248 ; <i248*> [#uses=1] +@i249_s = external global i249 ; <i249*> [#uses=1] +@i250_s = external global i250 ; <i250*> [#uses=1] +@i251_s = external global i251 ; <i251*> [#uses=1] +@i252_s = external global i252 ; <i252*> [#uses=1] +@i253_s = external global i253 ; <i253*> [#uses=1] +@i254_s = external global i254 ; <i254*> [#uses=1] +@i255_s = external global i255 ; <i255*> [#uses=1] +@i256_s = external global i256 ; <i256*> [#uses=1] + +define void @i1_ls(i1 zeroext %x) nounwind { + store i1 %x, i1* @i1_s + ret void +} + +define void @i2_ls(i2 zeroext %x) nounwind { + store i2 %x, i2* @i2_s + ret void +} + +define void @i3_ls(i3 zeroext %x) nounwind { + store i3 %x, i3* @i3_s + ret void +} + +define void @i4_ls(i4 zeroext %x) nounwind { + store i4 %x, i4* @i4_s + ret void +} + +define void @i5_ls(i5 zeroext %x) nounwind { + store i5 %x, i5* @i5_s + ret void +} + +define void @i6_ls(i6 zeroext %x) nounwind { + store i6 %x, i6* @i6_s + ret void +} + +define void @i7_ls(i7 zeroext %x) nounwind { + store i7 %x, i7* @i7_s + ret void +} + +define void @i8_ls(i8 zeroext %x) nounwind { + store i8 %x, i8* @i8_s + ret void +} + +define void @i9_ls(i9 zeroext %x) nounwind { + store i9 %x, i9* @i9_s + ret void +} + +define void @i10_ls(i10 zeroext %x) nounwind { + store i10 %x, i10* @i10_s + ret void +} + +define void @i11_ls(i11 zeroext %x) nounwind { + store i11 %x, i11* 
@i11_s + ret void +} + +define void @i12_ls(i12 zeroext %x) nounwind { + store i12 %x, i12* @i12_s + ret void +} + +define void @i13_ls(i13 zeroext %x) nounwind { + store i13 %x, i13* @i13_s + ret void +} + +define void @i14_ls(i14 zeroext %x) nounwind { + store i14 %x, i14* @i14_s + ret void +} + +define void @i15_ls(i15 zeroext %x) nounwind { + store i15 %x, i15* @i15_s + ret void +} + +define void @i16_ls(i16 zeroext %x) nounwind { + store i16 %x, i16* @i16_s + ret void +} + +define void @i17_ls(i17 zeroext %x) nounwind { + store i17 %x, i17* @i17_s + ret void +} + +define void @i18_ls(i18 zeroext %x) nounwind { + store i18 %x, i18* @i18_s + ret void +} + +define void @i19_ls(i19 zeroext %x) nounwind { + store i19 %x, i19* @i19_s + ret void +} + +define void @i20_ls(i20 zeroext %x) nounwind { + store i20 %x, i20* @i20_s + ret void +} + +define void @i21_ls(i21 zeroext %x) nounwind { + store i21 %x, i21* @i21_s + ret void +} + +define void @i22_ls(i22 zeroext %x) nounwind { + store i22 %x, i22* @i22_s + ret void +} + +define void @i23_ls(i23 zeroext %x) nounwind { + store i23 %x, i23* @i23_s + ret void +} + +define void @i24_ls(i24 zeroext %x) nounwind { + store i24 %x, i24* @i24_s + ret void +} + +define void @i25_ls(i25 zeroext %x) nounwind { + store i25 %x, i25* @i25_s + ret void +} + +define void @i26_ls(i26 zeroext %x) nounwind { + store i26 %x, i26* @i26_s + ret void +} + +define void @i27_ls(i27 zeroext %x) nounwind { + store i27 %x, i27* @i27_s + ret void +} + +define void @i28_ls(i28 zeroext %x) nounwind { + store i28 %x, i28* @i28_s + ret void +} + +define void @i29_ls(i29 zeroext %x) nounwind { + store i29 %x, i29* @i29_s + ret void +} + +define void @i30_ls(i30 zeroext %x) nounwind { + store i30 %x, i30* @i30_s + ret void +} + +define void @i31_ls(i31 zeroext %x) nounwind { + store i31 %x, i31* @i31_s + ret void +} + +define void @i32_ls(i32 zeroext %x) nounwind { + store i32 %x, i32* @i32_s + ret void +} + +define void @i33_ls(i33 zeroext %x) 
nounwind { + store i33 %x, i33* @i33_s + ret void +} + +define void @i34_ls(i34 zeroext %x) nounwind { + store i34 %x, i34* @i34_s + ret void +} + +define void @i35_ls(i35 zeroext %x) nounwind { + store i35 %x, i35* @i35_s + ret void +} + +define void @i36_ls(i36 zeroext %x) nounwind { + store i36 %x, i36* @i36_s + ret void +} + +define void @i37_ls(i37 zeroext %x) nounwind { + store i37 %x, i37* @i37_s + ret void +} + +define void @i38_ls(i38 zeroext %x) nounwind { + store i38 %x, i38* @i38_s + ret void +} + +define void @i39_ls(i39 zeroext %x) nounwind { + store i39 %x, i39* @i39_s + ret void +} + +define void @i40_ls(i40 zeroext %x) nounwind { + store i40 %x, i40* @i40_s + ret void +} + +define void @i41_ls(i41 zeroext %x) nounwind { + store i41 %x, i41* @i41_s + ret void +} + +define void @i42_ls(i42 zeroext %x) nounwind { + store i42 %x, i42* @i42_s + ret void +} + +define void @i43_ls(i43 zeroext %x) nounwind { + store i43 %x, i43* @i43_s + ret void +} + +define void @i44_ls(i44 zeroext %x) nounwind { + store i44 %x, i44* @i44_s + ret void +} + +define void @i45_ls(i45 zeroext %x) nounwind { + store i45 %x, i45* @i45_s + ret void +} + +define void @i46_ls(i46 zeroext %x) nounwind { + store i46 %x, i46* @i46_s + ret void +} + +define void @i47_ls(i47 zeroext %x) nounwind { + store i47 %x, i47* @i47_s + ret void +} + +define void @i48_ls(i48 zeroext %x) nounwind { + store i48 %x, i48* @i48_s + ret void +} + +define void @i49_ls(i49 zeroext %x) nounwind { + store i49 %x, i49* @i49_s + ret void +} + +define void @i50_ls(i50 zeroext %x) nounwind { + store i50 %x, i50* @i50_s + ret void +} + +define void @i51_ls(i51 zeroext %x) nounwind { + store i51 %x, i51* @i51_s + ret void +} + +define void @i52_ls(i52 zeroext %x) nounwind { + store i52 %x, i52* @i52_s + ret void +} + +define void @i53_ls(i53 zeroext %x) nounwind { + store i53 %x, i53* @i53_s + ret void +} + +define void @i54_ls(i54 zeroext %x) nounwind { + store i54 %x, i54* @i54_s + ret void +} + +define void 
@i55_ls(i55 zeroext %x) nounwind { + store i55 %x, i55* @i55_s + ret void +} + +define void @i56_ls(i56 zeroext %x) nounwind { + store i56 %x, i56* @i56_s + ret void +} + +define void @i57_ls(i57 zeroext %x) nounwind { + store i57 %x, i57* @i57_s + ret void +} + +define void @i58_ls(i58 zeroext %x) nounwind { + store i58 %x, i58* @i58_s + ret void +} + +define void @i59_ls(i59 zeroext %x) nounwind { + store i59 %x, i59* @i59_s + ret void +} + +define void @i60_ls(i60 zeroext %x) nounwind { + store i60 %x, i60* @i60_s + ret void +} + +define void @i61_ls(i61 zeroext %x) nounwind { + store i61 %x, i61* @i61_s + ret void +} + +define void @i62_ls(i62 zeroext %x) nounwind { + store i62 %x, i62* @i62_s + ret void +} + +define void @i63_ls(i63 zeroext %x) nounwind { + store i63 %x, i63* @i63_s + ret void +} + +define void @i64_ls(i64 zeroext %x) nounwind { + store i64 %x, i64* @i64_s + ret void +} + +define void @i65_ls(i65 zeroext %x) nounwind { + store i65 %x, i65* @i65_s + ret void +} + +define void @i66_ls(i66 zeroext %x) nounwind { + store i66 %x, i66* @i66_s + ret void +} + +define void @i67_ls(i67 zeroext %x) nounwind { + store i67 %x, i67* @i67_s + ret void +} + +define void @i68_ls(i68 zeroext %x) nounwind { + store i68 %x, i68* @i68_s + ret void +} + +define void @i69_ls(i69 zeroext %x) nounwind { + store i69 %x, i69* @i69_s + ret void +} + +define void @i70_ls(i70 zeroext %x) nounwind { + store i70 %x, i70* @i70_s + ret void +} + +define void @i71_ls(i71 zeroext %x) nounwind { + store i71 %x, i71* @i71_s + ret void +} + +define void @i72_ls(i72 zeroext %x) nounwind { + store i72 %x, i72* @i72_s + ret void +} + +define void @i73_ls(i73 zeroext %x) nounwind { + store i73 %x, i73* @i73_s + ret void +} + +define void @i74_ls(i74 zeroext %x) nounwind { + store i74 %x, i74* @i74_s + ret void +} + +define void @i75_ls(i75 zeroext %x) nounwind { + store i75 %x, i75* @i75_s + ret void +} + +define void @i76_ls(i76 zeroext %x) nounwind { + store i76 %x, i76* @i76_s + 
ret void +} + +define void @i77_ls(i77 zeroext %x) nounwind { + store i77 %x, i77* @i77_s + ret void +} + +define void @i78_ls(i78 zeroext %x) nounwind { + store i78 %x, i78* @i78_s + ret void +} + +define void @i79_ls(i79 zeroext %x) nounwind { + store i79 %x, i79* @i79_s + ret void +} + +define void @i80_ls(i80 zeroext %x) nounwind { + store i80 %x, i80* @i80_s + ret void +} + +define void @i81_ls(i81 zeroext %x) nounwind { + store i81 %x, i81* @i81_s + ret void +} + +define void @i82_ls(i82 zeroext %x) nounwind { + store i82 %x, i82* @i82_s + ret void +} + +define void @i83_ls(i83 zeroext %x) nounwind { + store i83 %x, i83* @i83_s + ret void +} + +define void @i84_ls(i84 zeroext %x) nounwind { + store i84 %x, i84* @i84_s + ret void +} + +define void @i85_ls(i85 zeroext %x) nounwind { + store i85 %x, i85* @i85_s + ret void +} + +define void @i86_ls(i86 zeroext %x) nounwind { + store i86 %x, i86* @i86_s + ret void +} + +define void @i87_ls(i87 zeroext %x) nounwind { + store i87 %x, i87* @i87_s + ret void +} + +define void @i88_ls(i88 zeroext %x) nounwind { + store i88 %x, i88* @i88_s + ret void +} + +define void @i89_ls(i89 zeroext %x) nounwind { + store i89 %x, i89* @i89_s + ret void +} + +define void @i90_ls(i90 zeroext %x) nounwind { + store i90 %x, i90* @i90_s + ret void +} + +define void @i91_ls(i91 zeroext %x) nounwind { + store i91 %x, i91* @i91_s + ret void +} + +define void @i92_ls(i92 zeroext %x) nounwind { + store i92 %x, i92* @i92_s + ret void +} + +define void @i93_ls(i93 zeroext %x) nounwind { + store i93 %x, i93* @i93_s + ret void +} + +define void @i94_ls(i94 zeroext %x) nounwind { + store i94 %x, i94* @i94_s + ret void +} + +define void @i95_ls(i95 zeroext %x) nounwind { + store i95 %x, i95* @i95_s + ret void +} + +define void @i96_ls(i96 zeroext %x) nounwind { + store i96 %x, i96* @i96_s + ret void +} + +define void @i97_ls(i97 zeroext %x) nounwind { + store i97 %x, i97* @i97_s + ret void +} + +define void @i98_ls(i98 zeroext %x) nounwind { + 
store i98 %x, i98* @i98_s + ret void +} + +define void @i99_ls(i99 zeroext %x) nounwind { + store i99 %x, i99* @i99_s + ret void +} + +define void @i100_ls(i100 zeroext %x) nounwind { + store i100 %x, i100* @i100_s + ret void +} + +define void @i101_ls(i101 zeroext %x) nounwind { + store i101 %x, i101* @i101_s + ret void +} + +define void @i102_ls(i102 zeroext %x) nounwind { + store i102 %x, i102* @i102_s + ret void +} + +define void @i103_ls(i103 zeroext %x) nounwind { + store i103 %x, i103* @i103_s + ret void +} + +define void @i104_ls(i104 zeroext %x) nounwind { + store i104 %x, i104* @i104_s + ret void +} + +define void @i105_ls(i105 zeroext %x) nounwind { + store i105 %x, i105* @i105_s + ret void +} + +define void @i106_ls(i106 zeroext %x) nounwind { + store i106 %x, i106* @i106_s + ret void +} + +define void @i107_ls(i107 zeroext %x) nounwind { + store i107 %x, i107* @i107_s + ret void +} + +define void @i108_ls(i108 zeroext %x) nounwind { + store i108 %x, i108* @i108_s + ret void +} + +define void @i109_ls(i109 zeroext %x) nounwind { + store i109 %x, i109* @i109_s + ret void +} + +define void @i110_ls(i110 zeroext %x) nounwind { + store i110 %x, i110* @i110_s + ret void +} + +define void @i111_ls(i111 zeroext %x) nounwind { + store i111 %x, i111* @i111_s + ret void +} + +define void @i112_ls(i112 zeroext %x) nounwind { + store i112 %x, i112* @i112_s + ret void +} + +define void @i113_ls(i113 zeroext %x) nounwind { + store i113 %x, i113* @i113_s + ret void +} + +define void @i114_ls(i114 zeroext %x) nounwind { + store i114 %x, i114* @i114_s + ret void +} + +define void @i115_ls(i115 zeroext %x) nounwind { + store i115 %x, i115* @i115_s + ret void +} + +define void @i116_ls(i116 zeroext %x) nounwind { + store i116 %x, i116* @i116_s + ret void +} + +define void @i117_ls(i117 zeroext %x) nounwind { + store i117 %x, i117* @i117_s + ret void +} + +define void @i118_ls(i118 zeroext %x) nounwind { + store i118 %x, i118* @i118_s + ret void +} + +define void 
@i119_ls(i119 zeroext %x) nounwind { + store i119 %x, i119* @i119_s + ret void +} + +define void @i120_ls(i120 zeroext %x) nounwind { + store i120 %x, i120* @i120_s + ret void +} + +define void @i121_ls(i121 zeroext %x) nounwind { + store i121 %x, i121* @i121_s + ret void +} + +define void @i122_ls(i122 zeroext %x) nounwind { + store i122 %x, i122* @i122_s + ret void +} + +define void @i123_ls(i123 zeroext %x) nounwind { + store i123 %x, i123* @i123_s + ret void +} + +define void @i124_ls(i124 zeroext %x) nounwind { + store i124 %x, i124* @i124_s + ret void +} + +define void @i125_ls(i125 zeroext %x) nounwind { + store i125 %x, i125* @i125_s + ret void +} + +define void @i126_ls(i126 zeroext %x) nounwind { + store i126 %x, i126* @i126_s + ret void +} + +define void @i127_ls(i127 zeroext %x) nounwind { + store i127 %x, i127* @i127_s + ret void +} + +define void @i128_ls(i128 zeroext %x) nounwind { + store i128 %x, i128* @i128_s + ret void +} + +define void @i129_ls(i129 zeroext %x) nounwind { + store i129 %x, i129* @i129_s + ret void +} + +define void @i130_ls(i130 zeroext %x) nounwind { + store i130 %x, i130* @i130_s + ret void +} + +define void @i131_ls(i131 zeroext %x) nounwind { + store i131 %x, i131* @i131_s + ret void +} + +define void @i132_ls(i132 zeroext %x) nounwind { + store i132 %x, i132* @i132_s + ret void +} + +define void @i133_ls(i133 zeroext %x) nounwind { + store i133 %x, i133* @i133_s + ret void +} + +define void @i134_ls(i134 zeroext %x) nounwind { + store i134 %x, i134* @i134_s + ret void +} + +define void @i135_ls(i135 zeroext %x) nounwind { + store i135 %x, i135* @i135_s + ret void +} + +define void @i136_ls(i136 zeroext %x) nounwind { + store i136 %x, i136* @i136_s + ret void +} + +define void @i137_ls(i137 zeroext %x) nounwind { + store i137 %x, i137* @i137_s + ret void +} + +define void @i138_ls(i138 zeroext %x) nounwind { + store i138 %x, i138* @i138_s + ret void +} + +define void @i139_ls(i139 zeroext %x) nounwind { + store i139 %x, i139* 
@i139_s + ret void +} + +define void @i140_ls(i140 zeroext %x) nounwind { + store i140 %x, i140* @i140_s + ret void +} + +define void @i141_ls(i141 zeroext %x) nounwind { + store i141 %x, i141* @i141_s + ret void +} + +define void @i142_ls(i142 zeroext %x) nounwind { + store i142 %x, i142* @i142_s + ret void +} + +define void @i143_ls(i143 zeroext %x) nounwind { + store i143 %x, i143* @i143_s + ret void +} + +define void @i144_ls(i144 zeroext %x) nounwind { + store i144 %x, i144* @i144_s + ret void +} + +define void @i145_ls(i145 zeroext %x) nounwind { + store i145 %x, i145* @i145_s + ret void +} + +define void @i146_ls(i146 zeroext %x) nounwind { + store i146 %x, i146* @i146_s + ret void +} + +define void @i147_ls(i147 zeroext %x) nounwind { + store i147 %x, i147* @i147_s + ret void +} + +define void @i148_ls(i148 zeroext %x) nounwind { + store i148 %x, i148* @i148_s + ret void +} + +define void @i149_ls(i149 zeroext %x) nounwind { + store i149 %x, i149* @i149_s + ret void +} + +define void @i150_ls(i150 zeroext %x) nounwind { + store i150 %x, i150* @i150_s + ret void +} + +define void @i151_ls(i151 zeroext %x) nounwind { + store i151 %x, i151* @i151_s + ret void +} + +define void @i152_ls(i152 zeroext %x) nounwind { + store i152 %x, i152* @i152_s + ret void +} + +define void @i153_ls(i153 zeroext %x) nounwind { + store i153 %x, i153* @i153_s + ret void +} + +define void @i154_ls(i154 zeroext %x) nounwind { + store i154 %x, i154* @i154_s + ret void +} + +define void @i155_ls(i155 zeroext %x) nounwind { + store i155 %x, i155* @i155_s + ret void +} + +define void @i156_ls(i156 zeroext %x) nounwind { + store i156 %x, i156* @i156_s + ret void +} + +define void @i157_ls(i157 zeroext %x) nounwind { + store i157 %x, i157* @i157_s + ret void +} + +define void @i158_ls(i158 zeroext %x) nounwind { + store i158 %x, i158* @i158_s + ret void +} + +define void @i159_ls(i159 zeroext %x) nounwind { + store i159 %x, i159* @i159_s + ret void +} + +define void @i160_ls(i160 zeroext 
%x) nounwind { + store i160 %x, i160* @i160_s + ret void +} + +define void @i161_ls(i161 zeroext %x) nounwind { + store i161 %x, i161* @i161_s + ret void +} + +define void @i162_ls(i162 zeroext %x) nounwind { + store i162 %x, i162* @i162_s + ret void +} + +define void @i163_ls(i163 zeroext %x) nounwind { + store i163 %x, i163* @i163_s + ret void +} + +define void @i164_ls(i164 zeroext %x) nounwind { + store i164 %x, i164* @i164_s + ret void +} + +define void @i165_ls(i165 zeroext %x) nounwind { + store i165 %x, i165* @i165_s + ret void +} + +define void @i166_ls(i166 zeroext %x) nounwind { + store i166 %x, i166* @i166_s + ret void +} + +define void @i167_ls(i167 zeroext %x) nounwind { + store i167 %x, i167* @i167_s + ret void +} + +define void @i168_ls(i168 zeroext %x) nounwind { + store i168 %x, i168* @i168_s + ret void +} + +define void @i169_ls(i169 zeroext %x) nounwind { + store i169 %x, i169* @i169_s + ret void +} + +define void @i170_ls(i170 zeroext %x) nounwind { + store i170 %x, i170* @i170_s + ret void +} + +define void @i171_ls(i171 zeroext %x) nounwind { + store i171 %x, i171* @i171_s + ret void +} + +define void @i172_ls(i172 zeroext %x) nounwind { + store i172 %x, i172* @i172_s + ret void +} + +define void @i173_ls(i173 zeroext %x) nounwind { + store i173 %x, i173* @i173_s + ret void +} + +define void @i174_ls(i174 zeroext %x) nounwind { + store i174 %x, i174* @i174_s + ret void +} + +define void @i175_ls(i175 zeroext %x) nounwind { + store i175 %x, i175* @i175_s + ret void +} + +define void @i176_ls(i176 zeroext %x) nounwind { + store i176 %x, i176* @i176_s + ret void +} + +define void @i177_ls(i177 zeroext %x) nounwind { + store i177 %x, i177* @i177_s + ret void +} + +define void @i178_ls(i178 zeroext %x) nounwind { + store i178 %x, i178* @i178_s + ret void +} + +define void @i179_ls(i179 zeroext %x) nounwind { + store i179 %x, i179* @i179_s + ret void +} + +define void @i180_ls(i180 zeroext %x) nounwind { + store i180 %x, i180* @i180_s + ret void +} 
+ +define void @i181_ls(i181 zeroext %x) nounwind { + store i181 %x, i181* @i181_s + ret void +} + +define void @i182_ls(i182 zeroext %x) nounwind { + store i182 %x, i182* @i182_s + ret void +} + +define void @i183_ls(i183 zeroext %x) nounwind { + store i183 %x, i183* @i183_s + ret void +} + +define void @i184_ls(i184 zeroext %x) nounwind { + store i184 %x, i184* @i184_s + ret void +} + +define void @i185_ls(i185 zeroext %x) nounwind { + store i185 %x, i185* @i185_s + ret void +} + +define void @i186_ls(i186 zeroext %x) nounwind { + store i186 %x, i186* @i186_s + ret void +} + +define void @i187_ls(i187 zeroext %x) nounwind { + store i187 %x, i187* @i187_s + ret void +} + +define void @i188_ls(i188 zeroext %x) nounwind { + store i188 %x, i188* @i188_s + ret void +} + +define void @i189_ls(i189 zeroext %x) nounwind { + store i189 %x, i189* @i189_s + ret void +} + +define void @i190_ls(i190 zeroext %x) nounwind { + store i190 %x, i190* @i190_s + ret void +} + +define void @i191_ls(i191 zeroext %x) nounwind { + store i191 %x, i191* @i191_s + ret void +} + +define void @i192_ls(i192 zeroext %x) nounwind { + store i192 %x, i192* @i192_s + ret void +} + +define void @i193_ls(i193 zeroext %x) nounwind { + store i193 %x, i193* @i193_s + ret void +} + +define void @i194_ls(i194 zeroext %x) nounwind { + store i194 %x, i194* @i194_s + ret void +} + +define void @i195_ls(i195 zeroext %x) nounwind { + store i195 %x, i195* @i195_s + ret void +} + +define void @i196_ls(i196 zeroext %x) nounwind { + store i196 %x, i196* @i196_s + ret void +} + +define void @i197_ls(i197 zeroext %x) nounwind { + store i197 %x, i197* @i197_s + ret void +} + +define void @i198_ls(i198 zeroext %x) nounwind { + store i198 %x, i198* @i198_s + ret void +} + +define void @i199_ls(i199 zeroext %x) nounwind { + store i199 %x, i199* @i199_s + ret void +} + +define void @i200_ls(i200 zeroext %x) nounwind { + store i200 %x, i200* @i200_s + ret void +} + +define void @i201_ls(i201 zeroext %x) nounwind { + store 
i201 %x, i201* @i201_s + ret void +} + +define void @i202_ls(i202 zeroext %x) nounwind { + store i202 %x, i202* @i202_s + ret void +} + +define void @i203_ls(i203 zeroext %x) nounwind { + store i203 %x, i203* @i203_s + ret void +} + +define void @i204_ls(i204 zeroext %x) nounwind { + store i204 %x, i204* @i204_s + ret void +} + +define void @i205_ls(i205 zeroext %x) nounwind { + store i205 %x, i205* @i205_s + ret void +} + +define void @i206_ls(i206 zeroext %x) nounwind { + store i206 %x, i206* @i206_s + ret void +} + +define void @i207_ls(i207 zeroext %x) nounwind { + store i207 %x, i207* @i207_s + ret void +} + +define void @i208_ls(i208 zeroext %x) nounwind { + store i208 %x, i208* @i208_s + ret void +} + +define void @i209_ls(i209 zeroext %x) nounwind { + store i209 %x, i209* @i209_s + ret void +} + +define void @i210_ls(i210 zeroext %x) nounwind { + store i210 %x, i210* @i210_s + ret void +} + +define void @i211_ls(i211 zeroext %x) nounwind { + store i211 %x, i211* @i211_s + ret void +} + +define void @i212_ls(i212 zeroext %x) nounwind { + store i212 %x, i212* @i212_s + ret void +} + +define void @i213_ls(i213 zeroext %x) nounwind { + store i213 %x, i213* @i213_s + ret void +} + +define void @i214_ls(i214 zeroext %x) nounwind { + store i214 %x, i214* @i214_s + ret void +} + +define void @i215_ls(i215 zeroext %x) nounwind { + store i215 %x, i215* @i215_s + ret void +} + +define void @i216_ls(i216 zeroext %x) nounwind { + store i216 %x, i216* @i216_s + ret void +} + +define void @i217_ls(i217 zeroext %x) nounwind { + store i217 %x, i217* @i217_s + ret void +} + +define void @i218_ls(i218 zeroext %x) nounwind { + store i218 %x, i218* @i218_s + ret void +} + +define void @i219_ls(i219 zeroext %x) nounwind { + store i219 %x, i219* @i219_s + ret void +} + +define void @i220_ls(i220 zeroext %x) nounwind { + store i220 %x, i220* @i220_s + ret void +} + +define void @i221_ls(i221 zeroext %x) nounwind { + store i221 %x, i221* @i221_s + ret void +} + +define void 
@i222_ls(i222 zeroext %x) nounwind { + store i222 %x, i222* @i222_s + ret void +} + +define void @i223_ls(i223 zeroext %x) nounwind { + store i223 %x, i223* @i223_s + ret void +} + +define void @i224_ls(i224 zeroext %x) nounwind { + store i224 %x, i224* @i224_s + ret void +} + +define void @i225_ls(i225 zeroext %x) nounwind { + store i225 %x, i225* @i225_s + ret void +} + +define void @i226_ls(i226 zeroext %x) nounwind { + store i226 %x, i226* @i226_s + ret void +} + +define void @i227_ls(i227 zeroext %x) nounwind { + store i227 %x, i227* @i227_s + ret void +} + +define void @i228_ls(i228 zeroext %x) nounwind { + store i228 %x, i228* @i228_s + ret void +} + +define void @i229_ls(i229 zeroext %x) nounwind { + store i229 %x, i229* @i229_s + ret void +} + +define void @i230_ls(i230 zeroext %x) nounwind { + store i230 %x, i230* @i230_s + ret void +} + +define void @i231_ls(i231 zeroext %x) nounwind { + store i231 %x, i231* @i231_s + ret void +} + +define void @i232_ls(i232 zeroext %x) nounwind { + store i232 %x, i232* @i232_s + ret void +} + +define void @i233_ls(i233 zeroext %x) nounwind { + store i233 %x, i233* @i233_s + ret void +} + +define void @i234_ls(i234 zeroext %x) nounwind { + store i234 %x, i234* @i234_s + ret void +} + +define void @i235_ls(i235 zeroext %x) nounwind { + store i235 %x, i235* @i235_s + ret void +} + +define void @i236_ls(i236 zeroext %x) nounwind { + store i236 %x, i236* @i236_s + ret void +} + +define void @i237_ls(i237 zeroext %x) nounwind { + store i237 %x, i237* @i237_s + ret void +} + +define void @i238_ls(i238 zeroext %x) nounwind { + store i238 %x, i238* @i238_s + ret void +} + +define void @i239_ls(i239 zeroext %x) nounwind { + store i239 %x, i239* @i239_s + ret void +} + +define void @i240_ls(i240 zeroext %x) nounwind { + store i240 %x, i240* @i240_s + ret void +} + +define void @i241_ls(i241 zeroext %x) nounwind { + store i241 %x, i241* @i241_s + ret void +} + +define void @i242_ls(i242 zeroext %x) nounwind { + store i242 %x, i242* 
@i242_s + ret void +} + +define void @i243_ls(i243 zeroext %x) nounwind { + store i243 %x, i243* @i243_s + ret void +} + +define void @i244_ls(i244 zeroext %x) nounwind { + store i244 %x, i244* @i244_s + ret void +} + +define void @i245_ls(i245 zeroext %x) nounwind { + store i245 %x, i245* @i245_s + ret void +} + +define void @i246_ls(i246 zeroext %x) nounwind { + store i246 %x, i246* @i246_s + ret void +} + +define void @i247_ls(i247 zeroext %x) nounwind { + store i247 %x, i247* @i247_s + ret void +} + +define void @i248_ls(i248 zeroext %x) nounwind { + store i248 %x, i248* @i248_s + ret void +} + +define void @i249_ls(i249 zeroext %x) nounwind { + store i249 %x, i249* @i249_s + ret void +} + +define void @i250_ls(i250 zeroext %x) nounwind { + store i250 %x, i250* @i250_s + ret void +} + +define void @i251_ls(i251 zeroext %x) nounwind { + store i251 %x, i251* @i251_s + ret void +} + +define void @i252_ls(i252 zeroext %x) nounwind { + store i252 %x, i252* @i252_s + ret void +} + +define void @i253_ls(i253 zeroext %x) nounwind { + store i253 %x, i253* @i253_s + ret void +} + +define void @i254_ls(i254 zeroext %x) nounwind { + store i254 %x, i254* @i254_s + ret void +} + +define void @i255_ls(i255 zeroext %x) nounwind { + store i255 %x, i255* @i255_s + ret void +} + +define void @i256_ls(i256 zeroext %x) nounwind { + store i256 %x, i256* @i256_s + ret void +}
diff --git a/src/LLVM/test/CodeGen/Generic/BasicInstrs.ll b/src/LLVM/test/CodeGen/Generic/BasicInstrs.ll new file mode 100644 index 0000000..f9cd662 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/BasicInstrs.ll
@@ -0,0 +1,54 @@ +; New testcase, this contains a bunch of simple instructions that should be +; handled by a code generator. + +; RUN: llc < %s + +define i32 @add(i32 %A, i32 %B) { + %R = add i32 %A, %B ; <i32> [#uses=1] + ret i32 %R +} + +define i32 @sub(i32 %A, i32 %B) { + %R = sub i32 %A, %B ; <i32> [#uses=1] + ret i32 %R +} + +define i32 @mul(i32 %A, i32 %B) { + %R = mul i32 %A, %B ; <i32> [#uses=1] + ret i32 %R +} + +define i32 @sdiv(i32 %A, i32 %B) { + %R = sdiv i32 %A, %B ; <i32> [#uses=1] + ret i32 %R +} + +define i32 @udiv(i32 %A, i32 %B) { + %R = udiv i32 %A, %B ; <i32> [#uses=1] + ret i32 %R +} + +define i32 @srem(i32 %A, i32 %B) { + %R = srem i32 %A, %B ; <i32> [#uses=1] + ret i32 %R +} + +define i32 @urem(i32 %A, i32 %B) { + %R = urem i32 %A, %B ; <i32> [#uses=1] + ret i32 %R +} + +define i32 @and(i32 %A, i32 %B) { + %R = and i32 %A, %B ; <i32> [#uses=1] + ret i32 %R +} + +define i32 @or(i32 %A, i32 %B) { + %R = or i32 %A, %B ; <i32> [#uses=1] + ret i32 %R +} + +define i32 @xor(i32 %A, i32 %B) { + %R = xor i32 %A, %B ; <i32> [#uses=1] + ret i32 %R +}
diff --git a/src/LLVM/test/CodeGen/Generic/ConstantExprLowering.ll b/src/LLVM/test/CodeGen/Generic/ConstantExprLowering.ll new file mode 100644 index 0000000..fcbe9ca --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/ConstantExprLowering.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s + +@.str_1 = internal constant [16 x i8] c"%d %d %d %d %d\0A\00" ; <[16 x i8]*> [#uses=1] +@XA = external global i32 ; <i32*> [#uses=1] +@XB = external global i32 ; <i32*> [#uses=1] + +declare i32 @printf(i8*, ...) + +define void @test(i32 %A, i32 %B, i32 %C, i32 %D) { +entry: + %t1 = icmp slt i32 %A, 0 ; <i1> [#uses=1] + br i1 %t1, label %less, label %not_less + +less: ; preds = %entry + br label %not_less + +not_less: ; preds = %less, %entry + %t2 = phi i32 [ sub (i32 ptrtoint (i32* @XA to i32), i32 ptrtoint (i32* @XB to i32)), %less ], [ sub (i32 ptrtoint (i32* @XA to i32), i32 ptrtoint (i32* @XB to i32)), %entry ] ; <i32> [#uses=1] + %tmp.39 = call i32 (i8*, ...)* @printf( i8* getelementptr ([16 x i8]* @.str_1, i64 0, i64 0), i32 %t2 ) ; <i32> [#uses=0] + ret void +} +
diff --git a/src/LLVM/test/CodeGen/Generic/Makefile b/src/LLVM/test/CodeGen/Generic/Makefile new file mode 100644 index 0000000..6f60af5 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/Makefile
@@ -0,0 +1,23 @@ +# Makefile for running ad-hoc custom LLVM tests +# +%.bc: %.ll + llvm-as $< + +%.llc.s: %.bc + llc $< -o $@ + +%.gcc.s: %.c + gcc -O0 -S $< -o $@ + +%.nat: %.s + gcc -O0 -lm $< -o $@ + +%.cbe.out: %.cbe.nat + ./$< > $@ + +%.out: %.nat + ./$< > $@ + +%.clean: + rm -f $(patsubst %.clean,%.bc,$@) $(patsubst %.clean,%.*.s,$@) \ + $(patsubst %.clean,%.*.nat,$@) $(patsubst %.clean,%.*.out,$@)
diff --git a/src/LLVM/test/CodeGen/Generic/add-with-overflow-128.ll b/src/LLVM/test/CodeGen/Generic/add-with-overflow-128.ll new file mode 100644 index 0000000..33f44d6 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/add-with-overflow-128.ll
@@ -0,0 +1,34 @@ +; RUN: llc < %s + +@ok = internal constant [4 x i8] c"%d\0A\00" +@no = internal constant [4 x i8] c"no\0A\00" + + + +define i1 @func2(i128 zeroext %v1, i128 zeroext %v2) nounwind { +entry: + %t = call {i128, i1} @llvm.uadd.with.overflow.i128(i128 %v1, i128 %v2) + %sum = extractvalue {i128, i1} %t, 0 + %sum32 = trunc i128 %sum to i32 + %obit = extractvalue {i128, i1} %t, 1 + br i1 %obit, label %carry, label %normal + +normal: + %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum32 ) nounwind + ret i1 true + +carry: + %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind + ret i1 false +} + +declare i32 @printf(i8*, ...) nounwind +declare {i96, i1} @llvm.sadd.with.overflow.i96(i96, i96) +declare {i128, i1} @llvm.uadd.with.overflow.i128(i128, i128) + +define i1 @func1(i96 signext %v1, i96 signext %v2) nounwind { +entry: + %t = call {i96, i1} @llvm.sadd.with.overflow.i96(i96 %v1, i96 %v2) + %obit = extractvalue {i96, i1} %t, 1 + ret i1 %obit +}
diff --git a/src/LLVM/test/CodeGen/Generic/add-with-overflow-24.ll b/src/LLVM/test/CodeGen/Generic/add-with-overflow-24.ll new file mode 100644 index 0000000..63f5a22 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/add-with-overflow-24.ll
@@ -0,0 +1,42 @@ +; RUN: llc < %s + +@ok = internal constant [4 x i8] c"%d\0A\00" +@no = internal constant [4 x i8] c"no\0A\00" + +define i1 @func1(i24 signext %v1, i24 signext %v2) nounwind { +entry: + %t = call {i24, i1} @llvm.sadd.with.overflow.i24(i24 %v1, i24 %v2) + %sum = extractvalue {i24, i1} %t, 0 + %sum32 = sext i24 %sum to i32 + %obit = extractvalue {i24, i1} %t, 1 + br i1 %obit, label %overflow, label %normal + +normal: + %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum32 ) nounwind + ret i1 true + +overflow: + %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind + ret i1 false +} + +define i1 @func2(i24 zeroext %v1, i24 zeroext %v2) nounwind { +entry: + %t = call {i24, i1} @llvm.uadd.with.overflow.i24(i24 %v1, i24 %v2) + %sum = extractvalue {i24, i1} %t, 0 + %sum32 = zext i24 %sum to i32 + %obit = extractvalue {i24, i1} %t, 1 + br i1 %obit, label %carry, label %normal + +normal: + %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum32 ) nounwind + ret i1 true + +carry: + %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind + ret i1 false +} + +declare i32 @printf(i8*, ...) nounwind +declare {i24, i1} @llvm.sadd.with.overflow.i24(i24, i24) +declare {i24, i1} @llvm.uadd.with.overflow.i24(i24, i24)
diff --git a/src/LLVM/test/CodeGen/Generic/add-with-overflow.ll b/src/LLVM/test/CodeGen/Generic/add-with-overflow.ll new file mode 100644 index 0000000..0c2c960 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/add-with-overflow.ll
@@ -0,0 +1,41 @@ +; RUN: llc < %s +; RUN: llc < %s -fast-isel + +@ok = internal constant [4 x i8] c"%d\0A\00" +@no = internal constant [4 x i8] c"no\0A\00" + +define i1 @func1(i32 %v1, i32 %v2) nounwind { +entry: + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2) + %sum = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + br i1 %obit, label %overflow, label %normal + +normal: + %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind + ret i1 true + +overflow: + %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind + ret i1 false +} + +define i1 @func2(i32 %v1, i32 %v2) nounwind { +entry: + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2) + %sum = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + br i1 %obit, label %overflow, label %normal + +normal: + %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind + ret i1 true + +overflow: + %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind + ret i1 false +} + +declare i32 @printf(i8*, ...) nounwind +declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) +declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32)
diff --git a/src/LLVM/test/CodeGen/Generic/addr-label.ll b/src/LLVM/test/CodeGen/Generic/addr-label.ll new file mode 100644 index 0000000..0dbe502 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/addr-label.ll
@@ -0,0 +1,81 @@ +; RUN: llc %s -o - + +;; Reference to a label that gets deleted. +define i8* @test1() nounwind { +entry: + ret i8* blockaddress(@test1b, %test_label) +} + +define i32 @test1b() nounwind { +entry: + ret i32 -1 +test_label: + br label %ret +ret: + ret i32 -1 +} + + +;; Issues with referring to a label that gets RAUW'd later. +define i32 @test2a() nounwind { +entry: + %target = bitcast i8* blockaddress(@test2b, %test_label) to i8* + + call i32 @test2b(i8* %target) + + ret i32 0 +} + +define i32 @test2b(i8* %target) nounwind { +entry: + indirectbr i8* %target, [label %test_label] + +test_label: +; assume some code here... + br label %ret + +ret: + ret i32 -1 +} + +; Issues with a BB that gets RAUW'd to another one after references are +; generated. +define void @test3(i8** %P, i8** %Q) nounwind { +entry: + store i8* blockaddress(@test3b, %test_label), i8** %P + store i8* blockaddress(@test3b, %ret), i8** %Q + ret void +} + +define i32 @test3b() nounwind { +entry: + br label %test_label +test_label: + br label %ret +ret: + ret i32 -1 +} + + +; PR6673 + +define i64 @test4a() { + %target = bitcast i8* blockaddress(@test4b, %usermain) to i8* + %ret = call i64 @test4b(i8* %target) + + ret i64 %ret +} + +define i64 @test4b(i8* %Code) { +entry: + indirectbr i8* %Code, [label %usermain] +usermain: + br label %label_line_0 + +label_line_0: + br label %label_line_1 + +label_line_1: + %target = ptrtoint i8* blockaddress(@test4b, %label_line_0) to i64 + ret i64 %target +}
diff --git a/src/LLVM/test/CodeGen/Generic/asm-large-immediate.ll b/src/LLVM/test/CodeGen/Generic/asm-large-immediate.ll new file mode 100644 index 0000000..605665b --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/asm-large-immediate.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s | grep 68719476738 + +define void @test() { +entry: + tail call void asm sideeffect "/* result: ${0:c} */", "i,~{dirflag},~{fpsr},~{flags}"( i64 68719476738 ) + ret void +} +
diff --git a/src/LLVM/test/CodeGen/Generic/badCallArgLRLLVM.ll b/src/LLVM/test/CodeGen/Generic/badCallArgLRLLVM.ll new file mode 100644 index 0000000..239cc2f --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/badCallArgLRLLVM.ll
@@ -0,0 +1,31 @@ +; RUN: llc < %s + +; This caused a problem because the argument of a call was defined by +; the return value of another call that appears later in the code. +; When processing the first call, the second call has not yet been processed +; so no LiveRange has been created for its return value. +; +; llc dies in UltraSparcRegInfo::suggestRegs4CallArgs() with: +; ERROR: In call instr, no LR for arg: 0x1009e0740 +; + +declare i32 @getInt(i32) + +define i32 @main(i32 %argc, i8** %argv) { +bb0: + br label %bb2 + +bb1: ; preds = %bb2 + %reg222 = call i32 @getInt( i32 %reg218 ) ; <i32> [#uses=1] + %reg110 = add i32 %reg222, 1 ; <i32> [#uses=2] + %b = icmp sle i32 %reg110, 0 ; <i1> [#uses=1] + br i1 %b, label %bb2, label %bb3 + +bb2: ; preds = %bb1, %bb0 + %reg218 = call i32 @getInt( i32 %argc ) ; <i32> [#uses=1] + br label %bb1 + +bb3: ; preds = %bb1 + ret i32 %reg110 +} +
diff --git a/src/LLVM/test/CodeGen/Generic/badFoldGEP.ll b/src/LLVM/test/CodeGen/Generic/badFoldGEP.ll new file mode 100644 index 0000000..72c1b5e --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/badFoldGEP.ll
@@ -0,0 +1,27 @@ +; RUN: llc < %s + +;; GetMemInstArgs() folded the two getElementPtr instructions together, +;; producing an illegal getElementPtr. That's because the type generated +;; by the last index for the first one is a structure field, not an array +;; element, and the second one indexes off that structure field. +;; The code is legal but not type-safe and the two GEPs should not be folded. +;; +;; This code fragment is from Spec/CINT2000/197.parser/197.parser.bc, +;; file post_process.c, function build_domain(). +;; (Modified to replace store with load and return load value.) +;; + %Domain = type { i8*, i32, i32*, i32, i32, i32*, %Domain* } +@domain_array = external global [497 x %Domain] ; <[497 x %Domain]*> [#uses=2] + +declare void @opaque([497 x %Domain]*) + +define i32 @main(i32 %argc, i8** %argv) { +bb0: + call void @opaque( [497 x %Domain]* @domain_array ) + %cann-indvar-idxcast = sext i32 %argc to i64 ; <i64> [#uses=1] + %reg841 = getelementptr [497 x %Domain]* @domain_array, i64 0, i64 %cann-indvar-idxcast, i32 3 ; <i32*> [#uses=1] + %reg846 = getelementptr i32* %reg841, i64 1 ; <i32*> [#uses=1] + %reg820 = load i32* %reg846 ; <i32> [#uses=1] + ret i32 %reg820 +} +
diff --git a/src/LLVM/test/CodeGen/Generic/badarg6.ll b/src/LLVM/test/CodeGen/Generic/badarg6.ll new file mode 100644 index 0000000..cbd1187 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/badarg6.ll
@@ -0,0 +1,32 @@ +; RUN: llc < %s + +; On this code, llc did not pass the sixth argument (%reg321) to printf. +; It passed the first five in %o0 - %o4, but never initialized %o5. +@.LC12 = internal global [44 x i8] c"\09\09M = %g, I = %g, V = %g\0A\09\09O = %g, E = %g\0A\0A\00" ; <[44 x i8]*> [#uses=1] + +declare i32 @printf(i8*, ...) + +declare double @opaque(double) + +define i32 @main(i32 %argc, i8** %argv) { +bb25: + %b = icmp sle i32 %argc, 2 ; <i1> [#uses=1] + br i1 %b, label %bb42, label %bb43 + +bb42: ; preds = %bb25 + %reg315 = call double @opaque( double 3.000000e+00 ) ; <double> [#uses=1] + %reg316 = call double @opaque( double 3.100000e+00 ) ; <double> [#uses=1] + %reg317 = call double @opaque( double 3.200000e+00 ) ; <double> [#uses=1] + %reg318 = call double @opaque( double 3.300000e+00 ) ; <double> [#uses=1] + %reg319 = call double @opaque( double 3.400000e+00 ) ; <double> [#uses=1] + br label %bb43 + +bb43: ; preds = %bb42, %bb25 + %reg321 = phi double [ 2.000000e-01, %bb25 ], [ %reg315, %bb42 ] ; <double> [#uses=1] + %reg322 = phi double [ 6.000000e+00, %bb25 ], [ %reg316, %bb42 ] ; <double> [#uses=1] + %reg323 = phi double [ -1.000000e+00, %bb25 ], [ %reg317, %bb42 ] ; <double> [#uses=1] + %reg324 = phi double [ -1.000000e+00, %bb25 ], [ %reg318, %bb42 ] ; <double> [#uses=1] + %reg325 = phi double [ 1.000000e+00, %bb25 ], [ %reg319, %bb42 ] ; <double> [#uses=1] + %reg609 = call i32 (i8*, ...)* @printf( i8* getelementptr ([44 x i8]* @.LC12, i64 0, i64 0), double %reg325, double %reg324, double %reg323, double %reg322, double %reg321 ) ; <i32> [#uses=0] + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/Generic/bool-to-double.ll b/src/LLVM/test/CodeGen/Generic/bool-to-double.ll new file mode 100644 index 0000000..2cffded --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/bool-to-double.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s +define double @test(i1 %X) { + %Y = uitofp i1 %X to double ; <double> [#uses=1] + ret double %Y +} +
diff --git a/src/LLVM/test/CodeGen/Generic/bool-vector.ll b/src/LLVM/test/CodeGen/Generic/bool-vector.ll new file mode 100644 index 0000000..4758697 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/bool-vector.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s +; PR1845 + +define void @boolVectorSelect(<4 x i1>* %boolVectorPtr) { +Body: + %castPtr = bitcast <4 x i1>* %boolVectorPtr to <4 x i1>* + %someBools = load <4 x i1>* %castPtr, align 1 ; <<4 x i1>> + %internal = alloca <4 x i1>, align 16 ; <<4 x i1>*> [#uses=1] + store <4 x i1> %someBools, <4 x i1>* %internal, align 1 + ret void +}
diff --git a/src/LLVM/test/CodeGen/Generic/builtin-expect.ll b/src/LLVM/test/CodeGen/Generic/builtin-expect.ll new file mode 100644 index 0000000..e8cd07b --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/builtin-expect.ll
@@ -0,0 +1,223 @@ +; RUN: llc < %s + +define i32 @test1(i32 %x) nounwind uwtable ssp { +entry: + %retval = alloca i32, align 4 + %x.addr = alloca i32, align 4 + store i32 %x, i32* %x.addr, align 4 + %tmp = load i32* %x.addr, align 4 + %cmp = icmp sgt i32 %tmp, 1 + %conv = zext i1 %cmp to i32 + %conv1 = sext i32 %conv to i64 + %expval = call i64 @llvm.expect.i64(i64 %conv1, i64 1) + %tobool = icmp ne i64 %expval, 0 + br i1 %tobool, label %if.then, label %if.end + +if.then: ; preds = %entry + %call = call i32 (...)* @f() + store i32 %call, i32* %retval + br label %return + +if.end: ; preds = %entry + store i32 1, i32* %retval + br label %return + +return: ; preds = %if.end, %if.then + %0 = load i32* %retval + ret i32 %0 +} + +declare i64 @llvm.expect.i64(i64, i64) nounwind readnone + +declare i32 @f(...) + +define i32 @test2(i32 %x) nounwind uwtable ssp { +entry: + %retval = alloca i32, align 4 + %x.addr = alloca i32, align 4 + store i32 %x, i32* %x.addr, align 4 + %tmp = load i32* %x.addr, align 4 + %conv = sext i32 %tmp to i64 + %expval = call i64 @llvm.expect.i64(i64 %conv, i64 1) + %tobool = icmp ne i64 %expval, 0 + br i1 %tobool, label %if.then, label %if.end + +if.then: ; preds = %entry + %call = call i32 (...)* @f() + store i32 %call, i32* %retval + br label %return + +if.end: ; preds = %entry + store i32 1, i32* %retval + br label %return + +return: ; preds = %if.end, %if.then + %0 = load i32* %retval + ret i32 %0 +} + +define i32 @test3(i32 %x) nounwind uwtable ssp { +entry: + %retval = alloca i32, align 4 + %x.addr = alloca i32, align 4 + store i32 %x, i32* %x.addr, align 4 + %tmp = load i32* %x.addr, align 4 + %tobool = icmp ne i32 %tmp, 0 + %lnot = xor i1 %tobool, true + %lnot.ext = zext i1 %lnot to i32 + %conv = sext i32 %lnot.ext to i64 + %expval = call i64 @llvm.expect.i64(i64 %conv, i64 1) + %tobool1 = icmp ne i64 %expval, 0 + br i1 %tobool1, label %if.then, label %if.end + +if.then: ; preds = %entry + %call = call i32 (...)* @f() + store i32 %call, 
i32* %retval + br label %return + +if.end: ; preds = %entry + store i32 1, i32* %retval + br label %return + +return: ; preds = %if.end, %if.then + %0 = load i32* %retval + ret i32 %0 +} + +define i32 @test4(i32 %x) nounwind uwtable ssp { +entry: + %retval = alloca i32, align 4 + %x.addr = alloca i32, align 4 + store i32 %x, i32* %x.addr, align 4 + %tmp = load i32* %x.addr, align 4 + %tobool = icmp ne i32 %tmp, 0 + %lnot = xor i1 %tobool, true + %lnot1 = xor i1 %lnot, true + %lnot.ext = zext i1 %lnot1 to i32 + %conv = sext i32 %lnot.ext to i64 + %expval = call i64 @llvm.expect.i64(i64 %conv, i64 1) + %tobool2 = icmp ne i64 %expval, 0 + br i1 %tobool2, label %if.then, label %if.end + +if.then: ; preds = %entry + %call = call i32 (...)* @f() + store i32 %call, i32* %retval + br label %return + +if.end: ; preds = %entry + store i32 1, i32* %retval + br label %return + +return: ; preds = %if.end, %if.then + %0 = load i32* %retval + ret i32 %0 +} + +define i32 @test5(i32 %x) nounwind uwtable ssp { +entry: + %retval = alloca i32, align 4 + %x.addr = alloca i32, align 4 + store i32 %x, i32* %x.addr, align 4 + %tmp = load i32* %x.addr, align 4 + %cmp = icmp slt i32 %tmp, 0 + %conv = zext i1 %cmp to i32 + %conv1 = sext i32 %conv to i64 + %expval = call i64 @llvm.expect.i64(i64 %conv1, i64 0) + %tobool = icmp ne i64 %expval, 0 + br i1 %tobool, label %if.then, label %if.end + +if.then: ; preds = %entry + %call = call i32 (...)* @f() + store i32 %call, i32* %retval + br label %return + +if.end: ; preds = %entry + store i32 1, i32* %retval + br label %return + +return: ; preds = %if.end, %if.then + %0 = load i32* %retval + ret i32 %0 +} + +define i32 @test6(i32 %x) nounwind uwtable ssp { +entry: + %retval = alloca i32, align 4 + %x.addr = alloca i32, align 4 + store i32 %x, i32* %x.addr, align 4 + %tmp = load i32* %x.addr, align 4 + %conv = sext i32 %tmp to i64 + %expval = call i64 @llvm.expect.i64(i64 %conv, i64 1) + switch i64 %expval, label %sw.epilog [ + i64 1, label %sw.bb 
+ i64 2, label %sw.bb + ] + +sw.bb: ; preds = %entry, %entry + store i32 0, i32* %retval + br label %return + +sw.epilog: ; preds = %entry + store i32 1, i32* %retval + br label %return + +return: ; preds = %sw.epilog, %sw.bb + %0 = load i32* %retval + ret i32 %0 +} + +define i32 @test7(i32 %x) nounwind uwtable ssp { +entry: + %retval = alloca i32, align 4 + %x.addr = alloca i32, align 4 + store i32 %x, i32* %x.addr, align 4 + %tmp = load i32* %x.addr, align 4 + %conv = sext i32 %tmp to i64 + %expval = call i64 @llvm.expect.i64(i64 %conv, i64 1) + switch i64 %expval, label %sw.epilog [ + i64 2, label %sw.bb + i64 3, label %sw.bb + ] + +sw.bb: ; preds = %entry, %entry + %tmp1 = load i32* %x.addr, align 4 + store i32 %tmp1, i32* %retval + br label %return + +sw.epilog: ; preds = %entry + store i32 0, i32* %retval + br label %return + +return: ; preds = %sw.epilog, %sw.bb + %0 = load i32* %retval + ret i32 %0 +} + +define i32 @test8(i32 %x) nounwind uwtable ssp { +entry: + %retval = alloca i32, align 4 + %x.addr = alloca i32, align 4 + store i32 %x, i32* %x.addr, align 4 + %tmp = load i32* %x.addr, align 4 + %cmp = icmp sgt i32 %tmp, 1 + %conv = zext i1 %cmp to i32 + %expval = call i32 @llvm.expect.i32(i32 %conv, i32 1) + %tobool = icmp ne i32 %expval, 0 + br i1 %tobool, label %if.then, label %if.end + +if.then: ; preds = %entry + %call = call i32 (...)* @f() + store i32 %call, i32* %retval + br label %return + +if.end: ; preds = %entry + store i32 1, i32* %retval + br label %return + +return: ; preds = %if.end, %if.then + %0 = load i32* %retval + ret i32 %0 +} + +declare i32 @llvm.expect.i32(i32, i32) nounwind readnone +
diff --git a/src/LLVM/test/CodeGen/Generic/call-ret0.ll b/src/LLVM/test/CodeGen/Generic/call-ret0.ll new file mode 100644 index 0000000..b28ed70 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/call-ret0.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s +define i32 @foo(i32 %x) { + ret i32 %x +} + +define i32 @main() { + %r = call i32 @foo( i32 0 ) ; <i32> [#uses=1] + ret i32 %r +} +
diff --git a/src/LLVM/test/CodeGen/Generic/call-ret42.ll b/src/LLVM/test/CodeGen/Generic/call-ret42.ll new file mode 100644 index 0000000..f43424c --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/call-ret42.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s + +define i32 @foo(i32 %x) { + ret i32 42 +} + +define i32 @main() { + %r = call i32 @foo( i32 15 ) ; <i32> [#uses=1] + ret i32 %r +}
diff --git a/src/LLVM/test/CodeGen/Generic/call-void.ll b/src/LLVM/test/CodeGen/Generic/call-void.ll new file mode 100644 index 0000000..37feeae --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/call-void.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s + +define void @foo() { + ret void +} + +define i32 @main() { + call void @foo( ) + ret i32 0 +} +
diff --git a/src/LLVM/test/CodeGen/Generic/call2-ret0.ll b/src/LLVM/test/CodeGen/Generic/call2-ret0.ll new file mode 100644 index 0000000..e7b7a4c --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/call2-ret0.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s + +define i32 @bar(i32 %x) { + ret i32 0 +} + +define i32 @foo(i32 %x) { + %q = call i32 @bar( i32 1 ) ; <i32> [#uses=1] + ret i32 %q +} + +define i32 @main() { + %r = call i32 @foo( i32 2 ) ; <i32> [#uses=1] + ret i32 %r +} +
diff --git a/src/LLVM/test/CodeGen/Generic/cast-fp.ll b/src/LLVM/test/CodeGen/Generic/cast-fp.ll new file mode 100644 index 0000000..90e53ee --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/cast-fp.ll
@@ -0,0 +1,33 @@ +; RUN: llc < %s +@a_fstr = internal constant [8 x i8] c"a = %f\0A\00" ; <[8 x i8]*> [#uses=1] +@a_lstr = internal constant [10 x i8] c"a = %lld\0A\00" ; <[10 x i8]*> [#uses=1] +@a_dstr = internal constant [8 x i8] c"a = %d\0A\00" ; <[8 x i8]*> [#uses=1] +@b_dstr = internal constant [8 x i8] c"b = %d\0A\00" ; <[8 x i8]*> [#uses=1] +@b_fstr = internal constant [8 x i8] c"b = %f\0A\00" ; <[8 x i8]*> [#uses=1] +@A = global double 2.000000e+00 ; <double*> [#uses=1] +@B = global i32 2 ; <i32*> [#uses=1] + +declare i32 @printf(i8*, ...) + +define i32 @main() { + %a = load double* @A ; <double> [#uses=4] + %a_fs = getelementptr [8 x i8]* @a_fstr, i64 0, i64 0 ; <i8*> [#uses=1] + call i32 (i8*, ...)* @printf( i8* %a_fs, double %a ) ; <i32>:1 [#uses=0] + %a_d2l = fptosi double %a to i64 ; <i64> [#uses=1] + %a_ls = getelementptr [10 x i8]* @a_lstr, i64 0, i64 0 ; <i8*> [#uses=1] + call i32 (i8*, ...)* @printf( i8* %a_ls, i64 %a_d2l ) ; <i32>:2 [#uses=0] + %a_d2i = fptosi double %a to i32 ; <i32> [#uses=2] + %a_ds = getelementptr [8 x i8]* @a_dstr, i64 0, i64 0 ; <i8*> [#uses=3] + call i32 (i8*, ...)* @printf( i8* %a_ds, i32 %a_d2i ) ; <i32>:3 [#uses=0] + %a_d2sb = fptosi double %a to i8 ; <i8> [#uses=1] + call i32 (i8*, ...)* @printf( i8* %a_ds, i8 %a_d2sb ) ; <i32>:4 [#uses=0] + %a_d2i2sb = trunc i32 %a_d2i to i8 ; <i8> [#uses=1] + call i32 (i8*, ...)* @printf( i8* %a_ds, i8 %a_d2i2sb ) ; <i32>:5 [#uses=0] + %b = load i32* @B ; <i32> [#uses=2] + %b_ds = getelementptr [8 x i8]* @b_dstr, i64 0, i64 0 ; <i8*> [#uses=1] + call i32 (i8*, ...)* @printf( i8* %b_ds, i32 %b ) ; <i32>:6 [#uses=0] + %b_i2d = sitofp i32 %b to double ; <double> [#uses=1] + %b_fs = getelementptr [8 x i8]* @b_fstr, i64 0, i64 0 ; <i8*> [#uses=1] + call i32 (i8*, ...)* @printf( i8* %b_fs, double %b_i2d ) ; <i32>:7 [#uses=0] + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/Generic/constindices.ll b/src/LLVM/test/CodeGen/Generic/constindices.ll new file mode 100644 index 0000000..528cb5b --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/constindices.ll
@@ -0,0 +1,44 @@ +; RUN: llc < %s + +; Test that a sequence of constant indices are folded correctly +; into the equivalent offset at compile-time. + + %MixedA = type { float, [15 x i32], i8, float } + %MixedB = type { float, %MixedA, float } +@fmtArg = internal global [44 x i8] c"sqrt(2) = %g\0Aexp(1) = %g\0Api = %g\0Afive = %g\0A\00" ; <[44 x i8]*> [#uses=1] + +declare i32 @printf(i8*, ...) + +define i32 @main() { + %ScalarA = alloca %MixedA ; <%MixedA*> [#uses=1] + %ScalarB = alloca %MixedB ; <%MixedB*> [#uses=1] + %ArrayA = alloca %MixedA, i32 4 ; <%MixedA*> [#uses=3] + %ArrayB = alloca %MixedB, i32 3 ; <%MixedB*> [#uses=2] + %I1 = getelementptr %MixedA* %ScalarA, i64 0, i32 0 ; <float*> [#uses=2] + store float 0x3FF6A09020000000, float* %I1 + %I2 = getelementptr %MixedB* %ScalarB, i64 0, i32 1, i32 0 ; <float*> [#uses=2] + store float 0x4005BF1420000000, float* %I2 + %fptrA = getelementptr %MixedA* %ArrayA, i64 1, i32 0 ; <float*> [#uses=1] + %fptrB = getelementptr %MixedB* %ArrayB, i64 2, i32 1, i32 0 ; <float*> [#uses=1] + store float 0x400921CAC0000000, float* %fptrA + store float 5.000000e+00, float* %fptrB + + ;; Test that a sequence of GEPs with constant indices are folded right + %fptrA1 = getelementptr %MixedA* %ArrayA, i64 3 ; <%MixedA*> [#uses=1] + %fptrA2 = getelementptr %MixedA* %fptrA1, i64 0, i32 1 ; <[15 x i32]*> [#uses=1] + %fptrA3 = getelementptr [15 x i32]* %fptrA2, i64 0, i64 8 ; <i32*> [#uses=1] + store i32 5, i32* %fptrA3 + %sqrtTwo = load float* %I1 ; <float> [#uses=1] + %exp = load float* %I2 ; <float> [#uses=1] + %I3 = getelementptr %MixedA* %ArrayA, i64 1, i32 0 ; <float*> [#uses=1] + %pi = load float* %I3 ; <float> [#uses=1] + %I4 = getelementptr %MixedB* %ArrayB, i64 2, i32 1, i32 0 ; <float*> [#uses=1] + %five = load float* %I4 ; <float> [#uses=1] + %dsqrtTwo = fpext float %sqrtTwo to double ; <double> [#uses=1] + %dexp = fpext float %exp to double ; <double> [#uses=1] + %dpi = fpext float %pi to double ; <double> [#uses=1] + %dfive 
= fpext float %five to double ; <double> [#uses=1] + %castFmt = getelementptr [44 x i8]* @fmtArg, i64 0, i64 0 ; <i8*> [#uses=1] + call i32 (i8*, ...)* @printf( i8* %castFmt, double %dsqrtTwo, double %dexp, double %dpi, double %dfive ) ; <i32>:1 [#uses=0] + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/Generic/crash.ll b/src/LLVM/test/CodeGen/Generic/crash.ll new file mode 100644 index 0000000..d889389 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/crash.ll
@@ -0,0 +1,68 @@ +; RUN: llc %s -o - + +; PR6332 +%struct.AVCodecTag = type {} +@ff_codec_bmp_tags = external global [0 x %struct.AVCodecTag] +@tags = global [1 x %struct.AVCodecTag*] [%struct.AVCodecTag* getelementptr +inbounds ([0 x %struct.AVCodecTag]* @ff_codec_bmp_tags, i32 0, i32 0)] + + +; rdar://8878965 + +%struct.CAMERA = type { [3 x double], [3 x double], [3 x double], [3 x double], [3 x double], [3 x double], double, double, i32, double, double, i32, double, i32* } + +define void @Parse_Camera(%struct.CAMERA** nocapture %Camera_Ptr) nounwind { +entry: +%.pre = load %struct.CAMERA** %Camera_Ptr, align 4 +%0 = getelementptr inbounds %struct.CAMERA* %.pre, i32 0, i32 1, i32 0 +%1 = getelementptr inbounds %struct.CAMERA* %.pre, i32 0, i32 1, i32 2 +br label %bb32 + +bb32: ; preds = %bb6 +%2 = load double* %0, align 4 +%3 = load double* %1, align 4 +%4 = load double* %0, align 4 +call void @Parse_Vector(double* %0) nounwind +%5 = call i32 @llvm.objectsize.i32(i8* undef, i1 false) +%6 = icmp eq i32 %5, -1 +br i1 %6, label %bb34, label %bb33 + +bb33: ; preds = %bb32 +unreachable + +bb34: ; preds = %bb32 +unreachable + +} + +declare void @Parse_Vector(double*) +declare i32 @llvm.objectsize.i32(i8*, i1) + + +; PR9578 +%struct.S0 = type { i32, i8, i32 } + +define void @func_82() nounwind optsize { +entry: + br label %for.body.i + +for.body.i: ; preds = %for.body.i, %entry + br i1 undef, label %func_74.exit.for.cond29.thread_crit_edge, label %for.body.i + +func_74.exit.for.cond29.thread_crit_edge: ; preds = %for.body.i + %f13576.pre = getelementptr inbounds %struct.S0* undef, i64 0, i32 1 + store i8 0, i8* %f13576.pre, align 4, !tbaa !0 + br label %lbl_468 + +lbl_468: ; preds = %lbl_468, %func_74.exit.for.cond29.thread_crit_edge + %f13577.ph = phi i8* [ %f13576.pre, %func_74.exit.for.cond29.thread_crit_edge ], [ %f135.pre, %lbl_468 ] + store i8 1, i8* %f13577.ph, align 1 + %f135.pre = getelementptr inbounds %struct.S0* undef, i64 0, i32 1 + br i1 undef, label 
%lbl_468, label %for.end74 + +for.end74: ; preds = %lbl_468 + ret void +} + +!0 = metadata !{metadata !"omnipotent char", metadata !1} +!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/src/LLVM/test/CodeGen/Generic/dbg_value.ll b/src/LLVM/test/CodeGen/Generic/dbg_value.ll new file mode 100644 index 0000000..ce3364d --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/dbg_value.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s +; rdar://7759395 + +%0 = type { i32, i32 } + +define void @t(%0*, i32, i32, i32, i32) nounwind { + tail call void @llvm.dbg.value(metadata !{%0* %0}, i64 0, metadata !0) + unreachable +} + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!0 = metadata !{i32 0} ;
diff --git a/src/LLVM/test/CodeGen/Generic/dg.exp b/src/LLVM/test/CodeGen/Generic/dg.exp new file mode 100644 index 0000000..f2e8f3b --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/dg.exp
@@ -0,0 +1,3 @@ +load_lib llvm.exp + +RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/src/LLVM/test/CodeGen/Generic/div-neg-power-2.ll b/src/LLVM/test/CodeGen/Generic/div-neg-power-2.ll new file mode 100644 index 0000000..753e3da --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/div-neg-power-2.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s + +define i32 @test(i32 %X) { + %Y = sdiv i32 %X, -2 ; <i32> [#uses=1] + ret i32 %Y +} +
diff --git a/src/LLVM/test/CodeGen/Generic/edge-bundles-blockIDs.ll b/src/LLVM/test/CodeGen/Generic/edge-bundles-blockIDs.ll new file mode 100644 index 0000000..b4ae415 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/edge-bundles-blockIDs.ll
@@ -0,0 +1,81 @@ +; Make sure EdgeBoundles handles the case when the function size is less then +; the number of block IDs. +; RUN: llc -regalloc=fast < %s + +define void @foo() nounwind { +entry: + br i1 undef, label %bb5.i1632, label %bb1.i1605 + +bb1.i1605: ; preds = %entry + br i1 undef, label %bb5.i73.i, label %bb3.i68.i + +bb3.i68.i: ; preds = %bb1.i1605 + unreachable + +bb5.i73.i: ; preds = %bb1.i1605 + br i1 undef, label %bb7.i79.i, label %bb6.i76.i + +bb6.i76.i: ; preds = %bb5.i73.i + unreachable + +bb7.i79.i: ; preds = %bb5.i73.i + br i1 undef, label %bb.i.i1608, label %bb8.i82.i + +bb8.i82.i: ; preds = %bb7.i79.i + unreachable + +bb.i.i1608: ; preds = %bb.i.i1608, %bb7.i79.i + br i1 undef, label %bb1.i.dis.preheader_crit_edge.i, label %bb.i.i1608 + +bb1.i.dis.preheader_crit_edge.i: ; preds = %bb.i.i1608 + br label %dis.i + +bb3.i.i1610: ; preds = %bb8.i.i, %bb7.i.i1615 + br i1 undef, label %bb5.i.i1613, label %bb4.i.i1611 + +bb4.i.i1611: ; preds = %bb3.i.i1610 + br label %bb5.i.i1613 + +bb5.i.i1613: ; preds = %bb4.i.i1611, %bb3.i.i1610 + unreachable + +bb7.i.i1615: ; preds = %getfloder.exit.i + br i1 undef, label %bb3.i.i1610, label %bb8.i.i + +bb8.i.i: ; preds = %bb7.i.i1615 + br i1 undef, label %bb3.i.i1610, label %bb9.i.i + +bb9.i.i: ; preds = %bb8.i.i + br label %bb12.i.i + +bb12.i.i: ; preds = %bb12.i.i, %bb9.i.i + br i1 undef, label %bb13.i.bb14.i_crit_edge.i, label %bb12.i.i + +bb13.i.bb14.i_crit_edge.i: ; preds = %bb12.i.i + br i1 undef, label %bb25.i.i, label %bb20.i.i + +bb19.i.i: ; preds = %bb20.i.i + br label %bb20.i.i + +bb20.i.i: ; preds = %bb19.i.i, %bb13.i.bb14.i_crit_edge.i + %or.cond.i = or i1 undef, undef + br i1 %or.cond.i, label %bb25.i.i, label %bb19.i.i + +bb25.i.i: ; preds = %bb20.i.i, %bb13.i.bb14.i_crit_edge.i + unreachable + +bb5.i1632: ; preds = %entry + unreachable + +dis.i: ; preds = %getfloder.exit.i, %bb1.i.dis.preheader_crit_edge.i + br i1 undef, label %bb.i96.i, label %bb1.i102.i + +bb.i96.i: ; preds = %dis.i + br label 
%getfloder.exit.i + +bb1.i102.i: ; preds = %dis.i + br label %getfloder.exit.i + +getfloder.exit.i: ; preds = %bb1.i102.i, %bb.i96.i + br i1 undef, label %bb7.i.i1615, label %dis.i +}
diff --git a/src/LLVM/test/CodeGen/Generic/empty-load-store.ll b/src/LLVM/test/CodeGen/Generic/empty-load-store.ll new file mode 100644 index 0000000..bca7305 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/empty-load-store.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s +; PR2612 + +@current_foo = internal global { } zeroinitializer + +define i32 @foo() { +entry: + %retval = alloca i32 + store i32 0, i32* %retval + %local_foo = alloca { } + load { }* @current_foo + store { } %0, { }* %local_foo + br label %return + +return: + load i32* %retval + ret i32 %1 +}
diff --git a/src/LLVM/test/CodeGen/Generic/exception-handling.ll b/src/LLVM/test/CodeGen/Generic/exception-handling.ll new file mode 100644 index 0000000..376e1f1 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/exception-handling.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s +; PR10733 +declare void @_Znam() + +define void @_ZNK14gIndexOdometer15AfterExcisionOfERi() uwtable align 2 { +_ZN6Gambit5ArrayIiEC2Ej.exit36: + br label %"9" + +"9": ; preds = %"10", %_ZN6Gambit5ArrayIiEC2Ej.exit36 + %indvar82 = phi i64 [ 0, %_ZN6Gambit5ArrayIiEC2Ej.exit36 ], [ %tmp85, %"10" ] + %tmp85 = add i64 %indvar82, 1 + %tmp = trunc i64 %tmp85 to i32 + invoke void @_ZNK14gIndexOdometer9NoIndicesEv() + to label %"10" unwind label %lpad27 + +"10": ; preds = %"9" + invoke void @_Znam() + to label %"9" unwind label %lpad27 + +lpad27: ; preds = %"10", %"9" + %0 = phi i32 [ undef, %"9" ], [ %tmp, %"10" ] + %1 = landingpad { i8*, i32 } personality i32 (i32, i64, i8*, i8*)* @__gxx_personality_v0 + cleanup + resume { i8*, i32 } zeroinitializer +} + +declare void @_ZNK14gIndexOdometer9NoIndicesEv() + +declare i32 @__gxx_personality_v0(i32, i64, i8*, i8*)
diff --git a/src/LLVM/test/CodeGen/Generic/externally_available.ll b/src/LLVM/test/CodeGen/Generic/externally_available.ll new file mode 100644 index 0000000..7976cc9 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/externally_available.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s | not grep test_ + +; test_function should not be emitted to the .s file. +define available_externally i32 @test_function() { + ret i32 4 +} + +; test_global should not be emitted to the .s file. +@test_global = available_externally global i32 4 +
diff --git a/src/LLVM/test/CodeGen/Generic/fastcall.ll b/src/LLVM/test/CodeGen/Generic/fastcall.ll new file mode 100644 index 0000000..35e04f1 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/fastcall.ll
@@ -0,0 +1,14 @@ +; Test fastcc works. Test from bug 2770. +; RUN: llc < %s -relocation-model=pic + + +%struct.__gcov_var = type { i32 } +@__gcov_var = external global %struct.__gcov_var + +define fastcc void @gcov_read_words(i32 %words) { +entry: + store i32 %words, i32* getelementptr (%struct.__gcov_var* +@__gcov_var, +i32 0, i32 0) + ret void +}
diff --git a/src/LLVM/test/CodeGen/Generic/fneg-fabs.ll b/src/LLVM/test/CodeGen/Generic/fneg-fabs.ll new file mode 100644 index 0000000..4454ab7 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/fneg-fabs.ll
@@ -0,0 +1,26 @@ +; RUN: llc < %s + +define double @fneg(double %X) { + %Y = fsub double -0.000000e+00, %X ; <double> [#uses=1] + ret double %Y +} + +define float @fnegf(float %X) { + %Y = fsub float -0.000000e+00, %X ; <float> [#uses=1] + ret float %Y +} + +declare double @fabs(double) + +declare float @fabsf(float) + +define double @fabstest(double %X) { + %Y = call double @fabs( double %X ) ; <double> [#uses=1] + ret double %Y +} + +define float @fabsftest(float %X) { + %Y = call float @fabsf( float %X ) ; <float> [#uses=1] + ret float %Y +} +
diff --git a/src/LLVM/test/CodeGen/Generic/fp-to-int-invalid.ll b/src/LLVM/test/CodeGen/Generic/fp-to-int-invalid.ll new file mode 100644 index 0000000..cdcc3a2 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/fp-to-int-invalid.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s +; PR4057 +define void @test_cast_float_to_char(i8* %result) nounwind { +entry: + %result_addr = alloca i8* ; <i8**> [#uses=2] + %test = alloca float ; <float*> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store i8* %result, i8** %result_addr + store float 0x40B2AFA160000000, float* %test, align 4 + %0 = load float* %test, align 4 ; <float> [#uses=1] + %1 = fptosi float %0 to i8 ; <i8> [#uses=1] + %2 = load i8** %result_addr, align 4 ; <i8*> [#uses=1] + store i8 %1, i8* %2, align 1 + br label %return + +return: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/Generic/fp_to_int.ll b/src/LLVM/test/CodeGen/Generic/fp_to_int.ll new file mode 100644 index 0000000..9989ddf --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/fp_to_int.ll
@@ -0,0 +1,81 @@ +; RUN: llc < %s + +define i8 @test1(double %X) { + %tmp.1 = fptosi double %X to i8 ; <i8> [#uses=1] + ret i8 %tmp.1 +} + +define i16 @test2(double %X) { + %tmp.1 = fptosi double %X to i16 ; <i16> [#uses=1] + ret i16 %tmp.1 +} + +define i32 @test3(double %X) { + %tmp.1 = fptosi double %X to i32 ; <i32> [#uses=1] + ret i32 %tmp.1 +} + +define i64 @test4(double %X) { + %tmp.1 = fptosi double %X to i64 ; <i64> [#uses=1] + ret i64 %tmp.1 +} + +define i8 @test1u(double %X) { + %tmp.1 = fptoui double %X to i8 ; <i8> [#uses=1] + ret i8 %tmp.1 +} + +define i16 @test2u(double %X) { + %tmp.1 = fptoui double %X to i16 ; <i16> [#uses=1] + ret i16 %tmp.1 +} + +define i32 @test3u(double %X) { + %tmp.1 = fptoui double %X to i32 ; <i32> [#uses=1] + ret i32 %tmp.1 +} + +define i64 @test4u(double %X) { + %tmp.1 = fptoui double %X to i64 ; <i64> [#uses=1] + ret i64 %tmp.1 +} + +define i8 @test1f(float %X) { + %tmp.1 = fptosi float %X to i8 ; <i8> [#uses=1] + ret i8 %tmp.1 +} + +define i16 @test2f(float %X) { + %tmp.1 = fptosi float %X to i16 ; <i16> [#uses=1] + ret i16 %tmp.1 +} + +define i32 @test3f(float %X) { + %tmp.1 = fptosi float %X to i32 ; <i32> [#uses=1] + ret i32 %tmp.1 +} + +define i64 @test4f(float %X) { + %tmp.1 = fptosi float %X to i64 ; <i64> [#uses=1] + ret i64 %tmp.1 +} + +define i8 @test1uf(float %X) { + %tmp.1 = fptoui float %X to i8 ; <i8> [#uses=1] + ret i8 %tmp.1 +} + +define i16 @test2uf(float %X) { + %tmp.1 = fptoui float %X to i16 ; <i16> [#uses=1] + ret i16 %tmp.1 +} + +define i32 @test3uf(float %X) { + %tmp.1 = fptoui float %X to i32 ; <i32> [#uses=1] + ret i32 %tmp.1 +} + +define i64 @test4uf(float %X) { + %tmp.1 = fptoui float %X to i64 ; <i64> [#uses=1] + ret i64 %tmp.1 +}
diff --git a/src/LLVM/test/CodeGen/Generic/fpowi-promote.ll b/src/LLVM/test/CodeGen/Generic/fpowi-promote.ll new file mode 100644 index 0000000..f9d4716 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/fpowi-promote.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s + +; PR1239 + +define float @test(float %tmp23302331, i32 %tmp23282329 ) { + +%tmp2339 = call float @llvm.powi.f32( float %tmp23302331, i32 %tmp23282329 ) + ret float %tmp2339 +} + +declare float @llvm.powi.f32(float,i32)
diff --git a/src/LLVM/test/CodeGen/Generic/fwdtwice.ll b/src/LLVM/test/CodeGen/Generic/fwdtwice.ll new file mode 100644 index 0000000..5107f36 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/fwdtwice.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s + +;; +;; Test the sequence: +;; cast -> setle 0, %cast -> br %cond +;; This sequence should cause the cast value to be forwarded twice, +;; i.e., cast is forwarded to the setle and the setle is forwarded +;; to the branch. +;; register argument of the "branch-on-register" instruction, i.e., +;; +;; This produces the bogus output instruction: +;; brlez <NULL VALUE>, .L_SumArray_bb3. +;; This came from %bb1 of sumarrray.ll generated from sumarray.c. + +define i32 @SumArray(i32 %Num) { + %Num.upgrd.1 = alloca i32 ; <i32*> [#uses=2] + br label %Top + +Top: ; preds = %Top, %0 + store i32 %Num, i32* %Num.upgrd.1 + %reg108 = load i32* %Num.upgrd.1 ; <i32> [#uses=1] + %cast1006 = bitcast i32 %reg108 to i32 ; <i32> [#uses=1] + %cond1001 = icmp ule i32 %cast1006, 0 ; <i1> [#uses=1] + br i1 %cond1001, label %bb6, label %Top + +bb6: ; preds = %Top + ret i32 42 +} +
diff --git a/src/LLVM/test/CodeGen/Generic/global-ret0.ll b/src/LLVM/test/CodeGen/Generic/global-ret0.ll new file mode 100644 index 0000000..0838372 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/global-ret0.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s + +@g = global i32 0 ; <i32*> [#uses=1] + +define i32 @main() { + %h = load i32* @g ; <i32> [#uses=1] + ret i32 %h +}
diff --git a/src/LLVM/test/CodeGen/Generic/hello.ll b/src/LLVM/test/CodeGen/Generic/hello.ll new file mode 100644 index 0000000..8d51008 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/hello.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s + +@.str_1 = internal constant [7 x i8] c"hello\0A\00" ; <[7 x i8]*> [#uses=1] + +declare i32 @printf(i8*, ...) + +define i32 @main() { + %s = getelementptr [7 x i8]* @.str_1, i64 0, i64 0 ; <i8*> [#uses=1] + call i32 (i8*, ...)* @printf( i8* %s ) ; <i32>:1 [#uses=0] + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/Generic/i128-addsub.ll b/src/LLVM/test/CodeGen/Generic/i128-addsub.ll new file mode 100644 index 0000000..f36c90f --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/i128-addsub.ll
@@ -0,0 +1,39 @@ +; RUN: llc < %s + +define void @test_add(i64 %AL, i64 %AH, i64 %BL, i64 %BH, i64* %RL, i64* %RH) { +entry: + %tmp1 = zext i64 %AL to i128 ; <i128> [#uses=1] + %tmp23 = zext i64 %AH to i128 ; <i128> [#uses=1] + %tmp4 = shl i128 %tmp23, 64 ; <i128> [#uses=1] + %tmp5 = or i128 %tmp4, %tmp1 ; <i128> [#uses=1] + %tmp67 = zext i64 %BL to i128 ; <i128> [#uses=1] + %tmp89 = zext i64 %BH to i128 ; <i128> [#uses=1] + %tmp11 = shl i128 %tmp89, 64 ; <i128> [#uses=1] + %tmp12 = or i128 %tmp11, %tmp67 ; <i128> [#uses=1] + %tmp15 = add i128 %tmp12, %tmp5 ; <i128> [#uses=2] + %tmp1617 = trunc i128 %tmp15 to i64 ; <i64> [#uses=1] + store i64 %tmp1617, i64* %RL + %tmp21 = lshr i128 %tmp15, 64 ; <i128> [#uses=1] + %tmp2122 = trunc i128 %tmp21 to i64 ; <i64> [#uses=1] + store i64 %tmp2122, i64* %RH + ret void +} + +define void @test_sub(i64 %AL, i64 %AH, i64 %BL, i64 %BH, i64* %RL, i64* %RH) { +entry: + %tmp1 = zext i64 %AL to i128 ; <i128> [#uses=1] + %tmp23 = zext i64 %AH to i128 ; <i128> [#uses=1] + %tmp4 = shl i128 %tmp23, 64 ; <i128> [#uses=1] + %tmp5 = or i128 %tmp4, %tmp1 ; <i128> [#uses=1] + %tmp67 = zext i64 %BL to i128 ; <i128> [#uses=1] + %tmp89 = zext i64 %BH to i128 ; <i128> [#uses=1] + %tmp11 = shl i128 %tmp89, 64 ; <i128> [#uses=1] + %tmp12 = or i128 %tmp11, %tmp67 ; <i128> [#uses=1] + %tmp15 = sub i128 %tmp5, %tmp12 ; <i128> [#uses=2] + %tmp1617 = trunc i128 %tmp15 to i64 ; <i64> [#uses=1] + store i64 %tmp1617, i64* %RL + %tmp21 = lshr i128 %tmp15, 64 ; <i128> [#uses=1] + %tmp2122 = trunc i128 %tmp21 to i64 ; <i64> [#uses=1] + store i64 %tmp2122, i64* %RH + ret void +}
diff --git a/src/LLVM/test/CodeGen/Generic/i128-arith.ll b/src/LLVM/test/CodeGen/Generic/i128-arith.ll new file mode 100644 index 0000000..5d69e64 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/i128-arith.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s + +define i64 @foo(i64 %x, i64 %y, i32 %amt) { + %tmp0 = zext i64 %x to i128 + %tmp1 = sext i64 %y to i128 + %tmp2 = or i128 %tmp0, %tmp1 + %tmp7 = zext i32 13 to i128 + %tmp3 = lshr i128 %tmp2, %tmp7 + %tmp4 = trunc i128 %tmp3 to i64 + ret i64 %tmp4 +}
diff --git a/src/LLVM/test/CodeGen/Generic/inline-asm-special-strings.ll b/src/LLVM/test/CodeGen/Generic/inline-asm-special-strings.ll new file mode 100644 index 0000000..d18221e --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/inline-asm-special-strings.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s | grep "foo 0 0" + +define void @bar() nounwind { + tail call void asm sideeffect "foo ${:uid} ${:uid}", ""() nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/Generic/intrinsics.ll b/src/LLVM/test/CodeGen/Generic/intrinsics.ll new file mode 100644 index 0000000..69b67e1 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/intrinsics.ll
@@ -0,0 +1,40 @@ +; RUN: llc < %s + +;; SQRT +declare float @llvm.sqrt.f32(float) + +declare double @llvm.sqrt.f64(double) + +define double @test_sqrt(float %F) { + %G = call float @llvm.sqrt.f32( float %F ) ; <float> [#uses=1] + %H = fpext float %G to double ; <double> [#uses=1] + %I = call double @llvm.sqrt.f64( double %H ) ; <double> [#uses=1] + ret double %I +} + + +; SIN +declare float @sinf(float) readonly + +declare double @sin(double) readonly + +define double @test_sin(float %F) { + %G = call float @sinf( float %F ) ; <float> [#uses=1] + %H = fpext float %G to double ; <double> [#uses=1] + %I = call double @sin( double %H ) ; <double> [#uses=1] + ret double %I +} + + +; COS +declare float @cosf(float) readonly + +declare double @cos(double) readonly + +define double @test_cos(float %F) { + %G = call float @cosf( float %F ) ; <float> [#uses=1] + %H = fpext float %G to double ; <double> [#uses=1] + %I = call double @cos( double %H ) ; <double> [#uses=1] + ret double %I +} +
diff --git a/src/LLVM/test/CodeGen/Generic/invalid-memcpy.ll b/src/LLVM/test/CodeGen/Generic/invalid-memcpy.ll new file mode 100644 index 0000000..2dfa28b --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/invalid-memcpy.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s + +; This testcase is invalid (the alignment specified for memcpy is +; greater than the alignment guaranteed for Qux or C.0.1173), but it +; should compile, not crash the code generator. + +@C.0.1173 = external constant [33 x i8] + +define void @Bork() { +entry: + %Qux = alloca [33 x i8] + %Qux1 = bitcast [33 x i8]* %Qux to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %Qux1, i8* getelementptr inbounds ([33 x i8]* @C.0.1173, i32 0, i32 0), i64 33, i32 8, i1 false) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/src/LLVM/test/CodeGen/Generic/isunord.ll b/src/LLVM/test/CodeGen/Generic/isunord.ll new file mode 100644 index 0000000..0cc4ecc --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/isunord.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s + +declare i1 @llvm.isunordered.f64(double, double) + +define i1 @test(double %X, double %Y) { + %tmp27 = fcmp uno double %X, %Y ; <i1> [#uses=1] + ret i1 %tmp27 +} +
diff --git a/src/LLVM/test/CodeGen/Generic/llvm-ct-intrinsics.ll b/src/LLVM/test/CodeGen/Generic/llvm-ct-intrinsics.ll new file mode 100644 index 0000000..bce3315 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/llvm-ct-intrinsics.ll
@@ -0,0 +1,62 @@ +; Make sure this testcase is supported by all code generators +; RUN: llc < %s + +declare i64 @llvm.ctpop.i64(i64) + +declare i32 @llvm.ctpop.i32(i32) + +declare i16 @llvm.ctpop.i16(i16) + +declare i8 @llvm.ctpop.i8(i8) + +define void @ctpoptest(i8 %A, i16 %B, i32 %C, i64 %D, i8* %AP, i16* %BP, i32* %CP, i64* %DP) { + %a = call i8 @llvm.ctpop.i8( i8 %A ) ; <i8> [#uses=1] + %b = call i16 @llvm.ctpop.i16( i16 %B ) ; <i16> [#uses=1] + %c = call i32 @llvm.ctpop.i32( i32 %C ) ; <i32> [#uses=1] + %d = call i64 @llvm.ctpop.i64( i64 %D ) ; <i64> [#uses=1] + store i8 %a, i8* %AP + store i16 %b, i16* %BP + store i32 %c, i32* %CP + store i64 %d, i64* %DP + ret void +} + +declare i64 @llvm.ctlz.i64(i64) + +declare i32 @llvm.ctlz.i32(i32) + +declare i16 @llvm.ctlz.i16(i16) + +declare i8 @llvm.ctlz.i8(i8) + +define void @ctlztest(i8 %A, i16 %B, i32 %C, i64 %D, i8* %AP, i16* %BP, i32* %CP, i64* %DP) { + %a = call i8 @llvm.ctlz.i8( i8 %A ) ; <i8> [#uses=1] + %b = call i16 @llvm.ctlz.i16( i16 %B ) ; <i16> [#uses=1] + %c = call i32 @llvm.ctlz.i32( i32 %C ) ; <i32> [#uses=1] + %d = call i64 @llvm.ctlz.i64( i64 %D ) ; <i64> [#uses=1] + store i8 %a, i8* %AP + store i16 %b, i16* %BP + store i32 %c, i32* %CP + store i64 %d, i64* %DP + ret void +} + +declare i64 @llvm.cttz.i64(i64) + +declare i32 @llvm.cttz.i32(i32) + +declare i16 @llvm.cttz.i16(i16) + +declare i8 @llvm.cttz.i8(i8) + +define void @cttztest(i8 %A, i16 %B, i32 %C, i64 %D, i8* %AP, i16* %BP, i32* %CP, i64* %DP) { + %a = call i8 @llvm.cttz.i8( i8 %A ) ; <i8> [#uses=1] + %b = call i16 @llvm.cttz.i16( i16 %B ) ; <i16> [#uses=1] + %c = call i32 @llvm.cttz.i32( i32 %C ) ; <i32> [#uses=1] + %d = call i64 @llvm.cttz.i64( i64 %D ) ; <i64> [#uses=1] + store i8 %a, i8* %AP + store i16 %b, i16* %BP + store i32 %c, i32* %CP + store i64 %d, i64* %DP + ret void +}
diff --git a/src/LLVM/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll b/src/LLVM/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll new file mode 100644 index 0000000..e709080 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll
@@ -0,0 +1,21 @@ +; RUN: llc < %s + +declare { i64, double } @wild() + +define void @foo(i64* %p, double* %q) nounwind { + %t = invoke { i64, double } @wild() to label %normal unwind label %handler + +normal: + %mrv_gr = extractvalue { i64, double } %t, 0 + store i64 %mrv_gr, i64* %p + %mrv_gr12681 = extractvalue { i64, double } %t, 1 + store double %mrv_gr12681, double* %q + ret void + +handler: + %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + catch i8* null + ret void +} + +declare i32 @__gxx_personality_v0(...)
diff --git a/src/LLVM/test/CodeGen/Generic/negintconst.ll b/src/LLVM/test/CodeGen/Generic/negintconst.ll new file mode 100644 index 0000000..896872f --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/negintconst.ll
@@ -0,0 +1,47 @@ +; RUN: llc < %s + +; Test that a negative constant smaller than 64 bits (e.g., int) +; is correctly implemented with sign-extension. +; In particular, the current code generated is: +; +; main: +; .L_main_LL_0: +; save %o6, -224, %o6 +; setx .G_fmtArg_1, %o1, %o0 +; setuw 1, %o1 ! i = 1 +; setuw 4294967295, %o3 ! THE BUG: 0x00000000ffffffff +; setsw 0, %i0 +; add %i6, 1999, %o2 ! fval +; add %o1, %g0, %o1 +; add %o0, 0, %o0 +; mulx %o1, %o3, %o1 ! ERROR: 0xffffffff; should be -1 +; add %o1, 3, %o1 ! ERROR: 0x100000002; should be 0x2 +; mulx %o1, 12, %o3 ! +; add %o2, %o3, %o3 ! produces bad address! +; call printf +; nop +; jmpl %i7+8, %g0 +; restore %g0, 0, %g0 +; +; llc produces: +; ioff = 2 fval = 0xffffffff7fffec90 &fval[2] = 0xb7fffeca8 +; instead of: +; ioff = 2 fval = 0xffffffff7fffec90 &fval[2] = 0xffffffff7fffeca8 +; + %Results = type { float, float, float } +@fmtArg = internal global [39 x i8] c"ioff = %u\09fval = 0x%p\09&fval[2] = 0x%p\0A\00" ; <[39 x i8]*> [#uses=1] + +declare i32 @printf(i8*, ...) + +define i32 @main() { + %fval = alloca %Results, i32 4 ; <%Results*> [#uses=2] + %i = add i32 1, 0 ; <i32> [#uses=1] + %iscale = mul i32 %i, -1 ; <i32> [#uses=1] + %ioff = add i32 %iscale, 3 ; <i32> [#uses=2] + %ioff.upgrd.1 = zext i32 %ioff to i64 ; <i64> [#uses=1] + %fptr = getelementptr %Results* %fval, i64 %ioff.upgrd.1 ; <%Results*> [#uses=1] + %castFmt = getelementptr [39 x i8]* @fmtArg, i64 0, i64 0 ; <i8*> [#uses=1] + call i32 (i8*, ...)* @printf( i8* %castFmt, i32 %ioff, %Results* %fval, %Results* %fptr ) ; <i32>:1 [#uses=0] + ret i32 0 +} +
diff --git a/src/LLVM/test/CodeGen/Generic/nested-select.ll b/src/LLVM/test/CodeGen/Generic/nested-select.ll new file mode 100644 index 0000000..e079271 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/nested-select.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -o /dev/null + +; Test that select of a select works + +%typedef.tree = type opaque + +define i32 @ic_test(double %p.0.2.0.val, double %p.0.2.1.val, double %p.0.2.2.val, %typedef.tree* %t) { + %result.1.0 = zext i1 false to i32 ; <i32> [#uses=1] + %tmp.55 = fcmp oge double 0.000000e+00, 1.000000e+00 ; <i1> [#uses=1] + %tmp.66 = fdiv double 0.000000e+00, 0.000000e+00 ; <double> [#uses=1] + br label %N + +N: ; preds = %0 + %result.1.1 = select i1 %tmp.55, i32 0, i32 %result.1.0 ; <i32> [#uses=1] + %tmp.75 = fcmp oge double %tmp.66, 1.000000e+00 ; <i1> [#uses=1] + %retval1 = select i1 %tmp.75, i32 0, i32 %result.1.1 ; <i32> [#uses=1] + ret i32 %retval1 +} +
diff --git a/src/LLVM/test/CodeGen/Generic/overflow.ll b/src/LLVM/test/CodeGen/Generic/overflow.ll new file mode 100644 index 0000000..4196855 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/overflow.ll
@@ -0,0 +1,220 @@ +; RUN: llc < %s +; Verify codegen's don't crash on overflow intrinsics. + +;; SADD + +define zeroext i8 @sadd_i8(i8 signext %a, i8 signext %b) nounwind ssp { +entry: + %sadd = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %a, i8 %b) + %cmp = extractvalue { i8, i1 } %sadd, 1 + %sadd.result = extractvalue { i8, i1 } %sadd, 0 + %X = select i1 %cmp, i8 %sadd.result, i8 42 + ret i8 %X +} + +declare { i8, i1 } @llvm.sadd.with.overflow.i8(i8, i8) nounwind readnone + +define zeroext i16 @sadd_i16(i16 signext %a, i16 signext %b) nounwind ssp { +entry: + %sadd = tail call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 %a, i16 %b) + %cmp = extractvalue { i16, i1 } %sadd, 1 + %sadd.result = extractvalue { i16, i1 } %sadd, 0 + %X = select i1 %cmp, i16 %sadd.result, i16 42 + ret i16 %X +} + +declare { i16, i1 } @llvm.sadd.with.overflow.i16(i16, i16) nounwind readnone + +define zeroext i32 @sadd_i32(i32 signext %a, i32 signext %b) nounwind ssp { +entry: + %sadd = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %cmp = extractvalue { i32, i1 } %sadd, 1 + %sadd.result = extractvalue { i32, i1 } %sadd, 0 + %X = select i1 %cmp, i32 %sadd.result, i32 42 + ret i32 %X +} + +declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone + + +;; UADD + +define zeroext i8 @uadd_i8(i8 signext %a, i8 signext %b) nounwind ssp { +entry: + %uadd = tail call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 %a, i8 %b) + %cmp = extractvalue { i8, i1 } %uadd, 1 + %uadd.result = extractvalue { i8, i1 } %uadd, 0 + %X = select i1 %cmp, i8 %uadd.result, i8 42 + ret i8 %X +} + +declare { i8, i1 } @llvm.uadd.with.overflow.i8(i8, i8) nounwind readnone + +define zeroext i16 @uadd_i16(i16 signext %a, i16 signext %b) nounwind ssp { +entry: + %uadd = tail call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 %a, i16 %b) + %cmp = extractvalue { i16, i1 } %uadd, 1 + %uadd.result = extractvalue { i16, i1 } %uadd, 0 + %X = select i1 %cmp, i16 %uadd.result, i16 42 + 
ret i16 %X +} + +declare { i16, i1 } @llvm.uadd.with.overflow.i16(i16, i16) nounwind readnone + +define zeroext i32 @uadd_i32(i32 signext %a, i32 signext %b) nounwind ssp { +entry: + %uadd = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %cmp = extractvalue { i32, i1 } %uadd, 1 + %uadd.result = extractvalue { i32, i1 } %uadd, 0 + %X = select i1 %cmp, i32 %uadd.result, i32 42 + ret i32 %X +} + +declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone + + + +;; ssub + +define zeroext i8 @ssub_i8(i8 signext %a, i8 signext %b) nounwind ssp { +entry: + %ssub = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %a, i8 %b) + %cmp = extractvalue { i8, i1 } %ssub, 1 + %ssub.result = extractvalue { i8, i1 } %ssub, 0 + %X = select i1 %cmp, i8 %ssub.result, i8 42 + ret i8 %X +} + +declare { i8, i1 } @llvm.ssub.with.overflow.i8(i8, i8) nounwind readnone + +define zeroext i16 @ssub_i16(i16 signext %a, i16 signext %b) nounwind ssp { +entry: + %ssub = tail call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 %a, i16 %b) + %cmp = extractvalue { i16, i1 } %ssub, 1 + %ssub.result = extractvalue { i16, i1 } %ssub, 0 + %X = select i1 %cmp, i16 %ssub.result, i16 42 + ret i16 %X +} + +declare { i16, i1 } @llvm.ssub.with.overflow.i16(i16, i16) nounwind readnone + +define zeroext i32 @ssub_i32(i32 signext %a, i32 signext %b) nounwind ssp { +entry: + %ssub = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %cmp = extractvalue { i32, i1 } %ssub, 1 + %ssub.result = extractvalue { i32, i1 } %ssub, 0 + %X = select i1 %cmp, i32 %ssub.result, i32 42 + ret i32 %X +} + +declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone + + +;; usub + +define zeroext i8 @usub_i8(i8 signext %a, i8 signext %b) nounwind ssp { +entry: + %usub = tail call { i8, i1 } @llvm.usub.with.overflow.i8(i8 %a, i8 %b) + %cmp = extractvalue { i8, i1 } %usub, 1 + %usub.result = extractvalue { i8, i1 } %usub, 0 + %X = select i1 %cmp, i8 
%usub.result, i8 42 + ret i8 %X +} + +declare { i8, i1 } @llvm.usub.with.overflow.i8(i8, i8) nounwind readnone + +define zeroext i16 @usub_i16(i16 signext %a, i16 signext %b) nounwind ssp { +entry: + %usub = tail call { i16, i1 } @llvm.usub.with.overflow.i16(i16 %a, i16 %b) + %cmp = extractvalue { i16, i1 } %usub, 1 + %usub.result = extractvalue { i16, i1 } %usub, 0 + %X = select i1 %cmp, i16 %usub.result, i16 42 + ret i16 %X +} + +declare { i16, i1 } @llvm.usub.with.overflow.i16(i16, i16) nounwind readnone + +define zeroext i32 @usub_i32(i32 signext %a, i32 signext %b) nounwind ssp { +entry: + %usub = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %cmp = extractvalue { i32, i1 } %usub, 1 + %usub.result = extractvalue { i32, i1 } %usub, 0 + %X = select i1 %cmp, i32 %usub.result, i32 42 + ret i32 %X +} + +declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone + + + +;; smul + +define zeroext i8 @smul_i8(i8 signext %a, i8 signext %b) nounwind ssp { +entry: + %smul = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 %a, i8 %b) + %cmp = extractvalue { i8, i1 } %smul, 1 + %smul.result = extractvalue { i8, i1 } %smul, 0 + %X = select i1 %cmp, i8 %smul.result, i8 42 + ret i8 %X +} + +declare { i8, i1 } @llvm.smul.with.overflow.i8(i8, i8) nounwind readnone + +define zeroext i16 @smul_i16(i16 signext %a, i16 signext %b) nounwind ssp { +entry: + %smul = tail call { i16, i1 } @llvm.smul.with.overflow.i16(i16 %a, i16 %b) + %cmp = extractvalue { i16, i1 } %smul, 1 + %smul.result = extractvalue { i16, i1 } %smul, 0 + %X = select i1 %cmp, i16 %smul.result, i16 42 + ret i16 %X +} + +declare { i16, i1 } @llvm.smul.with.overflow.i16(i16, i16) nounwind readnone + +define zeroext i32 @smul_i32(i32 signext %a, i32 signext %b) nounwind ssp { +entry: + %smul = tail call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %a, i32 %b) + %cmp = extractvalue { i32, i1 } %smul, 1 + %smul.result = extractvalue { i32, i1 } %smul, 0 + %X = select i1 %cmp, 
i32 %smul.result, i32 42 + ret i32 %X +} + +declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone + + +;; umul + +define zeroext i8 @umul_i8(i8 signext %a, i8 signext %b) nounwind ssp { +entry: + %umul = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %a, i8 %b) + %cmp = extractvalue { i8, i1 } %umul, 1 + %umul.result = extractvalue { i8, i1 } %umul, 0 + %X = select i1 %cmp, i8 %umul.result, i8 42 + ret i8 %X +} + +declare { i8, i1 } @llvm.umul.with.overflow.i8(i8, i8) nounwind readnone + +define zeroext i16 @umul_i16(i16 signext %a, i16 signext %b) nounwind ssp { +entry: + %umul = tail call { i16, i1 } @llvm.umul.with.overflow.i16(i16 %a, i16 %b) + %cmp = extractvalue { i16, i1 } %umul, 1 + %umul.result = extractvalue { i16, i1 } %umul, 0 + %X = select i1 %cmp, i16 %umul.result, i16 42 + ret i16 %X +} + +declare { i16, i1 } @llvm.umul.with.overflow.i16(i16, i16) nounwind readnone + +define zeroext i32 @umul_i32(i32 signext %a, i32 signext %b) nounwind ssp { +entry: + %umul = tail call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b) + %cmp = extractvalue { i32, i1 } %umul, 1 + %umul.result = extractvalue { i32, i1 } %umul, 0 + %X = select i1 %cmp, i32 %umul.result, i32 42 + ret i32 %X +} + +declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone +
diff --git a/src/LLVM/test/CodeGen/Generic/pr2625.ll b/src/LLVM/test/CodeGen/Generic/pr2625.ll new file mode 100644 index 0000000..3e3dc4b --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/pr2625.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s +; PR2625 + +define i32 @main({ i32, { i32 } }*) { +entry: + %state = alloca { i32, { i32 } }* ; <{ i32, { i32 } }**> [#uses=2] + store { i32, { i32 } }* %0, { i32, { i32 } }** %state + %retval = alloca i32 ; <i32*> [#uses=2] + store i32 0, i32* %retval + load { i32, { i32 } }** %state ; <{ i32, { i32 } }*>:1 [#uses=1] + store { i32, { i32 } } zeroinitializer, { i32, { i32 } }* %1 + br label %return + +return: ; preds = %entry + load i32* %retval ; <i32>:2 [#uses=1] + ret i32 %2 +}
diff --git a/src/LLVM/test/CodeGen/Generic/pr3288.ll b/src/LLVM/test/CodeGen/Generic/pr3288.ll new file mode 100644 index 0000000..b62710f --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/pr3288.ll
@@ -0,0 +1,67 @@ +; RUN: llc < %s +; PR3288 + +define void @a() { + %i = insertvalue [2 x [2 x i32]] undef, [2 x i32] undef, 1 + ret void +} +define void @b() { + %i = insertvalue {{i32,float},{i16,double}} undef, {i16,double} undef, 1 + ret void +} +define void @c() { + %i = insertvalue [2 x [2 x i32]] zeroinitializer, [2 x i32] zeroinitializer, 1 + ret void +} +define void @d() { + %i = insertvalue {{i32,float},{i16,double}} zeroinitializer, {i16,double} zeroinitializer, 1 + ret void +} +define void @e() { + %i = insertvalue [2 x [2 x i32]] undef, [2 x i32] undef, 0 + ret void +} +define void @f() { + %i = insertvalue {{i32,float},{i16,double}} undef, {i32,float} undef, 0 + ret void +} +define void @g() { + %i = insertvalue [2 x [2 x i32]] zeroinitializer, [2 x i32] zeroinitializer, 0 + ret void +} +define void @h() { + %i = insertvalue {{i32,float},{i16,double}} zeroinitializer, {i32,float} zeroinitializer, 0 + ret void +} +define void @ax() { + %i = insertvalue [2 x [2 x i32]] undef, i32 undef, 1, 1 + ret void +} +define void @bx() { + %i = insertvalue {{i32,float},{i16,double}} undef, double undef, 1, 1 + ret void +} +define void @cx() { + %i = insertvalue [2 x [2 x i32]] zeroinitializer, i32 zeroinitializer, 1, 1 + ret void +} +define void @dx() { + %i = insertvalue {{i32,float},{i16,double}} zeroinitializer, double zeroinitializer, 1, 1 + ret void +} +define void @ex() { + %i = insertvalue [2 x [2 x i32]] undef, i32 undef, 0, 1 + ret void +} +define void @fx() { + %i = insertvalue {{i32,float},{i16,double}} undef, float undef, 0, 1 + ret void +} +define void @gx() { + %i = insertvalue [2 x [2 x i32]] zeroinitializer, i32 zeroinitializer, 0, 1 + ret void +} +define void @hx() { + %i = insertvalue {{i32,float},{i16,double}} zeroinitializer, float zeroinitializer, 0, 1 + ret void +}
diff --git a/src/LLVM/test/CodeGen/Generic/print-add.ll b/src/LLVM/test/CodeGen/Generic/print-add.ll new file mode 100644 index 0000000..81e136d --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/print-add.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s + +@.str_1 = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1] + +declare i32 @printf(i8*, ...) + +define i32 @main() { + %f = getelementptr [4 x i8]* @.str_1, i64 0, i64 0 ; <i8*> [#uses=3] + %d = add i32 1, 0 ; <i32> [#uses=3] + call i32 (i8*, ...)* @printf( i8* %f, i32 %d ) ; <i32>:1 [#uses=0] + %e = add i32 38, 2 ; <i32> [#uses=2] + call i32 (i8*, ...)* @printf( i8* %f, i32 %e ) ; <i32>:2 [#uses=0] + %g = add i32 %d, %d ; <i32> [#uses=1] + %h = add i32 %e, %g ; <i32> [#uses=1] + call i32 (i8*, ...)* @printf( i8* %f, i32 %h ) ; <i32>:3 [#uses=0] + ret i32 0 +} +
diff --git a/src/LLVM/test/CodeGen/Generic/print-arith-fp.ll b/src/LLVM/test/CodeGen/Generic/print-arith-fp.ll new file mode 100644 index 0000000..7f69808 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/print-arith-fp.ll
@@ -0,0 +1,61 @@ +; RUN: llc < %s +@a_str = internal constant [8 x i8] c"a = %f\0A\00" ; <[8 x i8]*> [#uses=1] +@b_str = internal constant [8 x i8] c"b = %f\0A\00" ; <[8 x i8]*> [#uses=1] +@add_str = internal constant [12 x i8] c"a + b = %f\0A\00" ; <[12 x i8]*> [#uses=1] +@sub_str = internal constant [12 x i8] c"a - b = %f\0A\00" ; <[12 x i8]*> [#uses=1] +@mul_str = internal constant [12 x i8] c"a * b = %f\0A\00" ; <[12 x i8]*> [#uses=1] +@div_str = internal constant [12 x i8] c"b / a = %f\0A\00" ; <[12 x i8]*> [#uses=1] +@rem_str = internal constant [13 x i8] c"b %% a = %f\0A\00" ; <[13 x i8]*> [#uses=1] +@lt_str = internal constant [12 x i8] c"a < b = %d\0A\00" ; <[12 x i8]*> [#uses=1] +@le_str = internal constant [13 x i8] c"a <= b = %d\0A\00" ; <[13 x i8]*> [#uses=1] +@gt_str = internal constant [12 x i8] c"a > b = %d\0A\00" ; <[12 x i8]*> [#uses=1] +@ge_str = internal constant [13 x i8] c"a >= b = %d\0A\00" ; <[13 x i8]*> [#uses=1] +@eq_str = internal constant [13 x i8] c"a == b = %d\0A\00" ; <[13 x i8]*> [#uses=1] +@ne_str = internal constant [13 x i8] c"a != b = %d\0A\00" ; <[13 x i8]*> [#uses=1] +@A = global double 2.000000e+00 ; <double*> [#uses=1] +@B = global double 5.000000e+00 ; <double*> [#uses=1] + +declare i32 @printf(i8*, ...) 
+ +define i32 @main() { + %a = load double* @A ; <double> [#uses=12] + %b = load double* @B ; <double> [#uses=12] + %a_s = getelementptr [8 x i8]* @a_str, i64 0, i64 0 ; <i8*> [#uses=1] + %b_s = getelementptr [8 x i8]* @b_str, i64 0, i64 0 ; <i8*> [#uses=1] + call i32 (i8*, ...)* @printf( i8* %a_s, double %a ) ; <i32>:1 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %b_s, double %b ) ; <i32>:2 [#uses=0] + %add_r = fadd double %a, %b ; <double> [#uses=1] + %sub_r = fsub double %a, %b ; <double> [#uses=1] + %mul_r = fmul double %a, %b ; <double> [#uses=1] + %div_r = fdiv double %b, %a ; <double> [#uses=1] + %rem_r = frem double %b, %a ; <double> [#uses=1] + %add_s = getelementptr [12 x i8]* @add_str, i64 0, i64 0 ; <i8*> [#uses=1] + %sub_s = getelementptr [12 x i8]* @sub_str, i64 0, i64 0 ; <i8*> [#uses=1] + %mul_s = getelementptr [12 x i8]* @mul_str, i64 0, i64 0 ; <i8*> [#uses=1] + %div_s = getelementptr [12 x i8]* @div_str, i64 0, i64 0 ; <i8*> [#uses=1] + %rem_s = getelementptr [13 x i8]* @rem_str, i64 0, i64 0 ; <i8*> [#uses=1] + call i32 (i8*, ...)* @printf( i8* %add_s, double %add_r ) ; <i32>:3 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %sub_s, double %sub_r ) ; <i32>:4 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %mul_s, double %mul_r ) ; <i32>:5 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %div_s, double %div_r ) ; <i32>:6 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %rem_s, double %rem_r ) ; <i32>:7 [#uses=0] + %lt_r = fcmp olt double %a, %b ; <i1> [#uses=1] + %le_r = fcmp ole double %a, %b ; <i1> [#uses=1] + %gt_r = fcmp ogt double %a, %b ; <i1> [#uses=1] + %ge_r = fcmp oge double %a, %b ; <i1> [#uses=1] + %eq_r = fcmp oeq double %a, %b ; <i1> [#uses=1] + %ne_r = fcmp une double %a, %b ; <i1> [#uses=1] + %lt_s = getelementptr [12 x i8]* @lt_str, i64 0, i64 0 ; <i8*> [#uses=1] + %le_s = getelementptr [13 x i8]* @le_str, i64 0, i64 0 ; <i8*> [#uses=1] + %gt_s = getelementptr [12 x i8]* @gt_str, i64 0, i64 0 ; <i8*> [#uses=1] + %ge_s = getelementptr [13 
x i8]* @ge_str, i64 0, i64 0 ; <i8*> [#uses=1] + %eq_s = getelementptr [13 x i8]* @eq_str, i64 0, i64 0 ; <i8*> [#uses=1] + %ne_s = getelementptr [13 x i8]* @ne_str, i64 0, i64 0 ; <i8*> [#uses=1] + call i32 (i8*, ...)* @printf( i8* %lt_s, i1 %lt_r ) ; <i32>:8 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %le_s, i1 %le_r ) ; <i32>:9 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %gt_s, i1 %gt_r ) ; <i32>:10 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %ge_s, i1 %ge_r ) ; <i32>:11 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %eq_s, i1 %eq_r ) ; <i32>:12 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %ne_s, i1 %ne_r ) ; <i32>:13 [#uses=0] + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/Generic/print-arith-int.ll b/src/LLVM/test/CodeGen/Generic/print-arith-int.ll new file mode 100644 index 0000000..d51b9eb --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/print-arith-int.ll
@@ -0,0 +1,84 @@ +; RUN: llc < %s +@a_str = internal constant [8 x i8] c"a = %d\0A\00" ; <[8 x i8]*> [#uses=1] +@b_str = internal constant [8 x i8] c"b = %d\0A\00" ; <[8 x i8]*> [#uses=1] +@add_str = internal constant [12 x i8] c"a + b = %d\0A\00" ; <[12 x i8]*> [#uses=1] +@sub_str = internal constant [12 x i8] c"a - b = %d\0A\00" ; <[12 x i8]*> [#uses=1] +@mul_str = internal constant [12 x i8] c"a * b = %d\0A\00" ; <[12 x i8]*> [#uses=1] +@div_str = internal constant [12 x i8] c"b / a = %d\0A\00" ; <[12 x i8]*> [#uses=1] +@rem_str = internal constant [13 x i8] c"b \5C% a = %d\0A\00" ; <[13 x i8]*> [#uses=1] +@lt_str = internal constant [12 x i8] c"a < b = %d\0A\00" ; <[12 x i8]*> [#uses=1] +@le_str = internal constant [13 x i8] c"a <= b = %d\0A\00" ; <[13 x i8]*> [#uses=1] +@gt_str = internal constant [12 x i8] c"a > b = %d\0A\00" ; <[12 x i8]*> [#uses=1] +@ge_str = internal constant [13 x i8] c"a >= b = %d\0A\00" ; <[13 x i8]*> [#uses=1] +@eq_str = internal constant [13 x i8] c"a == b = %d\0A\00" ; <[13 x i8]*> [#uses=1] +@ne_str = internal constant [13 x i8] c"a != b = %d\0A\00" ; <[13 x i8]*> [#uses=1] +@and_str = internal constant [12 x i8] c"a & b = %d\0A\00" ; <[12 x i8]*> [#uses=1] +@or_str = internal constant [12 x i8] c"a | b = %d\0A\00" ; <[12 x i8]*> [#uses=1] +@xor_str = internal constant [12 x i8] c"a ^ b = %d\0A\00" ; <[12 x i8]*> [#uses=1] +@shl_str = internal constant [13 x i8] c"b << a = %d\0A\00" ; <[13 x i8]*> [#uses=1] +@shr_str = internal constant [13 x i8] c"b >> a = %d\0A\00" ; <[13 x i8]*> [#uses=1] +@A = global i32 2 ; <i32*> [#uses=1] +@B = global i32 5 ; <i32*> [#uses=1] + +declare i32 @printf(i8*, ...) 
+ +define i32 @main() { + %a = load i32* @A ; <i32> [#uses=16] + %b = load i32* @B ; <i32> [#uses=17] + %a_s = getelementptr [8 x i8]* @a_str, i64 0, i64 0 ; <i8*> [#uses=1] + %b_s = getelementptr [8 x i8]* @b_str, i64 0, i64 0 ; <i8*> [#uses=1] + call i32 (i8*, ...)* @printf( i8* %a_s, i32 %a ) ; <i32>:1 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %b_s, i32 %b ) ; <i32>:2 [#uses=0] + %add_r = add i32 %a, %b ; <i32> [#uses=1] + %sub_r = sub i32 %a, %b ; <i32> [#uses=1] + %mul_r = mul i32 %a, %b ; <i32> [#uses=1] + %div_r = sdiv i32 %b, %a ; <i32> [#uses=1] + %rem_r = srem i32 %b, %a ; <i32> [#uses=1] + %add_s = getelementptr [12 x i8]* @add_str, i64 0, i64 0 ; <i8*> [#uses=1] + %sub_s = getelementptr [12 x i8]* @sub_str, i64 0, i64 0 ; <i8*> [#uses=1] + %mul_s = getelementptr [12 x i8]* @mul_str, i64 0, i64 0 ; <i8*> [#uses=1] + %div_s = getelementptr [12 x i8]* @div_str, i64 0, i64 0 ; <i8*> [#uses=1] + %rem_s = getelementptr [13 x i8]* @rem_str, i64 0, i64 0 ; <i8*> [#uses=1] + call i32 (i8*, ...)* @printf( i8* %add_s, i32 %add_r ) ; <i32>:3 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %sub_s, i32 %sub_r ) ; <i32>:4 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %mul_s, i32 %mul_r ) ; <i32>:5 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %div_s, i32 %div_r ) ; <i32>:6 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %rem_s, i32 %rem_r ) ; <i32>:7 [#uses=0] + %lt_r = icmp slt i32 %a, %b ; <i1> [#uses=1] + %le_r = icmp sle i32 %a, %b ; <i1> [#uses=1] + %gt_r = icmp sgt i32 %a, %b ; <i1> [#uses=1] + %ge_r = icmp sge i32 %a, %b ; <i1> [#uses=1] + %eq_r = icmp eq i32 %a, %b ; <i1> [#uses=1] + %ne_r = icmp ne i32 %a, %b ; <i1> [#uses=1] + %lt_s = getelementptr [12 x i8]* @lt_str, i64 0, i64 0 ; <i8*> [#uses=1] + %le_s = getelementptr [13 x i8]* @le_str, i64 0, i64 0 ; <i8*> [#uses=1] + %gt_s = getelementptr [12 x i8]* @gt_str, i64 0, i64 0 ; <i8*> [#uses=1] + %ge_s = getelementptr [13 x i8]* @ge_str, i64 0, i64 0 ; <i8*> [#uses=1] + %eq_s = getelementptr [13 x i8]* 
@eq_str, i64 0, i64 0 ; <i8*> [#uses=1] + %ne_s = getelementptr [13 x i8]* @ne_str, i64 0, i64 0 ; <i8*> [#uses=1] + call i32 (i8*, ...)* @printf( i8* %lt_s, i1 %lt_r ) ; <i32>:8 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %le_s, i1 %le_r ) ; <i32>:9 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %gt_s, i1 %gt_r ) ; <i32>:10 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %ge_s, i1 %ge_r ) ; <i32>:11 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %eq_s, i1 %eq_r ) ; <i32>:12 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %ne_s, i1 %ne_r ) ; <i32>:13 [#uses=0] + %and_r = and i32 %a, %b ; <i32> [#uses=1] + %or_r = or i32 %a, %b ; <i32> [#uses=1] + %xor_r = xor i32 %a, %b ; <i32> [#uses=1] + %u = trunc i32 %a to i8 ; <i8> [#uses=2] + %shift.upgrd.1 = zext i8 %u to i32 ; <i32> [#uses=1] + %shl_r = shl i32 %b, %shift.upgrd.1 ; <i32> [#uses=1] + %shift.upgrd.2 = zext i8 %u to i32 ; <i32> [#uses=1] + %shr_r = ashr i32 %b, %shift.upgrd.2 ; <i32> [#uses=1] + %and_s = getelementptr [12 x i8]* @and_str, i64 0, i64 0 ; <i8*> [#uses=1] + %or_s = getelementptr [12 x i8]* @or_str, i64 0, i64 0 ; <i8*> [#uses=1] + %xor_s = getelementptr [12 x i8]* @xor_str, i64 0, i64 0 ; <i8*> [#uses=1] + %shl_s = getelementptr [13 x i8]* @shl_str, i64 0, i64 0 ; <i8*> [#uses=1] + %shr_s = getelementptr [13 x i8]* @shr_str, i64 0, i64 0 ; <i8*> [#uses=1] + call i32 (i8*, ...)* @printf( i8* %and_s, i32 %and_r ) ; <i32>:14 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %or_s, i32 %or_r ) ; <i32>:15 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %xor_s, i32 %xor_r ) ; <i32>:16 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %shl_s, i32 %shl_r ) ; <i32>:17 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %shr_s, i32 %shr_r ) ; <i32>:18 [#uses=0] + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/Generic/print-int.ll b/src/LLVM/test/CodeGen/Generic/print-int.ll new file mode 100644 index 0000000..f252b4d --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/print-int.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s + +@.str_1 = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1] + +declare i32 @printf(i8*, ...) + +define i32 @main() { + %f = getelementptr [4 x i8]* @.str_1, i64 0, i64 0 ; <i8*> [#uses=1] + %d = add i32 0, 0 ; <i32> [#uses=1] + %tmp.0 = call i32 (i8*, ...)* @printf( i8* %f, i32 %d ) ; <i32> [#uses=0] + ret i32 0 +} +
diff --git a/src/LLVM/test/CodeGen/Generic/print-mul-exp.ll b/src/LLVM/test/CodeGen/Generic/print-mul-exp.ll new file mode 100644 index 0000000..5441849 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/print-mul-exp.ll
@@ -0,0 +1,55 @@ +; RUN: llc < %s + +@a_str = internal constant [8 x i8] c"a = %d\0A\00" ; <[8 x i8]*> [#uses=1] +@a_mul_str = internal constant [13 x i8] c"a * %d = %d\0A\00" ; <[13 x i8]*> [#uses=1] +@A = global i32 2 ; <i32*> [#uses=1] + +declare i32 @printf(i8*, ...) + +define i32 @main() { + %a = load i32* @A ; <i32> [#uses=21] + %a_s = getelementptr [8 x i8]* @a_str, i64 0, i64 0 ; <i8*> [#uses=1] + %a_mul_s = getelementptr [13 x i8]* @a_mul_str, i64 0, i64 0 ; <i8*> [#uses=20] + call i32 (i8*, ...)* @printf( i8* %a_s, i32 %a ) ; <i32>:1 [#uses=0] + %r_0 = mul i32 %a, 0 ; <i32> [#uses=1] + %r_1 = mul i32 %a, 1 ; <i32> [#uses=1] + %r_2 = mul i32 %a, 2 ; <i32> [#uses=1] + %r_3 = mul i32 %a, 3 ; <i32> [#uses=1] + %r_4 = mul i32 %a, 4 ; <i32> [#uses=1] + %r_5 = mul i32 %a, 5 ; <i32> [#uses=1] + %r_6 = mul i32 %a, 6 ; <i32> [#uses=1] + %r_7 = mul i32 %a, 7 ; <i32> [#uses=1] + %r_8 = mul i32 %a, 8 ; <i32> [#uses=1] + %r_9 = mul i32 %a, 9 ; <i32> [#uses=1] + %r_10 = mul i32 %a, 10 ; <i32> [#uses=1] + %r_11 = mul i32 %a, 11 ; <i32> [#uses=1] + %r_12 = mul i32 %a, 12 ; <i32> [#uses=1] + %r_13 = mul i32 %a, 13 ; <i32> [#uses=1] + %r_14 = mul i32 %a, 14 ; <i32> [#uses=1] + %r_15 = mul i32 %a, 15 ; <i32> [#uses=1] + %r_16 = mul i32 %a, 16 ; <i32> [#uses=1] + %r_17 = mul i32 %a, 17 ; <i32> [#uses=1] + %r_18 = mul i32 %a, 18 ; <i32> [#uses=1] + %r_19 = mul i32 %a, 19 ; <i32> [#uses=1] + call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 0, i32 %r_0 ) ; <i32>:2 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 1, i32 %r_1 ) ; <i32>:3 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 2, i32 %r_2 ) ; <i32>:4 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 3, i32 %r_3 ) ; <i32>:5 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 4, i32 %r_4 ) ; <i32>:6 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 5, i32 %r_5 ) ; <i32>:7 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 6, i32 %r_6 ) ; <i32>:8 [#uses=0] + call i32 
(i8*, ...)* @printf( i8* %a_mul_s, i32 7, i32 %r_7 ) ; <i32>:9 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 8, i32 %r_8 ) ; <i32>:10 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 9, i32 %r_9 ) ; <i32>:11 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 10, i32 %r_10 ) ; <i32>:12 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 11, i32 %r_11 ) ; <i32>:13 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 12, i32 %r_12 ) ; <i32>:14 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 13, i32 %r_13 ) ; <i32>:15 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 14, i32 %r_14 ) ; <i32>:16 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 15, i32 %r_15 ) ; <i32>:17 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 16, i32 %r_16 ) ; <i32>:18 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 17, i32 %r_17 ) ; <i32>:19 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 18, i32 %r_18 ) ; <i32>:20 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 19, i32 %r_19 ) ; <i32>:21 [#uses=0] + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/Generic/print-mul.ll b/src/LLVM/test/CodeGen/Generic/print-mul.ll new file mode 100644 index 0000000..d37b360 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/print-mul.ll
@@ -0,0 +1,32 @@ +; RUN: llc < %s + +@a_str = internal constant [8 x i8] c"a = %d\0A\00" ; <[8 x i8]*> [#uses=1] +@b_str = internal constant [8 x i8] c"b = %d\0A\00" ; <[8 x i8]*> [#uses=1] +@a_mul_str = internal constant [13 x i8] c"a * %d = %d\0A\00" ; <[13 x i8]*> [#uses=1] +@A = global i32 2 ; <i32*> [#uses=1] +@B = global i32 5 ; <i32*> [#uses=1] + +declare i32 @printf(i8*, ...) + +define i32 @main() { +entry: + %a = load i32* @A ; <i32> [#uses=2] + %b = load i32* @B ; <i32> [#uses=1] + %a_s = getelementptr [8 x i8]* @a_str, i64 0, i64 0 ; <i8*> [#uses=1] + %b_s = getelementptr [8 x i8]* @b_str, i64 0, i64 0 ; <i8*> [#uses=1] + %a_mul_s = getelementptr [13 x i8]* @a_mul_str, i64 0, i64 0 ; <i8*> [#uses=1] + call i32 (i8*, ...)* @printf( i8* %a_s, i32 %a ) ; <i32>:0 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %b_s, i32 %b ) ; <i32>:1 [#uses=0] + br label %shl_test + +shl_test: ; preds = %shl_test, %entry + %s = phi i32 [ 0, %entry ], [ %s_inc, %shl_test ] ; <i32> [#uses=4] + %result = mul i32 %a, %s ; <i32> [#uses=1] + call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 %s, i32 %result ) ; <i32>:2 [#uses=0] + %s_inc = add i32 %s, 1 ; <i32> [#uses=1] + %done = icmp eq i32 %s, 256 ; <i1> [#uses=1] + br i1 %done, label %fini, label %shl_test + +fini: ; preds = %shl_test + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/Generic/print-shift.ll b/src/LLVM/test/CodeGen/Generic/print-shift.ll new file mode 100644 index 0000000..06866f3 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/print-shift.ll
@@ -0,0 +1,34 @@ +; RUN: llc < %s + +@a_str = internal constant [8 x i8] c"a = %d\0A\00" ; <[8 x i8]*> [#uses=1] +@b_str = internal constant [8 x i8] c"b = %d\0A\00" ; <[8 x i8]*> [#uses=1] +@a_shl_str = internal constant [14 x i8] c"a << %d = %d\0A\00" ; <[14 x i8]*> [#uses=1] +@A = global i32 2 ; <i32*> [#uses=1] +@B = global i32 5 ; <i32*> [#uses=1] + +declare i32 @printf(i8*, ...) + +define i32 @main() { +entry: + %a = load i32* @A ; <i32> [#uses=2] + %b = load i32* @B ; <i32> [#uses=1] + %a_s = getelementptr [8 x i8]* @a_str, i64 0, i64 0 ; <i8*> [#uses=1] + %b_s = getelementptr [8 x i8]* @b_str, i64 0, i64 0 ; <i8*> [#uses=1] + %a_shl_s = getelementptr [14 x i8]* @a_shl_str, i64 0, i64 0 ; <i8*> [#uses=1] + call i32 (i8*, ...)* @printf( i8* %a_s, i32 %a ) ; <i32>:0 [#uses=0] + call i32 (i8*, ...)* @printf( i8* %b_s, i32 %b ) ; <i32>:1 [#uses=0] + br label %shl_test + +shl_test: ; preds = %shl_test, %entry + %s = phi i8 [ 0, %entry ], [ %s_inc, %shl_test ] ; <i8> [#uses=4] + %shift.upgrd.1 = zext i8 %s to i32 ; <i32> [#uses=1] + %result = shl i32 %a, %shift.upgrd.1 ; <i32> [#uses=1] + call i32 (i8*, ...)* @printf( i8* %a_shl_s, i8 %s, i32 %result ) ; <i32>:2 [#uses=0] + %s_inc = add i8 %s, 1 ; <i8> [#uses=1] + %done = icmp eq i8 %s, 32 ; <i1> [#uses=1] + br i1 %done, label %fini, label %shl_test + +fini: ; preds = %shl_test + ret i32 0 +} +
diff --git a/src/LLVM/test/CodeGen/Generic/ret0.ll b/src/LLVM/test/CodeGen/Generic/ret0.ll new file mode 100644 index 0000000..1da4375 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/ret0.ll
@@ -0,0 +1,5 @@ +; RUN: llc < %s + +define i32 @main() { + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/Generic/ret42.ll b/src/LLVM/test/CodeGen/Generic/ret42.ll new file mode 100644 index 0000000..587de2f --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/ret42.ll
@@ -0,0 +1,5 @@ +; RUN: llc < %s + +define i32 @main() { + ret i32 42 +}
diff --git a/src/LLVM/test/CodeGen/Generic/select-cc.ll b/src/LLVM/test/CodeGen/Generic/select-cc.ll new file mode 100644 index 0000000..b653e2a --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/select-cc.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s +; PR2504 + +define <2 x double> @vector_select(<2 x double> %x, <2 x double> %y) nounwind { + %x.lo = extractelement <2 x double> %x, i32 0 ; <double> [#uses=1] + %x.lo.ge = fcmp oge double %x.lo, 0.000000e+00 ; <i1> [#uses=1] + %a.d = select i1 %x.lo.ge, <2 x double> %y, <2 x double> %x ; <<2 x double>> [#uses=1] + ret <2 x double> %a.d +}
diff --git a/src/LLVM/test/CodeGen/Generic/select.ll b/src/LLVM/test/CodeGen/Generic/select.ll new file mode 100644 index 0000000..7ef63d5 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/select.ll
@@ -0,0 +1,187 @@ +; RUN: llc < %s + +%Domain = type { i8*, i32, i32*, i32, i32, i32*, %Domain* } +@AConst = constant i32 123 ; <i32*> [#uses=1] + +; Test setting values of different constants in registers. +; +define void @testConsts(i32 %N, float %X) { + %a = add i32 %N, 1 ; <i32> [#uses=0] + %i = add i32 %N, 12345678 ; <i32> [#uses=0] + %b = add i16 4, 3 ; <i16> [#uses=0] + %c = fadd float %X, 0.000000e+00 ; <float> [#uses=0] + %d = fadd float %X, 0x400921CAC0000000 ; <float> [#uses=0] + %f = add i32 -1, 10 ; <i32> [#uses=0] + %g = add i16 20, -1 ; <i16> [#uses=0] + %j = add i16 -1, 30 ; <i16> [#uses=0] + %h = add i8 40, -1 ; <i8> [#uses=0] + %k = add i8 -1, 50 ; <i8> [#uses=0] + ret void +} + +; A SetCC whose result is used should produce instructions to +; compute the boolean value in a register. One whose result +; is unused will only generate the condition code but not +; the boolean result. +; +define void @unusedBool(i32* %x, i32* %y) { + icmp eq i32* %x, %y ; <i1>:1 [#uses=1] + xor i1 %1, true ; <i1>:2 [#uses=0] + icmp ne i32* %x, %y ; <i1>:3 [#uses=0] + ret void +} + +; A constant argument to a Phi produces a Cast instruction in the +; corresponding predecessor basic block. This checks a few things: +; -- phi arguments coming from the bottom of the same basic block +; (they should not be forward substituted in the machine code!) 
+; -- code generation for casts of various types +; -- use of immediate fields for integral constants of different sizes +; -- branch on a constant condition +; +define void @mergeConstants(i32* %x, i32* %y) { +; <label>:0 + br label %Top + +Top: ; preds = %Next, %Top, %0 + phi i32 [ 0, %0 ], [ 1, %Top ], [ 524288, %Next ] ; <i32>:1 [#uses=0] + phi float [ 0.000000e+00, %0 ], [ 1.000000e+00, %Top ], [ 2.000000e+00, %Next ] ; <float>:2 [#uses=0] + phi double [ 5.000000e-01, %0 ], [ 1.500000e+00, %Top ], [ 2.500000e+00, %Next ] + phi i1 [ true, %0 ], [ false, %Top ], [ true, %Next ] ; <i1>:4 [#uses=0] + br i1 true, label %Top, label %Next + +Next: ; preds = %Top + br label %Top +} + + + +; A constant argument to a cast used only once should be forward substituted +; and loaded where needed, which happens is: +; -- User of cast has no immediate field +; -- User of cast has immediate field but constant is too large to fit +; or constant is not resolved until later (e.g., global address) +; -- User of cast uses it as a call arg. or return value so it is an implicit +; use but has to be loaded into a virtual register so that the reg. +; allocator can allocate the appropriate phys. reg. for it +; +define i32* @castconst(float) { + %castbig = trunc i64 99999999 to i32 ; <i32> [#uses=1] + %castsmall = trunc i64 1 to i32 ; <i32> [#uses=1] + %usebig = add i32 %castbig, %castsmall ; <i32> [#uses=0] + %castglob = bitcast i32* @AConst to i64* ; <i64*> [#uses=1] + %dummyl = load i64* %castglob ; <i64> [#uses=0] + %castnull = inttoptr i64 0 to i32* ; <i32*> [#uses=1] + ret i32* %castnull +} + +; Test branch-on-comparison-with-zero, in two ways: +; 1. can be folded +; 2. 
cannot be folded because result of comparison is used twice +; +define void @testbool(i32 %A, i32 %B) { + br label %Top + +Top: ; preds = %loop, %0 + %D = add i32 %A, %B ; <i32> [#uses=2] + %E = sub i32 %D, -4 ; <i32> [#uses=1] + %C = icmp sle i32 %E, 0 ; <i1> [#uses=1] + br i1 %C, label %retlbl, label %loop + +loop: ; preds = %loop, %Top + %F = add i32 %A, %B ; <i32> [#uses=0] + %G = sub i32 %D, -4 ; <i32> [#uses=1] + %D.upgrd.1 = icmp sle i32 %G, 0 ; <i1> [#uses=1] + %E.upgrd.2 = xor i1 %D.upgrd.1, true ; <i1> [#uses=1] + br i1 %E.upgrd.2, label %loop, label %Top + +retlbl: ; preds = %Top + ret void +} + + +;; Test use of a boolean result in cast operations. +;; Requires converting a condition code result into a 0/1 value in a reg. +;; +define i32 @castbool(i32 %A, i32 %B) { +bb0: + %cond213 = icmp slt i32 %A, %B ; <i1> [#uses=1] + %cast110 = zext i1 %cond213 to i8 ; <i8> [#uses=1] + %cast109 = zext i8 %cast110 to i32 ; <i32> [#uses=1] + ret i32 %cast109 +} + +;; Test use of a boolean result in arithmetic and logical operations. +;; Requires converting a condition code result into a 0/1 value in a reg. +;; +define i1 @boolexpr(i1 %b, i32 %N) { + %b2 = icmp sge i32 %N, 0 ; <i1> [#uses=1] + %b3 = and i1 %b, %b2 ; <i1> [#uses=1] + ret i1 %b3 +} + +; Test branch on floating point comparison +; +define void @testfloatbool(float %x, float %y) { + br label %Top + +Top: ; preds = %Top, %0 + %p = fadd float %x, %y ; <float> [#uses=1] + %z = fsub float %x, %y ; <float> [#uses=1] + %b = fcmp ole float %p, %z ; <i1> [#uses=2] + %c = xor i1 %b, true ; <i1> [#uses=0] + br i1 %b, label %Top, label %goon + +goon: ; preds = %Top + ret void +} + + +; Test cases where an LLVM instruction requires no machine +; instructions (e.g., cast int* to long). But there are 2 cases: +; 1. If the result register has only a single use and the use is in the +; same basic block, the operand will be copy-propagated during +; instruction selection. +; 2. 
If the result register has multiple uses or is in a different +; basic block, it cannot (or will not) be copy propagated during +; instruction selection. It will generate a +; copy instruction (add-with-0), but this copy should get coalesced +; away by the register allocator. +; +define i32 @checkForward(i32 %N, i32* %A) { +bb2: + %reg114 = shl i32 %N, 2 ; <i32> [#uses=1] + %cast115 = sext i32 %reg114 to i64 ; <i64> [#uses=1] + %cast116 = ptrtoint i32* %A to i64 ; <i64> [#uses=1] + %reg116 = add i64 %cast116, %cast115 ; <i64> [#uses=1] + %castPtr = inttoptr i64 %reg116 to i32* ; <i32*> [#uses=1] + %reg118 = load i32* %castPtr ; <i32> [#uses=1] + %cast117 = sext i32 %reg118 to i64 ; <i64> [#uses=2] + %reg159 = add i64 1234567, %cast117 ; <i64> [#uses=0] + %reg160 = add i64 7654321, %cast117 ; <i64> [#uses=0] + ret i32 0 +} + + +; Test case for unary NOT operation constructed from XOR. +; +define void @checkNot(i1 %b, i32 %i) { + %notB = xor i1 %b, true ; <i1> [#uses=1] + %notI = xor i32 %i, -1 ; <i32> [#uses=2] + %F = icmp sge i32 %notI, 100 ; <i1> [#uses=1] + %J = add i32 %i, %i ; <i32> [#uses=1] + %andNotB = and i1 %F, %notB ; <i1> [#uses=0] + %andNotI = and i32 %J, %notI ; <i32> [#uses=0] + %notB2 = xor i1 true, %b ; <i1> [#uses=0] + %notI2 = xor i32 -1, %i ; <i32> [#uses=0] + ret void +} + +; Test case for folding getelementptr into a load/store +; +define i32 @checkFoldGEP(%Domain* %D, i64 %idx) { + %reg841 = getelementptr %Domain* %D, i64 0, i32 1 ; <i32*> [#uses=1] + %reg820 = load i32* %reg841 ; <i32> [#uses=1] + ret i32 %reg820 +} +
diff --git a/src/LLVM/test/CodeGen/Generic/shift-int64.ll b/src/LLVM/test/CodeGen/Generic/shift-int64.ll new file mode 100644 index 0000000..6b93a00 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/shift-int64.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s + +define i64 @test_imm(i64 %X) { + %Y = ashr i64 %X, 17 ; <i64> [#uses=1] + ret i64 %Y +} + +define i64 @test_variable(i64 %X, i8 %Amt) { + %shift.upgrd.1 = zext i8 %Amt to i64 ; <i64> [#uses=1] + %Y = ashr i64 %X, %shift.upgrd.1 ; <i64> [#uses=1] + ret i64 %Y +}
diff --git a/src/LLVM/test/CodeGen/Generic/stacksave-restore.ll b/src/LLVM/test/CodeGen/Generic/stacksave-restore.ll new file mode 100644 index 0000000..25eb167 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/stacksave-restore.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s + +declare i8* @llvm.stacksave() + +declare void @llvm.stackrestore(i8*) + +define i32* @test(i32 %N) { + %tmp = call i8* @llvm.stacksave( ) ; <i8*> [#uses=1] + %P = alloca i32, i32 %N ; <i32*> [#uses=1] + call void @llvm.stackrestore( i8* %tmp ) + %Q = alloca i32, i32 %N ; <i32*> [#uses=0] + ret i32* %P +} +
diff --git a/src/LLVM/test/CodeGen/Generic/storetrunc-fp.ll b/src/LLVM/test/CodeGen/Generic/storetrunc-fp.ll new file mode 100644 index 0000000..7f7c7f7 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/storetrunc-fp.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s + +define void @foo(double %a, double %b, float* %fp) { + %c = fadd double %a, %b + %d = fptrunc double %c to float + store float %d, float* %fp + ret void +}
diff --git a/src/LLVM/test/CodeGen/Generic/switch-lower-feature.ll b/src/LLVM/test/CodeGen/Generic/switch-lower-feature.ll new file mode 100644 index 0000000..5c9e8da --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/switch-lower-feature.ll
@@ -0,0 +1,63 @@ +; RUN: llc < %s + +define i32 @test(i32 %tmp158) { +entry: + switch i32 %tmp158, label %bb336 [ + i32 120, label %bb338 + i32 121, label %bb338 + i32 122, label %bb338 + i32 123, label %bb338 + i32 124, label %bb338 + i32 125, label %bb338 + i32 126, label %bb338 + i32 1024, label %bb338 + i32 0, label %bb338 + i32 1, label %bb338 + i32 2, label %bb338 + i32 3, label %bb338 + i32 4, label %bb338 + i32 5, label %bb338 + ] +bb336: + ret i32 10 +bb338: + ret i32 11 +} + +define i32 @test2(i32 %tmp158) { +entry: + switch i32 %tmp158, label %bb336 [ + i32 -2147483648, label %bb338 + i32 -2147483647, label %bb338 + i32 -2147483646, label %bb338 + i32 120, label %bb338 + i32 121, label %bb339 + i32 122, label %bb340 + i32 123, label %bb341 + i32 124, label %bb342 + i32 125, label %bb343 + i32 126, label %bb336 + i32 1024, label %bb338 + i32 0, label %bb338 + i32 1, label %bb338 + i32 2, label %bb338 + i32 3, label %bb338 + i32 4, label %bb338 + i32 5, label %bb338 + ] +bb336: + ret i32 10 +bb338: + ret i32 11 +bb339: + ret i32 12 +bb340: + ret i32 13 +bb341: + ret i32 14 +bb342: + ret i32 15 +bb343: + ret i32 18 + +}
diff --git a/src/LLVM/test/CodeGen/Generic/switch-lower.ll b/src/LLVM/test/CodeGen/Generic/switch-lower.ll new file mode 100644 index 0000000..951cc97 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/switch-lower.ll
@@ -0,0 +1,348 @@ +; RUN: llc < %s + + +; PR5421 +define void @test1() { +entry: + switch i128 undef, label %exit [ + i128 55340232221128654848, label %exit + i128 92233720368547758080, label %exit + i128 73786976294838206464, label %exit + i128 147573952589676412928, label %exit + ] +exit: + unreachable +} + + +; PR1197 +define void @test2() { +entry: + br i1 false, label %cond_next954, label %cond_true924 + +cond_true924: ; preds = %entry + ret void + +cond_next954: ; preds = %entry + switch i8 0, label %cleanup7419 [ + i8 1, label %bb956 + i8 2, label %bb1069 + i8 4, label %bb7328 + i8 5, label %bb1267 + i8 8, label %bb1348 + i8 9, label %bb7328 + i8 11, label %bb1439 + i8 12, label %bb1484 + i8 13, label %bb1706 + i8 14, label %bb1783 + i8 17, label %bb1925 + i8 18, label %bb1929 + i8 19, label %bb2240 + i8 25, label %bb2447 + i8 27, label %bb2480 + i8 29, label %bb2590 + i8 30, label %bb2594 + i8 31, label %bb2621 + i8 32, label %bb2664 + i8 33, label %bb2697 + i8 34, label %bb2735 + i8 37, label %bb2786 + i8 38, label %bb2849 + i8 39, label %bb3269 + i8 41, label %bb3303 + i8 42, label %bb3346 + i8 43, label %bb3391 + i8 44, label %bb3395 + i8 50, label %bb3673 + i8 52, label %bb3677 + i8 53, label %bb3693 + i8 54, label %bb7328 + i8 56, label %bb3758 + i8 57, label %bb3787 + i8 64, label %bb5019 + i8 68, label %cond_true4235 + i8 69, label %bb4325 + i8 70, label %bb4526 + i8 72, label %bb4618 + i8 73, label %bb4991 + i8 80, label %bb5012 + i8 82, label %bb5019 + i8 84, label %bb5518 + i8 86, label %bb5752 + i8 87, label %bb5953 + i8 89, label %bb6040 + i8 90, label %bb6132 + i8 92, label %bb6186 + i8 93, label %bb6151 + i8 94, label %bb6155 + i8 97, label %bb6355 + i8 98, label %bb5019 + i8 99, label %bb6401 + i8 101, label %bb5019 + i8 102, label %bb1484 + i8 104, label %bb7064 + i8 105, label %bb7068 + i8 106, label %bb7072 + i8 108, label %bb1065 + i8 109, label %bb1702 + i8 110, label %bb2200 + i8 111, label %bb2731 + i8 112, label %bb2782 + i8 113, 
label %bb2845 + i8 114, label %bb2875 + i8 115, label %bb3669 + i8 116, label %bb7316 + i8 117, label %bb7316 + i8 118, label %bb3875 + i8 119, label %bb4359 + i8 120, label %bb4987 + i8 121, label %bb5008 + i8 122, label %bb5786 + i8 123, label %bb6147 + i8 124, label %bb6916 + i8 125, label %bb6920 + i8 126, label %bb6955 + i8 127, label %bb6990 + i8 -128, label %bb7027 + i8 -127, label %bb3879 + i8 -126, label %bb4700 + i8 -125, label %bb7076 + i8 -124, label %bb2366 + i8 -123, label %bb2366 + i8 -122, label %bb5490 + ] + +bb956: ; preds = %cond_next954 + ret void + +bb1065: ; preds = %cond_next954 + ret void + +bb1069: ; preds = %cond_next954 + ret void + +bb1267: ; preds = %cond_next954 + ret void + +bb1348: ; preds = %cond_next954 + ret void + +bb1439: ; preds = %cond_next954 + ret void + +bb1484: ; preds = %cond_next954, %cond_next954 + ret void + +bb1702: ; preds = %cond_next954 + ret void + +bb1706: ; preds = %cond_next954 + ret void + +bb1783: ; preds = %cond_next954 + ret void + +bb1925: ; preds = %cond_next954 + ret void + +bb1929: ; preds = %cond_next954 + ret void + +bb2200: ; preds = %cond_next954 + ret void + +bb2240: ; preds = %cond_next954 + ret void + +bb2366: ; preds = %cond_next954, %cond_next954 + ret void + +bb2447: ; preds = %cond_next954 + ret void + +bb2480: ; preds = %cond_next954 + ret void + +bb2590: ; preds = %cond_next954 + ret void + +bb2594: ; preds = %cond_next954 + ret void + +bb2621: ; preds = %cond_next954 + ret void + +bb2664: ; preds = %cond_next954 + ret void + +bb2697: ; preds = %cond_next954 + ret void + +bb2731: ; preds = %cond_next954 + ret void + +bb2735: ; preds = %cond_next954 + ret void + +bb2782: ; preds = %cond_next954 + ret void + +bb2786: ; preds = %cond_next954 + ret void + +bb2845: ; preds = %cond_next954 + ret void + +bb2849: ; preds = %cond_next954 + ret void + +bb2875: ; preds = %cond_next954 + ret void + +bb3269: ; preds = %cond_next954 + ret void + +bb3303: ; preds = %cond_next954 + ret void + +bb3346: ; 
preds = %cond_next954 + ret void + +bb3391: ; preds = %cond_next954 + ret void + +bb3395: ; preds = %cond_next954 + ret void + +bb3669: ; preds = %cond_next954 + ret void + +bb3673: ; preds = %cond_next954 + ret void + +bb3677: ; preds = %cond_next954 + ret void + +bb3693: ; preds = %cond_next954 + ret void + +bb3758: ; preds = %cond_next954 + ret void + +bb3787: ; preds = %cond_next954 + ret void + +bb3875: ; preds = %cond_next954 + ret void + +bb3879: ; preds = %cond_next954 + ret void + +cond_true4235: ; preds = %cond_next954 + ret void + +bb4325: ; preds = %cond_next954 + ret void + +bb4359: ; preds = %cond_next954 + ret void + +bb4526: ; preds = %cond_next954 + ret void + +bb4618: ; preds = %cond_next954 + ret void + +bb4700: ; preds = %cond_next954 + ret void + +bb4987: ; preds = %cond_next954 + ret void + +bb4991: ; preds = %cond_next954 + ret void + +bb5008: ; preds = %cond_next954 + ret void + +bb5012: ; preds = %cond_next954 + ret void + +bb5019: ; preds = %cond_next954, %cond_next954, %cond_next954, %cond_next954 + ret void + +bb5490: ; preds = %cond_next954 + ret void + +bb5518: ; preds = %cond_next954 + ret void + +bb5752: ; preds = %cond_next954 + ret void + +bb5786: ; preds = %cond_next954 + ret void + +bb5953: ; preds = %cond_next954 + ret void + +bb6040: ; preds = %cond_next954 + ret void + +bb6132: ; preds = %cond_next954 + ret void + +bb6147: ; preds = %cond_next954 + ret void + +bb6151: ; preds = %cond_next954 + ret void + +bb6155: ; preds = %cond_next954 + ret void + +bb6186: ; preds = %cond_next954 + ret void + +bb6355: ; preds = %cond_next954 + ret void + +bb6401: ; preds = %cond_next954 + ret void + +bb6916: ; preds = %cond_next954 + ret void + +bb6920: ; preds = %cond_next954 + ret void + +bb6955: ; preds = %cond_next954 + ret void + +bb6990: ; preds = %cond_next954 + ret void + +bb7027: ; preds = %cond_next954 + ret void + +bb7064: ; preds = %cond_next954 + ret void + +bb7068: ; preds = %cond_next954 + ret void + +bb7072: ; preds = 
%cond_next954 + ret void + +bb7076: ; preds = %cond_next954 + ret void + +bb7316: ; preds = %cond_next954, %cond_next954 + ret void + +bb7328: ; preds = %cond_next954, %cond_next954, %cond_next954 + ret void + +cleanup7419: ; preds = %cond_next954 + ret void +}
diff --git a/src/LLVM/test/CodeGen/Generic/trap.ll b/src/LLVM/test/CodeGen/Generic/trap.ll new file mode 100644 index 0000000..67d1a7a --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/trap.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s +define i32 @test() noreturn nounwind { +entry: + tail call void @llvm.trap( ) + unreachable +} + +declare void @llvm.trap() nounwind +
diff --git a/src/LLVM/test/CodeGen/Generic/v-split.ll b/src/LLVM/test/CodeGen/Generic/v-split.ll new file mode 100644 index 0000000..634b562 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/v-split.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s +%f8 = type <8 x float> + +define void @test_f8(%f8 *%P, %f8* %Q, %f8 *%S) { + %p = load %f8* %P + %q = load %f8* %Q + %R = fadd %f8 %p, %q + store %f8 %R, %f8 *%S + ret void +} +
diff --git a/src/LLVM/test/CodeGen/Generic/vector-casts.ll b/src/LLVM/test/CodeGen/Generic/vector-casts.ll new file mode 100644 index 0000000..a26918b --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/vector-casts.ll
@@ -0,0 +1,45 @@ +; RUN: llc < %s +; PR2671 + +define void @a(<2 x double>* %p, <2 x i8>* %q) { + %t = load <2 x double>* %p + %r = fptosi <2 x double> %t to <2 x i8> + store <2 x i8> %r, <2 x i8>* %q + ret void +} +define void @b(<2 x double>* %p, <2 x i8>* %q) { + %t = load <2 x double>* %p + %r = fptoui <2 x double> %t to <2 x i8> + store <2 x i8> %r, <2 x i8>* %q + ret void +} +define void @c(<2 x i8>* %p, <2 x double>* %q) { + %t = load <2 x i8>* %p + %r = sitofp <2 x i8> %t to <2 x double> + store <2 x double> %r, <2 x double>* %q + ret void +} +define void @d(<2 x i8>* %p, <2 x double>* %q) { + %t = load <2 x i8>* %p + %r = uitofp <2 x i8> %t to <2 x double> + store <2 x double> %r, <2 x double>* %q + ret void +} +define void @e(<2 x i8>* %p, <2 x i16>* %q) { + %t = load <2 x i8>* %p + %r = sext <2 x i8> %t to <2 x i16> + store <2 x i16> %r, <2 x i16>* %q + ret void +} +define void @f(<2 x i8>* %p, <2 x i16>* %q) { + %t = load <2 x i8>* %p + %r = zext <2 x i8> %t to <2 x i16> + store <2 x i16> %r, <2 x i16>* %q + ret void +} +define void @g(<2 x i16>* %p, <2 x i8>* %q) { + %t = load <2 x i16>* %p + %r = trunc <2 x i16> %t to <2 x i8> + store <2 x i8> %r, <2 x i8>* %q + ret void +}
diff --git a/src/LLVM/test/CodeGen/Generic/vector-constantexpr.ll b/src/LLVM/test/CodeGen/Generic/vector-constantexpr.ll new file mode 100644 index 0000000..0579174 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/vector-constantexpr.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s + +define void @""(float* %inregs, float* %outregs) { + %a_addr.i = alloca <4 x float> ; <<4 x float>*> [#uses=1] + store <4 x float> < float undef, float undef, float undef, float undef >, <4 x float>* %a_addr.i + ret void +}
diff --git a/src/LLVM/test/CodeGen/Generic/vector-identity-shuffle.ll b/src/LLVM/test/CodeGen/Generic/vector-identity-shuffle.ll new file mode 100644 index 0000000..d800fea --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/vector-identity-shuffle.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s + + +define void @test(<4 x float>* %tmp2.i) { + %tmp2.i.upgrd.1 = load <4 x float>* %tmp2.i ; <<4 x float>> [#uses=4] + %xFloat0.48 = extractelement <4 x float> %tmp2.i.upgrd.1, i32 0 ; <float> [#uses=1] + %inFloat0.49 = insertelement <4 x float> undef, float %xFloat0.48, i32 0 ; <<4 x float>> [#uses=1] + %xFloat1.50 = extractelement <4 x float> %tmp2.i.upgrd.1, i32 1 ; <float> [#uses=1] + %inFloat1.52 = insertelement <4 x float> %inFloat0.49, float %xFloat1.50, i32 1 ; <<4 x float>> [#uses=1] + %xFloat2.53 = extractelement <4 x float> %tmp2.i.upgrd.1, i32 2 ; <float> [#uses=1] + %inFloat2.55 = insertelement <4 x float> %inFloat1.52, float %xFloat2.53, i32 2 ; <<4 x float>> [#uses=1] + %xFloat3.56 = extractelement <4 x float> %tmp2.i.upgrd.1, i32 3 ; <float> [#uses=1] + %inFloat3.58 = insertelement <4 x float> %inFloat2.55, float %xFloat3.56, i32 3 ; <<4 x float>> [#uses=1] + store <4 x float> %inFloat3.58, <4 x float>* %tmp2.i + ret void +} +
diff --git a/src/LLVM/test/CodeGen/Generic/vector.ll b/src/LLVM/test/CodeGen/Generic/vector.ll new file mode 100644 index 0000000..af1d787 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/vector.ll
@@ -0,0 +1,154 @@ +; Test that vectors are scalarized/lowered correctly. +; RUN: llc < %s + + +%d8 = type <8 x double> +%f1 = type <1 x float> +%f2 = type <2 x float> +%f4 = type <4 x float> +%f8 = type <8 x float> +%i4 = type <4 x i32> + +;;; TEST HANDLING OF VARIOUS VECTOR SIZES + +define void @test_f1(%f1* %P, %f1* %Q, %f1* %S) { + %p = load %f1* %P ; <%f1> [#uses=1] + %q = load %f1* %Q ; <%f1> [#uses=1] + %R = fadd %f1 %p, %q ; <%f1> [#uses=1] + store %f1 %R, %f1* %S + ret void +} + +define void @test_f2(%f2* %P, %f2* %Q, %f2* %S) { + %p = load %f2* %P ; <%f2> [#uses=1] + %q = load %f2* %Q ; <%f2> [#uses=1] + %R = fadd %f2 %p, %q ; <%f2> [#uses=1] + store %f2 %R, %f2* %S + ret void +} + +define void @test_f4(%f4* %P, %f4* %Q, %f4* %S) { + %p = load %f4* %P ; <%f4> [#uses=1] + %q = load %f4* %Q ; <%f4> [#uses=1] + %R = fadd %f4 %p, %q ; <%f4> [#uses=1] + store %f4 %R, %f4* %S + ret void +} + +define void @test_f8(%f8* %P, %f8* %Q, %f8* %S) { + %p = load %f8* %P ; <%f8> [#uses=1] + %q = load %f8* %Q ; <%f8> [#uses=1] + %R = fadd %f8 %p, %q ; <%f8> [#uses=1] + store %f8 %R, %f8* %S + ret void +} + +define void @test_fmul(%f8* %P, %f8* %Q, %f8* %S) { + %p = load %f8* %P ; <%f8> [#uses=1] + %q = load %f8* %Q ; <%f8> [#uses=1] + %R = fmul %f8 %p, %q ; <%f8> [#uses=1] + store %f8 %R, %f8* %S + ret void +} + +define void @test_div(%f8* %P, %f8* %Q, %f8* %S) { + %p = load %f8* %P ; <%f8> [#uses=1] + %q = load %f8* %Q ; <%f8> [#uses=1] + %R = fdiv %f8 %p, %q ; <%f8> [#uses=1] + store %f8 %R, %f8* %S + ret void +} + +;;; TEST VECTOR CONSTRUCTS + + +define void @test_cst(%f4* %P, %f4* %S) { + %p = load %f4* %P ; <%f4> [#uses=1] + %R = fadd %f4 %p, < float 0x3FB99999A0000000, float 1.000000e+00, float 2.000000e+00, float 4.500000e+00 > ; <%f4> [#uses=1] + store %f4 %R, %f4* %S + ret void +} + +define void @test_zero(%f4* %P, %f4* %S) { + %p = load %f4* %P ; <%f4> [#uses=1] + %R = fadd %f4 %p, zeroinitializer ; <%f4> [#uses=1] + store %f4 %R, %f4* %S + ret void +} + +define 
void @test_undef(%f4* %P, %f4* %S) { + %p = load %f4* %P ; <%f4> [#uses=1] + %R = fadd %f4 %p, undef ; <%f4> [#uses=1] + store %f4 %R, %f4* %S + ret void +} + +define void @test_constant_insert(%f4* %S) { + %R = insertelement %f4 zeroinitializer, float 1.000000e+01, i32 0 ; <%f4> [#uses=1] + store %f4 %R, %f4* %S + ret void +} + +define void @test_variable_buildvector(float %F, %f4* %S) { + %R = insertelement %f4 zeroinitializer, float %F, i32 0 ; <%f4> [#uses=1] + store %f4 %R, %f4* %S + ret void +} + +define void @test_scalar_to_vector(float %F, %f4* %S) { + %R = insertelement %f4 undef, float %F, i32 0 ; <%f4> [#uses=1] + store %f4 %R, %f4* %S + ret void +} + +define float @test_extract_elt(%f8* %P) { + %p = load %f8* %P ; <%f8> [#uses=1] + %R = extractelement %f8 %p, i32 3 ; <float> [#uses=1] + ret float %R +} + +define double @test_extract_elt2(%d8* %P) { + %p = load %d8* %P ; <%d8> [#uses=1] + %R = extractelement %d8 %p, i32 3 ; <double> [#uses=1] + ret double %R +} + +define void @test_cast_1(%f4* %b, %i4* %a) { + %tmp = load %f4* %b ; <%f4> [#uses=1] + %tmp2 = fadd %f4 %tmp, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > ; <%f4> [#uses=1] + %tmp3 = bitcast %f4 %tmp2 to %i4 ; <%i4> [#uses=1] + %tmp4 = add %i4 %tmp3, < i32 1, i32 2, i32 3, i32 4 > ; <%i4> [#uses=1] + store %i4 %tmp4, %i4* %a + ret void +} + +define void @test_cast_2(%f8* %a, <8 x i32>* %b) { + %T = load %f8* %a ; <%f8> [#uses=1] + %T2 = bitcast %f8 %T to <8 x i32> ; <<8 x i32>> [#uses=1] + store <8 x i32> %T2, <8 x i32>* %b + ret void +} + +;;; TEST IMPORTANT IDIOMS + +define void @splat(%f4* %P, %f4* %Q, float %X) { + %tmp = insertelement %f4 undef, float %X, i32 0 ; <%f4> [#uses=1] + %tmp2 = insertelement %f4 %tmp, float %X, i32 1 ; <%f4> [#uses=1] + %tmp4 = insertelement %f4 %tmp2, float %X, i32 2 ; <%f4> [#uses=1] + %tmp6 = insertelement %f4 %tmp4, float %X, i32 3 ; <%f4> [#uses=1] + %q = load %f4* %Q ; <%f4> [#uses=1] + %R = fadd %f4 %q, %tmp6 ; <%f4> 
[#uses=1] + store %f4 %R, %f4* %P + ret void +} + +define void @splat_i4(%i4* %P, %i4* %Q, i32 %X) { + %tmp = insertelement %i4 undef, i32 %X, i32 0 ; <%i4> [#uses=1] + %tmp2 = insertelement %i4 %tmp, i32 %X, i32 1 ; <%i4> [#uses=1] + %tmp4 = insertelement %i4 %tmp2, i32 %X, i32 2 ; <%i4> [#uses=1] + %tmp6 = insertelement %i4 %tmp4, i32 %X, i32 3 ; <%i4> [#uses=1] + %q = load %i4* %Q ; <%i4> [#uses=1] + %R = add %i4 %q, %tmp6 ; <%i4> [#uses=1] + store %i4 %R, %i4* %P + ret void +}
diff --git a/src/LLVM/test/CodeGen/Generic/zero-sized-array.ll b/src/LLVM/test/CodeGen/Generic/zero-sized-array.ll new file mode 100644 index 0000000..280ba00 --- /dev/null +++ b/src/LLVM/test/CodeGen/Generic/zero-sized-array.ll
@@ -0,0 +1,81 @@ +; RUN: llc < %s +; PR9900 + +%zero = type [0 x i8] +%foobar = type { i32, %zero } + +define void @f(%foobar %arg) { + %arg1 = extractvalue %foobar %arg, 0 + %arg2 = extractvalue %foobar %arg, 1 + call i32 @f2(%zero %arg2, i32 5, i32 42) + ret void +} + +define i32 @f2(%zero %x, i32 %y, i32 %z) { + ret i32 %y +} + +define void @f3(%zero %x, i32 %y) { + call i32 @f2(%zero %x, i32 5, i32 %y) + ret void +} + +define void @f4(%zero %z) { + insertvalue %foobar undef, %zero %z, 1 + ret void +} + +define void @f5(%foobar %x) { +allocas: + %y = extractvalue %foobar %x, 1 + br label %b1 + +b1: + %insert120 = insertvalue %foobar undef, %zero %y, 1 + ret void +} + +define void @f6(%zero %x, %zero %y) { +b1: + br i1 undef, label %end, label %b2 + +b2: + br label %end + +end: + %z = phi %zero [ %y, %b1 ], [ %x, %b2 ] + call void @f4(%zero %z) + ret void +} + +%zero2 = type {} + +define i32 @g1(%zero2 %x, i32 %y, i32 %z) { + ret i32 %y +} + +define void @g2(%zero2 %x, i32 %y) { + call i32 @g1(%zero2 %x, i32 5, i32 %y) + ret void +} + +%zero2r = type {%zero2} + +define i32 @h1(%zero2r %x, i32 %y, i32 %z) { + ret i32 %y +} + +define void @h2(%zero2r %x, i32 %y) { + call i32 @h1(%zero2r %x, i32 5, i32 %y) + ret void +} + +%foobar2 = type { i32, %zero2r } + +define void @h3(%foobar2 %arg) { + %arg1 = extractvalue %foobar2 %arg, 0 + %arg2 = extractvalue %foobar2 %arg, 1 + %arg21 = extractvalue %zero2r %arg2, 0 + call void @g2(%zero2 %arg21, i32 5) + ret void +}
diff --git a/src/LLVM/test/CodeGen/MBlaze/2010-04-07-DbgValueOtherTargets.ll b/src/LLVM/test/CodeGen/MBlaze/2010-04-07-DbgValueOtherTargets.ll new file mode 100644 index 0000000..d8970ea --- /dev/null +++ b/src/LLVM/test/CodeGen/MBlaze/2010-04-07-DbgValueOtherTargets.ll
@@ -0,0 +1,28 @@ +; RUN: llc -O0 -march=mblaze -asm-verbose < %s | FileCheck %s +; Check that DEBUG_VALUE comments come through on a variety of targets. + +define i32 @main() nounwind ssp { +entry: +; CHECK: DEBUG_VALUE + call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9 + ret i32 0, !dbg !10 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.sp = !{!0} + +!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5} +!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 0} +!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] +!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!9 = metadata !{i32 3, i32 11, metadata !8, null} +!10 = metadata !{i32 4, i32 2, metadata !8, null} +
diff --git a/src/LLVM/test/CodeGen/MBlaze/brind.ll b/src/LLVM/test/CodeGen/MBlaze/brind.ll new file mode 100644 index 0000000..2229a87 --- /dev/null +++ b/src/LLVM/test/CodeGen/MBlaze/brind.ll
@@ -0,0 +1,72 @@ +; Ensure that the select instruction is supported and is lowered to +; some sort of branch instruction. +; +; RUN: llc < %s -march=mblaze -mattr=+mul,+fpu,+barrel | FileCheck %s + +declare i32 @printf(i8*, ...) +@MSG = internal constant [13 x i8] c"Message: %d\0A\00" + +@BLKS = private constant [5 x i8*] + [ i8* blockaddress(@brind, %L1), + i8* blockaddress(@brind, %L2), + i8* blockaddress(@brind, %L3), + i8* blockaddress(@brind, %L4), + i8* blockaddress(@brind, %L5) ] + +define i32 @brind(i32 %a, i32 %b) +{ + ; CHECK: brind: +entry: + br label %loop + +loop: + %tmp.0 = phi i32 [ 0, %entry ], [ %tmp.8, %finish ] + %dst.0 = getelementptr [5 x i8*]* @BLKS, i32 0, i32 %tmp.0 + %dst.1 = load i8** %dst.0 + indirectbr i8* %dst.1, [ label %L1, + label %L2, + label %L3, + label %L4, + label %L5 ] + ; CHECK: brad {{r[0-9]*}} + +L1: + %tmp.1 = add i32 %a, %b + br label %finish + ; CHECK: brid + +L2: + %tmp.2 = sub i32 %a, %b + br label %finish + ; CHECK: brid + +L3: + %tmp.3 = mul i32 %a, %b + br label %finish + ; CHECK: brid + +L4: + %tmp.4 = sdiv i32 %a, %b + br label %finish + ; CHECK: brid + +L5: + %tmp.5 = srem i32 %a, %b + br label %finish + +finish: + %tmp.6 = phi i32 [ %tmp.1, %L1 ], + [ %tmp.2, %L2 ], + [ %tmp.3, %L3 ], + [ %tmp.4, %L4 ], + [ %tmp.5, %L5 ] + + call i32 (i8*,...)* @printf( i8* getelementptr([13 x i8]* @MSG,i32 0,i32 0), + i32 %tmp.6) + + %tmp.7 = add i32 %tmp.0, 1 + %tmp.8 = urem i32 %tmp.7, 5 + + br label %loop + ; CHECK: brad {{r[0-9]*}} +}
diff --git a/src/LLVM/test/CodeGen/MBlaze/callind.ll b/src/LLVM/test/CodeGen/MBlaze/callind.ll new file mode 100644 index 0000000..bfc8d00 --- /dev/null +++ b/src/LLVM/test/CodeGen/MBlaze/callind.ll
@@ -0,0 +1,80 @@ +; Ensure that indirect calls work and that they are lowered to some +; sort of branch and link instruction. +; +; RUN: llc < %s -march=mblaze -mattr=+mul,+fpu,+barrel | FileCheck %s + +declare i32 @printf(i8*, ...) +@MSG = internal constant [13 x i8] c"Message: %d\0A\00" + +@FUNS = private constant [5 x i32 (i32,i32)*] + [ i32 (i32,i32)* @doadd, + i32 (i32,i32)* @dosub, + i32 (i32,i32)* @domul, + i32 (i32,i32)* @dodiv, + i32 (i32,i32)* @dorem ] + +define i32 @doadd(i32 %a, i32 %b) +{ + ; CHECK: doadd: + %tmp.0 = add i32 %a, %b + ret i32 %tmp.0 + ; CHECK: rtsd +} + +define i32 @dosub(i32 %a, i32 %b) +{ + ; CHECK: dosub: + %tmp.0 = sub i32 %a, %b + ret i32 %tmp.0 + ; CHECK: rtsd +} + +define i32 @domul(i32 %a, i32 %b) +{ + ; CHECK: domul: + %tmp.0 = mul i32 %a, %b + ret i32 %tmp.0 + ; CHECK: rtsd +} + +define i32 @dodiv(i32 %a, i32 %b) +{ + ; CHECK: dodiv: + %tmp.0 = sdiv i32 %a, %b + ret i32 %tmp.0 + ; CHECK: rtsd +} + +define i32 @dorem(i32 %a, i32 %b) +{ + ; CHECK: dorem: + %tmp.0 = srem i32 %a, %b + ret i32 %tmp.0 + ; CHECK: rtsd +} + +define i32 @callind(i32 %a, i32 %b) +{ + ; CHECK: callind: +entry: + br label %loop + +loop: + %tmp.0 = phi i32 [ 0, %entry ], [ %tmp.3, %loop ] + %dst.0 = getelementptr [5 x i32 (i32,i32)*]* @FUNS, i32 0, i32 %tmp.0 + %dst.1 = load i32 (i32,i32)** %dst.0 + %tmp.1 = call i32 %dst.1(i32 %a, i32 %b) + ; CHECK-NOT: brli + ; CHECK-NOT: brlai + ; CHECK: brl + + call i32 (i8*,...)* @printf( i8* getelementptr([13 x i8]* @MSG,i32 0,i32 0), + i32 %tmp.1) + ; CHECK: brl + + %tmp.2 = add i32 %tmp.0, 1 + %tmp.3 = urem i32 %tmp.2, 5 + + br label %loop + ; CHECK: br +}
diff --git a/src/LLVM/test/CodeGen/MBlaze/cc.ll b/src/LLVM/test/CodeGen/MBlaze/cc.ll new file mode 100644 index 0000000..b1eb22a --- /dev/null +++ b/src/LLVM/test/CodeGen/MBlaze/cc.ll
@@ -0,0 +1,266 @@ +; Test some of the calling convention lowering done by the MBlaze backend. +; We test that integer values are passed in the correct registers and +; returned in the correct registers. Additionally, we test that the stack +; is used as appropriate for passing arguments that cannot be placed into +; registers. +; +; RUN: llc < %s -march=mblaze | FileCheck %s + +declare i32 @printf(i8*, ...) +@MSG = internal constant [13 x i8] c"Message: %d\0A\00" + +define void @params0_noret() { + ; CHECK: params0_noret: + ret void + ; CHECK-NOT: {{.* r3, .*, .*}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd +} + +define i8 @params0_8bitret() { + ; CHECK: params0_8bitret: + ret i8 1 + ; CHECK-NOT: {{.* r3, .*, .*}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd + ; CHECK: {{.* r3, r0, 1}} +} + +define i16 @params0_16bitret() { + ; CHECK: params0_16bitret: + ret i16 1 + ; CHECK: rtsd + ; CHECK: {{.* r3, r0, 1}} + ; CHECK-NOT: {{.* r4, .*, .*}} +} + +define i32 @params0_32bitret() { + ; CHECK: params0_32bitret: + ret i32 1 + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd + ; CHECK: {{.* r3, r0, 1}} +} + +define i64 @params0_64bitret() { + ; CHECK: params0_64bitret: + ret i64 1 + ; CHECK: {{.* r3, r0, .*}} + ; CHECK: rtsd + ; CHECK: {{.* r4, r0, 1}} +} + +define i32 @params1_32bitret(i32 %a) { + ; CHECK: params1_32bitret: + ret i32 %a + ; CHECK-NOT: {{.* r3, .*, .*}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd + ; CHECK: {{.* r3, r5, r0}} +} + +define i32 @params2_32bitret(i32 %a, i32 %b) { + ; CHECK: params2_32bitret: + ret i32 %b + ; CHECK-NOT: {{.* r3, .*, .*}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd + ; CHECK: {{.* r3, r6, r0}} +} + +define i32 @params3_32bitret(i32 %a, i32 %b, i32 %c) { + ; CHECK: params3_32bitret: + ret i32 %c + ; CHECK-NOT: {{.* r3, .*, .*}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd + ; CHECK: {{.* r3, r7, r0}} +} + +define i32 @params4_32bitret(i32 %a, i32 %b, i32 %c, i32 %d) { + ; CHECK: params4_32bitret: + ret i32 
%d + ; CHECK-NOT: {{.* r3, .*, .*}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd + ; CHECK: {{.* r3, r8, r0}} +} + +define i32 @params5_32bitret(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) { + ; CHECK: params5_32bitret: + ret i32 %e + ; CHECK-NOT: {{.* r3, .*, .*}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd + ; CHECK: {{.* r3, r9, r0}} +} + +define i32 @params6_32bitret(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) { + ; CHECK: params6_32bitret: + ret i32 %f + ; CHECK-NOT: {{.* r3, .*, .*}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd + ; CHECK: {{.* r3, r10, r0}} +} + +define i32 @params7_32bitret(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, + i32 %g) { + ; CHECK: params7_32bitret: + ret i32 %g + ; CHECK: {{lwi? r3, r1, 32}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd +} + +define i32 @params8_32bitret(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, + i32 %g, i32 %h) { + ; CHECK: params8_32bitret: + ret i32 %h + ; CHECK: {{lwi? r3, r1, 36}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd +} + +define i32 @params9_32bitret(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, + i32 %g, i32 %h, i32 %i) { + ; CHECK: params9_32bitret: + ret i32 %i + ; CHECK: {{lwi? r3, r1, 40}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd +} + +define i32 @params10_32bitret(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, + i32 %g, i32 %h, i32 %i, i32 %j) { + ; CHECK: params10_32bitret: + ret i32 %j + ; CHECK: {{lwi? 
r3, r1, 44}} + ; CHECK-NOT: {{.* r4, .*, .*}} + ; CHECK: rtsd +} + +define void @testing() { + %MSG.1 = getelementptr [13 x i8]* @MSG, i32 0, i32 0 + + call void @params0_noret() + ; CHECK: brlid + + %tmp.1 = call i8 @params0_8bitret() + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i8 %tmp.1) + + %tmp.2 = call i16 @params0_16bitret() + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i16 %tmp.2) + + %tmp.3 = call i32 @params0_32bitret() + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.3) + + %tmp.4 = call i64 @params0_64bitret() + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i64 %tmp.4) + + %tmp.5 = call i32 @params1_32bitret(i32 1) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.5) + + %tmp.6 = call i32 @params2_32bitret(i32 1, i32 2) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, .*, .*}} + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.6) + + %tmp.7 = call i32 @params3_32bitret(i32 1, i32 2, i32 3) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, .*, .*}} + ; CHECK: {{.* r7, .*, .*}} + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.7) + + %tmp.8 = call i32 @params4_32bitret(i32 1, i32 2, i32 3, i32 4) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, .*, .*}} + ; CHECK: {{.* r7, .*, .*}} + ; CHECK: {{.* r8, .*, .*}} + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.8) + + %tmp.9 = call i32 @params5_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, .*, .*}} + ; CHECK: {{.* r7, .*, .*}} + ; CHECK: {{.* r8, .*, .*}} + ; CHECK: {{.* r9, .*, .*}} + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.9) + + %tmp.10 = call i32 @params6_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5, + i32 6) + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, .*, .*}} + ; CHECK: {{.* r7, .*, .*}} + ; CHECK: {{.* r8, .*, .*}} + ; CHECK: {{.* r9, .*, .*}} + ; CHECK: {{.* r10, .*, .*}} 
+ ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.10) + + %tmp.11 = call i32 @params7_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5, + i32 6, i32 7) + ; CHECK: {{swi? .*, r1, 28}} + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, .*, .*}} + ; CHECK: {{.* r7, .*, .*}} + ; CHECK: {{.* r8, .*, .*}} + ; CHECK: {{.* r9, .*, .*}} + ; CHECK: {{.* r10, .*, .*}} + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.11) + + %tmp.12 = call i32 @params8_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5, + i32 6, i32 7, i32 8) + ; CHECK: {{swi? .*, r1, 28}} + ; CHECK: {{swi? .*, r1, 32}} + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, .*, .*}} + ; CHECK: {{.* r7, .*, .*}} + ; CHECK: {{.* r8, .*, .*}} + ; CHECK: {{.* r9, .*, .*}} + ; CHECK: {{.* r10, .*, .*}} + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.12) + + %tmp.13 = call i32 @params9_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5, + i32 6, i32 7, i32 8, i32 9) + ; CHECK: {{swi? .*, r1, 28}} + ; CHECK: {{swi? .*, r1, 32}} + ; CHECK: {{swi? .*, r1, 36}} + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, .*, .*}} + ; CHECK: {{.* r7, .*, .*}} + ; CHECK: {{.* r8, .*, .*}} + ; CHECK: {{.* r9, .*, .*}} + ; CHECK: {{.* r10, .*, .*}} + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.13) + + %tmp.14 = call i32 @params10_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5, + i32 6, i32 7, i32 8, i32 9, i32 10) + ; CHECK: {{swi? .*, r1, 28}} + ; CHECK: {{swi? .*, r1, 32}} + ; CHECK: {{swi? .*, r1, 36}} + ; CHECK: {{swi? .*, r1, 40}} + ; CHECK: {{.* r5, .*, .*}} + ; CHECK: {{.* r6, .*, .*}} + ; CHECK: {{.* r7, .*, .*}} + ; CHECK: {{.* r8, .*, .*}} + ; CHECK: {{.* r9, .*, .*}} + ; CHECK: {{.* r10, .*, .*}} + ; CHECK: brlid + call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.14) + + ret void +}
diff --git a/src/LLVM/test/CodeGen/MBlaze/dg.exp b/src/LLVM/test/CodeGen/MBlaze/dg.exp new file mode 100644 index 0000000..bfd5e47 --- /dev/null +++ b/src/LLVM/test/CodeGen/MBlaze/dg.exp
@@ -0,0 +1,5 @@ +load_lib llvm.exp + +if { [llvm_supports_target MBlaze] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] +}
diff --git a/src/LLVM/test/CodeGen/MBlaze/div.ll b/src/LLVM/test/CodeGen/MBlaze/div.ll new file mode 100644 index 0000000..fae9830 --- /dev/null +++ b/src/LLVM/test/CodeGen/MBlaze/div.ll
@@ -0,0 +1,75 @@ +; Ensure that multiplication is lowered to function calls when the multiplier +; unit is not available in the hardware and that function calls are not used +; when the multiplier unit is available in the hardware. +; +; RUN: llc < %s -march=mblaze | FileCheck -check-prefix=FUN %s +; RUN: llc < %s -march=mblaze -mattr=+div | FileCheck -check-prefix=DIV %s + +define i8 @test_i8(i8 %a, i8 %b) { + ; FUN: test_i8: + ; DIV: test_i8: + + %tmp.1 = udiv i8 %a, %b + ; FUN-NOT: idiv + ; FUN: brlid + ; DIV-NOT: brlid + ; DIV: idivu + + %tmp.2 = sdiv i8 %a, %b + ; FUN-NOT: idiv + ; FUN: brlid + ; DIV-NOT: brlid + ; DIV-NOT: idivu + ; DIV: idiv + + %tmp.3 = add i8 %tmp.1, %tmp.2 + ret i8 %tmp.3 + ; FUN: rtsd + ; DIV: rtsd +} + +define i16 @test_i16(i16 %a, i16 %b) { + ; FUN: test_i16: + ; DIV: test_i16: + + %tmp.1 = udiv i16 %a, %b + ; FUN-NOT: idiv + ; FUN: brlid + ; DIV-NOT: brlid + ; DIV: idivu + + %tmp.2 = sdiv i16 %a, %b + ; FUN-NOT: idiv + ; FUN: brlid + ; DIV-NOT: brlid + ; DIV-NOT: idivu + ; DIV: idiv + + %tmp.3 = add i16 %tmp.1, %tmp.2 + ret i16 %tmp.3 + ; FUN: rtsd + ; DIV: rtsd +} + +define i32 @test_i32(i32 %a, i32 %b) { + ; FUN: test_i32: + ; DIV: test_i32: + + %tmp.1 = udiv i32 %a, %b + ; FUN-NOT: idiv + ; FUN: brlid + ; DIV-NOT: brlid + ; DIV: idivu + + %tmp.2 = sdiv i32 %a, %b + ; FUN-NOT: idiv + ; FUN: brlid + ; DIV-NOT: brlid + ; DIV-NOT: idivu + ; DIV: idiv + + %tmp.3 = add i32 %tmp.1, %tmp.2 + ret i32 %tmp.3 + ; FUN: rtsd + ; DIV: rtsd +}
diff --git a/src/LLVM/test/CodeGen/MBlaze/fpu.ll b/src/LLVM/test/CodeGen/MBlaze/fpu.ll new file mode 100644 index 0000000..2aef4fd --- /dev/null +++ b/src/LLVM/test/CodeGen/MBlaze/fpu.ll
@@ -0,0 +1,66 @@ +; Ensure that floating point operations are lowered to function calls when the +; FPU is not available in the hardware and that function calls are not used +; when the FPU is available in the hardware. +; +; RUN: llc < %s -march=mblaze | FileCheck -check-prefix=FUN %s +; RUN: llc < %s -march=mblaze -mattr=+fpu | FileCheck -check-prefix=FPU %s + +define float @test_add(float %a, float %b) { + ; FUN: test_add: + ; FPU: test_add: + + %tmp.1 = fadd float %a, %b + ; FUN: brlid + ; FPU-NOT: brlid + + ret float %tmp.1 + ; FUN: rtsd + ; FPU: rtsd + ; FUN-NOT: fadd + ; FPU-NEXT: fadd +} + +define float @test_sub(float %a, float %b) { + ; FUN: test_sub: + ; FPU: test_sub: + + %tmp.1 = fsub float %a, %b + ; FUN: brlid + ; FPU-NOT: brlid + + ret float %tmp.1 + ; FUN: rtsd + ; FPU: rtsd + ; FUN-NOT: frsub + ; FPU-NEXT: frsub +} + +define float @test_mul(float %a, float %b) { + ; FUN: test_mul: + ; FPU: test_mul: + + %tmp.1 = fmul float %a, %b + ; FUN: brlid + ; FPU-NOT: brlid + + ret float %tmp.1 + ; FUN: rtsd + ; FPU: rtsd + ; FUN-NOT: fmul + ; FPU-NEXT: fmul +} + +define float @test_div(float %a, float %b) { + ; FUN: test_div: + ; FPU: test_div: + + %tmp.1 = fdiv float %a, %b + ; FUN: brlid + ; FPU-NOT: brlid + + ret float %tmp.1 + ; FUN: rtsd + ; FPU: rtsd + ; FUN-NOT: fdiv + ; FPU-NEXT: fdiv +}
diff --git a/src/LLVM/test/CodeGen/MBlaze/fsl.ll b/src/LLVM/test/CodeGen/MBlaze/fsl.ll new file mode 100644 index 0000000..5444f82 --- /dev/null +++ b/src/LLVM/test/CodeGen/MBlaze/fsl.ll
@@ -0,0 +1,319 @@ +; Ensure that the FSL instrinsic instruction generate single FSL instructions +; at the machine level. Additionally, ensure that dynamic values use the +; dynamic version of the instructions and that constant values use the +; constant version of the instructions. +; +; RUN: llc -O3 < %s -march=mblaze | FileCheck %s + +declare i32 @llvm.mblaze.fsl.get(i32 %port) +declare i32 @llvm.mblaze.fsl.aget(i32 %port) +declare i32 @llvm.mblaze.fsl.cget(i32 %port) +declare i32 @llvm.mblaze.fsl.caget(i32 %port) +declare i32 @llvm.mblaze.fsl.eget(i32 %port) +declare i32 @llvm.mblaze.fsl.eaget(i32 %port) +declare i32 @llvm.mblaze.fsl.ecget(i32 %port) +declare i32 @llvm.mblaze.fsl.ecaget(i32 %port) +declare i32 @llvm.mblaze.fsl.nget(i32 %port) +declare i32 @llvm.mblaze.fsl.naget(i32 %port) +declare i32 @llvm.mblaze.fsl.ncget(i32 %port) +declare i32 @llvm.mblaze.fsl.ncaget(i32 %port) +declare i32 @llvm.mblaze.fsl.neget(i32 %port) +declare i32 @llvm.mblaze.fsl.neaget(i32 %port) +declare i32 @llvm.mblaze.fsl.necget(i32 %port) +declare i32 @llvm.mblaze.fsl.necaget(i32 %port) +declare i32 @llvm.mblaze.fsl.tget(i32 %port) +declare i32 @llvm.mblaze.fsl.taget(i32 %port) +declare i32 @llvm.mblaze.fsl.tcget(i32 %port) +declare i32 @llvm.mblaze.fsl.tcaget(i32 %port) +declare i32 @llvm.mblaze.fsl.teget(i32 %port) +declare i32 @llvm.mblaze.fsl.teaget(i32 %port) +declare i32 @llvm.mblaze.fsl.tecget(i32 %port) +declare i32 @llvm.mblaze.fsl.tecaget(i32 %port) +declare i32 @llvm.mblaze.fsl.tnget(i32 %port) +declare i32 @llvm.mblaze.fsl.tnaget(i32 %port) +declare i32 @llvm.mblaze.fsl.tncget(i32 %port) +declare i32 @llvm.mblaze.fsl.tncaget(i32 %port) +declare i32 @llvm.mblaze.fsl.tneget(i32 %port) +declare i32 @llvm.mblaze.fsl.tneaget(i32 %port) +declare i32 @llvm.mblaze.fsl.tnecget(i32 %port) +declare i32 @llvm.mblaze.fsl.tnecaget(i32 %port) + +declare void @llvm.mblaze.fsl.put(i32 %value, i32 %port) +declare void @llvm.mblaze.fsl.aput(i32 %value, i32 %port) +declare void 
@llvm.mblaze.fsl.cput(i32 %value, i32 %port) +declare void @llvm.mblaze.fsl.caput(i32 %value, i32 %port) +declare void @llvm.mblaze.fsl.nput(i32 %value, i32 %port) +declare void @llvm.mblaze.fsl.naput(i32 %value, i32 %port) +declare void @llvm.mblaze.fsl.ncput(i32 %value, i32 %port) +declare void @llvm.mblaze.fsl.ncaput(i32 %value, i32 %port) +declare void @llvm.mblaze.fsl.tput(i32 %port) +declare void @llvm.mblaze.fsl.taput(i32 %port) +declare void @llvm.mblaze.fsl.tcput(i32 %port) +declare void @llvm.mblaze.fsl.tcaput(i32 %port) +declare void @llvm.mblaze.fsl.tnput(i32 %port) +declare void @llvm.mblaze.fsl.tnaput(i32 %port) +declare void @llvm.mblaze.fsl.tncput(i32 %port) +declare void @llvm.mblaze.fsl.tncaput(i32 %port) + +define void @fsl_get(i32 %port) { + ; CHECK: fsl_get: + %v0 = call i32 @llvm.mblaze.fsl.get(i32 %port) + ; CHECK: getd + %v1 = call i32 @llvm.mblaze.fsl.aget(i32 %port) + ; CHECK-NEXT: agetd + %v2 = call i32 @llvm.mblaze.fsl.cget(i32 %port) + ; CHECK-NEXT: cgetd + %v3 = call i32 @llvm.mblaze.fsl.caget(i32 %port) + ; CHECK-NEXT: cagetd + %v4 = call i32 @llvm.mblaze.fsl.eget(i32 %port) + ; CHECK-NEXT: egetd + %v5 = call i32 @llvm.mblaze.fsl.eaget(i32 %port) + ; CHECK-NEXT: eagetd + %v6 = call i32 @llvm.mblaze.fsl.ecget(i32 %port) + ; CHECK-NEXT: ecgetd + %v7 = call i32 @llvm.mblaze.fsl.ecaget(i32 %port) + ; CHECK-NEXT: ecagetd + %v8 = call i32 @llvm.mblaze.fsl.nget(i32 %port) + ; CHECK-NEXT: ngetd + %v9 = call i32 @llvm.mblaze.fsl.naget(i32 %port) + ; CHECK-NEXT: nagetd + %v10 = call i32 @llvm.mblaze.fsl.ncget(i32 %port) + ; CHECK-NEXT: ncgetd + %v11 = call i32 @llvm.mblaze.fsl.ncaget(i32 %port) + ; CHECK-NEXT: ncagetd + %v12 = call i32 @llvm.mblaze.fsl.neget(i32 %port) + ; CHECK-NEXT: negetd + %v13 = call i32 @llvm.mblaze.fsl.neaget(i32 %port) + ; CHECK-NEXT: neagetd + %v14 = call i32 @llvm.mblaze.fsl.necget(i32 %port) + ; CHECK-NEXT: necgetd + %v15 = call i32 @llvm.mblaze.fsl.necaget(i32 %port) + ; CHECK-NEXT: necagetd + %v16 = call i32 
@llvm.mblaze.fsl.tget(i32 %port) + ; CHECK-NEXT: tgetd + %v17 = call i32 @llvm.mblaze.fsl.taget(i32 %port) + ; CHECK-NEXT: tagetd + %v18 = call i32 @llvm.mblaze.fsl.tcget(i32 %port) + ; CHECK-NEXT: tcgetd + %v19 = call i32 @llvm.mblaze.fsl.tcaget(i32 %port) + ; CHECK-NEXT: tcagetd + %v20 = call i32 @llvm.mblaze.fsl.teget(i32 %port) + ; CHECK-NEXT: tegetd + %v21 = call i32 @llvm.mblaze.fsl.teaget(i32 %port) + ; CHECK-NEXT: teagetd + %v22 = call i32 @llvm.mblaze.fsl.tecget(i32 %port) + ; CHECK-NEXT: tecgetd + %v23 = call i32 @llvm.mblaze.fsl.tecaget(i32 %port) + ; CHECK-NEXT: tecagetd + %v24 = call i32 @llvm.mblaze.fsl.tnget(i32 %port) + ; CHECK-NEXT: tngetd + %v25 = call i32 @llvm.mblaze.fsl.tnaget(i32 %port) + ; CHECK-NEXT: tnagetd + %v26 = call i32 @llvm.mblaze.fsl.tncget(i32 %port) + ; CHECK-NEXT: tncgetd + %v27 = call i32 @llvm.mblaze.fsl.tncaget(i32 %port) + ; CHECK-NEXT: tncagetd + %v28 = call i32 @llvm.mblaze.fsl.tneget(i32 %port) + ; CHECK-NEXT: tnegetd + %v29 = call i32 @llvm.mblaze.fsl.tneaget(i32 %port) + ; CHECK-NEXT: tneagetd + %v30 = call i32 @llvm.mblaze.fsl.tnecget(i32 %port) + ; CHECK-NEXT: tnecgetd + %v31 = call i32 @llvm.mblaze.fsl.tnecaget(i32 %port) + ; CHECK-NEXT: tnecagetd + ret void + ; CHECK: rtsd +} + +define void @fslc_get() { + ; CHECK: fslc_get: + %v0 = call i32 @llvm.mblaze.fsl.get(i32 1) + ; CHECK: get + %v1 = call i32 @llvm.mblaze.fsl.aget(i32 1) + ; CHECK-NOT: agetd + ; CHECK: aget + %v2 = call i32 @llvm.mblaze.fsl.cget(i32 1) + ; CHECK-NOT: cgetd + ; CHECK: cget + %v3 = call i32 @llvm.mblaze.fsl.caget(i32 1) + ; CHECK-NOT: cagetd + ; CHECK: caget + %v4 = call i32 @llvm.mblaze.fsl.eget(i32 1) + ; CHECK-NOT: egetd + ; CHECK: eget + %v5 = call i32 @llvm.mblaze.fsl.eaget(i32 1) + ; CHECK-NOT: eagetd + ; CHECK: eaget + %v6 = call i32 @llvm.mblaze.fsl.ecget(i32 1) + ; CHECK-NOT: ecgetd + ; CHECK: ecget + %v7 = call i32 @llvm.mblaze.fsl.ecaget(i32 1) + ; CHECK-NOT: ecagetd + ; CHECK: ecaget + %v8 = call i32 @llvm.mblaze.fsl.nget(i32 1) + ; 
CHECK-NOT: ngetd + ; CHECK: nget + %v9 = call i32 @llvm.mblaze.fsl.naget(i32 1) + ; CHECK-NOT: nagetd + ; CHECK: naget + %v10 = call i32 @llvm.mblaze.fsl.ncget(i32 1) + ; CHECK-NOT: ncgetd + ; CHECK: ncget + %v11 = call i32 @llvm.mblaze.fsl.ncaget(i32 1) + ; CHECK-NOT: ncagetd + ; CHECK: ncaget + %v12 = call i32 @llvm.mblaze.fsl.neget(i32 1) + ; CHECK-NOT: negetd + ; CHECK: neget + %v13 = call i32 @llvm.mblaze.fsl.neaget(i32 1) + ; CHECK-NOT: neagetd + ; CHECK: neaget + %v14 = call i32 @llvm.mblaze.fsl.necget(i32 1) + ; CHECK-NOT: necgetd + ; CHECK: necget + %v15 = call i32 @llvm.mblaze.fsl.necaget(i32 1) + ; CHECK-NOT: necagetd + ; CHECK: necaget + %v16 = call i32 @llvm.mblaze.fsl.tget(i32 1) + ; CHECK-NOT: tgetd + ; CHECK: tget + %v17 = call i32 @llvm.mblaze.fsl.taget(i32 1) + ; CHECK-NOT: tagetd + ; CHECK: taget + %v18 = call i32 @llvm.mblaze.fsl.tcget(i32 1) + ; CHECK-NOT: tcgetd + ; CHECK: tcget + %v19 = call i32 @llvm.mblaze.fsl.tcaget(i32 1) + ; CHECK-NOT: tcagetd + ; CHECK: tcaget + %v20 = call i32 @llvm.mblaze.fsl.teget(i32 1) + ; CHECK-NOT: tegetd + ; CHECK: teget + %v21 = call i32 @llvm.mblaze.fsl.teaget(i32 1) + ; CHECK-NOT: teagetd + ; CHECK: teaget + %v22 = call i32 @llvm.mblaze.fsl.tecget(i32 1) + ; CHECK-NOT: tecgetd + ; CHECK: tecget + %v23 = call i32 @llvm.mblaze.fsl.tecaget(i32 1) + ; CHECK-NOT: tecagetd + ; CHECK: tecaget + %v24 = call i32 @llvm.mblaze.fsl.tnget(i32 1) + ; CHECK-NOT: tngetd + ; CHECK: tnget + %v25 = call i32 @llvm.mblaze.fsl.tnaget(i32 1) + ; CHECK-NOT: tnagetd + ; CHECK: tnaget + %v26 = call i32 @llvm.mblaze.fsl.tncget(i32 1) + ; CHECK-NOT: tncgetd + ; CHECK: tncget + %v27 = call i32 @llvm.mblaze.fsl.tncaget(i32 1) + ; CHECK-NOT: tncagetd + ; CHECK: tncaget + %v28 = call i32 @llvm.mblaze.fsl.tneget(i32 1) + ; CHECK-NOT: tnegetd + ; CHECK: tneget + %v29 = call i32 @llvm.mblaze.fsl.tneaget(i32 1) + ; CHECK-NOT: tneagetd + ; CHECK: tneaget + %v30 = call i32 @llvm.mblaze.fsl.tnecget(i32 1) + ; CHECK-NOT: tnecgetd + ; CHECK: tnecget 
+ %v31 = call i32 @llvm.mblaze.fsl.tnecaget(i32 1) + ; CHECK-NOT: tnecagetd + ; CHECK: tnecaget + ret void + ; CHECK: rtsd +} + +define void @putfsl(i32 %value, i32 %port) { + ; CHECK: putfsl: + call void @llvm.mblaze.fsl.put(i32 %value, i32 %port) + ; CHECK: putd + call void @llvm.mblaze.fsl.aput(i32 %value, i32 %port) + ; CHECK-NEXT: aputd + call void @llvm.mblaze.fsl.cput(i32 %value, i32 %port) + ; CHECK-NEXT: cputd + call void @llvm.mblaze.fsl.caput(i32 %value, i32 %port) + ; CHECK-NEXT: caputd + call void @llvm.mblaze.fsl.nput(i32 %value, i32 %port) + ; CHECK-NEXT: nputd + call void @llvm.mblaze.fsl.naput(i32 %value, i32 %port) + ; CHECK-NEXT: naputd + call void @llvm.mblaze.fsl.ncput(i32 %value, i32 %port) + ; CHECK-NEXT: ncputd + call void @llvm.mblaze.fsl.ncaput(i32 %value, i32 %port) + ; CHECK-NEXT: ncaputd + call void @llvm.mblaze.fsl.tput(i32 %port) + ; CHECK-NEXT: tputd + call void @llvm.mblaze.fsl.taput(i32 %port) + ; CHECK-NEXT: taputd + call void @llvm.mblaze.fsl.tcput(i32 %port) + ; CHECK-NEXT: tcputd + call void @llvm.mblaze.fsl.tcaput(i32 %port) + ; CHECK-NEXT: tcaputd + call void @llvm.mblaze.fsl.tnput(i32 %port) + ; CHECK-NEXT: tnputd + call void @llvm.mblaze.fsl.tnaput(i32 %port) + ; CHECK-NEXT: tnaputd + call void @llvm.mblaze.fsl.tncput(i32 %port) + ; CHECK-NEXT: tncputd + call void @llvm.mblaze.fsl.tncaput(i32 %port) + ; CHECK-NEXT: tncaputd + ret void + ; CHECK: rtsd +} + +define void @putfsl_const(i32 %value) { + ; CHECK: putfsl_const: + call void @llvm.mblaze.fsl.put(i32 %value, i32 1) + ; CHECK-NOT: putd + ; CHECK: put + call void @llvm.mblaze.fsl.aput(i32 %value, i32 1) + ; CHECK-NOT: aputd + ; CHECK: aput + call void @llvm.mblaze.fsl.cput(i32 %value, i32 1) + ; CHECK-NOT: cputd + ; CHECK: cput + call void @llvm.mblaze.fsl.caput(i32 %value, i32 1) + ; CHECK-NOT: caputd + ; CHECK: caput + call void @llvm.mblaze.fsl.nput(i32 %value, i32 1) + ; CHECK-NOT: nputd + ; CHECK: nput + call void @llvm.mblaze.fsl.naput(i32 %value, i32 1) + ; 
CHECK-NOT: naputd + ; CHECK: naput + call void @llvm.mblaze.fsl.ncput(i32 %value, i32 1) + ; CHECK-NOT: ncputd + ; CHECK: ncput + call void @llvm.mblaze.fsl.ncaput(i32 %value, i32 1) + ; CHECK-NOT: ncaputd + ; CHECK: ncaput + call void @llvm.mblaze.fsl.tput(i32 1) + ; CHECK-NOT: tputd + ; CHECK: tput + call void @llvm.mblaze.fsl.taput(i32 1) + ; CHECK-NOT: taputd + ; CHECK: taput + call void @llvm.mblaze.fsl.tcput(i32 1) + ; CHECK-NOT: tcputd + ; CHECK: tcput + call void @llvm.mblaze.fsl.tcaput(i32 1) + ; CHECK-NOT: tcaputd + ; CHECK: tcaput + call void @llvm.mblaze.fsl.tnput(i32 1) + ; CHECK-NOT: tnputd + ; CHECK: tnput + call void @llvm.mblaze.fsl.tnaput(i32 1) + ; CHECK-NOT: tnaputd + ; CHECK: tnaput + call void @llvm.mblaze.fsl.tncput(i32 1) + ; CHECK-NOT: tncputd + ; CHECK: tncput + call void @llvm.mblaze.fsl.tncaput(i32 1) + ; CHECK-NOT: tncaputd + ; CHECK: tncaput + ret void + ; CHECK: rtsd +}
diff --git a/src/LLVM/test/CodeGen/MBlaze/imm.ll b/src/LLVM/test/CodeGen/MBlaze/imm.ll new file mode 100644 index 0000000..6effd3e --- /dev/null +++ b/src/LLVM/test/CodeGen/MBlaze/imm.ll
@@ -0,0 +1,70 @@ +; Ensure that all immediate values that are 32-bits or less can be loaded +; using a single instruction and that immediate values 64-bits or less can +; be loaded using two instructions. +; +; RUN: llc < %s -march=mblaze | FileCheck %s +; RUN: llc < %s -march=mblaze -mattr=+fpu | FileCheck -check-prefix=FPU %s + +define i8 @retimm_i8() { + ; CHECK: retimm_i8: + ; CHECK: rtsd + ; CHECK-NEXT: add + ; FPU: retimm_i8: + ; FPU: rtsd + ; FPU-NEXT: add + ret i8 123 +} + +define i16 @retimm_i16() { + ; CHECK: retimm_i16: + ; CHECK: rtsd + ; CHECK-NEXT: add + ; FPU: retimm_i16: + ; FPU: rtsd + ; FPU-NEXT: add + ret i16 31212 +} + +define i32 @retimm_i32() { + ; CHECK: retimm_i32: + ; CHECK: add + ; CHECK-NEXT: rtsd + ; FPU: retimm_i32: + ; FPU: add + ; FPU-NEXT: rtsd + ret i32 2938128 +} + +define i64 @retimm_i64() { + ; CHECK: retimm_i64: + ; CHECK: add + ; CHECK-NEXT: rtsd + ; CHECK-NEXT: add + ; FPU: retimm_i64: + ; FPU: add + ; FPU-NEXT: rtsd + ; FPU-NEXT: add + ret i64 94581823 +} + +define float @retimm_float() { + ; CHECK: retimm_float: + ; CHECK: add + ; CHECK-NEXT: rtsd + ; FPU: retimm_float: + ; FPU: or + ; FPU-NEXT: rtsd + ret float 12.0 +} + +define double @retimm_double() { + ; CHECK: retimm_double: + ; CHECK: add + ; CHECK-NEXT: add + ; CHECK-NEXT: rtsd + ; FPU: retimm_double: + ; FPU: add + ; FPU-NEXT: add + ; FPU-NEXT: rtsd + ret double 598382.39283873 +}
diff --git a/src/LLVM/test/CodeGen/MBlaze/intr.ll b/src/LLVM/test/CodeGen/MBlaze/intr.ll new file mode 100644 index 0000000..79c6bff --- /dev/null +++ b/src/LLVM/test/CodeGen/MBlaze/intr.ll
@@ -0,0 +1,48 @@ +; Ensure that the MBlaze interrupt_handler calling convention (cc73) is handled +; correctly correctly by the MBlaze backend. +; +; RUN: llc < %s -march=mblaze | FileCheck %s + +@.str = private constant [28 x i8] c"The interrupt has gone off\0A\00" +@_interrupt_handler = alias void ()* @myintr + +define cc73 void @myintr() nounwind noinline { + ; CHECK: myintr: + ; CHECK: swi r3, r1 + ; CHECK: swi r4, r1 + ; CHECK: swi r5, r1 + ; CHECK: swi r6, r1 + ; CHECK: swi r7, r1 + ; CHECK: swi r8, r1 + ; CHECK: swi r9, r1 + ; CHECK: swi r10, r1 + ; CHECK: swi r11, r1 + ; CHECK: swi r12, r1 + ; CHECK: swi r17, r1 + ; CHECK: swi r18, r1 + ; CHECK: mfs r11, rmsr + ; CHECK: swi r11, r1 + entry: + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([28 x i8]* @.str, i32 0, i32 0)) + ret void + + ; CHECK: lwi r11, r1 + ; CHECK: mts rmsr, r11 + ; CHECK: lwi r18, r1 + ; CHECK: lwi r17, r1 + ; CHECK: lwi r12, r1 + ; CHECK: lwi r11, r1 + ; CHECK: lwi r10, r1 + ; CHECK: lwi r9, r1 + ; CHECK: lwi r8, r1 + ; CHECK: lwi r7, r1 + ; CHECK: lwi r6, r1 + ; CHECK: lwi r5, r1 + ; CHECK: lwi r4, r1 + ; CHECK: lwi r3, r1 + ; CHECK: rtid r14, 0 +} + + ; CHECK: .globl _interrupt_handler + ; CHECK: _interrupt_handler = myintr +declare i32 @printf(i8*, ...)
diff --git a/src/LLVM/test/CodeGen/MBlaze/jumptable.ll b/src/LLVM/test/CodeGen/MBlaze/jumptable.ll new file mode 100644 index 0000000..299084d --- /dev/null +++ b/src/LLVM/test/CodeGen/MBlaze/jumptable.ll
@@ -0,0 +1,79 @@ +; Ensure that jump tables can be handled by the mblaze backend. The +; jump table should be lowered to a "br" instruction using one of the +; available registers. +; +; RUN: llc < %s -march=mblaze | FileCheck %s + +define i32 @jmptable(i32 %arg) +{ + ; CHECK: jmptable: + switch i32 %arg, label %DEFAULT [ i32 0, label %L0 + i32 1, label %L1 + i32 2, label %L2 + i32 3, label %L3 + i32 4, label %L4 + i32 5, label %L5 + i32 6, label %L6 + i32 7, label %L7 + i32 8, label %L8 + i32 9, label %L9 ] + + ; CHECK: lw [[REG:r[0-9]*]] + ; CHECK: brad [[REG]] +L0: + %var0 = add i32 %arg, 0 + br label %DONE + +L1: + %var1 = add i32 %arg, 1 + br label %DONE + +L2: + %var2 = add i32 %arg, 2 + br label %DONE + +L3: + %var3 = add i32 %arg, 3 + br label %DONE + +L4: + %var4 = add i32 %arg, 4 + br label %DONE + +L5: + %var5 = add i32 %arg, 5 + br label %DONE + +L6: + %var6 = add i32 %arg, 6 + br label %DONE + +L7: + %var7 = add i32 %arg, 7 + br label %DONE + +L8: + %var8 = add i32 %arg, 8 + br label %DONE + +L9: + %var9 = add i32 %arg, 9 + br label %DONE + +DEFAULT: + unreachable + +DONE: + %rval = phi i32 [ %var0, %L0 ], + [ %var1, %L1 ], + [ %var2, %L2 ], + [ %var3, %L3 ], + [ %var4, %L4 ], + [ %var5, %L5 ], + [ %var6, %L6 ], + [ %var7, %L7 ], + [ %var8, %L8 ], + [ %var9, %L9 ] + ret i32 %rval + ; CHECK: rtsd +}
diff --git a/src/LLVM/test/CodeGen/MBlaze/loop.ll b/src/LLVM/test/CodeGen/MBlaze/loop.ll new file mode 100644 index 0000000..7439d0b --- /dev/null +++ b/src/LLVM/test/CodeGen/MBlaze/loop.ll
@@ -0,0 +1,44 @@ +; Test some complicated looping constructs to ensure that they +; compile successfully and that some sort of branching is used +; in the resulting code. +; +; RUN: llc < %s -march=mblaze -mattr=+mul,+fpu,+barrel | FileCheck %s + +declare i32 @printf(i8*, ...) +@MSG = internal constant [19 x i8] c"Message: %d %d %d\0A\00" + +define i32 @loop(i32 %a, i32 %b) +{ + ; CHECK: loop: +entry: + br label %loop_outer + +loop_outer: + %outer.0 = phi i32 [ 0, %entry ], [ %outer.2, %loop_outer_finish ] + br label %loop_inner + +loop_inner: + %inner.0 = phi i32 [ %a, %loop_outer ], [ %inner.3, %loop_inner_finish ] + %inner.1 = phi i32 [ %b, %loop_outer ], [ %inner.4, %loop_inner_finish ] + %inner.2 = phi i32 [ 0, %loop_outer ], [ %inner.5, %loop_inner_finish ] + %inner.3 = add i32 %inner.0, %inner.1 + %inner.4 = mul i32 %inner.2, 11 + br label %loop_inner_finish + +loop_inner_finish: + %inner.5 = add i32 %inner.2, 1 + call i32 (i8*,...)* @printf( i8* getelementptr([19 x i8]* @MSG,i32 0,i32 0), + i32 %inner.0, i32 %inner.1, i32 %inner.2 ) + + %inner.6 = icmp eq i32 %inner.5, 100 + ; CHECK: cmp [[REG:r[0-9]*]] + + br i1 %inner.6, label %loop_inner, label %loop_outer_finish + ; CHECK: {{beqid|bneid}} [[REG]] + +loop_outer_finish: + %outer.1 = add i32 %outer.0, 1 + %outer.2 = urem i32 %outer.1, 1500 + br label %loop_outer + ; CHECK: br +}
diff --git a/src/LLVM/test/CodeGen/MBlaze/mul.ll b/src/LLVM/test/CodeGen/MBlaze/mul.ll new file mode 100644 index 0000000..cefdb8d --- /dev/null +++ b/src/LLVM/test/CodeGen/MBlaze/mul.ll
@@ -0,0 +1,51 @@ +; Ensure that multiplication is lowered to function calls when the multiplier +; unit is not available in the hardware and that function calls are not used +; when the multiplier unit is available in the hardware. +; +; RUN: llc < %s -march=mblaze | FileCheck -check-prefix=FUN %s +; RUN: llc < %s -march=mblaze -mattr=+mul | FileCheck -check-prefix=MUL %s + +define i8 @test_i8(i8 %a, i8 %b) { + ; FUN: test_i8: + ; MUL: test_i8: + + %tmp.1 = mul i8 %a, %b + ; FUN-NOT: mul + ; FUN: brlid + ; MUL-NOT: brlid + + ret i8 %tmp.1 + ; FUN: rtsd + ; MUL: rtsd + ; MUL: mul +} + +define i16 @test_i16(i16 %a, i16 %b) { + ; FUN: test_i16: + ; MUL: test_i16: + + %tmp.1 = mul i16 %a, %b + ; FUN-NOT: mul + ; FUN: brlid + ; MUL-NOT: brlid + + ret i16 %tmp.1 + ; FUN: rtsd + ; MUL: rtsd + ; MUL: mul +} + +define i32 @test_i32(i32 %a, i32 %b) { + ; FUN: test_i32: + ; MUL: test_i32: + + %tmp.1 = mul i32 %a, %b + ; FUN-NOT: mul + ; FUN: brlid + ; MUL-NOT: brlid + + ret i32 %tmp.1 + ; FUN: rtsd + ; MUL: rtsd + ; MUL: mul +}
diff --git a/src/LLVM/test/CodeGen/MBlaze/mul64.ll b/src/LLVM/test/CodeGen/MBlaze/mul64.ll new file mode 100644 index 0000000..e0ef413 --- /dev/null +++ b/src/LLVM/test/CodeGen/MBlaze/mul64.ll
@@ -0,0 +1,23 @@ +; Ensure that multiplication is lowered to function calls when the 64-bit +; multiplier unit is not available in the hardware and that function calls +; are not used when the 64-bit multiplier unit is available in the hardware. +; +; RUN: llc < %s -march=mblaze | FileCheck -check-prefix=FUN %s +; RUN: llc < %s -march=mblaze -mattr=+mul,+mul64 | \ +; RUN: FileCheck -check-prefix=MUL %s + +define i64 @test_i64(i64 %a, i64 %b) { + ; FUN: test_i64: + ; MUL: test_i64: + + %tmp.1 = mul i64 %a, %b + ; FUN-NOT: mul + ; FUN: brlid + ; MUL-NOT: brlid + ; MUL: mulh + ; MUL: mul + + ret i64 %tmp.1 + ; FUN: rtsd + ; MUL: rtsd +}
diff --git a/src/LLVM/test/CodeGen/MBlaze/select.ll b/src/LLVM/test/CodeGen/MBlaze/select.ll new file mode 100644 index 0000000..47a88a1 --- /dev/null +++ b/src/LLVM/test/CodeGen/MBlaze/select.ll
@@ -0,0 +1,15 @@ +; Ensure that the select instruction is supported and is lowered to +; some sort of branch instruction. +; +; RUN: llc < %s -march=mblaze | FileCheck %s + +define i32 @testsel(i32 %a, i32 %b) +{ + ; CHECK: testsel: + %tmp.1 = icmp eq i32 %a, %b + ; CHECK: cmp + %tmp.2 = select i1 %tmp.1, i32 %a, i32 %b + ; CHECK: {{bne|beq}} + ret i32 %tmp.2 + ; CHECK: rtsd +}
diff --git a/src/LLVM/test/CodeGen/MBlaze/shift.ll b/src/LLVM/test/CodeGen/MBlaze/shift.ll new file mode 100644 index 0000000..99f0519 --- /dev/null +++ b/src/LLVM/test/CodeGen/MBlaze/shift.ll
@@ -0,0 +1,115 @@ +; Ensure that shifts are lowered to loops when the barrel shifter unit is +; not available in the hardware and that loops are not used when the +; barrel shifter unit is available in the hardware. +; +; RUN: llc < %s -march=mblaze | FileCheck -check-prefix=FUN %s +; RUN: llc < %s -march=mblaze -mattr=+barrel | FileCheck -check-prefix=SHT %s + +define i8 @test_i8(i8 %a, i8 %b) { + ; FUN: test_i8: + ; SHT: test_i8: + + %tmp.1 = shl i8 %a, %b + ; FUN: andi + ; FUN: add + ; FUN: bnei + ; SHT-NOT: bnei + + ret i8 %tmp.1 + ; FUN: rtsd + ; SHT: rtsd + ; FUN-NOT: bsll + ; SHT-NEXT: bsll +} + +define i8 @testc_i8(i8 %a, i8 %b) { + ; FUN: testc_i8: + ; SHT: testc_i8: + + %tmp.1 = shl i8 %a, 5 + ; FUN: andi + ; FUN: add + ; FUN: bnei + ; SHT-NOT: andi + ; SHT-NOT: add + ; SHT-NOT: bnei + + ret i8 %tmp.1 + ; FUN: rtsd + ; SHT: rtsd + ; FUN-NOT: bsll + ; SHT-NEXT: bslli +} + +define i16 @test_i16(i16 %a, i16 %b) { + ; FUN: test_i16: + ; SHT: test_i16: + + %tmp.1 = shl i16 %a, %b + ; FUN: andi + ; FUN: add + ; FUN: bnei + ; SHT-NOT: bnei + + ret i16 %tmp.1 + ; FUN: rtsd + ; SHT: rtsd + ; FUN-NOT: bsll + ; SHT-NEXT: bsll +} + +define i16 @testc_i16(i16 %a, i16 %b) { + ; FUN: testc_i16: + ; SHT: testc_i16: + + %tmp.1 = shl i16 %a, 5 + ; FUN: andi + ; FUN: add + ; FUN: bnei + ; SHT-NOT: andi + ; SHT-NOT: add + ; SHT-NOT: bnei + + ret i16 %tmp.1 + ; FUN: rtsd + ; SHT: rtsd + ; FUN-NOT: bsll + ; SHT-NEXT: bslli +} + +define i32 @test_i32(i32 %a, i32 %b) { + ; FUN: test_i32: + ; SHT: test_i32: + + %tmp.1 = shl i32 %a, %b + ; FUN: andi + ; FUN: add + ; FUN: bnei + ; SHT-NOT: andi + ; SHT-NOT: bnei + + ret i32 %tmp.1 + ; FUN: rtsd + ; SHT: rtsd + ; FUN-NOT: bsll + ; SHT-NEXT: bsll +} + +define i32 @testc_i32(i32 %a, i32 %b) { + ; FUN: testc_i32: + ; SHT: testc_i32: + + %tmp.1 = shl i32 %a, 5 + ; FUN: andi + ; FUN: add + ; FUN: bnei + ; SHT-NOT: andi + ; SHT-NOT: add + ; SHT-NOT: bnei + + ret i32 %tmp.1 + ; FUN: rtsd + ; SHT: rtsd + ; FUN-NOT: bsll + ; SHT-NEXT: bslli 
+}
diff --git a/src/LLVM/test/CodeGen/MBlaze/svol.ll b/src/LLVM/test/CodeGen/MBlaze/svol.ll new file mode 100644 index 0000000..c1e9620 --- /dev/null +++ b/src/LLVM/test/CodeGen/MBlaze/svol.ll
@@ -0,0 +1,80 @@ +; Ensure that the MBlaze save_volatiles calling convention (cc74) is handled +; correctly by the MBlaze backend. +; +; RUN: llc < %s -march=mblaze | FileCheck %s + +@.str = private constant [28 x i8] c"The interrupt has gone off\0A\00" + +define cc74 void @mysvol() nounwind noinline { + ; CHECK: mysvol: + ; CHECK: swi r3, r1 + ; CHECK: swi r4, r1 + ; CHECK: swi r5, r1 + ; CHECK: swi r6, r1 + ; CHECK: swi r7, r1 + ; CHECK: swi r8, r1 + ; CHECK: swi r9, r1 + ; CHECK: swi r10, r1 + ; CHECK: swi r11, r1 + ; CHECK: swi r12, r1 + ; CHECK: swi r17, r1 + ; CHECK: swi r18, r1 + ; CHECK-NOT: mfs r11, rmsr + entry: + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([28 x i8]* @.str, i32 0, i32 0)) + ret void + + ; CHECK-NOT: mts rmsr, r11 + ; CHECK: lwi r18, r1 + ; CHECK: lwi r17, r1 + ; CHECK: lwi r12, r1 + ; CHECK: lwi r11, r1 + ; CHECK: lwi r10, r1 + ; CHECK: lwi r9, r1 + ; CHECK: lwi r8, r1 + ; CHECK: lwi r7, r1 + ; CHECK: lwi r6, r1 + ; CHECK: lwi r5, r1 + ; CHECK: lwi r4, r1 + ; CHECK: lwi r3, r1 + ; CHECK: rtsd r15, 8 +} + +define cc74 void @mysvol2() nounwind noinline { + ; CHECK: mysvol2: + ; CHECK-NOT: swi r3, r1 + ; CHECK-NOT: swi r4, r1 + ; CHECK-NOT: swi r5, r1 + ; CHECK-NOT: swi r6, r1 + ; CHECK-NOT: swi r7, r1 + ; CHECK-NOT: swi r8, r1 + ; CHECK-NOT: swi r9, r1 + ; CHECK-NOT: swi r10, r1 + ; CHECK-NOT: swi r11, r1 + ; CHECK-NOT: swi r12, r1 + ; CHECK: swi r17, r1 + ; CHECK: swi r18, r1 + ; CHECK-NOT: mfs r11, rmsr +entry: + + ; CHECK-NOT: mts rmsr, r11 + ; CHECK: lwi r18, r1 + ; CHECK: lwi r17, r1 + ; CHECK-NOT: lwi r12, r1 + ; CHECK-NOT: lwi r11, r1 + ; CHECK-NOT: lwi r10, r1 + ; CHECK-NOT: lwi r9, r1 + ; CHECK-NOT: lwi r8, r1 + ; CHECK-NOT: lwi r7, r1 + ; CHECK-NOT: lwi r6, r1 + ; CHECK-NOT: lwi r5, r1 + ; CHECK-NOT: lwi r4, r1 + ; CHECK-NOT: lwi r3, r1 + ; CHECK: rtsd r15, 8 + ret void +} + + ; CHECK-NOT: .globl _interrupt_handler + ; CHECK-NOT: _interrupt_handler = mysvol + ; CHECK-NOT: _interrupt_handler = 
mysvol2 +declare i32 @printf(i8*, ...)
diff --git a/src/LLVM/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll b/src/LLVM/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll new file mode 100644 index 0000000..f339373 --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll
@@ -0,0 +1,32 @@ +; RUN: llc < %s +; PR4136 + +target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8" +target triple = "msp430-unknown-linux-gnu" +@uip_len = external global i16 ; <i16*> [#uses=2] + +define void @uip_arp_arpin() nounwind { +entry: + %tmp = volatile load i16* @uip_len ; <i16> [#uses=1] + %cmp = icmp ult i16 %tmp, 42 ; <i1> [#uses=1] + volatile store i16 0, i16* @uip_len + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + ret void + +if.end: ; preds = %entry + switch i16 0, label %return [ + i16 256, label %sw.bb + i16 512, label %sw.bb18 + ] + +sw.bb: ; preds = %if.end + ret void + +sw.bb18: ; preds = %if.end + ret void + +return: ; preds = %if.end + ret void +}
diff --git a/src/LLVM/test/CodeGen/MSP430/2009-05-17-Rot.ll b/src/LLVM/test/CodeGen/MSP430/2009-05-17-Rot.ll new file mode 100644 index 0000000..d622aa7 --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/2009-05-17-Rot.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=msp430 + +define i16 @rol1u16(i16 %x.arg) nounwind { + %retval = alloca i16 + %x = alloca i16 + store i16 %x.arg, i16* %x + %1 = load i16* %x + %2 = shl i16 %1, 1 + %3 = load i16* %x + %4 = lshr i16 %3, 15 + %5 = or i16 %2, %4 + store i16 %5, i16* %retval + br label %return +return: + %6 = load i16* %retval + ret i16 %6 +}
diff --git a/src/LLVM/test/CodeGen/MSP430/2009-05-17-Shift.ll b/src/LLVM/test/CodeGen/MSP430/2009-05-17-Shift.ll new file mode 100644 index 0000000..e23df78 --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/2009-05-17-Shift.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=msp430 | grep rra | count 1 + +define i16 @lsr2u16(i16 %x.arg) nounwind { + %retval = alloca i16 + %x = alloca i16 + store i16 %x.arg, i16* %x + %1 = load i16* %x + %2 = lshr i16 %1, 2 + store i16 %2, i16* %retval + br label %return +return: + %3 = load i16* %retval + ret i16 %3 + +}
diff --git a/src/LLVM/test/CodeGen/MSP430/2009-05-19-DoubleSplit.ll b/src/LLVM/test/CodeGen/MSP430/2009-05-19-DoubleSplit.ll new file mode 100644 index 0000000..54eb7ff --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/2009-05-19-DoubleSplit.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=msp430 + +define i16 @test(double %d) nounwind { +entry: + %add = fadd double %d, 1.000000e+00 + %call = tail call i16 @funct(double %add) nounwind + ret i16 %call +} + +declare i16 @funct(double) +
diff --git a/src/LLVM/test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll b/src/LLVM/test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll new file mode 100644 index 0000000..088d3e1 --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll
@@ -0,0 +1,30 @@ +; RUN: llc < %s +; PR4769 +target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8" +target triple = "msp430-generic-generic" + +define i16 @foo() nounwind readnone { +entry: + %result = alloca i16, align 1 ; <i16*> [#uses=2] + volatile store i16 0, i16* %result + %tmp = volatile load i16* %result ; <i16> [#uses=1] + ret i16 %tmp +} + +define i16 @main() nounwind { +entry: + br label %while.cond + +while.cond: ; preds = %while.cond, %entry + %call = call i16 @bar() nounwind ; <i16> [#uses=1] + %tobool = icmp eq i16 %call, 0 ; <i1> [#uses=1] + br i1 %tobool, label %while.end, label %while.cond + +while.end: ; preds = %while.cond + %result.i = alloca i16, align 1 ; <i16*> [#uses=2] + volatile store i16 0, i16* %result.i + %tmp.i = volatile load i16* %result.i ; <i16> [#uses=0] + ret i16 0 +} + +declare i16 @bar()
diff --git a/src/LLVM/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll b/src/LLVM/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll new file mode 100644 index 0000000..4d7d9b9 --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s | grep 0x0021 | count 2 +; PR4776 +target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8" +target triple = "msp430-unknown-unknown" + +@"\010x0021" = external global i8, align 1 ; <i8*> [#uses=2] + +define zeroext i8 @foo(i8 zeroext %x) nounwind { +entry: + %retval = alloca i8 ; <i8*> [#uses=2] + %x.addr = alloca i8 ; <i8*> [#uses=2] + %tmp = alloca i8, align 1 ; <i8*> [#uses=2] + store i8 %x, i8* %x.addr + %tmp1 = volatile load i8* @"\010x0021" ; <i8> [#uses=1] + store i8 %tmp1, i8* %tmp + %tmp2 = load i8* %x.addr ; <i8> [#uses=1] + volatile store i8 %tmp2, i8* @"\010x0021" + %tmp3 = load i8* %tmp ; <i8> [#uses=1] + store i8 %tmp3, i8* %retval + %0 = load i8* %retval ; <i8> [#uses=1] + ret i8 %0 +}
diff --git a/src/LLVM/test/CodeGen/MSP430/2009-10-10-OrImpDef.ll b/src/LLVM/test/CodeGen/MSP430/2009-10-10-OrImpDef.ll new file mode 100644 index 0000000..856eb9d --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/2009-10-10-OrImpDef.ll
@@ -0,0 +1,14 @@ +; RUN: llc -march=msp430 < %s +; PR4779 +define void @foo() nounwind { +entry: + %r = alloca i8 ; <i8*> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + volatile load i8* %r, align 1 ; <i8>:0 [#uses=1] + or i8 %0, 1 ; <i8>:1 [#uses=1] + volatile store i8 %1, i8* %r, align 1 + br label %return + +return: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/MSP430/2009-11-05-8BitLibcalls.ll b/src/LLVM/test/CodeGen/MSP430/2009-11-05-8BitLibcalls.ll new file mode 100644 index 0000000..94fe5c7 --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/2009-11-05-8BitLibcalls.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8" +target triple = "msp430-elf" + +@g_29 = common global i8 0, align 1 ; <i8*> [#uses=0] + +define signext i8 @foo(i8 signext %_si1, i8 signext %_si2) nounwind readnone { +entry: +; CHECK: foo: +; CHECK: call #__mulqi3 + %mul = mul i8 %_si2, %_si1 ; <i8> [#uses=1] + ret i8 %mul +} + +define void @uint81(i16* nocapture %p_32) nounwind { +entry: + %call = tail call i16 @bar(i8* bitcast (i8 (i8, i8)* @foo to i8*)) nounwind ; <i16> [#uses=0] + ret void +} + +declare i16 @bar(i8*)
diff --git a/src/LLVM/test/CodeGen/MSP430/2009-11-08-InvalidResNo.ll b/src/LLVM/test/CodeGen/MSP430/2009-11-08-InvalidResNo.ll new file mode 100644 index 0000000..d232aea --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/2009-11-08-InvalidResNo.ll
@@ -0,0 +1,64 @@ +; RUN: llc < %s +target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8" +target triple = "msp430-elf" + +%struct.httpd_fs_file = type { i8*, i16 } +%struct.psock = type { %struct.pt, %struct.pt, i8*, i8*, i8*, i16, i16, %struct.httpd_fs_file, i16, i8, i8 } +%struct.pt = type { i16 } + +@foo = external global i8* + +define signext i8 @psock_readto(%struct.psock* nocapture %psock, i8 zeroext %c) nounwind { +entry: + switch i16 undef, label %sw.epilog [ + i16 0, label %sw.bb + i16 283, label %if.else.i + ] + +sw.bb: ; preds = %entry + br label %do.body + +do.body: ; preds = %while.cond36.i, %while.end.i, %sw.bb + br label %while.cond.i + +if.else.i: ; preds = %entry + br i1 undef, label %psock_newdata.exit, label %if.else11.i + +if.else11.i: ; preds = %if.else.i + ret i8 0 + +psock_newdata.exit: ; preds = %if.else.i + ret i8 0 + +while.cond.i: ; preds = %while.body.i, %do.body + br i1 undef, label %while.end.i, label %while.body.i + +while.body.i: ; preds = %while.cond.i + br i1 undef, label %do.end41, label %while.cond.i + +while.end.i: ; preds = %while.cond.i + br i1 undef, label %do.body, label %while.cond36.i.preheader + +while.cond36.i.preheader: ; preds = %while.end.i + br label %while.cond36.i + +while.cond36.i: ; preds = %while.body41.i, %while.cond36.i.preheader + br i1 undef, label %do.body, label %while.body41.i + +while.body41.i: ; preds = %while.cond36.i + %tmp43.i = load i8** @foo ; <i8*> [#uses=2] + %tmp44.i = load i8* %tmp43.i ; <i8> [#uses=1] + %ptrincdec50.i = getelementptr inbounds i8* %tmp43.i, i16 1 ; <i8*> [#uses=1] + store i8* %ptrincdec50.i, i8** @foo + %cmp55.i = icmp eq i8 %tmp44.i, %c ; <i1> [#uses=1] + br i1 %cmp55.i, label %do.end41, label %while.cond36.i + +do.end41: ; preds = %while.body41.i, %while.body.i + br i1 undef, label %if.then46, label %sw.epilog + +if.then46: ; preds = %do.end41 + ret i8 0 + +sw.epilog: ; preds = %do.end41, %entry + ret i8 2 +}
diff --git a/src/LLVM/test/CodeGen/MSP430/2009-11-20-NewNode.ll b/src/LLVM/test/CodeGen/MSP430/2009-11-20-NewNode.ll new file mode 100644 index 0000000..887c7d6 --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/2009-11-20-NewNode.ll
@@ -0,0 +1,36 @@ +; RUN: llc -march=msp430 < %s +; PR5558 + +define i64 @_strtoll_r(i16 %base) nounwind { +entry: + br i1 undef, label %if.then, label %if.end27 + +if.then: ; preds = %do.end + br label %if.end27 + +if.end27: ; preds = %if.then, %do.end + %cond66 = select i1 undef, i64 -9223372036854775808, i64 9223372036854775807 ; <i64> [#uses=3] + %conv69 = sext i16 %base to i64 ; <i64> [#uses=1] + %div = udiv i64 %cond66, %conv69 ; <i64> [#uses=1] + br label %for.cond + +for.cond: ; preds = %if.end116, %if.end27 + br i1 undef, label %if.then152, label %if.then93 + +if.then93: ; preds = %for.cond + br i1 undef, label %if.end116, label %if.then152 + +if.end116: ; preds = %if.then93 + %cmp123 = icmp ugt i64 undef, %div ; <i1> [#uses=1] + %or.cond = or i1 undef, %cmp123 ; <i1> [#uses=0] + br label %for.cond + +if.then152: ; preds = %if.then93, %for.cond + br i1 undef, label %if.end182, label %if.then172 + +if.then172: ; preds = %if.then152 + ret i64 %cond66 + +if.end182: ; preds = %if.then152 + ret i64 %cond66 +}
diff --git a/src/LLVM/test/CodeGen/MSP430/2009-12-21-FrameAddr.ll b/src/LLVM/test/CodeGen/MSP430/2009-12-21-FrameAddr.ll new file mode 100644 index 0000000..b92477b --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/2009-12-21-FrameAddr.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s +; PR5703 +target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8" +target triple = "msp430-unknown-linux-gnu" + +define msp430_intrcc void @foo() nounwind { +entry: + %fa = call i16* @llvm.frameaddress(i32 0) + store i16 0, i16* %fa + ret void +} + +declare i16* @llvm.frameaddress(i32)
diff --git a/src/LLVM/test/CodeGen/MSP430/2009-12-22-InlineAsm.ll b/src/LLVM/test/CodeGen/MSP430/2009-12-22-InlineAsm.ll new file mode 100644 index 0000000..a9df1a3 --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/2009-12-22-InlineAsm.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s +; PR 5570 +; ModuleID = 'test.c' +target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8-n8:16" +target triple = "msp430-unknown-unknown" + +@buf = common global [10 x i8] zeroinitializer, align 1 ; <[10 x i8]*> [#uses=2] + +define i16 @main() noreturn nounwind { +entry: + %0 = tail call i8* asm "", "=r,0"(i8* getelementptr inbounds ([10 x i8]* @buf, i16 0, i16 0)) nounwind ; <i8*> [#uses=1] + %sub.ptr = getelementptr inbounds i8* %0, i16 1 ; <i8*> [#uses=1] + %sub.ptr.lhs.cast = ptrtoint i8* %sub.ptr to i16 ; <i16> [#uses=1] + %sub.ptr.sub = sub i16 %sub.ptr.lhs.cast, ptrtoint ([10 x i8]* @buf to i16) ; <i16> [#uses=1] + %cmp = icmp eq i16 %sub.ptr.sub, 1 ; <i1> [#uses=1] + br i1 %cmp, label %bar.exit, label %if.then.i + +if.then.i: ; preds = %entry + tail call void @abort() nounwind + br label %bar.exit + +bar.exit: ; preds = %entry, %if.then.i + tail call void @exit(i16 0) nounwind + unreachable +} + +declare void @exit(i16) noreturn + +declare void @abort()
diff --git a/src/LLVM/test/CodeGen/MSP430/2010-04-07-DbgValueOtherTargets.ll b/src/LLVM/test/CodeGen/MSP430/2010-04-07-DbgValueOtherTargets.ll new file mode 100644 index 0000000..9d549da --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/2010-04-07-DbgValueOtherTargets.ll
@@ -0,0 +1,28 @@ +; RUN: llc -O0 -march=msp430 -asm-verbose < %s | FileCheck %s +; Check that DEBUG_VALUE comments come through on a variety of targets. + +define i32 @main() nounwind ssp { +entry: +; CHECK: DEBUG_VALUE + call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9 + ret i32 0, !dbg !10 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.sp = !{!0} + +!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5} +!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 0} +!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] +!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!9 = metadata !{i32 3, i32 11, metadata !8, null} +!10 = metadata !{i32 4, i32 2, metadata !8, null} +
diff --git a/src/LLVM/test/CodeGen/MSP430/2010-05-01-CombinerAnd.ll b/src/LLVM/test/CodeGen/MSP430/2010-05-01-CombinerAnd.ll new file mode 100644 index 0000000..9910037 --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/2010-05-01-CombinerAnd.ll
@@ -0,0 +1,27 @@ +; RUN: llc < %s +; PR7001 + +target datalayout = "e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16" +target triple = "msp430-elf" + +define i16 @main() nounwind { +entry: + br label %while.cond + +while.cond: ; preds = %while.body, %entry + br i1 undef, label %land.rhs, label %land.end + +land.rhs: ; preds = %while.cond + br label %land.end + +land.end: ; preds = %land.rhs, %while.cond + %0 = phi i1 [ false, %while.cond ], [ undef, %land.rhs ] ; <i1> [#uses=1] + br i1 %0, label %while.body, label %while.end + +while.body: ; preds = %land.end + %tmp4 = load i16* undef ; <i16> [#uses=0] + br label %while.cond + +while.end: ; preds = %land.end + ret i16 undef +}
diff --git a/src/LLVM/test/CodeGen/MSP430/AddrMode-bis-rx.ll b/src/LLVM/test/CodeGen/MSP430/AddrMode-bis-rx.ll new file mode 100644 index 0000000..4f9a724 --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/AddrMode-bis-rx.ll
@@ -0,0 +1,74 @@ +; RUN: llc < %s -march=msp430 | FileCheck %s +target datalayout = "e-p:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:16:16" +target triple = "msp430-generic-generic" + +define i16 @am1(i16 %x, i16* %a) nounwind { + %1 = load i16* %a + %2 = or i16 %1,%x + ret i16 %2 +} +; CHECK: am1: +; CHECK: bis.w 0(r14), r15 + +@foo = external global i16 + +define i16 @am2(i16 %x) nounwind { + %1 = load i16* @foo + %2 = or i16 %1,%x + ret i16 %2 +} +; CHECK: am2: +; CHECK: bis.w &foo, r15 + +@bar = internal constant [2 x i8] [ i8 32, i8 64 ] + +define i8 @am3(i8 %x, i16 %n) nounwind { + %1 = getelementptr [2 x i8]* @bar, i16 0, i16 %n + %2 = load i8* %1 + %3 = or i8 %2,%x + ret i8 %3 +} +; CHECK: am3: +; CHECK: bis.b bar(r14), r15 + +define i16 @am4(i16 %x) nounwind { + %1 = volatile load i16* inttoptr(i16 32 to i16*) + %2 = or i16 %1,%x + ret i16 %2 +} +; CHECK: am4: +; CHECK: bis.w &32, r15 + +define i16 @am5(i16 %x, i16* %a) nounwind { + %1 = getelementptr i16* %a, i16 2 + %2 = load i16* %1 + %3 = or i16 %2,%x + ret i16 %3 +} +; CHECK: am5: +; CHECK: bis.w 4(r14), r15 + +%S = type { i16, i16 } +@baz = common global %S zeroinitializer, align 1 + +define i16 @am6(i16 %x) nounwind { + %1 = load i16* getelementptr (%S* @baz, i32 0, i32 1) + %2 = or i16 %1,%x + ret i16 %2 +} +; CHECK: am6: +; CHECK: bis.w &baz+2, r15 + +%T = type { i16, [2 x i8] } +@duh = internal constant %T { i16 16, [2 x i8][i8 32, i8 64 ] } + +define i8 @am7(i8 %x, i16 %n) nounwind { + %1 = getelementptr %T* @duh, i32 0, i32 1 + %2 = getelementptr [2 x i8]* %1, i16 0, i16 %n + %3= load i8* %2 + %4 = or i8 %3,%x + ret i8 %4 +} +; CHECK: am7: +; CHECK: bis.b duh+2(r14), r15 +
diff --git a/src/LLVM/test/CodeGen/MSP430/AddrMode-bis-xr.ll b/src/LLVM/test/CodeGen/MSP430/AddrMode-bis-xr.ll new file mode 100644 index 0000000..17ebd87 --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/AddrMode-bis-xr.ll
@@ -0,0 +1,81 @@ +; RUN: llc < %s -march=msp430 | FileCheck %s +target datalayout = "e-p:16:16:16-i8:8:8-i16:16:16-i32:16:16" +target triple = "msp430-generic-generic" + +define void @am1(i16* %a, i16 %x) nounwind { + %1 = load i16* %a + %2 = or i16 %x, %1 + store i16 %2, i16* %a + ret void +} +; CHECK: am1: +; CHECK: bis.w r14, 0(r15) + +@foo = external global i16 + +define void @am2(i16 %x) nounwind { + %1 = load i16* @foo + %2 = or i16 %x, %1 + store i16 %2, i16* @foo + ret void +} +; CHECK: am2: +; CHECK: bis.w r15, &foo + +@bar = external global [2 x i8] + +define void @am3(i16 %i, i8 %x) nounwind { + %1 = getelementptr [2 x i8]* @bar, i16 0, i16 %i + %2 = load i8* %1 + %3 = or i8 %x, %2 + store i8 %3, i8* %1 + ret void +} +; CHECK: am3: +; CHECK: bis.b r14, bar(r15) + +define void @am4(i16 %x) nounwind { + %1 = volatile load i16* inttoptr(i16 32 to i16*) + %2 = or i16 %x, %1 + volatile store i16 %2, i16* inttoptr(i16 32 to i16*) + ret void +} +; CHECK: am4: +; CHECK: bis.w r15, &32 + +define void @am5(i16* %a, i16 %x) readonly { + %1 = getelementptr inbounds i16* %a, i16 2 + %2 = load i16* %1 + %3 = or i16 %x, %2 + store i16 %3, i16* %1 + ret void +} +; CHECK: am5: +; CHECK: bis.w r14, 4(r15) + +%S = type { i16, i16 } +@baz = common global %S zeroinitializer + +define void @am6(i16 %x) nounwind { + %1 = load i16* getelementptr (%S* @baz, i32 0, i32 1) + %2 = or i16 %x, %1 + store i16 %2, i16* getelementptr (%S* @baz, i32 0, i32 1) + ret void +} +; CHECK: am6: +; CHECK: bis.w r15, &baz+2 + +%T = type { i16, [2 x i8] } +@duh = external global %T + +define void @am7(i16 %n, i8 %x) nounwind { + %1 = getelementptr %T* @duh, i32 0, i32 1 + %2 = getelementptr [2 x i8]* %1, i16 0, i16 %n + %3 = load i8* %2 + %4 = or i8 %x, %3 + store i8 %4, i8* %2 + ret void +} +; CHECK: am7: +; CHECK: bis.b r14, duh+2(r15) +
diff --git a/src/LLVM/test/CodeGen/MSP430/AddrMode-mov-rx.ll b/src/LLVM/test/CodeGen/MSP430/AddrMode-mov-rx.ll new file mode 100644 index 0000000..6676b88 --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/AddrMode-mov-rx.ll
@@ -0,0 +1,67 @@ +; RUN: llc < %s -march=msp430 | FileCheck %s +target datalayout = "e-p:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:16:16" +target triple = "msp430-generic-generic" + +define i16 @am1(i16* %a) nounwind { + %1 = load i16* %a + ret i16 %1 +} +; CHECK: am1: +; CHECK: mov.w 0(r15), r15 + +@foo = external global i16 + +define i16 @am2() nounwind { + %1 = load i16* @foo + ret i16 %1 +} +; CHECK: am2: +; CHECK: mov.w &foo, r15 + +@bar = internal constant [2 x i8] [ i8 32, i8 64 ] + +define i8 @am3(i16 %n) nounwind { + %1 = getelementptr [2 x i8]* @bar, i16 0, i16 %n + %2 = load i8* %1 + ret i8 %2 +} +; CHECK: am3: +; CHECK: mov.b bar(r15), r15 + +define i16 @am4() nounwind { + %1 = volatile load i16* inttoptr(i16 32 to i16*) + ret i16 %1 +} +; CHECK: am4: +; CHECK: mov.w &32, r15 + +define i16 @am5(i16* %a) nounwind { + %1 = getelementptr i16* %a, i16 2 + %2 = load i16* %1 + ret i16 %2 +} +; CHECK: am5: +; CHECK: mov.w 4(r15), r15 + +%S = type { i16, i16 } +@baz = common global %S zeroinitializer, align 1 + +define i16 @am6() nounwind { + %1 = load i16* getelementptr (%S* @baz, i32 0, i32 1) + ret i16 %1 +} +; CHECK: am6: +; CHECK: mov.w &baz+2, r15 + +%T = type { i16, [2 x i8] } +@duh = internal constant %T { i16 16, [2 x i8][i8 32, i8 64 ] } + +define i8 @am7(i16 %n) nounwind { + %1 = getelementptr %T* @duh, i32 0, i32 1 + %2 = getelementptr [2 x i8]* %1, i16 0, i16 %n + %3= load i8* %2 + ret i8 %3 +} +; CHECK: am7: +; CHECK: mov.b duh+2(r15), r15 +
diff --git a/src/LLVM/test/CodeGen/MSP430/AddrMode-mov-xr.ll b/src/LLVM/test/CodeGen/MSP430/AddrMode-mov-xr.ll new file mode 100644 index 0000000..4b327b0 --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/AddrMode-mov-xr.ll
@@ -0,0 +1,67 @@ +; RUN: llc < %s -march=msp430 | FileCheck %s +target datalayout = "e-p:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:16:16" +target triple = "msp430-generic-generic" + +define void @am1(i16* %a, i16 %b) nounwind { + store i16 %b, i16* %a + ret void +} +; CHECK: am1: +; CHECK: mov.w r14, 0(r15) + +@foo = external global i16 + +define void @am2(i16 %a) nounwind { + store i16 %a, i16* @foo + ret void +} +; CHECK: am2: +; CHECK: mov.w r15, &foo + +@bar = external global [2 x i8] + +define void @am3(i16 %i, i8 %a) nounwind { + %1 = getelementptr [2 x i8]* @bar, i16 0, i16 %i + store i8 %a, i8* %1 + ret void +} +; CHECK: am3: +; CHECK: mov.b r14, bar(r15) + +define void @am4(i16 %a) nounwind { + volatile store i16 %a, i16* inttoptr(i16 32 to i16*) + ret void +} +; CHECK: am4: +; CHECK: mov.w r15, &32 + +define void @am5(i16* nocapture %p, i16 %a) nounwind readonly { + %1 = getelementptr inbounds i16* %p, i16 2 + store i16 %a, i16* %1 + ret void +} +; CHECK: am5: +; CHECK: mov.w r14, 4(r15) + +%S = type { i16, i16 } +@baz = common global %S zeroinitializer, align 1 + +define void @am6(i16 %a) nounwind { + store i16 %a, i16* getelementptr (%S* @baz, i32 0, i32 1) + ret void +} +; CHECK: am6: +; CHECK: mov.w r15, &baz+2 + +%T = type { i16, [2 x i8] } +@duh = external global %T + +define void @am7(i16 %n, i8 %a) nounwind { + %1 = getelementptr %T* @duh, i32 0, i32 1 + %2 = getelementptr [2 x i8]* %1, i16 0, i16 %n + store i8 %a, i8* %2 + ret void +} +; CHECK: am7: +; CHECK: mov.b r14, duh+2(r15) +
diff --git a/src/LLVM/test/CodeGen/MSP430/Inst16mi.ll b/src/LLVM/test/CodeGen/MSP430/Inst16mi.ll new file mode 100644 index 0000000..33d7aa4 --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/Inst16mi.ll
@@ -0,0 +1,48 @@ +; RUN: llc -march=msp430 < %s | FileCheck %s + +target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8" +target triple = "msp430-generic-generic" +@foo = common global i16 0, align 2 + +define void @mov() nounwind { +; CHECK: mov: +; CHECK: mov.w #2, &foo + store i16 2, i16 * @foo + ret void +} + +define void @add() nounwind { +; CHECK: add: +; CHECK: add.w #2, &foo + %1 = load i16* @foo + %2 = add i16 %1, 2 + store i16 %2, i16 * @foo + ret void +} + +define void @and() nounwind { +; CHECK: and: +; CHECK: and.w #2, &foo + %1 = load i16* @foo + %2 = and i16 %1, 2 + store i16 %2, i16 * @foo + ret void +} + +define void @bis() nounwind { +; CHECK: bis: +; CHECK: bis.w #2, &foo + %1 = load i16* @foo + %2 = or i16 %1, 2 + store i16 %2, i16 * @foo + ret void +} + +define void @xor() nounwind { +; CHECK: xor: +; CHECK: xor.w #2, &foo + %1 = load i16* @foo + %2 = xor i16 %1, 2 + store i16 %2, i16 * @foo + ret void +}
diff --git a/src/LLVM/test/CodeGen/MSP430/Inst16mm.ll b/src/LLVM/test/CodeGen/MSP430/Inst16mm.ll new file mode 100644 index 0000000..2337c2c --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/Inst16mm.ll
@@ -0,0 +1,69 @@ +; RUN: llc -march=msp430 -combiner-alias-analysis < %s | FileCheck %s +target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8" +target triple = "msp430-generic-generic" +@foo = common global i16 0, align 2 +@bar = common global i16 0, align 2 + +define void @mov() nounwind { +; CHECK: mov: +; CHECK: mov.w &bar, &foo + %1 = load i16* @bar + store i16 %1, i16* @foo + ret void +} + +define void @add() nounwind { +; CHECK: add: +; CHECK: add.w &bar, &foo + %1 = load i16* @bar + %2 = load i16* @foo + %3 = add i16 %2, %1 + store i16 %3, i16* @foo + ret void +} + +define void @and() nounwind { +; CHECK: and: +; CHECK: and.w &bar, &foo + %1 = load i16* @bar + %2 = load i16* @foo + %3 = and i16 %2, %1 + store i16 %3, i16* @foo + ret void +} + +define void @bis() nounwind { +; CHECK: bis: +; CHECK: bis.w &bar, &foo + %1 = load i16* @bar + %2 = load i16* @foo + %3 = or i16 %2, %1 + store i16 %3, i16* @foo + ret void +} + +define void @xor() nounwind { +; CHECK: xor: +; CHECK: xor.w &bar, &foo + %1 = load i16* @bar + %2 = load i16* @foo + %3 = xor i16 %2, %1 + store i16 %3, i16* @foo + ret void +} + +define i16 @mov2() nounwind { +entry: + %retval = alloca i16 ; <i16*> [#uses=3] + %x = alloca i32, align 2 ; <i32*> [#uses=1] + %y = alloca i32, align 2 ; <i32*> [#uses=1] + store i16 0, i16* %retval + %tmp = load i32* %y ; <i32> [#uses=1] + store i32 %tmp, i32* %x + store i16 0, i16* %retval + %0 = load i16* %retval ; <i16> [#uses=1] + ret i16 %0 +; CHECK: mov2: +; CHECK: mov.w 0(r1), 4(r1) +; CHECK: mov.w 2(r1), 6(r1) +}
diff --git a/src/LLVM/test/CodeGen/MSP430/Inst16mr.ll b/src/LLVM/test/CodeGen/MSP430/Inst16mr.ll new file mode 100644 index 0000000..2613f01 --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/Inst16mr.ll
@@ -0,0 +1,58 @@ +; RUN: llc -march=msp430 < %s | FileCheck %s +target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8" +target triple = "msp430-generic-generic" +@foo = common global i16 0, align 2 + +define void @mov(i16 %a) nounwind { +; CHECK: mov: +; CHECK: mov.w r15, &foo + store i16 %a, i16* @foo + ret void +} + +define void @add(i16 %a) nounwind { +; CHECK: add: +; CHECK: add.w r15, &foo + %1 = load i16* @foo + %2 = add i16 %a, %1 + store i16 %2, i16* @foo + ret void +} + +define void @and(i16 %a) nounwind { +; CHECK: and: +; CHECK: and.w r15, &foo + %1 = load i16* @foo + %2 = and i16 %a, %1 + store i16 %2, i16* @foo + ret void +} + +define void @bis(i16 %a) nounwind { +; CHECK: bis: +; CHECK: bis.w r15, &foo + %1 = load i16* @foo + %2 = or i16 %a, %1 + store i16 %2, i16* @foo + ret void +} + +define void @bic(i16 zeroext %m) nounwind { +; CHECK: bic: +; CHECK: bic.w r15, &foo + %1 = xor i16 %m, -1 + %2 = load i16* @foo + %3 = and i16 %2, %1 + store i16 %3, i16* @foo + ret void +} + +define void @xor(i16 %a) nounwind { +; CHECK: xor: +; CHECK: xor.w r15, &foo + %1 = load i16* @foo + %2 = xor i16 %a, %1 + store i16 %2, i16* @foo + ret void +} +
diff --git a/src/LLVM/test/CodeGen/MSP430/Inst16ri.ll b/src/LLVM/test/CodeGen/MSP430/Inst16ri.ll new file mode 100644 index 0000000..5115a23 --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/Inst16ri.ll
@@ -0,0 +1,37 @@ +; RUN: llc -march=msp430 < %s | FileCheck %s +target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8" +target triple = "msp430-generic-generic" + +define i16 @mov() nounwind { +; CHECK: mov: +; CHECK: mov.w #1, r15 + ret i16 1 +} + +define i16 @add(i16 %a, i16 %b) nounwind { +; CHECK: add: +; CHECK: add.w #1, r15 + %1 = add i16 %a, 1 + ret i16 %1 +} + +define i16 @and(i16 %a, i16 %b) nounwind { +; CHECK: and: +; CHECK: and.w #1, r15 + %1 = and i16 %a, 1 + ret i16 %1 +} + +define i16 @bis(i16 %a, i16 %b) nounwind { +; CHECK: bis: +; CHECK: bis.w #1, r15 + %1 = or i16 %a, 1 + ret i16 %1 +} + +define i16 @xor(i16 %a, i16 %b) nounwind { +; CHECK: xor: +; CHECK: xor.w #1, r15 + %1 = xor i16 %a, 1 + ret i16 %1 +}
diff --git a/src/LLVM/test/CodeGen/MSP430/Inst16rm.ll b/src/LLVM/test/CodeGen/MSP430/Inst16rm.ll new file mode 100644 index 0000000..02e89c7 --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/Inst16rm.ll
@@ -0,0 +1,46 @@ +; RUN: llc -march=msp430 < %s | FileCheck %s +target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8" +target triple = "msp430-generic-generic" +@foo = common global i16 0, align 2 + +define i16 @add(i16 %a) nounwind { +; CHECK: add: +; CHECK: add.w &foo, r15 + %1 = load i16* @foo + %2 = add i16 %a, %1 + ret i16 %2 +} + +define i16 @and(i16 %a) nounwind { +; CHECK: and: +; CHECK: and.w &foo, r15 + %1 = load i16* @foo + %2 = and i16 %a, %1 + ret i16 %2 +} + +define i16 @bis(i16 %a) nounwind { +; CHECK: bis: +; CHECK: bis.w &foo, r15 + %1 = load i16* @foo + %2 = or i16 %a, %1 + ret i16 %2 +} + +define i16 @bic(i16 %a) nounwind { +; CHECK: bic: +; CHECK: bic.w &foo, r15 + %1 = load i16* @foo + %2 = xor i16 %1, -1 + %3 = and i16 %a, %2 + ret i16 %3 +} + +define i16 @xor(i16 %a) nounwind { +; CHECK: xor: +; CHECK: xor.w &foo, r15 + %1 = load i16* @foo + %2 = xor i16 %a, %1 + ret i16 %2 +} +
diff --git a/src/LLVM/test/CodeGen/MSP430/Inst16rr.ll b/src/LLVM/test/CodeGen/MSP430/Inst16rr.ll new file mode 100644 index 0000000..2f1ba5b --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/Inst16rr.ll
@@ -0,0 +1,45 @@ +; RUN: llc -march=msp430 < %s | FileCheck %s +target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8" +target triple = "msp430-generic-generic" + +define i16 @mov(i16 %a, i16 %b) nounwind { +; CHECK: mov: +; CHECK: mov.w r14, r15 + ret i16 %b +} + +define i16 @add(i16 %a, i16 %b) nounwind { +; CHECK: add: +; CHECK: add.w r14, r15 + %1 = add i16 %a, %b + ret i16 %1 +} + +define i16 @and(i16 %a, i16 %b) nounwind { +; CHECK: and: +; CHECK: and.w r14, r15 + %1 = and i16 %a, %b + ret i16 %1 +} + +define i16 @bis(i16 %a, i16 %b) nounwind { +; CHECK: bis: +; CHECK: bis.w r14, r15 + %1 = or i16 %a, %b + ret i16 %1 +} + +define i16 @bic(i16 %a, i16 %b) nounwind { +; CHECK: bic: +; CHECK: bic.w r14, r15 + %1 = xor i16 %b, -1 + %2 = and i16 %a, %1 + ret i16 %2 +} + +define i16 @xor(i16 %a, i16 %b) nounwind { +; CHECK: xor: +; CHECK: xor.w r14, r15 + %1 = xor i16 %a, %b + ret i16 %1 +}
diff --git a/src/LLVM/test/CodeGen/MSP430/Inst8mi.ll b/src/LLVM/test/CodeGen/MSP430/Inst8mi.ll new file mode 100644 index 0000000..ef318ce --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/Inst8mi.ll
@@ -0,0 +1,48 @@ +; RUN: llc -march=msp430 < %s | FileCheck %s +target datalayout = "e-p:16:8:8-i8:8:8-i8:8:8-i32:8:8" +target triple = "msp430-generic-generic" +@foo = common global i8 0, align 1 + +define void @mov() nounwind { +; CHECK: mov: +; CHECK: mov.b #2, &foo + store i8 2, i8 * @foo + ret void +} + +define void @add() nounwind { +; CHECK: add: +; CHECK: add.b #2, &foo + %1 = load i8* @foo + %2 = add i8 %1, 2 + store i8 %2, i8 * @foo + ret void +} + +define void @and() nounwind { +; CHECK: and: +; CHECK: and.b #2, &foo + %1 = load i8* @foo + %2 = and i8 %1, 2 + store i8 %2, i8 * @foo + ret void +} + +define void @bis() nounwind { +; CHECK: bis: +; CHECK: bis.b #2, &foo + %1 = load i8* @foo + %2 = or i8 %1, 2 + store i8 %2, i8 * @foo + ret void +} + +define void @xor() nounwind { +; CHECK: xor: +; CHECK: xor.b #2, &foo + %1 = load i8* @foo + %2 = xor i8 %1, 2 + store i8 %2, i8 * @foo + ret void +} +
diff --git a/src/LLVM/test/CodeGen/MSP430/Inst8mm.ll b/src/LLVM/test/CodeGen/MSP430/Inst8mm.ll new file mode 100644 index 0000000..a2987ac --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/Inst8mm.ll
@@ -0,0 +1,55 @@ +; RUN: llc -march=msp430 < %s | FileCheck %s +target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8" +target triple = "msp430-generic-generic" + +@foo = common global i8 0, align 1 +@bar = common global i8 0, align 1 + +define void @mov() nounwind { +; CHECK: mov: +; CHECK: mov.b &bar, &foo + %1 = load i8* @bar + store i8 %1, i8* @foo + ret void +} + +define void @add() nounwind { +; CHECK: add: +; CHECK: add.b &bar, &foo + %1 = load i8* @bar + %2 = load i8* @foo + %3 = add i8 %2, %1 + store i8 %3, i8* @foo + ret void +} + +define void @and() nounwind { +; CHECK: and: +; CHECK: and.b &bar, &foo + %1 = load i8* @bar + %2 = load i8* @foo + %3 = and i8 %2, %1 + store i8 %3, i8* @foo + ret void +} + +define void @bis() nounwind { +; CHECK: bis: +; CHECK: bis.b &bar, &foo + %1 = load i8* @bar + %2 = load i8* @foo + %3 = or i8 %2, %1 + store i8 %3, i8* @foo + ret void +} + +define void @xor() nounwind { +; CHECK: xor: +; CHECK: xor.b &bar, &foo + %1 = load i8* @bar + %2 = load i8* @foo + %3 = xor i8 %2, %1 + store i8 %3, i8* @foo + ret void +} +
diff --git a/src/LLVM/test/CodeGen/MSP430/Inst8mr.ll b/src/LLVM/test/CodeGen/MSP430/Inst8mr.ll new file mode 100644 index 0000000..428d1fa --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/Inst8mr.ll
@@ -0,0 +1,58 @@ +; RUN: llc -march=msp430 < %s | FileCheck %s +target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8" +target triple = "msp430-generic-generic" +@foo = common global i8 0, align 1 + +define void @mov(i8 %a) nounwind { +; CHECK: mov: +; CHECK: mov.b r15, &foo + store i8 %a, i8* @foo + ret void +} + +define void @and(i8 %a) nounwind { +; CHECK: and: +; CHECK: and.b r15, &foo + %1 = load i8* @foo + %2 = and i8 %a, %1 + store i8 %2, i8* @foo + ret void +} + +define void @add(i8 %a) nounwind { +; CHECK: add: +; CHECK: add.b r15, &foo + %1 = load i8* @foo + %2 = add i8 %a, %1 + store i8 %2, i8* @foo + ret void +} + +define void @bis(i8 %a) nounwind { +; CHECK: bis: +; CHECK: bis.b r15, &foo + %1 = load i8* @foo + %2 = or i8 %a, %1 + store i8 %2, i8* @foo + ret void +} + +define void @bic(i8 zeroext %m) nounwind { +; CHECK: bic: +; CHECK: bic.b r15, &foo + %1 = xor i8 %m, -1 + %2 = load i8* @foo + %3 = and i8 %2, %1 + store i8 %3, i8* @foo + ret void +} + +define void @xor(i8 %a) nounwind { +; CHECK: xor: +; CHECK: xor.b r15, &foo + %1 = load i8* @foo + %2 = xor i8 %a, %1 + store i8 %2, i8* @foo + ret void +} +
diff --git a/src/LLVM/test/CodeGen/MSP430/Inst8ri.ll b/src/LLVM/test/CodeGen/MSP430/Inst8ri.ll new file mode 100644 index 0000000..ac3418a --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/Inst8ri.ll
@@ -0,0 +1,37 @@ +; RUN: llc -march=msp430 < %s | FileCheck %s +target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8" +target triple = "msp430-generic-generic" + +define i8 @mov() nounwind { +; CHECK: mov: +; CHECK: mov.b #1, r15 + ret i8 1 +} + +define i8 @add(i8 %a, i8 %b) nounwind { +; CHECK: add: +; CHECK: add.b #1, r15 + %1 = add i8 %a, 1 + ret i8 %1 +} + +define i8 @and(i8 %a, i8 %b) nounwind { +; CHECK: and: +; CHECK: and.b #1, r15 + %1 = and i8 %a, 1 + ret i8 %1 +} + +define i8 @bis(i8 %a, i8 %b) nounwind { +; CHECK: bis: +; CHECK: bis.b #1, r15 + %1 = or i8 %a, 1 + ret i8 %1 +} + +define i8 @xor(i8 %a, i8 %b) nounwind { +; CHECK: xor: +; CHECK: xor.b #1, r15 + %1 = xor i8 %a, 1 + ret i8 %1 +}
diff --git a/src/LLVM/test/CodeGen/MSP430/Inst8rm.ll b/src/LLVM/test/CodeGen/MSP430/Inst8rm.ll new file mode 100644 index 0000000..c062f04 --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/Inst8rm.ll
@@ -0,0 +1,46 @@ +; RUN: llc -march=msp430 < %s | FileCheck %s +target datalayout = "e-p:16:8:8-i8:8:8-i8:8:8-i32:8:8" +target triple = "msp430-generic-generic" +@foo = common global i8 0, align 1 + +define i8 @add(i8 %a) nounwind { +; CHECK: add: +; CHECK: add.b &foo, r15 + %1 = load i8* @foo + %2 = add i8 %a, %1 + ret i8 %2 +} + +define i8 @and(i8 %a) nounwind { +; CHECK: and: +; CHECK: and.b &foo, r15 + %1 = load i8* @foo + %2 = and i8 %a, %1 + ret i8 %2 +} + +define i8 @bis(i8 %a) nounwind { +; CHECK: bis: +; CHECK: bis.b &foo, r15 + %1 = load i8* @foo + %2 = or i8 %a, %1 + ret i8 %2 +} + +define i8 @bic(i8 %a) nounwind { +; CHECK: bic: +; CHECK: bic.b &foo, r15 + %1 = load i8* @foo + %2 = xor i8 %1, -1 + %3 = and i8 %a, %2 + ret i8 %3 +} + +define i8 @xor(i8 %a) nounwind { +; CHECK: xor: +; CHECK: xor.b &foo, r15 + %1 = load i8* @foo + %2 = xor i8 %a, %1 + ret i8 %2 +} +
diff --git a/src/LLVM/test/CodeGen/MSP430/Inst8rr.ll b/src/LLVM/test/CodeGen/MSP430/Inst8rr.ll new file mode 100644 index 0000000..45342e2 --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/Inst8rr.ll
@@ -0,0 +1,46 @@ +; RUN: llc -march=msp430 < %s | FileCheck %s +target datalayout = "e-p:16:8:8-i8:8:8-i8:8:8-i32:8:8" +target triple = "msp430-generic-generic" + +define i8 @mov(i8 %a, i8 %b) nounwind { +; CHECK: mov: +; CHECK: mov.b r14, r15 + ret i8 %b +} + +define i8 @add(i8 %a, i8 %b) nounwind { +; CHECK: add: +; CHECK: add.b + %1 = add i8 %a, %b + ret i8 %1 +} + +define i8 @and(i8 %a, i8 %b) nounwind { +; CHECK: and: +; CHECK: and.w r14, r15 + %1 = and i8 %a, %b + ret i8 %1 +} + +define i8 @bis(i8 %a, i8 %b) nounwind { +; CHECK: bis: +; CHECK: bis.w r14, r15 + %1 = or i8 %a, %b + ret i8 %1 +} + +define i8 @bic(i8 %a, i8 %b) nounwind { +; CHECK: bic: +; CHECK: bic.b r14, r15 + %1 = xor i8 %b, -1 + %2 = and i8 %a, %1 + ret i8 %2 +} + +define i8 @xor(i8 %a, i8 %b) nounwind { +; CHECK: xor: +; CHECK: xor.w r14, r15 + %1 = xor i8 %a, %b + ret i8 %1 +} +
diff --git a/src/LLVM/test/CodeGen/MSP430/bit.ll b/src/LLVM/test/CodeGen/MSP430/bit.ll new file mode 100644 index 0000000..03d672b --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/bit.ll
@@ -0,0 +1,166 @@ +; RUN: llc < %s -march=msp430 | FileCheck %s +target datalayout = "e-p:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:16:32" +target triple = "msp430-generic-generic" + +@foo8 = external global i8 +@bar8 = external global i8 + +define i8 @bitbrr(i8 %a, i8 %b) nounwind { + %t1 = and i8 %a, %b + %t2 = icmp ne i8 %t1, 0 + %t3 = zext i1 %t2 to i8 + ret i8 %t3 +} +; CHECK: bitbrr: +; CHECK: bit.b r14, r15 + +define i8 @bitbri(i8 %a) nounwind { + %t1 = and i8 %a, 15 + %t2 = icmp ne i8 %t1, 0 + %t3 = zext i1 %t2 to i8 + ret i8 %t3 +} +; CHECK: bitbri: +; CHECK: bit.b #15, r15 + +define i8 @bitbir(i8 %a) nounwind { + %t1 = and i8 15, %a + %t2 = icmp ne i8 %t1, 0 + %t3 = zext i1 %t2 to i8 + ret i8 %t3 +} +; CHECK: bitbir: +; CHECK: bit.b #15, r15 + +define i8 @bitbmi() nounwind { + %t1 = load i8* @foo8 + %t2 = and i8 %t1, 15 + %t3 = icmp ne i8 %t2, 0 + %t4 = zext i1 %t3 to i8 + ret i8 %t4 +} +; CHECK: bitbmi: +; CHECK: bit.b #15, &foo8 + +define i8 @bitbim() nounwind { + %t1 = load i8* @foo8 + %t2 = and i8 15, %t1 + %t3 = icmp ne i8 %t2, 0 + %t4 = zext i1 %t3 to i8 + ret i8 %t4 +} +; CHECK: bitbim: +; CHECK: bit.b #15, &foo8 + +define i8 @bitbrm(i8 %a) nounwind { + %t1 = load i8* @foo8 + %t2 = and i8 %a, %t1 + %t3 = icmp ne i8 %t2, 0 + %t4 = zext i1 %t3 to i8 + ret i8 %t4 +} +; CHECK: bitbrm: +; CHECK: bit.b &foo8, r15 + +define i8 @bitbmr(i8 %a) nounwind { + %t1 = load i8* @foo8 + %t2 = and i8 %t1, %a + %t3 = icmp ne i8 %t2, 0 + %t4 = zext i1 %t3 to i8 + ret i8 %t4 +} +; CHECK: bitbmr: +; CHECK: bit.b r15, &foo8 + +define i8 @bitbmm() nounwind { + %t1 = load i8* @foo8 + %t2 = load i8* @bar8 + %t3 = and i8 %t1, %t2 + %t4 = icmp ne i8 %t3, 0 + %t5 = zext i1 %t4 to i8 + ret i8 %t5 +} +; CHECK: bitbmm: +; CHECK: bit.b &bar8, &foo8 + +@foo16 = external global i16 +@bar16 = external global i16 + +define i16 @bitwrr(i16 %a, i16 %b) nounwind { + %t1 = and i16 %a, %b + %t2 = icmp ne i16 %t1, 0 + %t3 = zext i1 %t2 to i16 + ret i16 %t3 +} +; CHECK: bitwrr: +; CHECK: bit.w 
r14, r15 + +define i16 @bitwri(i16 %a) nounwind { + %t1 = and i16 %a, 4080 + %t2 = icmp ne i16 %t1, 0 + %t3 = zext i1 %t2 to i16 + ret i16 %t3 +} +; CHECK: bitwri: +; CHECK: bit.w #4080, r15 + +define i16 @bitwir(i16 %a) nounwind { + %t1 = and i16 4080, %a + %t2 = icmp ne i16 %t1, 0 + %t3 = zext i1 %t2 to i16 + ret i16 %t3 +} +; CHECK: bitwir: +; CHECK: bit.w #4080, r15 + +define i16 @bitwmi() nounwind { + %t1 = load i16* @foo16 + %t2 = and i16 %t1, 4080 + %t3 = icmp ne i16 %t2, 0 + %t4 = zext i1 %t3 to i16 + ret i16 %t4 +} +; CHECK: bitwmi: +; CHECK: bit.w #4080, &foo16 + +define i16 @bitwim() nounwind { + %t1 = load i16* @foo16 + %t2 = and i16 4080, %t1 + %t3 = icmp ne i16 %t2, 0 + %t4 = zext i1 %t3 to i16 + ret i16 %t4 +} +; CHECK: bitwim: +; CHECK: bit.w #4080, &foo16 + +define i16 @bitwrm(i16 %a) nounwind { + %t1 = load i16* @foo16 + %t2 = and i16 %a, %t1 + %t3 = icmp ne i16 %t2, 0 + %t4 = zext i1 %t3 to i16 + ret i16 %t4 +} +; CHECK: bitwrm: +; CHECK: bit.w &foo16, r15 + +define i16 @bitwmr(i16 %a) nounwind { + %t1 = load i16* @foo16 + %t2 = and i16 %t1, %a + %t3 = icmp ne i16 %t2, 0 + %t4 = zext i1 %t3 to i16 + ret i16 %t4 +} +; CHECK: bitwmr: +; CHECK: bit.w r15, &foo16 + +define i16 @bitwmm() nounwind { + %t1 = load i16* @foo16 + %t2 = load i16* @bar16 + %t3 = and i16 %t1, %t2 + %t4 = icmp ne i16 %t3, 0 + %t5 = zext i1 %t4 to i16 + ret i16 %t5 +} +; CHECK: bitwmm: +; CHECK: bit.w &bar16, &foo16 +
diff --git a/src/LLVM/test/CodeGen/MSP430/dg.exp b/src/LLVM/test/CodeGen/MSP430/dg.exp new file mode 100644 index 0000000..e4ea13a --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/dg.exp
@@ -0,0 +1,5 @@ +load_lib llvm.exp + +if { [llvm_supports_target MSP430] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] +}
diff --git a/src/LLVM/test/CodeGen/MSP430/indirectbr.ll b/src/LLVM/test/CodeGen/MSP430/indirectbr.ll new file mode 100644 index 0000000..2a62c91 --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/indirectbr.ll
@@ -0,0 +1,41 @@ +; RUN: llc -march=msp430 < %s + +@nextaddr = global i8* null ; <i8**> [#uses=2] +@C.0.2070 = private constant [5 x i8*] [i8* blockaddress(@foo, %L1), i8* blockaddress(@foo, %L2), i8* blockaddress(@foo, %L3), i8* blockaddress(@foo, %L4), i8* blockaddress(@foo, %L5)] ; <[5 x i8*]*> [#uses=1] + +define internal i16 @foo(i16 %i) nounwind { +entry: + %0 = load i8** @nextaddr, align 4 ; <i8*> [#uses=2] + %1 = icmp eq i8* %0, null ; <i1> [#uses=1] + br i1 %1, label %bb3, label %bb2 + +bb2: ; preds = %bb3, %entry + %gotovar.4.0 = phi i8* [ %gotovar.4.0.pre, %bb3 ], [ %0, %entry ] ; <i8*> [#uses=1] + indirectbr i8* %gotovar.4.0, [label %L5, label %L4, label %L3, label %L2, label %L1] + +bb3: ; preds = %entry + %2 = getelementptr inbounds [5 x i8*]* @C.0.2070, i16 0, i16 %i ; <i8**> [#uses=1] + %gotovar.4.0.pre = load i8** %2, align 4 ; <i8*> [#uses=1] + br label %bb2 + +L5: ; preds = %bb2 + br label %L4 + +L4: ; preds = %L5, %bb2 + %res.0 = phi i16 [ 385, %L5 ], [ 35, %bb2 ] ; <i16> [#uses=1] + br label %L3 + +L3: ; preds = %L4, %bb2 + %res.1 = phi i16 [ %res.0, %L4 ], [ 5, %bb2 ] ; <i16> [#uses=1] + br label %L2 + +L2: ; preds = %L3, %bb2 + %res.2 = phi i16 [ %res.1, %L3 ], [ 1, %bb2 ] ; <i16> [#uses=1] + %phitmp = mul i16 %res.2, 6 ; <i16> [#uses=1] + br label %L1 + +L1: ; preds = %L2, %bb2 + %res.3 = phi i16 [ %phitmp, %L2 ], [ 2, %bb2 ] ; <i16> [#uses=1] + store i8* blockaddress(@foo, %L5), i8** @nextaddr, align 4 + ret i16 %res.3 +}
diff --git a/src/LLVM/test/CodeGen/MSP430/indirectbr2.ll b/src/LLVM/test/CodeGen/MSP430/indirectbr2.ll new file mode 100644 index 0000000..93cfb25 --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/indirectbr2.ll
@@ -0,0 +1,29 @@ +; RUN: llc -march=msp430 < %s | FileCheck %s +@C.0.2070 = private constant [5 x i8*] [i8* blockaddress(@foo, %L1), i8* blockaddress(@foo, %L2), i8* blockaddress(@foo, %L3), i8* blockaddress(@foo, %L4), i8* blockaddress(@foo, %L5)] ; <[5 x i8*]*> [#uses=1] + +define internal i16 @foo(i16 %i) nounwind { +entry: + %tmp1 = getelementptr inbounds [5 x i8*]* @C.0.2070, i16 0, i16 %i ; <i8**> [#uses=1] + %gotovar.4.0 = load i8** %tmp1, align 4 ; <i8*> [#uses=1] +; CHECK: mov.w .LC.0.2070(r15), pc + indirectbr i8* %gotovar.4.0, [label %L5, label %L4, label %L3, label %L2, label %L1] + +L5: ; preds = %bb2 + br label %L4 + +L4: ; preds = %L5, %bb2 + %res.0 = phi i16 [ 385, %L5 ], [ 35, %entry ] ; <i16> [#uses=1] + br label %L3 + +L3: ; preds = %L4, %bb2 + %res.1 = phi i16 [ %res.0, %L4 ], [ 5, %entry ] ; <i16> [#uses=1] + br label %L2 + +L2: ; preds = %L3, %bb2 + %res.2 = phi i16 [ %res.1, %L3 ], [ 1, %entry ] ; <i16> [#uses=1] + br label %L1 + +L1: ; preds = %L2, %bb2 + %res.3 = phi i16 [ %res.2, %L2 ], [ 2, %entry ] ; <i16> [#uses=1] + ret i16 %res.3 +}
diff --git a/src/LLVM/test/CodeGen/MSP430/inline-asm.ll b/src/LLVM/test/CodeGen/MSP430/inline-asm.ll new file mode 100644 index 0000000..0e7886a --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/inline-asm.ll
@@ -0,0 +1,26 @@ +; RUN: llc < %s +target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8" +target triple = "msp430-generic-generic" + +define void @imm() nounwind { + call void asm sideeffect "bic\09$0,r2", "i"(i16 32) nounwind + ret void +} + +define void @reg(i16 %a) nounwind { + call void asm sideeffect "bic\09$0,r2", "r"(i16 %a) nounwind + ret void +} + +@foo = global i16 0, align 2 + +define void @immmem() nounwind { + call void asm sideeffect "bic\09$0,r2", "i"(i16* getelementptr(i16* @foo, i32 1)) nounwind + ret void +} + +define void @mem() nounwind { + %fooval = load i16* @foo + call void asm sideeffect "bic\09$0,r2", "m"(i16 %fooval) nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/MSP430/mult-alt-generic-msp430.ll b/src/LLVM/test/CodeGen/MSP430/mult-alt-generic-msp430.ll new file mode 100644 index 0000000..342afed --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/mult-alt-generic-msp430.ll
@@ -0,0 +1,323 @@ +; RUN: llc < %s -march=msp430 +; ModuleID = 'mult-alt-generic.c' +target datalayout = "e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16" +target triple = "msp430" + +@mout0 = common global i16 0, align 2 +@min1 = common global i16 0, align 2 +@marray = common global [2 x i16] zeroinitializer, align 2 + +define void @single_m() nounwind { +entry: + call void asm "foo $1,$0", "=*m,*m"(i16* @mout0, i16* @min1) nounwind + ret void +} + +define void @single_o() nounwind { +entry: + %out0 = alloca i16, align 2 + %index = alloca i16, align 2 + store i16 0, i16* %out0, align 2 + store i16 1, i16* %index, align 2 + ret void +} + +define void @single_V() nounwind { +entry: + ret void +} + +define void @single_lt() nounwind { +entry: + %out0 = alloca i16, align 2 + %in1 = alloca i16, align 2 + store i16 0, i16* %out0, align 2 + store i16 1, i16* %in1, align 2 + %tmp = load i16* %in1, align 2 + %0 = call i16 asm "foo $1,$0", "=r,<r"(i16 %tmp) nounwind + store i16 %0, i16* %out0, align 2 + %tmp1 = load i16* %in1, align 2 + %1 = call i16 asm "foo $1,$0", "=r,r<"(i16 %tmp1) nounwind + store i16 %1, i16* %out0, align 2 + ret void +} + +define void @single_gt() nounwind { +entry: + %out0 = alloca i16, align 2 + %in1 = alloca i16, align 2 + store i16 0, i16* %out0, align 2 + store i16 1, i16* %in1, align 2 + %tmp = load i16* %in1, align 2 + %0 = call i16 asm "foo $1,$0", "=r,>r"(i16 %tmp) nounwind + store i16 %0, i16* %out0, align 2 + %tmp1 = load i16* %in1, align 2 + %1 = call i16 asm "foo $1,$0", "=r,r>"(i16 %tmp1) nounwind + store i16 %1, i16* %out0, align 2 + ret void +} + +define void @single_r() nounwind { +entry: + %out0 = alloca i16, align 2 + %in1 = alloca i16, align 2 + store i16 0, i16* %out0, align 2 + store i16 1, i16* %in1, align 2 + %tmp = load i16* %in1, align 2 + %0 = call i16 asm "foo $1,$0", "=r,r"(i16 %tmp) nounwind + store i16 %0, i16* %out0, align 2 + ret void +} + +define void @single_i() nounwind { +entry: + %out0 = alloca i16, align 2 + store 
i16 0, i16* %out0, align 2 + %0 = call i16 asm "foo $1,$0", "=r,i"(i16 1) nounwind + store i16 %0, i16* %out0, align 2 + ret void +} + +define void @single_n() nounwind { +entry: + %out0 = alloca i16, align 2 + store i16 0, i16* %out0, align 2 + %0 = call i16 asm "foo $1,$0", "=r,n"(i16 1) nounwind + store i16 %0, i16* %out0, align 2 + ret void +} + +define void @single_E() nounwind { +entry: + %out0 = alloca double, align 8 + store double 0.000000e+000, double* %out0, align 8 +; No lowering support. +; %0 = call double asm "foo $1,$0", "=r,E"(double 1.000000e+001) nounwind +; store double %0, double* %out0, align 8 + ret void +} + +define void @single_F() nounwind { +entry: + %out0 = alloca double, align 8 + store double 0.000000e+000, double* %out0, align 8 +; No lowering support. +; %0 = call double asm "foo $1,$0", "=r,F"(double 1.000000e+000) nounwind +; store double %0, double* %out0, align 8 + ret void +} + +define void @single_s() nounwind { +entry: + %out0 = alloca i16, align 2 + store i16 0, i16* %out0, align 2 + ret void +} + +define void @single_g() nounwind { +entry: + %out0 = alloca i16, align 2 + %in1 = alloca i16, align 2 + store i16 0, i16* %out0, align 2 + store i16 1, i16* %in1, align 2 + %tmp = load i16* %in1, align 2 + %0 = call i16 asm "foo $1,$0", "=r,imr"(i16 %tmp) nounwind + store i16 %0, i16* %out0, align 2 + %tmp1 = load i16* @min1, align 2 + %1 = call i16 asm "foo $1,$0", "=r,imr"(i16 %tmp1) nounwind + store i16 %1, i16* %out0, align 2 + %2 = call i16 asm "foo $1,$0", "=r,imr"(i16 1) nounwind + store i16 %2, i16* %out0, align 2 + ret void +} + +define void @single_X() nounwind { +entry: + %out0 = alloca i16, align 2 + %in1 = alloca i16, align 2 + store i16 0, i16* %out0, align 2 + store i16 1, i16* %in1, align 2 + %tmp = load i16* %in1, align 2 + %0 = call i16 asm "foo $1,$0", "=r,X"(i16 %tmp) nounwind + store i16 %0, i16* %out0, align 2 + %tmp1 = load i16* @min1, align 2 + %1 = call i16 asm "foo $1,$0", "=r,X"(i16 %tmp1) nounwind + 
store i16 %1, i16* %out0, align 2 + %2 = call i16 asm "foo $1,$0", "=r,X"(i16 1) nounwind + store i16 %2, i16* %out0, align 2 + %3 = call i16 asm "foo $1,$0", "=r,X"(i16* getelementptr inbounds ([2 x i16]* @marray, i32 0, i32 0)) nounwind + store i16 %3, i16* %out0, align 2 +; No lowering support. +; %4 = call i16 asm "foo $1,$0", "=r,X"(double 1.000000e+001) nounwind +; store i16 %4, i16* %out0, align 2 +; %5 = call i16 asm "foo $1,$0", "=r,X"(double 1.000000e+000) nounwind +; store i16 %5, i16* %out0, align 2 + ret void +} + +define void @single_p() nounwind { +entry: + %out0 = alloca i16, align 2 + store i16 0, i16* %out0, align 2 + %0 = call i16 asm "foo $1,$0", "=r,r"(i16* getelementptr inbounds ([2 x i16]* @marray, i32 0, i32 0)) nounwind + store i16 %0, i16* %out0, align 2 + ret void +} + +define void @multi_m() nounwind { +entry: + %tmp = load i16* @min1, align 2 + call void asm "foo $1,$0", "=*m|r,m|r"(i16* @mout0, i16 %tmp) nounwind + ret void +} + +define void @multi_o() nounwind { +entry: + %out0 = alloca i16, align 2 + %index = alloca i16, align 2 + store i16 0, i16* %out0, align 2 + store i16 1, i16* %index, align 2 + ret void +} + +define void @multi_V() nounwind { +entry: + ret void +} + +define void @multi_lt() nounwind { +entry: + %out0 = alloca i16, align 2 + %in1 = alloca i16, align 2 + store i16 0, i16* %out0, align 2 + store i16 1, i16* %in1, align 2 + %tmp = load i16* %in1, align 2 + %0 = call i16 asm "foo $1,$0", "=r|r,r|<r"(i16 %tmp) nounwind + store i16 %0, i16* %out0, align 2 + %tmp1 = load i16* %in1, align 2 + %1 = call i16 asm "foo $1,$0", "=r|r,r|r<"(i16 %tmp1) nounwind + store i16 %1, i16* %out0, align 2 + ret void +} + +define void @multi_gt() nounwind { +entry: + %out0 = alloca i16, align 2 + %in1 = alloca i16, align 2 + store i16 0, i16* %out0, align 2 + store i16 1, i16* %in1, align 2 + %tmp = load i16* %in1, align 2 + %0 = call i16 asm "foo $1,$0", "=r|r,r|>r"(i16 %tmp) nounwind + store i16 %0, i16* %out0, align 2 + %tmp1 = load 
i16* %in1, align 2 + %1 = call i16 asm "foo $1,$0", "=r|r,r|r>"(i16 %tmp1) nounwind + store i16 %1, i16* %out0, align 2 + ret void +} + +define void @multi_r() nounwind { +entry: + %out0 = alloca i16, align 2 + %in1 = alloca i16, align 2 + store i16 0, i16* %out0, align 2 + store i16 1, i16* %in1, align 2 + %tmp = load i16* %in1, align 2 + %0 = call i16 asm "foo $1,$0", "=r|r,r|m"(i16 %tmp) nounwind + store i16 %0, i16* %out0, align 2 + ret void +} + +define void @multi_i() nounwind { +entry: + %out0 = alloca i16, align 2 + store i16 0, i16* %out0, align 2 + %0 = call i16 asm "foo $1,$0", "=r|r,r|i"(i16 1) nounwind + store i16 %0, i16* %out0, align 2 + ret void +} + +define void @multi_n() nounwind { +entry: + %out0 = alloca i16, align 2 + store i16 0, i16* %out0, align 2 + %0 = call i16 asm "foo $1,$0", "=r|r,r|n"(i16 1) nounwind + store i16 %0, i16* %out0, align 2 + ret void +} + +define void @multi_E() nounwind { +entry: + %out0 = alloca double, align 8 + store double 0.000000e+000, double* %out0, align 8 +; No lowering support. +; %0 = call double asm "foo $1,$0", "=r|r,r|E"(double 1.000000e+001) nounwind +; store double %0, double* %out0, align 8 + ret void +} + +define void @multi_F() nounwind { +entry: + %out0 = alloca double, align 8 + store double 0.000000e+000, double* %out0, align 8 +; No lowering support. 
+; %0 = call double asm "foo $1,$0", "=r|r,r|F"(double 1.000000e+000) nounwind +; store double %0, double* %out0, align 8 + ret void +} + +define void @multi_s() nounwind { +entry: + %out0 = alloca i16, align 2 + store i16 0, i16* %out0, align 2 + ret void +} + +define void @multi_g() nounwind { +entry: + %out0 = alloca i16, align 2 + %in1 = alloca i16, align 2 + store i16 0, i16* %out0, align 2 + store i16 1, i16* %in1, align 2 + %tmp = load i16* %in1, align 2 + %0 = call i16 asm "foo $1,$0", "=r|r,r|imr"(i16 %tmp) nounwind + store i16 %0, i16* %out0, align 2 + %tmp1 = load i16* @min1, align 2 + %1 = call i16 asm "foo $1,$0", "=r|r,r|imr"(i16 %tmp1) nounwind + store i16 %1, i16* %out0, align 2 + %2 = call i16 asm "foo $1,$0", "=r|r,r|imr"(i16 1) nounwind + store i16 %2, i16* %out0, align 2 + ret void +} + +define void @multi_X() nounwind { +entry: + %out0 = alloca i16, align 2 + %in1 = alloca i16, align 2 + store i16 0, i16* %out0, align 2 + store i16 1, i16* %in1, align 2 + %tmp = load i16* %in1, align 2 + %0 = call i16 asm "foo $1,$0", "=r|r,r|X"(i16 %tmp) nounwind + store i16 %0, i16* %out0, align 2 + %tmp1 = load i16* @min1, align 2 + %1 = call i16 asm "foo $1,$0", "=r|r,r|X"(i16 %tmp1) nounwind + store i16 %1, i16* %out0, align 2 + %2 = call i16 asm "foo $1,$0", "=r|r,r|X"(i16 1) nounwind + store i16 %2, i16* %out0, align 2 + %3 = call i16 asm "foo $1,$0", "=r|r,r|X"(i16* getelementptr inbounds ([2 x i16]* @marray, i32 0, i32 0)) nounwind + store i16 %3, i16* %out0, align 2 +; No lowering support. 
+; %4 = call i16 asm "foo $1,$0", "=r|r,r|X"(double 1.000000e+001) nounwind +; store i16 %4, i16* %out0, align 2 +; %5 = call i16 asm "foo $1,$0", "=r|r,r|X"(double 1.000000e+000) nounwind +; store i16 %5, i16* %out0, align 2 + ret void +} + +define void @multi_p() nounwind { +entry: + %out0 = alloca i16, align 2 + store i16 0, i16* %out0, align 2 + %0 = call i16 asm "foo $1,$0", "=r|r,r|r"(i16* getelementptr inbounds ([2 x i16]* @marray, i32 0, i32 0)) nounwind + store i16 %0, i16* %out0, align 2 + ret void +}
diff --git a/src/LLVM/test/CodeGen/MSP430/postinc.ll b/src/LLVM/test/CodeGen/MSP430/postinc.ll new file mode 100644 index 0000000..8f01b83 --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/postinc.ll
@@ -0,0 +1,114 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8" +target triple = "msp430" + +define zeroext i16 @add(i16* nocapture %a, i16 zeroext %n) nounwind readonly { +entry: + %cmp8 = icmp eq i16 %n, 0 ; <i1> [#uses=1] + br i1 %cmp8, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %i.010 = phi i16 [ 0, %entry ], [ %inc, %for.body ] ; <i16> [#uses=2] + %sum.09 = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1] + %arrayidx = getelementptr i16* %a, i16 %i.010 ; <i16*> [#uses=1] +; CHECK: add: +; CHECK: add.w @r{{[0-9]+}}+, r{{[0-9]+}} + %tmp4 = load i16* %arrayidx ; <i16> [#uses=1] + %add = add i16 %tmp4, %sum.09 ; <i16> [#uses=2] + %inc = add i16 %i.010, 1 ; <i16> [#uses=2] + %exitcond = icmp eq i16 %inc, %n ; <i1> [#uses=1] + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %sum.0.lcssa = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1] + ret i16 %sum.0.lcssa +} + +define zeroext i16 @sub(i16* nocapture %a, i16 zeroext %n) nounwind readonly { +entry: + %cmp8 = icmp eq i16 %n, 0 ; <i1> [#uses=1] + br i1 %cmp8, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %i.010 = phi i16 [ 0, %entry ], [ %inc, %for.body ] ; <i16> [#uses=2] + %sum.09 = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1] + %arrayidx = getelementptr i16* %a, i16 %i.010 ; <i16*> [#uses=1] +; CHECK: sub: +; CHECK: sub.w @r{{[0-9]+}}+, r{{[0-9]+}} + %tmp4 = load i16* %arrayidx ; <i16> [#uses=1] + %add = sub i16 %tmp4, %sum.09 ; <i16> [#uses=2] + %inc = add i16 %i.010, 1 ; <i16> [#uses=2] + %exitcond = icmp eq i16 %inc, %n ; <i1> [#uses=1] + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %sum.0.lcssa = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1] + ret i16 %sum.0.lcssa +} + +define zeroext i16 @or(i16* nocapture %a, i16 zeroext %n) nounwind readonly { +entry: + %cmp8 = 
icmp eq i16 %n, 0 ; <i1> [#uses=1] + br i1 %cmp8, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %i.010 = phi i16 [ 0, %entry ], [ %inc, %for.body ] ; <i16> [#uses=2] + %sum.09 = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1] + %arrayidx = getelementptr i16* %a, i16 %i.010 ; <i16*> [#uses=1] +; CHECK: or: +; CHECK: bis.w @r{{[0-9]+}}+, r{{[0-9]+}} + %tmp4 = load i16* %arrayidx ; <i16> [#uses=1] + %add = or i16 %tmp4, %sum.09 ; <i16> [#uses=2] + %inc = add i16 %i.010, 1 ; <i16> [#uses=2] + %exitcond = icmp eq i16 %inc, %n ; <i1> [#uses=1] + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %sum.0.lcssa = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1] + ret i16 %sum.0.lcssa +} + +define zeroext i16 @xor(i16* nocapture %a, i16 zeroext %n) nounwind readonly { +entry: + %cmp8 = icmp eq i16 %n, 0 ; <i1> [#uses=1] + br i1 %cmp8, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %i.010 = phi i16 [ 0, %entry ], [ %inc, %for.body ] ; <i16> [#uses=2] + %sum.09 = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1] + %arrayidx = getelementptr i16* %a, i16 %i.010 ; <i16*> [#uses=1] +; CHECK: xor: +; CHECK: xor.w @r{{[0-9]+}}+, r{{[0-9]+}} + %tmp4 = load i16* %arrayidx ; <i16> [#uses=1] + %add = xor i16 %tmp4, %sum.09 ; <i16> [#uses=2] + %inc = add i16 %i.010, 1 ; <i16> [#uses=2] + %exitcond = icmp eq i16 %inc, %n ; <i1> [#uses=1] + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %sum.0.lcssa = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1] + ret i16 %sum.0.lcssa +} + +define zeroext i16 @and(i16* nocapture %a, i16 zeroext %n) nounwind readonly { +entry: + %cmp8 = icmp eq i16 %n, 0 ; <i1> [#uses=1] + br i1 %cmp8, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %i.010 = phi i16 [ 0, %entry ], [ %inc, %for.body ] ; <i16> [#uses=2] + %sum.09 = phi i16 [ 0, %entry ], [ 
%add, %for.body ] ; <i16> [#uses=1] + %arrayidx = getelementptr i16* %a, i16 %i.010 ; <i16*> [#uses=1] +; CHECK: and: +; CHECK: and.w @r{{[0-9]+}}+, r{{[0-9]+}} + %tmp4 = load i16* %arrayidx ; <i16> [#uses=1] + %add = and i16 %tmp4, %sum.09 ; <i16> [#uses=2] + %inc = add i16 %i.010, 1 ; <i16> [#uses=2] + %exitcond = icmp eq i16 %inc, %n ; <i1> [#uses=1] + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %sum.0.lcssa = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1] + ret i16 %sum.0.lcssa +} +
diff --git a/src/LLVM/test/CodeGen/MSP430/setcc.ll b/src/LLVM/test/CodeGen/MSP430/setcc.ll new file mode 100644 index 0000000..c99b17e --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/setcc.ll
@@ -0,0 +1,116 @@ +; RUN: llc -march=msp430 < %s | FileCheck %s +target datalayout = "e-p:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:16:32" +target triple = "msp430-generic-generic" + +define i16 @sccweqand(i16 %a, i16 %b) nounwind { + %t1 = and i16 %a, %b + %t2 = icmp eq i16 %t1, 0 + %t3 = zext i1 %t2 to i16 + ret i16 %t3 +} +; CHECK: sccweqand: +; CHECK: bit.w r14, r15 +; CHECK: mov.w r2, r15 +; CHECK: rra.w r15 +; CHECK: and.w #1, r15 + +define i16 @sccwneand(i16 %a, i16 %b) nounwind { + %t1 = and i16 %a, %b + %t2 = icmp ne i16 %t1, 0 + %t3 = zext i1 %t2 to i16 + ret i16 %t3 +} +; CHECK: sccwneand: +; CHECK: bit.w r14, r15 +; CHECK: mov.w r2, r15 +; CHECK: and.w #1, r15 + +define i16 @sccwne(i16 %a, i16 %b) nounwind { + %t1 = icmp ne i16 %a, %b + %t2 = zext i1 %t1 to i16 + ret i16 %t2 +} +; CHECK:sccwne: +; CHECK: cmp.w r14, r15 +; CHECK: mov.w r2, r15 +; CHECK: rra.w r15 +; CHECK: and.w #1, r15 +; CHECK: xor.w #1, r15 + +define i16 @sccweq(i16 %a, i16 %b) nounwind { + %t1 = icmp eq i16 %a, %b + %t2 = zext i1 %t1 to i16 + ret i16 %t2 +} +; CHECK:sccweq: +; CHECK: cmp.w r14, r15 +; CHECK: mov.w r2, r15 +; CHECK: rra.w r15 +; CHECK: and.w #1, r15 + +define i16 @sccwugt(i16 %a, i16 %b) nounwind { + %t1 = icmp ugt i16 %a, %b + %t2 = zext i1 %t1 to i16 + ret i16 %t2 +} +; CHECK:sccwugt: +; CHECK: cmp.w r15, r14 +; CHECK: mov.w r2, r15 +; CHECK: and.w #1, r15 +; CHECK: xor.w #1, r15 + +define i16 @sccwuge(i16 %a, i16 %b) nounwind { + %t1 = icmp uge i16 %a, %b + %t2 = zext i1 %t1 to i16 + ret i16 %t2 +} +; CHECK:sccwuge: +; CHECK: cmp.w r14, r15 +; CHECK: mov.w r2, r15 +; CHECK: and.w #1, r15 + +define i16 @sccwult(i16 %a, i16 %b) nounwind { + %t1 = icmp ult i16 %a, %b + %t2 = zext i1 %t1 to i16 + ret i16 %t2 +} +; CHECK:sccwult: +; CHECK: cmp.w r14, r15 +; CHECK: mov.w r2, r15 +; CHECK: and.w #1, r15 +; CHECK: xor.w #1, r15 + +define i16 @sccwule(i16 %a, i16 %b) nounwind { + %t1 = icmp ule i16 %a, %b + %t2 = zext i1 %t1 to i16 + ret i16 %t2 +} +; CHECK:sccwule: +; CHECK: 
cmp.w r15, r14 +; CHECK: mov.w r2, r15 +; CHECK: and.w #1, r15 + +define i16 @sccwsgt(i16 %a, i16 %b) nounwind { + %t1 = icmp sgt i16 %a, %b + %t2 = zext i1 %t1 to i16 + ret i16 %t2 +} + +define i16 @sccwsge(i16 %a, i16 %b) nounwind { + %t1 = icmp sge i16 %a, %b + %t2 = zext i1 %t1 to i16 + ret i16 %t2 +} + +define i16 @sccwslt(i16 %a, i16 %b) nounwind { + %t1 = icmp slt i16 %a, %b + %t2 = zext i1 %t1 to i16 + ret i16 %t2 +} + +define i16 @sccwsle(i16 %a, i16 %b) nounwind { + %t1 = icmp sle i16 %a, %b + %t2 = zext i1 %t1 to i16 + ret i16 %t2 +} +
diff --git a/src/LLVM/test/CodeGen/MSP430/shifts.ll b/src/LLVM/test/CodeGen/MSP430/shifts.ll new file mode 100644 index 0000000..b5b3054 --- /dev/null +++ b/src/LLVM/test/CodeGen/MSP430/shifts.ll
@@ -0,0 +1,51 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8-n8:16" +target triple = "msp430-elf" + +define zeroext i8 @lshr8(i8 zeroext %a, i8 zeroext %cnt) nounwind readnone { +entry: +; CHECK: lshr8: +; CHECK: rrc.b + %shr = lshr i8 %a, %cnt + ret i8 %shr +} + +define signext i8 @ashr8(i8 signext %a, i8 zeroext %cnt) nounwind readnone { +entry: +; CHECK: ashr8: +; CHECK: rra.b + %shr = ashr i8 %a, %cnt + ret i8 %shr +} + +define zeroext i8 @shl8(i8 zeroext %a, i8 zeroext %cnt) nounwind readnone { +entry: +; CHECK: shl8 +; CHECK: rla.b + %shl = shl i8 %a, %cnt + ret i8 %shl +} + +define zeroext i16 @lshr16(i16 zeroext %a, i16 zeroext %cnt) nounwind readnone { +entry: +; CHECK: lshr16: +; CHECK: rrc.w + %shr = lshr i16 %a, %cnt + ret i16 %shr +} + +define signext i16 @ashr16(i16 signext %a, i16 zeroext %cnt) nounwind readnone { +entry: +; CHECK: ashr16: +; CHECK: rra.w + %shr = ashr i16 %a, %cnt + ret i16 %shr +} + +define zeroext i16 @shl16(i16 zeroext %a, i16 zeroext %cnt) nounwind readnone { +entry: +; CHECK: shl16: +; CHECK: rla.w + %shl = shl i16 %a, %cnt + ret i16 %shl +}
diff --git a/src/LLVM/test/CodeGen/Mips/2008-06-05-Carry.ll b/src/LLVM/test/CodeGen/Mips/2008-06-05-Carry.ll new file mode 100644 index 0000000..9d8e391 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/2008-06-05-Carry.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -march=mips -o %t +; RUN: grep subu %t | count 2 +; RUN: grep addu %t | count 4 + +target datalayout = +"e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "mipsallegrexel-unknown-psp-elf" + +define i64 @add64(i64 %u, i64 %v) nounwind { +entry: + %tmp2 = add i64 %u, %v + ret i64 %tmp2 +} + +define i64 @sub64(i64 %u, i64 %v) nounwind { +entry: + %tmp2 = sub i64 %u, %v + ret i64 %tmp2 +}
diff --git a/src/LLVM/test/CodeGen/Mips/2008-07-03-SRet.ll b/src/LLVM/test/CodeGen/Mips/2008-07-03-SRet.ll new file mode 100644 index 0000000..b1d20d9 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/2008-07-03-SRet.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=mips | grep {sw.*(\$4)} | count 3 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "mipsallegrexel-unknown-psp-elf" + %struct.sret0 = type { i32, i32, i32 } + +define void @test0(%struct.sret0* noalias sret %agg.result, i32 %dummy) nounwind { +entry: + getelementptr %struct.sret0* %agg.result, i32 0, i32 0 ; <i32*>:0 [#uses=1] + store i32 %dummy, i32* %0, align 4 + getelementptr %struct.sret0* %agg.result, i32 0, i32 1 ; <i32*>:1 [#uses=1] + store i32 %dummy, i32* %1, align 4 + getelementptr %struct.sret0* %agg.result, i32 0, i32 2 ; <i32*>:2 [#uses=1] + store i32 %dummy, i32* %2, align 4 + ret void +} +
diff --git a/src/LLVM/test/CodeGen/Mips/2008-07-06-fadd64.ll b/src/LLVM/test/CodeGen/Mips/2008-07-06-fadd64.ll new file mode 100644 index 0000000..ff8ed4d --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/2008-07-06-fadd64.ll
@@ -0,0 +1,8 @@ +; RUN: llc -march=mips -mattr=single-float < %s | FileCheck %s + +define double @dofloat(double %a, double %b) nounwind { +entry: +; CHECK: __adddf3 + fadd double %a, %b ; <double>:0 [#uses=1] + ret double %0 +}
diff --git a/src/LLVM/test/CodeGen/Mips/2008-07-07-FPExtend.ll b/src/LLVM/test/CodeGen/Mips/2008-07-07-FPExtend.ll new file mode 100644 index 0000000..29c8e84 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/2008-07-07-FPExtend.ll
@@ -0,0 +1,8 @@ +; RUN: llc -march=mips -mattr=single-float < %s | FileCheck %s + +define double @dofloat(float %a) nounwind { +entry: +; CHECK: __extendsfdf2 + fpext float %a to double ; <double>:0 [#uses=1] + ret double %0 +}
diff --git a/src/LLVM/test/CodeGen/Mips/2008-07-07-Float2Int.ll b/src/LLVM/test/CodeGen/Mips/2008-07-07-Float2Int.ll new file mode 100644 index 0000000..d804c7d --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/2008-07-07-Float2Int.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=mips | grep trunc.w.s | count 3 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "mipsallegrexel-unknown-psp-elf" + +define i32 @fptoint(float %a) nounwind { +entry: + fptosi float %a to i32 ; <i32>:0 [#uses=1] + ret i32 %0 +} + +define i32 @fptouint(float %a) nounwind { +entry: + fptoui float %a to i32 ; <i32>:0 [#uses=1] + ret i32 %0 +}
diff --git a/src/LLVM/test/CodeGen/Mips/2008-07-07-IntDoubleConvertions.ll b/src/LLVM/test/CodeGen/Mips/2008-07-07-IntDoubleConvertions.ll new file mode 100644 index 0000000..9a6bbdf --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/2008-07-07-IntDoubleConvertions.ll
@@ -0,0 +1,34 @@ +; RUN: llc -march=mips -mattr=single-float < %s | FileCheck %s + +define double @int2fp(i32 %a) nounwind { +entry: +; CHECK: int2fp +; CHECK: __floatsidf + sitofp i32 %a to double ; <double>:0 [#uses=1] + ret double %0 +} + +define double @uint2double(i32 %a) nounwind { +entry: +; CHECK: uint2double +; CHECK: __floatunsidf + uitofp i32 %a to double ; <double>:0 [#uses=1] + ret double %0 +} + +define i32 @double2int(double %a) nounwind { +entry: +; CHECK: double2int +; CHECK: __fixdfsi + fptosi double %a to i32 ; <i32>:0 [#uses=1] + ret i32 %0 +} + +define i32 @double2uint(double %a) nounwind { +entry: +; CHECK: double2uint +; CHECK: __fixunsdfsi + fptoui double %a to i32 ; <i32>:0 [#uses=1] + ret i32 %0 +} +
diff --git a/src/LLVM/test/CodeGen/Mips/2008-07-15-InternalConstant.ll b/src/LLVM/test/CodeGen/Mips/2008-07-15-InternalConstant.ll new file mode 100644 index 0000000..29a7b5c --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/2008-07-15-InternalConstant.ll
@@ -0,0 +1,23 @@ +; RUN: llc -march=mips -relocation-model=static < %s | FileCheck %s + +@.str = internal unnamed_addr constant [10 x i8] c"AAAAAAAAA\00" +@i0 = internal unnamed_addr constant [5 x i32] [ i32 0, i32 1, i32 2, i32 3, i32 4 ] + +define i8* @foo() nounwind { +entry: +; CHECK: foo +; CHECK: %hi(.str) +; CHECK: %lo(.str) + ret i8* getelementptr ([10 x i8]* @.str, i32 0, i32 0) +} + +define i32* @bar() nounwind { +entry: +; CHECK: bar +; CHECK: %hi(i0) +; CHECK: %lo(i0) + ret i32* getelementptr ([5 x i32]* @i0, i32 0, i32 0) +} + +; CHECK: rodata.str1.4,"aMS",@progbits +; CHECK: rodata,"a",@progbits
diff --git a/src/LLVM/test/CodeGen/Mips/2008-07-15-SmallSection.ll b/src/LLVM/test/CodeGen/Mips/2008-07-15-SmallSection.ll new file mode 100644 index 0000000..cbc3ecf --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/2008-07-15-SmallSection.ll
@@ -0,0 +1,35 @@ +; DISABLED: llc < %s -mips-ssection-threshold=8 -march=mips -o %t0 +; DISABLED: llc < %s -mips-ssection-threshold=0 -march=mips -o %t1 +; DISABLED: grep {sdata} %t0 | count 1 +; DISABLED: grep {sbss} %t0 | count 1 +; DISABLED: grep {gp_rel} %t0 | count 2 +; DISABLED: not grep {sdata} %t1 +; DISABLED: not grep {sbss} %t1 +; DISABLED: not grep {gp_rel} %t1 +; DISABLED: grep {\%hi} %t1 | count 2 +; DISABLED: grep {\%lo} %t1 | count 3 +; RUN: false +; XFAIL: * + + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "mipsallegrexel-unknown-psp-elf" + + %struct.anon = type { i32, i32 } +@s0 = global [8 x i8] c"AAAAAAA\00", align 4 +@foo = global %struct.anon { i32 2, i32 3 } +@bar = global %struct.anon zeroinitializer + +define i8* @A0() nounwind { +entry: + ret i8* getelementptr ([8 x i8]* @s0, i32 0, i32 0) +} + +define i32 @A1() nounwind { +entry: + load i32* getelementptr (%struct.anon* @foo, i32 0, i32 0), align 8 + load i32* getelementptr (%struct.anon* @foo, i32 0, i32 1), align 4 + add i32 %1, %0 + ret i32 %2 +} +
diff --git a/src/LLVM/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll b/src/LLVM/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll new file mode 100644 index 0000000..e0c745f --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll
@@ -0,0 +1,21 @@ +; DISABLED: llc < %s -march=mips -o %t +; DISABLED: grep seh %t | count 1 +; DISABLED: grep seb %t | count 1 +; RUN: false +; XFAIL: * + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "mipsallegrexel-unknown-psp-elf" + +define signext i8 @A(i8 %e.0, i8 signext %sum) nounwind { +entry: + add i8 %sum, %e.0 ; <i8>:0 [#uses=1] + ret i8 %0 +} + +define signext i16 @B(i16 %e.0, i16 signext %sum) nounwind { +entry: + add i16 %sum, %e.0 ; <i16>:0 [#uses=1] + ret i16 %0 +} +
diff --git a/src/LLVM/test/CodeGen/Mips/2008-07-22-Cstpool.ll b/src/LLVM/test/CodeGen/Mips/2008-07-22-Cstpool.ll new file mode 100644 index 0000000..94dfe35 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/2008-07-22-Cstpool.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=mips -o %t +; RUN: grep {CPI\[01\]_\[01\]:} %t | count 2 +; RUN: grep {.rodata.cst4,"aM",@progbits} %t | count 1 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "mipsallegrexel-unknown-psp-elf" + +define float @F(float %a) nounwind { +entry: + fadd float %a, 0x4011333340000000 ; <float>:0 [#uses=1] + fadd float %0, 0x4010666660000000 ; <float>:1 [#uses=1] + ret float %1 +}
diff --git a/src/LLVM/test/CodeGen/Mips/2008-07-23-fpcmp.ll b/src/LLVM/test/CodeGen/Mips/2008-07-23-fpcmp.ll new file mode 100644 index 0000000..519e4b9 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/2008-07-23-fpcmp.ll
@@ -0,0 +1,38 @@ +; RUN: llc < %s -march=mips -o %t +; RUN: grep {c\\..*\\.s} %t | count 3 +; RUN: grep {bc1\[tf\]} %t | count 3 + +; FIXME: Disabled because branch instructions are generated where +; conditional move instructions are expected. +; REQUIRES: disabled + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "mipsallegrexel-unknown-psp-elf" + +define float @A(float %a, float %b) nounwind { +entry: + fcmp ogt float %a, 1.000000e+00 ; <i1>:0 [#uses=1] + br i1 %0, label %bb, label %bb2 + +bb: ; preds = %entry + fadd float %a, 1.000000e+00 ; <float>:1 [#uses=1] + ret float %1 + +bb2: ; preds = %entry + ret float %b +} + +define float @B(float %a, float %b) nounwind { +entry: + fcmp ogt float %a, 1.000000e+00 ; <i1>:0 [#uses=1] + %.0 = select i1 %0, float %a, float %b ; <float> [#uses=1] + ret float %.0 +} + +define i32 @C(i32 %a, i32 %b, float %j) nounwind { +entry: + fcmp ogt float %j, 1.000000e+00 ; <i1>:0 [#uses=1] + %.0 = select i1 %0, i32 %a, i32 %b ; <i32> [#uses=1] + ret i32 %.0 +} +
diff --git a/src/LLVM/test/CodeGen/Mips/2008-07-29-icmp.ll b/src/LLVM/test/CodeGen/Mips/2008-07-29-icmp.ll new file mode 100644 index 0000000..e85a749 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/2008-07-29-icmp.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=mips | grep {b\[ne\]\[eq\]} | count 1 + +; FIXME: Disabled because branch instructions are generated where +; conditional move instructions are expected. +; REQUIRES: disabled + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "mipsallegrexel-unknown-psp-elf" + +define float @A(float %a, float %b, i32 %j) nounwind { +entry: + icmp sgt i32 %j, 1 ; <i1>:0 [#uses=1] + %.0 = select i1 %0, float %a, float %b ; <float> [#uses=1] + ret float %.0 +}
diff --git a/src/LLVM/test/CodeGen/Mips/2008-07-31-fcopysign.ll b/src/LLVM/test/CodeGen/Mips/2008-07-31-fcopysign.ll new file mode 100644 index 0000000..f152acc --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/2008-07-31-fcopysign.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=mips -o %t +; RUN: grep abs.s %t | count 1 +; RUN: grep neg.s %t | count 1 + +; FIXME: Should not emit abs.s or neg.s since these instructions produce +; incorrect results if the operand is NaN. +; REQUIRES: disabled + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "mipsallegrexel-unknown-psp-elf" + +define float @A(float %i, float %j) nounwind { +entry: + tail call float @copysignf( float %i, float %j ) nounwind readnone ; <float>:0 [#uses=1] + ret float %0 +} + +declare float @copysignf(float, float) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/Mips/2008-08-01-AsmInline.ll b/src/LLVM/test/CodeGen/Mips/2008-08-01-AsmInline.ll new file mode 100644 index 0000000..23ed64a --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/2008-08-01-AsmInline.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=mips -o %t +; RUN: grep mfhi %t | count 1 +; RUN: grep mflo %t | count 1 +; RUN: grep multu %t | count 1 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "mipsallegrexel-unknown-psp-elf" + %struct.DWstruct = type { i32, i32 } + +define i32 @A0(i32 %u, i32 %v) nounwind { +entry: + %asmtmp = tail call %struct.DWstruct asm "multu $2,$3", "={lo},={hi},d,d"( i32 %u, i32 %v ) nounwind + %asmresult = extractvalue %struct.DWstruct %asmtmp, 0 + %asmresult1 = extractvalue %struct.DWstruct %asmtmp, 1 ; <i32> [#uses=1] + %res = add i32 %asmresult, %asmresult1 + ret i32 %res +}
diff --git a/src/LLVM/test/CodeGen/Mips/2008-08-03-ReturnDouble.ll b/src/LLVM/test/CodeGen/Mips/2008-08-03-ReturnDouble.ll new file mode 100644 index 0000000..c41d521 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/2008-08-03-ReturnDouble.ll
@@ -0,0 +1,18 @@ +; Double return in abicall (default) +; RUN: llc < %s -march=mips +; PR2615 + +define double @main(...) { +entry: + %retval = alloca double ; <double*> [#uses=3] + store double 0.000000e+00, double* %retval + %r = alloca double ; <double*> [#uses=1] + load double* %r ; <double>:0 [#uses=1] + store double %0, double* %retval + br label %return + +return: ; preds = %entry + load double* %retval ; <double>:1 [#uses=1] + ret double %1 +} +
diff --git a/src/LLVM/test/CodeGen/Mips/2008-08-03-fabs64.ll b/src/LLVM/test/CodeGen/Mips/2008-08-03-fabs64.ll new file mode 100644 index 0000000..2b1713c --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/2008-08-03-fabs64.ll
@@ -0,0 +1,18 @@ +; DISABLED: llc < %s -march=mips -o %t +; DISABLED: grep {lui.*32767} %t | count 1 +; DISABLED: grep {ori.*65535} %t | count 1 +; RUN: false +; XFAIL: * + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "mipsallegrexel-unknown-psp-elf" + +define double @A(double %c, double %d) nounwind readnone { +entry: + tail call double @fabs( double %c ) nounwind readnone ; <double>:0 [#uses=1] + tail call double @fabs( double %d ) nounwind readnone ; <double>:0 [#uses=1] + fadd double %0, %1 + ret double %2 +} + +declare double @fabs(double) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/Mips/2008-08-04-Bitconvert.ll b/src/LLVM/test/CodeGen/Mips/2008-08-04-Bitconvert.ll new file mode 100644 index 0000000..f8eb028 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/2008-08-04-Bitconvert.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=mips -o %t +; RUN: grep mtc1 %t | count 1 +; RUN: grep mfc1 %t | count 1 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "mipsallegrexel-unknown-psp-elf" + +define float @A(i32 %u) nounwind { +entry: + bitcast i32 %u to float + ret float %0 +} + +define i32 @B(float %u) nounwind { +entry: + bitcast float %u to i32 + ret i32 %0 +}
diff --git a/src/LLVM/test/CodeGen/Mips/2008-08-06-Alloca.ll b/src/LLVM/test/CodeGen/Mips/2008-08-06-Alloca.ll new file mode 100644 index 0000000..6dd4af1 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/2008-08-06-Alloca.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=mips | grep {subu.*sp} | count 2 +; RUN: llc < %s -march=mips -regalloc=basic | grep {subu.*sp} | count 2 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "mipsallegrexel-unknown-psp-elf" + +define i32 @twoalloca(i32 %size) nounwind { +entry: + alloca i8, i32 %size ; <i8*>:0 [#uses=1] + alloca i8, i32 %size ; <i8*>:1 [#uses=1] + call i32 @foo( i8* %0 ) nounwind ; <i32>:2 [#uses=1] + call i32 @foo( i8* %1 ) nounwind ; <i32>:3 [#uses=1] + add i32 %3, %2 ; <i32>:4 [#uses=1] + ret i32 %4 +} + +declare i32 @foo(i8*)
diff --git a/src/LLVM/test/CodeGen/Mips/2008-08-07-CC.ll b/src/LLVM/test/CodeGen/Mips/2008-08-07-CC.ll new file mode 100644 index 0000000..63c2595 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/2008-08-07-CC.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=mips +; Mips must ignore fastcc + +target datalayout = +"e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "mipsallegrexel-unknown-psp-elf" + +define internal fastcc i32 @A(i32 %u) nounwind { +entry: + ret i32 %u +} +
diff --git a/src/LLVM/test/CodeGen/Mips/2008-08-07-FPRound.ll b/src/LLVM/test/CodeGen/Mips/2008-08-07-FPRound.ll new file mode 100644 index 0000000..4fa43b6 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/2008-08-07-FPRound.ll
@@ -0,0 +1,8 @@ +; RUN: llc -march=mips -mattr=single-float < %s | FileCheck %s + +define float @round2float(double %a) nounwind { +entry: +; CHECK: __truncdfsf2 + fptrunc double %a to float ; <float>:0 [#uses=1] + ret float %0 +}
diff --git a/src/LLVM/test/CodeGen/Mips/2008-08-08-bswap.ll b/src/LLVM/test/CodeGen/Mips/2008-08-08-bswap.ll new file mode 100644 index 0000000..596da24 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/2008-08-08-bswap.ll
@@ -0,0 +1,15 @@ +; DISABLED: llc < %s | grep wsbw | count 1 +; RUN: false +; XFAIL: * + + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "psp" + +define i32 @__bswapsi2(i32 %u) nounwind { +entry: + tail call i32 @llvm.bswap.i32( i32 %u ) ; <i32>:0 [#uses=1] + ret i32 %0 +} + +declare i32 @llvm.bswap.i32(i32) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/Mips/2008-08-08-ctlz.ll b/src/LLVM/test/CodeGen/Mips/2008-08-08-ctlz.ll new file mode 100644 index 0000000..fb33323 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/2008-08-08-ctlz.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=mips | grep clz | count 1 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "mipsallegrexel-unknown-psp-elf" + +define i32 @A0(i32 %u) nounwind { +entry: + call i32 @llvm.ctlz.i32( i32 %u ) + ret i32 %0 +} + +declare i32 @llvm.ctlz.i32(i32) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/Mips/2008-10-13-LegalizerBug.ll b/src/LLVM/test/CodeGen/Mips/2008-10-13-LegalizerBug.ll new file mode 100644 index 0000000..18f5b3d --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/2008-10-13-LegalizerBug.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s -march=mips +; PR2794 + +define i32 @main(i8*) nounwind { +entry: + br label %continue.outer + +continue.outer: ; preds = %case4, %entry + %p.0.ph.rec = phi i32 [ 0, %entry ], [ %indvar.next, %case4 ] ; <i32> [#uses=2] + %p.0.ph = getelementptr i8* %0, i32 %p.0.ph.rec ; <i8*> [#uses=1] + %1 = load i8* %p.0.ph ; <i8> [#uses=1] + switch i8 %1, label %infloop [ + i8 0, label %return.split + i8 76, label %case4 + i8 108, label %case4 + i8 104, label %case4 + i8 42, label %case4 + ] + +case4: ; preds = %continue.outer, %continue.outer, %continue.outer, %continue.outer + %indvar.next = add i32 %p.0.ph.rec, 1 ; <i32> [#uses=1] + br label %continue.outer + +return.split: ; preds = %continue.outer + ret i32 0 + +infloop: ; preds = %infloop, %continue.outer + br label %infloop +}
diff --git a/src/LLVM/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll b/src/LLVM/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll new file mode 100644 index 0000000..f518843 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll
@@ -0,0 +1,55 @@ +; RUN: llc < %s +; PR2667 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "psp" + %struct._Bigint = type { %struct._Bigint*, i32, i32, i32, i32, [1 x i32] } + %struct.__FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*, i8*, i32)*, i32 (i8*, i8*, i32)*, i32 (i8*, i32, i32)*, i32 (i8*)*, %struct.__sbuf, i8*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i32, %struct._reent*, i32 } + %struct.__sbuf = type { i8*, i32 } + %struct._atexit = type { %struct._atexit*, i32, [32 x void ()*], %struct._on_exit_args } + %struct._glue = type { %struct._glue*, i32, %struct.__FILE* } + %struct._on_exit_args = type { [32 x i8*], [32 x i8*], i32, i32 } + %struct._reent = type { i32, %struct.__FILE*, %struct.__FILE*, %struct.__FILE*, i32, [25 x i8], i32, i8*, i32, void (%struct._reent*)*, %struct._Bigint*, i32, %struct._Bigint*, %struct._Bigint**, i32, i8*, { { [30 x i8*], [30 x i32] } }, %struct._atexit*, %struct._atexit, void (i32)**, %struct._glue, [3 x %struct.__FILE] } +@_impure_ptr = external global %struct._reent* ; <%struct._reent**> [#uses=1] + +define double @_erand48_r(%struct._reent* %r, i16* %xseed) nounwind { +entry: + tail call void @__dorand48( %struct._reent* %r, i16* %xseed ) nounwind + load i16* %xseed, align 2 ; <i16>:0 [#uses=1] + uitofp i16 %0 to double ; <double>:1 [#uses=1] + tail call double @ldexp( double %1, i32 -48 ) nounwind ; <double>:2 [#uses=1] + getelementptr i16* %xseed, i32 1 ; <i16*>:3 [#uses=1] + load i16* %3, align 2 ; <i16>:4 [#uses=1] + uitofp i16 %4 to double ; <double>:5 [#uses=1] + tail call double @ldexp( double %5, i32 -32 ) nounwind ; <double>:6 [#uses=1] + fadd double %2, %6 ; <double>:7 [#uses=1] + getelementptr i16* %xseed, i32 2 ; <i16*>:8 [#uses=1] + load i16* %8, align 2 ; <i16>:9 [#uses=1] + uitofp i16 %9 to double ; <double>:10 [#uses=1] + tail call double @ldexp( double %10, i32 -16 ) 
nounwind ; <double>:11 [#uses=1] + fadd double %7, %11 ; <double>:12 [#uses=1] + ret double %12 +} + +declare void @__dorand48(%struct._reent*, i16*) + +declare double @ldexp(double, i32) + +define double @erand48(i16* %xseed) nounwind { +entry: + load %struct._reent** @_impure_ptr, align 4 ; <%struct._reent*>:0 [#uses=1] + tail call void @__dorand48( %struct._reent* %0, i16* %xseed ) nounwind + load i16* %xseed, align 2 ; <i16>:1 [#uses=1] + uitofp i16 %1 to double ; <double>:2 [#uses=1] + tail call double @ldexp( double %2, i32 -48 ) nounwind ; <double>:3 [#uses=1] + getelementptr i16* %xseed, i32 1 ; <i16*>:4 [#uses=1] + load i16* %4, align 2 ; <i16>:5 [#uses=1] + uitofp i16 %5 to double ; <double>:6 [#uses=1] + tail call double @ldexp( double %6, i32 -32 ) nounwind ; <double>:7 [#uses=1] + fadd double %3, %7 ; <double>:8 [#uses=1] + getelementptr i16* %xseed, i32 2 ; <i16*>:9 [#uses=1] + load i16* %9, align 2 ; <i16>:10 [#uses=1] + uitofp i16 %10 to double ; <double>:11 [#uses=1] + tail call double @ldexp( double %11, i32 -16 ) nounwind ; <double>:12 [#uses=1] + fadd double %8, %12 ; <double>:13 [#uses=1] + ret double %13 +}
diff --git a/src/LLVM/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll b/src/LLVM/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll new file mode 100644 index 0000000..b8d6826 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-n32" +target triple = "mips-unknown-linux" + +define float @h() nounwind readnone { +entry: +; CHECK: lw $2, %got($CPI0_0)($gp) +; CHECK: lwc1 $f0, %lo($CPI0_0)($2) + ret float 0x400B333340000000 +}
diff --git a/src/LLVM/test/CodeGen/Mips/2010-04-07-DbgValueOtherTargets.ll b/src/LLVM/test/CodeGen/Mips/2010-04-07-DbgValueOtherTargets.ll new file mode 100644 index 0000000..994e19a --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/2010-04-07-DbgValueOtherTargets.ll
@@ -0,0 +1,28 @@ +; RUN: llc -O0 -march=mips -asm-verbose < %s | FileCheck %s +; Check that DEBUG_VALUE comments come through on a variety of targets. + +define i32 @main() nounwind ssp { +entry: +; CHECK: DEBUG_VALUE + call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9 + ret i32 0, !dbg !10 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.sp = !{!0} + +!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5} +!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 0} +!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] +!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!9 = metadata !{i32 3, i32 11, metadata !8, null} +!10 = metadata !{i32 4, i32 2, metadata !8, null} +
diff --git a/src/LLVM/test/CodeGen/Mips/2010-07-20-Switch.ll b/src/LLVM/test/CodeGen/Mips/2010-07-20-Switch.ll new file mode 100644 index 0000000..07fc10c --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/2010-07-20-Switch.ll
@@ -0,0 +1,33 @@ +; RUN: llc < %s -march=mips -relocation-model=static | FileCheck %s + +define i32 @main() nounwind readnone { +entry: + %x = alloca i32, align 4 ; <i32*> [#uses=2] + volatile store i32 2, i32* %x, align 4 + %0 = volatile load i32* %x, align 4 ; <i32> [#uses=1] +; CHECK: lui $3, %hi($JTI0_0) +; CHECK: sll $2, $2, 2 +; CHECK: addiu $3, $3, %lo($JTI0_0) + switch i32 %0, label %bb4 [ + i32 0, label %bb5 + i32 1, label %bb1 + i32 2, label %bb2 + i32 3, label %bb3 + ] + +bb1: ; preds = %entry + ret i32 2 + +; CHECK: $BB0_2 +bb2: ; preds = %entry + ret i32 0 + +bb3: ; preds = %entry + ret i32 3 + +bb4: ; preds = %entry + ret i32 4 + +bb5: ; preds = %entry + ret i32 1 +}
diff --git a/src/LLVM/test/CodeGen/Mips/2010-11-09-CountLeading.ll b/src/LLVM/test/CodeGen/Mips/2010-11-09-CountLeading.ll new file mode 100644 index 0000000..c592b31 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/2010-11-09-CountLeading.ll
@@ -0,0 +1,33 @@ +; RUN: llc -march=mips < %s | FileCheck %s + +; CHECK: clz $2, $4 +define i32 @t1(i32 %X) nounwind readnone { +entry: + %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %X) + ret i32 %tmp1 +} + +declare i32 @llvm.ctlz.i32(i32) nounwind readnone + +; CHECK: clz $2, $4 +define i32 @t2(i32 %X) nounwind readnone { +entry: + %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %X) + ret i32 %tmp1 +} + +; CHECK: clo $2, $4 +define i32 @t3(i32 %X) nounwind readnone { +entry: + %neg = xor i32 %X, -1 + %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %neg) + ret i32 %tmp1 +} + +; CHECK: clo $2, $4 +define i32 @t4(i32 %X) nounwind readnone { +entry: + %neg = xor i32 %X, -1 + %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %neg) + ret i32 %tmp1 +}
diff --git a/src/LLVM/test/CodeGen/Mips/2010-11-09-Mul.ll b/src/LLVM/test/CodeGen/Mips/2010-11-09-Mul.ll new file mode 100644 index 0000000..dcade3c --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/2010-11-09-Mul.ll
@@ -0,0 +1,15 @@ +; RUN: llc -march=mips < %s | FileCheck %s + +; CHECK: mul $2, $5, $4 +define i32 @mul1(i32 %a, i32 %b) nounwind readnone { +entry: + %mul = mul i32 %b, %a + ret i32 %mul +} + +; CHECK: mul $2, $5, $4 +define i32 @mul2(i32 %a, i32 %b) nounwind readnone { +entry: + %mul = mul nsw i32 %b, %a + ret i32 %mul +}
diff --git a/src/LLVM/test/CodeGen/Mips/2011-05-26-BranchKillsVreg.ll b/src/LLVM/test/CodeGen/Mips/2011-05-26-BranchKillsVreg.ll new file mode 100644 index 0000000..1255949 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/2011-05-26-BranchKillsVreg.ll
@@ -0,0 +1,43 @@ +; RUN: llc < %s -verify-coalescing +; PR10046 +; +; PHI elimination splits the critical edge from %while.end415 to %if.end427. +; This requires updating the BNE-J terminators to a BEQ. The BNE instruction +; kills a virtual register, and LiveVariables must be updated with the new kill +; instruction. + +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-n32" +target triple = "mips-ellcc-linux" + +define i32 @mergesort(i8* %base, i32 %nmemb, i32 %size, i32 (i8*, i8*)* nocapture %cmp) nounwind { +entry: + br i1 undef, label %return, label %if.end13 + +if.end13: ; preds = %entry + br label %while.body + +while.body: ; preds = %while.body, %if.end13 + %list1.0482 = phi i8* [ %base, %if.end13 ], [ null, %while.body ] + br i1 undef, label %while.end415, label %while.body + +while.end415: ; preds = %while.body + br i1 undef, label %if.then419, label %if.end427 + +if.then419: ; preds = %while.end415 + %call425 = tail call i8* @memmove(i8* %list1.0482, i8* undef, i32 undef) nounwind + br label %if.end427 + +if.end427: ; preds = %if.then419, %while.end415 + %list2.1 = phi i8* [ undef, %if.then419 ], [ %list1.0482, %while.end415 ] + tail call void @free(i8* %list2.1) + unreachable + +return: ; preds = %entry + ret i32 -1 +} + + +declare i8* @memmove(i8*, i8*, i32) + +declare void @free(i8*) +
diff --git a/src/LLVM/test/CodeGen/Mips/addc.ll b/src/LLVM/test/CodeGen/Mips/addc.ll new file mode 100644 index 0000000..e5d05b1 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/addc.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=mipsel | FileCheck %s +; RUN: llc < %s -march=mips | FileCheck %s + +define void @f(i64 %l, i64* nocapture %p) nounwind { +entry: +; CHECK: lui +; CHECK: ori +; CHECK: addu + %add = add i64 %l, 1311768467294899695 + store i64 %add, i64* %p, align 4 + ret void +} +
diff --git a/src/LLVM/test/CodeGen/Mips/alloca.ll b/src/LLVM/test/CodeGen/Mips/alloca.ll new file mode 100644 index 0000000..15c73e2 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/alloca.ll
@@ -0,0 +1,98 @@ +; RUN: llc -march=mipsel < %s | FileCheck %s + +define i32 @twoalloca(i32 %size) nounwind { +entry: +; CHECK: subu $[[T0:[0-9]+]], $sp, $[[SZ:[0-9]+]] +; CHECK: addu $sp, $zero, $[[T0]] +; CHECK: addiu $[[T1:[0-9]+]], $sp, [[OFF:[0-9]+]] +; CHECK: subu $[[T2:[0-9]+]], $sp, $[[SZ]] +; CHECK: addu $sp, $zero, $[[T2]] +; CHECK: addiu $[[T3:[0-9]+]], $sp, [[OFF]] +; CHECK: lw $[[T4:[0-9]+]], %call16(foo)($gp) +; CHECK: addu $25, $zero, $[[T4]] +; CHECK: addu $4, $zero, $[[T1]] +; CHECK: jalr $25 + %tmp1 = alloca i8, i32 %size, align 4 + %add.ptr = getelementptr inbounds i8* %tmp1, i32 5 + store i8 97, i8* %add.ptr, align 1 + %tmp4 = alloca i8, i32 %size, align 4 + call void @foo2(double 1.000000e+00, double 2.000000e+00, i32 3) nounwind + %call = call i32 @foo(i8* %tmp1) nounwind + %call7 = call i32 @foo(i8* %tmp4) nounwind + %add = add nsw i32 %call7, %call + ret i32 %add +} + +declare void @foo2(double, double, i32) + +declare i32 @foo(i8*) + +@.str = private unnamed_addr constant [22 x i8] c"%d %d %d %d %d %d %d\0A\00", align 1 + +define i32 @alloca2(i32 %size) nounwind { +entry: +; dynamic allocated stack area and $gp restore slot have the same offsets +; relative to $sp. 
+; +; CHECK: alloca2 +; CHECK: .cprestore [[OFF:[0-9]+]] +; CHECK: subu $[[T0:[0-9]+]], $sp, $[[SZ:[0-9]+]] +; CHECK: addu $sp, $zero, $[[T0]] +; CHECK: addiu $[[T1:[0-9]+]], $sp, [[OFF]] + + %tmp1 = alloca i8, i32 %size, align 4 + %0 = bitcast i8* %tmp1 to i32* + %cmp = icmp sgt i32 %size, 10 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry +; CHECK: addiu $4, $[[T1]], 40 + + %add.ptr = getelementptr inbounds i8* %tmp1, i32 40 + %1 = bitcast i8* %add.ptr to i32* + call void @foo3(i32* %1) nounwind + %arrayidx15.pre = getelementptr inbounds i8* %tmp1, i32 12 + %.pre = bitcast i8* %arrayidx15.pre to i32* + br label %if.end + +if.else: ; preds = %entry +; CHECK: addiu $4, $[[T1]], 12 + + %add.ptr5 = getelementptr inbounds i8* %tmp1, i32 12 + %2 = bitcast i8* %add.ptr5 to i32* + call void @foo3(i32* %2) nounwind + br label %if.end + +if.end: ; preds = %if.else, %if.then +; CHECK: lw $5, 0($[[T1]]) +; CHECK: lw $25, %call16(printf) + + %.pre-phi = phi i32* [ %2, %if.else ], [ %.pre, %if.then ] + %tmp7 = load i32* %0, align 4, !tbaa !0 + %arrayidx9 = getelementptr inbounds i8* %tmp1, i32 4 + %3 = bitcast i8* %arrayidx9 to i32* + %tmp10 = load i32* %3, align 4, !tbaa !0 + %arrayidx12 = getelementptr inbounds i8* %tmp1, i32 8 + %4 = bitcast i8* %arrayidx12 to i32* + %tmp13 = load i32* %4, align 4, !tbaa !0 + %tmp16 = load i32* %.pre-phi, align 4, !tbaa !0 + %arrayidx18 = getelementptr inbounds i8* %tmp1, i32 16 + %5 = bitcast i8* %arrayidx18 to i32* + %tmp19 = load i32* %5, align 4, !tbaa !0 + %arrayidx21 = getelementptr inbounds i8* %tmp1, i32 20 + %6 = bitcast i8* %arrayidx21 to i32* + %tmp22 = load i32* %6, align 4, !tbaa !0 + %arrayidx24 = getelementptr inbounds i8* %tmp1, i32 24 + %7 = bitcast i8* %arrayidx24 to i32* + %tmp25 = load i32* %7, align 4, !tbaa !0 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8]* @.str, i32 0, i32 0), i32 %tmp7, i32 %tmp10, i32 %tmp13, i32 %tmp16, i32 %tmp19, i32 %tmp22, i32 %tmp25) 
nounwind + ret i32 0 +} + +declare void @foo3(i32*) + +declare i32 @printf(i8* nocapture, ...) nounwind + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/src/LLVM/test/CodeGen/Mips/analyzebranch.ll b/src/LLVM/test/CodeGen/Mips/analyzebranch.ll new file mode 100644 index 0000000..8f0bdf2 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/analyzebranch.ll
@@ -0,0 +1,46 @@ +; RUN: llc -march=mips < %s | FileCheck %s + +define double @foo(double %a, double %b) nounwind readnone { +entry: +; CHECK: bc1f $BB0_2 +; CHECK: nop +; CHECK: # BB#1: + + %cmp = fcmp ogt double %a, 0.000000e+00 + br i1 %cmp, label %if.end6, label %if.else + +if.else: ; preds = %entry + %cmp3 = fcmp ogt double %b, 0.000000e+00 + br i1 %cmp3, label %if.end6, label %return + +if.end6: ; preds = %if.else, %entry + %c.0 = phi double [ %a, %entry ], [ 0.000000e+00, %if.else ] + %sub = fsub double %b, %c.0 + %mul = fmul double %sub, 2.000000e+00 + br label %return + +return: ; preds = %if.else, %if.end6 + %retval.0 = phi double [ %mul, %if.end6 ], [ 0.000000e+00, %if.else ] + ret double %retval.0 +} + +define void @f1(float %f) nounwind { +entry: +; CHECK: bc1t $BB1_2 +; CHECK: nop +; CHECK: # BB#1: + %cmp = fcmp une float %f, 0.000000e+00 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @abort() noreturn + unreachable + +if.end: ; preds = %entry + tail call void (...)* @f2() nounwind + ret void +} + +declare void @abort() noreturn nounwind + +declare void @f2(...)
diff --git a/src/LLVM/test/CodeGen/Mips/atomic.ll b/src/LLVM/test/CodeGen/Mips/atomic.ll new file mode 100644 index 0000000..a4763b1 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/atomic.ll
@@ -0,0 +1,244 @@ +; RUN: llc -march=mipsel < %s | FileCheck %s + +@x = common global i32 0, align 4 + +define i32 @AtomicLoadAdd32(i32 %incr) nounwind { +entry: + %0 = atomicrmw add i32* @x, i32 %incr monotonic + ret i32 %0 + +; CHECK: AtomicLoadAdd32: +; CHECK: lw $[[R0:[0-9]+]], %got(x)($gp) +; CHECK: $[[BB0:[A-Z_0-9]+]]: +; CHECK: ll $[[R1:[0-9]+]], 0($[[R0]]) +; CHECK: addu $[[R2:[0-9]+]], $[[R1]], $4 +; CHECK: sc $[[R2]], 0($[[R0]]) +; CHECK: beq $[[R2]], $zero, $[[BB0]] +} + +define i32 @AtomicLoadNand32(i32 %incr) nounwind { +entry: + %0 = atomicrmw nand i32* @x, i32 %incr monotonic + ret i32 %0 + +; CHECK: AtomicLoadNand32: +; CHECK: lw $[[R0:[0-9]+]], %got(x)($gp) +; CHECK: $[[BB0:[A-Z_0-9]+]]: +; CHECK: ll $[[R1:[0-9]+]], 0($[[R0]]) +; CHECK: and $[[R3:[0-9]+]], $[[R1]], $4 +; CHECK: nor $[[R2:[0-9]+]], $zero, $[[R3]] +; CHECK: sc $[[R2]], 0($[[R0]]) +; CHECK: beq $[[R2]], $zero, $[[BB0]] +} + +define i32 @AtomicSwap32(i32 %newval) nounwind { +entry: + %newval.addr = alloca i32, align 4 + store i32 %newval, i32* %newval.addr, align 4 + %tmp = load i32* %newval.addr, align 4 + %0 = atomicrmw xchg i32* @x, i32 %tmp monotonic + ret i32 %0 + +; CHECK: AtomicSwap32: +; CHECK: lw $[[R0:[0-9]+]], %got(x)($gp) +; CHECK: $[[BB0:[A-Z_0-9]+]]: +; CHECK: ll ${{[0-9]+}}, 0($[[R0]]) +; CHECK: sc $[[R2:[0-9]+]], 0($[[R0]]) +; CHECK: beq $[[R2]], $zero, $[[BB0]] +} + +define i32 @AtomicCmpSwap32(i32 %oldval, i32 %newval) nounwind { +entry: + %newval.addr = alloca i32, align 4 + store i32 %newval, i32* %newval.addr, align 4 + %tmp = load i32* %newval.addr, align 4 + %0 = cmpxchg i32* @x, i32 %oldval, i32 %tmp monotonic + ret i32 %0 + +; CHECK: AtomicCmpSwap32: +; CHECK: lw $[[R0:[0-9]+]], %got(x)($gp) +; CHECK: $[[BB0:[A-Z_0-9]+]]: +; CHECK: ll $2, 0($[[R0]]) +; CHECK: bne $2, $4, $[[BB1:[A-Z_0-9]+]] +; CHECK: sc $[[R2:[0-9]+]], 0($[[R0]]) +; CHECK: beq $[[R2]], $zero, $[[BB0]] +; CHECK: $[[BB1]]: +} + + + +@y = common global i8 0, align 1 + +define signext i8 
@AtomicLoadAdd8(i8 signext %incr) nounwind { +entry: + %0 = atomicrmw add i8* @y, i8 %incr monotonic + ret i8 %0 + +; CHECK: AtomicLoadAdd8: +; CHECK: lw $[[R0:[0-9]+]], %got(y)($gp) +; CHECK: addiu $[[R1:[0-9]+]], $zero, -4 +; CHECK: and $[[R2:[0-9]+]], $[[R0]], $[[R1]] +; CHECK: andi $[[R3:[0-9]+]], $[[R0]], 3 +; CHECK: sll $[[R4:[0-9]+]], $[[R3]], 3 +; CHECK: ori $[[R5:[0-9]+]], $zero, 255 +; CHECK: sllv $[[R6:[0-9]+]], $[[R5]], $[[R4]] +; CHECK: nor $[[R7:[0-9]+]], $zero, $[[R6]] +; CHECK: sllv $[[R9:[0-9]+]], $4, $[[R4]] + +; CHECK: $[[BB0:[A-Z_0-9]+]]: +; CHECK: ll $[[R10:[0-9]+]], 0($[[R2]]) +; CHECK: addu $[[R11:[0-9]+]], $[[R10]], $[[R9]] +; CHECK: and $[[R12:[0-9]+]], $[[R11]], $[[R6]] +; CHECK: and $[[R13:[0-9]+]], $[[R10]], $[[R7]] +; CHECK: or $[[R14:[0-9]+]], $[[R13]], $[[R12]] +; CHECK: sc $[[R14]], 0($[[R2]]) +; CHECK: beq $[[R14]], $zero, $[[BB0]] + +; CHECK: and $[[R15:[0-9]+]], $[[R10]], $[[R6]] +; CHECK: srlv $[[R16:[0-9]+]], $[[R15]], $[[R4]] +; CHECK: sll $[[R17:[0-9]+]], $[[R16]], 24 +; CHECK: sra $2, $[[R17]], 24 +} + +define signext i8 @AtomicLoadSub8(i8 signext %incr) nounwind { +entry: + %0 = atomicrmw sub i8* @y, i8 %incr monotonic + ret i8 %0 + +; CHECK: AtomicLoadSub8: +; CHECK: lw $[[R0:[0-9]+]], %got(y)($gp) +; CHECK: addiu $[[R1:[0-9]+]], $zero, -4 +; CHECK: and $[[R2:[0-9]+]], $[[R0]], $[[R1]] +; CHECK: andi $[[R3:[0-9]+]], $[[R0]], 3 +; CHECK: sll $[[R4:[0-9]+]], $[[R3]], 3 +; CHECK: ori $[[R5:[0-9]+]], $zero, 255 +; CHECK: sllv $[[R6:[0-9]+]], $[[R5]], $[[R4]] +; CHECK: nor $[[R7:[0-9]+]], $zero, $[[R6]] +; CHECK: sllv $[[R9:[0-9]+]], $4, $[[R4]] + +; CHECK: $[[BB0:[A-Z_0-9]+]]: +; CHECK: ll $[[R10:[0-9]+]], 0($[[R2]]) +; CHECK: subu $[[R11:[0-9]+]], $[[R10]], $[[R9]] +; CHECK: and $[[R12:[0-9]+]], $[[R11]], $[[R6]] +; CHECK: and $[[R13:[0-9]+]], $[[R10]], $[[R7]] +; CHECK: or $[[R14:[0-9]+]], $[[R13]], $[[R12]] +; CHECK: sc $[[R14]], 0($[[R2]]) +; CHECK: beq $[[R14]], $zero, $[[BB0]] + +; CHECK: and $[[R15:[0-9]+]], $[[R10]], 
$[[R6]] +; CHECK: srlv $[[R16:[0-9]+]], $[[R15]], $[[R4]] +; CHECK: sll $[[R17:[0-9]+]], $[[R16]], 24 +; CHECK: sra $2, $[[R17]], 24 +} + +define signext i8 @AtomicLoadNand8(i8 signext %incr) nounwind { +entry: + %0 = atomicrmw nand i8* @y, i8 %incr monotonic + ret i8 %0 + +; CHECK: AtomicLoadNand8: +; CHECK: lw $[[R0:[0-9]+]], %got(y)($gp) +; CHECK: addiu $[[R1:[0-9]+]], $zero, -4 +; CHECK: and $[[R2:[0-9]+]], $[[R0]], $[[R1]] +; CHECK: andi $[[R3:[0-9]+]], $[[R0]], 3 +; CHECK: sll $[[R4:[0-9]+]], $[[R3]], 3 +; CHECK: ori $[[R5:[0-9]+]], $zero, 255 +; CHECK: sllv $[[R6:[0-9]+]], $[[R5]], $[[R4]] +; CHECK: nor $[[R7:[0-9]+]], $zero, $[[R6]] +; CHECK: sllv $[[R9:[0-9]+]], $4, $[[R4]] + +; CHECK: $[[BB0:[A-Z_0-9]+]]: +; CHECK: ll $[[R10:[0-9]+]], 0($[[R2]]) +; CHECK: and $[[R18:[0-9]+]], $[[R10]], $[[R9]] +; CHECK: nor $[[R11:[0-9]+]], $zero, $[[R18]] +; CHECK: and $[[R12:[0-9]+]], $[[R11]], $[[R6]] +; CHECK: and $[[R13:[0-9]+]], $[[R10]], $[[R7]] +; CHECK: or $[[R14:[0-9]+]], $[[R13]], $[[R12]] +; CHECK: sc $[[R14]], 0($[[R2]]) +; CHECK: beq $[[R14]], $zero, $[[BB0]] + +; CHECK: and $[[R15:[0-9]+]], $[[R10]], $[[R6]] +; CHECK: srlv $[[R16:[0-9]+]], $[[R15]], $[[R4]] +; CHECK: sll $[[R17:[0-9]+]], $[[R16]], 24 +; CHECK: sra $2, $[[R17]], 24 +} + +define signext i8 @AtomicSwap8(i8 signext %newval) nounwind { +entry: + %0 = atomicrmw xchg i8* @y, i8 %newval monotonic + ret i8 %0 + +; CHECK: AtomicSwap8: +; CHECK: lw $[[R0:[0-9]+]], %got(y)($gp) +; CHECK: addiu $[[R1:[0-9]+]], $zero, -4 +; CHECK: and $[[R2:[0-9]+]], $[[R0]], $[[R1]] +; CHECK: andi $[[R3:[0-9]+]], $[[R0]], 3 +; CHECK: sll $[[R4:[0-9]+]], $[[R3]], 3 +; CHECK: ori $[[R5:[0-9]+]], $zero, 255 +; CHECK: sllv $[[R6:[0-9]+]], $[[R5]], $[[R4]] +; CHECK: nor $[[R7:[0-9]+]], $zero, $[[R6]] +; CHECK: sllv $[[R9:[0-9]+]], $4, $[[R4]] + +; CHECK: $[[BB0:[A-Z_0-9]+]]: +; CHECK: ll $[[R10:[0-9]+]], 0($[[R2]]) +; CHECK: and $[[R13:[0-9]+]], $[[R10]], $[[R7]] +; CHECK: or $[[R14:[0-9]+]], $[[R13]], $[[R9]] +; CHECK: sc 
$[[R14]], 0($[[R2]]) +; CHECK: beq $[[R14]], $zero, $[[BB0]] + +; CHECK: and $[[R15:[0-9]+]], $[[R10]], $[[R6]] +; CHECK: srlv $[[R16:[0-9]+]], $[[R15]], $[[R4]] +; CHECK: sll $[[R17:[0-9]+]], $[[R16]], 24 +; CHECK: sra $2, $[[R17]], 24 +} + +define signext i8 @AtomicCmpSwap8(i8 signext %oldval, i8 signext %newval) nounwind { +entry: + %0 = cmpxchg i8* @y, i8 %oldval, i8 %newval monotonic + ret i8 %0 + +; CHECK: AtomicCmpSwap8: +; CHECK: lw $[[R0:[0-9]+]], %got(y)($gp) +; CHECK: addiu $[[R1:[0-9]+]], $zero, -4 +; CHECK: and $[[R2:[0-9]+]], $[[R0]], $[[R1]] +; CHECK: andi $[[R3:[0-9]+]], $[[R0]], 3 +; CHECK: sll $[[R4:[0-9]+]], $[[R3]], 3 +; CHECK: ori $[[R5:[0-9]+]], $zero, 255 +; CHECK: sllv $[[R6:[0-9]+]], $[[R5]], $[[R4]] +; CHECK: nor $[[R7:[0-9]+]], $zero, $[[R6]] +; CHECK: andi $[[R8:[0-9]+]], $4, 255 +; CHECK: sllv $[[R9:[0-9]+]], $[[R8]], $[[R4]] +; CHECK: andi $[[R10:[0-9]+]], $5, 255 +; CHECK: sllv $[[R11:[0-9]+]], $[[R10]], $[[R4]] + +; CHECK: $[[BB0:[A-Z_0-9]+]]: +; CHECK: ll $[[R12:[0-9]+]], 0($[[R2]]) +; CHECK: and $[[R13:[0-9]+]], $[[R12]], $[[R6]] +; CHECK: bne $[[R13]], $[[R9]], $[[BB1:[A-Z_0-9]+]] + +; CHECK: and $[[R14:[0-9]+]], $[[R12]], $[[R7]] +; CHECK: or $[[R15:[0-9]+]], $[[R14]], $[[R11]] +; CHECK: sc $[[R15]], 0($[[R2]]) +; CHECK: beq $[[R15]], $zero, $[[BB0]] + +; CHECK: $[[BB1]]: +; CHECK: srlv $[[R16:[0-9]+]], $[[R13]], $[[R4]] +; CHECK: sll $[[R17:[0-9]+]], $[[R16]], 24 +; CHECK: sra $2, $[[R17]], 24 +} + +@countsint = common global i32 0, align 4 + +define i32 @CheckSync(i32 %v) nounwind noinline { +entry: + %0 = atomicrmw add i32* @countsint, i32 %v seq_cst + ret i32 %0 + +; CHECK: CheckSync: +; CHECK: sync 0 +; CHECK: ll +; CHECK: sc +; CHECK: beq +; CHECK: sync 0 +} +
diff --git a/src/LLVM/test/CodeGen/Mips/blockaddr.ll b/src/LLVM/test/CodeGen/Mips/blockaddr.ll new file mode 100644 index 0000000..6de6b77 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/blockaddr.ll
@@ -0,0 +1,31 @@ +; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s -check-prefix=CHECK-PIC +; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC + +@reg = common global i8* null, align 4 + +define i8* @dummy(i8* %x) nounwind readnone noinline { +entry: + ret i8* %x +} + +; CHECK-PIC: lw $[[R0:[0-9]+]], %got($tmp[[T0:[0-9]+]])($gp) +; CHECK-PIC: addiu ${{[0-9]+}}, $[[R0]], %lo($tmp[[T0]]) +; CHECK-PIC: lw $[[R1:[0-9]+]], %got($tmp[[T1:[0-9]+]])($gp) +; CHECK-PIC: addiu ${{[0-9]+}}, $[[R1]], %lo($tmp[[T1]]) +; CHECK-STATIC: lui $[[R2:[0-9]+]], %hi($tmp[[T0:[0-9]+]]) +; CHECK-STATIC: addiu ${{[0-9]+}}, $[[R2]], %lo($tmp[[T0]]) +; CHECK-STATIC: lui $[[R3:[0-9]+]], %hi($tmp[[T1:[0-9]+]]) +; CHECK-STATIC: addiu ${{[0-9]+}}, $[[R3]], %lo($tmp[[T1]]) +define void @f() nounwind { +entry: + %call = tail call i8* @dummy(i8* blockaddress(@f, %baz)) + indirectbr i8* %call, [label %baz, label %foo] + +foo: ; preds = %foo, %entry + store i8* blockaddress(@f, %foo), i8** @reg, align 4 + br label %foo + +baz: ; preds = %entry + store i8* null, i8** @reg, align 4 + ret void +}
diff --git a/src/LLVM/test/CodeGen/Mips/brdelayslot.ll b/src/LLVM/test/CodeGen/Mips/brdelayslot.ll new file mode 100644 index 0000000..b266ce6 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/brdelayslot.ll
@@ -0,0 +1,15 @@ +; RUN: llc -march=mipsel -enable-mips-delay-filler < %s | FileCheck %s + +define void @foo1() nounwind { +entry: +; CHECK: jalr +; CHECK-NOT: nop +; CHECK: jr +; CHECK-NOT: nop +; CHECK: .end + + tail call void @foo2(i32 3) nounwind + ret void +} + +declare void @foo2(i32)
diff --git a/src/LLVM/test/CodeGen/Mips/buildpairextractelementf64.ll b/src/LLVM/test/CodeGen/Mips/buildpairextractelementf64.ll new file mode 100644 index 0000000..585bc25 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/buildpairextractelementf64.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -march=mipsel | FileCheck %s +; RUN: llc < %s -march=mips | FileCheck %s +@a = external global i32 + +define double @f(i32 %a1, double %d) nounwind { +entry: +; CHECK: mtc1 +; CHECK: mtc1 + store i32 %a1, i32* @a, align 4 + %add = fadd double %d, 2.000000e+00 + ret double %add +} + +define void @f3(double %d, i32 %a1) nounwind { +entry: +; CHECK: mfc1 +; CHECK: mfc1 + tail call void @f2(i32 %a1, double %d) nounwind + ret void +} + +declare void @f2(i32, double) +
diff --git a/src/LLVM/test/CodeGen/Mips/cmov.ll b/src/LLVM/test/CodeGen/Mips/cmov.ll new file mode 100644 index 0000000..7851ba9 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/cmov.ll
@@ -0,0 +1,32 @@ +; RUN: llc -march=mips < %s | FileCheck %s +; RUN: llc -march=mips -regalloc=basic < %s | FileCheck %s + +@i1 = global [3 x i32] [i32 1, i32 2, i32 3], align 4 +@i3 = common global i32* null, align 4 + +; CHECK: addiu ${{[0-9]+}}, $gp, %got(i1) +; CHECK: lw ${{[0-9]+}}, %got(i3)($gp) +define i32* @cmov1(i32 %s) nounwind readonly { +entry: + %tobool = icmp ne i32 %s, 0 + %tmp1 = load i32** @i3, align 4 + %cond = select i1 %tobool, i32* getelementptr inbounds ([3 x i32]* @i1, i32 0, i32 0), i32* %tmp1 + ret i32* %cond +} + +@c = global i32 1, align 4 +@d = global i32 0, align 4 + +; CHECK: cmov2: +; CHECK: addiu $[[R0:[0-9]+]], $gp, %got(c) +; CHECK: addiu $[[R1:[0-9]+]], $gp, %got(d) +; CHECK: movn $[[R1]], $[[R0]], ${{[0-9]+}} +define i32 @cmov2(i32 %s) nounwind readonly { +entry: + %tobool = icmp ne i32 %s, 0 + %tmp1 = load i32* @c, align 4 + %tmp2 = load i32* @d, align 4 + %cond = select i1 %tobool, i32 %tmp1, i32 %tmp2 + ret i32 %cond +} +
diff --git a/src/LLVM/test/CodeGen/Mips/constantfp0.ll b/src/LLVM/test/CodeGen/Mips/constantfp0.ll new file mode 100644 index 0000000..191f31d --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/constantfp0.ll
@@ -0,0 +1,11 @@ +; RUN: llc -march=mips < %s | FileCheck %s + +define i32 @branch(double %d) nounwind readnone { +entry: +; CHECK: mtc1 $zero, $f[[R0:[0-9]+]] +; CHECK: c.eq.d $f{{[0-9]+}}, $f[[R0]] + + %tobool = fcmp une double %d, 0.000000e+00 + %. = zext i1 %tobool to i32 + ret i32 %. +}
diff --git a/src/LLVM/test/CodeGen/Mips/cprestore.ll b/src/LLVM/test/CodeGen/Mips/cprestore.ll new file mode 100644 index 0000000..391f5c7 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/cprestore.ll
@@ -0,0 +1,20 @@ +; DISABLED: llc -march=mipsel < %s | FileCheck %s +; RUN: false + +; byval is currently unsupported. +; XFAIL: * + +; CHECK: .set macro +; CHECK-NEXT: .cprestore +; CHECK-NEXT: .set nomacro + +%struct.S = type { [16384 x i32] } + +define void @foo2() nounwind { +entry: + %s = alloca %struct.S, align 4 + call void @foo1(%struct.S* byval %s) + ret void +} + +declare void @foo1(%struct.S* byval)
diff --git a/src/LLVM/test/CodeGen/Mips/dg.exp b/src/LLVM/test/CodeGen/Mips/dg.exp new file mode 100644 index 0000000..adb2cac --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/dg.exp
@@ -0,0 +1,5 @@ +load_lib llvm.exp + +if { [llvm_supports_target Mips] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] +}
diff --git a/src/LLVM/test/CodeGen/Mips/divrem.ll b/src/LLVM/test/CodeGen/Mips/divrem.ll new file mode 100644 index 0000000..398d1b7 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/divrem.ll
@@ -0,0 +1,51 @@ +; RUN: llc -march=mips < %s | FileCheck %s + +; CHECK: div $zero, +define i32 @sdiv1(i32 %a0, i32 %a1) nounwind readnone { +entry: + %div = sdiv i32 %a0, %a1 + ret i32 %div +} + +; CHECK: div $zero, +define i32 @srem1(i32 %a0, i32 %a1) nounwind readnone { +entry: + %rem = srem i32 %a0, %a1 + ret i32 %rem +} + +; CHECK: divu $zero, +define i32 @udiv1(i32 %a0, i32 %a1) nounwind readnone { +entry: + %div = udiv i32 %a0, %a1 + ret i32 %div +} + +; CHECK: divu $zero, +define i32 @urem1(i32 %a0, i32 %a1) nounwind readnone { +entry: + %rem = urem i32 %a0, %a1 + ret i32 %rem +} + +; CHECK: div $zero, +define i32 @sdivrem1(i32 %a0, i32 %a1, i32* nocapture %r) nounwind { +entry: + %rem = srem i32 %a0, %a1 + store i32 %rem, i32* %r, align 4, !tbaa !0 + %div = sdiv i32 %a0, %a1 + ret i32 %div +} + +; CHECK: divu $zero, +define i32 @udivrem1(i32 %a0, i32 %a1, i32* nocapture %r) nounwind { +entry: + %rem = urem i32 %a0, %a1 + store i32 %rem, i32* %r, align 4, !tbaa !0 + %div = udiv i32 %a0, %a1 + ret i32 %div +} + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/src/LLVM/test/CodeGen/Mips/double2int.ll b/src/LLVM/test/CodeGen/Mips/double2int.ll new file mode 100644 index 0000000..445ccb3 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/double2int.ll
@@ -0,0 +1,8 @@ +; RUN: llc -march=mips < %s | FileCheck %s + +define i32 @f1(double %d) nounwind readnone { +entry: +; CHECK: trunc.w.d $f{{[0-9]+}}, $f12 + %conv = fptosi double %d to i32 + ret i32 %conv +}
diff --git a/src/LLVM/test/CodeGen/Mips/eh.ll b/src/LLVM/test/CodeGen/Mips/eh.ll new file mode 100644 index 0000000..9cd3413 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/eh.ll
@@ -0,0 +1,79 @@ +; RUN: llc < %s -march=mipsel | FileCheck %s -check-prefix=CHECK-EL +; RUN: llc < %s -march=mips | FileCheck %s -check-prefix=CHECK-EB + +@g1 = global double 0.000000e+00, align 8 +@_ZTId = external constant i8* + +define void @_Z1fd(double %i2) { +entry: +; CHECK-EL: addiu $sp, $sp +; CHECK-EL: .cfi_def_cfa_offset +; CHECK-EL: sdc1 $f20 +; CHECK-EL: sw $ra +; CHECK-EL: sw $17 +; CHECK-EL: sw $16 +; CHECK-EL: .cfi_offset 52, -8 +; CHECK-EL: .cfi_offset 53, -4 +; CHECK-EB: .cfi_offset 53, -8 +; CHECK-EB: .cfi_offset 52, -4 +; CHECK-EL: .cfi_offset 31, -12 +; CHECK-EL: .cfi_offset 17, -16 +; CHECK-EL: .cfi_offset 16, -20 +; CHECK-EL: .cprestore + + %exception = tail call i8* @__cxa_allocate_exception(i32 8) nounwind + %0 = bitcast i8* %exception to double* + store double 3.200000e+00, double* %0, align 8, !tbaa !0 + invoke void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTId to i8*), i8* null) noreturn + to label %unreachable unwind label %lpad + +lpad: ; preds = %entry +; CHECK-EL: # %lpad +; CHECK-EL: lw $gp +; CHECK-EL: beq $5 + + %exn.val = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0 + catch i8* bitcast (i8** @_ZTId to i8*) + %exn = extractvalue { i8*, i32 } %exn.val, 0 + %sel = extractvalue { i8*, i32 } %exn.val, 1 + %1 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTId to i8*)) nounwind + %2 = icmp eq i32 %sel, %1 + br i1 %2, label %catch, label %eh.resume + +catch: ; preds = %lpad + %3 = tail call i8* @__cxa_begin_catch(i8* %exn) nounwind + %4 = bitcast i8* %3 to double* + %exn.scalar = load double* %4, align 8 + %add = fadd double %exn.scalar, %i2 + store double %add, double* @g1, align 8, !tbaa !0 + tail call void @__cxa_end_catch() nounwind + ret void + +eh.resume: ; preds = %lpad + resume { i8*, i32 } %exn.val + +unreachable: ; preds = %entry + unreachable +} + +declare i8* @__cxa_allocate_exception(i32) + +declare i8* @llvm.eh.exception() nounwind readonly + +declare i32 @__gxx_personality_v0(...) 
+ +declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind + +declare i32 @llvm.eh.typeid.for(i8*) nounwind + +declare void @llvm.eh.resume(i8*, i32) + +declare void @__cxa_throw(i8*, i8*, i8*) + +declare i8* @__cxa_begin_catch(i8*) + +declare void @__cxa_end_catch() + +!0 = metadata !{metadata !"double", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/src/LLVM/test/CodeGen/Mips/extins.ll b/src/LLVM/test/CodeGen/Mips/extins.ll new file mode 100644 index 0000000..69f53e5 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/extins.ll
@@ -0,0 +1,21 @@ +; RUN: llc -march=mips -mcpu=4ke < %s | FileCheck %s + +define i32 @ext0_5_9(i32 %s, i32 %pos, i32 %sz) nounwind readnone { +entry: +; CHECK: ext ${{[0-9]+}}, $4, 5, 9 + %shr = lshr i32 %s, 5 + %and = and i32 %shr, 511 + ret i32 %and +} + +define void @ins2_5_9(i32 %s, i32* nocapture %d) nounwind { +entry: +; CHECK: ins ${{[0-9]+}}, $4, 5, 9 + %and = shl i32 %s, 5 + %shl = and i32 %and, 16352 + %tmp3 = load i32* %d, align 4 + %and5 = and i32 %tmp3, -16353 + %or = or i32 %and5, %shl + store i32 %or, i32* %d, align 4 + ret void +}
diff --git a/src/LLVM/test/CodeGen/Mips/fcopysign.ll b/src/LLVM/test/CodeGen/Mips/fcopysign.ll new file mode 100644 index 0000000..79f956d --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/fcopysign.ll
@@ -0,0 +1,55 @@ +; RUN: llc < %s -march=mipsel | FileCheck %s -check-prefix=CHECK-EL +; RUN: llc < %s -march=mips | FileCheck %s -check-prefix=CHECK-EB + +define double @func0(double %d0, double %d1) nounwind readnone { +entry: +; CHECK-EL: func0: +; CHECK-EL: lui $[[T0:[0-9]+]], 32767 +; CHECK-EL: lui $[[T1:[0-9]+]], 32768 +; CHECK-EL: mfc1 $[[HI0:[0-9]+]], $f13 +; CHECK-EL: ori $[[MSK0:[0-9]+]], $[[T0]], 65535 +; CHECK-EL: mfc1 $[[HI1:[0-9]+]], $f15 +; CHECK-EL: ori $[[MSK1:[0-9]+]], $[[T1]], 0 +; CHECK-EL: and $[[AND0:[0-9]+]], $[[HI0]], $[[MSK0]] +; CHECK-EL: and $[[AND1:[0-9]+]], $[[HI1]], $[[MSK1]] +; CHECK-EL: mfc1 $[[LO0:[0-9]+]], $f12 +; CHECK-EL: or $[[OR:[0-9]+]], $[[AND0]], $[[AND1]] +; CHECK-EL: mtc1 $[[LO0]], $f0 +; CHECK-EL: mtc1 $[[OR]], $f1 +; +; CHECK-EB: lui $[[T0:[0-9]+]], 32767 +; CHECK-EB: lui $[[T1:[0-9]+]], 32768 +; CHECK-EB: mfc1 $[[HI0:[0-9]+]], $f12 +; CHECK-EB: ori $[[MSK0:[0-9]+]], $[[T0]], 65535 +; CHECK-EB: mfc1 $[[HI1:[0-9]+]], $f14 +; CHECK-EB: ori $[[MSK1:[0-9]+]], $[[T1]], 0 +; CHECK-EB: and $[[AND0:[0-9]+]], $[[HI0]], $[[MSK0]] +; CHECK-EB: and $[[AND1:[0-9]+]], $[[HI1]], $[[MSK1]] +; CHECK-EB: or $[[OR:[0-9]+]], $[[AND0]], $[[AND1]] +; CHECK-EB: mfc1 $[[LO0:[0-9]+]], $f13 +; CHECK-EB: mtc1 $[[OR]], $f0 +; CHECK-EB: mtc1 $[[LO0]], $f1 + %call = tail call double @copysign(double %d0, double %d1) nounwind readnone + ret double %call +} + +declare double @copysign(double, double) nounwind readnone + +define float @func1(float %f0, float %f1) nounwind readnone { +entry: +; CHECK-EL: func1: +; CHECK-EL: lui $[[T0:[0-9]+]], 32767 +; CHECK-EL: lui $[[T1:[0-9]+]], 32768 +; CHECK-EL: mfc1 $[[ARG0:[0-9]+]], $f12 +; CHECK-EL: ori $[[MSK0:[0-9]+]], $[[T0]], 65535 +; CHECK-EL: mfc1 $[[ARG1:[0-9]+]], $f14 +; CHECK-EL: ori $[[MSK1:[0-9]+]], $[[T1]], 0 +; CHECK-EL: and $[[T2:[0-9]+]], $[[ARG0]], $[[MSK0]] +; CHECK-EL: and $[[T3:[0-9]+]], $[[ARG1]], $[[MSK1]] +; CHECK-EL: or $[[T4:[0-9]+]], $[[T2]], $[[T3]] +; CHECK-EL: mtc1 $[[T4]], $f0 + %call 
= tail call float @copysignf(float %f0, float %f1) nounwind readnone + ret float %call +} + +declare float @copysignf(float, float) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/Mips/fpbr.ll b/src/LLVM/test/CodeGen/Mips/fpbr.ll new file mode 100644 index 0000000..0a6478b --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/fpbr.ll
@@ -0,0 +1,119 @@ +; RUN: llc < %s -march=mipsel | FileCheck %s + +define void @func0(float %f2, float %f3) nounwind { +entry: +; CHECK: c.eq.s +; CHECK: bc1f + %cmp = fcmp oeq float %f2, %f3 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + tail call void (...)* @g0() nounwind + br label %if.end + +if.else: ; preds = %entry + tail call void (...)* @g1() nounwind + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret void +} + +declare void @g0(...) + +declare void @g1(...) + +define void @func1(float %f2, float %f3) nounwind { +entry: +; CHECK: c.olt.s +; CHECK: bc1f + %cmp = fcmp olt float %f2, %f3 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + tail call void (...)* @g0() nounwind + br label %if.end + +if.else: ; preds = %entry + tail call void (...)* @g1() nounwind + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret void +} + +define void @func2(float %f2, float %f3) nounwind { +entry: +; CHECK: c.ole.s +; CHECK: bc1f + %cmp = fcmp ugt float %f2, %f3 + br i1 %cmp, label %if.else, label %if.then + +if.then: ; preds = %entry + tail call void (...)* @g0() nounwind + br label %if.end + +if.else: ; preds = %entry + tail call void (...)* @g1() nounwind + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret void +} + +define void @func3(double %f2, double %f3) nounwind { +entry: +; CHECK: c.eq.d +; CHECK: bc1f + %cmp = fcmp oeq double %f2, %f3 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + tail call void (...)* @g0() nounwind + br label %if.end + +if.else: ; preds = %entry + tail call void (...)* @g1() nounwind + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret void +} + +define void @func4(double %f2, double %f3) nounwind { +entry: +; CHECK: c.olt.d +; CHECK: bc1f + %cmp = fcmp olt double %f2, %f3 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + tail call void (...)* @g0() nounwind + br label %if.end + +if.else: 
; preds = %entry + tail call void (...)* @g1() nounwind + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret void +} + +define void @func5(double %f2, double %f3) nounwind { +entry: +; CHECK: c.ole.d +; CHECK: bc1f + %cmp = fcmp ugt double %f2, %f3 + br i1 %cmp, label %if.else, label %if.then + +if.then: ; preds = %entry + tail call void (...)* @g0() nounwind + br label %if.end + +if.else: ; preds = %entry + tail call void (...)* @g1() nounwind + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret void +}
diff --git a/src/LLVM/test/CodeGen/Mips/fpcmp.ll b/src/LLVM/test/CodeGen/Mips/fpcmp.ll new file mode 100644 index 0000000..86545e3 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/fpcmp.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=mipsel | FileCheck %s -check-prefix=CHECK-MIPS32 + +@g1 = external global i32 + +define i32 @f(float %f0, float %f1) nounwind { +entry: +; CHECK-MIPS32: c.olt.s +; CHECK-MIPS32: movt +; CHECK-MIPS32: c.olt.s +; CHECK-MIPS32: movt + %cmp = fcmp olt float %f0, %f1 + %conv = zext i1 %cmp to i32 + %tmp2 = load i32* @g1, align 4 + %add = add nsw i32 %tmp2, %conv + store i32 %add, i32* @g1, align 4 + %cond = select i1 %cmp, i32 10, i32 20 + ret i32 %cond +}
diff --git a/src/LLVM/test/CodeGen/Mips/frame-address.ll b/src/LLVM/test/CodeGen/Mips/frame-address.ll new file mode 100644 index 0000000..9df1808 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/frame-address.ll
@@ -0,0 +1,12 @@ +; RUN: llc -march=mipsel < %s | FileCheck %s + +declare i8* @llvm.frameaddress(i32) nounwind readnone + +define i8* @f() nounwind { +entry: + %0 = call i8* @llvm.frameaddress(i32 0) + ret i8* %0 + +; CHECK: addu $fp, $sp, $zero +; CHECK: addu $2, $zero, $fp +}
diff --git a/src/LLVM/test/CodeGen/Mips/gprestore.ll b/src/LLVM/test/CodeGen/Mips/gprestore.ll new file mode 100644 index 0000000..ee7e131 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/gprestore.ll
@@ -0,0 +1,32 @@ +; RUN: llc -march=mips < %s | FileCheck %s + +@p = external global i32 +@q = external global i32 +@r = external global i32 + +define void @f0() nounwind { +entry: +; CHECK: jalr +; CHECK-NOT: got({{.*}})($gp) +; CHECK: lw $gp +; CHECK: jalr +; CHECK-NOT: got({{.*}})($gp) +; CHECK: lw $gp +; CHECK: jalr +; CHECK-NOT: got({{.*}})($gp) +; CHECK: lw $gp + tail call void (...)* @f1() nounwind + %tmp = load i32* @p, align 4 + tail call void @f2(i32 %tmp) nounwind + %tmp1 = load i32* @q, align 4 + %tmp2 = load i32* @r, align 4 + tail call void @f3(i32 %tmp1, i32 %tmp2) nounwind + ret void +} + +declare void @f1(...) + +declare void @f2(i32) + +declare void @f3(i32, i32) +
diff --git a/src/LLVM/test/CodeGen/Mips/i64arg.ll b/src/LLVM/test/CodeGen/Mips/i64arg.ll new file mode 100644 index 0000000..87cf2a6 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/i64arg.ll
@@ -0,0 +1,34 @@ +; RUN: llc -march=mips < %s | FileCheck %s + +define void @f1(i64 %ll1, float %f, i64 %ll, i32 %i, float %f2) nounwind { +entry: +; CHECK: addu $[[R1:[0-9]+]], $zero, $5 +; CHECK: addu $[[R0:[0-9]+]], $zero, $4 +; CHECK: lw $25, %call16(ff1) +; CHECK: ori $6, ${{[0-9]+}}, 3855 +; CHECK: ori $7, ${{[0-9]+}}, 22136 +; CHECK: jalr + tail call void @ff1(i32 %i, i64 1085102592623924856) nounwind +; CHECK: lw $25, %call16(ff2) +; CHECK: lw $[[R2:[0-9]+]], 88($sp) +; CHECK: lw $[[R3:[0-9]+]], 92($sp) +; CHECK: addu $4, $zero, $[[R2]] +; CHECK: addu $5, $zero, $[[R3]] +; CHECK: jalr $25 + tail call void @ff2(i64 %ll, double 3.000000e+00) nounwind + %sub = add nsw i32 %i, -1 +; CHECK: sw $[[R0]], 24($sp) +; CHECK: sw $[[R1]], 28($sp) +; CHECK: lw $25, %call16(ff3) +; CHECK: addu $6, $zero, $[[R2]] +; CHECK: addu $7, $zero, $[[R3]] +; CHECK: jalr $25 + tail call void @ff3(i32 %i, i64 %ll, i32 %sub, i64 %ll1) nounwind + ret void +} + +declare void @ff1(i32, i64) + +declare void @ff2(i64, double) + +declare void @ff3(i32, i64, i32, i64)
diff --git a/src/LLVM/test/CodeGen/Mips/inlineasmmemop.ll b/src/LLVM/test/CodeGen/Mips/inlineasmmemop.ll new file mode 100644 index 0000000..b5db58a --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/inlineasmmemop.ll
@@ -0,0 +1,23 @@ +; RUN: llc -march=mipsel < %s | FileCheck %s + +@g1 = external global i32 + +define i32 @f1(i32 %x) nounwind { +entry: +; CHECK: addiu $[[T0:[0-9]+]], $sp +; CHECK: #APP +; CHECK: sw $4, 0($[[T0]]) +; CHECK: #NO_APP +; CHECK: lw $[[T1:[0-9]+]], %got(g1)($gp) +; CHECK: #APP +; CHECK: lw $[[T3:[0-9]+]], 0($[[T0]]) +; CHECK: #NO_APP +; CHECK: sw $[[T3]], 0($[[T1]]) + + %l1 = alloca i32, align 4 + call void asm "sw $1, $0", "=*m,r"(i32* %l1, i32 %x) nounwind + %0 = call i32 asm "lw $0, $1", "=r,*m"(i32* %l1) nounwind + store i32 %0, i32* @g1, align 4 + ret i32 %0 +} +
diff --git a/src/LLVM/test/CodeGen/Mips/internalfunc.ll b/src/LLVM/test/CodeGen/Mips/internalfunc.ll new file mode 100644 index 0000000..434b386 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/internalfunc.ll
@@ -0,0 +1,52 @@ +; RUN: llc < %s -march=mipsel | FileCheck %s + +@caller.sf1 = internal unnamed_addr global void (...)* null, align 4 +@gf1 = external global void (...)* +@.str = private unnamed_addr constant [3 x i8] c"f2\00" + +define i32 @main(i32 %argc, i8** nocapture %argv) nounwind { +entry: +; CHECK: lw $[[R0:[0-9]+]], %got(f2)($gp) +; CHECK: addiu $25, $[[R0]], %lo(f2) + tail call fastcc void @f2() + ret i32 0 +} + +define void @caller(i32 %a0, i32 %a1) nounwind { +entry: +; CHECK: lw $[[R1:[0-9]+]], %got(caller.sf1)($gp) +; CHECK: lw $25, %lo(caller.sf1)($[[R1]]) + %tobool = icmp eq i32 %a1, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + %tmp1 = load void (...)** @caller.sf1, align 4 + tail call void (...)* %tmp1() nounwind + br label %if.end + +if.end: ; preds = %entry, %if.then +; CHECK: lw $[[R2:[0-9]+]], %got(sf2)($gp) +; CHECK: addiu ${{[0-9]+}}, $[[R2]], %lo(sf2) +; CHECK: lw $[[R3:[0-9]+]], %got(caller.sf1)($gp) +; CHECK: sw ${{[0-9]+}}, %lo(caller.sf1)($[[R3]]) + %tobool3 = icmp ne i32 %a0, 0 + %tmp4 = load void (...)** @gf1, align 4 + %cond = select i1 %tobool3, void (...)* %tmp4, void (...)* bitcast (void ()* @sf2 to void (...)*) + store void (...)* %cond, void (...)** @caller.sf1, align 4 + ret void +} + +define internal void @sf2() nounwind { +entry: + %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0)) nounwind + ret void +} + +declare i32 @printf(i8* nocapture, ...) nounwind + +define internal fastcc void @f2() nounwind noinline { +entry: + %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0)) nounwind + ret void +} +
diff --git a/src/LLVM/test/CodeGen/Mips/largeimm1.ll b/src/LLVM/test/CodeGen/Mips/largeimm1.ll new file mode 100644 index 0000000..d65cc02 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/largeimm1.ll
@@ -0,0 +1,13 @@ +; RUN: llc -march=mipsel < %s | FileCheck %s + +; CHECK: lui $at, 49152 +; CHECK: lui $at, 16384 +define void @f() nounwind { +entry: + %a1 = alloca [1073741824 x i8], align 1 + %arrayidx = getelementptr inbounds [1073741824 x i8]* %a1, i32 0, i32 1048676 + call void @f2(i8* %arrayidx) nounwind + ret void +} + +declare void @f2(i8*)
diff --git a/src/LLVM/test/CodeGen/Mips/largeimmprinting.ll b/src/LLVM/test/CodeGen/Mips/largeimmprinting.ll new file mode 100644 index 0000000..579a319 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/largeimmprinting.ll
@@ -0,0 +1,27 @@ +; DISABLED: llc -march=mipsel -mcpu=4ke < %s | FileCheck %s +; RUN: false + +; byval is currently unsupported. +; XFAIL: * + +%struct.S1 = type { [65536 x i8] } + +@s1 = external global %struct.S1 + +define void @f() nounwind { +entry: +; CHECK: lui $at, 65534 +; CHECK: addu $at, $sp, $at +; CHECK: addiu $sp, $at, -24 +; CHECK: .cprestore 65536 + + %agg.tmp = alloca %struct.S1, align 1 + %tmp = getelementptr inbounds %struct.S1* %agg.tmp, i32 0, i32 0, i32 0 + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* getelementptr inbounds (%struct.S1* @s1, i32 0, i32 0, i32 0), i32 65536, i32 1, i1 false) + call void @f2(%struct.S1* byval %agg.tmp) nounwind + ret void +} + +declare void @f2(%struct.S1* byval) + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/src/LLVM/test/CodeGen/Mips/madd-msub.ll b/src/LLVM/test/CodeGen/Mips/madd-msub.ll new file mode 100644 index 0000000..0aeabb3 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/madd-msub.ll
@@ -0,0 +1,65 @@ +; RUN: llc -march=mips < %s | FileCheck %s + +; CHECK: madd +define i64 @madd1(i32 %a, i32 %b, i32 %c) nounwind readnone { +entry: + %conv = sext i32 %a to i64 + %conv2 = sext i32 %b to i64 + %mul = mul nsw i64 %conv2, %conv + %conv4 = sext i32 %c to i64 + %add = add nsw i64 %mul, %conv4 + ret i64 %add +} + +; CHECK: maddu +define i64 @madd2(i32 %a, i32 %b, i32 %c) nounwind readnone { +entry: + %conv = zext i32 %a to i64 + %conv2 = zext i32 %b to i64 + %mul = mul nsw i64 %conv2, %conv + %conv4 = zext i32 %c to i64 + %add = add nsw i64 %mul, %conv4 + ret i64 %add +} + +; CHECK: madd +define i64 @madd3(i32 %a, i32 %b, i64 %c) nounwind readnone { +entry: + %conv = sext i32 %a to i64 + %conv2 = sext i32 %b to i64 + %mul = mul nsw i64 %conv2, %conv + %add = add nsw i64 %mul, %c + ret i64 %add +} + +; CHECK: msub +define i64 @msub1(i32 %a, i32 %b, i32 %c) nounwind readnone { +entry: + %conv = sext i32 %c to i64 + %conv2 = sext i32 %a to i64 + %conv4 = sext i32 %b to i64 + %mul = mul nsw i64 %conv4, %conv2 + %sub = sub nsw i64 %conv, %mul + ret i64 %sub +} + +; CHECK: msubu +define i64 @msub2(i32 %a, i32 %b, i32 %c) nounwind readnone { +entry: + %conv = zext i32 %c to i64 + %conv2 = zext i32 %a to i64 + %conv4 = zext i32 %b to i64 + %mul = mul nsw i64 %conv4, %conv2 + %sub = sub nsw i64 %conv, %mul + ret i64 %sub +} + +; CHECK: msub +define i64 @msub3(i32 %a, i32 %b, i64 %c) nounwind readnone { +entry: + %conv = sext i32 %a to i64 + %conv3 = sext i32 %b to i64 + %mul = mul nsw i64 %conv3, %conv + %sub = sub nsw i64 %c, %mul + ret i64 %sub +}
diff --git a/src/LLVM/test/CodeGen/Mips/mips64fpldst.ll b/src/LLVM/test/CodeGen/Mips/mips64fpldst.ll new file mode 100644 index 0000000..b8f3ca9 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/mips64fpldst.ll
@@ -0,0 +1,58 @@ +; RUN: llc < %s -march=mips64el -mcpu=mips64r1 -mattr=n64 | FileCheck %s -check-prefix=CHECK-N64 +; RUN: llc < %s -march=mips64el -mcpu=mips64r1 -mattr=n32 | FileCheck %s -check-prefix=CHECK-N32 + +@f0 = common global float 0.000000e+00, align 4 +@d0 = common global double 0.000000e+00, align 8 +@f1 = common global float 0.000000e+00, align 4 +@d1 = common global double 0.000000e+00, align 8 + +define float @funcfl1() nounwind readonly { +entry: +; CHECK-N64: funcfl1 +; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(f0) +; CHECK-N64: lwc1 $f{{[0-9]+}}, 0($[[R0]]) +; CHECK-N32: funcfl1 +; CHECK-N32: lw $[[R0:[0-9]+]], %got(f0) +; CHECK-N32: lwc1 $f{{[0-9]+}}, 0($[[R0]]) + %0 = load float* @f0, align 4 + ret float %0 +} + +define double @funcfl2() nounwind readonly { +entry: +; CHECK-N64: funcfl2 +; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(d0) +; CHECK-N64: ldc1 $f{{[0-9]+}}, 0($[[R0]]) +; CHECK-N32: funcfl2 +; CHECK-N32: lw $[[R0:[0-9]+]], %got(d0) +; CHECK-N32: ldc1 $f{{[0-9]+}}, 0($[[R0]]) + %0 = load double* @d0, align 8 + ret double %0 +} + +define void @funcfs1() nounwind { +entry: +; CHECK-N64: funcfs1 +; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(f0) +; CHECK-N64: swc1 $f{{[0-9]+}}, 0($[[R0]]) +; CHECK-N32: funcfs1 +; CHECK-N32: lw $[[R0:[0-9]+]], %got(f0) +; CHECK-N32: swc1 $f{{[0-9]+}}, 0($[[R0]]) + %0 = load float* @f1, align 4 + store float %0, float* @f0, align 4 + ret void +} + +define void @funcfs2() nounwind { +entry: +; CHECK-N64: funcfs2 +; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(d0) +; CHECK-N64: sdc1 $f{{[0-9]+}}, 0($[[R0]]) +; CHECK-N32: funcfs2 +; CHECK-N32: lw $[[R0:[0-9]+]], %got(d0) +; CHECK-N32: sdc1 $f{{[0-9]+}}, 0($[[R0]]) + %0 = load double* @d1, align 8 + store double %0, double* @d0, align 8 + ret void +} +
diff --git a/src/LLVM/test/CodeGen/Mips/mips64instrs.ll b/src/LLVM/test/CodeGen/Mips/mips64instrs.ll new file mode 100644 index 0000000..c9812a2 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/mips64instrs.ll
@@ -0,0 +1,143 @@ +; RUN: llc -march=mips64el -mcpu=mips64r1 < %s | FileCheck %s + +define i64 @f0(i64 %a0, i64 %a1) nounwind readnone { +entry: +; CHECK: daddu + %add = add nsw i64 %a1, %a0 + ret i64 %add +} + +define i64 @f1(i64 %a0, i64 %a1) nounwind readnone { +entry: +; CHECK: dsubu + %sub = sub nsw i64 %a0, %a1 + ret i64 %sub +} + +define i64 @f4(i64 %a0, i64 %a1) nounwind readnone { +entry: +; CHECK: and + %and = and i64 %a1, %a0 + ret i64 %and +} + +define i64 @f5(i64 %a0, i64 %a1) nounwind readnone { +entry: +; CHECK: or + %or = or i64 %a1, %a0 + ret i64 %or +} + +define i64 @f6(i64 %a0, i64 %a1) nounwind readnone { +entry: +; CHECK: xor + %xor = xor i64 %a1, %a0 + ret i64 %xor +} + +define i64 @f7(i64 %a0) nounwind readnone { +entry: +; CHECK: daddiu ${{[0-9]+}}, ${{[0-9]+}}, 20 + %add = add nsw i64 %a0, 20 + ret i64 %add +} + +define i64 @f8(i64 %a0) nounwind readnone { +entry: +; CHECK: daddiu ${{[0-9]+}}, ${{[0-9]+}}, -20 + %sub = add nsw i64 %a0, -20 + ret i64 %sub +} + +define i64 @f9(i64 %a0) nounwind readnone { +entry: +; CHECK: andi ${{[0-9]+}}, ${{[0-9]+}}, 20 + %and = and i64 %a0, 20 + ret i64 %and +} + +define i64 @f10(i64 %a0) nounwind readnone { +entry: +; CHECK: ori ${{[0-9]+}}, ${{[0-9]+}}, 20 + %or = or i64 %a0, 20 + ret i64 %or +} + +define i64 @f11(i64 %a0) nounwind readnone { +entry: +; CHECK: xori ${{[0-9]+}}, ${{[0-9]+}}, 20 + %xor = xor i64 %a0, 20 + ret i64 %xor +} + +define i64 @f12(i64 %a, i64 %b) nounwind readnone { +entry: +; CHECK: mult + %mul = mul nsw i64 %b, %a + ret i64 %mul +} + +define i64 @f13(i64 %a, i64 %b) nounwind readnone { +entry: +; CHECK: mult + %mul = mul i64 %b, %a + ret i64 %mul +} + +define i64 @f14(i64 %a, i64 %b) nounwind readnone { +entry: +; CHECK: ddiv $zero +; CHECK: mflo + %div = sdiv i64 %a, %b + ret i64 %div +} + +define i64 @f15(i64 %a, i64 %b) nounwind readnone { +entry: +; CHECK: ddivu $zero +; CHECK: mflo + %div = udiv i64 %a, %b + ret i64 %div +} + +define i64 @f16(i64 %a, i64 %b) nounwind 
readnone { +entry: +; CHECK: ddiv $zero +; CHECK: mfhi + %rem = srem i64 %a, %b + ret i64 %rem +} + +define i64 @f17(i64 %a, i64 %b) nounwind readnone { +entry: +; CHECK: ddivu $zero +; CHECK: mfhi + %rem = urem i64 %a, %b + ret i64 %rem +} + +declare i64 @llvm.ctlz.i64(i64) nounwind readnone + +define i64 @f18(i64 %X) nounwind readnone { +entry: +; CHECK: dclz $2, $4 + %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %X) + ret i64 %tmp1 +} + +define i64 @f19(i64 %X) nounwind readnone { +entry: +; CHECK: dclo $2, $4 + %neg = xor i64 %X, -1 + %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %neg) + ret i64 %tmp1 +} + +define i64 @f20(i64 %a, i64 %b) nounwind readnone { +entry: +; CHECK: nor + %or = or i64 %b, %a + %neg = xor i64 %or, -1 + ret i64 %neg +} +
diff --git a/src/LLVM/test/CodeGen/Mips/mips64intldst.ll b/src/LLVM/test/CodeGen/Mips/mips64intldst.ll new file mode 100644 index 0000000..fdf496b --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/mips64intldst.ll
@@ -0,0 +1,157 @@ +; RUN: llc < %s -march=mips64el -mcpu=mips64r1 -mattr=n64 | FileCheck %s -check-prefix=CHECK-N64 +; RUN: llc < %s -march=mips64el -mcpu=mips64r1 -mattr=n32 | FileCheck %s -check-prefix=CHECK-N32 + +@c = common global i8 0, align 4 +@s = common global i16 0, align 4 +@i = common global i32 0, align 4 +@l = common global i64 0, align 8 +@uc = common global i8 0, align 4 +@us = common global i16 0, align 4 +@ui = common global i32 0, align 4 +@l1 = common global i64 0, align 8 + +define i64 @func1() nounwind readonly { +entry: +; CHECK-N64: func1 +; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(c) +; CHECK-N64: lb ${{[0-9]+}}, 0($[[R0]]) +; CHECK-N32: func1 +; CHECK-N32: lw $[[R0:[0-9]+]], %got(c) +; CHECK-N32: lb ${{[0-9]+}}, 0($[[R0]]) + %0 = load i8* @c, align 4 + %conv = sext i8 %0 to i64 + ret i64 %conv +} + +define i64 @func2() nounwind readonly { +entry: +; CHECK-N64: func2 +; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(s) +; CHECK-N64: lh ${{[0-9]+}}, 0($[[R0]]) +; CHECK-N32: func2 +; CHECK-N32: lw $[[R0:[0-9]+]], %got(s) +; CHECK-N32: lh ${{[0-9]+}}, 0($[[R0]]) + %0 = load i16* @s, align 4 + %conv = sext i16 %0 to i64 + ret i64 %conv +} + +define i64 @func3() nounwind readonly { +entry: +; CHECK-N64: func3 +; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(i) +; CHECK-N64: lw ${{[0-9]+}}, 0($[[R0]]) +; CHECK-N32: func3 +; CHECK-N32: lw $[[R0:[0-9]+]], %got(i) +; CHECK-N32: lw ${{[0-9]+}}, 0($[[R0]]) + %0 = load i32* @i, align 4 + %conv = sext i32 %0 to i64 + ret i64 %conv +} + +define i64 @func4() nounwind readonly { +entry: +; CHECK-N64: func4 +; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(l) +; CHECK-N64: ld ${{[0-9]+}}, 0($[[R0]]) +; CHECK-N32: func4 +; CHECK-N32: lw $[[R0:[0-9]+]], %got(l) +; CHECK-N32: ld ${{[0-9]+}}, 0($[[R0]]) + %0 = load i64* @l, align 8 + ret i64 %0 +} + +define i64 @ufunc1() nounwind readonly { +entry: +; CHECK-N64: ufunc1 +; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(uc) +; CHECK-N64: lbu ${{[0-9]+}}, 0($[[R0]]) +; CHECK-N32: ufunc1 +; 
CHECK-N32: lw $[[R0:[0-9]+]], %got(uc) +; CHECK-N32: lbu ${{[0-9]+}}, 0($[[R0]]) + %0 = load i8* @uc, align 4 + %conv = zext i8 %0 to i64 + ret i64 %conv +} + +define i64 @ufunc2() nounwind readonly { +entry: +; CHECK-N64: ufunc2 +; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(us) +; CHECK-N64: lhu ${{[0-9]+}}, 0($[[R0]]) +; CHECK-N32: ufunc2 +; CHECK-N32: lw $[[R0:[0-9]+]], %got(us) +; CHECK-N32: lhu ${{[0-9]+}}, 0($[[R0]]) + %0 = load i16* @us, align 4 + %conv = zext i16 %0 to i64 + ret i64 %conv +} + +define i64 @ufunc3() nounwind readonly { +entry: +; CHECK-N64: ufunc3 +; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(ui) +; CHECK-N64: lwu ${{[0-9]+}}, 0($[[R0]]) +; CHECK-N32: ufunc3 +; CHECK-N32: lw $[[R0:[0-9]+]], %got(ui) +; CHECK-N32: lwu ${{[0-9]+}}, 0($[[R0]]) + %0 = load i32* @ui, align 4 + %conv = zext i32 %0 to i64 + ret i64 %conv +} + +define void @sfunc1() nounwind { +entry: +; CHECK-N64: sfunc1 +; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(c) +; CHECK-N64: sb ${{[0-9]+}}, 0($[[R0]]) +; CHECK-N32: sfunc1 +; CHECK-N32: lw $[[R0:[0-9]+]], %got(c) +; CHECK-N32: sb ${{[0-9]+}}, 0($[[R0]]) + %0 = load i64* @l1, align 8 + %conv = trunc i64 %0 to i8 + store i8 %conv, i8* @c, align 4 + ret void +} + +define void @sfunc2() nounwind { +entry: +; CHECK-N64: sfunc2 +; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(s) +; CHECK-N64: sh ${{[0-9]+}}, 0($[[R0]]) +; CHECK-N32: sfunc2 +; CHECK-N32: lw $[[R0:[0-9]+]], %got(s) +; CHECK-N32: sh ${{[0-9]+}}, 0($[[R0]]) + %0 = load i64* @l1, align 8 + %conv = trunc i64 %0 to i16 + store i16 %conv, i16* @s, align 4 + ret void +} + +define void @sfunc3() nounwind { +entry: +; CHECK-N64: sfunc3 +; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(i) +; CHECK-N64: sw ${{[0-9]+}}, 0($[[R0]]) +; CHECK-N32: sfunc3 +; CHECK-N32: lw $[[R0:[0-9]+]], %got(i) +; CHECK-N32: sw ${{[0-9]+}}, 0($[[R0]]) + %0 = load i64* @l1, align 8 + %conv = trunc i64 %0 to i32 + store i32 %conv, i32* @i, align 4 + ret void +} + +define void @sfunc4() nounwind { +entry: +; CHECK-N64: 
sfunc4 +; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(l) +; CHECK-N64: sd ${{[0-9]+}}, 0($[[R0]]) +; CHECK-N32: sfunc4 +; CHECK-N32: lw $[[R0:[0-9]+]], %got(l) +; CHECK-N32: sd ${{[0-9]+}}, 0($[[R0]]) + %0 = load i64* @l1, align 8 + store i64 %0, i64* @l, align 8 + ret void +} +
diff --git a/src/LLVM/test/CodeGen/Mips/mips64shift.ll b/src/LLVM/test/CodeGen/Mips/mips64shift.ll new file mode 100644 index 0000000..cc5e508 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/mips64shift.ll
@@ -0,0 +1,104 @@ +; RUN: llc -march=mips64el -mcpu=mips64r2 < %s | FileCheck %s + +define i64 @f0(i64 %a0, i64 %a1) nounwind readnone { +entry: +; CHECK: dsllv + %shl = shl i64 %a0, %a1 + ret i64 %shl +} + +define i64 @f1(i64 %a0, i64 %a1) nounwind readnone { +entry: +; CHECK: dsrav + %shr = ashr i64 %a0, %a1 + ret i64 %shr +} + +define i64 @f2(i64 %a0, i64 %a1) nounwind readnone { +entry: +; CHECK: dsrlv + %shr = lshr i64 %a0, %a1 + ret i64 %shr +} + +define i64 @f3(i64 %a0) nounwind readnone { +entry: +; CHECK: dsll ${{[0-9]+}}, ${{[0-9]+}}, 10 + %shl = shl i64 %a0, 10 + ret i64 %shl +} + +define i64 @f4(i64 %a0) nounwind readnone { +entry: +; CHECK: dsra ${{[0-9]+}}, ${{[0-9]+}}, 10 + %shr = ashr i64 %a0, 10 + ret i64 %shr +} + +define i64 @f5(i64 %a0) nounwind readnone { +entry: +; CHECK: dsrl ${{[0-9]+}}, ${{[0-9]+}}, 10 + %shr = lshr i64 %a0, 10 + ret i64 %shr +} + +define i64 @f6(i64 %a0) nounwind readnone { +entry: +; CHECK: dsll32 ${{[0-9]+}}, ${{[0-9]+}}, 8 + %shl = shl i64 %a0, 40 + ret i64 %shl +} + +define i64 @f7(i64 %a0) nounwind readnone { +entry: +; CHECK: dsra32 ${{[0-9]+}}, ${{[0-9]+}}, 8 + %shr = ashr i64 %a0, 40 + ret i64 %shr +} + +define i64 @f8(i64 %a0) nounwind readnone { +entry: +; CHECK: dsrl32 ${{[0-9]+}}, ${{[0-9]+}}, 8 + %shr = lshr i64 %a0, 40 + ret i64 %shr +} + +define i64 @f9(i64 %a0, i64 %a1) nounwind readnone { +entry: +; CHECK: drotrv + %shr = lshr i64 %a0, %a1 + %sub = sub i64 64, %a1 + %shl = shl i64 %a0, %sub + %or = or i64 %shl, %shr + ret i64 %or +} + +define i64 @f10(i64 %a0, i64 %a1) nounwind readnone { +entry: +; CHECK: drotrv + %shl = shl i64 %a0, %a1 + %sub = sub i64 64, %a1 + %shr = lshr i64 %a0, %sub + %or = or i64 %shr, %shl + ret i64 %or +} + +define i64 @f11(i64 %a0) nounwind readnone { +entry: +; CHECK: drotr ${{[0-9]+}}, ${{[0-9]+}}, 10 + %shr = lshr i64 %a0, 10 + %shl = shl i64 %a0, 54 + %or = or i64 %shr, %shl + ret i64 %or +} + +define i64 @f12(i64 %a0) nounwind readnone { +entry: +; CHECK: drotr32 
${{[0-9]+}}, ${{[0-9]+}}, 22 + %shl = shl i64 %a0, 10 + %shr = lshr i64 %a0, 54 + %or = or i64 %shl, %shr + ret i64 %or +} + +
diff --git a/src/LLVM/test/CodeGen/Mips/mipslopat.ll b/src/LLVM/test/CodeGen/Mips/mipslopat.ll new file mode 100644 index 0000000..0279828 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/mipslopat.ll
@@ -0,0 +1,19 @@ +; This test does not check the machine code output. +; RUN: llc -march=mips < %s + +@stat_vol_ptr_int = internal global i32* null, align 4 +@stat_ptr_vol_int = internal global i32* null, align 4 + +define void @simple_vol_file() nounwind { +entry: + %tmp = volatile load i32** @stat_vol_ptr_int, align 4 + %0 = bitcast i32* %tmp to i8* + call void @llvm.prefetch(i8* %0, i32 0, i32 0, i32 1) + %tmp1 = load i32** @stat_ptr_vol_int, align 4 + %1 = bitcast i32* %tmp1 to i8* + call void @llvm.prefetch(i8* %1, i32 0, i32 0, i32 1) + ret void +} + +declare void @llvm.prefetch(i8* nocapture, i32, i32, i32) nounwind +
diff --git a/src/LLVM/test/CodeGen/Mips/o32_cc.ll b/src/LLVM/test/CodeGen/Mips/o32_cc.ll new file mode 100644 index 0000000..70b66ef --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/o32_cc.ll
@@ -0,0 +1,325 @@ +; RUN: llc -march=mips < %s | FileCheck %s + +; FIXME: Disabled because it unpredictably fails on certain platforms. +; REQUIRES: disabled + +; $f12, $f14 +; CHECK: ldc1 $f12, %lo +; CHECK: ldc1 $f14, %lo +define void @testlowercall0() nounwind { +entry: + tail call void @f0(double 5.000000e+00, double 6.000000e+00) nounwind + ret void +} + +declare void @f0(double, double) + +; $f12, $f14 +; CHECK: lwc1 $f12, %lo +; CHECK: lwc1 $f14, %lo +define void @testlowercall1() nounwind { +entry: + tail call void @f1(float 8.000000e+00, float 9.000000e+00) nounwind + ret void +} + +declare void @f1(float, float) + +; $f12, $f14 +; CHECK: lwc1 $f12, %lo +; CHECK: ldc1 $f14, %lo +define void @testlowercall2() nounwind { +entry: + tail call void @f2(float 8.000000e+00, double 6.000000e+00) nounwind + ret void +} + +declare void @f2(float, double) + +; $f12, $f14 +; CHECK: ldc1 $f12, %lo +; CHECK: lwc1 $f14, %lo +define void @testlowercall3() nounwind { +entry: + tail call void @f3(double 5.000000e+00, float 9.000000e+00) nounwind + ret void +} + +declare void @f3(double, float) + +; $4, $5, $6, $7 +; CHECK: addiu $4, $zero, 12 +; CHECK: addiu $5, $zero, 13 +; CHECK: addiu $6, $zero, 14 +; CHECK: addiu $7, $zero, 15 +define void @testlowercall4() nounwind { +entry: + tail call void @f4(i32 12, i32 13, i32 14, i32 15) nounwind + ret void +} + +declare void @f4(i32, i32, i32, i32) + +; $f12, $6, stack +; CHECK: sw +; CHECK: sw +; CHECK: ldc1 $f12, %lo +; CHECK: addiu $6, $zero, 23 +define void @testlowercall5() nounwind { +entry: + tail call void @f5(double 1.500000e+01, i32 23, double 1.700000e+01) nounwind + ret void +} + +declare void @f5(double, i32, double) + +; $f12, $6, $7 +; CHECK: ldc1 $f12, %lo +; CHECK: addiu $6, $zero, 33 +; CHECK: addiu $7, $zero, 24 +define void @testlowercall6() nounwind { +entry: + tail call void @f6(double 2.500000e+01, i32 33, i32 24) nounwind + ret void +} + +declare void @f6(double, i32, i32) + +; $f12, $5, $6 +; CHECK: lwc1 
$f12, %lo +; CHECK: addiu $5, $zero, 43 +; CHECK: addiu $6, $zero, 34 +define void @testlowercall7() nounwind { +entry: + tail call void @f7(float 1.800000e+01, i32 43, i32 34) nounwind + ret void +} + +declare void @f7(float, i32, i32) + +; $4, $5, $6, stack +; CHECK: sw +; CHECK: sw +; CHECK: addiu $4, $zero, 22 +; CHECK: addiu $5, $zero, 53 +; CHECK: addiu $6, $zero, 44 +define void @testlowercall8() nounwind { +entry: + tail call void @f8(i32 22, i32 53, i32 44, double 4.000000e+00) nounwind + ret void +} + +declare void @f8(i32, i32, i32, double) + +; $4, $5, $6, $7 +; CHECK: addiu $4, $zero, 32 +; CHECK: addiu $5, $zero, 63 +; CHECK: addiu $6, $zero, 54 +; CHECK: ori $7 +define void @testlowercall9() nounwind { +entry: + tail call void @f9(i32 32, i32 63, i32 54, float 1.100000e+01) nounwind + ret void +} + +declare void @f9(i32, i32, i32, float) + +; $4, $5, ($6, $7) +; CHECK: addiu $4, $zero, 42 +; CHECK: addiu $5, $zero, 73 +; CHECK: addiu $6, $zero, 0 +; CHECK: ori $7 +define void @testlowercall10() nounwind { +entry: + tail call void @f10(i32 42, i32 73, double 2.700000e+01) nounwind + ret void +} + +declare void @f10(i32, i32, double) + +; $4, ($6, $7) +; CHECK: addiu $4, $zero, 52 +; CHECK: addiu $6, $zero, 0 +; CHECK: ori $7 +define void @testlowercall11() nounwind { +entry: + tail call void @f11(i32 52, double 1.600000e+01) nounwind + ret void +} + +declare void @f11(i32, double) + +; $f12, $f14, $6, $7 +; CHECK: lwc1 $f12, %lo +; CHECK: lwc1 $f14, %lo +; CHECK: ori $6 +; CHECK: ori $7 +define void @testlowercall12() nounwind { +entry: + tail call void @f12(float 2.800000e+01, float 1.900000e+01, float 1.000000e+01, float 2.100000e+01) nounwind + ret void +} + +declare void @f12(float, float, float, float) + +; $f12, $5, $6, $7 +; CHECK: lwc1 $f12, %lo +; CHECK: addiu $5, $zero, 83 +; CHECK: ori $6 +; CHECK: addiu $7, $zero, 25 +define void @testlowercall13() nounwind { +entry: + tail call void @f13(float 3.800000e+01, i32 83, float 2.000000e+01, i32 
25) nounwind + ret void +} + + +declare void @f13(float, i32, float, i32) + +; $f12, $f14, $7 +; CHECK: ldc1 $f12, %lo +; CHECK: lwc1 $f14, %lo +; CHECK: ori $7 +define void @testlowercall14() nounwind { +entry: + tail call void @f14(double 3.500000e+01, float 2.900000e+01, float 3.000000e+01) nounwind + ret void +} + +declare void @f14(double, float, float) + +; $f12, $f14, ($6, $7) +; CHECK: lwc1 $f12, %lo +; CHECK: lwc1 $f14, %lo +; CHECK: addiu $6, $zero, 0 +; CHECK: ori $7 +define void @testlowercall15() nounwind { +entry: + tail call void @f15(float 4.800000e+01, float 3.900000e+01, double 3.700000e+01) nounwind + ret void +} + +declare void @f15(float, float, double) + +; $4, $5, $6, $7 +; CHECK: addiu $4, $zero, 62 +; CHECK: ori $5 +; CHECK: addiu $6, $zero, 64 +; CHECK: ori $7 +define void @testlowercall16() nounwind { +entry: + tail call void @f16(i32 62, float 4.900000e+01, i32 64, float 3.100000e+01) nounwind + ret void +} + +declare void @f16(i32, float, i32, float) + +; $4, $5, $6, $7 +; CHECK: addiu $4, $zero, 72 +; CHECK: ori $5 +; CHECK: addiu $6, $zero, 74 +; CHECK: addiu $7, $zero, 35 +define void @testlowercall17() nounwind { +entry: + tail call void @f17(i32 72, float 5.900000e+01, i32 74, i32 35) nounwind + ret void +} + +declare void @f17(i32, float, i32, i32) + +; $4, $5, $6, $7 +; CHECK: addiu $4, $zero, 82 +; CHECK: addiu $5, $zero, 93 +; CHECK: ori $6 +; CHECK: addiu $7, $zero, 45 +define void @testlowercall18() nounwind { +entry: + tail call void @f18(i32 82, i32 93, float 4.000000e+01, i32 45) nounwind + ret void +} + +declare void @f18(i32, i32, float, i32) + + +; $4, ($6, $7), stack +; CHECK: sw +; CHECK: sw +; CHECK: addiu $4, $zero, 92 +; CHECK: addiu $6, $zero, 0 +; CHECK: ori $7 +define void @testlowercall20() nounwind { +entry: + tail call void @f20(i32 92, double 2.600000e+01, double 4.700000e+01) nounwind + ret void +} + +declare void @f20(i32, double, double) + +; $f12, $5 +; CHECK: lwc1 $f12, %lo +; CHECK: addiu $5, $zero, 
103 +define void @testlowercall21() nounwind { +entry: + tail call void @f21(float 5.800000e+01, i32 103) nounwind + ret void +} + +declare void @f21(float, i32) + +; $f12, $5, ($6, $7) +; CHECK: lwc1 $f12, %lo +; CHECK: addiu $5, $zero, 113 +; CHECK: addiu $6, $zero, 0 +; CHECK: ori $7 +define void @testlowercall22() nounwind { +entry: + tail call void @f22(float 6.800000e+01, i32 113, double 5.700000e+01) nounwind + ret void +} + +declare void @f22(float, i32, double) + +; $f12, f6 +; CHECK: ldc1 $f12, %lo +; CHECK: addiu $6, $zero, 123 +define void @testlowercall23() nounwind { +entry: + tail call void @f23(double 4.500000e+01, i32 123) nounwind + ret void +} + +declare void @f23(double, i32) + +; $f12,$6, stack +; CHECK: sw +; CHECK: sw +; CHECK: ldc1 $f12, %lo +; CHECK: addiu $6, $zero, 133 +define void @testlowercall24() nounwind { +entry: + tail call void @f24(double 5.500000e+01, i32 133, double 6.700000e+01) nounwind + ret void +} + +declare void @f24(double, i32, double) + +; CHECK: lwc1 $f12, %lo +; lwc1 $f12, %lo +; CHECK: lwc1 $f14, %lo +; CHECK: ori $6 +; CHECK: ori $7 +; CHECK: lwc1 $f12, %lo +; CHECK: addiu $5, $zero, 83 +; CHECK: ori $6 +; CHECK: addiu $7, $zero, 25 +; CHECK: addiu $4, $zero, 82 +; CHECK: addiu $5, $zero, 93 +; CHECK: ori $6 +; CHECK: addiu $7, $zero, 45 +define void @testlowercall25() nounwind { +entry: + tail call void @f12(float 2.800000e+01, float 1.900000e+01, float 1.000000e+01, float 2.100000e+01) nounwind + tail call void @f13(float 3.800000e+01, i32 83, float 2.000000e+01, i32 25) nounwind + tail call void @f18(i32 82, i32 93, float 4.000000e+01, i32 45) nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/Mips/o32_cc_byval.ll b/src/LLVM/test/CodeGen/Mips/o32_cc_byval.ll new file mode 100644 index 0000000..e673480 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/o32_cc_byval.ll
@@ -0,0 +1,127 @@ +; RUN: llc -march=mipsel < %s | FileCheck %s + +%0 = type { i8, i16, i32, i64, double, i32, [4 x i8] } +%struct.S1 = type { i8, i16, i32, i64, double, i32 } +%struct.S2 = type { [4 x i32] } +%struct.S3 = type { i8 } + +@f1.s1 = internal unnamed_addr constant %0 { i8 1, i16 2, i32 3, i64 4, double 5.000000e+00, i32 6, [4 x i8] undef }, align 8 +@f1.s2 = internal unnamed_addr constant %struct.S2 { [4 x i32] [i32 7, i32 8, i32 9, i32 10] }, align 4 + +define void @f1() nounwind { +entry: +; CHECK: lw $[[R1:[0-9]+]], %got(f1.s1)($gp) +; CHECK: addiu $[[R0:[0-9]+]], $[[R1]], %lo(f1.s1) +; CHECK: lw $[[R2:[0-9]+]], 8($[[R0]]) +; CHECK: lw $[[R7:[0-9]+]], 12($[[R0]]) +; CHECK: lw $[[R3:[0-9]+]], 16($[[R0]]) +; CHECK: lw $[[R4:[0-9]+]], 20($[[R0]]) +; CHECK: lw $[[R5:[0-9]+]], 24($[[R0]]) +; CHECK: lw $[[R6:[0-9]+]], 28($[[R0]]) +; CHECK: sw $[[R2]], 16($sp) +; CHECK: sw $[[R7]], 20($sp) +; CHECK: sw $[[R3]], 24($sp) +; CHECK: sw $[[R4]], 28($sp) +; CHECK: sw $[[R5]], 32($sp) +; CHECK: sw $[[R6]], 36($sp) +; CHECK: lw $6, %lo(f1.s1)($[[R1]]) +; CHECK: lw $7, 4($[[R0]]) + %agg.tmp10 = alloca %struct.S3, align 4 + call void @callee1(float 2.000000e+01, %struct.S1* byval bitcast (%0* @f1.s1 to %struct.S1*)) nounwind + call void @callee2(%struct.S2* byval @f1.s2) nounwind + %tmp11 = getelementptr inbounds %struct.S3* %agg.tmp10, i32 0, i32 0 + store i8 11, i8* %tmp11, align 4 + call void @callee3(float 2.100000e+01, %struct.S3* byval %agg.tmp10, %struct.S1* byval bitcast (%0* @f1.s1 to %struct.S1*)) nounwind + ret void +} + +declare void @callee1(float, %struct.S1* byval) + +declare void @callee2(%struct.S2* byval) + +declare void @callee3(float, %struct.S3* byval, %struct.S1* byval) + +define void @f2(float %f, %struct.S1* nocapture byval %s1) nounwind { +entry: +; CHECK: addiu $sp, $sp, -56 +; CHECK: sw $6, 64($sp) +; CHECK: sw $7, 68($sp) +; CHECK: ldc1 $f[[F0:[0-9]+]], 80($sp) +; CHECK: lw $[[R2:[0-9]+]], 68($sp) +; CHECK: lh $[[R1:[0-9]+]], 66($sp) +; 
CHECK: lb $[[R0:[0-9]+]], 64($sp) +; CHECK: lw $[[R3:[0-9]+]], 72($sp) +; CHECK: lw $[[R4:[0-9]+]], 76($sp) +; CHECK: lw $4, 88($sp) +; CHECK: sw $[[R3]], 16($sp) +; CHECK: sw $[[R4]], 20($sp) +; CHECK: sw $[[R2]], 24($sp) +; CHECK: sw $[[R1]], 28($sp) +; CHECK: sw $[[R0]], 32($sp) +; CHECK: mfc1 $6, $f[[F0]] + + %i2 = getelementptr inbounds %struct.S1* %s1, i32 0, i32 5 + %tmp = load i32* %i2, align 4, !tbaa !0 + %d = getelementptr inbounds %struct.S1* %s1, i32 0, i32 4 + %tmp1 = load double* %d, align 8, !tbaa !3 + %ll = getelementptr inbounds %struct.S1* %s1, i32 0, i32 3 + %tmp2 = load i64* %ll, align 8, !tbaa !4 + %i = getelementptr inbounds %struct.S1* %s1, i32 0, i32 2 + %tmp3 = load i32* %i, align 4, !tbaa !0 + %s = getelementptr inbounds %struct.S1* %s1, i32 0, i32 1 + %tmp4 = load i16* %s, align 2, !tbaa !5 + %c = getelementptr inbounds %struct.S1* %s1, i32 0, i32 0 + %tmp5 = load i8* %c, align 1, !tbaa !1 + tail call void @callee4(i32 %tmp, double %tmp1, i64 %tmp2, i32 %tmp3, i16 signext %tmp4, i8 signext %tmp5, float %f) nounwind + ret void +} + +declare void @callee4(i32, double, i64, i32, i16 signext, i8 signext, float) + +define void @f3(%struct.S2* nocapture byval %s2) nounwind { +entry: +; CHECK: addiu $sp, $sp, -56 +; CHECK: sw $4, 56($sp) +; CHECK: sw $5, 60($sp) +; CHECK: sw $6, 64($sp) +; CHECK: sw $7, 68($sp) +; CHECK: lw $[[R0:[0-9]+]], 68($sp) +; CHECK: lw $4, 56($sp) +; CHECK: sw $[[R0]], 24($sp) + + %arrayidx = getelementptr inbounds %struct.S2* %s2, i32 0, i32 0, i32 0 + %tmp = load i32* %arrayidx, align 4, !tbaa !0 + %arrayidx2 = getelementptr inbounds %struct.S2* %s2, i32 0, i32 0, i32 3 + %tmp3 = load i32* %arrayidx2, align 4, !tbaa !0 + tail call void @callee4(i32 %tmp, double 2.000000e+00, i64 3, i32 %tmp3, i16 signext 4, i8 signext 5, float 6.000000e+00) nounwind + ret void +} + +define void @f4(float %f, %struct.S3* nocapture byval %s3, %struct.S1* nocapture byval %s1) nounwind { +entry: +; CHECK: addiu $sp, $sp, -56 +; CHECK: sw 
$5, 60($sp) +; CHECK: sw $6, 64($sp) +; CHECK: sw $7, 68($sp) +; CHECK: lw $[[R1:[0-9]+]], 88($sp) +; CHECK: lb $[[R0:[0-9]+]], 60($sp) +; CHECK: lw $4, 68($sp) +; CHECK: sw $[[R1]], 24($sp) +; CHECK: sw $[[R0]], 32($sp) + + %i = getelementptr inbounds %struct.S1* %s1, i32 0, i32 2 + %tmp = load i32* %i, align 4, !tbaa !0 + %i2 = getelementptr inbounds %struct.S1* %s1, i32 0, i32 5 + %tmp1 = load i32* %i2, align 4, !tbaa !0 + %c = getelementptr inbounds %struct.S3* %s3, i32 0, i32 0 + %tmp2 = load i8* %c, align 1, !tbaa !1 + tail call void @callee4(i32 %tmp, double 2.000000e+00, i64 3, i32 %tmp1, i16 signext 4, i8 signext %tmp2, float 6.000000e+00) nounwind + ret void +} + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null} +!3 = metadata !{metadata !"double", metadata !1} +!4 = metadata !{metadata !"long long", metadata !1} +!5 = metadata !{metadata !"short", metadata !1}
diff --git a/src/LLVM/test/CodeGen/Mips/o32_cc_vararg.ll b/src/LLVM/test/CodeGen/Mips/o32_cc_vararg.ll new file mode 100644 index 0000000..4a3d9ab --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/o32_cc_vararg.ll
@@ -0,0 +1,271 @@ +; RUN: llc -march=mipsel -pre-RA-sched=source < %s | FileCheck %s + + +; All test functions do the same thing - they return the first variable +; argument. + +; All CHECK's do the same thing - they check whether variable arguments from +; registers are placed on correct stack locations, and whether the first +; variable argument is returned from the correct stack location. + + +declare void @llvm.va_start(i8*) nounwind +declare void @llvm.va_end(i8*) nounwind + +; return int +define i32 @va1(i32 %a, ...) nounwind { +entry: + %a.addr = alloca i32, align 4 + %ap = alloca i8*, align 4 + %b = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, i32 + store i32 %0, i32* %b, align 4 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_end(i8* %ap2) + %tmp = load i32* %b, align 4 + ret i32 %tmp + +; CHECK: va1: +; CHECK: addiu $sp, $sp, -16 +; CHECK: sw $7, 28($sp) +; CHECK: sw $6, 24($sp) +; CHECK: sw $5, 20($sp) +; CHECK: lw $2, 20($sp) +} + +; check whether the variable double argument will be accessed from the 8-byte +; aligned location (i.e. whether the address is computed by adding 7 and +; clearing lower 3 bits) +define double @va2(i32 %a, ...) 
nounwind { +entry: + %a.addr = alloca i32, align 4 + %ap = alloca i8*, align 4 + %b = alloca double, align 8 + store i32 %a, i32* %a.addr, align 4 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, double + store double %0, double* %b, align 8 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_end(i8* %ap2) + %tmp = load double* %b, align 8 + ret double %tmp + +; CHECK: va2: +; CHECK: addiu $sp, $sp, -16 +; CHECK: sw $7, 28($sp) +; CHECK: sw $6, 24($sp) +; CHECK: sw $5, 20($sp) +; CHECK: addiu $[[R0:[0-9]+]], $sp, 20 +; CHECK: addiu $[[R1:[0-9]+]], $[[R0]], 7 +; CHECK: addiu $[[R2:[0-9]+]], $zero, -8 +; CHECK: and $[[R3:[0-9]+]], $[[R1]], $[[R2]] +; CHECK: ldc1 $f0, 0($[[R3]]) +} + +; int +define i32 @va3(double %a, ...) nounwind { +entry: + %a.addr = alloca double, align 8 + %ap = alloca i8*, align 4 + %b = alloca i32, align 4 + store double %a, double* %a.addr, align 8 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, i32 + store i32 %0, i32* %b, align 4 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_end(i8* %ap2) + %tmp = load i32* %b, align 4 + ret i32 %tmp + +; CHECK: va3: +; CHECK: addiu $sp, $sp, -16 +; CHECK: sw $7, 28($sp) +; CHECK: sw $6, 24($sp) +; CHECK: lw $2, 24($sp) +} + +; double +define double @va4(double %a, ...) nounwind { +entry: + %a.addr = alloca double, align 8 + %ap = alloca i8*, align 4 + %b = alloca double, align 8 + store double %a, double* %a.addr, align 8 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, double + store double %0, double* %b, align 8 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_end(i8* %ap2) + %tmp = load double* %b, align 8 + ret double %tmp + +; CHECK: va4: +; CHECK: addiu $sp, $sp, -24 +; CHECK: sw $7, 36($sp) +; CHECK: sw $6, 32($sp) +; CHECK: addiu ${{[0-9]+}}, $sp, 32 +; CHECK: ldc1 $f0, 32($sp) +} + +; int +define i32 @va5(i32 %a, i32 %b, i32 %c, ...) 
nounwind { +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca i32, align 4 + %c.addr = alloca i32, align 4 + %ap = alloca i8*, align 4 + %d = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store i32 %b, i32* %b.addr, align 4 + store i32 %c, i32* %c.addr, align 4 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, i32 + store i32 %0, i32* %d, align 4 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_end(i8* %ap2) + %tmp = load i32* %d, align 4 + ret i32 %tmp + +; CHECK: va5: +; CHECK: addiu $sp, $sp, -24 +; CHECK: sw $7, 36($sp) +; CHECK: lw $2, 36($sp) +} + +; double +define double @va6(i32 %a, i32 %b, i32 %c, ...) nounwind { +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca i32, align 4 + %c.addr = alloca i32, align 4 + %ap = alloca i8*, align 4 + %d = alloca double, align 8 + store i32 %a, i32* %a.addr, align 4 + store i32 %b, i32* %b.addr, align 4 + store i32 %c, i32* %c.addr, align 4 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, double + store double %0, double* %d, align 8 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_end(i8* %ap2) + %tmp = load double* %d, align 8 + ret double %tmp + +; CHECK: va6: +; CHECK: addiu $sp, $sp, -24 +; CHECK: sw $7, 36($sp) +; CHECK: addiu $[[R0:[0-9]+]], $sp, 36 +; CHECK: addiu $[[R1:[0-9]+]], $[[R0]], 7 +; CHECK: addiu $[[R2:[0-9]+]], $zero, -8 +; CHECK: and $[[R3:[0-9]+]], $[[R1]], $[[R2]] +; CHECK: ldc1 $f0, 0($[[R3]]) +} + +; int +define i32 @va7(i32 %a, double %b, ...) 
nounwind { +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca double, align 8 + %ap = alloca i8*, align 4 + %c = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store double %b, double* %b.addr, align 8 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, i32 + store i32 %0, i32* %c, align 4 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_end(i8* %ap2) + %tmp = load i32* %c, align 4 + ret i32 %tmp + +; CHECK: va7: +; CHECK: addiu $sp, $sp, -24 +; CHECK: lw $2, 40($sp) +} + +; double +define double @va8(i32 %a, double %b, ...) nounwind { +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca double, align 8 + %ap = alloca i8*, align 4 + %c = alloca double, align 8 + store i32 %a, i32* %a.addr, align 4 + store double %b, double* %b.addr, align 8 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, double + store double %0, double* %c, align 8 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_end(i8* %ap2) + %tmp = load double* %c, align 8 + ret double %tmp + +; CHECK: va8: +; CHECK: addiu $sp, $sp, -32 +; CHECK: addiu ${{[0-9]+}}, $sp, 48 +; CHECK: ldc1 $f0, 48($sp) +} + +; int +define i32 @va9(double %a, double %b, i32 %c, ...) nounwind { +entry: + %a.addr = alloca double, align 8 + %b.addr = alloca double, align 8 + %c.addr = alloca i32, align 4 + %ap = alloca i8*, align 4 + %d = alloca i32, align 4 + store double %a, double* %a.addr, align 8 + store double %b, double* %b.addr, align 8 + store i32 %c, i32* %c.addr, align 4 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, i32 + store i32 %0, i32* %d, align 4 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_end(i8* %ap2) + %tmp = load i32* %d, align 4 + ret i32 %tmp + +; CHECK: va9: +; CHECK: addiu $sp, $sp, -32 +; CHECK: lw $2, 52($sp) +} + +; double +define double @va10(double %a, double %b, i32 %c, ...) 
nounwind { +entry: + %a.addr = alloca double, align 8 + %b.addr = alloca double, align 8 + %c.addr = alloca i32, align 4 + %ap = alloca i8*, align 4 + %d = alloca double, align 8 + store double %a, double* %a.addr, align 8 + store double %b, double* %b.addr, align 8 + store i32 %c, i32* %c.addr, align 4 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, double + store double %0, double* %d, align 8 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_end(i8* %ap2) + %tmp = load double* %d, align 8 + ret double %tmp + +; CHECK: va10: +; CHECK: addiu $sp, $sp, -32 +; CHECK: addiu $[[R0:[0-9]+]], $sp, 52 +; CHECK: addiu $[[R1:[0-9]+]], $[[R0]], 7 +; CHECK: addiu $[[R2:[0-9]+]], $zero, -8 +; CHECK: and $[[R3:[0-9]+]], $[[R1]], $[[R2]] +; CHECK: ldc1 $f0, 0($[[R3]]) +}
diff --git a/src/LLVM/test/CodeGen/Mips/private.ll b/src/LLVM/test/CodeGen/Mips/private.ll new file mode 100644 index 0000000..4cc48f0 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/private.ll
@@ -0,0 +1,19 @@ +; Test to make sure that the 'private' is used correctly. +; +; RUN: llc < %s -march=mips > %t +; RUN: grep \\\$foo: %t +; RUN: grep call.*\\\$foo %t +; RUN: grep \\\$baz: %t +; RUN: grep lw.*\\\$baz %t + +define private void @foo() { + ret void +} + +@baz = private global i32 4 + +define i32 @bar() { + call void @foo() + %1 = load i32* @baz, align 4 + ret i32 %1 +}
diff --git a/src/LLVM/test/CodeGen/Mips/rotate.ll b/src/LLVM/test/CodeGen/Mips/rotate.ll new file mode 100644 index 0000000..8e27f4a --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/rotate.ll
@@ -0,0 +1,40 @@ +; RUN: llc -march=mips -mcpu=4ke < %s | FileCheck %s + +; CHECK: rotrv $2, $4 +define i32 @rot0(i32 %a, i32 %b) nounwind readnone { +entry: + %shl = shl i32 %a, %b + %sub = sub i32 32, %b + %shr = lshr i32 %a, %sub + %or = or i32 %shr, %shl + ret i32 %or +} + +; CHECK: rotr $2, $4, 22 +define i32 @rot1(i32 %a) nounwind readnone { +entry: + %shl = shl i32 %a, 10 + %shr = lshr i32 %a, 22 + %or = or i32 %shl, %shr + ret i32 %or +} + +; CHECK: rotrv $2, $4, $5 +define i32 @rot2(i32 %a, i32 %b) nounwind readnone { +entry: + %shr = lshr i32 %a, %b + %sub = sub i32 32, %b + %shl = shl i32 %a, %sub + %or = or i32 %shl, %shr + ret i32 %or +} + +; CHECK: rotr $2, $4, 10 +define i32 @rot3(i32 %a) nounwind readnone { +entry: + %shr = lshr i32 %a, 10 + %shl = shl i32 %a, 22 + %or = or i32 %shr, %shl + ret i32 %or +} +
diff --git a/src/LLVM/test/CodeGen/Mips/select.ll b/src/LLVM/test/CodeGen/Mips/select.ll new file mode 100644 index 0000000..40115be --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/select.ll
@@ -0,0 +1,164 @@ +; RUN: llc < %s -march=mipsel | FileCheck %s -check-prefix=CHECK + +@d2 = external global double +@d3 = external global double + +define i32 @sel1(i32 %s, i32 %f0, i32 %f1) nounwind readnone { +entry: +; CHECK: movn + %tobool = icmp ne i32 %s, 0 + %cond = select i1 %tobool, i32 %f1, i32 %f0 + ret i32 %cond +} + +define float @sel2(i32 %s, float %f0, float %f1) nounwind readnone { +entry: +; CHECK: movn.s + %tobool = icmp ne i32 %s, 0 + %cond = select i1 %tobool, float %f0, float %f1 + ret float %cond +} + +define double @sel2_1(i32 %s, double %f0, double %f1) nounwind readnone { +entry: +; CHECK: movn.d + %tobool = icmp ne i32 %s, 0 + %cond = select i1 %tobool, double %f0, double %f1 + ret double %cond +} + +define float @sel3(float %f0, float %f1, float %f2, float %f3) nounwind readnone { +entry: +; CHECK: c.eq.s +; CHECK: movt.s + %cmp = fcmp oeq float %f2, %f3 + %cond = select i1 %cmp, float %f0, float %f1 + ret float %cond +} + +define float @sel4(float %f0, float %f1, float %f2, float %f3) nounwind readnone { +entry: +; CHECK: c.olt.s +; CHECK: movt.s + %cmp = fcmp olt float %f2, %f3 + %cond = select i1 %cmp, float %f0, float %f1 + ret float %cond +} + +define float @sel5(float %f0, float %f1, float %f2, float %f3) nounwind readnone { +entry: +; CHECK: c.ule.s +; CHECK: movf.s + %cmp = fcmp ogt float %f2, %f3 + %cond = select i1 %cmp, float %f0, float %f1 + ret float %cond +} + +define double @sel5_1(double %f0, double %f1, float %f2, float %f3) nounwind readnone { +entry: +; CHECK: c.ule.s +; CHECK: movf.d + %cmp = fcmp ogt float %f2, %f3 + %cond = select i1 %cmp, double %f0, double %f1 + ret double %cond +} + +define double @sel6(double %f0, double %f1, double %f2, double %f3) nounwind readnone { +entry: +; CHECK: c.eq.d +; CHECK: movt.d + %cmp = fcmp oeq double %f2, %f3 + %cond = select i1 %cmp, double %f0, double %f1 + ret double %cond +} + +define double @sel7(double %f0, double %f1, double %f2, double %f3) nounwind readnone { +entry: 
+; CHECK: c.olt.d +; CHECK: movt.d + %cmp = fcmp olt double %f2, %f3 + %cond = select i1 %cmp, double %f0, double %f1 + ret double %cond +} + +define double @sel8(double %f0, double %f1, double %f2, double %f3) nounwind readnone { +entry: +; CHECK: c.ule.d +; CHECK: movf.d + %cmp = fcmp ogt double %f2, %f3 + %cond = select i1 %cmp, double %f0, double %f1 + ret double %cond +} + +define float @sel8_1(float %f0, float %f1, double %f2, double %f3) nounwind readnone { +entry: +; CHECK: c.ule.d +; CHECK: movf.s + %cmp = fcmp ogt double %f2, %f3 + %cond = select i1 %cmp, float %f0, float %f1 + ret float %cond +} + +define i32 @sel9(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone { +entry: +; CHECK: c.eq.s +; CHECK: movt + %cmp = fcmp oeq float %f2, %f3 + %cond = select i1 %cmp, i32 %f0, i32 %f1 + ret i32 %cond +} + +define i32 @sel10(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone { +entry: +; CHECK: c.olt.s +; CHECK: movt + %cmp = fcmp olt float %f2, %f3 + %cond = select i1 %cmp, i32 %f0, i32 %f1 + ret i32 %cond +} + +define i32 @sel11(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone { +entry: +; CHECK: c.ule.s +; CHECK: movf + %cmp = fcmp ogt float %f2, %f3 + %cond = select i1 %cmp, i32 %f0, i32 %f1 + ret i32 %cond +} + +define i32 @sel12(i32 %f0, i32 %f1) nounwind readonly { +entry: +; CHECK: c.eq.d +; CHECK: movt + %tmp = load double* @d2, align 8, !tbaa !0 + %tmp1 = load double* @d3, align 8, !tbaa !0 + %cmp = fcmp oeq double %tmp, %tmp1 + %cond = select i1 %cmp, i32 %f0, i32 %f1 + ret i32 %cond +} + +define i32 @sel13(i32 %f0, i32 %f1) nounwind readonly { +entry: +; CHECK: c.olt.d +; CHECK: movt + %tmp = load double* @d2, align 8, !tbaa !0 + %tmp1 = load double* @d3, align 8, !tbaa !0 + %cmp = fcmp olt double %tmp, %tmp1 + %cond = select i1 %cmp, i32 %f0, i32 %f1 + ret i32 %cond +} + +define i32 @sel14(i32 %f0, i32 %f1) nounwind readonly { +entry: +; CHECK: c.ule.d +; CHECK: movf + %tmp = load double* @d2, align 8, !tbaa !0 + %tmp1 = 
load double* @d3, align 8, !tbaa !0 + %cmp = fcmp ogt double %tmp, %tmp1 + %cond = select i1 %cmp, i32 %f0, i32 %f1 + ret i32 %cond +} + +!0 = metadata !{metadata !"double", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/src/LLVM/test/CodeGen/Mips/tls.ll b/src/LLVM/test/CodeGen/Mips/tls.ll new file mode 100644 index 0000000..b0474b4 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/tls.ll
@@ -0,0 +1,46 @@ +; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=PIC +; RUN: llc -march=mipsel -relocation-model=static < %s \ +; RUN: | FileCheck %s -check-prefix=STATIC + + +@t1 = thread_local global i32 0, align 4 + +define i32 @f1() nounwind { +entry: + %tmp = load i32* @t1, align 4 + ret i32 %tmp + +; CHECK: f1: + +; PIC: lw $25, %call16(__tls_get_addr)($gp) +; PIC: addiu $4, $gp, %tlsgd(t1) +; PIC: jalr $25 +; PIC: lw $2, 0($2) + +; STATIC: rdhwr $3, $29 +; STATIC: lui $[[R0:[0-9]+]], %tprel_hi(t1) +; STATIC: addiu $[[R1:[0-9]+]], $[[R0]], %tprel_lo(t1) +; STATIC: addu $[[R2:[0-9]+]], $3, $[[R1]] +; STATIC: lw $2, 0($[[R2]]) +} + + +@t2 = external thread_local global i32 + +define i32 @f2() nounwind { +entry: + %tmp = load i32* @t2, align 4 + ret i32 %tmp + +; CHECK: f2: + +; PIC: lw $25, %call16(__tls_get_addr)($gp) +; PIC: addiu $4, $gp, %tlsgd(t2) +; PIC: jalr $25 +; PIC: lw $2, 0($2) + +; STATIC: rdhwr $3, $29 +; STATIC: lw $[[R0:[0-9]+]], %gottprel(t2)($gp) +; STATIC: addu $[[R1:[0-9]+]], $3, $[[R0]] +; STATIC: lw $2, 0($[[R1]]) +}
diff --git a/src/LLVM/test/CodeGen/Mips/unalignedload.ll b/src/LLVM/test/CodeGen/Mips/unalignedload.ll new file mode 100644 index 0000000..433e896 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/unalignedload.ll
@@ -0,0 +1,41 @@ +; RUN: llc < %s -march=mipsel | FileCheck %s -check-prefix=CHECK-EL +; RUN: llc < %s -march=mips | FileCheck %s -check-prefix=CHECK-EB +%struct.S2 = type { %struct.S1, %struct.S1 } +%struct.S1 = type { i8, i8 } +%struct.S4 = type { [7 x i8] } + +@s2 = common global %struct.S2 zeroinitializer, align 1 +@s4 = common global %struct.S4 zeroinitializer, align 1 + +define void @foo1() nounwind { +entry: +; CHECK-EL: lw $25, %call16(foo2) +; CHECK-EL: ulhu $4, 2 +; CHECK-EL: lw $[[R0:[0-9]+]], %got(s4) +; CHECK-EL: lbu $[[R1:[0-9]+]], 6($[[R0]]) +; CHECK-EL: ulhu $[[R2:[0-9]+]], 4($[[R0]]) +; CHECK-EL: sll $[[R3:[0-9]+]], $[[R1]], 16 +; CHECK-EL: ulw $4, 0($[[R0]]) +; CHECK-EL: lw $25, %call16(foo4) +; CHECK-EL: or $5, $[[R2]], $[[R3]] + +; CHECK-EB: ulhu $[[R0:[0-9]+]], 2 +; CHECK-EB: lw $25, %call16(foo2) +; CHECK-EB: sll $4, $[[R0]], 16 +; CHECK-EB: lw $[[R1:[0-9]+]], %got(s4) +; CHECK-EB: ulhu $[[R2:[0-9]+]], 4($[[R1]]) +; CHECK-EB: lbu $[[R3:[0-9]+]], 6($[[R1]]) +; CHECK-EB: sll $[[R4:[0-9]+]], $[[R2]], 16 +; CHECK-EB: sll $[[R5:[0-9]+]], $[[R3]], 8 +; CHECK-EB: ulw $4, 0($[[R1]]) +; CHECK-EB: lw $25, %call16(foo4) +; CHECK-EB: or $5, $[[R4]], $[[R5]] + + tail call void @foo2(%struct.S1* byval getelementptr inbounds (%struct.S2* @s2, i32 0, i32 1)) nounwind + tail call void @foo4(%struct.S4* byval @s4) nounwind + ret void +} + +declare void @foo2(%struct.S1* byval) + +declare void @foo4(%struct.S4* byval)
diff --git a/src/LLVM/test/CodeGen/Mips/weak.ll b/src/LLVM/test/CodeGen/Mips/weak.ll new file mode 100644 index 0000000..09dd2a4 --- /dev/null +++ b/src/LLVM/test/CodeGen/Mips/weak.ll
@@ -0,0 +1,12 @@ +; RUN: llc -march=mips < %s | FileCheck %s + +@t = common global i32 (...)* null, align 4 + +define void @f() nounwind { +entry: + store i32 (...)* @test_weak, i32 (...)** @t, align 4 + ret void +} + +; CHECK: .weak test_weak +declare extern_weak i32 @test_weak(...)
diff --git a/src/LLVM/test/CodeGen/PTX/20110926-sitofp.ll b/src/LLVM/test/CodeGen/PTX/20110926-sitofp.ll new file mode 100644 index 0000000..38d35c5 --- /dev/null +++ b/src/LLVM/test/CodeGen/PTX/20110926-sitofp.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -march=ptx32 | FileCheck %s + +@A = common global [1536 x [1536 x float]] zeroinitializer, align 4 +@B = common global [1536 x [1536 x float]] zeroinitializer, align 4 + +define internal ptx_device void @init_array(i32 %x, i32 %y) { + %arrayidx103 = getelementptr [1536 x [1536 x float]]* @A, i32 0, i32 %x, i32 %y + %arrayidx224 = getelementptr [1536 x [1536 x float]]* @B, i32 0, i32 %x, i32 %y + %mul5 = mul i32 %x, %y + %rem = srem i32 %mul5, 1024 + %add = add nsw i32 %rem, 1 +; CHECK: cvt.rn.f64.s32 %fd{{[0-9]+}}, %r{{[0-9]+}} + %conv = sitofp i32 %add to double + %div = fmul double %conv, 5.000000e-01 + %conv7 = fptrunc double %div to float + store float %conv7, float* %arrayidx103, align 4 + %rem14 = srem i32 %mul5, 1024 + %add15 = add nsw i32 %rem14, 1 + %conv16 = sitofp i32 %add15 to double + %div17 = fmul double %conv16, 5.000000e-01 + %conv18 = fptrunc double %div17 to float + store float %conv18, float* %arrayidx224, align 4 + ret void +}
diff --git a/src/LLVM/test/CodeGen/PTX/add.ll b/src/LLVM/test/CodeGen/PTX/add.ll new file mode 100644 index 0000000..8b10d11 --- /dev/null +++ b/src/LLVM/test/CodeGen/PTX/add.ll
@@ -0,0 +1,71 @@ +; RUN: llc < %s -march=ptx32 | FileCheck %s + +define ptx_device i16 @t1_u16(i16 %x, i16 %y) { +; CHECK: add.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}, %rh{{[0-9]+}}; +; CHECK: ret; + %z = add i16 %x, %y + ret i16 %z +} + +define ptx_device i32 @t1_u32(i32 %x, i32 %y) { +; CHECK: add.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}; +; CHECK: ret; + %z = add i32 %x, %y + ret i32 %z +} + +define ptx_device i64 @t1_u64(i64 %x, i64 %y) { +; CHECK: add.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}, %rd{{[0-9]+}}; +; CHECK: ret; + %z = add i64 %x, %y + ret i64 %z +} + +define ptx_device float @t1_f32(float %x, float %y) { +; CHECK: add.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}} +; CHECK: ret; + %z = fadd float %x, %y + ret float %z +} + +define ptx_device double @t1_f64(double %x, double %y) { +; CHECK: add.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}} +; CHECK: ret; + %z = fadd double %x, %y + ret double %z +} + +define ptx_device i16 @t2_u16(i16 %x) { +; CHECK: add.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}, 1; +; CHECK: ret; + %z = add i16 %x, 1 + ret i16 %z +} + +define ptx_device i32 @t2_u32(i32 %x) { +; CHECK: add.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, 1; +; CHECK: ret; + %z = add i32 %x, 1 + ret i32 %z +} + +define ptx_device i64 @t2_u64(i64 %x) { +; CHECK: add.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}, 1; +; CHECK: ret; + %z = add i64 %x, 1 + ret i64 %z +} + +define ptx_device float @t2_f32(float %x) { +; CHECK: add.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, 0D3FF0000000000000; +; CHECK: ret; + %z = fadd float %x, 1.0 + ret float %z +} + +define ptx_device double @t2_f64(double %x) { +; CHECK: add.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, 0D3FF0000000000000; +; CHECK: ret; + %z = fadd double %x, 1.0 + ret double %z +}
diff --git a/src/LLVM/test/CodeGen/PTX/aggregates.ll b/src/LLVM/test/CodeGen/PTX/aggregates.ll new file mode 100644 index 0000000..3fc0c40 --- /dev/null +++ b/src/LLVM/test/CodeGen/PTX/aggregates.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -march=ptx32 -mattr=sm20 | FileCheck %s +; XFAIL: * + +%complex = type { float, float } + +define ptx_device %complex @complex_add(%complex %a, %complex %b) { +entry: +; CHECK: ld.param.f32 r[[R0:[0-9]+]], [__param_1]; +; CHECK-NEXT: ld.param.f32 r[[R2:[0-9]+]], [__param_3]; +; CHECK-NEXT: ld.param.f32 r[[R1:[0-9]+]], [__param_2]; +; CHECK-NEXT: ld.param.f32 r[[R3:[0-9]+]], [__param_4]; +; CHECK-NEXT: add.rn.f32 r[[R0]], r[[R0]], r[[R2]]; +; CHECK-NEXT: add.rn.f32 r[[R1]], r[[R1]], r[[R3]]; +; CHECK-NEXT: ret; + %a.real = extractvalue %complex %a, 0 + %a.imag = extractvalue %complex %a, 1 + %b.real = extractvalue %complex %b, 0 + %b.imag = extractvalue %complex %b, 1 + %ret.real = fadd float %a.real, %b.real + %ret.imag = fadd float %a.imag, %b.imag + %ret.0 = insertvalue %complex undef, float %ret.real, 0 + %ret.1 = insertvalue %complex %ret.0, float %ret.imag, 1 + ret %complex %ret.1 +}
diff --git a/src/LLVM/test/CodeGen/PTX/bitwise.ll b/src/LLVM/test/CodeGen/PTX/bitwise.ll new file mode 100644 index 0000000..1403a23 --- /dev/null +++ b/src/LLVM/test/CodeGen/PTX/bitwise.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -march=ptx32 | FileCheck %s + +; preds + +define ptx_device i32 @t1_and_preds(i1 %x, i1 %y) { +; CHECK: and.pred %p{{[0-9]+}}, %p{{[0-9]+}}, %p{{[0-9]+}} + %c = and i1 %x, %y + %d = zext i1 %c to i32 + ret i32 %d +} + +define ptx_device i32 @t1_or_preds(i1 %x, i1 %y) { +; CHECK: or.pred %p{{[0-9]+}}, %p{{[0-9]+}}, %p{{[0-9]+}} + %a = or i1 %x, %y + %b = zext i1 %a to i32 + ret i32 %b +} + +define ptx_device i32 @t1_xor_preds(i1 %x, i1 %y) { +; CHECK: xor.pred %p{{[0-9]+}}, %p{{[0-9]+}}, %p{{[0-9]+}} + %a = xor i1 %x, %y + %b = zext i1 %a to i32 + ret i32 %b +}
diff --git a/src/LLVM/test/CodeGen/PTX/bra.ll b/src/LLVM/test/CodeGen/PTX/bra.ll new file mode 100644 index 0000000..464c29c --- /dev/null +++ b/src/LLVM/test/CodeGen/PTX/bra.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -march=ptx32 | FileCheck %s + +define ptx_device void @test_bra_direct() { +; CHECK: bra $L__BB0_1; +entry: + br label %loop +loop: + br label %loop +} + +define ptx_device i32 @test_bra_cond_direct(i32 %x, i32 %y) { +entry: +; CHECK: setp.le.u32 %p0, %r[[R0:[0-9]+]], %r[[R1:[0-9]+]] + %p = icmp ugt i32 %x, %y +; CHECK-NEXT: @%p0 bra +; CHECK-NOT: bra + br i1 %p, label %clause.if, label %clause.else +clause.if: +; CHECK: mov.u32 %ret{{[0-9]+}}, %r[[R0]] + ret i32 %x +clause.else: +; CHECK: mov.u32 %ret{{[0-9]+}}, %r[[R1]] + ret i32 %y +}
diff --git a/src/LLVM/test/CodeGen/PTX/cvt.ll b/src/LLVM/test/CodeGen/PTX/cvt.ll new file mode 100644 index 0000000..a643d25 --- /dev/null +++ b/src/LLVM/test/CodeGen/PTX/cvt.ll
@@ -0,0 +1,290 @@ +; RUN: llc < %s -march=ptx32 | FileCheck %s + +; preds +; (note: we convert back to i32 to return) + +define ptx_device i32 @cvt_pred_i16(i16 %x, i1 %y) { +; CHECK: setp.gt.u16 %p[[P0:[0-9]+]], %rh{{[0-9]+}}, 0 +; CHECK: and.pred %p2, %p[[P0:[0-9]+]], %p{{[0-9]+}}; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0:[0-9]+]]; +; CHECK: ret; + %a = trunc i16 %x to i1 + %b = and i1 %a, %y + %c = zext i1 %b to i32 + ret i32 %c +} + +define ptx_device i32 @cvt_pred_i32(i32 %x, i1 %y) { +; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0 +; CHECK: and.pred %p2, %p[[P0:[0-9]+]], %p{{[0-9]+}}; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0:[0-9]+]]; +; CHECK: ret; + %a = trunc i32 %x to i1 + %b = and i1 %a, %y + %c = zext i1 %b to i32 + ret i32 %c +} + +define ptx_device i32 @cvt_pred_i64(i64 %x, i1 %y) { +; CHECK: setp.gt.u64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, 0 +; CHECK: and.pred %p2, %p[[P0:[0-9]+]], %p{{[0-9]+}}; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0:[0-9]+]]; +; CHECK: ret; + %a = trunc i64 %x to i1 + %b = and i1 %a, %y + %c = zext i1 %b to i32 + ret i32 %c +} + +define ptx_device i32 @cvt_pred_f32(float %x, i1 %y) { +; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0 +; CHECK: and.pred %p2, %p[[P0:[0-9]+]], %p{{[0-9]+}}; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0:[0-9]+]]; +; CHECK: ret; + %a = fptoui float %x to i1 + %b = and i1 %a, %y + %c = zext i1 %b to i32 + ret i32 %c +} + +define ptx_device i32 @cvt_pred_f64(double %x, i1 %y) { +; CHECK: setp.gt.u64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, 0 +; CHECK: and.pred %p2, %p[[P0:[0-9]+]], %p{{[0-9]+}}; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0:[0-9]+]]; +; CHECK: ret; + %a = fptoui double %x to i1 + %b = and i1 %a, %y + %c = zext i1 %b to i32 + ret i32 %c +} + +; i16 + +define ptx_device i16 @cvt_i16_preds(i1 %x) { +; CHECK: selp.u16 %ret{{[0-9]+}}, 1, 0, %p{{[0-9]+}}; +; CHECK: ret; + %a = zext i1 %x to i16 + ret i16 %a +} + +define ptx_device i16 @cvt_i16_i32(i32 %x) { +; CHECK: 
cvt.u16.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}; +; CHECK: ret; + %a = trunc i32 %x to i16 + ret i16 %a +} + +define ptx_device i16 @cvt_i16_i64(i64 %x) { +; CHECK: cvt.u16.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}; +; CHECK: ret; + %a = trunc i64 %x to i16 + ret i16 %a +} + +define ptx_device i16 @cvt_i16_f32(float %x) { +; CHECK: cvt.rzi.u16.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}; +; CHECK: ret; + %a = fptoui float %x to i16 + ret i16 %a +} + +define ptx_device i16 @cvt_i16_f64(double %x) { +; CHECK: cvt.rzi.u16.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}; +; CHECK: ret; + %a = fptoui double %x to i16 + ret i16 %a +} + +; i32 + +define ptx_device i32 @cvt_i32_preds(i1 %x) { +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p{{[0-9]+}}; +; CHECK: ret; + %a = zext i1 %x to i32 + ret i32 %a +} + +define ptx_device i32 @cvt_i32_i16(i16 %x) { +; CHECK: cvt.u32.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}; +; CHECK: ret; + %a = zext i16 %x to i32 + ret i32 %a +} + +define ptx_device i32 @cvt_i32_i64(i64 %x) { +; CHECK: cvt.u32.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}; +; CHECK: ret; + %a = trunc i64 %x to i32 + ret i32 %a +} + +define ptx_device i32 @cvt_i32_f32(float %x) { +; CHECK: cvt.rzi.u32.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}; +; CHECK: ret; + %a = fptoui float %x to i32 + ret i32 %a +} + +define ptx_device i32 @cvt_i32_f64(double %x) { +; CHECK: cvt.rzi.u32.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}; +; CHECK: ret; + %a = fptoui double %x to i32 + ret i32 %a +} + +; i64 + +define ptx_device i64 @cvt_i64_preds(i1 %x) { +; CHECK: selp.u64 %ret{{[0-9]+}}, 1, 0, %p{{[0-9]+}}; +; CHECK: ret; + %a = zext i1 %x to i64 + ret i64 %a +} + +define ptx_device i64 @cvt_i64_i16(i16 %x) { +; CHECK: cvt.u64.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}; +; CHECK: ret; + %a = zext i16 %x to i64 + ret i64 %a +} + +define ptx_device i64 @cvt_i64_i32(i32 %x) { +; CHECK: cvt.u64.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}; +; CHECK: ret; + %a = zext i32 %x to i64 + ret i64 %a +} + +define ptx_device i64 @cvt_i64_f32(float %x) { +; CHECK: cvt.rzi.u64.f32 %ret{{[0-9]+}}, 
%f{{[0-9]+}}; +; CHECK: ret; + %a = fptoui float %x to i64 + ret i64 %a +} + +define ptx_device i64 @cvt_i64_f64(double %x) { +; CHECK: cvt.rzi.u64.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}; +; CHECK: ret; + %a = fptoui double %x to i64 + ret i64 %a +} + +; f32 + +define ptx_device float @cvt_f32_preds(i1 %x) { +; CHECK: mov.b32 %f0, 1065353216; +; CHECK: mov.b32 %f1, 0; +; CHECK: selp.f32 %ret{{[0-9]+}}, %f0, %f1, %p{{[0-9]+}}; +; CHECK: ret; + %a = uitofp i1 %x to float + ret float %a +} + +define ptx_device float @cvt_f32_i16(i16 %x) { +; CHECK: cvt.rn.f32.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}; +; CHECK: ret; + %a = uitofp i16 %x to float + ret float %a +} + +define ptx_device float @cvt_f32_i32(i32 %x) { +; CHECK: cvt.rn.f32.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}; +; CHECK: ret; + %a = uitofp i32 %x to float + ret float %a +} + +define ptx_device float @cvt_f32_i64(i64 %x) { +; CHECK: cvt.rn.f32.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}; +; CHECK: ret; + %a = uitofp i64 %x to float + ret float %a +} + +define ptx_device float @cvt_f32_f64(double %x) { +; CHECK: cvt.rn.f32.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}; +; CHECK: ret; + %a = fptrunc double %x to float + ret float %a +} + +define ptx_device float @cvt_f32_s16(i16 %x) { +; CHECK: cvt.rn.f32.s16 %ret{{[0-9]+}}, %rh{{[0-9]+}} +; CHECK: ret + %a = sitofp i16 %x to float + ret float %a +} + +define ptx_device float @cvt_f32_s32(i32 %x) { +; CHECK: cvt.rn.f32.s32 %ret{{[0-9]+}}, %r{{[0-9]+}} +; CHECK: ret + %a = sitofp i32 %x to float + ret float %a +} + +define ptx_device float @cvt_f32_s64(i64 %x) { +; CHECK: cvt.rn.f32.s64 %ret{{[0-9]+}}, %rd{{[0-9]+}} +; CHECK: ret + %a = sitofp i64 %x to float + ret float %a +} + +; f64 + +define ptx_device double @cvt_f64_preds(i1 %x) { +; CHECK: mov.b64 %fd0, 4575657221408423936; +; CHECK: mov.b64 %fd1, 0; +; CHECK: selp.f64 %ret{{[0-9]+}}, %fd0, %fd1, %p{{[0-9]+}}; +; CHECK: ret; + %a = uitofp i1 %x to double + ret double %a +} + +define ptx_device double @cvt_f64_i16(i16 %x) { +; CHECK: 
cvt.rn.f64.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}; +; CHECK: ret; + %a = uitofp i16 %x to double + ret double %a +} + +define ptx_device double @cvt_f64_i32(i32 %x) { +; CHECK: cvt.rn.f64.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}; +; CHECK: ret; + %a = uitofp i32 %x to double + ret double %a +} + +define ptx_device double @cvt_f64_i64(i64 %x) { +; CHECK: cvt.rn.f64.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}; +; CHECK: ret; + %a = uitofp i64 %x to double + ret double %a +} + +define ptx_device double @cvt_f64_f32(float %x) { +; CHECK: cvt.f64.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}; +; CHECK: ret; + %a = fpext float %x to double + ret double %a +} + +define ptx_device double @cvt_f64_s16(i16 %x) { +; CHECK: cvt.rn.f64.s16 %ret{{[0-9]+}}, %rh{{[0-9]+}} +; CHECK: ret + %a = sitofp i16 %x to double + ret double %a +} + +define ptx_device double @cvt_f64_s32(i32 %x) { +; CHECK: cvt.rn.f64.s32 %ret{{[0-9]+}}, %r{{[0-9]+}} +; CHECK: ret + %a = sitofp i32 %x to double + ret double %a +} + +define ptx_device double @cvt_f64_s64(i64 %x) { +; CHECK: cvt.rn.f64.s64 %ret{{[0-9]+}}, %rd{{[0-9]+}} +; CHECK: ret + %a = sitofp i64 %x to double + ret double %a +}
diff --git a/src/LLVM/test/CodeGen/PTX/dg.exp b/src/LLVM/test/CodeGen/PTX/dg.exp new file mode 100644 index 0000000..2c304b5 --- /dev/null +++ b/src/LLVM/test/CodeGen/PTX/dg.exp
@@ -0,0 +1,5 @@ +load_lib llvm.exp + +if { [llvm_supports_target PTX] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] +}
diff --git a/src/LLVM/test/CodeGen/PTX/exit.ll b/src/LLVM/test/CodeGen/PTX/exit.ll new file mode 100644 index 0000000..7816c80 --- /dev/null +++ b/src/LLVM/test/CodeGen/PTX/exit.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=ptx32 | FileCheck %s + +define ptx_kernel void @t1() { +; CHECK: exit; +; CHECK-NOT: ret; + ret void +} + +define ptx_kernel void @t2(i32* %p, i32 %x) { + store i32 %x, i32* %p +; CHECK: exit; +; CHECK-NOT: ret; + ret void +}
diff --git a/src/LLVM/test/CodeGen/PTX/fdiv-sm10.ll b/src/LLVM/test/CodeGen/PTX/fdiv-sm10.ll new file mode 100644 index 0000000..e1013be --- /dev/null +++ b/src/LLVM/test/CodeGen/PTX/fdiv-sm10.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=ptx32 -mattr=+sm10 | FileCheck %s + +define ptx_device float @t1_f32(float %x, float %y) { +; CHECK: div.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}; +; CHECK: ret; + %a = fdiv float %x, %y + ret float %a +} + +define ptx_device double @t1_f64(double %x, double %y) { +; CHECK: div.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}; +; CHECK: ret; + %a = fdiv double %x, %y + ret double %a +}
diff --git a/src/LLVM/test/CodeGen/PTX/fdiv-sm13.ll b/src/LLVM/test/CodeGen/PTX/fdiv-sm13.ll new file mode 100644 index 0000000..1afa2eb --- /dev/null +++ b/src/LLVM/test/CodeGen/PTX/fdiv-sm13.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=ptx32 -mattr=+sm13 | FileCheck %s + +define ptx_device float @t1_f32(float %x, float %y) { +; CHECK: div.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}; +; CHECK: ret; + %a = fdiv float %x, %y + ret float %a +} + +define ptx_device double @t1_f64(double %x, double %y) { +; CHECK: div.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}; +; CHECK: ret; + %a = fdiv double %x, %y + ret double %a +}
diff --git a/src/LLVM/test/CodeGen/PTX/fneg.ll b/src/LLVM/test/CodeGen/PTX/fneg.ll new file mode 100644 index 0000000..2b76e63 --- /dev/null +++ b/src/LLVM/test/CodeGen/PTX/fneg.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=ptx32 | FileCheck %s + +define ptx_device float @t1_f32(float %x) { +; CHECK: neg.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}; +; CHECK: ret; + %y = fsub float -0.000000e+00, %x + ret float %y +} + +define ptx_device double @t1_f64(double %x) { +; CHECK: neg.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}; +; CHECK: ret; + %y = fsub double -0.000000e+00, %x + ret double %y +}
diff --git a/src/LLVM/test/CodeGen/PTX/intrinsic.ll b/src/LLVM/test/CodeGen/PTX/intrinsic.ll new file mode 100644 index 0000000..9f37ead --- /dev/null +++ b/src/LLVM/test/CodeGen/PTX/intrinsic.ll
@@ -0,0 +1,281 @@ +; RUN: llc < %s -march=ptx32 -mattr=+ptx20 | FileCheck %s + +define ptx_device i32 @test_tid_x() { +; CHECK: mov.u32 %ret0, %tid.x; +; CHECK: ret; + %x = call i32 @llvm.ptx.read.tid.x() + ret i32 %x +} + +define ptx_device i32 @test_tid_y() { +; CHECK: mov.u32 %ret0, %tid.y; +; CHECK: ret; + %x = call i32 @llvm.ptx.read.tid.y() + ret i32 %x +} + +define ptx_device i32 @test_tid_z() { +; CHECK: mov.u32 %ret0, %tid.z; +; CHECK: ret; + %x = call i32 @llvm.ptx.read.tid.z() + ret i32 %x +} + +define ptx_device i32 @test_tid_w() { +; CHECK: mov.u32 %ret0, %tid.w; +; CHECK: ret; + %x = call i32 @llvm.ptx.read.tid.w() + ret i32 %x +} + +define ptx_device i32 @test_ntid_x() { +; CHECK: mov.u32 %ret0, %ntid.x; +; CHECK: ret; + %x = call i32 @llvm.ptx.read.ntid.x() + ret i32 %x +} + +define ptx_device i32 @test_ntid_y() { +; CHECK: mov.u32 %ret0, %ntid.y; +; CHECK: ret; + %x = call i32 @llvm.ptx.read.ntid.y() + ret i32 %x +} + +define ptx_device i32 @test_ntid_z() { +; CHECK: mov.u32 %ret0, %ntid.z; +; CHECK: ret; + %x = call i32 @llvm.ptx.read.ntid.z() + ret i32 %x +} + +define ptx_device i32 @test_ntid_w() { +; CHECK: mov.u32 %ret0, %ntid.w; +; CHECK: ret; + %x = call i32 @llvm.ptx.read.ntid.w() + ret i32 %x +} + +define ptx_device i32 @test_laneid() { +; CHECK: mov.u32 %ret0, %laneid; +; CHECK: ret; + %x = call i32 @llvm.ptx.read.laneid() + ret i32 %x +} + +define ptx_device i32 @test_warpid() { +; CHECK: mov.u32 %ret0, %warpid; +; CHECK: ret; + %x = call i32 @llvm.ptx.read.warpid() + ret i32 %x +} + +define ptx_device i32 @test_nwarpid() { +; CHECK: mov.u32 %ret0, %nwarpid; +; CHECK: ret; + %x = call i32 @llvm.ptx.read.nwarpid() + ret i32 %x +} + +define ptx_device i32 @test_ctaid_x() { +; CHECK: mov.u32 %ret0, %ctaid.x; +; CHECK: ret; + %x = call i32 @llvm.ptx.read.ctaid.x() + ret i32 %x +} + +define ptx_device i32 @test_ctaid_y() { +; CHECK: mov.u32 %ret0, %ctaid.y; +; CHECK: ret; + %x = call i32 @llvm.ptx.read.ctaid.y() + ret i32 %x +} + +define 
ptx_device i32 @test_ctaid_z() { +; CHECK: mov.u32 %ret0, %ctaid.z; +; CHECK: ret; + %x = call i32 @llvm.ptx.read.ctaid.z() + ret i32 %x +} + +define ptx_device i32 @test_ctaid_w() { +; CHECK: mov.u32 %ret0, %ctaid.w; +; CHECK: ret; + %x = call i32 @llvm.ptx.read.ctaid.w() + ret i32 %x +} + +define ptx_device i32 @test_nctaid_x() { +; CHECK: mov.u32 %ret0, %nctaid.x; +; CHECK: ret; + %x = call i32 @llvm.ptx.read.nctaid.x() + ret i32 %x +} + +define ptx_device i32 @test_nctaid_y() { +; CHECK: mov.u32 %ret0, %nctaid.y; +; CHECK: ret; + %x = call i32 @llvm.ptx.read.nctaid.y() + ret i32 %x +} + +define ptx_device i32 @test_nctaid_z() { +; CHECK: mov.u32 %ret0, %nctaid.z; +; CHECK: ret; + %x = call i32 @llvm.ptx.read.nctaid.z() + ret i32 %x +} + +define ptx_device i32 @test_nctaid_w() { +; CHECK: mov.u32 %ret0, %nctaid.w; +; CHECK: ret; + %x = call i32 @llvm.ptx.read.nctaid.w() + ret i32 %x +} + +define ptx_device i32 @test_smid() { +; CHECK: mov.u32 %ret0, %smid; +; CHECK: ret; + %x = call i32 @llvm.ptx.read.smid() + ret i32 %x +} + +define ptx_device i32 @test_nsmid() { +; CHECK: mov.u32 %ret0, %nsmid; +; CHECK: ret; + %x = call i32 @llvm.ptx.read.nsmid() + ret i32 %x +} + +define ptx_device i32 @test_gridid() { +; CHECK: mov.u32 %ret0, %gridid; +; CHECK: ret; + %x = call i32 @llvm.ptx.read.gridid() + ret i32 %x +} + +define ptx_device i32 @test_lanemask_eq() { +; CHECK: mov.u32 %ret0, %lanemask_eq; +; CHECK: ret; + %x = call i32 @llvm.ptx.read.lanemask.eq() + ret i32 %x +} + +define ptx_device i32 @test_lanemask_le() { +; CHECK: mov.u32 %ret0, %lanemask_le; +; CHECK: ret; + %x = call i32 @llvm.ptx.read.lanemask.le() + ret i32 %x +} + +define ptx_device i32 @test_lanemask_lt() { +; CHECK: mov.u32 %ret0, %lanemask_lt; +; CHECK: ret; + %x = call i32 @llvm.ptx.read.lanemask.lt() + ret i32 %x +} + +define ptx_device i32 @test_lanemask_ge() { +; CHECK: mov.u32 %ret0, %lanemask_ge; +; CHECK: ret; + %x = call i32 @llvm.ptx.read.lanemask.ge() + ret i32 %x +} + +define 
ptx_device i32 @test_lanemask_gt() { +; CHECK: mov.u32 %ret0, %lanemask_gt; +; CHECK: ret; + %x = call i32 @llvm.ptx.read.lanemask.gt() + ret i32 %x +} + +define ptx_device i32 @test_clock() { +; CHECK: mov.u32 %ret0, %clock; +; CHECK: ret; + %x = call i32 @llvm.ptx.read.clock() + ret i32 %x +} + +define ptx_device i64 @test_clock64() { +; CHECK: mov.u64 %ret0, %clock64; +; CHECK: ret; + %x = call i64 @llvm.ptx.read.clock64() + ret i64 %x +} + +define ptx_device i32 @test_pm0() { +; CHECK: mov.u32 %ret0, %pm0; +; CHECK: ret; + %x = call i32 @llvm.ptx.read.pm0() + ret i32 %x +} + +define ptx_device i32 @test_pm1() { +; CHECK: mov.u32 %ret0, %pm1; +; CHECK: ret; + %x = call i32 @llvm.ptx.read.pm1() + ret i32 %x +} + +define ptx_device i32 @test_pm2() { +; CHECK: mov.u32 %ret0, %pm2; +; CHECK: ret; + %x = call i32 @llvm.ptx.read.pm2() + ret i32 %x +} + +define ptx_device i32 @test_pm3() { +; CHECK: mov.u32 %ret0, %pm3; +; CHECK: ret; + %x = call i32 @llvm.ptx.read.pm3() + ret i32 %x +} + +define ptx_device void @test_bar_sync() { +; CHECK: bar.sync 0 +; CHECK: ret; + call void @llvm.ptx.bar.sync(i32 0) + ret void +} + +declare i32 @llvm.ptx.read.tid.x() +declare i32 @llvm.ptx.read.tid.y() +declare i32 @llvm.ptx.read.tid.z() +declare i32 @llvm.ptx.read.tid.w() +declare i32 @llvm.ptx.read.ntid.x() +declare i32 @llvm.ptx.read.ntid.y() +declare i32 @llvm.ptx.read.ntid.z() +declare i32 @llvm.ptx.read.ntid.w() + +declare i32 @llvm.ptx.read.laneid() +declare i32 @llvm.ptx.read.warpid() +declare i32 @llvm.ptx.read.nwarpid() + +declare i32 @llvm.ptx.read.ctaid.x() +declare i32 @llvm.ptx.read.ctaid.y() +declare i32 @llvm.ptx.read.ctaid.z() +declare i32 @llvm.ptx.read.ctaid.w() +declare i32 @llvm.ptx.read.nctaid.x() +declare i32 @llvm.ptx.read.nctaid.y() +declare i32 @llvm.ptx.read.nctaid.z() +declare i32 @llvm.ptx.read.nctaid.w() + +declare i32 @llvm.ptx.read.smid() +declare i32 @llvm.ptx.read.nsmid() +declare i32 @llvm.ptx.read.gridid() + +declare i32 
@llvm.ptx.read.lanemask.eq() +declare i32 @llvm.ptx.read.lanemask.le() +declare i32 @llvm.ptx.read.lanemask.lt() +declare i32 @llvm.ptx.read.lanemask.ge() +declare i32 @llvm.ptx.read.lanemask.gt() + +declare i32 @llvm.ptx.read.clock() +declare i64 @llvm.ptx.read.clock64() + +declare i32 @llvm.ptx.read.pm0() +declare i32 @llvm.ptx.read.pm1() +declare i32 @llvm.ptx.read.pm2() +declare i32 @llvm.ptx.read.pm3() + +declare void @llvm.ptx.bar.sync(i32 %i)
diff --git a/src/LLVM/test/CodeGen/PTX/ld.ll b/src/LLVM/test/CodeGen/PTX/ld.ll new file mode 100644 index 0000000..81fd33a --- /dev/null +++ b/src/LLVM/test/CodeGen/PTX/ld.ll
@@ -0,0 +1,382 @@ +; RUN: llc < %s -march=ptx32 | FileCheck %s + +;CHECK: .extern .global .b8 array_i16[20]; +@array_i16 = external global [10 x i16] + +;CHECK: .extern .const .b8 array_constant_i16[20]; +@array_constant_i16 = external addrspace(1) constant [10 x i16] + +;CHECK: .extern .shared .b8 array_shared_i16[20]; +@array_shared_i16 = external addrspace(4) global [10 x i16] + +;CHECK: .extern .global .b8 array_i32[40]; +@array_i32 = external global [10 x i32] + +;CHECK: .extern .const .b8 array_constant_i32[40]; +@array_constant_i32 = external addrspace(1) constant [10 x i32] + +;CHECK: .extern .shared .b8 array_shared_i32[40]; +@array_shared_i32 = external addrspace(4) global [10 x i32] + +;CHECK: .extern .global .b8 array_i64[80]; +@array_i64 = external global [10 x i64] + +;CHECK: .extern .const .b8 array_constant_i64[80]; +@array_constant_i64 = external addrspace(1) constant [10 x i64] + +;CHECK: .extern .shared .b8 array_shared_i64[80]; +@array_shared_i64 = external addrspace(4) global [10 x i64] + +;CHECK: .extern .global .b8 array_float[40]; +@array_float = external global [10 x float] + +;CHECK: .extern .const .b8 array_constant_float[40]; +@array_constant_float = external addrspace(1) constant [10 x float] + +;CHECK: .extern .shared .b8 array_shared_float[40]; +@array_shared_float = external addrspace(4) global [10 x float] + +;CHECK: .extern .global .b8 array_double[80]; +@array_double = external global [10 x double] + +;CHECK: .extern .const .b8 array_constant_double[80]; +@array_constant_double = external addrspace(1) constant [10 x double] + +;CHECK: .extern .shared .b8 array_shared_double[80]; +@array_shared_double = external addrspace(4) global [10 x double] + + +define ptx_device i16 @t1_u16(i16* %p) { +entry: +;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r{{[0-9]+}}]; +;CHECK: ret; + %x = load i16* %p + ret i16 %x +} + +define ptx_device i32 @t1_u32(i32* %p) { +entry: +;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r{{[0-9]+}}]; +;CHECK: ret; + %x = load 
i32* %p + ret i32 %x +} + +define ptx_device i64 @t1_u64(i64* %p) { +entry: +;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r{{[0-9]+}}]; +;CHECK: ret; + %x = load i64* %p + ret i64 %x +} + +define ptx_device float @t1_f32(float* %p) { +entry: +;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r{{[0-9]+}}]; +;CHECK: ret; + %x = load float* %p + ret float %x +} + +define ptx_device double @t1_f64(double* %p) { +entry: +;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r{{[0-9]+}}]; +;CHECK: ret; + %x = load double* %p + ret double %x +} + +define ptx_device i16 @t2_u16(i16* %p) { +entry: +;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r{{[0-9]+}}+2]; +;CHECK: ret; + %i = getelementptr i16* %p, i32 1 + %x = load i16* %i + ret i16 %x +} + +define ptx_device i32 @t2_u32(i32* %p) { +entry: +;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r{{[0-9]+}}+4]; +;CHECK: ret; + %i = getelementptr i32* %p, i32 1 + %x = load i32* %i + ret i32 %x +} + +define ptx_device i64 @t2_u64(i64* %p) { +entry: +;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r{{[0-9]+}}+8]; +;CHECK: ret; + %i = getelementptr i64* %p, i32 1 + %x = load i64* %i + ret i64 %x +} + +define ptx_device float @t2_f32(float* %p) { +entry: +;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r{{[0-9]+}}+4]; +;CHECK: ret; + %i = getelementptr float* %p, i32 1 + %x = load float* %i + ret float %x +} + +define ptx_device double @t2_f64(double* %p) { +entry: +;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r{{[0-9]+}}+8]; +;CHECK: ret; + %i = getelementptr double* %p, i32 1 + %x = load double* %i + ret double %x +} + +define ptx_device i16 @t3_u16(i16* %p, i32 %q) { +entry: +;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 1; +;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]]; +;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r{{[0-9]+}}]; + %i = getelementptr i16* %p, i32 %q + %x = load i16* %i + ret i16 %x +} + +define ptx_device i32 @t3_u32(i32* %p, i32 %q) { +entry: +;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 2; +;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]]; +;CHECK: ld.global.u32 
%ret{{[0-9]+}}, [%r{{[0-9]+}}]; + %i = getelementptr i32* %p, i32 %q + %x = load i32* %i + ret i32 %x +} + +define ptx_device i64 @t3_u64(i64* %p, i32 %q) { +entry: +;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 3; +;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]]; +;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r{{[0-9]+}}]; + %i = getelementptr i64* %p, i32 %q + %x = load i64* %i + ret i64 %x +} + +define ptx_device float @t3_f32(float* %p, i32 %q) { +entry: +;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 2; +;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]]; +;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r{{[0-9]+}}]; + %i = getelementptr float* %p, i32 %q + %x = load float* %i + ret float %x +} + +define ptx_device double @t3_f64(double* %p, i32 %q) { +entry: +;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 3; +;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]]; +;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r{{[0-9]+}}]; + %i = getelementptr double* %p, i32 %q + %x = load double* %i + ret double %x +} + +define ptx_device i16 @t4_global_u16() { +entry: +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i16; +;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r[[R0]]]; +;CHECK: ret; + %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 0 + %x = load i16* %i + ret i16 %x +} + +define ptx_device i32 @t4_global_u32() { +entry: +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i32; +;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r[[R0]]]; +;CHECK: ret; + %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0 + %x = load i32* %i + ret i32 %x +} + +define ptx_device i64 @t4_global_u64() { +entry: +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i64; +;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r[[R0]]]; +;CHECK: ret; + %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0 + %x = load i64* %i + ret i64 %x +} + +define ptx_device float @t4_global_f32() { +entry: +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_float; +;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r[[R0]]]; +;CHECK: ret; + %i = getelementptr [10 x float]* @array_float, i32 
0, i32 0 + %x = load float* %i + ret float %x +} + +define ptx_device double @t4_global_f64() { +entry: +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_double; +;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r[[R0]]]; +;CHECK: ret; + %i = getelementptr [10 x double]* @array_double, i32 0, i32 0 + %x = load double* %i + ret double %x +} + +define ptx_device i16 @t4_const_u16() { +entry: +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_i16; +;CHECK: ld.const.u16 %ret{{[0-9]+}}, [%r[[R0]]]; +;CHECK: ret; + %i = getelementptr [10 x i16] addrspace(1)* @array_constant_i16, i32 0, i32 0 + %x = load i16 addrspace(1)* %i + ret i16 %x +} + +define ptx_device i32 @t4_const_u32() { +entry: +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_i32; +;CHECK: ld.const.u32 %ret{{[0-9]+}}, [%r[[R0]]]; +;CHECK: ret; + %i = getelementptr [10 x i32] addrspace(1)* @array_constant_i32, i32 0, i32 0 + %x = load i32 addrspace(1)* %i + ret i32 %x +} + +define ptx_device i64 @t4_const_u64() { +entry: +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_i64; +;CHECK: ld.const.u64 %ret{{[0-9]+}}, [%r[[R0]]]; +;CHECK: ret; + %i = getelementptr [10 x i64] addrspace(1)* @array_constant_i64, i32 0, i32 0 + %x = load i64 addrspace(1)* %i + ret i64 %x +} + +define ptx_device float @t4_const_f32() { +entry: +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_float; +;CHECK: ld.const.f32 %ret{{[0-9]+}}, [%r[[R0]]]; +;CHECK: ret; + %i = getelementptr [10 x float] addrspace(1)* @array_constant_float, i32 0, i32 0 + %x = load float addrspace(1)* %i + ret float %x +} + +define ptx_device double @t4_const_f64() { +entry: +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_double; +;CHECK: ld.const.f64 %ret{{[0-9]+}}, [%r[[R0]]]; +;CHECK: ret; + %i = getelementptr [10 x double] addrspace(1)* @array_constant_double, i32 0, i32 0 + %x = load double addrspace(1)* %i + ret double %x +} + +define ptx_device i16 @t4_shared_u16() { +entry: +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i16; +;CHECK: ld.shared.u16 %ret{{[0-9]+}}, [%r[[R0]]]; 
+;CHECK: ret; + %i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0 + %x = load i16 addrspace(4)* %i + ret i16 %x +} + +define ptx_device i32 @t4_shared_u32() { +entry: +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i32; +;CHECK: ld.shared.u32 %ret{{[0-9]+}}, [%r[[R0]]]; +;CHECK: ret; + %i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0 + %x = load i32 addrspace(4)* %i + ret i32 %x +} + +define ptx_device i64 @t4_shared_u64() { +entry: +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i64; +;CHECK: ld.shared.u64 %ret{{[0-9]+}}, [%r[[R0]]]; +;CHECK: ret; + %i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0 + %x = load i64 addrspace(4)* %i + ret i64 %x +} + +define ptx_device float @t4_shared_f32() { +entry: +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_float; +;CHECK: ld.shared.f32 %ret{{[0-9]+}}, [%r[[R0]]]; +;CHECK: ret; + %i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0 + %x = load float addrspace(4)* %i + ret float %x +} + +define ptx_device double @t4_shared_f64() { +entry: +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_double; +;CHECK: ld.shared.f64 %ret{{[0-9]+}}, [%r[[R0]]]; +;CHECK: ret; + %i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0 + %x = load double addrspace(4)* %i + ret double %x +} + +define ptx_device i16 @t5_u16() { +entry: +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i16; +;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r[[R0]]+2]; +;CHECK: ret; + %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1 + %x = load i16* %i + ret i16 %x +} + +define ptx_device i32 @t5_u32() { +entry: +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i32; +;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r[[R0]]+4]; +;CHECK: ret; + %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1 + %x = load i32* %i + ret i32 %x +} + +define ptx_device i64 @t5_u64() { +entry: +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i64; +;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r[[R0]]+8]; 
+;CHECK: ret; + %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 1 + %x = load i64* %i + ret i64 %x +} + +define ptx_device float @t5_f32() { +entry: +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_float; +;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r[[R0]]+4]; +;CHECK: ret; + %i = getelementptr [10 x float]* @array_float, i32 0, i32 1 + %x = load float* %i + ret float %x +} + +define ptx_device double @t5_f64() { +entry: +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_double; +;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r[[R0]]+8]; +;CHECK: ret; + %i = getelementptr [10 x double]* @array_double, i32 0, i32 1 + %x = load double* %i + ret double %x +}
diff --git a/src/LLVM/test/CodeGen/PTX/llvm-intrinsic.ll b/src/LLVM/test/CodeGen/PTX/llvm-intrinsic.ll new file mode 100644 index 0000000..e73ad25 --- /dev/null +++ b/src/LLVM/test/CodeGen/PTX/llvm-intrinsic.ll
@@ -0,0 +1,56 @@ +; RUN: llc < %s -march=ptx32 -mattr=+ptx20 | FileCheck %s + +define ptx_device float @test_sqrt_f32(float %x) { +entry: +; CHECK: sqrt.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}; +; CHECK: ret; + %y = call float @llvm.sqrt.f32(float %x) + ret float %y +} + +define ptx_device double @test_sqrt_f64(double %x) { +entry: +; CHECK: sqrt.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}; +; CHECK: ret; + %y = call double @llvm.sqrt.f64(double %x) + ret double %y +} + +define ptx_device float @test_sin_f32(float %x) { +entry: +; CHECK: sin.approx.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}; +; CHECK: ret; + %y = call float @llvm.sin.f32(float %x) + ret float %y +} + +define ptx_device double @test_sin_f64(double %x) { +entry: +; CHECK: sin.approx.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}; +; CHECK: ret; + %y = call double @llvm.sin.f64(double %x) + ret double %y +} + +define ptx_device float @test_cos_f32(float %x) { +entry: +; CHECK: cos.approx.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}; +; CHECK: ret; + %y = call float @llvm.cos.f32(float %x) + ret float %y +} + +define ptx_device double @test_cos_f64(double %x) { +entry: +; CHECK: cos.approx.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}; +; CHECK: ret; + %y = call double @llvm.cos.f64(double %x) + ret double %y +} + +declare float @llvm.sqrt.f32(float) +declare double @llvm.sqrt.f64(double) +declare float @llvm.sin.f32(float) +declare double @llvm.sin.f64(double) +declare float @llvm.cos.f32(float) +declare double @llvm.cos.f64(double)
diff --git a/src/LLVM/test/CodeGen/PTX/mad-disabling.ll b/src/LLVM/test/CodeGen/PTX/mad-disabling.ll new file mode 100644 index 0000000..ad7b341 --- /dev/null +++ b/src/LLVM/test/CodeGen/PTX/mad-disabling.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20 | grep "mad" +; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20,+no-fma | grep -v "mad" + +define ptx_device float @test_mul_add_f(float %x, float %y, float %z) { +entry: + %a = fmul float %x, %y + %b = fadd float %a, %z + ret float %b +} + +define ptx_device double @test_mul_add_d(double %x, double %y, double %z) { +entry: + %a = fmul double %x, %y + %b = fadd double %a, %z + ret double %b +}
diff --git a/src/LLVM/test/CodeGen/PTX/mad.ll b/src/LLVM/test/CodeGen/PTX/mad.ll new file mode 100644 index 0000000..cc28e3f --- /dev/null +++ b/src/LLVM/test/CodeGen/PTX/mad.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=ptx32 -mattr=+sm13 | FileCheck %s + +define ptx_device float @t1_f32(float %x, float %y, float %z) { +; CHECK: mad.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}; +; CHECK: ret; + %a = fmul float %x, %y + %b = fadd float %a, %z + ret float %b +} + +define ptx_device double @t1_f64(double %x, double %y, double %z) { +; CHECK: mad.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}; +; CHECK: ret; + %a = fmul double %x, %y + %b = fadd double %a, %z + ret double %b +}
diff --git a/src/LLVM/test/CodeGen/PTX/mov.ll b/src/LLVM/test/CodeGen/PTX/mov.ll new file mode 100644 index 0000000..75555a7 --- /dev/null +++ b/src/LLVM/test/CodeGen/PTX/mov.ll
@@ -0,0 +1,62 @@ +; RUN: llc < %s -march=ptx32 | FileCheck %s + +define ptx_device i16 @t1_u16() { +; CHECK: mov.u16 %ret{{[0-9]+}}, 0; +; CHECK: ret; + ret i16 0 +} + +define ptx_device i32 @t1_u32() { +; CHECK: mov.u32 %ret{{[0-9]+}}, 0; +; CHECK: ret; + ret i32 0 +} + +define ptx_device i64 @t1_u64() { +; CHECK: mov.u64 %ret{{[0-9]+}}, 0; +; CHECK: ret; + ret i64 0 +} + +define ptx_device float @t1_f32() { +; CHECK: mov.f32 %ret{{[0-9]+}}, 0D0000000000000000; +; CHECK: ret; + ret float 0.0 +} + +define ptx_device double @t1_f64() { +; CHECK: mov.f64 %ret{{[0-9]+}}, 0D0000000000000000; +; CHECK: ret; + ret double 0.0 +} + +define ptx_device i16 @t2_u16(i16 %x) { +; CHECK: mov.b16 %ret{{[0-9]+}}, %param{{[0-9]+}}; +; CHECK: ret; + ret i16 %x +} + +define ptx_device i32 @t2_u32(i32 %x) { +; CHECK: mov.b32 %ret{{[0-9]+}}, %param{{[0-9]+}}; +; CHECK: ret; + ret i32 %x +} + +define ptx_device i64 @t2_u64(i64 %x) { +; CHECK: mov.b64 %ret{{[0-9]+}}, %param{{[0-9]+}}; +; CHECK: ret; + ret i64 %x +} + +define ptx_device float @t3_f32(float %x) { +; CHECK: mov.f32 %ret{{[0-9]+}}, %param{{[0-9]+}}; +; CHECK: ret; + ret float %x +} + +define ptx_device double @t3_f64(double %x) { +; CHECK: mov.f64 %ret{{[0-9]+}}, %param{{[0-9]+}}; +; CHECK: ret; + ret double %x +} +
diff --git a/src/LLVM/test/CodeGen/PTX/mul.ll b/src/LLVM/test/CodeGen/PTX/mul.ll new file mode 100644 index 0000000..91949db --- /dev/null +++ b/src/LLVM/test/CodeGen/PTX/mul.ll
@@ -0,0 +1,39 @@ +; RUN: llc < %s -march=ptx32 | FileCheck %s + +;define ptx_device i32 @t1(i32 %x, i32 %y) { +; %z = mul i32 %x, %y +; ret i32 %z +;} + +;define ptx_device i32 @t2(i32 %x) { +; %z = mul i32 %x, 1 +; ret i32 %z +;} + +define ptx_device float @t1_f32(float %x, float %y) { +; CHECK: mul.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}} +; CHECK: ret; + %z = fmul float %x, %y + ret float %z +} + +define ptx_device double @t1_f64(double %x, double %y) { +; CHECK: mul.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}} +; CHECK: ret; + %z = fmul double %x, %y + ret double %z +} + +define ptx_device float @t2_f32(float %x) { +; CHECK: mul.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, 0D4014000000000000; +; CHECK: ret; + %z = fmul float %x, 5.0 + ret float %z +} + +define ptx_device double @t2_f64(double %x) { +; CHECK: mul.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, 0D4014000000000000; +; CHECK: ret; + %z = fmul double %x, 5.0 + ret double %z +}
diff --git a/src/LLVM/test/CodeGen/PTX/options.ll b/src/LLVM/test/CodeGen/PTX/options.ll new file mode 100644 index 0000000..0fb6602 --- /dev/null +++ b/src/LLVM/test/CodeGen/PTX/options.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=ptx32 -mattr=ptx20 | grep ".version 2.0" +; RUN: llc < %s -march=ptx32 -mattr=ptx21 | grep ".version 2.1" +; RUN: llc < %s -march=ptx32 -mattr=ptx22 | grep ".version 2.2" +; RUN: llc < %s -march=ptx32 -mattr=ptx23 | grep ".version 2.3" +; RUN: llc < %s -march=ptx32 -mattr=sm10 | grep ".target sm_10" +; RUN: llc < %s -march=ptx32 -mattr=sm13 | grep ".target sm_13" +; RUN: llc < %s -march=ptx32 -mattr=sm20 | grep ".target sm_20" +; RUN: llc < %s -march=ptx32 -mattr=ptx23 | grep ".address_size 32" +; RUN: llc < %s -march=ptx64 -mattr=ptx23 | grep ".address_size 64" + +define ptx_device void @t1() { + ret void +}
diff --git a/src/LLVM/test/CodeGen/PTX/parameter-order.ll b/src/LLVM/test/CodeGen/PTX/parameter-order.ll new file mode 100644 index 0000000..09015da --- /dev/null +++ b/src/LLVM/test/CodeGen/PTX/parameter-order.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=ptx32 | FileCheck %s + +; CHECK: .func (.reg .b32 %ret{{[0-9]+}}) test_parameter_order (.reg .b32 %param{{[0-9]+}}, .reg .b32 %param{{[0-9]+}}, .reg .b32 %param{{[0-9]+}}, .reg .b32 %param{{[0-9]+}}) +define ptx_device i32 @test_parameter_order(float %a, i32 %b, i32 %c, float %d) { +; CHECK: sub.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}} + %result = sub i32 %b, %c + ret i32 %result +}
diff --git a/src/LLVM/test/CodeGen/PTX/ret.ll b/src/LLVM/test/CodeGen/PTX/ret.ll new file mode 100644 index 0000000..ba0523f --- /dev/null +++ b/src/LLVM/test/CodeGen/PTX/ret.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=ptx32 | FileCheck %s + +define ptx_device void @t1() { +; CHECK: ret; +; CHECK-NOT: exit; + ret void +}
diff --git a/src/LLVM/test/CodeGen/PTX/selp.ll b/src/LLVM/test/CodeGen/PTX/selp.ll new file mode 100644 index 0000000..aa7ce85 --- /dev/null +++ b/src/LLVM/test/CodeGen/PTX/selp.ll
@@ -0,0 +1,25 @@ +; RUN: llc < %s -march=ptx32 | FileCheck %s + +define ptx_device i32 @test_selp_i32(i1 %x, i32 %y, i32 %z) { +; CHECK: selp.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %p{{[0-9]+}}; + %a = select i1 %x, i32 %y, i32 %z + ret i32 %a +} + +define ptx_device i64 @test_selp_i64(i1 %x, i64 %y, i64 %z) { +; CHECK: selp.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}, %rd{{[0-9]+}}, %p{{[0-9]+}}; + %a = select i1 %x, i64 %y, i64 %z + ret i64 %a +} + +define ptx_device float @test_selp_f32(i1 %x, float %y, float %z) { +; CHECK: selp.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %p{{[0-9]+}}; + %a = select i1 %x, float %y, float %z + ret float %a +} + +define ptx_device double @test_selp_f64(i1 %x, double %y, double %z) { +; CHECK: selp.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %p{{[0-9]+}}; + %a = select i1 %x, double %y, double %z + ret double %a +}
diff --git a/src/LLVM/test/CodeGen/PTX/setp.ll b/src/LLVM/test/CodeGen/PTX/setp.ll new file mode 100644 index 0000000..646abab --- /dev/null +++ b/src/LLVM/test/CodeGen/PTX/setp.ll
@@ -0,0 +1,206 @@ +; RUN: llc < %s -march=ptx32 | FileCheck %s + +define ptx_device i32 @test_setp_eq_u32_rr(i32 %x, i32 %y) { +; CHECK: setp.eq.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; + %p = icmp eq i32 %x, %y + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_ne_u32_rr(i32 %x, i32 %y) { +; CHECK: setp.ne.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; + %p = icmp ne i32 %x, %y + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_lt_u32_rr(i32 %x, i32 %y) { +; CHECK: setp.lt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; + %p = icmp ult i32 %x, %y + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_le_u32_rr(i32 %x, i32 %y) { +; CHECK: setp.le.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; + %p = icmp ule i32 %x, %y + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_gt_u32_rr(i32 %x, i32 %y) { +; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; + %p = icmp ugt i32 %x, %y + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_ge_u32_rr(i32 %x, i32 %y) { +; CHECK: setp.ge.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; + %p = icmp uge i32 %x, %y + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_lt_s32_rr(i32 %x, i32 %y) { +; CHECK: setp.lt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; + %p = icmp slt i32 %x, %y + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_le_s32_rr(i32 %x, i32 %y) { +; CHECK: 
setp.le.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; + %p = icmp sle i32 %x, %y + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_gt_s32_rr(i32 %x, i32 %y) { +; CHECK: setp.gt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; + %p = icmp sgt i32 %x, %y + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_ge_s32_rr(i32 %x, i32 %y) { +; CHECK: setp.ge.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; + %p = icmp sge i32 %x, %y + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_eq_u32_ri(i32 %x) { +; CHECK: setp.eq.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 1; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; + %p = icmp eq i32 %x, 1 + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_ne_u32_ri(i32 %x) { +; CHECK: setp.ne.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 1; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; + %p = icmp ne i32 %x, 1 + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_lt_u32_ri(i32 %x) { +; CHECK: setp.eq.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; + %p = icmp ult i32 %x, 1 + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_le_u32_ri(i32 %x) { +; CHECK: setp.lt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 2; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; + %p = icmp ule i32 %x, 1 + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_gt_u32_ri(i32 %x) { +; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 1; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; + %p = icmp ugt i32 %x, 1 + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_ge_u32_ri(i32 %x) { 
+; CHECK: setp.ne.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; + %p = icmp uge i32 %x, 1 + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_lt_s32_ri(i32 %x) { +; CHECK: setp.lt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 1; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; + %p = icmp slt i32 %x, 1 + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_le_s32_ri(i32 %x) { +; CHECK: setp.lt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 2; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; + %p = icmp sle i32 %x, 1 + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_gt_s32_ri(i32 %x) { +; CHECK: setp.gt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 1; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; + %p = icmp sgt i32 %x, 1 + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_ge_s32_ri(i32 %x) { +; CHECK: setp.gt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; +; CHECK: ret; + %p = icmp sge i32 %x, 1 + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_4_op_format_1(i32 %x, i32 %y, i32 %u, i32 %v) { +; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}; +; CHECK: setp.eq.and.u32 %p1, %r{{[0-9]+}}, %r{{[0-9]+}}, %p[[P0]]; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p1; +; CHECK: ret; + %c = icmp eq i32 %x, %y + %d = icmp ugt i32 %u, %v + %e = and i1 %c, %d + %z = zext i1 %e to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_4_op_format_2(i32 %x, i32 %y, i32 %w) { +; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0; +; CHECK: setp.eq.and.u32 %p1, %r{{[0-9]+}}, %r{{[0-9]+}}, !%p[[P0]]; +; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p1; +; CHECK: ret; + %c = trunc i32 %w to i1 + %d = icmp eq i32 %x, %y + %e = xor i1 %c, 1 + %f = and i1 %d, %e + %z = zext i1 %f to i32 + ret i32 %z +}
diff --git a/src/LLVM/test/CodeGen/PTX/shl.ll b/src/LLVM/test/CodeGen/PTX/shl.ll new file mode 100644 index 0000000..d9fe2cd --- /dev/null +++ b/src/LLVM/test/CodeGen/PTX/shl.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -march=ptx32 | FileCheck %s + +define ptx_device i32 @t1(i32 %x, i32 %y) { +; CHECK: shl.b32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}} + %z = shl i32 %x, %y +; CHECK: ret; + ret i32 %z +} + +define ptx_device i32 @t2(i32 %x) { +; CHECK: shl.b32 %ret{{[0-9]+}}, %r{{[0-9]+}}, 3 + %z = shl i32 %x, 3 +; CHECK: ret; + ret i32 %z +} + +define ptx_device i32 @t3(i32 %x) { +; CHECK: shl.b32 %ret{{[0-9]+}}, 3, %r{{[0-9]+}} + %z = shl i32 3, %x +; CHECK: ret; + ret i32 %z +}
diff --git a/src/LLVM/test/CodeGen/PTX/shr.ll b/src/LLVM/test/CodeGen/PTX/shr.ll new file mode 100644 index 0000000..eb4666f --- /dev/null +++ b/src/LLVM/test/CodeGen/PTX/shr.ll
@@ -0,0 +1,43 @@ +; RUN: llc < %s -march=ptx32 | FileCheck %s + +define ptx_device i32 @t1(i32 %x, i32 %y) { +; CHECK: shr.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}} + %z = lshr i32 %x, %y +; CHECK: ret; + ret i32 %z +} + +define ptx_device i32 @t2(i32 %x) { +; CHECK: shr.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, 3 + %z = lshr i32 %x, 3 +; CHECK: ret; + ret i32 %z +} + +define ptx_device i32 @t3(i32 %x) { +; CHECK: shr.u32 %ret{{[0-9]+}}, 3, %r{{[0-9]+}} + %z = lshr i32 3, %x +; CHECK: ret; + ret i32 %z +} + +define ptx_device i32 @t4(i32 %x, i32 %y) { +; CHECK: shr.s32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}} + %z = ashr i32 %x, %y +; CHECK: ret; + ret i32 %z +} + +define ptx_device i32 @t5(i32 %x) { +; CHECK: shr.s32 %ret{{[0-9]+}}, %r{{[0-9]+}}, 3 + %z = ashr i32 %x, 3 +; CHECK: ret; + ret i32 %z +} + +define ptx_device i32 @t6(i32 %x) { +; CHECK: shr.s32 %ret{{[0-9]+}}, -3, %r{{[0-9]+}} + %z = ashr i32 -3, %x +; CHECK: ret; + ret i32 %z +}
diff --git a/src/LLVM/test/CodeGen/PTX/simple-call.ll b/src/LLVM/test/CodeGen/PTX/simple-call.ll new file mode 100644 index 0000000..77ea29e --- /dev/null +++ b/src/LLVM/test/CodeGen/PTX/simple-call.ll
@@ -0,0 +1,27 @@ +; RUN: llc < %s -march=ptx32 -mattr=sm20 | FileCheck %s + +define ptx_device void @test_add(float %x, float %y) { +; CHECK: ret; + %z = fadd float %x, %y + ret void +} + +define ptx_device float @test_call(float %x, float %y) { + %a = fadd float %x, %y +; CHECK: call.uni test_add, (__localparam_{{[0-9]+}}, __localparam_{{[0-9]+}}); + call void @test_add(float %a, float %y) + ret float %a +} + +define ptx_device float @test_compute(float %x, float %y) { +; CHECK: ret; + %z = fadd float %x, %y + ret float %z +} + +define ptx_device float @test_call_compute(float %x, float %y) { +; CHECK: call.uni (__localparam_{{[0-9]+}}), test_compute, (__localparam_{{[0-9]+}}, __localparam_{{[0-9]+}}) + %z = call float @test_compute(float %x, float %y) + ret float %z +} +
diff --git a/src/LLVM/test/CodeGen/PTX/st.ll b/src/LLVM/test/CodeGen/PTX/st.ll new file mode 100644 index 0000000..63ef58c --- /dev/null +++ b/src/LLVM/test/CodeGen/PTX/st.ll
@@ -0,0 +1,337 @@ +; RUN: llc < %s -march=ptx32 | FileCheck %s + +;CHECK: .extern .global .b8 array_i16[20]; +@array_i16 = external global [10 x i16] + +;CHECK: .extern .const .b8 array_constant_i16[20]; +@array_constant_i16 = external addrspace(1) constant [10 x i16] + +;CHECK: .extern .shared .b8 array_shared_i16[20]; +@array_shared_i16 = external addrspace(4) global [10 x i16] + +;CHECK: .extern .global .b8 array_i32[40]; +@array_i32 = external global [10 x i32] + +;CHECK: .extern .const .b8 array_constant_i32[40]; +@array_constant_i32 = external addrspace(1) constant [10 x i32] + +;CHECK: .extern .shared .b8 array_shared_i32[40]; +@array_shared_i32 = external addrspace(4) global [10 x i32] + +;CHECK: .extern .global .b8 array_i64[80]; +@array_i64 = external global [10 x i64] + +;CHECK: .extern .const .b8 array_constant_i64[80]; +@array_constant_i64 = external addrspace(1) constant [10 x i64] + +;CHECK: .extern .shared .b8 array_shared_i64[80]; +@array_shared_i64 = external addrspace(4) global [10 x i64] + +;CHECK: .extern .global .b8 array_float[40]; +@array_float = external global [10 x float] + +;CHECK: .extern .const .b8 array_constant_float[40]; +@array_constant_float = external addrspace(1) constant [10 x float] + +;CHECK: .extern .shared .b8 array_shared_float[40]; +@array_shared_float = external addrspace(4) global [10 x float] + +;CHECK: .extern .global .b8 array_double[80]; +@array_double = external global [10 x double] + +;CHECK: .extern .const .b8 array_constant_double[80]; +@array_constant_double = external addrspace(1) constant [10 x double] + +;CHECK: .extern .shared .b8 array_shared_double[80]; +@array_shared_double = external addrspace(4) global [10 x double] + + +define ptx_device void @t1_u16(i16* %p, i16 %x) { +entry: +;CHECK: st.global.u16 [%r{{[0-9]+}}], %rh{{[0-9]+}}; +;CHECK: ret; + store i16 %x, i16* %p + ret void +} + +define ptx_device void @t1_u32(i32* %p, i32 %x) { +entry: +;CHECK: st.global.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}; +;CHECK: 
ret; + store i32 %x, i32* %p + ret void +} + +define ptx_device void @t1_u64(i64* %p, i64 %x) { +entry: +;CHECK: st.global.u64 [%r{{[0-9]+}}], %rd{{[0-9]+}}; +;CHECK: ret; + store i64 %x, i64* %p + ret void +} + +define ptx_device void @t1_f32(float* %p, float %x) { +entry: +;CHECK: st.global.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}; +;CHECK: ret; + store float %x, float* %p + ret void +} + +define ptx_device void @t1_f64(double* %p, double %x) { +entry: +;CHECK: st.global.f64 [%r{{[0-9]+}}], %fd{{[0-9]+}}; +;CHECK: ret; + store double %x, double* %p + ret void +} + +define ptx_device void @t2_u16(i16* %p, i16 %x) { +entry: +;CHECK: st.global.u16 [%r{{[0-9]+}}+2], %rh{{[0-9]+}}; +;CHECK: ret; + %i = getelementptr i16* %p, i32 1 + store i16 %x, i16* %i + ret void +} + +define ptx_device void @t2_u32(i32* %p, i32 %x) { +entry: +;CHECK: st.global.u32 [%r{{[0-9]+}}+4], %r{{[0-9]+}}; +;CHECK: ret; + %i = getelementptr i32* %p, i32 1 + store i32 %x, i32* %i + ret void +} + +define ptx_device void @t2_u64(i64* %p, i64 %x) { +entry: +;CHECK: st.global.u64 [%r{{[0-9]+}}+8], %rd{{[0-9]+}}; +;CHECK: ret; + %i = getelementptr i64* %p, i32 1 + store i64 %x, i64* %i + ret void +} + +define ptx_device void @t2_f32(float* %p, float %x) { +entry: +;CHECK: st.global.f32 [%r{{[0-9]+}}+4], %f{{[0-9]+}}; +;CHECK: ret; + %i = getelementptr float* %p, i32 1 + store float %x, float* %i + ret void +} + +define ptx_device void @t2_f64(double* %p, double %x) { +entry: +;CHECK: st.global.f64 [%r{{[0-9]+}}+8], %fd{{[0-9]+}}; +;CHECK: ret; + %i = getelementptr double* %p, i32 1 + store double %x, double* %i + ret void +} + +define ptx_device void @t3_u16(i16* %p, i32 %q, i16 %x) { +entry: +;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 1; +;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]]; +;CHECK: st.global.u16 [%r{{[0-9]+}}], %rh{{[0-9]+}}; +;CHECK: ret; + %i = getelementptr i16* %p, i32 %q + store i16 %x, i16* %i + ret void +} + +define ptx_device void @t3_u32(i32* %p, i32 %q, i32 %x) { +entry: 
+;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 2; +;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]]; +;CHECK: st.global.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}; +;CHECK: ret; + %i = getelementptr i32* %p, i32 %q + store i32 %x, i32* %i + ret void +} + +define ptx_device void @t3_u64(i64* %p, i32 %q, i64 %x) { +entry: +;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 3; +;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]]; +;CHECK: st.global.u64 [%r{{[0-9]+}}], %rd{{[0-9]+}}; +;CHECK: ret; + %i = getelementptr i64* %p, i32 %q + store i64 %x, i64* %i + ret void +} + +define ptx_device void @t3_f32(float* %p, i32 %q, float %x) { +entry: +;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 2; +;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]]; +;CHECK: st.global.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}; +;CHECK: ret; + %i = getelementptr float* %p, i32 %q + store float %x, float* %i + ret void +} + +define ptx_device void @t3_f64(double* %p, i32 %q, double %x) { +entry: +;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 3; +;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]]; +;CHECK: st.global.f64 [%r{{[0-9]+}}], %fd{{[0-9]+}}; +;CHECK: ret; + %i = getelementptr double* %p, i32 %q + store double %x, double* %i + ret void +} + +define ptx_device void @t4_global_u16(i16 %x) { +entry: +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i16; +;CHECK: st.global.u16 [%r[[R0]]], %rh{{[0-9]+}}; +;CHECK: ret; + %i = getelementptr [10 x i16]* @array_i16, i16 0, i16 0 + store i16 %x, i16* %i + ret void +} + +define ptx_device void @t4_global_u32(i32 %x) { +entry: +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i32; +;CHECK: st.global.u32 [%r[[R0]]], %r{{[0-9]+}}; +;CHECK: ret; + %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0 + store i32 %x, i32* %i + ret void +} + +define ptx_device void @t4_global_u64(i64 %x) { +entry: +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i64; +;CHECK: st.global.u64 [%r[[R0]]], %rd{{[0-9]+}}; +;CHECK: ret; + %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0 + store i64 %x, i64* 
%i + ret void +} + +define ptx_device void @t4_global_f32(float %x) { +entry: +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_float; +;CHECK: st.global.f32 [%r[[R0]]], %f{{[0-9]+}}; +;CHECK: ret; + %i = getelementptr [10 x float]* @array_float, i32 0, i32 0 + store float %x, float* %i + ret void +} + +define ptx_device void @t4_global_f64(double %x) { +entry: +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_double; +;CHECK: st.global.f64 [%r[[R0]]], %fd{{[0-9]+}}; +;CHECK: ret; + %i = getelementptr [10 x double]* @array_double, i32 0, i32 0 + store double %x, double* %i + ret void +} + +define ptx_device void @t4_shared_u16(i16 %x) { +entry: +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i16; +;CHECK: st.shared.u16 [%r[[R0]]], %rh{{[0-9]+}}; +;CHECK: ret; + %i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0 + store i16 %x, i16 addrspace(4)* %i + ret void +} + +define ptx_device void @t4_shared_u32(i32 %x) { +entry: +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i32; +;CHECK: st.shared.u32 [%r[[R0]]], %r{{[0-9]+}}; +;CHECK: ret; + %i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0 + store i32 %x, i32 addrspace(4)* %i + ret void +} + +define ptx_device void @t4_shared_u64(i64 %x) { +entry: +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i64; +;CHECK: st.shared.u64 [%r[[R0]]], %rd{{[0-9]+}}; +;CHECK: ret; + %i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0 + store i64 %x, i64 addrspace(4)* %i + ret void +} + +define ptx_device void @t4_shared_f32(float %x) { +entry: +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_float; +;CHECK: st.shared.f32 [%r[[R0]]], %f{{[0-9]+}}; +;CHECK: ret; + %i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0 + store float %x, float addrspace(4)* %i + ret void +} + +define ptx_device void @t4_shared_f64(double %x) { +entry: +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_double; +;CHECK: st.shared.f64 [%r[[R0]]], %fd{{[0-9]+}}; +;CHECK: ret; + %i = 
getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0 + store double %x, double addrspace(4)* %i + ret void +} + +define ptx_device void @t5_u16(i16 %x) { +entry: +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i16; +;CHECK: st.global.u16 [%r[[R0]]+2], %rh{{[0-9]+}}; +;CHECK: ret; + %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1 + store i16 %x, i16* %i + ret void +} + +define ptx_device void @t5_u32(i32 %x) { +entry: +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i32; +;CHECK: st.global.u32 [%r[[R0]]+4], %r{{[0-9]+}}; +;CHECK: ret; + %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1 + store i32 %x, i32* %i + ret void +} + +define ptx_device void @t5_u64(i64 %x) { +entry: +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i64; +;CHECK: st.global.u64 [%r[[R0]]+8], %rd{{[0-9]+}}; +;CHECK: ret; + %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 1 + store i64 %x, i64* %i + ret void +} + +define ptx_device void @t5_f32(float %x) { +entry: +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_float; +;CHECK: st.global.f32 [%r[[R0]]+4], %f{{[0-9]+}}; +;CHECK: ret; + %i = getelementptr [10 x float]* @array_float, i32 0, i32 1 + store float %x, float* %i + ret void +} + +define ptx_device void @t5_f64(double %x) { +entry: +;CHECK: mov.u32 %r[[R0:[0-9]+]], array_double; +;CHECK: st.global.f64 [%r[[R0]]+8], %fd{{[0-9]+}}; +;CHECK: ret; + %i = getelementptr [10 x double]* @array_double, i32 0, i32 1 + store double %x, double* %i + ret void +}
diff --git a/src/LLVM/test/CodeGen/PTX/stack-object.ll b/src/LLVM/test/CodeGen/PTX/stack-object.ll new file mode 100644 index 0000000..65f8ee2 --- /dev/null +++ b/src/LLVM/test/CodeGen/PTX/stack-object.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -march=ptx32 -mattr=sm20 | FileCheck %s + +define ptx_device float @stack1(float %a) { + ; CHECK: .local .align 4 .b8 __local0[4]; + %a.2 = alloca float, align 4 + ; CHECK: st.local.f32 [__local0], %f0 + store float %a, float* %a.2 + %a.3 = load float* %a.2 + ret float %a.3 +} + +define ptx_device float @stack1_align8(float %a) { + ; CHECK: .local .align 8 .b8 __local0[4]; + %a.2 = alloca float, align 8 + ; CHECK: st.local.f32 [__local0], %f0 + store float %a, float* %a.2 + %a.3 = load float* %a.2 + ret float %a.3 +}
diff --git a/src/LLVM/test/CodeGen/PTX/sub.ll b/src/LLVM/test/CodeGen/PTX/sub.ll new file mode 100644 index 0000000..7ac886a --- /dev/null +++ b/src/LLVM/test/CodeGen/PTX/sub.ll
@@ -0,0 +1,71 @@ +; RUN: llc < %s -march=ptx32 | FileCheck %s + +define ptx_device i16 @t1_u16(i16 %x, i16 %y) { +; CHECK: sub.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}, %rh{{[0-9]+}}; +; CHECK: ret; + %z = sub i16 %x, %y + ret i16 %z +} + +define ptx_device i32 @t1_u32(i32 %x, i32 %y) { +; CHECK: sub.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}; +; CHECK: ret; + %z = sub i32 %x, %y + ret i32 %z +} + +define ptx_device i64 @t1_u64(i64 %x, i64 %y) { +; CHECK: sub.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}, %rd{{[0-9]+}}; +; CHECK: ret; + %z = sub i64 %x, %y + ret i64 %z +} + +define ptx_device float @t1_f32(float %x, float %y) { +; CHECK: sub.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}} +; CHECK: ret; + %z = fsub float %x, %y + ret float %z +} + +define ptx_device double @t1_f64(double %x, double %y) { +; CHECK: sub.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}} +; CHECK: ret; + %z = fsub double %x, %y + ret double %z +} + +define ptx_device i16 @t2_u16(i16 %x) { +; CHECK: add.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}, -1; +; CHECK: ret; + %z = sub i16 %x, 1 + ret i16 %z +} + +define ptx_device i32 @t2_u32(i32 %x) { +; CHECK: add.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, -1; +; CHECK: ret; + %z = sub i32 %x, 1 + ret i32 %z +} + +define ptx_device i64 @t2_u64(i64 %x) { +; CHECK: add.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}, -1; +; CHECK: ret; + %z = sub i64 %x, 1 + ret i64 %z +} + +define ptx_device float @t2_f32(float %x) { +; CHECK: add.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, 0DBFF0000000000000; +; CHECK: ret; + %z = fsub float %x, 1.0 + ret float %z +} + +define ptx_device double @t2_f64(double %x) { +; CHECK: add.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, 0DBFF0000000000000; +; CHECK: ret; + %z = fsub double %x, 1.0 + ret double %z +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2004-11-29-ShrCrash.ll b/src/LLVM/test/CodeGen/PowerPC/2004-11-29-ShrCrash.ll new file mode 100644 index 0000000..2883162 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2004-11-29-ShrCrash.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=ppc32 +define void @test() { + %tr1 = lshr i32 1, 0 ; <i32> [#uses=0] + ret void +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/2004-11-30-shift-crash.ll b/src/LLVM/test/CodeGen/PowerPC/2004-11-30-shift-crash.ll new file mode 100644 index 0000000..37dab2b --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2004-11-30-shift-crash.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=ppc32 + +define void @main() { + %tr4 = shl i64 1, 0 ; <i64> [#uses=0] + ret void +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/2004-11-30-shr-var-crash.ll b/src/LLVM/test/CodeGen/PowerPC/2004-11-30-shr-var-crash.ll new file mode 100644 index 0000000..b1c100b --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2004-11-30-shr-var-crash.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=ppc32 + +define void @main() { + %shamt = add i8 0, 1 ; <i8> [#uses=1] + %shift.upgrd.1 = zext i8 %shamt to i64 ; <i64> [#uses=1] + %tr2 = ashr i64 1, %shift.upgrd.1 ; <i64> [#uses=0] + ret void +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/2004-12-12-ZeroSizeCommon.ll b/src/LLVM/test/CodeGen/PowerPC/2004-12-12-ZeroSizeCommon.ll new file mode 100644 index 0000000..9f48f0a --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2004-12-12-ZeroSizeCommon.ll
@@ -0,0 +1,4 @@ +; RUN: llc < %s -march=ppc32 | not grep .comm.*X,0 + +@X = linkonce global { } zeroinitializer ; <{ }*> [#uses=0] +
diff --git a/src/LLVM/test/CodeGen/PowerPC/2005-01-14-SetSelectCrash.ll b/src/LLVM/test/CodeGen/PowerPC/2005-01-14-SetSelectCrash.ll new file mode 100644 index 0000000..46bc99e --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2005-01-14-SetSelectCrash.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=ppc32 + +define i32 @main() { + %setle = icmp sle i64 1, 0 ; <i1> [#uses=1] + %select = select i1 true, i1 %setle, i1 true ; <i1> [#uses=0] + ret i32 0 +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/2005-01-14-UndefLong.ll b/src/LLVM/test/CodeGen/PowerPC/2005-01-14-UndefLong.ll new file mode 100644 index 0000000..7c190b2 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2005-01-14-UndefLong.ll
@@ -0,0 +1,5 @@ +; RUN: llc < %s -march=ppc32 + +define i64 @test() { + ret i64 undef +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2005-08-12-rlwimi-crash.ll b/src/LLVM/test/CodeGen/PowerPC/2005-08-12-rlwimi-crash.ll new file mode 100644 index 0000000..44cc341 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2005-08-12-rlwimi-crash.ll
@@ -0,0 +1,13 @@ +; this should not crash the ppc backend + +; RUN: llc < %s -march=ppc32 + + +define i32 @test(i32 %j.0.0.i) { + %tmp.85.i = and i32 %j.0.0.i, 7 ; <i32> [#uses=1] + %tmp.161278.i = bitcast i32 %tmp.85.i to i32 ; <i32> [#uses=1] + %tmp.5.i77.i = lshr i32 %tmp.161278.i, 3 ; <i32> [#uses=1] + ret i32 %tmp.5.i77.i +} + +
diff --git a/src/LLVM/test/CodeGen/PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll b/src/LLVM/test/CodeGen/PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll new file mode 100644 index 0000000..17a5867 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll
@@ -0,0 +1,11 @@ +; This function should have exactly one call to fixdfdi, no more! + +; RUN: llc < %s -march=ppc32 -mattr=-64bit | \ +; RUN: grep {bl .*fixdfdi} | count 1 + +define double @test2(double %tmp.7705) { + %mem_tmp.2.0.in = fptosi double %tmp.7705 to i64 ; <i64> [#uses=1] + %mem_tmp.2.0 = sitofp i64 %mem_tmp.2.0.in to double ; <double> [#uses=1] + ret double %mem_tmp.2.0 +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/2005-10-08-ArithmeticRotate.ll b/src/LLVM/test/CodeGen/PowerPC/2005-10-08-ArithmeticRotate.ll new file mode 100644 index 0000000..554ad3c --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2005-10-08-ArithmeticRotate.ll
@@ -0,0 +1,11 @@ +; This was erroneously being turned into an rlwinm instruction. +; The sign bit does matter in this case. + +; RUN: llc < %s -march=ppc32 | grep srawi + +define i32 @test(i32 %X) { + %Y = and i32 %X, -2 ; <i32> [#uses=1] + %Z = ashr i32 %Y, 11 ; <i32> [#uses=1] + ret i32 %Z +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/2005-11-30-vastart-crash.ll b/src/LLVM/test/CodeGen/PowerPC/2005-11-30-vastart-crash.ll new file mode 100644 index 0000000..43b90cb --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2005-11-30-vastart-crash.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s + +target datalayout = "E-p:32:32" +target triple = "powerpc-apple-darwin8.2.0" + +define void @bar(i32 %G, i32 %E, i32 %F, i32 %A, i32 %B, i32 %C, i32 %D, i8* %fmt, ...) { + %ap = alloca i8* ; <i8**> [#uses=2] + %va.upgrd.1 = bitcast i8** %ap to i8* ; <i8*> [#uses=1] + call void @llvm.va_start( i8* %va.upgrd.1 ) + %tmp.1 = load i8** %ap ; <i8*> [#uses=1] + %tmp.0 = call double @foo( i8* %tmp.1 ) ; <double> [#uses=0] + ret void +} + +declare void @llvm.va_start(i8*) + +declare double @foo(i8*) +
diff --git a/src/LLVM/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll b/src/LLVM/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll new file mode 100644 index 0000000..95885a0 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s | not grep {, f1} + +target datalayout = "E-p:32:32" +target triple = "powerpc-apple-darwin8.2.0" + +; Dead argument should reserve an FP register. +define double @bar(double %DEAD, double %X, double %Y) { + %tmp.2 = fadd double %X, %Y ; <double> [#uses=1] + ret double %tmp.2 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll b/src/LLVM/test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll new file mode 100644 index 0000000..0b99a9a --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s + +define void @iterative_hash_host_wide_int() { + %zero = alloca i32 ; <i32*> [#uses=2] + %b = alloca i32 ; <i32*> [#uses=1] + store i32 0, i32* %zero + %tmp = load i32* %zero ; <i32> [#uses=1] + %tmp5 = bitcast i32 %tmp to i32 ; <i32> [#uses=1] + %tmp6.u = add i32 %tmp5, 32 ; <i32> [#uses=1] + %tmp6 = bitcast i32 %tmp6.u to i32 ; <i32> [#uses=1] + %tmp7 = load i64* null ; <i64> [#uses=1] + %tmp6.upgrd.1 = trunc i32 %tmp6 to i8 ; <i8> [#uses=1] + %shift.upgrd.2 = zext i8 %tmp6.upgrd.1 to i64 ; <i64> [#uses=1] + %tmp8 = ashr i64 %tmp7, %shift.upgrd.2 ; <i64> [#uses=1] + %tmp8.upgrd.3 = trunc i64 %tmp8 to i32 ; <i32> [#uses=1] + store i32 %tmp8.upgrd.3, i32* %b + unreachable +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/2006-04-01-FloatDoubleExtend.ll b/src/LLVM/test/CodeGen/PowerPC/2006-04-01-FloatDoubleExtend.ll new file mode 100644 index 0000000..ba2df2f --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2006-04-01-FloatDoubleExtend.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=ppc32 + + +define double @CalcSpeed(float %tmp127) { + %tmp145 = fpext float %tmp127 to double ; <double> [#uses=1] + %tmp150 = call double asm "frsqrte $0,$1", "=f,f"( double %tmp145 ) ; <double> [#uses=1] + ret double %tmp150 +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll b/src/LLVM/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll new file mode 100644 index 0000000..bce027d --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g5 | \ +; RUN: grep {vspltish v.*, 10} + +define void @test(<8 x i16>* %P) { + %tmp = load <8 x i16>* %P ; <<8 x i16>> [#uses=1] + %tmp1 = add <8 x i16> %tmp, < i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10 > ; <<8 x i16>> [#uses=1] + store <8 x i16> %tmp1, <8 x i16>* %P + ret void +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/2006-04-19-vmaddfp-crash.ll b/src/LLVM/test/CodeGen/PowerPC/2006-04-19-vmaddfp-crash.ll new file mode 100644 index 0000000..73608e4 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2006-04-19-vmaddfp-crash.ll
@@ -0,0 +1,58 @@ +; RUN: llc < %s -march=ppc32 -mcpu=g5 +; END. + +define void @test(i8* %stack) { +entry: + %tmp9 = icmp eq i32 0, 0 ; <i1> [#uses=1] + %tmp30 = icmp eq i32 0, 0 ; <i1> [#uses=1] + br i1 %tmp30, label %cond_next54, label %cond_true31 +cond_true860: ; preds = %bb855 + %tmp879 = tail call <4 x float> @llvm.ppc.altivec.vmaddfp( <4 x float> zeroinitializer, <4 x float> zeroinitializer, <4 x float> zeroinitializer ) ; <<4 x float>> [#uses=1] + %tmp880 = bitcast <4 x float> %tmp879 to <4 x i32> ; <<4 x i32>> [#uses=2] + %tmp883 = shufflevector <4 x i32> %tmp880, <4 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > ; <<4 x i32>> [#uses=1] + %tmp883.upgrd.1 = bitcast <4 x i32> %tmp883 to <4 x float> ; <<4 x float>> [#uses=1] + %tmp885 = shufflevector <4 x i32> %tmp880, <4 x i32> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 > ; <<4 x i32>> [#uses=1] + %tmp885.upgrd.2 = bitcast <4 x i32> %tmp885 to <4 x float> ; <<4 x float>> [#uses=1] + br label %cond_next905 +cond_true31: ; preds = %entry + ret void +cond_next54: ; preds = %entry + br i1 %tmp9, label %cond_false385, label %bb279 +bb279: ; preds = %cond_next54 + ret void +cond_false385: ; preds = %cond_next54 + %tmp388 = icmp eq i32 0, 0 ; <i1> [#uses=1] + br i1 %tmp388, label %cond_next463, label %cond_true389 +cond_true389: ; preds = %cond_false385 + ret void +cond_next463: ; preds = %cond_false385 + %tmp1208107 = icmp ugt i8* null, %stack ; <i1> [#uses=1] + br i1 %tmp1208107, label %cond_true1209.preheader, label %bb1212 +cond_true498: ; preds = %cond_true1209.preheader + ret void +cond_true519: ; preds = %cond_true1209.preheader + %bothcond = or i1 false, false ; <i1> [#uses=1] + br i1 %bothcond, label %bb855, label %bb980 +cond_false548: ; preds = %cond_true1209.preheader + ret void +bb855: ; preds = %cond_true519 + %tmp859 = icmp eq i32 0, 0 ; <i1> [#uses=1] + br i1 %tmp859, label %cond_true860, label %cond_next905 +cond_next905: ; preds = %bb855, %cond_true860 + %vfpw2.4 = phi <4 x float> [ 
%tmp885.upgrd.2, %cond_true860 ], [ undef, %bb855 ] ; <<4 x float>> [#uses=0] + %vfpw1.4 = phi <4 x float> [ %tmp883.upgrd.1, %cond_true860 ], [ undef, %bb855 ] ; <<4 x float>> [#uses=0] + %tmp930 = bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>> [#uses=0] + ret void +bb980: ; preds = %cond_true519 + ret void +cond_true1209.preheader: ; preds = %cond_next463 + %tmp496 = and i32 0, 12288 ; <i32> [#uses=1] + switch i32 %tmp496, label %cond_false548 [ + i32 0, label %cond_true498 + i32 4096, label %cond_true519 + ] +bb1212: ; preds = %cond_next463 + ret void +} + +declare <4 x float> @llvm.ppc.altivec.vmaddfp(<4 x float>, <4 x float>, <4 x float>)
diff --git a/src/LLVM/test/CodeGen/PowerPC/2006-05-12-rlwimi-crash.ll b/src/LLVM/test/CodeGen/PowerPC/2006-05-12-rlwimi-crash.ll new file mode 100644 index 0000000..8b41082 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2006-05-12-rlwimi-crash.ll
@@ -0,0 +1,55 @@ +; RUN: llc < %s -march=ppc32 +; END. + + %struct.attr_desc = type { i8*, %struct.attr_desc*, %struct.attr_value*, %struct.attr_value*, i32 } + %struct.attr_value = type { %struct.rtx_def*, %struct.attr_value*, %struct.insn_ent*, i32, i32 } + %struct.insn_def = type { %struct.insn_def*, %struct.rtx_def*, i32, i32, i32, i32, i32 } + %struct.insn_ent = type { %struct.insn_ent*, %struct.insn_def* } + %struct.rtx_def = type { i16, i8, i8, %struct.u } + %struct.u = type { [1 x i64] } + +define void @find_attr() { +entry: + %tmp26 = icmp eq %struct.attr_desc* null, null ; <i1> [#uses=1] + br i1 %tmp26, label %bb30, label %cond_true27 +cond_true27: ; preds = %entry + ret void +bb30: ; preds = %entry + %tmp67 = icmp eq %struct.attr_desc* null, null ; <i1> [#uses=1] + br i1 %tmp67, label %cond_next92, label %cond_true68 +cond_true68: ; preds = %bb30 + ret void +cond_next92: ; preds = %bb30 + %tmp173 = getelementptr %struct.attr_desc* null, i32 0, i32 4 ; <i32*> [#uses=2] + %tmp174 = load i32* %tmp173 ; <i32> [#uses=1] + %tmp177 = and i32 %tmp174, -9 ; <i32> [#uses=1] + store i32 %tmp177, i32* %tmp173 + %tmp180 = getelementptr %struct.attr_desc* null, i32 0, i32 4 ; <i32*> [#uses=1] + %tmp181 = load i32* %tmp180 ; <i32> [#uses=1] + %tmp185 = getelementptr %struct.attr_desc* null, i32 0, i32 4 ; <i32*> [#uses=2] + %tmp186 = load i32* %tmp185 ; <i32> [#uses=1] + %tmp183187 = shl i32 %tmp181, 1 ; <i32> [#uses=1] + %tmp188 = and i32 %tmp183187, 16 ; <i32> [#uses=1] + %tmp190 = and i32 %tmp186, -17 ; <i32> [#uses=1] + %tmp191 = or i32 %tmp190, %tmp188 ; <i32> [#uses=1] + store i32 %tmp191, i32* %tmp185 + %tmp193 = getelementptr %struct.attr_desc* null, i32 0, i32 4 ; <i32*> [#uses=1] + %tmp194 = load i32* %tmp193 ; <i32> [#uses=1] + %tmp198 = getelementptr %struct.attr_desc* null, i32 0, i32 4 ; <i32*> [#uses=2] + %tmp199 = load i32* %tmp198 ; <i32> [#uses=1] + %tmp196200 = shl i32 %tmp194, 2 ; <i32> [#uses=1] + %tmp201 = and i32 %tmp196200, 64 ; <i32> [#uses=1] 
+ %tmp203 = and i32 %tmp199, -65 ; <i32> [#uses=1] + %tmp204 = or i32 %tmp203, %tmp201 ; <i32> [#uses=1] + store i32 %tmp204, i32* %tmp198 + %tmp206 = getelementptr %struct.attr_desc* null, i32 0, i32 4 ; <i32*> [#uses=1] + %tmp207 = load i32* %tmp206 ; <i32> [#uses=1] + %tmp211 = getelementptr %struct.attr_desc* null, i32 0, i32 4 ; <i32*> [#uses=2] + %tmp212 = load i32* %tmp211 ; <i32> [#uses=1] + %tmp209213 = shl i32 %tmp207, 1 ; <i32> [#uses=1] + %tmp214 = and i32 %tmp209213, 128 ; <i32> [#uses=1] + %tmp216 = and i32 %tmp212, -129 ; <i32> [#uses=1] + %tmp217 = or i32 %tmp216, %tmp214 ; <i32> [#uses=1] + store i32 %tmp217, i32* %tmp211 + ret void +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll b/src/LLVM/test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll new file mode 100644 index 0000000..a9d3d24 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -mtriple=powerpc64-apple-darwin | grep extsw | count 2 + +@lens = external global i8* ; <i8**> [#uses=1] +@vals = external global i32* ; <i32**> [#uses=1] + +define i32 @test(i32 %i) { + %tmp = load i8** @lens ; <i8*> [#uses=1] + %tmp1 = getelementptr i8* %tmp, i32 %i ; <i8*> [#uses=1] + %tmp.upgrd.1 = load i8* %tmp1 ; <i8> [#uses=1] + %tmp2 = zext i8 %tmp.upgrd.1 to i32 ; <i32> [#uses=1] + %tmp3 = load i32** @vals ; <i32*> [#uses=1] + %tmp5 = sub i32 1, %tmp2 ; <i32> [#uses=1] + %tmp6 = getelementptr i32* %tmp3, i32 %tmp5 ; <i32*> [#uses=1] + %tmp7 = load i32* %tmp6 ; <i32> [#uses=1] + ret i32 %tmp7 +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/2006-07-19-stwbrx-crash.ll b/src/LLVM/test/CodeGen/PowerPC/2006-07-19-stwbrx-crash.ll new file mode 100644 index 0000000..ef5566a --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2006-07-19-stwbrx-crash.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=ppc32 + +define void @img2buf(i32 %symbol_size_in_bytes, i16* %ui16) nounwind { + %tmp93 = load i16* null ; <i16> [#uses=1] + %tmp99 = call i16 @llvm.bswap.i16( i16 %tmp93 ) ; <i16> [#uses=1] + store i16 %tmp99, i16* %ui16 + ret void +} + +declare i16 @llvm.bswap.i16(i16) +
diff --git a/src/LLVM/test/CodeGen/PowerPC/2006-08-11-RetVector.ll b/src/LLVM/test/CodeGen/PowerPC/2006-08-11-RetVector.ll new file mode 100644 index 0000000..3ea1ae1 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2006-08-11-RetVector.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vsldoi +; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep vor + +define <4 x float> @func(<4 x float> %fp0, <4 x float> %fp1) { + %tmp76 = shufflevector <4 x float> %fp0, <4 x float> %fp1, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>> [#uses=1] + ret <4 x float> %tmp76 +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll b/src/LLVM/test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll new file mode 100644 index 0000000..a9ae7f7 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll
@@ -0,0 +1,30 @@ +; RUN: llc < %s + + %struct..0anon = type { i32 } + %struct.rtx_def = type { i16, i8, i8, [1 x %struct..0anon] } + +define fastcc void @immed_double_const(i32 %i0, i32 %i1) { +entry: + %tmp1 = load i32* null ; <i32> [#uses=1] + switch i32 %tmp1, label %bb103 [ + i32 1, label %bb + i32 3, label %bb + ] +bb: ; preds = %entry, %entry + %tmp14 = icmp sgt i32 0, 31 ; <i1> [#uses=1] + br i1 %tmp14, label %cond_next77, label %cond_next17 +cond_next17: ; preds = %bb + ret void +cond_next77: ; preds = %bb + %tmp79.not = icmp ne i32 %i1, 0 ; <i1> [#uses=1] + %tmp84 = icmp slt i32 %i0, 0 ; <i1> [#uses=2] + %bothcond1 = or i1 %tmp79.not, %tmp84 ; <i1> [#uses=1] + br i1 %bothcond1, label %bb88, label %bb99 +bb88: ; preds = %cond_next77 + %bothcond2 = and i1 false, %tmp84 ; <i1> [#uses=0] + ret void +bb99: ; preds = %cond_next77 + ret void +bb103: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2006-09-28-shift_64.ll b/src/LLVM/test/CodeGen/PowerPC/2006-09-28-shift_64.ll new file mode 100644 index 0000000..5faf759 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2006-09-28-shift_64.ll
@@ -0,0 +1,25 @@ +; RUN: llc < %s -march=ppc64 + +target datalayout = "E-p:64:64" +target triple = "powerpc64-apple-darwin8" + +define void @glArrayElement_CompExec() { +entry: + %tmp3 = and i64 0, -8388609 ; <i64> [#uses=1] + br label %cond_true24 +cond_false: ; preds = %cond_true24 + ret void +cond_true24: ; preds = %cond_true24, %entry + %indvar.ph = phi i32 [ 0, %entry ], [ %indvar.next, %cond_true24 ] ; <i32> [#uses=1] + %indvar = add i32 0, %indvar.ph ; <i32> [#uses=2] + %code.0 = trunc i32 %indvar to i8 ; <i8> [#uses=1] + %tmp5 = add i8 %code.0, 16 ; <i8> [#uses=1] + %shift.upgrd.1 = zext i8 %tmp5 to i64 ; <i64> [#uses=1] + %tmp7 = lshr i64 %tmp3, %shift.upgrd.1 ; <i64> [#uses=1] + %tmp7.upgrd.2 = trunc i64 %tmp7 to i32 ; <i32> [#uses=1] + %tmp8 = and i32 %tmp7.upgrd.2, 1 ; <i32> [#uses=1] + %tmp8.upgrd.3 = icmp eq i32 %tmp8, 0 ; <i1> [#uses=1] + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1] + br i1 %tmp8.upgrd.3, label %cond_false, label %cond_true24 +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll b/src/LLVM/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll new file mode 100644 index 0000000..8e93047 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -march=ppc32 -combiner-alias-analysis | grep f5 + +target datalayout = "E-p:32:32" +target triple = "powerpc-apple-darwin8.2.0" + %struct.Point = type { double, double, double } + +define void @offset(%struct.Point* %pt, double %x, double %y, double %z) { +entry: + %tmp = getelementptr %struct.Point* %pt, i32 0, i32 0 ; <double*> [#uses=2] + %tmp.upgrd.1 = load double* %tmp ; <double> [#uses=1] + %tmp2 = fadd double %tmp.upgrd.1, %x ; <double> [#uses=1] + store double %tmp2, double* %tmp + %tmp6 = getelementptr %struct.Point* %pt, i32 0, i32 1 ; <double*> [#uses=2] + %tmp7 = load double* %tmp6 ; <double> [#uses=1] + %tmp9 = fadd double %tmp7, %y ; <double> [#uses=1] + store double %tmp9, double* %tmp6 + %tmp13 = getelementptr %struct.Point* %pt, i32 0, i32 2 ; <double*> [#uses=2] + %tmp14 = load double* %tmp13 ; <double> [#uses=1] + %tmp16 = fadd double %tmp14, %z ; <double> [#uses=1] + store double %tmp16, double* %tmp13 + ret void +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/2006-10-13-Miscompile.ll b/src/LLVM/test/CodeGen/PowerPC/2006-10-13-Miscompile.ll new file mode 100644 index 0000000..16a2186 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2006-10-13-Miscompile.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=ppc32 | not grep IMPLICIT_DEF + +define void @foo(i64 %X) { +entry: + %tmp1 = and i64 %X, 3 ; <i64> [#uses=1] + %tmp = icmp sgt i64 %tmp1, 2 ; <i1> [#uses=1] + br i1 %tmp, label %UnifiedReturnBlock, label %cond_true +cond_true: ; preds = %entry + %tmp.upgrd.1 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0] + ret void +UnifiedReturnBlock: ; preds = %entry + ret void +} + +declare i32 @bar(...) +
diff --git a/src/LLVM/test/CodeGen/PowerPC/2006-10-17-brcc-miscompile.ll b/src/LLVM/test/CodeGen/PowerPC/2006-10-17-brcc-miscompile.ll new file mode 100644 index 0000000..6a46451 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2006-10-17-brcc-miscompile.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -march=ppc32 | grep xor + +target datalayout = "E-p:32:32" +target triple = "powerpc-apple-darwin8.7.0" + +define void @foo(i32 %X) { +entry: + %tmp1 = and i32 %X, 3 ; <i32> [#uses=1] + %tmp2 = xor i32 %tmp1, 1 ; <i32> [#uses=1] + %tmp = icmp eq i32 %tmp2, 0 ; <i1> [#uses=1] + br i1 %tmp, label %UnifiedReturnBlock, label %cond_true +cond_true: ; preds = %entry + tail call i32 (...)* @bar( ) ; <i32>:0 [#uses=0] + ret void +UnifiedReturnBlock: ; preds = %entry + ret void +} + +declare i32 @bar(...) +
diff --git a/src/LLVM/test/CodeGen/PowerPC/2006-10-17-ppc64-alloca.ll b/src/LLVM/test/CodeGen/PowerPC/2006-10-17-ppc64-alloca.ll new file mode 100644 index 0000000..b780507 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2006-10-17-ppc64-alloca.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=ppc64 + +define i32* @foo(i32 %n) { + %A = alloca i32, i32 %n ; <i32*> [#uses=1] + ret i32* %A +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/2006-11-10-DAGCombineMiscompile.ll b/src/LLVM/test/CodeGen/PowerPC/2006-11-10-DAGCombineMiscompile.ll new file mode 100644 index 0000000..e2934c6 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2006-11-10-DAGCombineMiscompile.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=ppc32 | grep rlwimi + +define void @test(i16 %div.0.i.i.i.i, i32 %L_num.0.i.i.i.i, i32 %tmp1.i.i206.i.i, i16* %P) { + %X = shl i16 %div.0.i.i.i.i, 1 ; <i16> [#uses=1] + %tmp28.i.i.i.i = shl i32 %L_num.0.i.i.i.i, 1 ; <i32> [#uses=1] + %tmp31.i.i.i.i = icmp slt i32 %tmp28.i.i.i.i, %tmp1.i.i206.i.i ; <i1> [#uses=1] + %tmp31.i.i.i.i.upgrd.1 = zext i1 %tmp31.i.i.i.i to i16 ; <i16> [#uses=1] + %tmp371.i.i.i.i1 = or i16 %tmp31.i.i.i.i.upgrd.1, %X ; <i16> [#uses=1] + %div.0.be.i.i.i.i = xor i16 %tmp371.i.i.i.i1, 1 ; <i16> [#uses=1] + store i16 %div.0.be.i.i.i.i, i16* %P + ret void +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/2006-11-29-AltivecFPSplat.ll b/src/LLVM/test/CodeGen/PowerPC/2006-11-29-AltivecFPSplat.ll new file mode 100644 index 0000000..e782815 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2006-11-29-AltivecFPSplat.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=ppc32 -mcpu=g5 + +define void @glgRunProcessor15() { + %tmp26355.i = shufflevector <4 x float> zeroinitializer, <4 x float> < float 0x379FFFE000000000, float 0x379FFFE000000000, float 0x379FFFE000000000, float 0x379FFFE000000000 >, <4 x i32> < i32 0, i32 1, i32 2, i32 7 >; <<4 x float>> [#uses=1] + %tmp3030030304.i = bitcast <4 x float> %tmp26355.i to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp30305.i = shufflevector <8 x i16> zeroinitializer, <8 x i16> %tmp3030030304.i, <8 x i32> < i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15 > ; <<8 x i16>> [#uses=1] + %tmp30305.i.upgrd.1 = bitcast <8 x i16> %tmp30305.i to <4 x i32> ; <<4 x i32>> [#uses=1] + store <4 x i32> %tmp30305.i.upgrd.1, <4 x i32>* null + ret void +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2006-12-07-LargeAlloca.ll b/src/LLVM/test/CodeGen/PowerPC/2006-12-07-LargeAlloca.ll new file mode 100644 index 0000000..f3e1ab3 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2006-12-07-LargeAlloca.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -march=ppc64 +; RUN: llc < %s -march=ppc32 +; RUN: llc < %s + +define void @bitap() { +entry: + %RMask.i = alloca [256 x i32], align 16 ; <[256 x i32]*> [#uses=1] + %buffer = alloca [147456 x i8], align 16 ; <[147456 x i8]*> [#uses=0] + br i1 false, label %bb19, label %bb.preheader +bb.preheader: ; preds = %entry + ret void +bb19: ; preds = %entry + br i1 false, label %bb12.i, label %cond_next39 +bb12.i: ; preds = %bb12.i, %bb19 + %i.0.i = phi i32 [ %tmp11.i, %bb12.i ], [ 0, %bb19 ] ; <i32> [#uses=2] + %gep.upgrd.1 = zext i32 %i.0.i to i64 ; <i64> [#uses=1] + %tmp9.i = getelementptr [256 x i32]* %RMask.i, i32 0, i64 %gep.upgrd.1 ; <i32*> [#uses=1] + store i32 0, i32* %tmp9.i + %tmp11.i = add i32 %i.0.i, 1 ; <i32> [#uses=1] + br label %bb12.i +cond_next39: ; preds = %bb19 + ret void +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll b/src/LLVM/test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll new file mode 100644 index 0000000..4136b47 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -march=ppc64 +; RUN: llc < %s -march=ppc32 +; RUN: llc < %s + +@qsz.b = external global i1 ; <i1*> [#uses=1] + +define fastcc void @qst() { +entry: + br i1 true, label %cond_next71, label %cond_true +cond_true: ; preds = %entry + ret void +cond_next71: ; preds = %entry + %tmp73.b = load i1* @qsz.b ; <i1> [#uses=1] + %ii.4.ph = select i1 %tmp73.b, i64 4, i64 0 ; <i64> [#uses=1] + br label %bb139 +bb82: ; preds = %bb139 + ret void +bb139: ; preds = %bb139, %cond_next71 + %exitcond89 = icmp eq i64 0, %ii.4.ph ; <i1> [#uses=1] + br i1 %exitcond89, label %bb82, label %bb139 +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/2007-01-04-ArgExtension.ll b/src/LLVM/test/CodeGen/PowerPC/2007-01-04-ArgExtension.ll new file mode 100644 index 0000000..5098749 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2007-01-04-ArgExtension.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=ppc32 | grep extsb +; RUN: llc < %s -march=ppc32 | grep extsh + +define i32 @p1(i8 %c, i16 %s) { +entry: + %tmp = sext i8 %c to i32 ; <i32> [#uses=1] + %tmp1 = sext i16 %s to i32 ; <i32> [#uses=1] + %tmp2 = add i32 %tmp1, %tmp ; <i32> [#uses=1] + ret i32 %tmp2 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll b/src/LLVM/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll new file mode 100644 index 0000000..b4b2e38 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll
@@ -0,0 +1,26 @@ +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | \ +; RUN: grep cntlzw + +define i32 @foo() nounwind { +entry: + %retval = alloca i32, align 4 ; <i32*> [#uses=2] + %temp = alloca i32, align 4 ; <i32*> [#uses=2] + %ctz_x = alloca i32, align 4 ; <i32*> [#uses=3] + %ctz_c = alloca i32, align 4 ; <i32*> [#uses=2] + store i32 61440, i32* %ctz_x + %tmp = load i32* %ctz_x ; <i32> [#uses=1] + %tmp1 = sub i32 0, %tmp ; <i32> [#uses=1] + %tmp2 = load i32* %ctz_x ; <i32> [#uses=1] + %tmp3 = and i32 %tmp1, %tmp2 ; <i32> [#uses=1] + %tmp4 = call i32 asm "$(cntlz$|cntlzw$) $0,$1", "=r,r,~{dirflag},~{fpsr},~{flags}"( i32 %tmp3 ) ; <i32> [#uses=1] + store i32 %tmp4, i32* %ctz_c + %tmp5 = load i32* %ctz_c ; <i32> [#uses=1] + store i32 %tmp5, i32* %temp + %tmp6 = load i32* %temp ; <i32> [#uses=1] + store i32 %tmp6, i32* %retval + br label %return + +return: ; preds = %entry + %retval2 = load i32* %retval ; <i32> [#uses=1] + ret i32 %retval2 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2007-01-29-lbrx-asm.ll b/src/LLVM/test/CodeGen/PowerPC/2007-01-29-lbrx-asm.ll new file mode 100644 index 0000000..e529edc --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2007-01-29-lbrx-asm.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=ppc32 +; RUN: llc < %s -march=ppc64 + +define i16 @test(i8* %d1, i16* %d2) { + %tmp237 = call i16 asm "lhbrx $0, $2, $1", "=r,r,bO,m"( i8* %d1, i32 0, i16* %d2 ) ; <i16> [#uses=1] + ret i16 %tmp237 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2007-01-31-InlineAsmAddrMode.ll b/src/LLVM/test/CodeGen/PowerPC/2007-01-31-InlineAsmAddrMode.ll new file mode 100644 index 0000000..43d662b --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2007-01-31-InlineAsmAddrMode.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -march=ppc32 +; RUN: llc < %s -march=ppc64 + +; Test two things: 1) that a frameidx can be rewritten in an inline asm +; 2) that inline asms can handle reg+imm addr modes. + + %struct.A = type { i32, i32 } + + +define void @test1() { +entry: + %Out = alloca %struct.A, align 4 ; <%struct.A*> [#uses=1] + %tmp2 = getelementptr %struct.A* %Out, i32 0, i32 1 + %tmp5 = call i32 asm "lwbrx $0, $1", "=r,m"(i32* %tmp2 ) + ret void +} + +define void @test2() { +entry: + %Out = alloca %struct.A, align 4 ; <%struct.A*> [#uses=1] + %tmp2 = getelementptr %struct.A* %Out, i32 0, i32 0 ; <i32*> [#uses=1] + %tmp5 = call i32 asm "lwbrx $0, $2, $1", "=r,r,bO,m"( i8* null, i32 0, i32* %tmp2 ) ; <i32> [#uses=0] + ret void +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2007-02-16-AlignPacked.ll b/src/LLVM/test/CodeGen/PowerPC/2007-02-16-AlignPacked.ll new file mode 100644 index 0000000..f893318 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2007-02-16-AlignPacked.ll
@@ -0,0 +1,4 @@ +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | \ +; RUN: grep align.*3 + +@X = global <{i32, i32}> <{ i32 1, i32 123 }>
diff --git a/src/LLVM/test/CodeGen/PowerPC/2007-02-16-InlineAsmNConstraint.ll b/src/LLVM/test/CodeGen/PowerPC/2007-02-16-InlineAsmNConstraint.ll new file mode 100644 index 0000000..be277fa --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2007-02-16-InlineAsmNConstraint.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s + +target datalayout = "E-p:32:32" +target triple = "powerpc-apple-darwin8.8.0" + + +define void @blargh() { +entry: + %tmp4 = call i32 asm "rlwimi $0,$2,$3,$4,$5", "=r,0,r,n,n,n"( i32 0, i32 0, i32 0, i32 24, i32 31 ) ; <i32> [#uses=0] + unreachable +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2007-02-23-lr-saved-twice.ll b/src/LLVM/test/CodeGen/PowerPC/2007-02-23-lr-saved-twice.ll new file mode 100644 index 0000000..ec6fd33 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2007-02-23-lr-saved-twice.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s | grep mflr | count 1 + +target datalayout = "e-p:32:32" +target triple = "powerpc-apple-darwin8" +@str = internal constant [18 x i8] c"hello world!, %d\0A\00" ; <[18 x i8]*> [#uses=1] + + +define i32 @main() { +entry: + %tmp = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([18 x i8]* @str, i32 0, i32 0) ) ; <i32> [#uses=0] + ret i32 0 +} + +declare i32 @printf(i8*, ...)
diff --git a/src/LLVM/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll b/src/LLVM/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll new file mode 100644 index 0000000..ae81a6c --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=ppc64 -mcpu=g5 | grep cntlzd + +define i32 @_ZNK4llvm5APInt17countLeadingZerosEv(i64 *%t) nounwind { + %tmp19 = load i64* %t + %tmp22 = tail call i64 @llvm.ctlz.i64( i64 %tmp19 ) ; <i64> [#uses=1] + %tmp23 = trunc i64 %tmp22 to i32 + %tmp89 = add i32 %tmp23, -64 ; <i32> [#uses=1] + %tmp90 = add i32 %tmp89, 0 ; <i32> [#uses=1] + ret i32 %tmp90 +} + +declare i64 @llvm.ctlz.i64(i64)
diff --git a/src/LLVM/test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll b/src/LLVM/test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll new file mode 100644 index 0000000..9bf1cd4 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll
@@ -0,0 +1,1801 @@ +; RUN: llc < %s -march=ppc32 -mcpu=g5 + +define void @test(<4 x float>*, { { i16, i16, i32 } }*) { +xOperationInitMasks.exit: + %.sub7896 = getelementptr [4 x <4 x i32>]* null, i32 0, i32 0 ; <<4 x i32>*> [#uses=24] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 175, i32 3 ; <<4 x float>*>:2 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 174, i32 2 ; <<4 x float>*>:3 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 174, i32 3 ; <<4 x float>*>:4 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 173, i32 1 ; <<4 x float>*>:5 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 173, i32 2 ; <<4 x float>*>:6 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 173, i32 3 ; <<4 x float>*>:7 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 172, i32 1 ; <<4 x float>*>:8 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 172, i32 2 ; <<4 x float>*>:9 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 172, i32 3 ; <<4 x float>*>:10 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 171, i32 1 ; <<4 x float>*>:11 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 171, i32 2 ; <<4 x float>*>:12 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 171, i32 3 ; <<4 x float>*>:13 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 170, i32 1 ; <<4 x float>*>:14 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 170, i32 2 ; <<4 x float>*>:15 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 170, i32 3 ; <<4 x float>*>:16 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 169, i32 1 ; <<4 x float>*>:17 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 169, i32 2 ; <<4 x float>*>:18 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 
0, i32 169, i32 3 ; <<4 x float>*>:19 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 168, i32 1 ; <<4 x float>*>:20 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 168, i32 2 ; <<4 x float>*>:21 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 168, i32 3 ; <<4 x float>*>:22 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 167, i32 1 ; <<4 x float>*>:23 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 167, i32 2 ; <<4 x float>*>:24 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 167, i32 3 ; <<4 x float>*>:25 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 166, i32 1 ; <<4 x float>*>:26 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 166, i32 2 ; <<4 x float>*>:27 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 166, i32 3 ; <<4 x float>*>:28 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 165, i32 1 ; <<4 x float>*>:29 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 165, i32 2 ; <<4 x float>*>:30 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 165, i32 3 ; <<4 x float>*>:31 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 164, i32 1 ; <<4 x float>*>:32 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 164, i32 2 ; <<4 x float>*>:33 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 164, i32 3 ; <<4 x float>*>:34 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 163, i32 1 ; <<4 x float>*>:35 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 163, i32 2 ; <<4 x float>*>:36 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 163, i32 3 ; <<4 x float>*>:37 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 162, i32 1 ; <<4 x float>*>:38 [#uses=0] + getelementptr [193 x [4 x <4 x 
float>]]* null, i32 0, i32 162, i32 2 ; <<4 x float>*>:39 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 162, i32 3 ; <<4 x float>*>:40 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 161, i32 1 ; <<4 x float>*>:41 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 161, i32 2 ; <<4 x float>*>:42 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 161, i32 3 ; <<4 x float>*>:43 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 160, i32 1 ; <<4 x float>*>:44 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 160, i32 2 ; <<4 x float>*>:45 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 160, i32 3 ; <<4 x float>*>:46 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 159, i32 1 ; <<4 x float>*>:47 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 159, i32 2 ; <<4 x float>*>:48 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 159, i32 3 ; <<4 x float>*>:49 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 158, i32 1 ; <<4 x float>*>:50 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 158, i32 2 ; <<4 x float>*>:51 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 158, i32 3 ; <<4 x float>*>:52 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 157, i32 1 ; <<4 x float>*>:53 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 157, i32 2 ; <<4 x float>*>:54 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 157, i32 3 ; <<4 x float>*>:55 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 156, i32 1 ; <<4 x float>*>:56 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 156, i32 2 ; <<4 x float>*>:57 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 156, i32 3 ; <<4 x float>*>:58 [#uses=0] + 
getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 155, i32 1 ; <<4 x float>*>:59 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 155, i32 2 ; <<4 x float>*>:60 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 155, i32 3 ; <<4 x float>*>:61 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 154, i32 1 ; <<4 x float>*>:62 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 154, i32 2 ; <<4 x float>*>:63 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 154, i32 3 ; <<4 x float>*>:64 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 153, i32 1 ; <<4 x float>*>:65 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 153, i32 2 ; <<4 x float>*>:66 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 153, i32 3 ; <<4 x float>*>:67 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 152, i32 1 ; <<4 x float>*>:68 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 152, i32 2 ; <<4 x float>*>:69 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 152, i32 3 ; <<4 x float>*>:70 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 151, i32 1 ; <<4 x float>*>:71 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 151, i32 2 ; <<4 x float>*>:72 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 151, i32 3 ; <<4 x float>*>:73 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 150, i32 1 ; <<4 x float>*>:74 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 150, i32 2 ; <<4 x float>*>:75 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 150, i32 3 ; <<4 x float>*>:76 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 149, i32 1 ; <<4 x float>*>:77 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 149, i32 2 ; <<4 x 
float>*>:78 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 149, i32 3 ; <<4 x float>*>:79 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 148, i32 1 ; <<4 x float>*>:80 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 148, i32 2 ; <<4 x float>*>:81 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 148, i32 3 ; <<4 x float>*>:82 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 147, i32 1 ; <<4 x float>*>:83 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 147, i32 2 ; <<4 x float>*>:84 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 147, i32 3 ; <<4 x float>*>:85 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 146, i32 1 ; <<4 x float>*>:86 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 146, i32 2 ; <<4 x float>*>:87 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 146, i32 3 ; <<4 x float>*>:88 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 145, i32 1 ; <<4 x float>*>:89 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 145, i32 2 ; <<4 x float>*>:90 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 145, i32 3 ; <<4 x float>*>:91 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 144, i32 1 ; <<4 x float>*>:92 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 144, i32 2 ; <<4 x float>*>:93 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 144, i32 3 ; <<4 x float>*>:94 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 143, i32 1 ; <<4 x float>*>:95 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 143, i32 2 ; <<4 x float>*>:96 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 143, i32 3 ; <<4 x float>*>:97 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, 
i32 142, i32 1 ; <<4 x float>*>:98 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 142, i32 2 ; <<4 x float>*>:99 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 142, i32 3 ; <<4 x float>*>:100 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 141, i32 1 ; <<4 x float>*>:101 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 141, i32 2 ; <<4 x float>*>:102 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 141, i32 3 ; <<4 x float>*>:103 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 140, i32 1 ; <<4 x float>*>:104 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 140, i32 2 ; <<4 x float>*>:105 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 140, i32 3 ; <<4 x float>*>:106 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 139, i32 1 ; <<4 x float>*>:107 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 139, i32 2 ; <<4 x float>*>:108 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 139, i32 3 ; <<4 x float>*>:109 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 138, i32 1 ; <<4 x float>*>:110 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 138, i32 2 ; <<4 x float>*>:111 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 138, i32 3 ; <<4 x float>*>:112 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 137, i32 1 ; <<4 x float>*>:113 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 137, i32 2 ; <<4 x float>*>:114 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 137, i32 3 ; <<4 x float>*>:115 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 136, i32 1 ; <<4 x float>*>:116 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 136, i32 2 ; <<4 x float>*>:117 [#uses=0] + getelementptr 
[193 x [4 x <4 x float>]]* null, i32 0, i32 136, i32 3 ; <<4 x float>*>:118 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 135, i32 1 ; <<4 x float>*>:119 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 135, i32 2 ; <<4 x float>*>:120 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 135, i32 3 ; <<4 x float>*>:121 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 134, i32 1 ; <<4 x float>*>:122 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 134, i32 2 ; <<4 x float>*>:123 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 134, i32 3 ; <<4 x float>*>:124 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 133, i32 1 ; <<4 x float>*>:125 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 133, i32 2 ; <<4 x float>*>:126 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 133, i32 3 ; <<4 x float>*>:127 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 132, i32 1 ; <<4 x float>*>:128 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 132, i32 2 ; <<4 x float>*>:129 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 132, i32 3 ; <<4 x float>*>:130 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 131, i32 1 ; <<4 x float>*>:131 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 131, i32 2 ; <<4 x float>*>:132 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 131, i32 3 ; <<4 x float>*>:133 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 130, i32 1 ; <<4 x float>*>:134 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 130, i32 2 ; <<4 x float>*>:135 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 130, i32 3 ; <<4 x float>*>:136 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 129, i32 1 ; <<4 
x float>*>:137 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 129, i32 2 ; <<4 x float>*>:138 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 129, i32 3 ; <<4 x float>*>:139 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 128, i32 1 ; <<4 x float>*>:140 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 128, i32 2 ; <<4 x float>*>:141 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 128, i32 3 ; <<4 x float>*>:142 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 127, i32 1 ; <<4 x float>*>:143 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 127, i32 2 ; <<4 x float>*>:144 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 127, i32 3 ; <<4 x float>*>:145 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 126, i32 1 ; <<4 x float>*>:146 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 126, i32 2 ; <<4 x float>*>:147 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 126, i32 3 ; <<4 x float>*>:148 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 125, i32 1 ; <<4 x float>*>:149 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 125, i32 2 ; <<4 x float>*>:150 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 125, i32 3 ; <<4 x float>*>:151 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 124, i32 1 ; <<4 x float>*>:152 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 124, i32 2 ; <<4 x float>*>:153 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 124, i32 3 ; <<4 x float>*>:154 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 123, i32 1 ; <<4 x float>*>:155 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 123, i32 2 ; <<4 x float>*>:156 [#uses=0] + getelementptr [193 x [4 x <4 x 
float>]]* null, i32 0, i32 123, i32 3 ; <<4 x float>*>:157 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 122, i32 1 ; <<4 x float>*>:158 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 122, i32 2 ; <<4 x float>*>:159 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 122, i32 3 ; <<4 x float>*>:160 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 121, i32 1 ; <<4 x float>*>:161 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 121, i32 2 ; <<4 x float>*>:162 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 121, i32 3 ; <<4 x float>*>:163 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 120, i32 1 ; <<4 x float>*>:164 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 120, i32 2 ; <<4 x float>*>:165 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 120, i32 3 ; <<4 x float>*>:166 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 119, i32 1 ; <<4 x float>*>:167 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 119, i32 2 ; <<4 x float>*>:168 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 119, i32 3 ; <<4 x float>*>:169 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 118, i32 1 ; <<4 x float>*>:170 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 118, i32 2 ; <<4 x float>*>:171 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 118, i32 3 ; <<4 x float>*>:172 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 117, i32 1 ; <<4 x float>*>:173 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 117, i32 2 ; <<4 x float>*>:174 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 117, i32 3 ; <<4 x float>*>:175 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 116, i32 1 ; <<4 x float>*>:176 
[#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 116, i32 2 ; <<4 x float>*>:177 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 116, i32 3 ; <<4 x float>*>:178 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 115, i32 1 ; <<4 x float>*>:179 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 115, i32 2 ; <<4 x float>*>:180 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 115, i32 3 ; <<4 x float>*>:181 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 114, i32 1 ; <<4 x float>*>:182 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 114, i32 2 ; <<4 x float>*>:183 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 114, i32 3 ; <<4 x float>*>:184 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 113, i32 1 ; <<4 x float>*>:185 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 113, i32 2 ; <<4 x float>*>:186 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 113, i32 3 ; <<4 x float>*>:187 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 112, i32 1 ; <<4 x float>*>:188 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 112, i32 2 ; <<4 x float>*>:189 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 112, i32 3 ; <<4 x float>*>:190 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 111, i32 1 ; <<4 x float>*>:191 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 111, i32 2 ; <<4 x float>*>:192 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 111, i32 3 ; <<4 x float>*>:193 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 110, i32 1 ; <<4 x float>*>:194 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 110, i32 2 ; <<4 x float>*>:195 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, 
i32 0, i32 110, i32 3 ; <<4 x float>*>:196 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 109, i32 1 ; <<4 x float>*>:197 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 109, i32 2 ; <<4 x float>*>:198 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 109, i32 3 ; <<4 x float>*>:199 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 108, i32 1 ; <<4 x float>*>:200 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 108, i32 2 ; <<4 x float>*>:201 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 108, i32 3 ; <<4 x float>*>:202 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 107, i32 1 ; <<4 x float>*>:203 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 107, i32 2 ; <<4 x float>*>:204 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 107, i32 3 ; <<4 x float>*>:205 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 106, i32 1 ; <<4 x float>*>:206 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 106, i32 2 ; <<4 x float>*>:207 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 106, i32 3 ; <<4 x float>*>:208 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 105, i32 1 ; <<4 x float>*>:209 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 105, i32 2 ; <<4 x float>*>:210 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 105, i32 3 ; <<4 x float>*>:211 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 104, i32 1 ; <<4 x float>*>:212 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 104, i32 2 ; <<4 x float>*>:213 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 104, i32 3 ; <<4 x float>*>:214 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 103, i32 1 ; <<4 x float>*>:215 [#uses=0] + 
getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 103, i32 2 ; <<4 x float>*>:216 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 103, i32 3 ; <<4 x float>*>:217 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 102, i32 1 ; <<4 x float>*>:218 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 102, i32 2 ; <<4 x float>*>:219 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 102, i32 3 ; <<4 x float>*>:220 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 101, i32 1 ; <<4 x float>*>:221 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 101, i32 2 ; <<4 x float>*>:222 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 101, i32 3 ; <<4 x float>*>:223 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 100, i32 1 ; <<4 x float>*>:224 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 100, i32 2 ; <<4 x float>*>:225 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 100, i32 3 ; <<4 x float>*>:226 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 99, i32 1 ; <<4 x float>*>:227 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 99, i32 2 ; <<4 x float>*>:228 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 99, i32 3 ; <<4 x float>*>:229 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 98, i32 1 ; <<4 x float>*>:230 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 98, i32 2 ; <<4 x float>*>:231 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 98, i32 3 ; <<4 x float>*>:232 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 97, i32 1 ; <<4 x float>*>:233 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 97, i32 2 ; <<4 x float>*>:234 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 97, i32 3 ; 
<<4 x float>*>:235 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 96, i32 1 ; <<4 x float>*>:236 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 96, i32 2 ; <<4 x float>*>:237 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 96, i32 3 ; <<4 x float>*>:238 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 95, i32 1 ; <<4 x float>*>:239 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 95, i32 2 ; <<4 x float>*>:240 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 95, i32 3 ; <<4 x float>*>:241 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 94, i32 1 ; <<4 x float>*>:242 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 94, i32 2 ; <<4 x float>*>:243 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 94, i32 3 ; <<4 x float>*>:244 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 93, i32 1 ; <<4 x float>*>:245 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 93, i32 2 ; <<4 x float>*>:246 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 93, i32 3 ; <<4 x float>*>:247 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 92, i32 1 ; <<4 x float>*>:248 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 92, i32 2 ; <<4 x float>*>:249 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 92, i32 3 ; <<4 x float>*>:250 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 91, i32 1 ; <<4 x float>*>:251 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 91, i32 2 ; <<4 x float>*>:252 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 91, i32 3 ; <<4 x float>*>:253 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 90, i32 1 ; <<4 x float>*>:254 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, 
i32 0, i32 90, i32 2 ; <<4 x float>*>:255 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 90, i32 3 ; <<4 x float>*>:256 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 89, i32 1 ; <<4 x float>*>:257 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 89, i32 2 ; <<4 x float>*>:258 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 89, i32 3 ; <<4 x float>*>:259 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 88, i32 1 ; <<4 x float>*>:260 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 88, i32 2 ; <<4 x float>*>:261 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 88, i32 3 ; <<4 x float>*>:262 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 87, i32 1 ; <<4 x float>*>:263 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 87, i32 2 ; <<4 x float>*>:264 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 87, i32 3 ; <<4 x float>*>:265 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 86, i32 1 ; <<4 x float>*>:266 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 86, i32 2 ; <<4 x float>*>:267 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 86, i32 3 ; <<4 x float>*>:268 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 85, i32 1 ; <<4 x float>*>:269 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 85, i32 2 ; <<4 x float>*>:270 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 85, i32 3 ; <<4 x float>*>:271 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 84, i32 1 ; <<4 x float>*>:272 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 84, i32 2 ; <<4 x float>*>:273 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 84, i32 3 ; <<4 x float>*>:274 [#uses=0] + getelementptr [193 x [4 x 
<4 x float>]]* null, i32 0, i32 83, i32 1 ; <<4 x float>*>:275 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 83, i32 2 ; <<4 x float>*>:276 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 83, i32 3 ; <<4 x float>*>:277 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 82, i32 1 ; <<4 x float>*>:278 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 82, i32 2 ; <<4 x float>*>:279 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 82, i32 3 ; <<4 x float>*>:280 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 81, i32 1 ; <<4 x float>*>:281 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 81, i32 2 ; <<4 x float>*>:282 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 81, i32 3 ; <<4 x float>*>:283 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 80, i32 1 ; <<4 x float>*>:284 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 80, i32 2 ; <<4 x float>*>:285 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 80, i32 3 ; <<4 x float>*>:286 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 79, i32 1 ; <<4 x float>*>:287 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 79, i32 2 ; <<4 x float>*>:288 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 79, i32 3 ; <<4 x float>*>:289 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 78, i32 1 ; <<4 x float>*>:290 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 78, i32 2 ; <<4 x float>*>:291 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 78, i32 3 ; <<4 x float>*>:292 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 77, i32 1 ; <<4 x float>*>:293 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 77, i32 2 ; <<4 x float>*>:294 [#uses=0] + 
getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 77, i32 3 ; <<4 x float>*>:295 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 76, i32 1 ; <<4 x float>*>:296 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 76, i32 2 ; <<4 x float>*>:297 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 76, i32 3 ; <<4 x float>*>:298 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 75, i32 1 ; <<4 x float>*>:299 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 75, i32 2 ; <<4 x float>*>:300 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 75, i32 3 ; <<4 x float>*>:301 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 74, i32 1 ; <<4 x float>*>:302 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 74, i32 2 ; <<4 x float>*>:303 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 74, i32 3 ; <<4 x float>*>:304 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 73, i32 1 ; <<4 x float>*>:305 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 73, i32 2 ; <<4 x float>*>:306 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 73, i32 3 ; <<4 x float>*>:307 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 72, i32 1 ; <<4 x float>*>:308 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 72, i32 2 ; <<4 x float>*>:309 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 72, i32 3 ; <<4 x float>*>:310 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 71, i32 1 ; <<4 x float>*>:311 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 71, i32 2 ; <<4 x float>*>:312 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 71, i32 3 ; <<4 x float>*>:313 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 70, i32 1 ; <<4 x 
float>*>:314 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 70, i32 2 ; <<4 x float>*>:315 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 70, i32 3 ; <<4 x float>*>:316 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 69, i32 1 ; <<4 x float>*>:317 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 69, i32 2 ; <<4 x float>*>:318 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 69, i32 3 ; <<4 x float>*>:319 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 68, i32 1 ; <<4 x float>*>:320 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 68, i32 2 ; <<4 x float>*>:321 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 68, i32 3 ; <<4 x float>*>:322 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 67, i32 1 ; <<4 x float>*>:323 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 67, i32 2 ; <<4 x float>*>:324 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 67, i32 3 ; <<4 x float>*>:325 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 66, i32 1 ; <<4 x float>*>:326 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 66, i32 2 ; <<4 x float>*>:327 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 66, i32 3 ; <<4 x float>*>:328 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 65, i32 1 ; <<4 x float>*>:329 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 65, i32 2 ; <<4 x float>*>:330 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 65, i32 3 ; <<4 x float>*>:331 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 64, i32 1 ; <<4 x float>*>:332 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 64, i32 2 ; <<4 x float>*>:333 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, 
i32 64, i32 3 ; <<4 x float>*>:334 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 63, i32 1 ; <<4 x float>*>:335 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 63, i32 2 ; <<4 x float>*>:336 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 63, i32 3 ; <<4 x float>*>:337 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 62, i32 1 ; <<4 x float>*>:338 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 62, i32 2 ; <<4 x float>*>:339 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 62, i32 3 ; <<4 x float>*>:340 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 61, i32 1 ; <<4 x float>*>:341 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 61, i32 2 ; <<4 x float>*>:342 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 61, i32 3 ; <<4 x float>*>:343 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 60, i32 1 ; <<4 x float>*>:344 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 60, i32 2 ; <<4 x float>*>:345 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 60, i32 3 ; <<4 x float>*>:346 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 59, i32 1 ; <<4 x float>*>:347 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 59, i32 2 ; <<4 x float>*>:348 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 59, i32 3 ; <<4 x float>*>:349 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 58, i32 1 ; <<4 x float>*>:350 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 58, i32 2 ; <<4 x float>*>:351 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 58, i32 3 ; <<4 x float>*>:352 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 57, i32 1 ; <<4 x float>*>:353 [#uses=0] + getelementptr [193 x [4 x <4 x 
float>]]* null, i32 0, i32 57, i32 2 ; <<4 x float>*>:354 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 57, i32 3 ; <<4 x float>*>:355 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 56, i32 1 ; <<4 x float>*>:356 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 56, i32 2 ; <<4 x float>*>:357 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 56, i32 3 ; <<4 x float>*>:358 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 55, i32 1 ; <<4 x float>*>:359 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 55, i32 2 ; <<4 x float>*>:360 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 55, i32 3 ; <<4 x float>*>:361 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 54, i32 1 ; <<4 x float>*>:362 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 54, i32 2 ; <<4 x float>*>:363 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 54, i32 3 ; <<4 x float>*>:364 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 53, i32 1 ; <<4 x float>*>:365 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 53, i32 2 ; <<4 x float>*>:366 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 53, i32 3 ; <<4 x float>*>:367 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 52, i32 1 ; <<4 x float>*>:368 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 52, i32 2 ; <<4 x float>*>:369 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 52, i32 3 ; <<4 x float>*>:370 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 51, i32 1 ; <<4 x float>*>:371 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 51, i32 2 ; <<4 x float>*>:372 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 51, i32 3 ; <<4 x float>*>:373 [#uses=0] + 
getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 50, i32 1 ; <<4 x float>*>:374 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 50, i32 2 ; <<4 x float>*>:375 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 50, i32 3 ; <<4 x float>*>:376 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 49, i32 1 ; <<4 x float>*>:377 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 49, i32 2 ; <<4 x float>*>:378 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 49, i32 3 ; <<4 x float>*>:379 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 48, i32 1 ; <<4 x float>*>:380 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 48, i32 2 ; <<4 x float>*>:381 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 48, i32 3 ; <<4 x float>*>:382 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 47, i32 1 ; <<4 x float>*>:383 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 47, i32 2 ; <<4 x float>*>:384 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 47, i32 3 ; <<4 x float>*>:385 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 46, i32 1 ; <<4 x float>*>:386 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 46, i32 2 ; <<4 x float>*>:387 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 46, i32 3 ; <<4 x float>*>:388 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 45, i32 1 ; <<4 x float>*>:389 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 45, i32 2 ; <<4 x float>*>:390 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 45, i32 3 ; <<4 x float>*>:391 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 44, i32 1 ; <<4 x float>*>:392 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 44, i32 2 ; <<4 x 
float>*>:393 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 44, i32 3 ; <<4 x float>*>:394 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 43, i32 1 ; <<4 x float>*>:395 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 43, i32 2 ; <<4 x float>*>:396 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 43, i32 3 ; <<4 x float>*>:397 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 42, i32 1 ; <<4 x float>*>:398 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 42, i32 2 ; <<4 x float>*>:399 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 42, i32 3 ; <<4 x float>*>:400 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 41, i32 1 ; <<4 x float>*>:401 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 41, i32 2 ; <<4 x float>*>:402 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 41, i32 3 ; <<4 x float>*>:403 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 40, i32 1 ; <<4 x float>*>:404 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 40, i32 2 ; <<4 x float>*>:405 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 40, i32 3 ; <<4 x float>*>:406 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 39, i32 1 ; <<4 x float>*>:407 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 39, i32 2 ; <<4 x float>*>:408 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 39, i32 3 ; <<4 x float>*>:409 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 38, i32 1 ; <<4 x float>*>:410 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 38, i32 2 ; <<4 x float>*>:411 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 38, i32 3 ; <<4 x float>*>:412 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, 
i32 37, i32 1 ; <<4 x float>*>:413 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 37, i32 2 ; <<4 x float>*>:414 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 37, i32 3 ; <<4 x float>*>:415 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 36, i32 1 ; <<4 x float>*>:416 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 36, i32 2 ; <<4 x float>*>:417 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 36, i32 3 ; <<4 x float>*>:418 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 35, i32 1 ; <<4 x float>*>:419 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 35, i32 2 ; <<4 x float>*>:420 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 35, i32 3 ; <<4 x float>*>:421 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 34, i32 1 ; <<4 x float>*>:422 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 34, i32 2 ; <<4 x float>*>:423 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 34, i32 3 ; <<4 x float>*>:424 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 33, i32 1 ; <<4 x float>*>:425 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 33, i32 2 ; <<4 x float>*>:426 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 33, i32 3 ; <<4 x float>*>:427 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 32, i32 1 ; <<4 x float>*>:428 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 32, i32 2 ; <<4 x float>*>:429 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 32, i32 3 ; <<4 x float>*>:430 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 31, i32 1 ; <<4 x float>*>:431 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 31, i32 2 ; <<4 x float>*>:432 [#uses=0] + getelementptr [193 x [4 x <4 x 
float>]]* null, i32 0, i32 31, i32 3 ; <<4 x float>*>:433 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 30, i32 1 ; <<4 x float>*>:434 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 30, i32 2 ; <<4 x float>*>:435 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 30, i32 3 ; <<4 x float>*>:436 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 29, i32 1 ; <<4 x float>*>:437 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 29, i32 2 ; <<4 x float>*>:438 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 29, i32 3 ; <<4 x float>*>:439 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 28, i32 1 ; <<4 x float>*>:440 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 28, i32 2 ; <<4 x float>*>:441 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 28, i32 3 ; <<4 x float>*>:442 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 27, i32 1 ; <<4 x float>*>:443 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 27, i32 2 ; <<4 x float>*>:444 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 27, i32 3 ; <<4 x float>*>:445 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 26, i32 1 ; <<4 x float>*>:446 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 26, i32 2 ; <<4 x float>*>:447 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 26, i32 3 ; <<4 x float>*>:448 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 25, i32 1 ; <<4 x float>*>:449 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 25, i32 2 ; <<4 x float>*>:450 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 25, i32 3 ; <<4 x float>*>:451 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 24, i32 1 ; <<4 x float>*>:452 [#uses=0] + 
getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 24, i32 2 ; <<4 x float>*>:453 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 24, i32 3 ; <<4 x float>*>:454 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 23, i32 1 ; <<4 x float>*>:455 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 23, i32 2 ; <<4 x float>*>:456 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 23, i32 3 ; <<4 x float>*>:457 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 22, i32 1 ; <<4 x float>*>:458 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 22, i32 2 ; <<4 x float>*>:459 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 22, i32 3 ; <<4 x float>*>:460 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 21, i32 1 ; <<4 x float>*>:461 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 21, i32 2 ; <<4 x float>*>:462 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 21, i32 3 ; <<4 x float>*>:463 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 20, i32 1 ; <<4 x float>*>:464 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 20, i32 2 ; <<4 x float>*>:465 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 20, i32 3 ; <<4 x float>*>:466 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 19, i32 1 ; <<4 x float>*>:467 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 19, i32 2 ; <<4 x float>*>:468 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 19, i32 3 ; <<4 x float>*>:469 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 18, i32 1 ; <<4 x float>*>:470 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 18, i32 2 ; <<4 x float>*>:471 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 18, i32 3 ; <<4 x 
float>*>:472 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 17, i32 1 ; <<4 x float>*>:473 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 17, i32 2 ; <<4 x float>*>:474 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 17, i32 3 ; <<4 x float>*>:475 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 16, i32 1 ; <<4 x float>*>:476 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 16, i32 2 ; <<4 x float>*>:477 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 16, i32 3 ; <<4 x float>*>:478 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 15, i32 1 ; <<4 x float>*>:479 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 15, i32 2 ; <<4 x float>*>:480 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 15, i32 3 ; <<4 x float>*>:481 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 14, i32 1 ; <<4 x float>*>:482 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 14, i32 2 ; <<4 x float>*>:483 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 14, i32 3 ; <<4 x float>*>:484 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1 ; <<4 x float>*>:485 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2 ; <<4 x float>*>:486 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3 ; <<4 x float>*>:487 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 1 ; <<4 x float>*>:488 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 2 ; <<4 x float>*>:489 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 3 ; <<4 x float>*>:490 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 180, i32 1 ; <<4 x float>*>:491 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 
180, i32 2 ; <<4 x float>*>:492 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 180, i32 3 ; <<4 x float>*>:493 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 181, i32 1 ; <<4 x float>*>:494 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 181, i32 2 ; <<4 x float>*>:495 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 181, i32 3 ; <<4 x float>*>:496 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 182, i32 1 ; <<4 x float>*>:497 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 182, i32 2 ; <<4 x float>*>:498 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 182, i32 3 ; <<4 x float>*>:499 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 183, i32 1 ; <<4 x float>*>:500 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 183, i32 2 ; <<4 x float>*>:501 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 183, i32 3 ; <<4 x float>*>:502 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 184, i32 1 ; <<4 x float>*>:503 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 184, i32 2 ; <<4 x float>*>:504 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 184, i32 3 ; <<4 x float>*>:505 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 185, i32 1 ; <<4 x float>*>:506 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 185, i32 2 ; <<4 x float>*>:507 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 185, i32 3 ; <<4 x float>*>:508 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 190, i32 1 ; <<4 x float>*>:509 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 190, i32 2 ; <<4 x float>*>:510 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 190, i32 3 ; <<4 x float>*>:511 [#uses=0] + getelementptr 
[193 x [4 x <4 x float>]]* null, i32 0, i32 9, i32 1 ; <<4 x float>*>:512 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 9, i32 2 ; <<4 x float>*>:513 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 9, i32 3 ; <<4 x float>*>:514 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 10, i32 1 ; <<4 x float>*>:515 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 10, i32 2 ; <<4 x float>*>:516 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 10, i32 3 ; <<4 x float>*>:517 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 11, i32 1 ; <<4 x float>*>:518 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 11, i32 2 ; <<4 x float>*>:519 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 11, i32 3 ; <<4 x float>*>:520 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 12, i32 1 ; <<4 x float>*>:521 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 12, i32 2 ; <<4 x float>*>:522 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 12, i32 3 ; <<4 x float>*>:523 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 13, i32 1 ; <<4 x float>*>:524 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 13, i32 2 ; <<4 x float>*>:525 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 13, i32 3 ; <<4 x float>*>:526 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 1 ; <<4 x float>*>:527 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 2 ; <<4 x float>*>:528 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 3 ; <<4 x float>*>:529 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 1 ; <<4 x float>*>:530 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 2 ; <<4 x float>*>:531 [#uses=0] + 
getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3 ; <<4 x float>*>:532 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 1 ; <<4 x float>*>:533 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 2 ; <<4 x float>*>:534 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 3 ; <<4 x float>*>:535 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 6, i32 1 ; <<4 x float>*>:536 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 6, i32 2 ; <<4 x float>*>:537 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 6, i32 3 ; <<4 x float>*>:538 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 7, i32 1 ; <<4 x float>*>:539 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 7, i32 2 ; <<4 x float>*>:540 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 7, i32 3 ; <<4 x float>*>:541 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 1 ; <<4 x float>*>:542 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 2 ; <<4 x float>*>:543 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 3 ; <<4 x float>*>:544 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 187, i32 1 ; <<4 x float>*>:545 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 187, i32 2 ; <<4 x float>*>:546 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 187, i32 3 ; <<4 x float>*>:547 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 8, i32 1 ; <<4 x float>*>:548 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 8, i32 2 ; <<4 x float>*>:549 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 8, i32 3 ; <<4 x float>*>:550 [#uses=0] + load <4 x float>* null ; <<4 x float>>:551 [#uses=0] + getelementptr [193 x [4 x <4 x 
float>]]* null, i32 0, i32 188, i32 1 ; <<4 x float>*>:552 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 188, i32 2 ; <<4 x float>*>:553 [#uses=1] + load <4 x float>* %553 ; <<4 x float>>:554 [#uses=1] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 188, i32 3 ; <<4 x float>*>:555 [#uses=0] + shufflevector <4 x float> %554, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:556 [#uses=1] + call <4 x i32> @llvm.ppc.altivec.vcmpgtfp( <4 x float> zeroinitializer, <4 x float> %556 ) ; <<4 x i32>>:557 [#uses=0] + bitcast <4 x i32> zeroinitializer to <4 x float> ; <<4 x float>>:558 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 0 ; <<4 x float>*>:559 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 2 ; <<4 x float>*>:560 [#uses=1] + store <4 x float> zeroinitializer, <4 x float>* %560 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 3 ; <<4 x float>*>:561 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 1 ; <<4 x float>*>:562 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 2 ; <<4 x float>*>:563 [#uses=0] + load <4 x i32>* %.sub7896 ; <<4 x i32>>:564 [#uses=0] + shufflevector <4 x float> zeroinitializer, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:565 [#uses=1] + store <4 x float> %565, <4 x float>* null + icmp eq i32 0, 0 ; <i1>:566 [#uses=1] + br i1 %566, label %.critedge, label %xPIF.exit + +.critedge: ; preds = %xOperationInitMasks.exit + getelementptr [4 x <4 x i32>]* null, i32 0, i32 3 ; <<4 x i32>*>:567 [#uses=0] + and <4 x i32> zeroinitializer, zeroinitializer ; <<4 x i32>>:568 [#uses=0] + or <4 x i32> zeroinitializer, zeroinitializer ; <<4 x i32>>:569 [#uses=0] + icmp eq i32 0, 0 ; <i1>:570 [#uses=1] + br i1 %570, label %.critedge7898, label %xPBRK.exit + +.critedge7898: ; preds = %.critedge + br label %xPIF.exit + +xPIF.exit: ; preds 
= %.critedge7898, %xOperationInitMasks.exit + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 188, i32 1 ; <<4 x float>*>:571 [#uses=0] + load <4 x float>* null ; <<4 x float>>:572 [#uses=0] + shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:573 [#uses=0] + icmp eq i32 0, 0 ; <i1>:574 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 1 ; <<4 x float>*>:575 [#uses=0] + load <4 x float>* %0 ; <<4 x float>>:576 [#uses=0] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:577 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 0 ; <<4 x float>*>:578 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 1 ; <<4 x float>*>:579 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 2 ; <<4 x float>*>:580 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 3 ; <<4 x float>*>:581 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3 ; <<4 x float>*>:582 [#uses=0] + load <4 x float>* null ; <<4 x float>>:583 [#uses=1] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1 ; <<4 x float>*>:584 [#uses=1] + load <4 x float>* %584 ; <<4 x float>>:585 [#uses=1] + load <4 x float>* null ; <<4 x float>>:586 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:587 [#uses=1] + load <4 x float>* %587 ; <<4 x float>>:588 [#uses=1] + shufflevector <4 x float> %583, <4 x float> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x float>>:589 [#uses=1] + shufflevector <4 x float> %585, <4 x float> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x float>>:590 [#uses=1] + shufflevector <4 x float> %588, <4 x float> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x float>>:591 [#uses=1] + fmul <4 x float> zeroinitializer, %589 ; <<4 x float>>:592 
[#uses=0] + fmul <4 x float> zeroinitializer, %590 ; <<4 x float>>:593 [#uses=0] + fmul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:594 [#uses=1] + fmul <4 x float> zeroinitializer, %591 ; <<4 x float>>:595 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 0 ; <<4 x float>*>:596 [#uses=2] + load <4 x float>* %596 ; <<4 x float>>:597 [#uses=0] + store <4 x float> zeroinitializer, <4 x float>* %596 + load <4 x float>* null ; <<4 x float>>:598 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2 ; <<4 x float>*>:599 [#uses=0] + shufflevector <4 x float> %594, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:600 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3 ; <<4 x float>*>:601 [#uses=2] + load <4 x float>* %601 ; <<4 x float>>:602 [#uses=0] + store <4 x float> zeroinitializer, <4 x float>* %601 + load <4 x float>* null ; <<4 x float>>:603 [#uses=0] + load <4 x float>* null ; <<4 x float>>:604 [#uses=1] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2 ; <<4 x float>*>:605 [#uses=1] + load <4 x float>* %605 ; <<4 x float>>:606 [#uses=1] + fsub <4 x float> zeroinitializer, %604 ; <<4 x float>>:607 [#uses=2] + fsub <4 x float> zeroinitializer, %606 ; <<4 x float>>:608 [#uses=2] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:609 [#uses=0] + br i1 false, label %617, label %610 + +; <label>:610 ; preds = %xPIF.exit + load <4 x float>* null ; <<4 x float>>:611 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1 ; <<4 x float>*>:612 [#uses=2] + load <4 x float>* %612 ; <<4 x float>>:613 [#uses=1] + shufflevector <4 x float> %607, <4 x float> %613, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:614 [#uses=1] + store <4 x float> %614, <4 x float>* %612 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; 
<<4 x float>*>:615 [#uses=2] + load <4 x float>* %615 ; <<4 x float>>:616 [#uses=0] + store <4 x float> zeroinitializer, <4 x float>* %615 + br label %xST.exit400 + +; <label>:617 ; preds = %xPIF.exit + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:618 [#uses=0] + shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > ; <<4 x i32>>:619 [#uses=1] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %619, <4 x i32> zeroinitializer ) ; <i32>:620 [#uses=1] + icmp eq i32 %620, 0 ; <i1>:621 [#uses=1] + br i1 %621, label %625, label %622 + +; <label>:622 ; preds = %617 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1 ; <<4 x float>*>:623 [#uses=0] + shufflevector <4 x float> %607, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:624 [#uses=0] + br label %625 + +; <label>:625 ; preds = %622, %617 + load <4 x i32>* %.sub7896 ; <<4 x i32>>:626 [#uses=0] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:627 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:628 [#uses=1] + load <4 x float>* %628 ; <<4 x float>>:629 [#uses=0] + load <4 x i32>* %.sub7896 ; <<4 x i32>>:630 [#uses=0] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:631 [#uses=1] + icmp eq i32 %631, 0 ; <i1>:632 [#uses=1] + br i1 %632, label %xST.exit400, label %633 + +; <label>:633 ; preds = %625 + load <4 x float>* null ; <<4 x float>>:634 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %634, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:635 [#uses=1] + store <4 x float> %635, <4 x float>* null + br label %xST.exit400 + +xST.exit400: ; preds = %633, %625, %610 + %.17218 = phi <4 x float> [ zeroinitializer, %610 ], [ %608, %633 ], [ %608, %625 ] ; <<4 x 
float>> [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 0 ; <<4 x float>*>:636 [#uses=1] + load <4 x float>* %636 ; <<4 x float>>:637 [#uses=0] + load <4 x float>* null ; <<4 x float>>:638 [#uses=2] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:639 [#uses=0] + load <4 x float>* null ; <<4 x float>>:640 [#uses=2] + fmul <4 x float> %638, %638 ; <<4 x float>>:641 [#uses=1] + fmul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:642 [#uses=0] + fmul <4 x float> %640, %640 ; <<4 x float>>:643 [#uses=2] + shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > ; <<4 x float>>:644 [#uses=0] + shufflevector <4 x float> %643, <4 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > ; <<4 x float>>:645 [#uses=1] + fadd <4 x float> %645, %643 ; <<4 x float>>:646 [#uses=0] + shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 > ; <<4 x float>>:647 [#uses=1] + shufflevector <4 x float> %641, <4 x float> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 > ; <<4 x float>>:648 [#uses=1] + fadd <4 x float> zeroinitializer, %647 ; <<4 x float>>:649 [#uses=2] + fadd <4 x float> zeroinitializer, %648 ; <<4 x float>>:650 [#uses=0] + fadd <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:651 [#uses=2] + call <4 x float> @llvm.ppc.altivec.vrsqrtefp( <4 x float> %649 ) ; <<4 x float>>:652 [#uses=1] + fmul <4 x float> %652, %649 ; <<4 x float>>:653 [#uses=1] + call <4 x float> @llvm.ppc.altivec.vrsqrtefp( <4 x float> %651 ) ; <<4 x float>>:654 [#uses=1] + fmul <4 x float> %654, %651 ; <<4 x float>>:655 [#uses=0] + icmp eq i32 0, 0 ; <i1>:656 [#uses=1] + br i1 %656, label %665, label %657 + +; <label>:657 ; preds = %xST.exit400 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 0 ; <<4 x float>*>:658 [#uses=0] + shufflevector <4 x float> %653, <4 x float> zeroinitializer, <4 x i32> < i32 0, 
i32 5, i32 6, i32 7 > ; <<4 x float>>:659 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1 ; <<4 x float>*>:660 [#uses=1] + load <4 x float>* %660 ; <<4 x float>>:661 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2 ; <<4 x float>*>:662 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3 ; <<4 x float>*>:663 [#uses=0] + shufflevector <4 x float> zeroinitializer, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:664 [#uses=0] + br label %xST.exit402 + +; <label>:665 ; preds = %xST.exit400 + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:666 [#uses=0] + br i1 false, label %669, label %667 + +; <label>:667 ; preds = %665 + load <4 x float>* null ; <<4 x float>>:668 [#uses=0] + br label %669 + +; <label>:669 ; preds = %667, %665 + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:670 [#uses=0] + br label %xST.exit402 + +xST.exit402: ; preds = %669, %657 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 0 ; <<4 x float>*>:671 [#uses=0] + load <4 x float>* null ; <<4 x float>>:672 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 2 ; <<4 x float>*>:673 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1 ; <<4 x float>*>:674 [#uses=1] + load <4 x float>* %674 ; <<4 x float>>:675 [#uses=1] + load <4 x float>* null ; <<4 x float>>:676 [#uses=0] + shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:677 [#uses=1] + shufflevector <4 x float> %675, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:678 [#uses=1] + fmul <4 x float> zeroinitializer, %677 ; <<4 x float>>:679 [#uses=0] + fmul <4 x float> zeroinitializer, %678 ; <<4 x float>>:680 [#uses=0] + fmul <4 x float> zeroinitializer, zeroinitializer ; <<4 
x float>>:681 [#uses=1] + icmp eq i32 0, 0 ; <i1>:682 [#uses=1] + br i1 %682, label %689, label %683 + +; <label>:683 ; preds = %xST.exit402 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 1 ; <<4 x float>*>:684 [#uses=1] + load <4 x float>* %684 ; <<4 x float>>:685 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 2 ; <<4 x float>*>:686 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 3 ; <<4 x float>*>:687 [#uses=0] + shufflevector <4 x float> %681, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:688 [#uses=0] + br label %xST.exit405 + +; <label>:689 ; preds = %xST.exit402 + shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> zeroinitializer ; <<4 x i32>>:690 [#uses=0] + load <4 x i32>* %.sub7896 ; <<4 x i32>>:691 [#uses=1] + shufflevector <4 x i32> %691, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>>:692 [#uses=1] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %692, <4 x i32> zeroinitializer ) ; <i32>:693 [#uses=1] + icmp eq i32 %693, 0 ; <i1>:694 [#uses=0] + br label %xST.exit405 + +xST.exit405: ; preds = %689, %683 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3 ; <<4 x float>*>:695 [#uses=0] + shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:696 [#uses=0] + shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:697 [#uses=0] + load <4 x float>* null ; <<4 x float>>:698 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 2 ; <<4 x float>*>:699 [#uses=0] + shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:700 [#uses=1] + fadd <4 x float> zeroinitializer, %700 ; <<4 x float>>:701 [#uses=0] + load <4 x i32>* %.sub7896 ; <<4 x i32>>:702 [#uses=1] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> 
%702, <4 x i32> zeroinitializer ) ; <i32>:703 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 1 ; <<4 x float>*>:704 [#uses=2] + load <4 x float>* %704 ; <<4 x float>>:705 [#uses=0] + store <4 x float> zeroinitializer, <4 x float>* %704 + load <4 x float>* null ; <<4 x float>>:706 [#uses=0] + store <4 x float> zeroinitializer, <4 x float>* null + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3 ; <<4 x float>*>:707 [#uses=2] + load <4 x float>* %707 ; <<4 x float>>:708 [#uses=0] + store <4 x float> zeroinitializer, <4 x float>* %707 + load <4 x float>* null ; <<4 x float>>:709 [#uses=0] + load <4 x float>* null ; <<4 x float>>:710 [#uses=0] + load <4 x float>* null ; <<4 x float>>:711 [#uses=1] + shufflevector <4 x float> %711, <4 x float> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 > ; <<4 x float>>:712 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1 ; <<4 x float>*>:713 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2 ; <<4 x float>*>:714 [#uses=1] + load <4 x float>* %714 ; <<4 x float>>:715 [#uses=0] + shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:716 [#uses=0] + fmul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:717 [#uses=1] + load <4 x i32>* %.sub7896 ; <<4 x i32>>:718 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 0 ; <<4 x float>*>:719 [#uses=1] + store <4 x float> zeroinitializer, <4 x float>* %719 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 1 ; <<4 x float>*>:720 [#uses=1] + shufflevector <4 x float> %717, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:721 [#uses=1] + store <4 x float> %721, <4 x float>* %720 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 2 ; <<4 x float>*>:722 [#uses=1] + load <4 x float>* %722 ; <<4 x float>>:723 [#uses=1] + shufflevector <4 x 
float> zeroinitializer, <4 x float> %723, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:724 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 3 ; <<4 x float>*>:725 [#uses=1] + store <4 x float> zeroinitializer, <4 x float>* %725 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 2 ; <<4 x float>*>:726 [#uses=1] + load <4 x float>* %726 ; <<4 x float>>:727 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 3 ; <<4 x float>*>:728 [#uses=1] + load <4 x float>* %728 ; <<4 x float>>:729 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 0 ; <<4 x float>*>:730 [#uses=1] + load <4 x float>* %730 ; <<4 x float>>:731 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1 ; <<4 x float>*>:732 [#uses=1] + load <4 x float>* %732 ; <<4 x float>>:733 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3 ; <<4 x float>*>:734 [#uses=0] + shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:735 [#uses=1] + fmul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:736 [#uses=1] + fmul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:737 [#uses=1] + fmul <4 x float> zeroinitializer, %735 ; <<4 x float>>:738 [#uses=1] + fmul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:739 [#uses=1] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:740 [#uses=1] + icmp eq i32 %740, 0 ; <i1>:741 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 0 ; <<4 x float>*>:742 [#uses=2] + load <4 x float>* %742 ; <<4 x float>>:743 [#uses=1] + shufflevector <4 x float> %736, <4 x float> %743, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:744 [#uses=1] + store <4 x float> %744, <4 x float>* %742 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1 ; <<4 x 
float>*>:745 [#uses=1] + load <4 x float>* %745 ; <<4 x float>>:746 [#uses=1] + shufflevector <4 x float> %737, <4 x float> %746, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:747 [#uses=0] + shufflevector <4 x float> %738, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:748 [#uses=1] + store <4 x float> %748, <4 x float>* null + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3 ; <<4 x float>*>:749 [#uses=1] + load <4 x float>* %749 ; <<4 x float>>:750 [#uses=1] + shufflevector <4 x float> %739, <4 x float> %750, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:751 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 0 ; <<4 x float>*>:752 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 1 ; <<4 x float>*>:753 [#uses=1] + load <4 x float>* %753 ; <<4 x float>>:754 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 2 ; <<4 x float>*>:755 [#uses=0] + load <4 x float>* null ; <<4 x float>>:756 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:757 [#uses=1] + shufflevector <4 x float> %756, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:758 [#uses=1] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2 ; <<4 x float>*>:759 [#uses=1] + load <4 x float>* %759 ; <<4 x float>>:760 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3 ; <<4 x float>*>:761 [#uses=0] + shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:762 [#uses=0] + shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:763 [#uses=1] + fadd <4 x float> %757, zeroinitializer ; <<4 x float>>:764 [#uses=0] + fadd <4 x float> %758, %763 ; <<4 x float>>:765 [#uses=0] + fmul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:766 
[#uses=1] + br i1 false, label %773, label %767 + +; <label>:767 ; preds = %xST.exit405 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1 ; <<4 x float>*>:768 [#uses=0] + load <4 x float>* null ; <<4 x float>>:769 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %769, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:770 [#uses=1] + store <4 x float> %770, <4 x float>* null + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:771 [#uses=1] + load <4 x float>* %771 ; <<4 x float>>:772 [#uses=0] + br label %xST.exit422 + +; <label>:773 ; preds = %xST.exit405 + br label %xST.exit422 + +xST.exit422: ; preds = %773, %767 + %.07267 = phi <4 x float> [ %766, %767 ], [ undef, %773 ] ; <<4 x float>> [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:774 [#uses=0] + fmul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:775 [#uses=0] + icmp eq i32 0, 0 ; <i1>:776 [#uses=1] + br i1 %776, label %780, label %777 + +; <label>:777 ; preds = %xST.exit422 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:778 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:779 [#uses=0] + br label %xST.exit431 + +; <label>:780 ; preds = %xST.exit422 + load <4 x i32>* %.sub7896 ; <<4 x i32>>:781 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:782 [#uses=2] + load <4 x float>* %782 ; <<4 x float>>:783 [#uses=0] + store <4 x float> zeroinitializer, <4 x float>* %782 + load <4 x i32>* %.sub7896 ; <<4 x i32>>:784 [#uses=1] + shufflevector <4 x i32> %784, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>>:785 [#uses=0] + icmp eq i32 0, 0 ; <i1>:786 [#uses=0] + br label %xST.exit431 + +xST.exit431: ; preds = %780, %777 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:787 [#uses=0] + load 
<4 x float>* null ; <<4 x float>>:788 [#uses=0] + load <4 x i32>* %.sub7896 ; <<4 x i32>>:789 [#uses=2] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %789, <4 x i32> zeroinitializer ) ; <i32>:790 [#uses=1] + icmp eq i32 %790, 0 ; <i1>:791 [#uses=0] + shufflevector <4 x i32> %789, <4 x i32> undef, <4 x i32> zeroinitializer ; <<4 x i32>>:792 [#uses=1] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %792, <4 x i32> zeroinitializer ) ; <i32>:793 [#uses=1] + icmp eq i32 %793, 0 ; <i1>:794 [#uses=1] + br i1 %794, label %797, label %795 + +; <label>:795 ; preds = %xST.exit431 + load <4 x float>* null ; <<4 x float>>:796 [#uses=0] + store <4 x float> zeroinitializer, <4 x float>* null + br label %797 + +; <label>:797 ; preds = %795, %xST.exit431 + %.07332 = phi <4 x float> [ zeroinitializer, %795 ], [ undef, %xST.exit431 ] ; <<4 x float>> [#uses=0] + shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > ; <<4 x i32>>:798 [#uses=0] + br i1 false, label %xST.exit434, label %799 + +; <label>:799 ; preds = %797 + load <4 x float>* null ; <<4 x float>>:800 [#uses=0] + store <4 x float> zeroinitializer, <4 x float>* null + br label %xST.exit434 + +xST.exit434: ; preds = %799, %797 + load <4 x i32>* %.sub7896 ; <<4 x i32>>:801 [#uses=1] + shufflevector <4 x i32> %801, <4 x i32> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 > ; <<4 x i32>>:802 [#uses=0] + shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>>:803 [#uses=0] + icmp eq i32 0, 0 ; <i1>:804 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 0 ; <<4 x float>*>:805 [#uses=1] + load <4 x float>* %805 ; <<4 x float>>:806 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1 ; <<4 x float>*>:807 [#uses=1] + load <4 x float>* %807 ; <<4 x float>>:808 [#uses=0] + load <4 x float>* null ; <<4 x float>>:809 [#uses=0] + load <4 x float>* null ; <<4 x 
float>>:810 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 0 ; <<4 x float>*>:811 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 2 ; <<4 x float>*>:812 [#uses=1] + load <4 x float>* %812 ; <<4 x float>>:813 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3 ; <<4 x float>*>:814 [#uses=1] + load <4 x float>* %814 ; <<4 x float>>:815 [#uses=0] + shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:816 [#uses=0] + unreachable + +xPBRK.exit: ; preds = %.critedge + store <4 x i32> < i32 -1, i32 -1, i32 -1, i32 -1 >, <4 x i32>* %.sub7896 + store <4 x i32> zeroinitializer, <4 x i32>* null + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 1 ; <<4 x float>*>:817 [#uses=1] + load <4 x float>* %817 ; <<4 x float>>:818 [#uses=1] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 2 ; <<4 x float>*>:819 [#uses=1] + load <4 x float>* %819 ; <<4 x float>>:820 [#uses=1] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 3 ; <<4 x float>*>:821 [#uses=1] + load <4 x float>* %821 ; <<4 x float>>:822 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:823 [#uses=1] + shufflevector <4 x float> %818, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:824 [#uses=1] + shufflevector <4 x float> %820, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:825 [#uses=1] + shufflevector <4 x float> %822, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:826 [#uses=1] + shufflevector <4 x float> %823, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:827 [#uses=0] + shufflevector <4 x float> %824, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:828 [#uses=1] + store <4 x float> %828, <4 x float>* null + load <4 x float>* null ; <<4 x 
float>>:829 [#uses=1] + shufflevector <4 x float> %825, <4 x float> %829, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:830 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 3 ; <<4 x float>*>:831 [#uses=2] + load <4 x float>* %831 ; <<4 x float>>:832 [#uses=1] + shufflevector <4 x float> %826, <4 x float> %832, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:833 [#uses=1] + store <4 x float> %833, <4 x float>* %831 + br label %xLS.exit449 + +xLS.exit449: ; preds = %1215, %xPBRK.exit + %.27464 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.17463, %1215 ] ; <<4 x float>> [#uses=2] + %.27469 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.17468, %1215 ] ; <<4 x float>> [#uses=2] + %.27474 = phi <4 x float> [ undef, %xPBRK.exit ], [ zeroinitializer, %1215 ] ; <<4 x float>> [#uses=1] + %.17482 = phi <4 x float> [ undef, %xPBRK.exit ], [ zeroinitializer, %1215 ] ; <<4 x float>> [#uses=0] + %.17486 = phi <4 x float> [ undef, %xPBRK.exit ], [ zeroinitializer, %1215 ] ; <<4 x float>> [#uses=0] + %.17490 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07489, %1215 ] ; <<4 x float>> [#uses=2] + %.17494 = phi <4 x float> [ undef, %xPBRK.exit ], [ zeroinitializer, %1215 ] ; <<4 x float>> [#uses=0] + %.27504 = phi <4 x float> [ undef, %xPBRK.exit ], [ zeroinitializer, %1215 ] ; <<4 x float>> [#uses=0] + %.17513 = phi <4 x float> [ undef, %xPBRK.exit ], [ zeroinitializer, %1215 ] ; <<4 x float>> [#uses=0] + %.17517 = phi <4 x float> [ undef, %xPBRK.exit ], [ zeroinitializer, %1215 ] ; <<4 x float>> [#uses=0] + %.17552 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07551, %1215 ] ; <<4 x float>> [#uses=2] + %.17556 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07555, %1215 ] ; <<4 x float>> [#uses=2] + %.17560 = phi <4 x float> [ undef, %xPBRK.exit ], [ zeroinitializer, %1215 ] ; <<4 x float>> [#uses=0] + %.17583 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07582, %1215 ] ; <<4 x float>> [#uses=2] + %.17591 = phi <4 x float> [ undef, 
%xPBRK.exit ], [ %.07590, %1215 ] ; <<4 x float>> [#uses=2] + %.17599 = phi <4 x float> [ undef, %xPBRK.exit ], [ zeroinitializer, %1215 ] ; <<4 x float>> [#uses=0] + %.17618 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07617, %1215 ] ; <<4 x float>> [#uses=2] + %.17622 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07621, %1215 ] ; <<4 x float>> [#uses=2] + %.17626 = phi <4 x float> [ undef, %xPBRK.exit ], [ zeroinitializer, %1215 ] ; <<4 x float>> [#uses=0] + %.17653 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07652, %1215 ] ; <<4 x float>> [#uses=2] + %.17657 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07656, %1215 ] ; <<4 x float>> [#uses=2] + %.17661 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07660, %1215 ] ; <<4 x float>> [#uses=2] + %.17665 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07664, %1215 ] ; <<4 x float>> [#uses=2] + %.17723 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07722, %1215 ] ; <<4 x float>> [#uses=2] + %.17727 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07726, %1215 ] ; <<4 x float>> [#uses=2] + %.17731 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07730, %1215 ] ; <<4 x float>> [#uses=2] + %.17735 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07734, %1215 ] ; <<4 x float>> [#uses=2] + %.17770 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07769, %1215 ] ; <<4 x float>> [#uses=2] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 0 ; <<4 x float>*>:834 [#uses=0] + load <4 x float>* null ; <<4 x float>>:835 [#uses=1] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 2 ; <<4 x float>*>:836 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 3 ; <<4 x float>*>:837 [#uses=0] + shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:838 [#uses=0] + shufflevector <4 x float> %835, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:839 [#uses=1] + getelementptr <4 x float>* null, i32 878 ; <<4 x float>*>:840 
[#uses=1] + load <4 x float>* %840 ; <<4 x float>>:841 [#uses=0] + call <4 x float> @llvm.ppc.altivec.vcfsx( <4 x i32> zeroinitializer, i32 0 ) ; <<4 x float>>:842 [#uses=1] + shufflevector <4 x float> %842, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:843 [#uses=2] + call <4 x i32> @llvm.ppc.altivec.vcmpgtfp( <4 x float> %843, <4 x float> %839 ) ; <<4 x i32>>:844 [#uses=1] + bitcast <4 x i32> %844 to <4 x float> ; <<4 x float>>:845 [#uses=1] + call <4 x i32> @llvm.ppc.altivec.vcmpgtfp( <4 x float> %843, <4 x float> zeroinitializer ) ; <<4 x i32>>:846 [#uses=0] + bitcast <4 x i32> zeroinitializer to <4 x float> ; <<4 x float>>:847 [#uses=1] + icmp eq i32 0, 0 ; <i1>:848 [#uses=1] + br i1 %848, label %854, label %849 + +; <label>:849 ; preds = %xLS.exit449 + shufflevector <4 x float> zeroinitializer, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:850 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 1 ; <<4 x float>*>:851 [#uses=1] + store <4 x float> zeroinitializer, <4 x float>* %851 + shufflevector <4 x float> zeroinitializer, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:852 [#uses=1] + store <4 x float> %852, <4 x float>* null + shufflevector <4 x float> %847, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:853 [#uses=0] + br label %xST.exit451 + +; <label>:854 ; preds = %xLS.exit449 + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:855 [#uses=0] + br i1 false, label %859, label %856 + +; <label>:856 ; preds = %854 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 0 ; <<4 x float>*>:857 [#uses=2] + load <4 x float>* %857 ; <<4 x float>>:858 [#uses=0] + store <4 x float> zeroinitializer, <4 x float>* %857 + br label %859 + +; <label>:859 ; preds = %856, %854 + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> 
zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:860 [#uses=0] + br i1 false, label %864, label %861 + +; <label>:861 ; preds = %859 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 1 ; <<4 x float>*>:862 [#uses=1] + shufflevector <4 x float> %845, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:863 [#uses=1] + store <4 x float> %863, <4 x float>* %862 + br label %864 + +; <label>:864 ; preds = %861, %859 + load <4 x i32>* %.sub7896 ; <<4 x i32>>:865 [#uses=1] + shufflevector <4 x i32> %865, <4 x i32> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 > ; <<4 x i32>>:866 [#uses=0] + br i1 false, label %868, label %867 + +; <label>:867 ; preds = %864 + store <4 x float> zeroinitializer, <4 x float>* null + br label %868 + +; <label>:868 ; preds = %867, %864 + shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>>:869 [#uses=0] + br label %xST.exit451 + +xST.exit451: ; preds = %868, %849 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 0 ; <<4 x float>*>:870 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 1 ; <<4 x float>*>:871 [#uses=0] + load <4 x float>* null ; <<4 x float>>:872 [#uses=0] + shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:873 [#uses=1] + bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>>:874 [#uses=1] + xor <4 x i32> %874, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>>:875 [#uses=0] + bitcast <4 x float> %873 to <4 x i32> ; <<4 x i32>>:876 [#uses=1] + xor <4 x i32> %876, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>>:877 [#uses=0] + bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>>:878 [#uses=1] + xor <4 x i32> %878, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>>:879 [#uses=1] + bitcast <4 x i32> %879 to <4 x float> ; <<4 x float>>:880 [#uses=0] + load <4 x i32>* %.sub7896 ; <<4 x i32>>:881 [#uses=1] 
+ icmp eq i32 0, 0 ; <i1>:882 [#uses=1] + br i1 %882, label %888, label %883 + +; <label>:883 ; preds = %xST.exit451 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 0 ; <<4 x float>*>:884 [#uses=1] + store <4 x float> zeroinitializer, <4 x float>* %884 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 1 ; <<4 x float>*>:885 [#uses=0] + shufflevector <4 x float> zeroinitializer, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:886 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 3 ; <<4 x float>*>:887 [#uses=0] + br label %xST.exit453 + +; <label>:888 ; preds = %xST.exit451 + shufflevector <4 x i32> %881, <4 x i32> undef, <4 x i32> zeroinitializer ; <<4 x i32>>:889 [#uses=0] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:890 [#uses=0] + br i1 false, label %894, label %891 + +; <label>:891 ; preds = %888 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 1 ; <<4 x float>*>:892 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:893 [#uses=1] + store <4 x float> %893, <4 x float>* %892 + br label %894 + +; <label>:894 ; preds = %891, %888 + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:895 [#uses=1] + icmp eq i32 %895, 0 ; <i1>:896 [#uses=1] + br i1 %896, label %898, label %897 + +; <label>:897 ; preds = %894 + br label %898 + +; <label>:898 ; preds = %897, %894 + load <4 x i32>* %.sub7896 ; <<4 x i32>>:899 [#uses=0] + br i1 false, label %xST.exit453, label %900 + +; <label>:900 ; preds = %898 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 3 ; <<4 x float>*>:901 [#uses=1] + load <4 x float>* %901 ; <<4 x float>>:902 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %902, <4 x i32> < i32 0, i32 
5, i32 6, i32 7 > ; <<4 x float>>:903 [#uses=0] + br label %xST.exit453 + +xST.exit453: ; preds = %900, %898, %883 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 1 ; <<4 x float>*>:904 [#uses=0] + load <4 x float>* null ; <<4 x float>>:905 [#uses=1] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 3 ; <<4 x float>*>:906 [#uses=0] + shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:907 [#uses=1] + shufflevector <4 x float> %905, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:908 [#uses=1] + bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>>:909 [#uses=0] + bitcast <4 x float> %908 to <4 x i32> ; <<4 x i32>>:910 [#uses=0] + bitcast <4 x float> %907 to <4 x i32> ; <<4 x i32>>:911 [#uses=0] + bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>>:912 [#uses=0] + load <4 x i32>* %.sub7896 ; <<4 x i32>>:913 [#uses=0] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 2, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:914 [#uses=0] + br i1 false, label %915, label %xPIF.exit455 + +; <label>:915 ; preds = %xST.exit453 + load <4 x i32>* %.sub7896 ; <<4 x i32>>:916 [#uses=0] + getelementptr [4 x <4 x i32>]* null, i32 0, i32 3 ; <<4 x i32>*>:917 [#uses=1] + store <4 x i32> zeroinitializer, <4 x i32>* %917 + load <4 x i32>* %.sub7896 ; <<4 x i32>>:918 [#uses=1] + and <4 x i32> %918, zeroinitializer ; <<4 x i32>>:919 [#uses=0] + br label %.critedge7899 + +.critedge7899: ; preds = %.critedge7899, %915 + or <4 x i32> zeroinitializer, zeroinitializer ; <<4 x i32>>:920 [#uses=1] + br i1 false, label %.critedge7899, label %xPBRK.exit456 + +xPBRK.exit456: ; preds = %.critedge7899 + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 2, <4 x i32> %920, <4 x i32> zeroinitializer ) ; <i32>:921 [#uses=0] + unreachable + +xPIF.exit455: ; preds = %xST.exit453 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 0 ; <<4 x float>*>:922 [#uses=1] + 
load <4 x float>* %922 ; <<4 x float>>:923 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 1 ; <<4 x float>*>:924 [#uses=1] + load <4 x float>* %924 ; <<4 x float>>:925 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 2 ; <<4 x float>*>:926 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 3 ; <<4 x float>*>:927 [#uses=0] + shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:928 [#uses=0] + bitcast { { i16, i16, i32 } }* %1 to <4 x float>* ; <<4 x float>*>:929 [#uses=0] + bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>>:930 [#uses=0] + load <4 x i32>* %.sub7896 ; <<4 x i32>>:931 [#uses=0] + icmp eq i32 0, 0 ; <i1>:932 [#uses=1] + br i1 %932, label %934, label %933 + +; <label>:933 ; preds = %xPIF.exit455 + store <4 x float> zeroinitializer, <4 x float>* null + br label %934 + +; <label>:934 ; preds = %933, %xPIF.exit455 + shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > ; <<4 x i32>>:935 [#uses=0] + icmp eq i32 0, 0 ; <i1>:936 [#uses=1] + br i1 %936, label %xST.exit459, label %937 + +; <label>:937 ; preds = %934 + br label %xST.exit459 + +xST.exit459: ; preds = %937, %934 + shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 > ; <<4 x i32>>:938 [#uses=1] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %938, <4 x i32> zeroinitializer ) ; <i32>:939 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 188, i32 2 ; <<4 x float>*>:940 [#uses=1] + store <4 x float> zeroinitializer, <4 x float>* %940 + load <4 x float>* null ; <<4 x float>>:941 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %941, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:942 [#uses=1] + store <4 x float> %942, <4 x float>* null + shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x 
i32> zeroinitializer ; <<4 x float>>:943 [#uses=0] + load <4 x i32>* %.sub7896 ; <<4 x i32>>:944 [#uses=0] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:945 [#uses=0] + br i1 false, label %947, label %946 + +; <label>:946 ; preds = %xST.exit459 + br label %947 + +; <label>:947 ; preds = %946, %xST.exit459 + shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > ; <<4 x i32>>:948 [#uses=0] + icmp eq i32 0, 0 ; <i1>:949 [#uses=1] + br i1 %949, label %952, label %950 + +; <label>:950 ; preds = %947 + shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> zeroinitializer ; <<4 x i32>>:951 [#uses=1] + call void @llvm.ppc.altivec.stvewx( <4 x i32> %951, i8* null ) + br label %952 + +; <label>:952 ; preds = %950, %947 + br i1 false, label %955, label %953 + +; <label>:953 ; preds = %952 + getelementptr [4 x <4 x i32>]* null, i32 0, i32 2 ; <<4 x i32>*>:954 [#uses=0] + br label %955 + +; <label>:955 ; preds = %953, %952 + shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>>:956 [#uses=0] + icmp eq i32 0, 0 ; <i1>:957 [#uses=1] + br i1 %957, label %xStoreDestAddressWithMask.exit461, label %958 + +; <label>:958 ; preds = %955 + shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> zeroinitializer ; <<4 x i32>>:959 [#uses=1] + call void @llvm.ppc.altivec.stvewx( <4 x i32> %959, i8* null ) + br label %xStoreDestAddressWithMask.exit461 + +xStoreDestAddressWithMask.exit461: ; preds = %958, %955 + load <4 x float>* %0 ; <<4 x float>>:960 [#uses=0] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:961 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 0 ; <<4 x float>*>:962 [#uses=0] + br i1 false, label %968, label %xST.exit463 + +xST.exit463: ; preds = %xStoreDestAddressWithMask.exit461 + 
getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 1 ; <<4 x float>*>:963 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 2 ; <<4 x float>*>:964 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 3 ; <<4 x float>*>:965 [#uses=0] + load <4 x float>* %0 ; <<4 x float>>:966 [#uses=3] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:967 [#uses=0] + br i1 false, label %972, label %969 + +; <label>:968 ; preds = %xStoreDestAddressWithMask.exit461 + unreachable + +; <label>:969 ; preds = %xST.exit463 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 1 ; <<4 x float>*>:970 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 2 ; <<4 x float>*>:971 [#uses=1] + store <4 x float> %966, <4 x float>* %971 + store <4 x float> %966, <4 x float>* null + br label %xST.exit465 + +; <label>:972 ; preds = %xST.exit463 + call <4 x i32> @llvm.ppc.altivec.vsel( <4 x i32> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <<4 x i32>>:973 [#uses=0] + store <4 x float> zeroinitializer, <4 x float>* null + store <4 x float> zeroinitializer, <4 x float>* null + load <4 x float>* null ; <<4 x float>>:974 [#uses=0] + bitcast <4 x float> %966 to <4 x i32> ; <<4 x i32>>:975 [#uses=1] + call <4 x i32> @llvm.ppc.altivec.vsel( <4 x i32> zeroinitializer, <4 x i32> %975, <4 x i32> zeroinitializer ) ; <<4 x i32>>:976 [#uses=1] + bitcast <4 x i32> %976 to <4 x float> ; <<4 x float>>:977 [#uses=1] + store <4 x float> %977, <4 x float>* null + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 3 ; <<4 x float>*>:978 [#uses=0] + bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>>:979 [#uses=1] + call <4 x i32> @llvm.ppc.altivec.vsel( <4 x i32> %979, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <<4 x i32>>:980 [#uses=1] + bitcast <4 x i32> %980 to <4 x float> ; <<4 x float>>:981 
[#uses=0] + br label %xST.exit465 + +xST.exit465: ; preds = %972, %969 + load <4 x float>* %0 ; <<4 x float>>:982 [#uses=3] + icmp eq i32 0, 0 ; <i1>:983 [#uses=1] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 0 ; <<4 x float>*>:984 [#uses=1] + br i1 %983, label %989, label %985 + +; <label>:985 ; preds = %xST.exit465 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 1 ; <<4 x float>*>:986 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 2 ; <<4 x float>*>:987 [#uses=1] + store <4 x float> %982, <4 x float>* %987 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3 ; <<4 x float>*>:988 [#uses=0] + br label %xST.exit467 + +; <label>:989 ; preds = %xST.exit465 + bitcast <4 x float> %982 to <4 x i32> ; <<4 x i32>>:990 [#uses=0] + shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> zeroinitializer ; <<4 x i32>>:991 [#uses=0] + store <4 x float> zeroinitializer, <4 x float>* %984 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 1 ; <<4 x float>*>:992 [#uses=0] + load <4 x i32>* %.sub7896 ; <<4 x i32>>:993 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 2 ; <<4 x float>*>:994 [#uses=0] + bitcast <4 x i32> zeroinitializer to <4 x float> ; <<4 x float>>:995 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3 ; <<4 x float>*>:996 [#uses=0] + bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>>:997 [#uses=1] + bitcast <4 x float> %982 to <4 x i32> ; <<4 x i32>>:998 [#uses=1] + shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>>:999 [#uses=1] + call <4 x i32> @llvm.ppc.altivec.vsel( <4 x i32> %997, <4 x i32> %998, <4 x i32> %999 ) ; <<4 x i32>>:1000 [#uses=1] + bitcast <4 x i32> %1000 to <4 x float> ; <<4 x float>>:1001 [#uses=0] + br label %xST.exit467 + +xST.exit467: ; preds = %989, %985 + load <4 x float>* %0 ; <<4 x float>>:1002 
[#uses=5] + load <4 x i32>* %.sub7896 ; <<4 x i32>>:1003 [#uses=2] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %1003, <4 x i32> zeroinitializer ) ; <i32>:1004 [#uses=0] + br i1 false, label %1011, label %1005 + +; <label>:1005 ; preds = %xST.exit467 + load <4 x float>* null ; <<4 x float>>:1006 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1 ; <<4 x float>*>:1007 [#uses=1] + load <4 x float>* %1007 ; <<4 x float>>:1008 [#uses=0] + load <4 x float>* null ; <<4 x float>>:1009 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:1010 [#uses=0] + br label %xST.exit469 + +; <label>:1011 ; preds = %xST.exit467 + shufflevector <4 x i32> %1003, <4 x i32> undef, <4 x i32> zeroinitializer ; <<4 x i32>>:1012 [#uses=0] + icmp eq i32 0, 0 ; <i1>:1013 [#uses=1] + br i1 %1013, label %1015, label %1014 + +; <label>:1014 ; preds = %1011 + br label %1015 + +; <label>:1015 ; preds = %1014, %1011 + %.07472 = phi <4 x float> [ %1002, %1014 ], [ %.27474, %1011 ] ; <<4 x float>> [#uses=0] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:1016 [#uses=1] + icmp eq i32 %1016, 0 ; <i1>:1017 [#uses=1] + br i1 %1017, label %1021, label %1018 + +; <label>:1018 ; preds = %1015 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1 ; <<4 x float>*>:1019 [#uses=0] + shufflevector <4 x float> %1002, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:1020 [#uses=0] + br label %1021 + +; <label>:1021 ; preds = %1018, %1015 + %.07467 = phi <4 x float> [ %1002, %1018 ], [ %.27469, %1015 ] ; <<4 x float>> [#uses=2] + icmp eq i32 0, 0 ; <i1>:1022 [#uses=1] + br i1 %1022, label %1025, label %1023 + +; <label>:1023 ; preds = %1021 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:1024 [#uses=1] + store <4 x float> zeroinitializer, <4 x float>* %1024 + br label %1025 + +; 
<label>:1025 ; preds = %1023, %1021 + %.07462 = phi <4 x float> [ %1002, %1023 ], [ %.27464, %1021 ] ; <<4 x float>> [#uses=2] + icmp eq i32 0, 0 ; <i1>:1026 [#uses=1] + br i1 %1026, label %xST.exit469, label %1027 + +; <label>:1027 ; preds = %1025 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:1028 [#uses=0] + br label %xST.exit469 + +xST.exit469: ; preds = %1027, %1025, %1005 + %.17463 = phi <4 x float> [ %.27464, %1005 ], [ %.07462, %1027 ], [ %.07462, %1025 ] ; <<4 x float>> [#uses=1] + %.17468 = phi <4 x float> [ %.27469, %1005 ], [ %.07467, %1027 ], [ %.07467, %1025 ] ; <<4 x float>> [#uses=1] + %.07489 = phi <4 x float> [ %1002, %1005 ], [ %.17490, %1027 ], [ %.17490, %1025 ] ; <<4 x float>> [#uses=1] + load <4 x float>* null ; <<4 x float>>:1029 [#uses=0] + load <4 x float>* null ; <<4 x float>>:1030 [#uses=0] + fsub <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1031 [#uses=1] + br i1 false, label %1037, label %1032 + +; <label>:1032 ; preds = %xST.exit469 + load <4 x float>* null ; <<4 x float>>:1033 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2 ; <<4 x float>*>:1034 [#uses=1] + load <4 x float>* %1034 ; <<4 x float>>:1035 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3 ; <<4 x float>*>:1036 [#uses=0] + br label %xST.exit472 + +; <label>:1037 ; preds = %xST.exit469 + icmp eq i32 0, 0 ; <i1>:1038 [#uses=1] + br i1 %1038, label %1040, label %1039 + +; <label>:1039 ; preds = %1037 + br label %1040 + +; <label>:1040 ; preds = %1039, %1037 + %.07507 = phi <4 x float> [ zeroinitializer, %1039 ], [ zeroinitializer, %1037 ] ; <<4 x float>> [#uses=0] + icmp eq i32 0, 0 ; <i1>:1041 [#uses=1] + br i1 %1041, label %1045, label %1042 + +; <label>:1042 ; preds = %1040 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1 ; <<4 x float>*>:1043 [#uses=1] + load <4 x float>* %1043 ; <<4 x float>>:1044 [#uses=0] + br label %1045 + +; 
<label>:1045 ; preds = %1042, %1040 + br i1 false, label %1048, label %1046 + +; <label>:1046 ; preds = %1045 + shufflevector <4 x float> %1031, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:1047 [#uses=0] + br label %1048 + +; <label>:1048 ; preds = %1046, %1045 + icmp eq i32 0, 0 ; <i1>:1049 [#uses=1] + br i1 %1049, label %xST.exit472, label %1050 + +; <label>:1050 ; preds = %1048 + br label %xST.exit472 + +xST.exit472: ; preds = %1050, %1048, %1032 + br i1 false, label %1052, label %1051 + +; <label>:1051 ; preds = %xST.exit472 + br label %xST.exit474 + +; <label>:1052 ; preds = %xST.exit472 + br i1 false, label %1054, label %1053 + +; <label>:1053 ; preds = %1052 + br label %1054 + +; <label>:1054 ; preds = %1053, %1052 + br i1 false, label %1056, label %1055 + +; <label>:1055 ; preds = %1054 + br label %1056 + +; <label>:1056 ; preds = %1055, %1054 + br i1 false, label %1058, label %1057 + +; <label>:1057 ; preds = %1056 + br label %1058 + +; <label>:1058 ; preds = %1057, %1056 + br i1 false, label %xST.exit474, label %1059 + +; <label>:1059 ; preds = %1058 + br label %xST.exit474 + +xST.exit474: ; preds = %1059, %1058, %1051 + load <4 x float>* null ; <<4 x float>>:1060 [#uses=1] + fmul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1061 [#uses=1] + fmul <4 x float> %1060, zeroinitializer ; <<4 x float>>:1062 [#uses=2] + br i1 false, label %1065, label %1063 + +; <label>:1063 ; preds = %xST.exit474 + shufflevector <4 x float> %1062, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:1064 [#uses=1] + store <4 x float> %1064, <4 x float>* null + br label %xST.exit476 + +; <label>:1065 ; preds = %xST.exit474 + br i1 false, label %1067, label %1066 + +; <label>:1066 ; preds = %1065 + br label %1067 + +; <label>:1067 ; preds = %1066, %1065 + shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > ; <<4 x i32>>:1068 [#uses=0] + br i1 
false, label %1070, label %1069 + +; <label>:1069 ; preds = %1067 + br label %1070 + +; <label>:1070 ; preds = %1069, %1067 + br i1 false, label %1072, label %1071 + +; <label>:1071 ; preds = %1070 + br label %1072 + +; <label>:1072 ; preds = %1071, %1070 + br i1 false, label %xST.exit476, label %1073 + +; <label>:1073 ; preds = %1072 + br label %xST.exit476 + +xST.exit476: ; preds = %1073, %1072, %1063 + %.07551 = phi <4 x float> [ %1062, %1063 ], [ %.17552, %1073 ], [ %.17552, %1072 ] ; <<4 x float>> [#uses=1] + %.07555 = phi <4 x float> [ %1061, %1063 ], [ %.17556, %1073 ], [ %.17556, %1072 ] ; <<4 x float>> [#uses=1] + br i1 false, label %1075, label %1074 + +; <label>:1074 ; preds = %xST.exit476 + br label %xST.exit479 + +; <label>:1075 ; preds = %xST.exit476 + br i1 false, label %1077, label %1076 + +; <label>:1076 ; preds = %1075 + br label %1077 + +; <label>:1077 ; preds = %1076, %1075 + br i1 false, label %1079, label %1078 + +; <label>:1078 ; preds = %1077 + br label %1079 + +; <label>:1079 ; preds = %1078, %1077 + br i1 false, label %1081, label %1080 + +; <label>:1080 ; preds = %1079 + br label %1081 + +; <label>:1081 ; preds = %1080, %1079 + br i1 false, label %xST.exit479, label %1082 + +; <label>:1082 ; preds = %1081 + br label %xST.exit479 + +xST.exit479: ; preds = %1082, %1081, %1074 + br i1 false, label %1084, label %1083 + +; <label>:1083 ; preds = %xST.exit479 + br label %xST.exit482 + +; <label>:1084 ; preds = %xST.exit479 + br i1 false, label %1086, label %1085 + +; <label>:1085 ; preds = %1084 + br label %1086 + +; <label>:1086 ; preds = %1085, %1084 + br i1 false, label %1088, label %1087 + +; <label>:1087 ; preds = %1086 + br label %1088 + +; <label>:1088 ; preds = %1087, %1086 + br i1 false, label %1090, label %1089 + +; <label>:1089 ; preds = %1088 + br label %1090 + +; <label>:1090 ; preds = %1089, %1088 + br i1 false, label %xST.exit482, label %1091 + +; <label>:1091 ; preds = %1090 + br label %xST.exit482 + +xST.exit482: ; preds = 
%1091, %1090, %1083 + br i1 false, label %1093, label %1092 + +; <label>:1092 ; preds = %xST.exit482 + br label %xST.exit486 + +; <label>:1093 ; preds = %xST.exit482 + br i1 false, label %1095, label %1094 + +; <label>:1094 ; preds = %1093 + br label %1095 + +; <label>:1095 ; preds = %1094, %1093 + br i1 false, label %1097, label %1096 + +; <label>:1096 ; preds = %1095 + br label %1097 + +; <label>:1097 ; preds = %1096, %1095 + br i1 false, label %1099, label %1098 + +; <label>:1098 ; preds = %1097 + br label %1099 + +; <label>:1099 ; preds = %1098, %1097 + br i1 false, label %xST.exit486, label %1100 + +; <label>:1100 ; preds = %1099 + br label %xST.exit486 + +xST.exit486: ; preds = %1100, %1099, %1092 + br i1 false, label %1102, label %1101 + +; <label>:1101 ; preds = %xST.exit486 + br label %xST.exit489 + +; <label>:1102 ; preds = %xST.exit486 + br i1 false, label %1104, label %1103 + +; <label>:1103 ; preds = %1102 + br label %1104 + +; <label>:1104 ; preds = %1103, %1102 + br i1 false, label %1106, label %1105 + +; <label>:1105 ; preds = %1104 + br label %1106 + +; <label>:1106 ; preds = %1105, %1104 + br i1 false, label %1108, label %1107 + +; <label>:1107 ; preds = %1106 + br label %1108 + +; <label>:1108 ; preds = %1107, %1106 + br i1 false, label %xST.exit489, label %1109 + +; <label>:1109 ; preds = %1108 + br label %xST.exit489 + +xST.exit489: ; preds = %1109, %1108, %1101 + br i1 false, label %1111, label %1110 + +; <label>:1110 ; preds = %xST.exit489 + br label %xST.exit492 + +; <label>:1111 ; preds = %xST.exit489 + br i1 false, label %1113, label %1112 + +; <label>:1112 ; preds = %1111 + br label %1113 + +; <label>:1113 ; preds = %1112, %1111 + br i1 false, label %1115, label %1114 + +; <label>:1114 ; preds = %1113 + br label %1115 + +; <label>:1115 ; preds = %1114, %1113 + br i1 false, label %1117, label %1116 + +; <label>:1116 ; preds = %1115 + br label %1117 + +; <label>:1117 ; preds = %1116, %1115 + br i1 false, label %xST.exit492, label %1118 + +; 
<label>:1118 ; preds = %1117 + br label %xST.exit492 + +xST.exit492: ; preds = %1118, %1117, %1110 + load <4 x float>* null ; <<4 x float>>:1119 [#uses=1] + fmul <4 x float> %1119, zeroinitializer ; <<4 x float>>:1120 [#uses=1] + fmul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1121 [#uses=1] + br i1 false, label %1123, label %1122 + +; <label>:1122 ; preds = %xST.exit492 + br label %xST.exit495 + +; <label>:1123 ; preds = %xST.exit492 + br i1 false, label %1125, label %1124 + +; <label>:1124 ; preds = %1123 + br label %1125 + +; <label>:1125 ; preds = %1124, %1123 + br i1 false, label %1127, label %1126 + +; <label>:1126 ; preds = %1125 + br label %1127 + +; <label>:1127 ; preds = %1126, %1125 + br i1 false, label %1129, label %1128 + +; <label>:1128 ; preds = %1127 + br label %1129 + +; <label>:1129 ; preds = %1128, %1127 + br i1 false, label %xST.exit495, label %1130 + +; <label>:1130 ; preds = %1129 + br label %xST.exit495 + +xST.exit495: ; preds = %1130, %1129, %1122 + %.07582 = phi <4 x float> [ %1121, %1122 ], [ %.17583, %1130 ], [ %.17583, %1129 ] ; <<4 x float>> [#uses=1] + %.07590 = phi <4 x float> [ %1120, %1122 ], [ %.17591, %1130 ], [ %.17591, %1129 ] ; <<4 x float>> [#uses=1] + load <4 x float>* null ; <<4 x float>>:1131 [#uses=1] + fadd <4 x float> %1131, zeroinitializer ; <<4 x float>>:1132 [#uses=1] + fadd <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1133 [#uses=1] + br i1 false, label %1135, label %1134 + +; <label>:1134 ; preds = %xST.exit495 + br label %xST.exit498 + +; <label>:1135 ; preds = %xST.exit495 + br i1 false, label %1137, label %1136 + +; <label>:1136 ; preds = %1135 + br label %1137 + +; <label>:1137 ; preds = %1136, %1135 + br i1 false, label %1139, label %1138 + +; <label>:1138 ; preds = %1137 + br label %1139 + +; <label>:1139 ; preds = %1138, %1137 + br i1 false, label %1141, label %1140 + +; <label>:1140 ; preds = %1139 + br label %1141 + +; <label>:1141 ; preds = %1140, %1139 + br i1 false, 
label %xST.exit498, label %1142 + +; <label>:1142 ; preds = %1141 + br label %xST.exit498 + +xST.exit498: ; preds = %1142, %1141, %1134 + %.07617 = phi <4 x float> [ %1133, %1134 ], [ %.17618, %1142 ], [ %.17618, %1141 ] ; <<4 x float>> [#uses=1] + %.07621 = phi <4 x float> [ %1132, %1134 ], [ %.17622, %1142 ], [ %.17622, %1141 ] ; <<4 x float>> [#uses=1] + load <4 x float>* null ; <<4 x float>>:1143 [#uses=1] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:1144 [#uses=1] + load <4 x float>* %1144 ; <<4 x float>>:1145 [#uses=1] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:1146 [#uses=1] + load <4 x float>* %1146 ; <<4 x float>>:1147 [#uses=1] + shufflevector <4 x float> %1143, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:1148 [#uses=1] + shufflevector <4 x float> %1145, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:1149 [#uses=1] + shufflevector <4 x float> %1147, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:1150 [#uses=1] + fmul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1151 [#uses=1] + fmul <4 x float> zeroinitializer, %1148 ; <<4 x float>>:1152 [#uses=1] + fmul <4 x float> zeroinitializer, %1149 ; <<4 x float>>:1153 [#uses=1] + fmul <4 x float> zeroinitializer, %1150 ; <<4 x float>>:1154 [#uses=1] + br i1 false, label %1156, label %1155 + +; <label>:1155 ; preds = %xST.exit498 + br label %xST.exit501 + +; <label>:1156 ; preds = %xST.exit498 + br i1 false, label %1158, label %1157 + +; <label>:1157 ; preds = %1156 + br label %1158 + +; <label>:1158 ; preds = %1157, %1156 + br i1 false, label %1160, label %1159 + +; <label>:1159 ; preds = %1158 + br label %1160 + +; <label>:1160 ; preds = %1159, %1158 + br i1 false, label %1162, label %1161 + +; <label>:1161 ; preds = %1160 + br label %1162 + +; <label>:1162 ; preds = %1161, %1160 + br i1 false, label %xST.exit501, label %1163 + +; <label>:1163 ; preds = %1162 + br label 
%xST.exit501 + +xST.exit501: ; preds = %1163, %1162, %1155 + %.07652 = phi <4 x float> [ %1154, %1155 ], [ %.17653, %1163 ], [ %.17653, %1162 ] ; <<4 x float>> [#uses=1] + %.07656 = phi <4 x float> [ %1153, %1155 ], [ %.17657, %1163 ], [ %.17657, %1162 ] ; <<4 x float>> [#uses=1] + %.07660 = phi <4 x float> [ %1152, %1155 ], [ %.17661, %1163 ], [ %.17661, %1162 ] ; <<4 x float>> [#uses=1] + %.07664 = phi <4 x float> [ %1151, %1155 ], [ %.17665, %1163 ], [ %.17665, %1162 ] ; <<4 x float>> [#uses=1] + load <4 x float>* null ; <<4 x float>>:1164 [#uses=1] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:1165 [#uses=1] + load <4 x float>* %1165 ; <<4 x float>>:1166 [#uses=1] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:1167 [#uses=1] + load <4 x float>* %1167 ; <<4 x float>>:1168 [#uses=1] + fadd <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1169 [#uses=1] + fadd <4 x float> zeroinitializer, %1164 ; <<4 x float>>:1170 [#uses=1] + fadd <4 x float> zeroinitializer, %1166 ; <<4 x float>>:1171 [#uses=1] + fadd <4 x float> zeroinitializer, %1168 ; <<4 x float>>:1172 [#uses=1] + br i1 false, label %1174, label %1173 + +; <label>:1173 ; preds = %xST.exit501 + br label %xST.exit504 + +; <label>:1174 ; preds = %xST.exit501 + br i1 false, label %1176, label %1175 + +; <label>:1175 ; preds = %1174 + br label %1176 + +; <label>:1176 ; preds = %1175, %1174 + br i1 false, label %1178, label %1177 + +; <label>:1177 ; preds = %1176 + br label %1178 + +; <label>:1178 ; preds = %1177, %1176 + br i1 false, label %1180, label %1179 + +; <label>:1179 ; preds = %1178 + br label %1180 + +; <label>:1180 ; preds = %1179, %1178 + br i1 false, label %xST.exit504, label %1181 + +; <label>:1181 ; preds = %1180 + br label %xST.exit504 + +xST.exit504: ; preds = %1181, %1180, %1173 + %.07722 = phi <4 x float> [ %1172, %1173 ], [ %.17723, %1181 ], [ %.17723, %1180 ] ; <<4 x float>> [#uses=1] + %.07726 = phi <4 
x float> [ %1171, %1173 ], [ %.17727, %1181 ], [ %.17727, %1180 ] ; <<4 x float>> [#uses=1] + %.07730 = phi <4 x float> [ %1170, %1173 ], [ %.17731, %1181 ], [ %.17731, %1180 ] ; <<4 x float>> [#uses=1] + %.07734 = phi <4 x float> [ %1169, %1173 ], [ %.17735, %1181 ], [ %.17735, %1180 ] ; <<4 x float>> [#uses=1] + fadd <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1182 [#uses=1] + br i1 false, label %1184, label %1183 + +; <label>:1183 ; preds = %xST.exit504 + br label %xST.exit507 + +; <label>:1184 ; preds = %xST.exit504 + br i1 false, label %1186, label %1185 + +; <label>:1185 ; preds = %1184 + br label %1186 + +; <label>:1186 ; preds = %1185, %1184 + br i1 false, label %1188, label %1187 + +; <label>:1187 ; preds = %1186 + store <4 x float> zeroinitializer, <4 x float>* null + br label %1188 + +; <label>:1188 ; preds = %1187, %1186 + load <4 x i32>* %.sub7896 ; <<4 x i32>>:1189 [#uses=1] + shufflevector <4 x i32> %1189, <4 x i32> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 > ; <<4 x i32>>:1190 [#uses=1] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %1190, <4 x i32> zeroinitializer ) ; <i32>:1191 [#uses=1] + icmp eq i32 %1191, 0 ; <i1>:1192 [#uses=1] + br i1 %1192, label %1196, label %1193 + +; <label>:1193 ; preds = %1188 + load <4 x float>* null ; <<4 x float>>:1194 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %1194, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:1195 [#uses=1] + store <4 x float> %1195, <4 x float>* null + br label %1196 + +; <label>:1196 ; preds = %1193, %1188 + %.07742 = phi <4 x float> [ zeroinitializer, %1193 ], [ zeroinitializer, %1188 ] ; <<4 x float>> [#uses=0] + load <4 x i32>* %.sub7896 ; <<4 x i32>>:1197 [#uses=1] + shufflevector <4 x i32> %1197, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>>:1198 [#uses=1] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %1198, <4 x i32> zeroinitializer ) ; <i32>:1199 [#uses=1] + icmp eq i32 %1199, 0 ; 
<i1>:1200 [#uses=1] + br i1 %1200, label %xST.exit507, label %1201 + +; <label>:1201 ; preds = %1196 + store <4 x float> zeroinitializer, <4 x float>* null + br label %xST.exit507 + +xST.exit507: ; preds = %1201, %1196, %1183 + %.07769 = phi <4 x float> [ %1182, %1183 ], [ %.17770, %1201 ], [ %.17770, %1196 ] ; <<4 x float>> [#uses=1] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:1202 [#uses=1] + icmp eq i32 %1202, 0 ; <i1>:1203 [#uses=1] + br i1 %1203, label %1207, label %1204 + +; <label>:1204 ; preds = %xST.exit507 + load <4 x float>* null ; <<4 x float>>:1205 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %1205, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:1206 [#uses=1] + store <4 x float> %1206, <4 x float>* null + br label %1207 + +; <label>:1207 ; preds = %1204, %xST.exit507 + load <4 x i32>* %.sub7896 ; <<4 x i32>>:1208 [#uses=1] + shufflevector <4 x i32> %1208, <4 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > ; <<4 x i32>>:1209 [#uses=1] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %1209, <4 x i32> zeroinitializer ) ; <i32>:1210 [#uses=1] + icmp eq i32 %1210, 0 ; <i1>:1211 [#uses=1] + br i1 %1211, label %1215, label %1212 + +; <label>:1212 ; preds = %1207 + load <4 x float>* null ; <<4 x float>>:1213 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %1213, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:1214 [#uses=1] + store <4 x float> %1214, <4 x float>* null + br label %1215 + +; <label>:1215 ; preds = %1212, %1207 + store <4 x float> zeroinitializer, <4 x float>* null + br label %xLS.exit449 +} + +declare <4 x i32> @llvm.ppc.altivec.vsel(<4 x i32>, <4 x i32>, <4 x i32>) + +declare void @llvm.ppc.altivec.stvewx(<4 x i32>, i8*) + +declare <4 x float> @llvm.ppc.altivec.vrsqrtefp(<4 x float>) + +declare <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32>, i32) + +declare i32 @llvm.ppc.altivec.vcmpequw.p(i32, <4 x 
i32>, <4 x i32>) + +declare <4 x i32> @llvm.ppc.altivec.vcmpgtfp(<4 x float>, <4 x float>)
diff --git a/src/LLVM/test/CodeGen/PowerPC/2007-04-24-InlineAsm-I-Modifier.ll b/src/LLVM/test/CodeGen/PowerPC/2007-04-24-InlineAsm-I-Modifier.ll new file mode 100644 index 0000000..b9dd928 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2007-04-24-InlineAsm-I-Modifier.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | grep {foo r3, r4} +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | grep {bari r3, 47} + +; PR1351 + +define i32 @test1(i32 %Y, i32 %X) nounwind { + %tmp1 = tail call i32 asm "foo${1:I} $0, $1", "=r,rI"( i32 %X ) + ret i32 %tmp1 +} + +define i32 @test2(i32 %Y, i32 %X) nounwind { + %tmp1 = tail call i32 asm "bar${1:I} $0, $1", "=r,rI"( i32 47 ) + ret i32 %tmp1 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll b/src/LLVM/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll new file mode 100644 index 0000000..c8d4484 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll
@@ -0,0 +1,31 @@ +; RUN: llc < %s | FileCheck %s +; RUN: llc < %s -regalloc=fast | FileCheck %s +; The first argument of subfc must not be the same as any other register. + +; CHECK: subfc [[REG:r.]], +; CHECK-NOT: [[REG]] +; CHECK: InlineAsm End +; PR1357 + +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "powerpc-apple-darwin8.8.0" + +;long long test(int A, int B, int C) { +; unsigned X, Y; +; __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" +; : "=r" (X), "=&r" (Y) +; : "r" (A), "rI" (B), "r" (C)); +; return ((long long)Y << 32) | X; +;} + +define i64 @test(i32 %A, i32 %B, i32 %C) nounwind { +entry: + %Y = alloca i32, align 4 ; <i32*> [#uses=2] + %tmp4 = call i32 asm "subf${3:I}c $1,$4,$3\0A\09subfze $0,$2", "=r,=*&r,r,rI,r"( i32* %Y, i32 %A, i32 %B, i32 %C ) ; <i32> [#uses=1] + %tmp5 = load i32* %Y ; <i32> [#uses=1] + %tmp56 = zext i32 %tmp5 to i64 ; <i64> [#uses=1] + %tmp7 = shl i64 %tmp56, 32 ; <i64> [#uses=1] + %tmp89 = zext i32 %tmp4 to i64 ; <i64> [#uses=1] + %tmp10 = or i64 %tmp7, %tmp89 ; <i64> [#uses=1] + ret i64 %tmp10 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2007-05-03-InlineAsm-S-Constraint.ll b/src/LLVM/test/CodeGen/PowerPC/2007-05-03-InlineAsm-S-Constraint.ll new file mode 100644 index 0000000..35970a5 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2007-05-03-InlineAsm-S-Constraint.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s +; PR1382 + +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "powerpc-apple-darwin8.8.0" +@x = global [2 x i32] [ i32 1, i32 2 ] ; <[2 x i32]*> [#uses=1] + +define void @foo() { +entry: + tail call void asm sideeffect "$0 $1", "s,i"( i8* bitcast (i32* getelementptr ([2 x i32]* @x, i32 0, i32 1) to i8*), i8* bitcast (i32* getelementptr ([2 x i32]* @x, i32 0, i32 1) to i8*) ) + ret void +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2007-05-14-InlineAsmSelectCrash.ll b/src/LLVM/test/CodeGen/PowerPC/2007-05-14-InlineAsmSelectCrash.ll new file mode 100644 index 0000000..d547b57 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2007-05-14-InlineAsmSelectCrash.ll
@@ -0,0 +1,25 @@ +; RUN: llc < %s -march=ppc32 +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "powerpc-apple-darwin8.8.0" + %struct..0anon = type { i32 } + %struct.A = type { %struct.anon } + %struct.anon = type <{ }> + +define void @bork(%struct.A* %In0P) { +entry: + %tmp56 = bitcast %struct.A* %In0P to float* ; <float*> [#uses=1] + br label %bb + +bb: ; preds = %bb, %entry + %i.035.0 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2] + %tmp8 = getelementptr float* %tmp56, i32 %i.035.0 ; <float*> [#uses=2] + %tmp101112 = bitcast float* %tmp8 to i8* ; <i8*> [#uses=1] + %tmp1617 = bitcast float* %tmp8 to i32* ; <i32*> [#uses=1] + %tmp21 = tail call i32 asm "lwbrx $0, $2, $1", "=r,r,bO,*m"( i8* %tmp101112, i32 0, i32* %tmp1617 ) ; <i32> [#uses=0] + %indvar.next = add i32 %i.035.0, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %indvar.next, 4 ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb + ret void +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll b/src/LLVM/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll new file mode 100644 index 0000000..d80a775 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll
@@ -0,0 +1,67 @@ +; RUN: llc < %s -march=ppc32 | grep bl.*baz | count 2 +; RUN: llc < %s -march=ppc32 | grep bl.*quux | count 2 +; RUN: llc < %s -march=ppc32 -enable-tail-merge | grep bl.*baz | count 1 +; RUN: llc < %s -march=ppc32 -enable-tail-merge=1 | grep bl.*quux | count 1 +; Check that tail merging is not the default on ppc, and that -enable-tail-merge works. + +; ModuleID = 'tail.c' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "i686-apple-darwin8" + +define i32 @f(i32 %i, i32 %q) { +entry: + %i_addr = alloca i32 ; <i32*> [#uses=2] + %q_addr = alloca i32 ; <i32*> [#uses=2] + %retval = alloca i32, align 4 ; <i32*> [#uses=1] + store i32 %i, i32* %i_addr + store i32 %q, i32* %q_addr + %tmp = load i32* %i_addr ; <i32> [#uses=1] + %tmp1 = icmp ne i32 %tmp, 0 ; <i1> [#uses=1] + %tmp12 = zext i1 %tmp1 to i8 ; <i8> [#uses=1] + %toBool = icmp ne i8 %tmp12, 0 ; <i1> [#uses=1] + br i1 %toBool, label %cond_true, label %cond_false + +cond_true: ; preds = %entry + %tmp3 = call i32 (...)* @bar( ) ; <i32> [#uses=0] + %tmp4 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0] + %tmp7 = load i32* %q_addr ; <i32> [#uses=1] + %tmp8 = icmp ne i32 %tmp7, 0 ; <i1> [#uses=1] + %tmp89 = zext i1 %tmp8 to i8 ; <i8> [#uses=1] + %toBool10 = icmp ne i8 %tmp89, 0 ; <i1> [#uses=1] + br i1 %toBool10, label %cond_true11, label %cond_false15 + +cond_false: ; preds = %entry + %tmp5 = call i32 (...)* @foo( ) ; <i32> [#uses=0] + %tmp6 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0] + %tmp27 = load i32* %q_addr ; <i32> [#uses=1] + %tmp28 = icmp ne i32 %tmp27, 0 ; <i1> [#uses=1] + %tmp289 = zext i1 %tmp28 to i8 ; <i8> [#uses=1] + %toBool210 = icmp ne i8 %tmp289, 0 ; <i1> [#uses=1] + br i1 %toBool210, label %cond_true11, label %cond_false15 + +cond_true11: ; preds = %cond_next + %tmp13 = call i32 (...)* @foo( ) ; <i32> [#uses=0] + %tmp14 = call i32 (...)* @quux( i32 3, i32 4 ) ; <i32> 
[#uses=0] + br label %cond_next18 + +cond_false15: ; preds = %cond_next + %tmp16 = call i32 (...)* @bar( ) ; <i32> [#uses=0] + %tmp17 = call i32 (...)* @quux( i32 3, i32 4 ) ; <i32> [#uses=0] + br label %cond_next18 + +cond_next18: ; preds = %cond_false15, %cond_true11 + %tmp19 = call i32 (...)* @bar( ) ; <i32> [#uses=0] + br label %return + +return: ; preds = %cond_next18 + %retval20 = load i32* %retval ; <i32> [#uses=1] + ret i32 %retval20 +} + +declare i32 @bar(...) + +declare i32 @baz(...) + +declare i32 @foo(...) + +declare i32 @quux(...)
diff --git a/src/LLVM/test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll b/src/LLVM/test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll new file mode 100644 index 0000000..608db2c --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll
@@ -0,0 +1,14 @@ +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "powerpc-apple-darwin8.8.0" + +; RUN: llc < %s -march=ppc32 | grep {rlwinm r3, r3, 23, 30, 30} +; PR1473 + +define zeroext i8 @foo(i16 zeroext %a) { + %tmp2 = lshr i16 %a, 10 ; <i16> [#uses=1] + %tmp23 = trunc i16 %tmp2 to i8 ; <i8> [#uses=1] + %tmp4 = shl i8 %tmp23, 1 ; <i8> [#uses=1] + %tmp5 = and i8 %tmp4, 2 ; <i8> [#uses=1] + ret i8 %tmp5 +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/2007-06-28-BCCISelBug.ll b/src/LLVM/test/CodeGen/PowerPC/2007-06-28-BCCISelBug.ll new file mode 100644 index 0000000..6864fcb --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2007-06-28-BCCISelBug.ll
@@ -0,0 +1,85 @@ +; RUN: llc < %s -march=ppc32 -mattr=+altivec + + %struct.XATest = type { float, i16, i8, i8 } + %struct.XArrayRange = type { i8, i8, i8, i8 } + %struct.XBlendMode = type { i16, i16, i16, i16, %struct.GIC4, i16, i16, i8, i8, i8, i8 } + %struct.XClearC = type { double, %struct.GIC4, %struct.GIC4, float, i32 } + %struct.XClipPlane = type { i32, [6 x %struct.GIC4] } + %struct.XCBuffer = type { i16, i16, [8 x i16] } + %struct.XCMatrix = type { [16 x float]*, %struct.XICSS } + %struct.XConvolution = type { %struct.GIC4, %struct.XICSS, i16, i16, float*, i32, i32 } + %struct.XDepthTest = type { i16, i16, i8, i8, i8, i8, double, double } + %struct.XFixedFunctionProgram = type { %struct.PPSToken* } + %struct.XFogMode = type { %struct.GIC4, float, float, float, float, float, i16, i16, i16, i8, i8 } + %struct.XFramebufferAttachment = type { i32, i32, i32, i32 } + %struct.XHintMode = type { i16, i16, i16, i16, i16, i16, i16, i16, i16, i16 } + %struct.XHistogram = type { %struct.XFramebufferAttachment*, i32, i16, i8, i8 } + %struct.XICSS = type { %struct.GTCoord2, %struct.GTCoord2, %struct.GTCoord2, %struct.GTCoord2 } + %struct.XISubset = type { %struct.XConvolution, %struct.XConvolution, %struct.XConvolution, %struct.XCMatrix, %struct.XMinmax, %struct.XHistogram, %struct.XICSS, %struct.XICSS, %struct.XICSS, %struct.XICSS, i32 } + %struct.XLight = type { %struct.GIC4, %struct.GIC4, %struct.GIC4, %struct.GIC4, %struct.XPointLineLimits, float, float, float, float, float, %struct.XPointLineLimits, float, float, float, float, float } + %struct.XLightModel = type { %struct.GIC4, [8 x %struct.XLight], [2 x %struct.XMaterial], i32, i16, i16, i16, i8, i8, i8, i8, i8, i8 } + %struct.XLightProduct = type { %struct.GIC4, %struct.GIC4, %struct.GIC4 } + %struct.XLineMode = type { float, i32, i16, i16, i8, i8, i8, i8 } + %struct.XLogicOp = type { i16, i8, i8 } + %struct.XMaskMode = type { i32, [3 x i32], i8, i8, i8, i8, i8, i8, i8, i8 } + %struct.XMaterial = type { 
%struct.GIC4, %struct.GIC4, %struct.GIC4, %struct.GIC4, float, float, float, float, [8 x %struct.XLightProduct], %struct.GIC4, [6 x i32], [2 x i32] } + %struct.XMinmax = type { %struct.XMinmaxTable*, i16, i8, i8 } + %struct.XMinmaxTable = type { %struct.GIC4, %struct.GIC4 } + %struct.XMipmaplevel = type { [4 x i32], [4 x i32], [4 x float], [4 x i32], i32, i32, float*, i8*, i16, i16, i16, i16, [2 x float] } + %struct.XMultisample = type { float, i8, i8, i8, i8, i8, i8, i8, i8 } + %struct.XPipelineProgramState = type { i8, i8, i8, i8, %struct.GIC4* } + %struct.XPMap = type { i32*, float*, float*, float*, float*, float*, float*, float*, float*, i32*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } + %struct.XPMode = type { float, float, %struct.XPStore, %struct.XPTransfer, %struct.XPMap, %struct.XISubset, i32, i32 } + %struct.XPPack = type { i32, i32, i32, i32, i32, i32, i32, i32, i8, i8, i8, i8 } + %struct.XPStore = type { %struct.XPPack, %struct.XPPack } + %struct.XPTransfer = type { float, float, float, float, float, float, float, float, float, float, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float } + %struct.XPointLineLimits = type { float, float, float } + %struct.XPointMode = type { float, float, float, float, %struct.XPointLineLimits, float, i8, i8, i8, i8, i16, i16, i32, i16, i16 } + %struct.XPGMode = type { [128 x i8], float, float, i16, i16, i16, i16, i8, i8, i8, i8, i8, i8, i8, i8 } + %struct.XRegisterCCs = type { i8, i8, i8, i8, i32, [2 x %struct.GIC4], [8 x %struct.XRegisterCCsPerStageState], %struct.XRegisterCCsFinalStageState } + %struct.XRegisterCCsFinalStageState = type { i8, i8, i8, i8, [7 x %struct.XRegisterCCsPerVariableState] } + %struct.XRegisterCCsPerPortionState = type { [4 x %struct.XRegisterCCsPerVariableState], i8, i8, i8, i8, i16, i16, i16, i16, i16, i16 } + %struct.XRegisterCCsPerStageState = type { [2 x %struct.XRegisterCCsPerPortionState], [2 x %struct.GIC4] } + 
%struct.XRegisterCCsPerVariableState = type { i16, i16, i16, i16 } + %struct.XScissorTest = type { %struct.XFramebufferAttachment, i8, i8, i8, i8 } + %struct.XState = type { i16, i16, i16, i16, i32, i32, [256 x %struct.GIC4], [128 x %struct.GIC4], %struct.XViewport, %struct.XXF, %struct.XLightModel, %struct.XATest, %struct.XBlendMode, %struct.XClearC, %struct.XCBuffer, %struct.XDepthTest, %struct.XArrayRange, %struct.XFogMode, %struct.XHintMode, %struct.XLineMode, %struct.XLogicOp, %struct.XMaskMode, %struct.XPMode, %struct.XPointMode, %struct.XPGMode, %struct.XScissorTest, i32, %struct.XStencilTest, [16 x %struct.XTMode], %struct.XArrayRange, [8 x %struct.XTCoordGen], %struct.XClipPlane, %struct.XMultisample, %struct.XRegisterCCs, %struct.XArrayRange, %struct.XArrayRange, [3 x %struct.XPipelineProgramState], %struct.XXFFeedback, i32*, %struct.XFixedFunctionProgram, [3 x i32] } + %struct.XStencilTest = type { [3 x { i32, i32, i16, i16, i16, i16 }], i32, [4 x i8] } + %struct.XTCoordGen = type { { i16, i16, %struct.GIC4, %struct.GIC4 }, { i16, i16, %struct.GIC4, %struct.GIC4 }, { i16, i16, %struct.GIC4, %struct.GIC4 }, { i16, i16, %struct.GIC4, %struct.GIC4 }, i8, i8, i8, i8 } + %struct.XTGeomState = type { i16, i16, i16, i16, i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, [6 x i16], [6 x i16] } + %struct.XTLevel = type { i32, i32, i16, i16, i16, i8, i8, i16, i16, i16, i16, i8* } + %struct.XTMode = type { %struct.GIC4, i32, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, float, float, float, i16, i16, i16, i16, i16, i16, [4 x i16], i8, i8, i8, i8, [3 x float], [4 x float], float, float } + %struct.XTParamState = type { i16, i16, i16, i16, i16, i16, %struct.GIC4, float, float, float, float, i16, i16, i16, i16, float, i16, i8, i8, i32, i8* } + %struct.XTRec = type { %struct.XTState*, float, float, float, float, %struct.XMipmaplevel*, %struct.XMipmaplevel*, i32, i32, i32, i32, i32, i32, i32, 
[2 x %struct.PPSToken] } + %struct.XTState = type { i16, i8, i8, i16, i16, float, i32, %struct.GISWRSurface*, %struct.XTParamState, %struct.XTGeomState, %struct.XTLevel, [6 x [15 x %struct.XTLevel]] } + %struct.XXF = type { [24 x [16 x float]], [24 x [16 x float]], [16 x float], float, float, float, float, float, i8, i8, i8, i8, i32, i32, i32, i16, i16, i8, i8, i8, i8, i32 } + %struct.XXFFeedback = type { i8, i8, i8, i8, [16 x i32], [16 x i32] } + %struct.XViewport = type { float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, double, double, i32, i32, i32, i32, float, float, float, float } + %struct.GIC4 = type { float, float, float, float } + %struct.GISWRSurface = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i8*, [4 x i8*], i32 } + %struct.GTCoord2 = type { float, float } + %struct.GVMFPContext = type { float, i32, i32, i32, float, [3 x float] } + %struct.GVMFPStack = type { [8 x i8*], i8*, i8*, i32, i32, { <4 x float> }, { <4 x float> }, <4 x i32> } + %struct.GVMFGAttrib = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, [8 x <4 x float>] } + %struct.GVMTs = type { [16 x %struct.XTRec*] } + %struct.PPSToken = type { { i16, i16, i32 } } + %struct._GVMConstants = type { <4 x i32>, <4 x i32>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, float, float, float, float, float, float, float, float, float, float, float, float, [256 x float], [528 x i8] } + +declare <4 x i32> @llvm.ppc.altivec.lvewx(i8*) + +declare i32 @llvm.ppc.altivec.vcmpequw.p(i32, <4 x i32>, <4 x i32>) + +define void @test(%struct.XState* %gldst, <4 x float>* %prgrm, <4 x float>** %buffs, %struct._GVMConstants* %cnstn, %struct.PPSToken* %pstrm, %struct.GVMFPContext* %vmctx, %struct.GVMTs* %txtrs, %struct.GVMFPStack* %fpstk, %struct.GVMFGAttrib* %start, %struct.GVMFGAttrib* %deriv, i32 %fragx, i32 %fragy) { +bb58.i: + 
%tmp3405.i = getelementptr %struct.XTRec* null, i32 0, i32 1 ; <float*> [#uses=1] + %tmp34053406.i = bitcast float* %tmp3405.i to i8* ; <i8*> [#uses=1] + %tmp3407.i = call <4 x i32> @llvm.ppc.altivec.lvewx( i8* %tmp34053406.i ) ; <<4 x i32>> [#uses=0] + %tmp4146.i = call i32 @llvm.ppc.altivec.vcmpequw.p( i32 3, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32> [#uses=1] + %tmp4147.i = icmp eq i32 %tmp4146.i, 0 ; <i1> [#uses=1] + br i1 %tmp4147.i, label %bb8799.i, label %bb4150.i + +bb4150.i: ; preds = %bb58.i + br label %bb8799.i + +bb8799.i: ; preds = %bb4150.i, %bb58.i + ret void +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2007-08-04-CoalescerAssert.ll b/src/LLVM/test/CodeGen/PowerPC/2007-08-04-CoalescerAssert.ll new file mode 100644 index 0000000..06f40d9 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2007-08-04-CoalescerAssert.ll
@@ -0,0 +1,28 @@ +; RUN: llc < %s -march=ppc64 +; PR1596 + + %struct._obstack_chunk = type { i8* } + %struct.obstack = type { i8*, %struct._obstack_chunk* (i8*, i64)*, i8*, i8 } + +define i32 @_obstack_newchunk(%struct.obstack* %h, i32 %length) { +entry: + br i1 false, label %cond_false, label %cond_true + +cond_true: ; preds = %entry + br i1 false, label %cond_true28, label %cond_next30 + +cond_false: ; preds = %entry + %tmp22 = tail call %struct._obstack_chunk* null( i64 undef ) ; <%struct._obstack_chunk*> [#uses=2] + br i1 false, label %cond_true28, label %cond_next30 + +cond_true28: ; preds = %cond_false, %cond_true + %iftmp.0.043.0 = phi %struct._obstack_chunk* [ null, %cond_true ], [ %tmp22, %cond_false ] ; <%struct._obstack_chunk*> [#uses=1] + tail call void null( ) + br label %cond_next30 + +cond_next30: ; preds = %cond_true28, %cond_false, %cond_true + %iftmp.0.043.1 = phi %struct._obstack_chunk* [ %iftmp.0.043.0, %cond_true28 ], [ null, %cond_true ], [ %tmp22, %cond_false ] ; <%struct._obstack_chunk*> [#uses=1] + %tmp41 = getelementptr %struct._obstack_chunk* %iftmp.0.043.1, i32 0, i32 0 ; <i8**> [#uses=1] + store i8* null, i8** %tmp41, align 8 + ret i32 undef +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll b/src/LLVM/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll new file mode 100644 index 0000000..82ef2b8 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=ppc64 | grep dst | count 4 + +define hidden void @_Z4borkPc(i8* %image) { +entry: + tail call void @llvm.ppc.altivec.dst( i8* %image, i32 8, i32 0 ) + tail call void @llvm.ppc.altivec.dstt( i8* %image, i32 8, i32 0 ) + tail call void @llvm.ppc.altivec.dstst( i8* %image, i32 8, i32 0 ) + tail call void @llvm.ppc.altivec.dststt( i8* %image, i32 8, i32 0 ) + ret void +} + +declare void @llvm.ppc.altivec.dst(i8*, i32, i32) +declare void @llvm.ppc.altivec.dstt(i8*, i32, i32) +declare void @llvm.ppc.altivec.dstst(i8*, i32, i32) +declare void @llvm.ppc.altivec.dststt(i8*, i32, i32)
diff --git a/src/LLVM/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll b/src/LLVM/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll new file mode 100644 index 0000000..ea7de98 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=ppc64 | grep lwzx + + %struct.__db_region = type { %struct.__mutex_t, [4 x i8], %struct.anon, i32, [1 x i32] } + %struct.__mutex_t = type { i32 } + %struct.anon = type { i64, i64 } + +define void @foo() { +entry: + %ttype = alloca i32, align 4 ; <i32*> [#uses=1] + %regs = alloca [1024 x %struct.__db_region], align 16 ; <[1024 x %struct.__db_region]*> [#uses=0] + %tmp = load i32* %ttype, align 4 ; <i32> [#uses=1] + %tmp1 = call i32 (...)* @bork( i32 %tmp ) ; <i32> [#uses=0] + ret void +} + +declare i32 @bork(...)
diff --git a/src/LLVM/test/CodeGen/PowerPC/2007-09-08-unaligned.ll b/src/LLVM/test/CodeGen/PowerPC/2007-09-08-unaligned.ll new file mode 100644 index 0000000..898c470 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2007-09-08-unaligned.ll
@@ -0,0 +1,53 @@ +; RUN: llc < %s | grep stfd | count 3 +; RUN: llc < %s | grep stfs | count 1 +; RUN: llc < %s | grep lfd | count 2 +; RUN: llc < %s | grep lfs | count 2 +; ModuleID = 'foo.c' +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128" +target triple = "powerpc-apple-darwin8" + %struct.anon = type <{ i8, float }> +@s = global %struct.anon <{ i8 3, float 0x4014666660000000 }> ; <%struct.anon*> [#uses=1] +@u = global <{ i8, double }> <{ i8 3, double 5.100000e+00 }> ; <<{ i8, double }>*> [#uses=1] +@t = weak global %struct.anon zeroinitializer ; <%struct.anon*> [#uses=2] +@v = weak global <{ i8, double }> zeroinitializer ; <<{ i8, double }>*> [#uses=2] +@.str = internal constant [8 x i8] c"%f %lf\0A\00" ; <[8 x i8]*> [#uses=1] + +define i32 @foo() { +entry: + %retval = alloca i32, align 4 ; <i32*> [#uses=1] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + %tmp = getelementptr %struct.anon* @s, i32 0, i32 1 ; <float*> [#uses=1] + %tmp1 = load float* %tmp, align 1 ; <float> [#uses=1] + %tmp2 = getelementptr %struct.anon* @t, i32 0, i32 1 ; <float*> [#uses=1] + store float %tmp1, float* %tmp2, align 1 + %tmp3 = getelementptr <{ i8, double }>* @u, i32 0, i32 1 ; <double*> [#uses=1] + %tmp4 = load double* %tmp3, align 1 ; <double> [#uses=1] + %tmp5 = getelementptr <{ i8, double }>* @v, i32 0, i32 1 ; <double*> [#uses=1] + store double %tmp4, double* %tmp5, align 1 + br label %return + +return: ; preds = %entry + %retval6 = load i32* %retval ; <i32> [#uses=1] + ret i32 %retval6 +} + +define i32 @main() { +entry: + %retval = alloca i32, align 4 ; <i32*> [#uses=1] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + %tmp = call i32 @foo( ) ; <i32> [#uses=0] + %tmp1 = getelementptr %struct.anon* @t, i32 0, i32 1 ; <float*> [#uses=1] + %tmp2 = load float* %tmp1, align 1 ; <float> [#uses=1] + %tmp23 = fpext float %tmp2 to double ; <double> [#uses=1] + %tmp4 = 
getelementptr <{ i8, double }>* @v, i32 0, i32 1 ; <double*> [#uses=1] + %tmp5 = load double* %tmp4, align 1 ; <double> [#uses=1] + %tmp6 = getelementptr [8 x i8]* @.str, i32 0, i32 0 ; <i8*> [#uses=1] + %tmp7 = call i32 (i8*, ...)* @printf( i8* %tmp6, double %tmp23, double %tmp5 ) ; <i32> [#uses=0] + br label %return + +return: ; preds = %entry + %retval8 = load i32* %retval ; <i32> [#uses=1] + ret i32 %retval8 +} + +declare i32 @printf(i8*, ...)
diff --git a/src/LLVM/test/CodeGen/PowerPC/2007-09-11-RegCoalescerAssert.ll b/src/LLVM/test/CodeGen/PowerPC/2007-09-11-RegCoalescerAssert.ll new file mode 100644 index 0000000..d12698b --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2007-09-11-RegCoalescerAssert.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=ppc64 + + %struct.TCMalloc_SpinLock = type { i32 } + +define void @_ZN17TCMalloc_SpinLock4LockEv(%struct.TCMalloc_SpinLock* %this) { +entry: + %tmp3 = call i32 asm sideeffect "1: lwarx $0, 0, $1\0A\09stwcx. $2, 0, $1\0A\09bne- 1b\0A\09isync", "=&r,=*r,r,1,~{dirflag},~{fpsr},~{flags},~{memory}"( i32** null, i32 1, i32* null ) ; <i32> [#uses=0] + unreachable +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2007-09-12-LiveIntervalsAssert.ll b/src/LLVM/test/CodeGen/PowerPC/2007-09-12-LiveIntervalsAssert.ll new file mode 100644 index 0000000..5cfe54e --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2007-09-12-LiveIntervalsAssert.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=powerpc64-apple-darwin + +declare void @cxa_atexit_check_1(i8*) + +define i32 @check_cxa_atexit(i32 (void (i8*)*, i8*, i8*)* %cxa_atexit, void (i8*)* %cxa_finalize) { +entry: + %tmp7 = call i32 null( void (i8*)* @cxa_atexit_check_1, i8* null, i8* null ) ; <i32> [#uses=0] + br i1 false, label %cond_true, label %cond_next + +cond_true: ; preds = %entry + ret i32 0 + +cond_next: ; preds = %entry + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2007-10-16-InlineAsmFrameOffset.ll b/src/LLVM/test/CodeGen/PowerPC/2007-10-16-InlineAsmFrameOffset.ll new file mode 100644 index 0000000..c4152b4 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2007-10-16-InlineAsmFrameOffset.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=ppc32 +; rdar://5538377 + + %struct.disk_unsigned = type { i32 } + %struct._StorePageMax = type { %struct.disk_unsigned, %struct.disk_unsigned, [65536 x i8] } + +define i32 @test() { +entry: + %data = alloca i32 ; <i32*> [#uses=1] + %compressedPage = alloca %struct._StorePageMax ; <%struct._StorePageMax*> [#uses=0] + %tmp107 = call i32 asm "lwbrx $0, $2, $1", "=r,r,bO,*m"( i8* null, i32 0, i32* %data ) ; <i32> [#uses=0] + unreachable +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/2007-10-18-PtrArithmetic.ll b/src/LLVM/test/CodeGen/PowerPC/2007-10-18-PtrArithmetic.ll new file mode 100644 index 0000000..84fadd1 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2007-10-18-PtrArithmetic.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -march=ppc64 -mattr=+altivec + %struct.inoutprops = type <{ i8, [3 x i8] }> + +define void @bork(float* %argA, float* %argB, float* %res, i8 %inoutspec.0) { +entry: + %.mask = and i8 %inoutspec.0, -16 ; <i8> [#uses=1] + %tmp6 = icmp eq i8 %.mask, 16 ; <i1> [#uses=1] + br i1 %tmp6, label %cond_true, label %UnifiedReturnBlock + +cond_true: ; preds = %entry + %tmp89 = bitcast float* %res to <4 x i32>* ; <<4 x i32>*> [#uses=1] + %tmp1011 = bitcast float* %argA to <4 x i32>* ; <<4 x i32>*> [#uses=1] + %tmp14 = load <4 x i32>* %tmp1011, align 16 ; <<4 x i32>> [#uses=1] + %tmp1516 = bitcast float* %argB to <4 x i32>* ; <<4 x i32>*> [#uses=1] + %tmp18 = load <4 x i32>* %tmp1516, align 16 ; <<4 x i32>> [#uses=1] + %tmp19 = sdiv <4 x i32> %tmp14, %tmp18 ; <<4 x i32>> [#uses=1] + store <4 x i32> %tmp19, <4 x i32>* %tmp89, align 16 + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll b/src/LLVM/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll new file mode 100644 index 0000000..556a4a1 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll
@@ -0,0 +1,27 @@ +; RUN: llc < %s -mtriple=powerpc64-apple-darwin9 -regalloc=fast -relocation-model=pic + + %struct.NSError = type opaque + %struct.NSManagedObjectContext = type opaque + %struct.NSPersistentStoreCoordinator = type opaque + %struct.NSString = type opaque + %struct.NSURL = type opaque + %struct._message_ref_t = type { %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)*, %struct.objc_selector* } + %struct.objc_object = type { } + %struct.objc_selector = type opaque +@"\01L_OBJC_MESSAGE_REF_2" = external global %struct._message_ref_t ; <%struct._message_ref_t*> [#uses=1] +@"\01L_OBJC_MESSAGE_REF_6" = external global %struct._message_ref_t ; <%struct._message_ref_t*> [#uses=1] +@NSXMLStoreType = external constant %struct.NSString* ; <%struct.NSString**> [#uses=1] +@"\01L_OBJC_MESSAGE_REF_5" = external global %struct._message_ref_t ; <%struct._message_ref_t*> [#uses=2] +@"\01L_OBJC_MESSAGE_REF_4" = external global %struct._message_ref_t ; <%struct._message_ref_t*> [#uses=1] + +define %struct.NSManagedObjectContext* @"+[ListGenerator(Private) managedObjectContextWithModelURL:storeURL:]"(%struct.objc_object* %self, %struct._message_ref_t* %_cmd, %struct.NSURL* %modelURL, %struct.NSURL* %storeURL) { +entry: + %storeCoordinator = alloca %struct.NSPersistentStoreCoordinator* ; <%struct.NSPersistentStoreCoordinator**> [#uses=0] + %tmp29 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)* null( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_2" ) ; <%struct.objc_object*> [#uses=0] + %tmp34 = load %struct.NSString** @NSXMLStoreType, align 8 ; <%struct.NSString*> [#uses=1] + %tmp37 = load %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)** getelementptr (%struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_5", i32 0, i32 0), align 8 ; <%struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)*> [#uses=1] + %tmp42 = call %struct.objc_object* 
(%struct.objc_object*, %struct._message_ref_t*, ...)* null( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_4", i32 1 ) ; <%struct.objc_object*> [#uses=1] + %tmp45 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)* %tmp37( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_5", %struct.objc_object* %tmp42, %struct.NSString* null ) ; <%struct.objc_object*> [#uses=1] + %tmp48 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)* null( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_6", %struct.NSString* %tmp34, i8* null, %struct.NSURL* null, %struct.objc_object* %tmp45, %struct.NSError** null ) ; <%struct.objc_object*> [#uses=0] + unreachable +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll b/src/LLVM/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll new file mode 100644 index 0000000..b3b9280 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll
@@ -0,0 +1,25 @@ +; RUN: llc < %s -mtriple=powerpc64-apple-darwin9 -regalloc=fast -relocation-model=pic + + %struct.NSError = type opaque + %struct.NSManagedObjectContext = type opaque + %struct.NSString = type opaque + %struct.NSURL = type opaque + %struct._message_ref_t = type { %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)*, %struct.objc_selector* } + %struct.objc_object = type { } + %struct.objc_selector = type opaque +@"\01L_OBJC_MESSAGE_REF_2" = external global %struct._message_ref_t ; <%struct._message_ref_t*> [#uses=2] +@"\01L_OBJC_MESSAGE_REF_6" = external global %struct._message_ref_t ; <%struct._message_ref_t*> [#uses=2] +@NSXMLStoreType = external constant %struct.NSString* ; <%struct.NSString**> [#uses=1] +@"\01L_OBJC_MESSAGE_REF_4" = external global %struct._message_ref_t ; <%struct._message_ref_t*> [#uses=2] + +define %struct.NSManagedObjectContext* @"+[ListGenerator(Private) managedObjectContextWithModelURL:storeURL:]"(%struct.objc_object* %self, %struct._message_ref_t* %_cmd, %struct.NSURL* %modelURL, %struct.NSURL* %storeURL) { +entry: + %tmp27 = load %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)** getelementptr (%struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_2", i32 0, i32 0), align 8 ; <%struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)*> [#uses=1] + %tmp29 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)* %tmp27( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_2" ) ; <%struct.objc_object*> [#uses=0] + %tmp33 = load %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)** getelementptr (%struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_6", i32 0, i32 0), align 8 ; <%struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)*> [#uses=1] + %tmp34 = load %struct.NSString** @NSXMLStoreType, align 8 ; <%struct.NSString*> [#uses=1] + %tmp40 = load %struct.objc_object* 
(%struct.objc_object*, %struct._message_ref_t*, ...)** getelementptr (%struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_4", i32 0, i32 0), align 8 ; <%struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)*> [#uses=1] + %tmp42 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)* %tmp40( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_4", i32 1 ) ; <%struct.objc_object*> [#uses=0] + %tmp48 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)* %tmp33( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_6", %struct.NSString* %tmp34, i8* null, %struct.NSURL* null, %struct.objc_object* null, %struct.NSError** null ) ; <%struct.objc_object*> [#uses=0] + unreachable +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2007-11-04-CoalescerCrash.ll b/src/LLVM/test/CodeGen/PowerPC/2007-11-04-CoalescerCrash.ll new file mode 100644 index 0000000..a9f242b --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2007-11-04-CoalescerCrash.ll
@@ -0,0 +1,148 @@ +; RUN: llc < %s -mtriple=powerpc-apple-darwin + + %struct.HDescriptor = type <{ i32, i32 }> + +declare void @bcopy(i8*, i8*, i32) + +define i32 @main(i32 %argc, i8** %argv) { +entry: + br i1 false, label %bb31, label %bb + +bb: ; preds = %entry + ret i32 -6 + +bb31: ; preds = %entry + switch i32 0, label %bb189 [ + i32 73, label %cond_next209 + i32 74, label %bb74 + i32 77, label %bb57 + i32 78, label %cond_next209 + i32 85, label %cond_next209 + i32 97, label %cond_next209 + i32 100, label %cond_next209 + i32 107, label %cond_next209 + i32 109, label %bb57 + i32 112, label %bb43 + i32 115, label %cond_next209 + i32 117, label %bb51 + ] + +bb43: ; preds = %bb31 + br i1 false, label %cond_true48, label %cond_true200.critedge2117 + +cond_true48: ; preds = %bb43 + br i1 false, label %cond_next372, label %AllDone + +bb51: ; preds = %bb31 + ret i32 0 + +bb57: ; preds = %bb31, %bb31 + ret i32 0 + +bb74: ; preds = %bb31 + ret i32 0 + +bb189: ; preds = %bb31 + ret i32 0 + +cond_true200.critedge2117: ; preds = %bb43 + ret i32 0 + +cond_next209: ; preds = %bb31, %bb31, %bb31, %bb31, %bb31, %bb31, %bb31 + ret i32 0 + +cond_next372: ; preds = %cond_true48 + switch i32 0, label %bb1728 [ + i32 73, label %bb1723 + i32 74, label %cond_true1700 + i32 78, label %bb1718 + i32 85, label %bb1713 + i32 97, label %bb1620 + i32 107, label %AllDone + i32 112, label %cond_next423 + i32 117, label %cond_next1453 + ] + +cond_next423: ; preds = %cond_next372 + switch i16 0, label %cond_next691 [ + i16 18475, label %cond_next807 + i16 18520, label %cond_next807 + ] + +cond_next691: ; preds = %cond_next423 + ret i32 0 + +cond_next807: ; preds = %cond_next423, %cond_next423 + switch i16 0, label %cond_true1192 [ + i16 18475, label %cond_next21.i + i16 18520, label %cond_next21.i + ] + +cond_next21.i: ; preds = %cond_next807, %cond_next807 + br i1 false, label %cond_next934, label %free.i + +free.i: ; preds = %cond_next21.i + ret i32 0 + +cond_next934: ; preds = %bb1005, 
%cond_next21.i + %listsize.1 = phi i32 [ 0, %bb1005 ], [ 64, %cond_next21.i ] ; <i32> [#uses=1] + %catalogExtents.2 = phi %struct.HDescriptor* [ %catalogExtents.1.reg2mem.1, %bb1005 ], [ null, %cond_next21.i ] ; <%struct.HDescriptor*> [#uses=3] + br i1 false, label %cond_next942, label %Return1020 + +cond_next942: ; preds = %cond_next934 + br i1 false, label %bb1005, label %bb947 + +bb947: ; preds = %cond_next971, %cond_next942 + %indvar = phi i32 [ 0, %cond_next942 ], [ %indvar.next2140, %cond_next971 ] ; <i32> [#uses=2] + %catalogExtents.1.reg2mem.0 = phi %struct.HDescriptor* [ %catalogExtents.2, %cond_next942 ], [ %tmp977978, %cond_next971 ] ; <%struct.HDescriptor*> [#uses=1] + %extents.0.reg2mem.0 = phi %struct.HDescriptor* [ null, %cond_next942 ], [ %tmp977978, %cond_next971 ] ; <%struct.HDescriptor*> [#uses=1] + br i1 false, label %cond_next971, label %Return1020 + +cond_next971: ; preds = %bb947 + %tmp = shl i32 %indvar, 6 ; <i32> [#uses=1] + %listsize.0.reg2mem.0 = add i32 %tmp, %listsize.1 ; <i32> [#uses=1] + %tmp973 = add i32 %listsize.0.reg2mem.0, 64 ; <i32> [#uses=1] + %tmp974975 = bitcast %struct.HDescriptor* %extents.0.reg2mem.0 to i8* ; <i8*> [#uses=1] + %tmp977 = call i8* @realloc( i8* %tmp974975, i32 %tmp973 ) ; <i8*> [#uses=1] + %tmp977978 = bitcast i8* %tmp977 to %struct.HDescriptor* ; <%struct.HDescriptor*> [#uses=3] + call void @bcopy( i8* null, i8* null, i32 64 ) + %indvar.next2140 = add i32 %indvar, 1 ; <i32> [#uses=1] + br i1 false, label %bb1005, label %bb947 + +bb1005: ; preds = %cond_next971, %cond_next942 + %catalogExtents.1.reg2mem.1 = phi %struct.HDescriptor* [ %catalogExtents.2, %cond_next942 ], [ %tmp977978, %cond_next971 ] ; <%struct.HDescriptor*> [#uses=2] + br i1 false, label %Return1020, label %cond_next934 + +Return1020: ; preds = %bb1005, %bb947, %cond_next934 + %catalogExtents.3 = phi %struct.HDescriptor* [ %catalogExtents.1.reg2mem.0, %bb947 ], [ %catalogExtents.2, %cond_next934 ], [ %catalogExtents.1.reg2mem.1, %bb1005 ] ; 
<%struct.HDescriptor*> [#uses=0] + ret i32 0 + +cond_true1192: ; preds = %cond_next807 + ret i32 0 + +cond_next1453: ; preds = %cond_next372 + ret i32 0 + +bb1620: ; preds = %cond_next372 + ret i32 0 + +cond_true1700: ; preds = %cond_next372 + ret i32 0 + +bb1713: ; preds = %cond_next372 + ret i32 0 + +bb1718: ; preds = %cond_next372 + ret i32 0 + +bb1723: ; preds = %cond_next372 + ret i32 0 + +bb1728: ; preds = %cond_next372 + ret i32 -6 + +AllDone: ; preds = %cond_next372, %cond_true48 + ret i32 0 +} + +declare i8* @realloc(i8*, i32)
diff --git a/src/LLVM/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll b/src/LLVM/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll new file mode 100644 index 0000000..ecf45ef --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll
@@ -0,0 +1,52 @@ +; RUN: llc < %s +;; Formerly crashed, see PR 1508 +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128" +target triple = "powerpc64-apple-darwin8" + %struct.Range = type { i64, i64 } + +define void @Bork(i64 %range.0.0, i64 %range.0.1, i64 %size) { +entry: + %effectiveRange = alloca %struct.Range, align 8 ; <%struct.Range*> [#uses=2] + %tmp4 = call i8* @llvm.stacksave() ; <i8*> [#uses=1] + %size1 = trunc i64 %size to i32 ; <i32> [#uses=1] + %tmp17 = alloca i8*, i32 %size1 ; <i8**> [#uses=1] + invoke void @Foo(i8** %tmp17) + to label %bb30.preheader unwind label %unwind + +bb30.preheader: ; preds = %entry + %tmp26 = getelementptr %struct.Range* %effectiveRange, i64 0, i32 1 ; <i64*> [#uses=1] + br label %bb30 + +unwind: ; preds = %cond_true, %entry + %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + catch i8* null + call void @llvm.stackrestore(i8* %tmp4) + resume { i8*, i32 } %exn + +invcont23: ; preds = %cond_true + %tmp27 = load i64* %tmp26, align 8 ; <i64> [#uses=1] + %tmp28 = sub i64 %range_addr.1.0, %tmp27 ; <i64> [#uses=1] + br label %bb30 + +bb30: ; preds = %invcont23, %bb30.preheader + %range_addr.1.0 = phi i64 [ %tmp28, %invcont23 ], [ %range.0.1, %bb30.preheader ] ; <i64> [#uses=2] + %tmp33 = icmp eq i64 %range_addr.1.0, 0 ; <i1> [#uses=1] + br i1 %tmp33, label %cleanup, label %cond_true + +cond_true: ; preds = %bb30 + invoke void @Bar(i64 %range.0.0, %struct.Range* %effectiveRange) + to label %invcont23 unwind label %unwind + +cleanup: ; preds = %bb30 + ret void +} + +declare i8* @llvm.stacksave() nounwind + +declare void @Foo(i8**) + +declare void @Bar(i64, %struct.Range*) + +declare void @llvm.stackrestore(i8*) nounwind + +declare i32 @__gxx_personality_v0(...)
diff --git a/src/LLVM/test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll b/src/LLVM/test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll new file mode 100644 index 0000000..d1f0285 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s +; RUN: llc < %s -march=ppc32 -mcpu=g3 +; RUN: llc < %s -march=ppc32 -mcpu=g5 +; PR1811 + +define void @execute_shader(<4 x float>* %OUT, <4 x float>* %IN, <4 x float>* +%CONST) { +entry: + %input2 = load <4 x float>* null, align 16 ; <<4 x float>> + %shuffle7 = shufflevector <4 x float> %input2, <4 x float> < float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 2, i32 2, i32 2, i32 2 > ; <<4 x float>> [#uses=1] + + %mul1 = fmul <4 x float> %shuffle7, zeroinitializer ; <<4 x + %add2 = fadd <4 x float> %mul1, %input2 ; <<4 x float>> + store <4 x float> %add2, <4 x float>* null, align 16 + ret void +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2008-02-05-LiveIntervalsAssert.ll b/src/LLVM/test/CodeGen/PowerPC/2008-02-05-LiveIntervalsAssert.ll new file mode 100644 index 0000000..791e9e6 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2008-02-05-LiveIntervalsAssert.ll
@@ -0,0 +1,67 @@ +; RUN: llc < %s -mtriple=powerpc-apple-darwin + + %struct.Handle = type { %struct.oopDesc** } + %struct.JNI_ArgumentPusher = type { %struct.SignatureIterator, %struct.JavaCallArguments* } + %struct.JNI_ArgumentPusherArray = type { %struct.JNI_ArgumentPusher, %struct.JvmtiEventEnabled* } + %struct.JavaCallArguments = type { [9 x i32], [9 x i32], i32*, i32*, i32, i32, i32 } + %struct.JvmtiEventEnabled = type { i64 } + %struct.KlassHandle = type { %struct.Handle } + %struct.SignatureIterator = type { i32 (...)**, %struct.KlassHandle, i32, i32, i32 } + %struct.instanceOopDesc = type { %struct.oopDesc } + %struct.oopDesc = type { %struct.instanceOopDesc*, %struct.instanceOopDesc* } +@.str = external constant [44 x i8] ; <[44 x i8]*> [#uses=1] + +define void @_ZN23JNI_ArgumentPusherArray7iterateEy(%struct.JNI_ArgumentPusherArray* %this, i64 %fingerprint) nounwind { +entry: + br label %bb113 + +bb22.preheader: ; preds = %bb113 + ret void + +bb32.preheader: ; preds = %bb113 + ret void + +bb42.preheader: ; preds = %bb113 + ret void + +bb52: ; preds = %bb113 + br label %bb113 + +bb62.preheader: ; preds = %bb113 + ret void + +bb72.preheader: ; preds = %bb113 + ret void + +bb82: ; preds = %bb113 + br label %bb113 + +bb93: ; preds = %bb113 + br label %bb113 + +bb103.preheader: ; preds = %bb113 + ret void + +bb113: ; preds = %bb113, %bb93, %bb82, %bb52, %entry + %fingerprint_addr.0.reg2mem.9 = phi i64 [ 0, %entry ], [ 0, %bb52 ], [ 0, %bb82 ], [ 0, %bb93 ], [ %tmp118, %bb113 ] ; <i64> [#uses=1] + tail call void @_Z28report_should_not_reach_herePKci( i8* getelementptr ([44 x i8]* @.str, i32 0, i32 0), i32 817 ) nounwind + %tmp118 = lshr i64 %fingerprint_addr.0.reg2mem.9, 4 ; <i64> [#uses=2] + %tmp21158 = and i64 %tmp118, 15 ; <i64> [#uses=1] + switch i64 %tmp21158, label %bb113 [ + i64 1, label %bb22.preheader + i64 2, label %bb52 + i64 3, label %bb32.preheader + i64 4, label %bb42.preheader + i64 5, label %bb62.preheader + i64 6, label %bb82 + i64 7, label 
%bb93 + i64 8, label %bb103.preheader + i64 9, label %bb72.preheader + i64 10, label %UnifiedReturnBlock + ] + +UnifiedReturnBlock: ; preds = %bb113 + ret void +} + +declare void @_Z28report_should_not_reach_herePKci(i8*, i32)
diff --git a/src/LLVM/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll b/src/LLVM/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll new file mode 100644 index 0000000..e03bd9e --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -mtriple=powerpc-apple-darwin -regalloc=fast + +define i32 @bork(i64 %foo, i64 %bar) { +entry: + %tmp = load i64* null, align 8 ; <i64> [#uses=2] + %tmp2 = icmp ule i64 %tmp, 0 ; <i1> [#uses=1] + %min = select i1 %tmp2, i64 %tmp, i64 0 ; <i64> [#uses=1] + store i64 %min, i64* null, align 8 + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll b/src/LLVM/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll new file mode 100644 index 0000000..e50fac4 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -mtriple=powerpc-apple-darwin -enable-ppc32-regscavenger + +declare i8* @bar(i32) + +define void @foo(i8* %pp) nounwind { +entry: + %tmp2 = tail call i8* @bar( i32 14 ) nounwind ; <i8*> [#uses=0] + %tmp28 = bitcast i8* %pp to void ()** ; <void ()**> [#uses=1] + %tmp38 = load void ()** %tmp28, align 4 ; <void ()*> [#uses=2] + br i1 false, label %bb34, label %bb25 +bb25: ; preds = %entry + %tmp30 = bitcast void ()* %tmp38 to void (i8*)* ; <void (i8*)*> [#uses=1] + tail call void %tmp30( i8* null ) nounwind + ret void +bb34: ; preds = %entry + tail call void %tmp38( ) nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll b/src/LLVM/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll new file mode 100644 index 0000000..9f35b83 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll
@@ -0,0 +1,31 @@ +; RUN: llc < %s -march=ppc32 -enable-ppc32-regscavenger + + %struct._cpp_strbuf = type { i8*, i32, i32 } + %struct.cpp_string = type { i32, i8* } + +declare fastcc void @emit_numeric_escape(i32, i32, %struct._cpp_strbuf*, i32) nounwind + +define i32 @cpp_interpret_string(i32 %pfile, %struct.cpp_string* %from, i32 %wide) nounwind { +entry: + %tmp61 = load i32* null, align 4 ; <i32> [#uses=1] + %toBool = icmp eq i32 %wide, 0 ; <i1> [#uses=2] + %iftmp.87.0 = select i1 %toBool, i32 %tmp61, i32 0 ; <i32> [#uses=2] + %tmp69 = icmp ult i32 %iftmp.87.0, 33 ; <i1> [#uses=1] + %min = select i1 %tmp69, i32 %iftmp.87.0, i32 32 ; <i32> [#uses=1] + %tmp71 = icmp ugt i32 %min, 31 ; <i1> [#uses=1] + br i1 %tmp71, label %bb79, label %bb75 +bb75: ; preds = %entry + ret i32 0 +bb79: ; preds = %entry + br i1 %toBool, label %bb103, label %bb94 +bb94: ; preds = %bb79 + br i1 false, label %bb729, label %bb130.preheader +bb103: ; preds = %bb79 + ret i32 0 +bb130.preheader: ; preds = %bb94 + %tmp134 = getelementptr %struct.cpp_string* %from, i32 0, i32 1 ; <i8**> [#uses=0] + ret i32 0 +bb729: ; preds = %bb94 + call fastcc void @emit_numeric_escape( i32 %pfile, i32 0, %struct._cpp_strbuf* null, i32 %wide ) nounwind + ret i32 1 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2008-03-18-RegScavengerAssert.ll b/src/LLVM/test/CodeGen/PowerPC/2008-03-18-RegScavengerAssert.ll new file mode 100644 index 0000000..dd425f5 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2008-03-18-RegScavengerAssert.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=ppc64 -enable-ppc64-regscavenger + +define i16 @test(i8* %d1, i16* %d2) { + %tmp237 = call i16 asm "lhbrx $0, $2, $1", "=r,r,bO,m"( i8* %d1, i32 0, i16* %d2 ) + ret i16 %tmp237 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2008-03-24-AddressRegImm.ll b/src/LLVM/test/CodeGen/PowerPC/2008-03-24-AddressRegImm.ll new file mode 100644 index 0000000..a8fef05 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2008-03-24-AddressRegImm.ll
@@ -0,0 +1,25 @@ +; RUN: llc < %s -march=ppc64 + +define fastcc i8* @page_rec_get_next(i8* %rec) nounwind { +entry: + %tmp2627 = ptrtoint i8* %rec to i64 ; <i64> [#uses=2] + %tmp28 = and i64 %tmp2627, -16384 ; <i64> [#uses=2] + %tmp2829 = inttoptr i64 %tmp28 to i8* ; <i8*> [#uses=1] + %tmp37 = getelementptr i8* %tmp2829, i64 42 ; <i8*> [#uses=1] + %tmp40 = load i8* %tmp37, align 1 ; <i8> [#uses=1] + %tmp4041 = zext i8 %tmp40 to i64 ; <i64> [#uses=1] + %tmp42 = shl i64 %tmp4041, 8 ; <i64> [#uses=1] + %tmp47 = add i64 %tmp42, 0 ; <i64> [#uses=1] + %tmp52 = and i64 %tmp47, 32768 ; <i64> [#uses=1] + %tmp72 = icmp eq i64 %tmp52, 0 ; <i1> [#uses=1] + br i1 %tmp72, label %bb91, label %bb +bb: ; preds = %entry + ret i8* null +bb91: ; preds = %entry + br i1 false, label %bb100, label %bb185 +bb100: ; preds = %bb91 + %tmp106 = sub i64 %tmp2627, %tmp28 ; <i64> [#uses=0] + ret i8* null +bb185: ; preds = %bb91 + ret i8* null +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2008-03-24-CoalescerBug.ll b/src/LLVM/test/CodeGen/PowerPC/2008-03-24-CoalescerBug.ll new file mode 100644 index 0000000..01c83cb --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2008-03-24-CoalescerBug.ll
@@ -0,0 +1,30 @@ +; RUN: llc < %s -march=ppc32 + + %struct..0objc_object = type { %struct.objc_class* } + %struct.NSArray = type { %struct..0objc_object } + %struct.NSMutableArray = type { %struct.NSArray } + %struct.PFTPersistentSymbols = type { %struct..0objc_object, %struct.VMUSymbolicator*, %struct.NSMutableArray*, %struct.__CFDictionary*, %struct.__CFDictionary*, %struct.__CFDictionary*, %struct.__CFDictionary*, %struct.NSMutableArray*, i8, %struct.pthread_mutex_t, %struct.NSMutableArray*, %struct.pthread_rwlock_t } + %struct.VMUMachTaskContainer = type { %struct..0objc_object, i32, i32 } + %struct.VMUSymbolicator = type { %struct..0objc_object, %struct.NSMutableArray*, %struct.NSArray*, %struct.NSArray*, %struct.VMUMachTaskContainer*, i8 } + %struct.__CFDictionary = type opaque + %struct.__builtin_CFString = type { i32*, i32, i8*, i32 } + %struct.objc_class = type opaque + %struct.objc_selector = type opaque + %struct.pthread_mutex_t = type { i32, [40 x i8] } + %struct.pthread_rwlock_t = type { i32, [124 x i8] } +external constant %struct.__builtin_CFString ; <%struct.__builtin_CFString*>:0 [#uses=1] + +define void @"-[PFTPersistentSymbols saveSymbolWithName:address:path:lineNumber:flags:owner:]"(%struct.PFTPersistentSymbols* %self, %struct.objc_selector* %_cmd, %struct.NSArray* %name, i64 %address, %struct.NSArray* %path, i32 %lineNumber, i64 %flags, %struct..0objc_object* %owner) nounwind { +entry: + br i1 false, label %bb12, label %bb21 +bb12: ; preds = %entry + %tmp17 = tail call signext i8 inttoptr (i64 4294901504 to i8 (%struct..0objc_object*, %struct.objc_selector*, %struct.NSArray*)*)( %struct..0objc_object* null, %struct.objc_selector* null, %struct.NSArray* bitcast (%struct.__builtin_CFString* @0 to %struct.NSArray*) ) nounwind ; <i8> [#uses=0] + br i1 false, label %bb25, label %bb21 +bb21: ; preds = %bb12, %entry + %tmp24 = or i64 %flags, 4 ; <i64> [#uses=1] + br label %bb25 +bb25: ; preds = %bb21, %bb12 + %flags_addr.0 = phi i64 [ %tmp24, %bb21 ], 
[ %flags, %bb12 ] ; <i64> [#uses=1] + %tmp3233 = trunc i64 %flags_addr.0 to i32 ; <i32> [#uses=0] + ret void +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2008-03-26-CoalescerBug.ll b/src/LLVM/test/CodeGen/PowerPC/2008-03-26-CoalescerBug.ll new file mode 100644 index 0000000..8e5bf56 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2008-03-26-CoalescerBug.ll
@@ -0,0 +1,28 @@ +; RUN: llc < %s -mtriple=powerpc-apple-darwin + +define i32 @t(i64 %byteStart, i32 %activeIndex) nounwind { +entry: + %tmp50 = load i32* null, align 4 ; <i32> [#uses=1] + %tmp5051 = zext i32 %tmp50 to i64 ; <i64> [#uses=3] + %tmp53 = udiv i64 %byteStart, %tmp5051 ; <i64> [#uses=1] + %tmp5354 = trunc i64 %tmp53 to i32 ; <i32> [#uses=1] + %tmp62 = urem i64 %byteStart, %tmp5051 ; <i64> [#uses=1] + %tmp94 = add i32 0, 1 ; <i32> [#uses=1] + %tmp100 = urem i32 %tmp94, 0 ; <i32> [#uses=2] + %tmp108 = add i32 0, %activeIndex ; <i32> [#uses=1] + %tmp110 = sub i32 %tmp108, 0 ; <i32> [#uses=1] + %tmp112 = urem i32 %tmp110, 0 ; <i32> [#uses=2] + %tmp122 = icmp ult i32 %tmp112, %tmp100 ; <i1> [#uses=1] + %iftmp.175.0 = select i1 %tmp122, i32 %tmp112, i32 %tmp100 ; <i32> [#uses=1] + %tmp119 = add i32 %tmp5354, 0 ; <i32> [#uses=1] + %tmp131 = add i32 %tmp119, %iftmp.175.0 ; <i32> [#uses=1] + %tmp131132 = zext i32 %tmp131 to i64 ; <i64> [#uses=1] + %tmp147 = mul i64 %tmp131132, %tmp5051 ; <i64> [#uses=1] + br i1 false, label %bb164, label %bb190 +bb164: ; preds = %entry + %tmp171172 = and i64 %tmp62, 4294967295 ; <i64> [#uses=1] + %tmp173 = add i64 %tmp171172, %tmp147 ; <i64> [#uses=0] + ret i32 0 +bb190: ; preds = %entry + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2008-04-10-LiveIntervalCrash.ll b/src/LLVM/test/CodeGen/PowerPC/2008-04-10-LiveIntervalCrash.ll new file mode 100644 index 0000000..2706337 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2008-04-10-LiveIntervalCrash.ll
@@ -0,0 +1,100 @@ +; RUN: llc < %s -mtriple=powerpc-apple-darwin + +define fastcc i64 @nonzero_bits1() nounwind { +entry: + switch i32 0, label %bb1385 [ + i32 28, label %bb235 + i32 35, label %bb153 + i32 37, label %bb951 + i32 40, label %bb289 + i32 44, label %bb1344 + i32 46, label %bb651 + i32 47, label %bb651 + i32 48, label %bb322 + i32 49, label %bb651 + i32 50, label %bb651 + i32 51, label %bb651 + i32 52, label %bb651 + i32 53, label %bb651 + i32 54, label %bb535 + i32 55, label %bb565 + i32 56, label %bb565 + i32 58, label %bb1100 + i32 59, label %bb1100 + i32 60, label %bb1100 + i32 61, label %bb1100 + i32 63, label %bb565 + i32 64, label %bb565 + i32 65, label %bb565 + i32 66, label %bb565 + i32 73, label %bb302 + i32 74, label %bb302 + i32 75, label %bb302 + i32 76, label %bb302 + i32 77, label %bb302 + i32 78, label %bb302 + i32 79, label %bb302 + i32 80, label %bb302 + i32 81, label %bb302 + i32 82, label %bb302 + i32 83, label %bb302 + i32 84, label %bb302 + i32 85, label %bb302 + i32 86, label %bb302 + i32 87, label %bb302 + i32 88, label %bb302 + i32 89, label %bb302 + i32 90, label %bb302 + i32 91, label %bb507 + i32 92, label %bb375 + i32 93, label %bb355 + i32 103, label %bb1277 + i32 104, label %bb1310 + i32 105, label %UnifiedReturnBlock + i32 106, label %bb1277 + i32 107, label %bb1343 + ] +bb153: ; preds = %entry + ret i64 0 +bb235: ; preds = %entry + br i1 false, label %bb245, label %UnifiedReturnBlock +bb245: ; preds = %bb235 + ret i64 0 +bb289: ; preds = %entry + ret i64 0 +bb302: ; preds = %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry + ret i64 0 +bb322: ; preds = %entry + ret i64 0 +bb355: ; preds = %entry + ret i64 0 +bb375: ; preds = %entry + ret i64 0 +bb507: ; preds = %entry + ret i64 0 +bb535: ; preds = %entry + ret i64 0 +bb565: ; preds = %entry, %entry, %entry, %entry, %entry, %entry + ret i64 0 +bb651: ; preds = %entry, %entry, 
%entry, %entry, %entry, %entry, %entry + ret i64 0 +bb951: ; preds = %entry + ret i64 0 +bb1100: ; preds = %entry, %entry, %entry, %entry + ret i64 0 +bb1277: ; preds = %entry, %entry + br i1 false, label %UnifiedReturnBlock, label %bb1284 +bb1284: ; preds = %bb1277 + ret i64 0 +bb1310: ; preds = %entry + ret i64 0 +bb1343: ; preds = %entry + ret i64 1 +bb1344: ; preds = %entry + ret i64 0 +bb1385: ; preds = %entry + ret i64 0 +UnifiedReturnBlock: ; preds = %bb1277, %bb235, %entry + %UnifiedRetVal = phi i64 [ 0, %bb235 ], [ undef, %bb1277 ], [ -1, %entry ] ; <i64> [#uses=1] + ret i64 %UnifiedRetVal +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2008-04-16-CoalescerBug.ll b/src/LLVM/test/CodeGen/PowerPC/2008-04-16-CoalescerBug.ll new file mode 100644 index 0000000..839098e --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2008-04-16-CoalescerBug.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -mtriple=powerpc-apple-darwin +; Avoid reading memory that's already freed. + +@llvm.used = appending global [1 x i8*] [ i8* bitcast (i32 (i64)* @_Z13GetSectorSizey to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +define i32 @_Z13GetSectorSizey(i64 %Base) nounwind { +entry: + br i1 false, label %bb, label %UnifiedReturnBlock +bb: ; preds = %entry + %tmp10 = and i64 0, %Base ; <i64> [#uses=0] + ret i32 0 +UnifiedReturnBlock: ; preds = %entry + ret i32 131072 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll b/src/LLVM/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll new file mode 100644 index 0000000..7b6d491 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll
@@ -0,0 +1,89 @@ +; RUN: llc < %s -mtriple=powerpc-apple-darwin + +@_ZL10DeviceCode = internal global i16 0 ; <i16*> [#uses=1] +@.str19 = internal constant [64 x i8] c"unlock_then_erase_sector: failed to erase block (status= 0x%x)\0A\00" ; <[64 x i8]*> [#uses=1] +@.str34 = internal constant [68 x i8] c"ProgramByWords - Erasing sector 0x%llx to 0x%llx (size 0x%x bytes)\0A\00" ; <[68 x i8]*> [#uses=1] +@.str35 = internal constant [37 x i8] c"ProgramByWords - Done erasing flash\0A\00" ; <[37 x i8]*> [#uses=1] +@.str36 = internal constant [48 x i8] c"ProgramByWords - Starting to write to FLASH...\0A\00" ; <[48 x i8]*> [#uses=1] + +declare void @IOLog(i8*, ...) + +declare void @IODelay(i32) + +define i32 @_Z14ProgramByWordsPvyy(i8* %buffer, i64 %Offset, i64 %bufferSize) nounwind { +entry: + volatile store i8 -1, i8* null, align 1 + %tmp28 = icmp eq i8 0, 0 ; <i1> [#uses=1] + br i1 %tmp28, label %bb107, label %bb + +bb: ; preds = %entry + %tmp9596430 = zext i32 0 to i64 ; <i64> [#uses=1] + %tmp98431 = add i64 %tmp9596430, %Offset ; <i64> [#uses=1] + %tmp100433 = icmp ugt i64 %tmp98431, %Offset ; <i1> [#uses=1] + br i1 %tmp100433, label %bb31, label %bb103 + +bb31: ; preds = %_Z24unlock_then_erase_sectory.exit, %bb + %Pos.0.reg2mem.0 = phi i64 [ %tmp93, %_Z24unlock_then_erase_sectory.exit ], [ %Offset, %bb ] ; <i64> [#uses=3] + %tmp35 = load i16* @_ZL10DeviceCode, align 2 ; <i16> [#uses=1] + %tmp3536 = zext i16 %tmp35 to i32 ; <i32> [#uses=2] + %tmp37 = and i32 %tmp3536, 65520 ; <i32> [#uses=1] + %tmp38 = icmp eq i32 %tmp37, 35008 ; <i1> [#uses=1] + %tmp34 = sub i64 %Pos.0.reg2mem.0, %Offset ; <i64> [#uses=2] + br i1 %tmp38, label %bb41, label %bb68 + +bb41: ; preds = %bb31 + %tmp43 = add i32 0, -1 ; <i32> [#uses=1] + %tmp4344 = zext i32 %tmp43 to i64 ; <i64> [#uses=1] + %tmp46 = and i64 %tmp4344, %tmp34 ; <i64> [#uses=0] + %tmp49 = and i32 %tmp3536, 1 ; <i32> [#uses=0] + ret i32 0 + +bb68: ; preds = %bb31 + tail call void (i8*, ...)* @IOLog( i8* getelementptr ([68 x i8]* 
@.str34, i32 0, i32 0), i64 %tmp34, i64 0, i32 131072 ) nounwind + %tmp2021.i = trunc i64 %Pos.0.reg2mem.0 to i32 ; <i32> [#uses=1] + %tmp202122.i = inttoptr i32 %tmp2021.i to i8* ; <i8*> [#uses=1] + tail call void @IODelay( i32 500 ) nounwind + %tmp53.i = volatile load i16* null, align 2 ; <i16> [#uses=2] + %tmp5455.i = zext i16 %tmp53.i to i32 ; <i32> [#uses=1] + br i1 false, label %bb.i, label %bb65.i + +bb.i: ; preds = %bb68 + ret i32 0 + +bb65.i: ; preds = %bb68 + %tmp67.i = icmp eq i16 %tmp53.i, 128 ; <i1> [#uses=1] + br i1 %tmp67.i, label %_Z24unlock_then_erase_sectory.exit, label %bb70.i + +bb70.i: ; preds = %bb65.i + tail call void (i8*, ...)* @IOLog( i8* getelementptr ([64 x i8]* @.str19, i32 0, i32 0), i32 %tmp5455.i ) nounwind + ret i32 0 + +_Z24unlock_then_erase_sectory.exit: ; preds = %bb65.i + volatile store i8 -1, i8* %tmp202122.i, align 1 + %tmp93 = add i64 0, %Pos.0.reg2mem.0 ; <i64> [#uses=2] + %tmp98 = add i64 0, %Offset ; <i64> [#uses=1] + %tmp100 = icmp ugt i64 %tmp98, %tmp93 ; <i1> [#uses=1] + br i1 %tmp100, label %bb31, label %bb103 + +bb103: ; preds = %_Z24unlock_then_erase_sectory.exit, %bb + tail call void (i8*, ...)* @IOLog( i8* getelementptr ([37 x i8]* @.str35, i32 0, i32 0) ) nounwind + ret i32 0 + +bb107: ; preds = %entry + tail call void (i8*, ...)* @IOLog( i8* getelementptr ([48 x i8]* @.str36, i32 0, i32 0) ) nounwind + %tmp114115 = bitcast i8* %buffer to i16* ; <i16*> [#uses=1] + %tmp256 = lshr i64 %bufferSize, 1 ; <i64> [#uses=1] + %tmp256257 = trunc i64 %tmp256 to i32 ; <i32> [#uses=1] + %tmp258 = getelementptr i16* %tmp114115, i32 %tmp256257 ; <i16*> [#uses=0] + ret i32 0 +} + +define i32 @_Z17program_64B_blockyPm(i64 %Base, i32* %pData) nounwind { +entry: + unreachable +} + +define i32 @_Z15ProgramByBlocksyy(i64 %Offset, i64 %bufferSize) nounwind { +entry: + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2008-05-01-ppc_fp128.ll b/src/LLVM/test/CodeGen/PowerPC/2008-05-01-ppc_fp128.ll new file mode 100644 index 0000000..d42c814 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2008-05-01-ppc_fp128.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=ppc32 +target triple = "powerpc-apple-darwin9.2.2" + +define i256 @func(ppc_fp128 %a, ppc_fp128 %b, ppc_fp128 %c, ppc_fp128 %d) nounwind readnone { +entry: + br i1 false, label %bb36, label %bb484 + +bb36: ; preds = %entry + %tmp124 = fcmp ord ppc_fp128 %b, 0xM00000000000000000000000000000000 ; <i1> [#uses=1] + %tmp140 = and i1 %tmp124, fcmp une (ppc_fp128 0xM00000000000000000000000000000000, ppc_fp128 0xM00000000000000000000000000000000) ; <i1> [#uses=0] + unreachable + +bb484: ; preds = %entry + ret i256 0 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2008-06-19-LegalizerCrash.ll b/src/LLVM/test/CodeGen/PowerPC/2008-06-19-LegalizerCrash.ll new file mode 100644 index 0000000..6b40b24 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2008-06-19-LegalizerCrash.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=ppc32 + +define void @t() nounwind { + call void null( ppc_fp128 undef ) + unreachable +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2008-06-21-F128LoadStore.ll b/src/LLVM/test/CodeGen/PowerPC/2008-06-21-F128LoadStore.ll new file mode 100644 index 0000000..862559b --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2008-06-21-F128LoadStore.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=ppc32 + +@g = external global ppc_fp128 +@h = external global ppc_fp128 + +define void @f() { + %tmp = load ppc_fp128* @g + store ppc_fp128 %tmp, ppc_fp128* @h + ret void +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2008-06-23-LiveVariablesCrash.ll b/src/LLVM/test/CodeGen/PowerPC/2008-06-23-LiveVariablesCrash.ll new file mode 100644 index 0000000..83c5511 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2008-06-23-LiveVariablesCrash.ll
@@ -0,0 +1,25 @@ +; RUN: llc < %s -march=ppc32 +; <rdar://problem/6020042> + +define i32 @bork() nounwind { +entry: + br i1 true, label %bb1, label %bb3 + +bb1: + %tmp1 = load i8* null, align 1 + %tmp2 = icmp eq i8 %tmp1, 0 + br label %bb2 + +bb2: + %val1 = phi i32 [ 0, %bb1 ], [ %val2, %bb2 ] + %val2 = select i1 %tmp2, i32 -1, i32 %val1 + switch i32 %val2, label %bb2 [ + i32 -1, label %bb3 + i32 0, label %bb1 + i32 1, label %bb3 + i32 2, label %bb1 + ] + +bb3: + ret i32 -1 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll b/src/LLVM/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll new file mode 100644 index 0000000..8802b97 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vadduhm +; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vsubuhm + +define <4 x i32> @test() nounwind { + ret <4 x i32> < i32 4293066722, i32 4293066722, i32 4293066722, i32 4293066722> +} + +define <4 x i32> @test2() nounwind { + ret <4 x i32> < i32 1114129, i32 1114129, i32 1114129, i32 1114129> +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2008-07-15-Bswap.ll b/src/LLVM/test/CodeGen/PowerPC/2008-07-15-Bswap.ll new file mode 100644 index 0000000..4a834f9 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2008-07-15-Bswap.ll
@@ -0,0 +1,386 @@ +; RUN: llc < %s +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128" +target triple = "powerpc-apple-darwin9" + %struct.BiPartSrcDescriptor = type <{ %"struct.BiPartSrcDescriptor::$_105" }> + %"struct.BiPartSrcDescriptor::$_105" = type { %struct.BiPartSrcDescriptor_NO_VECTOR_ALIGNMENT_size_is_16 } + %struct.BiPartSrcDescriptor_NO_VECTOR_ALIGNMENT_size_is_16 = type { [2 x %struct.MotionVectors], [2 x i8], %struct.Map4x4ToPartIdx, [2 x i8], i8, i8 } + %struct.Condv = type opaque + %struct.DHBFLayerId = type { i8 } + %struct.DecodeComplexityInfo = type { i32, i32, i32, i32, %"struct.DecodeComplexityInfo::IntraStats", %"struct.DecodeComplexityInfo::InterStats" } + %"struct.DecodeComplexityInfo::InterStats" = type { i32, i32, i32, i32, [5 x i32], [3 x i32], [4 x [4 x i32]], [4 x i32], i32, %struct.MotionVectors, %struct.MotionVectors } + %"struct.DecodeComplexityInfo::IntraStats" = type { i32, i32, i32, [5 x i32], [3 x i32], [4 x i32], [3 x i32] } + %struct.DecodeComplexityOptions = type { i8, i8, i32, double, i8, float, i8, float, i8, i8, i8, i8, i8 } + %struct.DescriptorAllocator = type { %struct.Mutex*, %struct.Mutex*, i8**, i32, i32, i8**, i32, i32, i8**, i32, i32 } + %struct.DetailsFromSliceType = type <{ i8 }> + %struct.FlatnessAnalysis = type { i16, i16, i32, i32*, i8*, [512 x i32], [256 x i32] } + %struct.Frame = type <{ i8, i8, i8, i8, i8, [3 x i8], i32, i32, %struct.Mutex*, %struct.Condv*, [8 x i8], %struct.FramePixels, %struct.FrameMotionVectorCache, %struct.FrameIndex, i32, i8*, i8*, i8*, i8*, i16*, %struct.FlatnessAnalysis, %struct.NoiseAnalysis, %struct.VisualActivity, %struct.FrameMotionInfo, %struct.FrameMotionAnalysis, %struct.FrameDataRateParameters, %struct.FrameEncoderTags, %struct.DecodeComplexityInfo, %struct.DecodeComplexityOptions, %struct.MotionInfoFor16x16_FasterSP*, [1 x i32] }> + %struct.FrameDataRateParameters = type { i32, float, i8, 
i8 } + %struct.FrameEncoderTags = type { i8, i8, i32, i8, i8, float } + %struct.FrameIndex = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i8, i8, i8, i32, i32, %struct.Frame*, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, %struct.DHBFLayerId } + %struct.FrameMotionAnalysis = type { i32, i32, i32, %struct.MoEstMotion16x16*, %struct.MbAnalysis*, i32, i32, i16, i16, i32, i32, i32, i32, i8, i8 } + %struct.FrameMotionInfo = type { i32, i32, %struct.MoEstMbMotionInfo*, i32, i32, i32, i32, i32 } + %struct.FrameMotionVectorCache = type <{ %struct.ThreadAllocator**, i32, i32, i32, %struct.BiPartSrcDescriptor, %struct.BiPartSrcDescriptor, %struct.BiPartSrcDescriptor, [3 x %struct.BiPartSrcDescriptor*], %struct.BiPartSrcDescriptor** }> + %struct.FramePixels = type <{ i8, i8, i8, i8, i8, i8, i8, i8, i8*, i8*, i32, [4 x i8*], [4 x i8*], [2 x [4 x i32]], [2 x [4 x i32]], %struct.PixelData, %struct.InterpolationCache*, %struct.InterpolationCache*, %struct.InterpolationCache*, [16 x i16], [16 x i16], [12 x i8], %"struct.PortableSInt32Array<4>", %"struct.PortableSInt32Array<8>", %struct.ICOffsetArraysY, %struct.UVSrcOffsetEtcX_Struct*, i32*, i32*, [3 x i32] }> + %struct.ICOffsetArraysY = type { [21 x i32], [21 x i32], [4 x [21 x i32]] } + %struct.InterpolationCache = type opaque + %struct.LoopFilterInfo = type { %struct.BiPartSrcDescriptor**, i32, i32, i32, i32, i32*, i32, %"struct.LoopFilterInfo::SliceInfoStruct"*, i32, %struct.Mutex*, i16*, %struct.FramePixels*, i8*, i8*, i8*, i8*, i8*, %struct.PerMacroblockBoundaryStrengths*, %struct.Mutex*, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8, i8, i8, i8*, i8*, i8, void (i8*, i8*, i32, i32, i32, i32, i32, i8*, i32)*, void (i8*, i8*, i32, i32, i32, i32, i32, i8*, i32, i8*)*, i32 } + %"struct.LoopFilterInfo::SliceInfoStruct" = type { %"struct.LoopFilterInfo::SliceInfoStruct::LFDisableStats", i8, i8, i8, i8, [17 x %struct.Frame*], [17 x %struct.Frame*] } + 
%"struct.LoopFilterInfo::SliceInfoStruct::LFDisableStats" = type { i32, i32 } + %struct.LoopFilterParam = type { i32, %struct.LoopFilterInfo*, %struct.FramePixels*, %struct.FrameMotionVectorCache* } + %struct.Map4x4ToPartIdx = type { i16 } + %struct.MbAnalysis = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, %struct.RdCost, %struct.RdCost, i32 } + %struct.MoEstMbMotionInfo = type { i32, i32, i32, i32, [16 x %struct.MoEstPartMotionInfo] } + %struct.MoEstMotion16x16 = type { [2 x i8], [2 x %struct.MotionVectors], i8, [3 x %struct.MoEstPredCost] } + %struct.MoEstPartMotionInfo = type { i32, %struct.PartGeom, i32, i32, [2 x %struct.MotionVectors], [2 x i8], i16 } + %struct.MoEstPredCost = type { i32, i16, i16 } + %struct.MotionInfoFor16x16_FasterSP = type { [2 x %struct.MotionVectors], [2 x i8], i8, [2 x i32], i32, i32 } + %struct.MotionVectors = type { %"struct.MotionVectors::$_103" } + %"struct.MotionVectors::$_103" = type { i32 } + %struct.Mutex = type opaque + %struct.NoiseAnalysis = type { i16, i16, i32, i8*, i8*, i8*, [512 x i32] } + %struct.PartGeom = type { %struct.Map4x4ToPartIdx } + %struct.PerMacroblockBoundaryStrengths = type { [16 x i8], [16 x i8], [4 x i8], [4 x i8], [2 x i32] } + %struct.PixelData = type { i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8, i8 } + %"struct.PortableSInt32Array<4>" = type { [4 x i32] } + %"struct.PortableSInt32Array<8>" = type { [8 x i32] } + %struct.RdCost = type { i32, i32, i32, double } + %struct.ThreadAllocator = type { %struct.DescriptorAllocator*, %struct.BiPartSrcDescriptor*, [256 x %struct.BiPartSrcDescriptor*], i32, i32, i32 } + %struct.ThreadedBatch = type opaque + %struct.UVSrcOffsetEtcX_Struct = type <{ i16 }> + %struct.VisualActivity = type { i16, i16, i32, i32, i32*, i32*, i32, i32, i32*, i32, i32, i32, i32, i32, i8*, i32, [2 x i32], i32, i32, i32, i16*, i16, i16, i16, i16, float, i8*, i32*, i32, i32, i8 } 
+@_ZL33table_8_14_indexA_to_alpha_scalar = external constant [64 x i8] ; <[64 x i8]*> [#uses=0] +@_ZL32table_8_14_indexB_to_beta_scalar = external constant [64 x i8] ; <[64 x i8]*> [#uses=0] +@_ZL34table_8_15_indexA_bS_to_tc0_scalar = external constant [64 x [4 x i8]] ; <[64 x [4 x i8]]*> [#uses=0] +@gkDummy = external global i32 ; <i32*> [#uses=0] +@gkDetailsFromSliceTypeArray = external constant [10 x %struct.DetailsFromSliceType] ; <[10 x %struct.DetailsFromSliceType]*> [#uses=0] + +declare i32 @_Z20LoopFilter_ConstructP14LoopFilterInfojj(%struct.LoopFilterInfo*, i32, i32) + +declare i32 @_Z25LF_Threading2_assert_doneP14LoopFilterInfo(%struct.LoopFilterInfo*) nounwind + +declare i32 @_Z54S_CalcIfLargeMVDeltaForBMbBothPredictionsFromSameFramePK19BiPartSrcDescriptorS1_ijj(%struct.BiPartSrcDescriptor*, %struct.BiPartSrcDescriptor*, i32, i32, i32) nounwind + +declare void @_Z30LoopFilter_Internal_FilterLumaPhiiiiii(i8*, i32, i32, i32, i32, i32, i32) nounwind + +declare void @_Z33LoopFilter_Internal_FilterChromaVPhiiiiiiiiii(i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) nounwind + +declare void @_Z33LoopFilter_Internal_FilterChromaHPhiiiiii(i8*, i32, i32, i32, i32, i32, i32) nounwind + +declare void @_Z42LoopFilter_Internal_filter_macroblock_lumaPK14LoopFilterInfoPhS2_iiiPK30PerMacroblockBoundaryStrengthsjj(%struct.LoopFilterInfo*, i8*, i8*, i32, i32, i32, %struct.PerMacroblockBoundaryStrengths*, i32, i32) nounwind + +declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind + +declare i32 @_Z40LoopFilter_Internal_FilterLumaPlaneMBAFFPK14LoopFilterInfojjj(%struct.LoopFilterInfo*, i32, i32, i32) nounwind + +declare void @_Z18LoopFilter_DestroyP14LoopFilterInfo(%struct.LoopFilterInfo*) + +declare void @MutexDispose(%struct.Mutex*) + +declare void @_ZdaPv(i8*) nounwind + +declare void @jvtDisposePTRVectorAligned(i8*) + +declare void @jvtDisposePTR(i8*) + +declare void @jvtDisposePTRMemAligned(i8*) + +declare void 
@_Z31LoopFilter_Internal_ResetTablesP14LoopFilterInfo(%struct.LoopFilterInfo*) nounwind + +declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind + +define i32 @_Z60LoopFilter_Internal_CalculateBoundaryStrengths_MbaffFramePicPK14LoopFilterInfoP22FrameMotionVectorCachejj(%struct.LoopFilterInfo* %lfiPtr, %struct.FrameMotionVectorCache* %frameMotionVectorCachePtr, i32 %mbY_min, i32 %mbY_maxPlus1) nounwind { +entry: + icmp ult i32 %mbY_min, %mbY_maxPlus1 ; <i1>:0 [#uses=1] + br i1 %0, label %bb16, label %bb642 + +bb16: ; preds = %entry + bitcast %struct.PerMacroblockBoundaryStrengths* null to i32* ; <i32*>:1 [#uses=3] + getelementptr i32* %1, i32 1 ; <i32*>:2 [#uses=0] + getelementptr i32* %1, i32 2 ; <i32*>:3 [#uses=0] + getelementptr i32* %1, i32 3 ; <i32*>:4 [#uses=0] + bitcast [16 x i8]* null to i32* ; <i32*>:5 [#uses=3] + getelementptr i32* %5, i32 1 ; <i32*>:6 [#uses=0] + getelementptr i32* %5, i32 2 ; <i32*>:7 [#uses=0] + getelementptr i32* %5, i32 3 ; <i32*>:8 [#uses=0] + icmp eq i32 0, 0 ; <i1>:9 [#uses=0] + lshr i32 0, 30 ; <i32>:10 [#uses=0] + and i32 0, 268435455 ; <i32>:11 [#uses=0] + lshr i32 0, 28 ; <i32>:12 [#uses=1] + and i32 %12, 3 ; <i32>:13 [#uses=0] + and i32 0, 1 ; <i32>:14 [#uses=1] + icmp eq i32 %14, 0 ; <i1>:15 [#uses=0] + zext i8 0 to i32 ; <i32>:16 [#uses=1] + %.not656 = icmp ne i32 0, 0 ; <i1> [#uses=1] + icmp eq i8 0, 0 ; <i1>:17 [#uses=0] + trunc i32 0 to i8 ; <i8>:18 [#uses=2] + add i32 0, 1 ; <i32>:19 [#uses=1] + %.not658 = icmp ne i32 0, 0 ; <i1> [#uses=1] + and i32 0, 268369920 ; <i32>:20 [#uses=1] + icmp eq i32 %20, 268369920 ; <i1>:21 [#uses=2] + getelementptr %struct.PerMacroblockBoundaryStrengths* null, i32 0, i32 2 ; <[4 x i8]*>:22 [#uses=1] + getelementptr %struct.PerMacroblockBoundaryStrengths* null, i32 0, i32 2, i32 0 ; <i8*>:23 [#uses=0] + and i32 0, -2 ; <i32>:24 [#uses=1] + add i32 %24, -1 ; <i32>:25 [#uses=0] + bitcast [4 x i8]* %22 to i32* ; <i32*>:26 [#uses=3] + getelementptr i32* %26, i32 1 ; <i32*>:27 [#uses=0] + 
getelementptr i32* %26, i32 2 ; <i32*>:28 [#uses=0] + getelementptr i32* %26, i32 3 ; <i32*>:29 [#uses=0] + br label %bb144 + +bb144: ; preds = %bb395, %bb16 + %idxEachField11.0773 = phi i32 [ 0, %bb16 ], [ %162, %bb395 ] ; <i32> [#uses=3] + %mbYLeft.2776 = phi i32 [ 0, %bb16 ], [ %mbYLeft.2776, %bb395 ] ; <i32> [#uses=3] + %mbXYLeft.2775 = phi i32 [ 0, %bb16 ], [ %mbXYLeft.2775, %bb395 ] ; <i32> [#uses=1] + %mixedModeLeftEdgeOfMbFlag.2774 = phi i32 [ 0, %bb16 ], [ 0, %bb395 ] ; <i32> [#uses=0] + %mbIndexLeft.2772 = phi i32 [ 0, %bb16 ], [ %mbIndexLeft.2772, %bb395 ] ; <i32> [#uses=2] + %boundaryStrengthsV.1771 = phi i8* [ null, %bb16 ], [ %158, %bb395 ] ; <i8*> [#uses=2] + %numEdgesToTest.1770 = phi i32 [ 4, %bb16 ], [ %numEdgesToTest.2, %bb395 ] ; <i32> [#uses=1] + icmp eq i32 %idxEachField11.0773, 0 ; <i1>:30 [#uses=0] + getelementptr %struct.BiPartSrcDescriptor** null, i32 %mbIndexLeft.2772 ; <%struct.BiPartSrcDescriptor**>:31 [#uses=1] + load %struct.BiPartSrcDescriptor** %31, align 4 ; <%struct.BiPartSrcDescriptor*>:32 [#uses=0] + %fMacroblockHasNonZeroBS.4 = select i1 %21, i32 1, i32 0 ; <i32> [#uses=1] + %numEdgesToTest.2 = select i1 %21, i32 1, i32 %numEdgesToTest.1770 ; <i32> [#uses=2] + store i8 32, i8* %boundaryStrengthsV.1771, align 1 + br label %labelContinueEdgesLoopV + +bb200: ; preds = %labelContinueEdgesLoopV + lshr i32 %159, 28 ; <i32>:33 [#uses=2] + and i32 %160, %16 ; <i32>:34 [#uses=1] + icmp eq i32 %34, 0 ; <i1>:35 [#uses=0] + icmp eq i32 %160, 0 ; <i1>:36 [#uses=3] + zext i1 %36 to i32 ; <i32>:37 [#uses=1] + or i32 %37, -1 ; <i32>:38 [#uses=1] + or i32 %38, %33 ; <i32>:39 [#uses=1] + icmp eq i32 %39, 0 ; <i1>:40 [#uses=1] + br i1 %40, label %bb205, label %bb206 + +bb205: ; preds = %bb200 + store i8 32, i8* %158, align 1 + br label %labelContinueEdgesLoopV + +bb206: ; preds = %bb200 + icmp eq i32 %33, 15 ; <i1>:41 [#uses=1] + br i1 %41, label %labelContinueEdgesLoopV, label %bb210.preheader + +bb210.preheader: ; preds = %bb206 + add i32 %160, 
0 ; <i32>:42 [#uses=2] + %bothcond657 = and i1 %36, %.not656 ; <i1> [#uses=0] + shl i32 %idxEachField11.0773, 1 ; <i32>:43 [#uses=1] + add i32 %43, 0 ; <i32>:44 [#uses=0] + shl i32 %mbYLeft.2776, 2 ; <i32>:45 [#uses=0] + add i32 %42, -1 ; <i32>:46 [#uses=1] + icmp eq i32 0, 0 ; <i1>:47 [#uses=1] + %brmerge689.not = and i1 %47, false ; <i1> [#uses=0] + %bothcond659 = and i1 %36, %.not658 ; <i1> [#uses=0] + shl i32 %mbYLeft.2776, 1 ; <i32>:48 [#uses=1] + or i32 %48, 0 ; <i32>:49 [#uses=1] + shl i32 %49, 1 ; <i32>:50 [#uses=0] + add i32 0, 0 ; <i32>:51 [#uses=2] + mul i32 %51, 0 ; <i32>:52 [#uses=1] + add i32 %52, %42 ; <i32>:53 [#uses=1] + mul i32 %51, 0 ; <i32>:54 [#uses=1] + add i32 %46, %54 ; <i32>:55 [#uses=1] + getelementptr %struct.BiPartSrcDescriptor** null, i32 %53 ; <%struct.BiPartSrcDescriptor**>:56 [#uses=1] + load %struct.BiPartSrcDescriptor** %56, align 4 ; <%struct.BiPartSrcDescriptor*>:57 [#uses=7] + getelementptr %struct.BiPartSrcDescriptor** null, i32 %55 ; <%struct.BiPartSrcDescriptor**>:58 [#uses=1] + load %struct.BiPartSrcDescriptor** %58, align 4 ; <%struct.BiPartSrcDescriptor*>:59 [#uses=5] + icmp slt i32 %159, 0 ; <i1>:60 [#uses=0] + icmp eq %struct.BiPartSrcDescriptor* %57, %59 ; <i1>:61 [#uses=0] + bitcast %struct.BiPartSrcDescriptor* %57 to i16* ; <i16*>:62 [#uses=5] + load i16* %62, align 2 ; <i16>:63 [#uses=2] + getelementptr i16* %62, i32 1 ; <i16*>:64 [#uses=1] + load i16* %64, align 2 ; <i16>:65 [#uses=2] + getelementptr i16* %62, i32 2 ; <i16*>:66 [#uses=1] + load i16* %66, align 2 ; <i16>:67 [#uses=2] + getelementptr i16* %62, i32 3 ; <i16*>:68 [#uses=1] + load i16* %68, align 2 ; <i16>:69 [#uses=2] + getelementptr i16* %62, i32 6 ; <i16*>:70 [#uses=1] + load i16* %70, align 2 ; <i16>:71 [#uses=2] + bitcast %struct.BiPartSrcDescriptor* %59 to i16* ; <i16*>:72 [#uses=5] + load i16* %72, align 2 ; <i16>:73 [#uses=2] + getelementptr i16* %72, i32 1 ; <i16*>:74 [#uses=1] + load i16* %74, align 2 ; <i16>:75 [#uses=2] + getelementptr i16* 
%72, i32 2 ; <i16*>:76 [#uses=1] + load i16* %76, align 2 ; <i16>:77 [#uses=2] + getelementptr i16* %72, i32 3 ; <i16*>:78 [#uses=1] + load i16* %78, align 2 ; <i16>:79 [#uses=2] + getelementptr i16* %72, i32 6 ; <i16*>:80 [#uses=1] + load i16* %80, align 2 ; <i16>:81 [#uses=2] + sub i16 %63, %73 ; <i16>:82 [#uses=3] + sub i16 %65, %75 ; <i16>:83 [#uses=3] + sub i16 %67, %77 ; <i16>:84 [#uses=3] + sub i16 %69, %79 ; <i16>:85 [#uses=3] + sub i16 %71, %81 ; <i16>:86 [#uses=3] + sub i16 0, %82 ; <i16>:87 [#uses=1] + icmp slt i16 %82, 0 ; <i1>:88 [#uses=1] + %. = select i1 %88, i16 %87, i16 %82 ; <i16> [#uses=1] + sub i16 0, %83 ; <i16>:89 [#uses=1] + icmp slt i16 %83, 0 ; <i1>:90 [#uses=1] + %.660 = select i1 %90, i16 %89, i16 %83 ; <i16> [#uses=1] + sub i16 0, %84 ; <i16>:91 [#uses=1] + icmp slt i16 %84, 0 ; <i1>:92 [#uses=1] + %.661 = select i1 %92, i16 %91, i16 %84 ; <i16> [#uses=1] + sub i16 0, %85 ; <i16>:93 [#uses=1] + icmp slt i16 %85, 0 ; <i1>:94 [#uses=1] + %.662 = select i1 %94, i16 %93, i16 %85 ; <i16> [#uses=1] + sub i16 0, %86 ; <i16>:95 [#uses=1] + icmp slt i16 %86, 0 ; <i1>:96 [#uses=1] + %.663 = select i1 %96, i16 %95, i16 %86 ; <i16> [#uses=1] + getelementptr %struct.BiPartSrcDescriptor* %57, i32 0, i32 0, i32 0, i32 1, i32 0 ; <i8*>:97 [#uses=1] + load i8* %97, align 1 ; <i8>:98 [#uses=1] + zext i8 %98 to i32 ; <i32>:99 [#uses=1] + getelementptr %struct.BiPartSrcDescriptor* %57, i32 0, i32 0, i32 0, i32 1, i32 1 ; <i8*>:100 [#uses=1] + load i8* %100, align 1 ; <i8>:101 [#uses=1] + zext i8 %101 to i32 ; <i32>:102 [#uses=1] + getelementptr %struct.BiPartSrcDescriptor* %57, i32 0, i32 0, i32 0, i32 3, i32 0 ; <i8*>:103 [#uses=1] + load i8* %103, align 1 ; <i8>:104 [#uses=2] + zext i8 %104 to i32 ; <i32>:105 [#uses=1] + getelementptr %struct.BiPartSrcDescriptor* %59, i32 0, i32 0, i32 0, i32 3, i32 0 ; <i8*>:106 [#uses=1] + load i8* %106, align 1 ; <i8>:107 [#uses=2] + zext i8 %107 to i32 ; <i32>:108 [#uses=1] + getelementptr %struct.BiPartSrcDescriptor* 
%57, i32 0, i32 0, i32 0, i32 3, i32 1 ; <i8*>:109 [#uses=1] + load i8* %109, align 1 ; <i8>:110 [#uses=1] + zext i8 %110 to i32 ; <i32>:111 [#uses=1] + getelementptr %struct.BiPartSrcDescriptor* %59, i32 0, i32 0, i32 0, i32 3, i32 1 ; <i8*>:112 [#uses=1] + load i8* %112, align 1 ; <i8>:113 [#uses=1] + zext i8 %113 to i32 ; <i32>:114 [#uses=1] + lshr i32 %99, 4 ; <i32>:115 [#uses=1] + and i32 %115, 2 ; <i32>:116 [#uses=1] + lshr i32 %102, 5 ; <i32>:117 [#uses=1] + or i32 %116, %117 ; <i32>:118 [#uses=3] + icmp eq i32 %118, 0 ; <i1>:119 [#uses=0] + icmp eq i32 %118, 1 ; <i1>:120 [#uses=1] + br i1 %120, label %bb297, label %bb298 + +bb297: ; preds = %bb210.preheader + br label %bb298 + +bb298: ; preds = %bb297, %bb210.preheader + %vu8Mask_0.1 = phi i8 [ -1, %bb297 ], [ 0, %bb210.preheader ] ; <i8> [#uses=1] + %vu8Mask_1.1 = phi i8 [ -1, %bb297 ], [ 0, %bb210.preheader ] ; <i8> [#uses=1] + %vu8Mask_2.1 = phi i8 [ -1, %bb297 ], [ 0, %bb210.preheader ] ; <i8> [#uses=0] + %vu8Mask_3.1 = phi i8 [ -1, %bb297 ], [ 0, %bb210.preheader ] ; <i8> [#uses=1] + %vu8Mask_4.1 = phi i8 [ 0, %bb297 ], [ 0, %bb210.preheader ] ; <i8> [#uses=0] + %vu8Mask_5.1 = phi i8 [ 0, %bb297 ], [ 0, %bb210.preheader ] ; <i8> [#uses=1] + %vu8Mask_6.1 = phi i8 [ 0, %bb297 ], [ 0, %bb210.preheader ] ; <i8> [#uses=0] + %vu8Mask_7.1 = phi i8 [ 0, %bb297 ], [ 0, %bb210.preheader ] ; <i8> [#uses=1] + %vu8Mask_12.1 = phi i8 [ -1, %bb297 ], [ 0, %bb210.preheader ] ; <i8> [#uses=0] + %vu8Mask_13.1 = phi i8 [ -1, %bb297 ], [ 0, %bb210.preheader ] ; <i8> [#uses=0] + icmp eq i32 %118, 2 ; <i1>:121 [#uses=0] + and i8 %vu8Mask_1.1, 3 ; <i8>:122 [#uses=0] + and i8 %vu8Mask_5.1, 3 ; <i8>:123 [#uses=0] + and i8 %vu8Mask_3.1, %18 ; <i8>:124 [#uses=0] + and i8 %vu8Mask_7.1, %18 ; <i8>:125 [#uses=0] + icmp eq i8 %104, %107 ; <i1>:126 [#uses=1] + br i1 %126, label %bb328, label %bb303 + +bb303: ; preds = %bb298 + call i16 @llvm.bswap.i16( i16 %81 ) ; <i16>:127 [#uses=1] + sub i16 %63, %77 ; <i16>:128 [#uses=3] + sub i16 
%65, %79 ; <i16>:129 [#uses=3] + sub i16 %67, %73 ; <i16>:130 [#uses=3] + sub i16 %69, %75 ; <i16>:131 [#uses=3] + sub i16 %71, %127 ; <i16>:132 [#uses=3] + sub i16 0, %128 ; <i16>:133 [#uses=1] + icmp slt i16 %128, 0 ; <i1>:134 [#uses=1] + %.673 = select i1 %134, i16 %133, i16 %128 ; <i16> [#uses=1] + sub i16 0, %129 ; <i16>:135 [#uses=1] + icmp slt i16 %129, 0 ; <i1>:136 [#uses=1] + %.674 = select i1 %136, i16 %135, i16 %129 ; <i16> [#uses=1] + sub i16 0, %130 ; <i16>:137 [#uses=1] + icmp slt i16 %130, 0 ; <i1>:138 [#uses=1] + %.675 = select i1 %138, i16 %137, i16 %130 ; <i16> [#uses=1] + sub i16 0, %131 ; <i16>:139 [#uses=1] + icmp slt i16 %131, 0 ; <i1>:140 [#uses=1] + %.676 = select i1 %140, i16 %139, i16 %131 ; <i16> [#uses=1] + sub i16 0, %132 ; <i16>:141 [#uses=1] + icmp slt i16 %132, 0 ; <i1>:142 [#uses=1] + %.677 = select i1 %142, i16 %141, i16 %132 ; <i16> [#uses=1] + br label %bb328 + +bb328: ; preds = %bb303, %bb298 + %vu16Delta_0.0 = phi i16 [ %.673, %bb303 ], [ %., %bb298 ] ; <i16> [#uses=1] + %vu16Delta_1.0 = phi i16 [ %.674, %bb303 ], [ %.660, %bb298 ] ; <i16> [#uses=0] + %vu16Delta_2.0 = phi i16 [ %.675, %bb303 ], [ %.661, %bb298 ] ; <i16> [#uses=0] + %vu16Delta_3.0 = phi i16 [ %.676, %bb303 ], [ %.662, %bb298 ] ; <i16> [#uses=0] + %vu16Delta_6.0 = phi i16 [ %.677, %bb303 ], [ %.663, %bb298 ] ; <i16> [#uses=0] + lshr i16 %vu16Delta_0.0, 8 ; <i16>:143 [#uses=1] + trunc i16 %143 to i8 ; <i8>:144 [#uses=1] + and i8 %144, %vu8Mask_0.1 ; <i8>:145 [#uses=1] + icmp eq i8 %145, 0 ; <i1>:146 [#uses=0] + sub i32 %105, %114 ; <i32>:147 [#uses=1] + sub i32 %111, %108 ; <i32>:148 [#uses=1] + or i32 %147, %148 ; <i32>:149 [#uses=1] + icmp eq i32 %149, 0 ; <i1>:150 [#uses=0] + call i32 @_Z54S_CalcIfLargeMVDeltaForBMbBothPredictionsFromSameFramePK19BiPartSrcDescriptorS1_ijj( %struct.BiPartSrcDescriptor* %57, %struct.BiPartSrcDescriptor* %59, i32 %19, i32 0, i32 0 ) nounwind ; <i32>:151 [#uses=0] + unreachable + +labelContinueEdgesLoopV: ; preds = %bb206, %bb205, 
%bb144 + %fEdgeHasNonZeroBS.0 = phi i32 [ 0, %bb205 ], [ 0, %bb144 ], [ 1, %bb206 ] ; <i32> [#uses=2] + %fMacroblockHasNonZeroBS.6 = phi i32 [ %152, %bb205 ], [ %fMacroblockHasNonZeroBS.4, %bb144 ], [ %152, %bb206 ] ; <i32> [#uses=1] + %ixEdge.1 = phi i32 [ %160, %bb205 ], [ 0, %bb144 ], [ %160, %bb206 ] ; <i32> [#uses=1] + %bfNZ12.2 = phi i32 [ %159, %bb205 ], [ 0, %bb144 ], [ %159, %bb206 ] ; <i32> [#uses=1] + %boundaryStrengthsV.3 = phi i8* [ %158, %bb205 ], [ %boundaryStrengthsV.1771, %bb144 ], [ %158, %bb206 ] ; <i8*> [#uses=3] + or i32 %fMacroblockHasNonZeroBS.6, %fEdgeHasNonZeroBS.0 ; <i32>:152 [#uses=2] + load i8* %boundaryStrengthsV.3, align 1 ; <i8>:153 [#uses=1] + trunc i32 %fEdgeHasNonZeroBS.0 to i8 ; <i8>:154 [#uses=1] + shl i8 %154, 5 ; <i8>:155 [#uses=1] + xor i8 %155, 32 ; <i8>:156 [#uses=1] + or i8 %153, %156 ; <i8>:157 [#uses=1] + store i8 %157, i8* %boundaryStrengthsV.3, align 1 + getelementptr i8* %boundaryStrengthsV.3, i32 4 ; <i8*>:158 [#uses=4] + shl i32 %bfNZ12.2, 4 ; <i32>:159 [#uses=4] + add i32 %ixEdge.1, 1 ; <i32>:160 [#uses=6] + icmp ult i32 %160, %numEdgesToTest.2 ; <i1>:161 [#uses=1] + br i1 %161, label %bb200, label %bb395 + +bb395: ; preds = %labelContinueEdgesLoopV + add i32 %idxEachField11.0773, 1 ; <i32>:162 [#uses=2] + icmp ugt i32 %162, 0 ; <i1>:163 [#uses=1] + br i1 %163, label %bb398, label %bb144 + +bb398: ; preds = %bb395 + call void asm sideeffect "dcbt $0, $1", "b%,r,~{memory}"( i32 19, i32* null ) nounwind + unreachable + +bb642: ; preds = %entry + ret i32 0 +} + +declare i16 @llvm.bswap.i16(i16) nounwind readnone + +declare i8* @jvtNewPtrVectorAligned(i32) + +declare i8* @jvtNewPtr(i32) + +declare i8* @jvtNewPtrMemAligned(i32) + +declare %struct.Mutex* @MutexNew() + +declare i8* @_Znam(i32) + +declare i32 @_Z24LoopFilter_FilterMbGroupP14LoopFilterInfoP11FramePixelsP22FrameMotionVectorCacheP19ThreadedBatchStructjjij(%struct.LoopFilterInfo*, %struct.FramePixels*, %struct.FrameMotionVectorCache*, %struct.ThreadedBatch*, 
i32, i32, i32, i32) + +declare void @MutexLock(%struct.Mutex*) + +declare void @MutexUnlock(%struct.Mutex*) + +declare i32 @_Z35LoopFilter_Internal_FilterLumaPlanePK14LoopFilterInfojjjjj(%struct.LoopFilterInfo*, i32, i32, i32, i32, i32) + +declare i32 @_Z37LoopFilter_Internal_FilterChromaPlanePK14LoopFilterInfojjjjj(%struct.LoopFilterInfo*, i32, i32, i32, i32, i32) + +declare void @_Z44LoopFilter_Internal_filter_macroblock_chromaPK14LoopFilterInfoPhS2_iiiPK30PerMacroblockBoundaryStrengthsjj(%struct.LoopFilterInfo*, i8*, i8*, i32, i32, i32, %struct.PerMacroblockBoundaryStrengths*, i32, i32) nounwind + +declare i32 @_Z42LoopFilter_Internal_FilterChromaPlaneMBAFFPK14LoopFilterInfojjj(%struct.LoopFilterInfo*, i32, i32, i32) nounwind + +declare i32 @_Z26LF_Threading2_ProcessTasksP14LoopFilterInfoP11FramePixelsP22FrameMotionVectorCacheij(%struct.LoopFilterInfo*, %struct.FramePixels*, %struct.FrameMotionVectorCache*, i32, i32) + +declare i32 @_Z46LoopFilter_Internal_CalculateBoundaryStrengthsPK14LoopFilterInfoP22FrameMotionVectorCachejj(%struct.LoopFilterInfo*, %struct.FrameMotionVectorCache*, i32, i32) + +declare i32 @_Z44LoopFilter_Internal_FilterLumaChromaPlane_PPP14LoopFilterInfojjjjj(%struct.LoopFilterInfo*, i32, i32, i32, i32, i32) + +declare i32 @_Z22LoopFilter_FilterFrameP14LoopFilterInfoP11FramePixelsP22FrameMotionVectorCacheP19ThreadedBatchStructij(%struct.LoopFilterInfo*, %struct.FramePixels*, %struct.FrameMotionVectorCache*, %struct.ThreadedBatch*, i32, i32) + +declare void @_Z34LF_Threading2_ProcessTasks_WrapperPv(i8*) + +declare void @llvm.memset.i64(i8*, i8, i64, i32) nounwind
diff --git a/src/LLVM/test/CodeGen/PowerPC/2008-07-15-Fabs.ll b/src/LLVM/test/CodeGen/PowerPC/2008-07-15-Fabs.ll new file mode 100644 index 0000000..17737d9 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2008-07-15-Fabs.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128" +target triple = "powerpc-apple-darwin9" + +define hidden i256 @__divtc3(ppc_fp128 %a, ppc_fp128 %b, ppc_fp128 %c, ppc_fp128 %d) nounwind readnone { +entry: + call ppc_fp128 @fabsl( ppc_fp128 %d ) nounwind readnone ; <ppc_fp128>:0 [#uses=1] + fcmp olt ppc_fp128 0xM00000000000000000000000000000000, %0 ; <i1>:1 [#uses=1] + %.pn106 = select i1 %1, ppc_fp128 %a, ppc_fp128 0xM00000000000000000000000000000000 ; <ppc_fp128> [#uses=1] + %.pn = fsub ppc_fp128 0xM00000000000000000000000000000000, %.pn106 ; <ppc_fp128> [#uses=1] + %y.0 = fdiv ppc_fp128 %.pn, 0xM00000000000000000000000000000000 ; <ppc_fp128> [#uses=1] + fmul ppc_fp128 %y.0, 0xM3FF00000000000000000000000000000 ; <ppc_fp128>:2 [#uses=1] + fadd ppc_fp128 %2, fmul (ppc_fp128 0xM00000000000000000000000000000000, ppc_fp128 0xM00000000000000000000000000000000) ; <ppc_fp128>:3 [#uses=1] + %tmpi = fadd ppc_fp128 %3, 0xM00000000000000000000000000000000 ; <ppc_fp128> [#uses=1] + store ppc_fp128 %tmpi, ppc_fp128* null, align 16 + ret i256 0 +} + +declare ppc_fp128 @fabsl(ppc_fp128) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/PowerPC/2008-07-15-SignExtendInreg.ll b/src/LLVM/test/CodeGen/PowerPC/2008-07-15-SignExtendInreg.ll new file mode 100644 index 0000000..21b0c61 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2008-07-15-SignExtendInreg.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128" +target triple = "powerpc-apple-darwin9" + +define signext i16 @t(i16* %dct) nounwind { +entry: + load i16* null, align 2 ; <i16>:0 [#uses=2] + lshr i16 %0, 11 ; <i16>:1 [#uses=0] + trunc i16 %0 to i8 ; <i8>:2 [#uses=1] + sext i8 %2 to i16 ; <i16>:3 [#uses=1] + add i16 0, %3 ; <i16>:4 [#uses=1] + sext i16 %4 to i32 ; <i32>:5 [#uses=1] + %dcval.0.in = shl i32 %5, 0 ; <i32> [#uses=1] + %dcval.0 = trunc i32 %dcval.0.in to i16 ; <i16> [#uses=1] + store i16 %dcval.0, i16* %dct, align 2 + ret i16 0 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2008-07-17-Fneg.ll b/src/LLVM/test/CodeGen/PowerPC/2008-07-17-Fneg.ll new file mode 100644 index 0000000..dc1e936 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2008-07-17-Fneg.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128" +target triple = "powerpc-apple-darwin9" + +define hidden i64 @__fixunstfdi(ppc_fp128 %a) nounwind { +entry: + br i1 false, label %bb3, label %bb4 + +bb3: ; preds = %entry + fsub ppc_fp128 0xM80000000000000000000000000000000, 0xM00000000000000000000000000000000 ; <ppc_fp128>:0 [#uses=1] + fptoui ppc_fp128 %0 to i32 ; <i32>:1 [#uses=1] + zext i32 %1 to i64 ; <i64>:2 [#uses=1] + sub i64 0, %2 ; <i64>:3 [#uses=1] + ret i64 %3 + +bb4: ; preds = %entry + ret i64 0 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2008-07-24-PPC64-CCBug.ll b/src/LLVM/test/CodeGen/PowerPC/2008-07-24-PPC64-CCBug.ll new file mode 100644 index 0000000..c9c05e1 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2008-07-24-PPC64-CCBug.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -mtriple=powerpc64-apple-darwin | grep lwz | grep 228 + +@"\01LC" = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1] + +define void @llvm_static_func(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8, i32 %a9, i32 %a10, i32 %a11, i32 %a12, i32 %a13, i32 %a14, i32 %a15) nounwind { +entry: + tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @"\01LC", i32 0, i64 0), i32 %a8 ) nounwind ; <i32>:0 [#uses=0] + ret void +} + +declare i32 @printf(i8*, ...) nounwind
diff --git a/src/LLVM/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll b/src/LLVM/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll new file mode 100644 index 0000000..97844dd --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll
@@ -0,0 +1,254 @@ +; RUN: llc < %s -mtriple=powerpc-apple-darwin + + %struct.CGLDI = type { %struct.cgli*, i32, i32, i32, i32, i32, i8*, i32, void (%struct.CGLSI*, i32, %struct.CGLDI*)*, i8*, %struct.vv_t } + %struct.cgli = type { i32, %struct.cgli*, void (%struct.cgli*, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32)*, i32, i8*, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i8*, i32*, %struct._cgro*, %struct._cgro*, float, float, float, float, i32, i8*, float, i8*, [16 x i32] } + %struct.CGLSI = type { %struct.cgli*, i32, i8*, i8*, i32, i32, i8*, void (%struct.cgli*, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32)*, %struct.vv_t, %struct.vv_t, %struct.xx_t* } + %struct._cgro = type opaque + %struct.xx_t = type { [3 x %struct.vv_t], [2 x %struct.vv_t], [2 x [3 x i8*]] } + %struct.vv_t = type { <16 x i8> } +@llvm.used = appending global [1 x i8*] [ i8* bitcast (void (%struct.CGLSI*, i32, %struct.CGLDI*)* @lb to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +define void @lb(%struct.CGLSI* %src, i32 %n, %struct.CGLDI* %dst) nounwind { +entry: + %0 = load i32* null, align 4 ; <i32> [#uses=1] + %1 = icmp sgt i32 %0, 0 ; <i1> [#uses=1] + br i1 %1, label %bb.nph4945, label %return + +bb.nph4945: ; preds = %entry + %2 = bitcast [2 x %struct.vv_t]* null to i64* ; <i64*> [#uses=6] + %3 = getelementptr [2 x i64]* null, i32 0, i32 1 ; <i64*> [#uses=6] + %4 = bitcast %struct.vv_t* null to i64* ; <i64*> [#uses=5] + %5 = getelementptr [2 x i64]* null, i32 0, i32 1 ; <i64*> [#uses=3] + br label %bb2326 + +bb2217: ; preds = %bb2326 + %6 = or i64 0, 0 ; <i64> [#uses=2] + %7 = fptosi float 0.000000e+00 to i32 ; <i32> [#uses=1] + %8 = fptosi float 0.000000e+00 to i32 ; <i32> [#uses=1] + %9 = getelementptr float* null, i32 2 ; <float*> [#uses=1] + %10 = load float* %9, align 4 ; <float> [#uses=1] + %11 = getelementptr float* null, i32 3 ; <float*> [#uses=1] + %12 = load float* %11, align 4 ; <float> [#uses=1] + 
%13 = fmul float %10, 6.553500e+04 ; <float> [#uses=1] + %14 = fadd float %13, 5.000000e-01 ; <float> [#uses=1] + %15 = fmul float %12, 6.553500e+04 ; <float> [#uses=1] + %16 = fadd float %15, 5.000000e-01 ; <float> [#uses=3] + %17 = fcmp olt float %14, 0.000000e+00 ; <i1> [#uses=0] + %18 = fcmp olt float %16, 0.000000e+00 ; <i1> [#uses=1] + br i1 %18, label %bb2265, label %bb2262 + +bb2262: ; preds = %bb2217 + %19 = fcmp ogt float %16, 6.553500e+04 ; <i1> [#uses=1] + br i1 %19, label %bb2264, label %bb2265 + +bb2264: ; preds = %bb2262 + br label %bb2265 + +bb2265: ; preds = %bb2264, %bb2262, %bb2217 + %f3596.0 = phi float [ 6.553500e+04, %bb2264 ], [ 0.000000e+00, %bb2217 ], [ %16, %bb2262 ] ; <float> [#uses=1] + %20 = fptosi float 0.000000e+00 to i32 ; <i32> [#uses=1] + %21 = fptosi float %f3596.0 to i32 ; <i32> [#uses=1] + %22 = zext i32 %7 to i64 ; <i64> [#uses=1] + %23 = shl i64 %22, 48 ; <i64> [#uses=1] + %24 = zext i32 %8 to i64 ; <i64> [#uses=1] + %25 = shl i64 %24, 32 ; <i64> [#uses=1] + %26 = sext i32 %20 to i64 ; <i64> [#uses=1] + %27 = shl i64 %26, 16 ; <i64> [#uses=1] + %28 = sext i32 %21 to i64 ; <i64> [#uses=1] + %29 = or i64 %25, %23 ; <i64> [#uses=1] + %30 = or i64 %29, %27 ; <i64> [#uses=1] + %31 = or i64 %30, %28 ; <i64> [#uses=2] + %32 = shl i64 %6, 48 ; <i64> [#uses=1] + %33 = shl i64 %31, 32 ; <i64> [#uses=1] + %34 = and i64 %33, 281470681743360 ; <i64> [#uses=1] + store i64 %6, i64* %2, align 16 + store i64 %31, i64* %3, align 8 + %35 = getelementptr i8* null, i32 0 ; <i8*> [#uses=1] + %36 = bitcast i8* %35 to float* ; <float*> [#uses=4] + %37 = load float* %36, align 4 ; <float> [#uses=1] + %38 = getelementptr float* %36, i32 1 ; <float*> [#uses=1] + %39 = load float* %38, align 4 ; <float> [#uses=1] + %40 = fmul float %37, 6.553500e+04 ; <float> [#uses=1] + %41 = fadd float %40, 5.000000e-01 ; <float> [#uses=1] + %42 = fmul float %39, 6.553500e+04 ; <float> [#uses=1] + %43 = fadd float %42, 5.000000e-01 ; <float> [#uses=3] + %44 = fcmp olt 
float %41, 0.000000e+00 ; <i1> [#uses=0] + %45 = fcmp olt float %43, 0.000000e+00 ; <i1> [#uses=1] + br i1 %45, label %bb2277, label %bb2274 + +bb2274: ; preds = %bb2265 + %46 = fcmp ogt float %43, 6.553500e+04 ; <i1> [#uses=0] + br label %bb2277 + +bb2277: ; preds = %bb2274, %bb2265 + %f1582.0 = phi float [ 0.000000e+00, %bb2265 ], [ %43, %bb2274 ] ; <float> [#uses=1] + %47 = fptosi float 0.000000e+00 to i32 ; <i32> [#uses=1] + %48 = fptosi float %f1582.0 to i32 ; <i32> [#uses=1] + %49 = getelementptr float* %36, i32 2 ; <float*> [#uses=1] + %50 = load float* %49, align 4 ; <float> [#uses=1] + %51 = getelementptr float* %36, i32 3 ; <float*> [#uses=1] + %52 = load float* %51, align 4 ; <float> [#uses=1] + %53 = fmul float %50, 6.553500e+04 ; <float> [#uses=1] + %54 = fadd float %53, 5.000000e-01 ; <float> [#uses=1] + %55 = fmul float %52, 6.553500e+04 ; <float> [#uses=1] + %56 = fadd float %55, 5.000000e-01 ; <float> [#uses=1] + %57 = fcmp olt float %54, 0.000000e+00 ; <i1> [#uses=0] + %58 = fcmp olt float %56, 0.000000e+00 ; <i1> [#uses=0] + %59 = fptosi float 0.000000e+00 to i32 ; <i32> [#uses=1] + %60 = fptosi float 0.000000e+00 to i32 ; <i32> [#uses=1] + %61 = zext i32 %47 to i64 ; <i64> [#uses=1] + %62 = shl i64 %61, 48 ; <i64> [#uses=1] + %63 = zext i32 %48 to i64 ; <i64> [#uses=1] + %64 = shl i64 %63, 32 ; <i64> [#uses=1] + %65 = sext i32 %59 to i64 ; <i64> [#uses=1] + %66 = shl i64 %65, 16 ; <i64> [#uses=1] + %67 = sext i32 %60 to i64 ; <i64> [#uses=1] + %68 = or i64 %64, %62 ; <i64> [#uses=1] + %69 = or i64 %68, %66 ; <i64> [#uses=1] + %70 = or i64 %69, %67 ; <i64> [#uses=2] + %71 = getelementptr i8* null, i32 0 ; <i8*> [#uses=1] + %72 = bitcast i8* %71 to float* ; <float*> [#uses=4] + %73 = load float* %72, align 4 ; <float> [#uses=1] + %74 = getelementptr float* %72, i32 1 ; <float*> [#uses=1] + %75 = load float* %74, align 4 ; <float> [#uses=1] + %76 = fmul float %73, 6.553500e+04 ; <float> [#uses=1] + %77 = fadd float %76, 5.000000e-01 ; <float> 
[#uses=3] + %78 = fmul float %75, 6.553500e+04 ; <float> [#uses=1] + %79 = fadd float %78, 5.000000e-01 ; <float> [#uses=1] + %80 = fcmp olt float %77, 0.000000e+00 ; <i1> [#uses=1] + br i1 %80, label %bb2295, label %bb2292 + +bb2292: ; preds = %bb2277 + %81 = fcmp ogt float %77, 6.553500e+04 ; <i1> [#uses=1] + br i1 %81, label %bb2294, label %bb2295 + +bb2294: ; preds = %bb2292 + br label %bb2295 + +bb2295: ; preds = %bb2294, %bb2292, %bb2277 + %f0569.0 = phi float [ 6.553500e+04, %bb2294 ], [ 0.000000e+00, %bb2277 ], [ %77, %bb2292 ] ; <float> [#uses=1] + %82 = fcmp olt float %79, 0.000000e+00 ; <i1> [#uses=0] + %83 = fptosi float %f0569.0 to i32 ; <i32> [#uses=1] + %84 = fptosi float 0.000000e+00 to i32 ; <i32> [#uses=1] + %85 = getelementptr float* %72, i32 2 ; <float*> [#uses=1] + %86 = load float* %85, align 4 ; <float> [#uses=1] + %87 = getelementptr float* %72, i32 3 ; <float*> [#uses=1] + %88 = load float* %87, align 4 ; <float> [#uses=1] + %89 = fmul float %86, 6.553500e+04 ; <float> [#uses=1] + %90 = fadd float %89, 5.000000e-01 ; <float> [#uses=1] + %91 = fmul float %88, 6.553500e+04 ; <float> [#uses=1] + %92 = fadd float %91, 5.000000e-01 ; <float> [#uses=1] + %93 = fcmp olt float %90, 0.000000e+00 ; <i1> [#uses=0] + %94 = fcmp olt float %92, 0.000000e+00 ; <i1> [#uses=0] + %95 = fptosi float 0.000000e+00 to i32 ; <i32> [#uses=1] + %96 = fptosi float 0.000000e+00 to i32 ; <i32> [#uses=1] + %97 = zext i32 %83 to i64 ; <i64> [#uses=1] + %98 = shl i64 %97, 48 ; <i64> [#uses=1] + %99 = zext i32 %84 to i64 ; <i64> [#uses=1] + %100 = shl i64 %99, 32 ; <i64> [#uses=1] + %101 = sext i32 %95 to i64 ; <i64> [#uses=1] + %102 = shl i64 %101, 16 ; <i64> [#uses=1] + %103 = sext i32 %96 to i64 ; <i64> [#uses=1] + %104 = or i64 %100, %98 ; <i64> [#uses=1] + %105 = or i64 %104, %102 ; <i64> [#uses=1] + %106 = or i64 %105, %103 ; <i64> [#uses=2] + %107 = shl i64 %70, 16 ; <i64> [#uses=1] + %108 = and i64 %107, 4294901760 ; <i64> [#uses=1] + %109 = and i64 %106, 65535 ; 
<i64> [#uses=1] + %110 = or i64 %34, %32 ; <i64> [#uses=1] + %111 = or i64 %110, %108 ; <i64> [#uses=1] + %112 = or i64 %111, %109 ; <i64> [#uses=1] + store i64 %70, i64* %4, align 16 + store i64 %106, i64* %5, align 8 + %113 = icmp eq i64 %112, 0 ; <i1> [#uses=1] + br i1 %113, label %bb2325, label %bb2315 + +bb2315: ; preds = %bb2295 + %114 = icmp eq %struct.xx_t* %159, null ; <i1> [#uses=1] + br i1 %114, label %bb2318, label %bb2317 + +bb2317: ; preds = %bb2315 + %115 = load i64* %2, align 16 ; <i64> [#uses=1] + %116 = call i32 (...)* @_u16a_cm( i64 %115, %struct.xx_t* %159, double 0.000000e+00, double 1.047551e+06 ) nounwind ; <i32> [#uses=1] + %117 = sext i32 %116 to i64 ; <i64> [#uses=1] + store i64 %117, i64* %2, align 16 + %118 = load i64* %3, align 8 ; <i64> [#uses=1] + %119 = call i32 (...)* @_u16a_cm( i64 %118, %struct.xx_t* %159, double 0.000000e+00, double 1.047551e+06 ) nounwind ; <i32> [#uses=1] + %120 = sext i32 %119 to i64 ; <i64> [#uses=1] + store i64 %120, i64* %3, align 8 + %121 = load i64* %4, align 16 ; <i64> [#uses=1] + %122 = call i32 (...)* @_u16a_cm( i64 %121, %struct.xx_t* %159, double 0.000000e+00, double 1.047551e+06 ) nounwind ; <i32> [#uses=1] + %123 = sext i32 %122 to i64 ; <i64> [#uses=1] + store i64 %123, i64* %4, align 16 + %124 = load i64* %5, align 8 ; <i64> [#uses=1] + %125 = call i32 (...)* @_u16a_cm( i64 %124, %struct.xx_t* %159, double 0.000000e+00, double 1.047551e+06 ) nounwind ; <i32> [#uses=0] + unreachable + +bb2318: ; preds = %bb2315 + %126 = getelementptr %struct.CGLSI* %src, i32 %indvar5021, i32 8 ; <%struct.vv_t*> [#uses=1] + %127 = bitcast %struct.vv_t* %126 to i64* ; <i64*> [#uses=1] + %128 = load i64* %127, align 8 ; <i64> [#uses=1] + %129 = trunc i64 %128 to i32 ; <i32> [#uses=4] + %130 = load i64* %2, align 16 ; <i64> [#uses=1] + %131 = call i32 (...)* @_u16_ff( i64 %130, i32 %129 ) nounwind ; <i32> [#uses=1] + %132 = sext i32 %131 to i64 ; <i64> [#uses=1] + store i64 %132, i64* %2, align 16 + %133 = load i64* 
%3, align 8 ; <i64> [#uses=1] + %134 = call i32 (...)* @_u16_ff( i64 %133, i32 %129 ) nounwind ; <i32> [#uses=1] + %135 = sext i32 %134 to i64 ; <i64> [#uses=1] + store i64 %135, i64* %3, align 8 + %136 = load i64* %4, align 16 ; <i64> [#uses=1] + %137 = call i32 (...)* @_u16_ff( i64 %136, i32 %129 ) nounwind ; <i32> [#uses=1] + %138 = sext i32 %137 to i64 ; <i64> [#uses=1] + store i64 %138, i64* %4, align 16 + %139 = load i64* %5, align 8 ; <i64> [#uses=1] + %140 = call i32 (...)* @_u16_ff( i64 %139, i32 %129 ) nounwind ; <i32> [#uses=0] + unreachable + +bb2319: ; preds = %bb2326 + %141 = getelementptr %struct.CGLSI* %src, i32 %indvar5021, i32 2 ; <i8**> [#uses=1] + %142 = load i8** %141, align 4 ; <i8*> [#uses=4] + %143 = getelementptr i8* %142, i32 0 ; <i8*> [#uses=1] + %144 = call i32 (...)* @_u16_sf32( double 0.000000e+00, double 6.553500e+04, double 5.000000e-01, i8* %143 ) nounwind ; <i32> [#uses=1] + %145 = sext i32 %144 to i64 ; <i64> [#uses=2] + %146 = getelementptr i8* %142, i32 0 ; <i8*> [#uses=1] + %147 = call i32 (...)* @_u16_sf32( double 0.000000e+00, double 6.553500e+04, double 5.000000e-01, i8* %146 ) nounwind ; <i32> [#uses=1] + %148 = sext i32 %147 to i64 ; <i64> [#uses=2] + %149 = shl i64 %145, 48 ; <i64> [#uses=0] + %150 = shl i64 %148, 32 ; <i64> [#uses=1] + %151 = and i64 %150, 281470681743360 ; <i64> [#uses=0] + store i64 %145, i64* %2, align 16 + store i64 %148, i64* %3, align 8 + %152 = getelementptr i8* %142, i32 0 ; <i8*> [#uses=1] + %153 = call i32 (...)* @_u16_sf32( double 0.000000e+00, double 6.553500e+04, double 5.000000e-01, i8* %152 ) nounwind ; <i32> [#uses=1] + %154 = sext i32 %153 to i64 ; <i64> [#uses=0] + %155 = getelementptr i8* %142, i32 0 ; <i8*> [#uses=1] + %156 = call i32 (...)* @_u16_sf32( double 0.000000e+00, double 6.553500e+04, double 5.000000e-01, i8* %155 ) nounwind ; <i32> [#uses=0] + unreachable + +bb2325: ; preds = %bb2326, %bb2295 + %indvar.next5145 = add i32 %indvar5021, 1 ; <i32> [#uses=1] + br label %bb2326 + 
+bb2326: ; preds = %bb2325, %bb.nph4945 + %indvar5021 = phi i32 [ 0, %bb.nph4945 ], [ %indvar.next5145, %bb2325 ] ; <i32> [#uses=6] + %157 = icmp slt i32 %indvar5021, %n ; <i1> [#uses=0] + %158 = getelementptr %struct.CGLSI* %src, i32 %indvar5021, i32 10 ; <%struct.xx_t**> [#uses=1] + %159 = load %struct.xx_t** %158, align 4 ; <%struct.xx_t*> [#uses=5] + %160 = getelementptr %struct.CGLSI* %src, i32 %indvar5021, i32 1 ; <i32*> [#uses=1] + %161 = load i32* %160, align 4 ; <i32> [#uses=1] + %162 = and i32 %161, 255 ; <i32> [#uses=1] + switch i32 %162, label %bb2325 [ + i32 59, label %bb2217 + i32 60, label %bb2319 + ] + +return: ; preds = %entry + ret void +} + +declare i32 @_u16_ff(...) + +declare i32 @_u16a_cm(...) + +declare i32 @_u16_sf32(...)
diff --git a/src/LLVM/test/CodeGen/PowerPC/2008-10-17-AsmMatchingOperands.ll b/src/LLVM/test/CodeGen/PowerPC/2008-10-17-AsmMatchingOperands.ll new file mode 100644 index 0000000..61ae438 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2008-10-17-AsmMatchingOperands.ll
@@ -0,0 +1,15 @@ +; PR11218 +; FIXME: This depends on assertion failure for now. +; REQUIRES: asserts + +; RUN: llc < %s +; XFAIL: * +; PR2356 +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128" +target triple = "powerpc-apple-darwin9" + +define i32 @test(i64 %x, i32* %p) nounwind { + %asmtmp = call i32 asm "", "=r,0"(i64 0) nounwind ; <i32> [#uses=0] + %y = add i32 %asmtmp, 1 + ret i32 %y +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2008-10-28-UnprocessedNode.ll b/src/LLVM/test/CodeGen/PowerPC/2008-10-28-UnprocessedNode.ll new file mode 100644 index 0000000..f474a6d --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2008-10-28-UnprocessedNode.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=ppc64 + +define void @__divtc3({ ppc_fp128, ppc_fp128 }* noalias sret %agg.result, ppc_fp128 %a, ppc_fp128 %b, ppc_fp128 %c, ppc_fp128 %d) nounwind { +entry: + %imag59 = load ppc_fp128* null, align 8 ; <ppc_fp128> [#uses=1] + %0 = fmul ppc_fp128 0xM00000000000000000000000000000000, %imag59 ; <ppc_fp128> [#uses=1] + %1 = fmul ppc_fp128 0xM00000000000000000000000000000000, 0xM00000000000000000000000000000000 ; <ppc_fp128> [#uses=1] + %2 = fadd ppc_fp128 %0, %1 ; <ppc_fp128> [#uses=1] + store ppc_fp128 %2, ppc_fp128* null, align 16 + unreachable +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll b/src/LLVM/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll new file mode 100644 index 0000000..f4c06fb --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
@@ -0,0 +1,33 @@ +; RUN: llc < %s -march=ppc32 -o - | not grep fixunstfsi + +define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { +entry: + %0 = fcmp olt ppc_fp128 %a, 0xM00000000000000000000000000000000 ; <i1> [#uses=1] + br i1 %0, label %bb5, label %bb1 + +bb1: ; preds = %entry + %1 = fmul ppc_fp128 %a, 0xM3DF00000000000000000000000000000 ; <ppc_fp128> [#uses=1] + %2 = fptoui ppc_fp128 %1 to i32 ; <i32> [#uses=1] + %3 = zext i32 %2 to i64 ; <i64> [#uses=1] + %4 = shl i64 %3, 32 ; <i64> [#uses=3] + %5 = uitofp i64 %4 to ppc_fp128 ; <ppc_fp128> [#uses=1] + %6 = fsub ppc_fp128 %a, %5 ; <ppc_fp128> [#uses=3] + %7 = fcmp olt ppc_fp128 %6, 0xM00000000000000000000000000000000 ; <i1> [#uses=1] + br i1 %7, label %bb2, label %bb3 + +bb2: ; preds = %bb1 + %8 = fsub ppc_fp128 0xM80000000000000000000000000000000, %6 ; <ppc_fp128> [#uses=1] + %9 = fptoui ppc_fp128 %8 to i32 ; <i32> [#uses=1] + %10 = zext i32 %9 to i64 ; <i64> [#uses=1] + %11 = sub i64 %4, %10 ; <i64> [#uses=1] + ret i64 %11 + +bb3: ; preds = %bb1 + %12 = fptoui ppc_fp128 %6 to i32 ; <i32> [#uses=1] + %13 = zext i32 %12 to i64 ; <i64> [#uses=1] + %14 = or i64 %13, %4 ; <i64> [#uses=1] + ret i64 %14 + +bb5: ; preds = %entry + ret i64 0 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2008-10-31-PPCF128Libcalls.ll b/src/LLVM/test/CodeGen/PowerPC/2008-10-31-PPCF128Libcalls.ll new file mode 100644 index 0000000..8322a84 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2008-10-31-PPCF128Libcalls.ll
@@ -0,0 +1,44 @@ +; RUN: llc < %s +; PR2988 +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128" +target triple = "powerpc-apple-darwin10.0" +@a = common global ppc_fp128 0xM00000000000000000000000000000000, align 16 ; <ppc_fp128*> [#uses=2] +@b = common global ppc_fp128 0xM00000000000000000000000000000000, align 16 ; <ppc_fp128*> [#uses=2] +@c = common global ppc_fp128 0xM00000000000000000000000000000000, align 16 ; <ppc_fp128*> [#uses=3] +@d = common global ppc_fp128 0xM00000000000000000000000000000000, align 16 ; <ppc_fp128*> [#uses=2] + +define void @foo() nounwind { +entry: + %0 = load ppc_fp128* @a, align 16 ; <ppc_fp128> [#uses=1] + %1 = call ppc_fp128 @llvm.sqrt.ppcf128(ppc_fp128 %0) ; <ppc_fp128> [#uses=1] + store ppc_fp128 %1, ppc_fp128* @a, align 16 + %2 = load ppc_fp128* @b, align 16 ; <ppc_fp128> [#uses=1] + %3 = call ppc_fp128 @"\01_sinl$LDBL128"(ppc_fp128 %2) nounwind readonly ; <ppc_fp128> [#uses=1] + store ppc_fp128 %3, ppc_fp128* @b, align 16 + %4 = load ppc_fp128* @c, align 16 ; <ppc_fp128> [#uses=1] + %5 = call ppc_fp128 @"\01_cosl$LDBL128"(ppc_fp128 %4) nounwind readonly ; <ppc_fp128> [#uses=1] + store ppc_fp128 %5, ppc_fp128* @c, align 16 + %6 = load ppc_fp128* @d, align 16 ; <ppc_fp128> [#uses=1] + %7 = load ppc_fp128* @c, align 16 ; <ppc_fp128> [#uses=1] + %8 = call ppc_fp128 @llvm.pow.ppcf128(ppc_fp128 %6, ppc_fp128 %7) ; <ppc_fp128> [#uses=1] + store ppc_fp128 %8, ppc_fp128* @d, align 16 + br label %return + +return: ; preds = %entry + ret void +} + +declare ppc_fp128 @llvm.sqrt.ppcf128(ppc_fp128) nounwind readonly + +declare ppc_fp128 @"\01_sinl$LDBL128"(ppc_fp128) nounwind readonly + +declare ppc_fp128 @"\01_cosl$LDBL128"(ppc_fp128) nounwind readonly + +declare ppc_fp128 @llvm.pow.ppcf128(ppc_fp128, ppc_fp128) nounwind readonly + +declare ppc_fp128 @copysignl(ppc_fp128, ppc_fp128) + +define ppc_fp128 @cs(ppc_fp128 %from, ppc_fp128 %to) { + %tmp = 
call ppc_fp128 @copysignl(ppc_fp128 %from, ppc_fp128 %to) + ret ppc_fp128 %tmp +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2008-12-02-LegalizeTypeAssert.ll b/src/LLVM/test/CodeGen/PowerPC/2008-12-02-LegalizeTypeAssert.ll new file mode 100644 index 0000000..9ed7f6f --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2008-12-02-LegalizeTypeAssert.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -mtriple=powerpc64-apple-darwin9.5 + +define void @__multc3({ ppc_fp128, ppc_fp128 }* noalias sret %agg.result, ppc_fp128 %a, ppc_fp128 %b, ppc_fp128 %c, ppc_fp128 %d) nounwind { +entry: + %.pre139 = and i1 false, false ; <i1> [#uses=1] + br i1 false, label %bb6, label %bb21 + +bb6: ; preds = %entry + %0 = tail call ppc_fp128 @copysignl(ppc_fp128 0xM00000000000000000000000000000000, ppc_fp128 %a) nounwind readnone ; <ppc_fp128> [#uses=0] + %iftmp.1.0 = select i1 %.pre139, ppc_fp128 0xM3FF00000000000000000000000000000, ppc_fp128 0xM00000000000000000000000000000000 ; <ppc_fp128> [#uses=1] + %1 = tail call ppc_fp128 @copysignl(ppc_fp128 %iftmp.1.0, ppc_fp128 %b) nounwind readnone ; <ppc_fp128> [#uses=0] + unreachable + +bb21: ; preds = %entry + unreachable +} + +declare ppc_fp128 @copysignl(ppc_fp128, ppc_fp128) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/PowerPC/2008-12-12-EH.ll b/src/LLVM/test/CodeGen/PowerPC/2008-12-12-EH.ll new file mode 100644 index 0000000..a2a5e9e --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2008-12-12-EH.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -disable-cfi -march=ppc32 -mtriple=powerpc-apple-darwin9 | grep ^__Z1fv.eh + +define void @_Z1fv() { +entry: + br label %return + +return: + ret void +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2009-01-16-DeclareISelBug.ll b/src/LLVM/test/CodeGen/PowerPC/2009-01-16-DeclareISelBug.ll new file mode 100644 index 0000000..ce8e72d --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2009-01-16-DeclareISelBug.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -mtriple=powerpc-apple-darwin9.5 +; rdar://6499616 + +@"\01LC" = internal constant [13 x i8] c"conftest.val\00" ; <[13 x i8]*> [#uses=1] + +define i32 @main() nounwind { +entry: + %0 = call i8* @fopen(i8* getelementptr ([13 x i8]* @"\01LC", i32 0, i32 0), i8* null) nounwind ; <i8*> [#uses=0] + unreachable +} + +declare i8* @fopen(i8*, i8*)
diff --git a/src/LLVM/test/CodeGen/PowerPC/2009-03-17-LSRBug.ll b/src/LLVM/test/CodeGen/PowerPC/2009-03-17-LSRBug.ll new file mode 100644 index 0000000..172531e --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2009-03-17-LSRBug.ll
@@ -0,0 +1,51 @@ +; RUN: llc < %s -mtriple=powerpc-apple-darwin10 +; rdar://6692215 + +define fastcc void @_qsort(i8* %a, i32 %n, i32 %es, i32 (i8*, i8*)* %cmp, i32 %depth_limit) nounwind optsize ssp { +entry: + br i1 false, label %bb21, label %bb20.loopexit + +bb20.loopexit: ; preds = %entry + ret void + +bb21: ; preds = %entry + %0 = getelementptr i8* %a, i32 0 ; <i8*> [#uses=2] + br label %bb35 + +bb29: ; preds = %bb35 + br i1 false, label %bb7.i252, label %bb34 + +bb7.i252: ; preds = %bb7.i252, %bb29 + %pj.0.rec.i247 = phi i32 [ %indvar.next488, %bb7.i252 ], [ 0, %bb29 ] ; <i32> [#uses=2] + %pi.0.i248 = getelementptr i8* %pa.1, i32 %pj.0.rec.i247 ; <i8*> [#uses=0] + %indvar.next488 = add i32 %pj.0.rec.i247, 1 ; <i32> [#uses=1] + br i1 false, label %bb34, label %bb7.i252 + +bb34: ; preds = %bb7.i252, %bb29 + %indvar.next505 = add i32 %indvar504, 1 ; <i32> [#uses=1] + br label %bb35 + +bb35: ; preds = %bb34, %bb21 + %indvar504 = phi i32 [ %indvar.next505, %bb34 ], [ 0, %bb21 ] ; <i32> [#uses=2] + %pa.1 = phi i8* [ null, %bb34 ], [ %0, %bb21 ] ; <i8*> [#uses=2] + %pb.0.rec = mul i32 %indvar504, %es ; <i32> [#uses=1] + br i1 false, label %bb43, label %bb29 + +bb43: ; preds = %bb43, %bb35 + br i1 false, label %bb50, label %bb43 + +bb50: ; preds = %bb43 + %1 = ptrtoint i8* %pa.1 to i32 ; <i32> [#uses=1] + %2 = sub i32 %1, 0 ; <i32> [#uses=2] + %3 = icmp sle i32 0, %2 ; <i1> [#uses=1] + %min = select i1 %3, i32 0, i32 %2 ; <i32> [#uses=1] + br label %bb7.i161 + +bb7.i161: ; preds = %bb7.i161, %bb50 + %pj.0.rec.i156 = phi i32 [ %indvar.next394, %bb7.i161 ], [ 0, %bb50 ] ; <i32> [#uses=2] + %.sum279 = sub i32 %pj.0.rec.i156, %min ; <i32> [#uses=1] + %pb.0.sum542 = add i32 %pb.0.rec, %.sum279 ; <i32> [#uses=1] + %pj.0.i158 = getelementptr i8* %0, i32 %pb.0.sum542 ; <i8*> [#uses=0] + %indvar.next394 = add i32 %pj.0.rec.i156, 1 ; <i32> [#uses=1] + br label %bb7.i161 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2009-05-28-LegalizeBRCC.ll b/src/LLVM/test/CodeGen/PowerPC/2009-05-28-LegalizeBRCC.ll new file mode 100644 index 0000000..29d115d --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2009-05-28-LegalizeBRCC.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=powerpc-apple-darwin10 +; PR4280 + +define i32 @__fixunssfsi(float %a) nounwind readnone { +entry: + %0 = fcmp ult float %a, 0x41E0000000000000 ; <i1> [#uses=1] + br i1 %0, label %bb1, label %bb + +bb: ; preds = %entry + ret i32 1 + +bb1: ; preds = %entry + ret i32 0 +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/2009-07-16-InlineAsm-M-Operand.ll b/src/LLVM/test/CodeGen/PowerPC/2009-07-16-InlineAsm-M-Operand.ll new file mode 100644 index 0000000..f64e3dc --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2009-07-16-InlineAsm-M-Operand.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=ppc32 -verify-machineinstrs + +; Machine code verifier will call isRegTiedToDefOperand() on /all/ register use +; operands. We must make sure that the operand flag is found correctly. + +; This test case is actually not specific to PowerPC, but the (imm, reg) format +; of PowerPC "m" operands trigger this bug. + +define void @memory_asm_operand(i32 %a) { + ; "m" operand will be represented as: + ; INLINEASM <es:fake $0>, 10, %R2, 20, -4, %R1 + ; It is difficult to find the flag operand (20) when starting from %R1 + call i32 asm "lbzx $0, $1", "=r,m" (i32 %a) + ret void +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll b/src/LLVM/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll new file mode 100644 index 0000000..6a3c440 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll
@@ -0,0 +1,25 @@ +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin10 -mcpu=g5 | FileCheck %s +; ModuleID = '<stdin>' +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128" +target triple = "powerpc-apple-darwin10.0" +; It is wrong on powerpc to substitute reg+reg for $0; the stw opcode +; would have to change. + +@x = external global [0 x i32] ; <[0 x i32]*> [#uses=1] + +define void @foo(i32 %y) nounwind ssp { +entry: +; CHECK: foo +; CHECK: add r3 +; CHECK: 0(r3) + %y_addr = alloca i32 ; <i32*> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store i32 %y, i32* %y_addr + %0 = load i32* %y_addr, align 4 ; <i32> [#uses=1] + %1 = getelementptr inbounds [0 x i32]* @x, i32 0, i32 %0 ; <i32*> [#uses=1] + call void asm sideeffect "isync\0A\09eieio\0A\09stw $1, $0", "=*o,r,~{memory}"(i32* %1, i32 0) nounwind + br label %return + +return: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2009-08-23-linkerprivate.ll b/src/LLVM/test/CodeGen/PowerPC/2009-08-23-linkerprivate.ll new file mode 100644 index 0000000..ae2acd4 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2009-08-23-linkerprivate.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin | FileCheck %s + +; ModuleID = '/Volumes/MacOS9/tests/WebKit/JavaScriptCore/profiler/ProfilerServer.mm' + +@"\01l_objc_msgSend_fixup_alloc" = linker_private_weak hidden global i32 0, section "__DATA, __objc_msgrefs, coalesced", align 16 + +; CHECK: .globl l_objc_msgSend_fixup_alloc +; CHECK: .weak_definition l_objc_msgSend_fixup_alloc
diff --git a/src/LLVM/test/CodeGen/PowerPC/2009-09-18-carrybit.ll b/src/LLVM/test/CodeGen/PowerPC/2009-09-18-carrybit.ll new file mode 100644 index 0000000..6c23a61 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2009-09-18-carrybit.ll
@@ -0,0 +1,62 @@ +; RUN: llc -march=ppc32 < %s | FileCheck %s +; ModuleID = '<stdin>' +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128" +target triple = "powerpc-apple-darwin9.6" + +define i64 @foo(i64 %r.0.ph, i64 %q.0.ph, i32 %sr1.1.ph) nounwind { +entry: +; CHECK: foo: +; CHECK: subfc +; CHECK: subfe +; CHECK: subfc +; CHECK: subfe + %tmp0 = add i64 %r.0.ph, -1 ; <i64> [#uses=1] + br label %bb40 + +bb40: ; preds = %bb40, %entry + %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb40 ] ; <i32> [#uses=1] + %carry.0274 = phi i32 [ 0, %entry ], [%tmp122, %bb40 ] ; <i32> [#uses=1] + %r.0273 = phi i64 [ %r.0.ph, %entry ], [ %tmp124, %bb40 ] ; <i64> [#uses=2] + %q.0272 = phi i64 [ %q.0.ph, %entry ], [ %ins169, %bb40 ] ; <i64> [#uses=3] + %tmp1 = lshr i64 %r.0273, 31 ; <i64> [#uses=1] + %tmp2 = trunc i64 %tmp1 to i32 ; <i32> [#uses=1] + %tmp3 = and i32 %tmp2, -2 ; <i32> [#uses=1] + %tmp213 = trunc i64 %r.0273 to i32 ; <i32> [#uses=2] + %tmp106 = lshr i32 %tmp213, 31 ; <i32> [#uses=1] + %tmp107 = or i32 %tmp3, %tmp106 ; <i32> [#uses=1] + %tmp215 = zext i32 %tmp107 to i64 ; <i64> [#uses=1] + %tmp216 = shl i64 %tmp215, 32 ; <i64> [#uses=1] + %tmp108 = shl i32 %tmp213, 1 ; <i32> [#uses=1] + %tmp109 = lshr i64 %q.0272, 63 ; <i64> [#uses=1] + %tmp110 = trunc i64 %tmp109 to i32 ; <i32> [#uses=1] + %tmp111 = or i32 %tmp108, %tmp110 ; <i32> [#uses=1] + %tmp222 = zext i32 %tmp111 to i64 ; <i64> [#uses=1] + %ins224 = or i64 %tmp216, %tmp222 ; <i64> [#uses=2] + %tmp112 = lshr i64 %q.0272, 31 ; <i64> [#uses=1] + %tmp113 = trunc i64 %tmp112 to i32 ; <i32> [#uses=1] + %tmp114 = and i32 %tmp113, -2 ; <i32> [#uses=1] + %tmp158 = trunc i64 %q.0272 to i32 ; <i32> [#uses=2] + %tmp115 = lshr i32 %tmp158, 31 ; <i32> [#uses=1] + %tmp116 = or i32 %tmp114, %tmp115 ; <i32> [#uses=1] + %tmp160 = zext i32 %tmp116 to i64 ; <i64> [#uses=1] + %tmp161 = shl i64 %tmp160, 32 ; <i64> [#uses=1] + %tmp117 = shl 
i32 %tmp158, 1 ; <i32> [#uses=1] + %tmp118 = or i32 %tmp117, %carry.0274 ; <i32> [#uses=1] + %tmp167 = zext i32 %tmp118 to i64 ; <i64> [#uses=1] + %ins169 = or i64 %tmp161, %tmp167 ; <i64> [#uses=2] + %tmp119 = sub i64 %tmp0, %ins224 ; <i64> [#uses=1] + %tmp120 = ashr i64 %tmp119, 63 ; <i64> [#uses=2] + %tmp121 = trunc i64 %tmp120 to i32 ; <i32> [#uses=1] + %tmp122 = and i32 %tmp121, 1 ; <i32> [#uses=2] + %tmp123 = and i64 %tmp120, %q.0.ph ; <i64> [#uses=1] + %tmp124 = sub i64 %ins224, %tmp123 ; <i64> [#uses=2] + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %indvar.next, %sr1.1.ph ; <i1> [#uses=1] + br i1 %exitcond, label %bb41.bb42_crit_edge, label %bb40 + +bb41.bb42_crit_edge: ; preds = %bb40 + %phitmp278 = zext i32 %tmp122 to i64 ; <i64> [#uses=1] + %tmp125 = shl i64 %ins169, 1 ; <i64> [#uses=1] + %tmp126 = or i64 %phitmp278, %tmp125 ; <i64> [#uses=2] + ret i64 %tmp126 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2009-11-15-ProcImpDefsBug.ll b/src/LLVM/test/CodeGen/PowerPC/2009-11-15-ProcImpDefsBug.ll new file mode 100644 index 0000000..2d9d16a --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2009-11-15-ProcImpDefsBug.ll
@@ -0,0 +1,105 @@ +; RUN: llc < %s -mtriple=powerpc-apple-darwin8 + +define void @gcov_exit() nounwind { +entry: + br i1 undef, label %return, label %bb.nph341 + +bb.nph341: ; preds = %entry + br label %bb25 + +bb25: ; preds = %read_fatal, %bb.nph341 + br i1 undef, label %bb49.1, label %bb48 + +bb48: ; preds = %bb25 + br label %bb49.1 + +bb51: ; preds = %bb48.4, %bb49.3 + switch i32 undef, label %bb58 [ + i32 0, label %rewrite + i32 1734567009, label %bb59 + ] + +bb58: ; preds = %bb51 + br label %read_fatal + +bb59: ; preds = %bb51 + br i1 undef, label %bb60, label %bb3.i156 + +bb3.i156: ; preds = %bb59 + br label %read_fatal + +bb60: ; preds = %bb59 + br i1 undef, label %bb78.preheader, label %rewrite + +bb78.preheader: ; preds = %bb60 + br i1 undef, label %bb62, label %bb80 + +bb62: ; preds = %bb78.preheader + br i1 undef, label %bb64, label %read_mismatch + +bb64: ; preds = %bb62 + br i1 undef, label %bb65, label %read_mismatch + +bb65: ; preds = %bb64 + br i1 undef, label %bb75, label %read_mismatch + +read_mismatch: ; preds = %bb98, %bb119.preheader, %bb72, %bb71, %bb65, %bb64, %bb62 + br label %read_fatal + +bb71: ; preds = %bb75 + br i1 undef, label %bb72, label %read_mismatch + +bb72: ; preds = %bb71 + br i1 undef, label %bb73, label %read_mismatch + +bb73: ; preds = %bb72 + unreachable + +bb74: ; preds = %bb75 + br label %bb75 + +bb75: ; preds = %bb74, %bb65 + br i1 undef, label %bb74, label %bb71 + +bb80: ; preds = %bb78.preheader + unreachable + +read_fatal: ; preds = %read_mismatch, %bb3.i156, %bb58 + br i1 undef, label %return, label %bb25 + +rewrite: ; preds = %bb60, %bb51 + br i1 undef, label %bb94, label %bb119.preheader + +bb94: ; preds = %rewrite + unreachable + +bb119.preheader: ; preds = %rewrite + br i1 undef, label %read_mismatch, label %bb98 + +bb98: ; preds = %bb119.preheader + br label %read_mismatch + +return: ; preds = %read_fatal, %entry + ret void + +bb49.1: ; preds = %bb48, %bb25 + br i1 undef, label %bb49.2, label %bb48.2 + +bb49.2: ; 
preds = %bb48.2, %bb49.1 + br i1 undef, label %bb49.3, label %bb48.3 + +bb48.2: ; preds = %bb49.1 + br label %bb49.2 + +bb49.3: ; preds = %bb48.3, %bb49.2 + %c_ix.0.3 = phi i32 [ undef, %bb48.3 ], [ undef, %bb49.2 ] ; <i32> [#uses=1] + br i1 undef, label %bb51, label %bb48.4 + +bb48.3: ; preds = %bb49.2 + store i64* undef, i64** undef, align 4 + br label %bb49.3 + +bb48.4: ; preds = %bb49.3 + %0 = getelementptr inbounds [5 x i64*]* undef, i32 0, i32 %c_ix.0.3 ; <i64**> [#uses=0] + br label %bb51 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2009-11-25-ImpDefBug.ll b/src/LLVM/test/CodeGen/PowerPC/2009-11-25-ImpDefBug.ll new file mode 100644 index 0000000..9a22a6f --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2009-11-25-ImpDefBug.ll
@@ -0,0 +1,56 @@ +; RUN: llc < %s -mtriple=powerpc-apple-darwin9.5 -mcpu=g5 +; rdar://7422268 + +%struct..0EdgeT = type { i32, i32, float, float, i32, i32, i32, float, i32, i32 } + +define void @smooth_color_z_triangle(i32 %v0, i32 %v1, i32 %v2, i32 %pv) nounwind { +entry: + br i1 undef, label %return, label %bb14 + +bb14: ; preds = %entry + br i1 undef, label %bb15, label %return + +bb15: ; preds = %bb14 + br i1 undef, label %bb16, label %bb17 + +bb16: ; preds = %bb15 + br label %bb17 + +bb17: ; preds = %bb16, %bb15 + %0 = fcmp olt float undef, 0.000000e+00 ; <i1> [#uses=2] + %eTop.eMaj = select i1 %0, %struct..0EdgeT* undef, %struct..0EdgeT* null ; <%struct..0EdgeT*> [#uses=1] + br label %bb69 + +bb24: ; preds = %bb69 + br i1 undef, label %bb25, label %bb28 + +bb25: ; preds = %bb24 + br label %bb33 + +bb28: ; preds = %bb24 + br i1 undef, label %return, label %bb32 + +bb32: ; preds = %bb28 + br i1 %0, label %bb38, label %bb33 + +bb33: ; preds = %bb32, %bb25 + br i1 undef, label %bb34, label %bb38 + +bb34: ; preds = %bb33 + br label %bb38 + +bb38: ; preds = %bb34, %bb33, %bb32 + %eRight.08 = phi %struct..0EdgeT* [ %eTop.eMaj, %bb32 ], [ undef, %bb34 ], [ undef, %bb33 ] ; <%struct..0EdgeT*> [#uses=0] + %fdgOuter.0 = phi i32 [ %fdgOuter.1, %bb32 ], [ undef, %bb34 ], [ %fdgOuter.1, %bb33 ] ; <i32> [#uses=1] + %fz.3 = phi i32 [ %fz.2, %bb32 ], [ 2147483647, %bb34 ], [ %fz.2, %bb33 ] ; <i32> [#uses=1] + %1 = add i32 undef, 1 ; <i32> [#uses=0] + br label %bb69 + +bb69: ; preds = %bb38, %bb17 + %fdgOuter.1 = phi i32 [ undef, %bb17 ], [ %fdgOuter.0, %bb38 ] ; <i32> [#uses=2] + %fz.2 = phi i32 [ undef, %bb17 ], [ %fz.3, %bb38 ] ; <i32> [#uses=2] + br i1 undef, label %bb24, label %return + +return: ; preds = %bb69, %bb28, %bb14, %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2010-02-04-EmptyGlobal.ll b/src/LLVM/test/CodeGen/PowerPC/2010-02-04-EmptyGlobal.ll new file mode 100644 index 0000000..1ba11d3 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2010-02-04-EmptyGlobal.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -mtriple=powerpc-apple-darwin10 -relocation-model=pic -disable-fp-elim | FileCheck %s +; <rdar://problem/7604010> + +%cmd.type = type { } + +@_cmd = constant %cmd.type zeroinitializer + +; CHECK: .globl __cmd +; CHECK-NEXT: .align 3 +; CHECK-NEXT: __cmd: +; CHECK-NEXT: .byte 0 + +; PR6340 + +%Ty = type { i32, {}, i32 } +@k = global %Ty { i32 1, {} zeroinitializer, i32 3 } + +; CHECK: _k: +; CHECK-NEXT: .long 1 +; CHECK-NEXT: .long 3
diff --git a/src/LLVM/test/CodeGen/PowerPC/2010-02-12-saveCR.ll b/src/LLVM/test/CodeGen/PowerPC/2010-02-12-saveCR.ll new file mode 100644 index 0000000..b73382e --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
@@ -0,0 +1,30 @@ +; RUN: llc < %s -mtriple=powerpc-apple-darwin | FileCheck %s +; ModuleID = 'hh.c' +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32" +target triple = "powerpc-apple-darwin9.6" +; This formerly used R0 for both the stack address and CR. + +define void @foo() nounwind { +entry: +;CHECK: mfcr r2 +;CHECK: rlwinm r2, r2, 8, 0, 31 +;CHECK: lis r0, 1 +;CHECK: ori r0, r0, 34540 +;CHECK: stwx r2, r1, r0 + %x = alloca [100000 x i8] ; <[100000 x i8]*> [#uses=1] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + %x1 = bitcast [100000 x i8]* %x to i8* ; <i8*> [#uses=1] + call void @bar(i8* %x1) nounwind + call void asm sideeffect "", "~{cr2}"() nounwind + br label %return + +return: ; preds = %entry +;CHECK: lis r0, 1 +;CHECK: ori r0, r0, 34540 +;CHECK: lwzx r2, r1, r0 +;CHECK: rlwinm r2, r2, 24, 0, 31 +;CHECK: mtcrf 32, r2 + ret void +} + +declare void @bar(i8*)
diff --git a/src/LLVM/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll b/src/LLVM/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll new file mode 100644 index 0000000..6b31397 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -march=ppc32 -mcpu=g5 -mtriple=powerpc-apple-darwin10.0 -join-physregs | FileCheck %s +; ModuleID = 'nn.c' +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128" +target triple = "powerpc-apple-darwin11.0" +; Indirect calls must use R12 on Darwin (i.e., R12 must contain the address of +; the function being called; the mtctr is not required to use it). + +@p = external global void (...)* ; <void (...)**> [#uses=1] + +define void @foo() nounwind ssp { +entry: +; CHECK: mtctr r12 + %0 = load void (...)** @p, align 4 ; <void (...)*> [#uses=1] + call void (...)* %0() nounwind + br label %return + +return: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2010-04-01-MachineCSEBug.ll b/src/LLVM/test/CodeGen/PowerPC/2010-04-01-MachineCSEBug.ll new file mode 100644 index 0000000..8fd0550 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2010-04-01-MachineCSEBug.ll
@@ -0,0 +1,70 @@ +; RUN: llc < %s -mtriple=powerpc-apple-darwin10.0 +; rdar://7819990 + +%0 = type { i32 } +%1 = type { i64 } +%struct.Buffer = type { [1024 x i8], i64, i64, i64 } +%struct.InStream = type { %struct.Buffer, %0, %1, i32*, %struct.InStreamMethods* } +%struct.InStreamMethods = type { void (%struct.InStream*, i8*, i32)*, void (%struct.InStream*, i64)*, i64 (%struct.InStream*)*, void (%struct.InStream*)* } + +define i64 @t(%struct.InStream* %is) nounwind optsize ssp { +entry: + br i1 undef, label %is_read_byte.exit, label %bb.i + +bb.i: ; preds = %entry + br label %is_read_byte.exit + +is_read_byte.exit: ; preds = %bb.i, %entry + br i1 undef, label %is_read_byte.exit22, label %bb.i21 + +bb.i21: ; preds = %is_read_byte.exit + unreachable + +is_read_byte.exit22: ; preds = %is_read_byte.exit + br i1 undef, label %is_read_byte.exit19, label %bb.i18 + +bb.i18: ; preds = %is_read_byte.exit22 + br label %is_read_byte.exit19 + +is_read_byte.exit19: ; preds = %bb.i18, %is_read_byte.exit22 + br i1 undef, label %is_read_byte.exit16, label %bb.i15 + +bb.i15: ; preds = %is_read_byte.exit19 + unreachable + +is_read_byte.exit16: ; preds = %is_read_byte.exit19 + %0 = shl i64 undef, 32 ; <i64> [#uses=1] + br i1 undef, label %is_read_byte.exit13, label %bb.i12 + +bb.i12: ; preds = %is_read_byte.exit16 + unreachable + +is_read_byte.exit13: ; preds = %is_read_byte.exit16 + %1 = shl i64 undef, 24 ; <i64> [#uses=1] + br i1 undef, label %is_read_byte.exit10, label %bb.i9 + +bb.i9: ; preds = %is_read_byte.exit13 + unreachable + +is_read_byte.exit10: ; preds = %is_read_byte.exit13 + %2 = shl i64 undef, 16 ; <i64> [#uses=1] + br i1 undef, label %is_read_byte.exit7, label %bb.i6 + +bb.i6: ; preds = %is_read_byte.exit10 + br label %is_read_byte.exit7 + +is_read_byte.exit7: ; preds = %bb.i6, %is_read_byte.exit10 + %3 = shl i64 undef, 8 ; <i64> [#uses=1] + br i1 undef, label %is_read_byte.exit4, label %bb.i3 + +bb.i3: ; preds = %is_read_byte.exit7 + unreachable + +is_read_byte.exit4: 
; preds = %is_read_byte.exit7 + %4 = or i64 0, %0 ; <i64> [#uses=1] + %5 = or i64 %4, %1 ; <i64> [#uses=1] + %6 = or i64 %5, %2 ; <i64> [#uses=1] + %7 = or i64 %6, %3 ; <i64> [#uses=1] + %8 = or i64 %7, 0 ; <i64> [#uses=1] + ret i64 %8 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2010-04-07-DbgValueOtherTargets.ll b/src/LLVM/test/CodeGen/PowerPC/2010-04-07-DbgValueOtherTargets.ll new file mode 100644 index 0000000..4a85098 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2010-04-07-DbgValueOtherTargets.ll
@@ -0,0 +1,28 @@ +; RUN: llc -O0 -march=ppc32 -asm-verbose < %s | FileCheck %s +; Check that DEBUG_VALUE comments come through on a variety of targets. + +define i32 @main() nounwind ssp { +entry: +; CHECK: DEBUG_VALUE + call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9 + ret i32 0, !dbg !10 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.sp = !{!0} + +!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5} +!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 0} +!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] +!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!9 = metadata !{i32 3, i32 11, metadata !8, null} +!10 = metadata !{i32 4, i32 2, metadata !8, null} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll b/src/LLVM/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll new file mode 100644 index 0000000..72ae9d6 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll
@@ -0,0 +1,25 @@ +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin -mcpu=g5 | FileCheck %s +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin -mcpu=g5 -regalloc=basic | FileCheck %s + +declare i8* @llvm.frameaddress(i32) nounwind readnone + +define i8* @g2() nounwind readnone { +entry: +; CHECK: _g2: +; CHECK: lwz r3, 0(r1) + %0 = tail call i8* @llvm.frameaddress(i32 1) ; <i8*> [#uses=1] + ret i8* %0 +} + +declare i8* @llvm.returnaddress(i32) nounwind readnone + +define i8* @g() nounwind readnone { +entry: +; CHECK: _g: +; CHECK: mflr r0 +; CHECK: stw r0, 8(r1) +; CHECK: lwz r3, 0(r1) +; CHECK: lwz r3, 8(r3) + %0 = tail call i8* @llvm.returnaddress(i32 1) ; <i8*> [#uses=1] + ret i8* %0 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2010-10-11-Fast-Varargs.ll b/src/LLVM/test/CodeGen/PowerPC/2010-10-11-Fast-Varargs.ll new file mode 100644 index 0000000..da77b28 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2010-10-11-Fast-Varargs.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -O0 +; PR8357 +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32" +target triple = "powerpc-unknown-freebsd9.0" + +; RegAllocFast requires that each physreg only be used once. The varargs +; lowering code needs to use virtual registers when storing live-in registers on +; the stack. + +define i32 @testing(i32 %x, float %a, ...) nounwind { + %1 = alloca i32, align 4 + %2 = alloca float, align 4 + store i32 %x, i32* %1, align 4 + store float %a, float* %2, align 4 + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll b/src/LLVM/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll new file mode 100644 index 0000000..bf3d577 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll
@@ -0,0 +1,22 @@ +; RUN: llc -disable-fp-elim < %s | FileCheck %s +; PR8749 +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32" +target triple = "powerpc-apple-darwin9.8" + +define i32 @main() nounwind { +entry: +; Make sure we're generating references using the red zone +; CHECK: main: +; CHECK: stw r3, -12(r1) + %retval = alloca i32 + %0 = alloca i32 + %"alloca point" = bitcast i32 0 to i32 + store i32 0, i32* %0, align 4 + %1 = load i32* %0, align 4 + store i32 %1, i32* %retval, align 4 + br label %return + +return: ; preds = %entry + %retval1 = load i32* %retval + ret i32 %retval1 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/Atomics-32.ll b/src/LLVM/test/CodeGen/PowerPC/Atomics-32.ll new file mode 100644 index 0000000..64f1495 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/Atomics-32.ll
@@ -0,0 +1,699 @@ +; RUN: llc < %s -march=ppc32 +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128" +target triple = "powerpc-apple-darwin9" + +@sc = common global i8 0 +@uc = common global i8 0 +@ss = common global i16 0 +@us = common global i16 0 +@si = common global i32 0 +@ui = common global i32 0 +@sl = common global i32 0 +@ul = common global i32 0 +@sll = common global i64 0, align 8 +@ull = common global i64 0, align 8 + +define void @test_op_ignore() nounwind { +entry: + %0 = atomicrmw add i8* @sc, i8 1 monotonic + %1 = atomicrmw add i8* @uc, i8 1 monotonic + %2 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %3 = atomicrmw add i16* %2, i16 1 monotonic + %4 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %5 = atomicrmw add i16* %4, i16 1 monotonic + %6 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %7 = atomicrmw add i32* %6, i32 1 monotonic + %8 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %9 = atomicrmw add i32* %8, i32 1 monotonic + %10 = bitcast i8* bitcast (i32* @sl to i8*) to i32* + %11 = atomicrmw add i32* %10, i32 1 monotonic + %12 = bitcast i8* bitcast (i32* @ul to i8*) to i32* + %13 = atomicrmw add i32* %12, i32 1 monotonic + %14 = atomicrmw sub i8* @sc, i8 1 monotonic + %15 = atomicrmw sub i8* @uc, i8 1 monotonic + %16 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %17 = atomicrmw sub i16* %16, i16 1 monotonic + %18 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %19 = atomicrmw sub i16* %18, i16 1 monotonic + %20 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %21 = atomicrmw sub i32* %20, i32 1 monotonic + %22 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %23 = atomicrmw sub i32* %22, i32 1 monotonic + %24 = bitcast i8* bitcast (i32* @sl to i8*) to i32* + %25 = atomicrmw sub i32* %24, i32 1 monotonic + %26 = bitcast i8* bitcast (i32* @ul to i8*) to i32* + %27 = atomicrmw sub i32* %26, i32 1 monotonic + %28 = atomicrmw or i8* @sc, 
i8 1 monotonic + %29 = atomicrmw or i8* @uc, i8 1 monotonic + %30 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %31 = atomicrmw or i16* %30, i16 1 monotonic + %32 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %33 = atomicrmw or i16* %32, i16 1 monotonic + %34 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %35 = atomicrmw or i32* %34, i32 1 monotonic + %36 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %37 = atomicrmw or i32* %36, i32 1 monotonic + %38 = bitcast i8* bitcast (i32* @sl to i8*) to i32* + %39 = atomicrmw or i32* %38, i32 1 monotonic + %40 = bitcast i8* bitcast (i32* @ul to i8*) to i32* + %41 = atomicrmw or i32* %40, i32 1 monotonic + %42 = atomicrmw xor i8* @sc, i8 1 monotonic + %43 = atomicrmw xor i8* @uc, i8 1 monotonic + %44 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %45 = atomicrmw xor i16* %44, i16 1 monotonic + %46 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %47 = atomicrmw xor i16* %46, i16 1 monotonic + %48 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %49 = atomicrmw xor i32* %48, i32 1 monotonic + %50 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %51 = atomicrmw xor i32* %50, i32 1 monotonic + %52 = bitcast i8* bitcast (i32* @sl to i8*) to i32* + %53 = atomicrmw xor i32* %52, i32 1 monotonic + %54 = bitcast i8* bitcast (i32* @ul to i8*) to i32* + %55 = atomicrmw xor i32* %54, i32 1 monotonic + %56 = atomicrmw and i8* @sc, i8 1 monotonic + %57 = atomicrmw and i8* @uc, i8 1 monotonic + %58 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %59 = atomicrmw and i16* %58, i16 1 monotonic + %60 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %61 = atomicrmw and i16* %60, i16 1 monotonic + %62 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %63 = atomicrmw and i32* %62, i32 1 monotonic + %64 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %65 = atomicrmw and i32* %64, i32 1 monotonic + %66 = bitcast i8* bitcast (i32* @sl to i8*) to i32* + %67 = atomicrmw and i32* %66, i32 1 monotonic + %68 = bitcast i8* bitcast 
(i32* @ul to i8*) to i32* + %69 = atomicrmw and i32* %68, i32 1 monotonic + %70 = atomicrmw nand i8* @sc, i8 1 monotonic + %71 = atomicrmw nand i8* @uc, i8 1 monotonic + %72 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %73 = atomicrmw nand i16* %72, i16 1 monotonic + %74 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %75 = atomicrmw nand i16* %74, i16 1 monotonic + %76 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %77 = atomicrmw nand i32* %76, i32 1 monotonic + %78 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %79 = atomicrmw nand i32* %78, i32 1 monotonic + %80 = bitcast i8* bitcast (i32* @sl to i8*) to i32* + %81 = atomicrmw nand i32* %80, i32 1 monotonic + %82 = bitcast i8* bitcast (i32* @ul to i8*) to i32* + %83 = atomicrmw nand i32* %82, i32 1 monotonic + br label %return + +return: ; preds = %entry + ret void +} + +define void @test_fetch_and_op() nounwind { +entry: + %0 = atomicrmw add i8* @sc, i8 11 monotonic + store i8 %0, i8* @sc, align 1 + %1 = atomicrmw add i8* @uc, i8 11 monotonic + store i8 %1, i8* @uc, align 1 + %2 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %3 = atomicrmw add i16* %2, i16 11 monotonic + store i16 %3, i16* @ss, align 2 + %4 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %5 = atomicrmw add i16* %4, i16 11 monotonic + store i16 %5, i16* @us, align 2 + %6 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %7 = atomicrmw add i32* %6, i32 11 monotonic + store i32 %7, i32* @si, align 4 + %8 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %9 = atomicrmw add i32* %8, i32 11 monotonic + store i32 %9, i32* @ui, align 4 + %10 = bitcast i8* bitcast (i32* @sl to i8*) to i32* + %11 = atomicrmw add i32* %10, i32 11 monotonic + store i32 %11, i32* @sl, align 4 + %12 = bitcast i8* bitcast (i32* @ul to i8*) to i32* + %13 = atomicrmw add i32* %12, i32 11 monotonic + store i32 %13, i32* @ul, align 4 + %14 = atomicrmw sub i8* @sc, i8 11 monotonic + store i8 %14, i8* @sc, align 1 + %15 = atomicrmw sub i8* @uc, i8 11 monotonic + 
store i8 %15, i8* @uc, align 1 + %16 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %17 = atomicrmw sub i16* %16, i16 11 monotonic + store i16 %17, i16* @ss, align 2 + %18 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %19 = atomicrmw sub i16* %18, i16 11 monotonic + store i16 %19, i16* @us, align 2 + %20 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %21 = atomicrmw sub i32* %20, i32 11 monotonic + store i32 %21, i32* @si, align 4 + %22 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %23 = atomicrmw sub i32* %22, i32 11 monotonic + store i32 %23, i32* @ui, align 4 + %24 = bitcast i8* bitcast (i32* @sl to i8*) to i32* + %25 = atomicrmw sub i32* %24, i32 11 monotonic + store i32 %25, i32* @sl, align 4 + %26 = bitcast i8* bitcast (i32* @ul to i8*) to i32* + %27 = atomicrmw sub i32* %26, i32 11 monotonic + store i32 %27, i32* @ul, align 4 + %28 = atomicrmw or i8* @sc, i8 11 monotonic + store i8 %28, i8* @sc, align 1 + %29 = atomicrmw or i8* @uc, i8 11 monotonic + store i8 %29, i8* @uc, align 1 + %30 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %31 = atomicrmw or i16* %30, i16 11 monotonic + store i16 %31, i16* @ss, align 2 + %32 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %33 = atomicrmw or i16* %32, i16 11 monotonic + store i16 %33, i16* @us, align 2 + %34 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %35 = atomicrmw or i32* %34, i32 11 monotonic + store i32 %35, i32* @si, align 4 + %36 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %37 = atomicrmw or i32* %36, i32 11 monotonic + store i32 %37, i32* @ui, align 4 + %38 = bitcast i8* bitcast (i32* @sl to i8*) to i32* + %39 = atomicrmw or i32* %38, i32 11 monotonic + store i32 %39, i32* @sl, align 4 + %40 = bitcast i8* bitcast (i32* @ul to i8*) to i32* + %41 = atomicrmw or i32* %40, i32 11 monotonic + store i32 %41, i32* @ul, align 4 + %42 = atomicrmw xor i8* @sc, i8 11 monotonic + store i8 %42, i8* @sc, align 1 + %43 = atomicrmw xor i8* @uc, i8 11 monotonic + store i8 %43, i8* @uc, align 1 
+ %44 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %45 = atomicrmw xor i16* %44, i16 11 monotonic + store i16 %45, i16* @ss, align 2 + %46 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %47 = atomicrmw xor i16* %46, i16 11 monotonic + store i16 %47, i16* @us, align 2 + %48 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %49 = atomicrmw xor i32* %48, i32 11 monotonic + store i32 %49, i32* @si, align 4 + %50 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %51 = atomicrmw xor i32* %50, i32 11 monotonic + store i32 %51, i32* @ui, align 4 + %52 = bitcast i8* bitcast (i32* @sl to i8*) to i32* + %53 = atomicrmw xor i32* %52, i32 11 monotonic + store i32 %53, i32* @sl, align 4 + %54 = bitcast i8* bitcast (i32* @ul to i8*) to i32* + %55 = atomicrmw xor i32* %54, i32 11 monotonic + store i32 %55, i32* @ul, align 4 + %56 = atomicrmw and i8* @sc, i8 11 monotonic + store i8 %56, i8* @sc, align 1 + %57 = atomicrmw and i8* @uc, i8 11 monotonic + store i8 %57, i8* @uc, align 1 + %58 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %59 = atomicrmw and i16* %58, i16 11 monotonic + store i16 %59, i16* @ss, align 2 + %60 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %61 = atomicrmw and i16* %60, i16 11 monotonic + store i16 %61, i16* @us, align 2 + %62 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %63 = atomicrmw and i32* %62, i32 11 monotonic + store i32 %63, i32* @si, align 4 + %64 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %65 = atomicrmw and i32* %64, i32 11 monotonic + store i32 %65, i32* @ui, align 4 + %66 = bitcast i8* bitcast (i32* @sl to i8*) to i32* + %67 = atomicrmw and i32* %66, i32 11 monotonic + store i32 %67, i32* @sl, align 4 + %68 = bitcast i8* bitcast (i32* @ul to i8*) to i32* + %69 = atomicrmw and i32* %68, i32 11 monotonic + store i32 %69, i32* @ul, align 4 + %70 = atomicrmw nand i8* @sc, i8 11 monotonic + store i8 %70, i8* @sc, align 1 + %71 = atomicrmw nand i8* @uc, i8 11 monotonic + store i8 %71, i8* @uc, align 1 + %72 = bitcast i8* 
bitcast (i16* @ss to i8*) to i16* + %73 = atomicrmw nand i16* %72, i16 11 monotonic + store i16 %73, i16* @ss, align 2 + %74 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %75 = atomicrmw nand i16* %74, i16 11 monotonic + store i16 %75, i16* @us, align 2 + %76 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %77 = atomicrmw nand i32* %76, i32 11 monotonic + store i32 %77, i32* @si, align 4 + %78 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %79 = atomicrmw nand i32* %78, i32 11 monotonic + store i32 %79, i32* @ui, align 4 + %80 = bitcast i8* bitcast (i32* @sl to i8*) to i32* + %81 = atomicrmw nand i32* %80, i32 11 monotonic + store i32 %81, i32* @sl, align 4 + %82 = bitcast i8* bitcast (i32* @ul to i8*) to i32* + %83 = atomicrmw nand i32* %82, i32 11 monotonic + store i32 %83, i32* @ul, align 4 + br label %return + +return: ; preds = %entry + ret void +} + +define void @test_op_and_fetch() nounwind { +entry: + %0 = load i8* @uc, align 1 + %1 = atomicrmw add i8* @sc, i8 %0 monotonic + %2 = add i8 %1, %0 + store i8 %2, i8* @sc, align 1 + %3 = load i8* @uc, align 1 + %4 = atomicrmw add i8* @uc, i8 %3 monotonic + %5 = add i8 %4, %3 + store i8 %5, i8* @uc, align 1 + %6 = load i8* @uc, align 1 + %7 = zext i8 %6 to i16 + %8 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %9 = atomicrmw add i16* %8, i16 %7 monotonic + %10 = add i16 %9, %7 + store i16 %10, i16* @ss, align 2 + %11 = load i8* @uc, align 1 + %12 = zext i8 %11 to i16 + %13 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %14 = atomicrmw add i16* %13, i16 %12 monotonic + %15 = add i16 %14, %12 + store i16 %15, i16* @us, align 2 + %16 = load i8* @uc, align 1 + %17 = zext i8 %16 to i32 + %18 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %19 = atomicrmw add i32* %18, i32 %17 monotonic + %20 = add i32 %19, %17 + store i32 %20, i32* @si, align 4 + %21 = load i8* @uc, align 1 + %22 = zext i8 %21 to i32 + %23 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %24 = atomicrmw add i32* %23, i32 %22 
monotonic + %25 = add i32 %24, %22 + store i32 %25, i32* @ui, align 4 + %26 = load i8* @uc, align 1 + %27 = zext i8 %26 to i32 + %28 = bitcast i8* bitcast (i32* @sl to i8*) to i32* + %29 = atomicrmw add i32* %28, i32 %27 monotonic + %30 = add i32 %29, %27 + store i32 %30, i32* @sl, align 4 + %31 = load i8* @uc, align 1 + %32 = zext i8 %31 to i32 + %33 = bitcast i8* bitcast (i32* @ul to i8*) to i32* + %34 = atomicrmw add i32* %33, i32 %32 monotonic + %35 = add i32 %34, %32 + store i32 %35, i32* @ul, align 4 + %36 = load i8* @uc, align 1 + %37 = atomicrmw sub i8* @sc, i8 %36 monotonic + %38 = sub i8 %37, %36 + store i8 %38, i8* @sc, align 1 + %39 = load i8* @uc, align 1 + %40 = atomicrmw sub i8* @uc, i8 %39 monotonic + %41 = sub i8 %40, %39 + store i8 %41, i8* @uc, align 1 + %42 = load i8* @uc, align 1 + %43 = zext i8 %42 to i16 + %44 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %45 = atomicrmw sub i16* %44, i16 %43 monotonic + %46 = sub i16 %45, %43 + store i16 %46, i16* @ss, align 2 + %47 = load i8* @uc, align 1 + %48 = zext i8 %47 to i16 + %49 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %50 = atomicrmw sub i16* %49, i16 %48 monotonic + %51 = sub i16 %50, %48 + store i16 %51, i16* @us, align 2 + %52 = load i8* @uc, align 1 + %53 = zext i8 %52 to i32 + %54 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %55 = atomicrmw sub i32* %54, i32 %53 monotonic + %56 = sub i32 %55, %53 + store i32 %56, i32* @si, align 4 + %57 = load i8* @uc, align 1 + %58 = zext i8 %57 to i32 + %59 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %60 = atomicrmw sub i32* %59, i32 %58 monotonic + %61 = sub i32 %60, %58 + store i32 %61, i32* @ui, align 4 + %62 = load i8* @uc, align 1 + %63 = zext i8 %62 to i32 + %64 = bitcast i8* bitcast (i32* @sl to i8*) to i32* + %65 = atomicrmw sub i32* %64, i32 %63 monotonic + %66 = sub i32 %65, %63 + store i32 %66, i32* @sl, align 4 + %67 = load i8* @uc, align 1 + %68 = zext i8 %67 to i32 + %69 = bitcast i8* bitcast (i32* @ul to i8*) to i32* + 
%70 = atomicrmw sub i32* %69, i32 %68 monotonic + %71 = sub i32 %70, %68 + store i32 %71, i32* @ul, align 4 + %72 = load i8* @uc, align 1 + %73 = atomicrmw or i8* @sc, i8 %72 monotonic + %74 = or i8 %73, %72 + store i8 %74, i8* @sc, align 1 + %75 = load i8* @uc, align 1 + %76 = atomicrmw or i8* @uc, i8 %75 monotonic + %77 = or i8 %76, %75 + store i8 %77, i8* @uc, align 1 + %78 = load i8* @uc, align 1 + %79 = zext i8 %78 to i16 + %80 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %81 = atomicrmw or i16* %80, i16 %79 monotonic + %82 = or i16 %81, %79 + store i16 %82, i16* @ss, align 2 + %83 = load i8* @uc, align 1 + %84 = zext i8 %83 to i16 + %85 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %86 = atomicrmw or i16* %85, i16 %84 monotonic + %87 = or i16 %86, %84 + store i16 %87, i16* @us, align 2 + %88 = load i8* @uc, align 1 + %89 = zext i8 %88 to i32 + %90 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %91 = atomicrmw or i32* %90, i32 %89 monotonic + %92 = or i32 %91, %89 + store i32 %92, i32* @si, align 4 + %93 = load i8* @uc, align 1 + %94 = zext i8 %93 to i32 + %95 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %96 = atomicrmw or i32* %95, i32 %94 monotonic + %97 = or i32 %96, %94 + store i32 %97, i32* @ui, align 4 + %98 = load i8* @uc, align 1 + %99 = zext i8 %98 to i32 + %100 = bitcast i8* bitcast (i32* @sl to i8*) to i32* + %101 = atomicrmw or i32* %100, i32 %99 monotonic + %102 = or i32 %101, %99 + store i32 %102, i32* @sl, align 4 + %103 = load i8* @uc, align 1 + %104 = zext i8 %103 to i32 + %105 = bitcast i8* bitcast (i32* @ul to i8*) to i32* + %106 = atomicrmw or i32* %105, i32 %104 monotonic + %107 = or i32 %106, %104 + store i32 %107, i32* @ul, align 4 + %108 = load i8* @uc, align 1 + %109 = atomicrmw xor i8* @sc, i8 %108 monotonic + %110 = xor i8 %109, %108 + store i8 %110, i8* @sc, align 1 + %111 = load i8* @uc, align 1 + %112 = atomicrmw xor i8* @uc, i8 %111 monotonic + %113 = xor i8 %112, %111 + store i8 %113, i8* @uc, align 1 + %114 = 
load i8* @uc, align 1 + %115 = zext i8 %114 to i16 + %116 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %117 = atomicrmw xor i16* %116, i16 %115 monotonic + %118 = xor i16 %117, %115 + store i16 %118, i16* @ss, align 2 + %119 = load i8* @uc, align 1 + %120 = zext i8 %119 to i16 + %121 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %122 = atomicrmw xor i16* %121, i16 %120 monotonic + %123 = xor i16 %122, %120 + store i16 %123, i16* @us, align 2 + %124 = load i8* @uc, align 1 + %125 = zext i8 %124 to i32 + %126 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %127 = atomicrmw xor i32* %126, i32 %125 monotonic + %128 = xor i32 %127, %125 + store i32 %128, i32* @si, align 4 + %129 = load i8* @uc, align 1 + %130 = zext i8 %129 to i32 + %131 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %132 = atomicrmw xor i32* %131, i32 %130 monotonic + %133 = xor i32 %132, %130 + store i32 %133, i32* @ui, align 4 + %134 = load i8* @uc, align 1 + %135 = zext i8 %134 to i32 + %136 = bitcast i8* bitcast (i32* @sl to i8*) to i32* + %137 = atomicrmw xor i32* %136, i32 %135 monotonic + %138 = xor i32 %137, %135 + store i32 %138, i32* @sl, align 4 + %139 = load i8* @uc, align 1 + %140 = zext i8 %139 to i32 + %141 = bitcast i8* bitcast (i32* @ul to i8*) to i32* + %142 = atomicrmw xor i32* %141, i32 %140 monotonic + %143 = xor i32 %142, %140 + store i32 %143, i32* @ul, align 4 + %144 = load i8* @uc, align 1 + %145 = atomicrmw and i8* @sc, i8 %144 monotonic + %146 = and i8 %145, %144 + store i8 %146, i8* @sc, align 1 + %147 = load i8* @uc, align 1 + %148 = atomicrmw and i8* @uc, i8 %147 monotonic + %149 = and i8 %148, %147 + store i8 %149, i8* @uc, align 1 + %150 = load i8* @uc, align 1 + %151 = zext i8 %150 to i16 + %152 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %153 = atomicrmw and i16* %152, i16 %151 monotonic + %154 = and i16 %153, %151 + store i16 %154, i16* @ss, align 2 + %155 = load i8* @uc, align 1 + %156 = zext i8 %155 to i16 + %157 = bitcast i8* bitcast (i16* @us 
to i8*) to i16* + %158 = atomicrmw and i16* %157, i16 %156 monotonic + %159 = and i16 %158, %156 + store i16 %159, i16* @us, align 2 + %160 = load i8* @uc, align 1 + %161 = zext i8 %160 to i32 + %162 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %163 = atomicrmw and i32* %162, i32 %161 monotonic + %164 = and i32 %163, %161 + store i32 %164, i32* @si, align 4 + %165 = load i8* @uc, align 1 + %166 = zext i8 %165 to i32 + %167 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %168 = atomicrmw and i32* %167, i32 %166 monotonic + %169 = and i32 %168, %166 + store i32 %169, i32* @ui, align 4 + %170 = load i8* @uc, align 1 + %171 = zext i8 %170 to i32 + %172 = bitcast i8* bitcast (i32* @sl to i8*) to i32* + %173 = atomicrmw and i32* %172, i32 %171 monotonic + %174 = and i32 %173, %171 + store i32 %174, i32* @sl, align 4 + %175 = load i8* @uc, align 1 + %176 = zext i8 %175 to i32 + %177 = bitcast i8* bitcast (i32* @ul to i8*) to i32* + %178 = atomicrmw and i32* %177, i32 %176 monotonic + %179 = and i32 %178, %176 + store i32 %179, i32* @ul, align 4 + %180 = load i8* @uc, align 1 + %181 = atomicrmw nand i8* @sc, i8 %180 monotonic + %182 = xor i8 %181, -1 + %183 = and i8 %182, %180 + store i8 %183, i8* @sc, align 1 + %184 = load i8* @uc, align 1 + %185 = atomicrmw nand i8* @uc, i8 %184 monotonic + %186 = xor i8 %185, -1 + %187 = and i8 %186, %184 + store i8 %187, i8* @uc, align 1 + %188 = load i8* @uc, align 1 + %189 = zext i8 %188 to i16 + %190 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %191 = atomicrmw nand i16* %190, i16 %189 monotonic + %192 = xor i16 %191, -1 + %193 = and i16 %192, %189 + store i16 %193, i16* @ss, align 2 + %194 = load i8* @uc, align 1 + %195 = zext i8 %194 to i16 + %196 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %197 = atomicrmw nand i16* %196, i16 %195 monotonic + %198 = xor i16 %197, -1 + %199 = and i16 %198, %195 + store i16 %199, i16* @us, align 2 + %200 = load i8* @uc, align 1 + %201 = zext i8 %200 to i32 + %202 = bitcast i8* 
bitcast (i32* @si to i8*) to i32* + %203 = atomicrmw nand i32* %202, i32 %201 monotonic + %204 = xor i32 %203, -1 + %205 = and i32 %204, %201 + store i32 %205, i32* @si, align 4 + %206 = load i8* @uc, align 1 + %207 = zext i8 %206 to i32 + %208 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %209 = atomicrmw nand i32* %208, i32 %207 monotonic + %210 = xor i32 %209, -1 + %211 = and i32 %210, %207 + store i32 %211, i32* @ui, align 4 + %212 = load i8* @uc, align 1 + %213 = zext i8 %212 to i32 + %214 = bitcast i8* bitcast (i32* @sl to i8*) to i32* + %215 = atomicrmw nand i32* %214, i32 %213 monotonic + %216 = xor i32 %215, -1 + %217 = and i32 %216, %213 + store i32 %217, i32* @sl, align 4 + %218 = load i8* @uc, align 1 + %219 = zext i8 %218 to i32 + %220 = bitcast i8* bitcast (i32* @ul to i8*) to i32* + %221 = atomicrmw nand i32* %220, i32 %219 monotonic + %222 = xor i32 %221, -1 + %223 = and i32 %222, %219 + store i32 %223, i32* @ul, align 4 + br label %return + +return: ; preds = %entry + ret void +} + +define void @test_compare_and_swap() nounwind { +entry: + %0 = load i8* @uc, align 1 + %1 = load i8* @sc, align 1 + %2 = cmpxchg i8* @sc, i8 %0, i8 %1 monotonic + store i8 %2, i8* @sc, align 1 + %3 = load i8* @uc, align 1 + %4 = load i8* @sc, align 1 + %5 = cmpxchg i8* @uc, i8 %3, i8 %4 monotonic + store i8 %5, i8* @uc, align 1 + %6 = load i8* @uc, align 1 + %7 = zext i8 %6 to i16 + %8 = load i8* @sc, align 1 + %9 = sext i8 %8 to i16 + %10 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %11 = cmpxchg i16* %10, i16 %7, i16 %9 monotonic + store i16 %11, i16* @ss, align 2 + %12 = load i8* @uc, align 1 + %13 = zext i8 %12 to i16 + %14 = load i8* @sc, align 1 + %15 = sext i8 %14 to i16 + %16 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %17 = cmpxchg i16* %16, i16 %13, i16 %15 monotonic + store i16 %17, i16* @us, align 2 + %18 = load i8* @uc, align 1 + %19 = zext i8 %18 to i32 + %20 = load i8* @sc, align 1 + %21 = sext i8 %20 to i32 + %22 = bitcast i8* bitcast 
(i32* @si to i8*) to i32* + %23 = cmpxchg i32* %22, i32 %19, i32 %21 monotonic + store i32 %23, i32* @si, align 4 + %24 = load i8* @uc, align 1 + %25 = zext i8 %24 to i32 + %26 = load i8* @sc, align 1 + %27 = sext i8 %26 to i32 + %28 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %29 = cmpxchg i32* %28, i32 %25, i32 %27 monotonic + store i32 %29, i32* @ui, align 4 + %30 = load i8* @uc, align 1 + %31 = zext i8 %30 to i32 + %32 = load i8* @sc, align 1 + %33 = sext i8 %32 to i32 + %34 = bitcast i8* bitcast (i32* @sl to i8*) to i32* + %35 = cmpxchg i32* %34, i32 %31, i32 %33 monotonic + store i32 %35, i32* @sl, align 4 + %36 = load i8* @uc, align 1 + %37 = zext i8 %36 to i32 + %38 = load i8* @sc, align 1 + %39 = sext i8 %38 to i32 + %40 = bitcast i8* bitcast (i32* @ul to i8*) to i32* + %41 = cmpxchg i32* %40, i32 %37, i32 %39 monotonic + store i32 %41, i32* @ul, align 4 + %42 = load i8* @uc, align 1 + %43 = load i8* @sc, align 1 + %44 = cmpxchg i8* @sc, i8 %42, i8 %43 monotonic + %45 = icmp eq i8 %44, %42 + %46 = zext i1 %45 to i32 + store i32 %46, i32* @ui, align 4 + %47 = load i8* @uc, align 1 + %48 = load i8* @sc, align 1 + %49 = cmpxchg i8* @uc, i8 %47, i8 %48 monotonic + %50 = icmp eq i8 %49, %47 + %51 = zext i1 %50 to i32 + store i32 %51, i32* @ui, align 4 + %52 = load i8* @uc, align 1 + %53 = zext i8 %52 to i16 + %54 = load i8* @sc, align 1 + %55 = sext i8 %54 to i16 + %56 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %57 = cmpxchg i16* %56, i16 %53, i16 %55 monotonic + %58 = icmp eq i16 %57, %53 + %59 = zext i1 %58 to i32 + store i32 %59, i32* @ui, align 4 + %60 = load i8* @uc, align 1 + %61 = zext i8 %60 to i16 + %62 = load i8* @sc, align 1 + %63 = sext i8 %62 to i16 + %64 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %65 = cmpxchg i16* %64, i16 %61, i16 %63 monotonic + %66 = icmp eq i16 %65, %61 + %67 = zext i1 %66 to i32 + store i32 %67, i32* @ui, align 4 + %68 = load i8* @uc, align 1 + %69 = zext i8 %68 to i32 + %70 = load i8* @sc, align 1 + %71 
= sext i8 %70 to i32 + %72 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %73 = cmpxchg i32* %72, i32 %69, i32 %71 monotonic + %74 = icmp eq i32 %73, %69 + %75 = zext i1 %74 to i32 + store i32 %75, i32* @ui, align 4 + %76 = load i8* @uc, align 1 + %77 = zext i8 %76 to i32 + %78 = load i8* @sc, align 1 + %79 = sext i8 %78 to i32 + %80 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %81 = cmpxchg i32* %80, i32 %77, i32 %79 monotonic + %82 = icmp eq i32 %81, %77 + %83 = zext i1 %82 to i32 + store i32 %83, i32* @ui, align 4 + %84 = load i8* @uc, align 1 + %85 = zext i8 %84 to i32 + %86 = load i8* @sc, align 1 + %87 = sext i8 %86 to i32 + %88 = bitcast i8* bitcast (i32* @sl to i8*) to i32* + %89 = cmpxchg i32* %88, i32 %85, i32 %87 monotonic + %90 = icmp eq i32 %89, %85 + %91 = zext i1 %90 to i32 + store i32 %91, i32* @ui, align 4 + %92 = load i8* @uc, align 1 + %93 = zext i8 %92 to i32 + %94 = load i8* @sc, align 1 + %95 = sext i8 %94 to i32 + %96 = bitcast i8* bitcast (i32* @ul to i8*) to i32* + %97 = cmpxchg i32* %96, i32 %93, i32 %95 monotonic + %98 = icmp eq i32 %97, %93 + %99 = zext i1 %98 to i32 + store i32 %99, i32* @ui, align 4 + br label %return + +return: ; preds = %entry + ret void +} + +define void @test_lock() nounwind { +entry: + %0 = atomicrmw xchg i8* @sc, i8 1 monotonic + store i8 %0, i8* @sc, align 1 + %1 = atomicrmw xchg i8* @uc, i8 1 monotonic + store i8 %1, i8* @uc, align 1 + %2 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %3 = atomicrmw xchg i16* %2, i16 1 monotonic + store i16 %3, i16* @ss, align 2 + %4 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %5 = atomicrmw xchg i16* %4, i16 1 monotonic + store i16 %5, i16* @us, align 2 + %6 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %7 = atomicrmw xchg i32* %6, i32 1 monotonic + store i32 %7, i32* @si, align 4 + %8 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %9 = atomicrmw xchg i32* %8, i32 1 monotonic + store i32 %9, i32* @ui, align 4 + %10 = bitcast i8* bitcast (i32* @sl to 
i8*) to i32* + %11 = atomicrmw xchg i32* %10, i32 1 monotonic + store i32 %11, i32* @sl, align 4 + %12 = bitcast i8* bitcast (i32* @ul to i8*) to i32* + %13 = atomicrmw xchg i32* %12, i32 1 monotonic + store i32 %13, i32* @ul, align 4 + fence seq_cst + store volatile i8 0, i8* @sc, align 1 + store volatile i8 0, i8* @uc, align 1 + %14 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + store volatile i16 0, i16* %14, align 2 + %15 = bitcast i8* bitcast (i16* @us to i8*) to i16* + store volatile i16 0, i16* %15, align 2 + %16 = bitcast i8* bitcast (i32* @si to i8*) to i32* + store volatile i32 0, i32* %16, align 4 + %17 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + store volatile i32 0, i32* %17, align 4 + %18 = bitcast i8* bitcast (i32* @sl to i8*) to i32* + store volatile i32 0, i32* %18, align 4 + %19 = bitcast i8* bitcast (i32* @ul to i8*) to i32* + store volatile i32 0, i32* %19, align 4 + %20 = bitcast i8* bitcast (i64* @sll to i8*) to i64* + store volatile i64 0, i64* %20, align 8 + %21 = bitcast i8* bitcast (i64* @ull to i8*) to i64* + store volatile i64 0, i64* %21, align 8 + br label %return + +return: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/Atomics-64.ll b/src/LLVM/test/CodeGen/PowerPC/Atomics-64.ll new file mode 100644 index 0000000..d35b848 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/Atomics-64.ll
@@ -0,0 +1,714 @@ +; RUN: llc < %s -march=ppc64 -verify-machineinstrs +; +; This test is disabled until PPCISelLowering learns to insert proper 64-bit +; code for ATOMIC_CMP_SWAP. Currently, it is inserting 32-bit instructions with +; 64-bit operands which causes the machine code verifier to throw a tantrum. +; +; XFAIL: * + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128" +target triple = "powerpc64-apple-darwin9" + +@sc = common global i8 0 +@uc = common global i8 0 +@ss = common global i16 0 +@us = common global i16 0 +@si = common global i32 0 +@ui = common global i32 0 +@sl = common global i64 0, align 8 +@ul = common global i64 0, align 8 +@sll = common global i64 0, align 8 +@ull = common global i64 0, align 8 + +define void @test_op_ignore() nounwind { +entry: + %0 = atomicrmw add i8* @sc, i8 1 monotonic + %1 = atomicrmw add i8* @uc, i8 1 monotonic + %2 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %3 = atomicrmw add i16* %2, i16 1 monotonic + %4 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %5 = atomicrmw add i16* %4, i16 1 monotonic + %6 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %7 = atomicrmw add i32* %6, i32 1 monotonic + %8 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %9 = atomicrmw add i32* %8, i32 1 monotonic + %10 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %11 = atomicrmw add i64* %10, i64 1 monotonic + %12 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %13 = atomicrmw add i64* %12, i64 1 monotonic + %14 = atomicrmw sub i8* @sc, i8 1 monotonic + %15 = atomicrmw sub i8* @uc, i8 1 monotonic + %16 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %17 = atomicrmw sub i16* %16, i16 1 monotonic + %18 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %19 = atomicrmw sub i16* %18, i16 1 monotonic + %20 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %21 = atomicrmw sub i32* %20, i32 1 monotonic + %22 = bitcast i8* bitcast (i32* @ui 
to i8*) to i32* + %23 = atomicrmw sub i32* %22, i32 1 monotonic + %24 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %25 = atomicrmw sub i64* %24, i64 1 monotonic + %26 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %27 = atomicrmw sub i64* %26, i64 1 monotonic + %28 = atomicrmw or i8* @sc, i8 1 monotonic + %29 = atomicrmw or i8* @uc, i8 1 monotonic + %30 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %31 = atomicrmw or i16* %30, i16 1 monotonic + %32 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %33 = atomicrmw or i16* %32, i16 1 monotonic + %34 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %35 = atomicrmw or i32* %34, i32 1 monotonic + %36 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %37 = atomicrmw or i32* %36, i32 1 monotonic + %38 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %39 = atomicrmw or i64* %38, i64 1 monotonic + %40 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %41 = atomicrmw or i64* %40, i64 1 monotonic + %42 = atomicrmw xor i8* @sc, i8 1 monotonic + %43 = atomicrmw xor i8* @uc, i8 1 monotonic + %44 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %45 = atomicrmw xor i16* %44, i16 1 monotonic + %46 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %47 = atomicrmw xor i16* %46, i16 1 monotonic + %48 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %49 = atomicrmw xor i32* %48, i32 1 monotonic + %50 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %51 = atomicrmw xor i32* %50, i32 1 monotonic + %52 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %53 = atomicrmw xor i64* %52, i64 1 monotonic + %54 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %55 = atomicrmw xor i64* %54, i64 1 monotonic + %56 = atomicrmw and i8* @sc, i8 1 monotonic + %57 = atomicrmw and i8* @uc, i8 1 monotonic + %58 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %59 = atomicrmw and i16* %58, i16 1 monotonic + %60 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %61 = atomicrmw and i16* %60, i16 1 monotonic + %62 = bitcast i8* bitcast (i32* 
@si to i8*) to i32* + %63 = atomicrmw and i32* %62, i32 1 monotonic + %64 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %65 = atomicrmw and i32* %64, i32 1 monotonic + %66 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %67 = atomicrmw and i64* %66, i64 1 monotonic + %68 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %69 = atomicrmw and i64* %68, i64 1 monotonic + %70 = atomicrmw nand i8* @sc, i8 1 monotonic + %71 = atomicrmw nand i8* @uc, i8 1 monotonic + %72 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %73 = atomicrmw nand i16* %72, i16 1 monotonic + %74 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %75 = atomicrmw nand i16* %74, i16 1 monotonic + %76 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %77 = atomicrmw nand i32* %76, i32 1 monotonic + %78 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %79 = atomicrmw nand i32* %78, i32 1 monotonic + %80 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %81 = atomicrmw nand i64* %80, i64 1 monotonic + %82 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %83 = atomicrmw nand i64* %82, i64 1 monotonic + br label %return + +return: ; preds = %entry + ret void +} + +define void @test_fetch_and_op() nounwind { +entry: + %0 = atomicrmw add i8* @sc, i8 11 monotonic + store i8 %0, i8* @sc, align 1 + %1 = atomicrmw add i8* @uc, i8 11 monotonic + store i8 %1, i8* @uc, align 1 + %2 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %3 = atomicrmw add i16* %2, i16 11 monotonic + store i16 %3, i16* @ss, align 2 + %4 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %5 = atomicrmw add i16* %4, i16 11 monotonic + store i16 %5, i16* @us, align 2 + %6 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %7 = atomicrmw add i32* %6, i32 11 monotonic + store i32 %7, i32* @si, align 4 + %8 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %9 = atomicrmw add i32* %8, i32 11 monotonic + store i32 %9, i32* @ui, align 4 + %10 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %11 = atomicrmw add i64* %10, i64 11 monotonic + 
store i64 %11, i64* @sl, align 8 + %12 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %13 = atomicrmw add i64* %12, i64 11 monotonic + store i64 %13, i64* @ul, align 8 + %14 = atomicrmw sub i8* @sc, i8 11 monotonic + store i8 %14, i8* @sc, align 1 + %15 = atomicrmw sub i8* @uc, i8 11 monotonic + store i8 %15, i8* @uc, align 1 + %16 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %17 = atomicrmw sub i16* %16, i16 11 monotonic + store i16 %17, i16* @ss, align 2 + %18 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %19 = atomicrmw sub i16* %18, i16 11 monotonic + store i16 %19, i16* @us, align 2 + %20 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %21 = atomicrmw sub i32* %20, i32 11 monotonic + store i32 %21, i32* @si, align 4 + %22 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %23 = atomicrmw sub i32* %22, i32 11 monotonic + store i32 %23, i32* @ui, align 4 + %24 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %25 = atomicrmw sub i64* %24, i64 11 monotonic + store i64 %25, i64* @sl, align 8 + %26 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %27 = atomicrmw sub i64* %26, i64 11 monotonic + store i64 %27, i64* @ul, align 8 + %28 = atomicrmw or i8* @sc, i8 11 monotonic + store i8 %28, i8* @sc, align 1 + %29 = atomicrmw or i8* @uc, i8 11 monotonic + store i8 %29, i8* @uc, align 1 + %30 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %31 = atomicrmw or i16* %30, i16 11 monotonic + store i16 %31, i16* @ss, align 2 + %32 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %33 = atomicrmw or i16* %32, i16 11 monotonic + store i16 %33, i16* @us, align 2 + %34 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %35 = atomicrmw or i32* %34, i32 11 monotonic + store i32 %35, i32* @si, align 4 + %36 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %37 = atomicrmw or i32* %36, i32 11 monotonic + store i32 %37, i32* @ui, align 4 + %38 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %39 = atomicrmw or i64* %38, i64 11 monotonic + store i64 %39, i64* @sl, align 
8 + %40 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %41 = atomicrmw or i64* %40, i64 11 monotonic + store i64 %41, i64* @ul, align 8 + %42 = atomicrmw xor i8* @sc, i8 11 monotonic + store i8 %42, i8* @sc, align 1 + %43 = atomicrmw xor i8* @uc, i8 11 monotonic + store i8 %43, i8* @uc, align 1 + %44 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %45 = atomicrmw xor i16* %44, i16 11 monotonic + store i16 %45, i16* @ss, align 2 + %46 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %47 = atomicrmw xor i16* %46, i16 11 monotonic + store i16 %47, i16* @us, align 2 + %48 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %49 = atomicrmw xor i32* %48, i32 11 monotonic + store i32 %49, i32* @si, align 4 + %50 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %51 = atomicrmw xor i32* %50, i32 11 monotonic + store i32 %51, i32* @ui, align 4 + %52 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %53 = atomicrmw xor i64* %52, i64 11 monotonic + store i64 %53, i64* @sl, align 8 + %54 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %55 = atomicrmw xor i64* %54, i64 11 monotonic + store i64 %55, i64* @ul, align 8 + %56 = atomicrmw and i8* @sc, i8 11 monotonic + store i8 %56, i8* @sc, align 1 + %57 = atomicrmw and i8* @uc, i8 11 monotonic + store i8 %57, i8* @uc, align 1 + %58 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %59 = atomicrmw and i16* %58, i16 11 monotonic + store i16 %59, i16* @ss, align 2 + %60 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %61 = atomicrmw and i16* %60, i16 11 monotonic + store i16 %61, i16* @us, align 2 + %62 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %63 = atomicrmw and i32* %62, i32 11 monotonic + store i32 %63, i32* @si, align 4 + %64 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %65 = atomicrmw and i32* %64, i32 11 monotonic + store i32 %65, i32* @ui, align 4 + %66 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %67 = atomicrmw and i64* %66, i64 11 monotonic + store i64 %67, i64* @sl, align 8 + %68 = bitcast i8* 
bitcast (i64* @ul to i8*) to i64* + %69 = atomicrmw and i64* %68, i64 11 monotonic + store i64 %69, i64* @ul, align 8 + %70 = atomicrmw nand i8* @sc, i8 11 monotonic + store i8 %70, i8* @sc, align 1 + %71 = atomicrmw nand i8* @uc, i8 11 monotonic + store i8 %71, i8* @uc, align 1 + %72 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %73 = atomicrmw nand i16* %72, i16 11 monotonic + store i16 %73, i16* @ss, align 2 + %74 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %75 = atomicrmw nand i16* %74, i16 11 monotonic + store i16 %75, i16* @us, align 2 + %76 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %77 = atomicrmw nand i32* %76, i32 11 monotonic + store i32 %77, i32* @si, align 4 + %78 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %79 = atomicrmw nand i32* %78, i32 11 monotonic + store i32 %79, i32* @ui, align 4 + %80 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %81 = atomicrmw nand i64* %80, i64 11 monotonic + store i64 %81, i64* @sl, align 8 + %82 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %83 = atomicrmw nand i64* %82, i64 11 monotonic + store i64 %83, i64* @ul, align 8 + br label %return + +return: ; preds = %entry + ret void +} + +define void @test_op_and_fetch() nounwind { +entry: + %0 = load i8* @uc, align 1 + %1 = atomicrmw add i8* @sc, i8 %0 monotonic + %2 = add i8 %1, %0 + store i8 %2, i8* @sc, align 1 + %3 = load i8* @uc, align 1 + %4 = atomicrmw add i8* @uc, i8 %3 monotonic + %5 = add i8 %4, %3 + store i8 %5, i8* @uc, align 1 + %6 = load i8* @uc, align 1 + %7 = zext i8 %6 to i16 + %8 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %9 = atomicrmw add i16* %8, i16 %7 monotonic + %10 = add i16 %9, %7 + store i16 %10, i16* @ss, align 2 + %11 = load i8* @uc, align 1 + %12 = zext i8 %11 to i16 + %13 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %14 = atomicrmw add i16* %13, i16 %12 monotonic + %15 = add i16 %14, %12 + store i16 %15, i16* @us, align 2 + %16 = load i8* @uc, align 1 + %17 = zext i8 %16 to i32 + %18 = bitcast i8* 
bitcast (i32* @si to i8*) to i32* + %19 = atomicrmw add i32* %18, i32 %17 monotonic + %20 = add i32 %19, %17 + store i32 %20, i32* @si, align 4 + %21 = load i8* @uc, align 1 + %22 = zext i8 %21 to i32 + %23 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %24 = atomicrmw add i32* %23, i32 %22 monotonic + %25 = add i32 %24, %22 + store i32 %25, i32* @ui, align 4 + %26 = load i8* @uc, align 1 + %27 = zext i8 %26 to i64 + %28 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %29 = atomicrmw add i64* %28, i64 %27 monotonic + %30 = add i64 %29, %27 + store i64 %30, i64* @sl, align 8 + %31 = load i8* @uc, align 1 + %32 = zext i8 %31 to i64 + %33 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %34 = atomicrmw add i64* %33, i64 %32 monotonic + %35 = add i64 %34, %32 + store i64 %35, i64* @ul, align 8 + %36 = load i8* @uc, align 1 + %37 = atomicrmw sub i8* @sc, i8 %36 monotonic + %38 = sub i8 %37, %36 + store i8 %38, i8* @sc, align 1 + %39 = load i8* @uc, align 1 + %40 = atomicrmw sub i8* @uc, i8 %39 monotonic + %41 = sub i8 %40, %39 + store i8 %41, i8* @uc, align 1 + %42 = load i8* @uc, align 1 + %43 = zext i8 %42 to i16 + %44 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %45 = atomicrmw sub i16* %44, i16 %43 monotonic + %46 = sub i16 %45, %43 + store i16 %46, i16* @ss, align 2 + %47 = load i8* @uc, align 1 + %48 = zext i8 %47 to i16 + %49 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %50 = atomicrmw sub i16* %49, i16 %48 monotonic + %51 = sub i16 %50, %48 + store i16 %51, i16* @us, align 2 + %52 = load i8* @uc, align 1 + %53 = zext i8 %52 to i32 + %54 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %55 = atomicrmw sub i32* %54, i32 %53 monotonic + %56 = sub i32 %55, %53 + store i32 %56, i32* @si, align 4 + %57 = load i8* @uc, align 1 + %58 = zext i8 %57 to i32 + %59 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %60 = atomicrmw sub i32* %59, i32 %58 monotonic + %61 = sub i32 %60, %58 + store i32 %61, i32* @ui, align 4 + %62 = load i8* @uc, align 1 + %63 = 
zext i8 %62 to i64 + %64 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %65 = atomicrmw sub i64* %64, i64 %63 monotonic + %66 = sub i64 %65, %63 + store i64 %66, i64* @sl, align 8 + %67 = load i8* @uc, align 1 + %68 = zext i8 %67 to i64 + %69 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %70 = atomicrmw sub i64* %69, i64 %68 monotonic + %71 = sub i64 %70, %68 + store i64 %71, i64* @ul, align 8 + %72 = load i8* @uc, align 1 + %73 = atomicrmw or i8* @sc, i8 %72 monotonic + %74 = or i8 %73, %72 + store i8 %74, i8* @sc, align 1 + %75 = load i8* @uc, align 1 + %76 = atomicrmw or i8* @uc, i8 %75 monotonic + %77 = or i8 %76, %75 + store i8 %77, i8* @uc, align 1 + %78 = load i8* @uc, align 1 + %79 = zext i8 %78 to i16 + %80 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %81 = atomicrmw or i16* %80, i16 %79 monotonic + %82 = or i16 %81, %79 + store i16 %82, i16* @ss, align 2 + %83 = load i8* @uc, align 1 + %84 = zext i8 %83 to i16 + %85 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %86 = atomicrmw or i16* %85, i16 %84 monotonic + %87 = or i16 %86, %84 + store i16 %87, i16* @us, align 2 + %88 = load i8* @uc, align 1 + %89 = zext i8 %88 to i32 + %90 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %91 = atomicrmw or i32* %90, i32 %89 monotonic + %92 = or i32 %91, %89 + store i32 %92, i32* @si, align 4 + %93 = load i8* @uc, align 1 + %94 = zext i8 %93 to i32 + %95 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %96 = atomicrmw or i32* %95, i32 %94 monotonic + %97 = or i32 %96, %94 + store i32 %97, i32* @ui, align 4 + %98 = load i8* @uc, align 1 + %99 = zext i8 %98 to i64 + %100 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %101 = atomicrmw or i64* %100, i64 %99 monotonic + %102 = or i64 %101, %99 + store i64 %102, i64* @sl, align 8 + %103 = load i8* @uc, align 1 + %104 = zext i8 %103 to i64 + %105 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %106 = atomicrmw or i64* %105, i64 %104 monotonic + %107 = or i64 %106, %104 + store i64 %107, i64* @ul, align 
8 + %108 = load i8* @uc, align 1 + %109 = atomicrmw xor i8* @sc, i8 %108 monotonic + %110 = xor i8 %109, %108 + store i8 %110, i8* @sc, align 1 + %111 = load i8* @uc, align 1 + %112 = atomicrmw xor i8* @uc, i8 %111 monotonic + %113 = xor i8 %112, %111 + store i8 %113, i8* @uc, align 1 + %114 = load i8* @uc, align 1 + %115 = zext i8 %114 to i16 + %116 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %117 = atomicrmw xor i16* %116, i16 %115 monotonic + %118 = xor i16 %117, %115 + store i16 %118, i16* @ss, align 2 + %119 = load i8* @uc, align 1 + %120 = zext i8 %119 to i16 + %121 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %122 = atomicrmw xor i16* %121, i16 %120 monotonic + %123 = xor i16 %122, %120 + store i16 %123, i16* @us, align 2 + %124 = load i8* @uc, align 1 + %125 = zext i8 %124 to i32 + %126 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %127 = atomicrmw xor i32* %126, i32 %125 monotonic + %128 = xor i32 %127, %125 + store i32 %128, i32* @si, align 4 + %129 = load i8* @uc, align 1 + %130 = zext i8 %129 to i32 + %131 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %132 = atomicrmw xor i32* %131, i32 %130 monotonic + %133 = xor i32 %132, %130 + store i32 %133, i32* @ui, align 4 + %134 = load i8* @uc, align 1 + %135 = zext i8 %134 to i64 + %136 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %137 = atomicrmw xor i64* %136, i64 %135 monotonic + %138 = xor i64 %137, %135 + store i64 %138, i64* @sl, align 8 + %139 = load i8* @uc, align 1 + %140 = zext i8 %139 to i64 + %141 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %142 = atomicrmw xor i64* %141, i64 %140 monotonic + %143 = xor i64 %142, %140 + store i64 %143, i64* @ul, align 8 + %144 = load i8* @uc, align 1 + %145 = atomicrmw and i8* @sc, i8 %144 monotonic + %146 = and i8 %145, %144 + store i8 %146, i8* @sc, align 1 + %147 = load i8* @uc, align 1 + %148 = atomicrmw and i8* @uc, i8 %147 monotonic + %149 = and i8 %148, %147 + store i8 %149, i8* @uc, align 1 + %150 = load i8* @uc, align 1 + %151 
= zext i8 %150 to i16 + %152 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %153 = atomicrmw and i16* %152, i16 %151 monotonic + %154 = and i16 %153, %151 + store i16 %154, i16* @ss, align 2 + %155 = load i8* @uc, align 1 + %156 = zext i8 %155 to i16 + %157 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %158 = atomicrmw and i16* %157, i16 %156 monotonic + %159 = and i16 %158, %156 + store i16 %159, i16* @us, align 2 + %160 = load i8* @uc, align 1 + %161 = zext i8 %160 to i32 + %162 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %163 = atomicrmw and i32* %162, i32 %161 monotonic + %164 = and i32 %163, %161 + store i32 %164, i32* @si, align 4 + %165 = load i8* @uc, align 1 + %166 = zext i8 %165 to i32 + %167 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %168 = atomicrmw and i32* %167, i32 %166 monotonic + %169 = and i32 %168, %166 + store i32 %169, i32* @ui, align 4 + %170 = load i8* @uc, align 1 + %171 = zext i8 %170 to i64 + %172 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %173 = atomicrmw and i64* %172, i64 %171 monotonic + %174 = and i64 %173, %171 + store i64 %174, i64* @sl, align 8 + %175 = load i8* @uc, align 1 + %176 = zext i8 %175 to i64 + %177 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %178 = atomicrmw and i64* %177, i64 %176 monotonic + %179 = and i64 %178, %176 + store i64 %179, i64* @ul, align 8 + %180 = load i8* @uc, align 1 + %181 = atomicrmw nand i8* @sc, i8 %180 monotonic + %182 = xor i8 %181, -1 + %183 = and i8 %182, %180 + store i8 %183, i8* @sc, align 1 + %184 = load i8* @uc, align 1 + %185 = atomicrmw nand i8* @uc, i8 %184 monotonic + %186 = xor i8 %185, -1 + %187 = and i8 %186, %184 + store i8 %187, i8* @uc, align 1 + %188 = load i8* @uc, align 1 + %189 = zext i8 %188 to i16 + %190 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %191 = atomicrmw nand i16* %190, i16 %189 monotonic + %192 = xor i16 %191, -1 + %193 = and i16 %192, %189 + store i16 %193, i16* @ss, align 2 + %194 = load i8* @uc, align 1 + %195 = zext i8 
%194 to i16 + %196 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %197 = atomicrmw nand i16* %196, i16 %195 monotonic + %198 = xor i16 %197, -1 + %199 = and i16 %198, %195 + store i16 %199, i16* @us, align 2 + %200 = load i8* @uc, align 1 + %201 = zext i8 %200 to i32 + %202 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %203 = atomicrmw nand i32* %202, i32 %201 monotonic + %204 = xor i32 %203, -1 + %205 = and i32 %204, %201 + store i32 %205, i32* @si, align 4 + %206 = load i8* @uc, align 1 + %207 = zext i8 %206 to i32 + %208 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %209 = atomicrmw nand i32* %208, i32 %207 monotonic + %210 = xor i32 %209, -1 + %211 = and i32 %210, %207 + store i32 %211, i32* @ui, align 4 + %212 = load i8* @uc, align 1 + %213 = zext i8 %212 to i64 + %214 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %215 = atomicrmw nand i64* %214, i64 %213 monotonic + %216 = xor i64 %215, -1 + %217 = and i64 %216, %213 + store i64 %217, i64* @sl, align 8 + %218 = load i8* @uc, align 1 + %219 = zext i8 %218 to i64 + %220 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %221 = atomicrmw nand i64* %220, i64 %219 monotonic + %222 = xor i64 %221, -1 + %223 = and i64 %222, %219 + store i64 %223, i64* @ul, align 8 + br label %return + +return: ; preds = %entry + ret void +} + +define void @test_compare_and_swap() nounwind { +entry: + %0 = load i8* @uc, align 1 + %1 = load i8* @sc, align 1 + %2 = cmpxchg i8* @sc, i8 %0, i8 %1 monotonic + store i8 %2, i8* @sc, align 1 + %3 = load i8* @uc, align 1 + %4 = load i8* @sc, align 1 + %5 = cmpxchg i8* @uc, i8 %3, i8 %4 monotonic + store i8 %5, i8* @uc, align 1 + %6 = load i8* @uc, align 1 + %7 = zext i8 %6 to i16 + %8 = load i8* @sc, align 1 + %9 = sext i8 %8 to i16 + %10 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %11 = cmpxchg i16* %10, i16 %7, i16 %9 monotonic + store i16 %11, i16* @ss, align 2 + %12 = load i8* @uc, align 1 + %13 = zext i8 %12 to i16 + %14 = load i8* @sc, align 1 + %15 = sext i8 %14 to 
i16 + %16 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %17 = cmpxchg i16* %16, i16 %13, i16 %15 monotonic + store i16 %17, i16* @us, align 2 + %18 = load i8* @uc, align 1 + %19 = zext i8 %18 to i32 + %20 = load i8* @sc, align 1 + %21 = sext i8 %20 to i32 + %22 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %23 = cmpxchg i32* %22, i32 %19, i32 %21 monotonic + store i32 %23, i32* @si, align 4 + %24 = load i8* @uc, align 1 + %25 = zext i8 %24 to i32 + %26 = load i8* @sc, align 1 + %27 = sext i8 %26 to i32 + %28 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %29 = cmpxchg i32* %28, i32 %25, i32 %27 monotonic + store i32 %29, i32* @ui, align 4 + %30 = load i8* @uc, align 1 + %31 = zext i8 %30 to i64 + %32 = load i8* @sc, align 1 + %33 = sext i8 %32 to i64 + %34 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %35 = cmpxchg i64* %34, i64 %31, i64 %33 monotonic + store i64 %35, i64* @sl, align 8 + %36 = load i8* @uc, align 1 + %37 = zext i8 %36 to i64 + %38 = load i8* @sc, align 1 + %39 = sext i8 %38 to i64 + %40 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %41 = cmpxchg i64* %40, i64 %37, i64 %39 monotonic + store i64 %41, i64* @ul, align 8 + %42 = load i8* @uc, align 1 + %43 = load i8* @sc, align 1 + %44 = cmpxchg i8* @sc, i8 %42, i8 %43 monotonic + %45 = icmp eq i8 %44, %42 + %46 = zext i1 %45 to i8 + %47 = zext i8 %46 to i32 + store i32 %47, i32* @ui, align 4 + %48 = load i8* @uc, align 1 + %49 = load i8* @sc, align 1 + %50 = cmpxchg i8* @uc, i8 %48, i8 %49 monotonic + %51 = icmp eq i8 %50, %48 + %52 = zext i1 %51 to i8 + %53 = zext i8 %52 to i32 + store i32 %53, i32* @ui, align 4 + %54 = load i8* @uc, align 1 + %55 = zext i8 %54 to i16 + %56 = load i8* @sc, align 1 + %57 = sext i8 %56 to i16 + %58 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %59 = cmpxchg i16* %58, i16 %55, i16 %57 monotonic + %60 = icmp eq i16 %59, %55 + %61 = zext i1 %60 to i8 + %62 = zext i8 %61 to i32 + store i32 %62, i32* @ui, align 4 + %63 = load i8* @uc, align 1 + %64 = 
zext i8 %63 to i16 + %65 = load i8* @sc, align 1 + %66 = sext i8 %65 to i16 + %67 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %68 = cmpxchg i16* %67, i16 %64, i16 %66 monotonic + %69 = icmp eq i16 %68, %64 + %70 = zext i1 %69 to i8 + %71 = zext i8 %70 to i32 + store i32 %71, i32* @ui, align 4 + %72 = load i8* @uc, align 1 + %73 = zext i8 %72 to i32 + %74 = load i8* @sc, align 1 + %75 = sext i8 %74 to i32 + %76 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %77 = cmpxchg i32* %76, i32 %73, i32 %75 monotonic + %78 = icmp eq i32 %77, %73 + %79 = zext i1 %78 to i8 + %80 = zext i8 %79 to i32 + store i32 %80, i32* @ui, align 4 + %81 = load i8* @uc, align 1 + %82 = zext i8 %81 to i32 + %83 = load i8* @sc, align 1 + %84 = sext i8 %83 to i32 + %85 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %86 = cmpxchg i32* %85, i32 %82, i32 %84 monotonic + %87 = icmp eq i32 %86, %82 + %88 = zext i1 %87 to i8 + %89 = zext i8 %88 to i32 + store i32 %89, i32* @ui, align 4 + %90 = load i8* @uc, align 1 + %91 = zext i8 %90 to i64 + %92 = load i8* @sc, align 1 + %93 = sext i8 %92 to i64 + %94 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %95 = cmpxchg i64* %94, i64 %91, i64 %93 monotonic + %96 = icmp eq i64 %95, %91 + %97 = zext i1 %96 to i8 + %98 = zext i8 %97 to i32 + store i32 %98, i32* @ui, align 4 + %99 = load i8* @uc, align 1 + %100 = zext i8 %99 to i64 + %101 = load i8* @sc, align 1 + %102 = sext i8 %101 to i64 + %103 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %104 = cmpxchg i64* %103, i64 %100, i64 %102 monotonic + %105 = icmp eq i64 %104, %100 + %106 = zext i1 %105 to i8 + %107 = zext i8 %106 to i32 + store i32 %107, i32* @ui, align 4 + br label %return + +return: ; preds = %entry + ret void +} + +define void @test_lock() nounwind { +entry: + %0 = atomicrmw xchg i8* @sc, i8 1 monotonic + store i8 %0, i8* @sc, align 1 + %1 = atomicrmw xchg i8* @uc, i8 1 monotonic + store i8 %1, i8* @uc, align 1 + %2 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %3 = 
atomicrmw xchg i16* %2, i16 1 monotonic + store i16 %3, i16* @ss, align 2 + %4 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %5 = atomicrmw xchg i16* %4, i16 1 monotonic + store i16 %5, i16* @us, align 2 + %6 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %7 = atomicrmw xchg i32* %6, i32 1 monotonic + store i32 %7, i32* @si, align 4 + %8 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %9 = atomicrmw xchg i32* %8, i32 1 monotonic + store i32 %9, i32* @ui, align 4 + %10 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %11 = atomicrmw xchg i64* %10, i64 1 monotonic + store i64 %11, i64* @sl, align 8 + %12 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %13 = atomicrmw xchg i64* %12, i64 1 monotonic + store i64 %13, i64* @ul, align 8 + fence seq_cst + store volatile i8 0, i8* @sc, align 1 + store volatile i8 0, i8* @uc, align 1 + %14 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + store volatile i16 0, i16* %14, align 2 + %15 = bitcast i8* bitcast (i16* @us to i8*) to i16* + store volatile i16 0, i16* %15, align 2 + %16 = bitcast i8* bitcast (i32* @si to i8*) to i32* + store volatile i32 0, i32* %16, align 4 + %17 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + store volatile i32 0, i32* %17, align 4 + %18 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + store volatile i64 0, i64* %18, align 8 + %19 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + store volatile i64 0, i64* %19, align 8 + %20 = bitcast i8* bitcast (i64* @sll to i8*) to i64* + store volatile i64 0, i64* %20, align 8 + %21 = bitcast i8* bitcast (i64* @ull to i8*) to i64* + store volatile i64 0, i64* %21, align 8 + br label %return + +return: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/Frames-alloca.ll b/src/LLVM/test/CodeGen/PowerPC/Frames-alloca.ll new file mode 100644 index 0000000..38be275 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/Frames-alloca.ll
@@ -0,0 +1,30 @@ +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | FileCheck %s -check-prefix=PPC32 +; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 | FileCheck %s -check-prefix=PPC64 +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | FileCheck %s -check-prefix=PPC32-NOFP +; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | FileCheck %s -check-prefix=PPC64-NOFP +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -enable-ppc32-regscavenger | FileCheck %s -check-prefix=PPC32 +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -enable-ppc32-regscavenger | FileCheck %s -check-prefix=PPC32-RS +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim -enable-ppc32-regscavenger | FileCheck %s -check-prefix=PPC32-RS-NOFP + +; CHECK-PPC32: stw r31, -4(r1) +; CHECK-PPC32: lwz r1, 0(r1) +; CHECK-PPC32: lwz r31, -4(r1) +; CHECK-PPC32-NOFP: stw r31, -4(r1) +; CHECK-PPC32-NOFP: lwz r1, 0(r1) +; CHECK-PPC32-NOFP: lwz r31, -4(r1) +; CHECK-PPC32-RS: stwu r1, -80(r1) +; CHECK-PPC32-RS-NOFP: stwu r1, -80(r1) + +; CHECK-PPC64: std r31, -8(r1) +; CHECK-PPC64: stdu r1, -128(r1) +; CHECK-PPC64: ld r1, 0(r1) +; CHECK-PPC64: ld r31, -8(r1) +; CHECK-PPC64-NOFP: std r31, -8(r1) +; CHECK-PPC64-NOFP: stdu r1, -128(r1) +; CHECK-PPC64-NOFP: ld r1, 0(r1) +; CHECK-PPC64-NOFP: ld r31, -8(r1) + +define i32* @f1(i32 %n) nounwind { + %tmp = alloca i32, i32 %n ; <i32*> [#uses=1] + ret i32* %tmp +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/Frames-large.ll b/src/LLVM/test/CodeGen/PowerPC/Frames-large.ll new file mode 100644 index 0000000..48dd565 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/Frames-large.ll
@@ -0,0 +1,52 @@ +; RUN: llvm-as < %s > %t.bc +; RUN: llc < %t.bc -march=ppc32 | FileCheck %s -check-prefix=PPC32-NOFP +; RUN: llc < %t.bc -march=ppc32 -disable-fp-elim | FileCheck %s -check-prefix=PPC32-FP + +; RUN: llc < %t.bc -march=ppc64 | FileCheck %s -check-prefix=PPC64-NOFP +; RUN: llc < %t.bc -march=ppc64 -disable-fp-elim | FileCheck %s -check-prefix=PPC64-FP + + +target triple = "powerpc-apple-darwin8" + +define i32* @f1() nounwind { + %tmp = alloca i32, i32 8191 ; <i32*> [#uses=1] + ret i32* %tmp +} + +; PPC32-NOFP: _f1: +; PPC32-NOFP: lis r0, -1 +; PPC32-NOFP: ori r0, r0, 32704 +; PPC32-NOFP: stwux r1, r1, r0 +; PPC32-NOFP: addi r3, r1, 68 +; PPC32-NOFP: lwz r1, 0(r1) +; PPC32-NOFP: blr + +; PPC32-FP: _f1: +; PPC32-FP: stw r31, -4(r1) +; PPC32-FP: lis r0, -1 +; PPC32-FP: ori r0, r0, 32704 +; PPC32-FP: stwux r1, r1, r0 +; ... +; PPC32-FP: lwz r1, 0(r1) +; PPC32-FP: lwz r31, -4(r1) +; PPC32-FP: blr + + +; PPC64-NOFP: _f1: +; PPC64-NOFP: lis r0, -1 +; PPC64-NOFP: ori r0, r0, 32656 +; PPC64-NOFP: stdux r1, r1, r0 +; PPC64-NOFP: addi r3, r1, 116 +; PPC64-NOFP: ld r1, 0(r1) +; PPC64-NOFP: blr + + +; PPC64-FP: _f1: +; PPC64-FP: std r31, -8(r1) +; PPC64-FP: lis r0, -1 +; PPC64-FP: ori r0, r0, 32640 +; PPC64-FP: stdux r1, r1, r0 +; ... +; PPC64-FP: ld r1, 0(r1) +; PPC64-FP: ld r31, -8(r1) +; PPC64-FP: blr
diff --git a/src/LLVM/test/CodeGen/PowerPC/Frames-leaf.ll b/src/LLVM/test/CodeGen/PowerPC/Frames-leaf.ll new file mode 100644 index 0000000..0b78491 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/Frames-leaf.ll
@@ -0,0 +1,37 @@ +; RUN: llc < %s -march=ppc32 | \ +; RUN: not grep {stw r31, 20(r1)} +; RUN: llc < %s -march=ppc32 | \ +; RUN: not grep {stwu r1, -.*(r1)} +; RUN: llc < %s -march=ppc32 | \ +; RUN: not grep {addi r1, r1, } +; RUN: llc < %s -march=ppc32 | \ +; RUN: not grep {lwz r31, 20(r1)} +; RUN: llc < %s -march=ppc32 -disable-fp-elim | \ +; RUN: not grep {stw r31, 20(r1)} +; RUN: llc < %s -march=ppc32 -disable-fp-elim | \ +; RUN: not grep {stwu r1, -.*(r1)} +; RUN: llc < %s -march=ppc32 -disable-fp-elim | \ +; RUN: not grep {addi r1, r1, } +; RUN: llc < %s -march=ppc32 -disable-fp-elim | \ +; RUN: not grep {lwz r31, 20(r1)} +; RUN: llc < %s -march=ppc64 | \ +; RUN: not grep {std r31, 40(r1)} +; RUN: llc < %s -march=ppc64 | \ +; RUN: not grep {stdu r1, -.*(r1)} +; RUN: llc < %s -march=ppc64 | \ +; RUN: not grep {addi r1, r1, } +; RUN: llc < %s -march=ppc64 | \ +; RUN: not grep {ld r31, 40(r1)} +; RUN: llc < %s -march=ppc64 -disable-fp-elim | \ +; RUN: not grep {stw r31, 40(r1)} +; RUN: llc < %s -march=ppc64 -disable-fp-elim | \ +; RUN: not grep {stdu r1, -.*(r1)} +; RUN: llc < %s -march=ppc64 -disable-fp-elim | \ +; RUN: not grep {addi r1, r1, } +; RUN: llc < %s -march=ppc64 -disable-fp-elim | \ +; RUN: not grep {ld r31, 40(r1)} + +define i32* @f1() { + %tmp = alloca i32, i32 2 ; <i32*> [#uses=1] + ret i32* %tmp +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/Frames-small.ll b/src/LLVM/test/CodeGen/PowerPC/Frames-small.ll new file mode 100644 index 0000000..9e0403a --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/Frames-small.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -o %t1 +; RUN: not grep {stw r31, -4(r1)} %t1 +; RUN: grep {stwu r1, -16448(r1)} %t1 +; RUN: grep {addi r1, r1, 16448} %t1 +; RUN: llc < %s -march=ppc32 | \ +; RUN: not grep {lwz r31, -4(r1)} +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim \ +; RUN: -o %t2 +; RUN: grep {stw r31, -4(r1)} %t2 +; RUN: grep {stwu r1, -16448(r1)} %t2 +; RUN: grep {addi r1, r1, 16448} %t2 +; RUN: grep {lwz r31, -4(r1)} %t2 +; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 -o %t3 +; RUN: not grep {std r31, -8(r1)} %t3 +; RUN: grep {stdu r1, -16496(r1)} %t3 +; RUN: grep {addi r1, r1, 16496} %t3 +; RUN: not grep {ld r31, -8(r1)} %t3 +; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim \ +; RUN: -o %t4 +; RUN: grep {std r31, -8(r1)} %t4 +; RUN: grep {stdu r1, -16512(r1)} %t4 +; RUN: grep {addi r1, r1, 16512} %t4 +; RUN: grep {ld r31, -8(r1)} %t4 + +define i32* @f1() { + %tmp = alloca i32, i32 4095 ; <i32*> [#uses=1] + ret i32* %tmp +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll b/src/LLVM/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll new file mode 100644 index 0000000..431f8ac --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin | \ +; RUN: grep {stw r3, 32751} +; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \ +; RUN: grep {stw r3, 32751} +; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \ +; RUN: grep {std r3, 9024} + +define void @test() nounwind { + store i32 0, i32* inttoptr (i64 48725999 to i32*) + ret void +} + +define void @test2() nounwind { + store i64 0, i64* inttoptr (i64 74560 to i64*) + ret void +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/addc.ll b/src/LLVM/test/CodeGen/PowerPC/addc.ll new file mode 100644 index 0000000..83b7177 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/addc.ll
@@ -0,0 +1,33 @@ +; All of these should be codegen'd without loading immediates +; RUN: llc < %s -mtriple=powerpc-apple-darwin | FileCheck %s + +define i64 @add_ll(i64 %a, i64 %b) nounwind { +entry: + %tmp.2 = add i64 %b, %a ; <i64> [#uses=1] + ret i64 %tmp.2 +; CHECK: add_ll: +; CHECK: addc r4, r6, r4 +; CHECK: adde r3, r5, r3 +; CHECK: blr +} + +define i64 @add_l_5(i64 %a) nounwind { +entry: + %tmp.1 = add i64 %a, 5 ; <i64> [#uses=1] + ret i64 %tmp.1 +; CHECK: add_l_5: +; CHECK: addic r4, r4, 5 +; CHECK: addze r3, r3 +; CHECK: blr +} + +define i64 @add_l_m5(i64 %a) nounwind { +entry: + %tmp.1 = add i64 %a, -5 ; <i64> [#uses=1] + ret i64 %tmp.1 +; CHECK: add_l_m5: +; CHECK: addic r4, r4, -5 +; CHECK: addme r3, r3 +; CHECK: blr +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/addi-reassoc.ll b/src/LLVM/test/CodeGen/PowerPC/addi-reassoc.ll new file mode 100644 index 0000000..31846a1 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/addi-reassoc.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -march=ppc32 | not grep addi + + %struct.X = type { [5 x i8] } + +define i32 @test1([4 x i32]* %P, i32 %i) { + %tmp.2 = add i32 %i, 2 ; <i32> [#uses=1] + %tmp.4 = getelementptr [4 x i32]* %P, i32 %tmp.2, i32 1 ; <i32*> [#uses=1] + %tmp.5 = load i32* %tmp.4 ; <i32> [#uses=1] + ret i32 %tmp.5 +} + +define i32 @test2(%struct.X* %P, i32 %i) { + %tmp.2 = add i32 %i, 2 ; <i32> [#uses=1] + %tmp.5 = getelementptr %struct.X* %P, i32 %tmp.2, i32 0, i32 1 ; <i8*> [#uses=1] + %tmp.6 = load i8* %tmp.5 ; <i8> [#uses=1] + %tmp.7 = sext i8 %tmp.6 to i32 ; <i32> [#uses=1] + ret i32 %tmp.7 +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/align.ll b/src/LLVM/test/CodeGen/PowerPC/align.ll new file mode 100644 index 0000000..7099a06 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/align.ll
@@ -0,0 +1,46 @@ +; RUN: llc < %s -mtriple=powerpc-linux-gnu | FileCheck %s -check-prefix=ELF +; RUN: llc < %s -mtriple=powerpc-apple-darwin9 | FileCheck %s -check-prefix=DARWIN +; RUN: llc < %s -mtriple=powerpc-apple-darwin8 | FileCheck %s -check-prefix=DARWIN8 + +@a = global i1 true +; no alignment + +@b = global i8 1 +; no alignment + +@c = global i16 2 +;ELF: .align 1 +;ELF: c: +;DARWIN: .align 1 +;DARWIN: _c: + +@d = global i32 3 +;ELF: .align 2 +;ELF: d: +;DARWIN: .align 2 +;DARWIN: _d: + +@e = global i64 4 +;ELF: .align 3 +;ELF: e +;DARWIN: .align 3 +;DARWIN: _e: + +@f = global float 5.0 +;ELF: .align 2 +;ELF: f: +;DARWIN: .align 2 +;DARWIN: _f: + +@g = global double 6.0 +;ELF: .align 3 +;ELF: g: +;DARWIN: .align 3 +;DARWIN: _g: + +@bar = common global [75 x i8] zeroinitializer, align 128 +;ELF: .comm bar,75,128 +;DARWIN: .comm _bar,75,7 + +;; Darwin8 doesn't support aligned comm. Just miscompile this. +; DARWIN8: .comm _bar,75 ;
diff --git a/src/LLVM/test/CodeGen/PowerPC/and-branch.ll b/src/LLVM/test/CodeGen/PowerPC/and-branch.ll new file mode 100644 index 0000000..9cb25f5 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/and-branch.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=ppc32 | not grep mfcr + +define void @foo(i32 %X, i32 %Y, i32 %Z) { +entry: + %tmp = icmp eq i32 %X, 0 ; <i1> [#uses=1] + %tmp3 = icmp slt i32 %Y, 5 ; <i1> [#uses=1] + %tmp4 = and i1 %tmp3, %tmp ; <i1> [#uses=1] + br i1 %tmp4, label %cond_true, label %UnifiedReturnBlock +cond_true: ; preds = %entry + %tmp5 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0] + ret void +UnifiedReturnBlock: ; preds = %entry + ret void +} + +declare i32 @bar(...) +
diff --git a/src/LLVM/test/CodeGen/PowerPC/and-elim.ll b/src/LLVM/test/CodeGen/PowerPC/and-elim.ll new file mode 100644 index 0000000..dcc35b6 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/and-elim.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=ppc32 | not grep rlwin + +define void @test(i8* %P) { + %W = load i8* %P + %X = shl i8 %W, 1 + %Y = add i8 %X, 2 + %Z = and i8 %Y, 254 ; dead and + store i8 %Z, i8* %P + ret void +} + +define zeroext i16 @test2(i16 zeroext %crc) { + ; No and's should be needed for the i16s here. + %tmp.1 = lshr i16 %crc, 1 + %tmp.7 = xor i16 %tmp.1, 40961 + ret i16 %tmp.7 +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/and-imm.ll b/src/LLVM/test/CodeGen/PowerPC/and-imm.ll new file mode 100644 index 0000000..e3ce799 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/and-imm.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=ppc32 | not grep {ori\\|lis} + +; andi. r3, r3, 32769 +define i32 @test(i32 %X) { + %Y = and i32 %X, 32769 ; <i32> [#uses=1] + ret i32 %Y +} + +; andis. r3, r3, 32769 +define i32 @test2(i32 %X) { + %Y = and i32 %X, -2147418112 ; <i32> [#uses=1] + ret i32 %Y +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/and_add.ll b/src/LLVM/test/CodeGen/PowerPC/and_add.ll new file mode 100644 index 0000000..a69101b --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/and_add.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=ppc32 -o %t +; RUN: grep slwi %t +; RUN: not grep addi %t +; RUN: not grep rlwinm %t + +define i32 @test(i32 %A) { + ;; shift + %B = mul i32 %A, 8 ; <i32> [#uses=1] + ;; dead, no demanded bits. + %C = add i32 %B, 7 ; <i32> [#uses=1] + ;; dead once add is gone. + %D = and i32 %C, -8 ; <i32> [#uses=1] + ret i32 %D +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/and_sext.ll b/src/LLVM/test/CodeGen/PowerPC/and_sext.ll new file mode 100644 index 0000000..95b59ed --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/and_sext.ll
@@ -0,0 +1,28 @@ +; These tests should not contain a sign extend. +; RUN: llc < %s -march=ppc32 | not grep extsh +; RUN: llc < %s -march=ppc32 | not grep extsb + +define i32 @test1(i32 %mode.0.i.0) { + %tmp.79 = trunc i32 %mode.0.i.0 to i16 + %tmp.80 = sext i16 %tmp.79 to i32 + %tmp.81 = and i32 %tmp.80, 24 + ret i32 %tmp.81 +} + +define signext i16 @test2(i16 signext %X, i16 signext %x) { + %tmp = sext i16 %X to i32 + %tmp1 = sext i16 %x to i32 + %tmp2 = add i32 %tmp, %tmp1 + %tmp4 = ashr i32 %tmp2, 1 + %tmp5 = trunc i32 %tmp4 to i16 + %tmp45 = sext i16 %tmp5 to i32 + %retval = trunc i32 %tmp45 to i16 + ret i16 %retval +} + +define signext i16 @test3(i32 zeroext %X) { + %tmp1 = lshr i32 %X, 16 + %tmp2 = trunc i32 %tmp1 to i16 + ret i16 %tmp2 +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/and_sra.ll b/src/LLVM/test/CodeGen/PowerPC/and_sra.ll new file mode 100644 index 0000000..241033c --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/and_sra.ll
@@ -0,0 +1,27 @@ +; Neither of these functions should contain algebraic right shifts +; RUN: llc < %s -march=ppc32 | not grep srawi + +define i32 @test1(i32 %mode.0.i.0) { + %tmp.79 = bitcast i32 %mode.0.i.0 to i32 ; <i32> [#uses=1] + %tmp.80 = ashr i32 %tmp.79, 15 ; <i32> [#uses=1] + %tmp.81 = and i32 %tmp.80, 24 ; <i32> [#uses=1] + ret i32 %tmp.81 +} + +define i32 @test2(i32 %mode.0.i.0) { + %tmp.79 = bitcast i32 %mode.0.i.0 to i32 ; <i32> [#uses=1] + %tmp.80 = ashr i32 %tmp.79, 15 ; <i32> [#uses=1] + %tmp.81 = lshr i32 %mode.0.i.0, 16 ; <i32> [#uses=1] + %tmp.82 = bitcast i32 %tmp.81 to i32 ; <i32> [#uses=1] + %tmp.83 = and i32 %tmp.80, %tmp.82 ; <i32> [#uses=1] + ret i32 %tmp.83 +} + +define i32 @test3(i32 %specbits.6.1) { + %tmp.2540 = ashr i32 %specbits.6.1, 11 ; <i32> [#uses=1] + %tmp.2541 = bitcast i32 %tmp.2540 to i32 ; <i32> [#uses=1] + %tmp.2542 = shl i32 %tmp.2541, 13 ; <i32> [#uses=1] + %tmp.2543 = and i32 %tmp.2542, 8192 ; <i32> [#uses=1] + ret i32 %tmp.2543 +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/atomic-1.ll b/src/LLVM/test/CodeGen/PowerPC/atomic-1.ll new file mode 100644 index 0000000..cbfa409 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/atomic-1.ll
@@ -0,0 +1,26 @@ +; RUN: llc < %s -march=ppc32 | FileCheck %s + +define i32 @exchange_and_add(i32* %mem, i32 %val) nounwind { +; CHECK: exchange_and_add: +; CHECK: lwarx + %tmp = atomicrmw add i32* %mem, i32 %val monotonic +; CHECK: stwcx. + ret i32 %tmp +} + +define i32 @exchange_and_cmp(i32* %mem) nounwind { +; CHECK: exchange_and_cmp: +; CHECK: lwarx + %tmp = cmpxchg i32* %mem, i32 0, i32 1 monotonic +; CHECK: stwcx. +; CHECK: stwcx. + ret i32 %tmp +} + +define i32 @exchange(i32* %mem, i32 %val) nounwind { +; CHECK: exchange: +; CHECK: lwarx + %tmp = atomicrmw xchg i32* %mem, i32 1 monotonic +; CHECK: stwcx. + ret i32 %tmp +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/atomic-2.ll b/src/LLVM/test/CodeGen/PowerPC/atomic-2.ll new file mode 100644 index 0000000..a427379 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/atomic-2.ll
@@ -0,0 +1,26 @@ +; RUN: llc < %s -march=ppc64 | FileCheck %s + +define i64 @exchange_and_add(i64* %mem, i64 %val) nounwind { +; CHECK: exchange_and_add: +; CHECK: ldarx + %tmp = atomicrmw add i64* %mem, i64 %val monotonic +; CHECK: stdcx. + ret i64 %tmp +} + +define i64 @exchange_and_cmp(i64* %mem) nounwind { +; CHECK: exchange_and_cmp: +; CHECK: ldarx + %tmp = cmpxchg i64* %mem, i64 0, i64 1 monotonic +; CHECK: stdcx. +; CHECK: stdcx. + ret i64 %tmp +} + +define i64 @exchange(i64* %mem, i64 %val) nounwind { +; CHECK: exchange: +; CHECK: ldarx + %tmp = atomicrmw xchg i64* %mem, i64 1 monotonic +; CHECK: stdcx. + ret i64 %tmp +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/available-externally.ll b/src/LLVM/test/CodeGen/PowerPC/available-externally.ll new file mode 100644 index 0000000..fdead7d --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/available-externally.ll
@@ -0,0 +1,71 @@ +; RUN: llc < %s -relocation-model=static | FileCheck %s -check-prefix=STATIC +; RUN: llc < %s -relocation-model=pic | FileCheck %s -check-prefix=PIC +; RUN: llc < %s -relocation-model=dynamic-no-pic | FileCheck %s -check-prefix=DYNAMIC +; PR4482 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "powerpc-apple-darwin8" + +define i32 @foo(i64 %x) nounwind { +entry: +; STATIC: _foo: +; STATIC: bl _exact_log2 +; STATIC: blr +; STATIC: .subsections_via_symbols + +; PIC: _foo: +; PIC: bl L_exact_log2$stub +; PIC: blr + +; DYNAMIC: _foo: +; DYNAMIC: bl L_exact_log2$stub +; DYNAMIC: blr + + %A = call i32 @exact_log2(i64 %x) nounwind + ret i32 %A +} + +define available_externally i32 @exact_log2(i64 %x) nounwind { +entry: + ret i32 42 +} + + +; PIC: .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 +; PIC: L_exact_log2$stub: +; PIC: .indirect_symbol _exact_log2 +; PIC: mflr r0 +; PIC: bcl 20,31,L_exact_log2$stub$tmp + +; PIC: L_exact_log2$stub$tmp: +; PIC: mflr r11 +; PIC: addis r11,r11,ha16(L_exact_log2$lazy_ptr-L_exact_log2$stub$tmp) +; PIC: mtlr r0 +; PIC: lwzu r12,lo16(L_exact_log2$lazy_ptr-L_exact_log2$stub$tmp)(r11) +; PIC: mtctr r12 +; PIC: bctr + +; PIC: .section __DATA,__la_symbol_ptr,lazy_symbol_pointers +; PIC: L_exact_log2$lazy_ptr: +; PIC: .indirect_symbol _exact_log2 +; PIC: .long dyld_stub_binding_helper + +; PIC: .subsections_via_symbols + + +; DYNAMIC: .section __TEXT,__symbol_stub1,symbol_stubs,pure_instructions,16 +; DYNAMIC: L_exact_log2$stub: +; DYNAMIC: .indirect_symbol _exact_log2 +; DYNAMIC: lis r11,ha16(L_exact_log2$lazy_ptr) +; DYNAMIC: lwzu r12,lo16(L_exact_log2$lazy_ptr)(r11) +; DYNAMIC: mtctr r12 +; DYNAMIC: bctr + +; DYNAMIC: .section __DATA,__la_symbol_ptr,lazy_symbol_pointers +; DYNAMIC: L_exact_log2$lazy_ptr: +; DYNAMIC: .indirect_symbol _exact_log2 +; DYNAMIC: .long dyld_stub_binding_helper + + + + 
+
diff --git a/src/LLVM/test/CodeGen/PowerPC/big-endian-actual-args.ll b/src/LLVM/test/CodeGen/PowerPC/big-endian-actual-args.ll new file mode 100644 index 0000000..009f468 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/big-endian-actual-args.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \ +; RUN: grep {addc 4, 4, 6} +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \ +; RUN: grep {adde 3, 3, 5} + +define i64 @foo(i64 %x, i64 %y) { + %z = add i64 %x, %y + ret i64 %z +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/big-endian-call-result.ll b/src/LLVM/test/CodeGen/PowerPC/big-endian-call-result.ll new file mode 100644 index 0000000..fe85404 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/big-endian-call-result.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \ +; RUN: grep {addic 4, 4, 1} +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \ +; RUN: grep {addze 3, 3} + +declare i64 @foo() + +define i64 @bar() +{ + %t = call i64 @foo() + %s = add i64 %t, 1 + ret i64 %s +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/big-endian-formal-args.ll b/src/LLVM/test/CodeGen/PowerPC/big-endian-formal-args.ll new file mode 100644 index 0000000..318ccb0 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/big-endian-formal-args.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | FileCheck %s + +declare void @bar(i64 %x, i64 %y) + +; CHECK: li 4, 2 +; CHECK: li {{[53]}}, 0 +; CHECK: li 6, 3 +; CHECK: mr {{[53]}}, {{[53]}} + +define void @foo() { + call void @bar(i64 2, i64 3) + ret void +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/branch-opt.ll b/src/LLVM/test/CodeGen/PowerPC/branch-opt.ll new file mode 100644 index 0000000..c74d001 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/branch-opt.ll
@@ -0,0 +1,71 @@ +; RUN: llc < %s -march=ppc32 | \ +; RUN: grep {b LBB.*} | count 4 + +target datalayout = "E-p:32:32" +target triple = "powerpc-apple-darwin8.7.0" + +define void @foo(i32 %W, i32 %X, i32 %Y, i32 %Z) { +entry: + %tmp1 = and i32 %W, 1 ; <i32> [#uses=1] + %tmp1.upgrd.1 = icmp eq i32 %tmp1, 0 ; <i1> [#uses=1] + br i1 %tmp1.upgrd.1, label %cond_false, label %bb5 +bb: ; preds = %bb5, %bb + %indvar77 = phi i32 [ %indvar.next78, %bb ], [ 0, %bb5 ] ; <i32> [#uses=1] + %tmp2 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0] + %indvar.next78 = add i32 %indvar77, 1 ; <i32> [#uses=2] + %exitcond79 = icmp eq i32 %indvar.next78, %X ; <i1> [#uses=1] + br i1 %exitcond79, label %cond_next48, label %bb +bb5: ; preds = %entry + %tmp = icmp eq i32 %X, 0 ; <i1> [#uses=1] + br i1 %tmp, label %cond_next48, label %bb +cond_false: ; preds = %entry + %tmp10 = and i32 %W, 2 ; <i32> [#uses=1] + %tmp10.upgrd.2 = icmp eq i32 %tmp10, 0 ; <i1> [#uses=1] + br i1 %tmp10.upgrd.2, label %cond_false20, label %bb16 +bb12: ; preds = %bb16, %bb12 + %indvar72 = phi i32 [ %indvar.next73, %bb12 ], [ 0, %bb16 ] ; <i32> [#uses=1] + %tmp13 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0] + %indvar.next73 = add i32 %indvar72, 1 ; <i32> [#uses=2] + %exitcond74 = icmp eq i32 %indvar.next73, %Y ; <i1> [#uses=1] + br i1 %exitcond74, label %cond_next48, label %bb12 +bb16: ; preds = %cond_false + %tmp18 = icmp eq i32 %Y, 0 ; <i1> [#uses=1] + br i1 %tmp18, label %cond_next48, label %bb12 +cond_false20: ; preds = %cond_false + %tmp23 = and i32 %W, 4 ; <i32> [#uses=1] + %tmp23.upgrd.3 = icmp eq i32 %tmp23, 0 ; <i1> [#uses=1] + br i1 %tmp23.upgrd.3, label %cond_false33, label %bb29 +bb25: ; preds = %bb29, %bb25 + %indvar67 = phi i32 [ %indvar.next68, %bb25 ], [ 0, %bb29 ] ; <i32> [#uses=1] + %tmp26 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0] + %indvar.next68 = add i32 %indvar67, 1 ; <i32> [#uses=2] + %exitcond69 = icmp eq i32 %indvar.next68, %Z ; <i1> [#uses=1] + br i1 %exitcond69, label %cond_next48, 
label %bb25 +bb29: ; preds = %cond_false20 + %tmp31 = icmp eq i32 %Z, 0 ; <i1> [#uses=1] + br i1 %tmp31, label %cond_next48, label %bb25 +cond_false33: ; preds = %cond_false20 + %tmp36 = and i32 %W, 8 ; <i32> [#uses=1] + %tmp36.upgrd.4 = icmp eq i32 %tmp36, 0 ; <i1> [#uses=1] + br i1 %tmp36.upgrd.4, label %cond_next48, label %bb42 +bb38: ; preds = %bb42 + %tmp39 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0] + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1] + br label %bb42 +bb42: ; preds = %bb38, %cond_false33 + %indvar = phi i32 [ %indvar.next, %bb38 ], [ 0, %cond_false33 ] ; <i32> [#uses=4] + %W_addr.0 = sub i32 %W, %indvar ; <i32> [#uses=1] + %exitcond = icmp eq i32 %indvar, %W ; <i1> [#uses=1] + br i1 %exitcond, label %cond_next48, label %bb38 +cond_next48: ; preds = %bb42, %cond_false33, %bb29, %bb25, %bb16, %bb12, %bb5, %bb + %W_addr.1 = phi i32 [ %W, %bb5 ], [ %W, %bb16 ], [ %W, %bb29 ], [ %W, %cond_false33 ], [ %W_addr.0, %bb42 ], [ %W, %bb25 ], [ %W, %bb12 ], [ %W, %bb ] ; <i32> [#uses=1] + %tmp50 = icmp eq i32 %W_addr.1, 0 ; <i1> [#uses=1] + br i1 %tmp50, label %UnifiedReturnBlock, label %cond_true51 +cond_true51: ; preds = %cond_next48 + %tmp52 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0] + ret void +UnifiedReturnBlock: ; preds = %cond_next48 + ret void +} + +declare i32 @bar(...)
diff --git a/src/LLVM/test/CodeGen/PowerPC/bswap-load-store.ll b/src/LLVM/test/CodeGen/PowerPC/bswap-load-store.ll new file mode 100644 index 0000000..187de4a --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/bswap-load-store.ll
@@ -0,0 +1,51 @@ +; RUN: llc < %s -march=ppc32 | FileCheck %s -check-prefix=X32 +; RUN: llc < %s -march=ppc64 | FileCheck %s -check-prefix=X64 + + +define void @STWBRX(i32 %i, i8* %ptr, i32 %off) { + %tmp1 = getelementptr i8* %ptr, i32 %off ; <i8*> [#uses=1] + %tmp1.upgrd.1 = bitcast i8* %tmp1 to i32* ; <i32*> [#uses=1] + %tmp13 = tail call i32 @llvm.bswap.i32( i32 %i ) ; <i32> [#uses=1] + store i32 %tmp13, i32* %tmp1.upgrd.1 + ret void +} + +define i32 @LWBRX(i8* %ptr, i32 %off) { + %tmp1 = getelementptr i8* %ptr, i32 %off ; <i8*> [#uses=1] + %tmp1.upgrd.2 = bitcast i8* %tmp1 to i32* ; <i32*> [#uses=1] + %tmp = load i32* %tmp1.upgrd.2 ; <i32> [#uses=1] + %tmp14 = tail call i32 @llvm.bswap.i32( i32 %tmp ) ; <i32> [#uses=1] + ret i32 %tmp14 +} + +define void @STHBRX(i16 %s, i8* %ptr, i32 %off) { + %tmp1 = getelementptr i8* %ptr, i32 %off ; <i8*> [#uses=1] + %tmp1.upgrd.3 = bitcast i8* %tmp1 to i16* ; <i16*> [#uses=1] + %tmp5 = call i16 @llvm.bswap.i16( i16 %s ) ; <i16> [#uses=1] + store i16 %tmp5, i16* %tmp1.upgrd.3 + ret void +} + +define i16 @LHBRX(i8* %ptr, i32 %off) { + %tmp1 = getelementptr i8* %ptr, i32 %off ; <i8*> [#uses=1] + %tmp1.upgrd.4 = bitcast i8* %tmp1 to i16* ; <i16*> [#uses=1] + %tmp = load i16* %tmp1.upgrd.4 ; <i16> [#uses=1] + %tmp6 = call i16 @llvm.bswap.i16( i16 %tmp ) ; <i16> [#uses=1] + ret i16 %tmp6 +} + +declare i32 @llvm.bswap.i32(i32) + +declare i16 @llvm.bswap.i16(i16) + + +; X32: stwbrx +; X32: lwbrx +; X32: sthbrx +; X32: lhbrx + +; X64: stwbrx +; X64: lwbrx +; X64: sthbrx +; X64: lhbrx +
diff --git a/src/LLVM/test/CodeGen/PowerPC/buildvec_canonicalize.ll b/src/LLVM/test/CodeGen/PowerPC/buildvec_canonicalize.ll new file mode 100644 index 0000000..d699067 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/buildvec_canonicalize.ll
@@ -0,0 +1,24 @@ +; There should be exactly one vxor here. +; RUN: llc < %s -march=ppc32 -mcpu=g5 --enable-unsafe-fp-math | \ +; RUN: grep vxor | count 1 + +; There should be exactly one vsplti here. +; RUN: llc < %s -march=ppc32 -mcpu=g5 --enable-unsafe-fp-math | \ +; RUN: grep vsplti | count 1 + +define void @VXOR(<4 x float>* %P1, <4 x i32>* %P2, <4 x float>* %P3) { + %tmp = load <4 x float>* %P3 ; <<4 x float>> [#uses=1] + %tmp3 = load <4 x float>* %P1 ; <<4 x float>> [#uses=1] + %tmp4 = fmul <4 x float> %tmp, %tmp3 ; <<4 x float>> [#uses=1] + store <4 x float> %tmp4, <4 x float>* %P3 + store <4 x float> zeroinitializer, <4 x float>* %P1 + store <4 x i32> zeroinitializer, <4 x i32>* %P2 + ret void +} + +define void @VSPLTI(<4 x i32>* %P2, <8 x i16>* %P3) { + store <4 x i32> bitcast (<16 x i8> < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > to <4 x i32>), <4 x i32>* %P2 + store <8 x i16> < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >, <8 x i16>* %P3 + ret void +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/calls.ll b/src/LLVM/test/CodeGen/PowerPC/calls.ll new file mode 100644 index 0000000..44cfabf --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/calls.ll
@@ -0,0 +1,27 @@ +; Test various forms of calls. + +; RUN: llc < %s -march=ppc32 | \ +; RUN: grep {bl } | count 1 +; RUN: llc < %s -march=ppc32 | \ +; RUN: grep {bctrl} | count 1 +; RUN: llc < %s -march=ppc32 | \ +; RUN: grep {bla } | count 1 + +declare void @foo() + +define void @test_direct() { + call void @foo( ) + ret void +} + +define void @test_indirect(void ()* %fp) { + call void %fp( ) + ret void +} + +define void @test_abs() { + %fp = inttoptr i32 400 to void ()* ; <void ()*> [#uses=1] + call void %fp( ) + ret void +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/cmp-cmp.ll b/src/LLVM/test/CodeGen/PowerPC/cmp-cmp.ll new file mode 100644 index 0000000..14866d2 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/cmp-cmp.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=ppc32 | not grep mfcr + +define void @test(i64 %X) { + %tmp1 = and i64 %X, 3 ; <i64> [#uses=1] + %tmp = icmp sgt i64 %tmp1, 2 ; <i1> [#uses=1] + br i1 %tmp, label %UnifiedReturnBlock, label %cond_true +cond_true: ; preds = %0 + tail call void @test( i64 0 ) + ret void +UnifiedReturnBlock: ; preds = %0 + ret void +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/compare-duplicate.ll b/src/LLVM/test/CodeGen/PowerPC/compare-duplicate.ll new file mode 100644 index 0000000..618cf00 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/compare-duplicate.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -mtriple=powerpc-apple-darwin8 | not grep slwi + +define i32 @test(i32 %A, i32 %B) { + %C = sub i32 %B, %A + %D = icmp eq i32 %C, %A + br i1 %D, label %T, label %F +T: + ret i32 19123 +F: + ret i32 %C +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/compare-simm.ll b/src/LLVM/test/CodeGen/PowerPC/compare-simm.ll new file mode 100644 index 0000000..928033f --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/compare-simm.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | \ +; RUN: grep {cmpwi cr0, r3, -1} + +define i32 @test(i32 %x) nounwind { + %c = icmp eq i32 %x, -1 + br i1 %c, label %T, label %F +T: + %A = call i32 @test(i32 123) + %B = add i32 %A, 43 + ret i32 %B +F: + %G = add i32 %x, 1234 + ret i32 %G +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/constants.ll b/src/LLVM/test/CodeGen/PowerPC/constants.ll new file mode 100644 index 0000000..6c57bda --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/constants.ll
@@ -0,0 +1,52 @@ +; All of these routines should be perform optimal load of constants. +; RUN: llc < %s -march=ppc32 | \ +; RUN: grep lis | count 5 +; RUN: llc < %s -march=ppc32 | \ +; RUN: grep ori | count 3 +; RUN: llc < %s -march=ppc32 | \ +; RUN: grep {li } | count 4 + +define i32 @f1() { +entry: + ret i32 1 +} + +define i32 @f2() { +entry: + ret i32 -1 +} + +define i32 @f3() { +entry: + ret i32 0 +} + +define i32 @f4() { +entry: + ret i32 32767 +} + +define i32 @f5() { +entry: + ret i32 65535 +} + +define i32 @f6() { +entry: + ret i32 65536 +} + +define i32 @f7() { +entry: + ret i32 131071 +} + +define i32 @f8() { +entry: + ret i32 2147483647 +} + +define i32 @f9() { +entry: + ret i32 -2147483648 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/cr1eq.ll b/src/LLVM/test/CodeGen/PowerPC/cr1eq.ll new file mode 100644 index 0000000..fb9c969 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/cr1eq.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s | FileCheck %s +; ModuleID = 'test.c' +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32" +target triple = "powerpc-unknown-freebsd" + +@.str = private unnamed_addr constant [4 x i8] c"%i\0A\00", align 1 +@.str1 = private unnamed_addr constant [4 x i8] c"%f\0A\00", align 1 + +define void @foo() nounwind { +entry: +; CHECK: crxor 6, 6, 6 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 1) +; CHECK: creqv 6, 6, 6 + %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str1, i32 0, i32 0), double 1.100000e+00) + ret void +} + +declare i32 @printf(i8*, ...)
diff --git a/src/LLVM/test/CodeGen/PowerPC/cr_spilling.ll b/src/LLVM/test/CodeGen/PowerPC/cr_spilling.ll new file mode 100644 index 0000000..8bd809f --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/cr_spilling.ll
@@ -0,0 +1,27 @@ +; RUN: llc < %s -march=ppc32 -regalloc=fast -O0 -relocation-model=pic -o - +; PR1638 + +@.str242 = external constant [3 x i8] ; <[3 x i8]*> [#uses=1] + +define fastcc void @ParseContent(i8* %buf, i32 %bufsize) { +entry: + %items = alloca [10000 x i8*], align 16 ; <[10000 x i8*]*> [#uses=0] + %tmp86 = add i32 0, -1 ; <i32> [#uses=1] + br i1 false, label %cond_true94, label %cond_next99 + +cond_true94: ; preds = %entry + %tmp98 = call i32 (i8*, ...)* @printf(i8* getelementptr ([3 x i8]* @.str242, i32 0, i32 0), i8* null) ; <i32> [#uses=0] + %tmp20971 = icmp sgt i32 %tmp86, 0 ; <i1> [#uses=1] + br i1 %tmp20971, label %bb101, label %bb212 + +cond_next99: ; preds = %entry + ret void + +bb101: ; preds = %cond_true94 + ret void + +bb212: ; preds = %cond_true94 + ret void +} + +declare i32 @printf(i8*, ...)
diff --git a/src/LLVM/test/CodeGen/PowerPC/cttz.ll b/src/LLVM/test/CodeGen/PowerPC/cttz.ll new file mode 100644 index 0000000..4712547 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/cttz.ll
@@ -0,0 +1,11 @@ +; Make sure this testcase does not use ctpop +; RUN: llc < %s -march=ppc32 | grep -i cntlzw + +declare i32 @llvm.cttz.i32(i32) + +define i32 @bar(i32 %x) { +entry: + %tmp.1 = call i32 @llvm.cttz.i32( i32 %x ) ; <i32> [#uses=1] + ret i32 %tmp.1 +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/darwin-labels.ll b/src/LLVM/test/CodeGen/PowerPC/darwin-labels.ll new file mode 100644 index 0000000..be76f3d --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/darwin-labels.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s | grep {foo bar":} + +target datalayout = "E-p:32:32" +target triple = "powerpc-apple-darwin8.2.0" +@"foo bar" = global i32 4 ; <i32*> [#uses=0] +
diff --git a/src/LLVM/test/CodeGen/PowerPC/delete-node.ll b/src/LLVM/test/CodeGen/PowerPC/delete-node.ll new file mode 100644 index 0000000..a26c211 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/delete-node.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -march=ppc32 + +; The DAGCombiner leaves behind a dead node in this testcase. Currently +; ISel is ignoring dead nodes, though it would be preferable for +; DAGCombiner to be able to eliminate the dead node. + +define void @GrayATo32ARGBTabB(i8* %baseAddr, i16** %cmp, i32 %rowBytes) nounwind { +entry: + br label %bb1 + +bb1: ; preds = %bb1, %entry + %0 = load i16* null, align 2 ; <i16> [#uses=1] + %1 = ashr i16 %0, 4 ; <i16> [#uses=1] + %2 = sext i16 %1 to i32 ; <i32> [#uses=1] + %3 = getelementptr i8* null, i32 %2 ; <i8*> [#uses=1] + %4 = load i8* %3, align 1 ; <i8> [#uses=1] + %5 = zext i8 %4 to i32 ; <i32> [#uses=1] + %6 = shl i32 %5, 24 ; <i32> [#uses=1] + %7 = or i32 0, %6 ; <i32> [#uses=1] + store i32 %7, i32* null, align 4 + br label %bb1 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/dg.exp b/src/LLVM/test/CodeGen/PowerPC/dg.exp new file mode 100644 index 0000000..efed47d --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/dg.exp
@@ -0,0 +1,5 @@ +load_lib llvm.exp + +if { [llvm_supports_target PowerPC] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/div-2.ll b/src/LLVM/test/CodeGen/PowerPC/div-2.ll new file mode 100644 index 0000000..552e15d --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/div-2.ll
@@ -0,0 +1,30 @@ +; RUN: llc < %s -march=ppc32 | not grep srawi +; RUN: llc < %s -march=ppc32 | grep blr + +define i32 @test1(i32 %X) { + %Y = and i32 %X, 15 ; <i32> [#uses=1] + %Z = sdiv i32 %Y, 4 ; <i32> [#uses=1] + ret i32 %Z +} + +define i32 @test2(i32 %W) { + %X = and i32 %W, 15 ; <i32> [#uses=1] + %Y = sub i32 16, %X ; <i32> [#uses=1] + %Z = sdiv i32 %Y, 4 ; <i32> [#uses=1] + ret i32 %Z +} + +define i32 @test3(i32 %W) { + %X = and i32 %W, 15 ; <i32> [#uses=1] + %Y = sub i32 15, %X ; <i32> [#uses=1] + %Z = sdiv i32 %Y, 4 ; <i32> [#uses=1] + ret i32 %Z +} + +define i32 @test4(i32 %W) { + %X = and i32 %W, 2 ; <i32> [#uses=1] + %Y = sub i32 5, %X ; <i32> [#uses=1] + %Z = sdiv i32 %Y, 2 ; <i32> [#uses=1] + ret i32 %Z +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/empty-functions.ll b/src/LLVM/test/CodeGen/PowerPC/empty-functions.ll new file mode 100644 index 0000000..3a2907d --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/empty-functions.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -mtriple=powerpc-apple-darwin | FileCheck -check-prefix=CHECK-NO-FP %s +; RUN: llc < %s -mtriple=powerpc-apple-darwin -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s + +define void @func() { +entry: + unreachable +} +; CHECK-NO-FP: _func: +; CHECK-NO-FP: nop + +; CHECK-FP: _func: +; CHECK-FP: nop
diff --git a/src/LLVM/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll b/src/LLVM/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll new file mode 100644 index 0000000..3a7851a --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll
@@ -0,0 +1,93 @@ +; RUN: llc < %s -march=ppc32 | \ +; RUN: grep eqv | count 3 +; RUN: llc < %s -march=ppc32 -mcpu=g5 | \ +; RUN: grep andc | count 3 +; RUN: llc < %s -march=ppc32 | \ +; RUN: grep orc | count 2 +; RUN: llc < %s -march=ppc32 -mcpu=g5 | \ +; RUN: grep nor | count 3 +; RUN: llc < %s -march=ppc32 | \ +; RUN: grep nand | count 1 + +define i32 @EQV1(i32 %X, i32 %Y) nounwind { + %A = xor i32 %X, %Y ; <i32> [#uses=1] + %B = xor i32 %A, -1 ; <i32> [#uses=1] + ret i32 %B +} + +define i32 @EQV2(i32 %X, i32 %Y) nounwind { + %A = xor i32 %X, -1 ; <i32> [#uses=1] + %B = xor i32 %A, %Y ; <i32> [#uses=1] + ret i32 %B +} + +define i32 @EQV3(i32 %X, i32 %Y) nounwind { + %A = xor i32 %X, -1 ; <i32> [#uses=1] + %B = xor i32 %Y, %A ; <i32> [#uses=1] + ret i32 %B +} + +define i32 @ANDC1(i32 %X, i32 %Y) nounwind { + %A = xor i32 %Y, -1 ; <i32> [#uses=1] + %B = and i32 %X, %A ; <i32> [#uses=1] + ret i32 %B +} + +define i32 @ANDC2(i32 %X, i32 %Y) nounwind { + %A = xor i32 %X, -1 ; <i32> [#uses=1] + %B = and i32 %A, %Y ; <i32> [#uses=1] + ret i32 %B +} + +define i32 @ORC1(i32 %X, i32 %Y) nounwind { + %A = xor i32 %Y, -1 ; <i32> [#uses=1] + %B = or i32 %X, %A ; <i32> [#uses=1] + ret i32 %B +} + +define i32 @ORC2(i32 %X, i32 %Y) nounwind { + %A = xor i32 %X, -1 ; <i32> [#uses=1] + %B = or i32 %A, %Y ; <i32> [#uses=1] + ret i32 %B +} + +define i32 @NOR1(i32 %X) nounwind { + %Y = xor i32 %X, -1 ; <i32> [#uses=1] + ret i32 %Y +} + +define i32 @NOR2(i32 %X, i32 %Y) nounwind { + %Z = or i32 %X, %Y ; <i32> [#uses=1] + %R = xor i32 %Z, -1 ; <i32> [#uses=1] + ret i32 %R +} + +define i32 @NAND1(i32 %X, i32 %Y) nounwind { + %Z = and i32 %X, %Y ; <i32> [#uses=1] + %W = xor i32 %Z, -1 ; <i32> [#uses=1] + ret i32 %W +} + +define void @VNOR(<4 x float>* %P, <4 x float>* %Q) nounwind { + %tmp = load <4 x float>* %P ; <<4 x float>> [#uses=1] + %tmp.upgrd.1 = bitcast <4 x float> %tmp to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp2 = load <4 x float>* %Q ; <<4 x float>> [#uses=1] + %tmp2.upgrd.2 = 
bitcast <4 x float> %tmp2 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp3 = or <4 x i32> %tmp.upgrd.1, %tmp2.upgrd.2 ; <<4 x i32>> [#uses=1] + %tmp4 = xor <4 x i32> %tmp3, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] + %tmp4.upgrd.3 = bitcast <4 x i32> %tmp4 to <4 x float> ; <<4 x float>> [#uses=1] + store <4 x float> %tmp4.upgrd.3, <4 x float>* %P + ret void +} + +define void @VANDC(<4 x float>* %P, <4 x float>* %Q) nounwind { + %tmp = load <4 x float>* %P ; <<4 x float>> [#uses=1] + %tmp.upgrd.4 = bitcast <4 x float> %tmp to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp2 = load <4 x float>* %Q ; <<4 x float>> [#uses=1] + %tmp2.upgrd.5 = bitcast <4 x float> %tmp2 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp4 = xor <4 x i32> %tmp2.upgrd.5, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] + %tmp3 = and <4 x i32> %tmp.upgrd.4, %tmp4 ; <<4 x i32>> [#uses=1] + %tmp4.upgrd.6 = bitcast <4 x i32> %tmp3 to <4 x float> ; <<4 x float>> [#uses=1] + store <4 x float> %tmp4.upgrd.6, <4 x float>* %P + ret void +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/extsh.ll b/src/LLVM/test/CodeGen/PowerPC/extsh.ll new file mode 100644 index 0000000..e91bda9 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/extsh.ll
@@ -0,0 +1,8 @@ +; This should turn into a single extsh +; RUN: llc < %s -march=ppc32 | grep extsh | count 1 +define i32 @test(i32 %X) { + %tmp.81 = shl i32 %X, 16 ; <i32> [#uses=1] + %tmp.82 = ashr i32 %tmp.81, 16 ; <i32> [#uses=1] + ret i32 %tmp.82 +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/fabs.ll b/src/LLVM/test/CodeGen/PowerPC/fabs.ll new file mode 100644 index 0000000..6ef740f --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/fabs.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin | grep {fabs f1, f1} + +define double @fabs(double %f) { +entry: + %tmp2 = tail call double @fabs( double %f ) ; <double> [#uses=1] + ret double %tmp2 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/fma.ll b/src/LLVM/test/CodeGen/PowerPC/fma.ll new file mode 100644 index 0000000..46252ea --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/fma.ll
@@ -0,0 +1,54 @@ +; RUN: llc < %s -march=ppc32 | \ +; RUN: egrep {fn?madd|fn?msub} | count 8 + +define double @test_FMADD1(double %A, double %B, double %C) { + %D = fmul double %A, %B ; <double> [#uses=1] + %E = fadd double %D, %C ; <double> [#uses=1] + ret double %E +} + +define double @test_FMADD2(double %A, double %B, double %C) { + %D = fmul double %A, %B ; <double> [#uses=1] + %E = fadd double %D, %C ; <double> [#uses=1] + ret double %E +} + +define double @test_FMSUB(double %A, double %B, double %C) { + %D = fmul double %A, %B ; <double> [#uses=1] + %E = fsub double %D, %C ; <double> [#uses=1] + ret double %E +} + +define double @test_FNMADD1(double %A, double %B, double %C) { + %D = fmul double %A, %B ; <double> [#uses=1] + %E = fadd double %D, %C ; <double> [#uses=1] + %F = fsub double -0.000000e+00, %E ; <double> [#uses=1] + ret double %F +} + +define double @test_FNMADD2(double %A, double %B, double %C) { + %D = fmul double %A, %B ; <double> [#uses=1] + %E = fadd double %C, %D ; <double> [#uses=1] + %F = fsub double -0.000000e+00, %E ; <double> [#uses=1] + ret double %F +} + +define double @test_FNMSUB1(double %A, double %B, double %C) { + %D = fmul double %A, %B ; <double> [#uses=1] + %E = fsub double %C, %D ; <double> [#uses=1] + ret double %E +} + +define double @test_FNMSUB2(double %A, double %B, double %C) { + %D = fmul double %A, %B ; <double> [#uses=1] + %E = fsub double %D, %C ; <double> [#uses=1] + %F = fsub double -0.000000e+00, %E ; <double> [#uses=1] + ret double %F +} + +define float @test_FNMSUBS(float %A, float %B, float %C) { + %D = fmul float %A, %B ; <float> [#uses=1] + %E = fsub float %D, %C ; <float> [#uses=1] + %F = fsub float -0.000000e+00, %E ; <float> [#uses=1] + ret float %F +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/fnabs.ll b/src/LLVM/test/CodeGen/PowerPC/fnabs.ll new file mode 100644 index 0000000..a1bf47d --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/fnabs.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=ppc32 | grep fnabs + +declare double @fabs(double) + +define double @test(double %X) { + %Y = call double @fabs( double %X ) ; <double> [#uses=1] + %Z = fsub double -0.000000e+00, %Y ; <double> [#uses=1] + ret double %Z +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/fneg.ll b/src/LLVM/test/CodeGen/PowerPC/fneg.ll new file mode 100644 index 0000000..d05bfe6 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/fneg.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=ppc32 | not grep fneg + +define double @test1(double %a, double %b, double %c, double %d) { +entry: + %tmp2 = fsub double -0.000000e+00, %c ; <double> [#uses=1] + %tmp4 = fmul double %tmp2, %d ; <double> [#uses=1] + %tmp7 = fmul double %a, %b ; <double> [#uses=1] + %tmp9 = fsub double %tmp7, %tmp4 ; <double> [#uses=1] + ret double %tmp9 +} + +
diff --git a/src/LLVM/test/CodeGen/PowerPC/fold-li.ll b/src/LLVM/test/CodeGen/PowerPC/fold-li.ll new file mode 100644 index 0000000..d1e1b64 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/fold-li.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=ppc32 | \ +; RUN: grep -v align | not grep li + +;; Test that immediates are folded into these instructions correctly. + +define i32 @ADD(i32 %X) nounwind { + %Y = add i32 %X, 65537 ; <i32> [#uses=1] + ret i32 %Y +} + +define i32 @SUB(i32 %X) nounwind { + %Y = sub i32 %X, 65537 ; <i32> [#uses=1] + ret i32 %Y +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/fp-branch.ll b/src/LLVM/test/CodeGen/PowerPC/fp-branch.ll new file mode 100644 index 0000000..4f1881b --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/fp-branch.ll
@@ -0,0 +1,21 @@ +; RUN: llc < %s -march=ppc32 | grep fcmp | count 1 + +declare i1 @llvm.isunordered.f64(double, double) + +define i1 @intcoord_cond_next55(double %tmp48.reload) { +newFuncRoot: + br label %cond_next55 + +bb72.exitStub: ; preds = %cond_next55 + ret i1 true + +cond_next62.exitStub: ; preds = %cond_next55 + ret i1 false + +cond_next55: ; preds = %newFuncRoot + %tmp57 = fcmp oge double %tmp48.reload, 1.000000e+00 ; <i1> [#uses=1] + %tmp58 = fcmp uno double %tmp48.reload, 1.000000e+00 ; <i1> [#uses=1] + %tmp59 = or i1 %tmp57, %tmp58 ; <i1> [#uses=1] + br i1 %tmp59, label %bb72.exitStub, label %cond_next62.exitStub +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/fp-int-fp.ll b/src/LLVM/test/CodeGen/PowerPC/fp-int-fp.ll new file mode 100644 index 0000000..6174790 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/fp-int-fp.ll
@@ -0,0 +1,27 @@ +; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep r1 + +define double @test1(double %X) { + %Y = fptosi double %X to i64 ; <i64> [#uses=1] + %Z = sitofp i64 %Y to double ; <double> [#uses=1] + ret double %Z +} + +define float @test2(double %X) { + %Y = fptosi double %X to i64 ; <i64> [#uses=1] + %Z = sitofp i64 %Y to float ; <float> [#uses=1] + ret float %Z +} + +define double @test3(float %X) { + %Y = fptosi float %X to i64 ; <i64> [#uses=1] + %Z = sitofp i64 %Y to double ; <double> [#uses=1] + ret double %Z +} + +define float @test4(float %X) { + %Y = fptosi float %X to i64 ; <i64> [#uses=1] + %Z = sitofp i64 %Y to float ; <float> [#uses=1] + ret float %Z +} + +
diff --git a/src/LLVM/test/CodeGen/PowerPC/fp_to_uint.ll b/src/LLVM/test/CodeGen/PowerPC/fp_to_uint.ll new file mode 100644 index 0000000..cfbde02 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/fp_to_uint.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=ppc32 | grep fctiwz | count 1 + +define i16 @foo(float %a) { +entry: + %tmp.1 = fptoui float %a to i16 ; <i16> [#uses=1] + ret i16 %tmp.1 +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/fpcopy.ll b/src/LLVM/test/CodeGen/PowerPC/fpcopy.ll new file mode 100644 index 0000000..6b6bdcd --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/fpcopy.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=ppc32 | not grep fmr + +define double @test(float %F) { + %F.upgrd.1 = fpext float %F to double ; <double> [#uses=1] + ret double %F.upgrd.1 +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/frounds.ll b/src/LLVM/test/CodeGen/PowerPC/frounds.ll new file mode 100644 index 0000000..8eeadc3 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/frounds.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -march=ppc32 + +define i32 @foo() { +entry: + %retval = alloca i32 ; <i32*> [#uses=2] + %tmp = alloca i32 ; <i32*> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + %tmp1 = call i32 @llvm.flt.rounds( ) ; <i32> [#uses=1] + store i32 %tmp1, i32* %tmp, align 4 + %tmp2 = load i32* %tmp, align 4 ; <i32> [#uses=1] + store i32 %tmp2, i32* %retval, align 4 + br label %return + +return: ; preds = %entry + %retval3 = load i32* %retval ; <i32> [#uses=1] + ret i32 %retval3 +} + +declare i32 @llvm.flt.rounds() nounwind
diff --git a/src/LLVM/test/CodeGen/PowerPC/fsqrt.ll b/src/LLVM/test/CodeGen/PowerPC/fsqrt.ll new file mode 100644 index 0000000..98500ec --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/fsqrt.ll
@@ -0,0 +1,19 @@ +; fsqrt should be generated when the fsqrt feature is enabled, but not +; otherwise. + +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=+fsqrt | \ +; RUN: grep {fsqrt f1, f1} +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g5 | \ +; RUN: grep {fsqrt f1, f1} +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=-fsqrt | \ +; RUN: not grep {fsqrt f1, f1} +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g4 | \ +; RUN: not grep {fsqrt f1, f1} + +declare double @llvm.sqrt.f64(double) + +define double @X(double %Y) { + %Z = call double @llvm.sqrt.f64( double %Y ) ; <double> [#uses=1] + ret double %Z +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/hello.ll b/src/LLVM/test/CodeGen/PowerPC/hello.ll new file mode 100644 index 0000000..061c447 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/hello.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=ppc32 +; RUN: llc < %s -march=ppc64 +; PR1399 + +@.str = internal constant [13 x i8] c"Hello World!\00" + +define i32 @main() { + %tmp2 = tail call i32 @puts( i8* getelementptr ([13 x i8]* @.str, i32 0, i64 0) ) + ret i32 0 +} + +declare i32 @puts(i8*)
diff --git a/src/LLVM/test/CodeGen/PowerPC/hidden-vis-2.ll b/src/LLVM/test/CodeGen/PowerPC/hidden-vis-2.ll new file mode 100644 index 0000000..e9e2c0a --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/hidden-vis-2.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -mtriple=powerpc-apple-darwin9 | grep non_lazy_ptr | count 6 + +@x = external hidden global i32 ; <i32*> [#uses=1] +@y = extern_weak hidden global i32 ; <i32*> [#uses=1] + +define i32 @t() nounwind readonly { +entry: + %0 = load i32* @x, align 4 ; <i32> [#uses=1] + %1 = load i32* @y, align 4 ; <i32> [#uses=1] + %2 = add i32 %1, %0 ; <i32> [#uses=1] + ret i32 %2 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/hidden-vis.ll b/src/LLVM/test/CodeGen/PowerPC/hidden-vis.ll new file mode 100644 index 0000000..b2cc143 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/hidden-vis.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -mtriple=powerpc-apple-darwin9 | not grep non_lazy_ptr + +@x = weak hidden global i32 0 ; <i32*> [#uses=1] + +define i32 @t() nounwind readonly { +entry: + %0 = load i32* @x, align 4 ; <i32> [#uses=1] + ret i32 %0 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/i128-and-beyond.ll b/src/LLVM/test/CodeGen/PowerPC/i128-and-beyond.ll new file mode 100644 index 0000000..51bcab2 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/i128-and-beyond.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=ppc32 | grep 4294967295 | count 28 + +; These static initializers are too big to hand off to assemblers +; as monolithic blobs. + +@x = global i128 -1 +@y = global i256 -1 +@z = global i512 -1
diff --git a/src/LLVM/test/CodeGen/PowerPC/i64_fp.ll b/src/LLVM/test/CodeGen/PowerPC/i64_fp.ll new file mode 100644 index 0000000..45f9d43 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/i64_fp.ll
@@ -0,0 +1,26 @@ +; fcfid and fctid should be generated when the 64bit feature is enabled, but not +; otherwise. + +; RUN: llc < %s -march=ppc32 -mattr=+64bit | \ +; RUN: grep fcfid +; RUN: llc < %s -march=ppc32 -mattr=+64bit | \ +; RUN: grep fctidz +; RUN: llc < %s -march=ppc32 -mcpu=g5 | \ +; RUN: grep fcfid +; RUN: llc < %s -march=ppc32 -mcpu=g5 | \ +; RUN: grep fctidz +; RUN: llc < %s -march=ppc32 -mattr=-64bit | \ +; RUN: not grep fcfid +; RUN: llc < %s -march=ppc32 -mattr=-64bit | \ +; RUN: not grep fctidz +; RUN: llc < %s -march=ppc32 -mcpu=g4 | \ +; RUN: not grep fcfid +; RUN: llc < %s -march=ppc32 -mcpu=g4 | \ +; RUN: not grep fctidz + +define double @X(double %Y) { + %A = fptosi double %Y to i64 ; <i64> [#uses=1] + %B = sitofp i64 %A to double ; <double> [#uses=1] + ret double %B +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/iabs.ll b/src/LLVM/test/CodeGen/PowerPC/iabs.ll new file mode 100644 index 0000000..93d1943 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/iabs.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=ppc32 -stats |& \ +; RUN: grep {4 .*Number of machine instrs printed} + +;; Integer absolute value, should produce something as good as: +;; srawi r2, r3, 31 +;; add r3, r3, r2 +;; xor r3, r3, r2 +;; blr +define i32 @test(i32 %a) { + %tmp1neg = sub i32 0, %a + %b = icmp sgt i32 %a, -1 + %abs = select i1 %b, i32 %a, i32 %tmp1neg + ret i32 %abs +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/illegal-element-type.ll b/src/LLVM/test/CodeGen/PowerPC/illegal-element-type.ll new file mode 100644 index 0000000..58bd055 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/illegal-element-type.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -march=ppc32 -mcpu=g3 + +define void @foo() { +entry: + br label %bb + +bb: ; preds = %bb, %entry + br i1 false, label %bb26, label %bb + +bb19: ; preds = %bb26 + ret void + +bb26: ; preds = %bb + br i1 false, label %bb30, label %bb19 + +bb30: ; preds = %bb26 + br label %bb45 + +bb45: ; preds = %bb45, %bb30 + %V.0 = phi <8 x i16> [ %tmp42, %bb45 ], [ zeroinitializer, %bb30 ] ; <<8 x i16>> [#uses=1] + %tmp42 = mul <8 x i16> zeroinitializer, %V.0 ; <<8 x i16>> [#uses=1] + br label %bb45 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/indirectbr.ll b/src/LLVM/test/CodeGen/PowerPC/indirectbr.ll new file mode 100644 index 0000000..29c620e --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/indirectbr.ll
@@ -0,0 +1,59 @@ +; RUN: llc < %s -relocation-model=pic -march=ppc32 -mtriple=powerpc-apple-darwin | FileCheck %s -check-prefix=PIC +; RUN: llc < %s -relocation-model=static -march=ppc32 -mtriple=powerpc-apple-darwin | FileCheck %s -check-prefix=STATIC +; RUN: llc < %s -relocation-model=pic -march=ppc64 -mtriple=powerpc64-apple-darwin | FileCheck %s -check-prefix=PPC64 + +@nextaddr = global i8* null ; <i8**> [#uses=2] +@C.0.2070 = private constant [5 x i8*] [i8* blockaddress(@foo, %L1), i8* blockaddress(@foo, %L2), i8* blockaddress(@foo, %L3), i8* blockaddress(@foo, %L4), i8* blockaddress(@foo, %L5)] ; <[5 x i8*]*> [#uses=1] + +define internal i32 @foo(i32 %i) nounwind { +; PIC: foo: +; STATIC: foo: +; PPC64: foo: +entry: + %0 = load i8** @nextaddr, align 4 ; <i8*> [#uses=2] + %1 = icmp eq i8* %0, null ; <i1> [#uses=1] + br i1 %1, label %bb3, label %bb2 + +bb2: ; preds = %entry, %bb3 + %gotovar.4.0 = phi i8* [ %gotovar.4.0.pre, %bb3 ], [ %0, %entry ] ; <i8*> [#uses=1] +; PIC: mtctr +; PIC-NEXT: bctr +; STATIC: mtctr +; STATIC-NEXT: bctr +; PPC64: mtctr +; PPC64-NEXT: bctr + indirectbr i8* %gotovar.4.0, [label %L5, label %L4, label %L3, label %L2, label %L1] + +bb3: ; preds = %entry + %2 = getelementptr inbounds [5 x i8*]* @C.0.2070, i32 0, i32 %i ; <i8**> [#uses=1] + %gotovar.4.0.pre = load i8** %2, align 4 ; <i8*> [#uses=1] + br label %bb2 + +L5: ; preds = %bb2 + br label %L4 + +L4: ; preds = %L5, %bb2 + %res.0 = phi i32 [ 385, %L5 ], [ 35, %bb2 ] ; <i32> [#uses=1] + br label %L3 + +L3: ; preds = %L4, %bb2 + %res.1 = phi i32 [ %res.0, %L4 ], [ 5, %bb2 ] ; <i32> [#uses=1] + br label %L2 + +L2: ; preds = %L3, %bb2 + %res.2 = phi i32 [ %res.1, %L3 ], [ 1, %bb2 ] ; <i32> [#uses=1] + %phitmp = mul i32 %res.2, 6 ; <i32> [#uses=1] + br label %L1 + +L1: ; preds = %L2, %bb2 + %res.3 = phi i32 [ %phitmp, %L2 ], [ 2, %bb2 ] ; <i32> [#uses=1] +; PIC: addis r[[R0:[0-9]+]], r{{[0-9]+}}, ha16(Ltmp0-L0$pb) +; PIC: li r[[R1:[0-9]+]], lo16(Ltmp0-L0$pb) +; PIC: add r[[R2:[0-9]+]], 
r[[R0]], r[[R1]] +; PIC: stw r[[R2]] +; STATIC: li r[[R0:[0-9]+]], lo16(Ltmp0) +; STATIC: addis r[[R0]], r[[R0]], ha16(Ltmp0) +; STATIC: stw r[[R0]] + store i8* blockaddress(@foo, %L5), i8** @nextaddr, align 4 + ret i32 %res.3 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/inlineasm-copy.ll b/src/LLVM/test/CodeGen/PowerPC/inlineasm-copy.ll new file mode 100644 index 0000000..b4985f8 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/inlineasm-copy.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=ppc32 | not grep mr + +define i32 @test(i32 %Y, i32 %X) { +entry: + %tmp = tail call i32 asm "foo $0", "=r"( ) ; <i32> [#uses=1] + ret i32 %tmp +} + +define i32 @test2(i32 %Y, i32 %X) { +entry: + %tmp1 = tail call i32 asm "foo $0, $1", "=r,r"( i32 %X ) ; <i32> [#uses=1] + ret i32 %tmp1 +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/int-fp-conv-0.ll b/src/LLVM/test/CodeGen/PowerPC/int-fp-conv-0.ll new file mode 100644 index 0000000..983d2b8 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/int-fp-conv-0.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=ppc64 > %t +; RUN: grep __floattitf %t +; RUN: grep __fixunstfti %t + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128" +target triple = "powerpc64-apple-darwin9.2.0" + +define ppc_fp128 @foo(i128 %a) nounwind { +entry: + %tmp2829 = uitofp i128 %a to ppc_fp128 ; <i64> [#uses=1] + ret ppc_fp128 %tmp2829 +} +define i128 @boo(ppc_fp128 %a) nounwind { +entry: + %tmp2829 = fptoui ppc_fp128 %a to i128 ; <i64> [#uses=1] + ret i128 %tmp2829 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/int-fp-conv-1.ll b/src/LLVM/test/CodeGen/PowerPC/int-fp-conv-1.ll new file mode 100644 index 0000000..6c82723 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/int-fp-conv-1.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=ppc64 | grep __floatditf + +define i64 @__fixunstfdi(ppc_fp128 %a) nounwind { +entry: + %tmp1213 = uitofp i64 0 to ppc_fp128 ; <ppc_fp128> [#uses=1] + %tmp15 = fsub ppc_fp128 %a, %tmp1213 ; <ppc_fp128> [#uses=1] + %tmp2829 = fptoui ppc_fp128 %tmp15 to i32 ; <i32> [#uses=1] + %tmp282930 = zext i32 %tmp2829 to i64 ; <i64> [#uses=1] + %tmp32 = add i64 %tmp282930, 0 ; <i64> [#uses=1] + ret i64 %tmp32 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/inverted-bool-compares.ll b/src/LLVM/test/CodeGen/PowerPC/inverted-bool-compares.ll new file mode 100644 index 0000000..72f8dc5 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/inverted-bool-compares.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=ppc32 | not grep xori + +define i32 @test(i1 %B, i32* %P) { + br i1 %B, label %T, label %F + +T: ; preds = %0 + store i32 123, i32* %P + ret i32 0 + +F: ; preds = %0 + ret i32 17 +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/ispositive.ll b/src/LLVM/test/CodeGen/PowerPC/ispositive.ll new file mode 100644 index 0000000..7c056d6 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/ispositive.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | \ +; RUN: grep {srwi r3, r3, 31} + +define i32 @test1(i32 %X) { +entry: + icmp slt i32 %X, 0 ; <i1>:0 [#uses=1] + zext i1 %0 to i32 ; <i32>:1 [#uses=1] + ret i32 %1 +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/itofp128.ll b/src/LLVM/test/CodeGen/PowerPC/itofp128.ll new file mode 100644 index 0000000..6d9ef95 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/itofp128.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=ppc64 + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128" +target triple = "powerpc64-apple-darwin9.2.0" + +define i128 @__fixunstfti(ppc_fp128 %a) nounwind { +entry: + %tmp1213 = uitofp i128 0 to ppc_fp128 ; <ppc_fp128> [#uses=1] + %tmp15 = fsub ppc_fp128 %a, %tmp1213 ; <ppc_fp128> [#uses=1] + %tmp2829 = fptoui ppc_fp128 %tmp15 to i64 ; <i64> [#uses=1] + %tmp282930 = zext i64 %tmp2829 to i128 ; <i128> [#uses=1] + %tmp32 = add i128 %tmp282930, 0 ; <i128> [#uses=1] + ret i128 %tmp32 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/lha.ll b/src/LLVM/test/CodeGen/PowerPC/lha.ll new file mode 100644 index 0000000..84033ed --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/lha.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=ppc32 | grep lha + +define i32 @test(i16* %a) { + %tmp.1 = load i16* %a ; <i16> [#uses=1] + %tmp.2 = sext i16 %tmp.1 to i32 ; <i32> [#uses=1] + ret i32 %tmp.2 +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/load-constant-addr.ll b/src/LLVM/test/CodeGen/PowerPC/load-constant-addr.ll new file mode 100644 index 0000000..cb41302 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/load-constant-addr.ll
@@ -0,0 +1,9 @@ +; Should fold the ori into the lfs. +; RUN: llc < %s -march=ppc32 | grep lfs +; RUN: llc < %s -march=ppc32 | not grep ori + +define float @test() { + %tmp.i = load float* inttoptr (i32 186018016 to float*) ; <float> [#uses=1] + ret float %tmp.i +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/long-compare.ll b/src/LLVM/test/CodeGen/PowerPC/long-compare.ll new file mode 100644 index 0000000..6a84513 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/long-compare.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=ppc32 | grep cntlzw +; RUN: llc < %s -march=ppc32 | not grep xori +; RUN: llc < %s -march=ppc32 | not grep {li } +; RUN: llc < %s -march=ppc32 | not grep {mr } + +define i1 @test(i64 %x) { + %tmp = icmp ult i64 %x, 4294967296 + ret i1 %tmp +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/longdbl-truncate.ll b/src/LLVM/test/CodeGen/PowerPC/longdbl-truncate.ll new file mode 100644 index 0000000..e5f63c6 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/longdbl-truncate.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128" +target triple = "powerpc-apple-darwin8" + +define double @SolveCubic(ppc_fp128 %X) { +entry: + %Y = fptrunc ppc_fp128 %X to double + ret double %Y +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/lsr-postinc-pos.ll b/src/LLVM/test/CodeGen/PowerPC/lsr-postinc-pos.ll new file mode 100644 index 0000000..f441e42 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/lsr-postinc-pos.ll
@@ -0,0 +1,32 @@ +; RUN: llc < %s -print-lsr-output |& FileCheck %s + +; The icmp is a post-inc use, and the increment is in %bb11, but the +; scevgep needs to be inserted in %bb so that it is dominated by %t. + +; CHECK: %t = load i8** undef +; CHECK: %scevgep = getelementptr i8* %t, i32 %lsr.iv.next +; CHECK: %c1 = icmp ult i8* %scevgep, undef + +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32" +target triple = "powerpc-apple-darwin9" + +define void @foo() nounwind { +entry: + br label %bb11 + +bb11: + %i = phi i32 [ 0, %entry ], [ %i.next, %bb ] ; <i32> [#uses=3] + %ii = shl i32 %i, 2 ; <i32> [#uses=1] + %c0 = icmp eq i32 %i, undef ; <i1> [#uses=1] + br i1 %c0, label %bb13, label %bb + +bb: + %t = load i8** undef, align 16 ; <i8*> [#uses=1] + %p = getelementptr i8* %t, i32 %ii ; <i8*> [#uses=1] + %c1 = icmp ult i8* %p, undef ; <i1> [#uses=1] + %i.next = add i32 %i, 1 ; <i32> [#uses=1] + br i1 %c1, label %bb11, label %bb13 + +bb13: + unreachable +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/mask64.ll b/src/LLVM/test/CodeGen/PowerPC/mask64.ll new file mode 100644 index 0000000..139621a --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/mask64.ll
@@ -0,0 +1,27 @@ +; RUN: llc < %s + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128" +target triple = "powerpc64-apple-darwin9.2.0" + %struct.re_pattern_buffer = type <{ i8*, i64, i8, [7 x i8] }> + +define i32 @xre_search_2(%struct.re_pattern_buffer* %bufp, i32 %range) nounwind { +entry: + br i1 false, label %bb16, label %bb49 + +bb16: ; preds = %entry + %tmp19 = load i8** null, align 1 ; <i8*> [#uses=1] + %tmp21 = load i8* %tmp19, align 1 ; <i8> [#uses=1] + switch i8 %tmp21, label %bb49 [ + i8 0, label %bb45 + i8 1, label %bb34 + ] + +bb34: ; preds = %bb16 + ret i32 0 + +bb45: ; preds = %bb16 + ret i32 -1 + +bb49: ; preds = %bb16, %entry + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/mem-rr-addr-mode.ll b/src/LLVM/test/CodeGen/PowerPC/mem-rr-addr-mode.ll new file mode 100644 index 0000000..22903dc --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/mem-rr-addr-mode.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep li.*16 +; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep addi + +; Codegen lvx (R+16) as t = li 16, lvx t,R +; This shares the 16 between the two loads. + +define void @func(<4 x float>* %a, <4 x float>* %b) { + %tmp1 = getelementptr <4 x float>* %b, i32 1 ; <<4 x float>*> [#uses=1] + %tmp = load <4 x float>* %tmp1 ; <<4 x float>> [#uses=1] + %tmp3 = getelementptr <4 x float>* %a, i32 1 ; <<4 x float>*> [#uses=1] + %tmp4 = load <4 x float>* %tmp3 ; <<4 x float>> [#uses=1] + %tmp5 = fmul <4 x float> %tmp, %tmp4 ; <<4 x float>> [#uses=1] + %tmp8 = load <4 x float>* %b ; <<4 x float>> [#uses=1] + %tmp9 = fadd <4 x float> %tmp5, %tmp8 ; <<4 x float>> [#uses=1] + store <4 x float> %tmp9, <4 x float>* %a + ret void +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/mem_update.ll b/src/LLVM/test/CodeGen/PowerPC/mem_update.ll new file mode 100644 index 0000000..97efdfd --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/mem_update.ll
@@ -0,0 +1,68 @@ +; RUN: llc < %s -march=ppc32 -enable-ppc-preinc | \ +; RUN: not grep addi +; RUN: llc < %s -march=ppc64 -enable-ppc-preinc | \ +; RUN: not grep addi + +@Glob = global i64 4 + +define i32* @test0(i32* %X, i32* %dest) nounwind { + %Y = getelementptr i32* %X, i32 4 + %A = load i32* %Y + store i32 %A, i32* %dest + ret i32* %Y +} + +define i32* @test1(i32* %X, i32* %dest) nounwind { + %Y = getelementptr i32* %X, i32 4 + %A = load i32* %Y + store i32 %A, i32* %dest + ret i32* %Y +} + +define i16* @test2(i16* %X, i32* %dest) nounwind { + %Y = getelementptr i16* %X, i32 4 + %A = load i16* %Y + %B = sext i16 %A to i32 + store i32 %B, i32* %dest + ret i16* %Y +} + +define i16* @test3(i16* %X, i32* %dest) nounwind { + %Y = getelementptr i16* %X, i32 4 + %A = load i16* %Y + %B = zext i16 %A to i32 + store i32 %B, i32* %dest + ret i16* %Y +} + +define i16* @test3a(i16* %X, i64* %dest) nounwind { + %Y = getelementptr i16* %X, i32 4 + %A = load i16* %Y + %B = sext i16 %A to i64 + store i64 %B, i64* %dest + ret i16* %Y +} + +define i64* @test4(i64* %X, i64* %dest) nounwind { + %Y = getelementptr i64* %X, i32 4 + %A = load i64* %Y + store i64 %A, i64* %dest + ret i64* %Y +} + +define i16* @test5(i16* %X) nounwind { + %Y = getelementptr i16* %X, i32 4 + store i16 7, i16* %Y + ret i16* %Y +} + +define i64* @test6(i64* %X, i64 %A) nounwind { + %Y = getelementptr i64* %X, i32 4 + store i64 %A, i64* %Y + ret i64* %Y +} + +define i64* @test7(i64* %X, i64 %A) nounwind { + store i64 %A, i64* @Glob + ret i64* @Glob +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/mul-neg-power-2.ll b/src/LLVM/test/CodeGen/PowerPC/mul-neg-power-2.ll new file mode 100644 index 0000000..e6a1312 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/mul-neg-power-2.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=ppc32 | not grep mul + +define i32 @test1(i32 %a) { + %tmp.1 = mul i32 %a, -2 ; <i32> [#uses=1] + %tmp.2 = add i32 %tmp.1, 63 ; <i32> [#uses=1] + ret i32 %tmp.2 +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/mul-with-overflow.ll b/src/LLVM/test/CodeGen/PowerPC/mul-with-overflow.ll new file mode 100644 index 0000000..76d06df --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/mul-with-overflow.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=ppc32 + +declare {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b) +define zeroext i1 @a(i32 %x) nounwind { + %res = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %x, i32 3) + %obil = extractvalue {i32, i1} %res, 1 + ret i1 %obil +} + +declare {i32, i1} @llvm.smul.with.overflow.i32(i32 %a, i32 %b) +define zeroext i1 @b(i32 %x) nounwind { + %res = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %x, i32 3) + %obil = extractvalue {i32, i1} %res, 1 + ret i1 %obil +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/mulhs.ll b/src/LLVM/test/CodeGen/PowerPC/mulhs.ll new file mode 100644 index 0000000..f93385d --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/mulhs.ll
@@ -0,0 +1,17 @@ +; All of these ands and shifts should be folded into rlwimi's +; RUN: llc < %s -march=ppc32 -o %t +; RUN: not grep mulhwu %t +; RUN: not grep srawi %t +; RUN: not grep add %t +; RUN: grep mulhw %t | count 1 + +define i32 @mulhs(i32 %a, i32 %b) nounwind { +entry: + %tmp.1 = sext i32 %a to i64 ; <i64> [#uses=1] + %tmp.3 = sext i32 %b to i64 ; <i64> [#uses=1] + %tmp.4 = mul i64 %tmp.3, %tmp.1 ; <i64> [#uses=1] + %tmp.6 = lshr i64 %tmp.4, 32 ; <i64> [#uses=1] + %tmp.7 = trunc i64 %tmp.6 to i32 ; <i32> [#uses=1] + ret i32 %tmp.7 +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/mult-alt-generic-powerpc.ll b/src/LLVM/test/CodeGen/PowerPC/mult-alt-generic-powerpc.ll new file mode 100644 index 0000000..659cdf7 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/mult-alt-generic-powerpc.ll
@@ -0,0 +1,321 @@ +; RUN: llc < %s -march=ppc32 +; ModuleID = 'mult-alt-generic.c' +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32" +target triple = "powerpc" + +@mout0 = common global i32 0, align 4 +@min1 = common global i32 0, align 4 +@marray = common global [2 x i32] zeroinitializer, align 4 + +define void @single_m() nounwind { +entry: + call void asm "foo $1,$0", "=*m,*m"(i32* @mout0, i32* @min1) nounwind + ret void +} + +define void @single_o() nounwind { +entry: + %out0 = alloca i32, align 4 + %index = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %index, align 4 + ret void +} + +define void @single_V() nounwind { +entry: + ret void +} + +define void @single_lt() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r,<r"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* %in1, align 4 + %1 = call i32 asm "foo $1,$0", "=r,r<"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + ret void +} + +define void @single_gt() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r,>r"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* %in1, align 4 + %1 = call i32 asm "foo $1,$0", "=r,r>"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + ret void +} + +define void @single_r() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r,r"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define void @single_i() nounwind 
{ +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + %0 = call i32 asm "foo $1,$0", "=r,i"(i32 1) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define void @single_n() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + %0 = call i32 asm "foo $1,$0", "=r,n"(i32 1) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define void @single_E() nounwind { +entry: + %out0 = alloca double, align 8 + store double 0.000000e+000, double* %out0, align 8 +; No lowering support. +; %0 = call double asm "foo $1,$0", "=r,E"(double 1.000000e+001) nounwind +; store double %0, double* %out0, align 8 + ret void +} + +define void @single_F() nounwind { +entry: + %out0 = alloca double, align 8 + store double 0.000000e+000, double* %out0, align 8 +; No lowering support. +; %0 = call double asm "foo $1,$0", "=r,F"(double 1.000000e+000) nounwind +; store double %0, double* %out0, align 8 + ret void +} + +define void @single_s() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + ret void +} + +define void @single_g() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* @min1, align 4 + %1 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + %2 = call i32 asm "foo $1,$0", "=r,imr"(i32 1) nounwind + store i32 %2, i32* %out0, align 4 + ret void +} + +define void @single_X() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* @min1, align 4 + %1 = call i32 
asm "foo $1,$0", "=r,X"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + %2 = call i32 asm "foo $1,$0", "=r,X"(i32 1) nounwind + store i32 %2, i32* %out0, align 4 + %3 = call i32 asm "foo $1,$0", "=r,X"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind + store i32 %3, i32* %out0, align 4 + %4 = call i32 asm "foo $1,$0", "=r,X"(double 1.000000e+001) nounwind + store i32 %4, i32* %out0, align 4 + %5 = call i32 asm "foo $1,$0", "=r,X"(double 1.000000e+000) nounwind + store i32 %5, i32* %out0, align 4 + ret void +} + +define void @single_p() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + %0 = call i32 asm "foo $1,$0", "=r,r"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define void @multi_m() nounwind { +entry: + %tmp = load i32* @min1, align 4 + call void asm "foo $1,$0", "=*m|r,m|r"(i32* @mout0, i32 %tmp) nounwind + ret void +} + +define void @multi_o() nounwind { +entry: + %out0 = alloca i32, align 4 + %index = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %index, align 4 + ret void +} + +define void @multi_V() nounwind { +entry: + ret void +} + +define void @multi_lt() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|<r"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* %in1, align 4 + %1 = call i32 asm "foo $1,$0", "=r|r,r|r<"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + ret void +} + +define void @multi_gt() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|>r"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 
4 + %tmp1 = load i32* %in1, align 4 + %1 = call i32 asm "foo $1,$0", "=r|r,r|r>"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + ret void +} + +define void @multi_r() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|m"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define void @multi_i() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|i"(i32 1) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define void @multi_n() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|n"(i32 1) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define void @multi_E() nounwind { +entry: + %out0 = alloca double, align 8 + store double 0.000000e+000, double* %out0, align 8 +; No lowering support. +; %0 = call double asm "foo $1,$0", "=r|r,r|E"(double 1.000000e+001) nounwind +; store double %0, double* %out0, align 8 + ret void +} + +define void @multi_F() nounwind { +entry: + %out0 = alloca double, align 8 + store double 0.000000e+000, double* %out0, align 8 +; No lowering support. 
+; %0 = call double asm "foo $1,$0", "=r|r,r|F"(double 1.000000e+000) nounwind +; store double %0, double* %out0, align 8 + ret void +} + +define void @multi_s() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + ret void +} + +define void @multi_g() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* @min1, align 4 + %1 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + %2 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 1) nounwind + store i32 %2, i32* %out0, align 4 + ret void +} + +define void @multi_X() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* @min1, align 4 + %1 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + %2 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 1) nounwind + store i32 %2, i32* %out0, align 4 + %3 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind + store i32 %3, i32* %out0, align 4 + %4 = call i32 asm "foo $1,$0", "=r|r,r|X"(double 1.000000e+001) nounwind + store i32 %4, i32* %out0, align 4 + %5 = call i32 asm "foo $1,$0", "=r|r,r|X"(double 1.000000e+000) nounwind + store i32 %5, i32* %out0, align 4 + ret void +} + +define void @multi_p() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|r"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind + store i32 %0, i32* %out0, 
align 4 + ret void +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/mult-alt-generic-powerpc64.ll b/src/LLVM/test/CodeGen/PowerPC/mult-alt-generic-powerpc64.ll new file mode 100644 index 0000000..3da06f6 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/mult-alt-generic-powerpc64.ll
@@ -0,0 +1,321 @@ +; RUN: llc < %s -march=ppc64 +; ModuleID = 'mult-alt-generic.c' +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64" + +@mout0 = common global i32 0, align 4 +@min1 = common global i32 0, align 4 +@marray = common global [2 x i32] zeroinitializer, align 4 + +define void @single_m() nounwind { +entry: + call void asm "foo $1,$0", "=*m,*m"(i32* @mout0, i32* @min1) nounwind + ret void +} + +define void @single_o() nounwind { +entry: + %out0 = alloca i32, align 4 + %index = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %index, align 4 + ret void +} + +define void @single_V() nounwind { +entry: + ret void +} + +define void @single_lt() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r,<r"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* %in1, align 4 + %1 = call i32 asm "foo $1,$0", "=r,r<"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + ret void +} + +define void @single_gt() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r,>r"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* %in1, align 4 + %1 = call i32 asm "foo $1,$0", "=r,r>"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + ret void +} + +define void @single_r() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r,r"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define void @single_i() 
nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + %0 = call i32 asm "foo $1,$0", "=r,i"(i32 1) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define void @single_n() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + %0 = call i32 asm "foo $1,$0", "=r,n"(i32 1) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define void @single_E() nounwind { +entry: + %out0 = alloca double, align 8 + store double 0.000000e+000, double* %out0, align 8 +; No lowering support. +; %0 = call double asm "foo $1,$0", "=r,E"(double 1.000000e+001) nounwind +; store double %0, double* %out0, align 8 + ret void +} + +define void @single_F() nounwind { +entry: + %out0 = alloca double, align 8 + store double 0.000000e+000, double* %out0, align 8 +; No lowering support. +; %0 = call double asm "foo $1,$0", "=r,F"(double 1.000000e+000) nounwind +; store double %0, double* %out0, align 8 + ret void +} + +define void @single_s() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + ret void +} + +define void @single_g() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* @min1, align 4 + %1 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + %2 = call i32 asm "foo $1,$0", "=r,imr"(i32 1) nounwind + store i32 %2, i32* %out0, align 4 + ret void +} + +define void @single_X() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* @min1, align 4 + %1 = 
call i32 asm "foo $1,$0", "=r,X"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + %2 = call i32 asm "foo $1,$0", "=r,X"(i32 1) nounwind + store i32 %2, i32* %out0, align 4 + %3 = call i32 asm "foo $1,$0", "=r,X"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind + store i32 %3, i32* %out0, align 4 + %4 = call i32 asm "foo $1,$0", "=r,X"(double 1.000000e+001) nounwind + store i32 %4, i32* %out0, align 4 + %5 = call i32 asm "foo $1,$0", "=r,X"(double 1.000000e+000) nounwind + store i32 %5, i32* %out0, align 4 + ret void +} + +define void @single_p() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + %0 = call i32 asm "foo $1,$0", "=r,r"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define void @multi_m() nounwind { +entry: + %tmp = load i32* @min1, align 4 + call void asm "foo $1,$0", "=*m|r,m|r"(i32* @mout0, i32 %tmp) nounwind + ret void +} + +define void @multi_o() nounwind { +entry: + %out0 = alloca i32, align 4 + %index = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %index, align 4 + ret void +} + +define void @multi_V() nounwind { +entry: + ret void +} + +define void @multi_lt() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|<r"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* %in1, align 4 + %1 = call i32 asm "foo $1,$0", "=r|r,r|r<"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + ret void +} + +define void @multi_gt() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|>r"(i32 %tmp) nounwind + store i32 %0, i32* 
%out0, align 4 + %tmp1 = load i32* %in1, align 4 + %1 = call i32 asm "foo $1,$0", "=r|r,r|r>"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + ret void +} + +define void @multi_r() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|m"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define void @multi_i() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|i"(i32 1) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define void @multi_n() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|n"(i32 1) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define void @multi_E() nounwind { +entry: + %out0 = alloca double, align 8 + store double 0.000000e+000, double* %out0, align 8 +; No lowering support. +; %0 = call double asm "foo $1,$0", "=r|r,r|E"(double 1.000000e+001) nounwind +; store double %0, double* %out0, align 8 + ret void +} + +define void @multi_F() nounwind { +entry: + %out0 = alloca double, align 8 + store double 0.000000e+000, double* %out0, align 8 +; No lowering support. 
+; %0 = call double asm "foo $1,$0", "=r|r,r|F"(double 1.000000e+000) nounwind +; store double %0, double* %out0, align 8 + ret void +} + +define void @multi_s() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + ret void +} + +define void @multi_g() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* @min1, align 4 + %1 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + %2 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 1) nounwind + store i32 %2, i32* %out0, align 4 + ret void +} + +define void @multi_X() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* @min1, align 4 + %1 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + %2 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 1) nounwind + store i32 %2, i32* %out0, align 4 + %3 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind + store i32 %3, i32* %out0, align 4 + %4 = call i32 asm "foo $1,$0", "=r|r,r|X"(double 1.000000e+001) nounwind + store i32 %4, i32* %out0, align 4 + %5 = call i32 asm "foo $1,$0", "=r|r,r|X"(double 1.000000e+000) nounwind + store i32 %5, i32* %out0, align 4 + ret void +} + +define void @multi_p() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|r"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind + store i32 %0, i32* %out0, 
align 4 + ret void +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/neg.ll b/src/LLVM/test/CodeGen/PowerPC/neg.ll new file mode 100644 index 0000000..fb7ccac --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/neg.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=ppc32 | grep neg + +define i32 @test(i32 %X) { + %Y = sub i32 0, %X ; <i32> [#uses=1] + ret i32 %Y +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/no-dead-strip.ll b/src/LLVM/test/CodeGen/PowerPC/no-dead-strip.ll new file mode 100644 index 0000000..3459413 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/no-dead-strip.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s | grep {no_dead_strip.*_X} + +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "powerpc-apple-darwin8.8.0" +@X = weak global i32 0 ; <i32*> [#uses=1] +@.str = internal constant [4 x i8] c"t.c\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1] +@llvm.used = appending global [1 x i8*] [ i8* bitcast (i32* @X to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] +
diff --git a/src/LLVM/test/CodeGen/PowerPC/or-addressing-mode.ll b/src/LLVM/test/CodeGen/PowerPC/or-addressing-mode.ll new file mode 100644 index 0000000..66c48ca --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/or-addressing-mode.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -mtriple=powerpc-apple-darwin8 | not grep ori +; RUN: llc < %s -mtriple=powerpc-apple-darwin8 | not grep rlwimi + +define i32 @test1(i8* %P) { + %tmp.2.i = ptrtoint i8* %P to i32 ; <i32> [#uses=2] + %tmp.4.i = and i32 %tmp.2.i, -65536 ; <i32> [#uses=1] + %tmp.10.i = lshr i32 %tmp.2.i, 5 ; <i32> [#uses=1] + %tmp.11.i = and i32 %tmp.10.i, 2040 ; <i32> [#uses=1] + %tmp.13.i = or i32 %tmp.11.i, %tmp.4.i ; <i32> [#uses=1] + %tmp.14.i = inttoptr i32 %tmp.13.i to i32* ; <i32*> [#uses=1] + %tmp.3 = load i32* %tmp.14.i ; <i32> [#uses=1] + ret i32 %tmp.3 +} + +define i32 @test2(i32 %P) { + %tmp.2 = shl i32 %P, 4 ; <i32> [#uses=1] + %tmp.3 = or i32 %tmp.2, 2 ; <i32> [#uses=1] + %tmp.4 = inttoptr i32 %tmp.3 to i32* ; <i32*> [#uses=1] + %tmp.5 = load i32* %tmp.4 ; <i32> [#uses=1] + ret i32 %tmp.5 +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/ppc-prologue.ll b/src/LLVM/test/CodeGen/PowerPC/ppc-prologue.ll new file mode 100644 index 0000000..5538371 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/ppc-prologue.ll
@@ -0,0 +1,26 @@ +; RUN: llc < %s -mtriple=powerpc-apple-darwin8 -disable-fp-elim | FileCheck %s + +define i32 @_Z4funci(i32 %a) ssp { +; CHECK: mflr r0 +; CHECK-NEXT: stw r31, -4(r1) +; CHECK-NEXT: stw r0, 8(r1) +; CHECK-NEXT: stwu r1, -80(r1) +; CHECK: mr r31, r1 +entry: + %a_addr = alloca i32 ; <i32*> [#uses=2] + %retval = alloca i32 ; <i32*> [#uses=2] + %0 = alloca i32 ; <i32*> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store i32 %a, i32* %a_addr + %1 = call i32 @_Z3barPi(i32* %a_addr) ; <i32> [#uses=1] + store i32 %1, i32* %0, align 4 + %2 = load i32* %0, align 4 ; <i32> [#uses=1] + store i32 %2, i32* %retval, align 4 + br label %return + +return: ; preds = %entry + %retval1 = load i32* %retval ; <i32> [#uses=1] + ret i32 %retval1 +} + +declare i32 @_Z3barPi(i32*)
diff --git a/src/LLVM/test/CodeGen/PowerPC/ppc32-vaarg.ll b/src/LLVM/test/CodeGen/PowerPC/ppc32-vaarg.ll new file mode 100644 index 0000000..6042991 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/ppc32-vaarg.ll
@@ -0,0 +1,167 @@ +; RUN: llc -O0 < %s | FileCheck %s +;ModuleID = 'test.c' +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32" +target triple = "powerpc-unknown-freebsd9.0" + +%struct.__va_list_tag = type { i8, i8, i16, i8*, i8* } + +@var1 = common global i64 0, align 8 +@var2 = common global double 0.0, align 8 +@var3 = common global i32 0, align 4 + +define void @ppcvaargtest(%struct.__va_list_tag* %ap) nounwind { + entry: + %x = va_arg %struct.__va_list_tag* %ap, i64; Get from r5,r6 +; CHECK: lbz 4, 0(3) +; CHECK-NEXT: lwz 5, 4(3) +; CHECK-NEXT: rlwinm 6, 4, 0, 31, 31 +; CHECK-NEXT: cmplwi 0, 6, 0 +; CHECK-NEXT: addi 6, 4, 1 +; CHECK-NEXT: stw 3, -4(1) +; CHECK-NEXT: stw 6, -8(1) +; CHECK-NEXT: stw 4, -12(1) +; CHECK-NEXT: stw 5, -16(1) +; CHECK-NEXT: bne 0, .LBB0_2 +; CHECK-NEXT: # BB#1: # %entry +; CHECK-NEXT: lwz 3, -12(1) +; CHECK-NEXT: stw 3, -8(1) +; CHECK-NEXT: .LBB0_2: # %entry +; CHECK-NEXT: lwz 3, -8(1) +; CHECK-NEXT: lwz 4, -4(1) +; CHECK-NEXT: lwz 5, 8(4) +; CHECK-NEXT: slwi 6, 3, 2 +; CHECK-NEXT: addi 7, 3, 2 +; CHECK-NEXT: cmpwi 0, 3, 8 +; CHECK-NEXT: lwz 3, -16(1) +; CHECK-NEXT: addi 8, 3, 4 +; CHECK-NEXT: add 5, 5, 6 +; CHECK-NEXT: mfcr 0 # cr0 +; CHECK-NEXT: stw 0, -20(1) +; CHECK-NEXT: stw 5, -24(1) +; CHECK-NEXT: stw 3, -28(1) +; CHECK-NEXT: stw 7, -32(1) +; CHECK-NEXT: stw 8, -36(1) +; CHECK-NEXT: blt 0, .LBB0_4 +; CHECK-NEXT: # BB#3: # %entry +; CHECK-NEXT: lwz 3, -36(1) +; CHECK-NEXT: stw 3, -28(1) +; CHECK-NEXT: .LBB0_4: # %entry +; CHECK-NEXT: lwz 3, -28(1) +; CHECK-NEXT: lwz 4, -32(1) +; CHECK-NEXT: lwz 5, -4(1) +; CHECK-NEXT: stb 4, 0(5) +; CHECK-NEXT: lwz 4, -24(1) +; CHECK-NEXT: lwz 0, -20(1) +; CHECK-NEXT: mtcrf 128, 0 +; CHECK-NEXT: stw 3, -40(1) +; CHECK-NEXT: stw 4, -44(1) +; CHECK-NEXT: blt 0, .LBB0_6 +; CHECK-NEXT: # BB#5: # %entry +; CHECK-NEXT: lwz 3, -16(1) +; CHECK-NEXT: stw 3, -44(1) +; CHECK-NEXT: .LBB0_6: # %entry +; CHECK-NEXT: lwz 3, -44(1) +; CHECK-NEXT: 
lwz 4, -40(1) +; CHECK-NEXT: lwz 5, -4(1) +; CHECK-NEXT: stw 4, 4(5) + store i64 %x, i64* @var1, align 8 +; CHECK-NEXT: lis 4, var1@ha +; CHECK-NEXT: lwz 6, 4(3) +; CHECK-NEXT: lwz 3, 0(3) +; CHECK-NEXT: la 7, var1@l(4) +; CHECK-NEXT: stw 3, var1@l(4) +; CHECK-NEXT: stw 6, 4(7) + %y = va_arg %struct.__va_list_tag* %ap, double; From f1 +; CHECK-NEXT: lbz 3, 1(5) +; CHECK-NEXT: lwz 4, 4(5) +; CHECK-NEXT: lwz 6, 8(5) +; CHECK-NEXT: slwi 7, 3, 3 +; CHECK-NEXT: add 6, 6, 7 +; CHECK-NEXT: addi 7, 3, 1 +; CHECK-NEXT: cmpwi 0, 3, 8 +; CHECK-NEXT: addi 3, 4, 8 +; CHECK-NEXT: addi 6, 6, 32 +; CHECK-NEXT: mr 8, 4 +; CHECK-NEXT: mfcr 0 # cr0 +; CHECK-NEXT: stw 0, -48(1) +; CHECK-NEXT: stw 4, -52(1) +; CHECK-NEXT: stw 6, -56(1) +; CHECK-NEXT: stw 7, -60(1) +; CHECK-NEXT: stw 3, -64(1) +; CHECK-NEXT: stw 8, -68(1) +; CHECK-NEXT: blt 0, .LBB0_8 +; CHECK-NEXT: # BB#7: # %entry +; CHECK-NEXT: lwz 3, -64(1) +; CHECK-NEXT: stw 3, -68(1) +; CHECK-NEXT: .LBB0_8: # %entry +; CHECK-NEXT: lwz 3, -68(1) +; CHECK-NEXT: lwz 4, -60(1) +; CHECK-NEXT: lwz 5, -4(1) +; CHECK-NEXT: stb 4, 1(5) +; CHECK-NEXT: lwz 4, -56(1) +; CHECK-NEXT: lwz 0, -48(1) +; CHECK-NEXT: mtcrf 128, 0 +; CHECK-NEXT: stw 4, -72(1) +; CHECK-NEXT: stw 3, -76(1) +; CHECK-NEXT: blt 0, .LBB0_10 +; CHECK-NEXT: # BB#9: # %entry +; CHECK-NEXT: lwz 3, -52(1) +; CHECK-NEXT: stw 3, -72(1) +; CHECK-NEXT: .LBB0_10: # %entry +; CHECK-NEXT: lwz 3, -72(1) +; CHECK-NEXT: lwz 4, -76(1) +; CHECK-NEXT: lwz 5, -4(1) +; CHECK-NEXT: stw 4, 4(5) +; CHECK-NEXT: lfd 0, 0(3) + store double %y, double* @var2, align 8 +; CHECK-NEXT: lis 3, var2@ha +; CHECK-NEXT: stfd 0, var2@l(3) + %z = va_arg %struct.__va_list_tag* %ap, i32; From r7 +; CHECK-NEXT: lbz 3, 0(5) +; CHECK-NEXT: lwz 4, 4(5) +; CHECK-NEXT: lwz 6, 8(5) +; CHECK-NEXT: slwi 7, 3, 2 +; CHECK-NEXT: addi 8, 3, 1 +; CHECK-NEXT: cmpwi 0, 3, 8 +; CHECK-NEXT: addi 3, 4, 4 +; CHECK-NEXT: add 6, 6, 7 +; CHECK-NEXT: mr 7, 4 +; CHECK-NEXT: stw 6, -80(1) +; CHECK-NEXT: stw 8, -84(1) +; CHECK-NEXT: stw 
3, -88(1) +; CHECK-NEXT: stw 4, -92(1) +; CHECK-NEXT: stw 7, -96(1) +; CHECK-NEXT: mfcr 0 # cr0 +; CHECK-NEXT: stw 0, -100(1) +; CHECK-NEXT: blt 0, .LBB0_12 +; CHECK-NEXT: # BB#11: # %entry +; CHECK-NEXT: lwz 3, -88(1) +; CHECK-NEXT: stw 3, -96(1) +; CHECK-NEXT: .LBB0_12: # %entry +; CHECK-NEXT: lwz 3, -96(1) +; CHECK-NEXT: lwz 4, -84(1) +; CHECK-NEXT: lwz 5, -4(1) +; CHECK-NEXT: stb 4, 0(5) +; CHECK-NEXT: lwz 4, -80(1) +; CHECK-NEXT: lwz 0, -100(1) +; CHECK-NEXT: mtcrf 128, 0 +; CHECK-NEXT: stw 4, -104(1) +; CHECK-NEXT: stw 3, -108(1) +; CHECK-NEXT: blt 0, .LBB0_14 +; CHECK-NEXT: # BB#13: # %entry +; CHECK-NEXT: lwz 3, -92(1) +; CHECK-NEXT: stw 3, -104(1) +; CHECK-NEXT: .LBB0_14: # %entry +; CHECK-NEXT: lwz 3, -104(1) +; CHECK-NEXT: lwz 4, -108(1) +; CHECK-NEXT: lwz 5, -4(1) +; CHECK-NEXT: stw 4, 4(5) +; CHECK-NEXT: lwz 3, 0(3) + store i32 %z, i32* @var3, align 4 +; CHECK-NEXT: lis 4, var3@ha +; CHECK-NEXT: stw 3, var3@l(4) + ret void +; CHECK-NEXT: stw 5, -112(1) +; CHECK-NEXT: blr +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/ppc64-32bit-addic.ll b/src/LLVM/test/CodeGen/PowerPC/ppc64-32bit-addic.ll new file mode 100644 index 0000000..4d323da --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/ppc64-32bit-addic.ll
@@ -0,0 +1,29 @@ +; Check that the ADDIC optimizations are not applied on PPC64 +; RUN: llc < %s | FileCheck %s +; ModuleID = 'os_unix.c' +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-freebsd9.0" + +define i32 @notZero(i32 %call) nounwind { +entry: +; CHECK-NOT: addic + %not.tobool = icmp ne i32 %call, 0 + %. = zext i1 %not.tobool to i32 + ret i32 %. +} + +define i32 @isMinusOne(i32 %call) nounwind { +entry: +; CHECK-NOT: addic + %not.tobool = icmp eq i32 %call, -1 + %. = zext i1 %not.tobool to i32 + ret i32 %. +} + +define i32 @isNotMinusOne(i32 %call) nounwind { +entry: +; CHECK-NOT: addic + %not.tobool = icmp ne i32 %call, -1 + %. = zext i1 %not.tobool to i32 + ret i32 %. +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/ppc64-crash.ll b/src/LLVM/test/CodeGen/PowerPC/ppc64-crash.ll new file mode 100644 index 0000000..073c322 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/ppc64-crash.ll
@@ -0,0 +1,14 @@ +; RUN: llc %s -o - + +; ModuleID = 'undo.c' +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-freebsd" + +%struct.__sFILE = type {} +%struct.pos_T = type { i64 } + +; check that we're not copying stuff between R and X registers +define internal void @serialize_pos(%struct.pos_T* byval %pos, %struct.__sFILE* %fp) nounwind { +entry: + ret void +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/ppcf128-1-opt.ll b/src/LLVM/test/CodeGen/PowerPC/ppcf128-1-opt.ll new file mode 100644 index 0000000..2fc1720 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/ppcf128-1-opt.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s > %t +; ModuleID = '<stdin>' +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128" +target triple = "powerpc-apple-darwin8" + +define ppc_fp128 @plus(ppc_fp128 %x, ppc_fp128 %y) { +entry: + %tmp3 = fadd ppc_fp128 %x, %y ; <ppc_fp128> [#uses=1] + ret ppc_fp128 %tmp3 +} + +define ppc_fp128 @minus(ppc_fp128 %x, ppc_fp128 %y) { +entry: + %tmp3 = fsub ppc_fp128 %x, %y ; <ppc_fp128> [#uses=1] + ret ppc_fp128 %tmp3 +} + +define ppc_fp128 @times(ppc_fp128 %x, ppc_fp128 %y) { +entry: + %tmp3 = fmul ppc_fp128 %x, %y ; <ppc_fp128> [#uses=1] + ret ppc_fp128 %tmp3 +} + +define ppc_fp128 @divide(ppc_fp128 %x, ppc_fp128 %y) { +entry: + %tmp3 = fdiv ppc_fp128 %x, %y ; <ppc_fp128> [#uses=1] + ret ppc_fp128 %tmp3 +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/ppcf128-1.ll b/src/LLVM/test/CodeGen/PowerPC/ppcf128-1.ll new file mode 100644 index 0000000..1047fe5 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/ppcf128-1.ll
@@ -0,0 +1,92 @@ +; RUN: opt < %s -std-compile-opts | llc > %t +; ModuleID = 'ld3.c' +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128" +target triple = "powerpc-apple-darwin8" + +define ppc_fp128 @plus(ppc_fp128 %x, ppc_fp128 %y) { +entry: + %x_addr = alloca ppc_fp128 ; <ppc_fp128*> [#uses=2] + %y_addr = alloca ppc_fp128 ; <ppc_fp128*> [#uses=2] + %retval = alloca ppc_fp128, align 16 ; <ppc_fp128*> [#uses=2] + %tmp = alloca ppc_fp128, align 16 ; <ppc_fp128*> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store ppc_fp128 %x, ppc_fp128* %x_addr + store ppc_fp128 %y, ppc_fp128* %y_addr + %tmp1 = load ppc_fp128* %x_addr, align 16 ; <ppc_fp128> [#uses=1] + %tmp2 = load ppc_fp128* %y_addr, align 16 ; <ppc_fp128> [#uses=1] + %tmp3 = fadd ppc_fp128 %tmp1, %tmp2 ; <ppc_fp128> [#uses=1] + store ppc_fp128 %tmp3, ppc_fp128* %tmp, align 16 + %tmp4 = load ppc_fp128* %tmp, align 16 ; <ppc_fp128> [#uses=1] + store ppc_fp128 %tmp4, ppc_fp128* %retval, align 16 + br label %return + +return: ; preds = %entry + %retval5 = load ppc_fp128* %retval ; <ppc_fp128> [#uses=1] + ret ppc_fp128 %retval5 +} + +define ppc_fp128 @minus(ppc_fp128 %x, ppc_fp128 %y) { +entry: + %x_addr = alloca ppc_fp128 ; <ppc_fp128*> [#uses=2] + %y_addr = alloca ppc_fp128 ; <ppc_fp128*> [#uses=2] + %retval = alloca ppc_fp128, align 16 ; <ppc_fp128*> [#uses=2] + %tmp = alloca ppc_fp128, align 16 ; <ppc_fp128*> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store ppc_fp128 %x, ppc_fp128* %x_addr + store ppc_fp128 %y, ppc_fp128* %y_addr + %tmp1 = load ppc_fp128* %x_addr, align 16 ; <ppc_fp128> [#uses=1] + %tmp2 = load ppc_fp128* %y_addr, align 16 ; <ppc_fp128> [#uses=1] + %tmp3 = fsub ppc_fp128 %tmp1, %tmp2 ; <ppc_fp128> [#uses=1] + store ppc_fp128 %tmp3, ppc_fp128* %tmp, align 16 + %tmp4 = load ppc_fp128* %tmp, align 16 ; <ppc_fp128> [#uses=1] + store ppc_fp128 %tmp4, ppc_fp128* 
%retval, align 16 + br label %return + +return: ; preds = %entry + %retval5 = load ppc_fp128* %retval ; <ppc_fp128> [#uses=1] + ret ppc_fp128 %retval5 +} + +define ppc_fp128 @times(ppc_fp128 %x, ppc_fp128 %y) { +entry: + %x_addr = alloca ppc_fp128 ; <ppc_fp128*> [#uses=2] + %y_addr = alloca ppc_fp128 ; <ppc_fp128*> [#uses=2] + %retval = alloca ppc_fp128, align 16 ; <ppc_fp128*> [#uses=2] + %tmp = alloca ppc_fp128, align 16 ; <ppc_fp128*> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store ppc_fp128 %x, ppc_fp128* %x_addr + store ppc_fp128 %y, ppc_fp128* %y_addr + %tmp1 = load ppc_fp128* %x_addr, align 16 ; <ppc_fp128> [#uses=1] + %tmp2 = load ppc_fp128* %y_addr, align 16 ; <ppc_fp128> [#uses=1] + %tmp3 = fmul ppc_fp128 %tmp1, %tmp2 ; <ppc_fp128> [#uses=1] + store ppc_fp128 %tmp3, ppc_fp128* %tmp, align 16 + %tmp4 = load ppc_fp128* %tmp, align 16 ; <ppc_fp128> [#uses=1] + store ppc_fp128 %tmp4, ppc_fp128* %retval, align 16 + br label %return + +return: ; preds = %entry + %retval5 = load ppc_fp128* %retval ; <ppc_fp128> [#uses=1] + ret ppc_fp128 %retval5 +} + +define ppc_fp128 @divide(ppc_fp128 %x, ppc_fp128 %y) { +entry: + %x_addr = alloca ppc_fp128 ; <ppc_fp128*> [#uses=2] + %y_addr = alloca ppc_fp128 ; <ppc_fp128*> [#uses=2] + %retval = alloca ppc_fp128, align 16 ; <ppc_fp128*> [#uses=2] + %tmp = alloca ppc_fp128, align 16 ; <ppc_fp128*> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store ppc_fp128 %x, ppc_fp128* %x_addr + store ppc_fp128 %y, ppc_fp128* %y_addr + %tmp1 = load ppc_fp128* %x_addr, align 16 ; <ppc_fp128> [#uses=1] + %tmp2 = load ppc_fp128* %y_addr, align 16 ; <ppc_fp128> [#uses=1] + %tmp3 = fdiv ppc_fp128 %tmp1, %tmp2 ; <ppc_fp128> [#uses=1] + store ppc_fp128 %tmp3, ppc_fp128* %tmp, align 16 + %tmp4 = load ppc_fp128* %tmp, align 16 ; <ppc_fp128> [#uses=1] + store ppc_fp128 %tmp4, ppc_fp128* %retval, align 16 + br label %return + +return: ; preds = %entry + %retval5 = load ppc_fp128* %retval ; 
<ppc_fp128> [#uses=1] + ret ppc_fp128 %retval5 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/ppcf128-2.ll b/src/LLVM/test/CodeGen/PowerPC/ppcf128-2.ll new file mode 100644 index 0000000..7eee354 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/ppcf128-2.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=ppc64 + +define i64 @__fixtfdi(ppc_fp128 %a) nounwind { +entry: + br i1 false, label %bb, label %bb8 +bb: ; preds = %entry + %tmp5 = fsub ppc_fp128 0xM80000000000000000000000000000000, %a ; <ppc_fp128> [#uses=1] + %tmp6 = tail call i64 @__fixunstfdi( ppc_fp128 %tmp5 ) nounwind ; <i64> [#uses=0] + ret i64 0 +bb8: ; preds = %entry + ret i64 0 +} + +declare i64 @__fixunstfdi(ppc_fp128)
diff --git a/src/LLVM/test/CodeGen/PowerPC/ppcf128-3.ll b/src/LLVM/test/CodeGen/PowerPC/ppcf128-3.ll new file mode 100644 index 0000000..5043b62 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/ppcf128-3.ll
@@ -0,0 +1,32 @@ +; RUN: llc < %s -march=ppc32 + %struct.stp_sequence = type { double, double } + +define i32 @stp_sequence_set_short_data(%struct.stp_sequence* %sequence, i32 %count, i16* %data) { +entry: + %tmp1112 = sitofp i16 0 to ppc_fp128 ; <ppc_fp128> [#uses=1] + %tmp13 = call i32 (...)* @__inline_isfinite( ppc_fp128 %tmp1112 ) nounwind ; <i32> [#uses=0] + ret i32 0 +} + +define i32 @stp_sequence_set_short_data2(%struct.stp_sequence* %sequence, i32 %count, i16* %data) { +entry: + %tmp1112 = sitofp i8 0 to ppc_fp128 ; <ppc_fp128> [#uses=1] + %tmp13 = call i32 (...)* @__inline_isfinite( ppc_fp128 %tmp1112 ) nounwind ; <i32> [#uses=0] + ret i32 0 +} + +define i32 @stp_sequence_set_short_data3(%struct.stp_sequence* %sequence, i32 %count, i16* %data) { +entry: + %tmp1112 = uitofp i16 0 to ppc_fp128 ; <ppc_fp128> [#uses=1] + %tmp13 = call i32 (...)* @__inline_isfinite( ppc_fp128 %tmp1112 ) nounwind ; <i32> [#uses=0] + ret i32 0 +} + +define i32 @stp_sequence_set_short_data4(%struct.stp_sequence* %sequence, i32 %count, i16* %data) { +entry: + %tmp1112 = uitofp i8 0 to ppc_fp128 ; <ppc_fp128> [#uses=1] + %tmp13 = call i32 (...)* @__inline_isfinite( ppc_fp128 %tmp1112 ) nounwind ; <i32> [#uses=0] + ret i32 0 +} + +declare i32 @__inline_isfinite(...)
diff --git a/src/LLVM/test/CodeGen/PowerPC/ppcf128-4.ll b/src/LLVM/test/CodeGen/PowerPC/ppcf128-4.ll new file mode 100644 index 0000000..104a25e --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/ppcf128-4.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=ppc32 + +define ppc_fp128 @__floatditf(i64 %u) nounwind { +entry: + %tmp6 = fmul ppc_fp128 0xM00000000000000000000000000000000, 0xM41F00000000000000000000000000000 + %tmp78 = trunc i64 %u to i32 + %tmp789 = uitofp i32 %tmp78 to ppc_fp128 + %tmp11 = fadd ppc_fp128 %tmp789, %tmp6 + ret ppc_fp128 %tmp11 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/pr3711_widen_bit.ll b/src/LLVM/test/CodeGen/PowerPC/pr3711_widen_bit.ll new file mode 100644 index 0000000..7abdeda --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/pr3711_widen_bit.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=ppc32 -mcpu=g5 + +; Test that causes a abort in expanding a bit convert due to a missing support +; for widening. + +define i32 @main() nounwind { +entry: + br i1 icmp ne (i32 trunc (i64 bitcast (<2 x i32> <i32 2, i32 2> to i64) to i32), i32 2), label %bb, label %bb1 + +bb: ; preds = %entry + tail call void @abort() noreturn nounwind + unreachable + +bb1: ; preds = %entry + ret i32 0 +} + +declare void @abort() noreturn nounwind
diff --git a/src/LLVM/test/CodeGen/PowerPC/private.ll b/src/LLVM/test/CodeGen/PowerPC/private.ll new file mode 100644 index 0000000..f9405f6 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/private.ll
@@ -0,0 +1,24 @@ +; Test to make sure that the 'private' is used correctly. +; +; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu > %t +; RUN: grep .Lfoo: %t +; RUN: grep bl.*\.Lfoo %t +; RUN: grep .Lbaz: %t +; RUN: grep lis.*\.Lbaz %t +; RUN: llc < %s -mtriple=powerpc-apple-darwin > %t +; RUN: grep L_foo: %t +; RUN: grep bl.*\L_foo %t +; RUN: grep L_baz: %t +; RUN: grep lis.*\L_baz %t + +define private void @foo() nounwind { + ret void +} + +@baz = private global i32 4 + +define i32 @bar() nounwind { + call void @foo() + %1 = load i32* @baz, align 4 + ret i32 %1 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/reg-coalesce-simple.ll b/src/LLVM/test/CodeGen/PowerPC/reg-coalesce-simple.ll new file mode 100644 index 0000000..9404cfe --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/reg-coalesce-simple.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=ppc32 | not grep or + +%struct.foo = type { i32, i32, [0 x i8] } + +define i32 @test(%struct.foo* %X) nounwind { + %tmp1 = getelementptr %struct.foo* %X, i32 0, i32 2, i32 100 ; <i8*> [#uses=1] + %tmp = load i8* %tmp1 ; <i8> [#uses=1] + %tmp2 = zext i8 %tmp to i32 ; <i32> [#uses=1] + ret i32 %tmp2 +} + +
diff --git a/src/LLVM/test/CodeGen/PowerPC/retaddr.ll b/src/LLVM/test/CodeGen/PowerPC/retaddr.ll new file mode 100644 index 0000000..cf16b4c --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/retaddr.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=ppc32 | grep mflr +; RUN: llc < %s -march=ppc32 | grep lwz +; RUN: llc < %s -march=ppc64 | grep {ld r., 16(r1)} + +target triple = "powerpc-apple-darwin8" + +define void @foo(i8** %X) nounwind { +entry: + %tmp = tail call i8* @llvm.returnaddress( i32 0 ) ; <i8*> [#uses=1] + store i8* %tmp, i8** %X, align 4 + ret void +} + +declare i8* @llvm.returnaddress(i32) +
diff --git a/src/LLVM/test/CodeGen/PowerPC/return-val-i128.ll b/src/LLVM/test/CodeGen/PowerPC/return-val-i128.ll new file mode 100644 index 0000000..e14a438 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/return-val-i128.ll
@@ -0,0 +1,36 @@ +; RUN: llc < %s -march=ppc64 + +define i128 @__fixsfdi(float %a) { +entry: + %a_addr = alloca float ; <float*> [#uses=4] + %retval = alloca i128, align 16 ; <i128*> [#uses=2] + %tmp = alloca i128, align 16 ; <i128*> [#uses=3] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store float %a, float* %a_addr + %tmp1 = load float* %a_addr, align 4 ; <float> [#uses=1] + %tmp2 = fcmp olt float %tmp1, 0.000000e+00 ; <i1> [#uses=1] + %tmp23 = zext i1 %tmp2 to i8 ; <i8> [#uses=1] + %toBool = icmp ne i8 %tmp23, 0 ; <i1> [#uses=1] + br i1 %toBool, label %bb, label %bb8 +bb: ; preds = %entry + %tmp4 = load float* %a_addr, align 4 ; <float> [#uses=1] + %tmp5 = fsub float -0.000000e+00, %tmp4 ; <float> [#uses=1] + %tmp6 = call i128 @__fixunssfDI( float %tmp5 ) nounwind ; <i128> [#uses=1] + %tmp7 = sub i128 0, %tmp6 ; <i128> [#uses=1] + store i128 %tmp7, i128* %tmp, align 16 + br label %bb11 +bb8: ; preds = %entry + %tmp9 = load float* %a_addr, align 4 ; <float> [#uses=1] + %tmp10 = call i128 @__fixunssfDI( float %tmp9 ) nounwind ; <i128> [#uses=1] + store i128 %tmp10, i128* %tmp, align 16 + br label %bb11 +bb11: ; preds = %bb8, %bb + %tmp12 = load i128* %tmp, align 16 ; <i128> [#uses=1] + store i128 %tmp12, i128* %retval, align 16 + br label %return +return: ; preds = %bb11 + %retval13 = load i128* %retval ; <i128> [#uses=1] + ret i128 %retval13 +} + +declare i128 @__fixunssfDI(float)
diff --git a/src/LLVM/test/CodeGen/PowerPC/rlwimi-commute.ll b/src/LLVM/test/CodeGen/PowerPC/rlwimi-commute.ll new file mode 100644 index 0000000..d34d497 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/rlwimi-commute.ll
@@ -0,0 +1,34 @@ +; RUN: llc < %s -march=ppc32 | grep rlwimi +; RUN: llc < %s -march=ppc32 | not grep {or } + +; Make sure there is no register-register copies here. + +define void @test1(i32* %A, i32* %B, i32* %D, i32* %E) { + %A.upgrd.1 = load i32* %A ; <i32> [#uses=2] + %B.upgrd.2 = load i32* %B ; <i32> [#uses=1] + %X = and i32 %A.upgrd.1, 15 ; <i32> [#uses=1] + %Y = and i32 %B.upgrd.2, -16 ; <i32> [#uses=1] + %Z = or i32 %X, %Y ; <i32> [#uses=1] + store i32 %Z, i32* %D + store i32 %A.upgrd.1, i32* %E + ret void +} + +define void @test2(i32* %A, i32* %B, i32* %D, i32* %E) { + %A.upgrd.3 = load i32* %A ; <i32> [#uses=1] + %B.upgrd.4 = load i32* %B ; <i32> [#uses=2] + %X = and i32 %A.upgrd.3, 15 ; <i32> [#uses=1] + %Y = and i32 %B.upgrd.4, -16 ; <i32> [#uses=1] + %Z = or i32 %X, %Y ; <i32> [#uses=1] + store i32 %Z, i32* %D + store i32 %B.upgrd.4, i32* %E + ret void +} + +define i32 @test3(i32 %a, i32 %b) { + %tmp.1 = and i32 %a, 15 ; <i32> [#uses=1] + %tmp.3 = and i32 %b, 240 ; <i32> [#uses=1] + %tmp.4 = or i32 %tmp.3, %tmp.1 ; <i32> [#uses=1] + ret i32 %tmp.4 +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/rlwimi-keep-rsh.ll b/src/LLVM/test/CodeGen/PowerPC/rlwimi-keep-rsh.ll new file mode 100644 index 0000000..3dc8061 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/rlwimi-keep-rsh.ll
@@ -0,0 +1,28 @@ +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin | FileCheck %s +; Formerly dropped the RHS of %tmp6 when constructing rlwimi. +; 7346117 + +@foo = external global i32 + +define void @xxx(i32 %a, i32 %b, i32 %c, i32 %d) nounwind optsize { +; CHECK: _xxx: +; CHECK: or +; CHECK: and +; CHECK: rlwimi +entry: + %tmp0 = ashr i32 %d, 31 + %tmp1 = and i32 %tmp0, 255 + %tmp2 = xor i32 %tmp1, 255 + %tmp3 = ashr i32 %b, 31 + %tmp4 = ashr i32 %a, 4 + %tmp5 = or i32 %tmp3, %tmp4 + %tmp6 = and i32 %tmp2, %tmp5 + %tmp7 = shl i32 %c, 8 + %tmp8 = or i32 %tmp6, %tmp7 + store i32 %tmp8, i32* @foo, align 4 + br label %return + +return: + ret void +; CHECK: blr +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/rlwimi.ll b/src/LLVM/test/CodeGen/PowerPC/rlwimi.ll new file mode 100644 index 0000000..e525875 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/rlwimi.ll
@@ -0,0 +1,70 @@ +; All of these ands and shifts should be folded into rlwimi's +; RUN: llc < %s -march=ppc32 | not grep and +; RUN: llc < %s -march=ppc32 | grep rlwimi | count 8 + +define i32 @test1(i32 %x, i32 %y) { +entry: + %tmp.3 = shl i32 %x, 16 ; <i32> [#uses=1] + %tmp.7 = and i32 %y, 65535 ; <i32> [#uses=1] + %tmp.9 = or i32 %tmp.7, %tmp.3 ; <i32> [#uses=1] + ret i32 %tmp.9 +} + +define i32 @test2(i32 %x, i32 %y) { +entry: + %tmp.7 = and i32 %x, 65535 ; <i32> [#uses=1] + %tmp.3 = shl i32 %y, 16 ; <i32> [#uses=1] + %tmp.9 = or i32 %tmp.7, %tmp.3 ; <i32> [#uses=1] + ret i32 %tmp.9 +} + +define i32 @test3(i32 %x, i32 %y) { +entry: + %tmp.3 = lshr i32 %x, 16 ; <i32> [#uses=1] + %tmp.6 = and i32 %y, -65536 ; <i32> [#uses=1] + %tmp.7 = or i32 %tmp.6, %tmp.3 ; <i32> [#uses=1] + ret i32 %tmp.7 +} + +define i32 @test4(i32 %x, i32 %y) { +entry: + %tmp.6 = and i32 %x, -65536 ; <i32> [#uses=1] + %tmp.3 = lshr i32 %y, 16 ; <i32> [#uses=1] + %tmp.7 = or i32 %tmp.6, %tmp.3 ; <i32> [#uses=1] + ret i32 %tmp.7 +} + +define i32 @test5(i32 %x, i32 %y) { +entry: + %tmp.3 = shl i32 %x, 1 ; <i32> [#uses=1] + %tmp.4 = and i32 %tmp.3, -65536 ; <i32> [#uses=1] + %tmp.7 = and i32 %y, 65535 ; <i32> [#uses=1] + %tmp.9 = or i32 %tmp.4, %tmp.7 ; <i32> [#uses=1] + ret i32 %tmp.9 +} + +define i32 @test6(i32 %x, i32 %y) { +entry: + %tmp.7 = and i32 %x, 65535 ; <i32> [#uses=1] + %tmp.3 = shl i32 %y, 1 ; <i32> [#uses=1] + %tmp.4 = and i32 %tmp.3, -65536 ; <i32> [#uses=1] + %tmp.9 = or i32 %tmp.4, %tmp.7 ; <i32> [#uses=1] + ret i32 %tmp.9 +} + +define i32 @test7(i32 %x, i32 %y) { +entry: + %tmp.2 = and i32 %x, -65536 ; <i32> [#uses=1] + %tmp.5 = and i32 %y, 65535 ; <i32> [#uses=1] + %tmp.7 = or i32 %tmp.5, %tmp.2 ; <i32> [#uses=1] + ret i32 %tmp.7 +} + +define i32 @test8(i32 %bar) { +entry: + %tmp.3 = shl i32 %bar, 1 ; <i32> [#uses=1] + %tmp.4 = and i32 %tmp.3, 2 ; <i32> [#uses=1] + %tmp.6 = and i32 %bar, -3 ; <i32> [#uses=1] + %tmp.7 = or i32 %tmp.4, %tmp.6 ; <i32> [#uses=1] + ret i32 %tmp.7 
+}
diff --git a/src/LLVM/test/CodeGen/PowerPC/rlwimi2.ll b/src/LLVM/test/CodeGen/PowerPC/rlwimi2.ll new file mode 100644 index 0000000..c79b406 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/rlwimi2.ll
@@ -0,0 +1,29 @@ +; All of these ands and shifts should be folded into rlwimi's +; RUN: llc < %s -march=ppc32 -o %t +; RUN: grep rlwimi %t | count 3 +; RUN: grep srwi %t | count 1 +; RUN: not grep slwi %t + +define i16 @test1(i32 %srcA, i32 %srcB, i32 %alpha) nounwind { +entry: + %tmp.1 = shl i32 %srcA, 15 ; <i32> [#uses=1] + %tmp.4 = and i32 %tmp.1, 32505856 ; <i32> [#uses=1] + %tmp.6 = and i32 %srcA, 31775 ; <i32> [#uses=1] + %tmp.7 = or i32 %tmp.4, %tmp.6 ; <i32> [#uses=1] + %tmp.9 = shl i32 %srcB, 15 ; <i32> [#uses=1] + %tmp.12 = and i32 %tmp.9, 32505856 ; <i32> [#uses=1] + %tmp.14 = and i32 %srcB, 31775 ; <i32> [#uses=1] + %tmp.15 = or i32 %tmp.12, %tmp.14 ; <i32> [#uses=1] + %tmp.18 = mul i32 %tmp.7, %alpha ; <i32> [#uses=1] + %tmp.20 = sub i32 32, %alpha ; <i32> [#uses=1] + %tmp.22 = mul i32 %tmp.15, %tmp.20 ; <i32> [#uses=1] + %tmp.23 = add i32 %tmp.22, %tmp.18 ; <i32> [#uses=2] + %tmp.27 = lshr i32 %tmp.23, 5 ; <i32> [#uses=1] + %tmp.28 = trunc i32 %tmp.27 to i16 ; <i16> [#uses=1] + %tmp.29 = and i16 %tmp.28, 31775 ; <i16> [#uses=1] + %tmp.33 = lshr i32 %tmp.23, 20 ; <i32> [#uses=1] + %tmp.34 = trunc i32 %tmp.33 to i16 ; <i16> [#uses=1] + %tmp.35 = and i16 %tmp.34, 992 ; <i16> [#uses=1] + %tmp.36 = or i16 %tmp.29, %tmp.35 ; <i16> [#uses=1] + ret i16 %tmp.36 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/rlwimi3.ll b/src/LLVM/test/CodeGen/PowerPC/rlwimi3.ll new file mode 100644 index 0000000..5bd9c30 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/rlwimi3.ll
@@ -0,0 +1,25 @@ +; RUN: llc < %s -march=ppc32 -stats |& \ +; RUN: grep {Number of machine instrs printed} | grep 12 + +define i16 @Trans16Bit(i32 %srcA, i32 %srcB, i32 %alpha) { + %tmp1 = shl i32 %srcA, 15 ; <i32> [#uses=1] + %tmp2 = and i32 %tmp1, 32505856 ; <i32> [#uses=1] + %tmp4 = and i32 %srcA, 31775 ; <i32> [#uses=1] + %tmp5 = or i32 %tmp2, %tmp4 ; <i32> [#uses=1] + %tmp7 = shl i32 %srcB, 15 ; <i32> [#uses=1] + %tmp8 = and i32 %tmp7, 32505856 ; <i32> [#uses=1] + %tmp10 = and i32 %srcB, 31775 ; <i32> [#uses=1] + %tmp11 = or i32 %tmp8, %tmp10 ; <i32> [#uses=1] + %tmp14 = mul i32 %tmp5, %alpha ; <i32> [#uses=1] + %tmp16 = sub i32 32, %alpha ; <i32> [#uses=1] + %tmp18 = mul i32 %tmp11, %tmp16 ; <i32> [#uses=1] + %tmp19 = add i32 %tmp18, %tmp14 ; <i32> [#uses=2] + %tmp21 = lshr i32 %tmp19, 5 ; <i32> [#uses=1] + %tmp21.upgrd.1 = trunc i32 %tmp21 to i16 ; <i16> [#uses=1] + %tmp = and i16 %tmp21.upgrd.1, 31775 ; <i16> [#uses=1] + %tmp23 = lshr i32 %tmp19, 20 ; <i32> [#uses=1] + %tmp23.upgrd.2 = trunc i32 %tmp23 to i16 ; <i16> [#uses=1] + %tmp24 = and i16 %tmp23.upgrd.2, 992 ; <i16> [#uses=1] + %tmp25 = or i16 %tmp, %tmp24 ; <i16> [#uses=1] + ret i16 %tmp25 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/rlwinm.ll b/src/LLVM/test/CodeGen/PowerPC/rlwinm.ll new file mode 100644 index 0000000..386f175 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/rlwinm.ll
@@ -0,0 +1,61 @@ +; All of these ands and shifts should be folded into rlwimi's +; RUN: llc < %s -march=ppc32 -o %t +; RUN: not grep and %t +; RUN: not grep srawi %t +; RUN: not grep srwi %t +; RUN: not grep slwi %t +; RUN: grep rlwinm %t | count 8 + +define i32 @test1(i32 %a) { +entry: + %tmp.1 = and i32 %a, 268431360 ; <i32> [#uses=1] + ret i32 %tmp.1 +} + +define i32 @test2(i32 %a) { +entry: + %tmp.1 = and i32 %a, -268435441 ; <i32> [#uses=1] + ret i32 %tmp.1 +} + +define i32 @test3(i32 %a) { +entry: + %tmp.2 = ashr i32 %a, 8 ; <i32> [#uses=1] + %tmp.3 = and i32 %tmp.2, 255 ; <i32> [#uses=1] + ret i32 %tmp.3 +} + +define i32 @test4(i32 %a) { +entry: + %tmp.3 = lshr i32 %a, 8 ; <i32> [#uses=1] + %tmp.4 = and i32 %tmp.3, 255 ; <i32> [#uses=1] + ret i32 %tmp.4 +} + +define i32 @test5(i32 %a) { +entry: + %tmp.2 = shl i32 %a, 8 ; <i32> [#uses=1] + %tmp.3 = and i32 %tmp.2, -8388608 ; <i32> [#uses=1] + ret i32 %tmp.3 +} + +define i32 @test6(i32 %a) { +entry: + %tmp.1 = and i32 %a, 65280 ; <i32> [#uses=1] + %tmp.2 = ashr i32 %tmp.1, 8 ; <i32> [#uses=1] + ret i32 %tmp.2 +} + +define i32 @test7(i32 %a) { +entry: + %tmp.1 = and i32 %a, 65280 ; <i32> [#uses=1] + %tmp.2 = lshr i32 %tmp.1, 8 ; <i32> [#uses=1] + ret i32 %tmp.2 +} + +define i32 @test8(i32 %a) { +entry: + %tmp.1 = and i32 %a, 16711680 ; <i32> [#uses=1] + %tmp.2 = shl i32 %tmp.1, 8 ; <i32> [#uses=1] + ret i32 %tmp.2 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/rlwinm2.ll b/src/LLVM/test/CodeGen/PowerPC/rlwinm2.ll new file mode 100644 index 0000000..ce9cc49 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/rlwinm2.ll
@@ -0,0 +1,28 @@ +; All of these ands and shifts should be folded into rlw[i]nm instructions +; RUN: llc < %s -march=ppc32 -o %t +; RUN: not grep and %t +; RUN: not grep srawi %t +; RUN: not grep srwi %t +; RUN: not grep slwi %t +; RUN: grep rlwnm %t | count 1 +; RUN: grep rlwinm %t | count 1 + +define i32 @test1(i32 %X, i32 %Y) { +entry: + %tmp = trunc i32 %Y to i8 ; <i8> [#uses=2] + %tmp1 = shl i32 %X, %Y ; <i32> [#uses=1] + %tmp2 = sub i32 32, %Y ; <i8> [#uses=1] + %tmp3 = lshr i32 %X, %tmp2 ; <i32> [#uses=1] + %tmp4 = or i32 %tmp1, %tmp3 ; <i32> [#uses=1] + %tmp6 = and i32 %tmp4, 127 ; <i32> [#uses=1] + ret i32 %tmp6 +} + +define i32 @test2(i32 %X) { +entry: + %tmp1 = lshr i32 %X, 27 ; <i32> [#uses=1] + %tmp2 = shl i32 %X, 5 ; <i32> [#uses=1] + %tmp2.masked = and i32 %tmp2, 96 ; <i32> [#uses=1] + %tmp5 = or i32 %tmp1, %tmp2.masked ; <i32> [#uses=1] + ret i32 %tmp5 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/rotl-2.ll b/src/LLVM/test/CodeGen/PowerPC/rotl-2.ll new file mode 100644 index 0000000..6ab7591 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/rotl-2.ll
@@ -0,0 +1,38 @@ +; RUN: llc < %s -march=ppc32 | grep rlwinm | count 4 +; RUN: llc < %s -march=ppc32 | grep rlwnm | count 2 +; RUN: llc < %s -march=ppc32 | not grep or + +define i32 @rotl32(i32 %A, i8 %Amt) nounwind { + %shift.upgrd.1 = zext i8 %Amt to i32 ; <i32> [#uses=1] + %B = shl i32 %A, %shift.upgrd.1 ; <i32> [#uses=1] + %Amt2 = sub i8 32, %Amt ; <i8> [#uses=1] + %shift.upgrd.2 = zext i8 %Amt2 to i32 ; <i32> [#uses=1] + %C = lshr i32 %A, %shift.upgrd.2 ; <i32> [#uses=1] + %D = or i32 %B, %C ; <i32> [#uses=1] + ret i32 %D +} + +define i32 @rotr32(i32 %A, i8 %Amt) nounwind { + %shift.upgrd.3 = zext i8 %Amt to i32 ; <i32> [#uses=1] + %B = lshr i32 %A, %shift.upgrd.3 ; <i32> [#uses=1] + %Amt2 = sub i8 32, %Amt ; <i8> [#uses=1] + %shift.upgrd.4 = zext i8 %Amt2 to i32 ; <i32> [#uses=1] + %C = shl i32 %A, %shift.upgrd.4 ; <i32> [#uses=1] + %D = or i32 %B, %C ; <i32> [#uses=1] + ret i32 %D +} + +define i32 @rotli32(i32 %A) nounwind { + %B = shl i32 %A, 5 ; <i32> [#uses=1] + %C = lshr i32 %A, 27 ; <i32> [#uses=1] + %D = or i32 %B, %C ; <i32> [#uses=1] + ret i32 %D +} + +define i32 @rotri32(i32 %A) nounwind { + %B = lshr i32 %A, 5 ; <i32> [#uses=1] + %C = shl i32 %A, 27 ; <i32> [#uses=1] + %D = or i32 %B, %C ; <i32> [#uses=1] + ret i32 %D +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/rotl-64.ll b/src/LLVM/test/CodeGen/PowerPC/rotl-64.ll new file mode 100644 index 0000000..674c9e4 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/rotl-64.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -march=ppc64 | grep rldicl +; RUN: llc < %s -march=ppc64 | grep rldcl +; PR1613 + +define i64 @t1(i64 %A) { + %tmp1 = lshr i64 %A, 57 + %tmp2 = shl i64 %A, 7 + %tmp3 = or i64 %tmp1, %tmp2 + ret i64 %tmp3 +} + +define i64 @t2(i64 %A, i8 zeroext %Amt) { + %Amt1 = zext i8 %Amt to i64 + %tmp1 = lshr i64 %A, %Amt1 + %Amt2 = sub i8 64, %Amt + %Amt3 = zext i8 %Amt2 to i64 + %tmp2 = shl i64 %A, %Amt3 + %tmp3 = or i64 %tmp1, %tmp2 + ret i64 %tmp3 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/rotl.ll b/src/LLVM/test/CodeGen/PowerPC/rotl.ll new file mode 100644 index 0000000..bac7ee2 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/rotl.ll
@@ -0,0 +1,37 @@ +; RUN: llc < %s -march=ppc32 | grep rlwnm | count 2 +; RUN: llc < %s -march=ppc32 | grep rlwinm | count 2 + +define i32 @rotlw(i32 %x, i32 %sh) { +entry: + %tmp.7 = sub i32 32, %sh ; <i32> [#uses=1] + %tmp.10 = lshr i32 %x, %tmp.7 ; <i32> [#uses=2] + %tmp.4 = shl i32 %x, %sh ; <i32> [#uses=1] + %tmp.12 = or i32 %tmp.10, %tmp.4 ; <i32> [#uses=1] + ret i32 %tmp.12 +} + +define i32 @rotrw(i32 %x, i32 %sh) { +entry: + %tmp.3 = trunc i32 %sh to i8 ; <i8> [#uses=1] + %tmp.4 = lshr i32 %x, %sh ; <i32> [#uses=2] + %tmp.7 = sub i32 32, %sh ; <i32> [#uses=1] + %tmp.10 = shl i32 %x, %tmp.7 ; <i32> [#uses=1] + %tmp.12 = or i32 %tmp.4, %tmp.10 ; <i32> [#uses=1] + ret i32 %tmp.12 +} + +define i32 @rotlwi(i32 %x) { +entry: + %tmp.7 = lshr i32 %x, 27 ; <i32> [#uses=2] + %tmp.3 = shl i32 %x, 5 ; <i32> [#uses=1] + %tmp.9 = or i32 %tmp.3, %tmp.7 ; <i32> [#uses=1] + ret i32 %tmp.9 +} + +define i32 @rotrwi(i32 %x) { +entry: + %tmp.3 = lshr i32 %x, 5 ; <i32> [#uses=2] + %tmp.7 = shl i32 %x, 27 ; <i32> [#uses=1] + %tmp.9 = or i32 %tmp.3, %tmp.7 ; <i32> [#uses=1] + ret i32 %tmp.9 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/sections.ll b/src/LLVM/test/CodeGen/PowerPC/sections.ll new file mode 100644 index 0000000..0ff4a89 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/sections.ll
@@ -0,0 +1,8 @@ +; Test to make sure that bss sections are printed with '.section' directive. +; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu | FileCheck %s + +@A = global i32 0 + +; CHECK: .section .bss,"aw",@nobits +; CHECK: .globl A +
diff --git a/src/LLVM/test/CodeGen/PowerPC/select-cc.ll b/src/LLVM/test/CodeGen/PowerPC/select-cc.ll new file mode 100644 index 0000000..ccc6489 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/select-cc.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=ppc32 +; PR3011 + +define <2 x double> @vector_select(<2 x double> %x, <2 x double> %y) nounwind { + %x.lo = extractelement <2 x double> %x, i32 0 ; <double> [#uses=1] + %x.lo.ge = fcmp oge double %x.lo, 0.000000e+00 ; <i1> [#uses=1] + %a.d = select i1 %x.lo.ge, <2 x double> %y, <2 x double> %x ; <<2 x double>> [#uses=1] + ret <2 x double> %a.d +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/select_lt0.ll b/src/LLVM/test/CodeGen/PowerPC/select_lt0.ll new file mode 100644 index 0000000..beee34d --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/select_lt0.ll
@@ -0,0 +1,50 @@ +; RUN: llc < %s -march=ppc32 | not grep cmp + +define i32 @seli32_1(i32 %a) { +entry: + %tmp.1 = icmp slt i32 %a, 0 ; <i1> [#uses=1] + %retval = select i1 %tmp.1, i32 5, i32 0 ; <i32> [#uses=1] + ret i32 %retval +} + +define i32 @seli32_2(i32 %a, i32 %b) { +entry: + %tmp.1 = icmp slt i32 %a, 0 ; <i1> [#uses=1] + %retval = select i1 %tmp.1, i32 %b, i32 0 ; <i32> [#uses=1] + ret i32 %retval +} + +define i32 @seli32_3(i32 %a, i16 %b) { +entry: + %tmp.2 = sext i16 %b to i32 ; <i32> [#uses=1] + %tmp.1 = icmp slt i32 %a, 0 ; <i1> [#uses=1] + %retval = select i1 %tmp.1, i32 %tmp.2, i32 0 ; <i32> [#uses=1] + ret i32 %retval +} + +define i32 @seli32_4(i32 %a, i16 %b) { +entry: + %tmp.2 = zext i16 %b to i32 ; <i32> [#uses=1] + %tmp.1 = icmp slt i32 %a, 0 ; <i1> [#uses=1] + %retval = select i1 %tmp.1, i32 %tmp.2, i32 0 ; <i32> [#uses=1] + ret i32 %retval +} + +define i16 @seli16_1(i16 %a) { +entry: + %tmp.1 = icmp slt i16 %a, 0 ; <i1> [#uses=1] + %retval = select i1 %tmp.1, i16 7, i16 0 ; <i16> [#uses=1] + ret i16 %retval +} + +define i16 @seli16_2(i32 %a, i16 %b) { + %tmp.1 = icmp slt i32 %a, 0 ; <i1> [#uses=1] + %retval = select i1 %tmp.1, i16 %b, i16 0 ; <i16> [#uses=1] + ret i16 %retval +} + +define i32 @seli32_a_a(i32 %a) { + %tmp = icmp slt i32 %a, 1 ; <i1> [#uses=1] + %min = select i1 %tmp, i32 %a, i32 0 ; <i32> [#uses=1] + ret i32 %min +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/setcc_no_zext.ll b/src/LLVM/test/CodeGen/PowerPC/setcc_no_zext.ll new file mode 100644 index 0000000..12a8ed8 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/setcc_no_zext.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=ppc32 | not grep rlwinm + +define i32 @setcc_one_or_zero(i32* %a) { +entry: + %tmp.1 = icmp ne i32* %a, null ; <i1> [#uses=1] + %inc.1 = zext i1 %tmp.1 to i32 ; <i32> [#uses=1] + ret i32 %inc.1 +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/seteq-0.ll b/src/LLVM/test/CodeGen/PowerPC/seteq-0.ll new file mode 100644 index 0000000..900fc26 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/seteq-0.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | \ +; RUN: grep {srwi r., r., 5} + +define i32 @eq0(i32 %a) { + %tmp.1 = icmp eq i32 %a, 0 ; <i1> [#uses=1] + %tmp.2 = zext i1 %tmp.1 to i32 ; <i32> [#uses=1] + ret i32 %tmp.2 +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/shift128.ll b/src/LLVM/test/CodeGen/PowerPC/shift128.ll new file mode 100644 index 0000000..8e518c1 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/shift128.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=ppc64 | grep sld | count 5 + +define i128 @foo_lshr(i128 %x, i128 %y) { + %r = lshr i128 %x, %y + ret i128 %r +} +define i128 @foo_ashr(i128 %x, i128 %y) { + %r = ashr i128 %x, %y + ret i128 %r +} +define i128 @foo_shl(i128 %x, i128 %y) { + %r = shl i128 %x, %y + ret i128 %r +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/shl_elim.ll b/src/LLVM/test/CodeGen/PowerPC/shl_elim.ll new file mode 100644 index 0000000..fedfad5 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/shl_elim.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=ppc32 | not grep slwi + +define i32 @test1(i64 %a) { + %tmp29 = lshr i64 %a, 24 ; <i64> [#uses=1] + %tmp23 = trunc i64 %tmp29 to i32 ; <i32> [#uses=1] + %tmp410 = lshr i32 %tmp23, 9 ; <i32> [#uses=1] + %tmp45 = trunc i32 %tmp410 to i16 ; <i16> [#uses=1] + %tmp456 = sext i16 %tmp45 to i32 ; <i32> [#uses=1] + ret i32 %tmp456 +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/shl_sext.ll b/src/LLVM/test/CodeGen/PowerPC/shl_sext.ll new file mode 100644 index 0000000..d8c8283 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/shl_sext.ll
@@ -0,0 +1,18 @@ +; This test should not contain a sign extend +; RUN: llc < %s -march=ppc32 | not grep extsb + +define i32 @test(i32 %mode.0.i.0) { + %tmp.79 = trunc i32 %mode.0.i.0 to i8 ; <i8> [#uses=1] + %tmp.80 = sext i8 %tmp.79 to i32 ; <i32> [#uses=1] + %tmp.81 = shl i32 %tmp.80, 24 ; <i32> [#uses=1] + ret i32 %tmp.81 +} + +define i32 @test2(i32 %mode.0.i.0) { + %tmp.79 = trunc i32 %mode.0.i.0 to i8 ; <i8> [#uses=1] + %tmp.80 = sext i8 %tmp.79 to i32 ; <i32> [#uses=1] + %tmp.81 = shl i32 %tmp.80, 16 ; <i32> [#uses=1] + %tmp.82 = and i32 %tmp.81, 16711680 ; <i32> [#uses=1] + ret i32 %tmp.82 +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/sign_ext_inreg1.ll b/src/LLVM/test/CodeGen/PowerPC/sign_ext_inreg1.ll new file mode 100644 index 0000000..9679a17 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/sign_ext_inreg1.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=ppc32 | grep srwi +; RUN: llc < %s -march=ppc32 | not grep rlwimi + +define i32 @baz(i64 %a) { + %tmp29 = lshr i64 %a, 24 ; <i64> [#uses=1] + %tmp23 = trunc i64 %tmp29 to i32 ; <i32> [#uses=1] + %tmp410 = lshr i32 %tmp23, 9 ; <i32> [#uses=1] + %tmp45 = trunc i32 %tmp410 to i16 ; <i16> [#uses=1] + %tmp456 = sext i16 %tmp45 to i32 ; <i32> [#uses=1] + ret i32 %tmp456 +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/small-arguments.ll b/src/LLVM/test/CodeGen/PowerPC/small-arguments.ll new file mode 100644 index 0000000..bddbf60 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/small-arguments.ll
@@ -0,0 +1,52 @@ +; RUN: llc < %s -march=ppc32 | not grep {extsh\\|rlwinm} + +declare signext i16 @foo() + +define i32 @test1(i16 signext %X) { + %Y = sext i16 %X to i32 ;; dead + ret i32 %Y +} + +define i32 @test2(i16 zeroext %X) { + %Y = sext i16 %X to i32 + %Z = and i32 %Y, 65535 ;; dead + ret i32 %Z +} + +define void @test3() { + %tmp.0 = call signext i16 @foo() ;; no extsh! + %tmp.1 = icmp slt i16 %tmp.0, 1234 + br i1 %tmp.1, label %then, label %UnifiedReturnBlock + +then: + call i32 @test1(i16 signext 0) + ret void +UnifiedReturnBlock: + ret void +} + +define i32 @test4(i16* %P) { + %tmp.1 = load i16* %P + %tmp.2 = zext i16 %tmp.1 to i32 + %tmp.3 = and i32 %tmp.2, 255 + ret i32 %tmp.3 +} + +define i32 @test5(i16* %P) { + %tmp.1 = load i16* %P + %tmp.2 = bitcast i16 %tmp.1 to i16 + %tmp.3 = zext i16 %tmp.2 to i32 + %tmp.4 = and i32 %tmp.3, 255 + ret i32 %tmp.4 +} + +define i32 @test6(i32* %P) { + %tmp.1 = load i32* %P + %tmp.2 = and i32 %tmp.1, 255 + ret i32 %tmp.2 +} + +define zeroext i16 @test7(float %a) { + %tmp.1 = fptoui float %a to i16 + ret i16 %tmp.1 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/stack-protector.ll b/src/LLVM/test/CodeGen/PowerPC/stack-protector.ll new file mode 100644 index 0000000..2020361 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/stack-protector.ll
@@ -0,0 +1,25 @@ +; RUN: llc -march=ppc32 < %s -o - | grep {__stack_chk_guard} +; RUN: llc -march=ppc32 < %s -o - | grep {__stack_chk_fail} + +@"\01LC" = internal constant [11 x i8] c"buf == %s\0A\00" ; <[11 x i8]*> [#uses=1] + +define void @test(i8* %a) nounwind ssp { +entry: + %a_addr = alloca i8* ; <i8**> [#uses=2] + %buf = alloca [8 x i8] ; <[8 x i8]*> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store i8* %a, i8** %a_addr + %buf1 = bitcast [8 x i8]* %buf to i8* ; <i8*> [#uses=1] + %0 = load i8** %a_addr, align 4 ; <i8*> [#uses=1] + %1 = call i8* @strcpy(i8* %buf1, i8* %0) nounwind ; <i8*> [#uses=0] + %buf2 = bitcast [8 x i8]* %buf to i8* ; <i8*> [#uses=1] + %2 = call i32 (i8*, ...)* @printf(i8* getelementptr ([11 x i8]* @"\01LC", i32 0, i32 0), i8* %buf2) nounwind ; <i32> [#uses=0] + br label %return + +return: ; preds = %entry + ret void +} + +declare i8* @strcpy(i8*, i8*) nounwind + +declare i32 @printf(i8*, ...) nounwind
diff --git a/src/LLVM/test/CodeGen/PowerPC/stfiwx-2.ll b/src/LLVM/test/CodeGen/PowerPC/stfiwx-2.ll new file mode 100644 index 0000000..c49b25c --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/stfiwx-2.ll
@@ -0,0 +1,11 @@ +; This cannot be a stfiwx +; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep stb +; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep stfiwx + +define void @test(float %F, i8* %P) { + %I = fptosi float %F to i32 + %X = trunc i32 %I to i8 + store i8 %X, i8* %P + ret void +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/stfiwx.ll b/src/LLVM/test/CodeGen/PowerPC/stfiwx.ll new file mode 100644 index 0000000..86301ee --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/stfiwx.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=stfiwx -o %t1 +; RUN: grep stfiwx %t1 +; RUN: not grep r1 %t1 +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=-stfiwx \ +; RUN: -o %t2 +; RUN: not grep stfiwx %t2 +; RUN: grep r1 %t2 + +define void @test(float %a, i32* %b) nounwind { + %tmp.2 = fptosi float %a to i32 ; <i32> [#uses=1] + store i32 %tmp.2, i32* %b + ret void +} + +define void @test2(float %a, i32* %b, i32 %i) nounwind { + %tmp.2 = getelementptr i32* %b, i32 1 ; <i32*> [#uses=1] + %tmp.5 = getelementptr i32* %b, i32 %i ; <i32*> [#uses=1] + %tmp.7 = fptosi float %a to i32 ; <i32> [#uses=3] + store i32 %tmp.7, i32* %tmp.5 + store i32 %tmp.7, i32* %tmp.2 + store i32 %tmp.7, i32* %b + ret void +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/store-load-fwd.ll b/src/LLVM/test/CodeGen/PowerPC/store-load-fwd.ll new file mode 100644 index 0000000..8d792da --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/store-load-fwd.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=ppc32 | not grep lwz + +define i32 @test(i32* %P) { + store i32 1, i32* %P + %V = load i32* %P ; <i32> [#uses=1] + ret i32 %V +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/stubs.ll b/src/LLVM/test/CodeGen/PowerPC/stubs.ll new file mode 100644 index 0000000..4889263 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/stubs.ll
@@ -0,0 +1,22 @@ +; RUN: llc %s -o - -mtriple=powerpc-apple-darwin8 | FileCheck %s +define ppc_fp128 @test1(i64 %X) nounwind readnone { +entry: + %0 = sitofp i64 %X to ppc_fp128 + ret ppc_fp128 %0 +} + +; CHECK: _test1: +; CHECK: bl ___floatditf$stub +; CHECK: .section __TEXT,__symbol_stub1,symbol_stubs,pure_instructions,16 +; CHECK: ___floatditf$stub: +; CHECK: .indirect_symbol ___floatditf +; CHECK: lis r11,ha16(___floatditf$lazy_ptr) +; CHECK: lwzu r12,lo16(___floatditf$lazy_ptr)(r11) +; CHECK: mtctr r12 +; CHECK: bctr +; CHECK: .section __DATA,__la_symbol_ptr,lazy_symbol_pointers +; CHECK: ___floatditf$lazy_ptr: +; CHECK: .indirect_symbol ___floatditf +; CHECK: .long dyld_stub_binding_helper + +
diff --git a/src/LLVM/test/CodeGen/PowerPC/subc.ll b/src/LLVM/test/CodeGen/PowerPC/subc.ll new file mode 100644 index 0000000..ffb682b --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/subc.ll
@@ -0,0 +1,25 @@ +; All of these should be codegen'd without loading immediates +; RUN: llc < %s -march=ppc32 -o %t +; RUN: grep subfc %t | count 1 +; RUN: grep subfe %t | count 1 +; RUN: grep subfze %t | count 1 +; RUN: grep subfme %t | count 1 +; RUN: grep subfic %t | count 2 + +define i64 @sub_ll(i64 %a, i64 %b) { +entry: + %tmp.2 = sub i64 %a, %b ; <i64> [#uses=1] + ret i64 %tmp.2 +} + +define i64 @sub_l_5(i64 %a) { +entry: + %tmp.1 = sub i64 5, %a ; <i64> [#uses=1] + ret i64 %tmp.1 +} + +define i64 @sub_l_m5(i64 %a) { +entry: + %tmp.1 = sub i64 -5, %a ; <i64> [#uses=1] + ret i64 %tmp.1 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/tailcall1-64.ll b/src/LLVM/test/CodeGen/PowerPC/tailcall1-64.ll new file mode 100644 index 0000000..e9c83a5 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/tailcall1-64.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=ppc64 -tailcallopt | grep TC_RETURNd8 +define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { +entry: + ret i32 %a3 +} + +define fastcc i32 @tailcaller(i32 %in1, i32 %in2) { +entry: + %tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; <i32> [#uses=1] + ret i32 %tmp11 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/tailcall1.ll b/src/LLVM/test/CodeGen/PowerPC/tailcall1.ll new file mode 100644 index 0000000..08f3392 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/tailcall1.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=ppc32 -tailcallopt | grep TC_RETURN +define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { +entry: + ret i32 %a3 +} + +define fastcc i32 @tailcaller(i32 %in1, i32 %in2) { +entry: + %tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; <i32> [#uses=1] + ret i32 %tmp11 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/tailcallpic1.ll b/src/LLVM/test/CodeGen/PowerPC/tailcallpic1.ll new file mode 100644 index 0000000..f3f5028 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/tailcallpic1.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -tailcallopt -mtriple=powerpc-apple-darwin -relocation-model=pic | grep TC_RETURN + + + +define protected fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { +entry: + ret i32 %a3 +} + +define fastcc i32 @tailcaller(i32 %in1, i32 %in2) { +entry: + %tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; <i32> [#uses=1] + ret i32 %tmp11 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/trampoline.ll b/src/LLVM/test/CodeGen/PowerPC/trampoline.ll new file mode 100644 index 0000000..91b2011 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/trampoline.ll
@@ -0,0 +1,168 @@ +; RUN: llc < %s -march=ppc32 | grep {__trampoline_setup} + +module asm "\09.lazy_reference .objc_class_name_NSImageRep" +module asm "\09.objc_class_name_NSBitmapImageRep=0" +module asm "\09.globl .objc_class_name_NSBitmapImageRep" + %struct.CGImage = type opaque + %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]" = type { %struct.NSBitmapImageRep*, void (%struct.__block_1*, %struct.CGImage*)* } + %struct.NSBitmapImageRep = type { %struct.NSImageRep } + %struct.NSImageRep = type { } + %struct.NSZone = type opaque + %struct.__block_1 = type { %struct.__invoke_impl, %struct.NSZone*, %struct.NSBitmapImageRep** } + %struct.__builtin_trampoline = type { [40 x i8] } + %struct.__invoke_impl = type { i8*, i32, i32, i8* } + %struct._objc__method_prototype_list = type opaque + %struct._objc_class = type { %struct._objc_class*, %struct._objc_class*, i8*, i32, i32, i32, %struct._objc_ivar_list*, %struct._objc_method_list*, %struct.objc_cache*, %struct._objc_protocol**, i8*, %struct._objc_class_ext* } + %struct._objc_class_ext = type opaque + %struct._objc_ivar_list = type opaque + %struct._objc_method = type { %struct.objc_selector*, i8*, i8* } + %struct._objc_method_list = type opaque + %struct._objc_module = type { i32, i32, i8*, %struct._objc_symtab* } + %struct._objc_protocol = type { %struct._objc_protocol_extension*, i8*, %struct._objc_protocol**, %struct._objc__method_prototype_list*, %struct._objc__method_prototype_list* } + %struct._objc_protocol_extension = type opaque + %struct._objc_super = type { %struct.objc_object*, %struct._objc_class* } + %struct._objc_symtab = type { i32, %struct.objc_selector**, i16, i16, [1 x i8*] } + %struct.anon = type { %struct._objc__method_prototype_list*, i32, [1 x %struct._objc_method] } + %struct.objc_cache = type opaque + %struct.objc_object = type opaque + %struct.objc_selector = type opaque + %struct.objc_super = type opaque +@_NSConcreteStackBlock = external global i8* ; <i8**> [#uses=1] 
+@"\01L_OBJC_SELECTOR_REFERENCES_1" = internal global %struct.objc_selector* bitcast ([34 x i8]* @"\01L_OBJC_METH_VAR_NAME_1" to %struct.objc_selector*), section "__OBJC,__message_refs,literal_pointers,no_dead_strip" ; <%struct.objc_selector**> [#uses=2] +@"\01L_OBJC_CLASS_NSBitmapImageRep" = internal global %struct._objc_class { %struct._objc_class* @"\01L_OBJC_METACLASS_NSBitmapImageRep", %struct._objc_class* bitcast ([11 x i8]* @"\01L_OBJC_CLASS_NAME_1" to %struct._objc_class*), i8* getelementptr ([17 x i8]* @"\01L_OBJC_CLASS_NAME_0", i32 0, i32 0), i32 0, i32 1, i32 0, %struct._objc_ivar_list* null, %struct._objc_method_list* bitcast ({ i8*, i32, [1 x %struct._objc_method] }* @"\01L_OBJC_INSTANCE_METHODS_NSBitmapImageRep" to %struct._objc_method_list*), %struct.objc_cache* null, %struct._objc_protocol** null, i8* null, %struct._objc_class_ext* null }, section "__OBJC,__class,regular,no_dead_strip" ; <%struct._objc_class*> [#uses=3] +@"\01L_OBJC_SELECTOR_REFERENCES_0" = internal global %struct.objc_selector* bitcast ([14 x i8]* @"\01L_OBJC_METH_VAR_NAME_0" to %struct.objc_selector*), section "__OBJC,__message_refs,literal_pointers,no_dead_strip" ; <%struct.objc_selector**> [#uses=2] +@"\01L_OBJC_SYMBOLS" = internal global { i32, %struct.objc_selector**, i16, i16, [1 x %struct._objc_class*] } { i32 0, %struct.objc_selector** null, i16 1, i16 0, [1 x %struct._objc_class*] [ %struct._objc_class* @"\01L_OBJC_CLASS_NSBitmapImageRep" ] }, section "__OBJC,__symbols,regular,no_dead_strip" ; <{ i32, %struct.objc_selector**, i16, i16, [1 x %struct._objc_class*] }*> [#uses=2] +@"\01L_OBJC_METH_VAR_NAME_0" = internal global [14 x i8] c"copyWithZone:\00", section "__TEXT,__cstring,cstring_literals", align 4 ; <[14 x i8]*> [#uses=2] +@"\01L_OBJC_METH_VAR_TYPE_0" = internal global [20 x i8] c"@12@0:4^{_NSZone=}8\00", section "__TEXT,__cstring,cstring_literals", align 4 ; <[20 x i8]*> [#uses=1] +@"\01L_OBJC_INSTANCE_METHODS_NSBitmapImageRep" = internal global { i8*, i32, [1 x 
%struct._objc_method] } { i8* null, i32 1, [1 x %struct._objc_method] [ %struct._objc_method { %struct.objc_selector* bitcast ([14 x i8]* @"\01L_OBJC_METH_VAR_NAME_0" to %struct.objc_selector*), i8* getelementptr ([20 x i8]* @"\01L_OBJC_METH_VAR_TYPE_0", i32 0, i32 0), i8* bitcast (%struct.objc_object* (%struct.NSBitmapImageRep*, %struct.objc_selector*, %struct.NSZone*)* @"-[NSBitmapImageRep copyWithZone:]" to i8*) } ] }, section "__OBJC,__inst_meth,regular,no_dead_strip" ; <{ i8*, i32, [1 x %struct._objc_method] }*> [#uses=2] +@"\01L_OBJC_CLASS_NAME_0" = internal global [17 x i8] c"NSBitmapImageRep\00", section "__TEXT,__cstring,cstring_literals", align 4 ; <[17 x i8]*> [#uses=1] +@"\01L_OBJC_CLASS_NAME_1" = internal global [11 x i8] c"NSImageRep\00", section "__TEXT,__cstring,cstring_literals", align 4 ; <[11 x i8]*> [#uses=2] +@"\01L_OBJC_METACLASS_NSBitmapImageRep" = internal global %struct._objc_class { %struct._objc_class* bitcast ([11 x i8]* @"\01L_OBJC_CLASS_NAME_1" to %struct._objc_class*), %struct._objc_class* bitcast ([11 x i8]* @"\01L_OBJC_CLASS_NAME_1" to %struct._objc_class*), i8* getelementptr ([17 x i8]* @"\01L_OBJC_CLASS_NAME_0", i32 0, i32 0), i32 0, i32 2, i32 48, %struct._objc_ivar_list* null, %struct._objc_method_list* null, %struct.objc_cache* null, %struct._objc_protocol** null, i8* null, %struct._objc_class_ext* null }, section "__OBJC,__meta_class,regular,no_dead_strip" ; <%struct._objc_class*> [#uses=2] +@"\01L_OBJC_METH_VAR_NAME_1" = internal global [34 x i8] c"_performBlockUsingBackingCGImage:\00", section "__TEXT,__cstring,cstring_literals", align 4 ; <[34 x i8]*> [#uses=2] +@"\01L_OBJC_IMAGE_INFO" = internal constant [2 x i32] zeroinitializer, section "__OBJC, __image_info,regular" ; <[2 x i32]*> [#uses=1] +@"\01L_OBJC_CLASS_NAME_2" = internal global [1 x i8] zeroinitializer, section "__TEXT,__cstring,cstring_literals", align 4 ; <[1 x i8]*> [#uses=1] +@"\01L_OBJC_MODULES" = internal global %struct._objc_module { i32 7, i32 16, i8* 
getelementptr ([1 x i8]* @"\01L_OBJC_CLASS_NAME_2", i32 0, i32 0), %struct._objc_symtab* bitcast ({ i32, %struct.objc_selector**, i16, i16, [1 x %struct._objc_class*] }* @"\01L_OBJC_SYMBOLS" to %struct._objc_symtab*) }, section "__OBJC,__module_info,regular,no_dead_strip" ; <%struct._objc_module*> [#uses=1] +@llvm.used = appending global [14 x i8*] [ i8* bitcast (%struct.objc_selector** @"\01L_OBJC_SELECTOR_REFERENCES_1" to i8*), i8* bitcast (%struct._objc_class* @"\01L_OBJC_CLASS_NSBitmapImageRep" to i8*), i8* bitcast (%struct.objc_selector** @"\01L_OBJC_SELECTOR_REFERENCES_0" to i8*), i8* bitcast ({ i32, %struct.objc_selector**, i16, i16, [1 x %struct._objc_class*] }* @"\01L_OBJC_SYMBOLS" to i8*), i8* getelementptr ([14 x i8]* @"\01L_OBJC_METH_VAR_NAME_0", i32 0, i32 0), i8* getelementptr ([20 x i8]* @"\01L_OBJC_METH_VAR_TYPE_0", i32 0, i32 0), i8* bitcast ({ i8*, i32, [1 x %struct._objc_method] }* @"\01L_OBJC_INSTANCE_METHODS_NSBitmapImageRep" to i8*), i8* getelementptr ([17 x i8]* @"\01L_OBJC_CLASS_NAME_0", i32 0, i32 0), i8* getelementptr ([11 x i8]* @"\01L_OBJC_CLASS_NAME_1", i32 0, i32 0), i8* bitcast (%struct._objc_class* @"\01L_OBJC_METACLASS_NSBitmapImageRep" to i8*), i8* getelementptr ([34 x i8]* @"\01L_OBJC_METH_VAR_NAME_1", i32 0, i32 0), i8* bitcast ([2 x i32]* @"\01L_OBJC_IMAGE_INFO" to i8*), i8* getelementptr ([1 x i8]* @"\01L_OBJC_CLASS_NAME_2", i32 0, i32 0), i8* bitcast (%struct._objc_module* @"\01L_OBJC_MODULES" to i8*) ], section "llvm.metadata" ; <[14 x i8*]*> [#uses=0] + +define internal %struct.objc_object* @"-[NSBitmapImageRep copyWithZone:]"(%struct.NSBitmapImageRep* %self, %struct.objc_selector* %_cmd, %struct.NSZone* %zone) nounwind { +entry: + %self_addr = alloca %struct.NSBitmapImageRep* ; <%struct.NSBitmapImageRep**> [#uses=2] + %_cmd_addr = alloca %struct.objc_selector* ; <%struct.objc_selector**> [#uses=1] + %zone_addr = alloca %struct.NSZone* ; <%struct.NSZone**> [#uses=2] + %retval = alloca %struct.objc_object* ; 
<%struct.objc_object**> [#uses=1] + %__block_holder_tmp_1.0 = alloca %struct.__block_1 ; <%struct.__block_1*> [#uses=7] + %new = alloca %struct.NSBitmapImageRep* ; <%struct.NSBitmapImageRep**> [#uses=2] + %self.1 = alloca %struct.objc_object* ; <%struct.objc_object**> [#uses=2] + %0 = alloca i8* ; <i8**> [#uses=2] + %TRAMP.9 = alloca %struct.__builtin_trampoline, align 4 ; <%struct.__builtin_trampoline*> [#uses=1] + %1 = alloca void (%struct.__block_1*, %struct.CGImage*)* ; <void (%struct.__block_1*, %struct.CGImage*)**> [#uses=2] + %2 = alloca %struct.NSBitmapImageRep* ; <%struct.NSBitmapImageRep**> [#uses=2] + %FRAME.7 = alloca %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]" ; <%"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"*> [#uses=5] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store %struct.NSBitmapImageRep* %self, %struct.NSBitmapImageRep** %self_addr + store %struct.objc_selector* %_cmd, %struct.objc_selector** %_cmd_addr + store %struct.NSZone* %zone, %struct.NSZone** %zone_addr + %3 = getelementptr %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"* %FRAME.7, i32 0, i32 0 ; <%struct.NSBitmapImageRep**> [#uses=1] + %4 = load %struct.NSBitmapImageRep** %self_addr, align 4 ; <%struct.NSBitmapImageRep*> [#uses=1] + store %struct.NSBitmapImageRep* %4, %struct.NSBitmapImageRep** %3, align 4 + %TRAMP.91 = bitcast %struct.__builtin_trampoline* %TRAMP.9 to i8* ; <i8*> [#uses=1] + %FRAME.72 = bitcast %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"* %FRAME.7 to i8* ; <i8*> [#uses=1] + call void @llvm.init.trampoline(i8* %TRAMP.91, i8* bitcast (void (%"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"*, %struct.__block_1*, %struct.CGImage*)* @__helper_1.1632 to i8*), i8* %FRAME.72) ; <i8*> [#uses=1] + %tramp = call i8* @llvm.adjust.trampoline(i8* %TRAMP.91) + store i8* %tramp, i8** %0, align 4 + %5 = getelementptr %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"* %FRAME.7, i32 0, i32 1 ; <void (%struct.__block_1*, %struct.CGImage*)**> [#uses=1] + 
%6 = load i8** %0, align 4 ; <i8*> [#uses=1] + %7 = bitcast i8* %6 to void (%struct.__block_1*, %struct.CGImage*)* ; <void (%struct.__block_1*, %struct.CGImage*)*> [#uses=1] + store void (%struct.__block_1*, %struct.CGImage*)* %7, void (%struct.__block_1*, %struct.CGImage*)** %5, align 4 + store %struct.NSBitmapImageRep* null, %struct.NSBitmapImageRep** %new, align 4 + %8 = getelementptr %struct.__block_1* %__block_holder_tmp_1.0, i32 0, i32 0 ; <%struct.__invoke_impl*> [#uses=1] + %9 = getelementptr %struct.__invoke_impl* %8, i32 0, i32 0 ; <i8**> [#uses=1] + store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %9, align 4 + %10 = getelementptr %struct.__block_1* %__block_holder_tmp_1.0, i32 0, i32 0 ; <%struct.__invoke_impl*> [#uses=1] + %11 = getelementptr %struct.__invoke_impl* %10, i32 0, i32 1 ; <i32*> [#uses=1] + store i32 67108864, i32* %11, align 4 + %12 = getelementptr %struct.__block_1* %__block_holder_tmp_1.0, i32 0, i32 0 ; <%struct.__invoke_impl*> [#uses=1] + %13 = getelementptr %struct.__invoke_impl* %12, i32 0, i32 2 ; <i32*> [#uses=1] + store i32 24, i32* %13, align 4 + %14 = getelementptr %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"* %FRAME.7, i32 0, i32 1 ; <void (%struct.__block_1*, %struct.CGImage*)**> [#uses=1] + %15 = load void (%struct.__block_1*, %struct.CGImage*)** %14, align 4 ; <void (%struct.__block_1*, %struct.CGImage*)*> [#uses=1] + store void (%struct.__block_1*, %struct.CGImage*)* %15, void (%struct.__block_1*, %struct.CGImage*)** %1, align 4 + %16 = getelementptr %struct.__block_1* %__block_holder_tmp_1.0, i32 0, i32 0 ; <%struct.__invoke_impl*> [#uses=1] + %17 = getelementptr %struct.__invoke_impl* %16, i32 0, i32 3 ; <i8**> [#uses=1] + %18 = load void (%struct.__block_1*, %struct.CGImage*)** %1, align 4 ; <void (%struct.__block_1*, %struct.CGImage*)*> [#uses=1] + %19 = bitcast void (%struct.__block_1*, %struct.CGImage*)* %18 to i8* ; <i8*> [#uses=1] + store i8* %19, i8** %17, align 4 + %20 = getelementptr 
%struct.__block_1* %__block_holder_tmp_1.0, i32 0, i32 1 ; <%struct.NSZone**> [#uses=1] + %21 = load %struct.NSZone** %zone_addr, align 4 ; <%struct.NSZone*> [#uses=1] + store %struct.NSZone* %21, %struct.NSZone** %20, align 4 + %22 = getelementptr %struct.__block_1* %__block_holder_tmp_1.0, i32 0, i32 2 ; <%struct.NSBitmapImageRep***> [#uses=1] + store %struct.NSBitmapImageRep** %new, %struct.NSBitmapImageRep*** %22, align 4 + %23 = getelementptr %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"* %FRAME.7, i32 0, i32 0 ; <%struct.NSBitmapImageRep**> [#uses=1] + %24 = load %struct.NSBitmapImageRep** %23, align 4 ; <%struct.NSBitmapImageRep*> [#uses=1] + store %struct.NSBitmapImageRep* %24, %struct.NSBitmapImageRep** %2, align 4 + %25 = load %struct.NSBitmapImageRep** %2, align 4 ; <%struct.NSBitmapImageRep*> [#uses=1] + %26 = bitcast %struct.NSBitmapImageRep* %25 to %struct.objc_object* ; <%struct.objc_object*> [#uses=1] + store %struct.objc_object* %26, %struct.objc_object** %self.1, align 4 + %27 = load %struct.objc_selector** @"\01L_OBJC_SELECTOR_REFERENCES_1", align 4 ; <%struct.objc_selector*> [#uses=1] + %__block_holder_tmp_1.03 = bitcast %struct.__block_1* %__block_holder_tmp_1.0 to void (%struct.CGImage*)* ; <void (%struct.CGImage*)*> [#uses=1] + %28 = load %struct.objc_object** %self.1, align 4 ; <%struct.objc_object*> [#uses=1] + %29 = call %struct.objc_object* (%struct.objc_object*, %struct.objc_selector*, ...)* inttoptr (i64 4294901504 to %struct.objc_object* (%struct.objc_object*, %struct.objc_selector*, ...)*)(%struct.objc_object* %28, %struct.objc_selector* %27, void (%struct.CGImage*)* %__block_holder_tmp_1.03) nounwind ; <%struct.objc_object*> [#uses=0] + br label %return + +return: ; preds = %entry + %retval5 = load %struct.objc_object** %retval ; <%struct.objc_object*> [#uses=1] + ret %struct.objc_object* %retval5 +} + +declare void @llvm.init.trampoline(i8*, i8*, i8*) nounwind +declare i8* @llvm.adjust.trampoline(i8*) nounwind + +define internal 
void @__helper_1.1632(%"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"* nest %CHAIN.8, %struct.__block_1* %_self, %struct.CGImage* %cgImage) nounwind { +entry: + %CHAIN.8_addr = alloca %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"* ; <%"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"**> [#uses=2] + %_self_addr = alloca %struct.__block_1* ; <%struct.__block_1**> [#uses=3] + %cgImage_addr = alloca %struct.CGImage* ; <%struct.CGImage**> [#uses=1] + %zone = alloca %struct.NSZone* ; <%struct.NSZone**> [#uses=2] + %objc_super = alloca %struct._objc_super ; <%struct._objc_super*> [#uses=3] + %new = alloca %struct.NSBitmapImageRep** ; <%struct.NSBitmapImageRep***> [#uses=2] + %objc_super.5 = alloca %struct.objc_super* ; <%struct.objc_super**> [#uses=2] + %0 = alloca %struct.NSBitmapImageRep* ; <%struct.NSBitmapImageRep**> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"* %CHAIN.8, %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"** %CHAIN.8_addr + store %struct.__block_1* %_self, %struct.__block_1** %_self_addr + store %struct.CGImage* %cgImage, %struct.CGImage** %cgImage_addr + %1 = load %struct.__block_1** %_self_addr, align 4 ; <%struct.__block_1*> [#uses=1] + %2 = getelementptr %struct.__block_1* %1, i32 0, i32 2 ; <%struct.NSBitmapImageRep***> [#uses=1] + %3 = load %struct.NSBitmapImageRep*** %2, align 4 ; <%struct.NSBitmapImageRep**> [#uses=1] + store %struct.NSBitmapImageRep** %3, %struct.NSBitmapImageRep*** %new, align 4 + %4 = load %struct.__block_1** %_self_addr, align 4 ; <%struct.__block_1*> [#uses=1] + %5 = getelementptr %struct.__block_1* %4, i32 0, i32 1 ; <%struct.NSZone**> [#uses=1] + %6 = load %struct.NSZone** %5, align 4 ; <%struct.NSZone*> [#uses=1] + store %struct.NSZone* %6, %struct.NSZone** %zone, align 4 + %7 = load %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"** %CHAIN.8_addr, align 4 ; <%"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"*> [#uses=1] + %8 = 
getelementptr %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"* %7, i32 0, i32 0 ; <%struct.NSBitmapImageRep**> [#uses=1] + %9 = load %struct.NSBitmapImageRep** %8, align 4 ; <%struct.NSBitmapImageRep*> [#uses=1] + store %struct.NSBitmapImageRep* %9, %struct.NSBitmapImageRep** %0, align 4 + %10 = load %struct.NSBitmapImageRep** %0, align 4 ; <%struct.NSBitmapImageRep*> [#uses=1] + %11 = bitcast %struct.NSBitmapImageRep* %10 to %struct.objc_object* ; <%struct.objc_object*> [#uses=1] + %12 = getelementptr %struct._objc_super* %objc_super, i32 0, i32 0 ; <%struct.objc_object**> [#uses=1] + store %struct.objc_object* %11, %struct.objc_object** %12, align 4 + %13 = load %struct._objc_class** getelementptr (%struct._objc_class* @"\01L_OBJC_CLASS_NSBitmapImageRep", i32 0, i32 1), align 4 ; <%struct._objc_class*> [#uses=1] + %14 = getelementptr %struct._objc_super* %objc_super, i32 0, i32 1 ; <%struct._objc_class**> [#uses=1] + store %struct._objc_class* %13, %struct._objc_class** %14, align 4 + %objc_super1 = bitcast %struct._objc_super* %objc_super to %struct.objc_super* ; <%struct.objc_super*> [#uses=1] + store %struct.objc_super* %objc_super1, %struct.objc_super** %objc_super.5, align 4 + %15 = load %struct.objc_selector** @"\01L_OBJC_SELECTOR_REFERENCES_0", align 4 ; <%struct.objc_selector*> [#uses=1] + %16 = load %struct.objc_super** %objc_super.5, align 4 ; <%struct.objc_super*> [#uses=1] + %17 = load %struct.NSZone** %zone, align 4 ; <%struct.NSZone*> [#uses=1] + %18 = call %struct.objc_object* (%struct.objc_super*, %struct.objc_selector*, ...)* @objc_msgSendSuper(%struct.objc_super* %16, %struct.objc_selector* %15, %struct.NSZone* %17) nounwind ; <%struct.objc_object*> [#uses=1] + %19 = bitcast %struct.objc_object* %18 to %struct.NSBitmapImageRep* ; <%struct.NSBitmapImageRep*> [#uses=1] + %20 = load %struct.NSBitmapImageRep*** %new, align 4 ; <%struct.NSBitmapImageRep**> [#uses=1] + store %struct.NSBitmapImageRep* %19, %struct.NSBitmapImageRep** %20, align 4 + br 
label %return + +return: ; preds = %entry + ret void +} + +declare %struct.objc_object* @objc_msgSendSuper(%struct.objc_super*, %struct.objc_selector*, ...)
diff --git a/src/LLVM/test/CodeGen/PowerPC/unsafe-math.ll b/src/LLVM/test/CodeGen/PowerPC/unsafe-math.ll new file mode 100644 index 0000000..6c23982 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/unsafe-math.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=ppc32 | grep fmul | count 2 +; RUN: llc < %s -march=ppc32 -enable-unsafe-fp-math | \ +; RUN: grep fmul | count 1 + +define double @foo(double %X) nounwind { + %tmp1 = fmul double %X, 1.23 + %tmp2 = fmul double %tmp1, 4.124 + ret double %tmp2 +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/varargs.ll b/src/LLVM/test/CodeGen/PowerPC/varargs.ll new file mode 100644 index 0000000..1769be9 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/varargs.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -mtriple=powerpc-apple-darwin | FileCheck -check-prefix=P32 %s +; RUN: llc < %s -mtriple=powerpc64-apple-darwin | FileCheck -check-prefix=P64 %s + +; PR8327 +define i8* @test1(i8** %foo) nounwind { + %A = va_arg i8** %foo, i8* + ret i8* %A +} + +; P32: test1: +; P32: lwz r4, 0(r3) +; P32: addi r5, r4, 4 +; P32: stw r5, 0(r3) +; P32: lwz r3, 0(r4) +; P32: blr + +; P64: test1: +; P64: ld r4, 0(r3) +; P64: addi r5, r4, 8 +; P64: std r5, 0(r3) +; P64: ld r3, 0(r4) +; P64: blr
diff --git a/src/LLVM/test/CodeGen/PowerPC/vcmp-fold.ll b/src/LLVM/test/CodeGen/PowerPC/vcmp-fold.ll new file mode 100644 index 0000000..81a0141 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/vcmp-fold.ll
@@ -0,0 +1,22 @@ +; This should fold the "vcmpbfp." and "vcmpbfp" instructions into a single +; "vcmpbfp.". +; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vcmpbfp | count 1 + + +define void @test(<4 x float>* %x, <4 x float>* %y, i32* %P) { +entry: + %tmp = load <4 x float>* %x ; <<4 x float>> [#uses=1] + %tmp2 = load <4 x float>* %y ; <<4 x float>> [#uses=1] + %tmp.upgrd.1 = call i32 @llvm.ppc.altivec.vcmpbfp.p( i32 1, <4 x float> %tmp, <4 x float> %tmp2 ) ; <i32> [#uses=1] + %tmp4 = load <4 x float>* %x ; <<4 x float>> [#uses=1] + %tmp6 = load <4 x float>* %y ; <<4 x float>> [#uses=1] + %tmp.upgrd.2 = call <4 x i32> @llvm.ppc.altivec.vcmpbfp( <4 x float> %tmp4, <4 x float> %tmp6 ) ; <<4 x i32>> [#uses=1] + %tmp7 = bitcast <4 x i32> %tmp.upgrd.2 to <4 x float> ; <<4 x float>> [#uses=1] + store <4 x float> %tmp7, <4 x float>* %x + store i32 %tmp.upgrd.1, i32* %P + ret void +} + +declare i32 @llvm.ppc.altivec.vcmpbfp.p(i32, <4 x float>, <4 x float>) + +declare <4 x i32> @llvm.ppc.altivec.vcmpbfp(<4 x float>, <4 x float>)
diff --git a/src/LLVM/test/CodeGen/PowerPC/vec_auto_constant.ll b/src/LLVM/test/CodeGen/PowerPC/vec_auto_constant.ll new file mode 100644 index 0000000..973f089 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/vec_auto_constant.ll
@@ -0,0 +1,36 @@ +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin -mcpu=g5 | FileCheck %s +; Formerly produced .long, 7320806 (partial) +; CHECK: .byte 22 +; CHECK: .byte 21 +; CHECK: .byte 20 +; CHECK: .byte 3 +; CHECK: .byte 25 +; CHECK: .byte 24 +; CHECK: .byte 23 +; CHECK: .byte 3 +; CHECK: .byte 28 +; CHECK: .byte 27 +; CHECK: .byte 26 +; CHECK: .byte 3 +; CHECK: .byte 31 +; CHECK: .byte 30 +; CHECK: .byte 29 +; CHECK: .byte 3 +@baz = common global <16 x i8> zeroinitializer ; <<16 x i8>*> [#uses=1] + +define void @foo(<16 x i8> %x) nounwind ssp { +entry: + %x_addr = alloca <16 x i8> ; <<16 x i8>*> [#uses=2] + %temp = alloca <16 x i8> ; <<16 x i8>*> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store <16 x i8> %x, <16 x i8>* %x_addr + store <16 x i8> <i8 22, i8 21, i8 20, i8 3, i8 25, i8 24, i8 23, i8 3, i8 28, i8 27, i8 26, i8 3, i8 31, i8 30, i8 29, i8 3>, <16 x i8>* %temp, align 16 + %0 = load <16 x i8>* %x_addr, align 16 ; <<16 x i8>> [#uses=1] + %1 = load <16 x i8>* %temp, align 16 ; <<16 x i8>> [#uses=1] + %tmp = add <16 x i8> %0, %1 ; <<16 x i8>> [#uses=1] + store <16 x i8> %tmp, <16 x i8>* @baz, align 16 + br label %return + +return: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/vec_br_cmp.ll b/src/LLVM/test/CodeGen/PowerPC/vec_br_cmp.ll new file mode 100644 index 0000000..e3d1b88 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/vec_br_cmp.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -march=ppc32 -mcpu=g5 -o %t +; RUN: grep vcmpeqfp. %t +; RUN: not grep mfcr %t + +; A predicate compare used immediately by a branch should not generate an mfcr. + +define void @test(<4 x float>* %A, <4 x float>* %B) { + %tmp = load <4 x float>* %A ; <<4 x float>> [#uses=1] + %tmp3 = load <4 x float>* %B ; <<4 x float>> [#uses=1] + %tmp.upgrd.1 = tail call i32 @llvm.ppc.altivec.vcmpeqfp.p( i32 1, <4 x float> %tmp, <4 x float> %tmp3 ) ; <i32> [#uses=1] + %tmp.upgrd.2 = icmp eq i32 %tmp.upgrd.1, 0 ; <i1> [#uses=1] + br i1 %tmp.upgrd.2, label %cond_true, label %UnifiedReturnBlock + +cond_true: ; preds = %0 + store <4 x float> zeroinitializer, <4 x float>* %B + ret void + +UnifiedReturnBlock: ; preds = %0 + ret void +} + +declare i32 @llvm.ppc.altivec.vcmpeqfp.p(i32, <4 x float>, <4 x float>)
diff --git a/src/LLVM/test/CodeGen/PowerPC/vec_buildvector_loadstore.ll b/src/LLVM/test/CodeGen/PowerPC/vec_buildvector_loadstore.ll new file mode 100644 index 0000000..015c086 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/vec_buildvector_loadstore.ll
@@ -0,0 +1,37 @@ +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin -mattr=+altivec | FileCheck %s +; Formerly this did byte loads and word stores. +@a = external global <16 x i8> +@b = external global <16 x i8> +@c = external global <16 x i8> + +define void @foo() nounwind ssp { +; CHECK: _foo: +; CHECK-NOT: stw +entry: + %tmp0 = load <16 x i8>* @a, align 16 + %tmp180.i = extractelement <16 x i8> %tmp0, i32 0 ; <i8> [#uses=1] + %tmp181.i = insertelement <16 x i8> <i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp180.i, i32 2 ; <<16 x i8>> [#uses=1] + %tmp182.i = extractelement <16 x i8> %tmp0, i32 1 ; <i8> [#uses=1] + %tmp183.i = insertelement <16 x i8> %tmp181.i, i8 %tmp182.i, i32 3 ; <<16 x i8>> [#uses=1] + %tmp184.i = insertelement <16 x i8> %tmp183.i, i8 0, i32 4 ; <<16 x i8>> [#uses=1] + %tmp185.i = insertelement <16 x i8> %tmp184.i, i8 0, i32 5 ; <<16 x i8>> [#uses=1] + %tmp186.i = extractelement <16 x i8> %tmp0, i32 4 ; <i8> [#uses=1] + %tmp187.i = insertelement <16 x i8> %tmp185.i, i8 %tmp186.i, i32 6 ; <<16 x i8>> [#uses=1] + %tmp188.i = extractelement <16 x i8> %tmp0, i32 5 ; <i8> [#uses=1] + %tmp189.i = insertelement <16 x i8> %tmp187.i, i8 %tmp188.i, i32 7 ; <<16 x i8>> [#uses=1] + %tmp190.i = insertelement <16 x i8> %tmp189.i, i8 0, i32 8 ; <<16 x i8>> [#uses=1] + %tmp191.i = insertelement <16 x i8> %tmp190.i, i8 0, i32 9 ; <<16 x i8>> [#uses=1] + %tmp192.i = extractelement <16 x i8> %tmp0, i32 8 ; <i8> [#uses=1] + %tmp193.i = insertelement <16 x i8> %tmp191.i, i8 %tmp192.i, i32 10 ; <<16 x i8>> [#uses=1] + %tmp194.i = extractelement <16 x i8> %tmp0, i32 9 ; <i8> [#uses=1] + %tmp195.i = insertelement <16 x i8> %tmp193.i, i8 %tmp194.i, i32 11 ; <<16 x i8>> [#uses=1] + %tmp196.i = insertelement <16 x i8> %tmp195.i, i8 0, i32 12 ; <<16 x i8>> [#uses=1] + %tmp197.i = insertelement <16 x i8> %tmp196.i, i8 0, i32 13 ; <<16 x i8>> [#uses=1] 
+%tmp201 = shufflevector <16 x i8> %tmp197.i, <16 x i8> %tmp0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 28, i32 29>; ModuleID = 'try.c' + store <16 x i8> %tmp201, <16 x i8>* @c, align 16 + br label %return + +return: ; preds = %bb2 + ret void +; CHECK: blr +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/vec_call.ll b/src/LLVM/test/CodeGen/PowerPC/vec_call.ll new file mode 100644 index 0000000..8ed9cbc --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/vec_call.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=ppc32 -mcpu=g5 + +define <4 x i32> @test_arg(<4 x i32> %A, <4 x i32> %B) { + %C = add <4 x i32> %A, %B ; <<4 x i32>> [#uses=1] + ret <4 x i32> %C +} + +define <4 x i32> @foo() { + %X = call <4 x i32> @test_arg( <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %X +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/vec_constants.ll b/src/LLVM/test/CodeGen/PowerPC/vec_constants.ll new file mode 100644 index 0000000..14c7f98 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/vec_constants.ll
@@ -0,0 +1,44 @@ +; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep CPI + +define void @test1(<4 x i32>* %P1, <4 x i32>* %P2, <4 x float>* %P3) nounwind { + %tmp = load <4 x i32>* %P1 ; <<4 x i32>> [#uses=1] + %tmp4 = and <4 x i32> %tmp, < i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648 > ; <<4 x i32>> [#uses=1] + store <4 x i32> %tmp4, <4 x i32>* %P1 + %tmp7 = load <4 x i32>* %P2 ; <<4 x i32>> [#uses=1] + %tmp9 = and <4 x i32> %tmp7, < i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647 > ; <<4 x i32>> [#uses=1] + store <4 x i32> %tmp9, <4 x i32>* %P2 + %tmp.upgrd.1 = load <4 x float>* %P3 ; <<4 x float>> [#uses=1] + %tmp11 = bitcast <4 x float> %tmp.upgrd.1 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp12 = and <4 x i32> %tmp11, < i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647 > ; <<4 x i32>> [#uses=1] + %tmp13 = bitcast <4 x i32> %tmp12 to <4 x float> ; <<4 x float>> [#uses=1] + store <4 x float> %tmp13, <4 x float>* %P3 + ret void +} + +define <4 x i32> @test_30() nounwind { + ret <4 x i32> < i32 30, i32 30, i32 30, i32 30 > +} + +define <4 x i32> @test_29() nounwind { + ret <4 x i32> < i32 29, i32 29, i32 29, i32 29 > +} + +define <8 x i16> @test_n30() nounwind { + ret <8 x i16> < i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30 > +} + +define <16 x i8> @test_n104() nounwind { + ret <16 x i8> < i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104 > +} + +define <4 x i32> @test_vsldoi() nounwind { + ret <4 x i32> < i32 512, i32 512, i32 512, i32 512 > +} + +define <8 x i16> @test_vsldoi_65023() nounwind { + ret <8 x i16> < i16 65023, i16 65023,i16 65023,i16 65023,i16 65023,i16 65023,i16 65023,i16 65023 > +} + +define <4 x i32> @test_rol() nounwind { + ret <4 x i32> < i32 -11534337, i32 -11534337, i32 -11534337, i32 -11534337 > +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/vec_fneg.ll b/src/LLVM/test/CodeGen/PowerPC/vec_fneg.ll new file mode 100644 index 0000000..e01e659 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/vec_fneg.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vsubfp + +define void @t(<4 x float>* %A) { + %tmp2 = load <4 x float>* %A + %tmp3 = fsub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %tmp2 + store <4 x float> %tmp3, <4 x float>* %A + ret void +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/vec_insert.ll b/src/LLVM/test/CodeGen/PowerPC/vec_insert.ll new file mode 100644 index 0000000..185454c --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/vec_insert.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep sth + +define <8 x i16> @insert(<8 x i16> %foo, i16 %a) nounwind { +entry: + %vecext = insertelement <8 x i16> %foo, i16 %a, i32 7 ; <i8> [#uses=1] + ret <8 x i16> %vecext +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/vec_misaligned.ll b/src/LLVM/test/CodeGen/PowerPC/vec_misaligned.ll new file mode 100644 index 0000000..d7ed64a --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/vec_misaligned.ll
@@ -0,0 +1,37 @@ +; RUN: llc < %s -march=ppc32 -mcpu=g5 + +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128" +target triple = "powerpc-apple-darwin8" + %struct.S2203 = type { %struct.u16qi } + %struct.u16qi = type { <16 x i8> } +@s = weak global %struct.S2203 zeroinitializer ; <%struct.S2203*> [#uses=1] + +define void @foo(i32 %x, ...) { +entry: + %x_addr = alloca i32 ; <i32*> [#uses=1] + %ap = alloca i8* ; <i8**> [#uses=3] + %ap.0 = alloca i8* ; <i8**> [#uses=3] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store i32 %x, i32* %x_addr + %ap1 = bitcast i8** %ap to i8* ; <i8*> [#uses=1] + call void @llvm.va_start( i8* %ap1 ) + %tmp = load i8** %ap, align 4 ; <i8*> [#uses=1] + store i8* %tmp, i8** %ap.0, align 4 + %tmp2 = load i8** %ap.0, align 4 ; <i8*> [#uses=1] + %tmp3 = getelementptr i8* %tmp2, i64 16 ; <i8*> [#uses=1] + store i8* %tmp3, i8** %ap, align 4 + %tmp4 = load i8** %ap.0, align 4 ; <i8*> [#uses=1] + %tmp45 = bitcast i8* %tmp4 to %struct.S2203* ; <%struct.S2203*> [#uses=1] + %tmp6 = getelementptr %struct.S2203* @s, i32 0, i32 0 ; <%struct.u16qi*> [#uses=1] + %tmp7 = getelementptr %struct.S2203* %tmp45, i32 0, i32 0 ; <%struct.u16qi*> [#uses=1] + %tmp8 = getelementptr %struct.u16qi* %tmp6, i32 0, i32 0 ; <<16 x i8>*> [#uses=1] + %tmp9 = getelementptr %struct.u16qi* %tmp7, i32 0, i32 0 ; <<16 x i8>*> [#uses=1] + %tmp10 = load <16 x i8>* %tmp9, align 4 ; <<16 x i8>> [#uses=1] + store <16 x i8> %tmp10, <16 x i8>* %tmp8, align 4 + br label %return + +return: ; preds = %entry + ret void +} + +declare void @llvm.va_start(i8*) nounwind
diff --git a/src/LLVM/test/CodeGen/PowerPC/vec_mul.ll b/src/LLVM/test/CodeGen/PowerPC/vec_mul.ll new file mode 100644 index 0000000..6df7935 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/vec_mul.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep mullw +; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vmsumuhm + +define <4 x i32> @test_v4i32(<4 x i32>* %X, <4 x i32>* %Y) { + %tmp = load <4 x i32>* %X ; <<4 x i32>> [#uses=1] + %tmp2 = load <4 x i32>* %Y ; <<4 x i32>> [#uses=1] + %tmp3 = mul <4 x i32> %tmp, %tmp2 ; <<4 x i32>> [#uses=1] + ret <4 x i32> %tmp3 +} + +define <8 x i16> @test_v8i16(<8 x i16>* %X, <8 x i16>* %Y) { + %tmp = load <8 x i16>* %X ; <<8 x i16>> [#uses=1] + %tmp2 = load <8 x i16>* %Y ; <<8 x i16>> [#uses=1] + %tmp3 = mul <8 x i16> %tmp, %tmp2 ; <<8 x i16>> [#uses=1] + ret <8 x i16> %tmp3 +} + +define <16 x i8> @test_v16i8(<16 x i8>* %X, <16 x i8>* %Y) { + %tmp = load <16 x i8>* %X ; <<16 x i8>> [#uses=1] + %tmp2 = load <16 x i8>* %Y ; <<16 x i8>> [#uses=1] + %tmp3 = mul <16 x i8> %tmp, %tmp2 ; <<16 x i8>> [#uses=1] + ret <16 x i8> %tmp3 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/vec_perf_shuffle.ll b/src/LLVM/test/CodeGen/PowerPC/vec_perf_shuffle.ll new file mode 100644 index 0000000..0e31475 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/vec_perf_shuffle.ll
@@ -0,0 +1,36 @@ +; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep vperm + +define <4 x float> @test_uu72(<4 x float>* %P1, <4 x float>* %P2) { + %V1 = load <4 x float>* %P1 ; <<4 x float>> [#uses=1] + %V2 = load <4 x float>* %P2 ; <<4 x float>> [#uses=1] + %V3 = shufflevector <4 x float> %V1, <4 x float> %V2, <4 x i32> < i32 undef, i32 undef, i32 7, i32 2 > ; <<4 x float>> [#uses=1] + ret <4 x float> %V3 +} + +define <4 x float> @test_30u5(<4 x float>* %P1, <4 x float>* %P2) { + %V1 = load <4 x float>* %P1 ; <<4 x float>> [#uses=1] + %V2 = load <4 x float>* %P2 ; <<4 x float>> [#uses=1] + %V3 = shufflevector <4 x float> %V1, <4 x float> %V2, <4 x i32> < i32 3, i32 0, i32 undef, i32 5 > ; <<4 x float>> [#uses=1] + ret <4 x float> %V3 +} + +define <4 x float> @test_3u73(<4 x float>* %P1, <4 x float>* %P2) { + %V1 = load <4 x float>* %P1 ; <<4 x float>> [#uses=1] + %V2 = load <4 x float>* %P2 ; <<4 x float>> [#uses=1] + %V3 = shufflevector <4 x float> %V1, <4 x float> %V2, <4 x i32> < i32 3, i32 undef, i32 7, i32 3 > ; <<4 x float>> [#uses=1] + ret <4 x float> %V3 +} + +define <4 x float> @test_3774(<4 x float>* %P1, <4 x float>* %P2) { + %V1 = load <4 x float>* %P1 ; <<4 x float>> [#uses=1] + %V2 = load <4 x float>* %P2 ; <<4 x float>> [#uses=1] + %V3 = shufflevector <4 x float> %V1, <4 x float> %V2, <4 x i32> < i32 3, i32 7, i32 7, i32 4 > ; <<4 x float>> [#uses=1] + ret <4 x float> %V3 +} + +define <4 x float> @test_4450(<4 x float>* %P1, <4 x float>* %P2) { + %V1 = load <4 x float>* %P1 ; <<4 x float>> [#uses=1] + %V2 = load <4 x float>* %P2 ; <<4 x float>> [#uses=1] + %V3 = shufflevector <4 x float> %V1, <4 x float> %V2, <4 x i32> < i32 4, i32 4, i32 5, i32 0 > ; <<4 x float>> [#uses=1] + ret <4 x float> %V3 +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/vec_shift.ll b/src/LLVM/test/CodeGen/PowerPC/vec_shift.ll new file mode 100644 index 0000000..646fb5f --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/vec_shift.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=ppc32 -mcpu=g5 +; PR3628 + +define void @update(<4 x i32> %val, <4 x i32>* %dst) nounwind { +entry: + %shl = shl <4 x i32> %val, < i32 4, i32 3, i32 2, i32 1 > + %shr = ashr <4 x i32> %shl, < i32 1, i32 2, i32 3, i32 4 > + store <4 x i32> %shr, <4 x i32>* %dst + ret void +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/vec_shuffle.ll b/src/LLVM/test/CodeGen/PowerPC/vec_shuffle.ll new file mode 100644 index 0000000..b1a8ad9 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/vec_shuffle.ll
@@ -0,0 +1,504 @@ +; RUN: opt < %s -instcombine | \ +; RUN: llc -march=ppc32 -mcpu=g5 | not grep vperm +; RUN: llc < %s -march=ppc32 -mcpu=g5 > %t +; RUN: grep vsldoi %t | count 2 +; RUN: grep vmrgh %t | count 7 +; RUN: grep vmrgl %t | count 6 +; RUN: grep vpkuhum %t | count 1 +; RUN: grep vpkuwum %t | count 1 + +define void @VSLDOI_xy(<8 x i16>* %A, <8 x i16>* %B) { +entry: + %tmp = load <8 x i16>* %A ; <<8 x i16>> [#uses=1] + %tmp2 = load <8 x i16>* %B ; <<8 x i16>> [#uses=1] + %tmp.upgrd.1 = bitcast <8 x i16> %tmp to <16 x i8> ; <<16 x i8>> [#uses=11] + %tmp2.upgrd.2 = bitcast <8 x i16> %tmp2 to <16 x i8> ; <<16 x i8>> [#uses=5] + %tmp.upgrd.3 = extractelement <16 x i8> %tmp.upgrd.1, i32 5 ; <i8> [#uses=1] + %tmp3 = extractelement <16 x i8> %tmp.upgrd.1, i32 6 ; <i8> [#uses=1] + %tmp4 = extractelement <16 x i8> %tmp.upgrd.1, i32 7 ; <i8> [#uses=1] + %tmp5 = extractelement <16 x i8> %tmp.upgrd.1, i32 8 ; <i8> [#uses=1] + %tmp6 = extractelement <16 x i8> %tmp.upgrd.1, i32 9 ; <i8> [#uses=1] + %tmp7 = extractelement <16 x i8> %tmp.upgrd.1, i32 10 ; <i8> [#uses=1] + %tmp8 = extractelement <16 x i8> %tmp.upgrd.1, i32 11 ; <i8> [#uses=1] + %tmp9 = extractelement <16 x i8> %tmp.upgrd.1, i32 12 ; <i8> [#uses=1] + %tmp10 = extractelement <16 x i8> %tmp.upgrd.1, i32 13 ; <i8> [#uses=1] + %tmp11 = extractelement <16 x i8> %tmp.upgrd.1, i32 14 ; <i8> [#uses=1] + %tmp12 = extractelement <16 x i8> %tmp.upgrd.1, i32 15 ; <i8> [#uses=1] + %tmp13 = extractelement <16 x i8> %tmp2.upgrd.2, i32 0 ; <i8> [#uses=1] + %tmp14 = extractelement <16 x i8> %tmp2.upgrd.2, i32 1 ; <i8> [#uses=1] + %tmp15 = extractelement <16 x i8> %tmp2.upgrd.2, i32 2 ; <i8> [#uses=1] + %tmp16 = extractelement <16 x i8> %tmp2.upgrd.2, i32 3 ; <i8> [#uses=1] + %tmp17 = extractelement <16 x i8> %tmp2.upgrd.2, i32 4 ; <i8> [#uses=1] + %tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.3, i32 0 ; <<16 x i8>> [#uses=1] + %tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1 ; <<16 x i8>> [#uses=1] + %tmp20 
= insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2 ; <<16 x i8>> [#uses=1] + %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3 ; <<16 x i8>> [#uses=1] + %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4 ; <<16 x i8>> [#uses=1] + %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5 ; <<16 x i8>> [#uses=1] + %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6 ; <<16 x i8>> [#uses=1] + %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7 ; <<16 x i8>> [#uses=1] + %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8 ; <<16 x i8>> [#uses=1] + %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9 ; <<16 x i8>> [#uses=1] + %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10 ; <<16 x i8>> [#uses=1] + %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11 ; <<16 x i8>> [#uses=1] + %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12 ; <<16 x i8>> [#uses=1] + %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13 ; <<16 x i8>> [#uses=1] + %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14 ; <<16 x i8>> [#uses=1] + %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15 ; <<16 x i8>> [#uses=1] + %tmp33.upgrd.4 = bitcast <16 x i8> %tmp33 to <8 x i16> ; <<8 x i16>> [#uses=1] + store <8 x i16> %tmp33.upgrd.4, <8 x i16>* %A + ret void +} + +define void @VSLDOI_xx(<8 x i16>* %A, <8 x i16>* %B) { + %tmp = load <8 x i16>* %A ; <<8 x i16>> [#uses=1] + %tmp2 = load <8 x i16>* %A ; <<8 x i16>> [#uses=1] + %tmp.upgrd.5 = bitcast <8 x i16> %tmp to <16 x i8> ; <<16 x i8>> [#uses=11] + %tmp2.upgrd.6 = bitcast <8 x i16> %tmp2 to <16 x i8> ; <<16 x i8>> [#uses=5] + %tmp.upgrd.7 = extractelement <16 x i8> %tmp.upgrd.5, i32 5 ; <i8> [#uses=1] + %tmp3 = extractelement <16 x i8> %tmp.upgrd.5, i32 6 ; <i8> [#uses=1] + %tmp4 = extractelement <16 x i8> %tmp.upgrd.5, i32 7 ; <i8> [#uses=1] + %tmp5 = extractelement <16 x i8> %tmp.upgrd.5, i32 8 ; <i8> [#uses=1] + %tmp6 = extractelement <16 x i8> %tmp.upgrd.5, i32 9 ; <i8> [#uses=1] + 
%tmp7 = extractelement <16 x i8> %tmp.upgrd.5, i32 10 ; <i8> [#uses=1] + %tmp8 = extractelement <16 x i8> %tmp.upgrd.5, i32 11 ; <i8> [#uses=1] + %tmp9 = extractelement <16 x i8> %tmp.upgrd.5, i32 12 ; <i8> [#uses=1] + %tmp10 = extractelement <16 x i8> %tmp.upgrd.5, i32 13 ; <i8> [#uses=1] + %tmp11 = extractelement <16 x i8> %tmp.upgrd.5, i32 14 ; <i8> [#uses=1] + %tmp12 = extractelement <16 x i8> %tmp.upgrd.5, i32 15 ; <i8> [#uses=1] + %tmp13 = extractelement <16 x i8> %tmp2.upgrd.6, i32 0 ; <i8> [#uses=1] + %tmp14 = extractelement <16 x i8> %tmp2.upgrd.6, i32 1 ; <i8> [#uses=1] + %tmp15 = extractelement <16 x i8> %tmp2.upgrd.6, i32 2 ; <i8> [#uses=1] + %tmp16 = extractelement <16 x i8> %tmp2.upgrd.6, i32 3 ; <i8> [#uses=1] + %tmp17 = extractelement <16 x i8> %tmp2.upgrd.6, i32 4 ; <i8> [#uses=1] + %tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.7, i32 0 ; <<16 x i8>> [#uses=1] + %tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1 ; <<16 x i8>> [#uses=1] + %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2 ; <<16 x i8>> [#uses=1] + %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3 ; <<16 x i8>> [#uses=1] + %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4 ; <<16 x i8>> [#uses=1] + %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5 ; <<16 x i8>> [#uses=1] + %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6 ; <<16 x i8>> [#uses=1] + %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7 ; <<16 x i8>> [#uses=1] + %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8 ; <<16 x i8>> [#uses=1] + %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9 ; <<16 x i8>> [#uses=1] + %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10 ; <<16 x i8>> [#uses=1] + %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11 ; <<16 x i8>> [#uses=1] + %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12 ; <<16 x i8>> [#uses=1] + %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13 ; <<16 x i8>> [#uses=1] + %tmp32 = 
insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14 ; <<16 x i8>> [#uses=1] + %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15 ; <<16 x i8>> [#uses=1] + %tmp33.upgrd.8 = bitcast <16 x i8> %tmp33 to <8 x i16> ; <<8 x i16>> [#uses=1] + store <8 x i16> %tmp33.upgrd.8, <8 x i16>* %A + ret void +} + +define void @VPERM_promote(<8 x i16>* %A, <8 x i16>* %B) { +entry: + %tmp = load <8 x i16>* %A ; <<8 x i16>> [#uses=1] + %tmp.upgrd.9 = bitcast <8 x i16> %tmp to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp2 = load <8 x i16>* %B ; <<8 x i16>> [#uses=1] + %tmp2.upgrd.10 = bitcast <8 x i16> %tmp2 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp3 = call <4 x i32> @llvm.ppc.altivec.vperm( <4 x i32> %tmp.upgrd.9, <4 x i32> %tmp2.upgrd.10, <16 x i8> < i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14 > ) ; <<4 x i32>> [#uses=1] + %tmp3.upgrd.11 = bitcast <4 x i32> %tmp3 to <8 x i16> ; <<8 x i16>> [#uses=1] + store <8 x i16> %tmp3.upgrd.11, <8 x i16>* %A + ret void +} + +declare <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32>, <4 x i32>, <16 x i8>) + +define void @tb_l(<16 x i8>* %A, <16 x i8>* %B) { +entry: + %tmp = load <16 x i8>* %A ; <<16 x i8>> [#uses=8] + %tmp2 = load <16 x i8>* %B ; <<16 x i8>> [#uses=8] + %tmp.upgrd.12 = extractelement <16 x i8> %tmp, i32 8 ; <i8> [#uses=1] + %tmp3 = extractelement <16 x i8> %tmp2, i32 8 ; <i8> [#uses=1] + %tmp4 = extractelement <16 x i8> %tmp, i32 9 ; <i8> [#uses=1] + %tmp5 = extractelement <16 x i8> %tmp2, i32 9 ; <i8> [#uses=1] + %tmp6 = extractelement <16 x i8> %tmp, i32 10 ; <i8> [#uses=1] + %tmp7 = extractelement <16 x i8> %tmp2, i32 10 ; <i8> [#uses=1] + %tmp8 = extractelement <16 x i8> %tmp, i32 11 ; <i8> [#uses=1] + %tmp9 = extractelement <16 x i8> %tmp2, i32 11 ; <i8> [#uses=1] + %tmp10 = extractelement <16 x i8> %tmp, i32 12 ; <i8> [#uses=1] + %tmp11 = extractelement <16 x i8> %tmp2, i32 12 ; <i8> [#uses=1] + %tmp12 = extractelement <16 x i8> %tmp, i32 13 ; <i8> [#uses=1] + 
%tmp13 = extractelement <16 x i8> %tmp2, i32 13 ; <i8> [#uses=1] + %tmp14 = extractelement <16 x i8> %tmp, i32 14 ; <i8> [#uses=1] + %tmp15 = extractelement <16 x i8> %tmp2, i32 14 ; <i8> [#uses=1] + %tmp16 = extractelement <16 x i8> %tmp, i32 15 ; <i8> [#uses=1] + %tmp17 = extractelement <16 x i8> %tmp2, i32 15 ; <i8> [#uses=1] + %tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.12, i32 0 ; <<16 x i8>> [#uses=1] + %tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1 ; <<16 x i8>> [#uses=1] + %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2 ; <<16 x i8>> [#uses=1] + %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3 ; <<16 x i8>> [#uses=1] + %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4 ; <<16 x i8>> [#uses=1] + %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5 ; <<16 x i8>> [#uses=1] + %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6 ; <<16 x i8>> [#uses=1] + %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7 ; <<16 x i8>> [#uses=1] + %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8 ; <<16 x i8>> [#uses=1] + %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9 ; <<16 x i8>> [#uses=1] + %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10 ; <<16 x i8>> [#uses=1] + %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11 ; <<16 x i8>> [#uses=1] + %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12 ; <<16 x i8>> [#uses=1] + %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13 ; <<16 x i8>> [#uses=1] + %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14 ; <<16 x i8>> [#uses=1] + %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15 ; <<16 x i8>> [#uses=1] + store <16 x i8> %tmp33, <16 x i8>* %A + ret void +} + +define void @th_l(<8 x i16>* %A, <8 x i16>* %B) { +entry: + %tmp = load <8 x i16>* %A ; <<8 x i16>> [#uses=4] + %tmp2 = load <8 x i16>* %B ; <<8 x i16>> [#uses=4] + %tmp.upgrd.13 = extractelement <8 x i16> %tmp, i32 4 ; <i16> [#uses=1] + %tmp3 = extractelement 
<8 x i16> %tmp2, i32 4 ; <i16> [#uses=1] + %tmp4 = extractelement <8 x i16> %tmp, i32 5 ; <i16> [#uses=1] + %tmp5 = extractelement <8 x i16> %tmp2, i32 5 ; <i16> [#uses=1] + %tmp6 = extractelement <8 x i16> %tmp, i32 6 ; <i16> [#uses=1] + %tmp7 = extractelement <8 x i16> %tmp2, i32 6 ; <i16> [#uses=1] + %tmp8 = extractelement <8 x i16> %tmp, i32 7 ; <i16> [#uses=1] + %tmp9 = extractelement <8 x i16> %tmp2, i32 7 ; <i16> [#uses=1] + %tmp10 = insertelement <8 x i16> undef, i16 %tmp.upgrd.13, i32 0 ; <<8 x i16>> [#uses=1] + %tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 1 ; <<8 x i16>> [#uses=1] + %tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 2 ; <<8 x i16>> [#uses=1] + %tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 3 ; <<8 x i16>> [#uses=1] + %tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 4 ; <<8 x i16>> [#uses=1] + %tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 5 ; <<8 x i16>> [#uses=1] + %tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 6 ; <<8 x i16>> [#uses=1] + %tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 7 ; <<8 x i16>> [#uses=1] + store <8 x i16> %tmp17, <8 x i16>* %A + ret void +} + +define void @tw_l(<4 x i32>* %A, <4 x i32>* %B) { +entry: + %tmp = load <4 x i32>* %A ; <<4 x i32>> [#uses=2] + %tmp2 = load <4 x i32>* %B ; <<4 x i32>> [#uses=2] + %tmp.upgrd.14 = extractelement <4 x i32> %tmp, i32 2 ; <i32> [#uses=1] + %tmp3 = extractelement <4 x i32> %tmp2, i32 2 ; <i32> [#uses=1] + %tmp4 = extractelement <4 x i32> %tmp, i32 3 ; <i32> [#uses=1] + %tmp5 = extractelement <4 x i32> %tmp2, i32 3 ; <i32> [#uses=1] + %tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.14, i32 0 ; <<4 x i32>> [#uses=1] + %tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1 ; <<4 x i32>> [#uses=1] + %tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2 ; <<4 x i32>> [#uses=1] + %tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3 ; <<4 x i32>> [#uses=1] + store <4 x i32> %tmp9, <4 x i32>* %A + ret void +} + 
+define void @tb_h(<16 x i8>* %A, <16 x i8>* %B) { +entry: + %tmp = load <16 x i8>* %A ; <<16 x i8>> [#uses=8] + %tmp2 = load <16 x i8>* %B ; <<16 x i8>> [#uses=8] + %tmp.upgrd.15 = extractelement <16 x i8> %tmp, i32 0 ; <i8> [#uses=1] + %tmp3 = extractelement <16 x i8> %tmp2, i32 0 ; <i8> [#uses=1] + %tmp4 = extractelement <16 x i8> %tmp, i32 1 ; <i8> [#uses=1] + %tmp5 = extractelement <16 x i8> %tmp2, i32 1 ; <i8> [#uses=1] + %tmp6 = extractelement <16 x i8> %tmp, i32 2 ; <i8> [#uses=1] + %tmp7 = extractelement <16 x i8> %tmp2, i32 2 ; <i8> [#uses=1] + %tmp8 = extractelement <16 x i8> %tmp, i32 3 ; <i8> [#uses=1] + %tmp9 = extractelement <16 x i8> %tmp2, i32 3 ; <i8> [#uses=1] + %tmp10 = extractelement <16 x i8> %tmp, i32 4 ; <i8> [#uses=1] + %tmp11 = extractelement <16 x i8> %tmp2, i32 4 ; <i8> [#uses=1] + %tmp12 = extractelement <16 x i8> %tmp, i32 5 ; <i8> [#uses=1] + %tmp13 = extractelement <16 x i8> %tmp2, i32 5 ; <i8> [#uses=1] + %tmp14 = extractelement <16 x i8> %tmp, i32 6 ; <i8> [#uses=1] + %tmp15 = extractelement <16 x i8> %tmp2, i32 6 ; <i8> [#uses=1] + %tmp16 = extractelement <16 x i8> %tmp, i32 7 ; <i8> [#uses=1] + %tmp17 = extractelement <16 x i8> %tmp2, i32 7 ; <i8> [#uses=1] + %tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.15, i32 0 ; <<16 x i8>> [#uses=1] + %tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1 ; <<16 x i8>> [#uses=1] + %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2 ; <<16 x i8>> [#uses=1] + %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3 ; <<16 x i8>> [#uses=1] + %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4 ; <<16 x i8>> [#uses=1] + %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5 ; <<16 x i8>> [#uses=1] + %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6 ; <<16 x i8>> [#uses=1] + %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7 ; <<16 x i8>> [#uses=1] + %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8 ; <<16 x i8>> [#uses=1] + %tmp27 = insertelement <16 x 
i8> %tmp26, i8 %tmp11, i32 9 ; <<16 x i8>> [#uses=1] + %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10 ; <<16 x i8>> [#uses=1] + %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11 ; <<16 x i8>> [#uses=1] + %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12 ; <<16 x i8>> [#uses=1] + %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13 ; <<16 x i8>> [#uses=1] + %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14 ; <<16 x i8>> [#uses=1] + %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15 ; <<16 x i8>> [#uses=1] + store <16 x i8> %tmp33, <16 x i8>* %A + ret void +} + +define void @th_h(<8 x i16>* %A, <8 x i16>* %B) { +entry: + %tmp = load <8 x i16>* %A ; <<8 x i16>> [#uses=4] + %tmp2 = load <8 x i16>* %B ; <<8 x i16>> [#uses=4] + %tmp.upgrd.16 = extractelement <8 x i16> %tmp, i32 0 ; <i16> [#uses=1] + %tmp3 = extractelement <8 x i16> %tmp2, i32 0 ; <i16> [#uses=1] + %tmp4 = extractelement <8 x i16> %tmp, i32 1 ; <i16> [#uses=1] + %tmp5 = extractelement <8 x i16> %tmp2, i32 1 ; <i16> [#uses=1] + %tmp6 = extractelement <8 x i16> %tmp, i32 2 ; <i16> [#uses=1] + %tmp7 = extractelement <8 x i16> %tmp2, i32 2 ; <i16> [#uses=1] + %tmp8 = extractelement <8 x i16> %tmp, i32 3 ; <i16> [#uses=1] + %tmp9 = extractelement <8 x i16> %tmp2, i32 3 ; <i16> [#uses=1] + %tmp10 = insertelement <8 x i16> undef, i16 %tmp.upgrd.16, i32 0 ; <<8 x i16>> [#uses=1] + %tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 1 ; <<8 x i16>> [#uses=1] + %tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 2 ; <<8 x i16>> [#uses=1] + %tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 3 ; <<8 x i16>> [#uses=1] + %tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 4 ; <<8 x i16>> [#uses=1] + %tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 5 ; <<8 x i16>> [#uses=1] + %tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 6 ; <<8 x i16>> [#uses=1] + %tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 7 ; <<8 x i16>> [#uses=1] + 
store <8 x i16> %tmp17, <8 x i16>* %A + ret void +} + +define void @tw_h(<4 x i32>* %A, <4 x i32>* %B) { +entry: + %tmp = load <4 x i32>* %A ; <<4 x i32>> [#uses=2] + %tmp2 = load <4 x i32>* %B ; <<4 x i32>> [#uses=2] + %tmp.upgrd.17 = extractelement <4 x i32> %tmp2, i32 0 ; <i32> [#uses=1] + %tmp3 = extractelement <4 x i32> %tmp, i32 0 ; <i32> [#uses=1] + %tmp4 = extractelement <4 x i32> %tmp2, i32 1 ; <i32> [#uses=1] + %tmp5 = extractelement <4 x i32> %tmp, i32 1 ; <i32> [#uses=1] + %tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.17, i32 0 ; <<4 x i32>> [#uses=1] + %tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1 ; <<4 x i32>> [#uses=1] + %tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2 ; <<4 x i32>> [#uses=1] + %tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3 ; <<4 x i32>> [#uses=1] + store <4 x i32> %tmp9, <4 x i32>* %A + ret void +} + +define void @tw_h_flop(<4 x i32>* %A, <4 x i32>* %B) { + %tmp = load <4 x i32>* %A ; <<4 x i32>> [#uses=2] + %tmp2 = load <4 x i32>* %B ; <<4 x i32>> [#uses=2] + %tmp.upgrd.18 = extractelement <4 x i32> %tmp, i32 0 ; <i32> [#uses=1] + %tmp3 = extractelement <4 x i32> %tmp2, i32 0 ; <i32> [#uses=1] + %tmp4 = extractelement <4 x i32> %tmp, i32 1 ; <i32> [#uses=1] + %tmp5 = extractelement <4 x i32> %tmp2, i32 1 ; <i32> [#uses=1] + %tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.18, i32 0 ; <<4 x i32>> [#uses=1] + %tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1 ; <<4 x i32>> [#uses=1] + %tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2 ; <<4 x i32>> [#uses=1] + %tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3 ; <<4 x i32>> [#uses=1] + store <4 x i32> %tmp9, <4 x i32>* %A + ret void +} + +define void @VMRG_UNARY_tb_l(<16 x i8>* %A, <16 x i8>* %B) { +entry: + %tmp = load <16 x i8>* %A ; <<16 x i8>> [#uses=16] + %tmp.upgrd.19 = extractelement <16 x i8> %tmp, i32 8 ; <i8> [#uses=1] + %tmp3 = extractelement <16 x i8> %tmp, i32 8 ; <i8> [#uses=1] + %tmp4 = extractelement <16 x i8> 
%tmp, i32 9 ; <i8> [#uses=1] + %tmp5 = extractelement <16 x i8> %tmp, i32 9 ; <i8> [#uses=1] + %tmp6 = extractelement <16 x i8> %tmp, i32 10 ; <i8> [#uses=1] + %tmp7 = extractelement <16 x i8> %tmp, i32 10 ; <i8> [#uses=1] + %tmp8 = extractelement <16 x i8> %tmp, i32 11 ; <i8> [#uses=1] + %tmp9 = extractelement <16 x i8> %tmp, i32 11 ; <i8> [#uses=1] + %tmp10 = extractelement <16 x i8> %tmp, i32 12 ; <i8> [#uses=1] + %tmp11 = extractelement <16 x i8> %tmp, i32 12 ; <i8> [#uses=1] + %tmp12 = extractelement <16 x i8> %tmp, i32 13 ; <i8> [#uses=1] + %tmp13 = extractelement <16 x i8> %tmp, i32 13 ; <i8> [#uses=1] + %tmp14 = extractelement <16 x i8> %tmp, i32 14 ; <i8> [#uses=1] + %tmp15 = extractelement <16 x i8> %tmp, i32 14 ; <i8> [#uses=1] + %tmp16 = extractelement <16 x i8> %tmp, i32 15 ; <i8> [#uses=1] + %tmp17 = extractelement <16 x i8> %tmp, i32 15 ; <i8> [#uses=1] + %tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.19, i32 0 ; <<16 x i8>> [#uses=1] + %tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1 ; <<16 x i8>> [#uses=1] + %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2 ; <<16 x i8>> [#uses=1] + %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3 ; <<16 x i8>> [#uses=1] + %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4 ; <<16 x i8>> [#uses=1] + %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5 ; <<16 x i8>> [#uses=1] + %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6 ; <<16 x i8>> [#uses=1] + %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7 ; <<16 x i8>> [#uses=1] + %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8 ; <<16 x i8>> [#uses=1] + %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9 ; <<16 x i8>> [#uses=1] + %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10 ; <<16 x i8>> [#uses=1] + %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11 ; <<16 x i8>> [#uses=1] + %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12 ; <<16 x i8>> [#uses=1] + %tmp31 = insertelement 
<16 x i8> %tmp30, i8 %tmp15, i32 13 ; <<16 x i8>> [#uses=1] + %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14 ; <<16 x i8>> [#uses=1] + %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15 ; <<16 x i8>> [#uses=1] + store <16 x i8> %tmp33, <16 x i8>* %A + ret void +} + +define void @VMRG_UNARY_th_l(<8 x i16>* %A, <8 x i16>* %B) { +entry: + %tmp = load <8 x i16>* %A ; <<8 x i16>> [#uses=8] + %tmp.upgrd.20 = extractelement <8 x i16> %tmp, i32 4 ; <i16> [#uses=1] + %tmp3 = extractelement <8 x i16> %tmp, i32 4 ; <i16> [#uses=1] + %tmp4 = extractelement <8 x i16> %tmp, i32 5 ; <i16> [#uses=1] + %tmp5 = extractelement <8 x i16> %tmp, i32 5 ; <i16> [#uses=1] + %tmp6 = extractelement <8 x i16> %tmp, i32 6 ; <i16> [#uses=1] + %tmp7 = extractelement <8 x i16> %tmp, i32 6 ; <i16> [#uses=1] + %tmp8 = extractelement <8 x i16> %tmp, i32 7 ; <i16> [#uses=1] + %tmp9 = extractelement <8 x i16> %tmp, i32 7 ; <i16> [#uses=1] + %tmp10 = insertelement <8 x i16> undef, i16 %tmp.upgrd.20, i32 0 ; <<8 x i16>> [#uses=1] + %tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 1 ; <<8 x i16>> [#uses=1] + %tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 2 ; <<8 x i16>> [#uses=1] + %tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 3 ; <<8 x i16>> [#uses=1] + %tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 4 ; <<8 x i16>> [#uses=1] + %tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 5 ; <<8 x i16>> [#uses=1] + %tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 6 ; <<8 x i16>> [#uses=1] + %tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 7 ; <<8 x i16>> [#uses=1] + store <8 x i16> %tmp17, <8 x i16>* %A + ret void +} + +define void @VMRG_UNARY_tw_l(<4 x i32>* %A, <4 x i32>* %B) { +entry: + %tmp = load <4 x i32>* %A ; <<4 x i32>> [#uses=4] + %tmp.upgrd.21 = extractelement <4 x i32> %tmp, i32 2 ; <i32> [#uses=1] + %tmp3 = extractelement <4 x i32> %tmp, i32 2 ; <i32> [#uses=1] + %tmp4 = extractelement <4 x i32> %tmp, i32 3 ; <i32> [#uses=1] + 
%tmp5 = extractelement <4 x i32> %tmp, i32 3 ; <i32> [#uses=1] + %tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.21, i32 0 ; <<4 x i32>> [#uses=1] + %tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1 ; <<4 x i32>> [#uses=1] + %tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2 ; <<4 x i32>> [#uses=1] + %tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3 ; <<4 x i32>> [#uses=1] + store <4 x i32> %tmp9, <4 x i32>* %A + ret void +} + +define void @VMRG_UNARY_tb_h(<16 x i8>* %A, <16 x i8>* %B) { +entry: + %tmp = load <16 x i8>* %A ; <<16 x i8>> [#uses=16] + %tmp.upgrd.22 = extractelement <16 x i8> %tmp, i32 0 ; <i8> [#uses=1] + %tmp3 = extractelement <16 x i8> %tmp, i32 0 ; <i8> [#uses=1] + %tmp4 = extractelement <16 x i8> %tmp, i32 1 ; <i8> [#uses=1] + %tmp5 = extractelement <16 x i8> %tmp, i32 1 ; <i8> [#uses=1] + %tmp6 = extractelement <16 x i8> %tmp, i32 2 ; <i8> [#uses=1] + %tmp7 = extractelement <16 x i8> %tmp, i32 2 ; <i8> [#uses=1] + %tmp8 = extractelement <16 x i8> %tmp, i32 3 ; <i8> [#uses=1] + %tmp9 = extractelement <16 x i8> %tmp, i32 3 ; <i8> [#uses=1] + %tmp10 = extractelement <16 x i8> %tmp, i32 4 ; <i8> [#uses=1] + %tmp11 = extractelement <16 x i8> %tmp, i32 4 ; <i8> [#uses=1] + %tmp12 = extractelement <16 x i8> %tmp, i32 5 ; <i8> [#uses=1] + %tmp13 = extractelement <16 x i8> %tmp, i32 5 ; <i8> [#uses=1] + %tmp14 = extractelement <16 x i8> %tmp, i32 6 ; <i8> [#uses=1] + %tmp15 = extractelement <16 x i8> %tmp, i32 6 ; <i8> [#uses=1] + %tmp16 = extractelement <16 x i8> %tmp, i32 7 ; <i8> [#uses=1] + %tmp17 = extractelement <16 x i8> %tmp, i32 7 ; <i8> [#uses=1] + %tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.22, i32 0 ; <<16 x i8>> [#uses=1] + %tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1 ; <<16 x i8>> [#uses=1] + %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2 ; <<16 x i8>> [#uses=1] + %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3 ; <<16 x i8>> [#uses=1] + %tmp22 = insertelement <16 x i8> %tmp21, 
i8 %tmp6, i32 4 ; <<16 x i8>> [#uses=1] + %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5 ; <<16 x i8>> [#uses=1] + %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6 ; <<16 x i8>> [#uses=1] + %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7 ; <<16 x i8>> [#uses=1] + %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8 ; <<16 x i8>> [#uses=1] + %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9 ; <<16 x i8>> [#uses=1] + %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10 ; <<16 x i8>> [#uses=1] + %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11 ; <<16 x i8>> [#uses=1] + %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12 ; <<16 x i8>> [#uses=1] + %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13 ; <<16 x i8>> [#uses=1] + %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14 ; <<16 x i8>> [#uses=1] + %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15 ; <<16 x i8>> [#uses=1] + store <16 x i8> %tmp33, <16 x i8>* %A + ret void +} + +define void @VMRG_UNARY_th_h(<8 x i16>* %A, <8 x i16>* %B) { +entry: + %tmp = load <8 x i16>* %A ; <<8 x i16>> [#uses=8] + %tmp.upgrd.23 = extractelement <8 x i16> %tmp, i32 0 ; <i16> [#uses=1] + %tmp3 = extractelement <8 x i16> %tmp, i32 0 ; <i16> [#uses=1] + %tmp4 = extractelement <8 x i16> %tmp, i32 1 ; <i16> [#uses=1] + %tmp5 = extractelement <8 x i16> %tmp, i32 1 ; <i16> [#uses=1] + %tmp6 = extractelement <8 x i16> %tmp, i32 2 ; <i16> [#uses=1] + %tmp7 = extractelement <8 x i16> %tmp, i32 2 ; <i16> [#uses=1] + %tmp8 = extractelement <8 x i16> %tmp, i32 3 ; <i16> [#uses=1] + %tmp9 = extractelement <8 x i16> %tmp, i32 3 ; <i16> [#uses=1] + %tmp10 = insertelement <8 x i16> undef, i16 %tmp.upgrd.23, i32 0 ; <<8 x i16>> [#uses=1] + %tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 1 ; <<8 x i16>> [#uses=1] + %tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 2 ; <<8 x i16>> [#uses=1] + %tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 3 ; <<8 x 
i16>> [#uses=1] + %tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 4 ; <<8 x i16>> [#uses=1] + %tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 5 ; <<8 x i16>> [#uses=1] + %tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 6 ; <<8 x i16>> [#uses=1] + %tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 7 ; <<8 x i16>> [#uses=1] + store <8 x i16> %tmp17, <8 x i16>* %A + ret void +} + +define void @VMRG_UNARY_tw_h(<4 x i32>* %A, <4 x i32>* %B) { +entry: + %tmp = load <4 x i32>* %A ; <<4 x i32>> [#uses=4] + %tmp.upgrd.24 = extractelement <4 x i32> %tmp, i32 0 ; <i32> [#uses=1] + %tmp3 = extractelement <4 x i32> %tmp, i32 0 ; <i32> [#uses=1] + %tmp4 = extractelement <4 x i32> %tmp, i32 1 ; <i32> [#uses=1] + %tmp5 = extractelement <4 x i32> %tmp, i32 1 ; <i32> [#uses=1] + %tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.24, i32 0 ; <<4 x i32>> [#uses=1] + %tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1 ; <<4 x i32>> [#uses=1] + %tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2 ; <<4 x i32>> [#uses=1] + %tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3 ; <<4 x i32>> [#uses=1] + store <4 x i32> %tmp9, <4 x i32>* %A + ret void +} + +define void @VPCKUHUM_unary(<8 x i16>* %A, <8 x i16>* %B) { +entry: + %tmp = load <8 x i16>* %A ; <<8 x i16>> [#uses=2] + %tmp.upgrd.25 = bitcast <8 x i16> %tmp to <16 x i8> ; <<16 x i8>> [#uses=8] + %tmp3 = bitcast <8 x i16> %tmp to <16 x i8> ; <<16 x i8>> [#uses=8] + %tmp.upgrd.26 = extractelement <16 x i8> %tmp.upgrd.25, i32 1 ; <i8> [#uses=1] + %tmp4 = extractelement <16 x i8> %tmp.upgrd.25, i32 3 ; <i8> [#uses=1] + %tmp5 = extractelement <16 x i8> %tmp.upgrd.25, i32 5 ; <i8> [#uses=1] + %tmp6 = extractelement <16 x i8> %tmp.upgrd.25, i32 7 ; <i8> [#uses=1] + %tmp7 = extractelement <16 x i8> %tmp.upgrd.25, i32 9 ; <i8> [#uses=1] + %tmp8 = extractelement <16 x i8> %tmp.upgrd.25, i32 11 ; <i8> [#uses=1] + %tmp9 = extractelement <16 x i8> %tmp.upgrd.25, i32 13 ; <i8> [#uses=1] + %tmp10 = 
extractelement <16 x i8> %tmp.upgrd.25, i32 15 ; <i8> [#uses=1] + %tmp11 = extractelement <16 x i8> %tmp3, i32 1 ; <i8> [#uses=1] + %tmp12 = extractelement <16 x i8> %tmp3, i32 3 ; <i8> [#uses=1] + %tmp13 = extractelement <16 x i8> %tmp3, i32 5 ; <i8> [#uses=1] + %tmp14 = extractelement <16 x i8> %tmp3, i32 7 ; <i8> [#uses=1] + %tmp15 = extractelement <16 x i8> %tmp3, i32 9 ; <i8> [#uses=1] + %tmp16 = extractelement <16 x i8> %tmp3, i32 11 ; <i8> [#uses=1] + %tmp17 = extractelement <16 x i8> %tmp3, i32 13 ; <i8> [#uses=1] + %tmp18 = extractelement <16 x i8> %tmp3, i32 15 ; <i8> [#uses=1] + %tmp19 = insertelement <16 x i8> undef, i8 %tmp.upgrd.26, i32 0 ; <<16 x i8>> [#uses=1] + %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 1 ; <<16 x i8>> [#uses=1] + %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 2 ; <<16 x i8>> [#uses=1] + %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 3 ; <<16 x i8>> [#uses=1] + %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 4 ; <<16 x i8>> [#uses=1] + %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 5 ; <<16 x i8>> [#uses=1] + %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 6 ; <<16 x i8>> [#uses=1] + %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 7 ; <<16 x i8>> [#uses=1] + %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 8 ; <<16 x i8>> [#uses=1] + %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 9 ; <<16 x i8>> [#uses=1] + %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 10 ; <<16 x i8>> [#uses=1] + %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 11 ; <<16 x i8>> [#uses=1] + %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 12 ; <<16 x i8>> [#uses=1] + %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 13 ; <<16 x i8>> [#uses=1] + %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 14 ; <<16 x i8>> [#uses=1] + %tmp34 = insertelement <16 x i8> %tmp33, i8 %tmp18, i32 15 ; <<16 x i8>> [#uses=1] + %tmp34.upgrd.27 = bitcast <16 x i8> %tmp34 to <8 x 
i16> ; <<8 x i16>> [#uses=1] + store <8 x i16> %tmp34.upgrd.27, <8 x i16>* %A + ret void +} + +define void @VPCKUWUM_unary(<4 x i32>* %A, <4 x i32>* %B) { +entry: + %tmp = load <4 x i32>* %A ; <<4 x i32>> [#uses=2] + %tmp.upgrd.28 = bitcast <4 x i32> %tmp to <8 x i16> ; <<8 x i16>> [#uses=4] + %tmp3 = bitcast <4 x i32> %tmp to <8 x i16> ; <<8 x i16>> [#uses=4] + %tmp.upgrd.29 = extractelement <8 x i16> %tmp.upgrd.28, i32 1 ; <i16> [#uses=1] + %tmp4 = extractelement <8 x i16> %tmp.upgrd.28, i32 3 ; <i16> [#uses=1] + %tmp5 = extractelement <8 x i16> %tmp.upgrd.28, i32 5 ; <i16> [#uses=1] + %tmp6 = extractelement <8 x i16> %tmp.upgrd.28, i32 7 ; <i16> [#uses=1] + %tmp7 = extractelement <8 x i16> %tmp3, i32 1 ; <i16> [#uses=1] + %tmp8 = extractelement <8 x i16> %tmp3, i32 3 ; <i16> [#uses=1] + %tmp9 = extractelement <8 x i16> %tmp3, i32 5 ; <i16> [#uses=1] + %tmp10 = extractelement <8 x i16> %tmp3, i32 7 ; <i16> [#uses=1] + %tmp11 = insertelement <8 x i16> undef, i16 %tmp.upgrd.29, i32 0 ; <<8 x i16>> [#uses=1] + %tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 1 ; <<8 x i16>> [#uses=1] + %tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 2 ; <<8 x i16>> [#uses=1] + %tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 3 ; <<8 x i16>> [#uses=1] + %tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 4 ; <<8 x i16>> [#uses=1] + %tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 5 ; <<8 x i16>> [#uses=1] + %tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 6 ; <<8 x i16>> [#uses=1] + %tmp18 = insertelement <8 x i16> %tmp17, i16 %tmp10, i32 7 ; <<8 x i16>> [#uses=1] + %tmp18.upgrd.30 = bitcast <8 x i16> %tmp18 to <4 x i32> ; <<4 x i32>> [#uses=1] + store <4 x i32> %tmp18.upgrd.30, <4 x i32>* %A + ret void +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/vec_splat.ll b/src/LLVM/test/CodeGen/PowerPC/vec_splat.ll new file mode 100644 index 0000000..104f05a --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/vec_splat.ll
@@ -0,0 +1,71 @@ +; Test that vectors are scalarized/lowered correctly. +; RUN: llc < %s -march=ppc32 -mcpu=g3 | \ +; RUN: grep stfs | count 4 +; RUN: llc < %s -march=ppc32 -mcpu=g5 -o %t +; RUN: grep vspltw %t | count 2 +; RUN: grep vsplti %t | count 3 +; RUN: grep vsplth %t | count 1 + + %f4 = type <4 x float> + %i4 = type <4 x i32> + +define void @splat(%f4* %P, %f4* %Q, float %X) nounwind { + %tmp = insertelement %f4 undef, float %X, i32 0 ; <%f4> [#uses=1] + %tmp2 = insertelement %f4 %tmp, float %X, i32 1 ; <%f4> [#uses=1] + %tmp4 = insertelement %f4 %tmp2, float %X, i32 2 ; <%f4> [#uses=1] + %tmp6 = insertelement %f4 %tmp4, float %X, i32 3 ; <%f4> [#uses=1] + %q = load %f4* %Q ; <%f4> [#uses=1] + %R = fadd %f4 %q, %tmp6 ; <%f4> [#uses=1] + store %f4 %R, %f4* %P + ret void +} + +define void @splat_i4(%i4* %P, %i4* %Q, i32 %X) nounwind { + %tmp = insertelement %i4 undef, i32 %X, i32 0 ; <%i4> [#uses=1] + %tmp2 = insertelement %i4 %tmp, i32 %X, i32 1 ; <%i4> [#uses=1] + %tmp4 = insertelement %i4 %tmp2, i32 %X, i32 2 ; <%i4> [#uses=1] + %tmp6 = insertelement %i4 %tmp4, i32 %X, i32 3 ; <%i4> [#uses=1] + %q = load %i4* %Q ; <%i4> [#uses=1] + %R = add %i4 %q, %tmp6 ; <%i4> [#uses=1] + store %i4 %R, %i4* %P + ret void +} + +define void @splat_imm_i32(%i4* %P, %i4* %Q, i32 %X) nounwind { + %q = load %i4* %Q ; <%i4> [#uses=1] + %R = add %i4 %q, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <%i4> [#uses=1] + store %i4 %R, %i4* %P + ret void +} + +define void @splat_imm_i16(%i4* %P, %i4* %Q, i32 %X) nounwind { + %q = load %i4* %Q ; <%i4> [#uses=1] + %R = add %i4 %q, < i32 65537, i32 65537, i32 65537, i32 65537 > ; <%i4> [#uses=1] + store %i4 %R, %i4* %P + ret void +} + +define void @splat_h(i16 %tmp, <16 x i8>* %dst) nounwind { + %tmp.upgrd.1 = insertelement <8 x i16> undef, i16 %tmp, i32 0 + %tmp72 = insertelement <8 x i16> %tmp.upgrd.1, i16 %tmp, i32 1 + %tmp73 = insertelement <8 x i16> %tmp72, i16 %tmp, i32 2 + %tmp74 = insertelement <8 x i16> %tmp73, i16 %tmp, i32 3 + %tmp75 
= insertelement <8 x i16> %tmp74, i16 %tmp, i32 4 + %tmp76 = insertelement <8 x i16> %tmp75, i16 %tmp, i32 5 + %tmp77 = insertelement <8 x i16> %tmp76, i16 %tmp, i32 6 + %tmp78 = insertelement <8 x i16> %tmp77, i16 %tmp, i32 7 + %tmp78.upgrd.2 = bitcast <8 x i16> %tmp78 to <16 x i8> + store <16 x i8> %tmp78.upgrd.2, <16 x i8>* %dst + ret void +} + +define void @spltish(<16 x i8>* %A, <16 x i8>* %B) nounwind { + %tmp = load <16 x i8>* %B ; <<16 x i8>> [#uses=1] + %tmp.s = bitcast <16 x i8> %tmp to <16 x i8> ; <<16 x i8>> [#uses=1] + %tmp4 = sub <16 x i8> %tmp.s, bitcast (<8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 + 15, i16 15, i16 15 > to <16 x i8>) ; <<16 x i8>> [#uses=1] + %tmp4.u = bitcast <16 x i8> %tmp4 to <16 x i8> ; <<16 x i8>> [#uses=1] + store <16 x i8> %tmp4.u, <16 x i8>* %A + ret void +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/vec_splat_constant.ll b/src/LLVM/test/CodeGen/PowerPC/vec_splat_constant.ll new file mode 100644 index 0000000..b227794 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/vec_splat_constant.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin -mcpu=g5 | FileCheck %s +; Formerly incorrectly inserted vsldoi (endian confusion) + +@baz = common global <16 x i8> zeroinitializer ; <<16 x i8>*> [#uses=1] + +define void @foo(<16 x i8> %x) nounwind ssp { +entry: +; CHECK: _foo: +; CHECK-NOT: vsldoi + %x_addr = alloca <16 x i8> ; <<16 x i8>*> [#uses=2] + %temp = alloca <16 x i8> ; <<16 x i8>*> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store <16 x i8> %x, <16 x i8>* %x_addr + store <16 x i8> <i8 0, i8 0, i8 0, i8 14, i8 0, i8 0, i8 0, i8 14, i8 0, i8 0, i8 0, i8 14, i8 0, i8 0, i8 0, i8 14>, <16 x i8>* %temp, align 16 + %0 = load <16 x i8>* %x_addr, align 16 ; <<16 x i8>> [#uses=1] + %1 = load <16 x i8>* %temp, align 16 ; <<16 x i8>> [#uses=1] + %tmp = add <16 x i8> %0, %1 ; <<16 x i8>> [#uses=1] + store <16 x i8> %tmp, <16 x i8>* @baz, align 16 + br label %return + +return: ; preds = %entry + ret void +; CHECK: blr +}
diff --git a/src/LLVM/test/CodeGen/PowerPC/vec_vrsave.ll b/src/LLVM/test/CodeGen/PowerPC/vec_vrsave.ll new file mode 100644 index 0000000..8f8a031 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/vec_vrsave.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=ppc32 -mcpu=g5 -o %t +; RUN: grep vrlw %t +; RUN: not grep spr %t +; RUN: not grep vrsave %t + +define <4 x i32> @test_rol() { + ret <4 x i32> < i32 -11534337, i32 -11534337, i32 -11534337, i32 -11534337 > +} + +define <4 x i32> @test_arg(<4 x i32> %A, <4 x i32> %B) { + %C = add <4 x i32> %A, %B ; <<4 x i32>> [#uses=1] + ret <4 x i32> %C +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/vec_zero.ll b/src/LLVM/test/CodeGen/PowerPC/vec_zero.ll new file mode 100644 index 0000000..62ca563 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/vec_zero.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vxor + +define void @foo(<4 x float>* %P) { + %T = load <4 x float>* %P ; <<4 x float>> [#uses=1] + %S = fadd <4 x float> zeroinitializer, %T ; <<4 x float>> [#uses=1] + store <4 x float> %S, <4 x float>* %P + ret void +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/vector-identity-shuffle.ll b/src/LLVM/test/CodeGen/PowerPC/vector-identity-shuffle.ll new file mode 100644 index 0000000..587fb49 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/vector-identity-shuffle.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep test: +; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep vperm + +define void @test(<4 x float>* %tmp2.i) { + %tmp2.i.upgrd.1 = load <4 x float>* %tmp2.i ; <<4 x float>> [#uses=4] + %xFloat0.48 = extractelement <4 x float> %tmp2.i.upgrd.1, i32 0 ; <float> [#uses=1] + %inFloat0.49 = insertelement <4 x float> undef, float %xFloat0.48, i32 0 ; <<4 x float>> [#uses=1] + %xFloat1.50 = extractelement <4 x float> %tmp2.i.upgrd.1, i32 1 ; <float> [#uses=1] + %inFloat1.52 = insertelement <4 x float> %inFloat0.49, float %xFloat1.50, i32 1 ; <<4 x float>> [#uses=1] + %xFloat2.53 = extractelement <4 x float> %tmp2.i.upgrd.1, i32 2 ; <float> [#uses=1] + %inFloat2.55 = insertelement <4 x float> %inFloat1.52, float %xFloat2.53, i32 2 ; <<4 x float>> [#uses=1] + %xFloat3.56 = extractelement <4 x float> %tmp2.i.upgrd.1, i32 3 ; <float> [#uses=1] + %inFloat3.58 = insertelement <4 x float> %inFloat2.55, float %xFloat3.56, i32 3 ; <<4 x float>> [#uses=1] + store <4 x float> %inFloat3.58, <4 x float>* %tmp2.i + ret void +} +
diff --git a/src/LLVM/test/CodeGen/PowerPC/vector.ll b/src/LLVM/test/CodeGen/PowerPC/vector.ll new file mode 100644 index 0000000..2905582 --- /dev/null +++ b/src/LLVM/test/CodeGen/PowerPC/vector.ll
@@ -0,0 +1,158 @@ +; Test that vectors are scalarized/lowered correctly. +; RUN: llc < %s -march=ppc32 -mcpu=g5 > %t +; RUN: llc < %s -march=ppc32 -mcpu=g3 >> %t + +%d8 = type <8 x double> +%f1 = type <1 x float> +%f2 = type <2 x float> +%f4 = type <4 x float> +%f8 = type <8 x float> +%i4 = type <4 x i32> + +;;; TEST HANDLING OF VARIOUS VECTOR SIZES + +define void @test_f1(%f1* %P, %f1* %Q, %f1* %S) { + %p = load %f1* %P ; <%f1> [#uses=1] + %q = load %f1* %Q ; <%f1> [#uses=1] + %R = fadd %f1 %p, %q ; <%f1> [#uses=1] + store %f1 %R, %f1* %S + ret void +} + +define void @test_f2(%f2* %P, %f2* %Q, %f2* %S) { + %p = load %f2* %P ; <%f2> [#uses=1] + %q = load %f2* %Q ; <%f2> [#uses=1] + %R = fadd %f2 %p, %q ; <%f2> [#uses=1] + store %f2 %R, %f2* %S + ret void +} + +define void @test_f4(%f4* %P, %f4* %Q, %f4* %S) { + %p = load %f4* %P ; <%f4> [#uses=1] + %q = load %f4* %Q ; <%f4> [#uses=1] + %R = fadd %f4 %p, %q ; <%f4> [#uses=1] + store %f4 %R, %f4* %S + ret void +} + +define void @test_f8(%f8* %P, %f8* %Q, %f8* %S) { + %p = load %f8* %P ; <%f8> [#uses=1] + %q = load %f8* %Q ; <%f8> [#uses=1] + %R = fadd %f8 %p, %q ; <%f8> [#uses=1] + store %f8 %R, %f8* %S + ret void +} + +define void @test_fmul(%f8* %P, %f8* %Q, %f8* %S) { + %p = load %f8* %P ; <%f8> [#uses=1] + %q = load %f8* %Q ; <%f8> [#uses=1] + %R = fmul %f8 %p, %q ; <%f8> [#uses=1] + store %f8 %R, %f8* %S + ret void +} + +define void @test_div(%f8* %P, %f8* %Q, %f8* %S) { + %p = load %f8* %P ; <%f8> [#uses=1] + %q = load %f8* %Q ; <%f8> [#uses=1] + %R = fdiv %f8 %p, %q ; <%f8> [#uses=1] + store %f8 %R, %f8* %S + ret void +} + +;;; TEST VECTOR CONSTRUCTS + +define void @test_cst(%f4* %P, %f4* %S) { + %p = load %f4* %P ; <%f4> [#uses=1] + %R = fadd %f4 %p, < float 0x3FB99999A0000000, float 1.000000e+00, float + 2.000000e+00, float 4.500000e+00 > ; <%f4> [#uses=1] + store %f4 %R, %f4* %S + ret void +} + +define void @test_zero(%f4* %P, %f4* %S) { + %p = load %f4* %P ; <%f4> [#uses=1] + %R = fadd %f4 %p, 
zeroinitializer ; <%f4> [#uses=1] + store %f4 %R, %f4* %S + ret void +} + +define void @test_undef(%f4* %P, %f4* %S) { + %p = load %f4* %P ; <%f4> [#uses=1] + %R = fadd %f4 %p, undef ; <%f4> [#uses=1] + store %f4 %R, %f4* %S + ret void +} + +define void @test_constant_insert(%f4* %S) { + %R = insertelement %f4 zeroinitializer, float 1.000000e+01, i32 0 + ; <%f4> [#uses=1] + store %f4 %R, %f4* %S + ret void +} + +define void @test_variable_buildvector(float %F, %f4* %S) { + %R = insertelement %f4 zeroinitializer, float %F, i32 0 + store %f4 %R, %f4* %S + ret void +} + +define void @test_scalar_to_vector(float %F, %f4* %S) { + %R = insertelement %f4 undef, float %F, i32 0 + store %f4 %R, %f4* %S + ret void +} + +define float @test_extract_elt(%f8* %P) { + %p = load %f8* %P ; <%f8> [#uses=1] + %R = extractelement %f8 %p, i32 3 ; <float> [#uses=1] + ret float %R +} + +define double @test_extract_elt2(%d8* %P) { + %p = load %d8* %P ; <%d8> [#uses=1] + %R = extractelement %d8 %p, i32 3 ; <double> [#uses=1] + ret double %R +} + +define void @test_cast_1(%f4* %b, %i4* %a) { + %tmp = load %f4* %b ; <%f4> [#uses=1] + %tmp2 = fadd %f4 %tmp, < float 1.000000e+00, float 2.000000e+00, float +3.000000e+00, float 4.000000e+00 > ; <%f4> [#uses=1] + %tmp3 = bitcast %f4 %tmp2 to %i4 ; <%i4> [#uses=1] + %tmp4 = add %i4 %tmp3, < i32 1, i32 2, i32 3, i32 4 > + store %i4 %tmp4, %i4* %a + ret void +} + +define void @test_cast_2(%f8* %a, <8 x i32>* %b) { + %T = load %f8* %a ; <%f8> [#uses=1] + %T2 = bitcast %f8 %T to <8 x i32> + store <8 x i32> %T2, <8 x i32>* %b + ret void +} + + +;;; TEST IMPORTANT IDIOMS + +define void @splat(%f4* %P, %f4* %Q, float %X) { + %tmp = insertelement %f4 undef, float %X, i32 0 + %tmp2 = insertelement %f4 %tmp, float %X, i32 1 + %tmp4 = insertelement %f4 %tmp2, float %X, i32 2 + %tmp6 = insertelement %f4 %tmp4, float %X, i32 3 + %q = load %f4* %Q ; <%f4> [#uses=1] + %R = fadd %f4 %q, %tmp6 ; <%f4> [#uses=1] + store %f4 %R, %f4* %P + ret void +} + +define void 
@splat_i4(%i4* %P, %i4* %Q, i32 %X) { + %tmp = insertelement %i4 undef, i32 %X, i32 0 + %tmp2 = insertelement %i4 %tmp, i32 %X, i32 1 + %tmp4 = insertelement %i4 %tmp2, i32 %X, i32 2 + %tmp6 = insertelement %i4 %tmp4, i32 %X, i32 3 + %q = load %i4* %Q ; <%i4> [#uses=1] + %R = add %i4 %q, %tmp6 ; <%i4> [#uses=1] + store %i4 %R, %i4* %P + ret void +} +
diff --git a/src/LLVM/test/CodeGen/SPARC/2006-01-22-BitConvertLegalize.ll b/src/LLVM/test/CodeGen/SPARC/2006-01-22-BitConvertLegalize.ll new file mode 100644 index 0000000..9c84cd1 --- /dev/null +++ b/src/LLVM/test/CodeGen/SPARC/2006-01-22-BitConvertLegalize.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=sparc + +define void @execute_list() { + %tmp.33.i = fdiv float 0.000000e+00, 0.000000e+00 ; <float> [#uses=1] + %tmp.37.i = fmul float 0.000000e+00, %tmp.33.i ; <float> [#uses=1] + %tmp.42.i = fadd float %tmp.37.i, 0.000000e+00 ; <float> [#uses=1] + call void @gl_EvalCoord1f( float %tmp.42.i ) + ret void +} + +declare void @gl_EvalCoord1f(float) +
diff --git a/src/LLVM/test/CodeGen/SPARC/2007-05-09-JumpTables.ll b/src/LLVM/test/CodeGen/SPARC/2007-05-09-JumpTables.ll new file mode 100644 index 0000000..73578dc --- /dev/null +++ b/src/LLVM/test/CodeGen/SPARC/2007-05-09-JumpTables.ll
@@ -0,0 +1,30 @@ +; RUN: llc < %s -march=sparc + +; We cannot emit jump tables on Sparc, but we should correctly handle this case. + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" + +define i32 @foo(i32 %f) { +entry: + switch i32 %f, label %bb14 [ + i32 0, label %UnifiedReturnBlock + i32 1, label %bb4 + i32 2, label %bb7 + i32 3, label %bb10 + ] + +bb4: ; preds = %entry + ret i32 2 + +bb7: ; preds = %entry + ret i32 5 + +bb10: ; preds = %entry + ret i32 9 + +bb14: ; preds = %entry + ret i32 0 + +UnifiedReturnBlock: ; preds = %entry + ret i32 1 +}
diff --git a/src/LLVM/test/CodeGen/SPARC/2007-07-05-LiveIntervalAssert.ll b/src/LLVM/test/CodeGen/SPARC/2007-07-05-LiveIntervalAssert.ll new file mode 100644 index 0000000..77c2002 --- /dev/null +++ b/src/LLVM/test/CodeGen/SPARC/2007-07-05-LiveIntervalAssert.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=sparc +; PR1540 + +declare float @sinf(float) +declare double @sin(double) +define double @test_sin(float %F) { + %G = call float @sinf( float %F ) ; <float> [#uses=1] + %H = fpext float %G to double ; <double> [#uses=1] + %I = call double @sin( double %H ) ; <double> [#uses=1] + ret double %I +}
diff --git a/src/LLVM/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll b/src/LLVM/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll new file mode 100644 index 0000000..e8315f1 --- /dev/null +++ b/src/LLVM/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=sparc +; PR 1557 + +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f128:128:128" +@llvm.global_ctors = appending global [1 x { i32, void ()* }] [ { i32, void ()* } { i32 65535, void ()* @set_fast_math } ] ; <[1 x { i32, void ()* }]*> [#uses=0] + +define internal void @set_fast_math() nounwind { +entry: + %fsr = alloca i32 ; <i32*> [#uses=4] + call void asm "st %fsr, $0", "=*m"(i32* %fsr) nounwind + %0 = load i32* %fsr, align 4 ; <i32> [#uses=1] + %1 = or i32 %0, 4194304 ; <i32> [#uses=1] + store i32 %1, i32* %fsr, align 4 + call void asm sideeffect "ld $0, %fsr", "*m"(i32* %fsr) nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/SPARC/2008-10-10-InlineAsmRegOperand.ll b/src/LLVM/test/CodeGen/SPARC/2008-10-10-InlineAsmRegOperand.ll new file mode 100644 index 0000000..c12e9c1 --- /dev/null +++ b/src/LLVM/test/CodeGen/SPARC/2008-10-10-InlineAsmRegOperand.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=sparc +; PR 1557 + +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f128:128:128" +module asm "\09.section\09\22.ctors\22,#alloc,#write" +module asm "\09.section\09\22.dtors\22,#alloc,#write" + +define void @frame_dummy() nounwind { +entry: + %asmtmp = tail call void (i8*)* (void (i8*)*)* asm "", "=r,0"(void (i8*)* @_Jv_RegisterClasses) nounwind ; <void (i8*)*> [#uses=0] + unreachable +} + +declare void @_Jv_RegisterClasses(i8*)
diff --git a/src/LLVM/test/CodeGen/SPARC/2009-08-28-PIC.ll b/src/LLVM/test/CodeGen/SPARC/2009-08-28-PIC.ll new file mode 100644 index 0000000..a2ba0d0 --- /dev/null +++ b/src/LLVM/test/CodeGen/SPARC/2009-08-28-PIC.ll
@@ -0,0 +1,9 @@ +; RUN: llc -march=sparc --relocation-model=pic < %s | grep _GLOBAL_OFFSET_TABLE_ + +@foo = global i32 0 ; <i32*> [#uses=1] + +define i32 @func() nounwind readonly { +entry: + %0 = load i32* @foo, align 4 ; <i32> [#uses=1] + ret i32 %0 +}
diff --git a/src/LLVM/test/CodeGen/SPARC/2009-08-28-WeakLinkage.ll b/src/LLVM/test/CodeGen/SPARC/2009-08-28-WeakLinkage.ll new file mode 100644 index 0000000..0167d32 --- /dev/null +++ b/src/LLVM/test/CodeGen/SPARC/2009-08-28-WeakLinkage.ll
@@ -0,0 +1,6 @@ +; RUN: llc -march=sparc < %s | grep weak + +define weak i32 @func() nounwind { +entry: + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/SPARC/2010-04-07-DbgValueOtherTargets.ll b/src/LLVM/test/CodeGen/SPARC/2010-04-07-DbgValueOtherTargets.ll new file mode 100644 index 0000000..3b64498 --- /dev/null +++ b/src/LLVM/test/CodeGen/SPARC/2010-04-07-DbgValueOtherTargets.ll
@@ -0,0 +1,28 @@ +; RUN: llc -O0 -march=sparc -asm-verbose < %s | FileCheck %s +; Check that DEBUG_VALUE comments come through on a variety of targets. + +define i32 @main() nounwind ssp { +entry: +; CHECK: DEBUG_VALUE + call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9 + ret i32 0, !dbg !10 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.sp = !{!0} + +!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5} +!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 0} +!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] +!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!9 = metadata !{i32 3, i32 11, metadata !8, null} +!10 = metadata !{i32 4, i32 2, metadata !8, null} +
diff --git a/src/LLVM/test/CodeGen/SPARC/2011-01-11-CC.ll b/src/LLVM/test/CodeGen/SPARC/2011-01-11-CC.ll new file mode 100644 index 0000000..3ceda95 --- /dev/null +++ b/src/LLVM/test/CodeGen/SPARC/2011-01-11-CC.ll
@@ -0,0 +1,105 @@ +; RUN: llc -march=sparc <%s | FileCheck %s -check-prefix=V8 +; RUN: llc -march=sparc -mattr=v9 <%s | FileCheck %s -check-prefix=V9 + + +define i32 @test_addx(i64 %a, i64 %b, i64 %c) nounwind readnone noinline { +entry: +; V8: addcc +; V8-NOT: subcc +; V8: addx +; V9: addcc +; V9-NOT: subcc +; V9: addx +; V9: mov{{e|ne}} %icc + %0 = add i64 %a, %b + %1 = icmp ugt i64 %0, %c + %2 = zext i1 %1 to i32 + ret i32 %2 +} + + +define i32 @test_select_int_icc(i32 %a, i32 %b, i32 %c) nounwind readnone noinline { +entry: +; V8: test_select_int_icc +; V8: subcc +; V8: {{be|bne}} +; V9: test_select_int_icc +; V9: subcc +; V9-NOT: {{be|bne}} +; V9: mov{{e|ne}} %icc + %0 = icmp eq i32 %a, 0 + %1 = select i1 %0, i32 %b, i32 %c + ret i32 %1 +} + + +define float @test_select_fp_icc(i32 %a, float %f1, float %f2) nounwind readnone noinline { +entry: +; V8: test_select_fp_icc +; V8: subcc +; V8: {{be|bne}} +; V9: test_select_fp_icc +; V9: subcc +; V9-NOT: {{be|bne}} +; V9: fmovs{{e|ne}} %icc + %0 = icmp eq i32 %a, 0 + %1 = select i1 %0, float %f1, float %f2 + ret float %1 +} + +define double @test_select_dfp_icc(i32 %a, double %f1, double %f2) nounwind readnone noinline { +entry: +; V8: test_select_dfp_icc +; V8: subcc +; V8: {{be|bne}} +; V9: test_select_dfp_icc +; V9: subcc +; V9=NOT: {{be|bne}} +; V9: fmovd{{e|ne}} %icc + %0 = icmp eq i32 %a, 0 + %1 = select i1 %0, double %f1, double %f2 + ret double %1 +} + +define i32 @test_select_int_fcc(float %f, i32 %a, i32 %b) nounwind readnone noinline { +entry: +;V8: test_select_int_fcc +;V8: fcmps +;V8: {{fbe|fbne}} +;V9: test_select_int_fcc +;V9: fcmps +;V9-NOT: {{fbe|fbne}} +;V9: mov{{e|ne}} %fcc0 + %0 = fcmp une float %f, 0.000000e+00 + %a.b = select i1 %0, i32 %a, i32 %b + ret i32 %a.b +} + + +define float @test_select_fp_fcc(float %f, float %f1, float %f2) nounwind readnone noinline { +entry: +;V8: test_select_fp_fcc +;V8: fcmps +;V8: {{fbe|fbne}} +;V9: test_select_fp_fcc +;V9: fcmps +;V9-NOT: {{fbe|fbne}} +;V9: 
fmovs{{e|ne}} %fcc0 + %0 = fcmp une float %f, 0.000000e+00 + %1 = select i1 %0, float %f1, float %f2 + ret float %1 +} + +define double @test_select_dfp_fcc(double %f, double %f1, double %f2) nounwind readnone noinline { +entry: +;V8: test_select_dfp_fcc +;V8: fcmpd +;V8: {{fbne|fbe}} +;V9: test_select_dfp_fcc +;V9: fcmpd +;V9-NOT: {{fbne|fbe}} +;V9: fmovd{{e|ne}} %fcc0 + %0 = fcmp une double %f, 0.000000e+00 + %1 = select i1 %0, double %f1, double %f2 + ret double %1 +}
diff --git a/src/LLVM/test/CodeGen/SPARC/2011-01-11-Call.ll b/src/LLVM/test/CodeGen/SPARC/2011-01-11-Call.ll new file mode 100644 index 0000000..7350e92 --- /dev/null +++ b/src/LLVM/test/CodeGen/SPARC/2011-01-11-Call.ll
@@ -0,0 +1,13 @@ +; RUN: llc -march=sparc -O0 <%s + +define void @test() nounwind { +entry: + %0 = tail call i32 (...)* @foo() nounwind + tail call void (...)* @bar() nounwind + ret void +} + +declare i32 @foo(...) + +declare void @bar(...) +
diff --git a/src/LLVM/test/CodeGen/SPARC/2011-01-11-FrameAddr.ll b/src/LLVM/test/CodeGen/SPARC/2011-01-11-FrameAddr.ll new file mode 100644 index 0000000..9e6583c --- /dev/null +++ b/src/LLVM/test/CodeGen/SPARC/2011-01-11-FrameAddr.ll
@@ -0,0 +1,66 @@ +;RUN: llc -march=sparc < %s | FileCheck %s -check-prefix=V8 +;RUN: llc -march=sparc -mattr=v9 < %s | FileCheck %s -check-prefix=V9 +;RUN: llc -march=sparc -regalloc=basic < %s | FileCheck %s -check-prefix=V8 +;RUN: llc -march=sparc -regalloc=basic -mattr=v9 < %s | FileCheck %s -check-prefix=V9 + +define i8* @frameaddr() nounwind readnone { +entry: +;V8: frameaddr +;V8: or %g0, %fp, {{.+}} + +;V9: frameaddr +;V9: or %g0, %fp, {{.+}} + %0 = tail call i8* @llvm.frameaddress(i32 0) + ret i8* %0 +} + +define i8* @frameaddr2() nounwind readnone { +entry: +;V8: frameaddr2 +;V8: ta 3 +;V8: ld [%fp+56], {{.+}} +;V8: ld [{{.+}}+56], {{.+}} +;V8: ld [{{.+}}+56], {{.+}} + +;V9: frameaddr2 +;V9: flushw +;V9: ld [%fp+56], {{.+}} +;V9: ld [{{.+}}+56], {{.+}} +;V9: ld [{{.+}}+56], {{.+}} + %0 = tail call i8* @llvm.frameaddress(i32 3) + ret i8* %0 +} + +declare i8* @llvm.frameaddress(i32) nounwind readnone + + + +define i8* @retaddr() nounwind readnone { +entry: +;V8: retaddr +;V8: or %g0, %i7, {{.+}} + +;V9: retaddr +;V9: or %g0, %i7, {{.+}} + %0 = tail call i8* @llvm.returnaddress(i32 0) + ret i8* %0 +} + +define i8* @retaddr2() nounwind readnone { +entry: +;V8: retaddr2 +;V8: ta 3 +;V8: ld [%fp+56], {{.+}} +;V8: ld [{{.+}}+56], {{.+}} +;V8: ld [{{.+}}+60], {{.+}} + +;V9: retaddr2 +;V9: flushw +;V9: ld [%fp+56], {{.+}} +;V9: ld [{{.+}}+56], {{.+}} +;V9: ld [{{.+}}+60], {{.+}} + %0 = tail call i8* @llvm.returnaddress(i32 3) + ret i8* %0 +} + +declare i8* @llvm.returnaddress(i32) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll b/src/LLVM/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll new file mode 100644 index 0000000..71fdb4e --- /dev/null +++ b/src/LLVM/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll
@@ -0,0 +1,90 @@ +;RUN: llc -march=sparc < %s | FileCheck %s +;RUN: llc -march=sparc -O0 < %s | FileCheck %s -check-prefix=UNOPT + + +define i32 @test(i32 %a) nounwind { +entry: +; CHECK: test +; CHECK: call bar +; CHECK-NOT: nop +; CHECK: jmp +; CHECK-NEXT: restore + %0 = tail call i32 @bar(i32 %a) nounwind + ret i32 %0 +} + +define i32 @test_jmpl(i32 (i32, i32)* nocapture %f, i32 %a, i32 %b) nounwind { +entry: +; CHECK: test_jmpl +; CHECK: call +; CHECK-NOT: nop +; CHECK: jmp +; CHECK-NEXT: restore + %0 = tail call i32 %f(i32 %a, i32 %b) nounwind + ret i32 %0 +} + +define i32 @test_loop(i32 %a, i32 %b) nounwind readnone { +; CHECK: test_loop +entry: + %0 = icmp sgt i32 %b, 0 + br i1 %0, label %bb, label %bb5 + +bb: ; preds = %entry, %bb + %a_addr.18 = phi i32 [ %a_addr.0, %bb ], [ %a, %entry ] + %1 = phi i32 [ %3, %bb ], [ 0, %entry ] + %tmp9 = mul i32 %1, %b + %2 = and i32 %1, 1 + %tmp = xor i32 %2, 1 + %.pn = shl i32 %tmp9, %tmp + %a_addr.0 = add i32 %.pn, %a_addr.18 + %3 = add nsw i32 %1, 1 + %exitcond = icmp eq i32 %3, %b +;CHECK: subcc +;CHECK: bne +;CHECK-NOT: nop + br i1 %exitcond, label %bb5, label %bb + +bb5: ; preds = %bb, %entry + %a_addr.1.lcssa = phi i32 [ %a, %entry ], [ %a_addr.0, %bb ] +;CHECK: jmp +;CHECK-NEXT: restore + ret i32 %a_addr.1.lcssa +} + +define i32 @test_inlineasm(i32 %a) nounwind { +entry: +;CHECK: test_inlineasm +;CHECK: sethi +;CHECK: !NO_APP +;CHECK-NEXT: subcc +;CHECK-NEXT: bg +;CHECK-NEXT: nop + tail call void asm sideeffect "sethi 0, %g0", ""() nounwind + %0 = icmp slt i32 %a, 0 + br i1 %0, label %bb, label %bb1 + +bb: ; preds = %entry + %1 = tail call i32 (...)* @foo(i32 %a) nounwind + ret i32 %1 + +bb1: ; preds = %entry + %2 = tail call i32 @bar(i32 %a) nounwind + ret i32 %2 +} + +declare i32 @foo(...) 
+ +declare i32 @bar(i32) + + +define i32 @test_implicit_def() nounwind { +entry: +;UNOPT: test_implicit_def +;UNOPT: call func +;UNOPT-NEXT: nop + %0 = tail call i32 @func(i32* undef) nounwind + ret i32 0 +} + +declare i32 @func(i32*)
diff --git a/src/LLVM/test/CodeGen/SPARC/2011-01-21-ByValArgs.ll b/src/LLVM/test/CodeGen/SPARC/2011-01-21-ByValArgs.ll new file mode 100644 index 0000000..85c16e4 --- /dev/null +++ b/src/LLVM/test/CodeGen/SPARC/2011-01-21-ByValArgs.ll
@@ -0,0 +1,18 @@ +;RUN: llc -march=sparc < %s | FileCheck %s + +%struct.foo_t = type { i32, i32, i32 } + +@s = internal unnamed_addr global %struct.foo_t { i32 10, i32 20, i32 30 } + +define i32 @test() nounwind { +entry: +;CHECK: test +;CHECK: st +;CHECK: st +;CHECK: st +;CHECK: bar + %0 = tail call i32 @bar(%struct.foo_t* byval @s) nounwind + ret i32 %0 +} + +declare i32 @bar(%struct.foo_t* byval)
diff --git a/src/LLVM/test/CodeGen/SPARC/2011-01-22-SRet.ll b/src/LLVM/test/CodeGen/SPARC/2011-01-22-SRet.ll new file mode 100644 index 0000000..5393392 --- /dev/null +++ b/src/LLVM/test/CodeGen/SPARC/2011-01-22-SRet.ll
@@ -0,0 +1,36 @@ +;RUN: llc -march=sparc < %s | FileCheck %s + +%struct.foo_t = type { i32, i32, i32 } + +define weak void @make_foo(%struct.foo_t* noalias sret %agg.result, i32 %a, i32 %b, i32 %c) nounwind { +entry: +;CHECK: make_foo +;CHECK: ld [%fp+64], {{.+}} +;CHECK: jmp %i7+12 + %0 = getelementptr inbounds %struct.foo_t* %agg.result, i32 0, i32 0 + store i32 %a, i32* %0, align 4 + %1 = getelementptr inbounds %struct.foo_t* %agg.result, i32 0, i32 1 + store i32 %b, i32* %1, align 4 + %2 = getelementptr inbounds %struct.foo_t* %agg.result, i32 0, i32 2 + store i32 %c, i32* %2, align 4 + ret void +} + +define i32 @test() nounwind { +entry: +;CHECK: test +;CHECK: st {{.+}}, [%sp+64] +;CHECK: make_foo +;CHECK: unimp 12 + %f = alloca %struct.foo_t, align 8 + call void @make_foo(%struct.foo_t* noalias sret %f, i32 10, i32 20, i32 30) nounwind + %0 = getelementptr inbounds %struct.foo_t* %f, i32 0, i32 0 + %1 = load i32* %0, align 8 + %2 = getelementptr inbounds %struct.foo_t* %f, i32 0, i32 1 + %3 = load i32* %2, align 4 + %4 = getelementptr inbounds %struct.foo_t* %f, i32 0, i32 2 + %5 = load i32* %4, align 8 + %6 = add nsw i32 %3, %1 + %7 = add nsw i32 %6, %5 + ret i32 %7 +}
diff --git a/src/LLVM/test/CodeGen/SPARC/basictest.ll b/src/LLVM/test/CodeGen/SPARC/basictest.ll new file mode 100644 index 0000000..3102b22 --- /dev/null +++ b/src/LLVM/test/CodeGen/SPARC/basictest.ll
@@ -0,0 +1,26 @@ +; RUN: llc < %s -march=sparc | FileCheck %s + +define i32 @test0(i32 %X) { + %tmp.1 = add i32 %X, 1 + ret i32 %tmp.1 +; CHECK: test0: +; CHECK: add %i0, 1, %i0 +} + + +;; xnor tests. +define i32 @test1(i32 %X, i32 %Y) { + %A = xor i32 %X, %Y + %B = xor i32 %A, -1 + ret i32 %B +; CHECK: test1: +; CHECK: xnor %i0, %i1, %i0 +} + +define i32 @test2(i32 %X, i32 %Y) { + %A = xor i32 %X, -1 + %B = xor i32 %A, %Y + ret i32 %B +; CHECK: test2: +; CHECK: xnor %i0, %i1, %i0 +}
diff --git a/src/LLVM/test/CodeGen/SPARC/ctpop.ll b/src/LLVM/test/CodeGen/SPARC/ctpop.ll new file mode 100644 index 0000000..d3d84be --- /dev/null +++ b/src/LLVM/test/CodeGen/SPARC/ctpop.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=sparc -mattr=-v9 | not grep popc +; RUN: llc < %s -march=sparcv9 -mattr=v9 | grep popc + +declare i32 @llvm.ctpop.i32(i32) + +define i32 @test(i32 %X) { + %Y = call i32 @llvm.ctpop.i32( i32 %X ) ; <i32> [#uses=1] + ret i32 %Y +} +
diff --git a/src/LLVM/test/CodeGen/SPARC/dg.exp b/src/LLVM/test/CodeGen/SPARC/dg.exp new file mode 100644 index 0000000..0ed86f1 --- /dev/null +++ b/src/LLVM/test/CodeGen/SPARC/dg.exp
@@ -0,0 +1,5 @@ +load_lib llvm.exp + +if { [llvm_supports_target Sparc] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] +}
diff --git a/src/LLVM/test/CodeGen/SPARC/mult-alt-generic-sparc.ll b/src/LLVM/test/CodeGen/SPARC/mult-alt-generic-sparc.ll new file mode 100644 index 0000000..6013b17 --- /dev/null +++ b/src/LLVM/test/CodeGen/SPARC/mult-alt-generic-sparc.ll
@@ -0,0 +1,323 @@ +; RUN: llc < %s -march=sparc +; ModuleID = 'mult-alt-generic.c' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32" +target triple = "sparc" + +@mout0 = common global i32 0, align 4 +@min1 = common global i32 0, align 4 +@marray = common global [2 x i32] zeroinitializer, align 4 + +define void @single_m() nounwind { +entry: + call void asm "foo $1,$0", "=*m,*m"(i32* @mout0, i32* @min1) nounwind + ret void +} + +define void @single_o() nounwind { +entry: + %out0 = alloca i32, align 4 + %index = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %index, align 4 + ret void +} + +define void @single_V() nounwind { +entry: + ret void +} + +define void @single_lt() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r,<r"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* %in1, align 4 + %1 = call i32 asm "foo $1,$0", "=r,r<"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + ret void +} + +define void @single_gt() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r,>r"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* %in1, align 4 + %1 = call i32 asm "foo $1,$0", "=r,r>"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + ret void +} + +define void @single_r() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r,r"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define void @single_i() nounwind { 
+entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + %0 = call i32 asm "foo $1,$0", "=r,i"(i32 1) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define void @single_n() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + %0 = call i32 asm "foo $1,$0", "=r,n"(i32 1) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define void @single_E() nounwind { +entry: + %out0 = alloca double, align 8 + store double 0.000000e+000, double* %out0, align 8 +; No lowering support. +; %0 = call double asm "foo $1,$0", "=r,E"(double 1.000000e+001) nounwind +; store double %0, double* %out0, align 8 + ret void +} + +define void @single_F() nounwind { +entry: + %out0 = alloca double, align 8 + store double 0.000000e+000, double* %out0, align 8 +; No lowering support. +; %0 = call double asm "foo $1,$0", "=r,F"(double 1.000000e+000) nounwind +; store double %0, double* %out0, align 8 + ret void +} + +define void @single_s() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + ret void +} + +define void @single_g() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* @min1, align 4 + %1 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + %2 = call i32 asm "foo $1,$0", "=r,imr"(i32 1) nounwind + store i32 %2, i32* %out0, align 4 + ret void +} + +define void @single_X() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* @min1, align 4 + %1 = call i32 asm 
"foo $1,$0", "=r,X"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + %2 = call i32 asm "foo $1,$0", "=r,X"(i32 1) nounwind + store i32 %2, i32* %out0, align 4 + %3 = call i32 asm "foo $1,$0", "=r,X"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind + store i32 %3, i32* %out0, align 4 +; No lowering support. +; %4 = call i32 asm "foo $1,$0", "=r,X"(double 1.000000e+001) nounwind +; store i32 %4, i32* %out0, align 4 +; %5 = call i32 asm "foo $1,$0", "=r,X"(double 1.000000e+000) nounwind +; store i32 %5, i32* %out0, align 4 + ret void +} + +define void @single_p() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + %0 = call i32 asm "foo $1,$0", "=r,r"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define void @multi_m() nounwind { +entry: + %tmp = load i32* @min1, align 4 + call void asm "foo $1,$0", "=*m|r,m|r"(i32* @mout0, i32 %tmp) nounwind + ret void +} + +define void @multi_o() nounwind { +entry: + %out0 = alloca i32, align 4 + %index = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %index, align 4 + ret void +} + +define void @multi_V() nounwind { +entry: + ret void +} + +define void @multi_lt() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|<r"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* %in1, align 4 + %1 = call i32 asm "foo $1,$0", "=r|r,r|r<"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + ret void +} + +define void @multi_gt() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|>r"(i32 %tmp) nounwind + store 
i32 %0, i32* %out0, align 4 + %tmp1 = load i32* %in1, align 4 + %1 = call i32 asm "foo $1,$0", "=r|r,r|r>"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + ret void +} + +define void @multi_r() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|m"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define void @multi_i() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|i"(i32 1) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define void @multi_n() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|n"(i32 1) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define void @multi_E() nounwind { +entry: + %out0 = alloca double, align 8 + store double 0.000000e+000, double* %out0, align 8 +; No lowering support. +; %0 = call double asm "foo $1,$0", "=r|r,r|E"(double 1.000000e+001) nounwind +; store double %0, double* %out0, align 8 + ret void +} + +define void @multi_F() nounwind { +entry: + %out0 = alloca double, align 8 + store double 0.000000e+000, double* %out0, align 8 +; No lowering support. 
+; %0 = call double asm "foo $1,$0", "=r|r,r|F"(double 1.000000e+000) nounwind +; store double %0, double* %out0, align 8 + ret void +} + +define void @multi_s() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + ret void +} + +define void @multi_g() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* @min1, align 4 + %1 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + %2 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 1) nounwind + store i32 %2, i32* %out0, align 4 + ret void +} + +define void @multi_X() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* @min1, align 4 + %1 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + %2 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 1) nounwind + store i32 %2, i32* %out0, align 4 + %3 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind + store i32 %3, i32* %out0, align 4 +; No lowering support. 
+; %4 = call i32 asm "foo $1,$0", "=r|r,r|X"(double 1.000000e+001) nounwind +; store i32 %4, i32* %out0, align 4 +; %5 = call i32 asm "foo $1,$0", "=r|r,r|X"(double 1.000000e+000) nounwind +; store i32 %5, i32* %out0, align 4 + ret void +} + +define void @multi_p() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|r"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind + store i32 %0, i32* %out0, align 4 + ret void +}
diff --git a/src/LLVM/test/CodeGen/SPARC/private.ll b/src/LLVM/test/CodeGen/SPARC/private.ll new file mode 100644 index 0000000..f06ccd0 --- /dev/null +++ b/src/LLVM/test/CodeGen/SPARC/private.ll
@@ -0,0 +1,19 @@ +; Test to make sure that the 'private' is used correctly. +; +; RUN: llc < %s -march=sparc > %t +; RUN: grep .foo: %t +; RUN: grep call.*\.foo %t +; RUN: grep .baz: %t +; RUN: grep ld.*\.baz %t + +define private void @foo() { + ret void +} + +@baz = private global i32 4 + +define i32 @bar() { + call void @foo() + %1 = load i32* @baz, align 4 + ret i32 %1 +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/00-RetVoid.ll b/src/LLVM/test/CodeGen/SystemZ/00-RetVoid.ll new file mode 100644 index 0000000..6f3cbac --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/00-RetVoid.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=systemz + +define void @foo() { +entry: + ret void +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/01-RetArg.ll b/src/LLVM/test/CodeGen/SystemZ/01-RetArg.ll new file mode 100644 index 0000000..8e1ff49 --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/01-RetArg.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=systemz + +define i64 @foo(i64 %a, i64 %b) { +entry: + ret i64 %b +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/01-RetImm.ll b/src/LLVM/test/CodeGen/SystemZ/01-RetImm.ll new file mode 100644 index 0000000..8b99e68 --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/01-RetImm.ll
@@ -0,0 +1,49 @@ +; RUN: llc < %s -march=systemz | grep lghi | count 1 +; RUN: llc < %s -march=systemz | grep llill | count 1 +; RUN: llc < %s -march=systemz | grep llilh | count 1 +; RUN: llc < %s -march=systemz | grep llihl | count 1 +; RUN: llc < %s -march=systemz | grep llihh | count 1 +; RUN: llc < %s -march=systemz | grep lgfi | count 1 +; RUN: llc < %s -march=systemz | grep llilf | count 1 +; RUN: llc < %s -march=systemz | grep llihf | count 1 + + +define i64 @foo1() { +entry: + ret i64 1 +} + +define i64 @foo2() { +entry: + ret i64 65535 +} + +define i64 @foo3() { +entry: + ret i64 131072 +} + +define i64 @foo4() { +entry: + ret i64 8589934592 +} + +define i64 @foo5() { +entry: + ret i64 562949953421312 +} + +define i64 @foo6() { +entry: + ret i64 65537 +} + +define i64 @foo7() { +entry: + ret i64 4294967295 +} + +define i64 @foo8() { +entry: + ret i64 281483566645248 +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/02-MemArith.ll b/src/LLVM/test/CodeGen/SystemZ/02-MemArith.ll new file mode 100644 index 0000000..ee9e5e9 --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/02-MemArith.ll
@@ -0,0 +1,133 @@ +; RUN: llc < %s -march=systemz | FileCheck %s + +define signext i32 @foo1(i32 %a, i32 *%b, i64 %idx) { +; CHECK: foo1: +; CHECK: a %r2, 4(%r1,%r3) +entry: + %idx2 = add i64 %idx, 1 ; <i64> [#uses=1] + %ptr = getelementptr i32* %b, i64 %idx2 ; <i32*> [#uses=1] + %c = load i32* %ptr + %d = add i32 %a, %c + ret i32 %d +} + +define signext i32 @foo2(i32 %a, i32 *%b, i64 %idx) { +; CHECK: foo2: +; CHECK: ay %r2, -4(%r1,%r3) +entry: + %idx2 = add i64 %idx, -1 ; <i64> [#uses=1] + %ptr = getelementptr i32* %b, i64 %idx2 ; <i32*> [#uses=1] + %c = load i32* %ptr + %d = add i32 %a, %c + ret i32 %d +} + +define signext i64 @foo3(i64 %a, i64 *%b, i64 %idx) { +; CHECK: foo3: +; CHECK: ag %r2, 8(%r1,%r3) +entry: + %idx2 = add i64 %idx, 1 ; <i64> [#uses=1] + %ptr = getelementptr i64* %b, i64 %idx2 ; <i64*> [#uses=1] + %c = load i64* %ptr + %d = add i64 %a, %c + ret i64 %d +} + +define signext i32 @foo4(i32 %a, i32 *%b, i64 %idx) { +; CHECK: foo4: +; CHECK: n %r2, 4(%r1,%r3) +entry: + %idx2 = add i64 %idx, 1 ; <i64> [#uses=1] + %ptr = getelementptr i32* %b, i64 %idx2 ; <i32*> [#uses=1] + %c = load i32* %ptr + %d = and i32 %a, %c + ret i32 %d +} + +define signext i32 @foo5(i32 %a, i32 *%b, i64 %idx) { +; CHECK: foo5: +; CHECK: ny %r2, -4(%r1,%r3) +entry: + %idx2 = add i64 %idx, -1 ; <i64> [#uses=1] + %ptr = getelementptr i32* %b, i64 %idx2 ; <i32*> [#uses=1] + %c = load i32* %ptr + %d = and i32 %a, %c + ret i32 %d +} + +define signext i64 @foo6(i64 %a, i64 *%b, i64 %idx) { +; CHECK: foo6: +; CHECK: ng %r2, 8(%r1,%r3) +entry: + %idx2 = add i64 %idx, 1 ; <i64> [#uses=1] + %ptr = getelementptr i64* %b, i64 %idx2 ; <i64*> [#uses=1] + %c = load i64* %ptr + %d = and i64 %a, %c + ret i64 %d +} + +define signext i32 @foo7(i32 %a, i32 *%b, i64 %idx) { +; CHECK: foo7: +; CHECK: o %r2, 4(%r1,%r3) +entry: + %idx2 = add i64 %idx, 1 ; <i64> [#uses=1] + %ptr = getelementptr i32* %b, i64 %idx2 ; <i32*> [#uses=1] + %c = load i32* %ptr + %d = or i32 %a, %c + ret i32 %d +} + +define 
signext i32 @foo8(i32 %a, i32 *%b, i64 %idx) { +; CHECK: foo8: +; CHECK: oy %r2, -4(%r1,%r3) +entry: + %idx2 = add i64 %idx, -1 ; <i64> [#uses=1] + %ptr = getelementptr i32* %b, i64 %idx2 ; <i32*> [#uses=1] + %c = load i32* %ptr + %d = or i32 %a, %c + ret i32 %d +} + +define signext i64 @foo9(i64 %a, i64 *%b, i64 %idx) { +; CHECK: foo9: +; CHECK: og %r2, 8(%r1,%r3) +entry: + %idx2 = add i64 %idx, 1 ; <i64> [#uses=1] + %ptr = getelementptr i64* %b, i64 %idx2 ; <i64*> [#uses=1] + %c = load i64* %ptr + %d = or i64 %a, %c + ret i64 %d +} + +define signext i32 @foo10(i32 %a, i32 *%b, i64 %idx) { +; CHECK: foo10: +; CHECK: x %r2, 4(%r1,%r3) +entry: + %idx2 = add i64 %idx, 1 ; <i64> [#uses=1] + %ptr = getelementptr i32* %b, i64 %idx2 ; <i32*> [#uses=1] + %c = load i32* %ptr + %d = xor i32 %a, %c + ret i32 %d +} + +define signext i32 @foo11(i32 %a, i32 *%b, i64 %idx) { +; CHECK: foo11: +; CHECK: xy %r2, -4(%r1,%r3) +entry: + %idx2 = add i64 %idx, -1 ; <i64> [#uses=1] + %ptr = getelementptr i32* %b, i64 %idx2 ; <i32*> [#uses=1] + %c = load i32* %ptr + %d = xor i32 %a, %c + ret i32 %d +} + +define signext i64 @foo12(i64 %a, i64 *%b, i64 %idx) { +; CHECK: foo12: +; CHECK: xg %r2, 8(%r1,%r3) +entry: + %idx2 = add i64 %idx, 1 ; <i64> [#uses=1] + %ptr = getelementptr i64* %b, i64 %idx2 ; <i64*> [#uses=1] + %c = load i64* %ptr + %d = xor i64 %a, %c + ret i64 %d +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/02-RetAdd.ll b/src/LLVM/test/CodeGen/SystemZ/02-RetAdd.ll new file mode 100644 index 0000000..d5dfa22 --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/02-RetAdd.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=systemz +define i64 @foo(i64 %a, i64 %b) { +entry: + %c = add i64 %a, %b + ret i64 %c +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/02-RetAddImm.ll b/src/LLVM/test/CodeGen/SystemZ/02-RetAddImm.ll new file mode 100644 index 0000000..40f6cce --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/02-RetAddImm.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=systemz +define i64 @foo(i64 %a, i64 %b) { +entry: + %c = add i64 %a, 1 + ret i64 %c +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/02-RetAnd.ll b/src/LLVM/test/CodeGen/SystemZ/02-RetAnd.ll new file mode 100644 index 0000000..b568a57 --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/02-RetAnd.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=systemz + +define i64 @foo(i64 %a, i64 %b) { +entry: + %c = and i64 %a, %b + ret i64 %c +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/02-RetAndImm.ll b/src/LLVM/test/CodeGen/SystemZ/02-RetAndImm.ll new file mode 100644 index 0000000..53c5e54 --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/02-RetAndImm.ll
@@ -0,0 +1,28 @@ +; RUN: llc < %s -march=systemz | grep ngr | count 4 +; RUN: llc < %s -march=systemz | grep llilh | count 1 +; RUN: llc < %s -march=systemz | grep llihl | count 1 +; RUN: llc < %s -march=systemz | grep llihh | count 1 + +define i64 @foo1(i64 %a, i64 %b) { +entry: + %c = and i64 %a, 1 + ret i64 %c +} + +define i64 @foo2(i64 %a, i64 %b) { +entry: + %c = and i64 %a, 131072 + ret i64 %c +} + +define i64 @foo3(i64 %a, i64 %b) { +entry: + %c = and i64 %a, 8589934592 + ret i64 %c +} + +define i64 @foo4(i64 %a, i64 %b) { +entry: + %c = and i64 %a, 562949953421312 + ret i64 %c +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/02-RetNeg.ll b/src/LLVM/test/CodeGen/SystemZ/02-RetNeg.ll new file mode 100644 index 0000000..3f6ba2f --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/02-RetNeg.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=systemz | grep lcgr | count 1 + +define i64 @foo(i64 %a) { +entry: + %c = sub i64 0, %a + ret i64 %c +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/02-RetOr.ll b/src/LLVM/test/CodeGen/SystemZ/02-RetOr.ll new file mode 100644 index 0000000..a1ddb63 --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/02-RetOr.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=systemz +define i64 @foo(i64 %a, i64 %b) { +entry: + %c = or i64 %a, %b + ret i64 %c +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/02-RetOrImm.ll b/src/LLVM/test/CodeGen/SystemZ/02-RetOrImm.ll new file mode 100644 index 0000000..68cd24d --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/02-RetOrImm.ll
@@ -0,0 +1,28 @@ +; RUN: llc < %s -march=systemz | grep oill | count 1 +; RUN: llc < %s -march=systemz | grep oilh | count 1 +; RUN: llc < %s -march=systemz | grep oihl | count 1 +; RUN: llc < %s -march=systemz | grep oihh | count 1 + +define i64 @foo1(i64 %a, i64 %b) { +entry: + %c = or i64 %a, 1 + ret i64 %c +} + +define i64 @foo2(i64 %a, i64 %b) { +entry: + %c = or i64 %a, 131072 + ret i64 %c +} + +define i64 @foo3(i64 %a, i64 %b) { +entry: + %c = or i64 %a, 8589934592 + ret i64 %c +} + +define i64 @foo4(i64 %a, i64 %b) { +entry: + %c = or i64 %a, 562949953421312 + ret i64 %c +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/02-RetSub.ll b/src/LLVM/test/CodeGen/SystemZ/02-RetSub.ll new file mode 100644 index 0000000..98e1861 --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/02-RetSub.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=systemz + +define i64 @foo(i64 %a, i64 %b) { +entry: + %c = sub i64 %a, %b + ret i64 %c +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/02-RetSubImm.ll b/src/LLVM/test/CodeGen/SystemZ/02-RetSubImm.ll new file mode 100644 index 0000000..8479fbf --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/02-RetSubImm.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=systemz + +define i64 @foo(i64 %a, i64 %b) { +entry: + %c = sub i64 %a, 1 + ret i64 %c +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/02-RetXor.ll b/src/LLVM/test/CodeGen/SystemZ/02-RetXor.ll new file mode 100644 index 0000000..4d1adf2 --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/02-RetXor.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=systemz +define i64 @foo(i64 %a, i64 %b) { +entry: + %c = xor i64 %a, %b + ret i64 %c +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/02-RetXorImm.ll b/src/LLVM/test/CodeGen/SystemZ/02-RetXorImm.ll new file mode 100644 index 0000000..473bbf7 --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/02-RetXorImm.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=systemz +define i64 @foo(i64 %a, i64 %b) { +entry: + %c = xor i64 %a, 1 + ret i64 %c +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/03-RetAddImmSubreg.ll b/src/LLVM/test/CodeGen/SystemZ/03-RetAddImmSubreg.ll new file mode 100644 index 0000000..0a7f5ee --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/03-RetAddImmSubreg.ll
@@ -0,0 +1,42 @@ +; RUN: llc < %s -march=systemz | grep ahi | count 3 +; RUN: llc < %s -march=systemz | grep afi | count 3 +; RUN: llc < %s -march=systemz | grep lgfr | count 4 +; RUN: llc < %s -march=systemz | grep llgfr | count 2 + + +define i32 @foo1(i32 %a, i32 %b) { +entry: + %c = add i32 %a, 1 + ret i32 %c +} + +define i32 @foo2(i32 %a, i32 %b) { +entry: + %c = add i32 %a, 131072 + ret i32 %c +} + +define zeroext i32 @foo3(i32 %a, i32 %b) { +entry: + %c = add i32 %a, 1 + ret i32 %c +} + +define zeroext i32 @foo4(i32 %a, i32 %b) { +entry: + %c = add i32 %a, 131072 + ret i32 %c +} + +define signext i32 @foo5(i32 %a, i32 %b) { +entry: + %c = add i32 %a, 1 + ret i32 %c +} + +define signext i32 @foo6(i32 %a, i32 %b) { +entry: + %c = add i32 %a, 131072 + ret i32 %c +} +
diff --git a/src/LLVM/test/CodeGen/SystemZ/03-RetAddSubreg.ll b/src/LLVM/test/CodeGen/SystemZ/03-RetAddSubreg.ll new file mode 100644 index 0000000..337bb3f --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/03-RetAddSubreg.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -march=systemz | grep ar | count 3 +; RUN: llc < %s -march=systemz | grep lgfr | count 2 +; RUN: llc < %s -march=systemz | grep llgfr | count 1 + +define i32 @foo(i32 %a, i32 %b) { +entry: + %c = add i32 %a, %b + ret i32 %c +} + +define zeroext i32 @foo1(i32 %a, i32 %b) { +entry: + %c = add i32 %a, %b + ret i32 %c +} + +define signext i32 @foo2(i32 %a, i32 %b) { +entry: + %c = add i32 %a, %b + ret i32 %c +} +
diff --git a/src/LLVM/test/CodeGen/SystemZ/03-RetAndImmSubreg.ll b/src/LLVM/test/CodeGen/SystemZ/03-RetAndImmSubreg.ll new file mode 100644 index 0000000..c5326ab --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/03-RetAndImmSubreg.ll
@@ -0,0 +1,38 @@ +; RUN: llc < %s -march=systemz | grep ngr | count 6 + +define i32 @foo1(i32 %a, i32 %b) { +entry: + %c = and i32 %a, 1 + ret i32 %c +} + +define i32 @foo2(i32 %a, i32 %b) { +entry: + %c = and i32 %a, 131072 + ret i32 %c +} + +define zeroext i32 @foo3(i32 %a, i32 %b) { +entry: + %c = and i32 %a, 1 + ret i32 %c +} + +define signext i32 @foo4(i32 %a, i32 %b) { +entry: + %c = and i32 %a, 131072 + ret i32 %c +} + +define zeroext i32 @foo5(i32 %a, i32 %b) { +entry: + %c = and i32 %a, 1 + ret i32 %c +} + +define signext i32 @foo6(i32 %a, i32 %b) { +entry: + %c = and i32 %a, 131072 + ret i32 %c +} +
diff --git a/src/LLVM/test/CodeGen/SystemZ/03-RetAndSubreg.ll b/src/LLVM/test/CodeGen/SystemZ/03-RetAndSubreg.ll new file mode 100644 index 0000000..75dc90a --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/03-RetAndSubreg.ll
@@ -0,0 +1,21 @@ +; RUN: llc < %s -march=systemz | grep ngr | count 3 +; RUN: llc < %s -march=systemz | grep nihf | count 1 + +define i32 @foo(i32 %a, i32 %b) { +entry: + %c = and i32 %a, %b + ret i32 %c +} + +define zeroext i32 @foo1(i32 %a, i32 %b) { +entry: + %c = and i32 %a, %b + ret i32 %c +} + +define signext i32 @foo2(i32 %a, i32 %b) { +entry: + %c = and i32 %a, %b + ret i32 %c +} +
diff --git a/src/LLVM/test/CodeGen/SystemZ/03-RetArgSubreg.ll b/src/LLVM/test/CodeGen/SystemZ/03-RetArgSubreg.ll new file mode 100644 index 0000000..476821a --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/03-RetArgSubreg.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -march=systemz | grep lgr | count 2 +; RUN: llc < %s -march=systemz | grep nihf | count 1 +; RUN: llc < %s -march=systemz | grep lgfr | count 1 + + +define i32 @foo(i32 %a, i32 %b) { +entry: + ret i32 %b +} + +define zeroext i32 @foo1(i32 %a, i32 %b) { +entry: + ret i32 %b +} + +define signext i32 @foo2(i32 %a, i32 %b) { +entry: + ret i32 %b +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/03-RetImmSubreg.ll b/src/LLVM/test/CodeGen/SystemZ/03-RetImmSubreg.ll new file mode 100644 index 0000000..70da913 --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/03-RetImmSubreg.ll
@@ -0,0 +1,42 @@ +; RUN: llc < %s -march=systemz | grep lghi | count 2 +; RUN: llc < %s -march=systemz | grep llill | count 1 +; RUN: llc < %s -march=systemz | grep llilh | count 1 +; RUN: llc < %s -march=systemz | grep lgfi | count 1 +; RUN: llc < %s -march=systemz | grep llilf | count 2 + + +define i32 @foo1() { +entry: + ret i32 1 +} + +define i32 @foo2() { +entry: + ret i32 65535 +} + +define i32 @foo3() { +entry: + ret i32 131072 +} + +define i32 @foo4() { +entry: + ret i32 65537 +} + +define i32 @foo5() { +entry: + ret i32 4294967295 +} + +define zeroext i32 @foo6() { +entry: + ret i32 4294967295 +} + +define signext i32 @foo7() { +entry: + ret i32 4294967295 +} +
diff --git a/src/LLVM/test/CodeGen/SystemZ/03-RetNegImmSubreg.ll b/src/LLVM/test/CodeGen/SystemZ/03-RetNegImmSubreg.ll new file mode 100644 index 0000000..87ebcc1 --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/03-RetNegImmSubreg.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=systemz | grep lcr | count 1 + +define i32 @foo(i32 %a) { +entry: + %c = sub i32 0, %a + ret i32 %c +} +
diff --git a/src/LLVM/test/CodeGen/SystemZ/03-RetOrImmSubreg.ll b/src/LLVM/test/CodeGen/SystemZ/03-RetOrImmSubreg.ll new file mode 100644 index 0000000..99adea8 --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/03-RetOrImmSubreg.ll
@@ -0,0 +1,60 @@ +; RUN: llc < %s -march=systemz | grep oill | count 3 +; RUN: llc < %s -march=systemz | grep oilh | count 3 +; RUN: llc < %s -march=systemz | grep oilf | count 3 +; RUN: llc < %s -march=systemz | grep llgfr | count 3 +; RUN: llc < %s -march=systemz | grep lgfr | count 6 + +define i32 @foo1(i32 %a, i32 %b) { +entry: + %c = or i32 %a, 1 + ret i32 %c +} + +define i32 @foo2(i32 %a, i32 %b) { +entry: + %c = or i32 %a, 131072 + ret i32 %c +} + +define i32 @foo7(i32 %a, i32 %b) { +entry: + %c = or i32 %a, 123456 + ret i32 %c +} + +define zeroext i32 @foo3(i32 %a, i32 %b) { +entry: + %c = or i32 %a, 1 + ret i32 %c +} + +define zeroext i32 @foo8(i32 %a, i32 %b) { +entry: + %c = or i32 %a, 123456 + ret i32 %c +} + +define signext i32 @foo4(i32 %a, i32 %b) { +entry: + %c = or i32 %a, 131072 + ret i32 %c +} + +define zeroext i32 @foo5(i32 %a, i32 %b) { +entry: + %c = or i32 %a, 1 + ret i32 %c +} + +define signext i32 @foo6(i32 %a, i32 %b) { +entry: + %c = or i32 %a, 131072 + ret i32 %c +} + +define signext i32 @foo9(i32 %a, i32 %b) { +entry: + %c = or i32 %a, 123456 + ret i32 %c +} +
diff --git a/src/LLVM/test/CodeGen/SystemZ/03-RetOrSubreg.ll b/src/LLVM/test/CodeGen/SystemZ/03-RetOrSubreg.ll new file mode 100644 index 0000000..7dab5ca --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/03-RetOrSubreg.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -march=systemz | grep ogr | count 3 +; RUN: llc < %s -march=systemz | grep nihf | count 1 +; RUN: llc < %s -march=systemz | grep lgfr | count 1 + + +define i32 @foo(i32 %a, i32 %b) { +entry: + %c = or i32 %a, %b + ret i32 %c +} + +define zeroext i32 @foo1(i32 %a, i32 %b) { +entry: + %c = or i32 %a, %b + ret i32 %c +} + +define signext i32 @foo2(i32 %a, i32 %b) { +entry: + %c = or i32 %a, %b + ret i32 %c +} +
diff --git a/src/LLVM/test/CodeGen/SystemZ/03-RetSubImmSubreg.ll b/src/LLVM/test/CodeGen/SystemZ/03-RetSubImmSubreg.ll new file mode 100644 index 0000000..21ea9b5 --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/03-RetSubImmSubreg.ll
@@ -0,0 +1,42 @@ +; RUN: llc < %s -march=systemz | grep ahi | count 3 +; RUN: llc < %s -march=systemz | grep afi | count 3 +; RUN: llc < %s -march=systemz | grep lgfr | count 4 +; RUN: llc < %s -march=systemz | grep llgfr | count 2 + + +define i32 @foo1(i32 %a, i32 %b) { +entry: + %c = sub i32 %a, 1 + ret i32 %c +} + +define i32 @foo2(i32 %a, i32 %b) { +entry: + %c = sub i32 %a, 131072 + ret i32 %c +} + +define zeroext i32 @foo3(i32 %a, i32 %b) { +entry: + %c = sub i32 %a, 1 + ret i32 %c +} + +define signext i32 @foo4(i32 %a, i32 %b) { +entry: + %c = sub i32 %a, 131072 + ret i32 %c +} + +define zeroext i32 @foo5(i32 %a, i32 %b) { +entry: + %c = sub i32 %a, 1 + ret i32 %c +} + +define signext i32 @foo6(i32 %a, i32 %b) { +entry: + %c = sub i32 %a, 131072 + ret i32 %c +} +
diff --git a/src/LLVM/test/CodeGen/SystemZ/03-RetSubSubreg.ll b/src/LLVM/test/CodeGen/SystemZ/03-RetSubSubreg.ll new file mode 100644 index 0000000..24b7631 --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/03-RetSubSubreg.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -march=systemz | grep sr | count 3 +; RUN: llc < %s -march=systemz | grep llgfr | count 1 +; RUN: llc < %s -march=systemz | grep lgfr | count 2 + +define i32 @foo(i32 %a, i32 %b) { +entry: + %c = sub i32 %a, %b + ret i32 %c +} + +define zeroext i32 @foo1(i32 %a, i32 %b) { +entry: + %c = sub i32 %a, %b + ret i32 %c +} + +define signext i32 @foo2(i32 %a, i32 %b) { +entry: + %c = sub i32 %a, %b + ret i32 %c +} +
diff --git a/src/LLVM/test/CodeGen/SystemZ/03-RetXorImmSubreg.ll b/src/LLVM/test/CodeGen/SystemZ/03-RetXorImmSubreg.ll new file mode 100644 index 0000000..70ee454 --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/03-RetXorImmSubreg.ll
@@ -0,0 +1,58 @@ +; RUN: llc < %s -march=systemz | grep xilf | count 9 +; RUN: llc < %s -march=systemz | grep llgfr | count 3 +; RUN: llc < %s -march=systemz | grep lgfr | count 6 + +define i32 @foo1(i32 %a, i32 %b) { +entry: + %c = xor i32 %a, 1 + ret i32 %c +} + +define i32 @foo2(i32 %a, i32 %b) { +entry: + %c = xor i32 %a, 131072 + ret i32 %c +} + +define i32 @foo7(i32 %a, i32 %b) { +entry: + %c = xor i32 %a, 123456 + ret i32 %c +} + +define zeroext i32 @foo3(i32 %a, i32 %b) { +entry: + %c = xor i32 %a, 1 + ret i32 %c +} + +define zeroext i32 @foo8(i32 %a, i32 %b) { +entry: + %c = xor i32 %a, 123456 + ret i32 %c +} + +define signext i32 @foo4(i32 %a, i32 %b) { +entry: + %c = xor i32 %a, 131072 + ret i32 %c +} + +define zeroext i32 @foo5(i32 %a, i32 %b) { +entry: + %c = xor i32 %a, 1 + ret i32 %c +} + +define signext i32 @foo6(i32 %a, i32 %b) { +entry: + %c = xor i32 %a, 131072 + ret i32 %c +} + +define signext i32 @foo9(i32 %a, i32 %b) { +entry: + %c = xor i32 %a, 123456 + ret i32 %c +} +
diff --git a/src/LLVM/test/CodeGen/SystemZ/03-RetXorSubreg.ll b/src/LLVM/test/CodeGen/SystemZ/03-RetXorSubreg.ll new file mode 100644 index 0000000..02c4a2a --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/03-RetXorSubreg.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -march=systemz | grep xgr | count 3 +; RUN: llc < %s -march=systemz | grep nihf | count 1 +; RUN: llc < %s -march=systemz | grep lgfr | count 1 + + +define i32 @foo(i32 %a, i32 %b) { +entry: + %c = xor i32 %a, %b + ret i32 %c +} + +define zeroext i32 @foo1(i32 %a, i32 %b) { +entry: + %c = xor i32 %a, %b + ret i32 %c +} + +define signext i32 @foo2(i32 %a, i32 %b) { +entry: + %c = xor i32 %a, %b + ret i32 %c +} +
diff --git a/src/LLVM/test/CodeGen/SystemZ/04-RetShifts.ll b/src/LLVM/test/CodeGen/SystemZ/04-RetShifts.ll new file mode 100644 index 0000000..cccdc47 --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/04-RetShifts.ll
@@ -0,0 +1,121 @@ +; RUN: llc < %s -march=systemz | grep sra | count 6 +; RUN: llc < %s -march=systemz | grep srag | count 3 +; RUN: llc < %s -march=systemz | grep srl | count 6 +; RUN: llc < %s -march=systemz | grep srlg | count 3 +; RUN: llc < %s -march=systemz | grep sll | count 6 +; RUN: llc < %s -march=systemz | grep sllg | count 3 + +define signext i32 @foo1(i32 %a, i32 %idx) nounwind readnone { +entry: + %add = add i32 %idx, 1 ; <i32> [#uses=1] + %shr = ashr i32 %a, %add ; <i32> [#uses=1] + ret i32 %shr +} + +define signext i32 @foo2(i32 %a, i32 %idx) nounwind readnone { +entry: + %add = add i32 %idx, 1 ; <i32> [#uses=1] + %shr = shl i32 %a, %add ; <i32> [#uses=1] + ret i32 %shr +} + +define signext i32 @foo3(i32 %a, i32 %idx) nounwind readnone { +entry: + %add = add i32 %idx, 1 ; <i32> [#uses=1] + %shr = lshr i32 %a, %add ; <i32> [#uses=1] + ret i32 %shr +} + +define signext i64 @foo4(i64 %a, i64 %idx) nounwind readnone { +entry: + %add = add i64 %idx, 1 ; <i64> [#uses=1] + %shr = ashr i64 %a, %add ; <i64> [#uses=1] + ret i64 %shr +} + +define signext i64 @foo5(i64 %a, i64 %idx) nounwind readnone { +entry: + %add = add i64 %idx, 1 ; <i64> [#uses=1] + %shr = shl i64 %a, %add ; <i64> [#uses=1] + ret i64 %shr +} + +define signext i64 @foo6(i64 %a, i64 %idx) nounwind readnone { +entry: + %add = add i64 %idx, 1 ; <i64> [#uses=1] + %shr = lshr i64 %a, %add ; <i64> [#uses=1] + ret i64 %shr +} + +define signext i32 @foo7(i32 %a, i32 %idx) nounwind readnone { +entry: + %shr = ashr i32 %a, 1 + ret i32 %shr +} + +define signext i32 @foo8(i32 %a, i32 %idx) nounwind readnone { +entry: + %shr = shl i32 %a, 1 + ret i32 %shr +} + +define signext i32 @foo9(i32 %a, i32 %idx) nounwind readnone { +entry: + %shr = lshr i32 %a, 1 + ret i32 %shr +} + +define signext i32 @foo10(i32 %a, i32 %idx) nounwind readnone { +entry: + %shr = ashr i32 %a, %idx + ret i32 %shr +} + +define signext i32 @foo11(i32 %a, i32 %idx) nounwind readnone { +entry: + %shr = shl i32 %a, %idx + ret i32 %shr 
+} + +define signext i32 @foo12(i32 %a, i32 %idx) nounwind readnone { +entry: + %shr = lshr i32 %a, %idx + ret i32 %shr +} + +define signext i64 @foo13(i64 %a, i64 %idx) nounwind readnone { +entry: + %shr = ashr i64 %a, 1 + ret i64 %shr +} + +define signext i64 @foo14(i64 %a, i64 %idx) nounwind readnone { +entry: + %shr = shl i64 %a, 1 + ret i64 %shr +} + +define signext i64 @foo15(i64 %a, i64 %idx) nounwind readnone { +entry: + %shr = lshr i64 %a, 1 + ret i64 %shr +} + +define signext i64 @foo16(i64 %a, i64 %idx) nounwind readnone { +entry: + %shr = ashr i64 %a, %idx + ret i64 %shr +} + +define signext i64 @foo17(i64 %a, i64 %idx) nounwind readnone { +entry: + %shr = shl i64 %a, %idx + ret i64 %shr +} + +define signext i64 @foo18(i64 %a, i64 %idx) nounwind readnone { +entry: + %shr = lshr i64 %a, %idx + ret i64 %shr +} +
diff --git a/src/LLVM/test/CodeGen/SystemZ/05-LoadAddr.ll b/src/LLVM/test/CodeGen/SystemZ/05-LoadAddr.ll new file mode 100644 index 0000000..cf02642 --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/05-LoadAddr.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s | grep lay | count 1 + +target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128" +target triple = "s390x-unknown-linux-gnu" + +define i64* @foo(i64* %a, i64 %idx) nounwind readnone { +entry: + %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1] + %add.ptr2 = getelementptr i64* %a, i64 %add.ptr.sum ; <i64*> [#uses=1] + ret i64* %add.ptr2 +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/05-MemImmStores.ll b/src/LLVM/test/CodeGen/SystemZ/05-MemImmStores.ll new file mode 100644 index 0000000..3cf21cc --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/05-MemImmStores.ll
@@ -0,0 +1,50 @@ +; RUN: llc < %s -mattr=+z10 | grep mvghi | count 1 +; RUN: llc < %s -mattr=+z10 | grep mvhi | count 1 +; RUN: llc < %s -mattr=+z10 | grep mvhhi | count 1 +; RUN: llc < %s | grep mvi | count 2 +; RUN: llc < %s | grep mviy | count 1 + +target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128" +target triple = "s390x-unknown-linux-gnu" + +define void @foo1(i64* nocapture %a, i64 %idx) nounwind { +entry: + %add.ptr = getelementptr i64* %a, i64 1 ; <i64*> [#uses=1] + store i64 1, i64* %add.ptr + ret void +} + +define void @foo2(i32* nocapture %a, i64 %idx) nounwind { +entry: + %add.ptr = getelementptr i32* %a, i64 1 ; <i32*> [#uses=1] + store i32 2, i32* %add.ptr + ret void +} + +define void @foo3(i16* nocapture %a, i64 %idx) nounwind { +entry: + %add.ptr = getelementptr i16* %a, i64 1 ; <i16*> [#uses=1] + store i16 3, i16* %add.ptr + ret void +} + +define void @foo4(i8* nocapture %a, i64 %idx) nounwind { +entry: + %add.ptr = getelementptr i8* %a, i64 1 ; <i8*> [#uses=1] + store i8 4, i8* %add.ptr + ret void +} + +define void @foo5(i8* nocapture %a, i64 %idx) nounwind { +entry: + %add.ptr = getelementptr i8* %a, i64 -1 ; <i8*> [#uses=1] + store i8 4, i8* %add.ptr + ret void +} + +define void @foo6(i16* nocapture %a, i64 %idx) nounwind { +entry: + %add.ptr = getelementptr i16* %a, i64 -1 ; <i16*> [#uses=1] + store i16 3, i16* %add.ptr + ret void +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/05-MemLoadsStores.ll b/src/LLVM/test/CodeGen/SystemZ/05-MemLoadsStores.ll new file mode 100644 index 0000000..eabeb0a --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/05-MemLoadsStores.ll
@@ -0,0 +1,44 @@ +; RUN: llc < %s | grep ly | count 2 +; RUN: llc < %s | grep sty | count 2 +; RUN: llc < %s | grep {l %} | count 2 +; RUN: llc < %s | grep {st %} | count 2 + +target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" +target triple = "s390x-ibm-linux" + +define void @foo1(i32* nocapture %foo, i32* nocapture %bar) nounwind { +entry: + %tmp1 = load i32* %foo ; <i32> [#uses=1] + store i32 %tmp1, i32* %bar + ret void +} + +define void @foo2(i32* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind { +entry: + %add.ptr = getelementptr i32* %foo, i64 1 ; <i32*> [#uses=1] + %tmp1 = load i32* %add.ptr ; <i32> [#uses=1] + %add.ptr3.sum = add i64 %idx, 1 ; <i64> [#uses=1] + %add.ptr5 = getelementptr i32* %bar, i64 %add.ptr3.sum ; <i32*> [#uses=1] + store i32 %tmp1, i32* %add.ptr5 + ret void +} + +define void @foo3(i32* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind { +entry: + %sub.ptr = getelementptr i32* %foo, i64 -1 ; <i32*> [#uses=1] + %tmp1 = load i32* %sub.ptr ; <i32> [#uses=1] + %sub.ptr3.sum = add i64 %idx, -1 ; <i64> [#uses=1] + %add.ptr = getelementptr i32* %bar, i64 %sub.ptr3.sum ; <i32*> [#uses=1] + store i32 %tmp1, i32* %add.ptr + ret void +} + +define void @foo4(i32* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind { +entry: + %add.ptr = getelementptr i32* %foo, i64 8192 ; <i32*> [#uses=1] + %tmp1 = load i32* %add.ptr ; <i32> [#uses=1] + %add.ptr3.sum = add i64 %idx, 8192 ; <i64> [#uses=1] + %add.ptr5 = getelementptr i32* %bar, i64 %add.ptr3.sum ; <i32*> [#uses=1] + store i32 %tmp1, i32* %add.ptr5 + ret void +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/05-MemLoadsStores16.ll b/src/LLVM/test/CodeGen/SystemZ/05-MemLoadsStores16.ll new file mode 100644 index 0000000..53bb641 --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/05-MemLoadsStores16.ll
@@ -0,0 +1,85 @@ +; RUN: llc < %s | grep {sthy.%} | count 2 +; RUN: llc < %s | grep {lhy.%} | count 2 +; RUN: llc < %s | grep {lh.%} | count 6 +; RUN: llc < %s | grep {sth.%} | count 2 + +target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" +target triple = "s390x-ibm-linux" + +define void @foo1(i16* nocapture %foo, i16* nocapture %bar) nounwind { +entry: + %tmp1 = load i16* %foo ; <i16> [#uses=1] + store i16 %tmp1, i16* %bar + ret void +} + +define void @foo2(i16* nocapture %foo, i16* nocapture %bar, i64 %idx) nounwind { +entry: + %add.ptr = getelementptr i16* %foo, i64 1 ; <i16*> [#uses=1] + %tmp1 = load i16* %add.ptr ; <i16> [#uses=1] + %add.ptr3.sum = add i64 %idx, 1 ; <i64> [#uses=1] + %add.ptr5 = getelementptr i16* %bar, i64 %add.ptr3.sum ; <i16*> [#uses=1] + store i16 %tmp1, i16* %add.ptr5 + ret void +} + +define void @foo3(i16* nocapture %foo, i16* nocapture %bar, i64 %idx) nounwind { +entry: + %sub.ptr = getelementptr i16* %foo, i64 -1 ; <i16*> [#uses=1] + %tmp1 = load i16* %sub.ptr ; <i16> [#uses=1] + %sub.ptr3.sum = add i64 %idx, -1 ; <i64> [#uses=1] + %add.ptr = getelementptr i16* %bar, i64 %sub.ptr3.sum ; <i16*> [#uses=1] + store i16 %tmp1, i16* %add.ptr + ret void +} + +define void @foo4(i16* nocapture %foo, i16* nocapture %bar, i64 %idx) nounwind { +entry: + %add.ptr = getelementptr i16* %foo, i64 8192 ; <i16*> [#uses=1] + %tmp1 = load i16* %add.ptr ; <i16> [#uses=1] + %add.ptr3.sum = add i64 %idx, 8192 ; <i64> [#uses=1] + %add.ptr5 = getelementptr i16* %bar, i64 %add.ptr3.sum ; <i16*> [#uses=1] + store i16 %tmp1, i16* %add.ptr5 + ret void +} + +define void @foo5(i16* nocapture %foo, i32* nocapture %bar) nounwind { +entry: + %tmp1 = load i16* %foo ; <i16> [#uses=1] + %conv = sext i16 %tmp1 to i32 ; <i32> [#uses=1] + store i32 %conv, i32* %bar + ret void +} + +define void @foo6(i16* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind { +entry: + %add.ptr = getelementptr i16* %foo, i64 1 
; <i16*> [#uses=1] + %tmp1 = load i16* %add.ptr ; <i16> [#uses=1] + %conv = sext i16 %tmp1 to i32 ; <i32> [#uses=1] + %add.ptr3.sum = add i64 %idx, 1 ; <i64> [#uses=1] + %add.ptr5 = getelementptr i32* %bar, i64 %add.ptr3.sum ; <i32*> [#uses=1] + store i32 %conv, i32* %add.ptr5 + ret void +} + +define void @foo7(i16* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind { +entry: + %sub.ptr = getelementptr i16* %foo, i64 -1 ; <i16*> [#uses=1] + %tmp1 = load i16* %sub.ptr ; <i16> [#uses=1] + %conv = sext i16 %tmp1 to i32 ; <i32> [#uses=1] + %sub.ptr3.sum = add i64 %idx, -1 ; <i64> [#uses=1] + %add.ptr = getelementptr i32* %bar, i64 %sub.ptr3.sum ; <i32*> [#uses=1] + store i32 %conv, i32* %add.ptr + ret void +} + +define void @foo8(i16* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind { +entry: + %add.ptr = getelementptr i16* %foo, i64 8192 ; <i16*> [#uses=1] + %tmp1 = load i16* %add.ptr ; <i16> [#uses=1] + %conv = sext i16 %tmp1 to i32 ; <i32> [#uses=1] + %add.ptr3.sum = add i64 %idx, 8192 ; <i64> [#uses=1] + %add.ptr5 = getelementptr i32* %bar, i64 %add.ptr3.sum ; <i32*> [#uses=1] + store i32 %conv, i32* %add.ptr5 + ret void +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/05-MemRegLoads.ll b/src/LLVM/test/CodeGen/SystemZ/05-MemRegLoads.ll new file mode 100644 index 0000000..f690a48 --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/05-MemRegLoads.ll
@@ -0,0 +1,75 @@ +; RUN: llc < %s -march=systemz | not grep aghi +; RUN: llc < %s -march=systemz | grep llgf | count 1 +; RUN: llc < %s -march=systemz | grep llgh | count 1 +; RUN: llc < %s -march=systemz | grep llgc | count 1 +; RUN: llc < %s -march=systemz | grep lgf | count 2 +; RUN: llc < %s -march=systemz | grep lgh | count 2 +; RUN: llc < %s -march=systemz | grep lgb | count 1 + + +target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128" +target triple = "s390x-unknown-linux-gnu" + +define zeroext i64 @foo1(i64* nocapture %a, i64 %idx) nounwind readonly { +entry: + %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1] + %add.ptr2 = getelementptr i64* %a, i64 %add.ptr.sum ; <i64*> [#uses=1] + %tmp3 = load i64* %add.ptr2 ; <i64> [#uses=1] + ret i64 %tmp3 +} + +define zeroext i32 @foo2(i32* nocapture %a, i64 %idx) nounwind readonly { +entry: + %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1] + %add.ptr2 = getelementptr i32* %a, i64 %add.ptr.sum ; <i32*> [#uses=1] + %tmp3 = load i32* %add.ptr2 ; <i32> [#uses=1] + ret i32 %tmp3 +} + +define zeroext i16 @foo3(i16* nocapture %a, i64 %idx) nounwind readonly { +entry: + %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1] + %add.ptr2 = getelementptr i16* %a, i64 %add.ptr.sum ; <i16*> [#uses=1] + %tmp3 = load i16* %add.ptr2 ; <i16> [#uses=1] + ret i16 %tmp3 +} + +define zeroext i8 @foo4(i8* nocapture %a, i64 %idx) nounwind readonly { +entry: + %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1] + %add.ptr2 = getelementptr i8* %a, i64 %add.ptr.sum ; <i8*> [#uses=1] + %tmp3 = load i8* %add.ptr2 ; <i8> [#uses=1] + ret i8 %tmp3 +} + +define signext i64 @foo5(i64* nocapture %a, i64 %idx) nounwind readonly { +entry: + %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1] + %add.ptr2 = getelementptr i64* %a, i64 %add.ptr.sum ; <i64*> [#uses=1] + %tmp3 = load i64* %add.ptr2 ; <i64> [#uses=1] + ret i64 %tmp3 +} + +define signext i32 @foo6(i32* nocapture %a, i64 %idx) nounwind readonly { +entry: + 
%add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1] + %add.ptr2 = getelementptr i32* %a, i64 %add.ptr.sum ; <i32*> [#uses=1] + %tmp3 = load i32* %add.ptr2 ; <i32> [#uses=1] + ret i32 %tmp3 +} + +define signext i16 @foo7(i16* nocapture %a, i64 %idx) nounwind readonly { +entry: + %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1] + %add.ptr2 = getelementptr i16* %a, i64 %add.ptr.sum ; <i16*> [#uses=1] + %tmp3 = load i16* %add.ptr2 ; <i16> [#uses=1] + ret i16 %tmp3 +} + +define signext i8 @foo8(i8* nocapture %a, i64 %idx) nounwind readonly { +entry: + %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1] + %add.ptr2 = getelementptr i8* %a, i64 %add.ptr.sum ; <i8*> [#uses=1] + %tmp3 = load i8* %add.ptr2 ; <i8> [#uses=1] + ret i8 %tmp3 +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/05-MemRegStores.ll b/src/LLVM/test/CodeGen/SystemZ/05-MemRegStores.ll new file mode 100644 index 0000000..b851c3f --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/05-MemRegStores.ll
@@ -0,0 +1,79 @@ +; RUN: llc < %s | not grep aghi +; RUN: llc < %s | FileCheck %s + +target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128" +target triple = "s390x-unknown-linux-gnu" + +define void @foo1(i64* nocapture %a, i64 %idx, i64 %val) nounwind { +entry: + +; CHECK: foo1: +; CHECK: stg %r4, 8(%r1,%r2) + %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1] + %add.ptr2 = getelementptr i64* %a, i64 %add.ptr.sum ; <i64*> [#uses=1] + store i64 %val, i64* %add.ptr2 + ret void +} + +define void @foo2(i32* nocapture %a, i64 %idx, i32 %val) nounwind { +entry: +; CHECK: foo2: +; CHECK: st %r4, 4(%r1,%r2) + %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1] + %add.ptr2 = getelementptr i32* %a, i64 %add.ptr.sum ; <i32*> [#uses=1] + store i32 %val, i32* %add.ptr2 + ret void +} + +define void @foo3(i16* nocapture %a, i64 %idx, i16 zeroext %val) nounwind { +entry: +; CHECK: foo3: +; CHECK: sth %r4, 2(%r1,%r2) + %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1] + %add.ptr2 = getelementptr i16* %a, i64 %add.ptr.sum ; <i16*> [#uses=1] + store i16 %val, i16* %add.ptr2 + ret void +} + +define void @foo4(i8* nocapture %a, i64 %idx, i8 zeroext %val) nounwind { +entry: +; CHECK: foo4: +; CHECK: stc %r4, 1(%r3,%r2) + %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1] + %add.ptr2 = getelementptr i8* %a, i64 %add.ptr.sum ; <i8*> [#uses=1] + store i8 %val, i8* %add.ptr2 + ret void +} + +define void @foo5(i8* nocapture %a, i64 %idx, i64 %val) nounwind { +entry: +; CHECK: foo5: +; CHECK: stc %r4, 1(%r3,%r2) + %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1] + %add.ptr2 = getelementptr i8* %a, i64 %add.ptr.sum ; <i8*> [#uses=1] + %conv = trunc i64 %val to i8 ; <i8> [#uses=1] + store i8 %conv, i8* %add.ptr2 + ret void +} + +define void @foo6(i16* nocapture %a, i64 %idx, i64 %val) nounwind { +entry: +; CHECK: foo6: +; CHECK: sth %r4, 2(%r1,%r2) + %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1] + %add.ptr2 = getelementptr i16* %a, i64 %add.ptr.sum ; 
<i16*> [#uses=1] + %conv = trunc i64 %val to i16 ; <i16> [#uses=1] + store i16 %conv, i16* %add.ptr2 + ret void +} + +define void @foo7(i32* nocapture %a, i64 %idx, i64 %val) nounwind { +entry: +; CHECK: foo7: +; CHECK: st %r4, 4(%r1,%r2) + %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1] + %add.ptr2 = getelementptr i32* %a, i64 %add.ptr.sum ; <i32*> [#uses=1] + %conv = trunc i64 %val to i32 ; <i32> [#uses=1] + store i32 %conv, i32* %add.ptr2 + ret void +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/06-CallViaStack.ll b/src/LLVM/test/CodeGen/SystemZ/06-CallViaStack.ll new file mode 100644 index 0000000..e904f49 --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/06-CallViaStack.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s | grep 168 | count 1 +; RUN: llc < %s | grep 160 | count 3 +; RUN: llc < %s | grep 328 | count 1 + +target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128" +target triple = "s390x-unknown-linux-gnu" + +define i64 @foo(i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g) nounwind { +entry: + %a = alloca i64, align 8 ; <i64*> [#uses=3] + store i64 %g, i64* %a + call void @bar(i64* %a) nounwind + %tmp1 = load i64* %a ; <i64> [#uses=1] + ret i64 %tmp1 +} + +declare void @bar(i64*)
diff --git a/src/LLVM/test/CodeGen/SystemZ/06-FrameIdxLoad.ll b/src/LLVM/test/CodeGen/SystemZ/06-FrameIdxLoad.ll new file mode 100644 index 0000000..c71da9b --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/06-FrameIdxLoad.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s | grep 160 | count 1 +; RUN: llc < %s | grep 168 | count 1 + +target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128" +target triple = "s390x-unknown-linux-gnu" + +define i64 @foo(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64* %g) nounwind readnone { +entry: + ret i64 %f +} + +define i64 @bar(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64* %g) nounwind readnone { +entry: + %conv = ptrtoint i64* %g to i64 ; <i64> [#uses=1] + ret i64 %conv +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/06-LocalFrame.ll b/src/LLVM/test/CodeGen/SystemZ/06-LocalFrame.ll new file mode 100644 index 0000000..d89b0df --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/06-LocalFrame.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s | grep 160 | count 1 +; RUN: llc < %s | grep 328 | count 1 +; RUN: llc < %s | grep 168 | count 1 + +target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128" +target triple = "s390x-unknown-linux-gnu" + +define noalias i64* @foo(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f) nounwind readnone { +entry: + %g = alloca i64, align 8 ; <i64*> [#uses=1] + %add.ptr = getelementptr i64* %g, i64 %f ; <i64*> [#uses=1] + ret i64* %add.ptr +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/06-SimpleCall.ll b/src/LLVM/test/CodeGen/SystemZ/06-SimpleCall.ll new file mode 100644 index 0000000..fd4b502 --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/06-SimpleCall.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s + +target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128" +target triple = "s390x-unknown-linux-gnu" + +define void @foo() nounwind { +entry: + tail call void @bar() nounwind + ret void +} + +declare void @bar()
diff --git a/src/LLVM/test/CodeGen/SystemZ/07-BrCond.ll b/src/LLVM/test/CodeGen/SystemZ/07-BrCond.ll new file mode 100644 index 0000000..8599717 --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/07-BrCond.ll
@@ -0,0 +1,141 @@ +; RUN: llc < %s | grep je | count 1 +; RUN: llc < %s | grep jne | count 1 +; RUN: llc < %s | grep jhe | count 2 +; RUN: llc < %s | grep jle | count 2 +; RUN: llc < %s | grep jh | count 4 +; RUN: llc < %s | grep jl | count 4 + +target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128" +target triple = "s390x-unknown-linux-gnu" + +define void @foo(i64 %a, i64 %b) nounwind { +entry: + %cmp = icmp ult i64 %a, %b ; <i1> [#uses=1] + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +} + +declare void @bar() + +define void @foo1(i64 %a, i64 %b) nounwind { +entry: + %cmp = icmp ugt i64 %a, %b ; <i1> [#uses=1] + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +} + +define void @foo2(i64 %a, i64 %b) nounwind { +entry: + %cmp = icmp ugt i64 %a, %b ; <i1> [#uses=1] + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +} + +define void @foo3(i64 %a, i64 %b) nounwind { +entry: + %cmp = icmp ult i64 %a, %b ; <i1> [#uses=1] + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +} + +define void @foo4(i64 %a, i64 %b) nounwind { +entry: + %cmp = icmp eq i64 %a, %b ; <i1> [#uses=1] + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +} + +define void @foo5(i64 %a, i64 %b) nounwind { +entry: + %cmp = icmp eq i64 %a, %b ; <i1> [#uses=1] + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +} + +define void 
@foo6(i64 %a, i64 %b) nounwind { +entry: + %cmp = icmp slt i64 %a, %b ; <i1> [#uses=1] + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +} + +define void @foo7(i64 %a, i64 %b) nounwind { +entry: + %cmp = icmp sgt i64 %a, %b ; <i1> [#uses=1] + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +} + +define void @foo8(i64 %a, i64 %b) nounwind { +entry: + %cmp = icmp sgt i64 %a, %b ; <i1> [#uses=1] + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +} + +define void @foo9(i64 %a, i64 %b) nounwind { +entry: + %cmp = icmp slt i64 %a, %b ; <i1> [#uses=1] + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/07-BrCond32.ll b/src/LLVM/test/CodeGen/SystemZ/07-BrCond32.ll new file mode 100644 index 0000000..8ece5ac --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/07-BrCond32.ll
@@ -0,0 +1,142 @@ +; RUN: llc < %s | grep je | count 1 +; RUN: llc < %s | grep jne | count 1 +; RUN: llc < %s | grep jhe | count 2 +; RUN: llc < %s | grep jle | count 2 +; RUN: llc < %s | grep jh | count 4 +; RUN: llc < %s | grep jl | count 4 + + +target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128" +target triple = "s390x-unknown-linux-gnu" + +define void @foo(i32 %a, i32 %b) nounwind { +entry: + %cmp = icmp ult i32 %a, %b ; <i1> [#uses=1] + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +} + +declare void @bar() + +define void @foo1(i32 %a, i32 %b) nounwind { +entry: + %cmp = icmp ugt i32 %a, %b ; <i1> [#uses=1] + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +} + +define void @foo2(i32 %a, i32 %b) nounwind { +entry: + %cmp = icmp ugt i32 %a, %b ; <i1> [#uses=1] + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +} + +define void @foo3(i32 %a, i32 %b) nounwind { +entry: + %cmp = icmp ult i32 %a, %b ; <i1> [#uses=1] + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +} + +define void @foo4(i32 %a, i32 %b) nounwind { +entry: + %cmp = icmp eq i32 %a, %b ; <i1> [#uses=1] + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +} + +define void @foo5(i32 %a, i32 %b) nounwind { +entry: + %cmp = icmp eq i32 %a, %b ; <i1> [#uses=1] + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +} + +define void 
@foo6(i32 %a, i32 %b) nounwind { +entry: + %cmp = icmp slt i32 %a, %b ; <i1> [#uses=1] + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +} + +define void @foo7(i32 %a, i32 %b) nounwind { +entry: + %cmp = icmp sgt i32 %a, %b ; <i1> [#uses=1] + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +} + +define void @foo8(i32 %a, i32 %b) nounwind { +entry: + %cmp = icmp sgt i32 %a, %b ; <i1> [#uses=1] + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +} + +define void @foo9(i32 %a, i32 %b) nounwind { +entry: + %cmp = icmp slt i32 %a, %b ; <i1> [#uses=1] + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/07-BrUnCond.ll b/src/LLVM/test/CodeGen/SystemZ/07-BrUnCond.ll new file mode 100644 index 0000000..ac6067a --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/07-BrUnCond.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s + +target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128" +target triple = "s390x-ibm-linux" + +define void @foo() noreturn nounwind { +entry: + tail call void @baz() nounwind + br label %l1 + +l1: ; preds = %entry, %l1 + tail call void @bar() nounwind + br label %l1 +} + +declare void @bar() + +declare void @baz()
diff --git a/src/LLVM/test/CodeGen/SystemZ/07-CmpImm.ll b/src/LLVM/test/CodeGen/SystemZ/07-CmpImm.ll new file mode 100644 index 0000000..4d0ebda --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/07-CmpImm.ll
@@ -0,0 +1,137 @@ +; RUN: llc < %s | grep cgfi | count 8 +; RUN: llc < %s | grep clgfi | count 2 + +target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128" +target triple = "s390x-unknown-linux-gnu" + +define void @foo(i64 %a, i64 %b) nounwind { +entry: + %cmp = icmp eq i64 %a, 0 ; <i1> [#uses=1] + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +} + +declare void @bar() + +define void @foo1(i64 %a, i64 %b) nounwind { +entry: + %cmp = icmp ugt i64 %a, 1 ; <i1> [#uses=1] + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +} + +define void @foo2(i64 %a, i64 %b) nounwind { +entry: + %cmp = icmp ugt i64 %a, 1 ; <i1> [#uses=1] + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +} + +define void @foo3(i64 %a) nounwind { +entry: + %cmp = icmp eq i64 %a, 0 ; <i1> [#uses=1] + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +} + +define void @foo4(i64 %a) nounwind { +entry: + %cmp = icmp eq i64 %a, 1 ; <i1> [#uses=1] + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +} + +define void @foo5(i64 %a) nounwind { +entry: + %cmp = icmp eq i64 %a, 1 ; <i1> [#uses=1] + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +} + +define void @foo6(i64 %a) nounwind { +entry: + %cmp = icmp slt i64 %a, 1 ; <i1> [#uses=1] + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @bar() 
nounwind + ret void + +if.end: ; preds = %entry + ret void +} + +define void @foo7(i64 %a) nounwind { +entry: + %cmp = icmp sgt i64 %a, 1 ; <i1> [#uses=1] + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +} + +define void @foo8(i64 %a) nounwind { +entry: + %cmp = icmp sgt i64 %a, 1 ; <i1> [#uses=1] + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +} + +define void @foo9(i64 %a) nounwind { +entry: + %cmp = icmp slt i64 %a, 1 ; <i1> [#uses=1] + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/07-CmpImm32.ll b/src/LLVM/test/CodeGen/SystemZ/07-CmpImm32.ll new file mode 100644 index 0000000..add34fa --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/07-CmpImm32.ll
@@ -0,0 +1,139 @@ +; RUN: llc < %s | grep jl | count 3 +; RUN: llc < %s | grep jh | count 3 +; RUN: llc < %s | grep je | count 2 +; RUN: llc < %s | grep jne | count 2 + +target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128" +target triple = "s390x-unknown-linux-gnu" + +define void @foo(i32 %a, i32 %b) nounwind { +entry: + %cmp = icmp eq i32 %a, 0 ; <i1> [#uses=1] + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +} + +declare void @bar() + +define void @foo1(i32 %a, i32 %b) nounwind { +entry: + %cmp = icmp ugt i32 %a, 1 ; <i1> [#uses=1] + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +} + +define void @foo2(i32 %a, i32 %b) nounwind { +entry: + %cmp = icmp ugt i32 %a, 1 ; <i1> [#uses=1] + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +} + +define void @foo3(i32 %a) nounwind { +entry: + %cmp = icmp eq i32 %a, 0 ; <i1> [#uses=1] + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +} + +define void @foo4(i32 %a) nounwind { +entry: + %cmp = icmp eq i32 %a, 1 ; <i1> [#uses=1] + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +} + +define void @foo5(i32 %a) nounwind { +entry: + %cmp = icmp eq i32 %a, 1 ; <i1> [#uses=1] + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +} + +define void @foo6(i32 %a) nounwind { +entry: + %cmp = icmp slt i32 %a, 1 ; <i1> [#uses=1] + br i1 %cmp, label %if.then, 
label %if.end + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +} + +define void @foo7(i32 %a) nounwind { +entry: + %cmp = icmp sgt i32 %a, 1 ; <i1> [#uses=1] + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +} + +define void @foo8(i32 %a) nounwind { +entry: + %cmp = icmp sgt i32 %a, 1 ; <i1> [#uses=1] + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +} + +define void @foo9(i32 %a) nounwind { +entry: + %cmp = icmp slt i32 %a, 1 ; <i1> [#uses=1] + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/07-SelectCC.ll b/src/LLVM/test/CodeGen/SystemZ/07-SelectCC.ll new file mode 100644 index 0000000..aa4b36e --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/07-SelectCC.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s | grep clgr + +target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128" +target triple = "s390x-unknown-linux-gnu" + +define i64 @foo(i64 %a, i64 %b) nounwind readnone { +entry: + %cmp = icmp ult i64 %a, %b ; <i1> [#uses=1] + %cond = select i1 %cmp, i64 %a, i64 %b ; <i64> [#uses=1] + ret i64 %cond +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/08-DivRem.ll b/src/LLVM/test/CodeGen/SystemZ/08-DivRem.ll new file mode 100644 index 0000000..ff1e441 --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/08-DivRem.ll
@@ -0,0 +1,55 @@ +; RUN: llc < %s | grep dsgr | count 2 +; RUN: llc < %s | grep dsgfr | count 2 +; RUN: llc < %s | grep dlr | count 2 +; RUN: llc < %s | grep dlgr | count 2 + +target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128" +target triple = "s390x-unknown-linux-gnu" + +define i64 @div(i64 %a, i64 %b) nounwind readnone { +entry: + %div = sdiv i64 %a, %b ; <i64> [#uses=1] + ret i64 %div +} + +define i32 @div1(i32 %a, i32 %b) nounwind readnone { +entry: + %div = sdiv i32 %a, %b ; <i32> [#uses=1] + ret i32 %div +} + +define i64 @div2(i64 %a, i64 %b) nounwind readnone { +entry: + %div = udiv i64 %a, %b ; <i64> [#uses=1] + ret i64 %div +} + +define i32 @div3(i32 %a, i32 %b) nounwind readnone { +entry: + %div = udiv i32 %a, %b ; <i32> [#uses=1] + ret i32 %div +} + +define i64 @rem(i64 %a, i64 %b) nounwind readnone { +entry: + %rem = srem i64 %a, %b ; <i64> [#uses=1] + ret i64 %rem +} + +define i32 @rem1(i32 %a, i32 %b) nounwind readnone { +entry: + %rem = srem i32 %a, %b ; <i32> [#uses=1] + ret i32 %rem +} + +define i64 @rem2(i64 %a, i64 %b) nounwind readnone { +entry: + %rem = urem i64 %a, %b ; <i64> [#uses=1] + ret i64 %rem +} + +define i32 @rem3(i32 %a, i32 %b) nounwind readnone { +entry: + %rem = urem i32 %a, %b ; <i32> [#uses=1] + ret i32 %rem +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/08-DivRemMemOp.ll b/src/LLVM/test/CodeGen/SystemZ/08-DivRemMemOp.ll new file mode 100644 index 0000000..d6ec0e7 --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/08-DivRemMemOp.ll
@@ -0,0 +1,64 @@ +; RUN: llc < %s | grep {dsgf.%} | count 2 +; RUN: llc < %s | grep {dsg.%} | count 2 +; RUN: llc < %s | grep {dl.%} | count 2 +; RUN: llc < %s | grep dlg | count 2 + +target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128" +target triple = "s390x-unknown-linux-gnu" + +define i64 @div(i64 %a, i64* %b) nounwind readnone { +entry: + %b1 = load i64* %b + %div = sdiv i64 %a, %b1 + ret i64 %div +} + +define i64 @div1(i64 %a, i64* %b) nounwind readnone { +entry: + %b1 = load i64* %b + %div = udiv i64 %a, %b1 + ret i64 %div +} + +define i64 @rem(i64 %a, i64* %b) nounwind readnone { +entry: + %b1 = load i64* %b + %div = srem i64 %a, %b1 + ret i64 %div +} + +define i64 @rem1(i64 %a, i64* %b) nounwind readnone { +entry: + %b1 = load i64* %b + %div = urem i64 %a, %b1 + ret i64 %div +} + +define i32 @div2(i32 %a, i32* %b) nounwind readnone { +entry: + %b1 = load i32* %b + %div = sdiv i32 %a, %b1 + ret i32 %div +} + +define i32 @div3(i32 %a, i32* %b) nounwind readnone { +entry: + %b1 = load i32* %b + %div = udiv i32 %a, %b1 + ret i32 %div +} + +define i32 @rem2(i32 %a, i32* %b) nounwind readnone { +entry: + %b1 = load i32* %b + %div = srem i32 %a, %b1 + ret i32 %div +} + +define i32 @rem3(i32 %a, i32* %b) nounwind readnone { +entry: + %b1 = load i32* %b + %div = urem i32 %a, %b1 + ret i32 %div +} +
diff --git a/src/LLVM/test/CodeGen/SystemZ/08-SimpleMuls.ll b/src/LLVM/test/CodeGen/SystemZ/08-SimpleMuls.ll new file mode 100644 index 0000000..1ab88d6 --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/08-SimpleMuls.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s | grep msgr | count 2 +; RUN: llc < %s | grep msr | count 2 + +target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128" +target triple = "s390x-unknown-linux-gnu" + +define i64 @foo(i64 %a, i64 %b) nounwind readnone { +entry: + %mul = mul i64 %b, %a ; <i64> [#uses=1] + ret i64 %mul +} + +define i64 @foo2(i64 %a, i64 %b) nounwind readnone { +entry: + %mul = mul i64 %b, %a ; <i64> [#uses=1] + ret i64 %mul +} + +define i32 @foo3(i32 %a, i32 %b) nounwind readnone { +entry: + %mul = mul i32 %b, %a ; <i32> [#uses=1] + ret i32 %mul +} + +define i32 @foo4(i32 %a, i32 %b) nounwind readnone { +entry: + %mul = mul i32 %b, %a ; <i32> [#uses=1] + ret i32 %mul +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/09-DynamicAlloca.ll b/src/LLVM/test/CodeGen/SystemZ/09-DynamicAlloca.ll new file mode 100644 index 0000000..30810ce --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/09-DynamicAlloca.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s + +target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" +target triple = "s390x-ibm-linux" + +define void @foo(i64 %N) nounwind { +entry: + %N3 = trunc i64 %N to i32 ; <i32> [#uses=1] + %vla = alloca i8, i32 %N3, align 2 ; <i8*> [#uses=1] + call void @bar(i8* %vla) nounwind + ret void +} + +declare void @bar(i8*)
diff --git a/src/LLVM/test/CodeGen/SystemZ/09-Globals.ll b/src/LLVM/test/CodeGen/SystemZ/09-Globals.ll new file mode 100644 index 0000000..50a26e2 --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/09-Globals.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s | grep larl | count 3 + +target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128" +target triple = "s390x-ibm-linux" +@bar = common global i64 0, align 8 ; <i64*> [#uses=3] + +define i64 @foo() nounwind readonly { +entry: + %tmp = load i64* @bar ; <i64> [#uses=1] + ret i64 %tmp +} + +define i64* @foo2() nounwind readnone { +entry: + ret i64* @bar +} + +define i64* @foo3(i64 %idx) nounwind readnone { +entry: + %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1] + %add.ptr2 = getelementptr i64* @bar, i64 %add.ptr.sum ; <i64*> [#uses=1] + ret i64* %add.ptr2 +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/09-Switches.ll b/src/LLVM/test/CodeGen/SystemZ/09-Switches.ll new file mode 100644 index 0000000..32aaa62 --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/09-Switches.ll
@@ -0,0 +1,39 @@ +; RUN: llc < %s -march=systemz | grep larl + +define i32 @main(i32 %tmp158) { +entry: + switch i32 %tmp158, label %bb336 [ + i32 -2147483648, label %bb338 + i32 -2147483647, label %bb338 + i32 -2147483646, label %bb338 + i32 120, label %bb338 + i32 121, label %bb339 + i32 122, label %bb340 + i32 123, label %bb341 + i32 124, label %bb342 + i32 125, label %bb343 + i32 126, label %bb336 + i32 1024, label %bb338 + i32 0, label %bb338 + i32 1, label %bb338 + i32 2, label %bb338 + i32 3, label %bb338 + i32 4, label %bb338 + i32 5, label %bb338 + ] +bb336: + ret i32 10 +bb338: + ret i32 11 +bb339: + ret i32 12 +bb340: + ret i32 13 +bb341: + ret i32 14 +bb342: + ret i32 15 +bb343: + ret i32 18 + +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/10-FuncsPic.ll b/src/LLVM/test/CodeGen/SystemZ/10-FuncsPic.ll new file mode 100644 index 0000000..f291e5f --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/10-FuncsPic.ll
@@ -0,0 +1,27 @@ +; RUN: llc < %s -relocation-model=pic | grep GOTENT | count 3 +; RUN: llc < %s -relocation-model=pic | grep PLT | count 1 + +target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" +target triple = "s390x-ibm-linux" +@ptr = external global void (...)* ; <void (...)**> [#uses=2] + +define void @foo1() nounwind { +entry: + store void (...)* @func, void (...)** @ptr + ret void +} + +declare void @func(...) + +define void @foo2() nounwind { +entry: + tail call void (...)* @func() nounwind + ret void +} + +define void @foo3() nounwind { +entry: + %tmp = load void (...)** @ptr ; <void (...)*> [#uses=1] + tail call void (...)* %tmp() nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/10-GlobalsPic.ll b/src/LLVM/test/CodeGen/SystemZ/10-GlobalsPic.ll new file mode 100644 index 0000000..c581ad9 --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/10-GlobalsPic.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s -relocation-model=pic | grep GOTENT | count 6 + +target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" +target triple = "s390x-ibm-linux" +@src = external global i32 ; <i32*> [#uses=2] +@dst = external global i32 ; <i32*> [#uses=2] +@ptr = external global i32* ; <i32**> [#uses=2] + +define void @foo1() nounwind { +entry: + %tmp = load i32* @src ; <i32> [#uses=1] + store i32 %tmp, i32* @dst + ret void +} + +define void @foo2() nounwind { +entry: + store i32* @dst, i32** @ptr + ret void +} + +define void @foo3() nounwind { +entry: + %tmp = load i32* @src ; <i32> [#uses=1] + %tmp1 = load i32** @ptr ; <i32*> [#uses=1] + %arrayidx = getelementptr i32* %tmp1, i64 1 ; <i32*> [#uses=1] + store i32 %tmp, i32* %arrayidx + ret void +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/11-BSwap.ll b/src/LLVM/test/CodeGen/SystemZ/11-BSwap.ll new file mode 100644 index 0000000..1aa9c67 --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/11-BSwap.ll
@@ -0,0 +1,74 @@ +; RUN: llc < %s | FileCheck %s + + +target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" +target triple = "s390x-ibm-linux" + + +define zeroext i16 @foo(i16 zeroext %a) { + %res = tail call i16 @llvm.bswap.i16(i16 %a) + ret i16 %res +} + +define zeroext i32 @foo2(i32 zeroext %a) { +; CHECK: foo2: +; CHECK: lrvr [[R1:%r.]], %r2 + %res = tail call i32 @llvm.bswap.i32(i32 %a) + ret i32 %res +} + +define zeroext i64 @foo3(i64 %a) { +; CHECK: foo3: +; CHECK: lrvgr %r2, %r2 + %res = tail call i64 @llvm.bswap.i64(i64 %a) + ret i64 %res +} + +define zeroext i16 @foo4(i16* %b) { + %a = load i16* %b + %res = tail call i16 @llvm.bswap.i16(i16 %a) + ret i16 %res +} + +define zeroext i32 @foo5(i32* %b) { +; CHECK: foo5: +; CHECK: lrv [[R1:%r.]], 0(%r2) + %a = load i32* %b + %res = tail call i32 @llvm.bswap.i32(i32 %a) + ret i32 %res +} + +define i64 @foo6(i64* %b) { +; CHECK: foo6: +; CHECK: lrvg %r2, 0(%r2) + %a = load i64* %b + %res = tail call i64 @llvm.bswap.i64(i64 %a) + ret i64 %res +} + +define void @foo7(i16 %a, i16* %b) { + %res = tail call i16 @llvm.bswap.i16(i16 %a) + store i16 %res, i16* %b + ret void +} + +define void @foo8(i32 %a, i32* %b) { +; CHECK: foo8: +; CHECK: strv %r2, 0(%r3) + %res = tail call i32 @llvm.bswap.i32(i32 %a) + store i32 %res, i32* %b + ret void +} + +define void @foo9(i64 %a, i64* %b) { +; CHECK: foo9: +; CHECK: strvg %r2, 0(%r3) + %res = tail call i64 @llvm.bswap.i64(i64 %a) + store i64 %res, i64* %b + ret void +} + +declare i16 @llvm.bswap.i16(i16) nounwind readnone +declare i32 @llvm.bswap.i32(i32) nounwind readnone +declare i64 @llvm.bswap.i64(i64) nounwind readnone +
diff --git a/src/LLVM/test/CodeGen/SystemZ/2009-05-29-InvalidRetResult.ll b/src/LLVM/test/CodeGen/SystemZ/2009-05-29-InvalidRetResult.ll new file mode 100644 index 0000000..65f8e14 --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/2009-05-29-InvalidRetResult.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s + +target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128" +target triple = "s390x-unknown-linux-gnu" + +define i32 @main() nounwind { +entry: + %call = call i32 (...)* @random() nounwind ; <i32> [#uses=0] + unreachable +} + +declare i32 @random(...)
diff --git a/src/LLVM/test/CodeGen/SystemZ/2009-06-02-And32Imm.ll b/src/LLVM/test/CodeGen/SystemZ/2009-06-02-And32Imm.ll new file mode 100644 index 0000000..3cfa97d --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/2009-06-02-And32Imm.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=systemz | grep nilf | count 1 +; RUN: llc < %s -march=systemz | grep nill | count 1 + +define i32 @gnu_dev_major(i64 %__dev) nounwind readnone { +entry: + %shr = lshr i64 %__dev, 8 ; <i64> [#uses=1] + %shr8 = trunc i64 %shr to i32 ; <i32> [#uses=1] + %shr2 = lshr i64 %__dev, 32 ; <i64> [#uses=1] + %conv = trunc i64 %shr2 to i32 ; <i32> [#uses=1] + %and3 = and i32 %conv, -4096 ; <i32> [#uses=1] + %and6 = and i32 %shr8, 4095 ; <i32> [#uses=1] + %conv5 = or i32 %and6, %and3 ; <i32> [#uses=1] + ret i32 %conv5 +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/2009-06-02-Rotate.ll b/src/LLVM/test/CodeGen/SystemZ/2009-06-02-Rotate.ll new file mode 100644 index 0000000..54424e1 --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/2009-06-02-Rotate.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=systemz | grep rll + +target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" +target triple = "s390x-ibm-linux" + +define i32 @rotl(i32 %x, i32 %y, i32 %z) nounwind readnone { +entry: + %shl = shl i32 %x, 1 ; <i32> [#uses=1] + %sub = sub i32 32, 1 ; <i32> [#uses=1] + %shr = lshr i32 %x, %sub ; <i32> [#uses=1] + %or = or i32 %shr, %shl ; <i32> [#uses=1] + ret i32 %or +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/2009-06-05-InvalidArgLoad.ll b/src/LLVM/test/CodeGen/SystemZ/2009-06-05-InvalidArgLoad.ll new file mode 100644 index 0000000..5f6ec50 --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/2009-06-05-InvalidArgLoad.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s + +target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128" +target triple = "s390x-ibm-linux" + %struct.re_pattern_buffer = type <{ i8*, i64, i64, i64, i8*, i8*, i64, i8, i8, i8, i8, i8, i8, i8, i8 }> + %struct.re_registers = type <{ i32, i8, i8, i8, i8, i32*, i32* }> + +define i32 @xre_search_2(%struct.re_pattern_buffer* nocapture %bufp, i8* %string1, i32 %size1, i8* %string2, i32 %size2, i32 %startpos, i32 %range, %struct.re_registers* %regs, i32 %stop) nounwind { +entry: + %cmp17.i = icmp slt i32 undef, %startpos ; <i1> [#uses=1] + %or.cond.i = or i1 undef, %cmp17.i ; <i1> [#uses=1] + br i1 %or.cond.i, label %byte_re_search_2.exit, label %if.then20.i + +if.then20.i: ; preds = %entry + ret i32 -2 + +byte_re_search_2.exit: ; preds = %entry + ret i32 -1 +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/2009-07-04-Shl32.ll b/src/LLVM/test/CodeGen/SystemZ/2009-07-04-Shl32.ll new file mode 100644 index 0000000..89b2225 --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/2009-07-04-Shl32.ll
@@ -0,0 +1,27 @@ +; RUN: llc < %s + +target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" +target triple = "s390x-ibm-linux" + +define void @compdecomp(i8* nocapture %data, i64 %data_len) nounwind { +entry: + br label %for.body38 + +for.body38: ; preds = %for.body38, %entry + br i1 undef, label %for.cond220, label %for.body38 + +for.cond220: ; preds = %for.cond220, %for.body38 + br i1 false, label %for.cond220, label %for.end297 + +for.end297: ; preds = %for.cond220 + %tmp334 = load i8* undef ; <i8> [#uses=1] + %conv343 = zext i8 %tmp334 to i32 ; <i32> [#uses=1] + %sub344 = add i32 %conv343, -1 ; <i32> [#uses=1] + %shl345 = shl i32 1, %sub344 ; <i32> [#uses=1] + %conv346 = sext i32 %shl345 to i64 ; <i64> [#uses=1] + br label %for.body356 + +for.body356: ; preds = %for.body356, %for.end297 + %mask.1633 = phi i64 [ %conv346, %for.end297 ], [ undef, %for.body356 ] ; <i64> [#uses=0] + br label %for.body356 +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/2009-07-05-Shifts.ll b/src/LLVM/test/CodeGen/SystemZ/2009-07-05-Shifts.ll new file mode 100644 index 0000000..68ccb84 --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/2009-07-05-Shifts.ll
@@ -0,0 +1,25 @@ +; RUN: llc < %s + +target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" +target triple = "s390x-ibm-linux" + +define signext i32 @bit_place_piece(i32 signext %col, i32 signext %player, i64* nocapture %b1, i64* nocapture %b2) nounwind { +entry: + br i1 undef, label %for.body, label %return + +for.body: ; preds = %entry + %add = add i32 0, %col ; <i32> [#uses=1] + %sh_prom = zext i32 %add to i64 ; <i64> [#uses=1] + %shl = shl i64 1, %sh_prom ; <i64> [#uses=1] + br i1 undef, label %if.then13, label %if.else + +if.then13: ; preds = %for.body + ret i32 0 + +if.else: ; preds = %for.body + %or34 = or i64 undef, %shl ; <i64> [#uses=0] + ret i32 0 + +return: ; preds = %entry + ret i32 1 +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll b/src/LLVM/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll new file mode 100644 index 0000000..92f5467 --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll
@@ -0,0 +1,25 @@ +; RUN: llc < %s | FileCheck %s +; RUN: llc < %s -regalloc=basic | FileCheck %s + +target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" +target triple = "s390x-ibm-linux" + +declare void @rdft(i32 signext, i32 signext, double*, i32* nocapture, double*) nounwind + +declare double @mp_mul_d2i_test(i32 signext, i32 signext, double* nocapture) nounwind + +define void @mp_mul_radix_test_bb3(i32 %radix, i32 %nfft, double* %tmpfft, i32* %ip, double* %w, double* %arrayidx44.reload, double* %call.out) nounwind { +; CHECK: lg %r{{[0-9]+}}, 328(%r15) + +newFuncRoot: + br label %bb3 + +bb4.exitStub: ; preds = %bb3 + store double %call, double* %call.out + ret void + +bb3: ; preds = %newFuncRoot + tail call void @rdft(i32 signext %nfft, i32 signext -1, double* %arrayidx44.reload, i32* %ip, double* %w) nounwind + %call = tail call double @mp_mul_d2i_test(i32 signext %radix, i32 signext %nfft, double* %tmpfft) ; <double> [#uses=1] + br label %bb4.exitStub +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll b/src/LLVM/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll new file mode 100644 index 0000000..f4e176e --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s + +target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" +target triple = "s390x-ibm-linux" + +define float @foo(i32 signext %a) { +entry: + %b = bitcast i32 %a to float + ret float %b +} + +define i32 @bar(float %a) { +entry: + %b = bitcast float %a to i32 + ret i32 %b +}
diff --git a/src/LLVM/test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll b/src/LLVM/test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll new file mode 100644 index 0000000..63fd855 --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll
@@ -0,0 +1,32 @@ +; RUN: llc < %s + +target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" +target triple = "s390x-ibm-linux" + +define signext i32 @dfg_parse() nounwind { +entry: + br i1 undef, label %if.then2208, label %if.else2360 + +if.then2208: ; preds = %entry + br i1 undef, label %bb.nph3189, label %for.end2270 + +bb.nph3189: ; preds = %if.then2208 + unreachable + +for.end2270: ; preds = %if.then2208 + %call2279 = call i64 @strlen(i8* undef) nounwind ; <i64> [#uses=1] + %add2281 = add i64 0, %call2279 ; <i64> [#uses=1] + %tmp2283 = trunc i64 %add2281 to i32 ; <i32> [#uses=1] + %tmp2284 = alloca i8, i32 %tmp2283, align 2 ; <i8*> [#uses=1] + %yyd.0.i2561.13 = getelementptr i8* %tmp2284, i64 13 ; <i8*> [#uses=1] + store i8 117, i8* %yyd.0.i2561.13 + br label %while.cond.i2558 + +while.cond.i2558: ; preds = %while.cond.i2558, %for.end2270 + br label %while.cond.i2558 + +if.else2360: ; preds = %entry + unreachable +} + +declare i64 @strlen(i8* nocapture) nounwind readonly
diff --git a/src/LLVM/test/CodeGen/SystemZ/2009-08-21-InlineAsmRConstraint.ll b/src/LLVM/test/CodeGen/SystemZ/2009-08-21-InlineAsmRConstraint.ll new file mode 100644 index 0000000..f7686f1 --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/2009-08-21-InlineAsmRConstraint.ll
@@ -0,0 +1,21 @@ +; RUN: llc < %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:16:16-f128:128:128" +target triple = "s390x-ibm-linux-gnu" + +@__JCR_LIST__ = internal global [0 x i8*] zeroinitializer, section ".jcr", align 8 ; <[0 x i8*]*> [#uses=1] + +define internal void @frame_dummy() nounwind { +entry: + %asmtmp = tail call void (i8*)* (void (i8*)*)* asm "", "=r,0"(void (i8*)* @_Jv_RegisterClasses) nounwind ; <void (i8*)*> [#uses=2] + %0 = icmp eq void (i8*)* %asmtmp, null ; <i1> [#uses=1] + br i1 %0, label %return, label %bb3 + +bb3: ; preds = %entry + tail call void %asmtmp(i8* bitcast ([0 x i8*]* @__JCR_LIST__ to i8*)) nounwind + ret void + +return: ; preds = %entry + ret void +} + +declare extern_weak void @_Jv_RegisterClasses(i8*)
diff --git a/src/LLVM/test/CodeGen/SystemZ/2009-08-22-FCopySign.ll b/src/LLVM/test/CodeGen/SystemZ/2009-08-22-FCopySign.ll new file mode 100644 index 0000000..fde7d9d --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/2009-08-22-FCopySign.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:16:16-f128:128:128" +target triple = "s390x-ibm-linux-gnu" + +define double @foo(double %a, double %b) nounwind { +entry: +; CHECK: cpsdr %f0, %f2, %f0 + %0 = tail call double @copysign(double %a, double %b) nounwind readnone + ret double %0 +} + +define float @bar(float %a, float %b) nounwind { +entry: +; CHECK: cpsdr %f0, %f2, %f0 + %0 = tail call float @copysignf(float %a, float %b) nounwind readnone + ret float %0 +} + + +declare double @copysign(double, double) nounwind readnone +declare float @copysignf(float, float) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/SystemZ/2010-01-04-DivMem.ll b/src/LLVM/test/CodeGen/SystemZ/2010-01-04-DivMem.ll new file mode 100644 index 0000000..d730bec --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/2010-01-04-DivMem.ll
@@ -0,0 +1,50 @@ +; RUN: llc < %s +target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16-n32:64" +target triple = "s390x-elf" + +@REGISTER = external global [10 x i32] ; <[10 x i32]*> [#uses=2] + +define void @DIVR_P(i32 signext %PRINT_EFFECT) nounwind { +entry: + %REG1 = alloca i32, align 4 ; <i32*> [#uses=2] + %REG2 = alloca i32, align 4 ; <i32*> [#uses=2] + %call = call signext i32 (...)* @FORMAT2(i32* %REG1, i32* %REG2) nounwind ; <i32> [#uses=0] + %tmp = load i32* %REG1 ; <i32> [#uses=1] + %idxprom = sext i32 %tmp to i64 ; <i64> [#uses=1] + %arrayidx = getelementptr inbounds [10 x i32]* @REGISTER, i64 0, i64 %idxprom ; <i32*> [#uses=2] + %tmp1 = load i32* %arrayidx ; <i32> [#uses=2] + %tmp2 = load i32* %REG2 ; <i32> [#uses=1] + %idxprom3 = sext i32 %tmp2 to i64 ; <i64> [#uses=1] + %arrayidx4 = getelementptr inbounds [10 x i32]* @REGISTER, i64 0, i64 %idxprom3 ; <i32*> [#uses=3] + %tmp5 = load i32* %arrayidx4 ; <i32> [#uses=3] + %cmp6 = icmp sgt i32 %tmp5, 8388607 ; <i1> [#uses=1] + %REG2_SIGN.0 = select i1 %cmp6, i32 -1, i32 1 ; <i32> [#uses=2] + %cmp10 = icmp eq i32 %REG2_SIGN.0, 1 ; <i1> [#uses=1] + %not.cmp = icmp slt i32 %tmp1, 8388608 ; <i1> [#uses=2] + %or.cond = and i1 %cmp10, %not.cmp ; <i1> [#uses=1] + br i1 %or.cond, label %if.then13, label %if.end25 + +if.then13: ; preds = %entry + %div = sdiv i32 %tmp5, %tmp1 ; <i32> [#uses=2] + store i32 %div, i32* %arrayidx4 + br label %if.end25 + +if.end25: ; preds = %if.then13, %entry + %tmp35 = phi i32 [ %div, %if.then13 ], [ %tmp5, %entry ] ; <i32> [#uses=1] + %cmp27 = icmp eq i32 %REG2_SIGN.0, -1 ; <i1> [#uses=1] + %or.cond46 = and i1 %cmp27, %not.cmp ; <i1> [#uses=1] + br i1 %or.cond46, label %if.then31, label %if.end45 + +if.then31: ; preds = %if.end25 + %sub = sub i32 16777216, %tmp35 ; <i32> [#uses=1] + %tmp39 = load i32* %arrayidx ; <i32> [#uses=1] + %div40 = udiv i32 %sub, %tmp39 ; <i32> [#uses=1] + %sub41 = sub i32 16777216, %div40 ; <i32> 
[#uses=1] + store i32 %sub41, i32* %arrayidx4 + ret void + +if.end45: ; preds = %if.end25 + ret void +} + +declare signext i32 @FORMAT2(...)
diff --git a/src/LLVM/test/CodeGen/SystemZ/2010-04-07-DbgValueOtherTargets.ll b/src/LLVM/test/CodeGen/SystemZ/2010-04-07-DbgValueOtherTargets.ll new file mode 100644 index 0000000..c2877ac --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/2010-04-07-DbgValueOtherTargets.ll
@@ -0,0 +1,28 @@ +; RUN: llc -O0 -march=systemz -asm-verbose < %s | FileCheck %s +; Check that DEBUG_VALUE comments come through on a variety of targets. + +define i32 @main() nounwind ssp { +entry: +; CHECK: DEBUG_VALUE + call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9 + ret i32 0, !dbg !10 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.sp = !{!0} + +!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5} +!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 0} +!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] +!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!9 = metadata !{i32 3, i32 11, metadata !8, null} +!10 = metadata !{i32 4, i32 2, metadata !8, null} +
diff --git a/src/LLVM/test/CodeGen/SystemZ/dg.exp b/src/LLVM/test/CodeGen/SystemZ/dg.exp new file mode 100644 index 0000000..e9624ba --- /dev/null +++ b/src/LLVM/test/CodeGen/SystemZ/dg.exp
@@ -0,0 +1,5 @@ +load_lib llvm.exp + +if { [llvm_supports_target SystemZ] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] +}
diff --git a/src/LLVM/test/CodeGen/Thumb/2007-01-31-RegInfoAssert.ll b/src/LLVM/test/CodeGen/Thumb/2007-01-31-RegInfoAssert.ll new file mode 100644 index 0000000..0490bc4 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/2007-01-31-RegInfoAssert.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -mtriple=thumb-apple-darwin + +%struct.rtx_def = type { i8 } +@str = external global [7 x i8] + +define void @f1() { + %D = alloca %struct.rtx_def, align 1 + %tmp1 = bitcast %struct.rtx_def* %D to i32* + %tmp7 = load i32* %tmp1 + %tmp14 = lshr i32 %tmp7, 1 + %tmp1415 = and i32 %tmp14, 1 + call void (i32, ...)* @printf( i32 undef, i32 0, i32 %tmp1415 ) + ret void +} + +declare void @printf(i32, ...)
diff --git a/src/LLVM/test/CodeGen/Thumb/2007-02-02-JoinIntervalsCrash.ll b/src/LLVM/test/CodeGen/Thumb/2007-02-02-JoinIntervalsCrash.ll new file mode 100644 index 0000000..d9996e1 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/2007-02-02-JoinIntervalsCrash.ll
@@ -0,0 +1,27 @@ +; RUN: llc < %s -mtriple=thumb-apple-darwin + + %struct.color_sample = type { i32 } + %struct.ref = type { %struct.color_sample, i16, i16 } + +define void @zcvrs() { + br i1 false, label %bb22, label %UnifiedReturnBlock + +bb22: + br i1 false, label %bb64, label %UnifiedReturnBlock + +bb64: + %tmp67 = urem i32 0, 0 + %tmp69 = icmp slt i32 %tmp67, 10 + %iftmp.13.0 = select i1 %tmp69, i8 48, i8 55 + %tmp75 = add i8 %iftmp.13.0, 0 + store i8 %tmp75, i8* null + %tmp81 = udiv i32 0, 0 + %tmp83 = icmp eq i32 %tmp81, 0 + br i1 %tmp83, label %bb85, label %bb64 + +bb85: + ret void + +UnifiedReturnBlock: + ret void +}
diff --git a/src/LLVM/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll b/src/LLVM/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll new file mode 100644 index 0000000..3be5e5e --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll
@@ -0,0 +1,41 @@ +; RUN: llc < %s | not grep r11 + +target triple = "thumb-unknown-linux-gnueabi" + %struct.__sched_param = type { i32 } + %struct.pthread_attr_t = type { i32, i32, %struct.__sched_param, i32, i32, i32, i32, i8*, i32 } +@i.1882 = internal global i32 1 ; <i32*> [#uses=2] +@.str = internal constant [14 x i8] c"Thread 1: %d\0A\00" ; <[14 x i8]*> [#uses=1] +@.str1 = internal constant [14 x i8] c"Thread 2: %d\0A\00" ; <[14 x i8]*> [#uses=1] + +define i8* @f(i8* %a) { +entry: + %tmp1 = load i32* @i.1882 ; <i32> [#uses=1] + %tmp2 = add i32 %tmp1, 1 ; <i32> [#uses=2] + store i32 %tmp2, i32* @i.1882 + %tmp34 = inttoptr i32 %tmp2 to i8* ; <i8*> [#uses=1] + ret i8* %tmp34 +} + +define i32 @main() { +entry: + %t = alloca i32, align 4 ; <i32*> [#uses=4] + %ret = alloca i32, align 4 ; <i32*> [#uses=3] + %tmp1 = call i32 @pthread_create( i32* %t, %struct.pthread_attr_t* null, i8* (i8*)* @f, i8* null ) ; <i32> [#uses=0] + %tmp2 = load i32* %t ; <i32> [#uses=1] + %ret3 = bitcast i32* %ret to i8** ; <i8**> [#uses=2] + %tmp4 = call i32 @pthread_join( i32 %tmp2, i8** %ret3 ) ; <i32> [#uses=0] + %tmp5 = load i32* %ret ; <i32> [#uses=1] + %tmp7 = call i32 (i8*, ...)* @printf( i8* getelementptr ([14 x i8]* @.str, i32 0, i32 0), i32 %tmp5 ) ; <i32> [#uses=0] + %tmp8 = call i32 @pthread_create( i32* %t, %struct.pthread_attr_t* null, i8* (i8*)* @f, i8* null ) ; <i32> [#uses=0] + %tmp9 = load i32* %t ; <i32> [#uses=1] + %tmp11 = call i32 @pthread_join( i32 %tmp9, i8** %ret3 ) ; <i32> [#uses=0] + %tmp12 = load i32* %ret ; <i32> [#uses=1] + %tmp14 = call i32 (i8*, ...)* @printf( i8* getelementptr ([14 x i8]* @.str1, i32 0, i32 0), i32 %tmp12 ) ; <i32> [#uses=0] + ret i32 0 +} + +declare i32 @pthread_create(i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*) + +declare i32 @pthread_join(i32, i8**) + +declare i32 @printf(i8*, ...)
diff --git a/src/LLVM/test/CodeGen/Thumb/2009-06-18-ThumbCommuteMul.ll b/src/LLVM/test/CodeGen/Thumb/2009-06-18-ThumbCommuteMul.ll new file mode 100644 index 0000000..5c883b3 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/2009-06-18-ThumbCommuteMul.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=thumb | grep r0 | count 1 + +define i32 @a(i32 %x, i32 %y) nounwind readnone { +entry: + %mul = mul i32 %y, %x ; <i32> [#uses=1] + ret i32 %mul +} +
diff --git a/src/LLVM/test/CodeGen/Thumb/2009-07-20-TwoAddrBug.ll b/src/LLVM/test/CodeGen/Thumb/2009-07-20-TwoAddrBug.ll new file mode 100644 index 0000000..d4651a1 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/2009-07-20-TwoAddrBug.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -mtriple=thumbv6-apple-darwin10 + +@Time.2535 = external global i64 ; <i64*> [#uses=2] + +define i64 @millisecs() nounwind { +entry: + %0 = load i64* @Time.2535, align 4 ; <i64> [#uses=2] + %1 = add i64 %0, 1 ; <i64> [#uses=1] + store i64 %1, i64* @Time.2535, align 4 + ret i64 %0 +}
diff --git a/src/LLVM/test/CodeGen/Thumb/2009-07-27-PEIAssert.ll b/src/LLVM/test/CodeGen/Thumb/2009-07-27-PEIAssert.ll new file mode 100644 index 0000000..aaca3a7 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/2009-07-27-PEIAssert.ll
@@ -0,0 +1,26 @@ +; RUN: llc < %s -mtriple=thumbv6-apple-darwin -relocation-model=pic -disable-fp-elim + + %struct.LinkList = type { i32, %struct.LinkList* } + %struct.List = type { i32, i32* } +@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 ()* @main to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +define i32 @main() nounwind { +entry: + %ll = alloca %struct.LinkList*, align 4 ; <%struct.LinkList**> [#uses=1] + %0 = call i32 @ReadList(%struct.LinkList** %ll, %struct.List** null) nounwind ; <i32> [#uses=1] + switch i32 %0, label %bb5 [ + i32 7, label %bb4 + i32 42, label %bb3 + ] + +bb3: ; preds = %entry + ret i32 1 + +bb4: ; preds = %entry + ret i32 0 + +bb5: ; preds = %entry + ret i32 1 +} + +declare i32 @ReadList(%struct.LinkList** nocapture, %struct.List** nocapture) nounwind
diff --git a/src/LLVM/test/CodeGen/Thumb/2009-08-12-ConstIslandAssert.ll b/src/LLVM/test/CodeGen/Thumb/2009-08-12-ConstIslandAssert.ll new file mode 100644 index 0000000..5b420fc --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/2009-08-12-ConstIslandAssert.ll
@@ -0,0 +1,737 @@ +; RUN: llc < %s -mtriple=thumbv6-apple-darwin + + %struct.BF_KEY = type { [18 x i32], [1024 x i32] } + +define void @BF_encrypt(i32* nocapture %data, %struct.BF_KEY* nocapture %key, i32 %encrypt) nounwind { +entry: + %0 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 0; <i32*> [#uses=2] + %1 = load i32* %data, align 4 ; <i32> [#uses=2] + %2 = load i32* undef, align 4 ; <i32> [#uses=2] + br i1 undef, label %bb1, label %bb + +bb: ; preds = %entry + %3 = load i32* %0, align 4 ; <i32> [#uses=1] + %4 = xor i32 %3, %1 ; <i32> [#uses=4] + %5 = load i32* null, align 4 ; <i32> [#uses=1] + %6 = lshr i32 %4, 24 ; <i32> [#uses=1] + %7 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %6; <i32*> [#uses=1] + %8 = load i32* %7, align 4 ; <i32> [#uses=1] + %9 = lshr i32 %4, 16 ; <i32> [#uses=1] + %10 = or i32 %9, 256 ; <i32> [#uses=1] + %11 = and i32 %10, 511 ; <i32> [#uses=1] + %12 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %11; <i32*> [#uses=1] + %13 = load i32* %12, align 4 ; <i32> [#uses=1] + %14 = add i32 %13, %8 ; <i32> [#uses=1] + %15 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 undef; <i32*> [#uses=1] + %16 = load i32* %15, align 4 ; <i32> [#uses=1] + %17 = xor i32 %14, %16 ; <i32> [#uses=1] + %18 = or i32 %4, 768 ; <i32> [#uses=1] + %19 = and i32 %18, 1023 ; <i32> [#uses=1] + %20 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %19; <i32*> [#uses=1] + %21 = load i32* %20, align 4 ; <i32> [#uses=1] + %22 = add i32 %17, %21 ; <i32> [#uses=1] + %23 = xor i32 %5, %2 ; <i32> [#uses=1] + %24 = xor i32 %23, %22 ; <i32> [#uses=5] + %25 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 2; <i32*> [#uses=1] + %26 = load i32* %25, align 4 ; <i32> [#uses=1] + %27 = lshr i32 %24, 24 ; <i32> [#uses=1] + %28 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %27; <i32*> [#uses=1] + %29 = load i32* %28, align 4 ; <i32> [#uses=1] + %30 = lshr i32 %24, 16 ; <i32> [#uses=1] + %31 = or i32 %30, 256 ; <i32> [#uses=1] + 
%32 = and i32 %31, 511 ; <i32> [#uses=1] + %33 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %32; <i32*> [#uses=1] + %34 = load i32* %33, align 4 ; <i32> [#uses=1] + %35 = add i32 %34, %29 ; <i32> [#uses=1] + %36 = lshr i32 %24, 8 ; <i32> [#uses=1] + %37 = or i32 %36, 512 ; <i32> [#uses=1] + %38 = and i32 %37, 767 ; <i32> [#uses=1] + %39 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %38; <i32*> [#uses=1] + %40 = load i32* %39, align 4 ; <i32> [#uses=1] + %41 = xor i32 %35, %40 ; <i32> [#uses=1] + %42 = or i32 %24, 768 ; <i32> [#uses=1] + %43 = and i32 %42, 1023 ; <i32> [#uses=1] + %44 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %43; <i32*> [#uses=1] + %45 = load i32* %44, align 4 ; <i32> [#uses=1] + %46 = add i32 %41, %45 ; <i32> [#uses=1] + %47 = xor i32 %26, %4 ; <i32> [#uses=1] + %48 = xor i32 %47, %46 ; <i32> [#uses=5] + %49 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 3; <i32*> [#uses=1] + %50 = load i32* %49, align 4 ; <i32> [#uses=1] + %51 = lshr i32 %48, 24 ; <i32> [#uses=1] + %52 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %51; <i32*> [#uses=1] + %53 = load i32* %52, align 4 ; <i32> [#uses=1] + %54 = lshr i32 %48, 16 ; <i32> [#uses=1] + %55 = or i32 %54, 256 ; <i32> [#uses=1] + %56 = and i32 %55, 511 ; <i32> [#uses=1] + %57 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %56; <i32*> [#uses=1] + %58 = load i32* %57, align 4 ; <i32> [#uses=1] + %59 = add i32 %58, %53 ; <i32> [#uses=1] + %60 = lshr i32 %48, 8 ; <i32> [#uses=1] + %61 = or i32 %60, 512 ; <i32> [#uses=1] + %62 = and i32 %61, 767 ; <i32> [#uses=1] + %63 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %62; <i32*> [#uses=1] + %64 = load i32* %63, align 4 ; <i32> [#uses=1] + %65 = xor i32 %59, %64 ; <i32> [#uses=1] + %66 = or i32 %48, 768 ; <i32> [#uses=1] + %67 = and i32 %66, 1023 ; <i32> [#uses=1] + %68 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %67; <i32*> [#uses=1] + %69 = load i32* %68, align 4 ; <i32> [#uses=1] 
+ %70 = add i32 %65, %69 ; <i32> [#uses=1] + %71 = xor i32 %50, %24 ; <i32> [#uses=1] + %72 = xor i32 %71, %70 ; <i32> [#uses=5] + %73 = load i32* null, align 4 ; <i32> [#uses=1] + %74 = lshr i32 %72, 24 ; <i32> [#uses=1] + %75 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %74; <i32*> [#uses=1] + %76 = load i32* %75, align 4 ; <i32> [#uses=1] + %77 = lshr i32 %72, 16 ; <i32> [#uses=1] + %78 = or i32 %77, 256 ; <i32> [#uses=1] + %79 = and i32 %78, 511 ; <i32> [#uses=1] + %80 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %79; <i32*> [#uses=1] + %81 = load i32* %80, align 4 ; <i32> [#uses=1] + %82 = add i32 %81, %76 ; <i32> [#uses=1] + %83 = lshr i32 %72, 8 ; <i32> [#uses=1] + %84 = or i32 %83, 512 ; <i32> [#uses=1] + %85 = and i32 %84, 767 ; <i32> [#uses=1] + %86 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %85; <i32*> [#uses=1] + %87 = load i32* %86, align 4 ; <i32> [#uses=1] + %88 = xor i32 %82, %87 ; <i32> [#uses=1] + %89 = or i32 %72, 768 ; <i32> [#uses=1] + %90 = and i32 %89, 1023 ; <i32> [#uses=1] + %91 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %90; <i32*> [#uses=1] + %92 = load i32* %91, align 4 ; <i32> [#uses=1] + %93 = add i32 %88, %92 ; <i32> [#uses=1] + %94 = xor i32 %73, %48 ; <i32> [#uses=1] + %95 = xor i32 %94, %93 ; <i32> [#uses=5] + %96 = load i32* undef, align 4 ; <i32> [#uses=1] + %97 = lshr i32 %95, 24 ; <i32> [#uses=1] + %98 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %97; <i32*> [#uses=1] + %99 = load i32* %98, align 4 ; <i32> [#uses=1] + %100 = lshr i32 %95, 16 ; <i32> [#uses=1] + %101 = or i32 %100, 256 ; <i32> [#uses=1] + %102 = and i32 %101, 511 ; <i32> [#uses=1] + %103 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %102; <i32*> [#uses=1] + %104 = load i32* %103, align 4 ; <i32> [#uses=1] + %105 = add i32 %104, %99 ; <i32> [#uses=1] + %106 = lshr i32 %95, 8 ; <i32> [#uses=1] + %107 = or i32 %106, 512 ; <i32> [#uses=1] + %108 = and i32 %107, 767 ; <i32> [#uses=1] + %109 = 
getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %108; <i32*> [#uses=1] + %110 = load i32* %109, align 4 ; <i32> [#uses=1] + %111 = xor i32 %105, %110 ; <i32> [#uses=1] + %112 = or i32 %95, 768 ; <i32> [#uses=1] + %113 = and i32 %112, 1023 ; <i32> [#uses=1] + %114 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %113; <i32*> [#uses=1] + %115 = load i32* %114, align 4 ; <i32> [#uses=1] + %116 = add i32 %111, %115 ; <i32> [#uses=1] + %117 = xor i32 %96, %72 ; <i32> [#uses=1] + %118 = xor i32 %117, %116 ; <i32> [#uses=5] + %119 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 6; <i32*> [#uses=1] + %120 = load i32* %119, align 4 ; <i32> [#uses=1] + %121 = lshr i32 %118, 24 ; <i32> [#uses=1] + %122 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %121; <i32*> [#uses=1] + %123 = load i32* %122, align 4 ; <i32> [#uses=1] + %124 = lshr i32 %118, 16 ; <i32> [#uses=1] + %125 = or i32 %124, 256 ; <i32> [#uses=1] + %126 = and i32 %125, 511 ; <i32> [#uses=1] + %127 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %126; <i32*> [#uses=1] + %128 = load i32* %127, align 4 ; <i32> [#uses=1] + %129 = add i32 %128, %123 ; <i32> [#uses=1] + %130 = lshr i32 %118, 8 ; <i32> [#uses=1] + %131 = or i32 %130, 512 ; <i32> [#uses=1] + %132 = and i32 %131, 767 ; <i32> [#uses=1] + %133 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %132; <i32*> [#uses=1] + %134 = load i32* %133, align 4 ; <i32> [#uses=1] + %135 = xor i32 %129, %134 ; <i32> [#uses=1] + %136 = or i32 %118, 768 ; <i32> [#uses=1] + %137 = and i32 %136, 1023 ; <i32> [#uses=1] + %138 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %137; <i32*> [#uses=1] + %139 = load i32* %138, align 4 ; <i32> [#uses=1] + %140 = add i32 %135, %139 ; <i32> [#uses=1] + %141 = xor i32 %120, %95 ; <i32> [#uses=1] + %142 = xor i32 %141, %140 ; <i32> [#uses=5] + %143 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 7; <i32*> [#uses=1] + %144 = load i32* %143, align 4 ; <i32> [#uses=1] + %145 = lshr 
i32 %142, 24 ; <i32> [#uses=1] + %146 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %145; <i32*> [#uses=1] + %147 = load i32* %146, align 4 ; <i32> [#uses=1] + %148 = lshr i32 %142, 16 ; <i32> [#uses=1] + %149 = or i32 %148, 256 ; <i32> [#uses=1] + %150 = and i32 %149, 511 ; <i32> [#uses=1] + %151 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %150; <i32*> [#uses=1] + %152 = load i32* %151, align 4 ; <i32> [#uses=1] + %153 = add i32 %152, %147 ; <i32> [#uses=1] + %154 = lshr i32 %142, 8 ; <i32> [#uses=1] + %155 = or i32 %154, 512 ; <i32> [#uses=1] + %156 = and i32 %155, 767 ; <i32> [#uses=1] + %157 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %156; <i32*> [#uses=1] + %158 = load i32* %157, align 4 ; <i32> [#uses=1] + %159 = xor i32 %153, %158 ; <i32> [#uses=1] + %160 = or i32 %142, 768 ; <i32> [#uses=1] + %161 = and i32 %160, 1023 ; <i32> [#uses=1] + %162 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %161; <i32*> [#uses=1] + %163 = load i32* %162, align 4 ; <i32> [#uses=1] + %164 = add i32 %159, %163 ; <i32> [#uses=1] + %165 = xor i32 %144, %118 ; <i32> [#uses=1] + %166 = xor i32 %165, %164 ; <i32> [#uses=5] + %167 = load i32* undef, align 4 ; <i32> [#uses=1] + %168 = lshr i32 %166, 24 ; <i32> [#uses=1] + %169 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %168; <i32*> [#uses=1] + %170 = load i32* %169, align 4 ; <i32> [#uses=1] + %171 = lshr i32 %166, 16 ; <i32> [#uses=1] + %172 = or i32 %171, 256 ; <i32> [#uses=1] + %173 = and i32 %172, 511 ; <i32> [#uses=1] + %174 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %173; <i32*> [#uses=1] + %175 = load i32* %174, align 4 ; <i32> [#uses=1] + %176 = add i32 %175, %170 ; <i32> [#uses=1] + %177 = lshr i32 %166, 8 ; <i32> [#uses=1] + %178 = or i32 %177, 512 ; <i32> [#uses=1] + %179 = and i32 %178, 767 ; <i32> [#uses=1] + %180 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %179; <i32*> [#uses=1] + %181 = load i32* %180, align 4 ; <i32> [#uses=1] + %182 = xor 
i32 %176, %181 ; <i32> [#uses=1] + %183 = or i32 %166, 768 ; <i32> [#uses=1] + %184 = and i32 %183, 1023 ; <i32> [#uses=1] + %185 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %184; <i32*> [#uses=1] + %186 = load i32* %185, align 4 ; <i32> [#uses=1] + %187 = add i32 %182, %186 ; <i32> [#uses=1] + %188 = xor i32 %167, %142 ; <i32> [#uses=1] + %189 = xor i32 %188, %187 ; <i32> [#uses=5] + %190 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 9; <i32*> [#uses=1] + %191 = load i32* %190, align 4 ; <i32> [#uses=1] + %192 = lshr i32 %189, 24 ; <i32> [#uses=1] + %193 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %192; <i32*> [#uses=1] + %194 = load i32* %193, align 4 ; <i32> [#uses=1] + %195 = lshr i32 %189, 16 ; <i32> [#uses=1] + %196 = or i32 %195, 256 ; <i32> [#uses=1] + %197 = and i32 %196, 511 ; <i32> [#uses=1] + %198 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %197; <i32*> [#uses=1] + %199 = load i32* %198, align 4 ; <i32> [#uses=1] + %200 = add i32 %199, %194 ; <i32> [#uses=1] + %201 = lshr i32 %189, 8 ; <i32> [#uses=1] + %202 = or i32 %201, 512 ; <i32> [#uses=1] + %203 = and i32 %202, 767 ; <i32> [#uses=1] + %204 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %203; <i32*> [#uses=1] + %205 = load i32* %204, align 4 ; <i32> [#uses=1] + %206 = xor i32 %200, %205 ; <i32> [#uses=1] + %207 = or i32 %189, 768 ; <i32> [#uses=1] + %208 = and i32 %207, 1023 ; <i32> [#uses=1] + %209 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %208; <i32*> [#uses=1] + %210 = load i32* %209, align 4 ; <i32> [#uses=1] + %211 = add i32 %206, %210 ; <i32> [#uses=1] + %212 = xor i32 %191, %166 ; <i32> [#uses=1] + %213 = xor i32 %212, %211 ; <i32> [#uses=5] + %214 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 10; <i32*> [#uses=1] + %215 = load i32* %214, align 4 ; <i32> [#uses=1] + %216 = lshr i32 %213, 24 ; <i32> [#uses=1] + %217 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %216; <i32*> [#uses=1] + %218 = load i32* 
%217, align 4 ; <i32> [#uses=1] + %219 = lshr i32 %213, 16 ; <i32> [#uses=1] + %220 = or i32 %219, 256 ; <i32> [#uses=1] + %221 = and i32 %220, 511 ; <i32> [#uses=1] + %222 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %221; <i32*> [#uses=1] + %223 = load i32* %222, align 4 ; <i32> [#uses=1] + %224 = add i32 %223, %218 ; <i32> [#uses=1] + %225 = lshr i32 %213, 8 ; <i32> [#uses=1] + %226 = or i32 %225, 512 ; <i32> [#uses=1] + %227 = and i32 %226, 767 ; <i32> [#uses=1] + %228 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %227; <i32*> [#uses=1] + %229 = load i32* %228, align 4 ; <i32> [#uses=1] + %230 = xor i32 %224, %229 ; <i32> [#uses=1] + %231 = or i32 %213, 768 ; <i32> [#uses=1] + %232 = and i32 %231, 1023 ; <i32> [#uses=1] + %233 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %232; <i32*> [#uses=1] + %234 = load i32* %233, align 4 ; <i32> [#uses=1] + %235 = add i32 %230, %234 ; <i32> [#uses=1] + %236 = xor i32 %215, %189 ; <i32> [#uses=1] + %237 = xor i32 %236, %235 ; <i32> [#uses=5] + %238 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 11; <i32*> [#uses=1] + %239 = load i32* %238, align 4 ; <i32> [#uses=1] + %240 = lshr i32 %237, 24 ; <i32> [#uses=1] + %241 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %240; <i32*> [#uses=1] + %242 = load i32* %241, align 4 ; <i32> [#uses=1] + %243 = lshr i32 %237, 16 ; <i32> [#uses=1] + %244 = or i32 %243, 256 ; <i32> [#uses=1] + %245 = and i32 %244, 511 ; <i32> [#uses=1] + %246 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %245; <i32*> [#uses=1] + %247 = load i32* %246, align 4 ; <i32> [#uses=1] + %248 = add i32 %247, %242 ; <i32> [#uses=1] + %249 = lshr i32 %237, 8 ; <i32> [#uses=1] + %250 = or i32 %249, 512 ; <i32> [#uses=1] + %251 = and i32 %250, 767 ; <i32> [#uses=1] + %252 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %251; <i32*> [#uses=1] + %253 = load i32* %252, align 4 ; <i32> [#uses=1] + %254 = xor i32 %248, %253 ; <i32> [#uses=1] + %255 = or i32 
%237, 768 ; <i32> [#uses=1] + %256 = and i32 %255, 1023 ; <i32> [#uses=1] + %257 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %256; <i32*> [#uses=1] + %258 = load i32* %257, align 4 ; <i32> [#uses=1] + %259 = add i32 %254, %258 ; <i32> [#uses=1] + %260 = xor i32 %239, %213 ; <i32> [#uses=1] + %261 = xor i32 %260, %259 ; <i32> [#uses=5] + %262 = load i32* undef, align 4 ; <i32> [#uses=1] + %263 = lshr i32 %261, 24 ; <i32> [#uses=1] + %264 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %263; <i32*> [#uses=1] + %265 = load i32* %264, align 4 ; <i32> [#uses=1] + %266 = lshr i32 %261, 16 ; <i32> [#uses=1] + %267 = or i32 %266, 256 ; <i32> [#uses=1] + %268 = and i32 %267, 511 ; <i32> [#uses=1] + %269 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %268; <i32*> [#uses=1] + %270 = load i32* %269, align 4 ; <i32> [#uses=1] + %271 = add i32 %270, %265 ; <i32> [#uses=1] + %272 = lshr i32 %261, 8 ; <i32> [#uses=1] + %273 = or i32 %272, 512 ; <i32> [#uses=1] + %274 = and i32 %273, 767 ; <i32> [#uses=1] + %275 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %274; <i32*> [#uses=1] + %276 = load i32* %275, align 4 ; <i32> [#uses=1] + %277 = xor i32 %271, %276 ; <i32> [#uses=1] + %278 = or i32 %261, 768 ; <i32> [#uses=1] + %279 = and i32 %278, 1023 ; <i32> [#uses=1] + %280 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %279; <i32*> [#uses=1] + %281 = load i32* %280, align 4 ; <i32> [#uses=1] + %282 = add i32 %277, %281 ; <i32> [#uses=1] + %283 = xor i32 %262, %237 ; <i32> [#uses=1] + %284 = xor i32 %283, %282 ; <i32> [#uses=4] + %285 = load i32* null, align 4 ; <i32> [#uses=1] + %286 = lshr i32 %284, 24 ; <i32> [#uses=1] + %287 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %286; <i32*> [#uses=1] + %288 = load i32* %287, align 4 ; <i32> [#uses=1] + %289 = lshr i32 %284, 16 ; <i32> [#uses=1] + %290 = or i32 %289, 256 ; <i32> [#uses=1] + %291 = and i32 %290, 511 ; <i32> [#uses=1] + %292 = getelementptr %struct.BF_KEY* %key, i32 0, 
i32 1, i32 %291; <i32*> [#uses=1] + %293 = load i32* %292, align 4 ; <i32> [#uses=1] + %294 = add i32 %293, %288 ; <i32> [#uses=1] + %295 = lshr i32 %284, 8 ; <i32> [#uses=1] + %296 = or i32 %295, 512 ; <i32> [#uses=1] + %297 = and i32 %296, 767 ; <i32> [#uses=1] + %298 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %297; <i32*> [#uses=1] + %299 = load i32* %298, align 4 ; <i32> [#uses=1] + %300 = xor i32 %294, %299 ; <i32> [#uses=1] + %301 = or i32 %284, 768 ; <i32> [#uses=1] + %302 = and i32 %301, 1023 ; <i32> [#uses=1] + %303 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %302; <i32*> [#uses=1] + %304 = load i32* %303, align 4 ; <i32> [#uses=1] + %305 = add i32 %300, %304 ; <i32> [#uses=1] + %306 = xor i32 %285, %261 ; <i32> [#uses=1] + %307 = xor i32 %306, %305 ; <i32> [#uses=1] + %308 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 15; <i32*> [#uses=1] + %309 = load i32* %308, align 4 ; <i32> [#uses=1] + %310 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 0; <i32*> [#uses=1] + %311 = load i32* %310, align 4 ; <i32> [#uses=1] + %312 = or i32 0, 256 ; <i32> [#uses=1] + %313 = and i32 %312, 511 ; <i32> [#uses=1] + %314 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %313; <i32*> [#uses=1] + %315 = load i32* %314, align 4 ; <i32> [#uses=1] + %316 = add i32 %315, %311 ; <i32> [#uses=1] + %317 = or i32 0, 512 ; <i32> [#uses=1] + %318 = and i32 %317, 767 ; <i32> [#uses=1] + %319 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %318; <i32*> [#uses=1] + %320 = load i32* %319, align 4 ; <i32> [#uses=1] + %321 = xor i32 %316, %320 ; <i32> [#uses=1] + %322 = or i32 0, 768 ; <i32> [#uses=1] + %323 = and i32 %322, 1023 ; <i32> [#uses=1] + %324 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %323; <i32*> [#uses=1] + %325 = load i32* %324, align 4 ; <i32> [#uses=1] + %326 = add i32 %321, %325 ; <i32> [#uses=1] + %327 = xor i32 %309, %307 ; <i32> [#uses=1] + %328 = xor i32 %327, %326 ; <i32> [#uses=5] + %329 = 
getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 17; <i32*> [#uses=1] + br label %bb2 + +bb1: ; preds = %entry + %330 = load i32* null, align 4 ; <i32> [#uses=1] + %331 = xor i32 %330, %1 ; <i32> [#uses=4] + %332 = load i32* null, align 4 ; <i32> [#uses=1] + %333 = lshr i32 %331, 24 ; <i32> [#uses=1] + %334 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %333; <i32*> [#uses=1] + %335 = load i32* %334, align 4 ; <i32> [#uses=1] + %336 = load i32* null, align 4 ; <i32> [#uses=1] + %337 = add i32 %336, %335 ; <i32> [#uses=1] + %338 = lshr i32 %331, 8 ; <i32> [#uses=1] + %339 = or i32 %338, 512 ; <i32> [#uses=1] + %340 = and i32 %339, 767 ; <i32> [#uses=1] + %341 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %340; <i32*> [#uses=1] + %342 = load i32* %341, align 4 ; <i32> [#uses=1] + %343 = xor i32 %337, %342 ; <i32> [#uses=1] + %344 = or i32 %331, 768 ; <i32> [#uses=1] + %345 = and i32 %344, 1023 ; <i32> [#uses=1] + %346 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %345; <i32*> [#uses=1] + %347 = load i32* %346, align 4 ; <i32> [#uses=1] + %348 = add i32 %343, %347 ; <i32> [#uses=1] + %349 = xor i32 %332, %2 ; <i32> [#uses=1] + %350 = xor i32 %349, %348 ; <i32> [#uses=5] + %351 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 15; <i32*> [#uses=1] + %352 = load i32* %351, align 4 ; <i32> [#uses=1] + %353 = lshr i32 %350, 24 ; <i32> [#uses=1] + %354 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %353; <i32*> [#uses=1] + %355 = load i32* %354, align 4 ; <i32> [#uses=1] + %356 = lshr i32 %350, 16 ; <i32> [#uses=1] + %357 = or i32 %356, 256 ; <i32> [#uses=1] + %358 = and i32 %357, 511 ; <i32> [#uses=1] + %359 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %358; <i32*> [#uses=1] + %360 = load i32* %359, align 4 ; <i32> [#uses=1] + %361 = add i32 %360, %355 ; <i32> [#uses=1] + %362 = lshr i32 %350, 8 ; <i32> [#uses=1] + %363 = or i32 %362, 512 ; <i32> [#uses=1] + %364 = and i32 %363, 767 ; <i32> [#uses=1] + %365 = 
getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %364; <i32*> [#uses=1] + %366 = load i32* %365, align 4 ; <i32> [#uses=1] + %367 = xor i32 %361, %366 ; <i32> [#uses=1] + %368 = or i32 %350, 768 ; <i32> [#uses=1] + %369 = and i32 %368, 1023 ; <i32> [#uses=1] + %370 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %369; <i32*> [#uses=1] + %371 = load i32* %370, align 4 ; <i32> [#uses=1] + %372 = add i32 %367, %371 ; <i32> [#uses=1] + %373 = xor i32 %352, %331 ; <i32> [#uses=1] + %374 = xor i32 %373, %372 ; <i32> [#uses=5] + %375 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 14; <i32*> [#uses=1] + %376 = load i32* %375, align 4 ; <i32> [#uses=1] + %377 = lshr i32 %374, 24 ; <i32> [#uses=1] + %378 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %377; <i32*> [#uses=1] + %379 = load i32* %378, align 4 ; <i32> [#uses=1] + %380 = lshr i32 %374, 16 ; <i32> [#uses=1] + %381 = or i32 %380, 256 ; <i32> [#uses=1] + %382 = and i32 %381, 511 ; <i32> [#uses=1] + %383 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %382; <i32*> [#uses=1] + %384 = load i32* %383, align 4 ; <i32> [#uses=1] + %385 = add i32 %384, %379 ; <i32> [#uses=1] + %386 = lshr i32 %374, 8 ; <i32> [#uses=1] + %387 = or i32 %386, 512 ; <i32> [#uses=1] + %388 = and i32 %387, 767 ; <i32> [#uses=1] + %389 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %388; <i32*> [#uses=1] + %390 = load i32* %389, align 4 ; <i32> [#uses=1] + %391 = xor i32 %385, %390 ; <i32> [#uses=1] + %392 = or i32 %374, 768 ; <i32> [#uses=1] + %393 = and i32 %392, 1023 ; <i32> [#uses=1] + %394 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %393; <i32*> [#uses=1] + %395 = load i32* %394, align 4 ; <i32> [#uses=1] + %396 = add i32 %391, %395 ; <i32> [#uses=1] + %397 = xor i32 %376, %350 ; <i32> [#uses=1] + %398 = xor i32 %397, %396 ; <i32> [#uses=5] + %399 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 13; <i32*> [#uses=1] + %400 = load i32* %399, align 4 ; <i32> [#uses=1] + %401 = 
lshr i32 %398, 24 ; <i32> [#uses=1] + %402 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %401; <i32*> [#uses=1] + %403 = load i32* %402, align 4 ; <i32> [#uses=1] + %404 = lshr i32 %398, 16 ; <i32> [#uses=1] + %405 = or i32 %404, 256 ; <i32> [#uses=1] + %406 = and i32 %405, 511 ; <i32> [#uses=1] + %407 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %406; <i32*> [#uses=1] + %408 = load i32* %407, align 4 ; <i32> [#uses=1] + %409 = add i32 %408, %403 ; <i32> [#uses=1] + %410 = lshr i32 %398, 8 ; <i32> [#uses=1] + %411 = or i32 %410, 512 ; <i32> [#uses=1] + %412 = and i32 %411, 767 ; <i32> [#uses=1] + %413 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %412; <i32*> [#uses=1] + %414 = load i32* %413, align 4 ; <i32> [#uses=1] + %415 = xor i32 %409, %414 ; <i32> [#uses=1] + %416 = or i32 %398, 768 ; <i32> [#uses=1] + %417 = and i32 %416, 1023 ; <i32> [#uses=1] + %418 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %417; <i32*> [#uses=1] + %419 = load i32* %418, align 4 ; <i32> [#uses=1] + %420 = add i32 %415, %419 ; <i32> [#uses=1] + %421 = xor i32 %400, %374 ; <i32> [#uses=1] + %422 = xor i32 %421, %420 ; <i32> [#uses=5] + %423 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 12; <i32*> [#uses=1] + %424 = load i32* %423, align 4 ; <i32> [#uses=1] + %425 = lshr i32 %422, 24 ; <i32> [#uses=1] + %426 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %425; <i32*> [#uses=1] + %427 = load i32* %426, align 4 ; <i32> [#uses=1] + %428 = lshr i32 %422, 16 ; <i32> [#uses=1] + %429 = or i32 %428, 256 ; <i32> [#uses=1] + %430 = and i32 %429, 511 ; <i32> [#uses=1] + %431 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %430; <i32*> [#uses=1] + %432 = load i32* %431, align 4 ; <i32> [#uses=1] + %433 = add i32 %432, %427 ; <i32> [#uses=1] + %434 = lshr i32 %422, 8 ; <i32> [#uses=1] + %435 = or i32 %434, 512 ; <i32> [#uses=1] + %436 = and i32 %435, 767 ; <i32> [#uses=1] + %437 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, 
i32 %436; <i32*> [#uses=1] + %438 = load i32* %437, align 4 ; <i32> [#uses=1] + %439 = xor i32 %433, %438 ; <i32> [#uses=1] + %440 = or i32 %422, 768 ; <i32> [#uses=1] + %441 = and i32 %440, 1023 ; <i32> [#uses=1] + %442 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %441; <i32*> [#uses=1] + %443 = load i32* %442, align 4 ; <i32> [#uses=1] + %444 = add i32 %439, %443 ; <i32> [#uses=1] + %445 = xor i32 %424, %398 ; <i32> [#uses=1] + %446 = xor i32 %445, %444 ; <i32> [#uses=5] + %447 = load i32* undef, align 4 ; <i32> [#uses=1] + %448 = lshr i32 %446, 24 ; <i32> [#uses=1] + %449 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %448; <i32*> [#uses=1] + %450 = load i32* %449, align 4 ; <i32> [#uses=1] + %451 = lshr i32 %446, 16 ; <i32> [#uses=1] + %452 = or i32 %451, 256 ; <i32> [#uses=1] + %453 = and i32 %452, 511 ; <i32> [#uses=1] + %454 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %453; <i32*> [#uses=1] + %455 = load i32* %454, align 4 ; <i32> [#uses=1] + %456 = add i32 %455, %450 ; <i32> [#uses=1] + %457 = lshr i32 %446, 8 ; <i32> [#uses=1] + %458 = or i32 %457, 512 ; <i32> [#uses=1] + %459 = and i32 %458, 767 ; <i32> [#uses=1] + %460 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %459; <i32*> [#uses=1] + %461 = load i32* %460, align 4 ; <i32> [#uses=1] + %462 = xor i32 %456, %461 ; <i32> [#uses=1] + %463 = or i32 %446, 768 ; <i32> [#uses=1] + %464 = and i32 %463, 1023 ; <i32> [#uses=1] + %465 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %464; <i32*> [#uses=1] + %466 = load i32* %465, align 4 ; <i32> [#uses=1] + %467 = add i32 %462, %466 ; <i32> [#uses=1] + %468 = xor i32 %447, %422 ; <i32> [#uses=1] + %469 = xor i32 %468, %467 ; <i32> [#uses=5] + %470 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 10; <i32*> [#uses=1] + %471 = load i32* %470, align 4 ; <i32> [#uses=1] + %472 = lshr i32 %469, 24 ; <i32> [#uses=1] + %473 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %472; <i32*> [#uses=1] + %474 = 
load i32* %473, align 4 ; <i32> [#uses=1] + %475 = lshr i32 %469, 16 ; <i32> [#uses=1] + %476 = or i32 %475, 256 ; <i32> [#uses=1] + %477 = and i32 %476, 511 ; <i32> [#uses=1] + %478 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %477; <i32*> [#uses=1] + %479 = load i32* %478, align 4 ; <i32> [#uses=1] + %480 = add i32 %479, %474 ; <i32> [#uses=1] + %481 = lshr i32 %469, 8 ; <i32> [#uses=1] + %482 = or i32 %481, 512 ; <i32> [#uses=1] + %483 = and i32 %482, 767 ; <i32> [#uses=1] + %484 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %483; <i32*> [#uses=1] + %485 = load i32* %484, align 4 ; <i32> [#uses=1] + %486 = xor i32 %480, %485 ; <i32> [#uses=1] + %487 = or i32 %469, 768 ; <i32> [#uses=1] + %488 = and i32 %487, 1023 ; <i32> [#uses=1] + %489 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %488; <i32*> [#uses=1] + %490 = load i32* %489, align 4 ; <i32> [#uses=1] + %491 = add i32 %486, %490 ; <i32> [#uses=1] + %492 = xor i32 %471, %446 ; <i32> [#uses=1] + %493 = xor i32 %492, %491 ; <i32> [#uses=5] + %494 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 9; <i32*> [#uses=1] + %495 = load i32* %494, align 4 ; <i32> [#uses=1] + %496 = lshr i32 %493, 24 ; <i32> [#uses=1] + %497 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %496; <i32*> [#uses=1] + %498 = load i32* %497, align 4 ; <i32> [#uses=1] + %499 = lshr i32 %493, 16 ; <i32> [#uses=1] + %500 = or i32 %499, 256 ; <i32> [#uses=1] + %501 = and i32 %500, 511 ; <i32> [#uses=1] + %502 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %501; <i32*> [#uses=1] + %503 = load i32* %502, align 4 ; <i32> [#uses=1] + %504 = add i32 %503, %498 ; <i32> [#uses=1] + %505 = lshr i32 %493, 8 ; <i32> [#uses=1] + %506 = or i32 %505, 512 ; <i32> [#uses=1] + %507 = and i32 %506, 767 ; <i32> [#uses=1] + %508 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %507; <i32*> [#uses=1] + %509 = load i32* %508, align 4 ; <i32> [#uses=1] + %510 = xor i32 %504, %509 ; <i32> [#uses=1] + %511 = 
or i32 %493, 768 ; <i32> [#uses=1] + %512 = and i32 %511, 1023 ; <i32> [#uses=1] + %513 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %512; <i32*> [#uses=1] + %514 = load i32* %513, align 4 ; <i32> [#uses=1] + %515 = add i32 %510, %514 ; <i32> [#uses=1] + %516 = xor i32 %495, %469 ; <i32> [#uses=1] + %517 = xor i32 %516, %515 ; <i32> [#uses=5] + %518 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 8; <i32*> [#uses=1] + %519 = load i32* %518, align 4 ; <i32> [#uses=1] + %520 = lshr i32 %517, 24 ; <i32> [#uses=1] + %521 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %520; <i32*> [#uses=1] + %522 = load i32* %521, align 4 ; <i32> [#uses=1] + %523 = lshr i32 %517, 16 ; <i32> [#uses=1] + %524 = or i32 %523, 256 ; <i32> [#uses=1] + %525 = and i32 %524, 511 ; <i32> [#uses=1] + %526 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %525; <i32*> [#uses=1] + %527 = load i32* %526, align 4 ; <i32> [#uses=1] + %528 = add i32 %527, %522 ; <i32> [#uses=1] + %529 = lshr i32 %517, 8 ; <i32> [#uses=1] + %530 = or i32 %529, 512 ; <i32> [#uses=1] + %531 = and i32 %530, 767 ; <i32> [#uses=1] + %532 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %531; <i32*> [#uses=1] + %533 = load i32* %532, align 4 ; <i32> [#uses=1] + %534 = xor i32 %528, %533 ; <i32> [#uses=1] + %535 = or i32 %517, 768 ; <i32> [#uses=1] + %536 = and i32 %535, 1023 ; <i32> [#uses=1] + %537 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %536; <i32*> [#uses=1] + %538 = load i32* %537, align 4 ; <i32> [#uses=1] + %539 = add i32 %534, %538 ; <i32> [#uses=1] + %540 = xor i32 %519, %493 ; <i32> [#uses=1] + %541 = xor i32 %540, %539 ; <i32> [#uses=5] + %542 = load i32* undef, align 4 ; <i32> [#uses=1] + %543 = lshr i32 %541, 24 ; <i32> [#uses=1] + %544 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %543; <i32*> [#uses=1] + %545 = load i32* %544, align 4 ; <i32> [#uses=1] + %546 = lshr i32 %541, 16 ; <i32> [#uses=1] + %547 = or i32 %546, 256 ; <i32> [#uses=1] + %548 
= and i32 %547, 511 ; <i32> [#uses=1] + %549 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %548; <i32*> [#uses=1] + %550 = load i32* %549, align 4 ; <i32> [#uses=1] + %551 = add i32 %550, %545 ; <i32> [#uses=1] + %552 = lshr i32 %541, 8 ; <i32> [#uses=1] + %553 = or i32 %552, 512 ; <i32> [#uses=1] + %554 = and i32 %553, 767 ; <i32> [#uses=1] + %555 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %554; <i32*> [#uses=1] + %556 = load i32* %555, align 4 ; <i32> [#uses=1] + %557 = xor i32 %551, %556 ; <i32> [#uses=1] + %558 = or i32 %541, 768 ; <i32> [#uses=1] + %559 = and i32 %558, 1023 ; <i32> [#uses=1] + %560 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %559; <i32*> [#uses=1] + %561 = load i32* %560, align 4 ; <i32> [#uses=1] + %562 = add i32 %557, %561 ; <i32> [#uses=1] + %563 = xor i32 %542, %517 ; <i32> [#uses=1] + %564 = xor i32 %563, %562 ; <i32> [#uses=5] + %565 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 6; <i32*> [#uses=1] + %566 = load i32* %565, align 4 ; <i32> [#uses=1] + %567 = lshr i32 %564, 24 ; <i32> [#uses=1] + %568 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %567; <i32*> [#uses=1] + %569 = load i32* %568, align 4 ; <i32> [#uses=1] + %570 = lshr i32 %564, 16 ; <i32> [#uses=1] + %571 = or i32 %570, 256 ; <i32> [#uses=1] + %572 = and i32 %571, 511 ; <i32> [#uses=1] + %573 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %572; <i32*> [#uses=1] + %574 = load i32* %573, align 4 ; <i32> [#uses=1] + %575 = add i32 %574, %569 ; <i32> [#uses=1] + %576 = lshr i32 %564, 8 ; <i32> [#uses=1] + %577 = or i32 %576, 512 ; <i32> [#uses=1] + %578 = and i32 %577, 767 ; <i32> [#uses=1] + %579 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %578; <i32*> [#uses=1] + %580 = load i32* %579, align 4 ; <i32> [#uses=1] + %581 = xor i32 %575, %580 ; <i32> [#uses=1] + %582 = or i32 %564, 768 ; <i32> [#uses=1] + %583 = and i32 %582, 1023 ; <i32> [#uses=1] + %584 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, 
i32 %583; <i32*> [#uses=1] + %585 = load i32* %584, align 4 ; <i32> [#uses=1] + %586 = add i32 %581, %585 ; <i32> [#uses=1] + %587 = xor i32 %566, %541 ; <i32> [#uses=1] + %588 = xor i32 %587, %586 ; <i32> [#uses=5] + %589 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 5; <i32*> [#uses=1] + %590 = load i32* %589, align 4 ; <i32> [#uses=1] + %591 = lshr i32 %588, 24 ; <i32> [#uses=1] + %592 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %591; <i32*> [#uses=1] + %593 = load i32* %592, align 4 ; <i32> [#uses=1] + %594 = lshr i32 %588, 16 ; <i32> [#uses=1] + %595 = or i32 %594, 256 ; <i32> [#uses=1] + %596 = and i32 %595, 511 ; <i32> [#uses=1] + %597 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %596; <i32*> [#uses=1] + %598 = load i32* %597, align 4 ; <i32> [#uses=1] + %599 = add i32 %598, %593 ; <i32> [#uses=1] + %600 = lshr i32 %588, 8 ; <i32> [#uses=1] + %601 = or i32 %600, 512 ; <i32> [#uses=1] + %602 = and i32 %601, 767 ; <i32> [#uses=1] + %603 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %602; <i32*> [#uses=1] + %604 = load i32* %603, align 4 ; <i32> [#uses=1] + %605 = xor i32 %599, %604 ; <i32> [#uses=1] + %606 = or i32 %588, 768 ; <i32> [#uses=1] + %607 = and i32 %606, 1023 ; <i32> [#uses=1] + %608 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %607; <i32*> [#uses=1] + %609 = load i32* %608, align 4 ; <i32> [#uses=1] + %610 = add i32 %605, %609 ; <i32> [#uses=1] + %611 = xor i32 %590, %564 ; <i32> [#uses=1] + %612 = xor i32 %611, %610 ; <i32> [#uses=5] + %613 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 4; <i32*> [#uses=1] + %614 = load i32* %613, align 4 ; <i32> [#uses=1] + %615 = lshr i32 %612, 24 ; <i32> [#uses=1] + %616 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %615; <i32*> [#uses=1] + %617 = load i32* %616, align 4 ; <i32> [#uses=1] + %618 = lshr i32 %612, 16 ; <i32> [#uses=1] + %619 = or i32 %618, 256 ; <i32> [#uses=1] + %620 = and i32 %619, 511 ; <i32> [#uses=1] + %621 = 
getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %620; <i32*> [#uses=1] + %622 = load i32* %621, align 4 ; <i32> [#uses=1] + %623 = add i32 %622, %617 ; <i32> [#uses=1] + %624 = lshr i32 %612, 8 ; <i32> [#uses=1] + %625 = or i32 %624, 512 ; <i32> [#uses=1] + %626 = and i32 %625, 767 ; <i32> [#uses=1] + %627 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %626; <i32*> [#uses=1] + %628 = load i32* %627, align 4 ; <i32> [#uses=1] + %629 = xor i32 %623, %628 ; <i32> [#uses=1] + %630 = or i32 %612, 768 ; <i32> [#uses=1] + %631 = and i32 %630, 1023 ; <i32> [#uses=1] + %632 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %631; <i32*> [#uses=1] + %633 = load i32* %632, align 4 ; <i32> [#uses=1] + %634 = add i32 %629, %633 ; <i32> [#uses=1] + %635 = xor i32 %614, %588 ; <i32> [#uses=1] + %636 = xor i32 %635, %634 ; <i32> [#uses=5] + %637 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 3; <i32*> [#uses=1] + %638 = load i32* %637, align 4 ; <i32> [#uses=1] + %639 = lshr i32 %636, 24 ; <i32> [#uses=1] + %640 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %639; <i32*> [#uses=1] + %641 = load i32* %640, align 4 ; <i32> [#uses=1] + %642 = lshr i32 %636, 16 ; <i32> [#uses=1] + %643 = or i32 %642, 256 ; <i32> [#uses=1] + %644 = and i32 %643, 511 ; <i32> [#uses=1] + %645 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %644; <i32*> [#uses=1] + %646 = load i32* %645, align 4 ; <i32> [#uses=1] + %647 = add i32 %646, %641 ; <i32> [#uses=1] + %648 = lshr i32 %636, 8 ; <i32> [#uses=1] + %649 = or i32 %648, 512 ; <i32> [#uses=1] + %650 = and i32 %649, 767 ; <i32> [#uses=1] + %651 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %650; <i32*> [#uses=1] + %652 = load i32* %651, align 4 ; <i32> [#uses=1] + %653 = xor i32 %647, %652 ; <i32> [#uses=1] + %654 = or i32 %636, 768 ; <i32> [#uses=1] + %655 = and i32 %654, 1023 ; <i32> [#uses=1] + %656 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %655; <i32*> [#uses=1] + %657 = load i32* 
%656, align 4 ; <i32> [#uses=1] + %658 = add i32 %653, %657 ; <i32> [#uses=1] + %659 = xor i32 %638, %612 ; <i32> [#uses=1] + %660 = xor i32 %659, %658 ; <i32> [#uses=5] + %661 = load i32* undef, align 4 ; <i32> [#uses=1] + %662 = lshr i32 %660, 24 ; <i32> [#uses=1] + %663 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %662; <i32*> [#uses=1] + %664 = load i32* %663, align 4 ; <i32> [#uses=1] + %665 = lshr i32 %660, 16 ; <i32> [#uses=1] + %666 = or i32 %665, 256 ; <i32> [#uses=1] + %667 = and i32 %666, 511 ; <i32> [#uses=1] + %668 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %667; <i32*> [#uses=1] + %669 = load i32* %668, align 4 ; <i32> [#uses=1] + %670 = add i32 %669, %664 ; <i32> [#uses=1] + %671 = lshr i32 %660, 8 ; <i32> [#uses=1] + %672 = or i32 %671, 512 ; <i32> [#uses=1] + %673 = and i32 %672, 767 ; <i32> [#uses=1] + %674 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %673; <i32*> [#uses=1] + %675 = load i32* %674, align 4 ; <i32> [#uses=1] + %676 = xor i32 %670, %675 ; <i32> [#uses=1] + %677 = or i32 %660, 768 ; <i32> [#uses=1] + %678 = and i32 %677, 1023 ; <i32> [#uses=1] + %679 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %678; <i32*> [#uses=1] + %680 = load i32* %679, align 4 ; <i32> [#uses=1] + %681 = add i32 %676, %680 ; <i32> [#uses=1] + %682 = xor i32 %661, %636 ; <i32> [#uses=1] + %683 = xor i32 %682, %681 ; <i32> [#uses=5] + %684 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 1; <i32*> [#uses=1] + br label %bb2 + +bb2: ; preds = %bb1, %bb + %.pn2.in = phi i32* [ %329, %bb ], [ %0, %bb1 ]; <i32*> [#uses=1] + %.pn3 = phi i32 [ %328, %bb ], [ %683, %bb1 ]; <i32> [#uses=1] + %.pn15.in = phi i32 [ %328, %bb ], [ %683, %bb1 ]; <i32> [#uses=1] + %.pn14.in.in.in = phi i32 [ %328, %bb ], [ %683, %bb1 ]; <i32> [#uses=1] + %.pn13.in.in.in = phi i32 [ %328, %bb ], [ %683, %bb1 ]; <i32> [#uses=1] + %.pn10.in.in = phi i32 [ %328, %bb ], [ %683, %bb1 ]; <i32> [#uses=1] + %.pn4.in = phi i32* [ null, %bb ], [ %684, 
%bb1 ]; <i32*> [#uses=1] + %.pn5 = phi i32 [ 0, %bb ], [ %660, %bb1 ]; <i32> [#uses=1] + %.pn14.in.in = lshr i32 %.pn14.in.in.in, 16; <i32> [#uses=1] + %.pn14.in = or i32 %.pn14.in.in, 256 ; <i32> [#uses=1] + %.pn13.in.in = lshr i32 %.pn13.in.in.in, 8; <i32> [#uses=1] + %.pn15 = lshr i32 %.pn15.in, 24 ; <i32> [#uses=1] + %.pn14 = and i32 %.pn14.in, 511 ; <i32> [#uses=1] + %.pn13.in = or i32 %.pn13.in.in, 512 ; <i32> [#uses=1] + %.pn11.in = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %.pn15; <i32*> [#uses=1] + %.pn12.in = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %.pn14; <i32*> [#uses=1] + %.pn13 = and i32 %.pn13.in, 767 ; <i32> [#uses=1] + %.pn10.in = or i32 %.pn10.in.in, 768 ; <i32> [#uses=1] + %.pn11 = load i32* %.pn11.in ; <i32> [#uses=1] + %.pn12 = load i32* %.pn12.in ; <i32> [#uses=1] + %.pn9.in = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %.pn13; <i32*> [#uses=1] + %.pn10 = and i32 %.pn10.in, 1023 ; <i32> [#uses=1] + %.pn8 = add i32 %.pn12, %.pn11 ; <i32> [#uses=1] + %.pn9 = load i32* %.pn9.in ; <i32> [#uses=1] + %.pn7.in = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %.pn10; <i32*> [#uses=1] + %.pn6 = xor i32 %.pn8, %.pn9 ; <i32> [#uses=1] + %.pn7 = load i32* %.pn7.in ; <i32> [#uses=1] + %.pn4 = load i32* %.pn4.in ; <i32> [#uses=1] + %.pn2 = load i32* %.pn2.in ; <i32> [#uses=1] + %.pn = add i32 %.pn6, %.pn7 ; <i32> [#uses=1] + %r.0 = xor i32 %.pn2, %.pn3 ; <i32> [#uses=1] + %.pn1 = xor i32 %.pn, %.pn5 ; <i32> [#uses=1] + %l.0 = xor i32 %.pn1, %.pn4 ; <i32> [#uses=1] + store i32 %l.0, i32* undef, align 4 + store i32 %r.0, i32* %data, align 4 + ret void +}
diff --git a/src/LLVM/test/CodeGen/Thumb/2009-08-12-RegInfoAssert.ll b/src/LLVM/test/CodeGen/Thumb/2009-08-12-RegInfoAssert.ll new file mode 100644 index 0000000..041306d --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/2009-08-12-RegInfoAssert.ll
@@ -0,0 +1,40 @@ +; RUN: llc < %s -mtriple=thumbv6-apple-darwin + + %struct.vorbis_comment = type { i8**, i32*, i32, i8* } +@.str16 = external constant [2 x i8], align 1 ; <[2 x i8]*> [#uses=1] + +declare i8* @__strcpy_chk(i8*, i8*, i32) nounwind + +declare i8* @__strcat_chk(i8*, i8*, i32) nounwind + +define i8* @vorbis_comment_query(%struct.vorbis_comment* nocapture %vc, i8* %tag, i32 %count) nounwind { +entry: + %0 = alloca i8, i32 undef, align 4 ; <i8*> [#uses=2] + %1 = call i8* @__strcpy_chk(i8* %0, i8* %tag, i32 -1) nounwind; <i8*> [#uses=0] + %2 = call i8* @__strcat_chk(i8* %0, i8* getelementptr ([2 x i8]* @.str16, i32 0, i32 0), i32 -1) nounwind; <i8*> [#uses=0] + %3 = getelementptr %struct.vorbis_comment* %vc, i32 0, i32 0; <i8***> [#uses=1] + br label %bb11 + +bb6: ; preds = %bb11 + %4 = load i8*** %3, align 4 ; <i8**> [#uses=1] + %scevgep = getelementptr i8** %4, i32 %8 ; <i8**> [#uses=1] + %5 = load i8** %scevgep, align 4 ; <i8*> [#uses=1] + br label %bb3.i + +bb3.i: ; preds = %bb3.i, %bb6 + %scevgep7.i = getelementptr i8* %5, i32 0 ; <i8*> [#uses=1] + %6 = load i8* %scevgep7.i, align 1 ; <i8> [#uses=0] + br i1 undef, label %bb3.i, label %bb10 + +bb10: ; preds = %bb3.i + %7 = add i32 %8, 1 ; <i32> [#uses=1] + br label %bb11 + +bb11: ; preds = %bb10, %entry + %8 = phi i32 [ %7, %bb10 ], [ 0, %entry ] ; <i32> [#uses=3] + %9 = icmp sgt i32 undef, %8 ; <i1> [#uses=1] + br i1 %9, label %bb6, label %bb13 + +bb13: ; preds = %bb11 + ret i8* null +}
diff --git a/src/LLVM/test/CodeGen/Thumb/2009-08-20-ISelBug.ll b/src/LLVM/test/CodeGen/Thumb/2009-08-20-ISelBug.ll new file mode 100644 index 0000000..7876557 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/2009-08-20-ISelBug.ll
@@ -0,0 +1,66 @@ +; RUN: llc < %s -mtriple=thumbv6-apple-darwin -relocation-model=pic -disable-fp-elim -mattr=+v6 -verify-machineinstrs | FileCheck %s +; rdar://7157006 + +%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } +%struct.__sFILEX = type opaque +%struct.__sbuf = type { i8*, i32 } +%struct.asl_file_t = type { i32, i32, i32, %struct.file_string_t*, i64, i64, i64, i64, i64, i64, i32, %struct.FILE*, i8*, i8* } +%struct.file_string_t = type { i64, i32, %struct.file_string_t*, [0 x i8] } + +@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (%struct.asl_file_t*, i64, i64*)* @t to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +define i32 @t(%struct.asl_file_t* %s, i64 %off, i64* %out) nounwind optsize { +; CHECK: t: +; CHECK: adds {{r[0-7]}}, #8 +entry: + %val = alloca i64, align 4 ; <i64*> [#uses=3] + %0 = icmp eq %struct.asl_file_t* %s, null ; <i1> [#uses=1] + br i1 %0, label %bb13, label %bb1 + +bb1: ; preds = %entry + %1 = getelementptr inbounds %struct.asl_file_t* %s, i32 0, i32 11 ; <%struct.FILE**> [#uses=2] + %2 = load %struct.FILE** %1, align 4 ; <%struct.FILE*> [#uses=2] + %3 = icmp eq %struct.FILE* %2, null ; <i1> [#uses=1] + br i1 %3, label %bb13, label %bb3 + +bb3: ; preds = %bb1 + %4 = add nsw i64 %off, 8 ; <i64> [#uses=1] + %5 = getelementptr inbounds %struct.asl_file_t* %s, i32 0, i32 10 ; <i32*> [#uses=1] + %6 = load i32* %5, align 4 ; <i32> [#uses=1] + %7 = zext i32 %6 to i64 ; <i64> [#uses=1] + %8 = icmp sgt i64 %4, %7 ; <i1> [#uses=1] + br i1 %8, label %bb13, label %bb5 + +bb5: ; preds = %bb3 + %9 = call i32 @fseeko(%struct.FILE* %2, i64 %off, i32 0) nounwind ; <i32> [#uses=1] + %10 = icmp eq i32 %9, 0 ; <i1> [#uses=1] + br i1 %10, label %bb7, label %bb13 + +bb7: ; preds = %bb5 + store i64 0, i64* %val, align 4 + %11 = load %struct.FILE** 
%1, align 4 ; <%struct.FILE*> [#uses=1] + %val8 = bitcast i64* %val to i8* ; <i8*> [#uses=1] + %12 = call i32 @fread(i8* noalias %val8, i32 8, i32 1, %struct.FILE* noalias %11) nounwind ; <i32> [#uses=1] + %13 = icmp eq i32 %12, 1 ; <i1> [#uses=1] + br i1 %13, label %bb10, label %bb13 + +bb10: ; preds = %bb7 + %14 = icmp eq i64* %out, null ; <i1> [#uses=1] + br i1 %14, label %bb13, label %bb11 + +bb11: ; preds = %bb10 + %15 = load i64* %val, align 4 ; <i64> [#uses=1] + %16 = call i64 @asl_core_ntohq(i64 %15) nounwind ; <i64> [#uses=1] + store i64 %16, i64* %out, align 4 + ret i32 0 + +bb13: ; preds = %bb10, %bb7, %bb5, %bb3, %bb1, %entry + %.0 = phi i32 [ 2, %entry ], [ 2, %bb1 ], [ 7, %bb3 ], [ 7, %bb5 ], [ 7, %bb7 ], [ 0, %bb10 ] ; <i32> [#uses=1] + ret i32 %.0 +} + +declare i32 @fseeko(%struct.FILE* nocapture, i64, i32) nounwind + +declare i32 @fread(i8* noalias nocapture, i32, i32, %struct.FILE* noalias nocapture) nounwind + +declare i64 @asl_core_ntohq(i64)
diff --git a/src/LLVM/test/CodeGen/Thumb/2009-12-17-pre-regalloc-taildup.ll b/src/LLVM/test/CodeGen/Thumb/2009-12-17-pre-regalloc-taildup.ll new file mode 100644 index 0000000..132d9ac --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/2009-12-17-pre-regalloc-taildup.ll
@@ -0,0 +1,66 @@ +; RUN: llc -O3 < %s | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin10" + +; This test should not produce any spills, even when tail duplication creates lots of phi nodes. +; CHECK-NOT: push +; CHECK-NOT: pop +; CHECK: bx lr + +@codetable.2928 = internal constant [5 x i8*] [i8* blockaddress(@interpret_threaded, %RETURN), i8* blockaddress(@interpret_threaded, %INCREMENT), i8* blockaddress(@interpret_threaded, %DECREMENT), i8* blockaddress(@interpret_threaded, %DOUBLE), i8* blockaddress(@interpret_threaded, %SWAPWORD)] ; <[5 x i8*]*> [#uses=5] +@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (i8*)* @interpret_threaded to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +define i32 @interpret_threaded(i8* nocapture %opcodes) nounwind readonly optsize { +entry: + %0 = load i8* %opcodes, align 1 ; <i8> [#uses=1] + %1 = zext i8 %0 to i32 ; <i32> [#uses=1] + %2 = getelementptr inbounds [5 x i8*]* @codetable.2928, i32 0, i32 %1 ; <i8**> [#uses=1] + br label %bb + +bb: ; preds = %bb.backedge, %entry + %indvar = phi i32 [ %phitmp, %bb.backedge ], [ 1, %entry ] ; <i32> [#uses=2] + %gotovar.22.0.in = phi i8** [ %gotovar.22.0.in.be, %bb.backedge ], [ %2, %entry ] ; <i8**> [#uses=1] + %result.0 = phi i32 [ %result.0.be, %bb.backedge ], [ 0, %entry ] ; <i32> [#uses=6] + %opcodes_addr.0 = getelementptr i8* %opcodes, i32 %indvar ; <i8*> [#uses=4] + %gotovar.22.0 = load i8** %gotovar.22.0.in, align 4 ; <i8*> [#uses=1] + indirectbr i8* %gotovar.22.0, [label %RETURN, label %INCREMENT, label %DECREMENT, label %DOUBLE, label %SWAPWORD] + +RETURN: ; preds = %bb + ret i32 %result.0 + +INCREMENT: ; preds = %bb + %3 = add nsw i32 %result.0, 1 ; <i32> [#uses=1] + %4 = load i8* %opcodes_addr.0, align 1 ; <i8> [#uses=1] + %5 = zext i8 %4 to i32 ; <i32> [#uses=1] + %6 = getelementptr inbounds [5 x i8*]* 
@codetable.2928, i32 0, i32 %5 ; <i8**> [#uses=1] + br label %bb.backedge + +bb.backedge: ; preds = %SWAPWORD, %DOUBLE, %DECREMENT, %INCREMENT + %gotovar.22.0.in.be = phi i8** [ %20, %SWAPWORD ], [ %14, %DOUBLE ], [ %10, %DECREMENT ], [ %6, %INCREMENT ] ; <i8**> [#uses=1] + %result.0.be = phi i32 [ %17, %SWAPWORD ], [ %11, %DOUBLE ], [ %7, %DECREMENT ], [ %3, %INCREMENT ] ; <i32> [#uses=1] + %phitmp = add i32 %indvar, 1 ; <i32> [#uses=1] + br label %bb + +DECREMENT: ; preds = %bb + %7 = add i32 %result.0, -1 ; <i32> [#uses=1] + %8 = load i8* %opcodes_addr.0, align 1 ; <i8> [#uses=1] + %9 = zext i8 %8 to i32 ; <i32> [#uses=1] + %10 = getelementptr inbounds [5 x i8*]* @codetable.2928, i32 0, i32 %9 ; <i8**> [#uses=1] + br label %bb.backedge + +DOUBLE: ; preds = %bb + %11 = shl i32 %result.0, 1 ; <i32> [#uses=1] + %12 = load i8* %opcodes_addr.0, align 1 ; <i8> [#uses=1] + %13 = zext i8 %12 to i32 ; <i32> [#uses=1] + %14 = getelementptr inbounds [5 x i8*]* @codetable.2928, i32 0, i32 %13 ; <i8**> [#uses=1] + br label %bb.backedge + +SWAPWORD: ; preds = %bb + %15 = shl i32 %result.0, 16 ; <i32> [#uses=1] + %16 = ashr i32 %result.0, 16 ; <i32> [#uses=1] + %17 = or i32 %15, %16 ; <i32> [#uses=1] + %18 = load i8* %opcodes_addr.0, align 1 ; <i8> [#uses=1] + %19 = zext i8 %18 to i32 ; <i32> [#uses=1] + %20 = getelementptr inbounds [5 x i8*]* @codetable.2928, i32 0, i32 %19 ; <i8**> [#uses=1] + br label %bb.backedge +}
diff --git a/src/LLVM/test/CodeGen/Thumb/2010-04-07-DbgValueOtherTargets.ll b/src/LLVM/test/CodeGen/Thumb/2010-04-07-DbgValueOtherTargets.ll new file mode 100644 index 0000000..b903977 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/2010-04-07-DbgValueOtherTargets.ll
@@ -0,0 +1,28 @@ +; RUN: llc -O0 -march=thumb -asm-verbose < %s | FileCheck %s +; Check that DEBUG_VALUE comments come through on a variety of targets. + +define i32 @main() nounwind ssp { +entry: +; CHECK: DEBUG_VALUE + call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9 + ret i32 0, !dbg !10 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.sp = !{!0} + +!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5} +!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 0} +!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] +!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!9 = metadata !{i32 3, i32 11, metadata !8, null} +!10 = metadata !{i32 4, i32 2, metadata !8, null} +
diff --git a/src/LLVM/test/CodeGen/Thumb/2010-06-18-SibCallCrash.ll b/src/LLVM/test/CodeGen/Thumb/2010-06-18-SibCallCrash.ll new file mode 100644 index 0000000..ad8b064 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/2010-06-18-SibCallCrash.ll
@@ -0,0 +1,8 @@ +; RUN: llc -march=thumb < %s +; rdar://8104457 + +define arm_apcscc void @t(i32* %m) nounwind { +entry: + tail call arm_apcscc void undef(i32* %m, i16 zeroext undef) nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/Thumb/2010-07-01-FuncAlign.ll b/src/LLVM/test/CodeGen/Thumb/2010-07-01-FuncAlign.ll new file mode 100644 index 0000000..8e09441 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/2010-07-01-FuncAlign.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -mtriple=thumb-apple-darwin | FileCheck %s +; Radar 8143571: Function alignments were off by a power of two. +; CHECK: .align 1 +define void @test() { + ret void +}
diff --git a/src/LLVM/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll b/src/LLVM/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll new file mode 100644 index 0000000..9f5a677 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
@@ -0,0 +1,147 @@ +; RUN: llc -mtriple=thumbv6-apple-darwin10 < %s | FileCheck %s +; RUN: opt -strip-debug < %s | llc -mtriple=thumbv6-apple-darwin10 | FileCheck %s +; Stripping out debug info formerly caused the last two multiplies to be emitted in +; the other order. 7797940 (part of it dated 6/29/2010..7/15/2010). + +%0 = type { [3 x double] } + +@llvm.used = appending global [1 x i8*] [i8* bitcast (void (%0*, i32, i32)* @_Z19getClosestDiagonal3ii to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +define void @_Z19getClosestDiagonal3ii(%0* noalias sret, i32, i32) nounwind { +; CHECK: blx ___muldf3 +; CHECK: blx ___muldf3 +; CHECK: beq LBB0 +; CHECK: blx ___muldf3 +; <label>:3 + switch i32 %1, label %4 [ + i32 0, label %5 + i32 3, label %5 + ] + +; <label>:4 ; preds = %3 + br label %5, !dbg !0 + +; <label>:5 ; preds = %4, %3, %3 + %storemerge = phi double [ -1.000000e+00, %4 ], [ 1.000000e+00, %3 ], [ 1.000000e+00, %3 ] ; <double> [#uses=1] + %v_6 = icmp slt i32 %1, 2 ; <i1> [#uses=1] + %storemerge1 = select i1 %v_6, double 1.000000e+00, double -1.000000e+00 ; <double> [#uses=3] + call void @llvm.dbg.value(metadata !{double %storemerge}, i64 0, metadata !91), !dbg !0 + %v_7 = icmp eq i32 %2, 1, !dbg !92 ; <i1> [#uses=1] + %storemerge2 = select i1 %v_7, double 1.000000e+00, double -1.000000e+00 ; <double> [#uses=3] + %v_8 = getelementptr inbounds %0* %0, i32 0, i32 0, i32 0 ; <double*> [#uses=1] + %v_10 = getelementptr inbounds %0* %0, i32 0, i32 0, i32 2 ; <double*> [#uses=1] + %v_11 = fmul double %storemerge1, %storemerge1, !dbg !93 ; <double> [#uses=1] + %v_15 = tail call double @sqrt(double %v_11) nounwind readonly, !dbg !93 ; <double> [#uses=1] + %v_16 = fdiv double 1.000000e+00, %v_15, !dbg !93 ; <double> [#uses=3] + %v_17 = fmul double %storemerge, %v_16, !dbg !97 ; <double> [#uses=1] + store double %v_17, double* %v_8, align 4, !dbg !97 + %v_19 = fmul double %storemerge2, %v_16, !dbg !97 ; <double> [#uses=1] + store double %v_19, double* %v_10, 
align 4, !dbg !97 + ret void, !dbg !98 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +declare double @sqrt(double) nounwind readonly + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!0 = metadata !{i32 46, i32 0, metadata !1, null} +!1 = metadata !{i32 524299, metadata !2, i32 44, i32 0} ; [ DW_TAG_lexical_block ] +!2 = metadata !{i32 524299, metadata !3, i32 44, i32 0} ; [ DW_TAG_lexical_block ] +!3 = metadata !{i32 524334, i32 0, metadata !4, metadata !"getClosestDiagonal3", metadata !"getClosestDiagonal3", metadata !"_Z19getClosestDiagonal3ii", metadata !4, i32 44, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!4 = metadata !{i32 524329, metadata !"ggEdgeDiscrepancy.cc", metadata !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src", metadata !5} ; [ DW_TAG_file_type ] +!5 = metadata !{i32 524305, i32 0, i32 4, metadata !"ggEdgeDiscrepancy.cc", metadata !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src", metadata !"4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build 00)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!6 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{metadata !8, metadata !22, metadata !22} +!8 = metadata !{i32 524307, metadata !4, metadata !"ggVector3", metadata !9, i32 66, i64 192, i64 32, i64 0, i32 0, null, metadata !10, i32 0, null} ; [ DW_TAG_structure_type ] +!9 = metadata !{i32 524329, metadata !"ggVector3.h", metadata !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src", metadata !5} ; [ DW_TAG_file_type ] +!10 = metadata !{metadata !11, metadata !16, metadata !23, metadata !26, metadata !29, metadata !30, metadata !35, metadata !36, metadata !37, metadata !41, metadata !42, metadata !43, metadata !46, metadata !47, metadata !48, metadata !52, metadata !53, metadata !54, metadata !57, metadata !60, metadata !63, metadata !66, metadata !70, metadata !71, metadata !74, metadata !75, metadata !76, metadata !77, metadata !78, metadata !81, metadata !82, metadata !83, metadata !84, metadata !85, metadata !88, metadata !89, metadata !90} +!11 = metadata !{i32 524301, metadata !8, metadata !"e", metadata !9, i32 160, i64 192, i64 32, i64 0, i32 0, metadata !12} ; [ DW_TAG_member ] +!12 = metadata !{i32 524289, metadata !4, metadata !"", metadata !4, i32 0, i64 192, i64 32, i64 0, i32 0, metadata !13, metadata !14, i32 0, null} ; [ DW_TAG_array_type ] +!13 = metadata !{i32 524324, metadata !4, metadata !"double", metadata !4, i32 0, i64 64, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] +!14 = metadata !{metadata !15} +!15 = metadata !{i32 524321, i64 0, i64 2} ; [ DW_TAG_subrange_type ] +!16 = metadata !{i32 524334, i32 0, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", metadata !9, i32 72, metadata !17, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} 
; [ DW_TAG_subprogram ] +!17 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null} ; [ DW_TAG_subroutine_type ] +!18 = metadata !{null, metadata !19, metadata !20} +!19 = metadata !{i32 524303, metadata !4, metadata !"", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 64, metadata !8} ; [ DW_TAG_pointer_type ] +!20 = metadata !{i32 524310, metadata !21, metadata !"ggBoolean", metadata !21, i32 478, i64 0, i64 0, i64 0, i32 0, metadata !22} ; [ DW_TAG_typedef ] +!21 = metadata !{i32 524329, metadata !"math.h", metadata !"/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS4.2.Internal.sdk/usr/include/architecture/arm", metadata !5} ; [ DW_TAG_file_type ] +!22 = metadata !{i32 524324, metadata !4, metadata !"int", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!23 = metadata !{i32 524334, i32 0, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", metadata !9, i32 73, metadata !24, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!24 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !25, i32 0, null} ; [ DW_TAG_subroutine_type ] +!25 = metadata !{null, metadata !19} +!26 = metadata !{i32 524334, i32 0, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", metadata !9, i32 74, metadata !27, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!27 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !28, i32 0, null} ; [ DW_TAG_subroutine_type ] +!28 = metadata !{null, metadata !19, metadata !13, metadata !13, metadata !13} +!29 = metadata !{i32 524334, i32 0, metadata !8, metadata !"Set", metadata !"Set", metadata !"_ZN9ggVector33SetEddd", metadata !9, i32 81, metadata !27, i1 false, i1 false, i32 0, i32 0, 
null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!30 = metadata !{i32 524334, i32 0, metadata !8, metadata !"x", metadata !"x", metadata !"_ZNK9ggVector31xEv", metadata !9, i32 82, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!31 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !32, i32 0, null} ; [ DW_TAG_subroutine_type ] +!32 = metadata !{metadata !13, metadata !33} +!33 = metadata !{i32 524303, metadata !4, metadata !"", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 64, metadata !34} ; [ DW_TAG_pointer_type ] +!34 = metadata !{i32 524326, metadata !4, metadata !"", metadata !4, i32 0, i64 192, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_const_type ] +!35 = metadata !{i32 524334, i32 0, metadata !8, metadata !"y", metadata !"y", metadata !"_ZNK9ggVector31yEv", metadata !9, i32 83, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!36 = metadata !{i32 524334, i32 0, metadata !8, metadata !"z", metadata !"z", metadata !"_ZNK9ggVector31zEv", metadata !9, i32 84, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!37 = metadata !{i32 524334, i32 0, metadata !8, metadata !"x", metadata !"x", metadata !"_ZN9ggVector31xEv", metadata !9, i32 85, metadata !38, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!38 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !39, i32 0, null} ; [ DW_TAG_subroutine_type ] +!39 = metadata !{metadata !40, metadata !19} +!40 = metadata !{i32 524304, metadata !4, metadata !"double", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !13} ; [ DW_TAG_reference_type ] +!41 = metadata !{i32 524334, i32 0, metadata !8, metadata !"y", metadata !"y", metadata !"_ZN9ggVector31yEv", metadata !9, 
i32 86, metadata !38, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!42 = metadata !{i32 524334, i32 0, metadata !8, metadata !"z", metadata !"z", metadata !"_ZN9ggVector31zEv", metadata !9, i32 87, metadata !38, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!43 = metadata !{i32 524334, i32 0, metadata !8, metadata !"SetX", metadata !"SetX", metadata !"_ZN9ggVector34SetXEd", metadata !9, i32 88, metadata !44, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!44 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !45, i32 0, null} ; [ DW_TAG_subroutine_type ] +!45 = metadata !{null, metadata !19, metadata !13} +!46 = metadata !{i32 524334, i32 0, metadata !8, metadata !"SetY", metadata !"SetY", metadata !"_ZN9ggVector34SetYEd", metadata !9, i32 89, metadata !44, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!47 = metadata !{i32 524334, i32 0, metadata !8, metadata !"SetZ", metadata !"SetZ", metadata !"_ZN9ggVector34SetZEd", metadata !9, i32 90, metadata !44, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!48 = metadata !{i32 524334, i32 0, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", metadata !9, i32 92, metadata !49, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!49 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !50, i32 0, null} ; [ DW_TAG_subroutine_type ] +!50 = metadata !{null, metadata !19, metadata !51} +!51 = metadata !{i32 524304, metadata !4, metadata !"", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !34} ; [ DW_TAG_reference_type ] +!52 = metadata !{i32 524334, i32 0, metadata !8, metadata !"tolerance", metadata !"tolerance", 
metadata !"_ZNK9ggVector39toleranceEv", metadata !9, i32 100, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!53 = metadata !{i32 524334, i32 0, metadata !8, metadata !"tolerance", metadata !"tolerance", metadata !"_ZN9ggVector39toleranceEv", metadata !9, i32 101, metadata !38, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!54 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator+", metadata !"operator+", metadata !"_ZNK9ggVector3psEv", metadata !9, i32 107, metadata !55, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!55 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !56, i32 0, null} ; [ DW_TAG_subroutine_type ] +!56 = metadata !{metadata !51, metadata !33} +!57 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator-", metadata !"operator-", metadata !"_ZNK9ggVector3ngEv", metadata !9, i32 108, metadata !58, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!58 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !59, i32 0, null} ; [ DW_TAG_subroutine_type ] +!59 = metadata !{metadata !8, metadata !33} +!60 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator[]", metadata !"operator[]", metadata !"_ZNK9ggVector3ixEi", metadata !9, i32 290, metadata !61, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!61 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !62, i32 0, null} ; [ DW_TAG_subroutine_type ] +!62 = metadata !{metadata !13, metadata !33, metadata !22} +!63 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator[]", metadata !"operator[]", metadata !"_ZN9ggVector3ixEi", metadata !9, i32 278, 
metadata !64, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!64 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !65, i32 0, null} ; [ DW_TAG_subroutine_type ] +!65 = metadata !{metadata !40, metadata !19, metadata !22} +!66 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator+=", metadata !"operator+=", metadata !"_ZN9ggVector3pLERKS_", metadata !9, i32 303, metadata !67, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!67 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !68, i32 0, null} ; [ DW_TAG_subroutine_type ] +!68 = metadata !{metadata !69, metadata !19, metadata !51} +!69 = metadata !{i32 524304, metadata !4, metadata !"ggVector3", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_reference_type ] +!70 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator-=", metadata !"operator-=", metadata !"_ZN9ggVector3mIERKS_", metadata !9, i32 310, metadata !67, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!71 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator*=", metadata !"operator*=", metadata !"_ZN9ggVector3mLEd", metadata !9, i32 317, metadata !72, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!72 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !73, i32 0, null} ; [ DW_TAG_subroutine_type ] +!73 = metadata !{metadata !69, metadata !19, metadata !13} +!74 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator/=", metadata !"operator/=", metadata !"_ZN9ggVector3dVEd", metadata !9, i32 324, metadata !72, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!75 = metadata !{i32 524334, 
i32 0, metadata !8, metadata !"length", metadata !"length", metadata !"_ZNK9ggVector36lengthEv", metadata !9, i32 121, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!76 = metadata !{i32 524334, i32 0, metadata !8, metadata !"squaredLength", metadata !"squaredLength", metadata !"_ZNK9ggVector313squaredLengthEv", metadata !9, i32 122, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!77 = metadata !{i32 524334, i32 0, metadata !8, metadata !"MakeUnitVector", metadata !"MakeUnitVector", metadata !"_ZN9ggVector314MakeUnitVectorEv", metadata !9, i32 217, metadata !24, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!78 = metadata !{i32 524334, i32 0, metadata !8, metadata !"Perturb", metadata !"Perturb", metadata !"_ZNK9ggVector37PerturbEdd", metadata !9, i32 126, metadata !79, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!79 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !80, i32 0, null} ; [ DW_TAG_subroutine_type ] +!80 = metadata !{metadata !8, metadata !33, metadata !13, metadata !13} +!81 = metadata !{i32 524334, i32 0, metadata !8, metadata !"maxComponent", metadata !"maxComponent", metadata !"_ZNK9ggVector312maxComponentEv", metadata !9, i32 128, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!82 = metadata !{i32 524334, i32 0, metadata !8, metadata !"minComponent", metadata !"minComponent", metadata !"_ZNK9ggVector312minComponentEv", metadata !9, i32 129, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!83 = metadata !{i32 524334, i32 0, metadata !8, metadata !"maxAbsComponent", metadata !"maxAbsComponent", metadata !"_ZNK9ggVector315maxAbsComponentEv", metadata !9, i32 131, 
metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!84 = metadata !{i32 524334, i32 0, metadata !8, metadata !"minAbsComponent", metadata !"minAbsComponent", metadata !"_ZNK9ggVector315minAbsComponentEv", metadata !9, i32 132, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!85 = metadata !{i32 524334, i32 0, metadata !8, metadata !"indexOfMinComponent", metadata !"indexOfMinComponent", metadata !"_ZNK9ggVector319indexOfMinComponentEv", metadata !9, i32 133, metadata !86, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!86 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !87, i32 0, null} ; [ DW_TAG_subroutine_type ] +!87 = metadata !{metadata !22, metadata !33} +!88 = metadata !{i32 524334, i32 0, metadata !8, metadata !"indexOfMinAbsComponent", metadata !"indexOfMinAbsComponent", metadata !"_ZNK9ggVector322indexOfMinAbsComponentEv", metadata !9, i32 137, metadata !86, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!89 = metadata !{i32 524334, i32 0, metadata !8, metadata !"indexOfMaxComponent", metadata !"indexOfMaxComponent", metadata !"_ZNK9ggVector319indexOfMaxComponentEv", metadata !9, i32 146, metadata !86, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!90 = metadata !{i32 524334, i32 0, metadata !8, metadata !"indexOfMaxAbsComponent", metadata !"indexOfMaxAbsComponent", metadata !"_ZNK9ggVector322indexOfMaxAbsComponentEv", metadata !9, i32 150, metadata !86, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!91 = metadata !{i32 524544, metadata !1, metadata !"vx", metadata !4, i32 46, metadata !13} ; [ DW_TAG_auto_variable ] +!92 = metadata !{i32 48, i32 0, metadata !1, null} +!93 = metadata !{i32 218, i32 0, 
metadata !94, metadata !96} +!94 = metadata !{i32 524299, metadata !95, i32 217, i32 0} ; [ DW_TAG_lexical_block ] +!95 = metadata !{i32 524299, metadata !77, i32 217, i32 0} ; [ DW_TAG_lexical_block ] +!96 = metadata !{i32 51, i32 0, metadata !1, null} +!97 = metadata !{i32 227, i32 0, metadata !94, metadata !96} +!98 = metadata !{i32 52, i32 0, metadata !1, null}
diff --git a/src/LLVM/test/CodeGen/Thumb/2011-05-11-DAGLegalizer.ll b/src/LLVM/test/CodeGen/Thumb/2011-05-11-DAGLegalizer.ll new file mode 100644 index 0000000..2890c22 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/2011-05-11-DAGLegalizer.ll
@@ -0,0 +1,64 @@ +; DISABLED: llc -mtriple=thumbv6-apple-darwin < %s +; RUN: false +; rdar://problem/9416774 +; ModuleID = 'reduced.ll' + +; byval is currently unsupported. +; XFAIL: * + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" +target triple = "thumbv7-apple-ios" + +%struct.MMMMMMMMMMMM = type { [4 x %struct.RRRRRRRR] } +%struct.RRRRRRRR = type { [78 x i32] } + +@kkkkkk = external constant i8* +@__PRETTY_FUNCTION__._ZN12CLGll = private unnamed_addr constant [62 x i8] c"static void tttttttttttt::lllllllllllll(const MMMMMMMMMMMM &)\00" +@.str = private unnamed_addr constant [75 x i8] c"\09GGGGGGGGGGGGGGGGGGGGGGG:,BE:0x%08lx,ALM:0x%08lx,LTO:0x%08lx,CBEE:0x%08lx\0A\00" + +define void @_ZN12CLGll(%struct.MMMMMMMMMMMM* %aidData) ssp align 2 { +entry: + %aidData.addr = alloca %struct.MMMMMMMMMMMM*, align 4 + %agg.tmp = alloca %struct.RRRRRRRR, align 4 + %agg.tmp4 = alloca %struct.RRRRRRRR, align 4 + %agg.tmp10 = alloca %struct.RRRRRRRR, align 4 + %agg.tmp16 = alloca %struct.RRRRRRRR, align 4 + store %struct.MMMMMMMMMMMM* %aidData, %struct.MMMMMMMMMMMM** %aidData.addr, align 4 + br label %do.body + +do.body: ; preds = %entry + %tmp = load i8** @kkkkkk, align 4 + %tmp1 = load %struct.MMMMMMMMMMMM** %aidData.addr + %eph = getelementptr inbounds %struct.MMMMMMMMMMMM* %tmp1, i32 0, i32 0 + %arrayidx = getelementptr inbounds [4 x %struct.RRRRRRRR]* %eph, i32 0, i32 0 + %tmp2 = bitcast %struct.RRRRRRRR* %agg.tmp to i8* + %tmp3 = bitcast %struct.RRRRRRRR* %arrayidx to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp3, i32 312, i32 4, i1 false) + %tmp5 = load %struct.MMMMMMMMMMMM** %aidData.addr + %eph6 = getelementptr inbounds %struct.MMMMMMMMMMMM* %tmp5, i32 0, i32 0 + %arrayidx7 = getelementptr inbounds [4 x %struct.RRRRRRRR]* %eph6, i32 0, i32 1 + %tmp8 = bitcast %struct.RRRRRRRR* %agg.tmp4 to i8* + %tmp9 = bitcast %struct.RRRRRRRR* %arrayidx7 to i8* + call void 
@llvm.memcpy.p0i8.p0i8.i32(i8* %tmp8, i8* %tmp9, i32 312, i32 4, i1 false) + %tmp11 = load %struct.MMMMMMMMMMMM** %aidData.addr + %eph12 = getelementptr inbounds %struct.MMMMMMMMMMMM* %tmp11, i32 0, i32 0 + %arrayidx13 = getelementptr inbounds [4 x %struct.RRRRRRRR]* %eph12, i32 0, i32 2 + %tmp14 = bitcast %struct.RRRRRRRR* %agg.tmp10 to i8* + %tmp15 = bitcast %struct.RRRRRRRR* %arrayidx13 to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp14, i8* %tmp15, i32 312, i32 4, i1 false) + %tmp17 = load %struct.MMMMMMMMMMMM** %aidData.addr + %eph18 = getelementptr inbounds %struct.MMMMMMMMMMMM* %tmp17, i32 0, i32 0 + %arrayidx19 = getelementptr inbounds [4 x %struct.RRRRRRRR]* %eph18, i32 0, i32 3 + %tmp20 = bitcast %struct.RRRRRRRR* %agg.tmp16 to i8* + %tmp21 = bitcast %struct.RRRRRRRR* %arrayidx19 to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp20, i8* %tmp21, i32 312, i32 4, i1 false) + call void (i8*, i32, i8*, i8*, ...)* @CLLoggingLog(i8* %tmp, i32 2, i8* getelementptr inbounds ([62 x i8]* @__PRETTY_FUNCTION__._ZN12CLGll, i32 0, i32 0), i8* getelementptr inbounds ([75 x i8]* @.str, i32 0, i32 0), %struct.RRRRRRRR* byval %agg.tmp, %struct.RRRRRRRR* byval %agg.tmp4, %struct.RRRRRRRR* byval %agg.tmp10, %struct.RRRRRRRR* byval %agg.tmp16) + br label %do.end + +do.end: ; preds = %do.body + ret void +} + +declare void @CLLoggingLog(i8*, i32, i8*, i8*, ...) + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/src/LLVM/test/CodeGen/Thumb/2011-06-16-NoGPRs.ll b/src/LLVM/test/CodeGen/Thumb/2011-06-16-NoGPRs.ll new file mode 100644 index 0000000..d39a760 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/2011-06-16-NoGPRs.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s +; +; This test would crash because isel creates a GPR register for the return +; value from f1. The register is only used by tBLXr_r9 which accepts a full GPR +; register, but we cannot have live GPRs in thumb mode because we don't know how +; to spill them. +; +; <rdar://problem/9624323> +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" +target triple = "thumbv6-apple-darwin10" + +%0 = type opaque + +declare i8* (i8*, i8*, ...)* @f1(i8*, i8*) optsize +declare i8* @f2(i8*, i8*, ...) + +define internal void @f(i8* %self, i8* %_cmd, %0* %inObjects, %0* %inIndexes) optsize ssp { +entry: + %call14 = tail call i8* (i8*, i8*, ...)* (i8*, i8*)* @f1(i8* undef, i8* %_cmd) optsize + %0 = bitcast i8* (i8*, i8*, ...)* %call14 to void (i8*, i8*, %0*, %0*)* + tail call void %0(i8* %self, i8* %_cmd, %0* %inObjects, %0* %inIndexes) optsize + tail call void bitcast (i8* (i8*, i8*, ...)* @f2 to void (i8*, i8*, i32, %0*, %0*)*)(i8* %self, i8* undef, i32 2, %0* %inIndexes, %0* undef) optsize + ret void +}
diff --git a/src/LLVM/test/CodeGen/Thumb/2011-EpilogueBug.ll b/src/LLVM/test/CodeGen/Thumb/2011-EpilogueBug.ll new file mode 100644 index 0000000..16789e6 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/2011-EpilogueBug.ll
@@ -0,0 +1,17 @@ +; RUN: llc -mtriple=thumbv6-apple-darwin < %s | FileCheck %s +; r8869722 + +%struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* } +%struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* } + +define void @t1(%struct.state* %v) { +; CHECK: push {r4 + %tmp6 = load i32* null + %tmp8 = alloca float, i32 %tmp6 + store i32 1, i32* null + br label %return + +return: ; preds = %0 +; CHECK: mov sp, r4 + ret void +}
diff --git a/src/LLVM/test/CodeGen/Thumb/asmprinter-bug.ll b/src/LLVM/test/CodeGen/Thumb/asmprinter-bug.ll new file mode 100644 index 0000000..f73f93d --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/asmprinter-bug.ll
@@ -0,0 +1,288 @@ +; RUN: llc < %s -mtriple=thumbv6-apple-darwin10 | grep rsbs | grep {#0} + + %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } + %struct.__sFILEX = type opaque + %struct.__sbuf = type { i8*, i32 } + %struct.adpcm_state = type { i16, i8 } +@stepsizeTable = internal constant [89 x i32] [i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16, i32 17, i32 19, i32 21, i32 23, i32 25, i32 28, i32 31, i32 34, i32 37, i32 41, i32 45, i32 50, i32 55, i32 60, i32 66, i32 73, i32 80, i32 88, i32 97, i32 107, i32 118, i32 130, i32 143, i32 157, i32 173, i32 190, i32 209, i32 230, i32 253, i32 279, i32 307, i32 337, i32 371, i32 408, i32 449, i32 494, i32 544, i32 598, i32 658, i32 724, i32 796, i32 876, i32 963, i32 1060, i32 1166, i32 1282, i32 1411, i32 1552, i32 1707, i32 1878, i32 2066, i32 2272, i32 2499, i32 2749, i32 3024, i32 3327, i32 3660, i32 4026, i32 4428, i32 4871, i32 5358, i32 5894, i32 6484, i32 7132, i32 7845, i32 8630, i32 9493, i32 10442, i32 11487, i32 12635, i32 13899, i32 15289, i32 16818, i32 18500, i32 20350, i32 22385, i32 24623, i32 27086, i32 29794, i32 32767] ; <[89 x i32]*> [#uses=4] +@indexTable = internal constant [16 x i32] [i32 -1, i32 -1, i32 -1, i32 -1, i32 2, i32 4, i32 6, i32 8, i32 -1, i32 -1, i32 -1, i32 -1, i32 2, i32 4, i32 6, i32 8] ; <[16 x i32]*> [#uses=2] +@abuf = common global [500 x i8] zeroinitializer ; <[500 x i8]*> [#uses=1] +@.str = private constant [11 x i8] c"input file\00", section "__TEXT,__cstring,cstring_literals", align 1 ; <[11 x i8]*> [#uses=1] +@sbuf = common global [1000 x i16] zeroinitializer ; <[1000 x i16]*> [#uses=1] +@state = common global %struct.adpcm_state zeroinitializer ; <%struct.adpcm_state*> [#uses=3] +@__stderrp = external global %struct.FILE* ; <%struct.FILE**> [#uses=1] +@.str1 = 
private constant [28 x i8] c"Final valprev=%d, index=%d\0A\00", section "__TEXT,__cstring,cstring_literals", align 1 ; <[28 x i8]*> [#uses=1] + +define void @adpcm_coder(i16* nocapture %indata, i8* nocapture %outdata, i32 %len, %struct.adpcm_state* nocapture %state) nounwind { +entry: + %0 = getelementptr %struct.adpcm_state* %state, i32 0, i32 0 ; <i16*> [#uses=2] + %1 = load i16* %0, align 2 ; <i16> [#uses=1] + %2 = sext i16 %1 to i32 ; <i32> [#uses=2] + %3 = getelementptr %struct.adpcm_state* %state, i32 0, i32 1 ; <i8*> [#uses=2] + %4 = load i8* %3, align 2 ; <i8> [#uses=1] + %5 = sext i8 %4 to i32 ; <i32> [#uses=3] + %6 = getelementptr [89 x i32]* @stepsizeTable, i32 0, i32 %5 ; <i32*> [#uses=1] + %7 = load i32* %6, align 4 ; <i32> [#uses=1] + %8 = icmp sgt i32 %len, 0 ; <i1> [#uses=1] + br i1 %8, label %bb, label %bb27 + +bb: ; preds = %bb25, %entry + %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb25 ] ; <i32> [#uses=2] + %outp.136 = phi i8* [ %outdata, %entry ], [ %outp.0, %bb25 ] ; <i8*> [#uses=3] + %bufferstep.035 = phi i32 [ 1, %entry ], [ %tmp, %bb25 ] ; <i32> [#uses=3] + %outputbuffer.134 = phi i32 [ undef, %entry ], [ %outputbuffer.0, %bb25 ] ; <i32> [#uses=2] + %index.033 = phi i32 [ %5, %entry ], [ %index.2, %bb25 ] ; <i32> [#uses=1] + %valpred.132 = phi i32 [ %2, %entry ], [ %valpred.2, %bb25 ] ; <i32> [#uses=2] + %step.031 = phi i32 [ %7, %entry ], [ %36, %bb25 ] ; <i32> [#uses=5] + %inp.038 = getelementptr i16* %indata, i32 %indvar ; <i16*> [#uses=1] + %9 = load i16* %inp.038, align 2 ; <i16> [#uses=1] + %10 = sext i16 %9 to i32 ; <i32> [#uses=1] + %11 = sub i32 %10, %valpred.132 ; <i32> [#uses=3] + %12 = icmp slt i32 %11, 0 ; <i1> [#uses=1] + %iftmp.1.0 = select i1 %12, i32 8, i32 0 ; <i32> [#uses=2] + %13 = sub i32 0, %11 ; <i32> [#uses=1] + %14 = icmp eq i32 %iftmp.1.0, 0 ; <i1> [#uses=2] + %. 
= select i1 %14, i32 %11, i32 %13 ; <i32> [#uses=2] + %15 = ashr i32 %step.031, 3 ; <i32> [#uses=1] + %16 = icmp slt i32 %., %step.031 ; <i1> [#uses=2] + %delta.0 = select i1 %16, i32 0, i32 4 ; <i32> [#uses=2] + %17 = select i1 %16, i32 0, i32 %step.031 ; <i32> [#uses=2] + %diff.1 = sub i32 %., %17 ; <i32> [#uses=2] + %18 = ashr i32 %step.031, 1 ; <i32> [#uses=2] + %19 = icmp slt i32 %diff.1, %18 ; <i1> [#uses=2] + %20 = or i32 %delta.0, 2 ; <i32> [#uses=1] + %21 = select i1 %19, i32 %delta.0, i32 %20 ; <i32> [#uses=1] + %22 = select i1 %19, i32 0, i32 %18 ; <i32> [#uses=2] + %diff.2 = sub i32 %diff.1, %22 ; <i32> [#uses=1] + %23 = ashr i32 %step.031, 2 ; <i32> [#uses=2] + %24 = icmp slt i32 %diff.2, %23 ; <i1> [#uses=2] + %25 = zext i1 %24 to i32 ; <i32> [#uses=1] + %26 = select i1 %24, i32 0, i32 %23 ; <i32> [#uses=1] + %vpdiff.0 = add i32 %17, %15 ; <i32> [#uses=1] + %vpdiff.1 = add i32 %vpdiff.0, %22 ; <i32> [#uses=1] + %vpdiff.2 = add i32 %vpdiff.1, %26 ; <i32> [#uses=2] + %tmp30 = sub i32 0, %vpdiff.2 ; <i32> [#uses=1] + %valpred.0.p = select i1 %14, i32 %vpdiff.2, i32 %tmp30 ; <i32> [#uses=1] + %valpred.0 = add i32 %valpred.0.p, %valpred.132 ; <i32> [#uses=3] + %27 = icmp sgt i32 %valpred.0, 32767 ; <i1> [#uses=1] + br i1 %27, label %bb18, label %bb16 + +bb16: ; preds = %bb + %28 = icmp slt i32 %valpred.0, -32768 ; <i1> [#uses=1] + br i1 %28, label %bb17, label %bb18 + +bb17: ; preds = %bb16 + br label %bb18 + +bb18: ; preds = %bb17, %bb16, %bb + %valpred.2 = phi i32 [ -32768, %bb17 ], [ 32767, %bb ], [ %valpred.0, %bb16 ] ; <i32> [#uses=2] + %delta.1 = or i32 %21, %iftmp.1.0 ; <i32> [#uses=1] + %delta.2 = or i32 %delta.1, %25 ; <i32> [#uses=1] + %29 = xor i32 %delta.2, 1 ; <i32> [#uses=3] + %30 = getelementptr [16 x i32]* @indexTable, i32 0, i32 %29 ; <i32*> [#uses=1] + %31 = load i32* %30, align 4 ; <i32> [#uses=1] + %32 = add i32 %31, %index.033 ; <i32> [#uses=2] + %33 = icmp slt i32 %32, 0 ; <i1> [#uses=1] + %index.1 = select i1 %33, i32 0, i32 %32 ; 
<i32> [#uses=2] + %34 = icmp sgt i32 %index.1, 88 ; <i1> [#uses=1] + %index.2 = select i1 %34, i32 88, i32 %index.1 ; <i32> [#uses=3] + %35 = getelementptr [89 x i32]* @stepsizeTable, i32 0, i32 %index.2 ; <i32*> [#uses=1] + %36 = load i32* %35, align 4 ; <i32> [#uses=1] + %37 = icmp eq i32 %bufferstep.035, 0 ; <i1> [#uses=1] + br i1 %37, label %bb24, label %bb23 + +bb23: ; preds = %bb18 + %38 = shl i32 %29, 4 ; <i32> [#uses=1] + %39 = and i32 %38, 240 ; <i32> [#uses=1] + br label %bb25 + +bb24: ; preds = %bb18 + %40 = trunc i32 %29 to i8 ; <i8> [#uses=1] + %41 = and i8 %40, 15 ; <i8> [#uses=1] + %42 = trunc i32 %outputbuffer.134 to i8 ; <i8> [#uses=1] + %43 = or i8 %41, %42 ; <i8> [#uses=1] + store i8 %43, i8* %outp.136, align 1 + %44 = getelementptr i8* %outp.136, i32 1 ; <i8*> [#uses=1] + br label %bb25 + +bb25: ; preds = %bb24, %bb23 + %outputbuffer.0 = phi i32 [ %39, %bb23 ], [ %outputbuffer.134, %bb24 ] ; <i32> [#uses=2] + %outp.0 = phi i8* [ %outp.136, %bb23 ], [ %44, %bb24 ] ; <i8*> [#uses=2] + %tmp = xor i32 %bufferstep.035, 1 ; <i32> [#uses=1] + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %indvar.next, %len ; <i1> [#uses=1] + br i1 %exitcond, label %bb26.bb27_crit_edge, label %bb + +bb26.bb27_crit_edge: ; preds = %bb25 + %phitmp44 = icmp eq i32 %bufferstep.035, 1 ; <i1> [#uses=1] + br label %bb27 + +bb27: ; preds = %bb26.bb27_crit_edge, %entry + %outp.1.lcssa = phi i8* [ %outp.0, %bb26.bb27_crit_edge ], [ %outdata, %entry ] ; <i8*> [#uses=1] + %bufferstep.0.lcssa = phi i1 [ %phitmp44, %bb26.bb27_crit_edge ], [ false, %entry ] ; <i1> [#uses=1] + %outputbuffer.1.lcssa = phi i32 [ %outputbuffer.0, %bb26.bb27_crit_edge ], [ undef, %entry ] ; <i32> [#uses=1] + %index.0.lcssa = phi i32 [ %index.2, %bb26.bb27_crit_edge ], [ %5, %entry ] ; <i32> [#uses=1] + %valpred.1.lcssa = phi i32 [ %valpred.2, %bb26.bb27_crit_edge ], [ %2, %entry ] ; <i32> [#uses=1] + br i1 %bufferstep.0.lcssa, label %bb28, label %bb29 + +bb28: ; preds = 
%bb27 + %45 = trunc i32 %outputbuffer.1.lcssa to i8 ; <i8> [#uses=1] + store i8 %45, i8* %outp.1.lcssa, align 1 + br label %bb29 + +bb29: ; preds = %bb28, %bb27 + %46 = trunc i32 %valpred.1.lcssa to i16 ; <i16> [#uses=1] + store i16 %46, i16* %0, align 2 + %47 = trunc i32 %index.0.lcssa to i8 ; <i8> [#uses=1] + store i8 %47, i8* %3, align 2 + ret void +} + +define void @adpcm_decoder(i8* nocapture %indata, i16* nocapture %outdata, i32 %len, %struct.adpcm_state* nocapture %state) nounwind { +entry: + %0 = getelementptr %struct.adpcm_state* %state, i32 0, i32 0 ; <i16*> [#uses=2] + %1 = load i16* %0, align 2 ; <i16> [#uses=1] + %2 = sext i16 %1 to i32 ; <i32> [#uses=2] + %3 = getelementptr %struct.adpcm_state* %state, i32 0, i32 1 ; <i8*> [#uses=2] + %4 = load i8* %3, align 2 ; <i8> [#uses=1] + %5 = sext i8 %4 to i32 ; <i32> [#uses=3] + %6 = getelementptr [89 x i32]* @stepsizeTable, i32 0, i32 %5 ; <i32*> [#uses=1] + %7 = load i32* %6, align 4 ; <i32> [#uses=1] + %8 = icmp sgt i32 %len, 0 ; <i1> [#uses=1] + br i1 %8, label %bb, label %bb22 + +bb: ; preds = %bb20, %entry + %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb20 ] ; <i32> [#uses=2] + %inp.131 = phi i8* [ %indata, %entry ], [ %inp.0, %bb20 ] ; <i8*> [#uses=3] + %bufferstep.028 = phi i32 [ 0, %entry ], [ %tmp, %bb20 ] ; <i32> [#uses=2] + %inputbuffer.127 = phi i32 [ undef, %entry ], [ %inputbuffer.0, %bb20 ] ; <i32> [#uses=2] + %index.026 = phi i32 [ %5, %entry ], [ %index.2, %bb20 ] ; <i32> [#uses=1] + %valpred.125 = phi i32 [ %2, %entry ], [ %valpred.2, %bb20 ] ; <i32> [#uses=1] + %step.024 = phi i32 [ %7, %entry ], [ %35, %bb20 ] ; <i32> [#uses=4] + %outp.030 = getelementptr i16* %outdata, i32 %indvar ; <i16*> [#uses=1] + %9 = icmp eq i32 %bufferstep.028, 0 ; <i1> [#uses=1] + br i1 %9, label %bb2, label %bb3 + +bb2: ; preds = %bb + %10 = load i8* %inp.131, align 1 ; <i8> [#uses=1] + %11 = sext i8 %10 to i32 ; <i32> [#uses=2] + %12 = getelementptr i8* %inp.131, i32 1 ; <i8*> [#uses=1] + %13 = ashr i32 
%11, 4 ; <i32> [#uses=1] + br label %bb3 + +bb3: ; preds = %bb2, %bb + %inputbuffer.0 = phi i32 [ %11, %bb2 ], [ %inputbuffer.127, %bb ] ; <i32> [#uses=1] + %delta.0.in = phi i32 [ %13, %bb2 ], [ %inputbuffer.127, %bb ] ; <i32> [#uses=5] + %inp.0 = phi i8* [ %12, %bb2 ], [ %inp.131, %bb ] ; <i8*> [#uses=1] + %delta.0 = and i32 %delta.0.in, 15 ; <i32> [#uses=1] + %tmp = xor i32 %bufferstep.028, 1 ; <i32> [#uses=1] + %14 = getelementptr [16 x i32]* @indexTable, i32 0, i32 %delta.0 ; <i32*> [#uses=1] + %15 = load i32* %14, align 4 ; <i32> [#uses=1] + %16 = add i32 %15, %index.026 ; <i32> [#uses=2] + %17 = icmp slt i32 %16, 0 ; <i1> [#uses=1] + %index.1 = select i1 %17, i32 0, i32 %16 ; <i32> [#uses=2] + %18 = icmp sgt i32 %index.1, 88 ; <i1> [#uses=1] + %index.2 = select i1 %18, i32 88, i32 %index.1 ; <i32> [#uses=3] + %19 = and i32 %delta.0.in, 8 ; <i32> [#uses=1] + %20 = ashr i32 %step.024, 3 ; <i32> [#uses=1] + %21 = and i32 %delta.0.in, 4 ; <i32> [#uses=1] + %22 = icmp eq i32 %21, 0 ; <i1> [#uses=1] + %23 = select i1 %22, i32 0, i32 %step.024 ; <i32> [#uses=1] + %vpdiff.0 = add i32 %23, %20 ; <i32> [#uses=2] + %24 = and i32 %delta.0.in, 2 ; <i32> [#uses=1] + %25 = icmp eq i32 %24, 0 ; <i1> [#uses=1] + br i1 %25, label %bb11, label %bb10 + +bb10: ; preds = %bb3 + %26 = ashr i32 %step.024, 1 ; <i32> [#uses=1] + %27 = add i32 %vpdiff.0, %26 ; <i32> [#uses=1] + br label %bb11 + +bb11: ; preds = %bb10, %bb3 + %vpdiff.1 = phi i32 [ %27, %bb10 ], [ %vpdiff.0, %bb3 ] ; <i32> [#uses=2] + %28 = and i32 %delta.0.in, 1 ; <i32> [#uses=1] + %toBool = icmp eq i32 %28, 0 ; <i1> [#uses=1] + br i1 %toBool, label %bb13, label %bb12 + +bb12: ; preds = %bb11 + %29 = ashr i32 %step.024, 2 ; <i32> [#uses=1] + %30 = add i32 %vpdiff.1, %29 ; <i32> [#uses=1] + br label %bb13 + +bb13: ; preds = %bb12, %bb11 + %vpdiff.2 = phi i32 [ %30, %bb12 ], [ %vpdiff.1, %bb11 ] ; <i32> [#uses=2] + %31 = icmp eq i32 %19, 0 ; <i1> [#uses=1] + %tmp23 = sub i32 0, %vpdiff.2 ; <i32> [#uses=1] + %valpred.0.p 
= select i1 %31, i32 %vpdiff.2, i32 %tmp23 ; <i32> [#uses=1] + %valpred.0 = add i32 %valpred.0.p, %valpred.125 ; <i32> [#uses=3] + %32 = icmp sgt i32 %valpred.0, 32767 ; <i1> [#uses=1] + br i1 %32, label %bb20, label %bb18 + +bb18: ; preds = %bb13 + %33 = icmp slt i32 %valpred.0, -32768 ; <i1> [#uses=1] + br i1 %33, label %bb19, label %bb20 + +bb19: ; preds = %bb18 + br label %bb20 + +bb20: ; preds = %bb19, %bb18, %bb13 + %valpred.2 = phi i32 [ -32768, %bb19 ], [ 32767, %bb13 ], [ %valpred.0, %bb18 ] ; <i32> [#uses=3] + %34 = getelementptr [89 x i32]* @stepsizeTable, i32 0, i32 %index.2 ; <i32*> [#uses=1] + %35 = load i32* %34, align 4 ; <i32> [#uses=1] + %36 = trunc i32 %valpred.2 to i16 ; <i16> [#uses=1] + store i16 %36, i16* %outp.030, align 2 + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %indvar.next, %len ; <i1> [#uses=1] + br i1 %exitcond, label %bb22, label %bb + +bb22: ; preds = %bb20, %entry + %index.0.lcssa = phi i32 [ %5, %entry ], [ %index.2, %bb20 ] ; <i32> [#uses=1] + %valpred.1.lcssa = phi i32 [ %2, %entry ], [ %valpred.2, %bb20 ] ; <i32> [#uses=1] + %37 = trunc i32 %valpred.1.lcssa to i16 ; <i16> [#uses=1] + store i16 %37, i16* %0, align 2 + %38 = trunc i32 %index.0.lcssa to i8 ; <i8> [#uses=1] + store i8 %38, i8* %3, align 2 + ret void +} + +define i32 @main() nounwind { +entry: + br label %bb + +bb: ; preds = %bb3, %entry + %0 = tail call i32 (...)* @read(i32 0, i8* getelementptr ([500 x i8]* @abuf, i32 0, i32 0), i32 500) nounwind ; <i32> [#uses=4] + %1 = icmp slt i32 %0, 0 ; <i1> [#uses=1] + br i1 %1, label %bb1, label %bb2 + +bb1: ; preds = %bb + tail call void @perror(i8* getelementptr ([11 x i8]* @.str, i32 0, i32 0)) nounwind + ret i32 1 + +bb2: ; preds = %bb + %2 = icmp eq i32 %0, 0 ; <i1> [#uses=1] + br i1 %2, label %bb4, label %bb3 + +bb3: ; preds = %bb2 + %3 = shl i32 %0, 1 ; <i32> [#uses=1] + tail call void @adpcm_decoder(i8* getelementptr ([500 x i8]* @abuf, i32 0, i32 0), i16* getelementptr ([1000 x 
i16]* @sbuf, i32 0, i32 0), i32 %3, %struct.adpcm_state* @state) nounwind + %4 = shl i32 %0, 2 ; <i32> [#uses=1] + %5 = tail call i32 (...)* @write(i32 1, i16* getelementptr ([1000 x i16]* @sbuf, i32 0, i32 0), i32 %4) nounwind ; <i32> [#uses=0] + br label %bb + +bb4: ; preds = %bb2 + %6 = load %struct.FILE** @__stderrp, align 4 ; <%struct.FILE*> [#uses=1] + %7 = load i16* getelementptr (%struct.adpcm_state* @state, i32 0, i32 0), align 4 ; <i16> [#uses=1] + %8 = sext i16 %7 to i32 ; <i32> [#uses=1] + %9 = load i8* getelementptr (%struct.adpcm_state* @state, i32 0, i32 1), align 2 ; <i8> [#uses=1] + %10 = sext i8 %9 to i32 ; <i32> [#uses=1] + %11 = tail call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %6, i8* getelementptr ([28 x i8]* @.str1, i32 0, i32 0), i32 %8, i32 %10) nounwind ; <i32> [#uses=0] + ret i32 0 +} + +declare i32 @read(...) + +declare void @perror(i8* nocapture) nounwind + +declare i32 @write(...) + +declare i32 @fprintf(%struct.FILE* nocapture, i8* nocapture, ...) nounwind
diff --git a/src/LLVM/test/CodeGen/Thumb/barrier.ll b/src/LLVM/test/CodeGen/Thumb/barrier.ll new file mode 100644 index 0000000..50d138f --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/barrier.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s -check-prefix=V6 +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=-db | FileCheck %s -check-prefix=V6 +; RUN: llc < %s -march=thumb -mcpu=cortex-m0 | FileCheck %s -check-prefix=V6M + +define void @t1() { +; V6: t1: +; V6: blx {{_*}}sync_synchronize + +; V6M: t1: +; V6M: dmb ish + fence seq_cst + ret void +}
diff --git a/src/LLVM/test/CodeGen/Thumb/dg.exp b/src/LLVM/test/CodeGen/Thumb/dg.exp new file mode 100644 index 0000000..3ff359a --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/dg.exp
@@ -0,0 +1,5 @@ +load_lib llvm.exp + +if { [llvm_supports_target ARM] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] +}
diff --git a/src/LLVM/test/CodeGen/Thumb/dyn-stackalloc.ll b/src/LLVM/test/CodeGen/Thumb/dyn-stackalloc.ll new file mode 100644 index 0000000..f3f0834 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/dyn-stackalloc.ll
@@ -0,0 +1,77 @@ +; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra | FileCheck %s +; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra -regalloc=basic | FileCheck %s + + %struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* } + %struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* } + +define void @t1(%struct.state* %v) { +; CHECK: t1: +; CHECK: push +; CHECK: add r7, sp, #12 +; CHECK: lsls r[[R0:[0-9]+]] +; CHECK: mov r[[R1:[0-9]+]], sp +; CHECK: subs r[[R2:[0-9]+]], r[[R1]], r[[R0]] +; CHECK: mov sp, r[[R2]] + %tmp6 = load i32* null + %tmp8 = alloca float, i32 %tmp6 + store i32 1, i32* null + br i1 false, label %bb123.preheader, label %return + +bb123.preheader: + br i1 false, label %bb43, label %return + +bb43: + call fastcc void @f1( float* %tmp8, float* null, i32 0 ) + %tmp70 = load i32* null + %tmp85 = getelementptr float* %tmp8, i32 0 + call fastcc void @f2( float* null, float* null, float* %tmp85, i32 %tmp70 ) + ret void + +return: + ret void +} + +declare fastcc void @f1(float*, float*, i32) + +declare fastcc void @f2(float*, float*, float*, i32) + + %struct.comment = type { i8**, i32*, i32, i8* } +@str215 = external global [2 x i8] + +define void @t2(%struct.comment* %vc, i8* %tag, i8* %contents) { +; CHECK: t2: +; CHECK: push +; CHECK: add r7, sp, #12 +; CHECK: sub sp, # +; CHECK: mov r[[R0:[0-9]+]], sp +; CHECK: str r{{[0-9+]}}, [r[[R0]] +; CHECK: str r{{[0-9+]}}, [r[[R0]] +; CHECK-NOT: ldr r0, [sp +; CHECK: mov r[[R1:[0-9]+]], sp +; CHECK: subs r[[R2:[0-9]+]], r[[R1]], r{{[0-9]+}} +; CHECK: mov sp, r[[R2]] +; CHECK-NOT: ldr r0, [sp +; CHECK: bx + %tmp1 = call i32 @strlen( i8* %tag ) + %tmp3 = call i32 @strlen( i8* %contents ) + %tmp4 = add i32 %tmp1, 2 + %tmp5 = add i32 %tmp4, %tmp3 + %tmp6 = alloca i8, i32 %tmp5 + %tmp9 = call i8* @strcpy( i8* %tmp6, i8* %tag ) + %tmp6.len = call i32 @strlen( i8* 
%tmp6 ) + %tmp6.indexed = getelementptr i8* %tmp6, i32 %tmp6.len + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp6.indexed, i8* getelementptr inbounds ([2 x i8]* @str215, i32 0, i32 0), i32 2, i32 1, i1 false) + %tmp15 = call i8* @strcat( i8* %tmp6, i8* %contents ) + call fastcc void @comment_add( %struct.comment* %vc, i8* %tmp6 ) + ret void +} + +declare i32 @strlen(i8*) + +declare i8* @strcat(i8*, i8*) + +declare fastcc void @comment_add(%struct.comment*, i8*) + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind + +declare i8* @strcpy(i8*, i8*)
diff --git a/src/LLVM/test/CodeGen/Thumb/fpconv.ll b/src/LLVM/test/CodeGen/Thumb/fpconv.ll new file mode 100644 index 0000000..7da36dd --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/fpconv.ll
@@ -0,0 +1,61 @@ +; RUN: llc < %s -march=thumb + +define float @f1(double %x) { +entry: + %tmp1 = fptrunc double %x to float ; <float> [#uses=1] + ret float %tmp1 +} + +define double @f2(float %x) { +entry: + %tmp1 = fpext float %x to double ; <double> [#uses=1] + ret double %tmp1 +} + +define i32 @f3(float %x) { +entry: + %tmp = fptosi float %x to i32 ; <i32> [#uses=1] + ret i32 %tmp +} + +define i32 @f4(float %x) { +entry: + %tmp = fptoui float %x to i32 ; <i32> [#uses=1] + ret i32 %tmp +} + +define i32 @f5(double %x) { +entry: + %tmp = fptosi double %x to i32 ; <i32> [#uses=1] + ret i32 %tmp +} + +define i32 @f6(double %x) { +entry: + %tmp = fptoui double %x to i32 ; <i32> [#uses=1] + ret i32 %tmp +} + +define float @f7(i32 %a) { +entry: + %tmp = sitofp i32 %a to float ; <float> [#uses=1] + ret float %tmp +} + +define double @f8(i32 %a) { +entry: + %tmp = sitofp i32 %a to double ; <double> [#uses=1] + ret double %tmp +} + +define float @f9(i32 %a) { +entry: + %tmp = uitofp i32 %a to float ; <float> [#uses=1] + ret float %tmp +} + +define double @f10(i32 %a) { +entry: + %tmp = uitofp i32 %a to double ; <double> [#uses=1] + ret double %tmp +}
diff --git a/src/LLVM/test/CodeGen/Thumb/fpow.ll b/src/LLVM/test/CodeGen/Thumb/fpow.ll new file mode 100644 index 0000000..be3dc0b --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/fpow.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=thumb + +define double @t(double %x, double %y) nounwind optsize { +entry: + %0 = tail call double @llvm.pow.f64( double %x, double %y ) ; <double> [#uses=1] + ret double %0 +} + +declare double @llvm.pow.f64(double, double) nounwind readonly
diff --git a/src/LLVM/test/CodeGen/Thumb/frame_thumb.ll b/src/LLVM/test/CodeGen/Thumb/frame_thumb.ll new file mode 100644 index 0000000..9b1f4c3 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/frame_thumb.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -mtriple=thumb-apple-darwin \ +; RUN: -disable-fp-elim | not grep {r11} +; RUN: llc < %s -mtriple=thumb-linux-gnueabi \ +; RUN: -disable-fp-elim | not grep {r11} + +define i32 @f() { +entry: + ret i32 10 +}
diff --git a/src/LLVM/test/CodeGen/Thumb/iabs.ll b/src/LLVM/test/CodeGen/Thumb/iabs.ll new file mode 100644 index 0000000..d03b5b2 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/iabs.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -march=thumb -stats |& \ +; RUN: grep {4 .*Number of machine instrs printed} + +;; Integer absolute value, should produce something as good as: +;; Thumb: +;; movs r0, r0 +;; bpl +;; rsb r0, r0, #0 (with opitmization, bpl + rsb is if-converted into rsbmi) +;; bx lr + +define i32 @test(i32 %a) { + %tmp1neg = sub i32 0, %a + %b = icmp sgt i32 %a, -1 + %abs = select i1 %b, i32 %a, i32 %tmp1neg + ret i32 %abs +; CHECK: movs r0, r0 +; CHECK: bpl +; CHECK: rsb r0, r0, #0 +; CHECK: bx lr +} + +
diff --git a/src/LLVM/test/CodeGen/Thumb/inlineasm-imm-thumb.ll b/src/LLVM/test/CodeGen/Thumb/inlineasm-imm-thumb.ll new file mode 100644 index 0000000..5c8a52a --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/inlineasm-imm-thumb.ll
@@ -0,0 +1,43 @@ +; RUN: llc < %s -march=thumb + +; Test Thumb-mode "I" constraint, for ADD immediate. +define i32 @testI(i32 %x) { + %y = call i32 asm "add $0, $1, $2", "=r,r,I"( i32 %x, i32 255 ) nounwind + ret i32 %y +} + +; Test Thumb-mode "J" constraint, for negated ADD immediates. +define void @testJ() { + tail call void asm sideeffect ".word $0", "J"( i32 -255 ) nounwind + ret void +} + +; Test Thumb-mode "K" constraint, for compatibility with GCC's internal use. +define void @testK() { + tail call void asm sideeffect ".word $0", "K"( i32 65280 ) nounwind + ret void +} + +; Test Thumb-mode "L" constraint, for 3-operand ADD immediates. +define i32 @testL(i32 %x) { + %y = call i32 asm "add $0, $1, $2", "=r,r,L"( i32 %x, i32 -7 ) nounwind + ret i32 %y +} + +; Test Thumb-mode "M" constraint, for "ADD r = sp + imm". +define i32 @testM() { + %y = call i32 asm "add $0, sp, $1", "=r,M"( i32 1020 ) nounwind + ret i32 %y +} + +; Test Thumb-mode "N" constraint, for values between 0 and 31. +define i32 @testN(i32 %x) { + %y = call i32 asm "lsl $0, $1, $2", "=r,r,N"( i32 %x, i32 31 ) nounwind + ret i32 %y +} + +; Test Thumb-mode "O" constraint, for "ADD sp = sp + imm". +define void @testO() { + tail call void asm sideeffect "add sp, sp, $0; add sp, sp, $1", "O,O"( i32 -508, i32 508 ) nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/Thumb/inlineasm-thumb.ll b/src/LLVM/test/CodeGen/Thumb/inlineasm-thumb.ll new file mode 100644 index 0000000..f2683c8 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/inlineasm-thumb.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=thumb | FileCheck %s +define i32 @t1(i32 %x, i32 %y) nounwind { +entry: + ; CHECK: mov r0, r12 + %0 = tail call i32 asm "mov $0, $1", "=l,h"(i32 %y) nounwind + ret i32 %0 +}
diff --git a/src/LLVM/test/CodeGen/Thumb/ispositive.ll b/src/LLVM/test/CodeGen/Thumb/ispositive.ll new file mode 100644 index 0000000..eac3ef2 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/ispositive.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=thumb | FileCheck %s + +define i32 @test1(i32 %X) { +entry: +; CHECK: test1: +; CHECK: lsrs r0, r0, #31 + icmp slt i32 %X, 0 ; <i1>:0 [#uses=1] + zext i1 %0 to i32 ; <i32>:1 [#uses=1] + ret i32 %1 +} +
diff --git a/src/LLVM/test/CodeGen/Thumb/large-stack.ll b/src/LLVM/test/CodeGen/Thumb/large-stack.ll new file mode 100644 index 0000000..fbacaba --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/large-stack.ll
@@ -0,0 +1,35 @@ +; RUN: llc < %s -mtriple=thumb-apple-darwin | FileCheck %s + +define void @test1() { +; CHECK: test1: +; CHECK: sub sp, #256 +; CHECK: add sp, #256 + %tmp = alloca [ 64 x i32 ] , align 4 + ret void +} + +define void @test2() { +; CHECK: test2: +; CHECK: ldr.n r0, LCPI +; CHECK: add sp, r0 +; CHECK: subs r4, r7, #4 +; CHECK: mov sp, r4 + %tmp = alloca [ 4168 x i8 ] , align 4 + ret void +} + +define i32 @test3() { +; CHECK: test3: +; CHECK: ldr.n r2, LCPI +; CHECK: add sp, r2 +; CHECK: ldr.n r1, LCPI +; CHECK: add r1, sp +; CHECK: subs r4, r7, #4 +; CHECK: mov sp, r4 + %retval = alloca i32, align 4 + %tmp = alloca i32, align 4 + %a = alloca [805306369 x i8], align 16 + store i32 0, i32* %tmp + %tmp1 = load i32* %tmp + ret i32 %tmp1 +}
diff --git a/src/LLVM/test/CodeGen/Thumb/ldr_ext.ll b/src/LLVM/test/CodeGen/Thumb/ldr_ext.ll new file mode 100644 index 0000000..9a28124 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/ldr_ext.ll
@@ -0,0 +1,57 @@ +; RUN: llc < %s -march=thumb | FileCheck %s -check-prefix=V5 +; RUN: llc < %s -march=thumb -mattr=+v6 | FileCheck %s -check-prefix=V6 + +; rdar://7176514 + +define i32 @test1(i8* %t1) nounwind { +; V5: ldrb + +; V6: ldrb + %tmp.u = load i8* %t1 + %tmp1.s = zext i8 %tmp.u to i32 + ret i32 %tmp1.s +} + +define i32 @test2(i16* %t1) nounwind { +; V5: ldrh + +; V6: ldrh + %tmp.u = load i16* %t1 + %tmp1.s = zext i16 %tmp.u to i32 + ret i32 %tmp1.s +} + +define i32 @test3(i8* %t0) nounwind { +; V5: ldrb +; V5: lsls +; V5: asrs + +; V6: ldrb +; V6: sxtb + %tmp.s = load i8* %t0 + %tmp1.s = sext i8 %tmp.s to i32 + ret i32 %tmp1.s +} + +define i32 @test4(i16* %t0) nounwind { +; V5: ldrh +; V5: lsls +; V5: asrs + +; V6: ldrh +; V6: sxth + %tmp.s = load i16* %t0 + %tmp1.s = sext i16 %tmp.s to i32 + ret i32 %tmp1.s +} + +define i32 @test5() nounwind { +; V5: movs r0, #0 +; V5: ldrsh + +; V6: movs r0, #0 +; V6: ldrsh + %tmp.s = load i16* null + %tmp1.s = sext i16 %tmp.s to i32 + ret i32 %tmp1.s +}
diff --git a/src/LLVM/test/CodeGen/Thumb/ldr_frame.ll b/src/LLVM/test/CodeGen/Thumb/ldr_frame.ll new file mode 100644 index 0000000..81782cd --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/ldr_frame.ll
@@ -0,0 +1,41 @@ +; RUN: llc < %s -march=thumb | FileCheck %s + +define i32 @f1() { +; CHECK: f1: +; CHECK: ldr r0 + %buf = alloca [32 x i32], align 4 + %tmp = getelementptr [32 x i32]* %buf, i32 0, i32 0 + %tmp1 = load i32* %tmp + ret i32 %tmp1 +} + +define i32 @f2() { +; CHECK: f2: +; CHECK: mov r0 +; CHECK: ldrb + %buf = alloca [32 x i8], align 4 + %tmp = getelementptr [32 x i8]* %buf, i32 0, i32 0 + %tmp1 = load i8* %tmp + %tmp2 = zext i8 %tmp1 to i32 + ret i32 %tmp2 +} + +define i32 @f3() { +; CHECK: f3: +; CHECK: ldr r0 + %buf = alloca [32 x i32], align 4 + %tmp = getelementptr [32 x i32]* %buf, i32 0, i32 32 + %tmp1 = load i32* %tmp + ret i32 %tmp1 +} + +define i32 @f4() { +; CHECK: f4: +; CHECK: mov r0 +; CHECK: ldrb + %buf = alloca [32 x i8], align 4 + %tmp = getelementptr [32 x i8]* %buf, i32 0, i32 2 + %tmp1 = load i8* %tmp + %tmp2 = zext i8 %tmp1 to i32 + ret i32 %tmp2 +}
diff --git a/src/LLVM/test/CodeGen/Thumb/long-setcc.ll b/src/LLVM/test/CodeGen/Thumb/long-setcc.ll new file mode 100644 index 0000000..8f2d98f --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/long-setcc.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=thumb | grep cmp | count 1 + + +define i1 @t1(i64 %x) { + %B = icmp slt i64 %x, 0 + ret i1 %B +} + +define i1 @t2(i64 %x) { + %tmp = icmp ult i64 %x, 4294967296 + ret i1 %tmp +} + +define i1 @t3(i32 %x) { + %tmp = icmp ugt i32 %x, -1 + ret i1 %tmp +}
diff --git a/src/LLVM/test/CodeGen/Thumb/long.ll b/src/LLVM/test/CodeGen/Thumb/long.ll new file mode 100644 index 0000000..197e19e --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/long.ll
@@ -0,0 +1,76 @@ +; RUN: llc < %s -march=thumb | \ +; RUN: grep mvn | count 1 +; RUN: llc < %s -march=thumb | \ +; RUN: grep adc | count 1 +; RUN: llc < %s -march=thumb | \ +; RUN: grep sbc | count 1 +; RUN: llc < %s -mtriple=thumb-apple-darwin | grep __muldi3 + +define i64 @f1() { +entry: + ret i64 0 +} + +define i64 @f2() { +entry: + ret i64 1 +} + +define i64 @f3() { +entry: + ret i64 2147483647 +} + +define i64 @f4() { +entry: + ret i64 2147483648 +} + +define i64 @f5() { +entry: + ret i64 9223372036854775807 +} + +define i64 @f6(i64 %x, i64 %y) { +entry: + %tmp1 = add i64 %y, 1 ; <i64> [#uses=1] + ret i64 %tmp1 +} + +define void @f7() { +entry: + %tmp = call i64 @f8( ) ; <i64> [#uses=0] + ret void +} + +declare i64 @f8() + +define i64 @f9(i64 %a, i64 %b) { +entry: + %tmp = sub i64 %a, %b ; <i64> [#uses=1] + ret i64 %tmp +} + +define i64 @f(i32 %a, i32 %b) { +entry: + %tmp = sext i32 %a to i64 ; <i64> [#uses=1] + %tmp1 = sext i32 %b to i64 ; <i64> [#uses=1] + %tmp2 = mul i64 %tmp1, %tmp ; <i64> [#uses=1] + ret i64 %tmp2 +} + +define i64 @g(i32 %a, i32 %b) { +entry: + %tmp = zext i32 %a to i64 ; <i64> [#uses=1] + %tmp1 = zext i32 %b to i64 ; <i64> [#uses=1] + %tmp2 = mul i64 %tmp1, %tmp ; <i64> [#uses=1] + ret i64 %tmp2 +} + +define i64 @f10() { +entry: + %a = alloca i64, align 8 ; <i64*> [#uses=1] + %retval = load i64* %a ; <i64> [#uses=1] + ret i64 %retval +} +
diff --git a/src/LLVM/test/CodeGen/Thumb/long_shift.ll b/src/LLVM/test/CodeGen/Thumb/long_shift.ll new file mode 100644 index 0000000..2431714 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/long_shift.ll
@@ -0,0 +1,26 @@ +; RUN: llc < %s -march=thumb + +define i64 @f0(i64 %A, i64 %B) { + %tmp = bitcast i64 %A to i64 + %tmp2 = lshr i64 %B, 1 + %tmp3 = sub i64 %tmp, %tmp2 + ret i64 %tmp3 +} + +define i32 @f1(i64 %x, i64 %y) { + %a = shl i64 %x, %y + %b = trunc i64 %a to i32 + ret i32 %b +} + +define i32 @f2(i64 %x, i64 %y) { + %a = ashr i64 %x, %y + %b = trunc i64 %a to i32 + ret i32 %b +} + +define i32 @f3(i64 %x, i64 %y) { + %a = lshr i64 %x, %y + %b = trunc i64 %a to i32 + ret i32 %b +}
diff --git a/src/LLVM/test/CodeGen/Thumb/mul.ll b/src/LLVM/test/CodeGen/Thumb/mul.ll new file mode 100644 index 0000000..c1a2fb2 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/mul.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -march=thumb | grep mul | count 3 +; RUN: llc < %s -march=thumb | grep lsl | count 1 + +define i32 @f1(i32 %u) { + %tmp = mul i32 %u, %u + ret i32 %tmp +} + +define i32 @f2(i32 %u, i32 %v) { + %tmp = mul i32 %u, %v + ret i32 %tmp +} + +define i32 @f3(i32 %u) { + %tmp = mul i32 %u, 5 + ret i32 %tmp +} + +define i32 @f4(i32 %u) { + %tmp = mul i32 %u, 4 + ret i32 %tmp +}
diff --git a/src/LLVM/test/CodeGen/Thumb/pop.ll b/src/LLVM/test/CodeGen/Thumb/pop.ll new file mode 100644 index 0000000..63f2feb --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/pop.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -mtriple=thumb-apple-darwin | FileCheck %s +; rdar://7268481 + +define void @t(i8* %a, ...) nounwind { +; CHECK: t: +; CHECK: pop {r3} +; CHECK-NEXT: add sp, #12 +; CHECK-NEXT: bx r3 +entry: + %a.addr = alloca i8* + store i8* %a, i8** %a.addr + ret void +}
diff --git a/src/LLVM/test/CodeGen/Thumb/push.ll b/src/LLVM/test/CodeGen/Thumb/push.ll new file mode 100644 index 0000000..94ef8e9 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/push.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-fp-elim | FileCheck %s +; rdar://7268481 + +define void @t() nounwind { +; CHECK: t: +; CHECK: push {r7} +entry: + call void asm sideeffect alignstack ".long 0xe7ffdefe", ""() nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/Thumb/rev.ll b/src/LLVM/test/CodeGen/Thumb/rev.ll new file mode 100644 index 0000000..5e163f8 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/rev.ll
@@ -0,0 +1,56 @@ +; RUN: llc < %s -march=thumb -mattr=+v6 | FileCheck %s + +define i32 @test1(i32 %X) nounwind { +; CHECK: test1 +; CHECK: rev16 r0, r0 + %tmp1 = lshr i32 %X, 8 + %X15 = bitcast i32 %X to i32 + %tmp4 = shl i32 %X15, 8 + %tmp2 = and i32 %tmp1, 16711680 + %tmp5 = and i32 %tmp4, -16777216 + %tmp9 = and i32 %tmp1, 255 + %tmp13 = and i32 %tmp4, 65280 + %tmp6 = or i32 %tmp5, %tmp2 + %tmp10 = or i32 %tmp6, %tmp13 + %tmp14 = or i32 %tmp10, %tmp9 + ret i32 %tmp14 +} + +define i32 @test2(i32 %X) nounwind { +; CHECK: test2 +; CHECK: revsh r0, r0 + %tmp1 = lshr i32 %X, 8 + %tmp1.upgrd.1 = trunc i32 %tmp1 to i16 + %tmp3 = trunc i32 %X to i16 + %tmp2 = and i16 %tmp1.upgrd.1, 255 + %tmp4 = shl i16 %tmp3, 8 + %tmp5 = or i16 %tmp2, %tmp4 + %tmp5.upgrd.2 = sext i16 %tmp5 to i32 + ret i32 %tmp5.upgrd.2 +} + +; rdar://9147637 +define i32 @test3(i16 zeroext %a) nounwind { +entry: +; CHECK: test3: +; CHECK: revsh r0, r0 + %0 = tail call i16 @llvm.bswap.i16(i16 %a) + %1 = sext i16 %0 to i32 + ret i32 %1 +} + +declare i16 @llvm.bswap.i16(i16) nounwind readnone + +define i32 @test4(i16 zeroext %a) nounwind { +entry: +; CHECK: test4: +; CHECK: revsh r0, r0 + %conv = zext i16 %a to i32 + %shr9 = lshr i16 %a, 8 + %conv2 = zext i16 %shr9 to i32 + %shl = shl nuw nsw i32 %conv, 8 + %or = or i32 %conv2, %shl + %sext = shl i32 %or, 16 + %conv8 = ashr exact i32 %sext, 16 + ret i32 %conv8 +}
diff --git a/src/LLVM/test/CodeGen/Thumb/select.ll b/src/LLVM/test/CodeGen/Thumb/select.ll new file mode 100644 index 0000000..3f10b05 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/select.ll
@@ -0,0 +1,82 @@ +; RUN: llc < %s -mtriple=thumb-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=thumb-pc-linux-gnueabi | FileCheck -check-prefix=CHECK-EABI %s + +define i32 @f1(i32 %a.s) { +entry: + %tmp = icmp eq i32 %a.s, 4 + %tmp1.s = select i1 %tmp, i32 2, i32 3 + ret i32 %tmp1.s +} +; CHECK: f1: +; CHECK: beq +; CHECK-EABI: f1: +; CHECK-EABI: beq + +define i32 @f2(i32 %a.s) { +entry: + %tmp = icmp sgt i32 %a.s, 4 + %tmp1.s = select i1 %tmp, i32 2, i32 3 + ret i32 %tmp1.s +} +; CHECK: f2: +; CHECK: bgt +; CHECK-EABI: f2: +; CHECK-EABI: bgt + +define i32 @f3(i32 %a.s, i32 %b.s) { +entry: + %tmp = icmp slt i32 %a.s, %b.s + %tmp1.s = select i1 %tmp, i32 2, i32 3 + ret i32 %tmp1.s +} +; CHECK: f3: +; CHECK: blt +; CHECK-EABI: f3: +; CHECK-EABI: blt + +define i32 @f4(i32 %a.s, i32 %b.s) { +entry: + %tmp = icmp sle i32 %a.s, %b.s + %tmp1.s = select i1 %tmp, i32 2, i32 3 + ret i32 %tmp1.s +} +; CHECK: f4: +; CHECK: ble +; CHECK-EABI: f4: +; CHECK-EABI: ble + +define i32 @f5(i32 %a.u, i32 %b.u) { +entry: + %tmp = icmp ule i32 %a.u, %b.u + %tmp1.s = select i1 %tmp, i32 2, i32 3 + ret i32 %tmp1.s +} +; CHECK: f5: +; CHECK: bls +; CHECK-EABI: f5: +; CHECK-EABI: bls + +define i32 @f6(i32 %a.u, i32 %b.u) { +entry: + %tmp = icmp ugt i32 %a.u, %b.u + %tmp1.s = select i1 %tmp, i32 2, i32 3 + ret i32 %tmp1.s +} +; CHECK: f6: +; CHECK: bhi +; CHECK-EABI: f6: +; CHECK-EABI: bhi + +define double @f7(double %a, double %b) { + %tmp = fcmp olt double %a, 1.234e+00 + %tmp1 = select i1 %tmp, double -1.000e+00, double %b + ret double %tmp1 +} +; CHECK: f7: +; CHECK: blt +; CHECK: blt +; CHECK: __ltdf2 +; CHECK-EABI: f7: +; CHECK-EABI: __aeabi_dcmplt +; CHECK-EABI: bne +; CHECK-EABI: bne
diff --git a/src/LLVM/test/CodeGen/Thumb/stack-frame.ll b/src/LLVM/test/CodeGen/Thumb/stack-frame.ll new file mode 100644 index 0000000..b103b33 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/stack-frame.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=thumb +; RUN: llc < %s -march=thumb | grep add | count 1 + +define void @f1() { + %c = alloca i8, align 1 + ret void +} + +define i32 @f2() { + ret i32 1 +} + +
diff --git a/src/LLVM/test/CodeGen/Thumb/thumb-imm.ll b/src/LLVM/test/CodeGen/Thumb/thumb-imm.ll new file mode 100644 index 0000000..d2aa5f2 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/thumb-imm.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=thumb | not grep CPI + + +define i32 @test1() { + ret i32 1000 +} + +define i32 @test2() { + ret i32 -256 +}
diff --git a/src/LLVM/test/CodeGen/Thumb/trap.ll b/src/LLVM/test/CodeGen/Thumb/trap.ll new file mode 100644 index 0000000..04cd3ee --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/trap.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=thumb | FileCheck %s +; rdar://7961298 + +define void @t() nounwind { +entry: +; CHECK: t: +; CHECK: trap + call void @llvm.trap() + unreachable +} + +declare void @llvm.trap() nounwind
diff --git a/src/LLVM/test/CodeGen/Thumb/tst_teq.ll b/src/LLVM/test/CodeGen/Thumb/tst_teq.ll new file mode 100644 index 0000000..21ada3e --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/tst_teq.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=thumb | grep tst + +define i32 @f(i32 %a) { +entry: + %tmp2 = and i32 %a, 255 ; <i32> [#uses=1] + icmp eq i32 %tmp2, 0 ; <i1>:0 [#uses=1] + %retval = select i1 %0, i32 20, i32 10 ; <i32> [#uses=1] + ret i32 %retval +} + +define i32 @g(i32 %a) { +entry: + %tmp2 = xor i32 %a, 255 + icmp eq i32 %tmp2, 0 ; <i1>:0 [#uses=1] + %retval = select i1 %0, i32 20, i32 10 ; <i32> [#uses=1] + ret i32 %retval +}
diff --git a/src/LLVM/test/CodeGen/Thumb/unord.ll b/src/LLVM/test/CodeGen/Thumb/unord.ll new file mode 100644 index 0000000..39458ae --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/unord.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=thumb | grep bne | count 1 +; RUN: llc < %s -march=thumb | grep beq | count 1 + +define i32 @f1(float %X, float %Y) { + %tmp = fcmp uno float %X, %Y + %retval = select i1 %tmp, i32 1, i32 -1 + ret i32 %retval +} + +define i32 @f2(float %X, float %Y) { + %tmp = fcmp ord float %X, %Y + %retval = select i1 %tmp, i32 1, i32 -1 + ret i32 %retval +}
diff --git a/src/LLVM/test/CodeGen/Thumb/vargs.ll b/src/LLVM/test/CodeGen/Thumb/vargs.ll new file mode 100644 index 0000000..5d3718d --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb/vargs.ll
@@ -0,0 +1,36 @@ +; RUN: llc < %s -march=thumb +; RUN: llc < %s -mtriple=thumb-linux | grep pop | count 2 +; RUN: llc < %s -mtriple=thumb-darwin | grep pop | count 2 + +@str = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1] + +define void @f(i32 %a, ...) { +entry: + %va = alloca i8*, align 4 ; <i8**> [#uses=4] + %va.upgrd.1 = bitcast i8** %va to i8* ; <i8*> [#uses=1] + call void @llvm.va_start( i8* %va.upgrd.1 ) + br label %bb + +bb: ; preds = %bb, %entry + %a_addr.0 = phi i32 [ %a, %entry ], [ %tmp5, %bb ] ; <i32> [#uses=2] + %tmp = volatile load i8** %va ; <i8*> [#uses=2] + %tmp2 = getelementptr i8* %tmp, i32 4 ; <i8*> [#uses=1] + volatile store i8* %tmp2, i8** %va + %tmp5 = add i32 %a_addr.0, -1 ; <i32> [#uses=1] + %tmp.upgrd.2 = icmp eq i32 %a_addr.0, 1 ; <i1> [#uses=1] + br i1 %tmp.upgrd.2, label %bb7, label %bb + +bb7: ; preds = %bb + %tmp3 = bitcast i8* %tmp to i32* ; <i32*> [#uses=1] + %tmp.upgrd.3 = load i32* %tmp3 ; <i32> [#uses=1] + %tmp10 = call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @str, i32 0, i64 0), i32 %tmp.upgrd.3 ) ; <i32> [#uses=0] + %va.upgrd.4 = bitcast i8** %va to i8* ; <i8*> [#uses=1] + call void @llvm.va_end( i8* %va.upgrd.4 ) + ret void +} + +declare void @llvm.va_start(i8*) + +declare i32 @printf(i8*, ...) + +declare void @llvm.va_end(i8*)
diff --git a/src/LLVM/test/CodeGen/Thumb2/2009-07-17-CrossRegClassCopy.ll b/src/LLVM/test/CodeGen/Thumb2/2009-07-17-CrossRegClassCopy.ll new file mode 100644 index 0000000..76ffe2a --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2009-07-17-CrossRegClassCopy.ll
@@ -0,0 +1,35 @@ +; RUN: llc < %s + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32" +target triple = "thumbv6t2-elf" + %struct.dwarf_cie = type <{ i32, i32, i8, [0 x i8], [3 x i8] }> + +declare i8* @read_sleb128(i8*, i32* nocapture) nounwind + +define i32 @get_cie_encoding(%struct.dwarf_cie* %cie) nounwind { +entry: + br i1 undef, label %bb1, label %bb13 + +bb1: ; preds = %entry + %tmp38 = add i32 undef, 10 ; <i32> [#uses=1] + br label %bb.i + +bb.i: ; preds = %bb.i, %bb1 + %indvar.i = phi i32 [ 0, %bb1 ], [ %2, %bb.i ] ; <i32> [#uses=3] + %tmp39 = add i32 %indvar.i, %tmp38 ; <i32> [#uses=1] + %p_addr.0.i = getelementptr i8* undef, i32 %tmp39 ; <i8*> [#uses=1] + %0 = load i8* %p_addr.0.i, align 1 ; <i8> [#uses=1] + %1 = icmp slt i8 %0, 0 ; <i1> [#uses=1] + %2 = add i32 %indvar.i, 1 ; <i32> [#uses=1] + br i1 %1, label %bb.i, label %read_uleb128.exit + +read_uleb128.exit: ; preds = %bb.i + %.sum40 = add i32 %indvar.i, undef ; <i32> [#uses=1] + %.sum31 = add i32 %.sum40, 2 ; <i32> [#uses=1] + %scevgep.i = getelementptr %struct.dwarf_cie* %cie, i32 0, i32 3, i32 %.sum31 ; <i8*> [#uses=1] + %3 = call i8* @read_sleb128(i8* %scevgep.i, i32* undef) ; <i8*> [#uses=0] + unreachable + +bb13: ; preds = %entry + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll b/src/LLVM/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll new file mode 100644 index 0000000..4e1394f --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll
@@ -0,0 +1,36 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mattr=+vfp2,+thumb2 | FileCheck %s +; rdar://7076238 + +@"\01LC" = external constant [36 x i8], align 1 ; <[36 x i8]*> [#uses=1] + +define i32 @t(i32, ...) nounwind { +entry: +; CHECK: t: +; CHECK: add r7, sp, #12 + %1 = load i8** undef, align 4 ; <i8*> [#uses=3] + %2 = getelementptr i8* %1, i32 4 ; <i8*> [#uses=1] + %3 = getelementptr i8* %1, i32 8 ; <i8*> [#uses=1] + %4 = bitcast i8* %2 to i32* ; <i32*> [#uses=1] + %5 = load i32* %4, align 4 ; <i32> [#uses=1] + %6 = trunc i32 %5 to i8 ; <i8> [#uses=1] + %7 = getelementptr i8* %1, i32 12 ; <i8*> [#uses=1] + %8 = bitcast i8* %3 to i32* ; <i32*> [#uses=1] + %9 = load i32* %8, align 4 ; <i32> [#uses=1] + %10 = trunc i32 %9 to i16 ; <i16> [#uses=1] + %11 = bitcast i8* %7 to i32* ; <i32*> [#uses=1] + %12 = load i32* %11, align 4 ; <i32> [#uses=1] + %13 = trunc i32 %12 to i16 ; <i16> [#uses=1] + %14 = load i32* undef, align 4 ; <i32> [#uses=2] + %15 = sext i8 %6 to i32 ; <i32> [#uses=2] + %16 = sext i16 %10 to i32 ; <i32> [#uses=2] + %17 = sext i16 %13 to i32 ; <i32> [#uses=2] + %18 = call i32 (i8*, ...)* @printf(i8* getelementptr ([36 x i8]* @"\01LC", i32 0, i32 0), i32 -128, i32 0, i32 %15, i32 %16, i32 %17, i32 0, i32 %14) nounwind ; <i32> [#uses=0] + %19 = add i32 0, %15 ; <i32> [#uses=1] + %20 = add i32 %19, %16 ; <i32> [#uses=1] + %21 = add i32 %20, %14 ; <i32> [#uses=1] + %22 = add i32 %21, %17 ; <i32> [#uses=1] + %23 = add i32 %22, 0 ; <i32> [#uses=1] + ret i32 %23 +} + +declare i32 @printf(i8* nocapture, ...) nounwind
diff --git a/src/LLVM/test/CodeGen/Thumb2/2009-07-23-CPIslandBug.ll b/src/LLVM/test/CodeGen/Thumb2/2009-07-23-CPIslandBug.ll new file mode 100644 index 0000000..4357366 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2009-07-23-CPIslandBug.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mattr=+vfp2,+thumb2 +; rdar://7083961 + +define i32 @value(i64 %b1, i64 %b2) nounwind readonly { +entry: + %0 = icmp eq i32 undef, 0 ; <i1> [#uses=1] + %mod.0.ph.ph = select i1 %0, float -1.000000e+00, float 1.000000e+00 ; <float> [#uses=1] + br label %bb7 + +bb7: ; preds = %bb7, %entry + br i1 undef, label %bb86.preheader, label %bb7 + +bb86.preheader: ; preds = %bb7 + %1 = fmul float %mod.0.ph.ph, 5.000000e+00 ; <float> [#uses=0] + br label %bb79 + +bb79: ; preds = %bb79, %bb86.preheader + br i1 undef, label %bb119, label %bb79 + +bb119: ; preds = %bb79 + ret i32 undef +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/2009-07-30-PEICrash.ll b/src/LLVM/test/CodeGen/Thumb2/2009-07-30-PEICrash.ll new file mode 100644 index 0000000..3e07618 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2009-07-30-PEICrash.ll
@@ -0,0 +1,193 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim + + %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } + %struct.JHUFF_TBL = type { [17 x i8], [256 x i8], i32 } + %struct.JQUANT_TBL = type { [64 x i16], i32 } + %struct.__sFILEX = type opaque + %struct.__sbuf = type { i8*, i32 } + %struct.anon = type { [8 x i32], [48 x i8] } + %struct.backing_store_info = type { void (%struct.jpeg_common_struct*, %struct.backing_store_info*, i8*, i32, i32)*, void (%struct.jpeg_common_struct*, %struct.backing_store_info*, i8*, i32, i32)*, void (%struct.jpeg_common_struct*, %struct.backing_store_info*)*, %struct.FILE*, [64 x i8] } + %struct.jpeg_color_deconverter = type { void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*, i8***, i32, i8**, i32)* } + %struct.jpeg_color_quantizer = type { void (%struct.jpeg_decompress_struct*, i32)*, void (%struct.jpeg_decompress_struct*, i8**, i8**, i32)*, void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)* } + %struct.jpeg_common_struct = type { %struct.jpeg_error_mgr*, %struct.jpeg_memory_mgr*, %struct.jpeg_progress_mgr*, i32, i32 } + %struct.jpeg_component_info = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.JQUANT_TBL*, i8* } + %struct.jpeg_d_coef_controller = type { void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*, i8***)*, %struct.jvirt_barray_control** } + %struct.jpeg_d_main_controller = type { void (%struct.jpeg_decompress_struct*, i32)*, void (%struct.jpeg_decompress_struct*, i8**, i32*, i32)* } + %struct.jpeg_d_post_controller = type { void 
(%struct.jpeg_decompress_struct*, i32)*, void (%struct.jpeg_decompress_struct*, i8***, i32*, i32, i8**, i32*, i32)* } + %struct.jpeg_decomp_master = type { void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, i32 } + %struct.jpeg_decompress_struct = type { %struct.jpeg_error_mgr*, %struct.jpeg_memory_mgr*, %struct.jpeg_progress_mgr*, i32, i32, %struct.jpeg_source_mgr*, i32, i32, i32, i32, i32, i32, i32, double, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8**, i32, i32, i32, i32, i32, [64 x i32]*, [4 x %struct.JQUANT_TBL*], [4 x %struct.JHUFF_TBL*], [4 x %struct.JHUFF_TBL*], i32, %struct.jpeg_component_info*, i32, i32, [16 x i8], [16 x i8], [16 x i8], i32, i32, i8, i16, i16, i32, i8, i32, i32, i32, i32, i32, i8*, i32, [4 x %struct.jpeg_component_info*], i32, i32, i32, [10 x i32], i32, i32, i32, i32, i32, %struct.jpeg_decomp_master*, %struct.jpeg_d_main_controller*, %struct.jpeg_d_coef_controller*, %struct.jpeg_d_post_controller*, %struct.jpeg_input_controller*, %struct.jpeg_marker_reader*, %struct.jpeg_entropy_decoder*, %struct.jpeg_inverse_dct*, %struct.jpeg_upsampler*, %struct.jpeg_color_deconverter*, %struct.jpeg_color_quantizer* } + %struct.jpeg_entropy_decoder = type { void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*, [64 x i16]**)* } + %struct.jpeg_error_mgr = type { void (%struct.jpeg_common_struct*)*, void (%struct.jpeg_common_struct*, i32)*, void (%struct.jpeg_common_struct*)*, void (%struct.jpeg_common_struct*, i8*)*, void (%struct.jpeg_common_struct*)*, i32, %struct.anon, i32, i32, i8**, i32, i8**, i32, i32 } + %struct.jpeg_input_controller = type { i32 (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, i32, i32 } + %struct.jpeg_inverse_dct = type { void (%struct.jpeg_decompress_struct*)*, [10 x void (%struct.jpeg_decompress_struct*, 
%struct.jpeg_component_info*, i16*, i8**, i32)*] } + %struct.jpeg_marker_reader = type { void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, [16 x i32 (%struct.jpeg_decompress_struct*)*], i32, i32, i32, i32 } + %struct.jpeg_memory_mgr = type { i8* (%struct.jpeg_common_struct*, i32, i32)*, i8* (%struct.jpeg_common_struct*, i32, i32)*, i8** (%struct.jpeg_common_struct*, i32, i32, i32)*, [64 x i16]** (%struct.jpeg_common_struct*, i32, i32, i32)*, %struct.jvirt_sarray_control* (%struct.jpeg_common_struct*, i32, i32, i32, i32, i32)*, %struct.jvirt_barray_control* (%struct.jpeg_common_struct*, i32, i32, i32, i32, i32)*, void (%struct.jpeg_common_struct*)*, i8** (%struct.jpeg_common_struct*, %struct.jvirt_sarray_control*, i32, i32, i32)*, [64 x i16]** (%struct.jpeg_common_struct*, %struct.jvirt_barray_control*, i32, i32, i32)*, void (%struct.jpeg_common_struct*, i32)*, void (%struct.jpeg_common_struct*)*, i32 } + %struct.jpeg_progress_mgr = type { void (%struct.jpeg_common_struct*)*, i32, i32, i32, i32 } + %struct.jpeg_source_mgr = type { i8*, i32, void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*, i32)*, i32 (%struct.jpeg_decompress_struct*, i32)*, void (%struct.jpeg_decompress_struct*)* } + %struct.jpeg_upsampler = type { void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*, i8***, i32*, i32, i8**, i32*, i32)*, i32 } + %struct.jvirt_barray_control = type { [64 x i16]**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.jvirt_barray_control*, %struct.backing_store_info } + %struct.jvirt_sarray_control = type { i8**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.jvirt_sarray_control*, %struct.backing_store_info } + +define void @jpeg_idct_float(%struct.jpeg_decompress_struct* nocapture %cinfo, %struct.jpeg_component_info* nocapture %compptr, i16* 
nocapture %coef_block, i8** nocapture %output_buf, i32 %output_col) nounwind { +entry: + %workspace = alloca [64 x float], align 4 ; <[64 x float]*> [#uses=11] + %0 = load i8** undef, align 4 ; <i8*> [#uses=5] + br label %bb + +bb: ; preds = %bb, %entry + %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=11] + %tmp39 = add i32 %indvar, 8 ; <i32> [#uses=0] + %tmp41 = add i32 %indvar, 16 ; <i32> [#uses=2] + %scevgep42 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp41 ; <float*> [#uses=1] + %tmp43 = add i32 %indvar, 24 ; <i32> [#uses=1] + %scevgep44 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp43 ; <float*> [#uses=1] + %tmp45 = add i32 %indvar, 32 ; <i32> [#uses=1] + %scevgep46 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp45 ; <float*> [#uses=1] + %tmp47 = add i32 %indvar, 40 ; <i32> [#uses=1] + %scevgep48 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp47 ; <float*> [#uses=1] + %tmp49 = add i32 %indvar, 48 ; <i32> [#uses=1] + %scevgep50 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp49 ; <float*> [#uses=1] + %tmp51 = add i32 %indvar, 56 ; <i32> [#uses=1] + %scevgep52 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp51 ; <float*> [#uses=1] + %wsptr.119 = getelementptr [64 x float]* %workspace, i32 0, i32 %indvar ; <float*> [#uses=1] + %tmp54 = shl i32 %indvar, 2 ; <i32> [#uses=1] + %scevgep76 = getelementptr i8* undef, i32 %tmp54 ; <i8*> [#uses=1] + %quantptr.118 = bitcast i8* %scevgep76 to float* ; <float*> [#uses=1] + %scevgep79 = getelementptr i16* %coef_block, i32 %tmp41 ; <i16*> [#uses=0] + %inptr.117 = getelementptr i16* %coef_block, i32 %indvar ; <i16*> [#uses=1] + %1 = load i16* null, align 2 ; <i16> [#uses=1] + %2 = load i16* undef, align 2 ; <i16> [#uses=1] + %3 = load i16* %inptr.117, align 2 ; <i16> [#uses=1] + %4 = sitofp i16 %3 to float ; <float> [#uses=1] + %5 = load float* %quantptr.118, align 4 ; <float> [#uses=1] + %6 = fmul float %4, %5 ; <float> [#uses=1] + %7 = fsub 
float %6, undef ; <float> [#uses=2] + %8 = fmul float undef, 0x3FF6A09E60000000 ; <float> [#uses=1] + %9 = fsub float %8, 0.000000e+00 ; <float> [#uses=2] + %10 = fadd float undef, 0.000000e+00 ; <float> [#uses=2] + %11 = fadd float %7, %9 ; <float> [#uses=2] + %12 = fsub float %7, %9 ; <float> [#uses=2] + %13 = sitofp i16 %1 to float ; <float> [#uses=1] + %14 = fmul float %13, undef ; <float> [#uses=2] + %15 = sitofp i16 %2 to float ; <float> [#uses=1] + %16 = load float* undef, align 4 ; <float> [#uses=1] + %17 = fmul float %15, %16 ; <float> [#uses=1] + %18 = fadd float %14, undef ; <float> [#uses=2] + %19 = fsub float %14, undef ; <float> [#uses=2] + %20 = fadd float undef, %17 ; <float> [#uses=2] + %21 = fadd float %20, %18 ; <float> [#uses=3] + %22 = fsub float %20, %18 ; <float> [#uses=1] + %23 = fmul float %22, 0x3FF6A09E60000000 ; <float> [#uses=1] + %24 = fadd float %19, undef ; <float> [#uses=1] + %25 = fmul float %24, 0x3FFD906BC0000000 ; <float> [#uses=2] + %26 = fmul float undef, 0x3FF1517A80000000 ; <float> [#uses=1] + %27 = fsub float %26, %25 ; <float> [#uses=1] + %28 = fmul float %19, 0xC004E7AEA0000000 ; <float> [#uses=1] + %29 = fadd float %28, %25 ; <float> [#uses=1] + %30 = fsub float %29, %21 ; <float> [#uses=3] + %31 = fsub float %23, %30 ; <float> [#uses=3] + %32 = fadd float %27, %31 ; <float> [#uses=1] + %33 = fadd float %10, %21 ; <float> [#uses=1] + store float %33, float* %wsptr.119, align 4 + %34 = fsub float %10, %21 ; <float> [#uses=1] + store float %34, float* %scevgep52, align 4 + %35 = fadd float %11, %30 ; <float> [#uses=1] + store float %35, float* null, align 4 + %36 = fsub float %11, %30 ; <float> [#uses=1] + store float %36, float* %scevgep50, align 4 + %37 = fadd float %12, %31 ; <float> [#uses=1] + store float %37, float* %scevgep42, align 4 + %38 = fsub float %12, %31 ; <float> [#uses=1] + store float %38, float* %scevgep48, align 4 + %39 = fadd float undef, %32 ; <float> [#uses=1] + store float %39, float* %scevgep46, 
align 4 + store float undef, float* %scevgep44, align 4 + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1] + br i1 undef, label %bb6, label %bb + +bb6: ; preds = %bb + %.sum10 = add i32 %output_col, 1 ; <i32> [#uses=1] + %.sum8 = add i32 %output_col, 6 ; <i32> [#uses=1] + %.sum6 = add i32 %output_col, 2 ; <i32> [#uses=1] + %.sum = add i32 %output_col, 3 ; <i32> [#uses=1] + br label %bb8 + +bb8: ; preds = %bb8, %bb6 + %ctr.116 = phi i32 [ 0, %bb6 ], [ %88, %bb8 ] ; <i32> [#uses=3] + %scevgep = getelementptr i8** %output_buf, i32 %ctr.116 ; <i8**> [#uses=1] + %tmp = shl i32 %ctr.116, 3 ; <i32> [#uses=5] + %tmp2392 = or i32 %tmp, 4 ; <i32> [#uses=1] + %scevgep24 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp2392 ; <float*> [#uses=1] + %tmp2591 = or i32 %tmp, 2 ; <i32> [#uses=1] + %scevgep26 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp2591 ; <float*> [#uses=1] + %tmp2790 = or i32 %tmp, 6 ; <i32> [#uses=1] + %scevgep28 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp2790 ; <float*> [#uses=1] + %tmp3586 = or i32 %tmp, 7 ; <i32> [#uses=0] + %wsptr.215 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp ; <float*> [#uses=1] + %40 = load i8** %scevgep, align 4 ; <i8*> [#uses=4] + %41 = load float* %wsptr.215, align 4 ; <float> [#uses=1] + %42 = load float* %scevgep24, align 4 ; <float> [#uses=1] + %43 = fadd float %41, %42 ; <float> [#uses=1] + %44 = load float* %scevgep26, align 4 ; <float> [#uses=1] + %45 = load float* %scevgep28, align 4 ; <float> [#uses=1] + %46 = fadd float %44, %45 ; <float> [#uses=1] + %47 = fsub float %43, %46 ; <float> [#uses=2] + %48 = fsub float undef, 0.000000e+00 ; <float> [#uses=1] + %49 = fadd float 0.000000e+00, undef ; <float> [#uses=1] + %50 = fptosi float %49 to i32 ; <i32> [#uses=1] + %51 = add i32 %50, 4 ; <i32> [#uses=1] + %52 = lshr i32 %51, 3 ; <i32> [#uses=1] + %53 = and i32 %52, 1023 ; <i32> [#uses=1] + %.sum14 = add i32 %53, 128 ; <i32> [#uses=1] + %54 = getelementptr i8* %0, i32 %.sum14 ; 
<i8*> [#uses=1] + %55 = load i8* %54, align 1 ; <i8> [#uses=1] + store i8 %55, i8* null, align 1 + %56 = getelementptr i8* %40, i32 %.sum10 ; <i8*> [#uses=1] + store i8 0, i8* %56, align 1 + %57 = load i8* null, align 1 ; <i8> [#uses=1] + %58 = getelementptr i8* %40, i32 %.sum8 ; <i8*> [#uses=1] + store i8 %57, i8* %58, align 1 + %59 = fadd float undef, %48 ; <float> [#uses=1] + %60 = fptosi float %59 to i32 ; <i32> [#uses=1] + %61 = add i32 %60, 4 ; <i32> [#uses=1] + %62 = lshr i32 %61, 3 ; <i32> [#uses=1] + %63 = and i32 %62, 1023 ; <i32> [#uses=1] + %.sum7 = add i32 %63, 128 ; <i32> [#uses=1] + %64 = getelementptr i8* %0, i32 %.sum7 ; <i8*> [#uses=1] + %65 = load i8* %64, align 1 ; <i8> [#uses=1] + %66 = getelementptr i8* %40, i32 %.sum6 ; <i8*> [#uses=1] + store i8 %65, i8* %66, align 1 + %67 = fptosi float undef to i32 ; <i32> [#uses=1] + %68 = add i32 %67, 4 ; <i32> [#uses=1] + %69 = lshr i32 %68, 3 ; <i32> [#uses=1] + %70 = and i32 %69, 1023 ; <i32> [#uses=1] + %.sum5 = add i32 %70, 128 ; <i32> [#uses=1] + %71 = getelementptr i8* %0, i32 %.sum5 ; <i8*> [#uses=1] + %72 = load i8* %71, align 1 ; <i8> [#uses=1] + store i8 %72, i8* undef, align 1 + %73 = fadd float %47, undef ; <float> [#uses=1] + %74 = fptosi float %73 to i32 ; <i32> [#uses=1] + %75 = add i32 %74, 4 ; <i32> [#uses=1] + %76 = lshr i32 %75, 3 ; <i32> [#uses=1] + %77 = and i32 %76, 1023 ; <i32> [#uses=1] + %.sum3 = add i32 %77, 128 ; <i32> [#uses=1] + %78 = getelementptr i8* %0, i32 %.sum3 ; <i8*> [#uses=1] + %79 = load i8* %78, align 1 ; <i8> [#uses=1] + store i8 %79, i8* undef, align 1 + %80 = fsub float %47, undef ; <float> [#uses=1] + %81 = fptosi float %80 to i32 ; <i32> [#uses=1] + %82 = add i32 %81, 4 ; <i32> [#uses=1] + %83 = lshr i32 %82, 3 ; <i32> [#uses=1] + %84 = and i32 %83, 1023 ; <i32> [#uses=1] + %.sum1 = add i32 %84, 128 ; <i32> [#uses=1] + %85 = getelementptr i8* %0, i32 %.sum1 ; <i8*> [#uses=1] + %86 = load i8* %85, align 1 ; <i8> [#uses=1] + %87 = getelementptr i8* %40, i32 
%.sum ; <i8*> [#uses=1] + store i8 %86, i8* %87, align 1 + %88 = add i32 %ctr.116, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %88, 8 ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb8 + +return: ; preds = %bb8 + ret void +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll b/src/LLVM/test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll new file mode 100644 index 0000000..095aecc --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll
@@ -0,0 +1,85 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim | FileCheck %s + +@csize = external global [100 x [20 x [4 x i8]]] ; <[100 x [20 x [4 x i8]]]*> [#uses=1] +@vsize = external global [100 x [20 x [4 x i8]]] ; <[100 x [20 x [4 x i8]]]*> [#uses=1] +@cll = external global [20 x [10 x i8]] ; <[20 x [10 x i8]]*> [#uses=1] +@lefline = external global [100 x [20 x i32]] ; <[100 x [20 x i32]]*> [#uses=1] +@sep = external global [20 x i32] ; <[20 x i32]*> [#uses=1] + +define void @main(i32 %argc, i8** %argv) noreturn nounwind { +; CHECK: main: +; CHECK: ldrb +entry: + %nb.i.i.i = alloca [25 x i8], align 1 ; <[25 x i8]*> [#uses=0] + %line.i.i.i = alloca [200 x i8], align 1 ; <[200 x i8]*> [#uses=1] + %line.i = alloca [1024 x i8], align 1 ; <[1024 x i8]*> [#uses=0] + br i1 undef, label %bb.i.i, label %bb4.preheader.i + +bb.i.i: ; preds = %entry + unreachable + +bb4.preheader.i: ; preds = %entry + br i1 undef, label %tbl.exit, label %bb.i.preheader + +bb.i.preheader: ; preds = %bb4.preheader.i + %line3.i.i.i = getelementptr [200 x i8]* %line.i.i.i, i32 0, i32 0 ; <i8*> [#uses=1] + br label %bb.i + +bb.i: ; preds = %bb4.backedge.i, %bb.i.preheader + br i1 undef, label %bb3.i, label %bb4.backedge.i + +bb3.i: ; preds = %bb.i + br i1 undef, label %bb2.i184.i.i, label %bb.i183.i.i + +bb.i183.i.i: ; preds = %bb.i183.i.i, %bb3.i + br i1 undef, label %bb2.i184.i.i, label %bb.i183.i.i + +bb2.i184.i.i: ; preds = %bb.i183.i.i, %bb3.i + br i1 undef, label %bb5.i185.i.i, label %bb35.preheader.i.i.i + +bb35.preheader.i.i.i: ; preds = %bb2.i184.i.i + %0 = load i8* %line3.i.i.i, align 1 ; <i8> [#uses=1] + %1 = icmp eq i8 %0, 59 ; <i1> [#uses=1] + br i1 %1, label %bb36.i.i.i, label %bb9.i186.i.i + +bb5.i185.i.i: ; preds = %bb2.i184.i.i + br label %bb.i171.i.i + +bb9.i186.i.i: ; preds = %bb35.preheader.i.i.i + unreachable + +bb36.i.i.i: ; preds = %bb35.preheader.i.i.i + br label %bb.i171.i.i + +bb.i171.i.i: ; preds = 
%bb3.i176.i.i, %bb36.i.i.i, %bb5.i185.i.i + %2 = phi i32 [ %4, %bb3.i176.i.i ], [ 0, %bb36.i.i.i ], [ 0, %bb5.i185.i.i ] ; <i32> [#uses=6] + %scevgep16.i.i.i = getelementptr [20 x i32]* @sep, i32 0, i32 %2 ; <i32*> [#uses=1] + %scevgep18.i.i.i = getelementptr [20 x [10 x i8]]* @cll, i32 0, i32 %2, i32 0 ; <i8*> [#uses=0] + store i32 -1, i32* %scevgep16.i.i.i, align 4 + br label %bb1.i175.i.i + +bb1.i175.i.i: ; preds = %bb1.i175.i.i, %bb.i171.i.i + %i.03.i172.i.i = phi i32 [ 0, %bb.i171.i.i ], [ %3, %bb1.i175.i.i ] ; <i32> [#uses=4] + %scevgep11.i.i.i = getelementptr [100 x [20 x i32]]* @lefline, i32 0, i32 %i.03.i172.i.i, i32 %2 ; <i32*> [#uses=1] + %scevgep12.i.i.i = getelementptr [100 x [20 x [4 x i8]]]* @vsize, i32 0, i32 %i.03.i172.i.i, i32 %2, i32 0 ; <i8*> [#uses=1] + %scevgep13.i.i.i = getelementptr [100 x [20 x [4 x i8]]]* @csize, i32 0, i32 %i.03.i172.i.i, i32 %2, i32 0 ; <i8*> [#uses=0] + store i8 0, i8* %scevgep12.i.i.i, align 1 + store i32 0, i32* %scevgep11.i.i.i, align 4 + store i32 108, i32* undef, align 4 + %3 = add i32 %i.03.i172.i.i, 1 ; <i32> [#uses=2] + %exitcond.i174.i.i = icmp eq i32 %3, 100 ; <i1> [#uses=1] + br i1 %exitcond.i174.i.i, label %bb3.i176.i.i, label %bb1.i175.i.i + +bb3.i176.i.i: ; preds = %bb1.i175.i.i + %4 = add i32 %2, 1 ; <i32> [#uses=1] + br i1 undef, label %bb5.i177.i.i, label %bb.i171.i.i + +bb5.i177.i.i: ; preds = %bb3.i176.i.i + unreachable + +bb4.backedge.i: ; preds = %bb.i + br i1 undef, label %tbl.exit, label %bb.i + +tbl.exit: ; preds = %bb4.backedge.i, %bb4.preheader.i + unreachable +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/2009-08-02-CoalescerBug.ll b/src/LLVM/test/CodeGen/Thumb2/2009-08-02-CoalescerBug.ll new file mode 100644 index 0000000..0b56103 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2009-08-02-CoalescerBug.ll
@@ -0,0 +1,46 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim + + %0 = type { void (%"struct.xalanc_1_8::FormatterToXML"*, i16)*, i32 } ; type %0 + %1 = type { void (%"struct.xalanc_1_8::FormatterToXML"*, i16*)*, i32 } ; type %1 + %2 = type { void (%"struct.xalanc_1_8::FormatterToXML"*, %"struct.xalanc_1_8::XalanDOMString"*)*, i32 } ; type %2 + %3 = type { void (%"struct.xalanc_1_8::FormatterToXML"*, i16*, i32, i32)*, i32 } ; type %3 + %4 = type { void (%"struct.xalanc_1_8::FormatterToXML"*)*, i32 } ; type %4 + %"struct.std::CharVectorType" = type { %"struct.std::_Vector_base<char,std::allocator<char> >" } + %"struct.std::_Bit_const_iterator" = type { %"struct.std::_Bit_iterator_base" } + %"struct.std::_Bit_iterator_base" = type { i32*, i32 } + %"struct.std::_Bvector_base<std::allocator<bool> >" = type { %"struct.std::_Bvector_base<std::allocator<bool> >::_Bvector_impl" } + %"struct.std::_Bvector_base<std::allocator<bool> >::_Bvector_impl" = type { %"struct.std::_Bit_const_iterator", %"struct.std::_Bit_const_iterator", i32* } + %"struct.std::_Vector_base<char,std::allocator<char> >" = type { %"struct.std::_Vector_base<char,std::allocator<char> >::_Vector_impl" } + %"struct.std::_Vector_base<char,std::allocator<char> >::_Vector_impl" = type { i8*, i8*, i8* } + %"struct.std::_Vector_base<short unsigned int,std::allocator<short unsigned int> >" = type { %"struct.std::_Vector_base<short unsigned int,std::allocator<short unsigned int> >::_Vector_impl" } + %"struct.std::_Vector_base<short unsigned int,std::allocator<short unsigned int> >::_Vector_impl" = type { i16*, i16*, i16* } + %"struct.std::basic_ostream<char,std::char_traits<char> >.base" = type { i32 (...)** } + %"struct.std::vector<bool,std::allocator<bool> >" = type { %"struct.std::_Bvector_base<std::allocator<bool> >" } + %"struct.std::vector<short unsigned int,std::allocator<short unsigned int> >" = type { %"struct.std::_Vector_base<short unsigned 
int,std::allocator<short unsigned int> >" } + %"struct.xalanc_1_8::FormatterListener" = type { %"struct.std::basic_ostream<char,std::char_traits<char> >.base", %"struct.std::basic_ostream<char,std::char_traits<char> >.base"*, i32 } + %"struct.xalanc_1_8::FormatterToXML" = type { %"struct.xalanc_1_8::FormatterListener", %"struct.std::basic_ostream<char,std::char_traits<char> >.base"*, %"struct.xalanc_1_8::XalanOutputStream"*, i16, [256 x i16], [256 x i16], i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, %"struct.xalanc_1_8::XalanDOMString", %"struct.xalanc_1_8::XalanDOMString", %"struct.xalanc_1_8::XalanDOMString", i32, i32, %"struct.std::vector<bool,std::allocator<bool> >", %"struct.xalanc_1_8::XalanDOMString", i8, i8, i8, i8, i8, %"struct.xalanc_1_8::XalanDOMString", %"struct.xalanc_1_8::XalanDOMString", %"struct.xalanc_1_8::XalanDOMString", %"struct.xalanc_1_8::XalanDOMString", %"struct.std::vector<short unsigned int,std::allocator<short unsigned int> >", i32, %"struct.std::CharVectorType", %"struct.std::vector<bool,std::allocator<bool> >", %0, %1, %2, %3, %0, %1, %2, %3, %4, i16*, i32 } + %"struct.xalanc_1_8::XalanDOMString" = type { %"struct.std::vector<short unsigned int,std::allocator<short unsigned int> >", i32 } + %"struct.xalanc_1_8::XalanOutputStream" = type { i32 (...)**, i32, %"struct.std::basic_ostream<char,std::char_traits<char> >.base"*, i32, %"struct.std::vector<short unsigned int,std::allocator<short unsigned int> >", %"struct.xalanc_1_8::XalanDOMString", i8, i8, %"struct.std::CharVectorType" } + +declare void @_ZN10xalanc_1_814FormatterToXML17writeParentTagEndEv(%"struct.xalanc_1_8::FormatterToXML"*) + +define void @_ZN10xalanc_1_814FormatterToXML5cdataEPKtj(%"struct.xalanc_1_8::FormatterToXML"* %this, i16* %ch, i32 %length) { +entry: + %0 = getelementptr %"struct.xalanc_1_8::FormatterToXML"* %this, i32 0, i32 13 ; <i8*> [#uses=1] + br i1 undef, label %bb4, label %bb + +bb: ; preds = %entry + store i8 0, i8* %0, align 1 + %1 = getelementptr 
%"struct.xalanc_1_8::FormatterToXML"* %this, i32 0, i32 0, i32 0, i32 0 ; <i32 (...)***> [#uses=1] + %2 = load i32 (...)*** %1, align 4 ; <i32 (...)**> [#uses=1] + %3 = getelementptr i32 (...)** %2, i32 11 ; <i32 (...)**> [#uses=1] + %4 = load i32 (...)** %3, align 4 ; <i32 (...)*> [#uses=1] + %5 = bitcast i32 (...)* %4 to void (%"struct.xalanc_1_8::FormatterToXML"*, i16*, i32)* ; <void (%"struct.xalanc_1_8::FormatterToXML"*, i16*, i32)*> [#uses=1] + tail call void %5(%"struct.xalanc_1_8::FormatterToXML"* %this, i16* %ch, i32 %length) + ret void + +bb4: ; preds = %entry + tail call void @_ZN10xalanc_1_814FormatterToXML17writeParentTagEndEv(%"struct.xalanc_1_8::FormatterToXML"* %this) + tail call void undef(%"struct.xalanc_1_8::FormatterToXML"* %this, i16* %ch, i32 0, i32 %length, i8 zeroext undef) + ret void +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/2009-08-04-CoalescerAssert.ll b/src/LLVM/test/CodeGen/Thumb2/2009-08-04-CoalescerAssert.ll new file mode 100644 index 0000000..acff261 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2009-08-04-CoalescerAssert.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabi +; PR4681 + + %struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i32, i32, [40 x i8] } + %struct._IO_marker = type { %struct._IO_marker*, %struct.FILE*, i32 } +@.str2 = external constant [30 x i8], align 1 ; <[30 x i8]*> [#uses=1] + +define i32 @__mf_heuristic_check(i32 %ptr, i32 %ptr_high) nounwind { +entry: + br i1 undef, label %bb1, label %bb + +bb: ; preds = %entry + unreachable + +bb1: ; preds = %entry + br i1 undef, label %bb9, label %bb2 + +bb2: ; preds = %bb1 + %0 = call i8* @llvm.frameaddress(i32 0) ; <i8*> [#uses=1] + %1 = call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* noalias undef, i8* noalias getelementptr ([30 x i8]* @.str2, i32 0, i32 0), i8* %0, i8* null) nounwind ; <i32> [#uses=0] + unreachable + +bb9: ; preds = %bb1 + ret i32 undef +} + +declare i8* @llvm.frameaddress(i32) nounwind readnone + +declare i32 @fprintf(%struct.FILE* noalias nocapture, i8* noalias nocapture, ...) nounwind
diff --git a/src/LLVM/test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll b/src/LLVM/test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll new file mode 100644 index 0000000..28ac28b --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll
@@ -0,0 +1,153 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim + + %0 = type { %struct.GAP } ; type %0 + %1 = type { i16, i8, i8 } ; type %1 + %2 = type { [2 x i32], [2 x i32] } ; type %2 + %3 = type { %struct.rec* } ; type %3 + %4 = type { i8, i8, i16, i8, i8, i8, i8 } ; type %4 + %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } + %struct.FILE_POS = type { i8, i8, i16, i32 } + %struct.FIRST_UNION = type { %struct.FILE_POS } + %struct.FOURTH_UNION = type { %struct.STYLE } + %struct.GAP = type { i8, i8, i16 } + %struct.LIST = type { %struct.rec*, %struct.rec* } + %struct.SECOND_UNION = type { %1 } + %struct.STYLE = type { %0, %0, i16, i16, i32 } + %struct.THIRD_UNION = type { %2 } + %struct.__sFILEX = type opaque + %struct.__sbuf = type { i8*, i32 } + %struct.head_type = type { [2 x %struct.LIST], %struct.FIRST_UNION, %struct.SECOND_UNION, %struct.THIRD_UNION, %struct.FOURTH_UNION, %struct.rec*, %3, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, i32 } + %struct.rec = type { %struct.head_type } +@.str24239 = external constant [20 x i8], align 1 ; <[20 x i8]*> [#uses=1] +@no_file_pos = external global %4 ; <%4*> [#uses=1] +@zz_tmp = external global %struct.rec* ; <%struct.rec**> [#uses=1] +@.str81872 = external constant [10 x i8], align 1 ; <[10 x i8]*> [#uses=1] +@out_fp = external global %struct.FILE* ; <%struct.FILE**> [#uses=2] +@cpexists = external global i32 ; <i32*> [#uses=2] +@.str212784 = external constant [17 x i8], align 1 ; <[17 x i8]*> [#uses=1] +@.str1822946 = external constant [8 x i8], align 1 ; <[8 x i8]*> [#uses=1] +@.str1842948 = external constant [11 x i8], align 1 ; <[11 x i8]*> [#uses=1] + +declare i32 @fprintf(%struct.FILE* nocapture, i8* 
nocapture, ...) nounwind + +declare i32 @"\01_fwrite"(i8*, i32, i32, i8*) + +declare %struct.FILE* @OpenIncGraphicFile(i8*, i8 zeroext, %struct.rec** nocapture, %struct.FILE_POS*, i32* nocapture) nounwind + +declare void @Error(i32, i32, i8*, i32, %struct.FILE_POS*, ...) nounwind + +declare i8* @fgets(i8*, i32, %struct.FILE* nocapture) nounwind + +define void @PS_PrintGraphicInclude(%struct.rec* %x, i32 %colmark, i32 %rowmark) nounwind { +entry: + br label %bb5 + +bb5: ; preds = %bb5, %entry + %.pn = phi %struct.rec* [ %y.0, %bb5 ], [ undef, %entry ] ; <%struct.rec*> [#uses=1] + %y.0.in = getelementptr %struct.rec* %.pn, i32 0, i32 0, i32 0, i32 1, i32 0 ; <%struct.rec**> [#uses=1] + %y.0 = load %struct.rec** %y.0.in ; <%struct.rec*> [#uses=2] + br i1 undef, label %bb5, label %bb6 + +bb6: ; preds = %bb5 + %0 = call %struct.FILE* @OpenIncGraphicFile(i8* undef, i8 zeroext 0, %struct.rec** undef, %struct.FILE_POS* null, i32* undef) nounwind ; <%struct.FILE*> [#uses=1] + br i1 false, label %bb.i, label %FontHalfXHeight.exit + +bb.i: ; preds = %bb6 + br label %FontHalfXHeight.exit + +FontHalfXHeight.exit: ; preds = %bb.i, %bb6 + br i1 undef, label %bb.i1, label %FontSize.exit + +bb.i1: ; preds = %FontHalfXHeight.exit + br label %FontSize.exit + +FontSize.exit: ; preds = %bb.i1, %FontHalfXHeight.exit + %1 = load i32* undef, align 4 ; <i32> [#uses=1] + %2 = icmp ult i32 0, undef ; <i1> [#uses=1] + br i1 %2, label %bb.i5, label %FontName.exit + +bb.i5: ; preds = %FontSize.exit + call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 1, i32 2, i8* getelementptr ([20 x i8]* @.str24239, i32 0, i32 0), i32 0, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*), i8* getelementptr ([10 x i8]* @.str81872, i32 0, i32 0)) nounwind + br label %FontName.exit + +FontName.exit: ; preds = %bb.i5, %FontSize.exit + %3 = call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* undef, i8* getelementptr ([8 x i8]* @.str1822946, i32 0, i32 0), i32 %1, i8* undef) 
nounwind ; <i32> [#uses=0] + %4 = call i32 @"\01_fwrite"(i8* getelementptr ([11 x i8]* @.str1842948, i32 0, i32 0), i32 1, i32 10, i8* undef) nounwind ; <i32> [#uses=0] + %5 = sub i32 %colmark, undef ; <i32> [#uses=1] + %6 = sub i32 %rowmark, undef ; <i32> [#uses=1] + %7 = load %struct.FILE** @out_fp, align 4 ; <%struct.FILE*> [#uses=1] + %8 = call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %7, i8* getelementptr ([17 x i8]* @.str212784, i32 0, i32 0), i32 %5, i32 %6) nounwind ; <i32> [#uses=0] + store i32 0, i32* @cpexists, align 4 + %9 = getelementptr %struct.rec* %y.0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 1 ; <i32*> [#uses=1] + %10 = load i32* %9, align 4 ; <i32> [#uses=1] + %11 = sub i32 0, %10 ; <i32> [#uses=1] + %12 = load %struct.FILE** @out_fp, align 4 ; <%struct.FILE*> [#uses=1] + %13 = call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %12, i8* getelementptr ([17 x i8]* @.str212784, i32 0, i32 0), i32 undef, i32 %11) nounwind ; <i32> [#uses=0] + store i32 0, i32* @cpexists, align 4 + br label %bb100.outer.outer + +bb100.outer.outer: ; preds = %bb79.critedge, %bb1.i3, %FontName.exit + %x_addr.0.ph.ph = phi %struct.rec* [ %x, %FontName.exit ], [ null, %bb79.critedge ], [ null, %bb1.i3 ] ; <%struct.rec*> [#uses=1] + %14 = getelementptr %struct.rec* %x_addr.0.ph.ph, i32 0, i32 0, i32 1, i32 0 ; <%struct.FILE_POS*> [#uses=0] + br label %bb100.outer + +bb.i80: ; preds = %bb3.i85 + br i1 undef, label %bb2.i84, label %bb2.i51 + +bb2.i84: ; preds = %bb100.outer, %bb.i80 + br i1 undef, label %bb3.i77, label %bb3.i85 + +bb3.i85: ; preds = %bb2.i84 + br i1 false, label %StringBeginsWith.exit88, label %bb.i80 + +StringBeginsWith.exit88: ; preds = %bb3.i85 + br i1 undef, label %bb3.i77, label %bb2.i51 + +bb2.i.i68: ; preds = %bb3.i77 + br label %bb3.i77 + +bb3.i77: ; preds = %bb2.i.i68, %StringBeginsWith.exit88, %bb2.i84 + br i1 false, label %bb1.i58, label %bb2.i.i68 + +bb1.i58: ; preds = %bb3.i77 + unreachable + +bb.i47: ; preds = %bb3.i52 + br i1 
undef, label %bb2.i51, label %bb2.i.i15.critedge + +bb2.i51: ; preds = %bb.i47, %StringBeginsWith.exit88, %bb.i80 + %15 = load i8* undef, align 1 ; <i8> [#uses=0] + br i1 false, label %StringBeginsWith.exit55thread-split, label %bb3.i52 + +bb3.i52: ; preds = %bb2.i51 + br i1 false, label %StringBeginsWith.exit55, label %bb.i47 + +StringBeginsWith.exit55thread-split: ; preds = %bb2.i51 + br label %StringBeginsWith.exit55 + +StringBeginsWith.exit55: ; preds = %StringBeginsWith.exit55thread-split, %bb3.i52 + br label %bb2.i41 + +bb2.i41: ; preds = %bb2.i41, %StringBeginsWith.exit55 + br label %bb2.i41 + +bb2.i.i15.critedge: ; preds = %bb.i47 + %16 = call i8* @fgets(i8* undef, i32 512, %struct.FILE* %0) nounwind ; <i8*> [#uses=0] + %iftmp.560.0 = select i1 undef, i32 2, i32 0 ; <i32> [#uses=1] + br label %bb100.outer + +bb2.i8: ; preds = %bb100.outer + br i1 undef, label %bb1.i3, label %bb79.critedge + +bb1.i3: ; preds = %bb2.i8 + br label %bb100.outer.outer + +bb79.critedge: ; preds = %bb2.i8 + store %struct.rec* null, %struct.rec** @zz_tmp, align 4 + br label %bb100.outer.outer + +bb100.outer: ; preds = %bb2.i.i15.critedge, %bb100.outer.outer + %state.0.ph = phi i32 [ 0, %bb100.outer.outer ], [ %iftmp.560.0, %bb2.i.i15.critedge ] ; <i32> [#uses=1] + %cond = icmp eq i32 %state.0.ph, 1 ; <i1> [#uses=1] + br i1 %cond, label %bb2.i8, label %bb2.i84 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll b/src/LLVM/test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll new file mode 100644 index 0000000..88accf8 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll
@@ -0,0 +1,508 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim -O3 + + %0 = type { i16, i8, i8 } ; type %0 + %1 = type { [2 x i32], [2 x i32] } ; type %1 + %2 = type { %struct.GAP } ; type %2 + %3 = type { %struct.rec* } ; type %3 + %4 = type { i8, i8, i16, i8, i8, i8, i8 } ; type %4 + %5 = type { i8, i8, i8, i8 } ; type %5 + %struct.COMPOSITE = type { i8, i16, i16 } + %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } + %struct.FILE_POS = type { i8, i8, i16, i32 } + %struct.FIRST_UNION = type { %struct.FILE_POS } + %struct.FONT_INFO = type { %struct.metrics*, i8*, i16*, %struct.COMPOSITE*, i32, %struct.rec*, %struct.rec*, i16, i16, i16*, i8*, i8*, i16* } + %struct.FOURTH_UNION = type { %struct.STYLE } + %struct.GAP = type { i8, i8, i16 } + %struct.LIST = type { %struct.rec*, %struct.rec* } + %struct.SECOND_UNION = type { %0 } + %struct.STYLE = type { %2, %2, i16, i16, i32 } + %struct.THIRD_UNION = type { %1 } + %struct.__sFILEX = type opaque + %struct.__sbuf = type { i8*, i32 } + %struct.head_type = type { [2 x %struct.LIST], %struct.FIRST_UNION, %struct.SECOND_UNION, %struct.THIRD_UNION, %struct.FOURTH_UNION, %struct.rec*, %3, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, i32 } + %struct.metrics = type { i16, i16, i16, i16, i16 } + %struct.rec = type { %struct.head_type } +@.str24239 = external constant [20 x i8], align 1 ; <[20 x i8]*> [#uses=1] +@no_file_pos = external global %4 ; <%4*> [#uses=1] +@.str19294 = external constant [9 x i8], align 1 ; <[9 x i8]*> [#uses=1] +@zz_lengths = external global [150 x i8] ; <[150 x i8]*> [#uses=1] +@next_free.4772 = external global i8** ; <i8***> [#uses=3] +@top_free.4773 = external global i8** ; <i8***> [#uses=2] 
+@.str1575 = external constant [32 x i8], align 1 ; <[32 x i8]*> [#uses=1] +@zz_free = external global [524 x %struct.rec*] ; <[524 x %struct.rec*]*> [#uses=2] +@zz_hold = external global %struct.rec* ; <%struct.rec**> [#uses=5] +@zz_tmp = external global %struct.rec* ; <%struct.rec**> [#uses=2] +@zz_res = external global %struct.rec* ; <%struct.rec**> [#uses=2] +@xx_link = external global %struct.rec* ; <%struct.rec**> [#uses=2] +@font_count = external global i32 ; <i32*> [#uses=1] +@.str81872 = external constant [10 x i8], align 1 ; <[10 x i8]*> [#uses=1] +@.str101874 = external constant [30 x i8], align 1 ; <[30 x i8]*> [#uses=1] +@.str111875 = external constant [17 x i8], align 1 ; <[17 x i8]*> [#uses=1] +@.str141878 = external constant [27 x i8], align 1 ; <[27 x i8]*> [#uses=1] +@out_fp = external global %struct.FILE* ; <%struct.FILE**> [#uses=3] +@.str192782 = external constant [17 x i8], align 1 ; <[17 x i8]*> [#uses=1] +@cpexists = external global i32 ; <i32*> [#uses=2] +@.str212784 = external constant [17 x i8], align 1 ; <[17 x i8]*> [#uses=1] +@currentfont = external global i32 ; <i32*> [#uses=3] +@wordcount = external global i32 ; <i32*> [#uses=1] +@needs = external global %struct.rec* ; <%struct.rec**> [#uses=1] +@.str742838 = external constant [6 x i8], align 1 ; <[6 x i8]*> [#uses=1] +@.str752839 = external constant [10 x i8], align 1 ; <[10 x i8]*> [#uses=1] +@.str1802944 = external constant [40 x i8], align 1 ; <[40 x i8]*> [#uses=1] +@.str1822946 = external constant [8 x i8], align 1 ; <[8 x i8]*> [#uses=1] +@.str1842948 = external constant [11 x i8], align 1 ; <[11 x i8]*> [#uses=1] +@.str1852949 = external constant [23 x i8], align 1 ; <[23 x i8]*> [#uses=1] +@.str1872951 = external constant [17 x i8], align 1 ; <[17 x i8]*> [#uses=1] +@.str1932957 = external constant [26 x i8], align 1 ; <[26 x i8]*> [#uses=1] + +declare i32 @fprintf(%struct.FILE* nocapture, i8* nocapture, ...) 
nounwind + +declare i32 @"\01_fwrite"(i8*, i32, i32, i8*) + +declare i32 @remove(i8* nocapture) nounwind + +declare %struct.FILE* @OpenIncGraphicFile(i8*, i8 zeroext, %struct.rec** nocapture, %struct.FILE_POS*, i32* nocapture) nounwind + +declare %struct.rec* @MakeWord(i32, i8* nocapture, %struct.FILE_POS*) nounwind + +declare void @Error(i32, i32, i8*, i32, %struct.FILE_POS*, ...) nounwind + +declare i32 @"\01_fputs"(i8*, %struct.FILE*) + +declare noalias i8* @calloc(i32, i32) nounwind + +declare i8* @fgets(i8*, i32, %struct.FILE* nocapture) nounwind + +define void @PS_PrintGraphicInclude(%struct.rec* %x, i32 %colmark, i32 %rowmark) nounwind { +entry: + %buff = alloca [512 x i8], align 4 ; <[512 x i8]*> [#uses=5] + %0 = getelementptr %struct.rec* %x, i32 0, i32 0, i32 1, i32 0, i32 0 ; <i8*> [#uses=2] + %1 = load i8* %0, align 4 ; <i8> [#uses=1] + %2 = add i8 %1, -94 ; <i8> [#uses=1] + %3 = icmp ugt i8 %2, 1 ; <i1> [#uses=1] + br i1 %3, label %bb, label %bb1 + +bb: ; preds = %entry + br label %bb1 + +bb1: ; preds = %bb, %entry + %4 = getelementptr %struct.rec* %x, i32 0, i32 0, i32 2 ; <%struct.SECOND_UNION*> [#uses=1] + %5 = bitcast %struct.SECOND_UNION* %4 to %5* ; <%5*> [#uses=1] + %6 = getelementptr %5* %5, i32 0, i32 1 ; <i8*> [#uses=1] + %7 = load i8* %6, align 1 ; <i8> [#uses=1] + %8 = icmp eq i8 %7, 0 ; <i1> [#uses=1] + br i1 %8, label %bb2, label %bb3 + +bb2: ; preds = %bb1 + call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 1, i32 2, i8* getelementptr ([20 x i8]* @.str24239, i32 0, i32 0), i32 0, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*), i8* getelementptr ([40 x i8]* @.str1802944, i32 0, i32 0)) nounwind + br label %bb3 + +bb3: ; preds = %bb2, %bb1 + %9 = load %struct.rec** undef, align 4 ; <%struct.rec*> [#uses=0] + br label %bb5 + +bb5: ; preds = %bb5, %bb3 + %y.0 = load %struct.rec** null ; <%struct.rec*> [#uses=2] + br i1 false, label %bb5, label %bb6 + +bb6: ; preds = %bb5 + %10 = load i8* %0, align 4 ; <i8> 
[#uses=1] + %11 = getelementptr %struct.rec* %y.0, i32 0, i32 0, i32 1, i32 0 ; <%struct.FILE_POS*> [#uses=1] + %12 = call %struct.FILE* @OpenIncGraphicFile(i8* undef, i8 zeroext %10, %struct.rec** null, %struct.FILE_POS* %11, i32* undef) nounwind ; <%struct.FILE*> [#uses=4] + br i1 false, label %bb7, label %bb8 + +bb7: ; preds = %bb6 + unreachable + +bb8: ; preds = %bb6 + %13 = and i32 undef, 4095 ; <i32> [#uses=2] + %14 = load i32* @currentfont, align 4 ; <i32> [#uses=0] + br i1 false, label %bb10, label %bb9 + +bb9: ; preds = %bb8 + %15 = icmp ult i32 0, %13 ; <i1> [#uses=1] + br i1 %15, label %bb.i, label %FontHalfXHeight.exit + +bb.i: ; preds = %bb9 + call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 1, i32 2, i8* getelementptr ([20 x i8]* @.str24239, i32 0, i32 0), i32 0, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*), i8* getelementptr ([17 x i8]* @.str111875, i32 0, i32 0)) nounwind + %.pre186 = load i32* @currentfont, align 4 ; <i32> [#uses=1] + br label %FontHalfXHeight.exit + +FontHalfXHeight.exit: ; preds = %bb.i, %bb9 + %16 = phi i32 [ %.pre186, %bb.i ], [ %13, %bb9 ] ; <i32> [#uses=1] + br i1 false, label %bb.i1, label %bb1.i + +bb.i1: ; preds = %FontHalfXHeight.exit + br label %bb1.i + +bb1.i: ; preds = %bb.i1, %FontHalfXHeight.exit + br i1 undef, label %bb2.i, label %FontSize.exit + +bb2.i: ; preds = %bb1.i + call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 37, i32 61, i8* getelementptr ([30 x i8]* @.str101874, i32 0, i32 0), i32 1, %struct.FILE_POS* null) nounwind + unreachable + +FontSize.exit: ; preds = %bb1.i + %17 = getelementptr %struct.FONT_INFO* undef, i32 %16, i32 5 ; <%struct.rec**> [#uses=0] + %18 = load i32* undef, align 4 ; <i32> [#uses=1] + %19 = load i32* @currentfont, align 4 ; <i32> [#uses=2] + %20 = load i32* @font_count, align 4 ; <i32> [#uses=1] + %21 = icmp ult i32 %20, %19 ; <i1> [#uses=1] + br i1 %21, label %bb.i5, label %FontName.exit + +bb.i5: ; preds = %FontSize.exit + 
call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 1, i32 2, i8* getelementptr ([20 x i8]* @.str24239, i32 0, i32 0), i32 0, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*), i8* getelementptr ([10 x i8]* @.str81872, i32 0, i32 0)) nounwind + br label %FontName.exit + +FontName.exit: ; preds = %bb.i5, %FontSize.exit + %22 = phi %struct.FONT_INFO* [ undef, %bb.i5 ], [ undef, %FontSize.exit ] ; <%struct.FONT_INFO*> [#uses=1] + %23 = getelementptr %struct.FONT_INFO* %22, i32 %19, i32 5 ; <%struct.rec**> [#uses=0] + %24 = call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* undef, i8* getelementptr ([8 x i8]* @.str1822946, i32 0, i32 0), i32 %18, i8* null) nounwind ; <i32> [#uses=0] + br label %bb10 + +bb10: ; preds = %FontName.exit, %bb8 + %25 = call i32 @"\01_fwrite"(i8* getelementptr ([11 x i8]* @.str1842948, i32 0, i32 0), i32 1, i32 10, i8* undef) nounwind ; <i32> [#uses=0] + %26 = sub i32 %rowmark, undef ; <i32> [#uses=1] + %27 = load %struct.FILE** @out_fp, align 4 ; <%struct.FILE*> [#uses=1] + %28 = call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %27, i8* getelementptr ([17 x i8]* @.str212784, i32 0, i32 0), i32 undef, i32 %26) nounwind ; <i32> [#uses=0] + store i32 0, i32* @cpexists, align 4 + %29 = call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* undef, i8* getelementptr ([17 x i8]* @.str192782, i32 0, i32 0), double 2.000000e+01, double 2.000000e+01) nounwind ; <i32> [#uses=0] + %30 = getelementptr %struct.rec* %y.0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0 ; <i32*> [#uses=1] + %31 = load i32* %30, align 4 ; <i32> [#uses=1] + %32 = sub i32 0, %31 ; <i32> [#uses=1] + %33 = load i32* undef, align 4 ; <i32> [#uses=1] + %34 = sub i32 0, %33 ; <i32> [#uses=1] + %35 = load %struct.FILE** @out_fp, align 4 ; <%struct.FILE*> [#uses=1] + %36 = call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %35, i8* getelementptr ([17 x i8]* @.str212784, i32 0, i32 0), i32 %32, i32 %34) nounwind ; <i32> [#uses=0] + 
store i32 0, i32* @cpexists, align 4 + %37 = load %struct.rec** null, align 4 ; <%struct.rec*> [#uses=1] + %38 = getelementptr %struct.rec* %37, i32 0, i32 0, i32 4 ; <%struct.FOURTH_UNION*> [#uses=1] + %39 = call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* undef, i8* getelementptr ([23 x i8]* @.str1852949, i32 0, i32 0), %struct.FOURTH_UNION* %38) nounwind ; <i32> [#uses=0] + %buff14 = getelementptr [512 x i8]* %buff, i32 0, i32 0 ; <i8*> [#uses=5] + %40 = call i8* @fgets(i8* %buff14, i32 512, %struct.FILE* %12) nounwind ; <i8*> [#uses=0] + %iftmp.506.0 = select i1 undef, i32 2, i32 0 ; <i32> [#uses=1] + %41 = getelementptr [512 x i8]* %buff, i32 0, i32 26 ; <i8*> [#uses=1] + br label %bb100.outer.outer + +bb100.outer.outer: ; preds = %bb83, %bb10 + %state.0.ph.ph = phi i32 [ %iftmp.506.0, %bb10 ], [ undef, %bb83 ] ; <i32> [#uses=1] + %x_addr.0.ph.ph = phi %struct.rec* [ %x, %bb10 ], [ %71, %bb83 ] ; <%struct.rec*> [#uses=1] + %42 = getelementptr %struct.rec* %x_addr.0.ph.ph, i32 0, i32 0, i32 1, i32 0 ; <%struct.FILE_POS*> [#uses=0] + br label %bb100.outer + +bb.i80: ; preds = %bb3.i85 + %43 = icmp eq i8 %44, %46 ; <i1> [#uses=1] + %indvar.next.i79 = add i32 %indvar.i81, 1 ; <i32> [#uses=1] + br i1 %43, label %bb2.i84, label %bb2.i51 + +bb2.i84: ; preds = %bb100.outer, %bb.i80 + %indvar.i81 = phi i32 [ %indvar.next.i79, %bb.i80 ], [ 0, %bb100.outer ] ; <i32> [#uses=3] + %pp.0.i82 = getelementptr [27 x i8]* @.str141878, i32 0, i32 %indvar.i81 ; <i8*> [#uses=2] + %sp.0.i83 = getelementptr [512 x i8]* %buff, i32 0, i32 %indvar.i81 ; <i8*> [#uses=1] + %44 = load i8* %sp.0.i83, align 1 ; <i8> [#uses=2] + %45 = icmp eq i8 %44, 0 ; <i1> [#uses=1] + br i1 %45, label %StringBeginsWith.exit88thread-split, label %bb3.i85 + +bb3.i85: ; preds = %bb2.i84 + %46 = load i8* %pp.0.i82, align 1 ; <i8> [#uses=3] + %47 = icmp eq i8 %46, 0 ; <i1> [#uses=1] + br i1 %47, label %StringBeginsWith.exit88, label %bb.i80 + +StringBeginsWith.exit88thread-split: ; preds = %bb2.i84 + 
%.pr = load i8* %pp.0.i82 ; <i8> [#uses=1] + br label %StringBeginsWith.exit88 + +StringBeginsWith.exit88: ; preds = %StringBeginsWith.exit88thread-split, %bb3.i85 + %48 = phi i8 [ %.pr, %StringBeginsWith.exit88thread-split ], [ %46, %bb3.i85 ] ; <i8> [#uses=1] + %phitmp91 = icmp eq i8 %48, 0 ; <i1> [#uses=1] + br i1 %phitmp91, label %bb3.i77, label %bb2.i51 + +bb2.i.i68: ; preds = %bb3.i77 + br i1 false, label %bb2.i51, label %bb2.i75 + +bb2.i75: ; preds = %bb2.i.i68 + br label %bb3.i77 + +bb3.i77: ; preds = %bb2.i75, %StringBeginsWith.exit88 + %sp.0.i76 = getelementptr [512 x i8]* %buff, i32 0, i32 undef ; <i8*> [#uses=1] + %49 = load i8* %sp.0.i76, align 1 ; <i8> [#uses=1] + %50 = icmp eq i8 %49, 0 ; <i1> [#uses=1] + br i1 %50, label %bb24, label %bb2.i.i68 + +bb24: ; preds = %bb3.i77 + %51 = call %struct.rec* @MakeWord(i32 11, i8* %41, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*)) nounwind ; <%struct.rec*> [#uses=0] + %52 = load i8* getelementptr ([150 x i8]* @zz_lengths, i32 0, i32 0), align 4 ; <i8> [#uses=1] + %53 = zext i8 %52 to i32 ; <i32> [#uses=2] + %54 = getelementptr [524 x %struct.rec*]* @zz_free, i32 0, i32 %53 ; <%struct.rec**> [#uses=2] + %55 = load %struct.rec** %54, align 4 ; <%struct.rec*> [#uses=3] + %56 = icmp eq %struct.rec* %55, null ; <i1> [#uses=1] + br i1 %56, label %bb27, label %bb28 + +bb27: ; preds = %bb24 + br i1 undef, label %bb.i56, label %GetMemory.exit62 + +bb.i56: ; preds = %bb27 + br i1 undef, label %bb1.i58, label %bb2.i60 + +bb1.i58: ; preds = %bb.i56 + call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 31, i32 1, i8* getelementptr ([32 x i8]* @.str1575, i32 0, i32 0), i32 1, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*)) nounwind + br label %bb2.i60 + +bb2.i60: ; preds = %bb1.i58, %bb.i56 + %.pre1.i59 = phi i8** [ undef, %bb1.i58 ], [ undef, %bb.i56 ] ; <i8**> [#uses=1] + store i8** undef, i8*** @top_free.4773, align 4 + br label %GetMemory.exit62 + +GetMemory.exit62: 
; preds = %bb2.i60, %bb27 + %57 = phi i8** [ %.pre1.i59, %bb2.i60 ], [ undef, %bb27 ] ; <i8**> [#uses=1] + %58 = getelementptr i8** %57, i32 %53 ; <i8**> [#uses=1] + store i8** %58, i8*** @next_free.4772, align 4 + store %struct.rec* undef, %struct.rec** @zz_hold, align 4 + br label %bb29 + +bb28: ; preds = %bb24 + store %struct.rec* %55, %struct.rec** @zz_hold, align 4 + %59 = load %struct.rec** null, align 4 ; <%struct.rec*> [#uses=1] + store %struct.rec* %59, %struct.rec** %54, align 4 + br label %bb29 + +bb29: ; preds = %bb28, %GetMemory.exit62 + %.pre184 = phi %struct.rec* [ %55, %bb28 ], [ undef, %GetMemory.exit62 ] ; <%struct.rec*> [#uses=3] + store i8 0, i8* undef + store %struct.rec* %.pre184, %struct.rec** @xx_link, align 4 + br i1 undef, label %bb35, label %bb31 + +bb31: ; preds = %bb29 + store %struct.rec* %.pre184, %struct.rec** undef + br label %bb35 + +bb35: ; preds = %bb31, %bb29 + br i1 undef, label %bb41, label %bb37 + +bb37: ; preds = %bb35 + %60 = load %struct.rec** null, align 4 ; <%struct.rec*> [#uses=1] + store %struct.rec* %60, %struct.rec** undef + store %struct.rec* undef, %struct.rec** null + store %struct.rec* %.pre184, %struct.rec** null, align 4 + br label %bb41 + +bb41: ; preds = %bb37, %bb35 + %61 = call i8* @fgets(i8* %buff14, i32 512, %struct.FILE* %12) nounwind ; <i8*> [#uses=1] + %62 = icmp eq i8* %61, null ; <i1> [#uses=1] + %iftmp.554.0 = select i1 %62, i32 2, i32 1 ; <i32> [#uses=1] + br label %bb100.outer + +bb.i47: ; preds = %bb3.i52 + %63 = icmp eq i8 %64, %65 ; <i1> [#uses=1] + br i1 %63, label %bb2.i51, label %bb2.i41 + +bb2.i51: ; preds = %bb.i47, %bb2.i.i68, %StringBeginsWith.exit88, %bb.i80 + %pp.0.i49 = getelementptr [17 x i8]* @.str1872951, i32 0, i32 0 ; <i8*> [#uses=1] + %64 = load i8* null, align 1 ; <i8> [#uses=1] + br i1 false, label %StringBeginsWith.exit55thread-split, label %bb3.i52 + +bb3.i52: ; preds = %bb2.i51 + %65 = load i8* %pp.0.i49, align 1 ; <i8> [#uses=1] + br i1 false, label 
%StringBeginsWith.exit55, label %bb.i47 + +StringBeginsWith.exit55thread-split: ; preds = %bb2.i51 + br label %StringBeginsWith.exit55 + +StringBeginsWith.exit55: ; preds = %StringBeginsWith.exit55thread-split, %bb3.i52 + br i1 false, label %bb49, label %bb2.i41 + +bb49: ; preds = %StringBeginsWith.exit55 + br label %bb2.i41 + +bb2.i41: ; preds = %bb2.i41, %bb49, %StringBeginsWith.exit55, %bb.i47 + br i1 false, label %bb2.i41, label %bb2.i.i15 + +bb2.i.i15: ; preds = %bb2.i41 + %pp.0.i.i13 = getelementptr [6 x i8]* @.str742838, i32 0, i32 0 ; <i8*> [#uses=1] + br i1 false, label %StringBeginsWith.exitthread-split.i18, label %bb3.i.i16 + +bb3.i.i16: ; preds = %bb2.i.i15 + %66 = load i8* %pp.0.i.i13, align 1 ; <i8> [#uses=1] + br label %StringBeginsWith.exit.i20 + +StringBeginsWith.exitthread-split.i18: ; preds = %bb2.i.i15 + br label %StringBeginsWith.exit.i20 + +StringBeginsWith.exit.i20: ; preds = %StringBeginsWith.exitthread-split.i18, %bb3.i.i16 + %67 = phi i8 [ undef, %StringBeginsWith.exitthread-split.i18 ], [ %66, %bb3.i.i16 ] ; <i8> [#uses=1] + %phitmp.i19 = icmp eq i8 %67, 0 ; <i1> [#uses=1] + br i1 %phitmp.i19, label %bb58, label %bb2.i6.i26 + +bb2.i6.i26: ; preds = %bb2.i6.i26, %StringBeginsWith.exit.i20 + %indvar.i3.i23 = phi i32 [ %indvar.next.i1.i21, %bb2.i6.i26 ], [ 0, %StringBeginsWith.exit.i20 ] ; <i32> [#uses=3] + %sp.0.i5.i25 = getelementptr [512 x i8]* %buff, i32 0, i32 %indvar.i3.i23 ; <i8*> [#uses=0] + %pp.0.i4.i24 = getelementptr [10 x i8]* @.str752839, i32 0, i32 %indvar.i3.i23 ; <i8*> [#uses=1] + %68 = load i8* %pp.0.i4.i24, align 1 ; <i8> [#uses=0] + %indvar.next.i1.i21 = add i32 %indvar.i3.i23, 1 ; <i32> [#uses=1] + br i1 undef, label %bb2.i6.i26, label %bb55 + +bb55: ; preds = %bb2.i6.i26 + %69 = call i32 @"\01_fputs"(i8* %buff14, %struct.FILE* undef) nounwind ; <i32> [#uses=0] + unreachable + +bb58: ; preds = %StringBeginsWith.exit.i20 + %70 = call i8* @fgets(i8* %buff14, i32 512, %struct.FILE* %12) nounwind ; <i8*> [#uses=0] + 
%iftmp.560.0 = select i1 undef, i32 2, i32 0 ; <i32> [#uses=1] + br label %bb100.outer + +bb.i7: ; preds = %bb3.i + br i1 false, label %bb2.i8, label %bb2.i.i + +bb2.i8: ; preds = %bb100.outer, %bb.i7 + br i1 undef, label %StringBeginsWith.exitthread-split, label %bb3.i + +bb3.i: ; preds = %bb2.i8 + br i1 undef, label %StringBeginsWith.exit, label %bb.i7 + +StringBeginsWith.exitthread-split: ; preds = %bb2.i8 + br label %StringBeginsWith.exit + +StringBeginsWith.exit: ; preds = %StringBeginsWith.exitthread-split, %bb3.i + %phitmp93 = icmp eq i8 undef, 0 ; <i1> [#uses=1] + br i1 %phitmp93, label %bb66, label %bb2.i.i + +bb66: ; preds = %StringBeginsWith.exit + %71 = call %struct.rec* @MakeWord(i32 11, i8* undef, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*)) nounwind ; <%struct.rec*> [#uses=4] + %72 = load i8* getelementptr ([150 x i8]* @zz_lengths, i32 0, i32 0), align 4 ; <i8> [#uses=1] + %73 = zext i8 %72 to i32 ; <i32> [#uses=2] + %74 = getelementptr [524 x %struct.rec*]* @zz_free, i32 0, i32 %73 ; <%struct.rec**> [#uses=2] + %75 = load %struct.rec** %74, align 4 ; <%struct.rec*> [#uses=3] + %76 = icmp eq %struct.rec* %75, null ; <i1> [#uses=1] + br i1 %76, label %bb69, label %bb70 + +bb69: ; preds = %bb66 + br i1 undef, label %bb.i2, label %GetMemory.exit + +bb.i2: ; preds = %bb69 + %77 = call noalias i8* @calloc(i32 1020, i32 4) nounwind ; <i8*> [#uses=1] + %78 = bitcast i8* %77 to i8** ; <i8**> [#uses=3] + store i8** %78, i8*** @next_free.4772, align 4 + br i1 undef, label %bb1.i3, label %bb2.i4 + +bb1.i3: ; preds = %bb.i2 + call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 31, i32 1, i8* getelementptr ([32 x i8]* @.str1575, i32 0, i32 0), i32 1, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*)) nounwind + br label %bb2.i4 + +bb2.i4: ; preds = %bb1.i3, %bb.i2 + %.pre1.i = phi i8** [ undef, %bb1.i3 ], [ %78, %bb.i2 ] ; <i8**> [#uses=1] + %79 = phi i8** [ undef, %bb1.i3 ], [ %78, %bb.i2 ] ; <i8**> [#uses=1] 
+ %80 = getelementptr i8** %79, i32 1020 ; <i8**> [#uses=1] + store i8** %80, i8*** @top_free.4773, align 4 + br label %GetMemory.exit + +GetMemory.exit: ; preds = %bb2.i4, %bb69 + %81 = phi i8** [ %.pre1.i, %bb2.i4 ], [ undef, %bb69 ] ; <i8**> [#uses=2] + %82 = bitcast i8** %81 to %struct.rec* ; <%struct.rec*> [#uses=3] + %83 = getelementptr i8** %81, i32 %73 ; <i8**> [#uses=1] + store i8** %83, i8*** @next_free.4772, align 4 + store %struct.rec* %82, %struct.rec** @zz_hold, align 4 + br label %bb71 + +bb70: ; preds = %bb66 + %84 = load %struct.rec** null, align 4 ; <%struct.rec*> [#uses=1] + store %struct.rec* %84, %struct.rec** %74, align 4 + br label %bb71 + +bb71: ; preds = %bb70, %GetMemory.exit + %.pre185 = phi %struct.rec* [ %75, %bb70 ], [ %82, %GetMemory.exit ] ; <%struct.rec*> [#uses=8] + %85 = phi %struct.rec* [ %75, %bb70 ], [ %82, %GetMemory.exit ] ; <%struct.rec*> [#uses=1] + %86 = getelementptr %struct.rec* %85, i32 0, i32 0, i32 1, i32 0, i32 0 ; <i8*> [#uses=0] + %87 = getelementptr %struct.rec* %.pre185, i32 0, i32 0, i32 0, i32 1, i32 1 ; <%struct.rec**> [#uses=0] + %88 = getelementptr %struct.rec* %.pre185, i32 0, i32 0, i32 0, i32 1, i32 0 ; <%struct.rec**> [#uses=1] + store %struct.rec* %.pre185, %struct.rec** @xx_link, align 4 + store %struct.rec* %.pre185, %struct.rec** @zz_res, align 4 + %89 = load %struct.rec** @needs, align 4 ; <%struct.rec*> [#uses=2] + store %struct.rec* %89, %struct.rec** @zz_hold, align 4 + br i1 false, label %bb77, label %bb73 + +bb73: ; preds = %bb71 + %90 = getelementptr %struct.rec* %89, i32 0, i32 0, i32 0, i32 0, i32 0 ; <%struct.rec**> [#uses=1] + store %struct.rec* null, %struct.rec** @zz_tmp, align 4 + store %struct.rec* %.pre185, %struct.rec** %90 + store %struct.rec* %.pre185, %struct.rec** undef, align 4 + br label %bb77 + +bb77: ; preds = %bb73, %bb71 + store %struct.rec* %.pre185, %struct.rec** @zz_res, align 4 + store %struct.rec* %71, %struct.rec** @zz_hold, align 4 + br i1 undef, label %bb83, label 
%bb79 + +bb79: ; preds = %bb77 + %91 = getelementptr %struct.rec* %71, i32 0, i32 0, i32 0, i32 1, i32 0 ; <%struct.rec**> [#uses=1] + store %struct.rec* null, %struct.rec** @zz_tmp, align 4 + %92 = load %struct.rec** %88, align 4 ; <%struct.rec*> [#uses=1] + store %struct.rec* %92, %struct.rec** %91 + %93 = getelementptr %struct.rec* undef, i32 0, i32 0, i32 0, i32 1, i32 1 ; <%struct.rec**> [#uses=1] + store %struct.rec* %71, %struct.rec** %93, align 4 + store %struct.rec* %.pre185, %struct.rec** undef, align 4 + br label %bb83 + +bb83: ; preds = %bb79, %bb77 + br label %bb100.outer.outer + +bb.i.i: ; preds = %bb3.i.i + br i1 undef, label %bb2.i.i, label %bb2.i6.i + +bb2.i.i: ; preds = %bb.i.i, %StringBeginsWith.exit, %bb.i7 + br i1 undef, label %StringBeginsWith.exitthread-split.i, label %bb3.i.i + +bb3.i.i: ; preds = %bb2.i.i + br i1 undef, label %StringBeginsWith.exit.i, label %bb.i.i + +StringBeginsWith.exitthread-split.i: ; preds = %bb2.i.i + br label %StringBeginsWith.exit.i + +StringBeginsWith.exit.i: ; preds = %StringBeginsWith.exitthread-split.i, %bb3.i.i + br i1 false, label %bb94, label %bb2.i6.i + +bb.i2.i: ; preds = %bb3.i7.i + br i1 false, label %bb2.i6.i, label %bb91 + +bb2.i6.i: ; preds = %bb.i2.i, %StringBeginsWith.exit.i, %bb.i.i + br i1 undef, label %strip_out.exitthread-split, label %bb3.i7.i + +bb3.i7.i: ; preds = %bb2.i6.i + %94 = load i8* undef, align 1 ; <i8> [#uses=1] + br i1 undef, label %strip_out.exit, label %bb.i2.i + +strip_out.exitthread-split: ; preds = %bb2.i6.i + %.pr100 = load i8* undef ; <i8> [#uses=1] + br label %strip_out.exit + +strip_out.exit: ; preds = %strip_out.exitthread-split, %bb3.i7.i + %95 = phi i8 [ %.pr100, %strip_out.exitthread-split ], [ %94, %bb3.i7.i ] ; <i8> [#uses=0] + br i1 undef, label %bb94, label %bb91 + +bb91: ; preds = %strip_out.exit, %bb.i2.i + unreachable + +bb94: ; preds = %strip_out.exit, %StringBeginsWith.exit.i + %96 = call i8* @fgets(i8* %buff14, i32 512, %struct.FILE* %12) nounwind ; <i8*> 
[#uses=0] + unreachable + +bb100.outer: ; preds = %bb58, %bb41, %bb100.outer.outer + %state.0.ph = phi i32 [ %state.0.ph.ph, %bb100.outer.outer ], [ %iftmp.560.0, %bb58 ], [ %iftmp.554.0, %bb41 ] ; <i32> [#uses=1] + switch i32 %state.0.ph, label %bb2.i84 [ + i32 2, label %bb101.split + i32 1, label %bb2.i8 + ] + +bb101.split: ; preds = %bb100.outer + %97 = icmp eq i32 undef, 0 ; <i1> [#uses=1] + br i1 %97, label %bb103, label %bb102 + +bb102: ; preds = %bb101.split + %98 = call i32 @remove(i8* getelementptr ([9 x i8]* @.str19294, i32 0, i32 0)) nounwind ; <i32> [#uses=0] + unreachable + +bb103: ; preds = %bb101.split + %99 = load %struct.FILE** @out_fp, align 4 ; <%struct.FILE*> [#uses=1] + %100 = call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %99, i8* getelementptr ([26 x i8]* @.str1932957, i32 0, i32 0)) nounwind ; <i32> [#uses=0] + store i32 0, i32* @wordcount, align 4 + ret void +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll b/src/LLVM/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll new file mode 100644 index 0000000..779e100 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll
@@ -0,0 +1,33 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | not grep fcpys +; rdar://7117307 + + %struct.Hosp = type { i32, i32, i32, %struct.List, %struct.List, %struct.List, %struct.List } + %struct.List = type { %struct.List*, %struct.Patient*, %struct.List* } + %struct.Patient = type { i32, i32, i32, %struct.Village* } + %struct.Results = type { float, float, float } + %struct.Village = type { [4 x %struct.Village*], %struct.Village*, %struct.List, %struct.Hosp, i32, i32 } + +define void @get_results(%struct.Results* noalias nocapture sret %agg.result, %struct.Village* %village) nounwind { +entry: + br i1 undef, label %bb, label %bb6.preheader + +bb6.preheader: ; preds = %entry + call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* undef, i32 12, i32 4, i1 false) + br i1 undef, label %bb15, label %bb13 + +bb: ; preds = %entry + ret void + +bb13: ; preds = %bb13, %bb6.preheader + %0 = fadd float undef, undef ; <float> [#uses=1] + %1 = fadd float undef, 1.000000e+00 ; <float> [#uses=1] + br i1 undef, label %bb15, label %bb13 + +bb15: ; preds = %bb13, %bb6.preheader + %r1.0.0.lcssa = phi float [ 0.000000e+00, %bb6.preheader ], [ %1, %bb13 ] ; <float> [#uses=1] + %r1.1.0.lcssa = phi float [ undef, %bb6.preheader ], [ %0, %bb13 ] ; <float> [#uses=0] + store float %r1.0.0.lcssa, float* undef, align 4 + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/src/LLVM/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug2.ll b/src/LLVM/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug2.ll new file mode 100644 index 0000000..9d4fc31 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug2.ll
@@ -0,0 +1,42 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 +; rdar://7117307 + + %struct.Hosp = type { i32, i32, i32, %struct.List, %struct.List, %struct.List, %struct.List } + %struct.List = type { %struct.List*, %struct.Patient*, %struct.List* } + %struct.Patient = type { i32, i32, i32, %struct.Village* } + %struct.Village = type { [4 x %struct.Village*], %struct.Village*, %struct.List, %struct.Hosp, i32, i32 } + +define %struct.List* @sim(%struct.Village* %village) nounwind { +entry: + br i1 undef, label %bb14, label %bb3.preheader + +bb3.preheader: ; preds = %entry + br label %bb5 + +bb5: ; preds = %bb5, %bb3.preheader + br i1 undef, label %bb11, label %bb5 + +bb11: ; preds = %bb5 + %0 = fmul float undef, 0x41E0000000000000 ; <float> [#uses=1] + %1 = fptosi float %0 to i32 ; <i32> [#uses=1] + store i32 %1, i32* undef, align 4 + br i1 undef, label %generate_patient.exit, label %generate_patient.exit.thread + +generate_patient.exit.thread: ; preds = %bb11 + ret %struct.List* null + +generate_patient.exit: ; preds = %bb11 + br i1 undef, label %bb14, label %bb12 + +bb12: ; preds = %generate_patient.exit + br i1 undef, label %bb.i, label %bb1.i + +bb.i: ; preds = %bb12 + ret %struct.List* null + +bb1.i: ; preds = %bb12 + ret %struct.List* null + +bb14: ; preds = %generate_patient.exit, %entry + ret %struct.List* undef +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug3.ll b/src/LLVM/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug3.ll new file mode 100644 index 0000000..ad32dc9 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug3.ll
@@ -0,0 +1,54 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 +; rdar://7117307 + + %struct.Hosp = type { i32, i32, i32, %struct.List, %struct.List, %struct.List, %struct.List } + %struct.List = type { %struct.List*, %struct.Patient*, %struct.List* } + %struct.Patient = type { i32, i32, i32, %struct.Village* } + %struct.Village = type { [4 x %struct.Village*], %struct.Village*, %struct.List, %struct.Hosp, i32, i32 } + +define %struct.List* @sim(%struct.Village* %village) nounwind { +entry: + br i1 undef, label %bb14, label %bb3.preheader + +bb3.preheader: ; preds = %entry + br label %bb5 + +bb5: ; preds = %bb5, %bb3.preheader + br i1 undef, label %bb11, label %bb5 + +bb11: ; preds = %bb5 + %0 = load i32* undef, align 4 ; <i32> [#uses=1] + %1 = xor i32 %0, 123459876 ; <i32> [#uses=1] + %2 = sdiv i32 %1, 127773 ; <i32> [#uses=1] + %3 = mul i32 %2, 2836 ; <i32> [#uses=1] + %4 = sub i32 0, %3 ; <i32> [#uses=1] + %5 = xor i32 %4, 123459876 ; <i32> [#uses=1] + %idum_addr.0.i.i = select i1 undef, i32 undef, i32 %5 ; <i32> [#uses=1] + %6 = sitofp i32 %idum_addr.0.i.i to double ; <double> [#uses=1] + %7 = fmul double %6, 0x3E00000000200000 ; <double> [#uses=1] + %8 = fptrunc double %7 to float ; <float> [#uses=2] + %9 = fmul float %8, 0x41E0000000000000 ; <float> [#uses=1] + %10 = fptosi float %9 to i32 ; <i32> [#uses=1] + store i32 %10, i32* undef, align 4 + %11 = fpext float %8 to double ; <double> [#uses=1] + %12 = fcmp ogt double %11, 6.660000e-01 ; <i1> [#uses=1] + br i1 %12, label %generate_patient.exit, label %generate_patient.exit.thread + +generate_patient.exit.thread: ; preds = %bb11 + ret %struct.List* null + +generate_patient.exit: ; preds = %bb11 + br i1 undef, label %bb14, label %bb12 + +bb12: ; preds = %generate_patient.exit + br i1 undef, label %bb.i, label %bb1.i + +bb.i: ; preds = %bb12 + ret %struct.List* null + +bb1.i: ; preds = %bb12 + ret %struct.List* null + +bb14: ; preds = %generate_patient.exit, %entry + ret %struct.List* undef +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll b/src/LLVM/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll new file mode 100644 index 0000000..ff68e66 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabi | FileCheck %s +; PR4659 +; PR4682 + +define hidden i32 @__gcov_execlp(i8* %path, i8* %arg, ...) nounwind { +entry: +; CHECK: __gcov_execlp: +; CHECK: sub sp, #8 +; CHECK: push +; CHECK: add r7, sp, #4 +; CHECK: sub.w r4, r7, #4 +; CHECK: mov sp, r4 +; CHECK-NOT: mov sp, r7 +; CHECK: add sp, #8 + call void @__gcov_flush() nounwind + br i1 undef, label %bb5, label %bb + +bb: ; preds = %bb, %entry + br i1 undef, label %bb5, label %bb + +bb5: ; preds = %bb, %entry + %0 = alloca i8*, i32 undef, align 4 ; <i8**> [#uses=1] + %1 = call i32 @execvp(i8* %path, i8** %0) nounwind ; <i32> [#uses=1] + ret i32 %1 +} + +declare hidden void @__gcov_flush() + +declare i32 @execvp(i8*, i8**) nounwind
diff --git a/src/LLVM/test/CodeGen/Thumb2/2009-08-07-CoalescerBug.ll b/src/LLVM/test/CodeGen/Thumb2/2009-08-07-CoalescerBug.ll new file mode 100644 index 0000000..93f5a0f --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2009-08-07-CoalescerBug.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -mtriple=armv7-eabi -mattr=+vfp2 +; PR4686 + + %a = type { i32 (...)** } + %b = type { %a } + %c = type { float, float, float, float } + +declare arm_aapcs_vfpcc float @bar(%c*) + +define arm_aapcs_vfpcc void @foo(%b* %x, %c* %y) { +entry: + %0 = call arm_aapcs_vfpcc float @bar(%c* %y) ; <float> [#uses=0] + %1 = fadd float undef, undef ; <float> [#uses=1] + store float %1, float* undef, align 8 + ret void +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/2009-08-07-NeonFPBug.ll b/src/LLVM/test/CodeGen/Thumb2/2009-08-07-NeonFPBug.ll new file mode 100644 index 0000000..f3baeb7 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2009-08-07-NeonFPBug.ll
@@ -0,0 +1,80 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8 + + %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } + %struct.JHUFF_TBL = type { [17 x i8], [256 x i8], i32 } + %struct.JQUANT_TBL = type { [64 x i16], i32 } + %struct.__sFILEX = type opaque + %struct.__sbuf = type { i8*, i32 } + %struct.anon = type { [8 x i32], [48 x i8] } + %struct.backing_store_info = type { void (%struct.jpeg_common_struct*, %struct.backing_store_info*, i8*, i32, i32)*, void (%struct.jpeg_common_struct*, %struct.backing_store_info*, i8*, i32, i32)*, void (%struct.jpeg_common_struct*, %struct.backing_store_info*)*, %struct.FILE*, [64 x i8] } + %struct.jpeg_color_deconverter = type { void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*, i8***, i32, i8**, i32)* } + %struct.jpeg_color_quantizer = type { void (%struct.jpeg_decompress_struct*, i32)*, void (%struct.jpeg_decompress_struct*, i8**, i8**, i32)*, void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)* } + %struct.jpeg_common_struct = type { %struct.jpeg_error_mgr*, %struct.jpeg_memory_mgr*, %struct.jpeg_progress_mgr*, i32, i32 } + %struct.jpeg_component_info = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.JQUANT_TBL*, i8* } + %struct.jpeg_d_coef_controller = type { void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*, i8***)*, %struct.jvirt_barray_control** } + %struct.jpeg_d_main_controller = type { void (%struct.jpeg_decompress_struct*, i32)*, void (%struct.jpeg_decompress_struct*, i8**, i32*, i32)* } + %struct.jpeg_d_post_controller = type { void (%struct.jpeg_decompress_struct*, i32)*, void 
(%struct.jpeg_decompress_struct*, i8***, i32*, i32, i8**, i32*, i32)* } + %struct.jpeg_decomp_master = type { void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, i32 } + %struct.jpeg_decompress_struct = type { %struct.jpeg_error_mgr*, %struct.jpeg_memory_mgr*, %struct.jpeg_progress_mgr*, i32, i32, %struct.jpeg_source_mgr*, i32, i32, i32, i32, i32, i32, i32, double, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8**, i32, i32, i32, i32, i32, [64 x i32]*, [4 x %struct.JQUANT_TBL*], [4 x %struct.JHUFF_TBL*], [4 x %struct.JHUFF_TBL*], i32, %struct.jpeg_component_info*, i32, i32, [16 x i8], [16 x i8], [16 x i8], i32, i32, i8, i16, i16, i32, i8, i32, i32, i32, i32, i32, i8*, i32, [4 x %struct.jpeg_component_info*], i32, i32, i32, [10 x i32], i32, i32, i32, i32, i32, %struct.jpeg_decomp_master*, %struct.jpeg_d_main_controller*, %struct.jpeg_d_coef_controller*, %struct.jpeg_d_post_controller*, %struct.jpeg_input_controller*, %struct.jpeg_marker_reader*, %struct.jpeg_entropy_decoder*, %struct.jpeg_inverse_dct*, %struct.jpeg_upsampler*, %struct.jpeg_color_deconverter*, %struct.jpeg_color_quantizer* } + %struct.jpeg_entropy_decoder = type { void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*, [64 x i16]**)* } + %struct.jpeg_error_mgr = type { void (%struct.jpeg_common_struct*)*, void (%struct.jpeg_common_struct*, i32)*, void (%struct.jpeg_common_struct*)*, void (%struct.jpeg_common_struct*, i8*)*, void (%struct.jpeg_common_struct*)*, i32, %struct.anon, i32, i32, i8**, i32, i8**, i32, i32 } + %struct.jpeg_input_controller = type { i32 (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*)*, i32, i32 } + %struct.jpeg_inverse_dct = type { void (%struct.jpeg_decompress_struct*)*, [10 x void (%struct.jpeg_decompress_struct*, %struct.jpeg_component_info*, i16*, i8**, i32)*] } + 
%struct.jpeg_marker_reader = type { void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, [16 x i32 (%struct.jpeg_decompress_struct*)*], i32, i32, i32, i32 } + %struct.jpeg_memory_mgr = type { i8* (%struct.jpeg_common_struct*, i32, i32)*, i8* (%struct.jpeg_common_struct*, i32, i32)*, i8** (%struct.jpeg_common_struct*, i32, i32, i32)*, [64 x i16]** (%struct.jpeg_common_struct*, i32, i32, i32)*, %struct.jvirt_sarray_control* (%struct.jpeg_common_struct*, i32, i32, i32, i32, i32)*, %struct.jvirt_barray_control* (%struct.jpeg_common_struct*, i32, i32, i32, i32, i32)*, void (%struct.jpeg_common_struct*)*, i8** (%struct.jpeg_common_struct*, %struct.jvirt_sarray_control*, i32, i32, i32)*, [64 x i16]** (%struct.jpeg_common_struct*, %struct.jvirt_barray_control*, i32, i32, i32)*, void (%struct.jpeg_common_struct*, i32)*, void (%struct.jpeg_common_struct*)*, i32 } + %struct.jpeg_progress_mgr = type { void (%struct.jpeg_common_struct*)*, i32, i32, i32, i32 } + %struct.jpeg_source_mgr = type { i8*, i32, void (%struct.jpeg_decompress_struct*)*, i32 (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*, i32)*, i32 (%struct.jpeg_decompress_struct*, i32)*, void (%struct.jpeg_decompress_struct*)* } + %struct.jpeg_upsampler = type { void (%struct.jpeg_decompress_struct*)*, void (%struct.jpeg_decompress_struct*, i8***, i32*, i32, i8**, i32*, i32)*, i32 } + %struct.jvirt_barray_control = type { [64 x i16]**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.jvirt_barray_control*, %struct.backing_store_info } + %struct.jvirt_sarray_control = type { i8**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.jvirt_sarray_control*, %struct.backing_store_info } + +define void @jpeg_idct_float(%struct.jpeg_decompress_struct* nocapture %cinfo, %struct.jpeg_component_info* nocapture %compptr, i16* nocapture %coef_block, i8** nocapture %output_buf, i32 
%output_col) nounwind { +entry: + br label %bb + +bb: ; preds = %bb, %entry + %0 = load float* undef, align 4 ; <float> [#uses=1] + %1 = fmul float undef, %0 ; <float> [#uses=2] + %tmp73 = add i32 0, 224 ; <i32> [#uses=1] + %scevgep74 = getelementptr i8* null, i32 %tmp73 ; <i8*> [#uses=1] + %scevgep7475 = bitcast i8* %scevgep74 to float* ; <float*> [#uses=1] + %2 = load float* null, align 4 ; <float> [#uses=1] + %3 = fmul float 0.000000e+00, %2 ; <float> [#uses=2] + %4 = fadd float %1, %3 ; <float> [#uses=1] + %5 = fsub float %1, %3 ; <float> [#uses=2] + %6 = fadd float undef, 0.000000e+00 ; <float> [#uses=2] + %7 = fmul float undef, 0x3FF6A09E60000000 ; <float> [#uses=1] + %8 = fsub float %7, %6 ; <float> [#uses=2] + %9 = fsub float %4, %6 ; <float> [#uses=1] + %10 = fadd float %5, %8 ; <float> [#uses=2] + %11 = fsub float %5, %8 ; <float> [#uses=1] + %12 = sitofp i16 undef to float ; <float> [#uses=1] + %13 = fmul float %12, 0.000000e+00 ; <float> [#uses=2] + %14 = sitofp i16 undef to float ; <float> [#uses=1] + %15 = load float* %scevgep7475, align 4 ; <float> [#uses=1] + %16 = fmul float %14, %15 ; <float> [#uses=2] + %17 = fadd float undef, undef ; <float> [#uses=2] + %18 = fadd float %13, %16 ; <float> [#uses=2] + %19 = fsub float %13, %16 ; <float> [#uses=1] + %20 = fadd float %18, %17 ; <float> [#uses=2] + %21 = fsub float %18, %17 ; <float> [#uses=1] + %22 = fmul float %21, 0x3FF6A09E60000000 ; <float> [#uses=1] + %23 = fmul float undef, 0x3FFD906BC0000000 ; <float> [#uses=2] + %24 = fmul float %19, 0x3FF1517A80000000 ; <float> [#uses=1] + %25 = fsub float %24, %23 ; <float> [#uses=1] + %26 = fadd float undef, %23 ; <float> [#uses=1] + %27 = fsub float %26, %20 ; <float> [#uses=3] + %28 = fsub float %22, %27 ; <float> [#uses=2] + %29 = fadd float %25, %28 ; <float> [#uses=1] + %30 = fadd float undef, %20 ; <float> [#uses=1] + store float %30, float* undef, align 4 + %31 = fadd float %10, %27 ; <float> [#uses=1] + store float %31, float* undef, align 4 + 
%32 = fsub float %10, %27 ; <float> [#uses=1] + store float %32, float* undef, align 4 + %33 = fadd float %11, %28 ; <float> [#uses=1] + store float %33, float* undef, align 4 + %34 = fsub float %9, %29 ; <float> [#uses=1] + store float %34, float* undef, align 4 + br label %bb +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/2009-08-08-ScavengerAssert.ll b/src/LLVM/test/CodeGen/Thumb2/2009-08-08-ScavengerAssert.ll new file mode 100644 index 0000000..e3c23ac --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2009-08-08-ScavengerAssert.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -mtriple=armv7-eabi -mattr=+vfp2 +; PR4686 + +@g_d = external global double ; <double*> [#uses=1] + +define void @foo(float %yIncr) { +entry: + br i1 undef, label %bb, label %bb4 + +bb: ; preds = %entry + %0 = call arm_aapcs_vfpcc float @bar() ; <float> [#uses=1] + %1 = fpext float %0 to double ; <double> [#uses=1] + store double %1, double* @g_d, align 8 + br label %bb4 + +bb4: ; preds = %bb, %entry + unreachable +} + +declare arm_aapcs_vfpcc float @bar()
diff --git a/src/LLVM/test/CodeGen/Thumb2/2009-08-10-ISelBug.ll b/src/LLVM/test/CodeGen/Thumb2/2009-08-10-ISelBug.ll new file mode 100644 index 0000000..974ce50 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2009-08-10-ISelBug.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+vfp2 + +define float @t1(i32 %v0) nounwind { +entry: + store i32 undef, i32* undef, align 4 + %0 = load [4 x i8]** undef, align 4 ; <[4 x i8]*> [#uses=1] + %1 = load i8* undef, align 1 ; <i8> [#uses=1] + %2 = zext i8 %1 to i32 ; <i32> [#uses=1] + %3 = getelementptr [4 x i8]* %0, i32 %v0, i32 0 ; <i8*> [#uses=1] + %4 = load i8* %3, align 1 ; <i8> [#uses=1] + %5 = zext i8 %4 to i32 ; <i32> [#uses=1] + %6 = sub i32 %5, %2 ; <i32> [#uses=1] + %7 = sitofp i32 %6 to float ; <float> [#uses=1] + ret float %7 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/2009-08-21-PostRAKill4.ll b/src/LLVM/test/CodeGen/Thumb2/2009-08-21-PostRAKill4.ll new file mode 100644 index 0000000..5cfc68d --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2009-08-21-PostRAKill4.ll
@@ -0,0 +1,26 @@ +; RUN: llc < %s -asm-verbose=false -O3 -relocation-model=pic -disable-fp-elim -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -post-RA-scheduler + +; ModuleID = '<stdin>' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64" +target triple = "armv7-apple-darwin9" + +@.str = external constant [36 x i8], align 1 ; <[36 x i8]*> [#uses=0] +@.str1 = external constant [31 x i8], align 1 ; <[31 x i8]*> [#uses=1] +@.str2 = external constant [4 x i8], align 1 ; <[4 x i8]*> [#uses=1] + +declare i32 @getUnknown(i32, ...) nounwind + +declare void @llvm.va_start(i8*) nounwind + +declare void @llvm.va_end(i8*) nounwind + +declare i32 @printf(i8* nocapture, ...) nounwind + +define i32 @main() nounwind { +entry: + %0 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([31 x i8]* @.str1, i32 0, i32 0), i32 1, i32 1, i32 1, i32 1, i32 1, i32 1) nounwind ; <i32> [#uses=0] + %1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([31 x i8]* @.str1, i32 0, i32 0), i32 -128, i32 116, i32 116, i32 -3852, i32 -31232, i32 -1708916736) nounwind ; <i32> [#uses=0] + %2 = tail call i32 (i32, ...)* @getUnknown(i32 undef, i32 116, i32 116, i32 -3852, i32 -31232, i32 30556, i32 -1708916736) nounwind ; <i32> [#uses=1] + %3 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @.str2, i32 0, i32 0), i32 %2) nounwind ; <i32> [#uses=0] + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/2009-09-01-PostRAProlog.ll b/src/LLVM/test/CodeGen/Thumb2/2009-09-01-PostRAProlog.ll new file mode 100644 index 0000000..06a152d --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2009-09-01-PostRAProlog.ll
@@ -0,0 +1,106 @@ +; RUN: llc -asm-verbose=false -O3 -relocation-model=pic -disable-fp-elim -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 < %s | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32" +target triple = "thumbv7-apple-darwin9" + +@history = internal global [2 x [56 x i32]] [[56 x i32] [i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0], [56 x i32] [i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0]] ; <[2 x [56 x i32]]*> [#uses=3] +@nodes = internal global i64 0 ; <i64*> [#uses=4] +@.str = private constant [9 x i8] c"##-<=>+#\00", align 1 ; <[9 x i8]*> [#uses=2] +@.str1 = private constant [6 x i8] c"%c%d\0A\00", align 1 ; <[6 x i8]*> [#uses=1] +@.str2 = private constant [16 x i8] c"Fhourstones 2.0\00", align 1 ; <[16 x i8]*> [#uses=1] +@.str3 = private constant [54 x i8] c"Using %d transposition table entries with %d probes.\0A\00", align 1 ; <[54 x i8]*> [#uses=1] +@.str4 = private constant [31 x i8] c"Solving %d-ply position after \00", align 1 ; <[31 x i8]*> [#uses=1] +@.str5 = private constant [7 x i8] c" . . 
.\00", align 1 ; <[7 x i8]*> [#uses=1] +@.str6 = private constant [28 x i8] c"score = %d (%c) work = %d\0A\00", align 1 ; <[28 x i8]*> [#uses=1] +@.str7 = private constant [36 x i8] c"%lu pos / %lu msec = %.1f Kpos/sec\0A\00", align 1 ; <[36 x i8]*> [#uses=1] +@plycnt = internal global i32 0 ; <i32*> [#uses=21] +@dias = internal global [19 x i32] zeroinitializer ; <[19 x i32]*> [#uses=43] +@columns = internal global [128 x i32] zeroinitializer ; <[128 x i32]*> [#uses=18] +@height = internal global [128 x i32] zeroinitializer ; <[128 x i32]*> [#uses=21] +@rows = internal global [8 x i32] zeroinitializer ; <[8 x i32]*> [#uses=20] +@colthr = internal global [128 x i32] zeroinitializer ; <[128 x i32]*> [#uses=5] +@moves = internal global [44 x i32] zeroinitializer ; <[44 x i32]*> [#uses=9] +@.str8 = private constant [3 x i8] c"%d\00", align 1 ; <[3 x i8]*> [#uses=1] +@he = internal global i8* null ; <i8**> [#uses=9] +@hits = internal global i64 0 ; <i64*> [#uses=8] +@posed = internal global i64 0 ; <i64*> [#uses=7] +@ht = internal global i32* null ; <i32**> [#uses=5] +@.str16 = private constant [19 x i8] c"store rate = %.3f\0A\00", align 1 ; <[19 x i8]*> [#uses=1] +@.str117 = private constant [45 x i8] c"- %5.3f < %5.3f = %5.3f > %5.3f + %5.3f\0A\00", align 1 ; <[45 x i8]*> [#uses=1] +@.str218 = private constant [6 x i8] c"%7d%c\00", align 1 ; <[6 x i8]*> [#uses=1] +@.str319 = private constant [30 x i8] c"Failed to allocate %u bytes.\0A\00", align 1 ; <[30 x i8]*> [#uses=1] + +declare i32 @puts(i8* nocapture) nounwind + +declare i32 @getchar() nounwind + +define internal i32 @transpose() nounwind readonly { +; CHECK: push +entry: + %0 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 1), align 4 ; <i32> [#uses=1] + %1 = shl i32 %0, 7 ; <i32> [#uses=1] + %2 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 2), align 4 ; <i32> [#uses=1] + %3 = or i32 %1, %2 ; <i32> [#uses=1] + %4 = shl i32 %3, 7 ; <i32> [#uses=1] + %5 = load i32* 
getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 3), align 4 ; <i32> [#uses=1] + %6 = or i32 %4, %5 ; <i32> [#uses=3] + %7 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 7), align 4 ; <i32> [#uses=1] + %8 = shl i32 %7, 7 ; <i32> [#uses=1] + %9 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 6), align 4 ; <i32> [#uses=1] + %10 = or i32 %8, %9 ; <i32> [#uses=1] + %11 = shl i32 %10, 7 ; <i32> [#uses=1] + %12 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 5), align 4 ; <i32> [#uses=1] + %13 = or i32 %11, %12 ; <i32> [#uses=3] + %14 = icmp ugt i32 %6, %13 ; <i1> [#uses=2] + %.pn2.in.i = select i1 %14, i32 %6, i32 %13 ; <i32> [#uses=1] + %.pn1.in.i = select i1 %14, i32 %13, i32 %6 ; <i32> [#uses=1] + %.pn2.i = shl i32 %.pn2.in.i, 7 ; <i32> [#uses=1] + %.pn3.i = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 4) ; <i32> [#uses=1] + %.pn.in.in.i = or i32 %.pn2.i, %.pn3.i ; <i32> [#uses=1] + %.pn.in.i = zext i32 %.pn.in.in.i to i64 ; <i64> [#uses=1] + %.pn.i = shl i64 %.pn.in.i, 21 ; <i64> [#uses=1] + %.pn1.i = zext i32 %.pn1.in.i to i64 ; <i64> [#uses=1] + %iftmp.22.0.i = or i64 %.pn.i, %.pn1.i ; <i64> [#uses=2] + %15 = lshr i64 %iftmp.22.0.i, 17 ; <i64> [#uses=1] + %16 = trunc i64 %15 to i32 ; <i32> [#uses=2] + %17 = urem i64 %iftmp.22.0.i, 1050011 ; <i64> [#uses=1] + %18 = trunc i64 %17 to i32 ; <i32> [#uses=1] + %19 = urem i32 %16, 179 ; <i32> [#uses=1] + %20 = or i32 %19, 131072 ; <i32> [#uses=1] + %21 = load i32** @ht, align 4 ; <i32*> [#uses=1] + br label %bb5 + +bb: ; preds = %bb5 + %22 = getelementptr inbounds i32* %21, i32 %x.0 ; <i32*> [#uses=1] + %23 = load i32* %22, align 4 ; <i32> [#uses=1] + %24 = icmp eq i32 %23, %16 ; <i1> [#uses=1] + br i1 %24, label %bb1, label %bb2 + +bb1: ; preds = %bb + %25 = load i8** @he, align 4 ; <i8*> [#uses=1] + %26 = getelementptr inbounds i8* %25, i32 %x.0 ; <i8*> [#uses=1] + %27 = load i8* %26, align 1 ; <i8> [#uses=1] + %28 = sext 
i8 %27 to i32 ; <i32> [#uses=1] + ret i32 %28 + +bb2: ; preds = %bb + %29 = add nsw i32 %20, %x.0 ; <i32> [#uses=3] + %30 = add i32 %29, -1050011 ; <i32> [#uses=1] + %31 = icmp sgt i32 %29, 1050010 ; <i1> [#uses=1] + %. = select i1 %31, i32 %30, i32 %29 ; <i32> [#uses=1] + %32 = add i32 %33, 1 ; <i32> [#uses=1] + br label %bb5 + +bb5: ; preds = %bb2, %entry + %33 = phi i32 [ 0, %entry ], [ %32, %bb2 ] ; <i32> [#uses=2] + %x.0 = phi i32 [ %18, %entry ], [ %., %bb2 ] ; <i32> [#uses=3] + %34 = icmp sgt i32 %33, 7 ; <i1> [#uses=1] + br i1 %34, label %bb7, label %bb + +bb7: ; preds = %bb5 + ret i32 -128 +} + +declare noalias i8* @calloc(i32, i32) nounwind + +declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind
diff --git a/src/LLVM/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll b/src/LLVM/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll new file mode 100644 index 0000000..ac3e80a --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll
@@ -0,0 +1,152 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -disable-cgp-branch-opts | FileCheck %s + +%struct.pix_pos = type { i32, i32, i32, i32, i32, i32 } + +@getNeighbour = external global void (i32, i32, i32, i32, %struct.pix_pos*)*, align 4 ; <void (i32, i32, i32, i32, %struct.pix_pos*)**> [#uses=2] + +define void @t() nounwind { +; CHECK: t: +; CHECK: it eq +; CHECK-NEXT: cmpeq +entry: + %pix_a.i294 = alloca [4 x %struct.pix_pos], align 4 ; <[4 x %struct.pix_pos]*> [#uses=2] + br i1 undef, label %land.rhs, label %lor.end + +land.rhs: ; preds = %entry + br label %lor.end + +lor.end: ; preds = %land.rhs, %entry + switch i32 0, label %if.end371 [ + i32 10, label %if.then366 + i32 14, label %if.then366 + ] + +if.then366: ; preds = %lor.end, %lor.end + unreachable + +if.end371: ; preds = %lor.end + %arrayidx56.2.i = getelementptr [4 x %struct.pix_pos]* %pix_a.i294, i32 0, i32 2 ; <%struct.pix_pos*> [#uses=1] + %arrayidx56.3.i = getelementptr [4 x %struct.pix_pos]* %pix_a.i294, i32 0, i32 3 ; <%struct.pix_pos*> [#uses=1] + br i1 undef, label %for.body1857, label %for.end4557 + +for.body1857: ; preds = %if.end371 + br i1 undef, label %if.then1867, label %for.cond1933 + +if.then1867: ; preds = %for.body1857 + unreachable + +for.cond1933: ; preds = %for.body1857 + br i1 undef, label %for.body1940, label %if.then4493 + +for.body1940: ; preds = %for.cond1933 + %shl = shl i32 undef, 2 ; <i32> [#uses=1] + %shl1959 = shl i32 undef, 2 ; <i32> [#uses=4] + br i1 undef, label %if.then1992, label %if.else2003 + +if.then1992: ; preds = %for.body1940 + %tmp14.i302 = load i32* undef ; <i32> [#uses=4] + %add.i307452 = or i32 %shl1959, 1 ; <i32> [#uses=1] + %sub.i308 = add i32 %shl, -1 ; <i32> [#uses=4] + call void undef(i32 %tmp14.i302, i32 %sub.i308, i32 %shl1959, i32 0, %struct.pix_pos* undef) nounwind + %tmp49.i309 = load void (i32, i32, i32, i32, %struct.pix_pos*)** @getNeighbour ; <void (i32, i32, i32, i32, %struct.pix_pos*)*> [#uses=1] + call void 
%tmp49.i309(i32 %tmp14.i302, i32 %sub.i308, i32 %add.i307452, i32 0, %struct.pix_pos* null) nounwind + %tmp49.1.i = load void (i32, i32, i32, i32, %struct.pix_pos*)** @getNeighbour ; <void (i32, i32, i32, i32, %struct.pix_pos*)*> [#uses=1] + call void %tmp49.1.i(i32 %tmp14.i302, i32 %sub.i308, i32 undef, i32 0, %struct.pix_pos* %arrayidx56.2.i) nounwind + call void undef(i32 %tmp14.i302, i32 %sub.i308, i32 undef, i32 0, %struct.pix_pos* %arrayidx56.3.i) nounwind + unreachable + +if.else2003: ; preds = %for.body1940 + switch i32 undef, label %if.then2015 [ + i32 10, label %if.then4382 + i32 14, label %if.then4382 + ] + +if.then2015: ; preds = %if.else2003 + br i1 undef, label %if.else2298, label %if.then2019 + +if.then2019: ; preds = %if.then2015 + br i1 undef, label %if.then2065, label %if.else2081 + +if.then2065: ; preds = %if.then2019 + br label %if.end2128 + +if.else2081: ; preds = %if.then2019 + br label %if.end2128 + +if.end2128: ; preds = %if.else2081, %if.then2065 + unreachable + +if.else2298: ; preds = %if.then2015 + br i1 undef, label %land.lhs.true2813, label %cond.end2841 + +land.lhs.true2813: ; preds = %if.else2298 + br i1 undef, label %cond.end2841, label %cond.true2824 + +cond.true2824: ; preds = %land.lhs.true2813 + br label %cond.end2841 + +cond.end2841: ; preds = %cond.true2824, %land.lhs.true2813, %if.else2298 + br i1 undef, label %for.cond2882.preheader, label %for.cond2940.preheader + +for.cond2882.preheader: ; preds = %cond.end2841 + %mul3693 = shl i32 undef, 1 ; <i32> [#uses=2] + br i1 undef, label %if.then3689, label %if.else3728 + +for.cond2940.preheader: ; preds = %cond.end2841 + br label %for.inc3040 + +for.inc3040: ; preds = %for.inc3040, %for.cond2940.preheader + br label %for.inc3040 + +if.then3689: ; preds = %for.cond2882.preheader + %add3695 = add nsw i32 %mul3693, %shl1959 ; <i32> [#uses=1] + %mul3697 = shl i32 %add3695, 2 ; <i32> [#uses=2] + %arrayidx3705 = getelementptr inbounds i16* undef, i32 1 ; <i16*> [#uses=1] + %tmp3706 = 
load i16* %arrayidx3705 ; <i16> [#uses=1] + %conv3707 = sext i16 %tmp3706 to i32 ; <i32> [#uses=1] + %add3708 = add nsw i32 %conv3707, %mul3697 ; <i32> [#uses=1] + %arrayidx3724 = getelementptr inbounds i16* null, i32 1 ; <i16*> [#uses=1] + %tmp3725 = load i16* %arrayidx3724 ; <i16> [#uses=1] + %conv3726 = sext i16 %tmp3725 to i32 ; <i32> [#uses=1] + %add3727 = add nsw i32 %conv3726, %mul3697 ; <i32> [#uses=1] + br label %if.end3770 + +if.else3728: ; preds = %for.cond2882.preheader + %mul3733 = add i32 %shl1959, 1073741816 ; <i32> [#uses=1] + %add3735 = add nsw i32 %mul3733, %mul3693 ; <i32> [#uses=1] + %mul3737 = shl i32 %add3735, 2 ; <i32> [#uses=2] + %tmp3746 = load i16* undef ; <i16> [#uses=1] + %conv3747 = sext i16 %tmp3746 to i32 ; <i32> [#uses=1] + %add3748 = add nsw i32 %conv3747, %mul3737 ; <i32> [#uses=1] + %arrayidx3765 = getelementptr inbounds i16* null, i32 1 ; <i16*> [#uses=1] + %tmp3766 = load i16* %arrayidx3765 ; <i16> [#uses=1] + %conv3767 = sext i16 %tmp3766 to i32 ; <i32> [#uses=1] + %add3768 = add nsw i32 %conv3767, %mul3737 ; <i32> [#uses=1] + br label %if.end3770 + +if.end3770: ; preds = %if.else3728, %if.then3689 + %vec2_y.1 = phi i32 [ %add3727, %if.then3689 ], [ %add3768, %if.else3728 ] ; <i32> [#uses=0] + %vec1_y.2 = phi i32 [ %add3708, %if.then3689 ], [ %add3748, %if.else3728 ] ; <i32> [#uses=0] + unreachable + +if.then4382: ; preds = %if.else2003, %if.else2003 + switch i32 undef, label %if.then4394 [ + i32 10, label %if.else4400 + i32 14, label %if.else4400 + ] + +if.then4394: ; preds = %if.then4382 + unreachable + +if.else4400: ; preds = %if.then4382, %if.then4382 + br label %for.cond4451.preheader + +for.cond4451.preheader: ; preds = %for.cond4451.preheader, %if.else4400 + br label %for.cond4451.preheader + +if.then4493: ; preds = %for.cond1933 + unreachable + +for.end4557: ; preds = %if.end371 + ret void +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll b/src/LLVM/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll new file mode 100644 index 0000000..18c2e0b --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
@@ -0,0 +1,42 @@ +; RUN: llc < %s -mtriple=thumbv7-eabi -mcpu=cortex-a8 -float-abi=hard | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-eabi -mcpu=cortex-a8 -float-abi=hard -regalloc=basic | FileCheck %s +; PR5204 + +%"struct.__gnu_cxx::__normal_iterator<char*,std::basic_string<char, std::char_traits<char>, std::allocator<char> > >" = type { i8* } +%"struct.__gnu_cxx::new_allocator<char>" = type <{ i8 }> +%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >" = type { %"struct.__gnu_cxx::__normal_iterator<char*,std::basic_string<char, std::char_traits<char>, std::allocator<char> > >" } +%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep" = type { %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep_base" } +%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep_base" = type { i32, i32, i32 } + + +define weak arm_aapcs_vfpcc i32 @_ZNKSs7compareERKSs(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) { +; CHECK: _ZNKSs7compareERKSs: +; CHECK: it eq +; CHECK-NEXT: subeq{{(.w)?}} r0, r{{[0-9]+}}, r{{[0-9]+}} +; CHECK-NEXT: pop.w +entry: + %0 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this) ; <i32> [#uses=3] + %1 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) ; <i32> [#uses=3] + %2 = icmp ult i32 %1, %0 ; <i1> [#uses=1] + %3 = select i1 %2, i32 %1, i32 %0 ; <i32> [#uses=1] + %4 = tail call arm_aapcs_vfpcc i8* @_ZNKSs7_M_dataEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this) ; <i8*> [#uses=1] + %5 = tail call arm_aapcs_vfpcc i8* @_ZNKSs4dataEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* 
%__str) ; <i8*> [#uses=1] + %6 = tail call arm_aapcs_vfpcc i32 @memcmp(i8* %4, i8* %5, i32 %3) nounwind readonly ; <i32> [#uses=2] + %7 = icmp eq i32 %6, 0 ; <i1> [#uses=1] + br i1 %7, label %bb, label %bb1 + +bb: ; preds = %entry + %8 = sub i32 %0, %1 ; <i32> [#uses=1] + ret i32 %8 + +bb1: ; preds = %entry + ret i32 %6 +} + +declare arm_aapcs_vfpcc i32 @memcmp(i8* nocapture, i8* nocapture, i32) nounwind readonly + +declare arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this) + +declare arm_aapcs_vfpcc i8* @_ZNKSs7_M_dataEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this) + +declare arm_aapcs_vfpcc i8* @_ZNKSs4dataEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this)
diff --git a/src/LLVM/test/CodeGen/Thumb2/2009-11-01-CopyReg2RegBug.ll b/src/LLVM/test/CodeGen/Thumb2/2009-11-01-CopyReg2RegBug.ll new file mode 100644 index 0000000..4588018 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2009-11-01-CopyReg2RegBug.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 + +define void @get_initial_mb16x16_cost() nounwind { +entry: + br i1 undef, label %bb4, label %bb1 + +bb1: ; preds = %entry + br label %bb7 + +bb4: ; preds = %entry + br i1 undef, label %bb7.thread, label %bb5 + +bb5: ; preds = %bb4 + br label %bb7 + +bb7.thread: ; preds = %bb4 + br label %bb8 + +bb7: ; preds = %bb5, %bb1 + br i1 undef, label %bb8, label %bb10 + +bb8: ; preds = %bb7, %bb7.thread + %0 = phi double [ 5.120000e+02, %bb7.thread ], [ undef, %bb7 ] ; <double> [#uses=1] + %1 = fdiv double %0, undef ; <double> [#uses=0] + unreachable + +bb10: ; preds = %bb7 + ret void +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/2009-11-11-ScavengerAssert.ll b/src/LLVM/test/CodeGen/Thumb2/2009-11-11-ScavengerAssert.ll new file mode 100644 index 0000000..956263b --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2009-11-11-ScavengerAssert.ll
@@ -0,0 +1,85 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 + +%struct.OP = type { %struct.OP*, %struct.OP*, %struct.OP* ()*, i32, i16, i16, i8, i8 } +%struct.SV = type { i8*, i32, i32 } + +declare void @Perl_mg_set(%struct.SV*) nounwind + +define %struct.OP* @Perl_pp_complement() nounwind { +entry: + %0 = load %struct.SV** null, align 4 ; <%struct.SV*> [#uses=2] + br i1 undef, label %bb21, label %bb5 + +bb5: ; preds = %entry + br i1 undef, label %bb13, label %bb6 + +bb6: ; preds = %bb5 + br i1 undef, label %bb8, label %bb7 + +bb7: ; preds = %bb6 + %1 = getelementptr inbounds %struct.SV* %0, i32 0, i32 0 ; <i8**> [#uses=1] + %2 = load i8** %1, align 4 ; <i8*> [#uses=1] + %3 = getelementptr inbounds i8* %2, i32 12 ; <i8*> [#uses=1] + %4 = bitcast i8* %3 to i32* ; <i32*> [#uses=1] + %5 = load i32* %4, align 4 ; <i32> [#uses=1] + %storemerge5 = xor i32 %5, -1 ; <i32> [#uses=1] + call void @Perl_sv_setiv(%struct.SV* undef, i32 %storemerge5) nounwind + %6 = getelementptr inbounds %struct.SV* undef, i32 0, i32 2 ; <i32*> [#uses=1] + %7 = load i32* %6, align 4 ; <i32> [#uses=1] + %8 = and i32 %7, 16384 ; <i32> [#uses=1] + %9 = icmp eq i32 %8, 0 ; <i1> [#uses=1] + br i1 %9, label %bb12, label %bb11 + +bb8: ; preds = %bb6 + unreachable + +bb11: ; preds = %bb7 + call void @Perl_mg_set(%struct.SV* undef) nounwind + br label %bb12 + +bb12: ; preds = %bb11, %bb7 + store %struct.SV* undef, %struct.SV** null, align 4 + br label %bb44 + +bb13: ; preds = %bb5 + %10 = call i32 @Perl_sv_2uv(%struct.SV* %0) nounwind ; <i32> [#uses=0] + br i1 undef, label %bb.i, label %bb1.i + +bb.i: ; preds = %bb13 + call void @Perl_sv_setiv(%struct.SV* undef, i32 undef) nounwind + br label %Perl_sv_setuv.exit + +bb1.i: ; preds = %bb13 + br label %Perl_sv_setuv.exit + +Perl_sv_setuv.exit: ; preds = %bb1.i, %bb.i + %11 = getelementptr inbounds %struct.SV* undef, i32 0, i32 2 ; <i32*> [#uses=1] + %12 = load i32* %11, align 4 ; <i32> [#uses=1] + %13 = and i32 %12, 16384 ; <i32> [#uses=1] + %14 = 
icmp eq i32 %13, 0 ; <i1> [#uses=1] + br i1 %14, label %bb20, label %bb19 + +bb19: ; preds = %Perl_sv_setuv.exit + call void @Perl_mg_set(%struct.SV* undef) nounwind + br label %bb20 + +bb20: ; preds = %bb19, %Perl_sv_setuv.exit + store %struct.SV* undef, %struct.SV** null, align 4 + br label %bb44 + +bb21: ; preds = %entry + br i1 undef, label %bb23, label %bb22 + +bb22: ; preds = %bb21 + unreachable + +bb23: ; preds = %bb21 + unreachable + +bb44: ; preds = %bb20, %bb12 + ret %struct.OP* undef +} + +declare void @Perl_sv_setiv(%struct.SV*, i32) nounwind + +declare i32 @Perl_sv_2uv(%struct.SV*) nounwind
diff --git a/src/LLVM/test/CodeGen/Thumb2/2009-11-13-STRDBug.ll b/src/LLVM/test/CodeGen/Thumb2/2009-11-13-STRDBug.ll new file mode 100644 index 0000000..0c9fa5e --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2009-11-13-STRDBug.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 +; rdar://7394794 + +define void @lshift_double(i64 %l1, i64 %h1, i64 %count, i32 %prec, i64* nocapture %lv, i64* nocapture %hv, i32 %arith) nounwind { +entry: + %..i = select i1 false, i64 0, i64 0 ; <i64> [#uses=1] + br i1 undef, label %bb11.i, label %bb6.i + +bb6.i: ; preds = %entry + %0 = lshr i64 %h1, 0 ; <i64> [#uses=1] + store i64 %0, i64* %hv, align 4 + %1 = lshr i64 %l1, 0 ; <i64> [#uses=1] + %2 = or i64 0, %1 ; <i64> [#uses=1] + store i64 %2, i64* %lv, align 4 + br label %bb11.i + +bb11.i: ; preds = %bb6.i, %entry + store i64 %..i, i64* %lv, align 4 + ret void +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll b/src/LLVM/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll new file mode 100644 index 0000000..034a28f --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll
@@ -0,0 +1,128 @@ +; RUN: opt < %s -std-compile-opts | \ +; RUN: llc -mtriple=thumbv7-apple-darwin10 -mattr=+neon | FileCheck %s + +define void @fred(i32 %three_by_three, i8* %in, double %dt1, i32 %x_size, i32 %y_size, i8* %bp) nounwind { +entry: +; -- The loop following the load should only use a single add-literation +; instruction. +; CHECK: ldr.64 +; CHECK: adds r{{[0-9]+.*}}#1 +; CHECK-NOT: adds +; CHECK: subsections_via_symbols + + + %three_by_three_addr = alloca i32 ; <i32*> [#uses=2] + %in_addr = alloca i8* ; <i8**> [#uses=2] + %dt_addr = alloca float ; <float*> [#uses=4] + %x_size_addr = alloca i32 ; <i32*> [#uses=2] + %y_size_addr = alloca i32 ; <i32*> [#uses=1] + %bp_addr = alloca i8* ; <i8**> [#uses=1] + %tmp_image = alloca i8* ; <i8**> [#uses=0] + %out = alloca i8* ; <i8**> [#uses=1] + %cp = alloca i8* ; <i8**> [#uses=0] + %dpt = alloca i8* ; <i8**> [#uses=4] + %dp = alloca i8* ; <i8**> [#uses=2] + %ip = alloca i8* ; <i8**> [#uses=0] + %centre = alloca i32 ; <i32*> [#uses=0] + %tmp = alloca i32 ; <i32*> [#uses=0] + %brightness = alloca i32 ; <i32*> [#uses=0] + %area = alloca i32 ; <i32*> [#uses=0] + %y = alloca i32 ; <i32*> [#uses=0] + %x = alloca i32 ; <i32*> [#uses=2] + %j = alloca i32 ; <i32*> [#uses=6] + %i = alloca i32 ; <i32*> [#uses=1] + %mask_size = alloca i32 ; <i32*> [#uses=5] + %increment = alloca i32 ; <i32*> [#uses=1] + %n_max = alloca i32 ; <i32*> [#uses=4] + %temp = alloca float ; <float*> [#uses=1] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store i32 %three_by_three, i32* %three_by_three_addr + store i8* %in, i8** %in_addr + %dt = fptrunc double %dt1 to float ; <float> [#uses=1] + store float %dt, float* %dt_addr + store i32 %x_size, i32* %x_size_addr + store i32 %y_size, i32* %y_size_addr + store i8* %bp, i8** %bp_addr + %0 = load i8** %in_addr, align 4 ; <i8*> [#uses=1] + store i8* %0, i8** %out, align 4 + %1 = call i32 (...)* @foo() nounwind ; <i32> [#uses=1] + store i32 %1, i32* %i, align 4 + %2 = load i32* 
%three_by_three_addr, align 4 ; <i32> [#uses=1] + %3 = icmp eq i32 %2, 0 ; <i1> [#uses=1] + br i1 %3, label %bb, label %bb2 + +bb: ; preds = %entry + %4 = load float* %dt_addr, align 4 ; <float> [#uses=1] + %5 = fpext float %4 to double ; <double> [#uses=1] + %6 = fmul double %5, 1.500000e+00 ; <double> [#uses=1] + %7 = fptosi double %6 to i32 ; <i32> [#uses=1] + %8 = add nsw i32 %7, 1 ; <i32> [#uses=1] + store i32 %8, i32* %mask_size, align 4 + br label %bb3 + +bb2: ; preds = %entry + store i32 1, i32* %mask_size, align 4 + br label %bb3 + +bb3: ; preds = %bb2, %bb + %9 = load i32* %mask_size, align 4 ; <i32> [#uses=1] + %10 = mul i32 %9, 2 ; <i32> [#uses=1] + %11 = add nsw i32 %10, 1 ; <i32> [#uses=1] + store i32 %11, i32* %n_max, align 4 + %12 = load i32* %x_size_addr, align 4 ; <i32> [#uses=1] + %13 = load i32* %n_max, align 4 ; <i32> [#uses=1] + %14 = sub i32 %12, %13 ; <i32> [#uses=1] + store i32 %14, i32* %increment, align 4 + %15 = load i32* %n_max, align 4 ; <i32> [#uses=1] + %16 = load i32* %n_max, align 4 ; <i32> [#uses=1] + %17 = mul i32 %15, %16 ; <i32> [#uses=1] + %18 = call noalias i8* @malloc(i32 %17) nounwind ; <i8*> [#uses=1] + store i8* %18, i8** %dp, align 4 + %19 = load i8** %dp, align 4 ; <i8*> [#uses=1] + store i8* %19, i8** %dpt, align 4 + %20 = load float* %dt_addr, align 4 ; <float> [#uses=1] + %21 = load float* %dt_addr, align 4 ; <float> [#uses=1] + %22 = fmul float %20, %21 ; <float> [#uses=1] + %23 = fsub float -0.000000e+00, %22 ; <float> [#uses=1] + store float %23, float* %temp, align 4 + %24 = load i32* %mask_size, align 4 ; <i32> [#uses=1] + %25 = sub i32 0, %24 ; <i32> [#uses=1] + store i32 %25, i32* %j, align 4 + br label %bb5 + +bb4: ; preds = %bb5 + %26 = load i32* %j, align 4 ; <i32> [#uses=1] + %27 = load i32* %j, align 4 ; <i32> [#uses=1] + %28 = mul i32 %26, %27 ; <i32> [#uses=1] + %29 = sitofp i32 %28 to double ; <double> [#uses=1] + %30 = fmul double %29, 1.234000e+00 ; <double> [#uses=1] + %31 = fptosi double %30 to i32 
; <i32> [#uses=1] + store i32 %31, i32* %x, align 4 + %32 = load i32* %x, align 4 ; <i32> [#uses=1] + %33 = trunc i32 %32 to i8 ; <i8> [#uses=1] + %34 = load i8** %dpt, align 4 ; <i8*> [#uses=1] + store i8 %33, i8* %34, align 1 + %35 = load i8** %dpt, align 4 ; <i8*> [#uses=1] + %36 = getelementptr inbounds i8* %35, i64 1 ; <i8*> [#uses=1] + store i8* %36, i8** %dpt, align 4 + %37 = load i32* %j, align 4 ; <i32> [#uses=1] + %38 = add nsw i32 %37, 1 ; <i32> [#uses=1] + store i32 %38, i32* %j, align 4 + br label %bb5 + +bb5: ; preds = %bb4, %bb3 + %39 = load i32* %j, align 4 ; <i32> [#uses=1] + %40 = load i32* %mask_size, align 4 ; <i32> [#uses=1] + %41 = icmp sle i32 %39, %40 ; <i1> [#uses=1] + br i1 %41, label %bb4, label %bb6 + +bb6: ; preds = %bb5 + br label %return + +return: ; preds = %bb6 + ret void +} + +declare i32 @foo(...) + +declare noalias i8* @malloc(i32) nounwind
diff --git a/src/LLVM/test/CodeGen/Thumb2/2010-01-06-TailDuplicateLabels.ll b/src/LLVM/test/CodeGen/Thumb2/2010-01-06-TailDuplicateLabels.ll new file mode 100644 index 0000000..af7d716 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2010-01-06-TailDuplicateLabels.ll
@@ -0,0 +1,89 @@ +; RUN: llc -relocation-model=pic < %s | grep {:$} | sort | uniq -d | count 0 +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin10" + +; This function produces a duplicate LPC label unless special care is taken when duplicating a t2LDRpci_pic instruction. + +%struct.PlatformMutex = type { i32, [40 x i8] } +%struct.SpinLock = type { %struct.PlatformMutex } +%"struct.WTF::TCMalloc_ThreadCache" = type { i32, %struct._opaque_pthread_t*, i8, [68 x %"struct.WTF::TCMalloc_ThreadCache_FreeList"], i32, i32, %"struct.WTF::TCMalloc_ThreadCache"*, %"struct.WTF::TCMalloc_ThreadCache"* } +%"struct.WTF::TCMalloc_ThreadCache_FreeList" = type { i8*, i16, i16 } +%struct.__darwin_pthread_handler_rec = type { void (i8*)*, i8*, %struct.__darwin_pthread_handler_rec* } +%struct._opaque_pthread_t = type { i32, %struct.__darwin_pthread_handler_rec*, [596 x i8] } + +@_ZN3WTFL8heap_keyE = internal global i32 0 ; <i32*> [#uses=1] +@_ZN3WTFL10tsd_initedE.b = internal global i1 false ; <i1*> [#uses=2] +@_ZN3WTFL13pageheap_lockE = internal global %struct.SpinLock { %struct.PlatformMutex { i32 850045863, [40 x i8] zeroinitializer } } ; <%struct.SpinLock*> [#uses=1] +@_ZN3WTFL12thread_heapsE = internal global %"struct.WTF::TCMalloc_ThreadCache"* null ; <%"struct.WTF::TCMalloc_ThreadCache"**> [#uses=1] +@llvm.used = appending global [1 x i8*] [i8* bitcast (%"struct.WTF::TCMalloc_ThreadCache"* ()* @_ZN3WTF20TCMalloc_ThreadCache22CreateCacheIfNecessaryEv to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +define %"struct.WTF::TCMalloc_ThreadCache"* @_ZN3WTF20TCMalloc_ThreadCache22CreateCacheIfNecessaryEv() nounwind { +entry: + %0 = tail call i32 @pthread_mutex_lock(%struct.PlatformMutex* getelementptr inbounds (%struct.SpinLock* @_ZN3WTFL13pageheap_lockE, i32 0, i32 0)) nounwind + %.b24 = load i1* @_ZN3WTFL10tsd_initedE.b, align 4 ; <i1> [#uses=1] + 
br i1 %.b24, label %bb5, label %bb6 + +bb5: ; preds = %entry + %1 = tail call %struct._opaque_pthread_t* @pthread_self() nounwind + br label %bb6 + +bb6: ; preds = %bb5, %entry + %me.0 = phi %struct._opaque_pthread_t* [ %1, %bb5 ], [ null, %entry ] ; <%struct._opaque_pthread_t*> [#uses=2] + br label %bb11 + +bb7: ; preds = %bb11 + %2 = getelementptr inbounds %"struct.WTF::TCMalloc_ThreadCache"* %h.0, i32 0, i32 1 + %3 = load %struct._opaque_pthread_t** %2, align 4 + %4 = tail call i32 @pthread_equal(%struct._opaque_pthread_t* %3, %struct._opaque_pthread_t* %me.0) nounwind + %5 = icmp eq i32 %4, 0 + br i1 %5, label %bb10, label %bb14 + +bb10: ; preds = %bb7 + %6 = getelementptr inbounds %"struct.WTF::TCMalloc_ThreadCache"* %h.0, i32 0, i32 6 + br label %bb11 + +bb11: ; preds = %bb10, %bb6 + %h.0.in = phi %"struct.WTF::TCMalloc_ThreadCache"** [ @_ZN3WTFL12thread_heapsE, %bb6 ], [ %6, %bb10 ] ; <%"struct.WTF::TCMalloc_ThreadCache"**> [#uses=1] + %h.0 = load %"struct.WTF::TCMalloc_ThreadCache"** %h.0.in, align 4 ; <%"struct.WTF::TCMalloc_ThreadCache"*> [#uses=4] + %7 = icmp eq %"struct.WTF::TCMalloc_ThreadCache"* %h.0, null + br i1 %7, label %bb13, label %bb7 + +bb13: ; preds = %bb11 + %8 = tail call %"struct.WTF::TCMalloc_ThreadCache"* @_ZN3WTF20TCMalloc_ThreadCache7NewHeapEP17_opaque_pthread_t(%struct._opaque_pthread_t* %me.0) nounwind + br label %bb14 + +bb14: ; preds = %bb13, %bb7 + %heap.1 = phi %"struct.WTF::TCMalloc_ThreadCache"* [ %8, %bb13 ], [ %h.0, %bb7 ] ; <%"struct.WTF::TCMalloc_ThreadCache"*> [#uses=4] + %9 = tail call i32 @pthread_mutex_unlock(%struct.PlatformMutex* getelementptr inbounds (%struct.SpinLock* @_ZN3WTFL13pageheap_lockE, i32 0, i32 0)) nounwind + %10 = getelementptr inbounds %"struct.WTF::TCMalloc_ThreadCache"* %heap.1, i32 0, i32 2 + %11 = load i8* %10, align 4 + %toBool15not = icmp eq i8 %11, 0 ; <i1> [#uses=1] + br i1 %toBool15not, label %bb19, label %bb22 + +bb19: ; preds = %bb14 + %.b = load i1* @_ZN3WTFL10tsd_initedE.b, align 4 ; <i1> 
[#uses=1] + br i1 %.b, label %bb21, label %bb22 + +bb21: ; preds = %bb19 + store i8 1, i8* %10, align 4 + %12 = load i32* @_ZN3WTFL8heap_keyE, align 4 + %13 = bitcast %"struct.WTF::TCMalloc_ThreadCache"* %heap.1 to i8* + %14 = tail call i32 @pthread_setspecific(i32 %12, i8* %13) nounwind + ret %"struct.WTF::TCMalloc_ThreadCache"* %heap.1 + +bb22: ; preds = %bb19, %bb14 + ret %"struct.WTF::TCMalloc_ThreadCache"* %heap.1 +} + +declare i32 @pthread_mutex_lock(%struct.PlatformMutex*) + +declare i32 @pthread_mutex_unlock(%struct.PlatformMutex*) + +declare hidden %"struct.WTF::TCMalloc_ThreadCache"* @_ZN3WTF20TCMalloc_ThreadCache7NewHeapEP17_opaque_pthread_t(%struct._opaque_pthread_t*) nounwind + +declare i32 @pthread_setspecific(i32, i8*) + +declare %struct._opaque_pthread_t* @pthread_self() + +declare i32 @pthread_equal(%struct._opaque_pthread_t*, %struct._opaque_pthread_t*) +
diff --git a/src/LLVM/test/CodeGen/Thumb2/2010-01-19-RemovePredicates.ll b/src/LLVM/test/CodeGen/Thumb2/2010-01-19-RemovePredicates.ll new file mode 100644 index 0000000..771a4f8 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2010-01-19-RemovePredicates.ll
@@ -0,0 +1,53 @@ +; RUN: llc -O3 -relocation-model=pic -mcpu=cortex-a8 -mattr=+thumb2 < %s +; +; This test creates a predicated t2ADDri instruction that is then turned into a t2MOVgpr2gpr instr. +; Test that that the predicate operands are removed properly. +; +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin10" + +declare void @etoe53(i16* nocapture, i16* nocapture) nounwind + +define void @earith(double* nocapture %value, i32 %icode, double* nocapture %r1, double* nocapture %r2) nounwind { +entry: + %v = alloca [6 x i16], align 4 ; <[6 x i16]*> [#uses=1] + br i1 undef, label %bb2.i, label %bb5 + +bb2.i: ; preds = %entry + %0 = bitcast double* %value to i16* ; <i16*> [#uses=1] + call void @etoe53(i16* null, i16* %0) nounwind + ret void + +bb5: ; preds = %entry + switch i32 %icode, label %bb10 [ + i32 57, label %bb14 + i32 58, label %bb18 + i32 67, label %bb22 + i32 76, label %bb26 + i32 77, label %bb35 + ] + +bb10: ; preds = %bb5 + br label %bb46 + +bb14: ; preds = %bb5 + unreachable + +bb18: ; preds = %bb5 + unreachable + +bb22: ; preds = %bb5 + unreachable + +bb26: ; preds = %bb5 + br label %bb46 + +bb35: ; preds = %bb5 + unreachable + +bb46: ; preds = %bb26, %bb10 + %1 = bitcast double* %value to i16* ; <i16*> [#uses=1] + %v47 = getelementptr inbounds [6 x i16]* %v, i32 0, i32 0 ; <i16*> [#uses=1] + call void @etoe53(i16* %v47, i16* %1) nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll b/src/LLVM/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll new file mode 100644 index 0000000..c153092 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll
@@ -0,0 +1,76 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" + +define i32 @test(i32 %n) nounwind { +; CHECK: test: +; CHECK-NOT: mov +; CHECK: return +entry: + %0 = icmp eq i32 %n, 1 ; <i1> [#uses=1] + br i1 %0, label %return, label %bb.nph + +bb.nph: ; preds = %entry + %tmp = add i32 %n, -1 ; <i32> [#uses=1] + br label %bb + +bb: ; preds = %bb.nph, %bb + %indvar = phi i32 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; <i32> [#uses=1] + %u.05 = phi i64 [ undef, %bb.nph ], [ %ins, %bb ] ; <i64> [#uses=1] + %1 = tail call i32 @f() nounwind ; <i32> [#uses=1] + %tmp4 = zext i32 %1 to i64 ; <i64> [#uses=1] + %mask = and i64 %u.05, -4294967296 ; <i64> [#uses=1] + %ins = or i64 %tmp4, %mask ; <i64> [#uses=2] + tail call void @g(i64 %ins) nounwind + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %indvar.next, %tmp ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb, %entry + ret i32 undef +} + +define i32 @test_dead_cycle(i32 %n) nounwind { +; CHECK: test_dead_cycle: +; CHECK: blx +; CHECK-NOT: mov +; CHECK: blx +entry: + %0 = icmp eq i32 %n, 1 ; <i1> [#uses=1] + br i1 %0, label %return, label %bb.nph + +bb.nph: ; preds = %entry + %tmp = add i32 %n, -1 ; <i32> [#uses=2] + br label %bb + +bb: ; preds = %bb.nph, %bb2 + %indvar = phi i32 [ 0, %bb.nph ], [ %indvar.next, %bb2 ] ; <i32> [#uses=2] + %u.17 = phi i64 [ undef, %bb.nph ], [ %u.0, %bb2 ] ; <i64> [#uses=2] + %tmp9 = sub i32 %tmp, %indvar ; <i32> [#uses=1] + %1 = icmp sgt i32 %tmp9, 1 ; <i1> [#uses=1] + br i1 %1, label %bb1, label %bb2 + +bb1: ; preds = %bb + %2 = tail call i32 @f() nounwind ; <i32> [#uses=1] + %tmp6 = zext i32 %2 to i64 ; <i64> [#uses=1] + %mask = and i64 %u.17, -4294967296 ; <i64> [#uses=1] + %ins = or i64 %tmp6, %mask ; <i64> [#uses=1] + tail call void @g(i64 %ins) nounwind + br label %bb2 
+ +bb2: ; preds = %bb1, %bb +; also check for duplicate induction variables (radar 7645034) +; CHECK: subs r{{.*}}, #1 +; CHECK-NOT: subs r{{.*}}, #1 +; CHECK: pop + %u.0 = phi i64 [ %ins, %bb1 ], [ %u.17, %bb ] ; <i64> [#uses=2] + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %indvar.next, %tmp ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb2, %entry + ret i32 undef +} + +declare i32 @f() + +declare void @g(i64)
diff --git a/src/LLVM/test/CodeGen/Thumb2/2010-02-24-BigStack.ll b/src/LLVM/test/CodeGen/Thumb2/2010-02-24-BigStack.ll new file mode 100644 index 0000000..2b53747 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2010-02-24-BigStack.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -O0 -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 -mattr=+vfp2 +; This test creates a big stack frame without spilling any callee-saved registers. +; Make sure the whole stack frame is addrerssable wiothout scavenger crashes. +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin3.0.0-iphoneos" + +define void @FindMin(double* %panelTDEL, i8* %dclOfRow, i32 %numRows, i32 %numCols, double* %retMin_RES_TDEL) { +entry: + %panelTDEL.addr = alloca double*, align 4 ; <double**> [#uses=1] + %panelResTDEL = alloca [2560 x double], align 4 ; <[2560 x double]*> [#uses=0] + store double* %panelTDEL, double** %panelTDEL.addr + store double* %retMin_RES_TDEL, double** undef + store i32 0, i32* undef + unreachable +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/2010-03-08-addi12-ccout.ll b/src/LLVM/test/CodeGen/Thumb2/2010-03-08-addi12-ccout.ll new file mode 100644 index 0000000..7ce3c25 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2010-03-08-addi12-ccout.ll
@@ -0,0 +1,266 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin + +@.str41196 = external constant [2 x i8], align 4 ; <[2 x i8]*> [#uses=1] + +declare void @syStopraw(i32) nounwind + +declare i32 @SyFopen(i8*, i8*) nounwind + +declare i8* @SyFgets(i8*, i32) nounwind + +define void @SyHelp(i8* nocapture %topic, i32 %fin) nounwind { +entry: + %line = alloca [256 x i8], align 4 ; <[256 x i8]*> [#uses=1] + %secname = alloca [1024 x i8], align 4 ; <[1024 x i8]*> [#uses=0] + %last = alloca [256 x i8], align 4 ; <[256 x i8]*> [#uses=1] + %last2 = alloca [256 x i8], align 4 ; <[256 x i8]*> [#uses=1] + br i1 undef, label %bb, label %bb2 + +bb: ; preds = %entry + br i1 undef, label %bb2, label %bb3 + +bb2: ; preds = %bb, %entry + br label %bb3 + +bb3: ; preds = %bb2, %bb + %storemerge = phi i32 [ 0, %bb2 ], [ 1, %bb ] ; <i32> [#uses=1] + br i1 undef, label %bb19, label %bb20 + +bb19: ; preds = %bb3 + br label %bb20 + +bb20: ; preds = %bb19, %bb3 + br i1 undef, label %bb25, label %bb26 + +bb25: ; preds = %bb20 + br label %bb26 + +bb26: ; preds = %bb25, %bb20 + %offset.2 = phi i32 [ -2, %bb25 ], [ 0, %bb20 ] ; <i32> [#uses=1] + br i1 undef, label %bb.nph508, label %bb49 + +bb.nph508: ; preds = %bb26 + unreachable + +bb49: ; preds = %bb26 + br i1 undef, label %bb51, label %bb50 + +bb50: ; preds = %bb49 + br i1 undef, label %bb51, label %bb104 + +bb51: ; preds = %bb50, %bb49 + unreachable + +bb104: ; preds = %bb50 + br i1 undef, label %bb106, label %bb105 + +bb105: ; preds = %bb104 + br i1 undef, label %bb106, label %bb161 + +bb106: ; preds = %bb105, %bb104 + unreachable + +bb161: ; preds = %bb105 + br i1 false, label %bb163, label %bb162 + +bb162: ; preds = %bb161 + br i1 undef, label %bb163, label %bb224 + +bb163: ; preds = %bb162, %bb161 + unreachable + +bb224: ; preds = %bb162 + %0 = call i32 @SyFopen(i8* undef, i8* getelementptr inbounds ([2 x i8]* @.str41196, i32 0, i32 0)) nounwind ; <i32> [#uses=2] + br i1 false, label %bb297, label %bb300 + +bb297: ; preds = %bb224 + 
unreachable + +bb300: ; preds = %bb224 + %1 = icmp eq i32 %offset.2, -1 ; <i1> [#uses=1] + br label %bb440 + +bb307: ; preds = %isdigit1498.exit67 + br label %bb308 + +bb308: ; preds = %bb440, %bb307 + br i1 undef, label %bb309, label %isdigit1498.exit67 + +isdigit1498.exit67: ; preds = %bb308 + br i1 undef, label %bb309, label %bb307 + +bb309: ; preds = %isdigit1498.exit67, %bb308 + br i1 undef, label %bb310, label %bb313 + +bb310: ; preds = %bb309 + br label %bb313 + +bb313: ; preds = %bb310, %bb309 + br i1 false, label %bb318, label %bb317 + +bb317: ; preds = %bb313 + %2 = icmp sgt i8 undef, -1 ; <i1> [#uses=1] + br i1 %2, label %bb.i.i73, label %bb1.i.i74 + +bb.i.i73: ; preds = %bb317 + br i1 false, label %bb318, label %bb329.outer + +bb1.i.i74: ; preds = %bb317 + unreachable + +bb318: ; preds = %bb.i.i73, %bb313 + ret void + +bb329.outer: ; preds = %bb.i.i73 + br i1 undef, label %bb333, label %bb329.us.us + +bb329.us.us: ; preds = %bb329.us.us, %bb329.outer + br i1 undef, label %bb333, label %bb329.us.us + +bb333: ; preds = %bb329.us.us, %bb329.outer + %match.0.lcssa = phi i32 [ undef, %bb329.us.us ], [ 2, %bb329.outer ] ; <i32> [#uses=2] + br i1 undef, label %bb335, label %bb388 + +bb335: ; preds = %bb333 + %3 = and i1 undef, %1 ; <i1> [#uses=1] + br i1 %3, label %bb339, label %bb348 + +bb339: ; preds = %bb335 + br i1 false, label %bb340, label %bb345 + +bb340: ; preds = %bb339 + br i1 undef, label %return, label %bb341 + +bb341: ; preds = %bb340 + ret void + +bb345: ; preds = %bb345, %bb339 + %4 = phi i8 [ %5, %bb345 ], [ undef, %bb339 ] ; <i8> [#uses=0] + %indvar670 = phi i32 [ %tmp673, %bb345 ], [ 0, %bb339 ] ; <i32> [#uses=1] + %tmp673 = add i32 %indvar670, 1 ; <i32> [#uses=2] + %scevgep674 = getelementptr [256 x i8]* %last, i32 0, i32 %tmp673 ; <i8*> [#uses=1] + %5 = load i8* %scevgep674, align 1 ; <i8> [#uses=1] + br i1 undef, label %bb347, label %bb345 + +bb347: ; preds = %bb345 + br label %bb348 + +bb348: ; preds = %bb347, %bb335 + br i1 false, label 
%bb352, label %bb356 + +bb352: ; preds = %bb348 + unreachable + +bb356: ; preds = %bb348 + br i1 undef, label %bb360, label %bb369 + +bb360: ; preds = %bb356 + br i1 false, label %bb361, label %bb366 + +bb361: ; preds = %bb360 + br i1 undef, label %return, label %bb362 + +bb362: ; preds = %bb361 + ret void + +bb366: ; preds = %bb366, %bb360 + %indvar662 = phi i32 [ %tmp665, %bb366 ], [ 0, %bb360 ] ; <i32> [#uses=1] + %tmp665 = add i32 %indvar662, 1 ; <i32> [#uses=2] + %scevgep666 = getelementptr [256 x i8]* %last2, i32 0, i32 %tmp665 ; <i8*> [#uses=1] + %6 = load i8* %scevgep666, align 1 ; <i8> [#uses=0] + br i1 false, label %bb368, label %bb366 + +bb368: ; preds = %bb366 + br label %bb369 + +bb369: ; preds = %bb368, %bb356 + br i1 undef, label %bb373, label %bb388 + +bb373: ; preds = %bb383, %bb369 + %7 = call i8* @SyFgets(i8* undef, i32 %0) nounwind ; <i8*> [#uses=1] + %8 = icmp eq i8* %7, null ; <i1> [#uses=1] + br i1 %8, label %bb375, label %bb383 + +bb375: ; preds = %bb373 + %9 = icmp eq i32 %storemerge, 0 ; <i1> [#uses=1] + br i1 %9, label %return, label %bb376 + +bb376: ; preds = %bb375 + ret void + +bb383: ; preds = %bb373 + %10 = load i8* undef, align 1 ; <i8> [#uses=1] + %cond1 = icmp eq i8 %10, 46 ; <i1> [#uses=1] + br i1 %cond1, label %bb373, label %bb388 + +bb388: ; preds = %bb383, %bb369, %bb333 + %match.1140 = phi i32 [ %match.0.lcssa, %bb369 ], [ 0, %bb333 ], [ %match.0.lcssa, %bb383 ] ; <i32> [#uses=1] + br label %bb391 + +bb390: ; preds = %isdigit1498.exit83, %bb392 + %indvar.next725 = add i32 %indvar724, 1 ; <i32> [#uses=1] + br label %bb391 + +bb391: ; preds = %bb390, %bb388 + %indvar724 = phi i32 [ %indvar.next725, %bb390 ], [ 0, %bb388 ] ; <i32> [#uses=2] + %11 = load i8* undef, align 1 ; <i8> [#uses=0] + br i1 false, label %bb395, label %bb392 + +bb392: ; preds = %bb391 + br i1 undef, label %bb390, label %isdigit1498.exit83 + +isdigit1498.exit83: ; preds = %bb392 + br i1 undef, label %bb390, label %bb395 + +bb394: ; preds = 
%isdigit1498.exit87 + br label %bb395 + +bb395: ; preds = %bb394, %isdigit1498.exit83, %bb391 + %storemerge14.sum = add i32 %indvar724, undef ; <i32> [#uses=1] + %p.26 = getelementptr [256 x i8]* %line, i32 0, i32 %storemerge14.sum ; <i8*> [#uses=1] + br i1 undef, label %bb400, label %isdigit1498.exit87 + +isdigit1498.exit87: ; preds = %bb395 + br i1 false, label %bb400, label %bb394 + +bb400: ; preds = %isdigit1498.exit87, %bb395 + br i1 undef, label %bb402, label %bb403 + +bb402: ; preds = %bb400 + %12 = getelementptr inbounds i8* %p.26, i32 undef ; <i8*> [#uses=1] + br label %bb403 + +bb403: ; preds = %bb402, %bb400 + %p.29 = phi i8* [ %12, %bb402 ], [ undef, %bb400 ] ; <i8*> [#uses=0] + br i1 undef, label %bb405, label %bb404 + +bb404: ; preds = %bb403 + br i1 undef, label %bb405, label %bb407 + +bb405: ; preds = %bb404, %bb403 + br i1 undef, label %return, label %bb406 + +bb406: ; preds = %bb405 + call void @syStopraw(i32 %fin) nounwind + ret void + +bb407: ; preds = %bb404 + %cond = icmp eq i32 %match.1140, 2 ; <i1> [#uses=1] + br i1 %cond, label %bb408, label %bb428 + +bb408: ; preds = %bb407 + unreachable + +bb428: ; preds = %bb407 + br label %bb440 + +bb440: ; preds = %bb428, %bb300 + %13 = call i8* @SyFgets(i8* undef, i32 %0) nounwind ; <i8*> [#uses=0] + br i1 false, label %bb442, label %bb308 + +bb442: ; preds = %bb440 + unreachable + +return: ; preds = %bb405, %bb375, %bb361, %bb340 + ret void +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll b/src/LLVM/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll new file mode 100644 index 0000000..bb734ac --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll
@@ -0,0 +1,68 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 \ +; RUN: -pre-RA-sched=source | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 \ +; RUN: -pre-RA-sched=list-hybrid | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -regalloc=basic | FileCheck %s +; Radar 7459078 +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" + +%0 = type { i32, i32 } +%s1 = type { %s3, i32, %s4, i8*, void (i8*, i8*)*, i8*, i32*, i32*, i32*, i32, i64, [1 x i32] } +%s2 = type { i32 (...)**, %s4 } +%s3 = type { %s2, i32, i32, i32*, [4 x i8], float, %s4, i8*, i8* } +%s4 = type { %s5 } +%s5 = type { i32 } + +; Make sure the cmp is not scheduled before the InlineAsm that clobbers cc. +; CHECK: blx _f2 +; CHECK: cmp r0, #0 +; CHECK-NOT: cmp +; CHECK: InlineAsm Start +define void @test(%s1* %this, i32 %format, i32 %w, i32 %h, i32 %levels, i32* %s, i8* %data, i32* nocapture %rowbytes, void (i8*, i8*)* %release, i8* %info) nounwind { +entry: + %tmp1 = getelementptr inbounds %s1* %this, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0 + volatile store i32 1, i32* %tmp1, align 4 + %tmp12 = getelementptr inbounds %s1* %this, i32 0, i32 1 + store i32 %levels, i32* %tmp12, align 4 + %tmp13 = getelementptr inbounds %s1* %this, i32 0, i32 3 + store i8* %data, i8** %tmp13, align 4 + %tmp14 = getelementptr inbounds %s1* %this, i32 0, i32 4 + store void (i8*, i8*)* %release, void (i8*, i8*)** %tmp14, align 4 + %tmp15 = getelementptr inbounds %s1* %this, i32 0, i32 5 + store i8* %info, i8** %tmp15, align 4 + %tmp16 = getelementptr inbounds %s1* %this, i32 0, i32 6 + store i32* null, i32** %tmp16, align 4 + %tmp17 = getelementptr inbounds %s1* %this, i32 0, i32 7 + store i32* null, i32** %tmp17, align 4 + %tmp19 = getelementptr inbounds %s1* %this, i32 0, i32 10 + store i64 0, i64* %tmp19, align 4 + %tmp20 = getelementptr inbounds %s1* %this, i32 0, i32 
0 + tail call void @f1(%s3* %tmp20, i32* %s) nounwind + %tmp21 = shl i32 %format, 6 + %tmp22 = tail call zeroext i8 @f2(i32 %format) nounwind + %toBoolnot = icmp eq i8 %tmp22, 0 + %tmp23 = zext i1 %toBoolnot to i32 + %flags.0 = or i32 %tmp23, %tmp21 + %tmp24 = shl i32 %flags.0, 16 + %asmtmp.i.i.i = tail call %0 asm sideeffect "\0A0:\09ldrex $1, [$2]\0A\09orr $1, $1, $3\0A\09strex $0, $1, [$2]\0A\09cmp $0, #0\0A\09bne 0b", "=&r,=&r,r,r,~{memory},~{cc}"(i32* %tmp1, i32 %tmp24) nounwind + %tmp25 = getelementptr inbounds %s1* %this, i32 0, i32 2, i32 0, i32 0 + volatile store i32 1, i32* %tmp25, align 4 + %tmp26 = icmp eq i32 %levels, 0 + br i1 %tmp26, label %return, label %bb4 + +bb4: + %l.09 = phi i32 [ %tmp28, %bb4 ], [ 0, %entry ] + %scevgep = getelementptr %s1* %this, i32 0, i32 11, i32 %l.09 + %scevgep10 = getelementptr i32* %rowbytes, i32 %l.09 + %tmp27 = load i32* %scevgep10, align 4 + store i32 %tmp27, i32* %scevgep, align 4 + %tmp28 = add i32 %l.09, 1 + %exitcond = icmp eq i32 %tmp28, %levels + br i1 %exitcond, label %return, label %bb4 + +return: + ret void +} + +declare void @f1(%s3*, i32*) +declare zeroext i8 @f2(i32)
diff --git a/src/LLVM/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll b/src/LLVM/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll new file mode 100644 index 0000000..2246de3 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -O3 | FileCheck %s +; rdar://7493908 + +; Make sure the result of the first dynamic_alloc isn't copied back to sp more +; than once. We'll deal with poor codegen later. + +define void @t() nounwind ssp { +entry: +; CHECK: t: + %size = mul i32 8, 2 +; CHECK: subs r0, #16 +; CHECK: mov sp, r0 + %vla_a = alloca i8, i32 %size, align 8 +; CHECK: subs r0, #16 +; CHECK: mov sp, r0 + %vla_b = alloca i8, i32 %size, align 8 + unreachable +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/2010-04-26-CopyRegCrash.ll b/src/LLVM/test/CodeGen/Thumb2/2010-04-26-CopyRegCrash.ll new file mode 100644 index 0000000..3be016f --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2010-04-26-CopyRegCrash.ll
@@ -0,0 +1,73 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin +; Radar 7896289 + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin10" + +define void @test(i32 %mode) nounwind optsize noinline { +entry: + br i1 undef, label %return, label %bb3 + +bb3: ; preds = %entry + br i1 undef, label %bb15, label %bb18 + +bb15: ; preds = %bb3 + unreachable + +bb18: ; preds = %bb3 + switch i32 %mode, label %return [ + i32 0, label %bb26 + i32 1, label %bb56 + i32 2, label %bb107 + i32 6, label %bb150.preheader + i32 9, label %bb310.preheader + i32 13, label %bb414.preheader + i32 15, label %bb468.preheader + i32 16, label %bb522.preheader + ] + +bb150.preheader: ; preds = %bb18 + br i1 undef, label %bb154, label %bb160 + +bb310.preheader: ; preds = %bb18 + unreachable + +bb414.preheader: ; preds = %bb18 + unreachable + +bb468.preheader: ; preds = %bb18 + unreachable + +bb522.preheader: ; preds = %bb18 + unreachable + +bb26: ; preds = %bb18 + unreachable + +bb56: ; preds = %bb18 + unreachable + +bb107: ; preds = %bb18 + br label %bb110 + +bb110: ; preds = %bb122, %bb107 + %asmtmp.i.i179 = tail call i16 asm "rev16 $0, $1\0A", "=l,l"(i16 undef) nounwind ; <i16> [#uses=1] + %asmtmp.i.i178 = tail call i16 asm "rev16 $0, $1\0A", "=l,l"(i16 %asmtmp.i.i179) nounwind ; <i16> [#uses=1] + store i16 %asmtmp.i.i178, i16* undef, align 2 + br i1 undef, label %bb122, label %bb121 + +bb121: ; preds = %bb110 + br label %bb122 + +bb122: ; preds = %bb121, %bb110 + br label %bb110 + +bb154: ; preds = %bb150.preheader + unreachable + +bb160: ; preds = %bb150.preheader + unreachable + +return: ; preds = %bb18, %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/2010-05-24-rsbs.ll b/src/LLVM/test/CodeGen/Thumb2/2010-05-24-rsbs.ll new file mode 100644 index 0000000..e72d542 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2010-05-24-rsbs.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s +; Radar 8017376: Missing 's' suffix for t2RSBS instructions. +; CHECK: rsbs + +define i64 @test(i64 %x) nounwind readnone { +entry: + %0 = sub nsw i64 1, %x ; <i64> [#uses=1] + ret i64 %0 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll b/src/LLVM/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll new file mode 100644 index 0000000..01fb0a5 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
@@ -0,0 +1,42 @@ +; RUN: llc < %s -O3 -relocation-model=pic -mattr=+thumb2 -mcpu=cortex-a8 -disable-branch-fold | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin10" + +; This is a case where the coalescer was too eager. These two copies were +; considered equivalent and coalescable: +; +; 140 %reg1038:dsub_0<def> = VMOVD %reg1047:dsub_0, pred:14, pred:%reg0 +; 148 %reg1038:dsub_1<def> = VMOVD %reg1047:dsub_0, pred:14, pred:%reg0 +; +; Only one can be coalesced. + +@.str = private constant [7 x i8] c"%g %g\0A\00", align 4 ; <[7 x i8]*> [#uses=1] + +define i32 @main(i32 %argc, i8** nocapture %Argv) nounwind { +entry: + %0 = icmp eq i32 %argc, 2123 ; <i1> [#uses=1] + %U.0 = select i1 %0, double 3.282190e+01, double 8.731834e+02 ; <double> [#uses=2] + %1 = icmp eq i32 %argc, 5123 ; <i1> [#uses=1] + %V.0.ph = select i1 %1, double 7.779980e+01, double 0x409CCB9C779A6B51 ; <double> [#uses=1] + %2 = insertelement <2 x double> undef, double %U.0, i32 0 ; <<2 x double>> [#uses=2] + %3 = insertelement <2 x double> %2, double %U.0, i32 1 ; <<2 x double>> [#uses=2] + %4 = insertelement <2 x double> %2, double %V.0.ph, i32 1 ; <<2 x double>> [#uses=2] +; Constant pool load followed by add. +; Then clobber the loaded register, not the sum. 
+; CHECK: vldr.64 [[LDR:d.*]], +; CHECK: LPC0_0: +; CHECK: vadd.f64 [[ADD:d.*]], [[LDR]], [[LDR]] +; CHECK-NOT: vmov.f64 [[ADD]] + %5 = fadd <2 x double> %3, %3 ; <<2 x double>> [#uses=2] + %6 = fadd <2 x double> %4, %4 ; <<2 x double>> [#uses=2] + %tmp7 = extractelement <2 x double> %5, i32 0 ; <double> [#uses=1] + %tmp5 = extractelement <2 x double> %5, i32 1 ; <double> [#uses=1] +; CHECK: printf + %7 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0), double %tmp7, double %tmp5) nounwind ; <i32> [#uses=0] + %tmp3 = extractelement <2 x double> %6, i32 0 ; <double> [#uses=1] + %tmp1 = extractelement <2 x double> %6, i32 1 ; <double> [#uses=1] + %8 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0), double %tmp3, double %tmp1) nounwind ; <i32> [#uses=0] + ret i32 0 +} + +declare i32 @printf(i8* nocapture, ...) nounwind
diff --git a/src/LLVM/test/CodeGen/Thumb2/2010-06-19-ITBlockCrash.ll b/src/LLVM/test/CodeGen/Thumb2/2010-06-19-ITBlockCrash.ll new file mode 100644 index 0000000..501f763 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2010-06-19-ITBlockCrash.ll
@@ -0,0 +1,35 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O3 -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 +; rdar://8110842 + +declare arm_apcscc i32 @__maskrune(i32, i32) + +define arm_apcscc i32 @strncmpic(i8* nocapture %s1, i8* nocapture %s2, i32 %n) nounwind { +entry: + br i1 undef, label %bb11, label %bb19 + +bb11: ; preds = %entry + %0 = sext i8 0 to i32 ; <i32> [#uses=1] + br i1 undef, label %bb.i.i10, label %bb1.i.i11 + +bb.i.i10: ; preds = %bb11 + br label %isupper144.exit12 + +bb1.i.i11: ; preds = %bb11 + %1 = tail call arm_apcscc i32 @__maskrune(i32 %0, i32 32768) nounwind ; <i32> [#uses=1] + %2 = icmp ne i32 %1, 0 ; <i1> [#uses=1] + %3 = zext i1 %2 to i32 ; <i32> [#uses=1] + %.pre = load i8* undef, align 1 ; <i8> [#uses=1] + br label %isupper144.exit12 + +isupper144.exit12: ; preds = %bb1.i.i11, %bb.i.i10 + %4 = phi i8 [ %.pre, %bb1.i.i11 ], [ 0, %bb.i.i10 ] ; <i8> [#uses=1] + %5 = phi i32 [ %3, %bb1.i.i11 ], [ undef, %bb.i.i10 ] ; <i32> [#uses=1] + %6 = icmp eq i32 %5, 0 ; <i1> [#uses=1] + %7 = sext i8 %4 to i32 ; <i32> [#uses=1] + %storemerge1 = select i1 %6, i32 %7, i32 undef ; <i32> [#uses=1] + %8 = sub nsw i32 %storemerge1, 0 ; <i32> [#uses=1] + ret i32 %8 + +bb19: ; preds = %entry + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll b/src/LLVM/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll new file mode 100644 index 0000000..244d0bb --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll
@@ -0,0 +1,127 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O3 -relocation-model=pic | FileCheck %s +; rdar://8115404 +; Tail merging must not split an IT block. + +%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } +%struct._RuneCharClass = type { [14 x i8], i32 } +%struct._RuneEntry = type { i32, i32, i32, i32* } +%struct._RuneLocale = type { [8 x i8], [32 x i8], i32 (i8*, i32, i8**)*, i32 (i32, i8*, i32, i8**)*, i32, [256 x i32], [256 x i32], [256 x i32], %struct._RuneRange, %struct._RuneRange, %struct._RuneRange, i8*, i32, i32, %struct._RuneCharClass* } +%struct._RuneRange = type { i32, %struct._RuneEntry* } +%struct.__sFILEX = type opaque +%struct.__sbuf = type { i8*, i32 } + +@finput = external global %struct.FILE* ; <%struct.FILE**> [#uses=1] +@_DefaultRuneLocale = external global %struct._RuneLocale ; <%struct._RuneLocale*> [#uses=0] +@token_buffer = external global [1025 x i8], align 4 ; <[1025 x i8]*> [#uses=1] +@.str73 = external constant [6 x i8], align 4 ; <[6 x i8]*> [#uses=0] +@.str174 = external constant [5 x i8], align 4 ; <[5 x i8]*> [#uses=0] +@.str275 = external constant [6 x i8], align 4 ; <[6 x i8]*> [#uses=0] +@.str376 = external constant [5 x i8], align 4 ; <[5 x i8]*> [#uses=0] +@.str477 = external constant [6 x i8], align 4 ; <[6 x i8]*> [#uses=0] +@.str578 = external constant [6 x i8], align 4 ; <[6 x i8]*> [#uses=0] +@.str679 = external constant [7 x i8], align 4 ; <[7 x i8]*> [#uses=0] +@.str780 = external constant [6 x i8], align 4 ; <[6 x i8]*> [#uses=0] +@.str881 = external constant [5 x i8], align 4 ; <[5 x i8]*> [#uses=0] +@.str982 = external constant [6 x i8], align 4 ; <[6 x i8]*> [#uses=0] +@.str1083 = external constant [9 x i8], align 4 ; <[9 x i8]*> [#uses=0] +@.str1184 = external constant [7 x i8], align 4 ; <[7 x i8]*> [#uses=0] 
+@.str1285 = external constant [16 x i8], align 4 ; <[16 x i8]*> [#uses=0] +@.str1386 = external constant [12 x i8], align 4 ; <[12 x i8]*> [#uses=0] +@.str1487 = external constant [5 x i8], align 4 ; <[5 x i8]*> [#uses=0] +@llvm.used = external global [1 x i8*] ; <[1 x i8*]*> [#uses=0] + +define fastcc i32 @parse_percent_token() nounwind { +entry: +; CHECK: pop +; CHECK: pop +; CHECK: pop +; CHECK: pop +; CHECK: pop +; CHECK: pop +; CHECK: pop +; Do not convert into single stream code. BranchProbability Analysis assumes +; that branches which goes to "ret" intruction have lower probabilities. + switch i32 undef, label %bb7 [ + i32 37, label %bb43 + i32 48, label %bb5 + i32 50, label %bb4 + i32 60, label %bb2 + i32 61, label %bb6 + i32 62, label %bb3 + i32 123, label %bb1 + ] + +bb1: ; preds = %entry + ret i32 8 + +bb2: ; preds = %entry + ret i32 15 + +bb3: ; preds = %entry + ret i32 16 + +bb4: ; preds = %entry + ret i32 17 + +bb5: ; preds = %entry + ret i32 9 + +bb6: ; preds = %entry + ret i32 18 + +bb7: ; preds = %entry + br i1 undef, label %bb.i.i, label %bb1.i.i + +bb.i.i: ; preds = %bb7 + br i1 undef, label %bb43, label %bb12 + +bb1.i.i: ; preds = %bb7 + unreachable + +bb9: ; preds = %bb.i.i2 + br i1 undef, label %bb10, label %bb11 + +bb10: ; preds = %bb9 + br label %bb11 + +bb11: ; preds = %bb10, %bb9 + %p.0 = phi i8* [ undef, %bb10 ], [ %p.1, %bb9 ] ; <i8*> [#uses=1] + %0 = load %struct.FILE** @finput, align 4 ; <%struct.FILE*> [#uses=1] + %1 = tail call i32 @getc(%struct.FILE* %0) nounwind ; <i32> [#uses=0] + br label %bb12 + +bb12: ; preds = %bb11, %bb.i.i + %p.1 = phi i8* [ %p.0, %bb11 ], [ getelementptr inbounds ([1025 x i8]* @token_buffer, i32 0, i32 0), %bb.i.i ] ; <i8*> [#uses=2] + %2 = icmp ult i32 undef, 128 ; <i1> [#uses=1] + br i1 %2, label %bb.i.i2, label %bb1.i.i3 + +bb.i.i2: ; preds = %bb12 + %3 = load i32* null, align 4 ; <i32> [#uses=1] + %4 = lshr i32 %3, 8 ; <i32> [#uses=1] + %.lobit.i1 = and i32 %4, 1 ; <i32> [#uses=1] + %.not = icmp ne 
i32 %.lobit.i1, 0 ; <i1> [#uses=1] + %or.cond = or i1 %.not, undef ; <i1> [#uses=1] + br i1 %or.cond, label %bb9, label %bb14 + +bb1.i.i3: ; preds = %bb12 + unreachable + +bb14: ; preds = %bb.i.i2 + store i8 0, i8* %p.1, align 1 + br i1 undef, label %bb43, label %bb15 + +bb15: ; preds = %bb14 + unreachable + +bb43: ; preds = %bb14, %bb.i.i, %entry + %.0 = phi i32 [ 7, %entry ], [ 24, %bb.i.i ], [ 9, %bb14 ] ; <i32> [#uses=1] + ret i32 %.0 +} + +declare i32 @getc(%struct.FILE* nocapture) nounwind + +declare i32 @strcmp(i8* nocapture, i8* nocapture) nounwind readonly + +declare i32 @__maskrune(i32, i32) + +declare i32 @ungetc(i32, %struct.FILE* nocapture) nounwind
diff --git a/src/LLVM/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll b/src/LLVM/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll new file mode 100644 index 0000000..47d7a9c --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll
@@ -0,0 +1,59 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -O3 | FileCheck %s + +@.str = private constant [4 x i8] c"%d\0A\00", align 4 ; <[4 x i8]*> [#uses=1] + +define internal fastcc i32 @Callee(i32 %i) nounwind { +entry: +; CHECK: Callee: +; CHECK: push +; CHECK: mov r4, sp +; CHECK: sub.w [[R12:r[0-9]+]], r4, #1000 +; CHECK: mov sp, [[R12]] + %0 = icmp eq i32 %i, 0 ; <i1> [#uses=1] + br i1 %0, label %bb2, label %bb + +bb: ; preds = %entry + %1 = alloca [1000 x i8], align 4 ; <[1000 x i8]*> [#uses=1] + %.sub = getelementptr inbounds [1000 x i8]* %1, i32 0, i32 0 ; <i8*> [#uses=2] + %2 = call i32 (i8*, i32, i32, i8*, ...)* @__sprintf_chk(i8* %.sub, i32 0, i32 1000, i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %i) nounwind ; <i32> [#uses=0] + %3 = load i8* %.sub, align 4 ; <i8> [#uses=1] + %4 = sext i8 %3 to i32 ; <i32> [#uses=1] + ret i32 %4 + +bb2: ; preds = %entry +; Must restore sp from fp here. Make sure not to leave sp in a temporarily invalid +; state though. rdar://8465407 +; CHECK-NOT: mov sp, r7 +; CHECK: sub.w r4, r7, #8 +; CHECK: mov sp, r4 +; CHECK: pop + ret i32 0 +} + +declare i32 @__sprintf_chk(i8*, i32, i32, i8*, ...) nounwind + +define i32 @main() nounwind { +; CHECK: main: +bb.nph: + br label %bb + +bb: ; preds = %bb, %bb.nph + %0 = phi i32 [ 0, %bb.nph ], [ %3, %bb ] ; <i32> [#uses=2] + %j.01 = phi i32 [ 0, %bb.nph ], [ %2, %bb ] ; <i32> [#uses=1] + %1 = tail call fastcc i32 @Callee(i32 %0) nounwind ; <i32> [#uses=1] + %2 = add nsw i32 %1, %j.01 ; <i32> [#uses=2] + %3 = add nsw i32 %0, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %3, 10000 ; <i1> [#uses=1] + br i1 %exitcond, label %bb2, label %bb + +bb2: ; preds = %bb +; No need to restore sp from fp here. 
+; CHECK: printf +; CHECK-NOT: mov sp, r7 +; CHECK-NOT: sub sp, #12 +; CHECK: pop + %4 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %2) nounwind ; <i32> [#uses=0] + ret i32 0 +} + +declare i32 @printf(i8* nocapture, ...) nounwind
diff --git a/src/LLVM/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll b/src/LLVM/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll new file mode 100644 index 0000000..d2140a1 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll
@@ -0,0 +1,34 @@ +; rdar://8465407 +; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s + +%struct.buf = type opaque + +declare void @bar() nounwind optsize + +define void @foo() nounwind optsize { +; CHECK: foo: +; CHECK: push +; CHECK: mov r7, sp +; CHECK: sub sp, #4 +entry: + %m.i = alloca %struct.buf*, align 4 + br label %bb + +bb: + br i1 undef, label %bb3, label %bb2 + +bb2: + call void @bar() nounwind optsize + br i1 undef, label %bb, label %bb3 + +bb3: + br i1 undef, label %return, label %bb + +return: +; CHECK: %return +; 'mov sp, r7' would have left sp in an invalid state +; CHECK-NOT: mov sp, r7 +; CHECK-NOT: sub, sp, #4 +; CHECK: add sp, #4 + ret void +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/2010-12-03-AddSPNarrowing.ll b/src/LLVM/test/CodeGen/Thumb2/2010-12-03-AddSPNarrowing.ll new file mode 100644 index 0000000..5b91a5f --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2010-12-03-AddSPNarrowing.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s +; Radar 8724703: Make sure that a t2ADDrSPi instruction with SP as the +; destination register is narrowed to tADDspi instead of tADDrSPi. + +define void @test() nounwind { +entry: +; CHECK: sub.w +; CHECK: add.w + %Buffer.i = alloca [512 x i8], align 4 + ret void +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/2011-04-21-FILoweringBug.ll b/src/LLVM/test/CodeGen/Thumb2/2011-04-21-FILoweringBug.ll new file mode 100644 index 0000000..604a352 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2011-04-21-FILoweringBug.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s + +; Use sp, #imm to lower frame indices when the offset is multiple of 4 +; and in the range of 0-1020. This saves code size by utilizing +; 16-bit instructions. +; rdar://9321541 + +define i32 @t() nounwind { +entry: +; CHECK: t: +; CHECK: sub sp, #12 +; CHECK-NOT: sub +; CHECK: add r0, sp, #4 +; CHECK: add r1, sp, #8 +; CHECK: mov r2, sp + %size = alloca i32, align 4 + %count = alloca i32, align 4 + %index = alloca i32, align 4 + %0 = call i32 @foo(i32* %count, i32* %size, i32* %index) nounwind + ret i32 %0 +} + +declare i32 @foo(i32*, i32*, i32*)
diff --git a/src/LLVM/test/CodeGen/Thumb2/2011-06-07-TwoAddrEarlyClobber.ll b/src/LLVM/test/CodeGen/Thumb2/2011-06-07-TwoAddrEarlyClobber.ll new file mode 100644 index 0000000..b1ce3bb --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/2011-06-07-TwoAddrEarlyClobber.ll
@@ -0,0 +1,34 @@ +; RUN: llc -mtriple=thumbv7-apple-darwin10 < %s | FileCheck %s + +%struct.op = type { %struct.op*, %struct.op*, %struct.op* ()*, i32, i16, i16, i8, i8 } + +; CHECK: Perl_ck_sort +; CHECK: ldreq +; CHECK: moveq [[REGISTER:(r[0-9]+)|(lr)]] +; CHECK: streq {{(r[0-9])|(lr)}}, {{\[}}[[REGISTER]]{{\]}}, #24 + +define void @Perl_ck_sort() nounwind optsize { +entry: + %tmp27 = load %struct.op** undef, align 4 + switch i16 undef, label %if.end151 [ + i16 178, label %if.then60 + i16 177, label %if.then60 + ] + +if.then60: ; preds = %if.then40 + br i1 undef, label %if.then67, label %if.end95 + +if.then67: ; preds = %if.then60 + %op_next71 = getelementptr inbounds %struct.op* %tmp27, i32 0, i32 0 + store %struct.op* %tmp27, %struct.op** %op_next71, align 4 + %0 = getelementptr inbounds %struct.op* %tmp27, i32 1, i32 0 + br label %if.end95 + +if.end95: ; preds = %if.else92, %if.then67 + %.pre-phi = phi %struct.op** [ undef, %if.then60 ], [ %0, %if.then67 ] + %tmp98 = load %struct.op** %.pre-phi, align 4 + br label %if.end151 + +if.end151: ; preds = %if.end100, %if.end, %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/bfi.ll b/src/LLVM/test/CodeGen/Thumb2/bfi.ll new file mode 100644 index 0000000..3612e27 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/bfi.ll
@@ -0,0 +1,61 @@ +; RUN: llc -march=thumb -mattr=+v6t2 < %s | FileCheck %s + +%struct.F = type { [3 x i8], i8 } + +@X = common global %struct.F zeroinitializer, align 4 ; <%struct.F*> [#uses=1] + +define void @f1([1 x i32] %f.coerce0) nounwind { +entry: +; CHECK: f1 +; CHECK: movs r2, #10 +; CHECK: bfi r1, r2, #22, #4 + %0 = load i32* bitcast (%struct.F* @X to i32*), align 4 ; <i32> [#uses=1] + %1 = and i32 %0, -62914561 ; <i32> [#uses=1] + %2 = or i32 %1, 41943040 ; <i32> [#uses=1] + store i32 %2, i32* bitcast (%struct.F* @X to i32*), align 4 + ret void +} + +define i32 @f2(i32 %A, i32 %B) nounwind readnone optsize { +entry: +; CHECK: f2 +; CHECK: lsrs r1, r1, #7 +; CHECK: bfi r0, r1, #7, #16 + %and = and i32 %A, -8388481 ; <i32> [#uses=1] + %and2 = and i32 %B, 8388480 ; <i32> [#uses=1] + %or = or i32 %and2, %and ; <i32> [#uses=1] + ret i32 %or +} + +define i32 @f3(i32 %A, i32 %B) nounwind readnone optsize { +entry: +; CHECK: f3 +; CHECK: lsrs {{.*}}, #7 +; CHECK: bfi {{.*}}, #7, #16 + %and = and i32 %A, 8388480 ; <i32> [#uses=1] + %and2 = and i32 %B, -8388481 ; <i32> [#uses=1] + %or = or i32 %and2, %and ; <i32> [#uses=1] + ret i32 %or +} + +; rdar://8752056 +define i32 @f4(i32 %a) nounwind { +; CHECK: f4 +; CHECK: movw [[R1:r[0-9]+]], #3137 +; CHECK: bfi [[R1]], {{.*}}, #15, #5 + %1 = shl i32 %a, 15 + %ins7 = and i32 %1, 1015808 + %ins12 = or i32 %ins7, 3137 + ret i32 %ins12 +} + +; rdar://9177502 +define i32 @f5(i32 %a, i32 %b) nounwind readnone { +entry: +; CHECK: f5 +; CHECK-NOT: bfi r0, r2, #0, #1 +%and = and i32 %a, 2 +%b.masked = and i32 %b, -2 +%and3 = or i32 %b.masked, %and +ret i32 %and3 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/bfx.ll b/src/LLVM/test/CodeGen/Thumb2/bfx.ll new file mode 100644 index 0000000..489349d --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/bfx.ll
@@ -0,0 +1,28 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +define i32 @sbfx1(i32 %a) { +; CHECK: sbfx1 +; CHECK: sbfx r0, r0, #7, #11 + %t1 = lshr i32 %a, 7 + %t2 = trunc i32 %t1 to i11 + %t3 = sext i11 %t2 to i32 + ret i32 %t3 +} + +define i32 @ubfx1(i32 %a) { +; CHECK: ubfx1 +; CHECK: ubfx r0, r0, #7, #11 + %t1 = lshr i32 %a, 7 + %t2 = trunc i32 %t1 to i11 + %t3 = zext i11 %t2 to i32 + ret i32 %t3 +} + +define i32 @ubfx2(i32 %a) { +; CHECK: ubfx2 +; CHECK: ubfx r0, r0, #7, #11 + %t1 = lshr i32 %a, 7 + %t2 = and i32 %t1, 2047 + ret i32 %t2 +} +
diff --git a/src/LLVM/test/CodeGen/Thumb2/buildvector-crash.ll b/src/LLVM/test/CodeGen/Thumb2/buildvector-crash.ll new file mode 100644 index 0000000..01ef472 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/buildvector-crash.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -O3 -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8 | FileCheck %s +; Formerly crashed, 3573915. + +define void @RotateStarsFP_Vec() nounwind { +bb.nph372: + br label %bb8 + +bb8: ; preds = %bb8, %bb.nph372 + %0 = fadd <4 x float> undef, <float 0xBFEE353F80000000, float 0xBFEE353F80000000, float 0xBFEE353F80000000, float 0xBFEE353F80000000> + %1 = fmul <4 x float> %0, undef + %2 = fmul <4 x float> %1, undef + %3 = fadd <4 x float> undef, %2 + store <4 x float> %3, <4 x float>* undef, align 4 + br label %bb8 +; CHECK: RotateStarsFP_Vec: +; CHECK: vldmia +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/carry.ll b/src/LLVM/test/CodeGen/Thumb2/carry.ll new file mode 100644 index 0000000..de6f6e2 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/carry.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +define i64 @f1(i64 %a, i64 %b) { +entry: +; CHECK: f1: +; CHECK: subs r0, r0, r2 +; CHECK: sbcs r1, r3 + %tmp = sub i64 %a, %b + ret i64 %tmp +} + +define i64 @f2(i64 %a, i64 %b) { +entry: +; CHECK: f2: +; CHECK: adds r0, r0, r0 +; CHECK: adcs r1, r1 +; CHECK: subs r0, r0, r2 +; CHECK: sbcs r1, r3 + %tmp1 = shl i64 %a, 1 + %tmp2 = sub i64 %tmp1, %b + ret i64 %tmp2 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/cortex-fp.ll b/src/LLVM/test/CodeGen/Thumb2/cortex-fp.ll new file mode 100644 index 0000000..d06f8a7 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/cortex-fp.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -march=thumb -mcpu=cortex-m3 | FileCheck %s -check-prefix=CORTEXM3 +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -march=thumb -mcpu=cortex-m4 | FileCheck %s -check-prefix=CORTEXM4 +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -march=thumb -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 + + +define float @foo(float %a, float %b) { +entry: +; CHECK: foo +; CORTEXM3: blx ___mulsf3 +; CORTEXM4: vmul.f32 s0, s1, s0 +; CORTEXA8: vmul.f32 d0, d1, d0 + %0 = fmul float %a, %b + ret float %0 +} + +define double @bar(double %a, double %b) { +entry: +; CHECK: bar + %0 = fmul double %a, %b +; CORTEXM3: blx ___muldf3 +; CORTEXM4: blx ___muldf3 +; CORTEXA8: vmul.f64 d16, d17, d16 + ret double %0 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/crash.ll b/src/LLVM/test/CodeGen/Thumb2/crash.ll new file mode 100644 index 0000000..d8b51ec --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/crash.ll
@@ -0,0 +1,49 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin10" + +; This function would crash LiveIntervalAnalysis by creating a chain of 4 INSERT_SUBREGs of the same register. +define arm_apcscc void @NEON_vst4q_u32(i32* nocapture %sp0, i32* nocapture %sp1, i32* nocapture %sp2, i32* nocapture %sp3, i32* %dp) nounwind { +entry: + %0 = bitcast i32* %sp0 to <4 x i32>* ; <<4 x i32>*> [#uses=1] + %1 = load <4 x i32>* %0, align 16 ; <<4 x i32>> [#uses=1] + %2 = bitcast i32* %sp1 to <4 x i32>* ; <<4 x i32>*> [#uses=1] + %3 = load <4 x i32>* %2, align 16 ; <<4 x i32>> [#uses=1] + %4 = bitcast i32* %sp2 to <4 x i32>* ; <<4 x i32>*> [#uses=1] + %5 = load <4 x i32>* %4, align 16 ; <<4 x i32>> [#uses=1] + %6 = bitcast i32* %sp3 to <4 x i32>* ; <<4 x i32>*> [#uses=1] + %7 = load <4 x i32>* %6, align 16 ; <<4 x i32>> [#uses=1] + %8 = bitcast i32* %dp to i8* ; <i8*> [#uses=1] + tail call void @llvm.arm.neon.vst4.v4i32(i8* %8, <4 x i32> %1, <4 x i32> %3, <4 x i32> %5, <4 x i32> %7, i32 1) + ret void +} + +declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind + +@sbuf = common global [16 x i32] zeroinitializer, align 16 ; <[16 x i32]*> [#uses=5] +@dbuf = common global [16 x i32] zeroinitializer ; <[16 x i32]*> [#uses=2] + +; This function creates 4 chained INSERT_SUBREGS and then invokes the register scavenger. +; The first INSERT_SUBREG needs an <undef> use operand for that to work. 
+define arm_apcscc i32 @main() nounwind { +bb.nph: + br label %bb + +bb: ; preds = %bb, %bb.nph + %0 = phi i32 [ 0, %bb.nph ], [ %1, %bb ] ; <i32> [#uses=4] + %scevgep = getelementptr [16 x i32]* @sbuf, i32 0, i32 %0 ; <i32*> [#uses=1] + %scevgep5 = getelementptr [16 x i32]* @dbuf, i32 0, i32 %0 ; <i32*> [#uses=1] + store i32 %0, i32* %scevgep, align 4 + store i32 -1, i32* %scevgep5, align 4 + %1 = add nsw i32 %0, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %1, 16 ; <i1> [#uses=1] + br i1 %exitcond, label %bb2, label %bb + +bb2: ; preds = %bb + %2 = load <4 x i32>* bitcast ([16 x i32]* @sbuf to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1] + %3 = load <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32]* @sbuf, i32 0, i32 4) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1] + %4 = load <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32]* @sbuf, i32 0, i32 8) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1] + %5 = load <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32]* @sbuf, i32 0, i32 12) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1] + tail call void @llvm.arm.neon.vst4.v4i32(i8* bitcast ([16 x i32]* @dbuf to i8*), <4 x i32> %2, <4 x i32> %3, <4 x i32> %4, <4 x i32> %5, i32 1) nounwind + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/cross-rc-coalescing-1.ll b/src/LLVM/test/CodeGen/Thumb2/cross-rc-coalescing-1.ll new file mode 100644 index 0000000..c71c3ca --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/cross-rc-coalescing-1.ll
@@ -0,0 +1,52 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 + +%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } +%struct.__sFILEX = type opaque +%struct.__sbuf = type { i8*, i32 } + +declare i32 @fgetc(%struct.FILE* nocapture) nounwind + +define i32 @main(i32 %argc, i8** nocapture %argv) nounwind { +entry: + br i1 undef, label %bb, label %bb1 + +bb: ; preds = %entry + unreachable + +bb1: ; preds = %entry + br i1 undef, label %bb.i1, label %bb1.i2 + +bb.i1: ; preds = %bb1 + unreachable + +bb1.i2: ; preds = %bb1 + %0 = call i32 @fgetc(%struct.FILE* undef) nounwind ; <i32> [#uses=0] + br i1 undef, label %bb2.i3, label %bb3.i4 + +bb2.i3: ; preds = %bb1.i2 + br i1 undef, label %bb4.i, label %bb3.i4 + +bb3.i4: ; preds = %bb2.i3, %bb1.i2 + unreachable + +bb4.i: ; preds = %bb2.i3 + br i1 undef, label %bb5.i, label %get_image.exit + +bb5.i: ; preds = %bb4.i + unreachable + +get_image.exit: ; preds = %bb4.i + br i1 undef, label %bb28, label %bb27 + +bb27: ; preds = %get_image.exit + br label %bb.i + +bb.i: ; preds = %bb.i, %bb27 + %1 = fptrunc double undef to float ; <float> [#uses=1] + %2 = fptoui float %1 to i8 ; <i8> [#uses=1] + store i8 %2, i8* undef, align 1 + br label %bb.i + +bb28: ; preds = %get_image.exit + unreachable +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll b/src/LLVM/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll new file mode 100644 index 0000000..edbf834 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
@@ -0,0 +1,73 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | FileCheck %s + +define void @fht(float* nocapture %fz, i16 signext %n) nounwind { +; CHECK: fht: +entry: + br label %bb5 + +bb5: ; preds = %bb5, %entry + br i1 undef, label %bb5, label %bb.nph + +bb.nph: ; preds = %bb5 + br label %bb7 + +; Loop preheader +; CHECK: vmov.f32 +bb7: ; preds = %bb9, %bb.nph + %s1.02 = phi float [ undef, %bb.nph ], [ %35, %bb9 ] ; <float> [#uses=3] + %tmp79 = add i32 undef, undef ; <i32> [#uses=1] + %tmp53 = sub i32 undef, undef ; <i32> [#uses=1] + %0 = fadd float 0.000000e+00, 1.000000e+00 ; <float> [#uses=2] + %1 = fmul float 0.000000e+00, 0.000000e+00 ; <float> [#uses=2] + br label %bb8 + +bb8: ; preds = %bb8, %bb7 +; CHECK: %bb8 +; CHECK-NOT: vmov.f32 +; CHECK: blt + %tmp54 = add i32 0, %tmp53 ; <i32> [#uses=0] + %fi.1 = getelementptr float* %fz, i32 undef ; <float*> [#uses=2] + %tmp80 = add i32 0, %tmp79 ; <i32> [#uses=1] + %scevgep81 = getelementptr float* %fz, i32 %tmp80 ; <float*> [#uses=1] + %2 = load float* undef, align 4 ; <float> [#uses=1] + %3 = fmul float %2, %1 ; <float> [#uses=1] + %4 = load float* null, align 4 ; <float> [#uses=2] + %5 = fmul float %4, %0 ; <float> [#uses=1] + %6 = fsub float %3, %5 ; <float> [#uses=1] + %7 = fmul float %4, %1 ; <float> [#uses=1] + %8 = fadd float undef, %7 ; <float> [#uses=2] + %9 = load float* %fi.1, align 4 ; <float> [#uses=2] + %10 = fsub float %9, %8 ; <float> [#uses=1] + %11 = fadd float %9, %8 ; <float> [#uses=1] + %12 = fsub float 0.000000e+00, %6 ; <float> [#uses=1] + %13 = fsub float 0.000000e+00, undef ; <float> [#uses=2] + %14 = fmul float undef, %0 ; <float> [#uses=1] + %15 = fadd float %14, undef ; <float> [#uses=2] + %16 = load float* %scevgep81, align 4 ; <float> [#uses=2] + %17 = fsub float %16, %15 ; <float> [#uses=1] + %18 = fadd float %16, %15 ; <float> [#uses=2] + %19 = load float* undef, align 4 ; <float> [#uses=2] + %20 = fsub float %19, %13 ; <float> [#uses=2] + %21 = fadd float 
%19, %13 ; <float> [#uses=1] + %22 = fmul float %s1.02, %18 ; <float> [#uses=1] + %23 = fmul float 0.000000e+00, %20 ; <float> [#uses=1] + %24 = fsub float %22, %23 ; <float> [#uses=1] + %25 = fmul float 0.000000e+00, %18 ; <float> [#uses=1] + %26 = fmul float %s1.02, %20 ; <float> [#uses=1] + %27 = fadd float %25, %26 ; <float> [#uses=1] + %28 = fadd float %11, %27 ; <float> [#uses=1] + store float %28, float* %fi.1, align 4 + %29 = fadd float %12, %24 ; <float> [#uses=1] + store float %29, float* null, align 4 + %30 = fmul float 0.000000e+00, %21 ; <float> [#uses=1] + %31 = fmul float %s1.02, %17 ; <float> [#uses=1] + %32 = fsub float %30, %31 ; <float> [#uses=1] + %33 = fsub float %10, %32 ; <float> [#uses=1] + store float %33, float* undef, align 4 + %34 = icmp slt i32 undef, undef ; <i1> [#uses=1] + br i1 %34, label %bb8, label %bb9 + +bb9: ; preds = %bb8 + %35 = fadd float 0.000000e+00, undef ; <float> [#uses=1] + br label %bb7 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/dg.exp b/src/LLVM/test/CodeGen/Thumb2/dg.exp new file mode 100644 index 0000000..3ff359a --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/dg.exp
@@ -0,0 +1,5 @@ +load_lib llvm.exp + +if { [llvm_supports_target ARM] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/div.ll b/src/LLVM/test/CodeGen/Thumb2/div.ll new file mode 100644 index 0000000..2c00c70 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/div.ll
@@ -0,0 +1,45 @@ +; RUN: llc < %s -mtriple=thumb-apple-darwin -mattr=+thumb2 \ +; RUN: | FileCheck %s -check-prefix=CHECK-THUMB +; RUN: llc < %s -march=thumb -mcpu=cortex-m3 -mattr=+thumb2 \ +; RUN: | FileCheck %s -check-prefix=CHECK-THUMBV7M + +define i32 @f1(i32 %a, i32 %b) { +entry: +; CHECK-THUMB: f1 +; CHECK-THUMB: __divsi3 +; CHECK-THUMBV7M: f1 +; CHECK-THUMBV7M: sdiv + %tmp1 = sdiv i32 %a, %b ; <i32> [#uses=1] + ret i32 %tmp1 +} + +define i32 @f2(i32 %a, i32 %b) { +entry: +; CHECK-THUMB: f2 +; CHECK-THUMB: __udivsi3 +; CHECK-THUMBV7M: f2 +; CHECK-THUMBV7M: udiv + %tmp1 = udiv i32 %a, %b ; <i32> [#uses=1] + ret i32 %tmp1 +} + +define i32 @f3(i32 %a, i32 %b) { +entry: +; CHECK-THUMB: f3 +; CHECK-THUMB: __modsi3 +; CHECK-THUMBV7M: f3 +; CHECK-THUMBV7M: sdiv + %tmp1 = srem i32 %a, %b ; <i32> [#uses=1] + ret i32 %tmp1 +} + +define i32 @f4(i32 %a, i32 %b) { +entry: +; CHECK-THUMB: f4 +; CHECK-THUMB: __umodsi3 +; CHECK-THUMBV7M: f4 +; CHECK-THUMBV7M: udiv + %tmp1 = urem i32 %a, %b ; <i32> [#uses=1] + ret i32 %tmp1 +} +
diff --git a/src/LLVM/test/CodeGen/Thumb2/frameless.ll b/src/LLVM/test/CodeGen/Thumb2/frameless.ll new file mode 100644 index 0000000..fa8d5d8 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/frameless.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -disable-fp-elim | not grep mov +; RUN: llc < %s -mtriple=thumbv7-linux -disable-fp-elim | not grep mov + +define void @t() nounwind readnone { + ret void +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/frameless2.ll b/src/LLVM/test/CodeGen/Thumb2/frameless2.ll new file mode 100644 index 0000000..c5d3239 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/frameless2.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -disable-fp-elim | not grep r7 + +%struct.noise3 = type { [3 x [17 x i32]] } +%struct.noiseguard = type { i32, i32, i32 } + +define void @vorbis_encode_noisebias_setup(i8* nocapture %vi.0.7.val, double %s, i32 %block, i32* nocapture %suppress, %struct.noise3* nocapture %in, %struct.noiseguard* nocapture %guard, double %userbias) nounwind { +entry: + %0 = getelementptr %struct.noiseguard* %guard, i32 %block, i32 2; <i32*> [#uses=1] + %1 = load i32* %0, align 4 ; <i32> [#uses=1] + store i32 %1, i32* undef, align 4 + unreachable +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/ifcvt-neon.ll b/src/LLVM/test/CodeGen/Thumb2/ifcvt-neon.ll new file mode 100644 index 0000000..6832053 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/ifcvt-neon.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s -march=thumb -mcpu=cortex-a8 | FileCheck %s +; rdar://7368193 + +@a = common global float 0.000000e+00 ; <float*> [#uses=2] +@b = common global float 0.000000e+00 ; <float*> [#uses=1] + +define float @t(i32 %c) nounwind { +entry: + %0 = icmp sgt i32 %c, 1 ; <i1> [#uses=1] + %1 = load float* @a, align 4 ; <float> [#uses=2] + %2 = load float* @b, align 4 ; <float> [#uses=2] + br i1 %0, label %bb, label %bb1 + +bb: ; preds = %entry +; CHECK: ite lt +; CHECK: vsublt.f32 +; CHECK-NEXT: vaddge.f32 + %3 = fadd float %1, %2 ; <float> [#uses=1] + br label %bb2 + +bb1: ; preds = %entry + %4 = fsub float %1, %2 ; <float> [#uses=1] + br label %bb2 + +bb2: ; preds = %bb1, %bb + %storemerge = phi float [ %4, %bb1 ], [ %3, %bb ] ; <float> [#uses=2] + store float %storemerge, float* @a + ret float %storemerge +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/large-stack.ll b/src/LLVM/test/CodeGen/Thumb2/large-stack.ll new file mode 100644 index 0000000..68b5d1c --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/large-stack.ll
@@ -0,0 +1,39 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=DARWIN +; RUN: llc < %s -march=thumb -mattr=+thumb2 -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=LINUX + +define void @test1() { +; DARWIN: test1: +; DARWIN: sub sp, #256 +; LINUX: test1: +; LINUX: sub sp, #256 + %tmp = alloca [ 64 x i32 ] , align 4 + ret void +} + +define void @test2() { +; DARWIN: test2: +; DARWIN: sub.w sp, sp, #4160 +; DARWIN: sub sp, #8 +; LINUX: test2: +; LINUX: sub.w sp, sp, #4160 +; LINUX: sub sp, #8 + %tmp = alloca [ 4168 x i8 ] , align 4 + ret void +} + +define i32 @test3() { +; DARWIN: test3: +; DARWIN: push {r4, r7, lr} +; DARWIN: sub.w sp, sp, #805306368 +; DARWIN: sub sp, #20 +; LINUX: test3: +; LINUX: push.w {r4, r7, r11, lr} +; LINUX: sub.w sp, sp, #805306368 +; LINUX: sub sp, #16 + %retval = alloca i32, align 4 + %tmp = alloca i32, align 4 + %a = alloca [805306369 x i8], align 16 + store i32 0, i32* %tmp + %tmp1 = load i32* %tmp + ret i32 %tmp1 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/ldr-str-imm12.ll b/src/LLVM/test/CodeGen/Thumb2/ldr-str-imm12.ll new file mode 100644 index 0000000..4597ba5 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/ldr-str-imm12.ll
@@ -0,0 +1,76 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim -regalloc=linearscan | FileCheck %s +; rdar://7352504 +; Make sure we use "str r9, [sp, #+28]" instead of "sub.w r4, r7, #256" followed by "str r9, [r4, #-32]". + +%0 = type { i16, i8, i8 } +%1 = type { [2 x i32], [2 x i32] } +%2 = type { %union.rec* } +%struct.FILE_POS = type { i8, i8, i16, i32 } +%struct.GAP = type { i8, i8, i16 } +%struct.LIST = type { %union.rec*, %union.rec* } +%struct.STYLE = type { %union.anon, %union.anon, i16, i16, i32 } +%struct.head_type = type { [2 x %struct.LIST], %union.FIRST_UNION, %union.SECOND_UNION, %union.THIRD_UNION, %union.FOURTH_UNION, %union.rec*, %2, %union.rec*, %union.rec*, %union.rec*, %union.rec*, %union.rec*, %union.rec*, %union.rec*, %union.rec*, i32 } +%union.FIRST_UNION = type { %struct.FILE_POS } +%union.FOURTH_UNION = type { %struct.STYLE } +%union.SECOND_UNION = type { %0 } +%union.THIRD_UNION = type { %1 } +%union.anon = type { %struct.GAP } +%union.rec = type { %struct.head_type } + +@zz_hold = external global %union.rec* ; <%union.rec**> [#uses=2] +@zz_res = external global %union.rec* ; <%union.rec**> [#uses=1] + +define %union.rec* @Manifest(%union.rec* %x, %union.rec* %env, %struct.STYLE* %style, %union.rec** %bthr, %union.rec** %fthr, %union.rec** %target, %union.rec** %crs, i32 %ok, i32 %need_expand, %union.rec** %enclose, i32 %fcr) nounwind { +entry: +; CHECK: ldr{{(.w)?}} {{(r[0-9]+)|(lr)}}, [r7, #28] + %xgaps.i = alloca [32 x %union.rec*], align 4 ; <[32 x %union.rec*]*> [#uses=0] + %ycomp.i = alloca [32 x %union.rec*], align 4 ; <[32 x %union.rec*]*> [#uses=0] + br label %bb20 + +bb20: ; preds = %entry + switch i32 undef, label %bb1287 [ + i32 11, label %bb119 + i32 12, label %bb119 + i32 21, label %bb420 + i32 23, label %bb420 + i32 45, label %bb438 + i32 46, label %bb438 + i32 55, label %bb533 + i32 56, label %bb569 + i32 64, label %bb745 + i32 78, label %bb1098 + ] + +bb119: ; preds 
= %bb20, %bb20 + unreachable + +bb420: ; preds = %bb20, %bb20 +; CHECK: bb420 +; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp] +; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #4] +; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #8] +; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #24] + store %union.rec* null, %union.rec** @zz_hold, align 4 + store %union.rec* null, %union.rec** @zz_res, align 4 + store %union.rec* %x, %union.rec** @zz_hold, align 4 + %0 = call %union.rec* @Manifest(%union.rec* undef, %union.rec* %env, %struct.STYLE* %style, %union.rec** %bthr, %union.rec** %fthr, %union.rec** %target, %union.rec** %crs, i32 %ok, i32 %need_expand, %union.rec** %enclose, i32 %fcr) nounwind ; <%union.rec*> [#uses=0] + unreachable + +bb438: ; preds = %bb20, %bb20 + unreachable + +bb533: ; preds = %bb20 + ret %union.rec* %x + +bb569: ; preds = %bb20 + unreachable + +bb745: ; preds = %bb20 + unreachable + +bb1098: ; preds = %bb20 + unreachable + +bb1287: ; preds = %bb20 + unreachable +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/lsr-deficiency.ll b/src/LLVM/test/CodeGen/Thumb2/lsr-deficiency.ll new file mode 100644 index 0000000..9ff114e --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/lsr-deficiency.ll
@@ -0,0 +1,41 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic | FileCheck %s +; rdar://7387640 + +; This now reduces to a single induction variable. + +; TODO: It still gets a GPR shuffle at the end of the loop +; This is because something in instruction selection has decided +; that comparing the pre-incremented value with zero is better +; than comparing the post-incremented value with -4. + +@G = external global i32 ; <i32*> [#uses=2] +@array = external global i32* ; <i32**> [#uses=1] + +define void @t() nounwind optsize { +; CHECK: t: +; CHECK: mov{{.*}}, #1000 +entry: + %.pre = load i32* @G, align 4 ; <i32> [#uses=1] + br label %bb + +bb: ; preds = %bb, %entry +; CHECK: LBB0_1: +; CHECK: cmp [[R2:r[0-9]+]], #0 +; CHECK: sub{{(.w)?}} [[REGISTER:(r[0-9]+)|(lr)]], [[R2]], #1 +; CHECK: mov [[R2]], [[REGISTER]] + + %0 = phi i32 [ %.pre, %entry ], [ %3, %bb ] ; <i32> [#uses=1] + %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2] + %tmp5 = sub i32 1000, %indvar ; <i32> [#uses=1] + %1 = load i32** @array, align 4 ; <i32*> [#uses=1] + %scevgep = getelementptr i32* %1, i32 %tmp5 ; <i32*> [#uses=1] + %2 = load i32* %scevgep, align 4 ; <i32> [#uses=1] + %3 = add nsw i32 %2, %0 ; <i32> [#uses=2] + store i32 %3, i32* @G, align 4 + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %indvar.next, 1001 ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb + ret void +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/machine-licm.ll b/src/LLVM/test/CodeGen/Thumb2/machine-licm.ll new file mode 100644 index 0000000..46937fc --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/machine-licm.ll
@@ -0,0 +1,121 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=dynamic-no-pic -disable-fp-elim | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim | FileCheck %s --check-prefix=PIC +; rdar://7353541 +; rdar://7354376 + +@GV = external global i32 ; <i32*> [#uses=2] + +define void @t1(i32* nocapture %vals, i32 %c) nounwind { +entry: +; CHECK: t1: +; CHECK: bxeq lr + + %0 = icmp eq i32 %c, 0 ; <i1> [#uses=1] + br i1 %0, label %return, label %bb.nph + +bb.nph: ; preds = %entry +; CHECK: movw r[[R2:[0-9]+]], :lower16:L_GV$non_lazy_ptr +; CHECK: movt r[[R2]], :upper16:L_GV$non_lazy_ptr +; CHECK: ldr{{(.w)?}} r[[R2b:[0-9]+]], [r[[R2]] +; CHECK: ldr{{.*}}, [r[[R2b]] +; CHECK: LBB0_ +; CHECK-NOT: LCPI0_0: + +; PIC: movw r[[R2:[0-9]+]], :lower16:(L_GV$non_lazy_ptr-(LPC0_0+4)) +; PIC: movt r[[R2]], :upper16:(L_GV$non_lazy_ptr-(LPC0_0+4)) +; PIC: add r[[R2]], pc +; PIC: ldr{{(.w)?}} r[[R2b:[0-9]+]], [r[[R2]] +; PIC: ldr{{.*}}, [r[[R2b]] +; PIC: LBB0_ +; PIC-NOT: LCPI0_0: +; PIC: .section + %.pre = load i32* @GV, align 4 ; <i32> [#uses=1] + br label %bb + +bb: ; preds = %bb, %bb.nph + %1 = phi i32 [ %.pre, %bb.nph ], [ %3, %bb ] ; <i32> [#uses=1] + %i.03 = phi i32 [ 0, %bb.nph ], [ %4, %bb ] ; <i32> [#uses=2] + %scevgep = getelementptr i32* %vals, i32 %i.03 ; <i32*> [#uses=1] + %2 = load i32* %scevgep, align 4 ; <i32> [#uses=1] + %3 = add nsw i32 %1, %2 ; <i32> [#uses=2] + store i32 %3, i32* @GV, align 4 + %4 = add i32 %i.03, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %4, %c ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +} + +; rdar://8001136 +define void @t2(i8* %ptr1, i8* %ptr2) nounwind { +entry: +; CHECK: t2: +; CHECK: mov.w [[R3:r[0-9]+]], #1065353216 +; CHECK: vdup.32 q{{.*}}, [[R3]] + br i1 undef, label %bb1, label %bb2 + +bb1: +; CHECK-NEXT: %bb1 + %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ] + 
%tmp1 = shl i32 %indvar, 2 + %gep1 = getelementptr i8* %ptr1, i32 %tmp1 + %tmp2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %gep1, i32 1) + %tmp3 = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> %tmp2) + %gep2 = getelementptr i8* %ptr2, i32 %tmp1 + call void @llvm.arm.neon.vst1.v4f32(i8* %gep2, <4 x float> %tmp3, i32 1) + %indvar.next = add i32 %indvar, 1 + %cond = icmp eq i32 %indvar.next, 10 + br i1 %cond, label %bb2, label %bb1 + +bb2: + ret void +} + +; CHECK-NOT: LCPI1_0: + +declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly + +declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind + +declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone + +; rdar://8241368 +; isel should not fold immediate into eor's which would have prevented LICM. +define zeroext i16 @t3(i8 zeroext %data, i16 zeroext %crc) nounwind readnone { +; CHECK: t3: +bb.nph: +; CHECK: bb.nph +; CHECK: movw {{(r[0-9])|(lr)}}, #32768 +; CHECK: movs {{(r[0-9]+)|(lr)}}, #0 +; CHECK: movw [[REGISTER:(r[0-9]+)|(lr)]], #16386 +; CHECK: movw {{(r[0-9]+)|(lr)}}, #65534 +; CHECK: movt {{(r[0-9]+)|(lr)}}, #65535 + br label %bb + +bb: ; preds = %bb, %bb.nph +; CHECK: bb +; CHECK: eor.w {{(r[0-9])|(lr)}}, {{(r[0-9])|(lr)}}, [[REGISTER]] +; CHECK: eor.w +; CHECK-NOT: eor +; CHECK: and + %data_addr.013 = phi i8 [ %data, %bb.nph ], [ %8, %bb ] ; <i8> [#uses=2] + %crc_addr.112 = phi i16 [ %crc, %bb.nph ], [ %crc_addr.2, %bb ] ; <i16> [#uses=3] + %i.011 = phi i8 [ 0, %bb.nph ], [ %7, %bb ] ; <i8> [#uses=1] + %0 = trunc i16 %crc_addr.112 to i8 ; <i8> [#uses=1] + %1 = xor i8 %data_addr.013, %0 ; <i8> [#uses=1] + %2 = and i8 %1, 1 ; <i8> [#uses=1] + %3 = icmp eq i8 %2, 0 ; <i1> [#uses=2] + %4 = xor i16 %crc_addr.112, 16386 ; <i16> [#uses=1] + %crc_addr.0 = select i1 %3, i16 %crc_addr.112, i16 %4 ; <i16> [#uses=1] + %5 = lshr i16 
%crc_addr.0, 1 ; <i16> [#uses=2] + %6 = or i16 %5, -32768 ; <i16> [#uses=1] + %crc_addr.2 = select i1 %3, i16 %5, i16 %6 ; <i16> [#uses=2] + %7 = add i8 %i.011, 1 ; <i8> [#uses=2] + %8 = lshr i8 %data_addr.013, 1 ; <i8> [#uses=1] + %exitcond = icmp eq i8 %7, 8 ; <i1> [#uses=1] + br i1 %exitcond, label %bb8, label %bb + +bb8: ; preds = %bb + ret i16 %crc_addr.2 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/mul_const.ll b/src/LLVM/test/CodeGen/Thumb2/mul_const.ll new file mode 100644 index 0000000..9a2ec93 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/mul_const.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s +; rdar://7069502 + +define i32 @t1(i32 %v) nounwind readnone { +entry: +; CHECK: t1: +; CHECK: add.w r0, r0, r0, lsl #3 + %0 = mul i32 %v, 9 + ret i32 %0 +} + +define i32 @t2(i32 %v) nounwind readnone { +entry: +; CHECK: t2: +; CHECK: rsb r0, r0, r0, lsl #3 + %0 = mul i32 %v, 7 + ret i32 %0 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/pic-load.ll b/src/LLVM/test/CodeGen/Thumb2/pic-load.ll new file mode 100644 index 0000000..35a03e7 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/pic-load.ll
@@ -0,0 +1,21 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -relocation-model=pic | FileCheck %s + + %struct.anon = type { void ()* } + %struct.one_atexit_routine = type { %struct.anon, i32, i8* } +@__dso_handle = external global { } ; <{ }*> [#uses=1] +@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (void ()*)* @atexit to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +define hidden i32 @atexit(void ()* %func) nounwind { +entry: +; CHECK: atexit: +; CHECK: add r0, pc + %r = alloca %struct.one_atexit_routine, align 4 ; <%struct.one_atexit_routine*> [#uses=3] + %0 = getelementptr %struct.one_atexit_routine* %r, i32 0, i32 0, i32 0 ; <void ()**> [#uses=1] + store void ()* %func, void ()** %0, align 4 + %1 = getelementptr %struct.one_atexit_routine* %r, i32 0, i32 1 ; <i32*> [#uses=1] + store i32 0, i32* %1, align 4 + %2 = call i32 @atexit_common(%struct.one_atexit_routine* %r, i8* bitcast ({ }* @__dso_handle to i8*)) nounwind ; <i32> [#uses=1] + ret i32 %2 +} + +declare i32 @atexit_common(%struct.one_atexit_routine*, i8*) nounwind
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-adc.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-adc.ll new file mode 100644 index 0000000..702df91 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-adc.ll
@@ -0,0 +1,48 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +; 734439407618 = 0x000000ab00000002 +define i64 @f1(i64 %a) { +; CHECK: f1: +; CHECK: adds r0, #2 + %tmp = add i64 %a, 734439407618 + ret i64 %tmp +} + +; 5066626890203138 = 0x0012001200000002 +define i64 @f2(i64 %a) { +; CHECK: f2: +; CHECK: adds r0, #2 + %tmp = add i64 %a, 5066626890203138 + ret i64 %tmp +} + +; 3747052064576897026 = 0x3400340000000002 +define i64 @f3(i64 %a) { +; CHECK: f3: +; CHECK: adds r0, #2 + %tmp = add i64 %a, 3747052064576897026 + ret i64 %tmp +} + +; 6221254862626095106 = 0x5656565600000002 +define i64 @f4(i64 %a) { +; CHECK: f4: +; CHECK: adds r0, #2 + %tmp = add i64 %a, 6221254862626095106 + ret i64 %tmp +} + +; 287104476244869122 = 0x03fc000000000002 +define i64 @f5(i64 %a) { +; CHECK: f5: +; CHECK: adds r0, #2 + %tmp = add i64 %a, 287104476244869122 + ret i64 %tmp +} + +define i64 @f6(i64 %a, i64 %b) { +; CHECK: f6: +; CHECK: adds r0, r0, r2 + %tmp = add i64 %a, %b + ret i64 %tmp +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-add.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-add.ll new file mode 100644 index 0000000..66fca13 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-add.ll
@@ -0,0 +1,83 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +define i32 @t2ADDrc_255(i32 %lhs) { +; CHECK: t2ADDrc_255: +; CHECK-NOT: bx lr +; CHECK: add{{.*}} #255 +; CHECK: bx lr + + %Rd = add i32 %lhs, 255 + ret i32 %Rd +} + +define i32 @t2ADDrc_256(i32 %lhs) { +; CHECK: t2ADDrc_256: +; CHECK-NOT: bx lr +; CHECK: add{{.*}} #256 +; CHECK: bx lr + + %Rd = add i32 %lhs, 256 + ret i32 %Rd +} + +define i32 @t2ADDrc_257(i32 %lhs) { +; CHECK: t2ADDrc_257: +; CHECK-NOT: bx lr +; CHECK: add{{.*}} #257 +; CHECK: bx lr + + %Rd = add i32 %lhs, 257 + ret i32 %Rd +} + +define i32 @t2ADDrc_4094(i32 %lhs) { +; CHECK: t2ADDrc_4094: +; CHECK-NOT: bx lr +; CHECK: add{{.*}} #4094 +; CHECK: bx lr + + %Rd = add i32 %lhs, 4094 + ret i32 %Rd +} + +define i32 @t2ADDrc_4095(i32 %lhs) { +; CHECK: t2ADDrc_4095: +; CHECK-NOT: bx lr +; CHECK: add{{.*}} #4095 +; CHECK: bx lr + + %Rd = add i32 %lhs, 4095 + ret i32 %Rd +} + +define i32 @t2ADDrc_4096(i32 %lhs) { +; CHECK: t2ADDrc_4096: +; CHECK-NOT: bx lr +; CHECK: add{{.*}} #4096 +; CHECK: bx lr + + %Rd = add i32 %lhs, 4096 + ret i32 %Rd +} + +define i32 @t2ADDrr(i32 %lhs, i32 %rhs) { +; CHECK: t2ADDrr: +; CHECK-NOT: bx lr +; CHECK: add +; CHECK: bx lr + + %Rd = add i32 %lhs, %rhs + ret i32 %Rd +} + +define i32 @t2ADDrs(i32 %lhs, i32 %rhs) { +; CHECK: t2ADDrs: +; CHECK-NOT: bx lr +; CHECK: add{{.*}} lsl #8 +; CHECK: bx lr + + %tmp = shl i32 %rhs, 8 + %Rd = add i32 %lhs, %tmp + ret i32 %Rd +} +
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-add2.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-add2.ll new file mode 100644 index 0000000..e496654 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-add2.ll
@@ -0,0 +1,41 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +; 171 = 0x000000ab +define i32 @f1(i32 %a) { +; CHECK: f1: +; CHECK: adds r0, #171 + %tmp = add i32 %a, 171 + ret i32 %tmp +} + +; 1179666 = 0x00120012 +define i32 @f2(i32 %a) { +; CHECK: f2: +; CHECK: add.w r0, r0, #1179666 + %tmp = add i32 %a, 1179666 + ret i32 %tmp +} + +; 872428544 = 0x34003400 +define i32 @f3(i32 %a) { +; CHECK: f3: +; CHECK: add.w r0, r0, #872428544 + %tmp = add i32 %a, 872428544 + ret i32 %tmp +} + +; 1448498774 = 0x56565656 +define i32 @f4(i32 %a) { +; CHECK: f4: +; CHECK: add.w r0, r0, #1448498774 + %tmp = add i32 %a, 1448498774 + ret i32 %tmp +} + +; 510 = 0x000001fe +define i32 @f5(i32 %a) { +; CHECK: f5: +; CHECK: add.w r0, r0, #510 + %tmp = add i32 %a, 510 + ret i32 %tmp +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-add3.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-add3.ll new file mode 100644 index 0000000..58fc333 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-add3.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +define i32 @f1(i32 %a) { + %tmp = add i32 %a, 4095 + ret i32 %tmp +} + +; CHECK: f1: +; CHECK: addw r0, r0, #4095
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-add4.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-add4.ll new file mode 100644 index 0000000..b94e84d --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-add4.ll
@@ -0,0 +1,46 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +; 171 = 0x000000ab +define i64 @f1(i64 %a) { +; CHECK: f1: +; CHECK: adds r0, #171 +; CHECK: adc r1, r1, #0 + %tmp = add i64 %a, 171 + ret i64 %tmp +} + +; 1179666 = 0x00120012 +define i64 @f2(i64 %a) { +; CHECK: f2: +; CHECK: adds.w r0, r0, #1179666 +; CHECK: adc r1, r1, #0 + %tmp = add i64 %a, 1179666 + ret i64 %tmp +} + +; 872428544 = 0x34003400 +define i64 @f3(i64 %a) { +; CHECK: f3: +; CHECK: adds.w r0, r0, #872428544 +; CHECK: adc r1, r1, #0 + %tmp = add i64 %a, 872428544 + ret i64 %tmp +} + +; 1448498774 = 0x56565656 +define i64 @f4(i64 %a) { +; CHECK: f4: +; CHECK: adds.w r0, r0, #1448498774 +; CHECK: adc r1, r1, #0 + %tmp = add i64 %a, 1448498774 + ret i64 %tmp +} + +; 66846720 = 0x03fc0000 +define i64 @f5(i64 %a) { +; CHECK: f5: +; CHECK: adds.w r0, r0, #66846720 +; CHECK: adc r1, r1, #0 + %tmp = add i64 %a, 66846720 + ret i64 %tmp +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-add5.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-add5.ll new file mode 100644 index 0000000..8b3a4f6 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-add5.ll
@@ -0,0 +1,42 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +define i32 @f1(i32 %a, i32 %b) { +; CHECK: f1: +; CHECK: add r0, r1 + %tmp = add i32 %a, %b + ret i32 %tmp +} + +define i32 @f2(i32 %a, i32 %b) { +; CHECK: f2: +; CHECK: add.w r0, r0, r1, lsl #5 + %tmp = shl i32 %b, 5 + %tmp1 = add i32 %a, %tmp + ret i32 %tmp1 +} + +define i32 @f3(i32 %a, i32 %b) { +; CHECK: f3: +; CHECK: add.w r0, r0, r1, lsr #6 + %tmp = lshr i32 %b, 6 + %tmp1 = add i32 %a, %tmp + ret i32 %tmp1 +} + +define i32 @f4(i32 %a, i32 %b) { +; CHECK: f4: +; CHECK: add.w r0, r0, r1, asr #7 + %tmp = ashr i32 %b, 7 + %tmp1 = add i32 %a, %tmp + ret i32 %tmp1 +} + +define i32 @f5(i32 %a, i32 %b) { +; CHECK: f5: +; CHECK: add.w r0, r0, r0, ror #8 + %l8 = shl i32 %a, 24 + %r8 = lshr i32 %a, 8 + %tmp = or i32 %l8, %r8 + %tmp1 = add i32 %a, %tmp + ret i32 %tmp1 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-add6.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-add6.ll new file mode 100644 index 0000000..0ecaa79 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-add6.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +define i64 @f1(i64 %a, i64 %b) { +; CHECK: f1: +; CHECK: adds r0, r0, r2 +; CHECK: adcs r1, r3 + %tmp = add i64 %a, %b + ret i64 %tmp +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-and.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-and.ll new file mode 100644 index 0000000..8e2245a --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-and.ll
@@ -0,0 +1,42 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +define i32 @f1(i32 %a, i32 %b) { +; CHECK: f1: +; CHECK: ands r0, r1 + %tmp = and i32 %a, %b + ret i32 %tmp +} + +define i32 @f2(i32 %a, i32 %b) { +; CHECK: f2: +; CHECK: and.w r0, r0, r1, lsl #5 + %tmp = shl i32 %b, 5 + %tmp1 = and i32 %a, %tmp + ret i32 %tmp1 +} + +define i32 @f3(i32 %a, i32 %b) { +; CHECK: f3: +; CHECK: and.w r0, r0, r1, lsr #6 + %tmp = lshr i32 %b, 6 + %tmp1 = and i32 %a, %tmp + ret i32 %tmp1 +} + +define i32 @f4(i32 %a, i32 %b) { +; CHECK: f4: +; CHECK: and.w r0, r0, r1, asr #7 + %tmp = ashr i32 %b, 7 + %tmp1 = and i32 %a, %tmp + ret i32 %tmp1 +} + +define i32 @f5(i32 %a, i32 %b) { +; CHECK: f5: +; CHECK: and.w r0, r0, r0, ror #8 + %l8 = shl i32 %a, 24 + %r8 = lshr i32 %a, 8 + %tmp = or i32 %l8, %r8 + %tmp1 = and i32 %a, %tmp + ret i32 %tmp1 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-and2.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-and2.ll new file mode 100644 index 0000000..7b0432d --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-and2.ll
@@ -0,0 +1,41 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +; 171 = 0x000000ab +define i32 @f1(i32 %a) { + %tmp = and i32 %a, 171 + ret i32 %tmp +} +; CHECK: f1: +; CHECK: and r0, r0, #171 + +; 1179666 = 0x00120012 +define i32 @f2(i32 %a) { + %tmp = and i32 %a, 1179666 + ret i32 %tmp +} +; CHECK: f2: +; CHECK: and r0, r0, #1179666 + +; 872428544 = 0x34003400 +define i32 @f3(i32 %a) { + %tmp = and i32 %a, 872428544 + ret i32 %tmp +} +; CHECK: f3: +; CHECK: and r0, r0, #872428544 + +; 1448498774 = 0x56565656 +define i32 @f4(i32 %a) { + %tmp = and i32 %a, 1448498774 + ret i32 %tmp +} +; CHECK: f4: +; CHECK: bic r0, r0, #-1448498775 + +; 66846720 = 0x03fc0000 +define i32 @f5(i32 %a) { + %tmp = and i32 %a, 66846720 + ret i32 %tmp +} +; CHECK: f5: +; CHECK: and r0, r0, #66846720
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-asr.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-asr.ll new file mode 100644 index 0000000..a0a60e6 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-asr.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +define i32 @f1(i32 %a, i32 %b) { +; CHECK: f1: +; CHECK: asrs r0, r1 + %tmp = ashr i32 %a, %b + ret i32 %tmp +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-asr2.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-asr2.ll new file mode 100644 index 0000000..9c8634f --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-asr2.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +define i32 @f1(i32 %a) { +; CHECK: f1: +; CHECK: asrs r0, r0, #17 + %tmp = ashr i32 %a, 17 + ret i32 %tmp +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-bcc.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-bcc.ll new file mode 100644 index 0000000..4a2d600 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-bcc.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s +; If-conversion defeats the purpose of this test, which is to check CBZ +; generation, so use memory barrier instruction to make sure it doesn't +; happen and we get actual branches. + +define i32 @t1(i32 %a, i32 %b, i32 %c) { +; CHECK: t1: +; CHECK: cbz + %tmp2 = icmp eq i32 %a, 0 + br i1 %tmp2, label %cond_false, label %cond_true + +cond_true: + fence seq_cst + %tmp5 = add i32 %b, 1 + %tmp6 = and i32 %tmp5, %c + ret i32 %tmp6 + +cond_false: + fence seq_cst + %tmp7 = add i32 %b, -1 + %tmp8 = xor i32 %tmp7, %c + ret i32 %tmp8 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-bfc.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-bfc.ll new file mode 100644 index 0000000..b486045 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-bfc.ll
@@ -0,0 +1,32 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +; 4278190095 = 0xff00000f +define i32 @f1(i32 %a) { +; CHECK: f1: +; CHECK: bfc r + %tmp = and i32 %a, 4278190095 + ret i32 %tmp +} + +; 4286578688 = 0xff800000 +define i32 @f2(i32 %a) { +; CHECK: f2: +; CHECK: bfc r + %tmp = and i32 %a, 4286578688 + ret i32 %tmp +} + +; 4095 = 0x00000fff +define i32 @f3(i32 %a) { +; CHECK: f3: +; CHECK: bfc r + %tmp = and i32 %a, 4095 + ret i32 %tmp +} + +; 2147483646 = 0x7ffffffe not implementable w/ BFC +define i32 @f4(i32 %a) { +; CHECK: f4: + %tmp = and i32 %a, 2147483646 + ret i32 %tmp +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-bic.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-bic.ll new file mode 100644 index 0000000..4e35383 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-bic.ll
@@ -0,0 +1,105 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +define i32 @f1(i32 %a, i32 %b) { +; CHECK: f1: +; CHECK: bics r0, r1 + %tmp = xor i32 %b, 4294967295 + %tmp1 = and i32 %a, %tmp + ret i32 %tmp1 +} + +define i32 @f2(i32 %a, i32 %b) { +; CHECK: f2: +; CHECK: bics r0, r1 + %tmp = xor i32 %b, 4294967295 + %tmp1 = and i32 %tmp, %a + ret i32 %tmp1 +} + +define i32 @f3(i32 %a, i32 %b) { +; CHECK: f3: +; CHECK: bics r0, r1 + %tmp = xor i32 4294967295, %b + %tmp1 = and i32 %a, %tmp + ret i32 %tmp1 +} + +define i32 @f4(i32 %a, i32 %b) { +; CHECK: f4: +; CHECK: bics r0, r1 + %tmp = xor i32 4294967295, %b + %tmp1 = and i32 %tmp, %a + ret i32 %tmp1 +} + +define i32 @f5(i32 %a, i32 %b) { +; CHECK: f5: +; CHECK: bic.w r0, r0, r1, lsl #5 + %tmp = shl i32 %b, 5 + %tmp1 = xor i32 4294967295, %tmp + %tmp2 = and i32 %a, %tmp1 + ret i32 %tmp2 +} + +define i32 @f6(i32 %a, i32 %b) { +; CHECK: f6: +; CHECK: bic.w r0, r0, r1, lsr #6 + %tmp = lshr i32 %b, 6 + %tmp1 = xor i32 %tmp, 4294967295 + %tmp2 = and i32 %tmp1, %a + ret i32 %tmp2 +} + +define i32 @f7(i32 %a, i32 %b) { +; CHECK: f7: +; CHECK: bic.w r0, r0, r1, asr #7 + %tmp = ashr i32 %b, 7 + %tmp1 = xor i32 %tmp, 4294967295 + %tmp2 = and i32 %a, %tmp1 + ret i32 %tmp2 +} + +define i32 @f8(i32 %a, i32 %b) { +; CHECK: f8: +; CHECK: bic.w r0, r0, r0, ror #8 + %l8 = shl i32 %a, 24 + %r8 = lshr i32 %a, 8 + %tmp = or i32 %l8, %r8 + %tmp1 = xor i32 4294967295, %tmp + %tmp2 = and i32 %tmp1, %a + ret i32 %tmp2 +} + +; ~0x000000bb = 4294967108 +define i32 @f9(i32 %a) { + %tmp = and i32 %a, 4294967108 + ret i32 %tmp + +; CHECK: f9: +; CHECK: bic r0, r0, #187 +} + +; ~0x00aa00aa = 4283826005 +define i32 @f10(i32 %a) { + %tmp = and i32 %a, 4283826005 + ret i32 %tmp + +; CHECK: f10: +; CHECK: bic r0, r0, #11141290 +} + +; ~0xcc00cc00 = 872363007 +define i32 @f11(i32 %a) { + %tmp = and i32 %a, 872363007 + ret i32 %tmp +; CHECK: f11: +; CHECK: bic r0, r0, #-872363008 +} + +; ~0x00110000 = 4293853183 +define i32 @f12(i32 
%a) { + %tmp = and i32 %a, 4293853183 + ret i32 %tmp +; CHECK: f12: +; CHECK: bic r0, r0, #1114112 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-branch.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-branch.ll new file mode 100644 index 0000000..27d8e8f --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-branch.ll
@@ -0,0 +1,72 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s +; If-conversion defeats the purpose of this test, which is to check conditional +; branch generation, so use memory barrier instruction to make sure it doesn't +; happen and we get actual branches. + +define i32 @f1(i32 %a, i32 %b, i32* %v) { +entry: +; CHECK: f1: +; CHECK: bne LBB + %tmp = icmp eq i32 %a, %b ; <i1> [#uses=1] + br i1 %tmp, label %cond_true, label %return + +cond_true: ; preds = %entry + fence seq_cst + store i32 0, i32* %v + ret i32 0 + +return: ; preds = %entry + fence seq_cst + ret i32 1 +} + +define i32 @f2(i32 %a, i32 %b, i32* %v) { +entry: +; CHECK: f2: +; CHECK: bge LBB + %tmp = icmp slt i32 %a, %b ; <i1> [#uses=1] + br i1 %tmp, label %cond_true, label %return + +cond_true: ; preds = %entry + fence seq_cst + store i32 0, i32* %v + ret i32 0 + +return: ; preds = %entry + fence seq_cst + ret i32 1 +} + +define i32 @f3(i32 %a, i32 %b, i32* %v) { +entry: +; CHECK: f3: +; CHECK: bhs LBB + %tmp = icmp ult i32 %a, %b ; <i1> [#uses=1] + br i1 %tmp, label %cond_true, label %return + +cond_true: ; preds = %entry + fence seq_cst + store i32 0, i32* %v + ret i32 0 + +return: ; preds = %entry + fence seq_cst + ret i32 1 +} + +define i32 @f4(i32 %a, i32 %b, i32* %v) { +entry: +; CHECK: f4: +; CHECK: blo LBB + %tmp = icmp ult i32 %a, %b ; <i1> [#uses=1] + br i1 %tmp, label %return, label %cond_true + +cond_true: ; preds = %entry + fence seq_cst + store i32 0, i32* %v + ret i32 0 + +return: ; preds = %entry + fence seq_cst + ret i32 1 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-call-tc.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-call-tc.ll new file mode 100644 index 0000000..2e4da1b --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-call-tc.ll
@@ -0,0 +1,38 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s -check-prefix=DARWIN +; RUN: llc < %s -mtriple=thumbv7-linux -mattr=+thumb2 | FileCheck %s -check-prefix=LINUX +; XFAIL: * + +@t = weak global i32 ()* null ; <i32 ()**> [#uses=1] + +declare void @g(i32, i32, i32, i32) + +define void @f() { +; DARWIN: f: +; DARWIN: blx _g + +; LINUX: f: +; LINUX: bl g + tail call void @g( i32 1, i32 2, i32 3, i32 4 ) + ret void +} + +define void @h() { +; DARWIN: h: +; DARWIN: bx r0 @ TAILCALL + +; LINUX: h: +; LINUX: bx r0 @ TAILCALL + %tmp = load i32 ()** @t ; <i32 ()*> [#uses=1] + %tmp.upgrd.2 = tail call i32 %tmp( ) ; <i32> [#uses=0] + ret void +} + +define void @j() { +; DARWIN: j: +; DARWIN: b.w _f @ TAILCALL + +; LINUX: j: +; LINUX: b.w f @ TAILCALL + tail call void @f() + ret void +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-call.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-call.ll new file mode 100644 index 0000000..8513cfb --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-call.ll
@@ -0,0 +1,27 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s -check-prefix=DARWIN +; RUN: llc < %s -mtriple=thumbv7-linux -mattr=+thumb2 | FileCheck %s -check-prefix=LINUX + +@t = weak global i32 ()* null ; <i32 ()**> [#uses=1] + +declare void @g(i32, i32, i32, i32) + +define void @f() { +; DARWIN: f: +; DARWIN: blx _g + +; LINUX: f: +; LINUX: bl g + call void @g( i32 1, i32 2, i32 3, i32 4 ) + ret void +} + +define void @h() { +; DARWIN: h: +; DARWIN: blx r0 + +; LINUX: h: +; LINUX: blx r0 + %tmp = load i32 ()** @t ; <i32 ()*> [#uses=1] + %tmp.upgrd.2 = call i32 %tmp( ) ; <i32> [#uses=0] + ret void +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-cbnz.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-cbnz.ll new file mode 100644 index 0000000..0992fa8 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-cbnz.ll
@@ -0,0 +1,36 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s +; rdar://7354379 + +declare double @floor(double) nounwind readnone + +define void @t(i32 %c, double %b) { +entry: + %cmp1 = icmp ne i32 %c, 0 + br i1 %cmp1, label %bb3, label %bb1 + +bb1: ; preds = %entry + unreachable + +bb3: ; preds = %entry + %cmp2 = icmp ne i32 %c, 0 + br i1 %cmp2, label %bb7, label %bb5 + +bb5: ; preds = %bb3 + unreachable + +bb7: ; preds = %bb3 + %cmp3 = icmp ne i32 %c, 0 + br i1 %cmp3, label %bb11, label %bb9 + +bb9: ; preds = %bb7 +; CHECK: cmp r0, #0 +; CHECK: cmp r0, #0 +; CHECK-NEXT: cbnz + %0 = tail call double @floor(double %b) nounwind readnone ; <double> [#uses=0] + br label %bb11 + +bb11: ; preds = %bb9, %bb7 + %1 = getelementptr i32* undef, i32 0 + store i32 0, i32* %1 + ret void +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-clz.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-clz.ll new file mode 100644 index 0000000..00a54a0 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-clz.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2,+v7 | FileCheck %s + +define i32 @f1(i32 %a) { +; CHECK: f1: +; CHECK: clz r + %tmp = tail call i32 @llvm.ctlz.i32(i32 %a) + ret i32 %tmp +} + +declare i32 @llvm.ctlz.i32(i32) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-cmn.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-cmn.ll new file mode 100644 index 0000000..df221b9 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-cmn.ll
@@ -0,0 +1,75 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 -join-physregs | FileCheck %s + +; These tests implicitly depend on 'movs r0, #0' being rematerialized below the +; test as 'mov.w r0, #0'. So far, that requires physreg joining. + +define i1 @f1(i32 %a, i32 %b) { + %nb = sub i32 0, %b + %tmp = icmp ne i32 %a, %nb + ret i1 %tmp +} +; CHECK: f1: +; CHECK: cmn.w r0, r1 + +define i1 @f2(i32 %a, i32 %b) { + %nb = sub i32 0, %b + %tmp = icmp ne i32 %nb, %a + ret i1 %tmp +} +; CHECK: f2: +; CHECK: cmn.w r0, r1 + +define i1 @f3(i32 %a, i32 %b) { + %nb = sub i32 0, %b + %tmp = icmp eq i32 %a, %nb + ret i1 %tmp +} +; CHECK: f3: +; CHECK: cmn.w r0, r1 + +define i1 @f4(i32 %a, i32 %b) { + %nb = sub i32 0, %b + %tmp = icmp eq i32 %nb, %a + ret i1 %tmp +} +; CHECK: f4: +; CHECK: cmn.w r0, r1 + +define i1 @f5(i32 %a, i32 %b) { + %tmp = shl i32 %b, 5 + %nb = sub i32 0, %tmp + %tmp1 = icmp eq i32 %nb, %a + ret i1 %tmp1 +} +; CHECK: f5: +; CHECK: cmn.w r0, r1, lsl #5 + +define i1 @f6(i32 %a, i32 %b) { + %tmp = lshr i32 %b, 6 + %nb = sub i32 0, %tmp + %tmp1 = icmp ne i32 %nb, %a + ret i1 %tmp1 +} +; CHECK: f6: +; CHECK: cmn.w r0, r1, lsr #6 + +define i1 @f7(i32 %a, i32 %b) { + %tmp = ashr i32 %b, 7 + %nb = sub i32 0, %tmp + %tmp1 = icmp eq i32 %a, %nb + ret i1 %tmp1 +} +; CHECK: f7: +; CHECK: cmn.w r0, r1, asr #7 + +define i1 @f8(i32 %a, i32 %b) { + %l8 = shl i32 %a, 24 + %r8 = lshr i32 %a, 8 + %tmp = or i32 %l8, %r8 + %nb = sub i32 0, %tmp + %tmp1 = icmp ne i32 %a, %nb + ret i1 %tmp1 +} +; CHECK: f8: +; CHECK: cmn.w r0, r0, ror #8 +
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-cmn2.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-cmn2.ll new file mode 100644 index 0000000..c0e19f6 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-cmn2.ll
@@ -0,0 +1,33 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +; -0x000000bb = 4294967109 +define i1 @f1(i32 %a) { +; CHECK: f1: +; CHECK: cmn.w {{r.*}}, #187 + %tmp = icmp ne i32 %a, 4294967109 + ret i1 %tmp +} + +; -0x00aa00aa = 4283826006 +define i1 @f2(i32 %a) { +; CHECK: f2: +; CHECK: cmn.w {{r.*}}, #11141290 + %tmp = icmp eq i32 %a, 4283826006 + ret i1 %tmp +} + +; -0xcc00cc00 = 872363008 +define i1 @f3(i32 %a) { +; CHECK: f3: +; CHECK: cmn.w {{r.*}}, #-872363008 + %tmp = icmp ne i32 %a, 872363008 + ret i1 %tmp +} + +; -0x00110000 = 4293853184 +define i1 @f4(i32 %a) { +; CHECK: f4: +; CHECK: cmn.w {{r.*}}, #1114112 + %tmp = icmp eq i32 %a, 4293853184 + ret i1 %tmp +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-cmp.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-cmp.ll new file mode 100644 index 0000000..da12114 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-cmp.ll
@@ -0,0 +1,58 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 -join-physregs | FileCheck %s + +; These tests implicitly depend on 'movs r0, #0' being rematerialized below the +; test as 'mov.w r0, #0'. So far, that requires physreg joining. + +; 0x000000bb = 187 +define i1 @f1(i32 %a) { +; CHECK: f1: +; CHECK: cmp r0, #187 + %tmp = icmp ne i32 %a, 187 + ret i1 %tmp +} + +; 0x00aa00aa = 11141290 +define i1 @f2(i32 %a) { +; CHECK: f2: +; CHECK: cmp.w r0, #11141290 + %tmp = icmp eq i32 %a, 11141290 + ret i1 %tmp +} + +; 0xcc00cc00 = 3422604288 +define i1 @f3(i32 %a) { +; CHECK: f3: +; CHECK: cmp.w r0, #-872363008 + %tmp = icmp ne i32 %a, 3422604288 + ret i1 %tmp +} + +; 0xdddddddd = 3722304989 +define i1 @f4(i32 %a) { +; CHECK: f4: +; CHECK: cmp.w r0, #-572662307 + %tmp = icmp ne i32 %a, 3722304989 + ret i1 %tmp +} + +; 0x00110000 = 1114112 +define i1 @f5(i32 %a) { +; CHECK: f5: +; CHECK: cmp.w r0, #1114112 + %tmp = icmp eq i32 %a, 1114112 + ret i1 %tmp +} + +; Check that we don't do an invalid (a > b) --> !(a < b + 1) transform. +; +; CHECK: f6: +; CHECK-NOT: cmp.w r0, #-2147483648 +; CHECK: bx lr +define i32 @f6(i32 %a) { + %tmp = icmp sgt i32 %a, 2147483647 + br i1 %tmp, label %true, label %false +true: + ret i32 2 +false: + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-cmp2.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-cmp2.ll new file mode 100644 index 0000000..15052e0 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-cmp2.ll
@@ -0,0 +1,52 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 -join-physregs | FileCheck %s + +; These tests implicitly depend on 'movs r0, #0' being rematerialized below the +; test as 'mov.w r0, #0'. So far, that requires physreg joining. + +define i1 @f1(i32 %a, i32 %b) { +; CHECK: f1: +; CHECK: cmp r0, r1 + %tmp = icmp ne i32 %a, %b + ret i1 %tmp +} + +define i1 @f2(i32 %a, i32 %b) { +; CHECK: f2: +; CHECK: cmp r0, r1 + %tmp = icmp eq i32 %a, %b + ret i1 %tmp +} + +define i1 @f6(i32 %a, i32 %b) { +; CHECK: f6: +; CHECK: cmp.w r0, r1, lsl #5 + %tmp = shl i32 %b, 5 + %tmp1 = icmp eq i32 %tmp, %a + ret i1 %tmp1 +} + +define i1 @f7(i32 %a, i32 %b) { +; CHECK: f7: +; CHECK: cmp.w r0, r1, lsr #6 + %tmp = lshr i32 %b, 6 + %tmp1 = icmp ne i32 %tmp, %a + ret i1 %tmp1 +} + +define i1 @f8(i32 %a, i32 %b) { +; CHECK: f8: +; CHECK: cmp.w r0, r1, asr #7 + %tmp = ashr i32 %b, 7 + %tmp1 = icmp eq i32 %a, %tmp + ret i1 %tmp1 +} + +define i1 @f9(i32 %a, i32 %b) { +; CHECK: f9: +; CHECK: cmp.w r0, r0, ror #8 + %l8 = shl i32 %a, 24 + %r8 = lshr i32 %a, 8 + %tmp = or i32 %l8, %r8 + %tmp1 = icmp ne i32 %a, %tmp + ret i1 %tmp1 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-eor.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-eor.ll new file mode 100644 index 0000000..116a1a3 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-eor.ll
@@ -0,0 +1,56 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +define i32 @f1(i32 %a, i32 %b) { +; CHECK: f1: +; CHECK: eors r0, r1 + %tmp = xor i32 %a, %b + ret i32 %tmp +} + +define i32 @f2(i32 %a, i32 %b) { +; CHECK: f2: +; CHECK: eors r0, r1 + %tmp = xor i32 %b, %a + ret i32 %tmp +} + +define i32 @f2b(i32 %a, i32 %b, i32 %c) { +; CHECK: f2b: +; CHECK: eor.w r0, r1, r2 + %tmp = xor i32 %b, %c + ret i32 %tmp +} + +define i32 @f3(i32 %a, i32 %b) { +; CHECK: f3: +; CHECK: eor.w r0, r0, r1, lsl #5 + %tmp = shl i32 %b, 5 + %tmp1 = xor i32 %a, %tmp + ret i32 %tmp1 +} + +define i32 @f4(i32 %a, i32 %b) { +; CHECK: f4: +; CHECK: eor.w r0, r0, r1, lsr #6 + %tmp = lshr i32 %b, 6 + %tmp1 = xor i32 %tmp, %a + ret i32 %tmp1 +} + +define i32 @f5(i32 %a, i32 %b) { +; CHECK: f5: +; CHECK: eor.w r0, r0, r1, asr #7 + %tmp = ashr i32 %b, 7 + %tmp1 = xor i32 %a, %tmp + ret i32 %tmp1 +} + +define i32 @f6(i32 %a, i32 %b) { +; CHECK: f6: +; CHECK: eor.w r0, r0, r0, ror #8 + %l8 = shl i32 %a, 24 + %r8 = lshr i32 %a, 8 + %tmp = or i32 %l8, %r8 + %tmp1 = xor i32 %tmp, %a + ret i32 %tmp1 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-eor2.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-eor2.ll new file mode 100644 index 0000000..6b2e9dc --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-eor2.ll
@@ -0,0 +1,41 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +; 0x000000bb = 187 +define i32 @f1(i32 %a) { +; CHECK: f1: +; CHECK: eor {{.*}}#187 + %tmp = xor i32 %a, 187 + ret i32 %tmp +} + +; 0x00aa00aa = 11141290 +define i32 @f2(i32 %a) { +; CHECK: f2: +; CHECK: eor {{.*}}#11141290 + %tmp = xor i32 %a, 11141290 + ret i32 %tmp +} + +; 0xcc00cc00 = 3422604288 +define i32 @f3(i32 %a) { +; CHECK: f3: +; CHECK: eor {{.*}}#-872363008 + %tmp = xor i32 %a, 3422604288 + ret i32 %tmp +} + +; 0xdddddddd = 3722304989 +define i32 @f4(i32 %a) { +; CHECK: f4: +; CHECK: eor {{.*}}#-572662307 + %tmp = xor i32 %a, 3722304989 + ret i32 %tmp +} + +; 0x00110000 = 1114112 +define i32 @f5(i32 %a) { +; CHECK: f5: +; CHECK: eor {{.*}}#1114112 + %tmp = xor i32 %a, 1114112 + ret i32 %tmp +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll new file mode 100644 index 0000000..5315535 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll
@@ -0,0 +1,87 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s +; XFAIL: * + +define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { +; CHECK: t1: +; CHECK: it ne +; CHECK: cmpne + switch i32 %c, label %cond_next [ + i32 1, label %cond_true + i32 7, label %cond_true + ] + +cond_true: + %tmp12 = add i32 %a, 1 + %tmp1518 = add i32 %tmp12, %b + ret i32 %tmp1518 + +cond_next: + %tmp15 = add i32 %b, %a + ret i32 %tmp15 +} + +; FIXME: Check for # of unconditional branch after adding branch folding post ifcvt. +define i32 @t2(i32 %a, i32 %b) nounwind { +entry: +; CHECK: t2: +; CHECK: ite gt +; CHECK: subgt +; CHECK: suble + %tmp1434 = icmp eq i32 %a, %b ; <i1> [#uses=1] + br i1 %tmp1434, label %bb17, label %bb.outer + +bb.outer: ; preds = %cond_false, %entry + %b_addr.021.0.ph = phi i32 [ %b, %entry ], [ %tmp10, %cond_false ] ; <i32> [#uses=5] + %a_addr.026.0.ph = phi i32 [ %a, %entry ], [ %a_addr.026.0, %cond_false ] ; <i32> [#uses=1] + br label %bb + +bb: ; preds = %cond_true, %bb.outer + %indvar = phi i32 [ 0, %bb.outer ], [ %indvar.next, %cond_true ] ; <i32> [#uses=2] + %tmp. = sub i32 0, %b_addr.021.0.ph ; <i32> [#uses=1] + %tmp.40 = mul i32 %indvar, %tmp. 
; <i32> [#uses=1] + %a_addr.026.0 = add i32 %tmp.40, %a_addr.026.0.ph ; <i32> [#uses=6] + %tmp3 = icmp sgt i32 %a_addr.026.0, %b_addr.021.0.ph ; <i1> [#uses=1] + br i1 %tmp3, label %cond_true, label %cond_false + +cond_true: ; preds = %bb + %tmp7 = sub i32 %a_addr.026.0, %b_addr.021.0.ph ; <i32> [#uses=2] + %tmp1437 = icmp eq i32 %tmp7, %b_addr.021.0.ph ; <i1> [#uses=1] + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1] + br i1 %tmp1437, label %bb17, label %bb + +cond_false: ; preds = %bb + %tmp10 = sub i32 %b_addr.021.0.ph, %a_addr.026.0 ; <i32> [#uses=2] + %tmp14 = icmp eq i32 %a_addr.026.0, %tmp10 ; <i1> [#uses=1] + br i1 %tmp14, label %bb17, label %bb.outer + +bb17: ; preds = %cond_false, %cond_true, %entry + %a_addr.026.1 = phi i32 [ %a, %entry ], [ %tmp7, %cond_true ], [ %a_addr.026.0, %cond_false ] ; <i32> [#uses=1] + ret i32 %a_addr.026.1 +} + +@x = external global i32* ; <i32**> [#uses=1] + +define void @foo(i32 %a) nounwind { +entry: + %tmp = load i32** @x ; <i32*> [#uses=1] + store i32 %a, i32* %tmp + ret void +} + +; Tail call prevents use of ifcvt in this one. Seems like a win though. +define void @t3(i32 %a, i32 %b) nounwind { +entry: +; CHECK: t3: +; CHECK-NOT: it lt +; CHECK-NOT: poplt +; CHECK: b.w _foo @ TAILCALL + %tmp1 = icmp sgt i32 %a, 10 ; <i1> [#uses=1] + br i1 %tmp1, label %cond_true, label %UnifiedReturnBlock + +cond_true: ; preds = %entry + tail call void @foo( i32 %b ) + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-ifcvt1.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-ifcvt1.ll new file mode 100644 index 0000000..af8fcc6 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-ifcvt1.ll
@@ -0,0 +1,87 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s + +define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { +; CHECK: t1: +; CHECK: ittt ne +; CHECK: cmpne +; CHECK: addne +; CHECK: bxne lr + switch i32 %c, label %cond_next [ + i32 1, label %cond_true + i32 7, label %cond_true + ] + +cond_true: + %tmp12 = add i32 %a, 1 + %tmp1518 = add i32 %tmp12, %b + ret i32 %tmp1518 + +cond_next: + %tmp15 = add i32 %b, %a + ret i32 %tmp15 +} + +define i32 @t2(i32 %a, i32 %b) nounwind { +entry: +; Do not if-convert when branches go to the different loops. +; CHECK: t2: +; CHECK-NOT: ite gt +; CHECK-NOT: subgt +; CHECK-NOT: suble + %tmp1434 = icmp eq i32 %a, %b ; <i1> [#uses=1] + br i1 %tmp1434, label %bb17, label %bb.outer + +bb.outer: ; preds = %cond_false, %entry + %b_addr.021.0.ph = phi i32 [ %b, %entry ], [ %tmp10, %cond_false ] ; <i32> [#uses=5] + %a_addr.026.0.ph = phi i32 [ %a, %entry ], [ %a_addr.026.0, %cond_false ] ; <i32> [#uses=1] + br label %bb + +bb: ; preds = %cond_true, %bb.outer + %indvar = phi i32 [ 0, %bb.outer ], [ %indvar.next, %cond_true ] ; <i32> [#uses=2] + %tmp. = sub i32 0, %b_addr.021.0.ph ; <i32> [#uses=1] + %tmp.40 = mul i32 %indvar, %tmp. 
; <i32> [#uses=1] + %a_addr.026.0 = add i32 %tmp.40, %a_addr.026.0.ph ; <i32> [#uses=6] + %tmp3 = icmp sgt i32 %a_addr.026.0, %b_addr.021.0.ph ; <i1> [#uses=1] + br i1 %tmp3, label %cond_true, label %cond_false + +cond_true: ; preds = %bb + %tmp7 = sub i32 %a_addr.026.0, %b_addr.021.0.ph ; <i32> [#uses=2] + %tmp1437 = icmp eq i32 %tmp7, %b_addr.021.0.ph ; <i1> [#uses=1] + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1] + br i1 %tmp1437, label %bb17, label %bb + +cond_false: ; preds = %bb + %tmp10 = sub i32 %b_addr.021.0.ph, %a_addr.026.0 ; <i32> [#uses=2] + %tmp14 = icmp eq i32 %a_addr.026.0, %tmp10 ; <i1> [#uses=1] + br i1 %tmp14, label %bb17, label %bb.outer + +bb17: ; preds = %cond_false, %cond_true, %entry + %a_addr.026.1 = phi i32 [ %a, %entry ], [ %tmp7, %cond_true ], [ %a_addr.026.0, %cond_false ] ; <i32> [#uses=1] + ret i32 %a_addr.026.1 +} + +@x = external global i32* ; <i32**> [#uses=1] + +define void @foo(i32 %a) nounwind { +entry: + %tmp = load i32** @x ; <i32*> [#uses=1] + store i32 %a, i32* %tmp + ret void +} + +define void @t3(i32 %a, i32 %b) nounwind { +entry: +; CHECK: t3: +; CHECK: itt ge +; CHECK: movge r0, r1 +; CHECK: blge _foo + %tmp1 = icmp sgt i32 %a, 10 ; <i1> [#uses=1] + br i1 %tmp1, label %cond_true, label %UnifiedReturnBlock + +cond_true: ; preds = %entry + call void @foo( i32 %b ) + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-ifcvt2.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-ifcvt2.ll new file mode 100644 index 0000000..2c57348 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
@@ -0,0 +1,94 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s + +define void @foo(i32 %X, i32 %Y) { +entry: +; CHECK: foo: +; CHECK: it ne +; CHECK: cmpne +; CHECK: it hi +; CHECK: pophi {r7, pc} + %tmp1 = icmp ult i32 %X, 4 ; <i1> [#uses=1] + %tmp4 = icmp eq i32 %Y, 0 ; <i1> [#uses=1] + %tmp7 = or i1 %tmp4, %tmp1 ; <i1> [#uses=1] + br i1 %tmp7, label %cond_true, label %UnifiedReturnBlock + +cond_true: ; preds = %entry + %tmp10 = call i32 (...)* @bar( ) ; <i32> [#uses=0] + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +declare i32 @bar(...) + +; FIXME: Need post-ifcvt branch folding to get rid of the extra br at end of BB1. + + %struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* } + +define fastcc i32 @CountTree(%struct.quad_struct* %tree) { +entry: +; CHECK: CountTree: +; CHECK: it eq +; CHECK: cmpeq +; CHECK: bne +; CHECK: cmp +; CHECK: itt eq +; CHECK: moveq +; CHECK: popeq + br label %tailrecurse + +tailrecurse: ; preds = %bb, %entry + %tmp6 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1] + %tmp9 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=2] + %tmp12 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1] + %tmp14 = icmp eq %struct.quad_struct* null, null ; <i1> [#uses=1] + %tmp17 = icmp eq %struct.quad_struct* %tmp6, null ; <i1> [#uses=1] + %tmp23 = icmp eq %struct.quad_struct* %tmp9, null ; <i1> [#uses=1] + %tmp29 = icmp eq %struct.quad_struct* %tmp12, null ; <i1> [#uses=1] + %bothcond = and i1 %tmp17, %tmp14 ; <i1> [#uses=1] + %bothcond1 = and i1 %bothcond, %tmp23 ; <i1> [#uses=1] + %bothcond2 = and i1 %bothcond1, %tmp29 ; <i1> [#uses=1] + br i1 %bothcond2, label %return, label %bb + +bb: ; preds = %tailrecurse + %tmp41 = tail call fastcc i32 @CountTree( %struct.quad_struct* %tmp9 ) ; <i32> [#uses=0] + br label %tailrecurse + +return: ; preds = %tailrecurse + ret i32 0 +} 
+ + %struct.SString = type { i8*, i32, i32 } + +declare void @abort() + +define fastcc void @t1(%struct.SString* %word, i8 signext %c) { +entry: +; CHECK: t1: +; CHECK: it ne +; CHECK: popne {r7, pc} + %tmp1 = icmp eq %struct.SString* %word, null ; <i1> [#uses=1] + br i1 %tmp1, label %cond_true, label %cond_false + +cond_true: ; preds = %entry + tail call void @abort( ) + unreachable + +cond_false: ; preds = %entry + ret void +} + +define fastcc void @t2() nounwind { +entry: +; CHECK: t2: +; CHECK: cmp r0, #0 +; CHECK: beq + br i1 undef, label %bb.i.i3, label %growMapping.exit + +bb.i.i3: ; preds = %entry + unreachable + +growMapping.exit: ; preds = %entry + unreachable +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-ifcvt3.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-ifcvt3.ll new file mode 100644 index 0000000..bcf10ef --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-ifcvt3.ll
@@ -0,0 +1,31 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s + +; There shouldn't be a unconditional branch at end of bb52. +; rdar://7184787 + +@posed = external global i64 ; <i64*> [#uses=1] + +define i1 @ab_bb52(i64 %.reload78, i64* %.out, i64* %.out1) nounwind { +newFuncRoot: + br label %bb52 + +bb52.bb55_crit_edge.exitStub: ; preds = %bb52 + store i64 %0, i64* %.out + store i64 %2, i64* %.out1 + ret i1 true + +bb52.bb53_crit_edge.exitStub: ; preds = %bb52 + store i64 %0, i64* %.out + store i64 %2, i64* %.out1 + ret i1 false + +bb52: ; preds = %newFuncRoot +; CHECK: movne +; CHECK: moveq +; CHECK: pop + %0 = load i64* @posed, align 4 ; <i64> [#uses=3] + %1 = sub i64 %0, %.reload78 ; <i64> [#uses=1] + %2 = ashr i64 %1, 1 ; <i64> [#uses=3] + %3 = icmp eq i64 %2, 0 ; <i1> [#uses=1] + br i1 %3, label %bb52.bb55_crit_edge.exitStub, label %bb52.bb53_crit_edge.exitStub +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-jtb.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-jtb.ll new file mode 100644 index 0000000..f5a56e5 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-jtb.ll
@@ -0,0 +1,120 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 -arm-adjust-jump-tables=0 | not grep tbb + +; Do not use tbb / tbh if any destination is before the jumptable. +; rdar://7102917 + +define i16 @main__getopt_internal_2E_exit_2E_ce(i32) nounwind { +newFuncRoot: + br label %_getopt_internal.exit.ce + +codeRepl127.exitStub: ; preds = %_getopt_internal.exit.ce + ret i16 0 + +parse_options.exit.loopexit.exitStub: ; preds = %_getopt_internal.exit.ce + ret i16 1 + +bb1.i.exitStub: ; preds = %_getopt_internal.exit.ce + ret i16 2 + +bb90.i.exitStub: ; preds = %_getopt_internal.exit.ce + ret i16 3 + +codeRepl104.exitStub: ; preds = %_getopt_internal.exit.ce + ret i16 4 + +codeRepl113.exitStub: ; preds = %_getopt_internal.exit.ce + ret i16 5 + +codeRepl51.exitStub: ; preds = %_getopt_internal.exit.ce + ret i16 6 + +codeRepl70.exitStub: ; preds = %_getopt_internal.exit.ce + ret i16 7 + +codeRepl119.exitStub: ; preds = %_getopt_internal.exit.ce + ret i16 8 + +codeRepl93.exitStub: ; preds = %_getopt_internal.exit.ce + ret i16 9 + +codeRepl101.exitStub: ; preds = %_getopt_internal.exit.ce + ret i16 10 + +codeRepl120.exitStub: ; preds = %_getopt_internal.exit.ce + ret i16 11 + +codeRepl89.exitStub: ; preds = %_getopt_internal.exit.ce + ret i16 12 + +codeRepl45.exitStub: ; preds = %_getopt_internal.exit.ce + ret i16 13 + +codeRepl58.exitStub: ; preds = %_getopt_internal.exit.ce + ret i16 14 + +codeRepl46.exitStub: ; preds = %_getopt_internal.exit.ce + ret i16 15 + +codeRepl50.exitStub: ; preds = %_getopt_internal.exit.ce + ret i16 16 + +codeRepl52.exitStub: ; preds = %_getopt_internal.exit.ce + ret i16 17 + +codeRepl53.exitStub: ; preds = %_getopt_internal.exit.ce + ret i16 18 + +codeRepl61.exitStub: ; preds = %_getopt_internal.exit.ce + ret i16 19 + +codeRepl85.exitStub: ; preds = %_getopt_internal.exit.ce + ret i16 20 + +codeRepl97.exitStub: ; preds = %_getopt_internal.exit.ce + ret i16 21 + +codeRepl79.exitStub: ; preds = %_getopt_internal.exit.ce + ret i16 22 + 
+codeRepl102.exitStub: ; preds = %_getopt_internal.exit.ce + ret i16 23 + +codeRepl54.exitStub: ; preds = %_getopt_internal.exit.ce + ret i16 24 + +codeRepl57.exitStub: ; preds = %_getopt_internal.exit.ce + ret i16 25 + +codeRepl103.exitStub: ; preds = %_getopt_internal.exit.ce + ret i16 26 + +_getopt_internal.exit.ce: ; preds = %newFuncRoot + switch i32 %0, label %codeRepl127.exitStub [ + i32 -1, label %parse_options.exit.loopexit.exitStub + i32 0, label %bb1.i.exitStub + i32 63, label %bb90.i.exitStub + i32 66, label %codeRepl104.exitStub + i32 67, label %codeRepl113.exitStub + i32 71, label %codeRepl51.exitStub + i32 77, label %codeRepl70.exitStub + i32 78, label %codeRepl119.exitStub + i32 80, label %codeRepl93.exitStub + i32 81, label %codeRepl101.exitStub + i32 82, label %codeRepl120.exitStub + i32 88, label %codeRepl89.exitStub + i32 97, label %codeRepl45.exitStub + i32 98, label %codeRepl58.exitStub + i32 99, label %codeRepl46.exitStub + i32 100, label %codeRepl50.exitStub + i32 104, label %codeRepl52.exitStub + i32 108, label %codeRepl53.exitStub + i32 109, label %codeRepl61.exitStub + i32 110, label %codeRepl85.exitStub + i32 111, label %codeRepl97.exitStub + i32 113, label %codeRepl79.exitStub + i32 114, label %codeRepl102.exitStub + i32 115, label %codeRepl54.exitStub + i32 116, label %codeRepl57.exitStub + i32 118, label %codeRepl103.exitStub + ] +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-ldm.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-ldm.ll new file mode 100644 index 0000000..4f2b7c1 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-ldm.ll
@@ -0,0 +1,40 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s + +@X = external global [0 x i32] ; <[0 x i32]*> [#uses=5] + +define i32 @t1() { +; CHECK: t1: +; CHECK: push {r7, lr} +; CHECK: pop {r7, pc} + %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0) ; <i32> [#uses=1] + %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1] + %tmp4 = call i32 @f1( i32 %tmp, i32 %tmp3 ) ; <i32> [#uses=1] + ret i32 %tmp4 +} + +define i32 @t2() { +; CHECK: t2: +; CHECK: push {r7, lr} +; CHECK: ldm +; CHECK: pop {r7, pc} + %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1] + %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1] + %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 4) ; <i32> [#uses=1] + %tmp6 = call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; <i32> [#uses=1] + ret i32 %tmp6 +} + +define i32 @t3() { +; CHECK: t3: +; CHECK: push {r7, lr} +; CHECK: pop {r7, pc} + %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1] + %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1] + %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1] + %tmp6 = call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; <i32> [#uses=1] + ret i32 %tmp6 +} + +declare i32 @f1(i32, i32) + +declare i32 @f2(i32, i32, i32)
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-ldr.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-ldr.ll new file mode 100644 index 0000000..88434f1 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-ldr.ll
@@ -0,0 +1,72 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +define i32 @f1(i32* %v) { +entry: +; CHECK: f1: +; CHECK: ldr r0, [r0] + %tmp = load i32* %v + ret i32 %tmp +} + +define i32 @f2(i32* %v) { +entry: +; CHECK: f2: +; CHECK: ldr.w r0, [r0, #4092] + %tmp2 = getelementptr i32* %v, i32 1023 + %tmp = load i32* %tmp2 + ret i32 %tmp +} + +define i32 @f3(i32* %v) { +entry: +; CHECK: f3: +; CHECK: mov.w r1, #4096 +; CHECK: ldr r0, [r0, r1] + %tmp2 = getelementptr i32* %v, i32 1024 + %tmp = load i32* %tmp2 + ret i32 %tmp +} + +define i32 @f4(i32 %base) { +entry: +; CHECK: f4: +; CHECK: ldr r0, [r0, #-128] + %tmp1 = sub i32 %base, 128 + %tmp2 = inttoptr i32 %tmp1 to i32* + %tmp3 = load i32* %tmp2 + ret i32 %tmp3 +} + +define i32 @f5(i32 %base, i32 %offset) { +entry: +; CHECK: f5: +; CHECK: ldr r0, [r0, r1] + %tmp1 = add i32 %base, %offset + %tmp2 = inttoptr i32 %tmp1 to i32* + %tmp3 = load i32* %tmp2 + ret i32 %tmp3 +} + +define i32 @f6(i32 %base, i32 %offset) { +entry: +; CHECK: f6: +; CHECK: ldr.w r0, [r0, r1, lsl #2] + %tmp1 = shl i32 %offset, 2 + %tmp2 = add i32 %base, %tmp1 + %tmp3 = inttoptr i32 %tmp2 to i32* + %tmp4 = load i32* %tmp3 + ret i32 %tmp4 +} + +define i32 @f7(i32 %base, i32 %offset) { +entry: +; CHECK: f7: +; CHECK: lsrs r1, r1, #2 +; CHECK: ldr r0, [r0, r1] + + %tmp1 = lshr i32 %offset, 2 + %tmp2 = add i32 %base, %tmp1 + %tmp3 = inttoptr i32 %tmp2 to i32* + %tmp4 = load i32* %tmp3 + ret i32 %tmp4 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-ldr_ext.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-ldr_ext.ll new file mode 100644 index 0000000..9e6aef4 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-ldr_ext.ll
@@ -0,0 +1,28 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep ldrb | count 1 +; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep ldrh | count 1 +; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep ldrsb | count 1 +; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep ldrsh | count 1 + +define i32 @test1(i8* %v.pntr.s0.u1) { + %tmp.u = load i8* %v.pntr.s0.u1 + %tmp1.s = zext i8 %tmp.u to i32 + ret i32 %tmp1.s +} + +define i32 @test2(i16* %v.pntr.s0.u1) { + %tmp.u = load i16* %v.pntr.s0.u1 + %tmp1.s = zext i16 %tmp.u to i32 + ret i32 %tmp1.s +} + +define i32 @test3(i8* %v.pntr.s1.u0) { + %tmp.s = load i8* %v.pntr.s1.u0 + %tmp1.s = sext i8 %tmp.s to i32 + ret i32 %tmp1.s +} + +define i32 @test4() { + %tmp.s = load i16* null + %tmp1.s = sext i16 %tmp.s to i32 + ret i32 %tmp1.s +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-ldr_post.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-ldr_post.ll new file mode 100644 index 0000000..d1af4ba --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-ldr_post.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | \ +; RUN: grep {ldr.*\\\[.*\],} | count 1 + +define i32 @test(i32 %a, i32 %b, i32 %c) { + %tmp1 = mul i32 %a, %b ; <i32> [#uses=2] + %tmp2 = inttoptr i32 %tmp1 to i32* ; <i32*> [#uses=1] + %tmp3 = load i32* %tmp2 ; <i32> [#uses=1] + %tmp4 = sub i32 %tmp1, 8 ; <i32> [#uses=1] + %tmp5 = mul i32 %tmp4, %tmp3 ; <i32> [#uses=1] + ret i32 %tmp5 +} +
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-ldr_pre.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-ldr_pre.ll new file mode 100644 index 0000000..9cc3f4a --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-ldr_pre.ll
@@ -0,0 +1,28 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | \ +; RUN: grep {ldr.*\\!} | count 3 +; RUN: llc < %s -march=thumb -mattr=+thumb2 | \ +; RUN: grep {ldrsb.*\\!} | count 1 + +define i32* @test1(i32* %X, i32* %dest) { + %Y = getelementptr i32* %X, i32 4 ; <i32*> [#uses=2] + %A = load i32* %Y ; <i32> [#uses=1] + store i32 %A, i32* %dest + ret i32* %Y +} + +define i32 @test2(i32 %a, i32 %b) { + %tmp1 = sub i32 %a, 64 ; <i32> [#uses=2] + %tmp2 = inttoptr i32 %tmp1 to i32* ; <i32*> [#uses=1] + %tmp3 = load i32* %tmp2 ; <i32> [#uses=1] + %tmp4 = sub i32 %tmp1, %b ; <i32> [#uses=1] + %tmp5 = add i32 %tmp4, %tmp3 ; <i32> [#uses=1] + ret i32 %tmp5 +} + +define i8* @test3(i8* %X, i32* %dest) { + %tmp1 = getelementptr i8* %X, i32 4 + %tmp2 = load i8* %tmp1 + %tmp3 = sext i8 %tmp2 to i32 + store i32 %tmp3, i32* %dest + ret i8* %tmp1 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-ldrb.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-ldrb.ll new file mode 100644 index 0000000..bf10097 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-ldrb.ll
@@ -0,0 +1,72 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +define i8 @f1(i8* %v) { +entry: +; CHECK: f1: +; CHECK: ldrb r0, [r0] + %tmp = load i8* %v + ret i8 %tmp +} + +define i8 @f2(i8* %v) { +entry: +; CHECK: f2: +; CHECK: ldrb r0, [r0, #-1] + %tmp2 = getelementptr i8* %v, i8 1023 + %tmp = load i8* %tmp2 + ret i8 %tmp +} + +define i8 @f3(i32 %base) { +entry: +; CHECK: f3: +; CHECK: mov.w r1, #4096 +; CHECK: ldrb r0, [r0, r1] + %tmp1 = add i32 %base, 4096 + %tmp2 = inttoptr i32 %tmp1 to i8* + %tmp3 = load i8* %tmp2 + ret i8 %tmp3 +} + +define i8 @f4(i32 %base) { +entry: +; CHECK: f4: +; CHECK: ldrb r0, [r0, #-128] + %tmp1 = sub i32 %base, 128 + %tmp2 = inttoptr i32 %tmp1 to i8* + %tmp3 = load i8* %tmp2 + ret i8 %tmp3 +} + +define i8 @f5(i32 %base, i32 %offset) { +entry: +; CHECK: f5: +; CHECK: ldrb r0, [r0, r1] + %tmp1 = add i32 %base, %offset + %tmp2 = inttoptr i32 %tmp1 to i8* + %tmp3 = load i8* %tmp2 + ret i8 %tmp3 +} + +define i8 @f6(i32 %base, i32 %offset) { +entry: +; CHECK: f6: +; CHECK: ldrb.w r0, [r0, r1, lsl #2] + %tmp1 = shl i32 %offset, 2 + %tmp2 = add i32 %base, %tmp1 + %tmp3 = inttoptr i32 %tmp2 to i8* + %tmp4 = load i8* %tmp3 + ret i8 %tmp4 +} + +define i8 @f7(i32 %base, i32 %offset) { +entry: +; CHECK: f7: +; CHECK: lsrs r1, r1, #2 +; CHECK: ldrb r0, [r0, r1] + %tmp1 = lshr i32 %offset, 2 + %tmp2 = add i32 %base, %tmp1 + %tmp3 = inttoptr i32 %tmp2 to i8* + %tmp4 = load i8* %tmp3 + ret i8 %tmp4 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-ldrd.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-ldrd.ll new file mode 100644 index 0000000..d3b781d --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-ldrd.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 -regalloc=linearscan | FileCheck %s + +@b = external global i64* + +define i64 @t(i64 %a) nounwind readonly { +entry: +;CHECK: ldrd r2, r3, [r2] + %0 = load i64** @b, align 4 + %1 = load i64* %0, align 4 + %2 = mul i64 %1, %a + ret i64 %2 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-ldrh.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-ldrh.ll new file mode 100644 index 0000000..fee97bf --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-ldrh.ll
@@ -0,0 +1,71 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +define i16 @f1(i16* %v) { +entry: +; CHECK: f1: +; CHECK: ldrh r0, [r0] + %tmp = load i16* %v + ret i16 %tmp +} + +define i16 @f2(i16* %v) { +entry: +; CHECK: f2: +; CHECK: ldrh.w r0, [r0, #2046] + %tmp2 = getelementptr i16* %v, i16 1023 + %tmp = load i16* %tmp2 + ret i16 %tmp +} + +define i16 @f3(i16* %v) { +entry: +; CHECK: f3: +; CHECK: mov.w r1, #4096 +; CHECK: ldrh r0, [r0, r1] + %tmp2 = getelementptr i16* %v, i16 2048 + %tmp = load i16* %tmp2 + ret i16 %tmp +} + +define i16 @f4(i32 %base) { +entry: +; CHECK: f4: +; CHECK: ldrh r0, [r0, #-128] + %tmp1 = sub i32 %base, 128 + %tmp2 = inttoptr i32 %tmp1 to i16* + %tmp3 = load i16* %tmp2 + ret i16 %tmp3 +} + +define i16 @f5(i32 %base, i32 %offset) { +entry: +; CHECK: f5: +; CHECK: ldrh r0, [r0, r1] + %tmp1 = add i32 %base, %offset + %tmp2 = inttoptr i32 %tmp1 to i16* + %tmp3 = load i16* %tmp2 + ret i16 %tmp3 +} + +define i16 @f6(i32 %base, i32 %offset) { +entry: +; CHECK: f6: +; CHECK: ldrh.w r0, [r0, r1, lsl #2] + %tmp1 = shl i32 %offset, 2 + %tmp2 = add i32 %base, %tmp1 + %tmp3 = inttoptr i32 %tmp2 to i16* + %tmp4 = load i16* %tmp3 + ret i16 %tmp4 +} + +define i16 @f7(i32 %base, i32 %offset) { +entry: +; CHECK: f7: +; CHECK: lsrs r1, r1, #2 +; CHECK: ldrh r0, [r0, r1] + %tmp1 = lshr i32 %offset, 2 + %tmp2 = add i32 %base, %tmp1 + %tmp3 = inttoptr i32 %tmp2 to i16* + %tmp4 = load i16* %tmp3 + ret i16 %tmp4 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-lsl.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-lsl.ll new file mode 100644 index 0000000..6b0818a --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-lsl.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +define i32 @f1(i32 %a) { +; CHECK: f1: +; CHECK: lsls r0, r0, #5 + %tmp = shl i32 %a, 5 + ret i32 %tmp +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-lsl2.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-lsl2.ll new file mode 100644 index 0000000..f283eef --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-lsl2.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +define i32 @f1(i32 %a, i32 %b) { +; CHECK: f1: +; CHECK: lsls r0, r1 + %tmp = shl i32 %a, %b + ret i32 %tmp +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-lsr.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-lsr.ll new file mode 100644 index 0000000..7cbee54 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-lsr.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +define i32 @f1(i32 %a) { +; CHECK: f1: +; CHECK: lsrs r0, r0, #13 + %tmp = lshr i32 %a, 13 + ret i32 %tmp +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-lsr2.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-lsr2.ll new file mode 100644 index 0000000..87800f9 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-lsr2.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +define i32 @f1(i32 %a, i32 %b) { +; CHECK: f1: +; CHECK: lsrs r0, r1 + %tmp = lshr i32 %a, %b + ret i32 %tmp +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-lsr3.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-lsr3.ll new file mode 100644 index 0000000..e7ba782 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-lsr3.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +define i1 @test1(i64 %poscnt, i32 %work) { +entry: +; CHECK: lsrs.w r1, r1, #1 +; CHECK: rrx r0, r0 + %0 = lshr i64 %poscnt, 1 + %1 = icmp eq i64 %0, 0 + ret i1 %1 +} + +define i1 @test2(i64 %poscnt, i32 %work) { +entry: +; CHECK: asrs.w r1, r1, #1 +; CHECK: rrx r0, r0 + %0 = ashr i64 %poscnt, 1 + %1 = icmp eq i64 %0, 0 + ret i1 %1 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-mla.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-mla.ll new file mode 100644 index 0000000..c4cc749 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-mla.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +define i32 @f1(i32 %a, i32 %b, i32 %c) { + %tmp1 = mul i32 %a, %b + %tmp2 = add i32 %c, %tmp1 + ret i32 %tmp2 +} +; CHECK: f1: +; CHECK: mla r0, r0, r1, r2 + +define i32 @f2(i32 %a, i32 %b, i32 %c) { + %tmp1 = mul i32 %a, %b + %tmp2 = add i32 %tmp1, %c + ret i32 %tmp2 +} +; CHECK: f2: +; CHECK: mla r0, r0, r1, r2
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-mls.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-mls.ll new file mode 100644 index 0000000..24c45c5 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-mls.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +define i32 @f1(i32 %a, i32 %b, i32 %c) { + %tmp1 = mul i32 %a, %b + %tmp2 = sub i32 %c, %tmp1 + ret i32 %tmp2 +} +; CHECK: f1: +; CHECK: mls r0, r0, r1, r2 + +; sub doesn't commute, so no mls for this one +define i32 @f2(i32 %a, i32 %b, i32 %c) { + %tmp1 = mul i32 %a, %b + %tmp2 = sub i32 %tmp1, %c + ret i32 %tmp2 +} +; CHECK: f2: +; CHECK: muls r0, r0, r1 +
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-mov.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-mov.ll new file mode 100644 index 0000000..adb6dde --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-mov.ll
@@ -0,0 +1,266 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +; Test #<const> + +; var 2.1 - 0x00ab00ab +define i32 @t2_const_var2_1_ok_1(i32 %lhs) { +;CHECK: t2_const_var2_1_ok_1: +;CHECK: add.w r0, r0, #11206827 + %ret = add i32 %lhs, 11206827 ; 0x00ab00ab + ret i32 %ret +} + +define i32 @t2_const_var2_1_ok_2(i32 %lhs) { +;CHECK: t2_const_var2_1_ok_2: +;CHECK: add.w r0, r0, #11206656 +;CHECK: adds r0, #187 + %ret = add i32 %lhs, 11206843 ; 0x00ab00bb + ret i32 %ret +} + +define i32 @t2_const_var2_1_ok_3(i32 %lhs) { +;CHECK: t2_const_var2_1_ok_3: +;CHECK: add.w r0, r0, #11206827 +;CHECK: add.w r0, r0, #16777216 + %ret = add i32 %lhs, 27984043 ; 0x01ab00ab + ret i32 %ret +} + +define i32 @t2_const_var2_1_ok_4(i32 %lhs) { +;CHECK: t2_const_var2_1_ok_4: +;CHECK: add.w r0, r0, #16777472 +;CHECK: add.w r0, r0, #11206827 + %ret = add i32 %lhs, 27984299 ; 0x01ab01ab + ret i32 %ret +} + +define i32 @t2_const_var2_1_fail_1(i32 %lhs) { +;CHECK: t2_const_var2_1_fail_1: +;CHECK: movw r1, #43777 +;CHECK: movt r1, #427 +;CHECK: add r0, r1 + %ret = add i32 %lhs, 28027649 ; 0x01abab01 + ret i32 %ret +} + +; var 2.2 - 0xab00ab00 +define i32 @t2_const_var2_2_ok_1(i32 %lhs) { +;CHECK: t2_const_var2_2_ok_1: +;CHECK: add.w r0, r0, #-1426019584 + %ret = add i32 %lhs, 2868947712 ; 0xab00ab00 + ret i32 %ret +} + +define i32 @t2_const_var2_2_ok_2(i32 %lhs) { +;CHECK: t2_const_var2_2_ok_2: +;CHECK: add.w r0, r0, #2868903936 +;CHECK: add.w r0, r0, #47616 + %ret = add i32 %lhs, 2868951552 ; 0xab00ba00 + ret i32 %ret +} + +define i32 @t2_const_var2_2_ok_3(i32 %lhs) { +;CHECK: t2_const_var2_2_ok_3: +;CHECK: add.w r0, r0, #2868947712 +;CHECK: adds r0, #16 + %ret = add i32 %lhs, 2868947728 ; 0xab00ab10 + ret i32 %ret +} + +define i32 @t2_const_var2_2_ok_4(i32 %lhs) { +;CHECK: t2_const_var2_2_ok_4: +;CHECK: add.w r0, r0, #2868947712 +;CHECK: add.w r0, r0, #1048592 + %ret = add i32 %lhs, 2869996304 ; 0xab10ab10 + ret i32 %ret +} + +define i32 @t2_const_var2_2_fail_1(i32 %lhs) { 
+;CHECK: t2_const_var2_2_fail_1: +;CHECK: movw r1, #43792 +;CHECK: movt r1, #4267 +;CHECK: add r0, r1 + %ret = add i32 %lhs, 279685904 ; 0x10abab10 + ret i32 %ret +} + +; var 2.3 - 0xabababab +define i32 @t2_const_var2_3_ok_1(i32 %lhs) { +;CHECK: t2_const_var2_3_ok_1: +;CHECK: add.w r0, r0, #-1414812757 + %ret = add i32 %lhs, 2880154539 ; 0xabababab + ret i32 %ret +} + +define i32 @t2_const_var2_3_fail_1(i32 %lhs) { +;CHECK: t2_const_var2_3_fail_1: +;CHECK: movw r1, #43962 +;CHECK: movt r1, #43947 +;CHECK: add r0, r1 + %ret = add i32 %lhs, 2880154554 ; 0xabababba + ret i32 %ret +} + +define i32 @t2_const_var2_3_fail_2(i32 %lhs) { +;CHECK: t2_const_var2_3_fail_2: +;CHECK: movw r1, #47787 +;CHECK: movt r1, #43947 +;CHECK: add r0, r1 + %ret = add i32 %lhs, 2880158379 ; 0xababbaab + ret i32 %ret +} + +define i32 @t2_const_var2_3_fail_3(i32 %lhs) { +;CHECK: t2_const_var2_3_fail_3: +;CHECK: movw r1, #43947 +;CHECK: movt r1, #43962 +;CHECK: add r0, r1 + %ret = add i32 %lhs, 2881137579 ; 0xabbaabab + ret i32 %ret +} + +define i32 @t2_const_var2_3_fail_4(i32 %lhs) { +;CHECK: t2_const_var2_3_fail_4: +;CHECK: movw r1, #43947 +;CHECK: movt r1, #47787 +;CHECK: add r0, r1 + %ret = add i32 %lhs, 3131812779 ; 0xbaababab + ret i32 %ret +} + +; var 3 - 0x0F000000 +define i32 @t2_const_var3_1_ok_1(i32 %lhs) { +;CHECK: t2_const_var3_1_ok_1: +;CHECK: add.w r0, r0, #251658240 + %ret = add i32 %lhs, 251658240 ; 0x0F000000 + ret i32 %ret +} + +define i32 @t2_const_var3_2_ok_1(i32 %lhs) { +;CHECK: t2_const_var3_2_ok_1: +;CHECK: add.w r0, r0, #3948544 + %ret = add i32 %lhs, 3948544 ; 0b00000000001111000100000000000000 + ret i32 %ret +} + +define i32 @t2_const_var3_2_ok_2(i32 %lhs) { +;CHECK: t2_const_var3_2_ok_2: +;CHECK: add.w r0, r0, #2097152 +;CHECK: add.w r0, r0, #1843200 + %ret = add i32 %lhs, 3940352 ; 0b00000000001111000010000000000000 + ret i32 %ret +} + +define i32 @t2_const_var3_3_ok_1(i32 %lhs) { +;CHECK: t2_const_var3_3_ok_1: +;CHECK: add.w r0, r0, #258 + %ret = add i32 %lhs, 
258 ; 0b00000000000000000000000100000010 + ret i32 %ret +} + +define i32 @t2_const_var3_4_ok_1(i32 %lhs) { +;CHECK: t2_const_var3_4_ok_1: +;CHECK: add.w r0, r0, #-268435456 + %ret = add i32 %lhs, 4026531840 ; 0xF0000000 + ret i32 %ret +} + +define i32 @t2MOVTi16_ok_1(i32 %a) { +; CHECK: t2MOVTi16_ok_1: +; CHECK: movt r0, #1234 + %1 = and i32 %a, 65535 + %2 = shl i32 1234, 16 + %3 = or i32 %1, %2 + + ret i32 %3 +} + +define i32 @t2MOVTi16_test_1(i32 %a) { +; CHECK: t2MOVTi16_test_1: +; CHECK: movt r0, #1234 + %1 = shl i32 255, 8 + %2 = shl i32 1234, 8 + %3 = or i32 %1, 255 ; This gives us 0xFFFF in %3 + %4 = shl i32 %2, 8 ; This gives us (1234 << 16) in %4 + %5 = and i32 %a, %3 + %6 = or i32 %4, %5 + + ret i32 %6 +} + +define i32 @t2MOVTi16_test_2(i32 %a) { +; CHECK: t2MOVTi16_test_2: +; CHECK: movt r0, #1234 + %1 = shl i32 255, 8 + %2 = shl i32 1234, 8 + %3 = or i32 %1, 255 ; This gives us 0xFFFF in %3 + %4 = shl i32 %2, 6 + %5 = and i32 %a, %3 + %6 = shl i32 %4, 2 ; This gives us (1234 << 16) in %6 + %7 = or i32 %5, %6 + + ret i32 %7 +} + +define i32 @t2MOVTi16_test_3(i32 %a) { +; CHECK: t2MOVTi16_test_3: +; CHECK: movt r0, #1234 + %1 = shl i32 255, 8 + %2 = shl i32 1234, 8 + %3 = or i32 %1, 255 ; This gives us 0xFFFF in %3 + %4 = shl i32 %2, 6 + %5 = and i32 %a, %3 + %6 = shl i32 %4, 2 ; This gives us (1234 << 16) in %6 + %7 = lshr i32 %6, 6 + %8 = shl i32 %7, 6 + %9 = or i32 %5, %8 + + ret i32 %8 +} + +; 171 = 0x000000ab +define i32 @f1(i32 %a) { +; CHECK: f1: +; CHECK: movs r0, #171 + %tmp = add i32 0, 171 + ret i32 %tmp +} + +; 1179666 = 0x00120012 +define i32 @f2(i32 %a) { +; CHECK: f2: +; CHECK: mov.w r0, #1179666 + %tmp = add i32 0, 1179666 + ret i32 %tmp +} + +; 872428544 = 0x34003400 +define i32 @f3(i32 %a) { +; CHECK: f3: +; CHECK: mov.w r0, #872428544 + %tmp = add i32 0, 872428544 + ret i32 %tmp +} + +; 1448498774 = 0x56565656 +define i32 @f4(i32 %a) { +; CHECK: f4: +; CHECK: mov.w r0, #1448498774 + %tmp = add i32 0, 1448498774 + ret i32 %tmp +} + +; 
66846720 = 0x03fc0000 +define i32 @f5(i32 %a) { +; CHECK: f5: +; CHECK: mov.w r0, #66846720 + %tmp = add i32 0, 66846720 + ret i32 %tmp +} + +define i32 @f6(i32 %a) { +;CHECK: f6 +;CHECK: movw r0, #65535 + %tmp = add i32 0, 65535 + ret i32 %tmp +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-mul.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-mul.ll new file mode 100644 index 0000000..bb97d97 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-mul.ll
@@ -0,0 +1,26 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +define i32 @f1(i32 %a, i32 %b, i32 %c) { +; CHECK: f1: +; CHECK: muls r0, r0, r1 + %tmp = mul i32 %a, %b + ret i32 %tmp +} + +%struct.CMPoint = type { %struct.Point, float, float, [5 x float] } +%struct.Point = type { float, float } + +define %struct.CMPoint* @t1(i32 %i, i32 %j, i32 %n, %struct.CMPoint* %thePoints) nounwind readnone ssp { +entry: +; CHECK: t1: +; CHECK: mla r0, r2, r0, r1 +; CHECK: add.w r0, r0, r0, lsl #3 +; CHECL: add.w r0, r3, r0, lsl #2 + %mul = mul i32 %n, %i + %add = add i32 %mul, %j + %0 = ptrtoint %struct.CMPoint* %thePoints to i32 + %mul5 = mul i32 %add, 36 + %add6 = add i32 %mul5, %0 + %1 = inttoptr i32 %add6 to %struct.CMPoint* + ret %struct.CMPoint* %1 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-mulhi.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-mulhi.ll new file mode 100644 index 0000000..9d4840a --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-mulhi.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2dsp | FileCheck %s + +define i32 @smulhi(i32 %x, i32 %y) { +; CHECK: smulhi +; CHECK: smmul r0, r1, r0 + %tmp = sext i32 %x to i64 ; <i64> [#uses=1] + %tmp1 = sext i32 %y to i64 ; <i64> [#uses=1] + %tmp2 = mul i64 %tmp1, %tmp ; <i64> [#uses=1] + %tmp3 = lshr i64 %tmp2, 32 ; <i64> [#uses=1] + %tmp3.upgrd.1 = trunc i64 %tmp3 to i32 ; <i32> [#uses=1] + ret i32 %tmp3.upgrd.1 +} + +define i32 @umulhi(i32 %x, i32 %y) { +; CHECK: umulhi +; CHECK: umull r1, r0, r1, r0 + %tmp = zext i32 %x to i64 ; <i64> [#uses=1] + %tmp1 = zext i32 %y to i64 ; <i64> [#uses=1] + %tmp2 = mul i64 %tmp1, %tmp ; <i64> [#uses=1] + %tmp3 = lshr i64 %tmp2, 32 ; <i64> [#uses=1] + %tmp3.upgrd.2 = trunc i64 %tmp3 to i32 ; <i32> [#uses=1] + ret i32 %tmp3.upgrd.2 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-mvn.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-mvn.ll new file mode 100644 index 0000000..a8c8f83 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-mvn.ll
@@ -0,0 +1,33 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s + +; 0x000000bb = 187 +define i32 @f1(i32 %a) { +; CHECK: f1: +; CHECK: mvn r0, #187 + %tmp = xor i32 4294967295, 187 + ret i32 %tmp +} + +; 0x00aa00aa = 11141290 +define i32 @f2(i32 %a) { +; CHECK: f2: +; CHECK: mvn r0, #11141290 + %tmp = xor i32 4294967295, 11141290 + ret i32 %tmp +} + +; 0xcc00cc00 = 3422604288 +define i32 @f3(i32 %a) { +; CHECK: f3: +; CHECK: mvn r0, #-872363008 + %tmp = xor i32 4294967295, 3422604288 + ret i32 %tmp +} + +; 0x00110000 = 1114112 +define i32 @f5(i32 %a) { +; CHECK: f5: +; CHECK: mvn r0, #1114112 + %tmp = xor i32 4294967295, 1114112 + ret i32 %tmp +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-mvn2.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-mvn2.ll new file mode 100644 index 0000000..375d0aa --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-mvn2.ll
@@ -0,0 +1,49 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +define i32 @f1(i32 %a) { +; CHECK: f1: +; CHECK: mvns r0, r0 + %tmp = xor i32 4294967295, %a + ret i32 %tmp +} + +define i32 @f2(i32 %a) { +; CHECK: f2: +; CHECK: mvns r0, r0 + %tmp = xor i32 %a, 4294967295 + ret i32 %tmp +} + +define i32 @f5(i32 %a) { +; CHECK: f5: +; CHECK: mvn.w r0, r0, lsl #5 + %tmp = shl i32 %a, 5 + %tmp1 = xor i32 %tmp, 4294967295 + ret i32 %tmp1 +} + +define i32 @f6(i32 %a) { +; CHECK: f6: +; CHECK: mvn.w r0, r0, lsr #6 + %tmp = lshr i32 %a, 6 + %tmp1 = xor i32 %tmp, 4294967295 + ret i32 %tmp1 +} + +define i32 @f7(i32 %a) { +; CHECK: f7: +; CHECK: mvn.w r0, r0, asr #7 + %tmp = ashr i32 %a, 7 + %tmp1 = xor i32 %tmp, 4294967295 + ret i32 %tmp1 +} + +define i32 @f8(i32 %a) { +; CHECK: f8: +; CHECK: mvn.w r0, r0, ror #8 + %l8 = shl i32 %a, 24 + %r8 = lshr i32 %a, 8 + %tmp = or i32 %l8, %r8 + %tmp1 = xor i32 %tmp, 4294967295 + ret i32 %tmp1 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-neg.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-neg.ll new file mode 100644 index 0000000..6bf11ec --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-neg.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +define i32 @f1(i32 %a) { +; CHECK: f1: +; CHECK: rsbs r0, r0, #0 + %tmp = sub i32 0, %a + ret i32 %tmp +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-orn.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-orn.ll new file mode 100644 index 0000000..97a3fd7 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-orn.ll
@@ -0,0 +1,72 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + + +define i32 @f1(i32 %a, i32 %b) { + %tmp = xor i32 %b, 4294967295 + %tmp1 = or i32 %a, %tmp + ret i32 %tmp1 +} +; CHECK: f1: +; CHECK: orn r0, r0, r1 + +define i32 @f2(i32 %a, i32 %b) { + %tmp = xor i32 %b, 4294967295 + %tmp1 = or i32 %tmp, %a + ret i32 %tmp1 +} +; CHECK: f2: +; CHECK: orn r0, r0, r1 + +define i32 @f3(i32 %a, i32 %b) { + %tmp = xor i32 4294967295, %b + %tmp1 = or i32 %a, %tmp + ret i32 %tmp1 +} +; CHECK: f3: +; CHECK: orn r0, r0, r1 + +define i32 @f4(i32 %a, i32 %b) { + %tmp = xor i32 4294967295, %b + %tmp1 = or i32 %tmp, %a + ret i32 %tmp1 +} +; CHECK: f4: +; CHECK: orn r0, r0, r1 + +define i32 @f5(i32 %a, i32 %b) { + %tmp = shl i32 %b, 5 + %tmp1 = xor i32 4294967295, %tmp + %tmp2 = or i32 %a, %tmp1 + ret i32 %tmp2 +} +; CHECK: f5: +; CHECK: orn r0, r0, r1, lsl #5 + +define i32 @f6(i32 %a, i32 %b) { + %tmp = lshr i32 %b, 6 + %tmp1 = xor i32 4294967295, %tmp + %tmp2 = or i32 %a, %tmp1 + ret i32 %tmp2 +} +; CHECK: f6: +; CHECK: orn r0, r0, r1, lsr #6 + +define i32 @f7(i32 %a, i32 %b) { + %tmp = ashr i32 %b, 7 + %tmp1 = xor i32 4294967295, %tmp + %tmp2 = or i32 %a, %tmp1 + ret i32 %tmp2 +} +; CHECK: f7: +; CHECK: orn r0, r0, r1, asr #7 + +define i32 @f8(i32 %a, i32 %b) { + %l8 = shl i32 %a, 24 + %r8 = lshr i32 %a, 8 + %tmp = or i32 %l8, %r8 + %tmp1 = xor i32 4294967295, %tmp + %tmp2 = or i32 %a, %tmp1 + ret i32 %tmp2 +} +; CHECK: f8: +; CHECK: orn r0, r0, r0, ror #8
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-orn2.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-orn2.ll new file mode 100644 index 0000000..34ab3a5 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-orn2.ll
@@ -0,0 +1,38 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + + +; 0x000000bb = 187 +define i32 @f1(i32 %a) { + %tmp1 = xor i32 4294967295, 187 + %tmp2 = or i32 %a, %tmp1 + ret i32 %tmp2 +} +; CHECK: f1: +; CHECK: orn r0, r0, #187 + +; 0x00aa00aa = 11141290 +define i32 @f2(i32 %a) { + %tmp1 = xor i32 4294967295, 11141290 + %tmp2 = or i32 %a, %tmp1 + ret i32 %tmp2 +} +; CHECK: f2: +; CHECK: orn r0, r0, #11141290 + +; 0xcc00cc00 = 3422604288 +define i32 @f3(i32 %a) { + %tmp1 = xor i32 4294967295, 3422604288 + %tmp2 = or i32 %a, %tmp1 + ret i32 %tmp2 +} +; CHECK: f3: +; CHECK: orn r0, r0, #-872363008 + +; 0x00110000 = 1114112 +define i32 @f5(i32 %a) { + %tmp1 = xor i32 4294967295, 1114112 + %tmp2 = or i32 %a, %tmp1 + ret i32 %tmp2 +} +; CHECK: f5: +; CHECK: orn r0, r0, #1114112
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-orr.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-orr.ll new file mode 100644 index 0000000..89ab7b1 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-orr.ll
@@ -0,0 +1,42 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +define i32 @f1(i32 %a, i32 %b) { +; CHECK: f1: +; CHECK: orrs r0, r1 + %tmp2 = or i32 %a, %b + ret i32 %tmp2 +} + +define i32 @f5(i32 %a, i32 %b) { +; CHECK: f5: +; CHECK: orr.w r0, r0, r1, lsl #5 + %tmp = shl i32 %b, 5 + %tmp2 = or i32 %a, %tmp + ret i32 %tmp2 +} + +define i32 @f6(i32 %a, i32 %b) { +; CHECK: f6: +; CHECK: orr.w r0, r0, r1, lsr #6 + %tmp = lshr i32 %b, 6 + %tmp2 = or i32 %a, %tmp + ret i32 %tmp2 +} + +define i32 @f7(i32 %a, i32 %b) { +; CHECK: f7: +; CHECK: orr.w r0, r0, r1, asr #7 + %tmp = ashr i32 %b, 7 + %tmp2 = or i32 %a, %tmp + ret i32 %tmp2 +} + +define i32 @f8(i32 %a, i32 %b) { +; CHECK: f8: +; CHECK: orr.w r0, r0, r0, ror #8 + %l8 = shl i32 %a, 24 + %r8 = lshr i32 %a, 8 + %tmp = or i32 %l8, %r8 + %tmp2 = or i32 %a, %tmp + ret i32 %tmp2 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-orr2.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-orr2.ll new file mode 100644 index 0000000..8f7a3c2 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-orr2.ll
@@ -0,0 +1,42 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + + +; 0x000000bb = 187 +define i32 @f1(i32 %a) { + %tmp2 = or i32 %a, 187 + ret i32 %tmp2 +} +; CHECK: f1: +; CHECK: orr r0, r0, #187 + +; 0x00aa00aa = 11141290 +define i32 @f2(i32 %a) { + %tmp2 = or i32 %a, 11141290 + ret i32 %tmp2 +} +; CHECK: f2: +; CHECK: orr r0, r0, #11141290 + +; 0xcc00cc00 = 3422604288 +define i32 @f3(i32 %a) { + %tmp2 = or i32 %a, 3422604288 + ret i32 %tmp2 +} +; CHECK: f3: +; CHECK: orr r0, r0, #-872363008 + +; 0x44444444 = 1145324612 +define i32 @f4(i32 %a) { + %tmp2 = or i32 %a, 1145324612 + ret i32 %tmp2 +} +; CHECK: f4: +; CHECK: orr r0, r0, #1145324612 + +; 0x00110000 = 1114112 +define i32 @f5(i32 %a) { + %tmp2 = or i32 %a, 1114112 + ret i32 %tmp2 +} +; CHECK: f5: +; CHECK: orr r0, r0, #1114112
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-pack.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-pack.ll new file mode 100644 index 0000000..2e8bb1d --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-pack.ll
@@ -0,0 +1,97 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | FileCheck %s + +; CHECK: test1 +; CHECK: pkhbt r0, r0, r1, lsl #16 +define i32 @test1(i32 %X, i32 %Y) { + %tmp1 = and i32 %X, 65535 ; <i32> [#uses=1] + %tmp4 = shl i32 %Y, 16 ; <i32> [#uses=1] + %tmp5 = or i32 %tmp4, %tmp1 ; <i32> [#uses=1] + ret i32 %tmp5 +} + +; CHECK: test1a +; CHECK: pkhbt r0, r0, r1, lsl #16 +define i32 @test1a(i32 %X, i32 %Y) { + %tmp19 = and i32 %X, 65535 ; <i32> [#uses=1] + %tmp37 = shl i32 %Y, 16 ; <i32> [#uses=1] + %tmp5 = or i32 %tmp37, %tmp19 ; <i32> [#uses=1] + ret i32 %tmp5 +} + +; CHECK: test2 +; CHECK: pkhbt r0, r0, r1, lsl #12 +define i32 @test2(i32 %X, i32 %Y) { + %tmp1 = and i32 %X, 65535 ; <i32> [#uses=1] + %tmp3 = shl i32 %Y, 12 ; <i32> [#uses=1] + %tmp4 = and i32 %tmp3, -65536 ; <i32> [#uses=1] + %tmp57 = or i32 %tmp4, %tmp1 ; <i32> [#uses=1] + ret i32 %tmp57 +} + +; CHECK: test3 +; CHECK: pkhbt r0, r0, r1, lsl #18 +define i32 @test3(i32 %X, i32 %Y) { + %tmp19 = and i32 %X, 65535 ; <i32> [#uses=1] + %tmp37 = shl i32 %Y, 18 ; <i32> [#uses=1] + %tmp5 = or i32 %tmp37, %tmp19 ; <i32> [#uses=1] + ret i32 %tmp5 +} + +; CHECK: test4 +; CHECK: pkhbt r0, r0, r1 +define i32 @test4(i32 %X, i32 %Y) { + %tmp1 = and i32 %X, 65535 ; <i32> [#uses=1] + %tmp3 = and i32 %Y, -65536 ; <i32> [#uses=1] + %tmp46 = or i32 %tmp3, %tmp1 ; <i32> [#uses=1] + ret i32 %tmp46 +} + +; CHECK: test5 +; CHECK: pkhtb r0, r0, r1, asr #16 +define i32 @test5(i32 %X, i32 %Y) { + %tmp17 = and i32 %X, -65536 ; <i32> [#uses=1] + %tmp2 = bitcast i32 %Y to i32 ; <i32> [#uses=1] + %tmp4 = lshr i32 %tmp2, 16 ; <i32> [#uses=2] + %tmp5 = or i32 %tmp4, %tmp17 ; <i32> [#uses=1] + ret i32 %tmp5 +} + +; CHECK: test5a +; CHECK: pkhtb r0, r0, r1, asr #16 +define i32 @test5a(i32 %X, i32 %Y) { + %tmp110 = and i32 %X, -65536 ; <i32> [#uses=1] + %tmp37 = lshr i32 %Y, 16 ; <i32> [#uses=1] + %tmp39 = bitcast i32 %tmp37 to i32 ; <i32> [#uses=1] + %tmp5 = or i32 %tmp39, %tmp110 ; <i32> [#uses=1] + ret i32 %tmp5 +} + +; 
CHECK: test6 +; CHECK: pkhtb r0, r0, r1, asr #12 +define i32 @test6(i32 %X, i32 %Y) { + %tmp1 = and i32 %X, -65536 ; <i32> [#uses=1] + %tmp37 = lshr i32 %Y, 12 ; <i32> [#uses=1] + %tmp38 = bitcast i32 %tmp37 to i32 ; <i32> [#uses=1] + %tmp4 = and i32 %tmp38, 65535 ; <i32> [#uses=1] + %tmp59 = or i32 %tmp4, %tmp1 ; <i32> [#uses=1] + ret i32 %tmp59 +} + +; CHECK: test7 +; CHECK: pkhtb r0, r0, r1, asr #18 +define i32 @test7(i32 %X, i32 %Y) { + %tmp1 = and i32 %X, -65536 ; <i32> [#uses=1] + %tmp3 = ashr i32 %Y, 18 ; <i32> [#uses=1] + %tmp4 = and i32 %tmp3, 65535 ; <i32> [#uses=1] + %tmp57 = or i32 %tmp4, %tmp1 ; <i32> [#uses=1] + ret i32 %tmp57 +} + +; CHECK: test8 +; CHECK: pkhtb r0, r0, r1, asr #22 +define i32 @test8(i32 %X, i32 %Y) { + %tmp1 = and i32 %X, -65536 + %tmp3 = lshr i32 %Y, 22 + %tmp57 = or i32 %tmp3, %tmp1 + ret i32 %tmp57 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-rev.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-rev.ll new file mode 100644 index 0000000..b469bbd --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-rev.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2,+v7,+t2xtpk | FileCheck %s + +define i32 @f1(i32 %a) { +; CHECK: f1: +; CHECK: rev r0, r0 + %tmp = tail call i32 @llvm.bswap.i32(i32 %a) + ret i32 %tmp +} + +declare i32 @llvm.bswap.i32(i32) nounwind readnone + +define i32 @f2(i32 %X) { +; CHECK: f2: +; CHECK: revsh r0, r0 + %tmp1 = lshr i32 %X, 8 + %tmp1.upgrd.1 = trunc i32 %tmp1 to i16 + %tmp3 = trunc i32 %X to i16 + %tmp2 = and i16 %tmp1.upgrd.1, 255 + %tmp4 = shl i16 %tmp3, 8 + %tmp5 = or i16 %tmp2, %tmp4 + %tmp5.upgrd.2 = sext i16 %tmp5 to i32 + ret i32 %tmp5.upgrd.2 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-rev16.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-rev16.ll new file mode 100644 index 0000000..39b6ac3 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-rev16.ll
@@ -0,0 +1,32 @@ +; XFAIL: * +; fixme rev16 pattern is not matching + +; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {rev16\\W*r\[0-9\]*,\\W*r\[0-9\]*} | count 1 + +; 0xff00ff00 = 4278255360 +; 0x00ff00ff = 16711935 +define i32 @f1(i32 %a) { + %l8 = shl i32 %a, 8 + %r8 = lshr i32 %a, 8 + %mask_l8 = and i32 %l8, 4278255360 + %mask_r8 = and i32 %r8, 16711935 + %tmp = or i32 %mask_l8, %mask_r8 + ret i32 %tmp +} + +; 0xff000000 = 4278190080 +; 0x00ff0000 = 16711680 +; 0x0000ff00 = 65280 +; 0x000000ff = 255 +define i32 @f2(i32 %a) { + %l8 = shl i32 %a, 8 + %r8 = lshr i32 %a, 8 + %masklo_l8 = and i32 %l8, 65280 + %maskhi_l8 = and i32 %l8, 4278190080 + %masklo_r8 = and i32 %r8, 255 + %maskhi_r8 = and i32 %r8, 16711680 + %tmp1 = or i32 %masklo_l8, %masklo_r8 + %tmp2 = or i32 %maskhi_l8, %maskhi_r8 + %tmp = or i32 %tmp1, %tmp2 + ret i32 %tmp +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-ror.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-ror.ll new file mode 100644 index 0000000..590c333 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-ror.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + + +; CHECK: f1: +; CHECK: ror.w r0, r0, #22 +define i32 @f1(i32 %a) { + %l8 = shl i32 %a, 10 + %r8 = lshr i32 %a, 22 + %tmp = or i32 %l8, %r8 + ret i32 %tmp +} + +; CHECK: f2: +; CHECK-NOT: and +; CHECK: ror +define i32 @f2(i32 %v, i32 %nbits) { +entry: + %and = and i32 %nbits, 31 + %shr = lshr i32 %v, %and + %sub = sub i32 32, %and + %shl = shl i32 %v, %sub + %or = or i32 %shl, %shr + ret i32 %or +} \ No newline at end of file
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-rsb.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-rsb.ll new file mode 100644 index 0000000..15185be --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-rsb.ll
@@ -0,0 +1,35 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +define i32 @f1(i32 %a, i32 %b) { + %tmp = shl i32 %b, 5 + %tmp1 = sub i32 %tmp, %a + ret i32 %tmp1 +} +; CHECK: f1: +; CHECK: rsb r0, r0, r1, lsl #5 + +define i32 @f2(i32 %a, i32 %b) { + %tmp = lshr i32 %b, 6 + %tmp1 = sub i32 %tmp, %a + ret i32 %tmp1 +} +; CHECK: f2: +; CHECK: rsb r0, r0, r1, lsr #6 + +define i32 @f3(i32 %a, i32 %b) { + %tmp = ashr i32 %b, 7 + %tmp1 = sub i32 %tmp, %a + ret i32 %tmp1 +} +; CHECK: f3: +; CHECK: rsb r0, r0, r1, asr #7 + +define i32 @f4(i32 %a, i32 %b) { + %l8 = shl i32 %a, 24 + %r8 = lshr i32 %a, 8 + %tmp = or i32 %l8, %r8 + %tmp1 = sub i32 %tmp, %a + ret i32 %tmp1 +} +; CHECK: f4: +; CHECK: rsb r0, r0, r0, ror #8
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-rsb2.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-rsb2.ll new file mode 100644 index 0000000..61fb619 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-rsb2.ll
@@ -0,0 +1,41 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +; 171 = 0x000000ab +define i32 @f1(i32 %a) { + %tmp = sub i32 171, %a + ret i32 %tmp +} +; CHECK: f1: +; CHECK: rsb.w r0, r0, #171 + +; 1179666 = 0x00120012 +define i32 @f2(i32 %a) { + %tmp = sub i32 1179666, %a + ret i32 %tmp +} +; CHECK: f2: +; CHECK: rsb.w r0, r0, #1179666 + +; 872428544 = 0x34003400 +define i32 @f3(i32 %a) { + %tmp = sub i32 872428544, %a + ret i32 %tmp +} +; CHECK: f3: +; CHECK: rsb.w r0, r0, #872428544 + +; 1448498774 = 0x56565656 +define i32 @f4(i32 %a) { + %tmp = sub i32 1448498774, %a + ret i32 %tmp +} +; CHECK: f4: +; CHECK: rsb.w r0, r0, #1448498774 + +; 66846720 = 0x03fc0000 +define i32 @f5(i32 %a) { + %tmp = sub i32 66846720, %a + ret i32 %tmp +} +; CHECK: f5: +; CHECK: rsb.w r0, r0, #66846720
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-sbc.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-sbc.ll new file mode 100644 index 0000000..492e5f0 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-sbc.ll
@@ -0,0 +1,68 @@ +; RUN: llc -march=thumb -mattr=+thumb2 < %s | FileCheck %s + +define i64 @f1(i64 %a, i64 %b) { +; CHECK: f1 +; CHECK: subs r0, r0, r2 + %tmp = sub i64 %a, %b + ret i64 %tmp +} + +; 734439407618 = 0x000000ab00000002 +define i64 @f2(i64 %a) { +; CHECK: f2 +; CHECK: subs r0, #2 +; CHECK: sbc r1, r1, #171 + %tmp = sub i64 %a, 734439407618 + ret i64 %tmp +} + +; 5066626890203138 = 0x0012001200000002 +define i64 @f3(i64 %a) { +; CHECK: f3 +; CHECK: subs r0, #2 +; CHECK: sbc r1, r1, #1179666 + %tmp = sub i64 %a, 5066626890203138 + ret i64 %tmp +} + +; 3747052064576897026 = 0x3400340000000002 +define i64 @f4(i64 %a) { +; CHECK: f4 +; CHECK: subs r0, #2 +; CHECK: sbc r1, r1, #872428544 + %tmp = sub i64 %a, 3747052064576897026 + ret i64 %tmp +} + +; 6221254862626095106 = 0x5656565600000002 +define i64 @f5(i64 %a) { +; CHECK: f5 +; CHECK: subs r0, #2 +; CHECK: adc r1, r1, #-1448498775 + %tmp = sub i64 %a, 6221254862626095106 + ret i64 %tmp +} + +; 287104476244869122 = 0x03fc000000000002 +define i64 @f6(i64 %a) { +; CHECK: f6 +; CHECK: subs r0, #2 +; CHECK: sbc r1, r1, #66846720 + %tmp = sub i64 %a, 287104476244869122 + ret i64 %tmp +} + +; Example from numerics code that manually computes wider-than-64 values. +; +; CHECK: livecarry: +; CHECK: adds +; CHECK: adc +define i64 @livecarry(i64 %carry, i32 %digit) nounwind { + %ch = lshr i64 %carry, 32 + %cl = and i64 %carry, 4294967295 + %truncdigit = zext i32 %digit to i64 + %prod = add i64 %cl, %truncdigit + %ph = lshr i64 %prod, 32 + %carryresult = add i64 %ch, %ph + ret i64 %carryresult +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-select.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-select.ll new file mode 100644 index 0000000..2dcf8aa --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-select.ll
@@ -0,0 +1,98 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +define i32 @f1(i32 %a.s) { +entry: +; CHECK: f1: +; CHECK: it eq +; CHECK: moveq + + %tmp = icmp eq i32 %a.s, 4 + %tmp1.s = select i1 %tmp, i32 2, i32 3 + ret i32 %tmp1.s +} + +define i32 @f2(i32 %a.s) { +entry: +; CHECK: f2: +; CHECK: it gt +; CHECK: movgt + %tmp = icmp sgt i32 %a.s, 4 + %tmp1.s = select i1 %tmp, i32 2, i32 3 + ret i32 %tmp1.s +} + +define i32 @f3(i32 %a.s, i32 %b.s) { +entry: +; CHECK: f3: +; CHECK: it lt +; CHECK: movlt + %tmp = icmp slt i32 %a.s, %b.s + %tmp1.s = select i1 %tmp, i32 2, i32 3 + ret i32 %tmp1.s +} + +define i32 @f4(i32 %a.s, i32 %b.s) { +entry: +; CHECK: f4: +; CHECK: it le +; CHECK: movle + + %tmp = icmp sle i32 %a.s, %b.s + %tmp1.s = select i1 %tmp, i32 2, i32 3 + ret i32 %tmp1.s +} + +define i32 @f5(i32 %a.u, i32 %b.u) { +entry: +; CHECK: f5: +; CHECK: it ls +; CHECK: movls + %tmp = icmp ule i32 %a.u, %b.u + %tmp1.s = select i1 %tmp, i32 2, i32 3 + ret i32 %tmp1.s +} + +define i32 @f6(i32 %a.u, i32 %b.u) { +entry: +; CHECK: f6: +; CHECK: it hi +; CHECK: movhi + %tmp = icmp ugt i32 %a.u, %b.u + %tmp1.s = select i1 %tmp, i32 2, i32 3 + ret i32 %tmp1.s +} + +define i32 @f7(i32 %a, i32 %b, i32 %c) { +entry: +; CHECK: f7: +; CHECK: it hi +; CHECK: lsrhi.w + %tmp1 = icmp ugt i32 %a, %b + %tmp2 = udiv i32 %c, 3 + %tmp3 = select i1 %tmp1, i32 %tmp2, i32 3 + ret i32 %tmp3 +} + +define i32 @f8(i32 %a, i32 %b, i32 %c) { +entry: +; CHECK: f8: +; CHECK: it lo +; CHECK: lsllo.w + %tmp1 = icmp ult i32 %a, %b + %tmp2 = mul i32 %c, 4 + %tmp3 = select i1 %tmp1, i32 %tmp2, i32 3 + ret i32 %tmp3 +} + +define i32 @f9(i32 %a, i32 %b, i32 %c) { +entry: +; CHECK: f9: +; CHECK: it ge +; CHECK: rorge.w + %tmp1 = icmp sge i32 %a, %b + %tmp2 = shl i32 %c, 10 + %tmp3 = lshr i32 %c, 22 + %tmp4 = or i32 %tmp2, %tmp3 + %tmp5 = select i1 %tmp1, i32 %tmp4, i32 3 + ret i32 %tmp5 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-select_xform.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-select_xform.ll new file mode 100644 index 0000000..ceefabb --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-select_xform.ll
@@ -0,0 +1,39 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind { +; CHECK: t1 +; CHECK: mvn r0, #-2147483648 +; CHECK: add r0, r1 +; CHECK: cmp r2, #10 +; CHECK: it gt +; CHECK: movgt r0, r1 + %tmp1 = icmp sgt i32 %c, 10 + %tmp2 = select i1 %tmp1, i32 0, i32 2147483647 + %tmp3 = add i32 %tmp2, %b + ret i32 %tmp3 +} + +define i32 @t2(i32 %a, i32 %b, i32 %c) nounwind { +; CHECK: t2 +; CHECK: add.w r0, r1, #-2147483648 +; CHECK: cmp r2, #10 +; CHECK: it gt +; CHECK: movgt r0, r1 + + %tmp1 = icmp sgt i32 %c, 10 + %tmp2 = select i1 %tmp1, i32 0, i32 2147483648 + %tmp3 = add i32 %tmp2, %b + ret i32 %tmp3 +} + +define i32 @t3(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { +; CHECK: t3 +; CHECK: sub.w r0, r1, #10 +; CHECK: cmp r2, #10 +; CHECK: it gt +; CHECK: movgt r0, r1 + %tmp1 = icmp sgt i32 %c, 10 + %tmp2 = select i1 %tmp1, i32 0, i32 10 + %tmp3 = sub i32 %b, %tmp2 + ret i32 %tmp3 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-shifter.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-shifter.ll new file mode 100644 index 0000000..98854a1 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-shifter.ll
@@ -0,0 +1,48 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | FileCheck %s + +define i32 @t2ADDrs_lsl(i32 %X, i32 %Y) { +; CHECK: t2ADDrs_lsl +; CHECK: add.w r0, r0, r1, lsl #16 + %A = shl i32 %Y, 16 + %B = add i32 %X, %A + ret i32 %B +} + +define i32 @t2ADDrs_lsr(i32 %X, i32 %Y) { +; CHECK: t2ADDrs_lsr +; CHECK: add.w r0, r0, r1, lsr #16 + %A = lshr i32 %Y, 16 + %B = add i32 %X, %A + ret i32 %B +} + +define i32 @t2ADDrs_asr(i32 %X, i32 %Y) { +; CHECK: t2ADDrs_asr +; CHECK: add.w r0, r0, r1, asr #16 + %A = ashr i32 %Y, 16 + %B = add i32 %X, %A + ret i32 %B +} + +; i32 ror(n) = (x >> n) | (x << (32 - n)) +define i32 @t2ADDrs_ror(i32 %X, i32 %Y) { +; CHECK: t2ADDrs_ror +; CHECK: add.w r0, r0, r1, ror #16 + %A = lshr i32 %Y, 16 + %B = shl i32 %Y, 16 + %C = or i32 %B, %A + %R = add i32 %X, %C + ret i32 %R +} + +define i32 @t2ADDrs_noRegShift(i32 %X, i32 %Y, i8 %sh) { +; CHECK: t2ADDrs_noRegShift +; CHECK: uxtb r2, r2 +; CHECK: lsls r1, r2 +; CHECK: add r0, r1 + %shift.upgrd.1 = zext i8 %sh to i32 + %A = shl i32 %Y, %shift.upgrd.1 + %B = add i32 %X, %A + ret i32 %B +} +
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-smla.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-smla.ll new file mode 100644 index 0000000..c128ecc --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-smla.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk,+t2dsp | FileCheck %s + +define i32 @f3(i32 %a, i16 %x, i32 %y) { +; CHECK: f3 +; CHECK: smlabt r0, r1, r2, r0 + %tmp = sext i16 %x to i32 ; <i32> [#uses=1] + %tmp2 = ashr i32 %y, 16 ; <i32> [#uses=1] + %tmp3 = mul i32 %tmp2, %tmp ; <i32> [#uses=1] + %tmp5 = add i32 %tmp3, %a ; <i32> [#uses=1] + ret i32 %tmp5 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-smul.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-smul.ll new file mode 100644 index 0000000..7a13269 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-smul.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk,+t2dsp | FileCheck %s + +@x = weak global i16 0 ; <i16*> [#uses=1] +@y = weak global i16 0 ; <i16*> [#uses=0] + +define i32 @f1(i32 %y) { +; CHECK: f1 +; CHECK: smulbt r0, r1, r0 + %tmp = load i16* @x ; <i16> [#uses=1] + %tmp1 = add i16 %tmp, 2 ; <i16> [#uses=1] + %tmp2 = sext i16 %tmp1 to i32 ; <i32> [#uses=1] + %tmp3 = ashr i32 %y, 16 ; <i32> [#uses=1] + %tmp4 = mul i32 %tmp2, %tmp3 ; <i32> [#uses=1] + ret i32 %tmp4 +} + +define i32 @f2(i32 %x, i32 %y) { +; CHECK: f2 +; CHECK: smultt r0, r1, r0 + %tmp1 = ashr i32 %x, 16 ; <i32> [#uses=1] + %tmp3 = ashr i32 %y, 16 ; <i32> [#uses=1] + %tmp4 = mul i32 %tmp3, %tmp1 ; <i32> [#uses=1] + ret i32 %tmp4 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-spill-q.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-spill-q.ll new file mode 100644 index 0000000..d9a0617 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-spill-q.ll
@@ -0,0 +1,91 @@ +; RUN: llc < %s -mtriple=thumbv7-elf -mattr=+neon | FileCheck %s +; PR4789 + +%bar = type { float, float, float } +%baz = type { i32, [16 x %bar], [16 x float], [16 x i32], i8 } +%foo = type { <4 x float> } +%quux = type { i32 (...)**, %baz*, i32 } +%quuz = type { %quux, i32, %bar, [128 x i8], [16 x %foo], %foo, %foo, %foo } + +declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly + +define void @aaa(%quuz* %this, i8* %block) { +; CHECK: aaa: +; CHECK: bic r4, r4, #15 +; CHECK: vst1.64 {{.*}}[{{.*}}, :128] +; CHECK: vld1.64 {{.*}}[{{.*}}, :128] +entry: + %aligned_vec = alloca <4 x float>, align 16 + %"alloca point" = bitcast i32 0 to i32 + %vecptr = bitcast <4 x float>* %aligned_vec to i8* + %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %vecptr, i32 1) nounwind + store float 6.300000e+01, float* undef, align 4 + %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1] + store float 0.000000e+00, float* undef, align 4 + %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1] + %ld3 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld4 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld5 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld6 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld7 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld8 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld9 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 
0.000000e+00, float* undef, align 4 + %ld10 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld11 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld12 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %val173 = load <4 x float>* undef ; <<4 x float>> [#uses=1] + br label %bb4 + +bb4: ; preds = %bb193, %entry + %besterror.0.2264 = phi <4 x float> [ undef, %entry ], [ %besterror.0.0, %bb193 ] ; <<4 x float>> [#uses=2] + %part0.0.0261 = phi <4 x float> [ zeroinitializer, %entry ], [ %23, %bb193 ] ; <<4 x float>> [#uses=2] + %3 = fmul <4 x float> zeroinitializer, %0 ; <<4 x float>> [#uses=2] + %4 = fadd <4 x float> %3, %part0.0.0261 ; <<4 x float>> [#uses=1] + %5 = shufflevector <4 x float> %3, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1] + %6 = shufflevector <2 x float> %5, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>> [#uses=1] + %7 = fmul <4 x float> %1, undef ; <<4 x float>> [#uses=1] + %8 = fadd <4 x float> %7, <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01> ; <<4 x float>> [#uses=1] + %9 = fptosi <4 x float> %8 to <4 x i32> ; <<4 x i32>> [#uses=1] + %10 = sitofp <4 x i32> %9 to <4 x float> ; <<4 x float>> [#uses=1] + %11 = fmul <4 x float> %10, %2 ; <<4 x float>> [#uses=1] + %12 = fmul <4 x float> undef, %6 ; <<4 x float>> [#uses=1] + %13 = fmul <4 x float> %11, %4 ; <<4 x float>> [#uses=1] + %14 = fsub <4 x float> %12, %13 ; <<4 x float>> [#uses=1] + %15 = fsub <4 x float> %14, undef ; <<4 x float>> [#uses=1] + %16 = fmul <4 x float> %15, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00> ; <<4 x float>> [#uses=1] + %17 = fadd <4 x float> %16, undef ; <<4 x float>> [#uses=1] + %18 = fmul <4 x float> %17, %val173 ; <<4 x 
float>> [#uses=1] + %19 = shufflevector <4 x float> %18, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1] + %20 = shufflevector <2 x float> %19, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] + %tmp1 = fadd <4 x float> %20, %ld3 + %tmp2 = fadd <4 x float> %tmp1, %ld4 + %tmp3 = fadd <4 x float> %tmp2, %ld5 + %tmp4 = fadd <4 x float> %tmp3, %ld6 + %tmp5 = fadd <4 x float> %tmp4, %ld7 + %tmp6 = fadd <4 x float> %tmp5, %ld8 + %tmp7 = fadd <4 x float> %tmp6, %ld9 + %tmp8 = fadd <4 x float> %tmp7, %ld10 + %tmp9 = fadd <4 x float> %tmp8, %ld11 + %21 = fadd <4 x float> %tmp9, %ld12 + %22 = fcmp ogt <4 x float> %besterror.0.2264, %21 ; <<4 x i1>> [#uses=0] + %tmp = extractelement <4 x i1> %22, i32 0 + br i1 %tmp, label %bb193, label %bb186 + +bb186: ; preds = %bb4 + br label %bb193 + +bb193: ; preds = %bb186, %bb4 + %besterror.0.0 = phi <4 x float> [ %21, %bb186 ], [ %besterror.0.2264, %bb4 ] ; <<4 x float>> [#uses=1] + %23 = fadd <4 x float> %part0.0.0261, zeroinitializer ; <<4 x float>> [#uses=1] + br label %bb4 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-str.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-str.ll new file mode 100644 index 0000000..11bb936 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-str.ll
@@ -0,0 +1,76 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +define i32 @f1(i32 %a, i32* %v) { +; CHECK: f1: +; CHECK: str r0, [r1] + store i32 %a, i32* %v + ret i32 %a +} + +define i32 @f2(i32 %a, i32* %v) { +; CHECK: f2: +; CHECK: str.w r0, [r1, #4092] + %tmp2 = getelementptr i32* %v, i32 1023 + store i32 %a, i32* %tmp2 + ret i32 %a +} + +define i32 @f2a(i32 %a, i32* %v) { +; CHECK: f2a: +; CHECK: str r0, [r1, #-128] + %tmp2 = getelementptr i32* %v, i32 -32 + store i32 %a, i32* %tmp2 + ret i32 %a +} + +define i32 @f3(i32 %a, i32* %v) { +; CHECK: f3: +; CHECK: mov.w r2, #4096 +; CHECK: str r0, [r1, r2] + %tmp2 = getelementptr i32* %v, i32 1024 + store i32 %a, i32* %tmp2 + ret i32 %a +} + +define i32 @f4(i32 %a, i32 %base) { +entry: +; CHECK: f4: +; CHECK: str r0, [r1, #-128] + %tmp1 = sub i32 %base, 128 + %tmp2 = inttoptr i32 %tmp1 to i32* + store i32 %a, i32* %tmp2 + ret i32 %a +} + +define i32 @f5(i32 %a, i32 %base, i32 %offset) { +entry: +; CHECK: f5: +; CHECK: str r0, [r1, r2] + %tmp1 = add i32 %base, %offset + %tmp2 = inttoptr i32 %tmp1 to i32* + store i32 %a, i32* %tmp2 + ret i32 %a +} + +define i32 @f6(i32 %a, i32 %base, i32 %offset) { +entry: +; CHECK: f6: +; CHECK: str.w r0, [r1, r2, lsl #2] + %tmp1 = shl i32 %offset, 2 + %tmp2 = add i32 %base, %tmp1 + %tmp3 = inttoptr i32 %tmp2 to i32* + store i32 %a, i32* %tmp3 + ret i32 %a +} + +define i32 @f7(i32 %a, i32 %base, i32 %offset) { +entry: +; CHECK: f7: +; CHECK: lsrs r2, r2, #2 +; CHECK: str r0, [r1, r2] + %tmp1 = lshr i32 %offset, 2 + %tmp2 = add i32 %base, %tmp1 + %tmp3 = inttoptr i32 %tmp2 to i32* + store i32 %a, i32* %tmp3 + ret i32 %a +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-str_post.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-str_post.ll new file mode 100644 index 0000000..bbfb447 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-str_post.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +define i16 @test1(i32* %X, i16* %A) { +; CHECK: test1: +; CHECK: strh {{.*}}[{{.*}}], #-4 + %Y = load i32* %X ; <i32> [#uses=1] + %tmp1 = trunc i32 %Y to i16 ; <i16> [#uses=1] + store i16 %tmp1, i16* %A + %tmp2 = ptrtoint i16* %A to i16 ; <i16> [#uses=1] + %tmp3 = sub i16 %tmp2, 4 ; <i16> [#uses=1] + ret i16 %tmp3 +} + +define i32 @test2(i32* %X, i32* %A) { +; CHECK: test2: +; CHECK: str {{.*}}[{{.*}}], + %Y = load i32* %X ; <i32> [#uses=1] + store i32 %Y, i32* %A + %tmp1 = ptrtoint i32* %A to i32 ; <i32> [#uses=1] + %tmp2 = sub i32 %tmp1, 4 ; <i32> [#uses=1] + ret i32 %tmp2 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-str_pre.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-str_pre.ll new file mode 100644 index 0000000..1e6616a --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-str_pre.ll
@@ -0,0 +1,21 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +define void @test1(i32* %X, i32* %A, i32** %dest) { +; CHECK: test1 +; CHECK: str r1, [r0, #16]! + %B = load i32* %A ; <i32> [#uses=1] + %Y = getelementptr i32* %X, i32 4 ; <i32*> [#uses=2] + store i32 %B, i32* %Y + store i32* %Y, i32** %dest + ret void +} + +define i16* @test2(i16* %X, i32* %A) { +; CHECK: test2 +; CHECK: strh r1, [r0, #8]! + %B = load i32* %A ; <i32> [#uses=1] + %Y = getelementptr i16* %X, i32 4 ; <i16*> [#uses=2] + %tmp = trunc i32 %B to i16 ; <i16> [#uses=1] + store i16 %tmp, i16* %Y + ret i16* %Y +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-strb.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-strb.ll new file mode 100644 index 0000000..7978e7f --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-strb.ll
@@ -0,0 +1,76 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +define i8 @f1(i8 %a, i8* %v) { +; CHECK: f1: +; CHECK: strb r0, [r1] + store i8 %a, i8* %v + ret i8 %a +} + +define i8 @f2(i8 %a, i8* %v) { +; CHECK: f2: +; CHECK: strb.w r0, [r1, #4092] + %tmp2 = getelementptr i8* %v, i32 4092 + store i8 %a, i8* %tmp2 + ret i8 %a +} + +define i8 @f2a(i8 %a, i8* %v) { +; CHECK: f2a: +; CHECK: strb r0, [r1, #-128] + %tmp2 = getelementptr i8* %v, i32 -128 + store i8 %a, i8* %tmp2 + ret i8 %a +} + +define i8 @f3(i8 %a, i8* %v) { +; CHECK: f3: +; CHECK: mov.w r2, #4096 +; CHECK: strb r0, [r1, r2] + %tmp2 = getelementptr i8* %v, i32 4096 + store i8 %a, i8* %tmp2 + ret i8 %a +} + +define i8 @f4(i8 %a, i32 %base) { +entry: +; CHECK: f4: +; CHECK: strb r0, [r1, #-128] + %tmp1 = sub i32 %base, 128 + %tmp2 = inttoptr i32 %tmp1 to i8* + store i8 %a, i8* %tmp2 + ret i8 %a +} + +define i8 @f5(i8 %a, i32 %base, i32 %offset) { +entry: +; CHECK: f5: +; CHECK: strb r0, [r1, r2] + %tmp1 = add i32 %base, %offset + %tmp2 = inttoptr i32 %tmp1 to i8* + store i8 %a, i8* %tmp2 + ret i8 %a +} + +define i8 @f6(i8 %a, i32 %base, i32 %offset) { +entry: +; CHECK: f6: +; CHECK: strb.w r0, [r1, r2, lsl #2] + %tmp1 = shl i32 %offset, 2 + %tmp2 = add i32 %base, %tmp1 + %tmp3 = inttoptr i32 %tmp2 to i8* + store i8 %a, i8* %tmp3 + ret i8 %a +} + +define i8 @f7(i8 %a, i32 %base, i32 %offset) { +entry: +; CHECK: f7: +; CHECK: lsrs r2, r2, #2 +; CHECK: strb r0, [r1, r2] + %tmp1 = lshr i32 %offset, 2 + %tmp2 = add i32 %base, %tmp1 + %tmp3 = inttoptr i32 %tmp2 to i8* + store i8 %a, i8* %tmp3 + ret i8 %a +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-strh.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-strh.ll new file mode 100644 index 0000000..97110a7 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-strh.ll
@@ -0,0 +1,76 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +define i16 @f1(i16 %a, i16* %v) { +; CHECK: f1: +; CHECK: strh r0, [r1] + store i16 %a, i16* %v + ret i16 %a +} + +define i16 @f2(i16 %a, i16* %v) { +; CHECK: f2: +; CHECK: strh.w r0, [r1, #4092] + %tmp2 = getelementptr i16* %v, i32 2046 + store i16 %a, i16* %tmp2 + ret i16 %a +} + +define i16 @f2a(i16 %a, i16* %v) { +; CHECK: f2a: +; CHECK: strh r0, [r1, #-128] + %tmp2 = getelementptr i16* %v, i32 -64 + store i16 %a, i16* %tmp2 + ret i16 %a +} + +define i16 @f3(i16 %a, i16* %v) { +; CHECK: f3: +; CHECK: mov.w r2, #4096 +; CHECK: strh r0, [r1, r2] + %tmp2 = getelementptr i16* %v, i32 2048 + store i16 %a, i16* %tmp2 + ret i16 %a +} + +define i16 @f4(i16 %a, i32 %base) { +entry: +; CHECK: f4: +; CHECK: strh r0, [r1, #-128] + %tmp1 = sub i32 %base, 128 + %tmp2 = inttoptr i32 %tmp1 to i16* + store i16 %a, i16* %tmp2 + ret i16 %a +} + +define i16 @f5(i16 %a, i32 %base, i32 %offset) { +entry: +; CHECK: f5: +; CHECK: strh r0, [r1, r2] + %tmp1 = add i32 %base, %offset + %tmp2 = inttoptr i32 %tmp1 to i16* + store i16 %a, i16* %tmp2 + ret i16 %a +} + +define i16 @f6(i16 %a, i32 %base, i32 %offset) { +entry: +; CHECK: f6: +; CHECK: strh.w r0, [r1, r2, lsl #2] + %tmp1 = shl i32 %offset, 2 + %tmp2 = add i32 %base, %tmp1 + %tmp3 = inttoptr i32 %tmp2 to i16* + store i16 %a, i16* %tmp3 + ret i16 %a +} + +define i16 @f7(i16 %a, i32 %base, i32 %offset) { +entry: +; CHECK: f7: +; CHECK: lsrs r2, r2, #2 +; CHECK: strh r0, [r1, r2] + %tmp1 = lshr i32 %offset, 2 + %tmp2 = add i32 %base, %tmp1 + %tmp3 = inttoptr i32 %tmp2 to i16* + store i16 %a, i16* %tmp3 + ret i16 %a +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-sub.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-sub.ll new file mode 100644 index 0000000..95335a2 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-sub.ll
@@ -0,0 +1,49 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +; 171 = 0x000000ab +define i32 @f1(i32 %a) { +; CHECK: f1: +; CHECK: subs r0, #171 + %tmp = sub i32 %a, 171 + ret i32 %tmp +} + +; 1179666 = 0x00120012 +define i32 @f2(i32 %a) { +; CHECK: f2: +; CHECK: sub.w r0, r0, #1179666 + %tmp = sub i32 %a, 1179666 + ret i32 %tmp +} + +; 872428544 = 0x34003400 +define i32 @f3(i32 %a) { +; CHECK: f3: +; CHECK: sub.w r0, r0, #872428544 + %tmp = sub i32 %a, 872428544 + ret i32 %tmp +} + +; 1448498774 = 0x56565656 +define i32 @f4(i32 %a) { +; CHECK: f4: +; CHECK: sub.w r0, r0, #1448498774 + %tmp = sub i32 %a, 1448498774 + ret i32 %tmp +} + +; 510 = 0x000001fe +define i32 @f5(i32 %a) { +; CHECK: f5: +; CHECK: sub.w r0, r0, #510 + %tmp = sub i32 %a, 510 + ret i32 %tmp +} + +; Don't change this to an add. +define i32 @f6(i32 %a) { +; CHECK: f6: +; CHECK: subs r0, #1 + %tmp = sub i32 %a, 1 + ret i32 %tmp +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-sub2.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-sub2.ll new file mode 100644 index 0000000..bb99cbd --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-sub2.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +define i32 @f1(i32 %a) { + %tmp = sub i32 %a, 4095 + ret i32 %tmp +} +; CHECK: f1: +; CHECK: subw r0, r0, #4095
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-sub3.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-sub3.ll new file mode 100644 index 0000000..1dbda57 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-sub3.ll
@@ -0,0 +1,55 @@ +; RUN: llc -march=thumb -mattr=+thumb2 < %s | FileCheck %s + +; 171 = 0x000000ab +define i64 @f1(i64 %a) { +; CHECK: f1 +; CHECK: subs r0, #171 +; CHECK: sbc r1, r1, #0 + %tmp = sub i64 %a, 171 + ret i64 %tmp +} + +; 1179666 = 0x00120012 +define i64 @f2(i64 %a) { +; CHECK: f2 +; CHECK: subs.w r0, r0, #1179666 +; CHECK: sbc r1, r1, #0 + %tmp = sub i64 %a, 1179666 + ret i64 %tmp +} + +; 872428544 = 0x34003400 +define i64 @f3(i64 %a) { +; CHECK: f3 +; CHECK: subs.w r0, r0, #872428544 +; CHECK: sbc r1, r1, #0 + %tmp = sub i64 %a, 872428544 + ret i64 %tmp +} + +; 1448498774 = 0x56565656 +define i64 @f4(i64 %a) { +; CHECK: f4 +; CHECK: subs.w r0, r0, #1448498774 +; CHECK: sbc r1, r1, #0 + %tmp = sub i64 %a, 1448498774 + ret i64 %tmp +} + +; 66846720 = 0x03fc0000 +define i64 @f5(i64 %a) { +; CHECK: f5 +; CHECK: subs.w r0, r0, #66846720 +; CHECK: sbc r1, r1, #0 + %tmp = sub i64 %a, 66846720 + ret i64 %tmp +} + +; 734439407618 = 0x000000ab00000002 +define i64 @f6(i64 %a) { +; CHECK: f6 +; CHECK: subs r0, #2 +; CHECK: sbc r1, r1, #171 + %tmp = sub i64 %a, 734439407618 + ret i64 %tmp +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-sub4.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-sub4.ll new file mode 100644 index 0000000..a040d17 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-sub4.ll
@@ -0,0 +1,42 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + +define i32 @f1(i32 %a, i32 %b) { +; CHECK: f1: +; CHECK: subs r0, r0, r1 + %tmp = sub i32 %a, %b + ret i32 %tmp +} + +define i32 @f2(i32 %a, i32 %b) { +; CHECK: f2: +; CHECK: sub.w r0, r0, r1, lsl #5 + %tmp = shl i32 %b, 5 + %tmp1 = sub i32 %a, %tmp + ret i32 %tmp1 +} + +define i32 @f3(i32 %a, i32 %b) { +; CHECK: f3: +; CHECK: sub.w r0, r0, r1, lsr #6 + %tmp = lshr i32 %b, 6 + %tmp1 = sub i32 %a, %tmp + ret i32 %tmp1 +} + +define i32 @f4(i32 %a, i32 %b) { +; CHECK: f4: +; CHECK: sub.w r0, r0, r1, asr #7 + %tmp = ashr i32 %b, 7 + %tmp1 = sub i32 %a, %tmp + ret i32 %tmp1 +} + +define i32 @f5(i32 %a, i32 %b) { +; CHECK: f5: +; CHECK: sub.w r0, r0, r0, ror #8 + %l8 = shl i32 %a, 24 + %r8 = lshr i32 %a, 8 + %tmp = or i32 %l8, %r8 + %tmp1 = sub i32 %a, %tmp + ret i32 %tmp1 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-sub5.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-sub5.ll new file mode 100644 index 0000000..6edd789 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-sub5.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 -mattr=+32bit | FileCheck %s + +define i64 @f1(i64 %a, i64 %b) { +; CHECK: f1: +; CHECK: subs.w r0, r0, r2 +; To test dead_carry, +32bit prevents sbc conveting to 16-bit sbcs +; CHECK: sbc.w r1, r1, r3 + %tmp = sub i64 %a, %b + ret i64 %tmp +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-sxt-uxt.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-sxt-uxt.ll new file mode 100644 index 0000000..ab888e6 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-sxt-uxt.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s -march=thumb -mcpu=cortex-m3 | FileCheck %s + +define i32 @test1(i16 zeroext %z) nounwind { +; CHECK: test1: +; CHECK: sxth + %r = sext i16 %z to i32 + ret i32 %r +} + +define i32 @test2(i8 zeroext %z) nounwind { +; CHECK: test2: +; CHECK: sxtb + %r = sext i8 %z to i32 + ret i32 %r +} + +define i32 @test3(i16 signext %z) nounwind { +; CHECK: test3: +; CHECK: uxth + %r = zext i16 %z to i32 + ret i32 %r +} + +define i32 @test4(i8 signext %z) nounwind { +; CHECK: test4: +; CHECK: uxtb + %r = zext i8 %z to i32 + ret i32 %r +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-sxt_rot.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-sxt_rot.ll new file mode 100644 index 0000000..f3d0edf --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-sxt_rot.ll
@@ -0,0 +1,31 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | FileCheck %s + +define i32 @test0(i8 %A) { +; CHECK: test0 +; CHECK: sxtb r0, r0 + %B = sext i8 %A to i32 + ret i32 %B +} + +define signext i8 @test1(i32 %A) { +; CHECK: test1 +; CHECK: sxtb.w r0, r0, ror #8 + %B = lshr i32 %A, 8 + %C = shl i32 %A, 24 + %D = or i32 %B, %C + %E = trunc i32 %D to i8 + ret i8 %E +} + +define signext i32 @test2(i32 %A, i32 %X) { +; CHECK: test2 +; CHECK: lsrs r0, r0, #8 +; CHECK: sxtab r0, r1, r0 + %B = lshr i32 %A, 8 + %C = shl i32 %A, 24 + %D = or i32 %B, %C + %E = trunc i32 %D to i8 + %F = sext i8 %E to i32 + %G = add i32 %F, %X + ret i32 %G +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-tbb.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-tbb.ll new file mode 100644 index 0000000..5dc3cc3 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-tbb.ll
@@ -0,0 +1,57 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=pic | FileCheck %s + +define void @bar(i32 %n.u) { +entry: +; CHECK: bar: +; CHECK: tbb +; CHECK: .align 1 + + switch i32 %n.u, label %bb12 [i32 1, label %bb i32 2, label %bb6 i32 4, label %bb7 i32 5, label %bb8 i32 6, label %bb10 i32 7, label %bb1 i32 8, label %bb3 i32 9, label %bb4 i32 10, label %bb9 i32 11, label %bb2 i32 12, label %bb5 i32 13, label %bb11 ] +bb: + tail call void(...)* @foo1() + ret void +bb1: + tail call void(...)* @foo2() + ret void +bb2: + tail call void(...)* @foo6() + ret void +bb3: + tail call void(...)* @foo3() + ret void +bb4: + tail call void(...)* @foo4() + ret void +bb5: + tail call void(...)* @foo5() + ret void +bb6: + tail call void(...)* @foo1() + ret void +bb7: + tail call void(...)* @foo2() + ret void +bb8: + tail call void(...)* @foo6() + ret void +bb9: + tail call void(...)* @foo3() + ret void +bb10: + tail call void(...)* @foo4() + ret void +bb11: + tail call void(...)* @foo5() + ret void +bb12: + tail call void(...)* @foo6() + ret void +} + +declare void @foo1(...) +declare void @foo2(...) +declare void @foo6(...) +declare void @foo3(...) +declare void @foo4(...) +declare void @foo5(...)
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-tbh.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-tbh.ll new file mode 100644 index 0000000..cd9c8e1 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-tbh.ll
@@ -0,0 +1,84 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=pic | FileCheck %s + +; Thumb2 target should reorder the bb's in order to use tbb / tbh. + + %struct.R_flstr = type { i32, i32, i8* } + %struct._T_tstr = type { i32, %struct.R_flstr*, %struct._T_tstr* } +@_C_nextcmd = external global i32 ; <i32*> [#uses=3] +@.str31 = external constant [28 x i8], align 1 ; <[28 x i8]*> [#uses=1] +@_T_gtol = external global %struct._T_tstr* ; <%struct._T_tstr**> [#uses=2] + +declare i32 @strlen(i8* nocapture) nounwind readonly + +declare void @Z_fatal(i8*) noreturn nounwind + +declare noalias i8* @calloc(i32, i32) nounwind + +define i32 @main(i32 %argc, i8** nocapture %argv) nounwind { +; CHECK: main: +; CHECK: tbb +entry: + br label %bb42.i + +bb1.i2: ; preds = %bb42.i + br label %bb40.i + +bb5.i: ; preds = %bb42.i + %0 = or i32 %argc, 32 ; <i32> [#uses=1] + br label %bb40.i + +bb7.i: ; preds = %bb42.i + call void @_T_addtol(%struct._T_tstr** @_T_gtol, i32 0, i8* null) nounwind + unreachable + +bb15.i: ; preds = %bb42.i + call void @_T_addtol(%struct._T_tstr** @_T_gtol, i32 2, i8* null) nounwind + unreachable + +bb23.i: ; preds = %bb42.i + %1 = call i32 @strlen(i8* null) nounwind readonly ; <i32> [#uses=0] + unreachable + +bb33.i: ; preds = %bb42.i + store i32 0, i32* @_C_nextcmd, align 4 + %2 = call noalias i8* @calloc(i32 21, i32 1) nounwind ; <i8*> [#uses=0] + unreachable + +bb34.i: ; preds = %bb42.i + %3 = load i32* @_C_nextcmd, align 4 ; <i32> [#uses=1] + %4 = add i32 %3, 1 ; <i32> [#uses=1] + store i32 %4, i32* @_C_nextcmd, align 4 + %5 = call noalias i8* @calloc(i32 22, i32 1) nounwind ; <i8*> [#uses=0] + unreachable + +bb35.i: ; preds = %bb42.i + %6 = call noalias i8* @calloc(i32 20, i32 1) nounwind ; <i8*> [#uses=0] + unreachable + +bb37.i: ; preds = %bb42.i + %7 = call noalias i8* @calloc(i32 14, i32 1) nounwind ; <i8*> [#uses=0] + unreachable + +bb39.i: ; preds = %bb42.i + call void @Z_fatal(i8* getelementptr ([28 x i8]* @.str31, i32 0, i32 
0)) nounwind + unreachable + +bb40.i: ; preds = %bb42.i, %bb5.i, %bb1.i2 + br label %bb42.i + +bb42.i: ; preds = %bb40.i, %entry + switch i32 %argc, label %bb39.i [ + i32 67, label %bb33.i + i32 70, label %bb35.i + i32 77, label %bb37.i + i32 83, label %bb34.i + i32 97, label %bb7.i + i32 100, label %bb5.i + i32 101, label %bb40.i + i32 102, label %bb23.i + i32 105, label %bb15.i + i32 116, label %bb1.i2 + ] +} + +declare void @_T_addtol(%struct._T_tstr** nocapture, i32, i8*) nounwind
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-teq.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-teq.ll new file mode 100644 index 0000000..00c928f --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-teq.ll
@@ -0,0 +1,57 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 -join-physregs | FileCheck %s + +; These tests implicitly depend on 'movs r0, #0' being rematerialized below the +; test as 'mov.w r0, #0'. So far, that requires physreg joining. + +; 0x000000bb = 187 +define i1 @f2(i32 %a) { + %tmp = xor i32 %a, 187 + %tmp1 = icmp eq i32 0, %tmp + ret i1 %tmp1 +} +; CHECK: f2: +; CHECK: teq.w r0, #187 + +; 0x00aa00aa = 11141290 +define i1 @f3(i32 %a) { + %tmp = xor i32 %a, 11141290 + %tmp1 = icmp eq i32 %tmp, 0 + ret i1 %tmp1 +} +; CHECK: f3: +; CHECK: teq.w r0, #11141290 + +; 0xcc00cc00 = 3422604288 +define i1 @f6(i32 %a) { + %tmp = xor i32 %a, 3422604288 + %tmp1 = icmp eq i32 0, %tmp + ret i1 %tmp1 +} +; CHECK: f6: +; CHECK: teq.w r0, #-872363008 + +; 0xdddddddd = 3722304989 +define i1 @f7(i32 %a) { + %tmp = xor i32 %a, 3722304989 + %tmp1 = icmp eq i32 %tmp, 0 + ret i1 %tmp1 +} +; CHECK: f7: +; CHECK: teq.w r0, #-572662307 + +; 0xdddddddd = 3722304989 +define i1 @f8(i32 %a) { + %tmp = xor i32 %a, 3722304989 + %tmp1 = icmp ne i32 0, %tmp + ret i1 %tmp1 +} + +; 0x00110000 = 1114112 +define i1 @f10(i32 %a) { + %tmp = xor i32 %a, 1114112 + %tmp1 = icmp eq i32 0, %tmp + ret i1 %tmp1 +} +; CHECK: f10: +; CHECK: teq.w r0, #1114112 +
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-teq2.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-teq2.ll new file mode 100644 index 0000000..8acae90 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-teq2.ll
@@ -0,0 +1,58 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 -join-physregs | FileCheck %s + +; These tests implicitly depend on 'movs r0, #0' being rematerialized below the +; tst as 'mov.w r0, #0'. So far, that requires physreg joining. + +define i1 @f2(i32 %a, i32 %b) { +; CHECK: f2 +; CHECK: teq.w r0, r1 + %tmp = xor i32 %a, %b + %tmp1 = icmp eq i32 %tmp, 0 + ret i1 %tmp1 +} + +define i1 @f4(i32 %a, i32 %b) { +; CHECK: f4 +; CHECK: teq.w r0, r1 + %tmp = xor i32 %a, %b + %tmp1 = icmp eq i32 0, %tmp + ret i1 %tmp1 +} + +define i1 @f6(i32 %a, i32 %b) { +; CHECK: f6 +; CHECK: teq.w r0, r1, lsl #5 + %tmp = shl i32 %b, 5 + %tmp1 = xor i32 %a, %tmp + %tmp2 = icmp eq i32 %tmp1, 0 + ret i1 %tmp2 +} + +define i1 @f7(i32 %a, i32 %b) { +; CHECK: f7 +; CHECK: teq.w r0, r1, lsr #6 + %tmp = lshr i32 %b, 6 + %tmp1 = xor i32 %a, %tmp + %tmp2 = icmp eq i32 %tmp1, 0 + ret i1 %tmp2 +} + +define i1 @f8(i32 %a, i32 %b) { +; CHECK: f8 +; CHECK: teq.w r0, r1, asr #7 + %tmp = ashr i32 %b, 7 + %tmp1 = xor i32 %a, %tmp + %tmp2 = icmp eq i32 %tmp1, 0 + ret i1 %tmp2 +} + +define i1 @f9(i32 %a, i32 %b) { +; CHECK: f9 +; CHECK: teq.w r0, r0, ror #8 + %l8 = shl i32 %a, 24 + %r8 = lshr i32 %a, 8 + %tmp = or i32 %l8, %r8 + %tmp1 = xor i32 %a, %tmp + %tmp2 = icmp eq i32 %tmp1, 0 + ret i1 %tmp2 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-tst.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-tst.ll new file mode 100644 index 0000000..43e208c --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-tst.ll
@@ -0,0 +1,49 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 -join-physregs | FileCheck %s + +; These tests implicitly depend on 'movs r0, #0' being rematerialized below the +; tst as 'mov.w r0, #0'. So far, that requires physreg joining. + +; 0x000000bb = 187 +define i1 @f2(i32 %a) { + %tmp = and i32 %a, 187 + %tmp1 = icmp eq i32 0, %tmp + ret i1 %tmp1 +} +; CHECK: f2: +; CHECK: tst.w r0, #187 + +; 0x00aa00aa = 11141290 +define i1 @f3(i32 %a) { + %tmp = and i32 %a, 11141290 + %tmp1 = icmp eq i32 %tmp, 0 + ret i1 %tmp1 +} +; CHECK: f3: +; CHECK: tst.w r0, #11141290 + +; 0xcc00cc00 = 3422604288 +define i1 @f6(i32 %a) { + %tmp = and i32 %a, 3422604288 + %tmp1 = icmp eq i32 0, %tmp + ret i1 %tmp1 +} +; CHECK: f6: +; CHECK: tst.w r0, #-872363008 + +; 0xdddddddd = 3722304989 +define i1 @f7(i32 %a) { + %tmp = and i32 %a, 3722304989 + %tmp1 = icmp eq i32 %tmp, 0 + ret i1 %tmp1 +} +; CHECK: f7: +; CHECK: tst.w r0, #-572662307 + +; 0x00110000 = 1114112 +define i1 @f10(i32 %a) { + %tmp = and i32 %a, 1114112 + %tmp1 = icmp eq i32 0, %tmp + ret i1 %tmp1 +} +; CHECK: f10: +; CHECK: tst.w r0, #1114112
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-tst2.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-tst2.ll new file mode 100644 index 0000000..bfe016f --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-tst2.ll
@@ -0,0 +1,58 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2 -join-physregs | FileCheck %s + +; These tests implicitly depend on 'movs r0, #0' being rematerialized below the +; tst as 'mov.w r0, #0'. So far, that requires physreg joining. + +define i1 @f2(i32 %a, i32 %b) { +; CHECK: f2: +; CHECK: tst r0, r1 + %tmp = and i32 %a, %b + %tmp1 = icmp eq i32 %tmp, 0 + ret i1 %tmp1 +} + +define i1 @f4(i32 %a, i32 %b) { +; CHECK: f4: +; CHECK: tst r0, r1 + %tmp = and i32 %a, %b + %tmp1 = icmp eq i32 0, %tmp + ret i1 %tmp1 +} + +define i1 @f6(i32 %a, i32 %b) { +; CHECK: f6: +; CHECK: tst.w r0, r1, lsl #5 + %tmp = shl i32 %b, 5 + %tmp1 = and i32 %a, %tmp + %tmp2 = icmp eq i32 %tmp1, 0 + ret i1 %tmp2 +} + +define i1 @f7(i32 %a, i32 %b) { +; CHECK: f7: +; CHECK: tst.w r0, r1, lsr #6 + %tmp = lshr i32 %b, 6 + %tmp1 = and i32 %a, %tmp + %tmp2 = icmp eq i32 %tmp1, 0 + ret i1 %tmp2 +} + +define i1 @f8(i32 %a, i32 %b) { +; CHECK: f8: +; CHECK: tst.w r0, r1, asr #7 + %tmp = ashr i32 %b, 7 + %tmp1 = and i32 %a, %tmp + %tmp2 = icmp eq i32 %tmp1, 0 + ret i1 %tmp2 +} + +define i1 @f9(i32 %a, i32 %b) { +; CHECK: f9: +; CHECK: tst.w r0, r0, ror #8 + %l8 = shl i32 %a, 24 + %r8 = lshr i32 %a, 8 + %tmp = or i32 %l8, %r8 + %tmp1 = and i32 %a, %tmp + %tmp2 = icmp eq i32 %tmp1, 0 + ret i1 %tmp2 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-uxt_rot.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-uxt_rot.ll new file mode 100644 index 0000000..03189aa --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
@@ -0,0 +1,28 @@ +; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | FileCheck %s + +define zeroext i8 @test1(i32 %A.u) { +; CHECK: test1 +; CHECK: uxtb r0, r0 + %B.u = trunc i32 %A.u to i8 + ret i8 %B.u +} + +define zeroext i32 @test2(i32 %A.u, i32 %B.u) { +; CHECK: test2 +; CHECK: uxtab r0, r0, r1 + %C.u = trunc i32 %B.u to i8 + %D.u = zext i8 %C.u to i32 + %E.u = add i32 %A.u, %D.u + ret i32 %E.u +} + +define zeroext i32 @test3(i32 %A.u) { +; CHECK: test3 +; CHECK: uxth.w r0, r0, ror #8 + %B.u = lshr i32 %A.u, 8 + %C.u = shl i32 %A.u, 24 + %D.u = or i32 %B.u, %C.u + %E.u = trunc i32 %D.u to i16 + %F.u = zext i16 %E.u to i32 + ret i32 %F.u +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/thumb2-uxtb.ll b/src/LLVM/test/CodeGen/Thumb2/thumb2-uxtb.ll new file mode 100644 index 0000000..35914b1 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/thumb2-uxtb.ll
@@ -0,0 +1,141 @@ +; RUN: llc < %s -march=thumb -mcpu=cortex-a8 | FileCheck %s -check-prefix=ARMv7A +; RUN: llc < %s -march=thumb -mcpu=cortex-m3 | FileCheck %s -check-prefix=ARMv7M + +define i32 @test1(i32 %x) { +; ARMv7A: test1 +; ARMv7A: uxtb16 r0, r0 + +; ARMv7M: test1 +; ARMv7M: bic r0, r0, #-16711936 + %tmp1 = and i32 %x, 16711935 ; <i32> [#uses=1] + ret i32 %tmp1 +} + +; PR7503 +define i32 @test2(i32 %x) { +; ARMv7A: test2 +; ARMv7A: uxtb16 r0, r0, ror #8 + +; ARMv7M: test2 +; ARMv7M: mov.w r1, #16711935 +; ARMv7M: and.w r0, r1, r0, lsr #8 + %tmp1 = lshr i32 %x, 8 ; <i32> [#uses=1] + %tmp2 = and i32 %tmp1, 16711935 ; <i32> [#uses=1] + ret i32 %tmp2 +} + +define i32 @test3(i32 %x) { +; ARMv7A: test3 +; ARMv7A: uxtb16 r0, r0, ror #8 + +; ARMv7M: test3 +; ARMv7M: mov.w r1, #16711935 +; ARMv7M: and.w r0, r1, r0, lsr #8 + %tmp1 = lshr i32 %x, 8 ; <i32> [#uses=1] + %tmp2 = and i32 %tmp1, 16711935 ; <i32> [#uses=1] + ret i32 %tmp2 +} + +define i32 @test4(i32 %x) { +; ARMv7A: test4 +; ARMv7A: uxtb16 r0, r0, ror #8 + +; ARMv7M: test4 +; ARMv7M: mov.w r1, #16711935 +; ARMv7M: and.w r0, r1, r0, lsr #8 + %tmp1 = lshr i32 %x, 8 ; <i32> [#uses=1] + %tmp6 = and i32 %tmp1, 16711935 ; <i32> [#uses=1] + ret i32 %tmp6 +} + +define i32 @test5(i32 %x) { +; ARMv7A: test5 +; ARMv7A: uxtb16 r0, r0, ror #8 + +; ARMv7M: test5 +; ARMv7M: mov.w r1, #16711935 +; ARMv7M: and.w r0, r1, r0, lsr #8 + %tmp1 = lshr i32 %x, 8 ; <i32> [#uses=1] + %tmp2 = and i32 %tmp1, 16711935 ; <i32> [#uses=1] + ret i32 %tmp2 +} + +define i32 @test6(i32 %x) { +; ARMv7A: test6 +; ARMv7A: uxtb16 r0, r0, ror #16 + +; ARMv7M: test6 +; ARMv7M: mov.w r1, #16711935 +; ARMv7M: and.w r0, r1, r0, ror #16 + %tmp1 = lshr i32 %x, 16 ; <i32> [#uses=1] + %tmp2 = and i32 %tmp1, 255 ; <i32> [#uses=1] + %tmp4 = shl i32 %x, 16 ; <i32> [#uses=1] + %tmp5 = and i32 %tmp4, 16711680 ; <i32> [#uses=1] + %tmp6 = or i32 %tmp2, %tmp5 ; <i32> [#uses=1] + ret i32 %tmp6 +} + +define i32 @test7(i32 %x) { +; ARMv7A: test7 +; ARMv7A: uxtb16 
r0, r0, ror #16 + +; ARMv7M: test7 +; ARMv7M: mov.w r1, #16711935 +; ARMv7M: and.w r0, r1, r0, ror #16 + %tmp1 = lshr i32 %x, 16 ; <i32> [#uses=1] + %tmp2 = and i32 %tmp1, 255 ; <i32> [#uses=1] + %tmp4 = shl i32 %x, 16 ; <i32> [#uses=1] + %tmp5 = and i32 %tmp4, 16711680 ; <i32> [#uses=1] + %tmp6 = or i32 %tmp2, %tmp5 ; <i32> [#uses=1] + ret i32 %tmp6 +} + +define i32 @test8(i32 %x) { +; ARMv7A: test8 +; ARMv7A: uxtb16 r0, r0, ror #24 + +; ARMv7M: test8 +; ARMv7M: mov.w r1, #16711935 +; ARMv7M: and.w r0, r1, r0, ror #24 + %tmp1 = shl i32 %x, 8 ; <i32> [#uses=1] + %tmp2 = and i32 %tmp1, 16711680 ; <i32> [#uses=1] + %tmp5 = lshr i32 %x, 24 ; <i32> [#uses=1] + %tmp6 = or i32 %tmp2, %tmp5 ; <i32> [#uses=1] + ret i32 %tmp6 +} + +define i32 @test9(i32 %x) { +; ARMv7A: test9 +; ARMv7A: uxtb16 r0, r0, ror #24 + +; ARMv7M: test9 +; ARMv7M: mov.w r1, #16711935 +; ARMv7M: and.w r0, r1, r0, ror #24 + %tmp1 = lshr i32 %x, 24 ; <i32> [#uses=1] + %tmp4 = shl i32 %x, 8 ; <i32> [#uses=1] + %tmp5 = and i32 %tmp4, 16711680 ; <i32> [#uses=1] + %tmp6 = or i32 %tmp5, %tmp1 ; <i32> [#uses=1] + ret i32 %tmp6 +} + +define i32 @test10(i32 %p0) { +; ARMv7A: test10 +; ARMv7A: mov.w r1, #16253176 +; ARMv7A: and.w r0, r1, r0, lsr #7 +; ARMv7A: lsrs r1, r0, #5 +; ARMv7A: uxtb16 r1, r1 +; ARMv7A: orrs r0, r1 + +; ARMv7M: test10 +; ARMv7M: mov.w r1, #16253176 +; ARMv7M: mov.w r2, #458759 +; ARMv7M: and.w r0, r1, r0, lsr #7 +; ARMv7M: and.w r1, r2, r0, lsr #5 +; ARMv7M: orrs r0, r1 + %tmp1 = lshr i32 %p0, 7 ; <i32> [#uses=1] + %tmp2 = and i32 %tmp1, 16253176 ; <i32> [#uses=2] + %tmp4 = lshr i32 %tmp2, 5 ; <i32> [#uses=1] + %tmp5 = and i32 %tmp4, 458759 ; <i32> [#uses=1] + %tmp7 = or i32 %tmp5, %tmp2 ; <i32> [#uses=1] + ret i32 %tmp7 +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/tls1.ll b/src/LLVM/test/CodeGen/Thumb2/tls1.ll new file mode 100644 index 0000000..1e55557 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/tls1.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi | \ +; RUN: grep {i(tpoff)} +; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi | \ +; RUN: grep {__aeabi_read_tp} +; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi \ +; RUN: -relocation-model=pic | grep {__tls_get_addr} + + +@i = thread_local global i32 15 ; <i32*> [#uses=2] + +define i32 @f() { +entry: + %tmp1 = load i32* @i ; <i32> [#uses=1] + ret i32 %tmp1 +} + +define i32* @g() { +entry: + ret i32* @i +}
diff --git a/src/LLVM/test/CodeGen/Thumb2/tls2.ll b/src/LLVM/test/CodeGen/Thumb2/tls2.ll new file mode 100644 index 0000000..b8a0657 --- /dev/null +++ b/src/LLVM/test/CodeGen/Thumb2/tls2.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi | FileCheck %s -check-prefix=CHECK-NOT-PIC +; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -relocation-model=pic | FileCheck %s -check-prefix=CHECK-PIC + +@i = external thread_local global i32 ; <i32*> [#uses=2] + +define i32 @f() { +entry: +; CHECK-NOT-PIC: f: +; CHECK-NOT-PIC: add r0, pc +; CHECK-NOT-PIC: ldr r1, [r0] +; CHECK-NOT-PIC: i(gottpoff) + +; CHECK-PIC: f: +; CHECK-PIC: bl __tls_get_addr(PLT) + %tmp1 = load i32* @i ; <i32> [#uses=1] + ret i32 %tmp1 +} + +define i32* @g() { +entry: +; CHECK-NOT-PIC: g: +; CHECK-NOT-PIC: add r0, pc +; CHECK-NOT-PIC: ldr r1, [r0] +; CHECK-NOT-PIC: i(gottpoff) + +; CHECK-PIC: g: +; CHECK-PIC: bl __tls_get_addr(PLT) + ret i32* @i +}
diff --git a/src/LLVM/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll b/src/LLVM/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll new file mode 100644 index 0000000..353faf6 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll
@@ -0,0 +1,18 @@ +; The old instruction selector used to load all arguments to a call up in +; registers, then start pushing them all onto the stack. This is bad news as +; it makes a ton of annoying overlapping live ranges. This code should not +; cause spills! +; +; RUN: llc < %s -march=x86 -stats |& not grep spilled + +target datalayout = "e-p:32:32" + +define i32 @test(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { + ret i32 0 +} + +define i32 @main() { + %X = call i32 @test( i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10 ) ; <i32> [#uses=1] + ret i32 %X +} +
diff --git a/src/LLVM/test/CodeGen/X86/2003-08-23-DeadBlockTest.ll b/src/LLVM/test/CodeGen/X86/2003-08-23-DeadBlockTest.ll new file mode 100644 index 0000000..88d0c4f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2003-08-23-DeadBlockTest.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86 + +define i32 @test() { +entry: + ret i32 7 +Test: ; No predecessors! + %A = call i32 @test( ) ; <i32> [#uses=1] + %B = call i32 @test( ) ; <i32> [#uses=1] + %C = add i32 %A, %B ; <i32> [#uses=1] + ret i32 %C +} +
diff --git a/src/LLVM/test/CodeGen/X86/2003-11-03-GlobalBool.ll b/src/LLVM/test/CodeGen/X86/2003-11-03-GlobalBool.ll new file mode 100644 index 0000000..ca9e250 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2003-11-03-GlobalBool.ll
@@ -0,0 +1,4 @@ +; RUN: llc < %s -march=x86 | \ +; RUN: not grep {.byte\[\[:space:\]\]*true} + +@X = global i1 true ; <i1*> [#uses=0]
diff --git a/src/LLVM/test/CodeGen/X86/2004-02-13-FrameReturnAddress.ll b/src/LLVM/test/CodeGen/X86/2004-02-13-FrameReturnAddress.ll new file mode 100644 index 0000000..a4e85a9 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2004-02-13-FrameReturnAddress.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=x86 | grep {(%esp} +; RUN: llc < %s -march=x86 | grep {pushl %ebp} | count 1 +; RUN: llc < %s -march=x86 | grep {popl %ebp} | count 1 + +declare i8* @llvm.returnaddress(i32) + +declare i8* @llvm.frameaddress(i32) + +define i8* @test1() { + %X = call i8* @llvm.returnaddress( i32 0 ) ; <i8*> [#uses=1] + ret i8* %X +} + +define i8* @test2() { + %X = call i8* @llvm.frameaddress( i32 0 ) ; <i8*> [#uses=1] + ret i8* %X +} +
diff --git a/src/LLVM/test/CodeGen/X86/2004-02-14-InefficientStackPointer.ll b/src/LLVM/test/CodeGen/X86/2004-02-14-InefficientStackPointer.ll new file mode 100644 index 0000000..40dd887 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2004-02-14-InefficientStackPointer.ll
@@ -0,0 +1,5 @@ +; RUN: llc < %s -march=x86 | grep -i ESP | not grep sub + +define i32 @test(i32 %X) { + ret i32 %X +}
diff --git a/src/LLVM/test/CodeGen/X86/2004-02-22-Casts.ll b/src/LLVM/test/CodeGen/X86/2004-02-22-Casts.ll new file mode 100644 index 0000000..7ff12a9 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2004-02-22-Casts.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86 +define i1 @test1(double %X) { + %V = fcmp one double %X, 0.000000e+00 ; <i1> [#uses=1] + ret i1 %V +} + +define double @test2(i64 %X) { + %V = uitofp i64 %X to double ; <double> [#uses=1] + ret double %V +} + +
diff --git a/src/LLVM/test/CodeGen/X86/2004-03-30-Select-Max.ll b/src/LLVM/test/CodeGen/X86/2004-03-30-Select-Max.ll new file mode 100644 index 0000000..28c47b3 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2004-03-30-Select-Max.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=x86 -mcpu=yonah | not grep {j\[lgbe\]} + +define i32 @max(i32 %A, i32 %B) nounwind { + %gt = icmp sgt i32 %A, %B ; <i1> [#uses=1] + %R = select i1 %gt, i32 %A, i32 %B ; <i32> [#uses=1] + ret i32 %R +} +
diff --git a/src/LLVM/test/CodeGen/X86/2004-04-09-SameValueCoalescing.ll b/src/LLVM/test/CodeGen/X86/2004-04-09-SameValueCoalescing.ll new file mode 100644 index 0000000..d8f26bd --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2004-04-09-SameValueCoalescing.ll
@@ -0,0 +1,13 @@ +; Linear scan does not currently coalesce any two variables that have +; overlapping live intervals. When two overlapping intervals have the same +; value, they can be joined though. +; +; RUN: llc < %s -march=x86 -regalloc=linearscan | \ +; RUN: not grep {mov %\[A-Z\]\\\{2,3\\\}, %\[A-Z\]\\\{2,3\\\}} + +define i64 @test(i64 %x) { +entry: + %tmp.1 = mul i64 %x, 4294967297 ; <i64> [#uses=1] + ret i64 %tmp.1 +} +
diff --git a/src/LLVM/test/CodeGen/X86/2004-04-13-FPCMOV-Crash.ll b/src/LLVM/test/CodeGen/X86/2004-04-13-FPCMOV-Crash.ll new file mode 100644 index 0000000..5fdf8f7 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2004-04-13-FPCMOV-Crash.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=x86 + +define double @test(double %d) { + %X = select i1 false, double %d, double %d ; <double> [#uses=1] + ret double %X +} +
diff --git a/src/LLVM/test/CodeGen/X86/2004-06-10-StackifierCrash.ll b/src/LLVM/test/CodeGen/X86/2004-06-10-StackifierCrash.ll new file mode 100644 index 0000000..5a1a709 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2004-06-10-StackifierCrash.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=x86 + +define i1 @T(double %X) { + %V = fcmp oeq double %X, %X ; <i1> [#uses=1] + ret i1 %V +}
diff --git a/src/LLVM/test/CodeGen/X86/2004-10-08-SelectSetCCFold.ll b/src/LLVM/test/CodeGen/X86/2004-10-08-SelectSetCCFold.ll new file mode 100644 index 0000000..14e14b1 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2004-10-08-SelectSetCCFold.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=x86 + +define i1 @test(i1 %C, i1 %D, i32 %X, i32 %Y) { + %E = icmp slt i32 %X, %Y ; <i1> [#uses=1] + %F = select i1 %C, i1 %D, i1 %E ; <i1> [#uses=1] + ret i1 %F +} +
diff --git a/src/LLVM/test/CodeGen/X86/2005-01-17-CycleInDAG.ll b/src/LLVM/test/CodeGen/X86/2005-01-17-CycleInDAG.ll new file mode 100644 index 0000000..36a2649 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2005-01-17-CycleInDAG.ll
@@ -0,0 +1,17 @@ +; This testcase was distilled from 132.ijpeg. Bsaically we cannot fold the +; load into the sub instruction here as it induces a cycle in the dag, which +; is invalid code (there is no correct way to order the instruction). Check +; that we do not fold the load into the sub. + +; RUN: llc < %s -march=x86 | not grep sub.*GLOBAL + +@GLOBAL = external global i32 ; <i32*> [#uses=1] + +define i32 @test(i32* %P1, i32* %P2, i32* %P3) nounwind { + %L = load i32* @GLOBAL ; <i32> [#uses=1] + store i32 12, i32* %P2 + %Y = load i32* %P3 ; <i32> [#uses=1] + %Z = sub i32 %Y, %L ; <i32> [#uses=1] + ret i32 %Z +} +
diff --git a/src/LLVM/test/CodeGen/X86/2005-02-14-IllegalAssembler.ll b/src/LLVM/test/CodeGen/X86/2005-02-14-IllegalAssembler.ll new file mode 100644 index 0000000..a61a452 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2005-02-14-IllegalAssembler.ll
@@ -0,0 +1,5 @@ +; RUN: llc < %s -march=x86 | not grep 18446744073709551612 + +@A = external global i32 ; <i32*> [#uses=1] +@Y = global i32* getelementptr (i32* @A, i32 -1) ; <i32**> [#uses=0] +
diff --git a/src/LLVM/test/CodeGen/X86/2005-05-08-FPStackifierPHI.ll b/src/LLVM/test/CodeGen/X86/2005-05-08-FPStackifierPHI.ll new file mode 100644 index 0000000..a31d062 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2005-05-08-FPStackifierPHI.ll
@@ -0,0 +1,38 @@ +; RUN: llc < %s -march=x86 -mcpu=generic +; Make sure LLC doesn't crash in the stackifier due to FP PHI nodes. + +define void @radfg_() { +entry: + br i1 false, label %no_exit.16.preheader, label %loopentry.0 +loopentry.0: ; preds = %entry + ret void +no_exit.16.preheader: ; preds = %entry + br label %no_exit.16 +no_exit.16: ; preds = %no_exit.16, %no_exit.16.preheader + br i1 false, label %loopexit.16.loopexit, label %no_exit.16 +loopexit.16.loopexit: ; preds = %no_exit.16 + br label %no_exit.18 +no_exit.18: ; preds = %loopexit.20, %loopexit.16.loopexit + %tmp.882 = fadd float 0.000000e+00, 0.000000e+00 ; <float> [#uses=2] + br i1 false, label %loopexit.19, label %no_exit.19.preheader +no_exit.19.preheader: ; preds = %no_exit.18 + ret void +loopexit.19: ; preds = %no_exit.18 + br i1 false, label %loopexit.20, label %no_exit.20 +no_exit.20: ; preds = %loopexit.21, %loopexit.19 + %ai2.1122.tmp.3 = phi float [ %tmp.958, %loopexit.21 ], [ %tmp.882, %loopexit.19 ] ; <float> [#uses=1] + %tmp.950 = fmul float %tmp.882, %ai2.1122.tmp.3 ; <float> [#uses=1] + %tmp.951 = fsub float 0.000000e+00, %tmp.950 ; <float> [#uses=1] + %tmp.958 = fadd float 0.000000e+00, 0.000000e+00 ; <float> [#uses=1] + br i1 false, label %loopexit.21, label %no_exit.21.preheader +no_exit.21.preheader: ; preds = %no_exit.20 + ret void +loopexit.21: ; preds = %no_exit.20 + br i1 false, label %loopexit.20, label %no_exit.20 +loopexit.20: ; preds = %loopexit.21, %loopexit.19 + %ar2.1124.tmp.2 = phi float [ 0.000000e+00, %loopexit.19 ], [ %tmp.951, %loopexit.21 ] ; <float> [#uses=0] + br i1 false, label %loopexit.18.loopexit, label %no_exit.18 +loopexit.18.loopexit: ; preds = %loopexit.20 + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll b/src/LLVM/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll new file mode 100644 index 0000000..2dc92c0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -march=x86 | \ +; RUN: grep shld | count 1 +; +; Check that the isel does not fold the shld, which already folds a load +; and has two uses, into a store. + +@A = external global i32 ; <i32*> [#uses=2] + +define i32 @test5(i32 %B, i8 %C) { + %tmp.1 = load i32* @A ; <i32> [#uses=1] + %shift.upgrd.1 = zext i8 %C to i32 ; <i32> [#uses=1] + %tmp.2 = shl i32 %tmp.1, %shift.upgrd.1 ; <i32> [#uses=1] + %tmp.3 = sub i8 32, %C ; <i8> [#uses=1] + %shift.upgrd.2 = zext i8 %tmp.3 to i32 ; <i32> [#uses=1] + %tmp.4 = lshr i32 %B, %shift.upgrd.2 ; <i32> [#uses=1] + %tmp.5 = or i32 %tmp.4, %tmp.2 ; <i32> [#uses=2] + store i32 %tmp.5, i32* @A + ret i32 %tmp.5 +} +
diff --git a/src/LLVM/test/CodeGen/X86/2006-03-01-InstrSchedBug.ll b/src/LLVM/test/CodeGen/X86/2006-03-01-InstrSchedBug.ll new file mode 100644 index 0000000..cce5868 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2006-03-01-InstrSchedBug.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86 | not grep {subl.*%esp} + +define i32 @f(i32 %a, i32 %b) { + %tmp.2 = mul i32 %a, %a ; <i32> [#uses=1] + %tmp.5 = shl i32 %a, 1 ; <i32> [#uses=1] + %tmp.6 = mul i32 %tmp.5, %b ; <i32> [#uses=1] + %tmp.10 = mul i32 %b, %b ; <i32> [#uses=1] + %tmp.7 = add i32 %tmp.10, %tmp.2 ; <i32> [#uses=1] + %tmp.11 = add i32 %tmp.7, %tmp.6 ; <i32> [#uses=1] + ret i32 %tmp.11 +} +
diff --git a/src/LLVM/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll b/src/LLVM/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll new file mode 100644 index 0000000..155fd57 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86 -stats |& \ +; RUN: grep asm-printer | grep 7 + +define i32 @g(i32 %a, i32 %b) nounwind { + %tmp.1 = shl i32 %b, 1 ; <i32> [#uses=1] + %tmp.3 = add i32 %tmp.1, %a ; <i32> [#uses=1] + %tmp.5 = mul i32 %tmp.3, %a ; <i32> [#uses=1] + %tmp.8 = mul i32 %b, %b ; <i32> [#uses=1] + %tmp.9 = add i32 %tmp.5, %tmp.8 ; <i32> [#uses=1] + ret i32 %tmp.9 +} +
diff --git a/src/LLVM/test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll b/src/LLVM/test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll new file mode 100644 index 0000000..f55c6f3 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll
@@ -0,0 +1,50 @@ +; RUN: llc < %s -march=x86 -mcpu=yonah +; END. + +target datalayout = "e-p:32:32" +target triple = "i686-apple-darwin8.6.1" + %struct.GLTColor4 = type { float, float, float, float } + %struct.GLTCoord3 = type { float, float, float } + %struct.__GLIContextRec = type { { %struct.anon, { [24 x [16 x float]], [24 x [16 x float]] }, %struct.GLTColor4, { float, float, float, float, %struct.GLTCoord3, float } }, { float, float, float, float, float, float, float, float, [4 x i32], [4 x i32], [4 x i32] } } + %struct.__GLvertex = type { %struct.GLTColor4, %struct.GLTColor4, %struct.GLTColor4, %struct.GLTColor4, %struct.GLTColor4, %struct.GLTCoord3, float, %struct.GLTColor4, float, float, float, i8, i8, i8, i8, [4 x float], [2 x i8*], i32, i32, [16 x %struct.GLTColor4] } + %struct.anon = type { float, float, float, float, float, float, float, float } + +declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) + +declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) + +declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) + +define void @gleLLVMVecInterpolateClip() { +entry: + br i1 false, label %cond_false, label %cond_false183 +cond_false: ; preds = %entry + br i1 false, label %cond_false183, label %cond_true69 +cond_true69: ; preds = %cond_false + ret void +cond_false183: ; preds = %cond_false, %entry + %vuizmsk.0.1 = phi <4 x i32> [ < i32 -1, i32 -1, i32 -1, i32 0 >, %entry ], [ < i32 -1, i32 0, i32 0, i32 0 >, %cond_false ] ; <<4 x i32>> [#uses=2] + %tmp192 = extractelement <4 x i32> %vuizmsk.0.1, i32 2 ; <i32> [#uses=1] + %tmp193 = extractelement <4 x i32> %vuizmsk.0.1, i32 3 ; <i32> [#uses=2] + %tmp195 = insertelement <4 x i32> zeroinitializer, i32 %tmp192, i32 1 ; <<4 x i32>> [#uses=1] + %tmp196 = insertelement <4 x i32> %tmp195, i32 %tmp193, i32 2 ; <<4 x i32>> [#uses=1] + %tmp197 = insertelement <4 x i32> %tmp196, i32 %tmp193, i32 3 ; <<4 x i32>> [#uses=1] + %tmp336 = and <4 x i32> zeroinitializer, %tmp197 ; <<4 x i32>> 
[#uses=1] + %tmp337 = bitcast <4 x i32> %tmp336 to <4 x float> ; <<4 x float>> [#uses=1] + %tmp378 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp337, <4 x float> zeroinitializer, i8 1 ) ; <<4 x float>> [#uses=1] + %tmp379 = bitcast <4 x float> %tmp378 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp388 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128( <4 x i32> zeroinitializer, <4 x i32> %tmp379 ) ; <<4 x i32>> [#uses=1] + %tmp392 = bitcast <8 x i16> %tmp388 to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp399 = extractelement <8 x i16> %tmp392, i32 7 ; <i16> [#uses=1] + %tmp423 = insertelement <8 x i16> zeroinitializer, i16 %tmp399, i32 7 ; <<8 x i16>> [#uses=1] + %tmp427 = bitcast <8 x i16> %tmp423 to <16 x i8> ; <<16 x i8>> [#uses=1] + %tmp428 = tail call i32 @llvm.x86.sse2.pmovmskb.128( <16 x i8> %tmp427 ) ; <i32> [#uses=1] + %tmp432 = trunc i32 %tmp428 to i8 ; <i8> [#uses=1] + %tmp = and i8 %tmp432, 42 ; <i8> [#uses=1] + %tmp436 = bitcast i8 %tmp to i8 ; <i8> [#uses=1] + %tmp446 = zext i8 %tmp436 to i32 ; <i32> [#uses=1] + %tmp447 = shl i32 %tmp446, 24 ; <i32> [#uses=1] + %tmp449 = or i32 0, %tmp447 ; <i32> [#uses=1] + store i32 %tmp449, i32* null + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll b/src/LLVM/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll new file mode 100644 index 0000000..29f4959 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin8 -relocation-model=static > %t +; RUN: grep {movl _last} %t | count 1 +; RUN: grep {cmpl.*_last} %t | count 1 + +@block = external global i8* ; <i8**> [#uses=1] +@last = external global i32 ; <i32*> [#uses=3] + +define i1 @loadAndRLEsource_no_exit_2E_1_label_2E_0(i32 %tmp.21.reload, i32 %tmp.8) { +newFuncRoot: + br label %label.0 +label.0.no_exit.1_crit_edge.exitStub: ; preds = %label.0 + ret i1 true +codeRepl5.exitStub: ; preds = %label.0 + ret i1 false +label.0: ; preds = %newFuncRoot + %tmp.35 = load i32* @last ; <i32> [#uses=1] + %inc.1 = add i32 %tmp.35, 1 ; <i32> [#uses=2] + store i32 %inc.1, i32* @last + %tmp.36 = load i8** @block ; <i8*> [#uses=1] + %tmp.38 = getelementptr i8* %tmp.36, i32 %inc.1 ; <i8*> [#uses=1] + %tmp.40 = trunc i32 %tmp.21.reload to i8 ; <i8> [#uses=1] + store i8 %tmp.40, i8* %tmp.38 + %tmp.910 = load i32* @last ; <i32> [#uses=1] + %tmp.1111 = icmp slt i32 %tmp.910, %tmp.8 ; <i1> [#uses=1] + %tmp.1412 = icmp ne i32 %tmp.21.reload, 257 ; <i1> [#uses=1] + %tmp.1613 = and i1 %tmp.1111, %tmp.1412 ; <i1> [#uses=1] + br i1 %tmp.1613, label %label.0.no_exit.1_crit_edge.exitStub, label %codeRepl5.exitStub +} +
diff --git a/src/LLVM/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll b/src/LLVM/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll new file mode 100644 index 0000000..77a8833 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll
@@ -0,0 +1,76 @@ +; RUN: llc < %s -march=x86 -mcpu=yonah -stats |& \ +; RUN: not grep {Number of register spills} +; END. + + +define i32 @foo(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c, <4 x float>* %d) { + %tmp44 = load <4 x float>* %a ; <<4 x float>> [#uses=9] + %tmp46 = load <4 x float>* %b ; <<4 x float>> [#uses=1] + %tmp48 = load <4 x float>* %c ; <<4 x float>> [#uses=1] + %tmp50 = load <4 x float>* %d ; <<4 x float>> [#uses=1] + %tmp51 = bitcast <4 x float> %tmp44 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp = shufflevector <4 x i32> %tmp51, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>> [#uses=2] + %tmp52 = bitcast <4 x i32> %tmp to <4 x float> ; <<4 x float>> [#uses=1] + %tmp60 = xor <4 x i32> %tmp, < i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648 > ; <<4 x i32>> [#uses=1] + %tmp61 = bitcast <4 x i32> %tmp60 to <4 x float> ; <<4 x float>> [#uses=1] + %tmp74 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp52, <4 x float> %tmp44, i8 1 ) ; <<4 x float>> [#uses=1] + %tmp75 = bitcast <4 x float> %tmp74 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp88 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp44, <4 x float> %tmp61, i8 1 ) ; <<4 x float>> [#uses=1] + %tmp89 = bitcast <4 x float> %tmp88 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp98 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp75, <4 x i32> %tmp89 ) ; <<4 x i32>> [#uses=1] + %tmp102 = bitcast <8 x i16> %tmp98 to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp.upgrd.1 = shufflevector <8 x i16> %tmp102, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 4, i32 7 > ; <<8 x i16>> [#uses=1] + %tmp105 = shufflevector <8 x i16> %tmp.upgrd.1, <8 x i16> undef, <8 x i32> < i32 2, i32 1, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7 > ; <<8 x i16>> [#uses=1] + %tmp105.upgrd.2 = bitcast <8 x i16> %tmp105 to <4 x float> ; <<4 x float>> [#uses=1] + store <4 x float> %tmp105.upgrd.2, <4 x float>* %a + %tmp108 = 
bitcast <4 x float> %tmp46 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp109 = shufflevector <4 x i32> %tmp108, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>> [#uses=2] + %tmp109.upgrd.3 = bitcast <4 x i32> %tmp109 to <4 x float> ; <<4 x float>> [#uses=1] + %tmp119 = xor <4 x i32> %tmp109, < i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648 > ; <<4 x i32>> [#uses=1] + %tmp120 = bitcast <4 x i32> %tmp119 to <4 x float> ; <<4 x float>> [#uses=1] + %tmp133 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp109.upgrd.3, <4 x float> %tmp44, i8 1 ) ; <<4 x float>> [#uses=1] + %tmp134 = bitcast <4 x float> %tmp133 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp147 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp44, <4 x float> %tmp120, i8 1 ) ; <<4 x float>> [#uses=1] + %tmp148 = bitcast <4 x float> %tmp147 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp159 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp134, <4 x i32> %tmp148 ) ; <<4 x i32>> [#uses=1] + %tmp163 = bitcast <8 x i16> %tmp159 to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp164 = shufflevector <8 x i16> %tmp163, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 4, i32 7 > ; <<8 x i16>> [#uses=1] + %tmp166 = shufflevector <8 x i16> %tmp164, <8 x i16> undef, <8 x i32> < i32 2, i32 1, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7 > ; <<8 x i16>> [#uses=1] + %tmp166.upgrd.4 = bitcast <8 x i16> %tmp166 to <4 x float> ; <<4 x float>> [#uses=1] + store <4 x float> %tmp166.upgrd.4, <4 x float>* %b + %tmp169 = bitcast <4 x float> %tmp48 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp170 = shufflevector <4 x i32> %tmp169, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>> [#uses=2] + %tmp170.upgrd.5 = bitcast <4 x i32> %tmp170 to <4 x float> ; <<4 x float>> [#uses=1] + %tmp180 = xor <4 x i32> %tmp170, < i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648 > ; <<4 x i32>> [#uses=1] + %tmp181 = bitcast 
<4 x i32> %tmp180 to <4 x float> ; <<4 x float>> [#uses=1] + %tmp194 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp170.upgrd.5, <4 x float> %tmp44, i8 1 ) ; <<4 x float>> [#uses=1] + %tmp195 = bitcast <4 x float> %tmp194 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp208 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp44, <4 x float> %tmp181, i8 1 ) ; <<4 x float>> [#uses=1] + %tmp209 = bitcast <4 x float> %tmp208 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp220 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp195, <4 x i32> %tmp209 ) ; <<4 x i32>> [#uses=1] + %tmp224 = bitcast <8 x i16> %tmp220 to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp225 = shufflevector <8 x i16> %tmp224, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 4, i32 7 > ; <<8 x i16>> [#uses=1] + %tmp227 = shufflevector <8 x i16> %tmp225, <8 x i16> undef, <8 x i32> < i32 2, i32 1, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7 > ; <<8 x i16>> [#uses=1] + %tmp227.upgrd.6 = bitcast <8 x i16> %tmp227 to <4 x float> ; <<4 x float>> [#uses=1] + store <4 x float> %tmp227.upgrd.6, <4 x float>* %c + %tmp230 = bitcast <4 x float> %tmp50 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp231 = shufflevector <4 x i32> %tmp230, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>> [#uses=2] + %tmp231.upgrd.7 = bitcast <4 x i32> %tmp231 to <4 x float> ; <<4 x float>> [#uses=1] + %tmp241 = xor <4 x i32> %tmp231, < i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648 > ; <<4 x i32>> [#uses=1] + %tmp242 = bitcast <4 x i32> %tmp241 to <4 x float> ; <<4 x float>> [#uses=1] + %tmp255 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp231.upgrd.7, <4 x float> %tmp44, i8 1 ) ; <<4 x float>> [#uses=1] + %tmp256 = bitcast <4 x float> %tmp255 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp269 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp44, <4 x float> %tmp242, i8 1 ) ; <<4 x float>> [#uses=1] + %tmp270 = bitcast <4 x 
float> %tmp269 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp281 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp256, <4 x i32> %tmp270 ) ; <<4 x i32>> [#uses=1] + %tmp285 = bitcast <8 x i16> %tmp281 to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp286 = shufflevector <8 x i16> %tmp285, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 4, i32 7 > ; <<8 x i16>> [#uses=1] + %tmp288 = shufflevector <8 x i16> %tmp286, <8 x i16> undef, <8 x i32> < i32 2, i32 1, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7 > ; <<8 x i16>> [#uses=1] + %tmp288.upgrd.8 = bitcast <8 x i16> %tmp288 to <4 x float> ; <<4 x float>> [#uses=1] + store <4 x float> %tmp288.upgrd.8, <4 x float>* %d + ret i32 0 +} + +declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) + +declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>)
diff --git a/src/LLVM/test/CodeGen/X86/2006-05-02-InstrSched1.ll b/src/LLVM/test/CodeGen/X86/2006-05-02-InstrSched1.ll new file mode 100644 index 0000000..4ffae30 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2006-05-02-InstrSched1.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -march=x86 -relocation-model=static -stats |& \ +; RUN: grep asm-printer | grep 14 +; +@size20 = external global i32 ; <i32*> [#uses=1] +@in5 = external global i8* ; <i8**> [#uses=1] + +define i32 @compare(i8* %a, i8* %b) nounwind { + %tmp = bitcast i8* %a to i32* ; <i32*> [#uses=1] + %tmp1 = bitcast i8* %b to i32* ; <i32*> [#uses=1] + %tmp.upgrd.1 = load i32* @size20 ; <i32> [#uses=1] + %tmp.upgrd.2 = load i8** @in5 ; <i8*> [#uses=2] + %tmp3 = load i32* %tmp1 ; <i32> [#uses=1] + %gep.upgrd.3 = zext i32 %tmp3 to i64 ; <i64> [#uses=1] + %tmp4 = getelementptr i8* %tmp.upgrd.2, i64 %gep.upgrd.3 ; <i8*> [#uses=2] + %tmp7 = load i32* %tmp ; <i32> [#uses=1] + %gep.upgrd.4 = zext i32 %tmp7 to i64 ; <i64> [#uses=1] + %tmp8 = getelementptr i8* %tmp.upgrd.2, i64 %gep.upgrd.4 ; <i8*> [#uses=2] + %tmp.upgrd.5 = tail call i32 @memcmp( i8* %tmp8, i8* %tmp4, i32 %tmp.upgrd.1 ) ; <i32> [#uses=1] + ret i32 %tmp.upgrd.5 +} + +declare i32 @memcmp(i8*, i8*, i32) +
diff --git a/src/LLVM/test/CodeGen/X86/2006-05-02-InstrSched2.ll b/src/LLVM/test/CodeGen/X86/2006-05-02-InstrSched2.ll new file mode 100644 index 0000000..e997085 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2006-05-02-InstrSched2.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -march=x86 -stats |& \ +; RUN: grep asm-printer | grep 13 + +define void @_ZN9__gnu_cxx9hashtableISt4pairIKPKciES3_NS_4hashIS3_EESt10_Select1stIS5_E5eqstrSaIiEE14find_or_insertERKS5__cond_true456.i(i8* %tmp435.i, i32* %tmp449.i.out) nounwind { +newFuncRoot: + br label %cond_true456.i +bb459.i.exitStub: ; preds = %cond_true456.i + store i32 %tmp449.i, i32* %tmp449.i.out + ret void +cond_true456.i: ; preds = %cond_true456.i, %newFuncRoot + %__s441.2.4.i = phi i8* [ %tmp451.i.upgrd.1, %cond_true456.i ], [ %tmp435.i, %newFuncRoot ] ; <i8*> [#uses=2] + %__h.2.4.i = phi i32 [ %tmp449.i, %cond_true456.i ], [ 0, %newFuncRoot ] ; <i32> [#uses=1] + %tmp446.i = mul i32 %__h.2.4.i, 5 ; <i32> [#uses=1] + %tmp.i = load i8* %__s441.2.4.i ; <i8> [#uses=1] + %tmp448.i = sext i8 %tmp.i to i32 ; <i32> [#uses=1] + %tmp449.i = add i32 %tmp448.i, %tmp446.i ; <i32> [#uses=2] + %tmp450.i = ptrtoint i8* %__s441.2.4.i to i32 ; <i32> [#uses=1] + %tmp451.i = add i32 %tmp450.i, 1 ; <i32> [#uses=1] + %tmp451.i.upgrd.1 = inttoptr i32 %tmp451.i to i8* ; <i8*> [#uses=2] + %tmp45435.i = load i8* %tmp451.i.upgrd.1 ; <i8> [#uses=1] + %tmp45536.i = icmp eq i8 %tmp45435.i, 0 ; <i1> [#uses=1] + br i1 %tmp45536.i, label %bb459.i.exitStub, label %cond_true456.i +} +
diff --git a/src/LLVM/test/CodeGen/X86/2006-05-08-CoalesceSubRegClass.ll b/src/LLVM/test/CodeGen/X86/2006-05-08-CoalesceSubRegClass.ll new file mode 100644 index 0000000..748f176 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2006-05-08-CoalesceSubRegClass.ll
@@ -0,0 +1,25 @@ +; Coalescing from R32 to a subset R32_. Once another register coalescer bug is +; fixed, the movb should go away as well. + +; RUN: llc < %s -march=x86 -relocation-model=static | \ +; RUN: grep movl + +@B = external global i32 ; <i32*> [#uses=2] +@C = external global i16* ; <i16**> [#uses=2] + +define void @test(i32 %A) { + %A.upgrd.1 = trunc i32 %A to i8 ; <i8> [#uses=1] + %tmp2 = load i32* @B ; <i32> [#uses=1] + %tmp3 = and i8 %A.upgrd.1, 16 ; <i8> [#uses=1] + %shift.upgrd.2 = zext i8 %tmp3 to i32 ; <i32> [#uses=1] + %tmp4 = shl i32 %tmp2, %shift.upgrd.2 ; <i32> [#uses=1] + store i32 %tmp4, i32* @B + %tmp6 = lshr i32 %A, 3 ; <i32> [#uses=1] + %tmp = load i16** @C ; <i16*> [#uses=1] + %tmp8 = ptrtoint i16* %tmp to i32 ; <i32> [#uses=1] + %tmp9 = add i32 %tmp8, %tmp6 ; <i32> [#uses=1] + %tmp9.upgrd.3 = inttoptr i32 %tmp9 to i16* ; <i16*> [#uses=1] + store i16* %tmp9.upgrd.3, i16** @C + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/2006-05-08-InstrSched.ll b/src/LLVM/test/CodeGen/X86/2006-05-08-InstrSched.ll new file mode 100644 index 0000000..5b04ea1 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2006-05-08-InstrSched.ll
@@ -0,0 +1,25 @@ +; RUN: llc < %s -march=x86 -relocation-model=static | not grep {subl.*%esp} + +@A = external global i16* ; <i16**> [#uses=1] +@B = external global i32 ; <i32*> [#uses=1] +@C = external global i32 ; <i32*> [#uses=2] + +define void @test() { + %tmp = load i16** @A ; <i16*> [#uses=1] + %tmp1 = getelementptr i16* %tmp, i32 1 ; <i16*> [#uses=1] + %tmp.upgrd.1 = load i16* %tmp1 ; <i16> [#uses=1] + %tmp3 = zext i16 %tmp.upgrd.1 to i32 ; <i32> [#uses=1] + %tmp.upgrd.2 = load i32* @B ; <i32> [#uses=1] + %tmp4 = and i32 %tmp.upgrd.2, 16 ; <i32> [#uses=1] + %tmp5 = load i32* @C ; <i32> [#uses=1] + %tmp6 = trunc i32 %tmp4 to i8 ; <i8> [#uses=2] + %shift.upgrd.3 = zext i8 %tmp6 to i32 ; <i32> [#uses=1] + %tmp7 = shl i32 %tmp5, %shift.upgrd.3 ; <i32> [#uses=1] + %tmp9 = xor i8 %tmp6, 16 ; <i8> [#uses=1] + %shift.upgrd.4 = zext i8 %tmp9 to i32 ; <i32> [#uses=1] + %tmp11 = lshr i32 %tmp3, %shift.upgrd.4 ; <i32> [#uses=1] + %tmp12 = or i32 %tmp11, %tmp7 ; <i32> [#uses=1] + store i32 %tmp12, i32* @C + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/2006-05-11-InstrSched.ll b/src/LLVM/test/CodeGen/X86/2006-05-11-InstrSched.ll new file mode 100644 index 0000000..b917efa --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2006-05-11-InstrSched.ll
@@ -0,0 +1,51 @@ +; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mattr=+sse2 -stats -realign-stack=0 |&\ +; RUN: grep {asm-printer} | grep 34 + +target datalayout = "e-p:32:32" +define void @foo(i32* %mc, i32* %bp, i32* %ms, i32* %xmb, i32* %mpp, i32* %tpmm, i32* %ip, i32* %tpim, i32* %dpp, i32* %tpdm, i32* %bpi, i32 %M) nounwind { +entry: + %tmp9 = icmp slt i32 %M, 5 ; <i1> [#uses=1] + br i1 %tmp9, label %return, label %cond_true + +cond_true: ; preds = %cond_true, %entry + %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %cond_true ] ; <i32> [#uses=2] + %tmp. = shl i32 %indvar, 2 ; <i32> [#uses=1] + %tmp.10 = add nsw i32 %tmp., 1 ; <i32> [#uses=2] + %tmp31 = add nsw i32 %tmp.10, -1 ; <i32> [#uses=4] + %tmp32 = getelementptr i32* %mpp, i32 %tmp31 ; <i32*> [#uses=1] + %tmp34 = bitcast i32* %tmp32 to <16 x i8>* ; <i8*> [#uses=1] + %tmp = load <16 x i8>* %tmp34, align 1 + %tmp42 = getelementptr i32* %tpmm, i32 %tmp31 ; <i32*> [#uses=1] + %tmp42.upgrd.1 = bitcast i32* %tmp42 to <4 x i32>* ; <<4 x i32>*> [#uses=1] + %tmp46 = load <4 x i32>* %tmp42.upgrd.1 ; <<4 x i32>> [#uses=1] + %tmp54 = bitcast <16 x i8> %tmp to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp55 = add <4 x i32> %tmp54, %tmp46 ; <<4 x i32>> [#uses=2] + %tmp55.upgrd.2 = bitcast <4 x i32> %tmp55 to <2 x i64> ; <<2 x i64>> [#uses=1] + %tmp62 = getelementptr i32* %ip, i32 %tmp31 ; <i32*> [#uses=1] + %tmp65 = bitcast i32* %tmp62 to <16 x i8>* ; <i8*> [#uses=1] + %tmp66 = load <16 x i8>* %tmp65, align 1 + %tmp73 = getelementptr i32* %tpim, i32 %tmp31 ; <i32*> [#uses=1] + %tmp73.upgrd.3 = bitcast i32* %tmp73 to <4 x i32>* ; <<4 x i32>*> [#uses=1] + %tmp77 = load <4 x i32>* %tmp73.upgrd.3 ; <<4 x i32>> [#uses=1] + %tmp87 = bitcast <16 x i8> %tmp66 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp88 = add <4 x i32> %tmp87, %tmp77 ; <<4 x i32>> [#uses=2] + %tmp88.upgrd.4 = bitcast <4 x i32> %tmp88 to <2 x i64> ; <<2 x i64>> [#uses=1] + %tmp99 = tail call <4 x i32> @llvm.x86.sse2.pcmpgt.d( <4 x i32> %tmp88, <4 x i32> 
%tmp55 ) ; <<4 x i32>> [#uses=1] + %tmp99.upgrd.5 = bitcast <4 x i32> %tmp99 to <2 x i64> ; <<2 x i64>> [#uses=2] + %tmp110 = xor <2 x i64> %tmp99.upgrd.5, < i64 -1, i64 -1 > ; <<2 x i64>> [#uses=1] + %tmp111 = and <2 x i64> %tmp110, %tmp55.upgrd.2 ; <<2 x i64>> [#uses=1] + %tmp121 = and <2 x i64> %tmp99.upgrd.5, %tmp88.upgrd.4 ; <<2 x i64>> [#uses=1] + %tmp131 = or <2 x i64> %tmp121, %tmp111 ; <<2 x i64>> [#uses=1] + %tmp137 = getelementptr i32* %mc, i32 %tmp.10 ; <i32*> [#uses=1] + %tmp137.upgrd.7 = bitcast i32* %tmp137 to <2 x i64>* ; <<2 x i64>*> [#uses=1] + store <2 x i64> %tmp131, <2 x i64>* %tmp137.upgrd.7 + %tmp147 = add nsw i32 %tmp.10, 8 ; <i32> [#uses=1] + %tmp.upgrd.8 = icmp ne i32 %tmp147, %M ; <i1> [#uses=1] + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1] + br i1 %tmp.upgrd.8, label %cond_true, label %return + +return: ; preds = %cond_true, %entry + ret void +} + +declare <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32>, <4 x i32>)
diff --git a/src/LLVM/test/CodeGen/X86/2006-05-17-VectorArg.ll b/src/LLVM/test/CodeGen/X86/2006-05-17-VectorArg.ll new file mode 100644 index 0000000..e7f26a4 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2006-05-17-VectorArg.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 + +define <4 x float> @opRSQ(<4 x float> %a) nounwind { +entry: + %tmp2 = extractelement <4 x float> %a, i32 3 ; <float> [#uses=2] + %abscond = fcmp oge float %tmp2, -0.000000e+00 ; <i1> [#uses=1] + %abs = select i1 %abscond, float %tmp2, float 0.000000e+00 ; <float> [#uses=1] + %tmp3 = tail call float @llvm.sqrt.f32( float %abs ) ; <float> [#uses=1] + %tmp4 = fdiv float 1.000000e+00, %tmp3 ; <float> [#uses=1] + %tmp11 = insertelement <4 x float> zeroinitializer, float %tmp4, i32 3 ; <<4 x float>> [#uses=1] + ret <4 x float> %tmp11 +} + +declare float @llvm.sqrt.f32(float) +
diff --git a/src/LLVM/test/CodeGen/X86/2006-05-22-FPSetEQ.ll b/src/LLVM/test/CodeGen/X86/2006-05-22-FPSetEQ.ll new file mode 100644 index 0000000..20697eb --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2006-05-22-FPSetEQ.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=x86 -mattr=-sse | grep setnp +; RUN: llc < %s -march=x86 -mattr=-sse -enable-unsafe-fp-math -enable-no-nans-fp-math | \ +; RUN: not grep setnp + +define i32 @test(float %f) { + %tmp = fcmp oeq float %f, 0.000000e+00 ; <i1> [#uses=1] + %tmp.upgrd.1 = zext i1 %tmp to i32 ; <i32> [#uses=1] + ret i32 %tmp.upgrd.1 +} +
diff --git a/src/LLVM/test/CodeGen/X86/2006-05-25-CycleInDAG.ll b/src/LLVM/test/CodeGen/X86/2006-05-25-CycleInDAG.ll new file mode 100644 index 0000000..5f80eb2 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2006-05-25-CycleInDAG.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -march=x86 + +define i32 @test() { + br i1 false, label %cond_next33, label %cond_true12 +cond_true12: ; preds = %0 + ret i32 0 +cond_next33: ; preds = %0 + %tmp44.i = call double @foo( double 0.000000e+00, i32 32 ) ; <double> [#uses=1] + %tmp61.i = load i8* null ; <i8> [#uses=1] + %tmp61.i.upgrd.1 = zext i8 %tmp61.i to i32 ; <i32> [#uses=1] + %tmp58.i = or i32 0, %tmp61.i.upgrd.1 ; <i32> [#uses=1] + %tmp62.i = or i32 %tmp58.i, 0 ; <i32> [#uses=1] + %tmp62.i.upgrd.2 = sitofp i32 %tmp62.i to double ; <double> [#uses=1] + %tmp64.i = fadd double %tmp62.i.upgrd.2, %tmp44.i ; <double> [#uses=1] + %tmp68.i = call double @foo( double %tmp64.i, i32 0 ) ; <double> [#uses=0] + ret i32 0 +} + +declare double @foo(double, i32) +
diff --git a/src/LLVM/test/CodeGen/X86/2006-07-10-InlineAsmAConstraint.ll b/src/LLVM/test/CodeGen/X86/2006-07-10-InlineAsmAConstraint.ll new file mode 100644 index 0000000..2dee1c4 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2006-07-10-InlineAsmAConstraint.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=x86 +; PR825 + +define i64 @test() { + %tmp.i5 = call i64 asm sideeffect "rdtsc", "=A,~{dirflag},~{fpsr},~{flags}"( ) ; <i64> [#uses=1] + ret i64 %tmp.i5 +} +
diff --git a/src/LLVM/test/CodeGen/X86/2006-07-12-InlineAsmQConstraint.ll b/src/LLVM/test/CodeGen/X86/2006-07-12-InlineAsmQConstraint.ll new file mode 100644 index 0000000..bc757dd --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2006-07-12-InlineAsmQConstraint.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86 +; PR828 + +target datalayout = "e-p:32:32" +target triple = "i686-pc-linux-gnu" + +define void @_ZN5() { +cond_true9: + %tmp3.i.i = call i32 asm sideeffect "lock; cmpxchg $1,$2", "={ax},q,m,0,~{dirflag},~{fpsr},~{flags},~{memory}"( i32 0, i32* null, i32 0 ) ; <i32> [#uses=0] + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/2006-07-20-InlineAsm.ll b/src/LLVM/test/CodeGen/X86/2006-07-20-InlineAsm.ll new file mode 100644 index 0000000..9ba545d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2006-07-20-InlineAsm.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -march=x86 +; PR833 + +@G = weak global i32 0 ; <i32*> [#uses=3] + +define i32 @foo(i32 %X) { +entry: + %X_addr = alloca i32 ; <i32*> [#uses=3] + store i32 %X, i32* %X_addr + call void asm sideeffect "xchg{l} {$0,$1|$1,$0}", "=*m,=*r,m,1,~{dirflag},~{fpsr},~{flags}"( i32* @G, i32* %X_addr, i32* @G, i32 %X ) + %tmp1 = load i32* %X_addr ; <i32> [#uses=1] + ret i32 %tmp1 +} + +define i32 @foo2(i32 %X) { +entry: + %X_addr = alloca i32 ; <i32*> [#uses=3] + store i32 %X, i32* %X_addr + call void asm sideeffect "xchg{l} {$0,$1|$1,$0}", "=*m,=*r,1,~{dirflag},~{fpsr},~{flags}"( i32* @G, i32* %X_addr, i32 %X ) + %tmp1 = load i32* %X_addr ; <i32> [#uses=1] + ret i32 %tmp1 +} +
diff --git a/src/LLVM/test/CodeGen/X86/2006-07-28-AsmPrint-Long-As-Pointer.ll b/src/LLVM/test/CodeGen/X86/2006-07-28-AsmPrint-Long-As-Pointer.ll new file mode 100644 index 0000000..e364e4d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2006-07-28-AsmPrint-Long-As-Pointer.ll
@@ -0,0 +1,5 @@ +; RUN: llc < %s -march=x86 | grep -- 4294967240 +; PR853 + +@X = global i32* inttoptr (i64 -56 to i32*) ; <i32**> [#uses=0] +
diff --git a/src/LLVM/test/CodeGen/X86/2006-07-31-SingleRegClass.ll b/src/LLVM/test/CodeGen/X86/2006-07-31-SingleRegClass.ll new file mode 100644 index 0000000..5b8c456 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2006-07-31-SingleRegClass.ll
@@ -0,0 +1,10 @@ +; PR850 +; RUN: llc < %s -march=x86 -x86-asm-syntax=att > %t +; RUN: grep {movl 4(%eax),%ebp} %t +; RUN: grep {movl 0(%eax), %ebx} %t + +define i32 @foo(i32 %__s.i.i, i32 %tmp5.i.i, i32 %tmp6.i.i, i32 %tmp7.i.i, i32 %tmp8.i.i) { + %tmp9.i.i = call i32 asm sideeffect "push %ebp\0Apush %ebx\0Amovl 4($2),%ebp\0Amovl 0($2), %ebx\0Amovl $1,%eax\0Aint $$0x80\0Apop %ebx\0Apop %ebp", "={ax},i,0,{cx},{dx},{si},{di}"( i32 192, i32 %__s.i.i, i32 %tmp5.i.i, i32 %tmp6.i.i, i32 %tmp7.i.i, i32 %tmp8.i.i ) ; <i32> [#uses=1] + ret i32 %tmp9.i.i +} +
diff --git a/src/LLVM/test/CodeGen/X86/2006-08-07-CycleInDAG.ll b/src/LLVM/test/CodeGen/X86/2006-08-07-CycleInDAG.ll new file mode 100644 index 0000000..8c58ad5 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2006-08-07-CycleInDAG.ll
@@ -0,0 +1,31 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 + %struct.foo = type opaque + +define fastcc i32 @test(%struct.foo* %v, %struct.foo* %vi) { + br i1 false, label %ilog2.exit, label %cond_true.i + +cond_true.i: ; preds = %0 + ret i32 0 + +ilog2.exit: ; preds = %0 + %tmp24.i = load i32* null ; <i32> [#uses=1] + %tmp13.i12.i = tail call double @ldexp( double 0.000000e+00, i32 0 ) ; <double> [#uses=1] + %tmp13.i13.i = fptrunc double %tmp13.i12.i to float ; <float> [#uses=1] + %tmp11.s = load i32* null ; <i32> [#uses=1] + %tmp11.i = bitcast i32 %tmp11.s to i32 ; <i32> [#uses=1] + %n.i = bitcast i32 %tmp24.i to i32 ; <i32> [#uses=1] + %tmp13.i7 = mul i32 %tmp11.i, %n.i ; <i32> [#uses=1] + %tmp.i8 = tail call i8* @calloc( i32 %tmp13.i7, i32 4 ) ; <i8*> [#uses=0] + br i1 false, label %bb224.preheader.i, label %bb.i + +bb.i: ; preds = %ilog2.exit + ret i32 0 + +bb224.preheader.i: ; preds = %ilog2.exit + %tmp165.i = fpext float %tmp13.i13.i to double ; <double> [#uses=0] + ret i32 0 +} + +declare i8* @calloc(i32, i32) + +declare double @ldexp(double, i32)
diff --git a/src/LLVM/test/CodeGen/X86/2006-08-16-CycleInDAG.ll b/src/LLVM/test/CodeGen/X86/2006-08-16-CycleInDAG.ll new file mode 100644 index 0000000..8232c5e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2006-08-16-CycleInDAG.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -march=x86 + %struct.expr = type { %struct.rtx_def*, i32, %struct.expr*, %struct.occr*, %struct.occr*, %struct.rtx_def* } + %struct.hash_table = type { %struct.expr**, i32, i32, i32 } + %struct.occr = type { %struct.occr*, %struct.rtx_def*, i8, i8 } + %struct.rtx_def = type { i16, i8, i8, %struct.u } + %struct.u = type { [1 x i64] } + +define void @test() { + %tmp = load i32* null ; <i32> [#uses=1] + %tmp8 = call i32 @hash_rtx( ) ; <i32> [#uses=1] + %tmp11 = urem i32 %tmp8, %tmp ; <i32> [#uses=1] + br i1 false, label %cond_next, label %return + +cond_next: ; preds = %0 + %gep.upgrd.1 = zext i32 %tmp11 to i64 ; <i64> [#uses=1] + %tmp17 = getelementptr %struct.expr** null, i64 %gep.upgrd.1 ; <%struct.expr**> [#uses=0] + ret void + +return: ; preds = %0 + ret void +} + +declare i32 @hash_rtx()
diff --git a/src/LLVM/test/CodeGen/X86/2006-08-21-ExtraMovInst.ll b/src/LLVM/test/CodeGen/X86/2006-08-21-ExtraMovInst.ll new file mode 100644 index 0000000..6c9a602 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2006-08-21-ExtraMovInst.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=x86 -mcpu=i386 | \ +; RUN: not grep {movl %eax, %edx} + +define i32 @foo(i32 %t, i32 %C) { +entry: + br label %cond_true + +cond_true: ; preds = %cond_true, %entry + %t_addr.0.0 = phi i32 [ %t, %entry ], [ %tmp7, %cond_true ] ; <i32> [#uses=2] + %tmp7 = add i32 %t_addr.0.0, 1 ; <i32> [#uses=1] + %tmp = icmp sgt i32 %C, 39 ; <i1> [#uses=1] + br i1 %tmp, label %bb12, label %cond_true + +bb12: ; preds = %cond_true + ret i32 %t_addr.0.0 +} +
diff --git a/src/LLVM/test/CodeGen/X86/2006-09-01-CycleInDAG.ll b/src/LLVM/test/CodeGen/X86/2006-09-01-CycleInDAG.ll new file mode 100644 index 0000000..bfd5e4c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2006-09-01-CycleInDAG.ll
@@ -0,0 +1,131 @@ +; RUN: llc < %s -march=x86 +target datalayout = "e-p:32:32" +target triple = "i686-apple-darwin8" + %struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } + %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } + %struct.VEC_edge = type { i32, i32, [1 x %struct.edge_def*] } + %struct.VEC_tree = type { i32, i32, [1 x %struct.tree_node*] } + %struct.__sFILEX = type opaque + %struct.__sbuf = type { i8*, i32 } + %struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] } + %struct._var_map = type { %struct.partition_def*, i32*, i32*, %struct.tree_node**, i32, i32, i32* } + %struct.basic_block_def = type { %struct.rtx_def*, %struct.rtx_def*, %struct.tree_node*, %struct.VEC_edge*, %struct.VEC_edge*, %struct.bitmap_head_def*, %struct.bitmap_head_def*, i8*, %struct.loop*, [2 x %struct.et_node*], %struct.basic_block_def*, %struct.basic_block_def*, %struct.reorder_block_def*, %struct.bb_ann_d*, i64, i32, i32, i32, i32 } + %struct.bb_ann_d = type { %struct.tree_node*, i8, %struct.edge_prediction* } + %struct.bitmap_element_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, [4 x i32] } + %struct.bitmap_head_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, %struct.bitmap_obstack* } + %struct.bitmap_iterator = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, i32 } + %struct.bitmap_obstack = type { %struct.bitmap_element_def*, %struct.bitmap_head_def*, %struct.obstack } + %struct.block_stmt_iterator = type { %struct.tree_stmt_iterator, %struct.basic_block_def* } + %struct.coalesce_list_d = type { %struct._var_map*, %struct.partition_pair_d**, i1 } + %struct.conflict_graph_def = type opaque + %struct.dataflow_d = type { %struct.varray_head_tag*, [2 x 
%struct.tree_node*] } + %struct.def_operand_ptr = type { %struct.tree_node** } + %struct.def_optype_d = type { i32, [1 x %struct.def_operand_ptr] } + %struct.die_struct = type opaque + %struct.edge_def = type { %struct.basic_block_def*, %struct.basic_block_def*, %struct.edge_def_insns, i8*, %struct.__sbuf*, i32, i32, i64, i32 } + %struct.edge_def_insns = type { %struct.rtx_def* } + %struct.edge_iterator = type { i32, %struct.VEC_edge** } + %struct.edge_prediction = type { %struct.edge_prediction*, %struct.edge_def*, i32, i32 } + %struct.eh_status = type opaque + %struct.elt_list = type opaque + %struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack*, i32, %struct.__sbuf, i32, i8*, %struct.rtx_def** } + %struct.et_node = type opaque + %struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* } + %struct.function = type { %struct.eh_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.CUMULATIVE_ARGS, %struct.rtx_def*, %struct.rtx_def*, %struct.initial_value_struct*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i8, i32, i64, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.varray_head_tag*, %struct.temp_slot*, i32, %struct.var_refs_queue*, i32, i32, %struct.rtvec_def*, %struct.tree_node*, i32, i32, i32, %struct.machine_function*, i32, i32, i1, i1, %struct.language_function*, %struct.rtx_def*, i32, i32, i32, i32, %struct.__sbuf, %struct.varray_head_tag*, %struct.tree_node*, i8, i8, i8 } + %struct.ht_identifier = type { i8*, i32, i32 } + %struct.initial_value_struct = type opaque + %struct.lang_decl = type opaque + %struct.lang_type = type opaque + %struct.language_function = type opaque + %struct.location_t = type { i8*, i32 } + %struct.loop = 
type opaque + %struct.machine_function = type { i32, i32, i8*, i32, i32 } + %struct.obstack = type { i32, %struct._obstack_chunk*, i8*, i8*, i8*, i32, i32, %struct._obstack_chunk* (i8*, i32)*, void (i8*, %struct._obstack_chunk*)*, i8*, i8 } + %struct.partition_def = type { i32, [1 x %struct.partition_elem] } + %struct.partition_elem = type { i32, %struct.partition_elem*, i32 } + %struct.partition_pair_d = type { i32, i32, i32, %struct.partition_pair_d* } + %struct.phi_arg_d = type { %struct.tree_node*, i1 } + %struct.pointer_set_t = type opaque + %struct.ptr_info_def = type { i8, %struct.bitmap_head_def*, %struct.tree_node* } + %struct.real_value = type opaque + %struct.reg_info_def = type opaque + %struct.reorder_block_def = type { %struct.rtx_def*, %struct.rtx_def*, %struct.basic_block_def*, %struct.basic_block_def*, %struct.basic_block_def*, i32, i32, i32 } + %struct.rtvec_def = type opaque + %struct.rtx_def = type opaque + %struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack* } + %struct.simple_bitmap_def = type { i32, i32, i32, [1 x i64] } + %struct.ssa_op_iter = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.stmt_operands_d*, i1 } + %struct.stmt_ann_d = type { %struct.tree_ann_common_d, i8, %struct.basic_block_def*, %struct.stmt_operands_d, %struct.dataflow_d*, %struct.bitmap_head_def*, i32 } + %struct.stmt_operands_d = type { %struct.def_optype_d*, %struct.def_optype_d*, %struct.v_may_def_optype_d*, %struct.vuse_optype_d*, %struct.v_may_def_optype_d* } + %struct.temp_slot = type opaque + %struct.tree_ann_common_d = type { i32, i8*, %struct.tree_node* } + %struct.tree_ann_d = type { %struct.stmt_ann_d } + %struct.tree_binfo = type { %struct.tree_common, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.VEC_tree*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.VEC_tree } + %struct.tree_block = type { %struct.tree_common, i8, [3 
x i8], %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node* } + %struct.tree_common = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_ann_d*, i8, i8, i8, i8, i8 } + %struct.tree_complex = type { %struct.tree_common, %struct.tree_node*, %struct.tree_node* } + %struct.tree_decl = type { %struct.tree_common, %struct.__sbuf, i32, %struct.tree_node*, i8, i8, i8, i8, i8, i8, i8, i32, %struct.tree_decl_u1, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, i32, %struct.tree_decl_u2, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_decl* } + %struct.tree_decl_u1 = type { i64 } + %struct.tree_decl_u1_a = type { i32 } + %struct.tree_decl_u2 = type { %struct.function* } + %struct.tree_exp = type { %struct.tree_common, %struct.__sbuf*, i32, %struct.tree_node*, [1 x %struct.tree_node*] } + %struct.tree_identifier = type { %struct.tree_common, %struct.ht_identifier } + %struct.tree_int_cst = type { %struct.tree_common, %struct.tree_int_cst_lowhi } + %struct.tree_int_cst_lowhi = type { i64, i64 } + %struct.tree_list = type { %struct.tree_common, %struct.tree_node*, %struct.tree_node* } + %struct.tree_live_info_d = type { %struct._var_map*, %struct.bitmap_head_def*, %struct.bitmap_head_def**, i32, %struct.bitmap_head_def** } + %struct.tree_node = type { %struct.tree_decl } + %struct.tree_partition_associator_d = type { %struct.varray_head_tag*, %struct.varray_head_tag*, i32*, i32*, i32, i32, %struct._var_map* } + %struct.tree_phi_node = type { %struct.tree_common, %struct.tree_node*, i32, i32, i32, %struct.basic_block_def*, %struct.dataflow_d*, [1 x %struct.phi_arg_d] } + %struct.tree_real_cst = type { %struct.tree_common, %struct.real_value* } + %struct.tree_ssa_name = type { %struct.tree_common, %struct.tree_node*, i32, 
%struct.ptr_info_def*, %struct.tree_node*, i8* } + %struct.tree_statement_list = type { %struct.tree_common, %struct.tree_statement_list_node*, %struct.tree_statement_list_node* } + %struct.tree_statement_list_node = type { %struct.tree_statement_list_node*, %struct.tree_statement_list_node*, %struct.tree_node* } + %struct.tree_stmt_iterator = type { %struct.tree_statement_list_node*, %struct.tree_node* } + %struct.tree_string = type { %struct.tree_common, i32, [1 x i8] } + %struct.tree_type = type { %struct.tree_common, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i32, i16, i8, i8, i32, %struct.tree_node*, %struct.tree_node*, %struct.tree_decl_u1_a, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_type* } + %struct.tree_type_symtab = type { i32 } + %struct.tree_value_handle = type { %struct.tree_common, %struct.value_set*, i32 } + %struct.tree_vec = type { %struct.tree_common, i32, [1 x %struct.tree_node*] } + %struct.tree_vector = type { %struct.tree_common, %struct.tree_node* } + %struct.use_operand_ptr = type { %struct.tree_node** } + %struct.use_optype_d = type { i32, [1 x %struct.def_operand_ptr] } + %struct.v_def_use_operand_type_t = type { %struct.tree_node*, %struct.tree_node* } + %struct.v_may_def_optype_d = type { i32, [1 x %struct.v_def_use_operand_type_t] } + %struct.v_must_def_optype_d = type { i32, [1 x %struct.v_def_use_operand_type_t] } + %struct.value_set = type opaque + %struct.var_ann_d = type { %struct.tree_ann_common_d, i8, i8, %struct.tree_node*, %struct.varray_head_tag*, i32, i32, i32, %struct.tree_node*, %struct.tree_node* } + %struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* } + %struct.varasm_status = type opaque + %struct.varray_data = type { [1 x i64] } + %struct.varray_head_tag = type { i32, i32, i32, i8*, %struct.varray_data } + %struct.vuse_optype_d = type { 
i32, [1 x %struct.tree_node*] } +@basic_block_info = external global %struct.varray_head_tag* ; <%struct.varray_head_tag**> [#uses=1] + +define void @calculate_live_on_entry_cond_true3632(%struct.varray_head_tag* %stack3023.6, i32* %tmp3629, %struct.VEC_edge*** %tmp3397.out) { +newFuncRoot: + br label %cond_true3632 + +bb3502.exitStub: ; preds = %cond_true3632 + store %struct.VEC_edge** %tmp3397, %struct.VEC_edge*** %tmp3397.out + ret void + +cond_true3632: ; preds = %newFuncRoot + %tmp3378 = load i32* %tmp3629 ; <i32> [#uses=1] + %tmp3379 = add i32 %tmp3378, -1 ; <i32> [#uses=1] + %tmp3381 = getelementptr %struct.varray_head_tag* %stack3023.6, i32 0, i32 4 ; <%struct.varray_data*> [#uses=1] + %tmp3382 = bitcast %struct.varray_data* %tmp3381 to [1 x i32]* ; <[1 x i32]*> [#uses=1] + %gep.upgrd.1 = zext i32 %tmp3379 to i64 ; <i64> [#uses=1] + %tmp3383 = getelementptr [1 x i32]* %tmp3382, i32 0, i64 %gep.upgrd.1 ; <i32*> [#uses=1] + %tmp3384 = load i32* %tmp3383 ; <i32> [#uses=1] + %tmp3387 = load i32* %tmp3629 ; <i32> [#uses=1] + %tmp3388 = add i32 %tmp3387, -1 ; <i32> [#uses=1] + store i32 %tmp3388, i32* %tmp3629 + %tmp3391 = load %struct.varray_head_tag** @basic_block_info ; <%struct.varray_head_tag*> [#uses=1] + %tmp3393 = getelementptr %struct.varray_head_tag* %tmp3391, i32 0, i32 4 ; <%struct.varray_data*> [#uses=1] + %tmp3394 = bitcast %struct.varray_data* %tmp3393 to [1 x %struct.basic_block_def*]* ; <[1 x %struct.basic_block_def*]*> [#uses=1] + %tmp3395 = getelementptr [1 x %struct.basic_block_def*]* %tmp3394, i32 0, i32 %tmp3384 ; <%struct.basic_block_def**> [#uses=1] + %tmp3396 = load %struct.basic_block_def** %tmp3395 ; <%struct.basic_block_def*> [#uses=1] + %tmp3397 = getelementptr %struct.basic_block_def* %tmp3396, i32 0, i32 3 ; <%struct.VEC_edge**> [#uses=1] + br label %bb3502.exitStub +}
diff --git a/src/LLVM/test/CodeGen/X86/2006-10-02-BoolRetCrash.ll b/src/LLVM/test/CodeGen/X86/2006-10-02-BoolRetCrash.ll new file mode 100644 index 0000000..9c352ad --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2006-10-02-BoolRetCrash.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s +; PR933 + +define fastcc i1 @test() { + ret i1 true +} +
diff --git a/src/LLVM/test/CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll b/src/LLVM/test/CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll new file mode 100644 index 0000000..b23d59a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=x86 -mattr=sse | grep movaps +; Test that the load is NOT folded into the intrinsic, which would zero the top +; elts of the loaded vector. + +target datalayout = "e-p:32:32" +target triple = "i686-apple-darwin8.7.2" + +define <4 x float> @test(<4 x float> %A, <4 x float>* %B) nounwind { + %BV = load <4 x float>* %B ; <<4 x float>> [#uses=1] + %tmp28 = tail call <4 x float> @llvm.x86.sse.sub.ss( <4 x float> %A, <4 x float> %BV ) ; <<4 x float>> [#uses=1] + ret <4 x float> %tmp28 +} + +declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) +
diff --git a/src/LLVM/test/CodeGen/X86/2006-10-09-CycleInDAG.ll b/src/LLVM/test/CodeGen/X86/2006-10-09-CycleInDAG.ll new file mode 100644 index 0000000..384b990 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2006-10-09-CycleInDAG.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=x86 + +define void @_ZN13QFSFileEngine4readEPcx() { + %tmp201 = load i32* null ; <i32> [#uses=1] + %tmp201.upgrd.1 = sext i32 %tmp201 to i64 ; <i64> [#uses=1] + %tmp202 = load i64* null ; <i64> [#uses=1] + %tmp203 = add i64 %tmp201.upgrd.1, %tmp202 ; <i64> [#uses=1] + store i64 %tmp203, i64* null + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/2006-10-10-FindModifiedNodeSlotBug.ll b/src/LLVM/test/CodeGen/X86/2006-10-10-FindModifiedNodeSlotBug.ll new file mode 100644 index 0000000..4c70b97 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2006-10-10-FindModifiedNodeSlotBug.ll
@@ -0,0 +1,28 @@ +; RUN: llc < %s -march=x86 | grep shrl +; Bug in FindModifiedNodeSlot cause tmp14 load to become a zextload and shr 31 +; is then optimized away. +@tree_code_type = external global [0 x i32] ; <[0 x i32]*> [#uses=1] + +define void @copy_if_shared_r() { + %tmp = load i32* null ; <i32> [#uses=1] + %tmp56 = and i32 %tmp, 255 ; <i32> [#uses=1] + %gep.upgrd.1 = zext i32 %tmp56 to i64 ; <i64> [#uses=1] + %tmp8 = getelementptr [0 x i32]* @tree_code_type, i32 0, i64 %gep.upgrd.1 ; <i32*> [#uses=1] + %tmp9 = load i32* %tmp8 ; <i32> [#uses=1] + %tmp10 = add i32 %tmp9, -1 ; <i32> [#uses=1] + %tmp.upgrd.2 = icmp ugt i32 %tmp10, 2 ; <i1> [#uses=1] + %tmp14 = load i32* null ; <i32> [#uses=1] + %tmp15 = lshr i32 %tmp14, 31 ; <i32> [#uses=1] + %tmp15.upgrd.3 = trunc i32 %tmp15 to i8 ; <i8> [#uses=1] + %tmp16 = icmp ne i8 %tmp15.upgrd.3, 0 ; <i1> [#uses=1] + br i1 %tmp.upgrd.2, label %cond_false25, label %cond_true +cond_true: ; preds = %0 + br i1 %tmp16, label %cond_true17, label %cond_false +cond_true17: ; preds = %cond_true + ret void +cond_false: ; preds = %cond_true + ret void +cond_false25: ; preds = %0 + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/2006-10-12-CycleInDAG.ll b/src/LLVM/test/CodeGen/X86/2006-10-12-CycleInDAG.ll new file mode 100644 index 0000000..e23c2d8 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2006-10-12-CycleInDAG.ll
@@ -0,0 +1,41 @@ +; RUN: llc < %s -march=x86 + %struct.function = type opaque + %struct.lang_decl = type opaque + %struct.location_t = type { i8*, i32 } + %struct.rtx_def = type opaque + %struct.tree_common = type { %struct.tree_node*, %struct.tree_node*, %union.tree_ann_d*, i8, i8, i8, i8, i8 } + %struct.tree_decl = type { %struct.tree_common, %struct.location_t, i32, %struct.tree_node*, i8, i8, i8, i8, i8, i8, i8, i8, i32, %struct.tree_decl_u1, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, i32, %struct.tree_decl_u2, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_decl* } + %struct.tree_decl_u1 = type { i64 } + %struct.tree_decl_u2 = type { %struct.function* } + %struct.tree_node = type { %struct.tree_decl } + %union.tree_ann_d = type opaque + +define void @check_format_arg() { + br i1 false, label %cond_next196, label %bb12.preheader + +bb12.preheader: ; preds = %0 + ret void + +cond_next196: ; preds = %0 + br i1 false, label %cond_next330, label %cond_true304 + +cond_true304: ; preds = %cond_next196 + ret void + +cond_next330: ; preds = %cond_next196 + br i1 false, label %cond_next472, label %bb441 + +bb441: ; preds = %cond_next330 + ret void + +cond_next472: ; preds = %cond_next330 + %tmp490 = load %struct.tree_node** null ; <%struct.tree_node*> [#uses=1] + %tmp492 = getelementptr %struct.tree_node* %tmp490, i32 0, i32 0, i32 0, i32 3 ; <i8*> [#uses=1] + %tmp492.upgrd.1 = bitcast i8* %tmp492 to i32* ; <i32*> [#uses=1] + %tmp493 = load i32* %tmp492.upgrd.1 ; <i32> [#uses=1] + %tmp495 = trunc i32 %tmp493 to i8 ; <i8> [#uses=1] + %tmp496 = icmp eq i8 %tmp495, 11 ; <i1> [#uses=1] + %tmp496.upgrd.2 = zext i1 %tmp496 to i8 ; <i8> [#uses=1] + store i8 %tmp496.upgrd.2, i8* null + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2006-10-13-CycleInDAG.ll b/src/LLVM/test/CodeGen/X86/2006-10-13-CycleInDAG.ll new file mode 100644 index 0000000..8a2a332 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2006-10-13-CycleInDAG.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -march=x86 +@str = external global [18 x i8] ; <[18 x i8]*> [#uses=1] + +define void @test() { +bb.i: + %tmp.i660 = load <4 x float>* null ; <<4 x float>> [#uses=1] + call void (i32, ...)* @printf( i32 0, i8* getelementptr ([18 x i8]* @str, i32 0, i64 0), double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00 ) + %tmp152.i = load <4 x i32>* null ; <<4 x i32>> [#uses=1] + %tmp156.i = bitcast <4 x i32> %tmp152.i to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp175.i = bitcast <4 x float> %tmp.i660 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp176.i = xor <4 x i32> %tmp156.i, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] + %tmp177.i = and <4 x i32> %tmp176.i, %tmp175.i ; <<4 x i32>> [#uses=1] + %tmp190.i = or <4 x i32> %tmp177.i, zeroinitializer ; <<4 x i32>> [#uses=1] + %tmp191.i = bitcast <4 x i32> %tmp190.i to <4 x float> ; <<4 x float>> [#uses=1] + store <4 x float> %tmp191.i, <4 x float>* null + ret void +} + +declare void @printf(i32, ...)
diff --git a/src/LLVM/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll b/src/LLVM/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll new file mode 100644 index 0000000..b99839a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s -march=x86 -asm-verbose | FileCheck %s + +@str = internal constant [14 x i8] c"Hello world!\0A\00" ; <[14 x i8]*> [#uses=1] +@str.upgrd.1 = internal constant [13 x i8] c"Blah world!\0A\00" ; <[13 x i8]*> [#uses=1] + +define i32 @test(i32 %argc, i8** %argv) nounwind { +entry: +; CHECK: cmpl $2 +; CHECK-NEXT: je +; CHECK-NEXT: %entry + + switch i32 %argc, label %UnifiedReturnBlock [ + i32 1, label %bb + i32 2, label %bb2 + ] + +bb: ; preds = %entry + %tmp1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([14 x i8]* @str, i32 0, i64 0) ) ; <i32> [#uses=0] + ret i32 0 + +bb2: ; preds = %entry + %tmp4 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([13 x i8]* @str.upgrd.1, i32 0, i64 0) ) ; <i32> [#uses=0] + ret i32 0 + +UnifiedReturnBlock: ; preds = %entry + ret i32 0 +} + +declare i32 @printf(i8*, ...)
diff --git a/src/LLVM/test/CodeGen/X86/2006-11-12-CSRetCC.ll b/src/LLVM/test/CodeGen/X86/2006-11-12-CSRetCC.ll new file mode 100644 index 0000000..9848d81 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2006-11-12-CSRetCC.ll
@@ -0,0 +1,64 @@ +; RUN: llc < %s -march=x86 | FileCheck %s + +target triple = "i686-pc-linux-gnu" +@str = internal constant [9 x i8] c"%f+%f*i\0A\00" ; <[9 x i8]*> [#uses=1] + +define i32 @main() { +; CHECK: main: +; CHECK-NOT: ret +; CHECK: subl $4, %{{.*}} +; CHECK: ret + +entry: + %retval = alloca i32, align 4 ; <i32*> [#uses=1] + %tmp = alloca { double, double }, align 16 ; <{ double, double }*> [#uses=4] + %tmp1 = alloca { double, double }, align 16 ; <{ double, double }*> [#uses=4] + %tmp2 = alloca { double, double }, align 16 ; <{ double, double }*> [#uses=3] + %pi = alloca double, align 8 ; <double*> [#uses=2] + %z = alloca { double, double }, align 16 ; <{ double, double }*> [#uses=4] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store double 0x400921FB54442D18, double* %pi + %tmp.upgrd.1 = load double* %pi ; <double> [#uses=1] + %real = getelementptr { double, double }* %tmp1, i64 0, i32 0 ; <double*> [#uses=1] + store double 0.000000e+00, double* %real + %real3 = getelementptr { double, double }* %tmp1, i64 0, i32 1 ; <double*> [#uses=1] + store double %tmp.upgrd.1, double* %real3 + %tmp.upgrd.2 = getelementptr { double, double }* %tmp, i64 0, i32 0 ; <double*> [#uses=1] + %tmp4 = getelementptr { double, double }* %tmp1, i64 0, i32 0 ; <double*> [#uses=1] + %tmp5 = load double* %tmp4 ; <double> [#uses=1] + store double %tmp5, double* %tmp.upgrd.2 + %tmp6 = getelementptr { double, double }* %tmp, i64 0, i32 1 ; <double*> [#uses=1] + %tmp7 = getelementptr { double, double }* %tmp1, i64 0, i32 1 ; <double*> [#uses=1] + %tmp8 = load double* %tmp7 ; <double> [#uses=1] + store double %tmp8, double* %tmp6 + %tmp.upgrd.3 = bitcast { double, double }* %tmp to { i64, i64 }* ; <{ i64, i64 }*> [#uses=1] + %tmp.upgrd.4 = getelementptr { i64, i64 }* %tmp.upgrd.3, i64 0, i32 0 ; <i64*> [#uses=1] + %tmp.upgrd.5 = load i64* %tmp.upgrd.4 ; <i64> [#uses=1] + %tmp9 = bitcast { double, double }* %tmp to { i64, i64 }* ; <{ i64, i64 }*> [#uses=1] + %tmp10 = 
getelementptr { i64, i64 }* %tmp9, i64 0, i32 1 ; <i64*> [#uses=1] + %tmp11 = load i64* %tmp10 ; <i64> [#uses=1] + call void @cexp( { double, double }* sret %tmp2, i64 %tmp.upgrd.5, i64 %tmp11 ) + %tmp12 = getelementptr { double, double }* %z, i64 0, i32 0 ; <double*> [#uses=1] + %tmp13 = getelementptr { double, double }* %tmp2, i64 0, i32 0 ; <double*> [#uses=1] + %tmp14 = load double* %tmp13 ; <double> [#uses=1] + store double %tmp14, double* %tmp12 + %tmp15 = getelementptr { double, double }* %z, i64 0, i32 1 ; <double*> [#uses=1] + %tmp16 = getelementptr { double, double }* %tmp2, i64 0, i32 1 ; <double*> [#uses=1] + %tmp17 = load double* %tmp16 ; <double> [#uses=1] + store double %tmp17, double* %tmp15 + %tmp18 = getelementptr { double, double }* %z, i64 0, i32 1 ; <double*> [#uses=1] + %tmp19 = load double* %tmp18 ; <double> [#uses=1] + %tmp20 = getelementptr { double, double }* %z, i64 0, i32 0 ; <double*> [#uses=1] + %tmp21 = load double* %tmp20 ; <double> [#uses=1] + %tmp.upgrd.6 = getelementptr [9 x i8]* @str, i32 0, i64 0 ; <i8*> [#uses=1] + %tmp.upgrd.7 = call i32 (i8*, ...)* @printf( i8* %tmp.upgrd.6, double %tmp21, double %tmp19 ) ; <i32> [#uses=0] + br label %return +return: ; preds = %entry + %retval.upgrd.8 = load i32* %retval ; <i32> [#uses=1] + ret i32 %retval.upgrd.8 +} + +declare void @cexp({ double, double }* sret , i64, i64) + +declare i32 @printf(i8*, ...) +
diff --git a/src/LLVM/test/CodeGen/X86/2006-11-17-IllegalMove.ll b/src/LLVM/test/CodeGen/X86/2006-11-17-IllegalMove.ll new file mode 100644 index 0000000..21fe033 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2006-11-17-IllegalMove.ll
@@ -0,0 +1,42 @@ +; RUN: llc < %s -march=x86-64 > %t +; RUN: grep movb %t | count 2 +; RUN: grep {movzb\[wl\]} %t + + +define void @handle_vector_size_attribute() nounwind { +entry: + %tmp69 = load i32* null ; <i32> [#uses=1] + switch i32 %tmp69, label %bb84 [ + i32 2, label %bb77 + i32 1, label %bb77 + ] + +bb77: ; preds = %entry, %entry + %tmp99 = udiv i64 0, 0 ; <i64> [#uses=1] + %tmp = load i8* null ; <i8> [#uses=1] + %tmp114 = icmp eq i64 0, 0 ; <i1> [#uses=1] + br label %cond_true115 + +bb84: ; preds = %entry + ret void + +cond_true115: ; preds = %bb77 + %tmp118 = load i8* null ; <i8> [#uses=1] + br label %cond_true120 + +cond_true120: ; preds = %cond_true115 + %tmp127 = udiv i8 %tmp, %tmp118 ; <i8> [#uses=1] + %tmp127.upgrd.1 = zext i8 %tmp127 to i64 ; <i64> [#uses=1] + br label %cond_next129 + +cond_next129: ; preds = %cond_true120, %cond_true115 + %iftmp.30.0 = phi i64 [ %tmp127.upgrd.1, %cond_true120 ] ; <i64> [#uses=1] + %tmp132 = icmp eq i64 %iftmp.30.0, %tmp99 ; <i1> [#uses=1] + br i1 %tmp132, label %cond_false148, label %cond_next136 + +cond_next136: ; preds = %cond_next129, %bb77 + ret void + +cond_false148: ; preds = %cond_next129 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2006-11-27-SelectLegalize.ll b/src/LLVM/test/CodeGen/X86/2006-11-27-SelectLegalize.ll new file mode 100644 index 0000000..ffd1770 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2006-11-27-SelectLegalize.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=x86 | grep test.*1 +; PR1016 + +define i32 @test(i32 %A, i32 %B, i32 %C) { + %a = trunc i32 %A to i1 ; <i1> [#uses=1] + %D = select i1 %a, i32 %B, i32 %C ; <i32> [#uses=1] + ret i32 %D +} +
diff --git a/src/LLVM/test/CodeGen/X86/2006-12-16-InlineAsmCrash.ll b/src/LLVM/test/CodeGen/X86/2006-12-16-InlineAsmCrash.ll new file mode 100644 index 0000000..577de68 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2006-12-16-InlineAsmCrash.ll
@@ -0,0 +1,30 @@ +; RUN: llc < %s -march=x86 +; PR1049 +target datalayout = "e-p:32:32" +target triple = "i686-pc-linux-gnu" + %struct.QBasicAtomic = type { i32 } + %struct.QByteArray = type { %"struct.QByteArray::Data"* } + %"struct.QByteArray::Data" = type { %struct.QBasicAtomic, i32, i32, i8*, [1 x i8] } + %struct.QFactoryLoader = type { %struct.QObject } + %struct.QImageIOHandler = type { i32 (...)**, %struct.QImageIOHandlerPrivate* } + %struct.QImageIOHandlerPrivate = type opaque + %struct.QImageWriter = type { %struct.QImageWriterPrivate* } + %struct.QImageWriterPrivate = type { %struct.QByteArray, %struct.QFactoryLoader*, i1, %struct.QImageIOHandler*, i32, float, %struct.QString, %struct.QString, i32, %struct.QString, %struct.QImageWriter* } + %"struct.QList<QByteArray>" = type { %"struct.QList<QByteArray>::._20" } + %"struct.QList<QByteArray>::._20" = type { %struct.QListData } + %struct.QListData = type { %"struct.QListData::Data"* } + %"struct.QListData::Data" = type { %struct.QBasicAtomic, i32, i32, i32, i8, [1 x i8*] } + %struct.QObject = type { i32 (...)**, %struct.QObjectData* } + %struct.QObjectData = type { i32 (...)**, %struct.QObject*, %struct.QObject*, %"struct.QList<QByteArray>", i8, [3 x i8], i32, i32 } + %struct.QString = type { %"struct.QString::Data"* } + %"struct.QString::Data" = type { %struct.QBasicAtomic, i32, i32, i16*, i8, i8, [1 x i16] } + +define i1 @_ZNK12QImageWriter8canWriteEv() { + %tmp62 = load %struct.QImageWriterPrivate** null ; <%struct.QImageWriterPrivate*> [#uses=1] + %tmp = getelementptr %struct.QImageWriterPrivate* %tmp62, i32 0, i32 9 ; <%struct.QString*> [#uses=1] + %tmp75 = call %struct.QString* @_ZN7QStringaSERKS_( %struct.QString* %tmp, %struct.QString* null ) ; <%struct.QString*> [#uses=0] + call void asm sideeffect "lock\0Adecl $0\0Asetne 1", "=*m"( i32* null ) + ret i1 false +} + +declare %struct.QString* @_ZN7QStringaSERKS_(%struct.QString*, %struct.QString*)
diff --git a/src/LLVM/test/CodeGen/X86/2006-12-19-IntelSyntax.ll b/src/LLVM/test/CodeGen/X86/2006-12-19-IntelSyntax.ll new file mode 100644 index 0000000..5d4ca2b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2006-12-19-IntelSyntax.ll
@@ -0,0 +1,86 @@ +; RUN: llc < %s -march=x86 -x86-asm-syntax=intel +; PR1061 +target datalayout = "e-p:32:32" +target triple = "i686-pc-linux-gnu" + +define void @bar(i32 %n) { +entry: + switch i32 %n, label %bb12 [ + i32 1, label %bb + i32 2, label %bb6 + i32 4, label %bb7 + i32 5, label %bb8 + i32 6, label %bb10 + i32 7, label %bb1 + i32 8, label %bb3 + i32 9, label %bb4 + i32 10, label %bb9 + i32 11, label %bb2 + i32 12, label %bb5 + i32 13, label %bb11 + ] + +bb: ; preds = %entry + call void (...)* @foo1( ) + ret void + +bb1: ; preds = %entry + call void (...)* @foo2( ) + ret void + +bb2: ; preds = %entry + call void (...)* @foo6( ) + ret void + +bb3: ; preds = %entry + call void (...)* @foo3( ) + ret void + +bb4: ; preds = %entry + call void (...)* @foo4( ) + ret void + +bb5: ; preds = %entry + call void (...)* @foo5( ) + ret void + +bb6: ; preds = %entry + call void (...)* @foo1( ) + ret void + +bb7: ; preds = %entry + call void (...)* @foo2( ) + ret void + +bb8: ; preds = %entry + call void (...)* @foo6( ) + ret void + +bb9: ; preds = %entry + call void (...)* @foo3( ) + ret void + +bb10: ; preds = %entry + call void (...)* @foo4( ) + ret void + +bb11: ; preds = %entry + call void (...)* @foo5( ) + ret void + +bb12: ; preds = %entry + call void (...)* @foo6( ) + ret void +} + +declare void @foo1(...) + +declare void @foo2(...) + +declare void @foo6(...) + +declare void @foo3(...) + +declare void @foo4(...) + +declare void @foo5(...)
diff --git a/src/LLVM/test/CodeGen/X86/2007-01-08-InstrSched.ll b/src/LLVM/test/CodeGen/X86/2007-01-08-InstrSched.ll new file mode 100644 index 0000000..96eec22 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-01-08-InstrSched.ll
@@ -0,0 +1,22 @@ +; PR1075 +; RUN: llc < %s -mtriple=x86_64-apple-darwin -O3 | FileCheck %s + +define float @foo(float %x) nounwind { + %tmp1 = fmul float %x, 3.000000e+00 + %tmp3 = fmul float %x, 5.000000e+00 + %tmp5 = fmul float %x, 7.000000e+00 + %tmp7 = fmul float %x, 1.100000e+01 + %tmp10 = fadd float %tmp1, %tmp3 + %tmp12 = fadd float %tmp10, %tmp5 + %tmp14 = fadd float %tmp12, %tmp7 + ret float %tmp14 + +; CHECK: mulss +; CHECK: mulss +; CHECK: addss +; CHECK: mulss +; CHECK: addss +; CHECK: mulss +; CHECK: addss +; CHECK: ret +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-01-08-X86-64-Pointer.ll b/src/LLVM/test/CodeGen/X86/2007-01-08-X86-64-Pointer.ll new file mode 100644 index 0000000..6afbe3e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-01-08-X86-64-Pointer.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s +; CHECK-NOT: {{addq.*8}} +; CHECK: ({{%rdi|%rcx}},%rax,8) +; CHECK-NOT: {{addq.*8}} + +define void @foo(double* %y) nounwind { +entry: + br label %bb + +bb: + %i = phi i64 [ 0, %entry ], [ %k, %bb ] + %j = getelementptr double* %y, i64 %i + store double 0.000000e+00, double* %j + %k = add i64 %i, 1 + %n = icmp eq i64 %k, 0 + br i1 %n, label %return, label %bb + +return: + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll b/src/LLVM/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll new file mode 100644 index 0000000..676723a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
@@ -0,0 +1,461 @@ +; RUN: llc < %s -march=x86-64 > %t +; RUN: not grep {,%rsp)} %t +; PR1103 + +target datalayout = "e-p:64:64" +@i6000 = global [128 x i64] zeroinitializer, align 16 + + +define void @foo(i32* %a0, i32* %a1, i32* %a2, i32* %a3, i32* %a4, i32* %a5) { +b: + %r = load i32* %a0 + %r2 = load i32* %a1 + %r4 = load i32* %a2 + %r6 = load i32* %a3 + %r8 = load i32* %a4 + %r14 = load i32* %a5 + %rx = sext i32 %r2 to i64 + %r9 = sext i32 %r to i64 + %r11 = add i64 %rx, 0 + %ras = icmp slt i64 %r11, 0 + %r12 = select i1 %ras, i64 0, i64 %r11 + %r16 = sext i32 %r14 to i64 + %r17 = sext i32 %r8 to i64 + %r18 = sub i64 %r16, 0 + %r19 = add i64 %r18, 0 + %r20 = icmp slt i64 %r19, 0 + %r19h = add i64 %r18, 0 + %r22 = select i1 %r20, i64 1, i64 %r19h + %r23 = mul i64 %r22, 0 + %r23a = trunc i64 %r23 to i32 + %r24 = shl i32 %r23a, 0 + %r25 = add i32 %r24, 0 + %ras2 = alloca i8, i32 %r25, align 16 + %r28 = getelementptr i8* %ras2, i32 0 + %r38 = shl i64 %r12, 0 + %s2013 = add i64 %r38, 0 + %c22012 = getelementptr i8* %ras2, i64 %s2013 + %r42 = shl i64 %r12, 0 + %s2011 = add i64 %r42, 16 + %c22010 = getelementptr i8* %ras2, i64 %s2011 + %r50 = add i64 %r16, 0 + %r51 = icmp slt i64 %r50, 0 + %r50sh = shl i64 %r50, 0 + %r50j = add i64 %r50sh, 0 + %r54 = select i1 %r51, i64 0, i64 %r50j + %r56 = mul i64 %r54, %r12 + %r28s = add i64 %r56, 16 + %c2 = getelementptr i8* %ras2, i64 %r28s + %r60 = sub i32 %r2, %r + %r61 = icmp slt i32 %r60, 0 + br i1 %r61, label %a29b, label %b63 +a29b: + %r155 = sub i32 %r6, %r4 + %r156 = icmp slt i32 %r155, 0 + br i1 %r156, label %a109b, label %b158 +b63: + %r66 = sext i32 %r60 to i64 + %r67 = add i64 %r66, 0 + %r76 = mul i64 %r17, 0 + %r82 = add i64 %r76, 0 + %r84 = icmp slt i64 %r67, 0 + br i1 %r84, label %b85, label %a25b +b85: + %e641 = phi i64 [ 0, %b63 ], [ %r129, %a25b ] + %r137 = icmp slt i64 %e641, 0 + br i1 %r137, label %a25b140q, label %a29b +a25b140q: + br label %a25b140 +a25b: + %w1989 = phi i64 [ 0, %b63 ], [ %v1990, %a25b ] + 
%e642 = shl i64 %w1989, 0 + %r129 = add i64 %e642, 0 + %r132 = add i64 %e642, 0 + %r134 = icmp slt i64 %r132, 0 + %v1990 = add i64 %w1989, 0 + br i1 %r134, label %b85, label %a25b +a25b140: + %w1982 = phi i64 [ 0, %a25b140q ], [ %v1983, %a25b140 ] + %r145 = add i64 %r82, 0 + %v1983 = add i64 %w1982, 0 + %u1987 = icmp slt i64 %v1983, 0 + br i1 %u1987, label %a29b, label %a25b140 +b158: + %r161 = sext i32 %r to i64 + %r163 = sext i32 %r4 to i64 + br label %a29b173 +a29b173: + %w1964 = phi i64 [ 0, %b158 ], [ %v1973, %b1606 ] + %b1974 = mul i64 %r163, 0 + %b1975 = add i64 %r161, 0 + %b1976 = mul i64 %w1964, 0 + %b1977 = add i64 %b1976, 0 + %s761 = bitcast i64 %b1977 to i64 + %b1980 = mul i64 %w1964, 0 + %s661 = add i64 %b1980, 0 + br i1 %r61, label %a33b, label %b179 +a33b: + %r328 = icmp slt i32 %r14, 0 + %r335 = or i1 %r328, %r61 + br i1 %r335, label %a50b, label %b341 +b179: + %r182 = sext i32 %r60 to i64 + %r183 = add i64 %r182, 0 + %r187 = icmp slt i64 %r183, 0 + br i1 %r187, label %b188, label %a30b +b188: + %e653 = phi i64 [ 0, %b179 ], [ %r283, %a30b ] + %r291 = icmp slt i64 %e653, 0 + br i1 %r291, label %a30b294q, label %a33b +a30b294q: + br label %a30b294 +a30b: + %w = phi i64 [ 0, %b179 ], [ %v, %a30b ] + %b2 = shl i64 %w, 0 + %r283 = add i64 %b2, 0 + %r286 = add i64 %b2, 0 + %r288 = icmp slt i64 %r286, 0 + %v = add i64 %w, 0 + br i1 %r288, label %b188, label %a30b +a30b294: + %w1847 = phi i64 [ 0, %a30b294q ], [ %v1848, %a30b294 ] + %v1848 = add i64 %w1847, 0 + %u = icmp slt i64 %v1848, 0 + br i1 %u, label %a33b, label %a30b294 +a50b: + %r814 = add i32 %r14, 0 + %r815 = icmp slt i32 %r814, 0 + %r817 = or i1 %r61, %r815 + br i1 %r817, label %a57b, label %b820 +b341: + %w1874 = phi i64 [ 0, %a33b ], [ %v1880, %b463 ] + %d753 = bitcast i64 %w1874 to i64 + %r343 = add i64 %s661, 0 + %r346 = add i64 %r343, 0 + %r347 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r346 + %r348 = load float* %r347 + %r352 = add i64 %r343, 0 + %r353 = 
getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r352 + %r354 = load float* %r353 + %r362 = load float* bitcast ([128 x i64]* @i6000 to float*) + %r363 = fadd float 0.000000e+00, %r362 + %r370 = load float* bitcast ([128 x i64]* @i6000 to float*) + %r376 = icmp slt i64 %r16, 0 + br i1 %r376, label %b377, label %a35b +b377: + %d753p = phi i64 [ %d753, %b341 ], [ %r411, %a35b ] + %s761p = phi i64 [ %s761, %b341 ], [ 322, %a35b ] + %e784 = phi i64 [ 0, %b341 ], [ %r454, %a35b ] + %s794 = add i64 %d753p, 0 + %r462 = icmp slt i64 %e784, 0 + br i1 %r462, label %a35b465, label %b463 +a35b: + %w1865 = phi i64 [ 0, %b341 ], [ %v1866, %a35b ] + %e785 = shl i64 %w1865, 0 + %b1877 = mul i64 %w1865, 0 + %s795 = add i64 %b1877, 0 + %r399 = fadd float %r354, 0.000000e+00 + %r402 = fadd float %r370, 0.000000e+00 + %r403 = fadd float %r348, 0.000000e+00 + %r411 = add i64 %s795, 0 + %r431 = fadd float %r362, 0.000000e+00 + %r454 = add i64 %e785, 0 + %r457 = add i64 %e785, 0 + %r459 = icmp slt i64 %r457, 0 + %v1866 = add i64 %w1865, 0 + br i1 %r459, label %b377, label %a35b +b463: + %r506 = add i64 %d753, 0 + %r511 = sext i32 %r60 to i64 + %r512 = add i64 %r511, 0 + %r513 = icmp slt i64 %r506, 0 + %v1880 = add i64 %w1874, 0 + br i1 %r513, label %b341, label %b514 +a35b465: + %r469 = add i64 %s794, 0 + br label %b463 +b514: + %r525 = mul i64 %r17, 0 + %r533 = add i64 %r525, 0 + br label %b535 +b535: + %w1855 = phi i64 [ 0, %b514 ], [ %v1856, %b712 ] + %s923 = phi i64 [ 0, %b514 ], [ %r799, %b712 ] + %s933 = phi i64 [ %r533, %b514 ], [ %r795, %b712 ] + %r538 = add i64 %w1855, 0 + %r539 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r538 + %r540 = load float* %r539 + %r551 = load float* bitcast ([128 x i64]* @i6000 to float*) + %r562 = sub i64 %s933, 0 + %r564 = icmp slt i64 %r512, 0 + br i1 %r564, label %b565, label %a45b +b565: + %e944 = phi i64 [ 0, %b535 ], [ %r703, %a45b ] + %r711 = icmp slt i64 %e944, 0 + br i1 %r711, label %a45b714, label 
%b712 +a45b: + %w1852 = phi i64 [ 0, %b535 ], [ %v1853, %a45b ] + %e945 = shl i64 %w1852, 0 + %r609 = add i64 %r562, 0 + %r703 = add i64 %e945, 0 + %r706 = add i64 %e945, 0 + %r708 = icmp slt i64 %r706, 0 + %v1853 = add i64 %w1852, 0 + br i1 %r708, label %b565, label %a45b +b712: + %r795 = add i64 %rx, 0 + %r799 = add i64 %s923, 0 + %r802 = add i64 %w1855, 0 + %r807 = icmp slt i64 %r802, 0 + %v1856 = add i64 %w1855, 0 + br i1 %r807, label %b535, label %a50b +a45b714: + %r717 = add i64 %e944, 0 + %r720 = add i64 %r717, 0 + %r721 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r720 + %r722 = load float* %r721 + %r726 = add i64 %r717, 0 + %r727 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r726 + %r728 = load float* %r727 + %r732 = add i64 %r717, 0 + %r733 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r732 + %r734 = load float* %r733 + %r738 = add i64 %r717, 0 + %r739 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r738 + %r740 = load float* %r739 + %r744 = add i64 %r717, 0 + %r745 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r744 + %r746 = load float* %r745 + %r750 = add i64 %r717, 0 + %r751 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r750 + %r752 = load float* %r751 + %r753 = fadd float %r752, %r746 + %r754 = fadd float %r728, %r722 + %r755 = fadd float %r734, %r754 + %r756 = fadd float %r755, %r740 + %r757 = fadd float %r753, %r756 + %r759 = fadd float %r757, %r540 + %r770 = add i64 %r717, 0 + %r771 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r770 + %r772 = load float* %r771 + %r776 = add i64 %r717, 0 + %r777 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r776 + %r778 = load float* %r777 + %r781 = fadd float %r363, %r772 + %r782 = fadd float %r781, %r778 + %r783 = fadd float %r551, %r782 + br label %b712 +a57b: + br i1 %r335, label %a66b, label %b1086 +b820: + %r823 = sext i32 %r2 to 
i64 + %r834 = sext i32 %r8 to i64 + %r844 = add i64 %r16, 0 + %r846 = sext i32 %r60 to i64 + %r847 = add i64 %r846, 0 + %r851 = load float* bitcast ([128 x i64]* @i6000 to float*) + %r856 = sub i64 %rx, 0 + br label %b858 +b858: + %w1891 = phi i64 [ 0, %b820 ], [ %v1892, %b1016 ] + %s1193 = phi i64 [ 0, %b820 ], [ %r1068, %b1016 ] + %b1894 = mul i64 %r834, 0 + %b1896 = shl i64 %r823, 0 + %b1902 = mul i64 %w1891, 0 + %s1173 = add i64 %b1902, 0 + %r859 = add i64 %r856, 0 + %r862 = add i64 %w1891, 0 + %r863 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r862 + %r864 = load float* %r863 + %r868 = add i64 %w1891, 0 + %r869 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r868 + %r870 = load float* %r869 + %r873 = sub i64 %r859, 0 + %r876 = sub i64 %s1173, 0 + %r878 = icmp slt i64 %r847, 0 + br i1 %r878, label %b879, label %a53b +b879: + %e1204 = phi i64 [ 0, %b858 ], [ %r1007, %a53b ] + %r1015 = icmp slt i64 %e1204, 0 + br i1 %r1015, label %a53b1019q, label %b1016 +a53b1019q: + %b1888 = sub i64 %r846, 0 + %b1889 = add i64 %b1888, 0 + br label %a53b1019 +a53b: + %w1881 = phi i64 [ 0, %b858 ], [ %v1882, %a53b ] + %e1205 = shl i64 %w1881, 0 + %r1007 = add i64 %e1205, 0 + %r1010 = add i64 %e1205, 0 + %r1012 = icmp slt i64 %r1010, 0 + %v1882 = add i64 %w1881, 0 + br i1 %r1012, label %b879, label %a53b +b1016: + %r1068 = add i64 %s1193, 0 + %r1071 = add i64 %w1891, 0 + %r1073 = icmp slt i64 %r1071, %r844 + %v1892 = add i64 %w1891, 0 + br i1 %r1073, label %b858, label %a57b +a53b1019: + %w1885 = phi i64 [ 0, %a53b1019q ], [ %v1886, %a53b1019 ] + %r1022 = add i64 %r876, 0 + %r1024 = bitcast i8* %c2 to float* + %r1025 = add i64 %r1022, 0 + %r1026 = getelementptr float* %r1024, i64 %r1025 + %r1027 = load float* %r1026 + %r1032 = add i64 %r873, 0 + %r1033 = add i64 %r1032, 0 + %r1034 = getelementptr float* %r1024, i64 %r1033 + %r1035 = load float* %r1034 + %r1037 = bitcast i8* %c22010 to float* + %r1040 = getelementptr float* %r1037, i64 
%r1025 + %r1044 = fadd float %r864, %r1035 + %r1046 = fadd float %r870, %r1027 + %r1047 = fadd float %r1044, %r1046 + %r1048 = fadd float %r851, %r1047 + %v1886 = add i64 %w1885, 0 + %u1890 = icmp slt i64 %v1886, %b1889 + br i1 %u1890, label %b1016, label %a53b1019 +a66b: + br i1 %r817, label %a93b, label %b1321 +b1086: + %r1089 = sext i32 %r2 to i64 + %r1090 = add i64 %rx, 0 + %r1096 = mul i64 %r9, 0 + %r1101 = sext i32 %r8 to i64 + %r1104 = add i64 %r1096, 0 + %r1108 = sub i64 %r1104, 0 + %r1110 = sext i32 %r60 to i64 + %r1111 = add i64 %r1110, 0 + %r1113 = sext i32 %r14 to i64 + %r1114 = add i64 %r16, 0 + br label %b1117 +b1117: + %w1915 = phi i64 [ 0, %b1086 ], [ %v1957, %b1263 ] + %d1353 = bitcast i64 %w1915 to i64 + %r1120 = add i64 %s661, 0 + %r1121 = add i64 %r1120, 0 + %r1122 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r1121 + %r1123 = load float* %r1122 + %r1132 = bitcast i8* %c22012 to float* + %r1134 = getelementptr float* %r1132, i64 %w1915 + %r1135 = load float* %r1134 + %r1136 = fadd float %r1123, %r1135 + %r1138 = icmp slt i64 %r1114, 0 + br i1 %r1138, label %b1139, label %a63b +b1139: + %e1364 = phi i64 [ 0, %b1117 ], [ %r1254, %a63b ] + %p1998 = phi i64 [ %s761, %b1117 ], [ %r1216, %a63b ] + %r1108p = phi i64 [ %r1108, %b1117 ], [ %r1219, %a63b ] + %p2004 = phi i64 [ %d1353, %b1117 ], [ %r1090, %a63b ] + %s1374 = phi i64 [ 0, %b1117 ], [ %r1251, %a63b ] + %s1384 = add i64 %r1108p, 0 + %s1394 = add i64 %p1998, 0 + %r1262 = icmp slt i64 %e1364, %r1114 + br i1 %r1262, label %a63b1266q, label %b1263 +a63b1266q: + %b1947 = sub i64 %r1113, 0 + %b1948 = add i64 %b1947, 0 + br label %a63b1266 +a63b: + %w1904 = phi i64 [ 0, %b1117 ], [ %v1905, %a63b ] + %s1375 = phi i64 [ 0, %b1117 ], [ %r1251, %a63b ] + %b1906 = add i64 %r1089, 0 + %b1907 = mul i64 %r1101, 0 + %b1929 = mul i64 %w1904, 0 + %s1395 = add i64 %b1929, 0 + %e1365 = shl i64 %w1904, 0 + %r1163 = add i64 %r1090, 0 + %r1167 = add i64 %s1375, 0 + %r1191 = add i64 %r1163, 0 + 
%r1195 = add i64 %r1167, 0 + %r1216 = add i64 %s1395, 0 + %r1219 = add i64 %r1191, 0 + %r1223 = add i64 %r1195, 0 + %r1251 = add i64 %r1223, 0 + %r1254 = add i64 %e1365, 0 + %r1257 = add i64 %e1365, 0 + %r1259 = icmp slt i64 %r1257, %r1114 + %v1905 = add i64 %w1904, 0 + br i1 %r1259, label %b1139, label %a63b +b1263: + %r1306 = add i64 %d1353, 0 + %r1308 = icmp slt i64 %r1306, %r1111 + %v1957 = add i64 %w1915, 0 + br i1 %r1308, label %b1117, label %a66b +a63b1266: + %w1944 = phi i64 [ 0, %a63b1266q ], [ %v1945, %a63b1266 ] + %s1377 = phi i64 [ %s1374, %a63b1266q ], [ %r1297, %a63b1266 ] + %r1282 = fadd float %r1136, 0.000000e+00 + %r1297 = add i64 %s1377, 0 + %v1945 = add i64 %w1944, 0 + %u1949 = icmp slt i64 %v1945, %b1948 + br i1 %u1949, label %b1263, label %a63b1266 +a93b: + br i1 %r61, label %b1606, label %a97b +b1321: + %r1331 = mul i64 %r17, 0 + %r1339 = add i64 %r1331, 0 + br label %b1342 +b1342: + %w1960 = phi i64 [ 0, %b1321 ], [ %v1961, %b1582 ] + %s1523 = phi i64 [ %r1339, %b1321 ], [ %r1587, %b1582 ] + %s1563 = phi i64 [ 0, %b1321 ], [ %r1591, %b1582 ] + %d1533 = bitcast i64 %w1960 to i64 + %b1968 = mul i64 %w1960, 0 + %s1543 = add i64 %b1968, 0 + %r1345 = add i64 %s1523, 0 + %r1348 = sub i64 %r1345, 0 + %r1352 = add i64 %s1523, 0 + %r1355 = sub i64 %r1352, 0 + %r1370 = add i64 %d1533, 0 + %r1371 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r1370 + %r1372 = load float* %r1371 + br label %a74b +a74b: + %w1958 = phi i64 [ 0, %b1342 ], [ %v1959, %a74b ] + %r1379 = add i64 %s1543, 0 + %r1403 = add i64 %r1355, 0 + %r1422 = add i64 %r1348, 0 + %r1526 = fadd float %r1372, 0.000000e+00 + %r1573 = add i64 %w1958, 0 + %r1581 = icmp slt i64 %r1573, 0 + %v1959 = add i64 %w1958, 0 + br i1 %r1581, label %a74b, label %b1582 +b1582: + %r1587 = add i64 %rx, 0 + %r1591 = add i64 %s1563, 0 + %r1596 = add i64 %d1533, 0 + %r1601 = icmp slt i64 %r1596, 0 + %v1961 = add i64 %w1960, 0 + br i1 %r1601, label %b1342, label %a93b +b1606: + %r1833 = add i64 
%w1964, 0 + %r1840 = icmp slt i64 %r1833, 0 + %v1973 = add i64 %w1964, 0 + br i1 %r1840, label %a29b173, label %a109b +a97b: + %w1970 = phi i64 [ 0, %a93b ], [ %v1971, %a97b ] + %r1613 = add i64 %w1964, 0 + %r1614 = mul i64 %r1613, 0 + %r1622 = add i64 %r1614, 0 + %r1754 = bitcast i8* %r28 to float* + %r1756 = getelementptr float* %r1754, i64 %w1970 + %r1757 = load float* %r1756 + %r1761 = add i64 %r1622, 0 + %r1762 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r1761 + %r1763 = load float* %r1762 + %r1767 = add i64 %r1622, 0 + %r1768 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r1767 + %r1772 = fadd float %r1763, 0.000000e+00 + %r1773 = fadd float %r1772, 0.000000e+00 + %r1809 = fadd float %r1757, 0.000000e+00 + %r1810 = fadd float %r1773, %r1809 + store float %r1810, float* %r1768 + %r1818 = add i64 %w1970, 0 + %r1826 = icmp slt i64 %r1818, 0 + %v1971 = add i64 %w1970, 0 + br i1 %r1826, label %a97b, label %b1606 +a109b: + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-01-29-InlineAsm-ir.ll b/src/LLVM/test/CodeGen/X86/2007-01-29-InlineAsm-ir.ll new file mode 100644 index 0000000..250d2b8 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-01-29-InlineAsm-ir.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=x86 +; Test 'ri' constraint. + +define void @run_init_process() { + %tmp = call i32 asm sideeffect "push %ebx ; movl $2,%ebx ; int $$0x80 ; pop %ebx", "={ax},0,ri,{cx},{dx},~{dirflag},~{fpsr},~{flags},~{memory}"( i32 11, i32 0, i32 0, i32 0 ) + unreachable + }
diff --git a/src/LLVM/test/CodeGen/X86/2007-02-04-OrAddrMode.ll b/src/LLVM/test/CodeGen/X86/2007-02-04-OrAddrMode.ll new file mode 100644 index 0000000..410e628 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-02-04-OrAddrMode.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s -march=x86 | FileCheck %s + +;; This example can't fold the or into an LEA. +define i32 @test(float ** %tmp2, i32 %tmp12) nounwind { +; CHECK: test: +; CHECK-NOT: ret +; CHECK: orl $1, %{{.*}} +; CHECK: ret + + %tmp3 = load float** %tmp2 + %tmp132 = shl i32 %tmp12, 2 ; <i32> [#uses=1] + %tmp4 = bitcast float* %tmp3 to i8* ; <i8*> [#uses=1] + %ctg2 = getelementptr i8* %tmp4, i32 %tmp132 ; <i8*> [#uses=1] + %tmp6 = ptrtoint i8* %ctg2 to i32 ; <i32> [#uses=1] + %tmp14 = or i32 %tmp6, 1 ; <i32> [#uses=1] + ret i32 %tmp14 +} + +;; This can! +define i32 @test2(i32 %a, i32 %b) nounwind { +; CHECK: test2: +; CHECK-NOT: ret +; CHECK: leal 3(,%{{.*}},8) +; CHECK: ret + + %c = shl i32 %a, 3 + %d = or i32 %c, 3 + ret i32 %d +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-02-16-BranchFold.ll b/src/LLVM/test/CodeGen/X86/2007-02-16-BranchFold.ll new file mode 100644 index 0000000..eff9b0e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-02-16-BranchFold.ll
@@ -0,0 +1,95 @@ +; PR 1200 +; RUN: llc < %s -enable-tail-merge=0 | not grep jmp + +; ModuleID = '<stdin>' +target datalayout = "e-p:32:32" +target triple = "i686-apple-darwin8" + %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } + %struct.Index_Map = type { i32, %struct.item_set** } + %struct.Item = type { [4 x i16], %struct.rule* } + %struct.__sFILEX = type opaque + %struct.__sbuf = type { i8*, i32 } + %struct.dimension = type { i16*, %struct.Index_Map, %struct.mapping*, i32, %struct.plankMap* } + %struct.item_set = type { i32, i32, %struct.operator*, [2 x %struct.item_set*], %struct.item_set*, i16*, %struct.Item*, %struct.Item* } + %struct.list = type { i8*, %struct.list* } + %struct.mapping = type { %struct.list**, i32, i32, i32, %struct.item_set** } + %struct.nonterminal = type { i8*, i32, i32, i32, %struct.plankMap*, %struct.rule* } + %struct.operator = type { i8*, i8, i32, i32, i32, i32, %struct.table* } + %struct.pattern = type { %struct.nonterminal*, %struct.operator*, [2 x %struct.nonterminal*] } + %struct.plank = type { i8*, %struct.list*, i32 } + %struct.plankMap = type { %struct.list*, i32, %struct.stateMap* } + %struct.rule = type { [4 x i16], i32, i32, i32, %struct.nonterminal*, %struct.pattern*, i8 } + %struct.stateMap = type { i8*, %struct.plank*, i32, i16* } + %struct.table = type { %struct.operator*, %struct.list*, i16*, [2 x %struct.dimension*], %struct.item_set** } +@outfile = external global %struct.FILE* ; <%struct.FILE**> [#uses=1] +@str1 = external global [11 x i8] ; <[11 x i8]*> [#uses=1] + +declare i32 @fprintf(%struct.FILE*, i8*, ...) 
+ +define i16 @main_bb_2E_i9_2E_i_2E_i932_2E_ce(%struct.list* %l_addr.01.0.i2.i.i929, %struct.operator** %tmp66.i62.i.out) { +newFuncRoot: + br label %bb.i9.i.i932.ce + +NewDefault: ; preds = %LeafBlock, %LeafBlock1, %LeafBlock2, %LeafBlock3 + br label %bb36.i.i.exitStub + +bb36.i.i.exitStub: ; preds = %NewDefault + store %struct.operator* %tmp66.i62.i, %struct.operator** %tmp66.i62.i.out + ret i16 0 + +bb.i14.i.exitStub: ; preds = %LeafBlock + store %struct.operator* %tmp66.i62.i, %struct.operator** %tmp66.i62.i.out + ret i16 1 + +bb12.i.i935.exitStub: ; preds = %LeafBlock1 + store %struct.operator* %tmp66.i62.i, %struct.operator** %tmp66.i62.i.out + ret i16 2 + +bb20.i.i937.exitStub: ; preds = %LeafBlock2 + store %struct.operator* %tmp66.i62.i, %struct.operator** %tmp66.i62.i.out + ret i16 3 + +bb28.i.i938.exitStub: ; preds = %LeafBlock3 + store %struct.operator* %tmp66.i62.i, %struct.operator** %tmp66.i62.i.out + ret i16 4 + +bb.i9.i.i932.ce: ; preds = %newFuncRoot + %tmp1.i3.i.i930 = getelementptr %struct.list* %l_addr.01.0.i2.i.i929, i32 0, i32 0 ; <i8**> [#uses=1] + %tmp2.i4.i.i931 = load i8** %tmp1.i3.i.i930 ; <i8*> [#uses=1] + %tmp66.i62.i = bitcast i8* %tmp2.i4.i.i931 to %struct.operator* ; <%struct.operator*> [#uses=7] + %tmp1.i6.i = getelementptr %struct.operator* %tmp66.i62.i, i32 0, i32 2 ; <i32*> [#uses=1] + %tmp2.i7.i = load i32* %tmp1.i6.i ; <i32> [#uses=1] + %tmp3.i8.i = load %struct.FILE** @outfile ; <%struct.FILE*> [#uses=1] + %tmp5.i9.i = call i32 (%struct.FILE*, i8*, ...)* @fprintf( %struct.FILE* %tmp3.i8.i, i8* getelementptr ([11 x i8]* @str1, i32 0, i32 0), i32 %tmp2.i7.i ) ; <i32> [#uses=0] + %tmp7.i10.i = getelementptr %struct.operator* %tmp66.i62.i, i32 0, i32 5 ; <i32*> [#uses=1] + %tmp8.i11.i = load i32* %tmp7.i10.i ; <i32> [#uses=7] + br label %NodeBlock5 + +NodeBlock5: ; preds = %bb.i9.i.i932.ce + icmp slt i32 %tmp8.i11.i, 1 ; <i1>:0 [#uses=1] + br i1 %0, label %NodeBlock, label %NodeBlock4 + +NodeBlock4: ; preds = %NodeBlock5 + icmp 
slt i32 %tmp8.i11.i, 2 ; <i1>:1 [#uses=1] + br i1 %1, label %LeafBlock2, label %LeafBlock3 + +LeafBlock3: ; preds = %NodeBlock4 + icmp eq i32 %tmp8.i11.i, 2 ; <i1>:2 [#uses=1] + br i1 %2, label %bb28.i.i938.exitStub, label %NewDefault + +LeafBlock2: ; preds = %NodeBlock4 + icmp eq i32 %tmp8.i11.i, 1 ; <i1>:3 [#uses=1] + br i1 %3, label %bb20.i.i937.exitStub, label %NewDefault + +NodeBlock: ; preds = %NodeBlock5 + icmp slt i32 %tmp8.i11.i, 0 ; <i1>:4 [#uses=1] + br i1 %4, label %LeafBlock, label %LeafBlock1 + +LeafBlock1: ; preds = %NodeBlock + icmp eq i32 %tmp8.i11.i, 0 ; <i1>:5 [#uses=1] + br i1 %5, label %bb12.i.i935.exitStub, label %NewDefault + +LeafBlock: ; preds = %NodeBlock + icmp eq i32 %tmp8.i11.i, -1 ; <i1>:6 [#uses=1] + br i1 %6, label %bb.i14.i.exitStub, label %NewDefault +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-02-19-LiveIntervalAssert.ll b/src/LLVM/test/CodeGen/X86/2007-02-19-LiveIntervalAssert.ll new file mode 100644 index 0000000..ae8e192 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-02-19-LiveIntervalAssert.ll
@@ -0,0 +1,21 @@ +; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu -relocation-model=pic +; PR1027 + + %struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i32, i32, [40 x i8] } + %struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } +@stderr = external global %struct._IO_FILE* + +define void @__eprintf(i8* %string, i8* %expression, i32 %line, i8* %filename) { + %tmp = load %struct._IO_FILE** @stderr + %tmp5 = tail call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf( %struct._IO_FILE* %tmp, i8* %string, i8* %expression, i32 %line, i8* %filename ) + %tmp6 = load %struct._IO_FILE** @stderr + %tmp7 = tail call i32 @fflush( %struct._IO_FILE* %tmp6 ) + tail call void @abort( ) + unreachable +} + +declare i32 @fprintf(%struct._IO_FILE*, i8*, ...) + +declare i32 @fflush(%struct._IO_FILE*) + +declare void @abort()
diff --git a/src/LLVM/test/CodeGen/X86/2007-02-23-DAGCombine-Miscompile.ll b/src/LLVM/test/CodeGen/X86/2007-02-23-DAGCombine-Miscompile.ll new file mode 100644 index 0000000..e3924ca --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-02-23-DAGCombine-Miscompile.ll
@@ -0,0 +1,17 @@ +; PR1219 +; RUN: llc < %s -march=x86 | FileCheck %s + +define i32 @test(i1 %X) { +; CHECK: test: +; CHECK-NOT: ret +; CHECK: movl $1, %eax +; CHECK: ret + + %hvar2 = zext i1 %X to i32 + %C = icmp sgt i32 %hvar2, -1 + br i1 %C, label %cond_true15, label %cond_true +cond_true15: + ret i32 1 +cond_true: + ret i32 2 +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-02-25-FastCCStack.ll b/src/LLVM/test/CodeGen/X86/2007-02-25-FastCCStack.ll new file mode 100644 index 0000000..8d02524 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-02-25-FastCCStack.ll
@@ -0,0 +1,5 @@ +; RUN: llc < %s -march=x86 -mcpu=pentium3 + +define internal fastcc double @ggc_rlimit_bound(double %limit) { + ret double %limit +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-03-01-SpillerCrash.ll b/src/LLVM/test/CodeGen/X86/2007-03-01-SpillerCrash.ll new file mode 100644 index 0000000..fc5c97a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-03-01-SpillerCrash.ll
@@ -0,0 +1,86 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin8 -mattr=+sse2 +; RUN: llc < %s -mtriple=x86_64-apple-darwin8 -mattr=+sse2 | not grep movhlps + +define void @test() nounwind { +test.exit: + fmul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:0 [#uses=4] + load <4 x float>* null ; <<4 x float>>:1 [#uses=1] + shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x float>>:2 [#uses=1] + fmul <4 x float> %0, %2 ; <<4 x float>>:3 [#uses=1] + fsub <4 x float> zeroinitializer, %3 ; <<4 x float>>:4 [#uses=1] + fmul <4 x float> %4, zeroinitializer ; <<4 x float>>:5 [#uses=2] + bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>>:6 [#uses=1] + and <4 x i32> %6, < i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647 > ; <<4 x i32>>:7 [#uses=1] + bitcast <4 x i32> %7 to <4 x float> ; <<4 x float>>:8 [#uses=2] + extractelement <4 x float> %8, i32 0 ; <float>:9 [#uses=1] + extractelement <4 x float> %8, i32 1 ; <float>:10 [#uses=2] + br i1 false, label %11, label %19 + +; <label>:11 ; preds = %test.exit + br i1 false, label %17, label %12 + +; <label>:12 ; preds = %11 + br i1 false, label %19, label %13 + +; <label>:13 ; preds = %12 + fsub float -0.000000e+00, 0.000000e+00 ; <float>:14 [#uses=1] + %tmp207 = extractelement <4 x float> zeroinitializer, i32 0 ; <float> [#uses=1] + %tmp208 = extractelement <4 x float> zeroinitializer, i32 2 ; <float> [#uses=1] + fsub float -0.000000e+00, %tmp208 ; <float>:15 [#uses=1] + %tmp155 = extractelement <4 x float> zeroinitializer, i32 0 ; <float> [#uses=1] + %tmp156 = extractelement <4 x float> zeroinitializer, i32 2 ; <float> [#uses=1] + fsub float -0.000000e+00, %tmp156 ; <float>:16 [#uses=1] + br label %19 + +; <label>:17 ; preds = %11 + br i1 false, label %19, label %18 + +; <label>:18 ; preds = %17 + br label %19 + +; <label>:19 ; preds = %18, %17, %13, %12, %test.exit + phi i32 [ 5, %18 ], [ 3, %13 ], [ 1, %test.exit ], [ 2, %12 ], [ 4, %17 ] ; 
<i32>:20 [#uses=0] + phi float [ 0.000000e+00, %18 ], [ %16, %13 ], [ 0.000000e+00, %test.exit ], [ 0.000000e+00, %12 ], [ 0.000000e+00, %17 ] ; <float>:21 [#uses=1] + phi float [ 0.000000e+00, %18 ], [ %tmp155, %13 ], [ 0.000000e+00, %test.exit ], [ 0.000000e+00, %12 ], [ 0.000000e+00, %17 ] ; <float>:22 [#uses=1] + phi float [ 0.000000e+00, %18 ], [ %15, %13 ], [ 0.000000e+00, %test.exit ], [ 0.000000e+00, %12 ], [ 0.000000e+00, %17 ] ; <float>:23 [#uses=1] + phi float [ 0.000000e+00, %18 ], [ %tmp207, %13 ], [ 0.000000e+00, %test.exit ], [ 0.000000e+00, %12 ], [ 0.000000e+00, %17 ] ; <float>:24 [#uses=1] + phi float [ 0.000000e+00, %18 ], [ %10, %13 ], [ %9, %test.exit ], [ %10, %12 ], [ 0.000000e+00, %17 ] ; <float>:25 [#uses=2] + phi float [ 0.000000e+00, %18 ], [ %14, %13 ], [ 0.000000e+00, %test.exit ], [ 0.000000e+00, %12 ], [ 0.000000e+00, %17 ] ; <float>:26 [#uses=1] + phi float [ 0.000000e+00, %18 ], [ 0.000000e+00, %13 ], [ 0.000000e+00, %test.exit ], [ 0.000000e+00, %12 ], [ 0.000000e+00, %17 ] ; <float>:27 [#uses=1] + insertelement <4 x float> undef, float %27, i32 0 ; <<4 x float>>:28 [#uses=1] + insertelement <4 x float> %28, float %26, i32 1 ; <<4 x float>>:29 [#uses=0] + insertelement <4 x float> undef, float %24, i32 0 ; <<4 x float>>:30 [#uses=1] + insertelement <4 x float> %30, float %23, i32 1 ; <<4 x float>>:31 [#uses=1] + insertelement <4 x float> %31, float %25, i32 2 ; <<4 x float>>:32 [#uses=1] + insertelement <4 x float> %32, float %25, i32 3 ; <<4 x float>>:33 [#uses=1] + fdiv <4 x float> %33, zeroinitializer ; <<4 x float>>:34 [#uses=1] + fmul <4 x float> %34, < float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01 > ; <<4 x float>>:35 [#uses=1] + insertelement <4 x float> undef, float %22, i32 0 ; <<4 x float>>:36 [#uses=1] + insertelement <4 x float> %36, float %21, i32 1 ; <<4 x float>>:37 [#uses=0] + br i1 false, label %foo.exit, label %38 + +; <label>:38 ; preds = %19 + extractelement <4 x float> %0, i32 0 
; <float>:39 [#uses=1] + fcmp ogt float %39, 0.000000e+00 ; <i1>:40 [#uses=1] + extractelement <4 x float> %0, i32 2 ; <float>:41 [#uses=1] + extractelement <4 x float> %0, i32 1 ; <float>:42 [#uses=1] + fsub float -0.000000e+00, %42 ; <float>:43 [#uses=2] + %tmp189 = extractelement <4 x float> %5, i32 2 ; <float> [#uses=1] + br i1 %40, label %44, label %46 + +; <label>:44 ; preds = %38 + fsub float -0.000000e+00, %tmp189 ; <float>:45 [#uses=0] + br label %foo.exit + +; <label>:46 ; preds = %38 + %tmp192 = extractelement <4 x float> %5, i32 1 ; <float> [#uses=1] + fsub float -0.000000e+00, %tmp192 ; <float>:47 [#uses=1] + br label %foo.exit + +foo.exit: ; preds = %46, %44, %19 + phi float [ 0.000000e+00, %44 ], [ %47, %46 ], [ 0.000000e+00, %19 ] ; <float>:48 [#uses=0] + phi float [ %43, %44 ], [ %43, %46 ], [ 0.000000e+00, %19 ] ; <float>:49 [#uses=0] + phi float [ 0.000000e+00, %44 ], [ %41, %46 ], [ 0.000000e+00, %19 ] ; <float>:50 [#uses=0] + shufflevector <4 x float> %35, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>>:51 [#uses=0] + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll b/src/LLVM/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll new file mode 100644 index 0000000..7d0f642 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll
@@ -0,0 +1,73 @@ +; RUN: llc < %s -march=x86 -mtriple=i686-darwin | \ +; RUN: grep push | count 3 + +define void @foo(i8** %buf, i32 %size, i32 %col, i8* %p) nounwind { +entry: + icmp sgt i32 %size, 0 ; <i1>:0 [#uses=1] + br i1 %0, label %bb.preheader, label %return + +bb.preheader: ; preds = %entry + %tmp5.sum72 = add i32 %col, 7 ; <i32> [#uses=1] + %tmp5.sum71 = add i32 %col, 5 ; <i32> [#uses=1] + %tmp5.sum70 = add i32 %col, 3 ; <i32> [#uses=1] + %tmp5.sum69 = add i32 %col, 2 ; <i32> [#uses=1] + %tmp5.sum68 = add i32 %col, 1 ; <i32> [#uses=1] + %tmp5.sum66 = add i32 %col, 4 ; <i32> [#uses=1] + %tmp5.sum = add i32 %col, 6 ; <i32> [#uses=1] + br label %bb + +bb: ; preds = %bb, %bb.preheader + %i.073.0 = phi i32 [ 0, %bb.preheader ], [ %indvar.next, %bb ] ; <i32> [#uses=3] + %p_addr.076.0.rec = mul i32 %i.073.0, 9 ; <i32> [#uses=9] + %p_addr.076.0 = getelementptr i8* %p, i32 %p_addr.076.0.rec ; <i8*> [#uses=1] + %tmp2 = getelementptr i8** %buf, i32 %i.073.0 ; <i8**> [#uses=1] + %tmp3 = load i8** %tmp2 ; <i8*> [#uses=8] + %tmp5 = getelementptr i8* %tmp3, i32 %col ; <i8*> [#uses=1] + %tmp7 = load i8* %p_addr.076.0 ; <i8> [#uses=1] + store i8 %tmp7, i8* %tmp5 + %p_addr.076.0.sum93 = add i32 %p_addr.076.0.rec, 1 ; <i32> [#uses=1] + %tmp11 = getelementptr i8* %p, i32 %p_addr.076.0.sum93 ; <i8*> [#uses=1] + %tmp13 = load i8* %tmp11 ; <i8> [#uses=1] + %tmp15 = getelementptr i8* %tmp3, i32 %tmp5.sum72 ; <i8*> [#uses=1] + store i8 %tmp13, i8* %tmp15 + %p_addr.076.0.sum92 = add i32 %p_addr.076.0.rec, 2 ; <i32> [#uses=1] + %tmp17 = getelementptr i8* %p, i32 %p_addr.076.0.sum92 ; <i8*> [#uses=1] + %tmp19 = load i8* %tmp17 ; <i8> [#uses=1] + %tmp21 = getelementptr i8* %tmp3, i32 %tmp5.sum71 ; <i8*> [#uses=1] + store i8 %tmp19, i8* %tmp21 + %p_addr.076.0.sum91 = add i32 %p_addr.076.0.rec, 3 ; <i32> [#uses=1] + %tmp23 = getelementptr i8* %p, i32 %p_addr.076.0.sum91 ; <i8*> [#uses=1] + %tmp25 = load i8* %tmp23 ; <i8> [#uses=1] + %tmp27 = getelementptr i8* %tmp3, i32 %tmp5.sum70 ; 
<i8*> [#uses=1] + store i8 %tmp25, i8* %tmp27 + %p_addr.076.0.sum90 = add i32 %p_addr.076.0.rec, 4 ; <i32> [#uses=1] + %tmp29 = getelementptr i8* %p, i32 %p_addr.076.0.sum90 ; <i8*> [#uses=1] + %tmp31 = load i8* %tmp29 ; <i8> [#uses=1] + %tmp33 = getelementptr i8* %tmp3, i32 %tmp5.sum69 ; <i8*> [#uses=2] + store i8 %tmp31, i8* %tmp33 + %p_addr.076.0.sum89 = add i32 %p_addr.076.0.rec, 5 ; <i32> [#uses=1] + %tmp35 = getelementptr i8* %p, i32 %p_addr.076.0.sum89 ; <i8*> [#uses=1] + %tmp37 = load i8* %tmp35 ; <i8> [#uses=1] + %tmp39 = getelementptr i8* %tmp3, i32 %tmp5.sum68 ; <i8*> [#uses=1] + store i8 %tmp37, i8* %tmp39 + %p_addr.076.0.sum88 = add i32 %p_addr.076.0.rec, 6 ; <i32> [#uses=1] + %tmp41 = getelementptr i8* %p, i32 %p_addr.076.0.sum88 ; <i8*> [#uses=1] + %tmp43 = load i8* %tmp41 ; <i8> [#uses=1] + store i8 %tmp43, i8* %tmp33 + %p_addr.076.0.sum87 = add i32 %p_addr.076.0.rec, 7 ; <i32> [#uses=1] + %tmp47 = getelementptr i8* %p, i32 %p_addr.076.0.sum87 ; <i8*> [#uses=1] + %tmp49 = load i8* %tmp47 ; <i8> [#uses=1] + %tmp51 = getelementptr i8* %tmp3, i32 %tmp5.sum66 ; <i8*> [#uses=1] + store i8 %tmp49, i8* %tmp51 + %p_addr.076.0.sum = add i32 %p_addr.076.0.rec, 8 ; <i32> [#uses=1] + %tmp53 = getelementptr i8* %p, i32 %p_addr.076.0.sum ; <i8*> [#uses=1] + %tmp55 = load i8* %tmp53 ; <i8> [#uses=1] + %tmp57 = getelementptr i8* %tmp3, i32 %tmp5.sum ; <i8*> [#uses=1] + store i8 %tmp55, i8* %tmp57 + %indvar.next = add i32 %i.073.0, 1 ; <i32> [#uses=2] + icmp eq i32 %indvar.next, %size ; <i1>:1 [#uses=1] + br i1 %1, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-03-16-InlineAsm.ll b/src/LLVM/test/CodeGen/X86/2007-03-16-InlineAsm.ll new file mode 100644 index 0000000..ec5f2ce --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-03-16-InlineAsm.ll
@@ -0,0 +1,26 @@ +; RUN: llc < %s -march=x86 + +; ModuleID = 'a.bc' + +define i32 @foo(i32 %A, i32 %B) { +entry: + %A_addr = alloca i32 ; <i32*> [#uses=2] + %B_addr = alloca i32 ; <i32*> [#uses=1] + %retval = alloca i32, align 4 ; <i32*> [#uses=2] + %tmp = alloca i32, align 4 ; <i32*> [#uses=2] + %ret = alloca i32, align 4 ; <i32*> [#uses=2] + store i32 %A, i32* %A_addr + store i32 %B, i32* %B_addr + %tmp1 = load i32* %A_addr ; <i32> [#uses=1] + %tmp2 = call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"( i32 7, i32 %tmp1 ) ; <i32> [#uses=1] + store i32 %tmp2, i32* %ret + %tmp3 = load i32* %ret ; <i32> [#uses=1] + store i32 %tmp3, i32* %tmp + %tmp4 = load i32* %tmp ; <i32> [#uses=1] + store i32 %tmp4, i32* %retval + br label %return + +return: ; preds = %entry + %retval5 = load i32* %retval ; <i32> [#uses=1] + ret i32 %retval5 +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-03-18-LiveIntervalAssert.ll b/src/LLVM/test/CodeGen/X86/2007-03-18-LiveIntervalAssert.ll new file mode 100644 index 0000000..f9106fe --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-03-18-LiveIntervalAssert.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=x86 +; PR1259 + +define void @test() { + %tmp2 = call i32 asm "...", "=r,~{dirflag},~{fpsr},~{flags},~{dx},~{cx},~{ax}"( ) + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-03-24-InlineAsmMultiRegConstraint.ll b/src/LLVM/test/CodeGen/X86/2007-03-24-InlineAsmMultiRegConstraint.ll new file mode 100644 index 0000000..dc17422 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-03-24-InlineAsmMultiRegConstraint.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=x86 + +define i32 @test(i16 %tmp40414244) { + %tmp48 = call i32 asm sideeffect "inl ${1:w}, $0", "={ax},N{dx},~{dirflag},~{fpsr},~{flags}"( i16 %tmp40414244 ) + ret i32 %tmp48 +} + +define i32 @test2(i16 %tmp40414244) { + %tmp48 = call i32 asm sideeffect "inl ${1:w}, $0", "={ax},N{dx},~{dirflag},~{fpsr},~{flags}"( i16 14 ) + ret i32 %tmp48 +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll b/src/LLVM/test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll new file mode 100644 index 0000000..d773000 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=x86 | grep {mov %gs:72, %eax} +target datalayout = "e-p:32:32" +target triple = "i686-apple-darwin9" + +define void @test() { + %tmp1 = tail call i32* asm sideeffect "mov %gs:${1:P}, $0", "=r,i,~{dirflag},~{fpsr},~{flags}"( i32 72 ) ; <%struct._pthread*> [#uses=1] + ret void +} + +
diff --git a/src/LLVM/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll b/src/LLVM/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll new file mode 100644 index 0000000..43a2f1d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -mcpu=yonah -march=x86 | \ +; RUN: grep {cmpltsd %xmm0, %xmm0} +target datalayout = "e-p:32:32" +target triple = "i686-apple-darwin9" + + +define void @acoshf() { + %tmp19 = tail call <2 x double> asm sideeffect "pcmpeqd $0, $0 \0A\09 cmpltsd $0, $0", "=x,0,~{dirflag},~{fpsr},~{flags}"( <2 x double> zeroinitializer ) ; <<2 x double>> [#uses=0] + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/2007-03-24-InlineAsmXConstraint.ll b/src/LLVM/test/CodeGen/X86/2007-03-24-InlineAsmXConstraint.ll new file mode 100644 index 0000000..3d9a19f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-03-24-InlineAsmXConstraint.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=x86 | FileCheck %s +target datalayout = "e-p:32:32" +target triple = "i686-apple-darwin9" + +define void @test() { +; CHECK: test: +; CHECK-NOT: ret +; CHECK: psrlw $8, %xmm0 +; CHECK: ret + + tail call void asm sideeffect "psrlw $0, %xmm0", "X,~{dirflag},~{fpsr},~{flags}"( i32 8 ) + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/2007-03-26-CoalescerBug.ll b/src/LLVM/test/CodeGen/X86/2007-03-26-CoalescerBug.ll new file mode 100644 index 0000000..9676f14 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-03-26-CoalescerBug.ll
@@ -0,0 +1,49 @@ +; RUN: llc < %s -march=x86 + +@data = external global [339 x i64] + +define void @foo(...) { +bb1: + %t43 = load i64* getelementptr ([339 x i64]* @data, i32 0, i64 212), align 4 + br i1 false, label %bb80, label %bb6 +bb6: + br i1 false, label %bb38, label %bb265 +bb265: + ret void +bb38: + br i1 false, label %bb80, label %bb49 +bb80: + br i1 false, label %bb146, label %bb268 +bb49: + ret void +bb113: + ret void +bb268: + %t1062 = shl i64 %t43, 3 + %t1066 = shl i64 0, 3 + br label %bb85 +bb85: + %t1025 = phi i64 [ 0, %bb268 ], [ %t102.0, %bb234 ] + %t1028 = phi i64 [ 0, %bb268 ], [ %t1066, %bb234 ] + %t1031 = phi i64 [ 0, %bb268 ], [ %t103.0, %bb234 ] + %t1034 = phi i64 [ 0, %bb268 ], [ %t1066, %bb234 ] + %t102.0 = add i64 %t1028, %t1025 + %t103.0 = add i64 %t1034, %t1031 + br label %bb86 +bb86: + %t108.0 = phi i64 [ %t102.0, %bb85 ], [ %t1139, %bb248 ] + %t110.0 = phi i64 [ %t103.0, %bb85 ], [ %t1142, %bb248 ] + br label %bb193 +bb193: + %t1081 = add i64 %t110.0, -8 + %t1087 = add i64 %t108.0, -8 + br i1 false, label %bb193, label %bb248 +bb248: + %t1139 = add i64 %t108.0, %t1062 + %t1142 = add i64 %t110.0, %t1062 + br i1 false, label %bb86, label %bb234 +bb234: + br i1 false, label %bb85, label %bb113 +bb146: + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-04-08-InlineAsmCrash.ll b/src/LLVM/test/CodeGen/X86/2007-04-08-InlineAsmCrash.ll new file mode 100644 index 0000000..373aecc --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-04-08-InlineAsmCrash.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s +; PR1314 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "x86_64-unknown-linux-gnu" + %struct.CycleCount = type { i64, i64 } + %struct.bc_struct = type { i32, i32, i32, i32, %struct.bc_struct*, i8*, i8* } +@_programStartTime = external global %struct.CycleCount ; <%struct.CycleCount*> [#uses=1] + +define fastcc i32 @bc_divide(%struct.bc_struct* %n1, %struct.bc_struct* %n2, %struct.bc_struct** %quot, i32 %scale) nounwind { +entry: + %tmp7.i46 = tail call i64 asm sideeffect ".byte 0x0f,0x31", "={dx},=*{ax},~{dirflag},~{fpsr},~{flags}"( i64* getelementptr (%struct.CycleCount* @_programStartTime, i32 0, i32 1) ) ; <i64> [#uses=0] + %tmp221 = sdiv i32 10, 0 ; <i32> [#uses=1] + tail call fastcc void @_one_mult( i8* null, i32 0, i32 %tmp221, i8* null ) + ret i32 0 +} + +declare fastcc void @_one_mult(i8*, i32, i32, i8*)
diff --git a/src/LLVM/test/CodeGen/X86/2007-04-11-InlineAsmVectorResult.ll b/src/LLVM/test/CodeGen/X86/2007-04-11-InlineAsmVectorResult.ll new file mode 100644 index 0000000..0aa9568 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-04-11-InlineAsmVectorResult.ll
@@ -0,0 +1,21 @@ +; RUN: llc < %s -march=x86 -mcpu=yonah +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "i686-apple-darwin8" + +define void @test(<4 x float> %tmp42i) { + %tmp42 = call <4 x float> asm "movss $1, $0", "=x,m,~{dirflag},~{fpsr},~{flags}"( float* null ) ; <<4 x float>> [#uses=1] + %tmp49 = shufflevector <4 x float> %tmp42, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] + br label %bb + +bb: ; preds = %bb, %cond_true10 + %tmp52 = bitcast <4 x float> %tmp49 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp53 = call <4 x i32> @llvm.x86.sse2.psll.d( <4 x i32> %tmp52, <4 x i32> < i32 8, i32 undef, i32 undef, i32 undef > ) ; <<4 x i32>> [#uses=1] + %tmp105 = bitcast <4 x i32> %tmp53 to <4 x float> ; <<4 x float>> [#uses=1] + %tmp108 = fsub <4 x float> zeroinitializer, %tmp105 ; <<4 x float>> [#uses=0] + br label %bb + +return: ; preds = %entry + ret void +} + +declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>)
diff --git a/src/LLVM/test/CodeGen/X86/2007-04-17-LiveIntervalAssert.ll b/src/LLVM/test/CodeGen/X86/2007-04-17-LiveIntervalAssert.ll new file mode 100644 index 0000000..a82a4d0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-04-17-LiveIntervalAssert.ll
@@ -0,0 +1,42 @@ +; RUN: llc < %s -mtriple=i686-apple-darwin -relocation-model=pic --disable-fp-elim + + %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } + %struct.__sFILEX = type opaque + %struct.__sbuf = type { i8*, i32 } + %struct.partition_def = type { i32, [1 x %struct.partition_elem] } + %struct.partition_elem = type { i32, %struct.partition_elem*, i32 } + +define void @partition_print(%struct.partition_def* %part) { +entry: + br i1 false, label %bb.preheader, label %bb99 + +bb.preheader: ; preds = %entry + br i1 false, label %cond_true, label %cond_next90 + +cond_true: ; preds = %bb.preheader + br i1 false, label %bb32, label %bb87.critedge + +bb32: ; preds = %bb32, %cond_true + %i.2115.0 = phi i32 [ 0, %cond_true ], [ %indvar.next127, %bb32 ] ; <i32> [#uses=1] + %c.2112.0 = phi i32 [ 0, %cond_true ], [ %tmp49, %bb32 ] ; <i32> [#uses=1] + %tmp43 = getelementptr %struct.partition_def* %part, i32 0, i32 1, i32 %c.2112.0, i32 1 ; <%struct.partition_elem**> [#uses=1] + %tmp44 = load %struct.partition_elem** %tmp43 ; <%struct.partition_elem*> [#uses=1] + %tmp4445 = ptrtoint %struct.partition_elem* %tmp44 to i32 ; <i32> [#uses=1] + %tmp48 = sub i32 %tmp4445, 0 ; <i32> [#uses=1] + %tmp49 = sdiv i32 %tmp48, 12 ; <i32> [#uses=1] + %indvar.next127 = add i32 %i.2115.0, 1 ; <i32> [#uses=2] + %exitcond128 = icmp eq i32 %indvar.next127, 0 ; <i1> [#uses=1] + br i1 %exitcond128, label %bb58, label %bb32 + +bb58: ; preds = %bb32 + ret void + +bb87.critedge: ; preds = %cond_true + ret void + +cond_next90: ; preds = %bb.preheader + ret void + +bb99: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-04-24-Huge-Stack.ll b/src/LLVM/test/CodeGen/X86/2007-04-24-Huge-Stack.ll new file mode 100644 index 0000000..0d389ce --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-04-24-Huge-Stack.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -march=x86-64 | not grep 4294967112 +; PR1348 + + %struct.md5_ctx = type { i32, i32, i32, i32, [2 x i32], i32, [128 x i8], [4294967288 x i8] } + +define i8* @md5_buffer(i8* %buffer, i64 %len, i8* %resblock) { +entry: + %ctx = alloca %struct.md5_ctx, align 16 ; <%struct.md5_ctx*> [#uses=3] + call void @md5_init_ctx( %struct.md5_ctx* %ctx ) + call void @md5_process_bytes( i8* %buffer, i64 %len, %struct.md5_ctx* %ctx ) + %tmp4 = call i8* @md5_finish_ctx( %struct.md5_ctx* %ctx, i8* %resblock ) ; <i8*> [#uses=1] + ret i8* %tmp4 +} + +declare void @md5_init_ctx(%struct.md5_ctx*) + +declare i8* @md5_finish_ctx(%struct.md5_ctx*, i8*) + +declare void @md5_process_bytes(i8*, i64, %struct.md5_ctx*)
diff --git a/src/LLVM/test/CodeGen/X86/2007-04-24-VectorCrash.ll b/src/LLVM/test/CodeGen/X86/2007-04-24-VectorCrash.ll new file mode 100644 index 0000000..cf6e943 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-04-24-VectorCrash.ll
@@ -0,0 +1,63 @@ +; RUN: llc < %s -mcpu=yonah +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "i686-apple-darwin8" + +declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) + +define void @test(float* %P) { +entry: + or <4 x i32> zeroinitializer, and (<4 x i32> bitcast (<4 x float> shufflevector (<4 x float> undef, <4 x float> undef, <4 x i32> zeroinitializer) to <4 x i32>), <4 x i32> < i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648 >) ; <<4 x i32>>:0 [#uses=1] + bitcast <4 x i32> %0 to <4 x float> ; <<4 x float>>:1 [#uses=1] + fsub <4 x float> %1, zeroinitializer ; <<4 x float>>:2 [#uses=1] + fsub <4 x float> shufflevector (<4 x float> undef, <4 x float> undef, <4 x i32> zeroinitializer), %2 ; <<4 x float>>:3 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %3, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:4 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %4, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:5 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %5, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:6 [#uses=1] + shufflevector <4 x float> %6, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:7 [#uses=1] + shufflevector <4 x float> %7, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:8 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %8, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:9 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %9, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:10 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %10, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:11 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %11, <4 x i32> < i32 0, i32 5, i32 6, 
i32 7 > ; <<4 x float>>:12 [#uses=1] + shufflevector <4 x float> %12, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:13 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %13, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:14 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %14, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:15 [#uses=1] + shufflevector <4 x float> %15, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:16 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %16, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:17 [#uses=1] + shufflevector <4 x float> %17, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:18 [#uses=1] + shufflevector <4 x float> %18, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:19 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %19, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:20 [#uses=1] + shufflevector <4 x float> %20, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:21 [#uses=1] + shufflevector <4 x float> %21, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:22 [#uses=1] + fmul <4 x float> %22, zeroinitializer ; <<4 x float>>:23 [#uses=1] + shufflevector <4 x float> %23, <4 x float> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 > ; <<4 x float>>:24 [#uses=1] + call <4 x float> @llvm.x86.sse.add.ss( <4 x float> zeroinitializer, <4 x float> %24 ) ; <<4 x float>>:25 [#uses=1] + shufflevector <4 x float> %25, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:26 [#uses=1] + shufflevector <4 x float> %26, <4 x float> zeroinitializer, <4 x i32> zeroinitializer ; <<4 x float>>:27 [#uses=1] + shufflevector <4 x float> %27, <4 x float> zeroinitializer, <4 x i32> < i32 4, i32 1, i32 6, i32 7 > ; <<4 x 
float>>:28 [#uses=1] + fmul <4 x float> zeroinitializer, %28 ; <<4 x float>>:29 [#uses=1] + fadd <4 x float> %29, zeroinitializer ; <<4 x float>>:30 [#uses=1] + fmul <4 x float> zeroinitializer, %30 ; <<4 x float>>:31 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %31, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:32 [#uses=1] + fmul <4 x float> zeroinitializer, %32 ; <<4 x float>>:33 [#uses=1] + shufflevector <4 x float> %33, <4 x float> zeroinitializer, <4 x i32> zeroinitializer ; <<4 x float>>:34 [#uses=1] + fmul <4 x float> zeroinitializer, %34 ; <<4 x float>>:35 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %35, <4 x i32> < i32 0, i32 1, i32 6, i32 7 > ; <<4 x float>>:36 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %36, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:37 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %37, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:38 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %38, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:39 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %39, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:40 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %40, <4 x i32> < i32 4, i32 1, i32 6, i32 7 > ; <<4 x float>>:41 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %41, <4 x i32> < i32 4, i32 1, i32 6, i32 7 > ; <<4 x float>>:42 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %42, <4 x i32> < i32 4, i32 1, i32 6, i32 7 > ; <<4 x float>>:43 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %43, <4 x i32> < i32 4, i32 1, i32 6, i32 7 > ; <<4 x float>>:44 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %44, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:45 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %45, <4 
x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:46 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %46, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:47 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %47, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:48 [#uses=1] + shufflevector <4 x float> %48, <4 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > ; <<4 x float>>:49 [#uses=1] + fadd <4 x float> %49, zeroinitializer ; <<4 x float>>:50 [#uses=1] + %tmp5845 = extractelement <4 x float> %50, i32 2 ; <float> [#uses=1] + store float %tmp5845, float* %P + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll b/src/LLVM/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll new file mode 100644 index 0000000..209b82c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll
@@ -0,0 +1,64 @@ +; RUN: llc < %s -o - -march=x86 -mattr=+mmx | FileCheck %s +; There are no MMX instructions here. We use add+adcl for the adds. + +define <1 x i64> @unsigned_add3(<1 x i64>* %a, <1 x i64>* %b, i32 %count) nounwind { +entry: + %tmp2942 = icmp eq i32 %count, 0 ; <i1> [#uses=1] + br i1 %tmp2942, label %bb31, label %bb26 + +bb26: ; preds = %bb26, %entry + +; CHECK: addl +; CHECK: adcl + + %i.037.0 = phi i32 [ 0, %entry ], [ %tmp25, %bb26 ] ; <i32> [#uses=3] + %sum.035.0 = phi <1 x i64> [ zeroinitializer, %entry ], [ %tmp22, %bb26 ] ; <<1 x i64>> [#uses=1] + %tmp13 = getelementptr <1 x i64>* %b, i32 %i.037.0 ; <<1 x i64>*> [#uses=1] + %tmp14 = load <1 x i64>* %tmp13 ; <<1 x i64>> [#uses=1] + %tmp18 = getelementptr <1 x i64>* %a, i32 %i.037.0 ; <<1 x i64>*> [#uses=1] + %tmp19 = load <1 x i64>* %tmp18 ; <<1 x i64>> [#uses=1] + %tmp21 = add <1 x i64> %tmp19, %tmp14 ; <<1 x i64>> [#uses=1] + %tmp22 = add <1 x i64> %tmp21, %sum.035.0 ; <<1 x i64>> [#uses=2] + %tmp25 = add i32 %i.037.0, 1 ; <i32> [#uses=2] + %tmp29 = icmp ult i32 %tmp25, %count ; <i1> [#uses=1] + br i1 %tmp29, label %bb26, label %bb31 + +bb31: ; preds = %bb26, %entry + %sum.035.1 = phi <1 x i64> [ zeroinitializer, %entry ], [ %tmp22, %bb26 ] ; <<1 x i64>> [#uses=1] + ret <1 x i64> %sum.035.1 +} + + +; This is the original test converted to use MMX intrinsics. 
+ +define <1 x i64> @unsigned_add3a(x86_mmx* %a, x86_mmx* %b, i32 %count) nounwind { +entry: + %tmp2943 = bitcast <1 x i64><i64 0> to x86_mmx + %tmp2942 = icmp eq i32 %count, 0 ; <i1> [#uses=1] + br i1 %tmp2942, label %bb31, label %bb26 + +bb26: ; preds = %bb26, %entry + +; CHECK: movq ({{.*}},8), %mm +; CHECK: paddq ({{.*}},8), %mm +; CHECK: paddq %mm{{[0-7]}}, %mm + + %i.037.0 = phi i32 [ 0, %entry ], [ %tmp25, %bb26 ] ; <i32> [#uses=3] + %sum.035.0 = phi x86_mmx [ %tmp2943, %entry ], [ %tmp22, %bb26 ] ; <x86_mmx> [#uses=1] + %tmp13 = getelementptr x86_mmx* %b, i32 %i.037.0 ; <x86_mmx*> [#uses=1] + %tmp14 = load x86_mmx* %tmp13 ; <x86_mmx> [#uses=1] + %tmp18 = getelementptr x86_mmx* %a, i32 %i.037.0 ; <x86_mmx*> [#uses=1] + %tmp19 = load x86_mmx* %tmp18 ; <x86_mmx> [#uses=1] + %tmp21 = call x86_mmx @llvm.x86.mmx.padd.q (x86_mmx %tmp19, x86_mmx %tmp14) ; <x86_mmx> [#uses=1] + %tmp22 = call x86_mmx @llvm.x86.mmx.padd.q (x86_mmx %tmp21, x86_mmx %sum.035.0) ; <x86_mmx> [#uses=2] + %tmp25 = add i32 %i.037.0, 1 ; <i32> [#uses=2] + %tmp29 = icmp ult i32 %tmp25, %count ; <i1> [#uses=1] + br i1 %tmp29, label %bb26, label %bb31 + +bb31: ; preds = %bb26, %entry + %sum.035.1 = phi x86_mmx [ %tmp2943, %entry ], [ %tmp22, %bb26 ] ; <x86_mmx> [#uses=1] + %t = bitcast x86_mmx %sum.035.1 to <1 x i64> + ret <1 x i64> %t +} + +declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx)
diff --git a/src/LLVM/test/CodeGen/X86/2007-04-27-InlineAsm-IntMemInput.ll b/src/LLVM/test/CodeGen/X86/2007-04-27-InlineAsm-IntMemInput.ll new file mode 100644 index 0000000..82c45f5 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-04-27-InlineAsm-IntMemInput.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s | not grep {bsrl.*10} +; PR1356 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "i686-apple-darwin8" + +define i32 @main() { +entry: + %tmp4 = tail call i32 asm "bsrl $1, $0", "=r,ro,~{dirflag},~{fpsr},~{flags},~{cc}"( i32 10 ) ; <i32> [#uses=1] + ret i32 %tmp4 +} +
diff --git a/src/LLVM/test/CodeGen/X86/2007-05-05-Personality.ll b/src/LLVM/test/CodeGen/X86/2007-05-05-Personality.ll new file mode 100644 index 0000000..36ca3de --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-05-05-Personality.ll
@@ -0,0 +1,38 @@ +; RUN: llc < %s -mtriple=i686-pc-linux-gnu -o - | FileCheck %s + +; CHECK: .cfi_personality 0, __gnat_eh_personality +; CHECK: .cfi_lsda 0, .Lexception0 + +@error = external global i8 + +define void @_ada_x() { +entry: + invoke void @raise() + to label %eh_then unwind label %unwind + +unwind: ; preds = %entry + %eh_ptr = tail call i8* @llvm.eh.exception() + %eh_select = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh_ptr, i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*), i8* @error) + %eh_typeid = tail call i32 @llvm.eh.typeid.for(i8* @error) + %tmp2 = icmp eq i32 %eh_select, %eh_typeid + br i1 %tmp2, label %eh_then, label %Unwind + +eh_then: ; preds = %unwind, %entry + ret void + +Unwind: ; preds = %unwind + %0 = tail call i32 (...)* @_Unwind_Resume(i8* %eh_ptr) + unreachable +} + +declare void @raise() + +declare i8* @llvm.eh.exception() nounwind readonly + +declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind + +declare i32 @llvm.eh.typeid.for(i8*) nounwind + +declare i32 @__gnat_eh_personality(...) + +declare i32 @_Unwind_Resume(...)
diff --git a/src/LLVM/test/CodeGen/X86/2007-05-05-VecCastExpand.ll b/src/LLVM/test/CodeGen/X86/2007-05-05-VecCastExpand.ll new file mode 100644 index 0000000..ec91c2d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-05-05-VecCastExpand.ll
@@ -0,0 +1,21 @@ +; RUN: llc < %s -march=x86 -mcpu=i386 -mattr=+sse +; PR1371 + +@str = external global [18 x i8] ; <[18 x i8]*> [#uses=1] + +define void @test() { +bb.i: + %tmp.i660 = load <4 x float>* null ; <<4 x float>> [#uses=1] + call void (i32, ...)* @printf( i32 0, i8* getelementptr ([18 x i8]* @str, i32 0, i64 0), double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00 ) + %tmp152.i = load <4 x i32>* null ; <<4 x i32>> [#uses=1] + %tmp156.i = bitcast <4 x i32> %tmp152.i to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp175.i = bitcast <4 x float> %tmp.i660 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp176.i = xor <4 x i32> %tmp156.i, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] + %tmp177.i = and <4 x i32> %tmp176.i, %tmp175.i ; <<4 x i32>> [#uses=1] + %tmp190.i = or <4 x i32> %tmp177.i, zeroinitializer ; <<4 x i32>> [#uses=1] + %tmp191.i = bitcast <4 x i32> %tmp190.i to <4 x float> ; <<4 x float>> [#uses=1] + store <4 x float> %tmp191.i, <4 x float>* null + ret void +} + +declare void @printf(i32, ...)
diff --git a/src/LLVM/test/CodeGen/X86/2007-05-07-InvokeSRet.ll b/src/LLVM/test/CodeGen/X86/2007-05-07-InvokeSRet.ll new file mode 100644 index 0000000..15b4fde --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-05-07-InvokeSRet.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -mtriple=i686-pc-linux-gnu -disable-fp-elim | not grep {addl .12, %esp} +; PR1398 + + %struct.S = type { i32, i32 } + +declare void @invokee(%struct.S* sret ) + +define void @invoker(%struct.S* %name.0.0) { +entry: + invoke void @invokee( %struct.S* sret %name.0.0 ) + to label %return unwind label %return + +return: ; preds = %entry, %entry + %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + ret void +} + +declare i32 @__gxx_personality_v0(...)
diff --git a/src/LLVM/test/CodeGen/X86/2007-05-14-LiveIntervalAssert.ll b/src/LLVM/test/CodeGen/X86/2007-05-14-LiveIntervalAssert.ll new file mode 100644 index 0000000..6229ccf --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-05-14-LiveIntervalAssert.ll
@@ -0,0 +1,27 @@ +; RUN: llc < %s -march=x86-64 + + %struct.XDesc = type <{ i32, %struct.OpaqueXDataStorageType** }> + %struct.OpaqueXDataStorageType = type opaque + +declare signext i16 @GetParamDesc(%struct.XDesc*, i32, i32, %struct.XDesc*) + +declare void @r_raise(i64, i8*, ...) + +define i64 @app_send_event(i64 %self, i64 %event_class, i64 %event_id, i64 %params, i64 %need_retval) { +entry: + br i1 false, label %cond_true109, label %bb83.preheader + +bb83.preheader: ; preds = %entry + ret i64 0 + +cond_true109: ; preds = %entry + br i1 false, label %cond_next164, label %cond_true239 + +cond_next164: ; preds = %cond_true109 + %tmp176 = call signext i16 @GetParamDesc( %struct.XDesc* null, i32 1701999219, i32 1413830740, %struct.XDesc* null ) + call void (i64, i8*, ...)* @r_raise( i64 0, i8* null ) + unreachable + +cond_true239: ; preds = %cond_true109 + ret i64 0 +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-05-15-maskmovq.ll b/src/LLVM/test/CodeGen/X86/2007-05-15-maskmovq.ll new file mode 100644 index 0000000..7cbbdac --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-05-15-maskmovq.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -mcpu=yonah + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "i686-apple-darwin8" + +define void @test(<1 x i64> %c64, <1 x i64> %mask1, i8* %P) { +entry: + %tmp4 = bitcast <1 x i64> %mask1 to x86_mmx ; <x86_mmx> [#uses=1] + %tmp6 = bitcast <1 x i64> %c64 to x86_mmx ; <x86_mmx> [#uses=1] + tail call void @llvm.x86.mmx.maskmovq( x86_mmx %tmp4, x86_mmx %tmp6, i8* %P ) + ret void +} + +declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*)
diff --git a/src/LLVM/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll b/src/LLVM/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll new file mode 100644 index 0000000..0470956 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 +; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep punpckhwd + +declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) + +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) + +define fastcc void @test(i32* %src, i32 %sbpr, i32* %dst, i32 %dbpr, i32 %w, i32 %h, i32 %dstalpha, i32 %mask) { + %tmp633 = shufflevector <8 x i16> zeroinitializer, <8 x i16> undef, <8 x i32> < i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7 > + %tmp715 = mul <8 x i16> zeroinitializer, %tmp633 + %tmp776 = bitcast <8 x i16> %tmp715 to <4 x i32> + %tmp777 = add <4 x i32> %tmp776, shufflevector (<4 x i32> < i32 65537, i32 0, i32 0, i32 0 >, <4 x i32> < i32 65537, i32 0, i32 0, i32 0 >, <4 x i32> zeroinitializer) + %tmp805 = add <4 x i32> %tmp777, zeroinitializer + %tmp832 = bitcast <4 x i32> %tmp805 to <8 x i16> + %tmp838 = tail call <8 x i16> @llvm.x86.sse2.psrl.w( <8 x i16> %tmp832, <8 x i16> < i16 8, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef > ) + %tmp1020 = tail call <16 x i8> @llvm.x86.sse2.packuswb.128( <8 x i16> zeroinitializer, <8 x i16> %tmp838 ) + %tmp1030 = bitcast <16 x i8> %tmp1020 to <4 x i32> + %tmp1033 = add <4 x i32> zeroinitializer, %tmp1030 + %tmp1048 = bitcast <4 x i32> %tmp1033 to <2 x i64> + %tmp1049 = or <2 x i64> %tmp1048, zeroinitializer + store <2 x i64> %tmp1049, <2 x i64>* null + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-06-04-X86-64-CtorAsmBugs.ll b/src/LLVM/test/CodeGen/X86/2007-06-04-X86-64-CtorAsmBugs.ll new file mode 100644 index 0000000..a278b36 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-06-04-X86-64-CtorAsmBugs.ll
@@ -0,0 +1,28 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | not grep GOTPCREL +; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep ".align.*3" + + %struct.A = type { [1024 x i8] } +@_ZN1A1aE = global %struct.A zeroinitializer, align 32 ; <%struct.A*> [#uses=1] +@llvm.global_ctors = appending global [1 x { i32, void ()* }] [ { i32, void ()* } { i32 65535, void ()* @_GLOBAL__I__ZN1A1aE } ] ; <[1 x { i32, void ()* }]*> [#uses=0] + +define internal void @_GLOBAL__I__ZN1A1aE() section "__TEXT,__StaticInit,regular,pure_instructions" { +entry: + br label %bb.i + +bb.i: ; preds = %bb.i, %entry + %i.1.i1.0 = phi i32 [ 0, %entry ], [ %indvar.next, %bb.i ] ; <i32> [#uses=2] + %tmp1012.i = sext i32 %i.1.i1.0 to i64 ; <i64> [#uses=1] + %tmp13.i = getelementptr %struct.A* @_ZN1A1aE, i32 0, i32 0, i64 %tmp1012.i ; <i8*> [#uses=1] + store i8 0, i8* %tmp13.i + %indvar.next = add i32 %i.1.i1.0, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %indvar.next, 1024 ; <i1> [#uses=1] + br i1 %exitcond, label %_Z41__static_initialization_and_destruction_0ii.exit, label %bb.i + +_Z41__static_initialization_and_destruction_0ii.exit: ; preds = %bb.i + ret void +} + +define i32 @main(i32 %argc, i8** %argv) { +entry: + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-06-15-IntToMMX.ll b/src/LLVM/test/CodeGen/X86/2007-06-15-IntToMMX.ll new file mode 100644 index 0000000..de65afd --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-06-15-IntToMMX.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep paddusw +@R = external global x86_mmx ; <x86_mmx*> [#uses=1] + +define void @foo(<1 x i64> %A, <1 x i64> %B) { +entry: + %tmp2 = bitcast <1 x i64> %A to x86_mmx + %tmp3 = bitcast <1 x i64> %B to x86_mmx + %tmp7 = tail call x86_mmx @llvm.x86.mmx.paddus.w( x86_mmx %tmp2, x86_mmx %tmp3 ) ; <x86_mmx> [#uses=1] + store x86_mmx %tmp7, x86_mmx* @R + tail call void @llvm.x86.mmx.emms( ) + ret void +} + +declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) + +declare void @llvm.x86.mmx.emms()
diff --git a/src/LLVM/test/CodeGen/X86/2007-06-28-X86-64-isel.ll b/src/LLVM/test/CodeGen/X86/2007-06-28-X86-64-isel.ll new file mode 100644 index 0000000..3837581 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-06-28-X86-64-isel.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=x86-64 -mattr=+sse2 + +define void @test() { + %tmp1 = call <8 x i16> @llvm.x86.sse2.pmins.w( <8 x i16> zeroinitializer, <8 x i16> bitcast (<4 x i32> < i32 7, i32 7, i32 7, i32 7 > to <8 x i16>) ) + %tmp2 = bitcast <8 x i16> %tmp1 to <4 x i32> + br i1 false, label %bb1, label %bb2 + +bb2: + %tmp38007.i = extractelement <4 x i32> %tmp2, i32 3 + ret void + +bb1: + ret void +} + +declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>)
diff --git a/src/LLVM/test/CodeGen/X86/2007-06-29-DAGCombinerBug.ll b/src/LLVM/test/CodeGen/X86/2007-06-29-DAGCombinerBug.ll new file mode 100644 index 0000000..d2d6388 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-06-29-DAGCombinerBug.ll
@@ -0,0 +1,50 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 + +define void @test() { +entry: + br i1 false, label %bb13944.preheader, label %cond_true418 + +cond_true418: ; preds = %entry + ret void + +bb13944.preheader: ; preds = %entry + br i1 false, label %bb3517, label %bb13968.preheader + +bb3517: ; preds = %bb13944.preheader + br i1 false, label %cond_false7408, label %cond_next11422 + +cond_false7408: ; preds = %bb3517 + switch i32 0, label %cond_false10578 [ + i32 7, label %cond_next11422 + i32 6, label %cond_true7828 + i32 1, label %cond_true10095 + i32 3, label %cond_true10095 + i32 5, label %cond_true10176 + i32 24, label %cond_true10176 + ] + +cond_true7828: ; preds = %cond_false7408 + br i1 false, label %cond_next8191, label %cond_true8045 + +cond_true8045: ; preds = %cond_true7828 + ret void + +cond_next8191: ; preds = %cond_true7828 + %tmp8234 = sub <4 x i32> < i32 939524096, i32 939524096, i32 939524096, i32 939524096 >, zeroinitializer ; <<4 x i32>> [#uses=0] + ret void + +cond_true10095: ; preds = %cond_false7408, %cond_false7408 + ret void + +cond_true10176: ; preds = %cond_false7408, %cond_false7408 + ret void + +cond_false10578: ; preds = %cond_false7408 + ret void + +cond_next11422: ; preds = %cond_false7408, %bb3517 + ret void + +bb13968.preheader: ; preds = %bb13944.preheader + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll b/src/LLVM/test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll new file mode 100644 index 0000000..dc11eec --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 + +define void @test(<4 x float>* %arg) { + %tmp89 = getelementptr <4 x float>* %arg, i64 3 + %tmp1144 = fsub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, zeroinitializer + store <4 x float> %tmp1144, <4 x float>* null + %tmp1149 = load <4 x float>* %tmp89 + %tmp1150 = fsub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %tmp1149 + store <4 x float> %tmp1150, <4 x float>* %tmp89 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll b/src/LLVM/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll new file mode 100644 index 0000000..187c3e4 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx | FileCheck %s + +; CHECK: movd %rsi, [[MM0:%mm[0-9]+]] +; CHECK: movd %rdi, [[MM1:%mm[0-9]+]] +; CHECK: paddusw [[MM0]], [[MM1]] + +@R = external global x86_mmx ; <x86_mmx*> [#uses=1] + +define void @foo(<1 x i64> %A, <1 x i64> %B) nounwind { +entry: + %tmp4 = bitcast <1 x i64> %B to x86_mmx ; <<4 x i16>> [#uses=1] + %tmp6 = bitcast <1 x i64> %A to x86_mmx ; <<4 x i16>> [#uses=1] + %tmp7 = tail call x86_mmx @llvm.x86.mmx.paddus.w( x86_mmx %tmp6, x86_mmx %tmp4 ) ; <x86_mmx> [#uses=1] + store x86_mmx %tmp7, x86_mmx* @R + tail call void @llvm.x86.mmx.emms( ) + ret void +} + +declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) +declare void @llvm.x86.mmx.emms()
diff --git a/src/LLVM/test/CodeGen/X86/2007-07-10-StackerAssert.ll b/src/LLVM/test/CodeGen/X86/2007-07-10-StackerAssert.ll new file mode 100644 index 0000000..d611677 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-07-10-StackerAssert.ll
@@ -0,0 +1,41 @@ +; RUN: llc < %s -mtriple=i686-pc-linux-gnu -mcpu=athlon -relocation-model=pic +; PR1545 + +@.str97 = external constant [56 x i8] ; <[56 x i8]*> [#uses=1] + +declare void @PR_LogPrint(i8*, ...) + +define i32 @_ZN13nsPrintEngine19SetupToPrintContentEP16nsIDeviceContextP12nsIDOMWindow() { +entry: + br i1 false, label %cond_true122, label %cond_next453 + +cond_true122: ; preds = %entry + br i1 false, label %bb164, label %cond_true136 + +cond_true136: ; preds = %cond_true122 + ret i32 0 + +bb164: ; preds = %cond_true122 + br i1 false, label %bb383, label %cond_true354 + +cond_true354: ; preds = %bb164 + ret i32 0 + +bb383: ; preds = %bb164 + %tmp408 = load float* null ; <float> [#uses=2] + br i1 false, label %cond_true425, label %cond_next443 + +cond_true425: ; preds = %bb383 + %tmp430 = load float* null ; <float> [#uses=1] + %tmp432 = fsub float %tmp430, %tmp408 ; <float> [#uses=1] + %tmp432433 = fpext float %tmp432 to double ; <double> [#uses=1] + %tmp434435 = fpext float %tmp408 to double ; <double> [#uses=1] + call void (i8*, ...)* @PR_LogPrint( i8* getelementptr ([56 x i8]* @.str97, i32 0, i32 0), double 0.000000e+00, double %tmp434435, double %tmp432433 ) + ret i32 0 + +cond_next443: ; preds = %bb383 + ret i32 0 + +cond_next453: ; preds = %entry + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-07-18-Vector-Extract.ll b/src/LLVM/test/CodeGen/X86/2007-07-18-Vector-Extract.ll new file mode 100644 index 0000000..6288c4a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-07-18-Vector-Extract.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse | FileCheck %s +; CHECK: movq ([[A0:%rdi|%rcx]]), %rax +; CHECK: movq 8([[A0]]), %rax +define i64 @foo_0(<2 x i64>* %val) { +entry: + %val12 = getelementptr <2 x i64>* %val, i32 0, i32 0 ; <i64*> [#uses=1] + %tmp7 = load i64* %val12 ; <i64> [#uses=1] + ret i64 %tmp7 +} + +define i64 @foo_1(<2 x i64>* %val) { +entry: + %tmp2.gep = getelementptr <2 x i64>* %val, i32 0, i32 1 ; <i64*> [#uses=1] + %tmp4 = load i64* %tmp2.gep ; <i64> [#uses=1] + ret i64 %tmp4 +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-08-01-LiveVariablesBug.ll b/src/LLVM/test/CodeGen/X86/2007-08-01-LiveVariablesBug.ll new file mode 100644 index 0000000..62624a7 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-08-01-LiveVariablesBug.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=x86 | not grep movl + +define zeroext i8 @t(i8 zeroext %x, i8 zeroext %y) { + %tmp2 = add i8 %x, 2 + %tmp4 = add i8 %y, -2 + %tmp5 = mul i8 %tmp4, %tmp2 + ret i8 %tmp5 +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll b/src/LLVM/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll new file mode 100644 index 0000000..7768f36 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
@@ -0,0 +1,235 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | not grep "movb %ah, %r" + + %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, [4 x i8], i64 } + %struct.PyBoolScalarObject = type { i64, %struct._typeobject*, i8 } + %struct.PyBufferProcs = type { i64 (%struct.PyObject*, i64, i8**)*, i64 (%struct.PyObject*, i64, i8**)*, i64 (%struct.PyObject*, i64*)*, i64 (%struct.PyObject*, i64, i8**)* } + %struct.PyGetSetDef = type { i8*, %struct.PyObject* (%struct.PyObject*, i8*)*, i32 (%struct.PyObject*, %struct.PyObject*, i8*)*, i8*, i8* } + %struct.PyMappingMethods = type { i64 (%struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, i32 (%struct.PyObject*, %struct.PyObject*, %struct.PyObject*)* } + %struct.PyMemberDef = type opaque + %struct.PyMethodDef = type { i8*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, i32, i8* } + %struct.PyNumberMethods = type { %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*)*, i32 (%struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, 
%struct.PyObject*)*, i32 (%struct.PyObject**, %struct.PyObject**)*, %struct.PyObject* (%struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*)* } + %struct.PyObject = type { i64, %struct._typeobject* } + %struct.PySequenceMethods = type { i64 (%struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, i64)*, %struct.PyObject* (%struct.PyObject*, i64)*, %struct.PyObject* (%struct.PyObject*, i64, i64)*, i32 (%struct.PyObject*, i64, %struct.PyObject*)*, i32 (%struct.PyObject*, i64, i64, %struct.PyObject*)*, i32 (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, i64)* } + %struct.PyTupleObject = type { i64, %struct._typeobject*, i64, [1 x %struct.PyObject*] } + %struct.__sFILEX = type opaque + %struct.__sbuf = type { i8*, i32 } + %struct._typeobject = type { i64, 
%struct._typeobject*, i64, i8*, i64, i64, void (%struct.PyObject*)*, i32 (%struct.PyObject*, %struct.FILE*, i32)*, %struct.PyObject* (%struct.PyObject*, i8*)*, i32 (%struct.PyObject*, i8*, %struct.PyObject*)*, i32 (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*)*, %struct.PyNumberMethods*, %struct.PySequenceMethods*, %struct.PyMappingMethods*, i64 (%struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, i32 (%struct.PyObject*, %struct.PyObject*, %struct.PyObject*)*, %struct.PyBufferProcs*, i64, i8*, i32 (%struct.PyObject*, i32 (%struct.PyObject*, i8*)*, i8*)*, i32 (%struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*, i32)*, i64, %struct.PyObject* (%struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*)*, %struct.PyMethodDef*, %struct.PyMemberDef*, %struct.PyGetSetDef*, %struct._typeobject*, %struct.PyObject*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*, %struct.PyObject*)*, i32 (%struct.PyObject*, %struct.PyObject*, %struct.PyObject*)*, i64, i32 (%struct.PyObject*, %struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct._typeobject*, i64)*, %struct.PyObject* (%struct._typeobject*, %struct.PyObject*, %struct.PyObject*)*, void (i8*)*, i32 (%struct.PyObject*)*, %struct.PyObject*, %struct.PyObject*, %struct.PyObject*, %struct.PyObject*, %struct.PyObject*, void (%struct.PyObject*)* } +@PyArray_API = external global i8** ; <i8***> [#uses=4] +@PyUFunc_API = external global i8** ; <i8***> [#uses=4] +@.str5 = external constant [14 x i8] ; <[14 x i8]*> [#uses=1] + +define %struct.PyObject* @ubyte_divmod(%struct.PyObject* %a, %struct.PyObject* %b) { +entry: + %arg1 = alloca i8, align 1 ; <i8*> [#uses=3] + %arg2 = alloca i8, align 1 ; <i8*> [#uses=3] + %first = alloca i32, align 4 ; <i32*> [#uses=2] + %bufsize = alloca i32, align 4 ; <i32*> [#uses=1] + 
%errmask = alloca i32, align 4 ; <i32*> [#uses=2] + %errobj = alloca %struct.PyObject*, align 8 ; <%struct.PyObject**> [#uses=2] + %tmp3.i = call fastcc i32 @_ubyte_convert_to_ctype( %struct.PyObject* %a, i8* %arg1 ) ; <i32> [#uses=2] + %tmp5.i = icmp slt i32 %tmp3.i, 0 ; <i1> [#uses=1] + br i1 %tmp5.i, label %_ubyte_convert2_to_ctypes.exit, label %cond_next.i + +cond_next.i: ; preds = %entry + %tmp11.i = call fastcc i32 @_ubyte_convert_to_ctype( %struct.PyObject* %b, i8* %arg2 ) ; <i32> [#uses=2] + %tmp13.i = icmp slt i32 %tmp11.i, 0 ; <i1> [#uses=1] + %retval.i = select i1 %tmp13.i, i32 %tmp11.i, i32 0 ; <i32> [#uses=1] + switch i32 %retval.i, label %bb35 [ + i32 -2, label %bb17 + i32 -1, label %bb4 + ] + +_ubyte_convert2_to_ctypes.exit: ; preds = %entry + switch i32 %tmp3.i, label %bb35 [ + i32 -2, label %bb17 + i32 -1, label %bb4 + ] + +bb4: ; preds = %_ubyte_convert2_to_ctypes.exit, %cond_next.i + %tmp5 = load i8*** @PyArray_API, align 8 ; <i8**> [#uses=1] + %tmp6 = getelementptr i8** %tmp5, i64 2 ; <i8**> [#uses=1] + %tmp7 = load i8** %tmp6 ; <i8*> [#uses=1] + %tmp78 = bitcast i8* %tmp7 to %struct._typeobject* ; <%struct._typeobject*> [#uses=1] + %tmp9 = getelementptr %struct._typeobject* %tmp78, i32 0, i32 12 ; <%struct.PyNumberMethods**> [#uses=1] + %tmp10 = load %struct.PyNumberMethods** %tmp9 ; <%struct.PyNumberMethods*> [#uses=1] + %tmp11 = getelementptr %struct.PyNumberMethods* %tmp10, i32 0, i32 5 ; <%struct.PyObject* (%struct.PyObject*, %struct.PyObject*)**> [#uses=1] + %tmp12 = load %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)** %tmp11 ; <%struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*> [#uses=1] + %tmp15 = call %struct.PyObject* %tmp12( %struct.PyObject* %a, %struct.PyObject* %b ) ; <%struct.PyObject*> [#uses=1] + ret %struct.PyObject* %tmp15 + +bb17: ; preds = %_ubyte_convert2_to_ctypes.exit, %cond_next.i + %tmp18 = call %struct.PyObject* @PyErr_Occurred( ) ; <%struct.PyObject*> [#uses=1] + %tmp19 = icmp eq %struct.PyObject* 
%tmp18, null ; <i1> [#uses=1] + br i1 %tmp19, label %cond_next, label %UnifiedReturnBlock + +cond_next: ; preds = %bb17 + %tmp22 = load i8*** @PyArray_API, align 8 ; <i8**> [#uses=1] + %tmp23 = getelementptr i8** %tmp22, i64 10 ; <i8**> [#uses=1] + %tmp24 = load i8** %tmp23 ; <i8*> [#uses=1] + %tmp2425 = bitcast i8* %tmp24 to %struct._typeobject* ; <%struct._typeobject*> [#uses=1] + %tmp26 = getelementptr %struct._typeobject* %tmp2425, i32 0, i32 12 ; <%struct.PyNumberMethods**> [#uses=1] + %tmp27 = load %struct.PyNumberMethods** %tmp26 ; <%struct.PyNumberMethods*> [#uses=1] + %tmp28 = getelementptr %struct.PyNumberMethods* %tmp27, i32 0, i32 5 ; <%struct.PyObject* (%struct.PyObject*, %struct.PyObject*)**> [#uses=1] + %tmp29 = load %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)** %tmp28 ; <%struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*> [#uses=1] + %tmp32 = call %struct.PyObject* %tmp29( %struct.PyObject* %a, %struct.PyObject* %b ) ; <%struct.PyObject*> [#uses=1] + ret %struct.PyObject* %tmp32 + +bb35: ; preds = %_ubyte_convert2_to_ctypes.exit, %cond_next.i + %tmp36 = load i8*** @PyUFunc_API, align 8 ; <i8**> [#uses=1] + %tmp37 = getelementptr i8** %tmp36, i64 27 ; <i8**> [#uses=1] + %tmp38 = load i8** %tmp37 ; <i8*> [#uses=1] + %tmp3839 = bitcast i8* %tmp38 to void ()* ; <void ()*> [#uses=1] + call void %tmp3839( ) + %tmp40 = load i8* %arg2, align 1 ; <i8> [#uses=4] + %tmp1.i = icmp eq i8 %tmp40, 0 ; <i1> [#uses=2] + br i1 %tmp1.i, label %cond_true.i, label %cond_false.i + +cond_true.i: ; preds = %bb35 + %tmp3.i196 = call i32 @feraiseexcept( i32 4 ) ; <i32> [#uses=0] + %tmp46207 = load i8* %arg2, align 1 ; <i8> [#uses=3] + %tmp48208 = load i8* %arg1, align 1 ; <i8> [#uses=2] + %tmp1.i197210 = icmp eq i8 %tmp48208, 0 ; <i1> [#uses=1] + %tmp4.i212 = icmp eq i8 %tmp46207, 0 ; <i1> [#uses=1] + %tmp7.i198213 = or i1 %tmp1.i197210, %tmp4.i212 ; <i1> [#uses=1] + br i1 %tmp7.i198213, label %cond_true.i200, label %cond_next17.i + +cond_false.i: ; preds 
= %bb35 + %tmp42 = load i8* %arg1, align 1 ; <i8> [#uses=3] + %tmp7.i = udiv i8 %tmp42, %tmp40 ; <i8> [#uses=2] + %tmp1.i197 = icmp eq i8 %tmp42, 0 ; <i1> [#uses=1] + %tmp7.i198 = or i1 %tmp1.i197, %tmp1.i ; <i1> [#uses=1] + br i1 %tmp7.i198, label %cond_true.i200, label %cond_next17.i + +cond_true.i200: ; preds = %cond_false.i, %cond_true.i + %out.0 = phi i8 [ 0, %cond_true.i ], [ %tmp7.i, %cond_false.i ] ; <i8> [#uses=2] + %tmp46202.0 = phi i8 [ %tmp46207, %cond_true.i ], [ %tmp40, %cond_false.i ] ; <i8> [#uses=1] + %tmp11.i199 = icmp eq i8 %tmp46202.0, 0 ; <i1> [#uses=1] + br i1 %tmp11.i199, label %cond_true14.i, label %ubyte_ctype_remainder.exit + +cond_true14.i: ; preds = %cond_true.i200 + %tmp15.i = call i32 @feraiseexcept( i32 4 ) ; <i32> [#uses=0] + br label %ubyte_ctype_remainder.exit + +cond_next17.i: ; preds = %cond_false.i, %cond_true.i + %out.1 = phi i8 [ 0, %cond_true.i ], [ %tmp7.i, %cond_false.i ] ; <i8> [#uses=1] + %tmp46202.1 = phi i8 [ %tmp46207, %cond_true.i ], [ %tmp40, %cond_false.i ] ; <i8> [#uses=1] + %tmp48205.1 = phi i8 [ %tmp48208, %cond_true.i ], [ %tmp42, %cond_false.i ] ; <i8> [#uses=1] + %tmp20.i = urem i8 %tmp48205.1, %tmp46202.1 ; <i8> [#uses=1] + br label %ubyte_ctype_remainder.exit + +ubyte_ctype_remainder.exit: ; preds = %cond_next17.i, %cond_true14.i, %cond_true.i200 + %out2.0 = phi i8 [ %tmp20.i, %cond_next17.i ], [ 0, %cond_true14.i ], [ 0, %cond_true.i200 ] ; <i8> [#uses=1] + %out.2 = phi i8 [ %out.1, %cond_next17.i ], [ %out.0, %cond_true14.i ], [ %out.0, %cond_true.i200 ] ; <i8> [#uses=1] + %tmp52 = load i8*** @PyUFunc_API, align 8 ; <i8**> [#uses=1] + %tmp53 = getelementptr i8** %tmp52, i64 28 ; <i8**> [#uses=1] + %tmp54 = load i8** %tmp53 ; <i8*> [#uses=1] + %tmp5455 = bitcast i8* %tmp54 to i32 ()* ; <i32 ()*> [#uses=1] + %tmp56 = call i32 %tmp5455( ) ; <i32> [#uses=2] + %tmp58 = icmp eq i32 %tmp56, 0 ; <i1> [#uses=1] + br i1 %tmp58, label %cond_next89, label %cond_true61 + +cond_true61: ; preds = 
%ubyte_ctype_remainder.exit + %tmp62 = load i8*** @PyUFunc_API, align 8 ; <i8**> [#uses=1] + %tmp63 = getelementptr i8** %tmp62, i64 25 ; <i8**> [#uses=1] + %tmp64 = load i8** %tmp63 ; <i8*> [#uses=1] + %tmp6465 = bitcast i8* %tmp64 to i32 (i8*, i32*, i32*, %struct.PyObject**)* ; <i32 (i8*, i32*, i32*, %struct.PyObject**)*> [#uses=1] + %tmp67 = call i32 %tmp6465( i8* getelementptr ([14 x i8]* @.str5, i32 0, i64 0), i32* %bufsize, i32* %errmask, %struct.PyObject** %errobj ) ; <i32> [#uses=1] + %tmp68 = icmp slt i32 %tmp67, 0 ; <i1> [#uses=1] + br i1 %tmp68, label %UnifiedReturnBlock, label %cond_next73 + +cond_next73: ; preds = %cond_true61 + store i32 1, i32* %first, align 4 + %tmp74 = load i8*** @PyUFunc_API, align 8 ; <i8**> [#uses=1] + %tmp75 = getelementptr i8** %tmp74, i64 29 ; <i8**> [#uses=1] + %tmp76 = load i8** %tmp75 ; <i8*> [#uses=1] + %tmp7677 = bitcast i8* %tmp76 to i32 (i32, %struct.PyObject*, i32, i32*)* ; <i32 (i32, %struct.PyObject*, i32, i32*)*> [#uses=1] + %tmp79 = load %struct.PyObject** %errobj, align 8 ; <%struct.PyObject*> [#uses=1] + %tmp80 = load i32* %errmask, align 4 ; <i32> [#uses=1] + %tmp82 = call i32 %tmp7677( i32 %tmp80, %struct.PyObject* %tmp79, i32 %tmp56, i32* %first ) ; <i32> [#uses=1] + %tmp83 = icmp eq i32 %tmp82, 0 ; <i1> [#uses=1] + br i1 %tmp83, label %cond_next89, label %UnifiedReturnBlock + +cond_next89: ; preds = %cond_next73, %ubyte_ctype_remainder.exit + %tmp90 = call %struct.PyObject* @PyTuple_New( i64 2 ) ; <%struct.PyObject*> [#uses=9] + %tmp92 = icmp eq %struct.PyObject* %tmp90, null ; <i1> [#uses=1] + br i1 %tmp92, label %UnifiedReturnBlock, label %cond_next97 + +cond_next97: ; preds = %cond_next89 + %tmp98 = load i8*** @PyArray_API, align 8 ; <i8**> [#uses=1] + %tmp99 = getelementptr i8** %tmp98, i64 25 ; <i8**> [#uses=1] + %tmp100 = load i8** %tmp99 ; <i8*> [#uses=1] + %tmp100101 = bitcast i8* %tmp100 to %struct._typeobject* ; <%struct._typeobject*> [#uses=2] + %tmp102 = getelementptr %struct._typeobject* 
%tmp100101, i32 0, i32 38 ; <%struct.PyObject* (%struct._typeobject*, i64)**> [#uses=1] + %tmp103 = load %struct.PyObject* (%struct._typeobject*, i64)** %tmp102 ; <%struct.PyObject* (%struct._typeobject*, i64)*> [#uses=1] + %tmp108 = call %struct.PyObject* %tmp103( %struct._typeobject* %tmp100101, i64 0 ) ; <%struct.PyObject*> [#uses=3] + %tmp110 = icmp eq %struct.PyObject* %tmp108, null ; <i1> [#uses=1] + br i1 %tmp110, label %cond_true113, label %cond_next135 + +cond_true113: ; preds = %cond_next97 + %tmp115 = getelementptr %struct.PyObject* %tmp90, i32 0, i32 0 ; <i64*> [#uses=2] + %tmp116 = load i64* %tmp115 ; <i64> [#uses=1] + %tmp117 = add i64 %tmp116, -1 ; <i64> [#uses=2] + store i64 %tmp117, i64* %tmp115 + %tmp123 = icmp eq i64 %tmp117, 0 ; <i1> [#uses=1] + br i1 %tmp123, label %cond_true126, label %UnifiedReturnBlock + +cond_true126: ; preds = %cond_true113 + %tmp128 = getelementptr %struct.PyObject* %tmp90, i32 0, i32 1 ; <%struct._typeobject**> [#uses=1] + %tmp129 = load %struct._typeobject** %tmp128 ; <%struct._typeobject*> [#uses=1] + %tmp130 = getelementptr %struct._typeobject* %tmp129, i32 0, i32 6 ; <void (%struct.PyObject*)**> [#uses=1] + %tmp131 = load void (%struct.PyObject*)** %tmp130 ; <void (%struct.PyObject*)*> [#uses=1] + call void %tmp131( %struct.PyObject* %tmp90 ) + ret %struct.PyObject* null + +cond_next135: ; preds = %cond_next97 + %tmp136137 = bitcast %struct.PyObject* %tmp108 to %struct.PyBoolScalarObject* ; <%struct.PyBoolScalarObject*> [#uses=1] + %tmp139 = getelementptr %struct.PyBoolScalarObject* %tmp136137, i32 0, i32 2 ; <i8*> [#uses=1] + store i8 %out.2, i8* %tmp139 + %tmp140141 = bitcast %struct.PyObject* %tmp90 to %struct.PyTupleObject* ; <%struct.PyTupleObject*> [#uses=2] + %tmp143 = getelementptr %struct.PyTupleObject* %tmp140141, i32 0, i32 3, i64 0 ; <%struct.PyObject**> [#uses=1] + store %struct.PyObject* %tmp108, %struct.PyObject** %tmp143 + %tmp145 = load i8*** @PyArray_API, align 8 ; <i8**> [#uses=1] + %tmp146 = 
getelementptr i8** %tmp145, i64 25 ; <i8**> [#uses=1] + %tmp147 = load i8** %tmp146 ; <i8*> [#uses=1] + %tmp147148 = bitcast i8* %tmp147 to %struct._typeobject* ; <%struct._typeobject*> [#uses=2] + %tmp149 = getelementptr %struct._typeobject* %tmp147148, i32 0, i32 38 ; <%struct.PyObject* (%struct._typeobject*, i64)**> [#uses=1] + %tmp150 = load %struct.PyObject* (%struct._typeobject*, i64)** %tmp149 ; <%struct.PyObject* (%struct._typeobject*, i64)*> [#uses=1] + %tmp155 = call %struct.PyObject* %tmp150( %struct._typeobject* %tmp147148, i64 0 ) ; <%struct.PyObject*> [#uses=3] + %tmp157 = icmp eq %struct.PyObject* %tmp155, null ; <i1> [#uses=1] + br i1 %tmp157, label %cond_true160, label %cond_next182 + +cond_true160: ; preds = %cond_next135 + %tmp162 = getelementptr %struct.PyObject* %tmp90, i32 0, i32 0 ; <i64*> [#uses=2] + %tmp163 = load i64* %tmp162 ; <i64> [#uses=1] + %tmp164 = add i64 %tmp163, -1 ; <i64> [#uses=2] + store i64 %tmp164, i64* %tmp162 + %tmp170 = icmp eq i64 %tmp164, 0 ; <i1> [#uses=1] + br i1 %tmp170, label %cond_true173, label %UnifiedReturnBlock + +cond_true173: ; preds = %cond_true160 + %tmp175 = getelementptr %struct.PyObject* %tmp90, i32 0, i32 1 ; <%struct._typeobject**> [#uses=1] + %tmp176 = load %struct._typeobject** %tmp175 ; <%struct._typeobject*> [#uses=1] + %tmp177 = getelementptr %struct._typeobject* %tmp176, i32 0, i32 6 ; <void (%struct.PyObject*)**> [#uses=1] + %tmp178 = load void (%struct.PyObject*)** %tmp177 ; <void (%struct.PyObject*)*> [#uses=1] + call void %tmp178( %struct.PyObject* %tmp90 ) + ret %struct.PyObject* null + +cond_next182: ; preds = %cond_next135 + %tmp183184 = bitcast %struct.PyObject* %tmp155 to %struct.PyBoolScalarObject* ; <%struct.PyBoolScalarObject*> [#uses=1] + %tmp186 = getelementptr %struct.PyBoolScalarObject* %tmp183184, i32 0, i32 2 ; <i8*> [#uses=1] + store i8 %out2.0, i8* %tmp186 + %tmp190 = getelementptr %struct.PyTupleObject* %tmp140141, i32 0, i32 3, i64 1 ; <%struct.PyObject**> [#uses=1] + store 
%struct.PyObject* %tmp155, %struct.PyObject** %tmp190 + ret %struct.PyObject* %tmp90 + +UnifiedReturnBlock: ; preds = %cond_true160, %cond_true113, %cond_next89, %cond_next73, %cond_true61, %bb17 + ret %struct.PyObject* null +} + +declare i32 @feraiseexcept(i32) + +declare fastcc i32 @_ubyte_convert_to_ctype(%struct.PyObject*, i8*) + +declare %struct.PyObject* @PyErr_Occurred() + +declare %struct.PyObject* @PyTuple_New(i64)
diff --git a/src/LLVM/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll b/src/LLVM/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll new file mode 100644 index 0000000..77291f0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=x86 | grep {movsbl} + +@X = global i32 0 ; <i32*> [#uses=1] + +define signext i8 @_Z3fooi(i32 %x) { +entry: + store i32 %x, i32* @X, align 4 + %retval67 = trunc i32 %x to i8 ; <i8> [#uses=1] + ret i8 %retval67 +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-08-13-AppendingLinkage.ll b/src/LLVM/test/CodeGen/X86/2007-08-13-AppendingLinkage.ll new file mode 100644 index 0000000..c90a85f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-08-13-AppendingLinkage.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86 | not grep drectve +; PR1607 + +%hlvm_programs_element = type { i8*, i32 (i32, i8**)* } +@hlvm_programs = appending constant [1 x %hlvm_programs_element] +zeroinitializer + +define %hlvm_programs_element* @hlvm_get_programs() { +entry: + ret %hlvm_programs_element* getelementptr([1 x %hlvm_programs_element]* + @hlvm_programs, i32 0, i32 0) +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-09-05-InvalidAsm.ll b/src/LLVM/test/CodeGen/X86/2007-09-05-InvalidAsm.ll new file mode 100644 index 0000000..5acb051 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-09-05-InvalidAsm.ll
@@ -0,0 +1,49 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -x86-asm-syntax=intel | not grep {lea\[\[:space:\]\]R} + + %struct.AGenericCall = type { %struct.AGenericManager*, %struct.ComponentParameters*, i32* } + %struct.AGenericManager = type <{ i8 }> + %struct.ComponentInstanceRecord = type opaque + %struct.ComponentParameters = type { [1 x i64] } + +define i32 @_ZN12AGenericCall10MapIDPtrAtEsRP23ComponentInstanceRecord(%struct.AGenericCall* %this, i16 signext %param, %struct.ComponentInstanceRecord** %instance) { +entry: + %tmp4 = icmp slt i16 %param, 0 ; <i1> [#uses=1] + br i1 %tmp4, label %cond_true, label %cond_next + +cond_true: ; preds = %entry + %tmp1415 = shl i16 %param, 3 ; <i16> [#uses=1] + %tmp17 = getelementptr %struct.AGenericCall* %this, i32 0, i32 1 ; <%struct.ComponentParameters**> [#uses=1] + %tmp18 = load %struct.ComponentParameters** %tmp17, align 8 ; <%struct.ComponentParameters*> [#uses=1] + %tmp1920 = bitcast %struct.ComponentParameters* %tmp18 to i8* ; <i8*> [#uses=1] + %tmp212223 = sext i16 %tmp1415 to i64 ; <i64> [#uses=1] + %tmp24 = getelementptr i8* %tmp1920, i64 %tmp212223 ; <i8*> [#uses=1] + %tmp2425 = bitcast i8* %tmp24 to i64* ; <i64*> [#uses=1] + %tmp28 = load i64* %tmp2425, align 8 ; <i64> [#uses=1] + %tmp2829 = inttoptr i64 %tmp28 to i32* ; <i32*> [#uses=1] + %tmp31 = getelementptr %struct.AGenericCall* %this, i32 0, i32 2 ; <i32**> [#uses=1] + store i32* %tmp2829, i32** %tmp31, align 8 + br label %cond_next + +cond_next: ; preds = %cond_true, %entry + %tmp4243 = shl i16 %param, 3 ; <i16> [#uses=1] + %tmp46 = getelementptr %struct.AGenericCall* %this, i32 0, i32 1 ; <%struct.ComponentParameters**> [#uses=1] + %tmp47 = load %struct.ComponentParameters** %tmp46, align 8 ; <%struct.ComponentParameters*> [#uses=1] + %tmp4849 = bitcast %struct.ComponentParameters* %tmp47 to i8* ; <i8*> [#uses=1] + %tmp505152 = sext i16 %tmp4243 to i64 ; <i64> [#uses=1] + %tmp53 = getelementptr i8* %tmp4849, i64 %tmp505152 ; <i8*> [#uses=1] + 
%tmp5354 = bitcast i8* %tmp53 to i64* ; <i64*> [#uses=1] + %tmp58 = load i64* %tmp5354, align 8 ; <i64> [#uses=1] + %tmp59 = icmp eq i64 %tmp58, 0 ; <i1> [#uses=1] + br i1 %tmp59, label %UnifiedReturnBlock, label %cond_true63 + +cond_true63: ; preds = %cond_next + %tmp65 = getelementptr %struct.AGenericCall* %this, i32 0, i32 0 ; <%struct.AGenericManager**> [#uses=1] + %tmp66 = load %struct.AGenericManager** %tmp65, align 8 ; <%struct.AGenericManager*> [#uses=1] + %tmp69 = tail call i32 @_ZN15AGenericManager24DefaultComponentInstanceERP23ComponentInstanceRecord( %struct.AGenericManager* %tmp66, %struct.ComponentInstanceRecord** %instance ) ; <i32> [#uses=1] + ret i32 %tmp69 + +UnifiedReturnBlock: ; preds = %cond_next + ret i32 undef +} + +declare i32 @_ZN15AGenericManager24DefaultComponentInstanceERP23ComponentInstanceRecord(%struct.AGenericManager*, %struct.ComponentInstanceRecord**)
diff --git a/src/LLVM/test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll b/src/LLVM/test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll new file mode 100644 index 0000000..c5d2a46 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll
@@ -0,0 +1,4 @@ +; RUN: llc < %s -march=x86 | grep weak | count 2 +@__gthrw_pthread_once = alias weak i32 (i32*, void ()*)* @pthread_once ; <i32 (i32*, void ()*)*> [#uses=0] + +declare extern_weak i32 @pthread_once(i32*, void ()*)
diff --git a/src/LLVM/test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll b/src/LLVM/test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll new file mode 100644 index 0000000..15466a1 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll
@@ -0,0 +1,67 @@ +; RUN: llc < %s -disable-cfi -march=x86 -mtriple=i686-apple-darwin | FileCheck %s + +; CHECK: "_-[NSString(local) isNullOrNil].eh": + + %struct.NSString = type { } + %struct._objc__method_prototype_list = type opaque + %struct._objc_category = type { i8*, i8*, %struct._objc_method_list*, %struct._objc_method_list*, %struct._objc_protocol**, i32, %struct._prop_list_t* } + %struct._objc_method = type { %struct.objc_selector*, i8*, i8* } + %struct._objc_method_list = type opaque + %struct._objc_module = type { i32, i32, i8*, %struct._objc_symtab* } + %struct._objc_protocol = type { %struct._objc_protocol_extension*, i8*, %struct._objc_protocol**, %struct._objc__method_prototype_list*, %struct._objc__method_prototype_list* } + %struct._objc_protocol_extension = type opaque + %struct._objc_symtab = type { i32, %struct.objc_selector**, i16, i16, [1 x i8*] } + %struct._prop_list_t = type opaque + %struct.anon = type { %struct._objc__method_prototype_list*, i32, [1 x %struct._objc_method] } + %struct.objc_selector = type opaque +@"\01L_OBJC_SYMBOLS" = internal global { i32, i32, i16, i16, [1 x %struct._objc_category*] } { + i32 0, + i32 0, + i16 0, + i16 1, + [1 x %struct._objc_category*] [ %struct._objc_category* bitcast ({ i8*, i8*, %struct._objc_method_list*, i32, i32, i32, i32 }* @"\01L_OBJC_CATEGORY_NSString_local" to %struct._objc_category*) ] }, section "__OBJC,__symbols,regular,no_dead_strip" ; <{ i32, i32, i16, i16, [1 x %struct._objc_category*] }*> [#uses=2] +@"\01L_OBJC_CATEGORY_INSTANCE_METHODS_NSString_local" = internal global { i32, i32, [1 x %struct._objc_method] } { + i32 0, + i32 1, + [1 x %struct._objc_method] [ %struct._objc_method { + %struct.objc_selector* bitcast ([12 x i8]* @"\01L_OBJC_METH_VAR_NAME_0" to %struct.objc_selector*), + i8* getelementptr ([7 x i8]* @"\01L_OBJC_METH_VAR_TYPE_0", i32 0, i32 0), + i8* bitcast (i8 (%struct.NSString*, %struct.objc_selector*) * @"-[NSString(local) isNullOrNil]" to i8*) } ] }, section 
"__OBJC,__cat_inst_meth,regular,no_dead_strip" ; <{ i32, i32, [1 x %struct._objc_method] }*> [#uses=3] +@"\01L_OBJC_CATEGORY_NSString_local" = internal global { i8*, i8*, %struct._objc_method_list*, i32, i32, i32, i32 } { + i8* getelementptr ([6 x i8]* @"\01L_OBJC_CLASS_NAME_0", i32 0, i32 0), + i8* getelementptr ([9 x i8]* @"\01L_OBJC_CLASS_NAME_1", i32 0, i32 0), + %struct._objc_method_list* bitcast ({ i32, i32, [1 x %struct._objc_method] }* @"\01L_OBJC_CATEGORY_INSTANCE_METHODS_NSString_local" to %struct._objc_method_list*), + i32 0, + i32 0, + i32 28, + i32 0 }, section "__OBJC,__category,regular,no_dead_strip" ; <{ i8*, i8*, %struct._objc_method_list*, i32, i32, i32, i32 }*> [#uses=2] +@"\01L_OBJC_IMAGE_INFO" = internal constant [2 x i32] zeroinitializer, section "__OBJC,__image_info,regular" ; <[2 x i32]*> [#uses=1] +@"\01L_OBJC_MODULES" = internal global %struct._objc_module { + i32 7, + i32 16, + i8* getelementptr ([1 x i8]* @"\01L_OBJC_CLASS_NAME_2", i32 0, i32 0), + %struct._objc_symtab* bitcast ({ i32, i32, i16, i16, [1 x %struct._objc_category*] }* @"\01L_OBJC_SYMBOLS" to %struct._objc_symtab*) }, section "__OBJC,__module_info,regular,no_dead_strip" ; <%struct._objc_module*> [#uses=1] +@"\01.objc_class_ref_NSString" = internal global i8* @"\01.objc_class_name_NSString" ; <i8**> [#uses=0] +@"\01.objc_class_name_NSString" = external global i8 ; <i8*> [#uses=1] +@"\01.objc_category_name_NSString_local" = constant i32 0 ; <i32*> [#uses=1] +@"\01L_OBJC_CLASS_NAME_2" = internal global [1 x i8] zeroinitializer, section "__TEXT,__cstring,cstring_literals" ; <[1 x i8]*> [#uses=2] +@"\01L_OBJC_CLASS_NAME_1" = internal global [9 x i8] c"NSString\00", section "__TEXT,__cstring,cstring_literals" ; <[9 x i8]*> [#uses=2] +@"\01L_OBJC_CLASS_NAME_0" = internal global [6 x i8] c"local\00", section "__TEXT,__cstring,cstring_literals" ; <[6 x i8]*> [#uses=2] +@"\01L_OBJC_METH_VAR_NAME_0" = internal global [12 x i8] c"isNullOrNil\00", section 
"__TEXT,__cstring,cstring_literals" ; <[12 x i8]*> [#uses=3] +@"\01L_OBJC_METH_VAR_TYPE_0" = internal global [7 x i8] c"c8@0:4\00", section "__TEXT,__cstring,cstring_literals" ; <[7 x i8]*> [#uses=2] +@llvm.used = appending global [11 x i8*] [ i8* bitcast ({ i32, i32, i16, i16, [1 x %struct._objc_category*] }* @"\01L_OBJC_SYMBOLS" to i8*), i8* bitcast ({ i32, i32, [1 x %struct._objc_method] }* @"\01L_OBJC_CATEGORY_INSTANCE_METHODS_NSString_local" to i8*), i8* bitcast ({ i8*, i8*, %struct._objc_method_list*, i32, i32, i32, i32 }* @"\01L_OBJC_CATEGORY_NSString_local" to i8*), i8* bitcast ([2 x i32]* @"\01L_OBJC_IMAGE_INFO" to i8*), i8* bitcast (%struct._objc_module* @"\01L_OBJC_MODULES" to i8*), i8* bitcast (i32* @"\01.objc_category_name_NSString_local" to i8*), i8* getelementptr ([1 x i8]* @"\01L_OBJC_CLASS_NAME_2", i32 0, i32 0), i8* getelementptr ([9 x i8]* @"\01L_OBJC_CLASS_NAME_1", i32 0, i32 0), i8* getelementptr ([6 x i8]* @"\01L_OBJC_CLASS_NAME_0", i32 0, i32 0), i8* getelementptr ([12 x i8]* @"\01L_OBJC_METH_VAR_NAME_0", i32 0, i32 0), i8* getelementptr ([7 x i8]* @"\01L_OBJC_METH_VAR_TYPE_0", i32 0, i32 0) ], section "llvm.metadata" ; <[11 x i8*]*> [#uses=0] + +define internal signext i8 @"-[NSString(local) isNullOrNil]"(%struct.NSString* %self, %struct.objc_selector* %_cmd) { +entry: + %self_addr = alloca %struct.NSString* ; <%struct.NSString**> [#uses=1] + %_cmd_addr = alloca %struct.objc_selector* ; <%struct.objc_selector**> [#uses=1] + %retval = alloca i8, align 1 ; <i8*> [#uses=1] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store %struct.NSString* %self, %struct.NSString** %self_addr + store %struct.objc_selector* %_cmd, %struct.objc_selector** %_cmd_addr + br label %return + +return: ; preds = %entry + %retval1 = load i8* %retval ; <i8> [#uses=1] + ret i8 %retval1 +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll b/src/LLVM/test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll new file mode 100644 index 0000000..0ae1897 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll
@@ -0,0 +1,30 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep -- -86 + +define i16 @f(<4 x float>* %tmp116117.i1061.i) nounwind { +entry: + alloca [4 x <4 x float>] ; <[4 x <4 x float>]*>:0 [#uses=167] + alloca [4 x <4 x float>] ; <[4 x <4 x float>]*>:1 [#uses=170] + alloca [4 x <4 x i32>] ; <[4 x <4 x i32>]*>:2 [#uses=12] + %.sub6235.i = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0 ; <<4 x float>*> [#uses=76] + %.sub.i = getelementptr [4 x <4 x float>]* %1, i32 0, i32 0 ; <<4 x float>*> [#uses=59] + + %tmp124.i1062.i = getelementptr <4 x float>* %tmp116117.i1061.i, i32 63 ; <<4 x float>*> [#uses=1] + %tmp125.i1063.i = load <4 x float>* %tmp124.i1062.i ; <<4 x float>> [#uses=5] + %tmp828.i1077.i = shufflevector <4 x float> %tmp125.i1063.i, <4 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > ; <<4 x float>> [#uses=4] + %tmp704.i1085.i = load <4 x float>* %.sub6235.i ; <<4 x float>> [#uses=1] + %tmp712.i1086.i = call <4 x float> @llvm.x86.sse.max.ps( <4 x float> %tmp704.i1085.i, <4 x float> %tmp828.i1077.i ) ; <<4 x float>> [#uses=1] + store <4 x float> %tmp712.i1086.i, <4 x float>* %.sub.i + + %tmp2587.i1145.gep.i = getelementptr [4 x <4 x float>]* %1, i32 0, i32 0, i32 2 ; <float*> [#uses=1] + %tmp5334.i = load float* %tmp2587.i1145.gep.i ; <float> [#uses=5] + %tmp2723.i1170.i = insertelement <4 x float> undef, float %tmp5334.i, i32 2 ; <<4 x float>> [#uses=5] + store <4 x float> %tmp2723.i1170.i, <4 x float>* %.sub6235.i + + %tmp1406.i1367.i = shufflevector <4 x float> %tmp2723.i1170.i, <4 x float> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 > ; <<4 x float>> [#uses=1] + %tmp84.i1413.i = load <4 x float>* %.sub6235.i ; <<4 x float>> [#uses=1] + %tmp89.i1415.i = fmul <4 x float> %tmp84.i1413.i, %tmp1406.i1367.i ; <<4 x float>> [#uses=1] + store <4 x float> %tmp89.i1415.i, <4 x float>* %.sub.i + ret i16 0 +} + +declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>)
diff --git a/src/LLVM/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll b/src/LLVM/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll new file mode 100644 index 0000000..f7ffb93 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll
@@ -0,0 +1,30 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i686-apple-darwin8" + +define x86_fp80 @foo(x86_fp80 %x) nounwind{ +entry: + %tmp2 = call x86_fp80 @llvm.sqrt.f80( x86_fp80 %x ) + ret x86_fp80 %tmp2 + +; CHECK: foo: +; CHECK: fldt 4(%esp) +; CHECK-NEXT: fsqrt +; CHECK-NEXT: ret +} + +declare x86_fp80 @llvm.sqrt.f80(x86_fp80) + +define x86_fp80 @bar(x86_fp80 %x) nounwind { +entry: + %tmp2 = call x86_fp80 @llvm.powi.f80( x86_fp80 %x, i32 3 ) + ret x86_fp80 %tmp2 +; CHECK: bar: +; CHECK: fldt 4(%esp) +; CHECK-NEXT: fld %st(0) +; CHECK-NEXT: fmul %st(1) +; CHECK-NEXT: fmulp +; CHECK-NEXT: ret +} + +declare x86_fp80 @llvm.powi.f80(x86_fp80, i32)
diff --git a/src/LLVM/test/CodeGen/X86/2007-10-04-AvoidEFLAGSCopy.ll b/src/LLVM/test/CodeGen/X86/2007-10-04-AvoidEFLAGSCopy.ll new file mode 100644 index 0000000..6fc8ec9 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-10-04-AvoidEFLAGSCopy.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -march=x86 | not grep pushf + + %struct.gl_texture_image = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8* } + %struct.gl_texture_object = type { i32, i32, i32, float, [4 x i32], i32, i32, i32, i32, i32, float, [11 x %struct.gl_texture_image*], [1024 x i8], i32, i32, i32, i8, i8*, i8, void (%struct.gl_texture_object*, i32, float*, float*, float*, float*, i8*, i8*, i8*, i8*)*, %struct.gl_texture_object* } + +define fastcc void @sample_3d_linear(%struct.gl_texture_object* %tObj, %struct.gl_texture_image* %img, float %s, float %t, float %r, i8* %red, i8* %green, i8* %blue, i8* %alpha) { +entry: + %tmp15 = load i32* null, align 4 ; <i32> [#uses=1] + %tmp16 = icmp eq i32 %tmp15, 10497 ; <i1> [#uses=1] + %tmp2152 = call float @floorf( float 0.000000e+00 ) ; <float> [#uses=0] + br i1 %tmp16, label %cond_true, label %cond_false + +cond_true: ; preds = %entry + ret void + +cond_false: ; preds = %entry + ret void +} + +declare float @floorf(float)
diff --git a/src/LLVM/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll b/src/LLVM/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll new file mode 100644 index 0000000..8091bd1 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll
@@ -0,0 +1,32 @@ +; RUN: llc < %s -march=x86 | not grep movb + +define signext i16 @f(i32* %bp, i32* %ss) { +entry: + br label %cond_next127 + +cond_next127: ; preds = %cond_next391, %entry + %v.1 = phi i32 [ undef, %entry ], [ %tmp411, %cond_next391 ] ; <i32> [#uses=1] + %tmp149 = mul i32 0, %v.1 ; <i32> [#uses=0] + %tmp254 = and i32 0, 15 ; <i32> [#uses=1] + %tmp256 = and i32 0, 15 ; <i32> [#uses=2] + br label %cond_next391 + +cond_next391: ; preds = %cond_next127 + %tmp393 = load i32* %ss, align 4 ; <i32> [#uses=1] + %tmp395 = load i32* %bp, align 4 ; <i32> [#uses=2] + %tmp396 = shl i32 %tmp393, %tmp395 ; <i32> [#uses=2] + %tmp398 = sub i32 32, %tmp256 ; <i32> [#uses=2] + %tmp399 = lshr i32 %tmp396, %tmp398 ; <i32> [#uses=1] + %tmp405 = lshr i32 %tmp396, 31 ; <i32> [#uses=1] + %tmp406 = add i32 %tmp405, -1 ; <i32> [#uses=1] + %tmp409 = lshr i32 %tmp406, %tmp398 ; <i32> [#uses=1] + %tmp411 = sub i32 %tmp399, %tmp409 ; <i32> [#uses=1] + %tmp422445 = add i32 %tmp254, 0 ; <i32> [#uses=1] + %tmp426447 = add i32 %tmp395, %tmp256 ; <i32> [#uses=1] + store i32 %tmp426447, i32* %bp, align 4 + %tmp429448 = icmp ult i32 %tmp422445, 63 ; <i1> [#uses=1] + br i1 %tmp429448, label %cond_next127, label %UnifiedReturnBlock + +UnifiedReturnBlock: ; preds = %cond_next391 + ret i16 0 +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll b/src/LLVM/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll new file mode 100644 index 0000000..ea1bbc4 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll
@@ -0,0 +1,45 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep addss | not grep esp + +define fastcc void @fht(float* %fz, i16 signext %n) { +entry: + br i1 true, label %bb171.preheader, label %bb431 + +bb171.preheader: ; preds = %entry + %tmp176 = fadd float 0.000000e+00, 1.000000e+00 ; <float> [#uses=2] + %gi.1 = getelementptr float* %fz, i32 0 ; <float*> [#uses=2] + %tmp240 = load float* %gi.1, align 4 ; <float> [#uses=1] + %tmp242 = fsub float %tmp240, 0.000000e+00 ; <float> [#uses=2] + %tmp251 = getelementptr float* %fz, i32 0 ; <float*> [#uses=1] + %tmp252 = load float* %tmp251, align 4 ; <float> [#uses=1] + %tmp258 = getelementptr float* %fz, i32 0 ; <float*> [#uses=2] + %tmp259 = load float* %tmp258, align 4 ; <float> [#uses=2] + %tmp261 = fmul float %tmp259, %tmp176 ; <float> [#uses=1] + %tmp262 = fsub float 0.000000e+00, %tmp261 ; <float> [#uses=2] + %tmp269 = fmul float %tmp252, %tmp176 ; <float> [#uses=1] + %tmp276 = fmul float %tmp259, 0.000000e+00 ; <float> [#uses=1] + %tmp277 = fadd float %tmp269, %tmp276 ; <float> [#uses=2] + %tmp281 = getelementptr float* %fz, i32 0 ; <float*> [#uses=1] + %tmp282 = load float* %tmp281, align 4 ; <float> [#uses=2] + %tmp284 = fsub float %tmp282, %tmp277 ; <float> [#uses=1] + %tmp291 = fadd float %tmp282, %tmp277 ; <float> [#uses=1] + %tmp298 = fsub float 0.000000e+00, %tmp262 ; <float> [#uses=1] + %tmp305 = fadd float 0.000000e+00, %tmp262 ; <float> [#uses=1] + %tmp315 = fmul float 0.000000e+00, %tmp291 ; <float> [#uses=1] + %tmp318 = fmul float 0.000000e+00, %tmp298 ; <float> [#uses=1] + %tmp319 = fadd float %tmp315, %tmp318 ; <float> [#uses=1] + %tmp329 = fadd float 0.000000e+00, %tmp319 ; <float> [#uses=1] + store float %tmp329, float* null, align 4 + %tmp336 = fsub float %tmp242, 0.000000e+00 ; <float> [#uses=1] + store float %tmp336, float* %tmp258, align 4 + %tmp343 = fadd float %tmp242, 0.000000e+00 ; <float> [#uses=1] + store float %tmp343, float* null, align 4 + %tmp355 = fmul float 0.000000e+00, %tmp305 
; <float> [#uses=1] + %tmp358 = fmul float 0.000000e+00, %tmp284 ; <float> [#uses=1] + %tmp359 = fadd float %tmp355, %tmp358 ; <float> [#uses=1] + %tmp369 = fadd float 0.000000e+00, %tmp359 ; <float> [#uses=1] + store float %tmp369, float* %gi.1, align 4 + ret void + +bb431: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll b/src/LLVM/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll new file mode 100644 index 0000000..7a3d72d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll
@@ -0,0 +1,57 @@ +; RUN: llc < %s -march=x86 | grep sarl | not grep esp + +define signext i16 @t(i16* %qmatrix, i16* %dct, i16* %acBaseTable, i16* %acExtTable, i16 signext %acBaseRes, i16 signext %acMaskRes, i16 signext %acExtRes, i32* %bitptr, i32* %source, i32 %markerPrefix, i8** %byteptr, i32 %scale, i32 %round, i32 %bits) { +entry: + br label %cond_next127 + +cond_next127: ; preds = %cond_next391, %entry + %tmp151 = add i32 0, %round ; <i32> [#uses=1] + %tmp153 = ashr i32 %tmp151, %scale ; <i32> [#uses=1] + %tmp158 = xor i32 0, %tmp153 ; <i32> [#uses=1] + %tmp160 = or i32 %tmp158, 0 ; <i32> [#uses=1] + %tmp180181 = sext i16 0 to i32 ; <i32> [#uses=1] + %tmp183 = add i32 %tmp160, 1 ; <i32> [#uses=1] + br i1 false, label %cond_true188, label %cond_next245 + +cond_true188: ; preds = %cond_next127 + ret i16 0 + +cond_next245: ; preds = %cond_next127 + %tmp253444 = lshr i32 %tmp180181, 4 ; <i32> [#uses=1] + %tmp254 = and i32 %tmp253444, 15 ; <i32> [#uses=1] + br i1 false, label %cond_true267, label %cond_next391 + +cond_true267: ; preds = %cond_next245 + %tmp269 = load i8** %byteptr, align 4 ; <i8*> [#uses=3] + %tmp270 = load i8* %tmp269, align 1 ; <i8> [#uses=1] + %tmp270271 = zext i8 %tmp270 to i32 ; <i32> [#uses=1] + %tmp272 = getelementptr i8* %tmp269, i32 1 ; <i8*> [#uses=2] + store i8* %tmp272, i8** %byteptr, align 4 + %tmp276 = load i8* %tmp272, align 1 ; <i8> [#uses=1] + %tmp278 = getelementptr i8* %tmp269, i32 2 ; <i8*> [#uses=1] + store i8* %tmp278, i8** %byteptr, align 4 + %tmp286 = icmp eq i32 %tmp270271, %markerPrefix ; <i1> [#uses=1] + %cond = icmp eq i8 %tmp276, 0 ; <i1> [#uses=1] + %bothcond = and i1 %tmp286, %cond ; <i1> [#uses=1] + br i1 %bothcond, label %cond_true294, label %cond_next327 + +cond_true294: ; preds = %cond_true267 + ret i16 0 + +cond_next327: ; preds = %cond_true267 + br i1 false, label %cond_true343, label %cond_next391 + +cond_true343: ; preds = %cond_next327 + %tmp345 = load i8** %byteptr, align 4 ; <i8*> [#uses=1] + store i8* 
null, i8** %byteptr, align 4 + store i8* %tmp345, i8** %byteptr, align 4 + br label %cond_next391 + +cond_next391: ; preds = %cond_true343, %cond_next327, %cond_next245 + %tmp422445 = add i32 %tmp254, %tmp183 ; <i32> [#uses=1] + %tmp429448 = icmp ult i32 %tmp422445, 63 ; <i1> [#uses=1] + br i1 %tmp429448, label %cond_next127, label %UnifiedReturnBlock + +UnifiedReturnBlock: ; preds = %cond_next391 + ret i16 0 +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-10-14-CoalescerCrash.ll b/src/LLVM/test/CodeGen/X86/2007-10-14-CoalescerCrash.ll new file mode 100644 index 0000000..8a55935 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-10-14-CoalescerCrash.ll
@@ -0,0 +1,28 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin + + %struct._Unwind_Context = type { } + +define i32 @execute_stack_op(i8* %op_ptr, i8* %op_end, %struct._Unwind_Context* %context, i64 %initial) { +entry: + br i1 false, label %bb, label %return + +bb: ; preds = %bb31, %entry + br i1 false, label %bb6, label %bb31 + +bb6: ; preds = %bb + %tmp10 = load i64* null, align 8 ; <i64> [#uses=1] + %tmp16 = load i64* null, align 8 ; <i64> [#uses=1] + br i1 false, label %bb23, label %bb31 + +bb23: ; preds = %bb6 + %tmp2526.cast = and i64 %tmp16, 4294967295 ; <i64> [#uses=1] + %tmp27 = ashr i64 %tmp10, %tmp2526.cast ; <i64> [#uses=1] + br label %bb31 + +bb31: ; preds = %bb23, %bb6, %bb + %result.0 = phi i64 [ %tmp27, %bb23 ], [ 0, %bb ], [ 0, %bb6 ] ; <i64> [#uses=0] + br i1 false, label %bb, label %return + +return: ; preds = %bb31, %entry + ret i32 undef +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-10-15-CoalescerCrash.ll b/src/LLVM/test/CodeGen/X86/2007-10-15-CoalescerCrash.ll new file mode 100644 index 0000000..2b56b4e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-10-15-CoalescerCrash.ll
@@ -0,0 +1,400 @@ +; RUN: llc < %s -mtriple=x86_64-linux-gnu +; PR1729 + + %struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } + %struct.VEC_edge = type { i32, i32, [1 x %struct.edge_def*] } + %struct.VEC_tree = type { i32, i32, [1 x %struct.tree_node*] } + %struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } + %struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + %struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] } + %struct.addr_diff_vec_flags = type <{ i8, i8, i8, i8 }> + %struct.alloc_pool_def = type { i8*, i64, i64, %struct.alloc_pool_list_def*, i64, i64, i64, %struct.alloc_pool_list_def*, i64, i64 } + %struct.alloc_pool_list_def = type { %struct.alloc_pool_list_def* } + %struct.basic_block_def = type { %struct.rtx_def*, %struct.rtx_def*, %struct.tree_node*, %struct.VEC_edge*, %struct.VEC_edge*, %struct.bitmap_head_def*, %struct.bitmap_head_def*, i8*, %struct.loop*, [2 x %struct.et_node*], %struct.basic_block_def*, %struct.basic_block_def*, %struct.reorder_block_def*, %struct.bb_ann_d*, i64, i32, i32, i32, i32 } + %struct.bb_ann_d = type opaque + %struct.bitmap_element_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, [2 x i64] } + %struct.bitmap_head_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, %struct.bitmap_obstack* } + %struct.bitmap_obstack = type { %struct.bitmap_element_def*, %struct.bitmap_head_def*, %struct.obstack } + %struct.cselib_val_struct = type opaque + %struct.dataflow_d = type opaque + %struct.die_struct = type opaque + %struct.edge_def = type { %struct.basic_block_def*, %struct.basic_block_def*, %struct.edge_def_insns, i8*, %struct.location_t*, i32, i32, i64, i32 } + %struct.edge_def_insns = type { %struct.rtx_def* } + 
%struct.edge_iterator = type { i32, %struct.VEC_edge** } + %struct.eh_status = type opaque + %struct.elt_list = type opaque + %struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack*, i32, %struct.location_t, i32, i8*, %struct.rtx_def** } + %struct.et_node = type opaque + %struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* } + %struct.function = type { %struct.eh_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.CUMULATIVE_ARGS, %struct.rtx_def*, %struct.rtx_def*, %struct.initial_value_struct*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i8, i32, i64, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.varray_head_tag*, %struct.temp_slot*, i32, %struct.var_refs_queue*, i32, i32, %struct.rtvec_def*, %struct.tree_node*, i32, i32, i32, %struct.machine_function*, i32, i32, i8, i8, %struct.language_function*, %struct.rtx_def*, i32, i32, i32, i32, %struct.location_t, %struct.varray_head_tag*, %struct.tree_node*, %struct.tree_node*, i8, i8, i8 } + %struct.ht_identifier = type { i8*, i32, i32 } + %struct.initial_value_struct = type opaque + %struct.lang_decl = type opaque + %struct.lang_type = type opaque + %struct.language_function = type opaque + %struct.location_t = type { i8*, i32 } + %struct.loop = type opaque + %struct.machine_function = type { %struct.stack_local_entry*, i8*, %struct.rtx_def*, i32, i32, i32, i32, i32 } + %struct.mem_attrs = type { i64, %struct.tree_node*, %struct.rtx_def*, %struct.rtx_def*, i32 } + %struct.obstack = type { i64, %struct._obstack_chunk*, i8*, i8*, i8*, i64, i32, %struct._obstack_chunk* (i8*, i64)*, void (i8*, %struct._obstack_chunk*)*, i8*, i8 } + %struct.phi_arg_d = type { %struct.tree_node*, i8 } + 
%struct.ptr_info_def = type opaque + %struct.real_value = type opaque + %struct.reg_attrs = type { %struct.tree_node*, i64 } + %struct.reg_info_def = type { i32, i32, i32, i32, i32, i32, i32, i32, i32 } + %struct.reorder_block_def = type { %struct.rtx_def*, %struct.rtx_def*, %struct.basic_block_def*, %struct.basic_block_def*, %struct.basic_block_def*, i32, i32, i32 } + %struct.rtunion = type { i8* } + %struct.rtvec_def = type { i32, [1 x %struct.rtx_def*] } + %struct.rtx_def = type { i16, i8, i8, %struct.u } + %struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack* } + %struct.simple_bitmap_def = type { i32, i32, i32, [1 x i64] } + %struct.stack_local_entry = type opaque + %struct.temp_slot = type opaque + %struct.tree_binfo = type { %struct.tree_common, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.VEC_tree*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.VEC_tree } + %struct.tree_block = type { %struct.tree_common, i32, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node* } + %struct.tree_common = type { %struct.tree_node*, %struct.tree_node*, %union.tree_ann_d*, i8, i8, i8, i8, i8 } + %struct.tree_complex = type { %struct.tree_common, %struct.tree_node*, %struct.tree_node* } + %struct.tree_decl = type { %struct.tree_common, %struct.location_t, i32, %struct.tree_node*, i8, i8, i8, i8, i8, i8, i8, i8, i32, %struct.tree_decl_u1, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, i32, %struct.tree_decl_u2, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_decl* } + %struct.tree_decl_u1 = type { i64 } + %struct.tree_decl_u1_a = type <{ i32 }> + %struct.tree_decl_u2 = type { %struct.function* } + %struct.tree_exp = type { 
%struct.tree_common, %struct.location_t*, i32, %struct.tree_node*, [1 x %struct.tree_node*] } + %struct.tree_identifier = type { %struct.tree_common, %struct.ht_identifier } + %struct.tree_int_cst = type { %struct.tree_common, %struct.tree_int_cst_lowhi } + %struct.tree_int_cst_lowhi = type { i64, i64 } + %struct.tree_list = type { %struct.tree_common, %struct.tree_node*, %struct.tree_node* } + %struct.tree_node = type { %struct.tree_decl } + %struct.tree_phi_node = type { %struct.tree_common, %struct.tree_node*, i32, i32, i32, %struct.basic_block_def*, %struct.dataflow_d*, [1 x %struct.phi_arg_d] } + %struct.tree_real_cst = type { %struct.tree_common, %struct.real_value* } + %struct.tree_ssa_name = type { %struct.tree_common, %struct.tree_node*, i32, %struct.ptr_info_def*, %struct.tree_node*, i8* } + %struct.tree_statement_list = type { %struct.tree_common, %struct.tree_statement_list_node*, %struct.tree_statement_list_node* } + %struct.tree_statement_list_node = type { %struct.tree_statement_list_node*, %struct.tree_statement_list_node*, %struct.tree_node* } + %struct.tree_string = type { %struct.tree_common, i32, [1 x i8] } + %struct.tree_type = type { %struct.tree_common, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i32, i16, i8, i8, i32, %struct.tree_node*, %struct.tree_node*, %struct.rtunion, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_type* } + %struct.tree_type_symtab = type { i8* } + %struct.tree_value_handle = type { %struct.tree_common, %struct.value_set*, i32 } + %struct.tree_vec = type { %struct.tree_common, i32, [1 x %struct.tree_node*] } + %struct.tree_vector = type { %struct.tree_common, %struct.tree_node* } + %struct.u = type { [1 x %struct.rtunion] } + %struct.value_set = type opaque + %struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* } + %struct.varasm_status = type 
opaque + %struct.varray_data = type { [1 x i64] } + %struct.varray_head_tag = type { i64, i64, i32, i8*, %struct.varray_data } + %union.tree_ann_d = type opaque +@first_edge_aux_obj = external global i8* ; <i8**> [#uses=0] +@first_block_aux_obj = external global i8* ; <i8**> [#uses=0] +@n_edges = external global i32 ; <i32*> [#uses=0] +@ENTRY_BLOCK_PTR = external global %struct.basic_block_def* ; <%struct.basic_block_def**> [#uses=0] +@EXIT_BLOCK_PTR = external global %struct.basic_block_def* ; <%struct.basic_block_def**> [#uses=0] +@n_basic_blocks = external global i32 ; <i32*> [#uses=0] +@.str = external constant [9 x i8] ; <[9 x i8]*> [#uses=0] +@rbi_pool = external global %struct.alloc_pool_def* ; <%struct.alloc_pool_def**> [#uses=0] +@__FUNCTION__.19643 = external constant [18 x i8] ; <[18 x i8]*> [#uses=0] +@.str1 = external constant [20 x i8] ; <[20 x i8]*> [#uses=0] +@__FUNCTION__.19670 = external constant [15 x i8] ; <[15 x i8]*> [#uses=0] +@basic_block_info = external global %struct.varray_head_tag* ; <%struct.varray_head_tag**> [#uses=0] +@last_basic_block = external global i32 ; <i32*> [#uses=0] +@__FUNCTION__.19696 = external constant [14 x i8] ; <[14 x i8]*> [#uses=0] +@__FUNCTION__.20191 = external constant [20 x i8] ; <[20 x i8]*> [#uses=0] +@block_aux_obstack = external global %struct.obstack ; <%struct.obstack*> [#uses=0] +@__FUNCTION__.20301 = external constant [20 x i8] ; <[20 x i8]*> [#uses=0] +@__FUNCTION__.20316 = external constant [19 x i8] ; <[19 x i8]*> [#uses=0] +@edge_aux_obstack = external global %struct.obstack ; <%struct.obstack*> [#uses=0] +@stderr = external global %struct._IO_FILE* ; <%struct._IO_FILE**> [#uses=0] +@__FUNCTION__.20463 = external constant [11 x i8] ; <[11 x i8]*> [#uses=0] +@.str2 = external constant [7 x i8] ; <[7 x i8]*> [#uses=0] +@.str3 = external constant [6 x i8] ; <[6 x i8]*> [#uses=0] +@.str4 = external constant [4 x i8] ; <[4 x i8]*> [#uses=0] +@.str5 = external constant [11 x i8] ; <[11 x i8]*> [#uses=0] 
+@.str6 = external constant [8 x i8] ; <[8 x i8]*> [#uses=0] +@.str7 = external constant [4 x i8] ; <[4 x i8]*> [#uses=0] +@bitnames.20157 = external constant [13 x i8*] ; <[13 x i8*]*> [#uses=0] +@.str8 = external constant [9 x i8] ; <[9 x i8]*> [#uses=0] +@.str9 = external constant [3 x i8] ; <[3 x i8]*> [#uses=0] +@.str10 = external constant [7 x i8] ; <[7 x i8]*> [#uses=0] +@.str11 = external constant [3 x i8] ; <[3 x i8]*> [#uses=0] +@.str12 = external constant [5 x i8] ; <[5 x i8]*> [#uses=0] +@.str13 = external constant [9 x i8] ; <[9 x i8]*> [#uses=0] +@.str14 = external constant [13 x i8] ; <[13 x i8]*> [#uses=0] +@.str15 = external constant [12 x i8] ; <[12 x i8]*> [#uses=0] +@.str16 = external constant [8 x i8] ; <[8 x i8]*> [#uses=0] +@.str17 = external constant [10 x i8] ; <[10 x i8]*> [#uses=0] +@.str18 = external constant [5 x i8] ; <[5 x i8]*> [#uses=0] +@.str19 = external constant [6 x i8] ; <[6 x i8]*> [#uses=0] +@.str20 = external constant [5 x i8] ; <[5 x i8]*> [#uses=0] +@.str21 = external constant [3 x i8] ; <[3 x i8]*> [#uses=0] +@.str22 = external constant [3 x i8] ; <[3 x i8]*> [#uses=0] +@__FUNCTION__.19709 = external constant [20 x i8] ; <[20 x i8]*> [#uses=0] +@.str23 = external constant [5 x i8] ; <[5 x i8]*> [#uses=0] +@.str24 = external constant [10 x i8] ; <[10 x i8]*> [#uses=0] +@__FUNCTION__.19813 = external constant [19 x i8] ; <[19 x i8]*> [#uses=0] +@.str25 = external constant [7 x i8] ; <[7 x i8]*> [#uses=0] +@.str26 = external constant [6 x i8] ; <[6 x i8]*> [#uses=0] +@initialized.20241.b = external global i1 ; <i1*> [#uses=0] +@__FUNCTION__.20244 = external constant [21 x i8] ; <[21 x i8]*> [#uses=0] +@__FUNCTION__.19601 = external constant [12 x i8] ; <[12 x i8]*> [#uses=0] +@__FUNCTION__.14571 = external constant [8 x i8] ; <[8 x i8]*> [#uses=0] +@__FUNCTION__.14535 = external constant [13 x i8] ; <[13 x i8]*> [#uses=0] +@.str27 = external constant [28 x i8] ; <[28 x i8]*> [#uses=0] +@__FUNCTION__.14589 = external constant 
[8 x i8] ; <[8 x i8]*> [#uses=0] +@__FUNCTION__.19792 = external constant [12 x i8] ; <[12 x i8]*> [#uses=0] +@__FUNCTION__.19851 = external constant [19 x i8] ; <[19 x i8]*> [#uses=0] +@profile_status = external global i32 ; <i32*> [#uses=0] +@.str29 = external constant [46 x i8] ; <[46 x i8]*> [#uses=0] +@.str30 = external constant [49 x i8] ; <[49 x i8]*> [#uses=0] +@.str31 = external constant [54 x i8] ; <[54 x i8]*> [#uses=0] +@.str32 = external constant [49 x i8] ; <[49 x i8]*> [#uses=1] +@__FUNCTION__.19948 = external constant [15 x i8] ; <[15 x i8]*> [#uses=0] +@reg_n_info = external global %struct.varray_head_tag* ; <%struct.varray_head_tag**> [#uses=0] +@reload_completed = external global i32 ; <i32*> [#uses=0] +@.str33 = external constant [15 x i8] ; <[15 x i8]*> [#uses=0] +@.str34 = external constant [43 x i8] ; <[43 x i8]*> [#uses=0] +@.str35 = external constant [13 x i8] ; <[13 x i8]*> [#uses=0] +@.str36 = external constant [1 x i8] ; <[1 x i8]*> [#uses=0] +@.str37 = external constant [2 x i8] ; <[2 x i8]*> [#uses=0] +@.str38 = external constant [16 x i8] ; <[16 x i8]*> [#uses=0] +@cfun = external global %struct.function* ; <%struct.function**> [#uses=0] +@.str39 = external constant [14 x i8] ; <[14 x i8]*> [#uses=0] +@.str40 = external constant [11 x i8] ; <[11 x i8]*> [#uses=0] +@.str41 = external constant [20 x i8] ; <[20 x i8]*> [#uses=0] +@.str42 = external constant [17 x i8] ; <[17 x i8]*> [#uses=0] +@.str43 = external constant [19 x i8] ; <[19 x i8]*> [#uses=0] +@mode_size = external global [48 x i8] ; <[48 x i8]*> [#uses=0] +@target_flags = external global i32 ; <i32*> [#uses=0] +@.str44 = external constant [11 x i8] ; <[11 x i8]*> [#uses=0] +@reg_class_names = external global [0 x i8*] ; <[0 x i8*]*> [#uses=0] +@.str45 = external constant [10 x i8] ; <[10 x i8]*> [#uses=0] +@.str46 = external constant [13 x i8] ; <[13 x i8]*> [#uses=0] +@.str47 = external constant [19 x i8] ; <[19 x i8]*> [#uses=0] +@.str48 = external constant [12 x i8] ; 
<[12 x i8]*> [#uses=0] +@.str49 = external constant [10 x i8] ; <[10 x i8]*> [#uses=0] +@.str50 = external constant [3 x i8] ; <[3 x i8]*> [#uses=0] +@.str51 = external constant [29 x i8] ; <[29 x i8]*> [#uses=0] +@.str52 = external constant [17 x i8] ; <[17 x i8]*> [#uses=0] +@.str53 = external constant [19 x i8] ; <[19 x i8]*> [#uses=0] +@.str54 = external constant [22 x i8] ; <[22 x i8]*> [#uses=0] +@.str55 = external constant [10 x i8] ; <[10 x i8]*> [#uses=0] +@.str56 = external constant [12 x i8] ; <[12 x i8]*> [#uses=0] +@.str57 = external constant [26 x i8] ; <[26 x i8]*> [#uses=0] +@.str58 = external constant [15 x i8] ; <[15 x i8]*> [#uses=0] +@.str59 = external constant [14 x i8] ; <[14 x i8]*> [#uses=0] +@.str60 = external constant [26 x i8] ; <[26 x i8]*> [#uses=0] +@.str61 = external constant [24 x i8] ; <[24 x i8]*> [#uses=0] +@initialized.20366.b = external global i1 ; <i1*> [#uses=0] +@__FUNCTION__.20369 = external constant [20 x i8] ; <[20 x i8]*> [#uses=0] +@__FUNCTION__.20442 = external constant [19 x i8] ; <[19 x i8]*> [#uses=0] +@bb_bitnames.20476 = external constant [6 x i8*] ; <[6 x i8*]*> [#uses=0] +@.str62 = external constant [6 x i8] ; <[6 x i8]*> [#uses=0] +@.str63 = external constant [4 x i8] ; <[4 x i8]*> [#uses=0] +@.str64 = external constant [10 x i8] ; <[10 x i8]*> [#uses=0] +@.str65 = external constant [8 x i8] ; <[8 x i8]*> [#uses=0] +@.str66 = external constant [17 x i8] ; <[17 x i8]*> [#uses=0] +@.str67 = external constant [11 x i8] ; <[11 x i8]*> [#uses=0] +@.str68 = external constant [15 x i8] ; <[15 x i8]*> [#uses=0] +@.str69 = external constant [3 x i8] ; <[3 x i8]*> [#uses=0] +@.str70 = external constant [3 x i8] ; <[3 x i8]*> [#uses=0] +@__FUNCTION__.20520 = external constant [32 x i8] ; <[32 x i8]*> [#uses=0] +@dump_file = external global %struct._IO_FILE* ; <%struct._IO_FILE**> [#uses=0] +@.str71 = external constant [86 x i8] ; <[86 x i8]*> [#uses=0] +@.str72 = external constant [94 x i8] ; <[94 x i8]*> [#uses=0] 
+@reg_obstack = external global %struct.bitmap_obstack ; <%struct.bitmap_obstack*> [#uses=0] + +declare void @init_flow() + +declare i8* @ggc_alloc_cleared_stat(i64) + +declare fastcc void @free_edge(%struct.edge_def*) + +declare void @ggc_free(i8*) + +declare %struct.basic_block_def* @alloc_block() + +declare void @alloc_rbi_pool() + +declare %struct.alloc_pool_def* @create_alloc_pool(i8*, i64, i64) + +declare void @free_rbi_pool() + +declare void @free_alloc_pool(%struct.alloc_pool_def*) + +declare void @initialize_bb_rbi(%struct.basic_block_def*) + +declare void @fancy_abort(i8*, i32, i8*) + +declare i8* @pool_alloc(%struct.alloc_pool_def*) + +declare void @llvm.memset.i64(i8*, i8, i64, i32) + +declare void @link_block(%struct.basic_block_def*, %struct.basic_block_def*) + +declare void @unlink_block(%struct.basic_block_def*) + +declare void @compact_blocks() + +declare void @varray_check_failed(%struct.varray_head_tag*, i64, i8*, i32, i8*) + +declare void @expunge_block(%struct.basic_block_def*) + +declare void @clear_bb_flags() + +declare void @alloc_aux_for_block(%struct.basic_block_def*, i32) + +declare void @_obstack_newchunk(%struct.obstack*, i32) + +declare void @clear_aux_for_blocks() + +declare void @free_aux_for_blocks() + +declare void @obstack_free(%struct.obstack*, i8*) + +declare void @alloc_aux_for_edge(%struct.edge_def*, i32) + +declare void @debug_bb(%struct.basic_block_def*) + +declare void @dump_bb(%struct.basic_block_def*, %struct._IO_FILE*, i32) + +declare %struct.basic_block_def* @debug_bb_n(i32) + +declare void @dump_edge_info(%struct._IO_FILE*, %struct.edge_def*, i32) + +declare i32 @fputs_unlocked(i8* noalias , %struct._IO_FILE* noalias ) + +declare i32 @fprintf(%struct._IO_FILE* noalias , i8* noalias , ...) 
+ +declare i64 @fwrite(i8*, i64, i64, i8*) + +declare i32 @__overflow(%struct._IO_FILE*, i32) + +declare %struct.edge_def* @unchecked_make_edge(%struct.basic_block_def*, %struct.basic_block_def*, i32) + +declare i8* @vec_gc_p_reserve(i8*, i32) + +declare void @vec_assert_fail(i8*, i8*, i8*, i32, i8*) + +declare void @execute_on_growing_pred(%struct.edge_def*) + +declare %struct.edge_def* @make_edge(%struct.basic_block_def*, %struct.basic_block_def*, i32) + +declare %struct.edge_def* @find_edge(%struct.basic_block_def*, %struct.basic_block_def*) + +declare %struct.edge_def* @make_single_succ_edge(%struct.basic_block_def*, %struct.basic_block_def*, i32) + +declare %struct.edge_def* @cached_make_edge(%struct.simple_bitmap_def**, %struct.basic_block_def*, %struct.basic_block_def*, i32) + +declare void @redirect_edge_succ(%struct.edge_def*, %struct.basic_block_def*) + +declare void @execute_on_shrinking_pred(%struct.edge_def*) + +declare void @alloc_aux_for_blocks(i32) + +declare i8* @xmalloc(i64) + +declare i32 @_obstack_begin(%struct.obstack*, i32, i32, i8* (i64)*, void (i8*)*) + +declare void @free(i8*) + +declare void @clear_edges() + +declare void @remove_edge(%struct.edge_def*) + +declare %struct.edge_def* @redirect_edge_succ_nodup(%struct.edge_def*, %struct.basic_block_def*) + +declare void @redirect_edge_pred(%struct.edge_def*, %struct.basic_block_def*) + +define void @check_bb_profile(%struct.basic_block_def* %bb, %struct._IO_FILE* %file) { +entry: + br i1 false, label %cond_false759.preheader, label %cond_false149.preheader + +cond_false149.preheader: ; preds = %entry + ret void + +cond_false759.preheader: ; preds = %entry + br i1 false, label %cond_next873, label %cond_true794 + +bb644: ; preds = %cond_next873 + ret void + +cond_true794: ; preds = %cond_false759.preheader + ret void + +cond_next873: ; preds = %cond_false759.preheader + br i1 false, label %bb882, label %bb644 + +bb882: ; preds = %cond_next873 + br i1 false, label %cond_true893, label 
%cond_next901 + +cond_true893: ; preds = %bb882 + br label %cond_false1036 + +cond_next901: ; preds = %bb882 + ret void + +bb929: ; preds = %cond_next1150 + %tmp934 = add i64 0, %lsum.11225.0 ; <i64> [#uses=1] + br i1 false, label %cond_next979, label %cond_true974 + +cond_true974: ; preds = %bb929 + ret void + +cond_next979: ; preds = %bb929 + br label %cond_false1036 + +cond_false1036: ; preds = %cond_next979, %cond_true893 + %lsum.11225.0 = phi i64 [ 0, %cond_true893 ], [ %tmp934, %cond_next979 ] ; <i64> [#uses=2] + br i1 false, label %cond_next1056, label %cond_true1051 + +cond_true1051: ; preds = %cond_false1036 + ret void + +cond_next1056: ; preds = %cond_false1036 + br i1 false, label %cond_next1150, label %cond_true1071 + +cond_true1071: ; preds = %cond_next1056 + ret void + +cond_next1150: ; preds = %cond_next1056 + %tmp1156 = icmp eq %struct.edge_def* null, null ; <i1> [#uses=1] + br i1 %tmp1156, label %bb1159, label %bb929 + +bb1159: ; preds = %cond_next1150 + br i1 false, label %cond_true1169, label %UnifiedReturnBlock + +cond_true1169: ; preds = %bb1159 + %tmp11741175 = trunc i64 %lsum.11225.0 to i32 ; <i32> [#uses=1] + %tmp1178 = tail call i32 (%struct._IO_FILE* , i8* , ...)* @fprintf( %struct._IO_FILE* noalias %file , i8* getelementptr ([49 x i8]* @.str32, i32 0, i64 0) , i32 %tmp11741175, i32 0 ) ; <i32> [#uses=0] + ret void + +UnifiedReturnBlock: ; preds = %bb1159 + ret void +} + +declare void @dump_flow_info(%struct._IO_FILE*) + +declare i32 @max_reg_num() + +declare void @rtl_check_failed_flag(i8*, %struct.rtx_def*, i8*, i32, i8*) + +declare i32 @reg_preferred_class(i32) + +declare i32 @reg_alternate_class(i32) + +declare zeroext i8 @maybe_hot_bb_p(%struct.basic_block_def*) + +declare zeroext i8 @probably_never_executed_bb_p(%struct.basic_block_def*) + +declare void @dump_regset(%struct.bitmap_head_def*, %struct._IO_FILE*) + +declare void @debug_flow_info() + +declare void @alloc_aux_for_edges(i32) + +declare void @clear_aux_for_edges() + 
+declare void @free_aux_for_edges() + +declare void @brief_dump_cfg(%struct._IO_FILE*) + +declare i32 @fputc(i32, i8*) + +declare void @update_bb_profile_for_threading(%struct.basic_block_def*, i32, i64, %struct.edge_def*)
diff --git a/src/LLVM/test/CodeGen/X86/2007-10-16-CoalescerCrash.ll b/src/LLVM/test/CodeGen/X86/2007-10-16-CoalescerCrash.ll new file mode 100644 index 0000000..fbcac50 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-10-16-CoalescerCrash.ll
@@ -0,0 +1,31 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin + +define i64 @__ashldi3(i64 %u, i64 %b) { +entry: + br i1 false, label %UnifiedReturnBlock, label %cond_next + +cond_next: ; preds = %entry + %tmp9 = sub i64 32, %b ; <i64> [#uses=2] + %tmp11 = icmp slt i64 %tmp9, 1 ; <i1> [#uses=1] + %tmp2180 = trunc i64 %u to i32 ; <i32> [#uses=2] + %tmp2223 = trunc i64 %tmp9 to i32 ; <i32> [#uses=2] + br i1 %tmp11, label %cond_true14, label %cond_false + +cond_true14: ; preds = %cond_next + %tmp24 = sub i32 0, %tmp2223 ; <i32> [#uses=1] + %tmp25 = shl i32 %tmp2180, %tmp24 ; <i32> [#uses=1] + %tmp2569 = zext i32 %tmp25 to i64 ; <i64> [#uses=1] + %tmp256970 = shl i64 %tmp2569, 32 ; <i64> [#uses=1] + ret i64 %tmp256970 + +cond_false: ; preds = %cond_next + %tmp35 = lshr i32 %tmp2180, %tmp2223 ; <i32> [#uses=1] + %tmp54 = or i32 %tmp35, 0 ; <i32> [#uses=1] + %tmp5464 = zext i32 %tmp54 to i64 ; <i64> [#uses=1] + %tmp546465 = shl i64 %tmp5464, 32 ; <i64> [#uses=1] + %tmp546465.ins = or i64 %tmp546465, 0 ; <i64> [#uses=1] + ret i64 %tmp546465.ins + +UnifiedReturnBlock: + ret i64 %u +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-10-17-IllegalAsm.ll b/src/LLVM/test/CodeGen/X86/2007-10-17-IllegalAsm.ll new file mode 100644 index 0000000..c0bb55e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-10-17-IllegalAsm.ll
@@ -0,0 +1,87 @@ +; RUN: llc < %s -mtriple=x86_64-linux-gnu | grep addb | not grep x +; RUN: llc < %s -mtriple=x86_64-linux-gnu | grep cmpb | not grep x +; PR1734 + +target triple = "x86_64-unknown-linux-gnu" + %struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } + %struct.eh_status = type opaque + %struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack*, i32, %struct.location_t, i32, i8*, %struct.rtx_def** } + %struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* } + %struct.function = type { %struct.eh_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.CUMULATIVE_ARGS, %struct.rtx_def*, %struct.rtx_def*, %struct.initial_value_struct*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i8, i32, i64, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.varray_head_tag*, %struct.temp_slot*, i32, %struct.var_refs_queue*, i32, i32, %struct.rtvec_def*, %struct.tree_node*, i32, i32, i32, %struct.machine_function*, i32, i32, i8, i8, %struct.language_function*, %struct.rtx_def*, i32, i32, i32, i32, %struct.location_t, %struct.varray_head_tag*, %struct.tree_node*, %struct.tree_node*, i8, i8, i8 } + %struct.initial_value_struct = type opaque + %struct.lang_decl = type opaque + %struct.language_function = type opaque + %struct.location_t = type { i8*, i32 } + %struct.machine_function = type { %struct.stack_local_entry*, i8*, %struct.rtx_def*, i32, i32, i32, i32, i32 } + %struct.rtunion = type { i8* } + %struct.rtvec_def = type { i32, [1 x %struct.rtx_def*] } + %struct.rtx_def = type { i16, i8, i8, %struct.u } + %struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack* } + 
%struct.stack_local_entry = type opaque + %struct.temp_slot = type opaque + %struct.tree_common = type { %struct.tree_node*, %struct.tree_node*, %union.tree_ann_d*, i8, i8, i8, i8, i8 } + %struct.tree_decl = type { %struct.tree_common, %struct.location_t, i32, %struct.tree_node*, i8, i8, i8, i8, i8, i8, i8, i8, i32, %struct.tree_decl_u1, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, i32, %struct.tree_decl_u2, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_decl* } + %struct.tree_decl_u1 = type { i64 } + %struct.tree_decl_u2 = type { %struct.function* } + %struct.tree_node = type { %struct.tree_decl } + %struct.u = type { [1 x %struct.rtunion] } + %struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* } + %struct.varasm_status = type opaque + %struct.varray_data = type { [1 x i64] } + %struct.varray_head_tag = type { i64, i64, i32, i8*, %struct.varray_data } + %union.tree_ann_d = type opaque + +define void @layout_type(%struct.tree_node* %type) { +entry: + %tmp32 = load i32* null, align 8 ; <i32> [#uses=3] + %tmp3435 = trunc i32 %tmp32 to i8 ; <i8> [#uses=1] + %tmp53 = icmp eq %struct.tree_node* null, null ; <i1> [#uses=1] + br i1 %tmp53, label %cond_next57, label %UnifiedReturnBlock + +cond_next57: ; preds = %entry + %tmp65 = and i32 %tmp32, 255 ; <i32> [#uses=1] + switch i32 %tmp65, label %UnifiedReturnBlock [ + i32 6, label %bb140 + i32 7, label %bb140 + i32 8, label %bb140 + i32 13, label %bb478 + ] + +bb140: ; preds = %cond_next57, %cond_next57, %cond_next57 + %tmp219 = load i32* null, align 8 ; <i32> [#uses=1] + %tmp221222 = trunc i32 %tmp219 to i8 ; <i8> [#uses=1] + %tmp223 = icmp eq i8 %tmp221222, 24 ; <i1> [#uses=1] + br i1 %tmp223, label %cond_true226, label %cond_next340 + +cond_true226: ; preds = %bb140 + switch i8 %tmp3435, label %cond_true288 [ 
+ i8 6, label %cond_next340 + i8 9, label %cond_next340 + i8 7, label %cond_next340 + i8 8, label %cond_next340 + i8 10, label %cond_next340 + ] + +cond_true288: ; preds = %cond_true226 + unreachable + +cond_next340: ; preds = %cond_true226, %cond_true226, %cond_true226, %cond_true226, %cond_true226, %bb140 + ret void + +bb478: ; preds = %cond_next57 + br i1 false, label %cond_next500, label %cond_true497 + +cond_true497: ; preds = %bb478 + unreachable + +cond_next500: ; preds = %bb478 + %tmp513 = load i32* null, align 8 ; <i32> [#uses=1] + %tmp545 = and i32 %tmp513, 8192 ; <i32> [#uses=1] + %tmp547 = and i32 %tmp32, -8193 ; <i32> [#uses=1] + %tmp548 = or i32 %tmp547, %tmp545 ; <i32> [#uses=1] + store i32 %tmp548, i32* null, align 8 + ret void + +UnifiedReturnBlock: ; preds = %cond_next57, %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll b/src/LLVM/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll new file mode 100644 index 0000000..d3120f3 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll
@@ -0,0 +1,84 @@ +; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | grep inc | not grep PTR + +define signext i16 @t(i32* %bitptr, i32* %source, i8** %byteptr, i32 %scale, i32 %round) { +entry: + br label %bb + +bb: ; preds = %cond_next391, %entry + %cnt.0 = phi i32 [ 0, %entry ], [ %tmp422445, %cond_next391 ] ; <i32> [#uses=1] + %v.1 = phi i32 [ undef, %entry ], [ %tmp411, %cond_next391 ] ; <i32> [#uses=0] + br i1 false, label %cond_true, label %cond_next127 + +cond_true: ; preds = %bb + store i8* null, i8** %byteptr, align 4 + store i8* null, i8** %byteptr, align 4 + br label %cond_next127 + +cond_next127: ; preds = %cond_true, %bb + %tmp151 = add i32 0, %round ; <i32> [#uses=1] + %tmp153 = ashr i32 %tmp151, %scale ; <i32> [#uses=2] + %tmp154155 = trunc i32 %tmp153 to i16 ; <i16> [#uses=1] + %tmp154155156 = sext i16 %tmp154155 to i32 ; <i32> [#uses=1] + %tmp158 = xor i32 %tmp154155156, %tmp153 ; <i32> [#uses=1] + %tmp160 = or i32 %tmp158, %cnt.0 ; <i32> [#uses=1] + %tmp171 = load i32* %bitptr, align 4 ; <i32> [#uses=1] + %tmp180181 = sext i16 0 to i32 ; <i32> [#uses=3] + %tmp183 = add i32 %tmp160, 1 ; <i32> [#uses=1] + br i1 false, label %cond_true188, label %cond_next245 + +cond_true188: ; preds = %cond_next127 + ret i16 0 + +cond_next245: ; preds = %cond_next127 + %tmp249 = ashr i32 %tmp180181, 8 ; <i32> [#uses=1] + %tmp250 = add i32 %tmp171, %tmp249 ; <i32> [#uses=1] + %tmp253444 = lshr i32 %tmp180181, 4 ; <i32> [#uses=1] + %tmp254 = and i32 %tmp253444, 15 ; <i32> [#uses=1] + %tmp256 = and i32 %tmp180181, 15 ; <i32> [#uses=2] + %tmp264 = icmp ugt i32 %tmp250, 15 ; <i1> [#uses=1] + br i1 %tmp264, label %cond_true267, label %cond_next391 + +cond_true267: ; preds = %cond_next245 + store i8* null, i8** %byteptr, align 4 + store i8* null, i8** %byteptr, align 4 + br i1 false, label %cond_true289, label %cond_next327 + +cond_true289: ; preds = %cond_true267 + ret i16 0 + +cond_next327: ; preds = %cond_true267 + br i1 false, label %cond_true343, label %cond_next385 
+ +cond_true343: ; preds = %cond_next327 + %tmp345 = load i8** %byteptr, align 4 ; <i8*> [#uses=1] + store i8* null, i8** %byteptr, align 4 + br i1 false, label %cond_next385, label %cond_true352 + +cond_true352: ; preds = %cond_true343 + store i8* %tmp345, i8** %byteptr, align 4 + br i1 false, label %cond_true364, label %cond_next385 + +cond_true364: ; preds = %cond_true352 + ret i16 0 + +cond_next385: ; preds = %cond_true352, %cond_true343, %cond_next327 + br label %cond_next391 + +cond_next391: ; preds = %cond_next385, %cond_next245 + %tmp393 = load i32* %source, align 4 ; <i32> [#uses=1] + %tmp395 = load i32* %bitptr, align 4 ; <i32> [#uses=2] + %tmp396 = shl i32 %tmp393, %tmp395 ; <i32> [#uses=1] + %tmp398 = sub i32 32, %tmp256 ; <i32> [#uses=1] + %tmp405 = lshr i32 %tmp396, 31 ; <i32> [#uses=1] + %tmp406 = add i32 %tmp405, -1 ; <i32> [#uses=1] + %tmp409 = lshr i32 %tmp406, %tmp398 ; <i32> [#uses=1] + %tmp411 = sub i32 0, %tmp409 ; <i32> [#uses=1] + %tmp422445 = add i32 %tmp254, %tmp183 ; <i32> [#uses=2] + %tmp426447 = add i32 %tmp395, %tmp256 ; <i32> [#uses=1] + store i32 %tmp426447, i32* %bitptr, align 4 + %tmp429448 = icmp ult i32 %tmp422445, 63 ; <i1> [#uses=1] + br i1 %tmp429448, label %bb, label %UnifiedReturnBlock + +UnifiedReturnBlock: ; preds = %cond_next391 + ret i16 0 +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-10-28-inlineasm-q-modifier.ll b/src/LLVM/test/CodeGen/X86/2007-10-28-inlineasm-q-modifier.ll new file mode 100644 index 0000000..984094d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-10-28-inlineasm-q-modifier.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s +; PR1748 +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @kernel_init(i8* %unused) { +entry: + call void asm sideeffect "foo ${0:q}", "=*imr"( i64* null ) + ret i32 0 +} +
diff --git a/src/LLVM/test/CodeGen/X86/2007-10-29-ExtendSetCC.ll b/src/LLVM/test/CodeGen/X86/2007-10-29-ExtendSetCC.ll new file mode 100644 index 0000000..573a217 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-10-29-ExtendSetCC.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=x86 | grep mov | count 1 + +define signext i16 @t() { +entry: + %tmp180 = load i16* null, align 2 ; <i16> [#uses=3] + %tmp180181 = sext i16 %tmp180 to i32 ; <i32> [#uses=1] + %tmp185 = icmp slt i16 %tmp180, 0 ; <i1> [#uses=1] + br i1 %tmp185, label %cond_true188, label %cond_next245 + +cond_true188: ; preds = %entry + %tmp195196 = trunc i16 %tmp180 to i8 ; <i8> [#uses=0] + ret i16 0 + +cond_next245: ; preds = %entry + %tmp256 = and i32 %tmp180181, 15 ; <i32> [#uses=0] + ret i16 0 +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-10-30-LSRCrash.ll b/src/LLVM/test/CodeGen/X86/2007-10-30-LSRCrash.ll new file mode 100644 index 0000000..42db98b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-10-30-LSRCrash.ll
@@ -0,0 +1,48 @@ +; RUN: llc < %s -march=x86 + +define i32 @unique(i8* %full, i32 %p, i32 %len, i32 %mode, i32 %verbos, i32 %flags) { +entry: + br i1 false, label %cond_true15, label %cond_next107 + +cond_true15: ; preds = %entry + br i1 false, label %bb98.preheader, label %bb + +bb: ; preds = %cond_true15 + ret i32 0 + +bb98.preheader: ; preds = %cond_true15 + br i1 false, label %bb103, label %bb69.outer + +bb76.split: ; preds = %bb69.outer.split.split, %bb69.us208 + br i1 false, label %bb103, label %bb69.outer + +bb69.outer: ; preds = %bb76.split, %bb98.preheader + %from.0.reg2mem.0.ph.rec = phi i32 [ %tmp75.rec, %bb76.split ], [ 0, %bb98.preheader ] ; <i32> [#uses=1] + %tmp75.rec = add i32 %from.0.reg2mem.0.ph.rec, 1 ; <i32> [#uses=2] + %tmp75 = getelementptr i8* null, i32 %tmp75.rec ; <i8*> [#uses=6] + br i1 false, label %bb69.us208, label %bb69.outer.split.split + +bb69.us208: ; preds = %bb69.outer + switch i32 0, label %bb76.split [ + i32 47, label %bb89 + i32 58, label %bb89 + i32 92, label %bb89 + ] + +bb69.outer.split.split: ; preds = %bb69.outer + switch i8 0, label %bb76.split [ + i8 47, label %bb89 + i8 58, label %bb89 + i8 92, label %bb89 + ] + +bb89: ; preds = %bb69.outer.split.split, %bb69.outer.split.split, %bb69.outer.split.split, %bb69.us208, %bb69.us208, %bb69.us208 + %tmp75.lcssa189 = phi i8* [ %tmp75, %bb69.us208 ], [ %tmp75, %bb69.us208 ], [ %tmp75, %bb69.us208 ], [ %tmp75, %bb69.outer.split.split ], [ %tmp75, %bb69.outer.split.split ], [ %tmp75, %bb69.outer.split.split ] ; <i8*> [#uses=0] + ret i32 0 + +bb103: ; preds = %bb76.split, %bb98.preheader + ret i32 0 + +cond_next107: ; preds = %entry + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-10-31-extractelement-i64.ll b/src/LLVM/test/CodeGen/X86/2007-10-31-extractelement-i64.ll new file mode 100644 index 0000000..1b8e67d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-10-31-extractelement-i64.ll
@@ -0,0 +1,82 @@ +; RUN: llc < %s -march=x86 -mattr=sse2 +; ModuleID = 'yyy.c' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i686-apple-darwin8" + +define <1 x i64> @a(<2 x i64> %__A) { +entry: + %__A_addr = alloca <2 x i64> ; <<2 x i64>*> [#uses=2] + %retval = alloca <1 x i64>, align 8 ; <<1 x i64>*> [#uses=3] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store <2 x i64> %__A, <2 x i64>* %__A_addr + %tmp = load <2 x i64>* %__A_addr, align 16 ; <<2 x i64>> [#uses=1] + %tmp1 = bitcast <2 x i64> %tmp to <2 x i64> ; <<2 x i64>> [#uses=1] + %tmp2 = extractelement <2 x i64> %tmp1, i32 0 ; <i64> [#uses=1] + %tmp3 = bitcast i64 %tmp2 to <1 x i64> ; <<1 x i64>> [#uses=1] + store <1 x i64> %tmp3, <1 x i64>* %retval, align 8 + %tmp4 = load <1 x i64>* %retval, align 8 ; <<1 x i64>> [#uses=0] + br label %return + +return: ; preds = %entry + %retval5 = load <1 x i64>* %retval ; <<1 x i64>> [#uses=1] + ret <1 x i64> %retval5 +} + +define <1 x i64> @b(<2 x i64> %__A) { +entry: + %__A_addr = alloca <2 x i64> ; <<2 x i64>*> [#uses=2] + %retval = alloca <1 x i64>, align 8 ; <<1 x i64>*> [#uses=3] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store <2 x i64> %__A, <2 x i64>* %__A_addr + %tmp = load <2 x i64>* %__A_addr, align 16 ; <<2 x i64>> [#uses=1] + %tmp1 = bitcast <2 x i64> %tmp to <2 x i64> ; <<2 x i64>> [#uses=1] + %tmp2 = extractelement <2 x i64> %tmp1, i32 1 ; <i64> [#uses=1] + %tmp3 = bitcast i64 %tmp2 to <1 x i64> ; <<1 x i64>> [#uses=1] + store <1 x i64> %tmp3, <1 x i64>* %retval, align 8 + %tmp4 = load <1 x i64>* %retval, align 8 ; <<1 x i64>> [#uses=0] + br label %return + +return: ; preds = %entry + %retval5 = load <1 x i64>* %retval ; <<1 x i64>> [#uses=1] + ret <1 x i64> %retval5 +} + +define i64 @c(<2 x i64> %__A) { +entry: + %__A_addr = alloca <2 x i64> ; <<2 x i64>*> [#uses=2] + %retval = alloca i64, align 8 ; <i64*> 
[#uses=2] + %tmp = alloca i64, align 8 ; <i64*> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store <2 x i64> %__A, <2 x i64>* %__A_addr + %tmp1 = load <2 x i64>* %__A_addr, align 16 ; <<2 x i64>> [#uses=1] + %tmp2 = bitcast <2 x i64> %tmp1 to <2 x i64> ; <<2 x i64>> [#uses=1] + %tmp3 = extractelement <2 x i64> %tmp2, i32 0 ; <i64> [#uses=1] + store i64 %tmp3, i64* %tmp, align 8 + %tmp4 = load i64* %tmp, align 8 ; <i64> [#uses=1] + store i64 %tmp4, i64* %retval, align 8 + br label %return + +return: ; preds = %entry + %retval5 = load i64* %retval ; <i64> [#uses=1] + ret i64 %retval5 +} + +define i64 @d(<2 x i64> %__A) { +entry: + %__A_addr = alloca <2 x i64> ; <<2 x i64>*> [#uses=2] + %retval = alloca i64, align 8 ; <i64*> [#uses=2] + %tmp = alloca i64, align 8 ; <i64*> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store <2 x i64> %__A, <2 x i64>* %__A_addr + %tmp1 = load <2 x i64>* %__A_addr, align 16 ; <<2 x i64>> [#uses=1] + %tmp2 = bitcast <2 x i64> %tmp1 to <2 x i64> ; <<2 x i64>> [#uses=1] + %tmp3 = extractelement <2 x i64> %tmp2, i32 1 ; <i64> [#uses=1] + store i64 %tmp3, i64* %tmp, align 8 + %tmp4 = load i64* %tmp, align 8 ; <i64> [#uses=1] + store i64 %tmp4, i64* %retval, align 8 + br label %return + +return: ; preds = %entry + %retval5 = load i64* %retval ; <i64> [#uses=1] + ret i64 %retval5 +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-11-01-ISelCrash.ll b/src/LLVM/test/CodeGen/X86/2007-11-01-ISelCrash.ll new file mode 100644 index 0000000..019c6a8 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-11-01-ISelCrash.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86 + + %"struct.K::JL" = type <{ i8 }> + %struct.jv = type { i64 } + +declare fastcc i64 @f(i32, %"struct.K::JL"*, i8*, i8*, %struct.jv*) + +define void @t(%"struct.K::JL"* %obj, i8* %name, i8* %sig, %struct.jv* %args) { +entry: + %tmp5 = tail call fastcc i64 @f( i32 1, %"struct.K::JL"* %obj, i8* %name, i8* %sig, %struct.jv* %args ) ; <i64> [#uses=0] + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-11-03-x86-64-q-constraint.ll b/src/LLVM/test/CodeGen/X86/2007-11-03-x86-64-q-constraint.ll new file mode 100644 index 0000000..27ec826 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-11-03-x86-64-q-constraint.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s +; PR1763 +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define void @yield() { + %tmp9 = call i64 asm sideeffect "xchgb ${0:b},$1", "=q,*m,0,~{dirflag},~{fpsr},~{flags},~{memory}"( i64* null, i64 0 ) ; <i64> + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-11-04-LiveIntervalCrash.ll b/src/LLVM/test/CodeGen/X86/2007-11-04-LiveIntervalCrash.ll new file mode 100644 index 0000000..4045618 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-11-04-LiveIntervalCrash.ll
@@ -0,0 +1,37 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu +; PR1766 + + %struct.dentry = type { %struct.dentry_operations* } + %struct.dentry_operations = type { i32 (%struct.dentry*, %struct.qstr*)* } + %struct.qstr = type { i32, i32, i8* } + +define %struct.dentry* @d_hash_and_lookup(%struct.dentry* %dir, %struct.qstr* %name) { +entry: + br i1 false, label %bb37, label %bb + +bb: ; preds = %bb, %entry + %name8.0.reg2mem.0.rec = phi i64 [ %indvar.next, %bb ], [ 0, %entry ] ; <i64> [#uses=1] + %hash.0.reg2mem.0 = phi i64 [ %tmp27, %bb ], [ 0, %entry ] ; <i64> [#uses=1] + %tmp13 = load i8* null, align 1 ; <i8> [#uses=1] + %tmp1314 = zext i8 %tmp13 to i64 ; <i64> [#uses=1] + %tmp25 = lshr i64 %tmp1314, 4 ; <i64> [#uses=1] + %tmp22 = add i64 %tmp25, %hash.0.reg2mem.0 ; <i64> [#uses=1] + %tmp26 = add i64 %tmp22, 0 ; <i64> [#uses=1] + %tmp27 = mul i64 %tmp26, 11 ; <i64> [#uses=2] + %indvar.next = add i64 %name8.0.reg2mem.0.rec, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %indvar.next, 0 ; <i1> [#uses=1] + br i1 %exitcond, label %bb37.loopexit, label %bb + +bb37.loopexit: ; preds = %bb + %phitmp = trunc i64 %tmp27 to i32 ; <i32> [#uses=1] + br label %bb37 + +bb37: ; preds = %bb37.loopexit, %entry + %hash.0.reg2mem.1 = phi i32 [ %phitmp, %bb37.loopexit ], [ 0, %entry ] ; <i32> [#uses=1] + store i32 %hash.0.reg2mem.1, i32* null, align 8 + %tmp75 = tail call i32 null( %struct.dentry* %dir, %struct.qstr* %name ) ; <i32> [#uses=0] + %tmp84 = tail call i32 (...)* @d_lookup( %struct.dentry* %dir, %struct.qstr* %name ) ; <i32> [#uses=0] + ret %struct.dentry* null +} + +declare i32 @d_lookup(...)
diff --git a/src/LLVM/test/CodeGen/X86/2007-11-04-LiveVariablesBug.ll b/src/LLVM/test/CodeGen/X86/2007-11-04-LiveVariablesBug.ll new file mode 100644 index 0000000..6b871aa --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-11-04-LiveVariablesBug.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu +; PR1767 + +define void @xor_sse_2(i64 %bytes, i64* %p1, i64* %p2) { +entry: + %p2_addr = alloca i64* ; <i64**> [#uses=2] + %lines = alloca i32 ; <i32*> [#uses=2] + store i64* %p2, i64** %p2_addr, align 8 + %tmp1 = lshr i64 %bytes, 8 ; <i64> [#uses=1] + %tmp12 = trunc i64 %tmp1 to i32 ; <i32> [#uses=2] + store i32 %tmp12, i32* %lines, align 4 + %tmp6 = call i64* asm sideeffect "foo", +"=r,=*r,=*r,r,0,1,2,~{dirflag},~{fpsr},~{flags},~{memory}"( i64** %p2_addr, +i32* %lines, i64 256, i64* %p1, i64* %p2, i32 %tmp12 ) ; <i64*> [#uses=0] + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll b/src/LLVM/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll new file mode 100644 index 0000000..228a915 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -relocation-model=static | grep {foo str$} +; PR1761 +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-pc-linux" +@str = internal constant [12 x i8] c"init/main.c\00" ; <[12 x i8]*> [#uses=1] + +define i32 @unknown_bootoption() { +entry: + tail call void asm sideeffect "foo ${0:c}\0A", "i,~{dirflag},~{fpsr},~{flags}"( i8* getelementptr ([12 x i8]* @str, i32 0, i64 0) ) + ret i32 undef +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-11-06-InstrSched.ll b/src/LLVM/test/CodeGen/X86/2007-11-06-InstrSched.ll new file mode 100644 index 0000000..f6db0d0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-11-06-InstrSched.ll
@@ -0,0 +1,25 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep lea + +define float @foo(i32* %x, float* %y, i32 %c) nounwind { +entry: + %tmp2132 = icmp eq i32 %c, 0 ; <i1> [#uses=1] + br i1 %tmp2132, label %bb23, label %bb18 + +bb18: ; preds = %bb18, %entry + %i.0.reg2mem.0 = phi i32 [ 0, %entry ], [ %tmp17, %bb18 ] ; <i32> [#uses=3] + %res.0.reg2mem.0 = phi float [ 0.000000e+00, %entry ], [ %tmp14, %bb18 ] ; <float> [#uses=1] + %tmp3 = getelementptr i32* %x, i32 %i.0.reg2mem.0 ; <i32*> [#uses=1] + %tmp4 = load i32* %tmp3, align 4 ; <i32> [#uses=1] + %tmp45 = sitofp i32 %tmp4 to float ; <float> [#uses=1] + %tmp8 = getelementptr float* %y, i32 %i.0.reg2mem.0 ; <float*> [#uses=1] + %tmp9 = load float* %tmp8, align 4 ; <float> [#uses=1] + %tmp11 = fmul float %tmp9, %tmp45 ; <float> [#uses=1] + %tmp14 = fadd float %tmp11, %res.0.reg2mem.0 ; <float> [#uses=2] + %tmp17 = add i32 %i.0.reg2mem.0, 1 ; <i32> [#uses=2] + %tmp21 = icmp ult i32 %tmp17, %c ; <i1> [#uses=1] + br i1 %tmp21, label %bb18, label %bb23 + +bb23: ; preds = %bb18, %entry + %res.0.reg2mem.1 = phi float [ 0.000000e+00, %entry ], [ %tmp14, %bb18 ] ; <float> [#uses=1] + ret float %res.0.reg2mem.1 +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-11-07-MulBy4.ll b/src/LLVM/test/CodeGen/X86/2007-11-07-MulBy4.ll new file mode 100644 index 0000000..d5b630b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-11-07-MulBy4.ll
@@ -0,0 +1,129 @@ +; RUN: llc < %s -march=x86 | not grep imul + + %struct.eebb = type { %struct.eebb*, i16* } + %struct.hf = type { %struct.hf*, i16*, i8*, i32, i32, %struct.eebb*, i32, i32, i8*, i8*, i8*, i8*, i16*, i8*, i16*, %struct.ri, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [30 x i32], %struct.eebb, i32, i8* } + %struct.foo_data = type { i32, i32, i32, i32*, i32, i32, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i16*, i16*, i16*, i16*, i32, i32, i32, %struct.ri*, i8*, %struct.hf* } + %struct.ri = type { %struct.ri*, i32, i8*, i16*, i32*, i32 } + +define fastcc i32 @foo(i16* %eptr, i8* %ecode, %struct.foo_data* %md, i32 %ims) { +entry: + %tmp36 = load i32* null, align 4 ; <i32> [#uses=1] + %tmp37 = icmp ult i32 0, %tmp36 ; <i1> [#uses=1] + br i1 %tmp37, label %cond_next79, label %cond_true + +cond_true: ; preds = %entry + ret i32 0 + +cond_next79: ; preds = %entry + %tmp85 = load i32* null, align 4 ; <i32> [#uses=1] + %tmp86 = icmp ult i32 0, %tmp85 ; <i1> [#uses=1] + br i1 %tmp86, label %cond_next130, label %cond_true89 + +cond_true89: ; preds = %cond_next79 + ret i32 0 + +cond_next130: ; preds = %cond_next79 + %tmp173 = icmp eq i32 0, 0 ; <i1> [#uses=1] + br i1 %tmp173, label %cond_next201, label %cond_true176 + +cond_true176: ; preds = %cond_next130 + ret i32 0 + +cond_next201: ; preds = %cond_next130 + switch i32 0, label %bb19955 [ + i32 0, label %bb1266 + i32 1, label %bb5018 + i32 2, label %bb5075 + i32 3, label %cond_true5534 + i32 4, label %cond_true5534 + i32 5, label %bb6039 + i32 6, label %bb6181 + i32 7, label %bb6323 + i32 8, label %bb6463 + i32 9, label %bb6605 + i32 10, label %bb6746 + i32 11, label %cond_next5871 + i32 16, label %bb5452 + i32 17, label %bb5395 + i32 19, label %bb4883 + i32 20, label %bb5136 + i32 23, label %bb12899 + i32 64, label %bb2162 + i32 69, label %bb1447 + i32 70, label %bb1737 + i32 71, label %bb1447 + i32 72, label %bb1737 + i32 73, label %cond_true1984 
+ i32 75, label %bb740 + i32 80, label %bb552 + ] + +bb552: ; preds = %cond_next201 + ret i32 0 + +bb740: ; preds = %cond_next201 + ret i32 0 + +bb1266: ; preds = %cond_next201 + ret i32 0 + +bb1447: ; preds = %cond_next201, %cond_next201 + ret i32 0 + +bb1737: ; preds = %cond_next201, %cond_next201 + ret i32 0 + +cond_true1984: ; preds = %cond_next201 + ret i32 0 + +bb2162: ; preds = %cond_next201 + ret i32 0 + +bb4883: ; preds = %cond_next201 + ret i32 0 + +bb5018: ; preds = %cond_next201 + ret i32 0 + +bb5075: ; preds = %cond_next201 + ret i32 0 + +bb5136: ; preds = %cond_next201 + ret i32 0 + +bb5395: ; preds = %cond_next201 + ret i32 0 + +bb5452: ; preds = %cond_next201 + ret i32 0 + +cond_true5534: ; preds = %cond_next201, %cond_next201 + ret i32 0 + +cond_next5871: ; preds = %cond_next201 + ret i32 0 + +bb6039: ; preds = %cond_next201 + ret i32 0 + +bb6181: ; preds = %cond_next201 + ret i32 0 + +bb6323: ; preds = %cond_next201 + ret i32 0 + +bb6463: ; preds = %cond_next201 + ret i32 0 + +bb6605: ; preds = %cond_next201 + ret i32 0 + +bb6746: ; preds = %cond_next201 + ret i32 0 + +bb12899: ; preds = %cond_next201 + ret i32 0 + +bb19955: ; preds = %cond_next201 + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll b/src/LLVM/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll new file mode 100644 index 0000000..8e315f4 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
@@ -0,0 +1,86 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s +; Increment in loop bb.i28.i adjusted to 2, to prevent loop reversal from +; kicking in. + +declare fastcc void @rdft(i32, i32, double*, i32*, double*) + +define fastcc void @mp_sqrt(i32 %n, i32 %radix, i32* %in, i32* %out, i32* %tmp1, i32* %tmp2, i32 %nfft, double* %tmp1fft, double* %tmp2fft, i32* %ip, double* %w) nounwind { +entry: + br label %bb.i5 + +bb.i5: ; preds = %bb.i5, %entry + %nfft_init.0.i = phi i32 [ 1, %entry ], [ %tmp7.i3, %bb.i5 ] ; <i32> [#uses=1] + %foo = phi i1 [1, %entry], [0, %bb.i5] + %tmp7.i3 = shl i32 %nfft_init.0.i, 1 ; <i32> [#uses=2] + br i1 %foo, label %bb.i5, label %mp_unexp_mp2d.exit.i + +mp_unexp_mp2d.exit.i: ; preds = %bb.i5 + br i1 %foo, label %cond_next.i, label %cond_true.i + +cond_true.i: ; preds = %mp_unexp_mp2d.exit.i + ret void + +cond_next.i: ; preds = %mp_unexp_mp2d.exit.i + %tmp22.i = sdiv i32 0, 2 ; <i32> [#uses=2] + br i1 %foo, label %cond_true29.i, label %cond_next36.i + +cond_true29.i: ; preds = %cond_next.i + ret void + +cond_next36.i: ; preds = %cond_next.i + store i32 %tmp22.i, i32* null, align 4 + %tmp8.i14.i = select i1 %foo, i32 1, i32 0 ; <i32> [#uses=1] + br label %bb.i28.i + +bb.i28.i: ; preds = %bb.i28.i, %cond_next36.i +; CHECK: %bb.i28.i +; CHECK: addl $2 +; CHECK: addl $-2 + %j.0.reg2mem.0.i16.i = phi i32 [ 0, %cond_next36.i ], [ %indvar.next39.i, %bb.i28.i ] ; <i32> [#uses=2] + %din_addr.1.reg2mem.0.i17.i = phi double [ 0.000000e+00, %cond_next36.i ], [ %tmp16.i25.i, %bb.i28.i ] ; <double> [#uses=1] + %tmp1.i18.i = fptosi double %din_addr.1.reg2mem.0.i17.i to i32 ; <i32> [#uses=2] + %tmp4.i19.i = icmp slt i32 %tmp1.i18.i, %radix ; <i1> [#uses=1] + %x.0.i21.i = select i1 %tmp4.i19.i, i32 %tmp1.i18.i, i32 0 ; <i32> [#uses=1] + %tmp41.sum.i = add i32 %j.0.reg2mem.0.i16.i, 2 ; <i32> [#uses=0] + %tmp1213.i23.i = sitofp i32 %x.0.i21.i to double ; <double> [#uses=1] + %tmp15.i24.i = fsub double 0.000000e+00, %tmp1213.i23.i ; <double> 
[#uses=1] + %tmp16.i25.i = fmul double 0.000000e+00, %tmp15.i24.i ; <double> [#uses=1] + %indvar.next39.i = add i32 %j.0.reg2mem.0.i16.i, 2 ; <i32> [#uses=2] + %exitcond40.i = icmp eq i32 %indvar.next39.i, %tmp8.i14.i ; <i1> [#uses=1] + br i1 %exitcond40.i, label %mp_unexp_d2mp.exit29.i, label %bb.i28.i + +mp_unexp_d2mp.exit29.i: ; preds = %bb.i28.i + %tmp46.i = sub i32 0, %tmp22.i ; <i32> [#uses=1] + store i32 %tmp46.i, i32* null, align 4 + br i1 %exitcond40.i, label %bb.i.i, label %mp_sqrt_init.exit + +bb.i.i: ; preds = %bb.i.i, %mp_unexp_d2mp.exit29.i + br label %bb.i.i + +mp_sqrt_init.exit: ; preds = %mp_unexp_d2mp.exit29.i + tail call fastcc void @mp_mul_csqu( i32 0, double* %tmp1fft ) + tail call fastcc void @rdft( i32 0, i32 -1, double* null, i32* %ip, double* %w ) + tail call fastcc void @mp_mul_d2i( i32 0, i32 %radix, i32 0, double* %tmp1fft, i32* %tmp2 ) + br i1 %exitcond40.i, label %cond_false.i, label %cond_true36.i + +cond_true36.i: ; preds = %mp_sqrt_init.exit + ret void + +cond_false.i: ; preds = %mp_sqrt_init.exit + tail call fastcc void @mp_round( i32 0, i32 %radix, i32 0, i32* %out ) + tail call fastcc void @mp_add( i32 0, i32 %radix, i32* %tmp1, i32* %tmp2, i32* %tmp1 ) + tail call fastcc void @mp_sub( i32 0, i32 %radix, i32* %in, i32* %tmp2, i32* %tmp2 ) + tail call fastcc void @mp_round( i32 0, i32 %radix, i32 0, i32* %tmp1 ) + tail call fastcc void @mp_mul_d2i( i32 0, i32 %radix, i32 %tmp7.i3, double* %tmp2fft, i32* %tmp2 ) + ret void +} + +declare fastcc void @mp_add(i32, i32, i32*, i32*, i32*) + +declare fastcc void @mp_sub(i32, i32, i32*, i32*, i32*) + +declare fastcc void @mp_round(i32, i32, i32, i32*) + +declare fastcc void @mp_mul_csqu(i32, double*) + +declare fastcc void @mp_mul_d2i(i32, i32, i32, double*, i32*)
diff --git a/src/LLVM/test/CodeGen/X86/2007-12-16-BURRSchedCrash.ll b/src/LLVM/test/CodeGen/X86/2007-12-16-BURRSchedCrash.ll new file mode 100644 index 0000000..455de91 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-12-16-BURRSchedCrash.ll
@@ -0,0 +1,35 @@ +; RUN: llc < %s -mtriple=i686-pc-linux-gnu +; PR1799 + + %struct.c34007g__designated___XUB = type { i32, i32, i32, i32 } + %struct.c34007g__pkg__parent = type { i32*, %struct.c34007g__designated___XUB* } + +define void @_ada_c34007g() { +entry: + %x8 = alloca %struct.c34007g__pkg__parent, align 8 ; <%struct.c34007g__pkg__parent*> [#uses=2] + br i1 true, label %bb1271, label %bb848 + +bb848: ; preds = %entry + ret void + +bb1271: ; preds = %bb898 + %tmp1272 = getelementptr %struct.c34007g__pkg__parent* %x8, i32 0, i32 0 ; <i32**> [#uses=1] + %x82167 = bitcast %struct.c34007g__pkg__parent* %x8 to i64* ; <i64*> [#uses=1] + br i1 true, label %bb4668, label %bb848 + +bb4668: ; preds = %bb4648 + %tmp5464 = load i64* %x82167, align 8 ; <i64> [#uses=1] + %tmp5467 = icmp ne i64 0, %tmp5464 ; <i1> [#uses=1] + %tmp5470 = load i32** %tmp1272, align 8 ; <i32*> [#uses=1] + %tmp5471 = icmp eq i32* %tmp5470, null ; <i1> [#uses=1] + call fastcc void @c34007g__pkg__create.311( %struct.c34007g__pkg__parent* null, i32 7, i32 9, i32 2, i32 4, i32 1 ) + %tmp5475 = or i1 %tmp5471, %tmp5467 ; <i1> [#uses=1] + %tmp5497 = or i1 %tmp5475, false ; <i1> [#uses=1] + br i1 %tmp5497, label %bb848, label %bb5507 + +bb5507: ; preds = %bb4668 + ret void + +} + +declare fastcc void @c34007g__pkg__create.311(%struct.c34007g__pkg__parent*, i32, i32, i32, i32, i32)
diff --git a/src/LLVM/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll b/src/LLVM/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll new file mode 100644 index 0000000..265d968 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
@@ -0,0 +1,28 @@ +; RUN: llc < %s -march=x86 | grep {(%esp)} | count 2 +; PR1872 + + %struct.c34007g__designated___XUB = type { i32, i32, i32, i32 } + %struct.c34007g__pkg__parent = type { i32*, %struct.c34007g__designated___XUB* } + +define void @_ada_c34007g() { +entry: + %x8 = alloca %struct.c34007g__pkg__parent, align 8 ; <%struct.c34007g__pkg__parent*> [#uses=2] + %tmp1272 = getelementptr %struct.c34007g__pkg__parent* %x8, i32 0, i32 0 ; <i32**> [#uses=1] + %x82167 = bitcast %struct.c34007g__pkg__parent* %x8 to i64* ; <i64*> [#uses=1] + br i1 true, label %bb4668, label %bb848 + +bb4668: ; preds = %bb4648 + %tmp5464 = load i64* %x82167, align 8 ; <i64> [#uses=1] + %tmp5467 = icmp ne i64 0, %tmp5464 ; <i1> [#uses=1] + %tmp5470 = load i32** %tmp1272, align 8 ; <i32*> [#uses=1] + %tmp5471 = icmp eq i32* %tmp5470, null ; <i1> [#uses=1] + %tmp5475 = or i1 %tmp5471, %tmp5467 ; <i1> [#uses=1] + %tmp5497 = or i1 %tmp5475, false ; <i1> [#uses=1] + br i1 %tmp5497, label %bb848, label %bb5507 + +bb848: ; preds = %entry + ret void + +bb5507: ; preds = %bb4668 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-01-08-IllegalCMP.ll b/src/LLVM/test/CodeGen/X86/2008-01-08-IllegalCMP.ll new file mode 100644 index 0000000..7aec613 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-01-08-IllegalCMP.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" +target triple = "i686-pc-linux-gnu" + +define i64 @__absvdi2(i64 %a) nounwind { +entry: + %w.0 = select i1 false, i64 0, i64 %a ; <i64> [#uses=2] + %tmp9 = icmp slt i64 %w.0, 0 ; <i1> [#uses=1] + br i1 %tmp9, label %bb12, label %bb13 + +bb12: ; preds = %entry + unreachable + +bb13: ; preds = %entry + ret i64 %w.0 +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll b/src/LLVM/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll new file mode 100644 index 0000000..266fd7b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll
@@ -0,0 +1,40 @@ +; RUN: llc < %s -march=x86 -mattr=+cmov | FileCheck %s +; +; Test scheduling a multi-use compare. We should neither spill flags +; nor clone the compare. +; CHECK: cmp +; CHECK-NOT: pushf +; CHECK: cmov +; CHECK-NOT: cmp +; CHECK: cmov + + %struct.indexentry = type { i32, i8*, i8*, i8*, i8*, i8* } + +define i32 @_bfd_stab_section_find_nearest_line(i32 %offset) nounwind { +entry: + %tmp910 = add i32 0, %offset ; <i32> [#uses=1] + br i1 true, label %bb951, label %bb917 + +bb917: ; preds = %entry + ret i32 0 + +bb951: ; preds = %bb986, %entry + %tmp955 = sdiv i32 0, 2 ; <i32> [#uses=3] + %tmp961 = getelementptr %struct.indexentry* null, i32 %tmp955, i32 0 ; <i32*> [#uses=1] + br i1 true, label %bb986, label %bb967 + +bb967: ; preds = %bb951 + ret i32 0 + +bb986: ; preds = %bb951 + %tmp993 = load i32* %tmp961, align 4 ; <i32> [#uses=1] + %tmp995 = icmp ugt i32 %tmp993, %tmp910 ; <i1> [#uses=2] + %tmp1002 = add i32 %tmp955, 1 ; <i32> [#uses=1] + %low.0 = select i1 %tmp995, i32 0, i32 %tmp1002 ; <i32> [#uses=1] + %high.0 = select i1 %tmp995, i32 %tmp955, i32 0 ; <i32> [#uses=1] + %tmp1006 = icmp eq i32 %low.0, %high.0 ; <i1> [#uses=1] + br i1 %tmp1006, label %UnifiedReturnBlock, label %bb951 + +UnifiedReturnBlock: ; preds = %bb986 + ret i32 1 +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-01-09-LongDoubleSin.ll b/src/LLVM/test/CodeGen/X86/2008-01-09-LongDoubleSin.ll new file mode 100644 index 0000000..6997d53 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-01-09-LongDoubleSin.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -o - | grep sinl + +target triple = "i686-pc-linux-gnu" + +define x86_fp80 @f(x86_fp80 %x) nounwind { +entry: + %tmp2 = tail call x86_fp80 @sinl( x86_fp80 %x ) nounwind readonly ; <x86_fp80> [#uses=1] + ret x86_fp80 %tmp2 +} + +declare x86_fp80 @sinl(x86_fp80) nounwind readonly
diff --git a/src/LLVM/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll b/src/LLVM/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll new file mode 100644 index 0000000..0091397 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll
@@ -0,0 +1,35 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 -regalloc=fast + +define void @SolveCubic(double %a, double %b, double %c, double %d, i32* %solutions, double* %x) { +entry: + %tmp71 = load x86_fp80* null, align 16 ; <x86_fp80> [#uses=1] + %tmp72 = fdiv x86_fp80 %tmp71, 0xKC000C000000000000000 ; <x86_fp80> [#uses=1] + %tmp73 = fadd x86_fp80 0xK00000000000000000000, %tmp72 ; <x86_fp80> [#uses=1] + %tmp7374 = fptrunc x86_fp80 %tmp73 to double ; <double> [#uses=1] + store double %tmp7374, double* null, align 8 + %tmp81 = load double* null, align 8 ; <double> [#uses=1] + %tmp82 = fadd double %tmp81, 0x401921FB54442D18 ; <double> [#uses=1] + %tmp83 = fdiv double %tmp82, 3.000000e+00 ; <double> [#uses=1] + %tmp84 = call double @cos( double %tmp83 ) ; <double> [#uses=1] + %tmp85 = fmul double 0.000000e+00, %tmp84 ; <double> [#uses=1] + %tmp8586 = fpext double %tmp85 to x86_fp80 ; <x86_fp80> [#uses=1] + %tmp87 = load x86_fp80* null, align 16 ; <x86_fp80> [#uses=1] + %tmp88 = fdiv x86_fp80 %tmp87, 0xKC000C000000000000000 ; <x86_fp80> [#uses=1] + %tmp89 = fadd x86_fp80 %tmp8586, %tmp88 ; <x86_fp80> [#uses=1] + %tmp8990 = fptrunc x86_fp80 %tmp89 to double ; <double> [#uses=1] + store double %tmp8990, double* null, align 8 + %tmp97 = load double* null, align 8 ; <double> [#uses=1] + %tmp98 = fadd double %tmp97, 0x402921FB54442D18 ; <double> [#uses=1] + %tmp99 = fdiv double %tmp98, 3.000000e+00 ; <double> [#uses=1] + %tmp100 = call double @cos( double %tmp99 ) ; <double> [#uses=1] + %tmp101 = fmul double 0.000000e+00, %tmp100 ; <double> [#uses=1] + %tmp101102 = fpext double %tmp101 to x86_fp80 ; <x86_fp80> [#uses=1] + %tmp103 = load x86_fp80* null, align 16 ; <x86_fp80> [#uses=1] + %tmp104 = fdiv x86_fp80 %tmp103, 0xKC000C000000000000000 ; <x86_fp80> [#uses=1] + %tmp105 = fadd x86_fp80 %tmp101102, %tmp104 ; <x86_fp80> [#uses=1] + %tmp105106 = fptrunc x86_fp80 %tmp105 to double ; <double> [#uses=1] + store double %tmp105106, double* null, align 8 + ret void +} + 
+declare double @cos(double)
diff --git a/src/LLVM/test/CodeGen/X86/2008-01-16-InvalidDAGCombineXform.ll b/src/LLVM/test/CodeGen/X86/2008-01-16-InvalidDAGCombineXform.ll new file mode 100644 index 0000000..e91f52e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-01-16-InvalidDAGCombineXform.ll
@@ -0,0 +1,30 @@ +; RUN: llc < %s -march=x86 | not grep IMPLICIT_DEF + + %struct.node_t = type { double*, %struct.node_t*, %struct.node_t**, double**, double*, i32, i32 } + +define void @localize_local_bb19_bb(%struct.node_t** %cur_node) { +newFuncRoot: + %tmp1 = load %struct.node_t** %cur_node, align 4 ; <%struct.node_t*> [#uses=1] + %tmp2 = getelementptr %struct.node_t* %tmp1, i32 0, i32 4 ; <double**> [#uses=1] + %tmp3 = load double** %tmp2, align 4 ; <double*> [#uses=1] + %tmp4 = load %struct.node_t** %cur_node, align 4 ; <%struct.node_t*> [#uses=1] + %tmp5 = getelementptr %struct.node_t* %tmp4, i32 0, i32 4 ; <double**> [#uses=1] + store double* %tmp3, double** %tmp5, align 4 + %tmp6 = load %struct.node_t** %cur_node, align 4 ; <%struct.node_t*> [#uses=1] + %tmp7 = getelementptr %struct.node_t* %tmp6, i32 0, i32 3 ; <double***> [#uses=1] + %tmp8 = load double*** %tmp7, align 4 ; <double**> [#uses=1] + %tmp9 = load %struct.node_t** %cur_node, align 4 ; <%struct.node_t*> [#uses=1] + %tmp10 = getelementptr %struct.node_t* %tmp9, i32 0, i32 3 ; <double***> [#uses=1] + store double** %tmp8, double*** %tmp10, align 4 + %tmp11 = load %struct.node_t** %cur_node, align 4 ; <%struct.node_t*> [#uses=1] + %tmp12 = getelementptr %struct.node_t* %tmp11, i32 0, i32 0 ; <double**> [#uses=1] + %tmp13 = load double** %tmp12, align 4 ; <double*> [#uses=1] + %tmp14 = load %struct.node_t** %cur_node, align 4 ; <%struct.node_t*> [#uses=1] + %tmp15 = getelementptr %struct.node_t* %tmp14, i32 0, i32 0 ; <double**> [#uses=1] + store double* %tmp13, double** %tmp15, align 4 + %tmp16 = load %struct.node_t** %cur_node, align 4 ; <%struct.node_t*> [#uses=1] + %tmp17 = getelementptr %struct.node_t* %tmp16, i32 0, i32 1 ; <%struct.node_t**> [#uses=1] + %tmp18 = load %struct.node_t** %tmp17, align 4 ; <%struct.node_t*> [#uses=1] + store %struct.node_t* %tmp18, %struct.node_t** %cur_node, align 4 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-01-16-Trampoline.ll b/src/LLVM/test/CodeGen/X86/2008-01-16-Trampoline.ll new file mode 100644 index 0000000..704b2ba --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-01-16-Trampoline.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=x86 +; RUN: llc < %s -march=x86-64 + + %struct.FRAME.gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets = type { i32, i32, void (i32, i32)*, i8 (i32, i32)* } + +define fastcc i32 @gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets.5146(i64 %table.0.0, i64 %table.0.1, i32 %last, i32 %pos) { +entry: + %tramp22 = call i8* @llvm.init.trampoline( i8* null, i8* bitcast (void (%struct.FRAME.gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets*, i32, i32)* @gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets__move.5177 to i8*), i8* null ) ; <i8*> [#uses=0] + unreachable +} + +declare void @gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets__move.5177(%struct.FRAME.gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets* nest , i32, i32) nounwind + +declare i8* @llvm.init.trampoline(i8*, i8*, i8*) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/2008-02-05-ISelCrash.ll b/src/LLVM/test/CodeGen/X86/2008-02-05-ISelCrash.ll new file mode 100644 index 0000000..443a32d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-02-05-ISelCrash.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86 +; PR1975 + +@nodes = external global i64 ; <i64*> [#uses=2] + +define fastcc i32 @ab(i32 %alpha, i32 %beta) nounwind { +entry: + %tmp1 = load i64* @nodes, align 8 ; <i64> [#uses=1] + %tmp2 = add i64 %tmp1, 1 ; <i64> [#uses=1] + store i64 %tmp2, i64* @nodes, align 8 + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll b/src/LLVM/test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll new file mode 100644 index 0000000..d2d5149 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep xor | grep CPI + +define void @casin({ double, double }* sret %agg.result, double %z.0, double %z.1) nounwind { +entry: + %memtmp = alloca { double, double }, align 8 ; <{ double, double }*> [#uses=3] + %tmp4 = fsub double -0.000000e+00, %z.1 ; <double> [#uses=1] + call void @casinh( { double, double }* sret %memtmp, double %tmp4, double %z.0 ) nounwind + %tmp19 = getelementptr { double, double }* %memtmp, i32 0, i32 0 ; <double*> [#uses=1] + %tmp20 = load double* %tmp19, align 8 ; <double> [#uses=1] + %tmp22 = getelementptr { double, double }* %memtmp, i32 0, i32 1 ; <double*> [#uses=1] + %tmp23 = load double* %tmp22, align 8 ; <double> [#uses=1] + %tmp32 = fsub double -0.000000e+00, %tmp20 ; <double> [#uses=1] + %tmp37 = getelementptr { double, double }* %agg.result, i32 0, i32 0 ; <double*> [#uses=1] + store double %tmp23, double* %tmp37, align 8 + %tmp40 = getelementptr { double, double }* %agg.result, i32 0, i32 1 ; <double*> [#uses=1] + store double %tmp32, double* %tmp40, align 8 + ret void +} + +declare void @casinh({ double, double }* sret , double, double) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/2008-02-08-LoadFoldingBug.ll b/src/LLVM/test/CodeGen/X86/2008-02-08-LoadFoldingBug.ll new file mode 100644 index 0000000..b772d77 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-02-08-LoadFoldingBug.ll
@@ -0,0 +1,99 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep andpd | not grep esp + +declare double @llvm.sqrt.f64(double) nounwind readnone + +declare fastcc void @ApplyGivens(double**, double, double, i32, i32, i32, i32) nounwind + +declare double @fabs(double) + +define void @main_bb114_2E_outer_2E_i_bb3_2E_i27(double** %tmp12.sub.i.i, [51 x double*]* %tmp12.i.i.i, i32 %i.0.reg2mem.0.ph.i, i32 %tmp11688.i, i32 %tmp19.i, i32 %tmp24.i, [51 x double*]* %tmp12.i.i) { +newFuncRoot: + br label %bb3.i27 + +bb111.i77.bb121.i_crit_edge.exitStub: ; preds = %bb111.i77 + ret void + +bb3.i27: ; preds = %bb111.i77.bb3.i27_crit_edge, %newFuncRoot + %indvar94.i = phi i32 [ 0, %newFuncRoot ], [ %tmp113.i76, %bb111.i77.bb3.i27_crit_edge ] ; <i32> [#uses=6] + %tmp6.i20 = getelementptr [51 x double*]* %tmp12.i.i, i32 0, i32 %indvar94.i ; <double**> [#uses=1] + %tmp7.i21 = load double** %tmp6.i20, align 4 ; <double*> [#uses=2] + %tmp10.i = add i32 %indvar94.i, %i.0.reg2mem.0.ph.i ; <i32> [#uses=5] + %tmp11.i22 = getelementptr double* %tmp7.i21, i32 %tmp10.i ; <double*> [#uses=1] + %tmp12.i23 = load double* %tmp11.i22, align 8 ; <double> [#uses=4] + %tmp20.i24 = add i32 %tmp19.i, %indvar94.i ; <i32> [#uses=3] + %tmp21.i = getelementptr double* %tmp7.i21, i32 %tmp20.i24 ; <double*> [#uses=1] + %tmp22.i25 = load double* %tmp21.i, align 8 ; <double> [#uses=3] + %tmp1.i.i26 = fcmp oeq double %tmp12.i23, 0.000000e+00 ; <i1> [#uses=1] + br i1 %tmp1.i.i26, label %bb3.i27.Givens.exit.i49_crit_edge, label %bb5.i.i31 + +bb5.i.i31: ; preds = %bb3.i27 + %tmp7.i.i28 = call double @fabs( double %tmp12.i23 ) nounwind ; <double> [#uses=1] + %tmp9.i.i29 = call double @fabs( double %tmp22.i25 ) nounwind ; <double> [#uses=1] + %tmp10.i.i30 = fcmp ogt double %tmp7.i.i28, %tmp9.i.i29 ; <i1> [#uses=1] + br i1 %tmp10.i.i30, label %bb13.i.i37, label %bb30.i.i43 + +bb13.i.i37: ; preds = %bb5.i.i31 + %tmp15.i.i32 = fsub double -0.000000e+00, %tmp22.i25 ; <double> [#uses=1] + %tmp17.i.i33 = fdiv double 
%tmp15.i.i32, %tmp12.i23 ; <double> [#uses=3] + %tmp20.i4.i = fmul double %tmp17.i.i33, %tmp17.i.i33 ; <double> [#uses=1] + %tmp21.i.i34 = fadd double %tmp20.i4.i, 1.000000e+00 ; <double> [#uses=1] + %tmp22.i.i35 = call double @llvm.sqrt.f64( double %tmp21.i.i34 ) nounwind ; <double> [#uses=1] + %tmp23.i5.i = fdiv double 1.000000e+00, %tmp22.i.i35 ; <double> [#uses=2] + %tmp28.i.i36 = fmul double %tmp23.i5.i, %tmp17.i.i33 ; <double> [#uses=1] + br label %Givens.exit.i49 + +bb30.i.i43: ; preds = %bb5.i.i31 + %tmp32.i.i38 = fsub double -0.000000e+00, %tmp12.i23 ; <double> [#uses=1] + %tmp34.i.i39 = fdiv double %tmp32.i.i38, %tmp22.i25 ; <double> [#uses=3] + %tmp37.i6.i = fmul double %tmp34.i.i39, %tmp34.i.i39 ; <double> [#uses=1] + %tmp38.i.i40 = fadd double %tmp37.i6.i, 1.000000e+00 ; <double> [#uses=1] + %tmp39.i7.i = call double @llvm.sqrt.f64( double %tmp38.i.i40 ) nounwind ; <double> [#uses=1] + %tmp40.i.i41 = fdiv double 1.000000e+00, %tmp39.i7.i ; <double> [#uses=2] + %tmp45.i.i42 = fmul double %tmp40.i.i41, %tmp34.i.i39 ; <double> [#uses=1] + br label %Givens.exit.i49 + +Givens.exit.i49: ; preds = %bb3.i27.Givens.exit.i49_crit_edge, %bb30.i.i43, %bb13.i.i37 + %s.0.i44 = phi double [ %tmp45.i.i42, %bb30.i.i43 ], [ %tmp23.i5.i, %bb13.i.i37 ], [ 0.000000e+00, %bb3.i27.Givens.exit.i49_crit_edge ] ; <double> [#uses=2] + %c.0.i45 = phi double [ %tmp40.i.i41, %bb30.i.i43 ], [ %tmp28.i.i36, %bb13.i.i37 ], [ 1.000000e+00, %bb3.i27.Givens.exit.i49_crit_edge ] ; <double> [#uses=2] + %tmp26.i46 = add i32 %tmp24.i, %indvar94.i ; <i32> [#uses=2] + %tmp27.i47 = icmp slt i32 %tmp26.i46, 51 ; <i1> [#uses=1] + %min.i48 = select i1 %tmp27.i47, i32 %tmp26.i46, i32 50 ; <i32> [#uses=1] + call fastcc void @ApplyGivens( double** %tmp12.sub.i.i, double %s.0.i44, double %c.0.i45, i32 %tmp20.i24, i32 %tmp10.i, i32 %indvar94.i, i32 %min.i48 ) nounwind + br label %codeRepl + +codeRepl: ; preds = %Givens.exit.i49 + call void @main_bb114_2E_outer_2E_i_bb3_2E_i27_bb_2E_i48_2E_i( i32 
%tmp10.i, i32 %tmp20.i24, double %s.0.i44, double %c.0.i45, [51 x double*]* %tmp12.i.i.i ) + br label %ApplyRGivens.exit49.i + +ApplyRGivens.exit49.i: ; preds = %codeRepl + %tmp10986.i = icmp sgt i32 %tmp11688.i, %tmp10.i ; <i1> [#uses=1] + br i1 %tmp10986.i, label %ApplyRGivens.exit49.i.bb52.i57_crit_edge, label %ApplyRGivens.exit49.i.bb111.i77_crit_edge + +codeRepl1: ; preds = %ApplyRGivens.exit49.i.bb52.i57_crit_edge + call void @main_bb114_2E_outer_2E_i_bb3_2E_i27_bb52_2E_i57( i32 %tmp10.i, double** %tmp12.sub.i.i, [51 x double*]* %tmp12.i.i.i, i32 %i.0.reg2mem.0.ph.i, i32 %tmp11688.i, i32 %tmp19.i, i32 %tmp24.i, [51 x double*]* %tmp12.i.i ) + br label %bb105.i.bb111.i77_crit_edge + +bb111.i77: ; preds = %bb105.i.bb111.i77_crit_edge, %ApplyRGivens.exit49.i.bb111.i77_crit_edge + %tmp113.i76 = add i32 %indvar94.i, 1 ; <i32> [#uses=2] + %tmp118.i = icmp sgt i32 %tmp11688.i, %tmp113.i76 ; <i1> [#uses=1] + br i1 %tmp118.i, label %bb111.i77.bb3.i27_crit_edge, label %bb111.i77.bb121.i_crit_edge.exitStub + +bb3.i27.Givens.exit.i49_crit_edge: ; preds = %bb3.i27 + br label %Givens.exit.i49 + +ApplyRGivens.exit49.i.bb52.i57_crit_edge: ; preds = %ApplyRGivens.exit49.i + br label %codeRepl1 + +ApplyRGivens.exit49.i.bb111.i77_crit_edge: ; preds = %ApplyRGivens.exit49.i + br label %bb111.i77 + +bb105.i.bb111.i77_crit_edge: ; preds = %codeRepl1 + br label %bb111.i77 + +bb111.i77.bb3.i27_crit_edge: ; preds = %bb111.i77 + br label %bb3.i27 +} + +declare void @main_bb114_2E_outer_2E_i_bb3_2E_i27_bb_2E_i48_2E_i(i32, i32, double, double, [51 x double*]*) + +declare void @main_bb114_2E_outer_2E_i_bb3_2E_i27_bb52_2E_i57(i32, double**, [51 x double*]*, i32, i32, i32, i32, [51 x double*]*)
diff --git a/src/LLVM/test/CodeGen/X86/2008-02-14-BitMiscompile.ll b/src/LLVM/test/CodeGen/X86/2008-02-14-BitMiscompile.ll new file mode 100644 index 0000000..1983f1d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-02-14-BitMiscompile.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=x86 | grep and +define i32 @test(i1 %A) { + %B = zext i1 %A to i32 ; <i32> [#uses=1] + %C = sub i32 0, %B ; <i32> [#uses=1] + %D = and i32 %C, 255 ; <i32> [#uses=1] + ret i32 %D +} +
diff --git a/src/LLVM/test/CodeGen/X86/2008-02-18-TailMergingBug.ll b/src/LLVM/test/CodeGen/X86/2008-02-18-TailMergingBug.ll new file mode 100644 index 0000000..bdacf50 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
@@ -0,0 +1,219 @@ +; RUN: llc < %s -march=x86 -mcpu=yonah -stats |& grep {Number of block tails merged} | grep 16 +; PR1909 + +@.str = internal constant [48 x i8] c"transformed bounds: (%.2f, %.2f), (%.2f, %.2f)\0A\00" ; <[48 x i8]*> [#uses=1] + +define void @minmax(float* %result) nounwind optsize { +entry: + %tmp2 = load float* %result, align 4 ; <float> [#uses=6] + %tmp4 = getelementptr float* %result, i32 2 ; <float*> [#uses=5] + %tmp5 = load float* %tmp4, align 4 ; <float> [#uses=10] + %tmp7 = getelementptr float* %result, i32 4 ; <float*> [#uses=5] + %tmp8 = load float* %tmp7, align 4 ; <float> [#uses=8] + %tmp10 = getelementptr float* %result, i32 6 ; <float*> [#uses=3] + %tmp11 = load float* %tmp10, align 4 ; <float> [#uses=8] + %tmp12 = fcmp olt float %tmp8, %tmp11 ; <i1> [#uses=5] + br i1 %tmp12, label %bb, label %bb21 + +bb: ; preds = %entry + %tmp23469 = fcmp olt float %tmp5, %tmp8 ; <i1> [#uses=1] + br i1 %tmp23469, label %bb26, label %bb30 + +bb21: ; preds = %entry + %tmp23 = fcmp olt float %tmp5, %tmp11 ; <i1> [#uses=1] + br i1 %tmp23, label %bb26, label %bb30 + +bb26: ; preds = %bb21, %bb + %tmp52471 = fcmp olt float %tmp2, %tmp5 ; <i1> [#uses=1] + br i1 %tmp52471, label %bb111, label %bb59 + +bb30: ; preds = %bb21, %bb + br i1 %tmp12, label %bb40, label %bb50 + +bb40: ; preds = %bb30 + %tmp52473 = fcmp olt float %tmp2, %tmp8 ; <i1> [#uses=1] + br i1 %tmp52473, label %bb111, label %bb59 + +bb50: ; preds = %bb30 + %tmp52 = fcmp olt float %tmp2, %tmp11 ; <i1> [#uses=1] + br i1 %tmp52, label %bb111, label %bb59 + +bb59: ; preds = %bb50, %bb40, %bb26 + br i1 %tmp12, label %bb72, label %bb80 + +bb72: ; preds = %bb59 + %tmp82475 = fcmp olt float %tmp5, %tmp8 ; <i1> [#uses=2] + %brmerge786 = or i1 %tmp82475, %tmp12 ; <i1> [#uses=1] + %tmp4.mux787 = select i1 %tmp82475, float* %tmp4, float* %tmp7 ; <float*> [#uses=1] + br i1 %brmerge786, label %bb111, label %bb103 + +bb80: ; preds = %bb59 + %tmp82 = fcmp olt float %tmp5, %tmp11 ; <i1> [#uses=2] + %brmerge = 
or i1 %tmp82, %tmp12 ; <i1> [#uses=1] + %tmp4.mux = select i1 %tmp82, float* %tmp4, float* %tmp7 ; <float*> [#uses=1] + br i1 %brmerge, label %bb111, label %bb103 + +bb103: ; preds = %bb80, %bb72 + br label %bb111 + +bb111: ; preds = %bb103, %bb80, %bb72, %bb50, %bb40, %bb26 + %iftmp.0.0.in = phi float* [ %tmp10, %bb103 ], [ %result, %bb26 ], [ %result, %bb40 ], [ %result, %bb50 ], [ %tmp4.mux, %bb80 ], [ %tmp4.mux787, %bb72 ] ; <float*> [#uses=1] + %iftmp.0.0 = load float* %iftmp.0.0.in ; <float> [#uses=1] + %tmp125 = fcmp ogt float %tmp8, %tmp11 ; <i1> [#uses=5] + br i1 %tmp125, label %bb128, label %bb136 + +bb128: ; preds = %bb111 + %tmp138477 = fcmp ogt float %tmp5, %tmp8 ; <i1> [#uses=1] + br i1 %tmp138477, label %bb141, label %bb145 + +bb136: ; preds = %bb111 + %tmp138 = fcmp ogt float %tmp5, %tmp11 ; <i1> [#uses=1] + br i1 %tmp138, label %bb141, label %bb145 + +bb141: ; preds = %bb136, %bb128 + %tmp167479 = fcmp ogt float %tmp2, %tmp5 ; <i1> [#uses=1] + br i1 %tmp167479, label %bb226, label %bb174 + +bb145: ; preds = %bb136, %bb128 + br i1 %tmp125, label %bb155, label %bb165 + +bb155: ; preds = %bb145 + %tmp167481 = fcmp ogt float %tmp2, %tmp8 ; <i1> [#uses=1] + br i1 %tmp167481, label %bb226, label %bb174 + +bb165: ; preds = %bb145 + %tmp167 = fcmp ogt float %tmp2, %tmp11 ; <i1> [#uses=1] + br i1 %tmp167, label %bb226, label %bb174 + +bb174: ; preds = %bb165, %bb155, %bb141 + br i1 %tmp125, label %bb187, label %bb195 + +bb187: ; preds = %bb174 + %tmp197483 = fcmp ogt float %tmp5, %tmp8 ; <i1> [#uses=2] + %brmerge790 = or i1 %tmp197483, %tmp125 ; <i1> [#uses=1] + %tmp4.mux791 = select i1 %tmp197483, float* %tmp4, float* %tmp7 ; <float*> [#uses=1] + br i1 %brmerge790, label %bb226, label %bb218 + +bb195: ; preds = %bb174 + %tmp197 = fcmp ogt float %tmp5, %tmp11 ; <i1> [#uses=2] + %brmerge788 = or i1 %tmp197, %tmp125 ; <i1> [#uses=1] + %tmp4.mux789 = select i1 %tmp197, float* %tmp4, float* %tmp7 ; <float*> [#uses=1] + br i1 %brmerge788, label %bb226, label 
%bb218 + +bb218: ; preds = %bb195, %bb187 + br label %bb226 + +bb226: ; preds = %bb218, %bb195, %bb187, %bb165, %bb155, %bb141 + %iftmp.7.0.in = phi float* [ %tmp10, %bb218 ], [ %result, %bb141 ], [ %result, %bb155 ], [ %result, %bb165 ], [ %tmp4.mux789, %bb195 ], [ %tmp4.mux791, %bb187 ] ; <float*> [#uses=1] + %iftmp.7.0 = load float* %iftmp.7.0.in ; <float> [#uses=1] + %tmp229 = getelementptr float* %result, i32 1 ; <float*> [#uses=7] + %tmp230 = load float* %tmp229, align 4 ; <float> [#uses=6] + %tmp232 = getelementptr float* %result, i32 3 ; <float*> [#uses=5] + %tmp233 = load float* %tmp232, align 4 ; <float> [#uses=10] + %tmp235 = getelementptr float* %result, i32 5 ; <float*> [#uses=5] + %tmp236 = load float* %tmp235, align 4 ; <float> [#uses=8] + %tmp238 = getelementptr float* %result, i32 7 ; <float*> [#uses=3] + %tmp239 = load float* %tmp238, align 4 ; <float> [#uses=8] + %tmp240 = fcmp olt float %tmp236, %tmp239 ; <i1> [#uses=5] + br i1 %tmp240, label %bb243, label %bb251 + +bb243: ; preds = %bb226 + %tmp253485 = fcmp olt float %tmp233, %tmp236 ; <i1> [#uses=1] + br i1 %tmp253485, label %bb256, label %bb260 + +bb251: ; preds = %bb226 + %tmp253 = fcmp olt float %tmp233, %tmp239 ; <i1> [#uses=1] + br i1 %tmp253, label %bb256, label %bb260 + +bb256: ; preds = %bb251, %bb243 + %tmp282487 = fcmp olt float %tmp230, %tmp233 ; <i1> [#uses=1] + br i1 %tmp282487, label %bb341, label %bb289 + +bb260: ; preds = %bb251, %bb243 + br i1 %tmp240, label %bb270, label %bb280 + +bb270: ; preds = %bb260 + %tmp282489 = fcmp olt float %tmp230, %tmp236 ; <i1> [#uses=1] + br i1 %tmp282489, label %bb341, label %bb289 + +bb280: ; preds = %bb260 + %tmp282 = fcmp olt float %tmp230, %tmp239 ; <i1> [#uses=1] + br i1 %tmp282, label %bb341, label %bb289 + +bb289: ; preds = %bb280, %bb270, %bb256 + br i1 %tmp240, label %bb302, label %bb310 + +bb302: ; preds = %bb289 + %tmp312491 = fcmp olt float %tmp233, %tmp236 ; <i1> [#uses=2] + %brmerge793 = or i1 %tmp312491, %tmp240 ; <i1> [#uses=1] 
+ %tmp232.mux794 = select i1 %tmp312491, float* %tmp232, float* %tmp235 ; <float*> [#uses=1] + br i1 %brmerge793, label %bb341, label %bb333 + +bb310: ; preds = %bb289 + %tmp312 = fcmp olt float %tmp233, %tmp239 ; <i1> [#uses=2] + %brmerge792 = or i1 %tmp312, %tmp240 ; <i1> [#uses=1] + %tmp232.mux = select i1 %tmp312, float* %tmp232, float* %tmp235 ; <float*> [#uses=1] + br i1 %brmerge792, label %bb341, label %bb333 + +bb333: ; preds = %bb310, %bb302 + br label %bb341 + +bb341: ; preds = %bb333, %bb310, %bb302, %bb280, %bb270, %bb256 + %iftmp.14.0.in = phi float* [ %tmp238, %bb333 ], [ %tmp229, %bb280 ], [ %tmp229, %bb270 ], [ %tmp229, %bb256 ], [ %tmp232.mux, %bb310 ], [ %tmp232.mux794, %bb302 ] ; <float*> [#uses=1] + %iftmp.14.0 = load float* %iftmp.14.0.in ; <float> [#uses=1] + %tmp355 = fcmp ogt float %tmp236, %tmp239 ; <i1> [#uses=5] + br i1 %tmp355, label %bb358, label %bb366 + +bb358: ; preds = %bb341 + %tmp368493 = fcmp ogt float %tmp233, %tmp236 ; <i1> [#uses=1] + br i1 %tmp368493, label %bb371, label %bb375 + +bb366: ; preds = %bb341 + %tmp368 = fcmp ogt float %tmp233, %tmp239 ; <i1> [#uses=1] + br i1 %tmp368, label %bb371, label %bb375 + +bb371: ; preds = %bb366, %bb358 + %tmp397495 = fcmp ogt float %tmp230, %tmp233 ; <i1> [#uses=1] + br i1 %tmp397495, label %bb456, label %bb404 + +bb375: ; preds = %bb366, %bb358 + br i1 %tmp355, label %bb385, label %bb395 + +bb385: ; preds = %bb375 + %tmp397497 = fcmp ogt float %tmp230, %tmp236 ; <i1> [#uses=1] + br i1 %tmp397497, label %bb456, label %bb404 + +bb395: ; preds = %bb375 + %tmp397 = fcmp ogt float %tmp230, %tmp239 ; <i1> [#uses=1] + br i1 %tmp397, label %bb456, label %bb404 + +bb404: ; preds = %bb395, %bb385, %bb371 + br i1 %tmp355, label %bb417, label %bb425 + +bb417: ; preds = %bb404 + %tmp427499 = fcmp ogt float %tmp233, %tmp236 ; <i1> [#uses=2] + %brmerge797 = or i1 %tmp427499, %tmp355 ; <i1> [#uses=1] + %tmp232.mux798 = select i1 %tmp427499, float* %tmp232, float* %tmp235 ; <float*> [#uses=1] + br i1 
%brmerge797, label %bb456, label %bb448 + +bb425: ; preds = %bb404 + %tmp427 = fcmp ogt float %tmp233, %tmp239 ; <i1> [#uses=2] + %brmerge795 = or i1 %tmp427, %tmp355 ; <i1> [#uses=1] + %tmp232.mux796 = select i1 %tmp427, float* %tmp232, float* %tmp235 ; <float*> [#uses=1] + br i1 %brmerge795, label %bb456, label %bb448 + +bb448: ; preds = %bb425, %bb417 + br label %bb456 + +bb456: ; preds = %bb448, %bb425, %bb417, %bb395, %bb385, %bb371 + %iftmp.21.0.in = phi float* [ %tmp238, %bb448 ], [ %tmp229, %bb395 ], [ %tmp229, %bb385 ], [ %tmp229, %bb371 ], [ %tmp232.mux796, %bb425 ], [ %tmp232.mux798, %bb417 ] ; <float*> [#uses=1] + %iftmp.21.0 = load float* %iftmp.21.0.in ; <float> [#uses=1] + %tmp458459 = fpext float %iftmp.21.0 to double ; <double> [#uses=1] + %tmp460461 = fpext float %iftmp.7.0 to double ; <double> [#uses=1] + %tmp462463 = fpext float %iftmp.14.0 to double ; <double> [#uses=1] + %tmp464465 = fpext float %iftmp.0.0 to double ; <double> [#uses=1] + %tmp467 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([48 x i8]* @.str, i32 0, i32 0), double %tmp464465, double %tmp462463, double %tmp460461, double %tmp458459 ) nounwind ; <i32> [#uses=0] + ret void +} + +declare i32 @printf(i8*, ...) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll b/src/LLVM/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll new file mode 100644 index 0000000..5115e48 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s | grep {a:} | not grep ax +; RUN: llc < %s | grep {b:} | not grep ax +; PR2078 +; The clobber list says that "ax" is clobbered. Make sure that eax isn't +; allocated to the input/output register. +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin8" +@pixels = weak global i32 0 ; <i32*> [#uses=2] + +define void @test() nounwind { +entry: + %tmp = load i32* @pixels, align 4 ; <i32> [#uses=1] + %tmp1 = tail call i32 asm sideeffect "a: $0 $1", "=r,0,~{dirflag},~{fpsr},~{flags},~{ax}"( i32 %tmp ) nounwind ; <i32> [#uses=1] + store i32 %tmp1, i32* @pixels, align 4 + ret void +} + +define void @test2(i16* %block, i8* %pixels, i32 %line_size) nounwind { +entry: + %tmp1 = getelementptr i16* %block, i32 64 ; <i16*> [#uses=1] + %tmp3 = tail call i8* asm sideeffect "b: $0 $1 $2", "=r,r,0,~{dirflag},~{fpsr},~{flags},~{ax}"( i16* %tmp1, i8* %pixels ) nounwind ; <i8*> [#uses=0] + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll b/src/LLVM/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll new file mode 100644 index 0000000..da02907 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll
@@ -0,0 +1,55 @@ +; RUN: llc < %s -regalloc=fast -march=x86 -mattr=+mmx | grep esi +; PR2082 +; Local register allocator was refusing to use ESI, EDI, and EBP so it ran out of +; registers. +define void @transpose4x4(i8* %dst, i8* %src, i32 %dst_stride, i32 %src_stride) { +entry: + %dst_addr = alloca i8* ; <i8**> [#uses=5] + %src_addr = alloca i8* ; <i8**> [#uses=5] + %dst_stride_addr = alloca i32 ; <i32*> [#uses=4] + %src_stride_addr = alloca i32 ; <i32*> [#uses=4] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store i8* %dst, i8** %dst_addr + store i8* %src, i8** %src_addr + store i32 %dst_stride, i32* %dst_stride_addr + store i32 %src_stride, i32* %src_stride_addr + %tmp = load i8** %dst_addr, align 4 ; <i8*> [#uses=1] + %tmp1 = getelementptr i8* %tmp, i32 0 ; <i8*> [#uses=1] + %tmp12 = bitcast i8* %tmp1 to i32* ; <i32*> [#uses=1] + %tmp3 = load i8** %dst_addr, align 4 ; <i8*> [#uses=1] + %tmp4 = load i32* %dst_stride_addr, align 4 ; <i32> [#uses=1] + %tmp5 = getelementptr i8* %tmp3, i32 %tmp4 ; <i8*> [#uses=1] + %tmp56 = bitcast i8* %tmp5 to i32* ; <i32*> [#uses=1] + %tmp7 = load i32* %dst_stride_addr, align 4 ; <i32> [#uses=1] + %tmp8 = mul i32 %tmp7, 2 ; <i32> [#uses=1] + %tmp9 = load i8** %dst_addr, align 4 ; <i8*> [#uses=1] + %tmp10 = getelementptr i8* %tmp9, i32 %tmp8 ; <i8*> [#uses=1] + %tmp1011 = bitcast i8* %tmp10 to i32* ; <i32*> [#uses=1] + %tmp13 = load i32* %dst_stride_addr, align 4 ; <i32> [#uses=1] + %tmp14 = mul i32 %tmp13, 3 ; <i32> [#uses=1] + %tmp15 = load i8** %dst_addr, align 4 ; <i8*> [#uses=1] + %tmp16 = getelementptr i8* %tmp15, i32 %tmp14 ; <i8*> [#uses=1] + %tmp1617 = bitcast i8* %tmp16 to i32* ; <i32*> [#uses=1] + %tmp18 = load i8** %src_addr, align 4 ; <i8*> [#uses=1] + %tmp19 = getelementptr i8* %tmp18, i32 0 ; <i8*> [#uses=1] + %tmp1920 = bitcast i8* %tmp19 to i32* ; <i32*> [#uses=1] + %tmp21 = load i8** %src_addr, align 4 ; <i8*> [#uses=1] + %tmp22 = load i32* %src_stride_addr, align 4 ; <i32> [#uses=1] + %tmp23 = 
getelementptr i8* %tmp21, i32 %tmp22 ; <i8*> [#uses=1] + %tmp2324 = bitcast i8* %tmp23 to i32* ; <i32*> [#uses=1] + %tmp25 = load i32* %src_stride_addr, align 4 ; <i32> [#uses=1] + %tmp26 = mul i32 %tmp25, 2 ; <i32> [#uses=1] + %tmp27 = load i8** %src_addr, align 4 ; <i8*> [#uses=1] + %tmp28 = getelementptr i8* %tmp27, i32 %tmp26 ; <i8*> [#uses=1] + %tmp2829 = bitcast i8* %tmp28 to i32* ; <i32*> [#uses=1] + %tmp30 = load i32* %src_stride_addr, align 4 ; <i32> [#uses=1] + %tmp31 = mul i32 %tmp30, 3 ; <i32> [#uses=1] + %tmp32 = load i8** %src_addr, align 4 ; <i8*> [#uses=1] + %tmp33 = getelementptr i8* %tmp32, i32 %tmp31 ; <i8*> [#uses=1] + %tmp3334 = bitcast i8* %tmp33 to i32* ; <i32*> [#uses=1] + call void asm sideeffect "movd $4, %mm0 \0A\09movd $5, %mm1 \0A\09movd $6, %mm2 \0A\09movd $7, %mm3 \0A\09punpcklbw %mm1, %mm0 \0A\09punpcklbw %mm3, %mm2 \0A\09movq %mm0, %mm1 \0A\09punpcklwd %mm2, %mm0 \0A\09punpckhwd %mm2, %mm1 \0A\09movd %mm0, $0 \0A\09punpckhdq %mm0, %mm0 \0A\09movd %mm0, $1 \0A\09movd %mm1, $2 \0A\09punpckhdq %mm1, %mm1 \0A\09movd %mm1, $3 \0A\09", "=*m,=*m,=*m,=*m,*m,*m,*m,*m,~{dirflag},~{fpsr},~{flags}"( i32* %tmp12, i32* %tmp56, i32* %tmp1011, i32* %tmp1617, i32* %tmp1920, i32* %tmp2324, i32* %tmp2829, i32* %tmp3334 ) nounwind + br label %return + +return: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-02-22-ReMatBug.ll b/src/LLVM/test/CodeGen/X86/2008-02-22-ReMatBug.ll new file mode 100644 index 0000000..8f4d353 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-02-22-ReMatBug.ll
@@ -0,0 +1,49 @@ +; RUN: llc < %s -march=x86 -stats -regalloc=linearscan |& grep {Number of re-materialization} | grep 2 +; rdar://5761454 + + %struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* } + +define %struct.quad_struct* @MakeTree(i32 %size, i32 %center_x, i32 %center_y, i32 %lo_proc, i32 %hi_proc, %struct.quad_struct* %parent, i32 %ct, i32 %level) nounwind { +entry: + br i1 true, label %bb43.i, label %bb.i + +bb.i: ; preds = %entry + ret %struct.quad_struct* null + +bb43.i: ; preds = %entry + br i1 true, label %CheckOutside.exit40.i, label %bb11.i38.i + +bb11.i38.i: ; preds = %bb43.i + ret %struct.quad_struct* null + +CheckOutside.exit40.i: ; preds = %bb43.i + br i1 true, label %CheckOutside.exit30.i, label %bb11.i28.i + +bb11.i28.i: ; preds = %CheckOutside.exit40.i + ret %struct.quad_struct* null + +CheckOutside.exit30.i: ; preds = %CheckOutside.exit40.i + br i1 true, label %CheckOutside.exit20.i, label %bb11.i18.i + +bb11.i18.i: ; preds = %CheckOutside.exit30.i + ret %struct.quad_struct* null + +CheckOutside.exit20.i: ; preds = %CheckOutside.exit30.i + br i1 true, label %bb34, label %bb11.i8.i + +bb11.i8.i: ; preds = %CheckOutside.exit20.i + ret %struct.quad_struct* null + +bb34: ; preds = %CheckOutside.exit20.i + %tmp15.reg2mem.0 = sdiv i32 %size, 2 ; <i32> [#uses=7] + %tmp85 = sub i32 %center_y, %tmp15.reg2mem.0 ; <i32> [#uses=2] + %tmp88 = sub i32 %center_x, %tmp15.reg2mem.0 ; <i32> [#uses=2] + %tmp92 = tail call %struct.quad_struct* @MakeTree( i32 %tmp15.reg2mem.0, i32 %tmp88, i32 %tmp85, i32 0, i32 %hi_proc, %struct.quad_struct* null, i32 2, i32 0 ) nounwind ; <%struct.quad_struct*> [#uses=0] + %tmp99 = add i32 0, %hi_proc ; <i32> [#uses=1] + %tmp100 = sdiv i32 %tmp99, 2 ; <i32> [#uses=1] + %tmp110 = tail call %struct.quad_struct* @MakeTree( i32 %tmp15.reg2mem.0, i32 0, i32 %tmp85, i32 0, i32 %tmp100, %struct.quad_struct* null, i32 3, i32 0 ) nounwind ; 
<%struct.quad_struct*> [#uses=0] + %tmp122 = add i32 %tmp15.reg2mem.0, %center_y ; <i32> [#uses=2] + %tmp129 = tail call %struct.quad_struct* @MakeTree( i32 %tmp15.reg2mem.0, i32 0, i32 %tmp122, i32 0, i32 0, %struct.quad_struct* null, i32 1, i32 0 ) nounwind ; <%struct.quad_struct*> [#uses=0] + %tmp147 = tail call %struct.quad_struct* @MakeTree( i32 %tmp15.reg2mem.0, i32 %tmp88, i32 %tmp122, i32 %lo_proc, i32 0, %struct.quad_struct* null, i32 0, i32 0 ) nounwind ; <%struct.quad_struct*> [#uses=0] + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-02-25-InlineAsmBug.ll b/src/LLVM/test/CodeGen/X86/2008-02-25-InlineAsmBug.ll new file mode 100644 index 0000000..1d31859 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-02-25-InlineAsmBug.ll
@@ -0,0 +1,33 @@ +; RUN: llc < %s -mtriple=i686-pc-linux-gnu -mattr=+sse2 +; PR2076 + +define void @h264_h_loop_filter_luma_mmx2(i8* %pix, i32 %stride, i32 %alpha, i32 %beta, i8* %tc0) nounwind { +entry: + %tmp164 = getelementptr [16 x i32]* null, i32 0, i32 11 ; <i32*> [#uses=1] + %tmp169 = getelementptr [16 x i32]* null, i32 0, i32 13 ; <i32*> [#uses=1] + %tmp174 = getelementptr [16 x i32]* null, i32 0, i32 15 ; <i32*> [#uses=1] + %tmp154.sum317 = add i32 0, %stride ; <i32> [#uses=1] + %tmp154.sum315 = mul i32 %stride, 6 ; <i32> [#uses=1] + %tmp154.sum = mul i32 %stride, 7 ; <i32> [#uses=1] + %pix_addr.0327.rec = mul i32 0, 0 ; <i32> [#uses=4] + br i1 false, label %bb292, label %bb32 + +bb32: ; preds = %entry + %pix_addr.0327.sum340 = add i32 %pix_addr.0327.rec, 0 ; <i32> [#uses=1] + %tmp154 = getelementptr i8* %pix, i32 %pix_addr.0327.sum340 ; <i8*> [#uses=1] + %tmp177178 = bitcast i8* %tmp154 to i32* ; <i32*> [#uses=1] + %pix_addr.0327.sum339 = add i32 %pix_addr.0327.rec, %tmp154.sum317 ; <i32> [#uses=1] + %tmp181 = getelementptr i8* %pix, i32 %pix_addr.0327.sum339 ; <i8*> [#uses=1] + %tmp181182 = bitcast i8* %tmp181 to i32* ; <i32*> [#uses=1] + %pix_addr.0327.sum338 = add i32 %pix_addr.0327.rec, %tmp154.sum315 ; <i32> [#uses=1] + %tmp186 = getelementptr i8* %pix, i32 %pix_addr.0327.sum338 ; <i8*> [#uses=1] + %tmp186187 = bitcast i8* %tmp186 to i32* ; <i32*> [#uses=1] + %pix_addr.0327.sum337 = add i32 %pix_addr.0327.rec, %tmp154.sum ; <i32> [#uses=1] + %tmp191 = getelementptr i8* %pix, i32 %pix_addr.0327.sum337 ; <i8*> [#uses=1] + %tmp191192 = bitcast i8* %tmp191 to i32* ; <i32*> [#uses=1] + call void asm sideeffect "movd $4, %mm0 \0A\09movd $5, %mm1 \0A\09movd $6, %mm2 \0A\09movd $7, %mm3 \0A\09punpcklbw %mm1, %mm0 \0A\09punpcklbw %mm3, %mm2 \0A\09movq %mm0, %mm1 \0A\09punpcklwd %mm2, %mm0 \0A\09punpckhwd %mm2, %mm1 \0A\09movd %mm0, $0 \0A\09punpckhdq %mm0, %mm0 \0A\09movd %mm0, $1 \0A\09movd %mm1, $2 \0A\09punpckhdq %mm1, %mm1 \0A\09movd %mm1, $3 \0A\09", 
"=*m,=*m,=*m,=*m,*m,*m,*m,*m,~{dirflag},~{fpsr},~{flags}"( i32* null, i32* %tmp164, i32* %tmp169, i32* %tmp174, i32* %tmp177178, i32* %tmp181182, i32* %tmp186187, i32* %tmp191192 ) nounwind + unreachable + +bb292: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-02-25-X86-64-CoalescerBug.ll b/src/LLVM/test/CodeGen/X86/2008-02-25-X86-64-CoalescerBug.ll new file mode 100644 index 0000000..fd9c35e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-02-25-X86-64-CoalescerBug.ll
@@ -0,0 +1,55 @@ +; RUN: llc < %s -march=x86-64 + + %struct.XX = type <{ i8 }> + %struct.YY = type { i64 } + %struct.ZZ = type opaque + +define signext i8 @f(%struct.XX*** %fontMap, %struct.XX* %uen) { +entry: + %tmp45 = add i16 0, 1 ; <i16> [#uses=2] + br i1 false, label %bb124, label %bb53 + +bb53: ; preds = %entry + %tmp55 = call %struct.YY** @AA( i64 1, %struct.XX* %uen ) ; <%struct.YY**> [#uses=3] + %tmp2728128 = load %struct.XX** null ; <%struct.XX*> [#uses=1] + %tmp61 = load %struct.YY** %tmp55, align 8 ; <%struct.YY*> [#uses=1] + %tmp62 = getelementptr %struct.YY* %tmp61, i32 0, i32 0 ; <i64*> [#uses=1] + %tmp63 = load i64* %tmp62, align 8 ; <i64> [#uses=1] + %tmp6566 = zext i16 %tmp45 to i64 ; <i64> [#uses=1] + %tmp67 = shl i64 %tmp6566, 1 ; <i64> [#uses=1] + call void @BB( %struct.YY** %tmp55, i64 %tmp67, i8 signext 0, %struct.XX* %uen ) + %tmp121131 = icmp eq i16 %tmp45, 1 ; <i1> [#uses=1] + br i1 %tmp121131, label %bb124, label %bb70.preheader + +bb70.preheader: ; preds = %bb53 + %tmp72 = bitcast %struct.XX* %tmp2728128 to %struct.ZZ*** ; <%struct.ZZ***> [#uses=1] + br label %bb70 + +bb70: ; preds = %bb119, %bb70.preheader + %indvar133 = phi i32 [ %indvar.next134, %bb119 ], [ 0, %bb70.preheader ] ; <i32> [#uses=2] + %tmp.135 = trunc i64 %tmp63 to i32 ; <i32> [#uses=1] + %tmp136 = shl i32 %indvar133, 1 ; <i32> [#uses=1] + %DD = add i32 %tmp136, %tmp.135 ; <i32> [#uses=1] + %tmp73 = load %struct.ZZ*** %tmp72, align 8 ; <%struct.ZZ**> [#uses=0] + br i1 false, label %bb119, label %bb77 + +bb77: ; preds = %bb70 + %tmp8384 = trunc i32 %DD to i16 ; <i16> [#uses=1] + %tmp85 = sub i16 0, %tmp8384 ; <i16> [#uses=1] + store i16 %tmp85, i16* null, align 8 + call void @CC( %struct.YY** %tmp55, i64 0, i64 2, i8* null, %struct.XX* %uen ) + ret i8 0 + +bb119: ; preds = %bb70 + %indvar.next134 = add i32 %indvar133, 1 ; <i32> [#uses=1] + br label %bb70 + +bb124: ; preds = %bb53, %entry + ret i8 undef +} + +declare %struct.YY** @AA(i64, %struct.XX*) + +declare void 
@BB(%struct.YY**, i64, i8 signext , %struct.XX*) + +declare void @CC(%struct.YY**, i64, i64, i8*, %struct.XX*)
diff --git a/src/LLVM/test/CodeGen/X86/2008-02-26-AsmDirectMemOp.ll b/src/LLVM/test/CodeGen/X86/2008-02-26-AsmDirectMemOp.ll new file mode 100644 index 0000000..0b4eb3a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-02-26-AsmDirectMemOp.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=x86 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" +target triple = "i386-pc-linux-gnu" + +define void @dct_unquantize_h263_intra_mmx(i16* %block, i32 %n, i32 %qscale) nounwind { +entry: + %tmp1 = shl i32 %qscale, 1 ; <i32> [#uses=1] + br i1 false, label %bb46, label %bb59 + +bb46: ; preds = %entry + ret void + +bb59: ; preds = %entry + tail call void asm sideeffect "movd $1, %mm6 \0A\09packssdw %mm6, %mm6 \0A\09packssdw %mm6, %mm6 \0A\09movd $2, %mm5 \0A\09pxor %mm7, %mm7 \0A\09packssdw %mm5, %mm5 \0A\09packssdw %mm5, %mm5 \0A\09psubw %mm5, %mm7 \0A\09pxor %mm4, %mm4 \0A\09.align 1<<4\0A\091: \0A\09movq ($0, $3), %mm0 \0A\09movq 8($0, $3), %mm1 \0A\09pmullw %mm6, %mm0 \0A\09pmullw %mm6, %mm1 \0A\09movq ($0, $3), %mm2 \0A\09movq 8($0, $3), %mm3 \0A\09pcmpgtw %mm4, %mm2 \0A\09pcmpgtw %mm4, %mm3 \0A\09pxor %mm2, %mm0 \0A\09pxor %mm3, %mm1 \0A\09paddw %mm7, %mm0 \0A\09paddw %mm7, %mm1 \0A\09pxor %mm0, %mm2 \0A\09pxor %mm1, %mm3 \0A\09pcmpeqw %mm7, %mm0 \0A\09pcmpeqw %mm7, %mm1 \0A\09pandn %mm2, %mm0 \0A\09pandn %mm3, %mm1 \0A\09movq %mm0, ($0, $3) \0A\09movq %mm1, 8($0, $3) \0A\09add $$16, $3 \0A\09jng 1b \0A\09", "r,imr,imr,r,~{dirflag},~{fpsr},~{flags},~{memory}"( i16* null, i32 %tmp1, i32 0, i32 0 ) nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll b/src/LLVM/test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll new file mode 100644 index 0000000..ad7950c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll
@@ -0,0 +1,66 @@ +; RUN: llc < %s -march=x86 + + %struct.CompAtom = type <{ %struct.Position, float, i32 }> + %struct.Lattice = type { %struct.Position, %struct.Position, %struct.Position, %struct.Position, %struct.Position, %struct.Position, %struct.Position, i32, i32, i32 } + %struct.Position = type { double, double, double } + +define fastcc %struct.CompAtom* @_ZNK7Lattice6createEP8CompAtomii(%struct.Lattice* %this, %struct.CompAtom* %d, i32 %n, i32 %i) { +entry: + %tmp18 = tail call i8* @_Znam( i32 0 ) ; <i8*> [#uses=1] + %tmp1819 = bitcast i8* %tmp18 to %struct.CompAtom* ; <%struct.CompAtom*> [#uses=4] + %tmp3286 = icmp eq i32 %n, 0 ; <i1> [#uses=1] + br i1 %tmp3286, label %bb35, label %bb24 + +bb24: ; preds = %bb24, %entry + %tmp9.0.reg2mem.0.rec = phi i32 [ %indvar.next, %bb24 ], [ 0, %entry ] ; <i32> [#uses=3] + %tmp3.i.i = getelementptr %struct.CompAtom* %tmp1819, i32 %tmp9.0.reg2mem.0.rec, i32 0, i32 1 ; <double*> [#uses=0] + %tmp5.i.i = getelementptr %struct.CompAtom* %tmp1819, i32 %tmp9.0.reg2mem.0.rec, i32 0, i32 2 ; <double*> [#uses=1] + store double -9.999900e+04, double* %tmp5.i.i, align 4 + %indvar.next = add i32 %tmp9.0.reg2mem.0.rec, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %indvar.next, %n ; <i1> [#uses=1] + br i1 %exitcond, label %bb35, label %bb24 + +bb35: ; preds = %bb24, %entry + %tmp42 = sdiv i32 %i, 9 ; <i32> [#uses=1] + %tmp43 = add i32 %tmp42, -1 ; <i32> [#uses=1] + %tmp4344 = sitofp i32 %tmp43 to double ; <double> [#uses=1] + %tmp17.i76 = fmul double %tmp4344, 0.000000e+00 ; <double> [#uses=1] + %tmp48 = sdiv i32 %i, 3 ; <i32> [#uses=1] + %tmp49 = srem i32 %tmp48, 3 ; <i32> [#uses=1] + %tmp50 = add i32 %tmp49, -1 ; <i32> [#uses=1] + %tmp5051 = sitofp i32 %tmp50 to double ; <double> [#uses=1] + %tmp17.i63 = fmul double %tmp5051, 0.000000e+00 ; <double> [#uses=1] + %tmp55 = srem i32 %i, 3 ; <i32> [#uses=1] + %tmp56 = add i32 %tmp55, -1 ; <i32> [#uses=1] + %tmp5657 = sitofp i32 %tmp56 to double ; <double> [#uses=1] + %tmp15.i49 = 
getelementptr %struct.Lattice* %this, i32 0, i32 0, i32 0 ; <double*> [#uses=1] + %tmp16.i50 = load double* %tmp15.i49, align 4 ; <double> [#uses=1] + %tmp17.i = fmul double %tmp5657, %tmp16.i50 ; <double> [#uses=1] + %tmp20.i39 = fadd double %tmp17.i, %tmp17.i63 ; <double> [#uses=1] + %tmp20.i23 = fadd double %tmp20.i39, %tmp17.i76 ; <double> [#uses=1] + br i1 false, label %bb58.preheader, label %bb81 + +bb58.preheader: ; preds = %bb35 + %smax = select i1 false, i32 1, i32 %n ; <i32> [#uses=1] + br label %bb58 + +bb58: ; preds = %bb58, %bb58.preheader + %tmp20.i7 = getelementptr %struct.CompAtom* %d, i32 0, i32 2 ; <i32*> [#uses=2] + %tmp25.i = getelementptr %struct.CompAtom* %tmp1819, i32 0, i32 2 ; <i32*> [#uses=2] + %tmp74.i = load i32* %tmp20.i7, align 1 ; <i32> [#uses=1] + %tmp82.i = and i32 %tmp74.i, 134217728 ; <i32> [#uses=1] + %tmp85.i = or i32 0, %tmp82.i ; <i32> [#uses=1] + store i32 %tmp85.i, i32* %tmp25.i, align 1 + %tmp88.i = load i32* %tmp20.i7, align 1 ; <i32> [#uses=1] + %tmp95.i = and i32 %tmp88.i, -268435456 ; <i32> [#uses=1] + %tmp97.i = or i32 0, %tmp95.i ; <i32> [#uses=1] + store i32 %tmp97.i, i32* %tmp25.i, align 1 + %tmp6.i = fadd double 0.000000e+00, %tmp20.i23 ; <double> [#uses=0] + %exitcond96 = icmp eq i32 0, %smax ; <i1> [#uses=1] + br i1 %exitcond96, label %bb81, label %bb58 + +bb81: ; preds = %bb58, %bb35 + ret %struct.CompAtom* %tmp1819 +} + +declare i8* @_Znam(i32)
diff --git a/src/LLVM/test/CodeGen/X86/2008-02-27-PEICrash.ll b/src/LLVM/test/CodeGen/X86/2008-02-27-PEICrash.ll new file mode 100644 index 0000000..d842967 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-02-27-PEICrash.ll
@@ -0,0 +1,33 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 + +define i64 @__divsc3(float %a, float %b, float %c, float %d) nounwind readnone { +entry: + br i1 false, label %bb56, label %bb33 + +bb33: ; preds = %entry + br label %bb56 + +bb56: ; preds = %bb33, %entry + %tmp36.pn = phi float [ 0.000000e+00, %bb33 ], [ 0.000000e+00, %entry ] ; <float> [#uses=1] + %b.pn509 = phi float [ %b, %bb33 ], [ %a, %entry ] ; <float> [#uses=1] + %a.pn = phi float [ %a, %bb33 ], [ %b, %entry ] ; <float> [#uses=1] + %tmp41.pn508 = phi float [ 0.000000e+00, %bb33 ], [ 0.000000e+00, %entry ] ; <float> [#uses=1] + %tmp51.pn = phi float [ 0.000000e+00, %bb33 ], [ %a, %entry ] ; <float> [#uses=1] + %tmp44.pn = fmul float %tmp36.pn, %b.pn509 ; <float> [#uses=1] + %tmp46.pn = fadd float %tmp44.pn, %a.pn ; <float> [#uses=1] + %tmp53.pn = fsub float 0.000000e+00, %tmp51.pn ; <float> [#uses=1] + %x.0 = fdiv float %tmp46.pn, %tmp41.pn508 ; <float> [#uses=1] + %y.0 = fdiv float %tmp53.pn, 0.000000e+00 ; <float> [#uses=1] + br i1 false, label %bb433, label %bb98 + +bb98: ; preds = %bb56 + %tmp102 = fmul float 0.000000e+00, %a ; <float> [#uses=1] + %tmp106 = fmul float 0.000000e+00, %b ; <float> [#uses=1] + br label %bb433 + +bb433: ; preds = %bb98, %bb56 + %x.1 = phi float [ %tmp102, %bb98 ], [ %x.0, %bb56 ] ; <float> [#uses=0] + %y.1 = phi float [ %tmp106, %bb98 ], [ %y.0, %bb56 ] ; <float> [#uses=1] + %tmp460 = fadd float %y.1, 0.000000e+00 ; <float> [#uses=0] + ret i64 0 +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-03-06-frem-fpstack.ll b/src/LLVM/test/CodeGen/X86/2008-03-06-frem-fpstack.ll new file mode 100644 index 0000000..70a83b5 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-03-06-frem-fpstack.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=x86 -mcpu=i386 +; PR2122 +define float @func(float %a, float %b) nounwind { +entry: + %tmp3 = frem float %a, %b ; <float> [#uses=1] + ret float %tmp3 +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-03-07-APIntBug.ll b/src/LLVM/test/CodeGen/X86/2008-03-07-APIntBug.ll new file mode 100644 index 0000000..84e4827 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-03-07-APIntBug.ll
@@ -0,0 +1,94 @@ +; RUN: llc < %s -march=x86 -mcpu=i386 | not grep 255 + + %struct.CONSTRAINT = type { i32, i32, i32, i32 } + %struct.FIRST_UNION = type { %struct.anon } + %struct.FOURTH_UNION = type { %struct.CONSTRAINT } + %struct.LIST = type { %struct.rec*, %struct.rec* } + %struct.SECOND_UNION = type { { i16, i8, i8 } } + %struct.THIRD_UNION = type { { [2 x i32], [2 x i32] } } + %struct.anon = type { i8, i8, i32 } + %struct.head_type = type { [2 x %struct.LIST], %struct.FIRST_UNION, %struct.SECOND_UNION, %struct.THIRD_UNION, %struct.FOURTH_UNION, %struct.rec*, { %struct.rec* }, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, i32 } + %struct.rec = type { %struct.head_type } + %struct.symbol_type = type <{ [2 x %struct.LIST], %struct.FIRST_UNION, %struct.SECOND_UNION, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, i16, i16, i8, i8, i8, i8 }> + %struct.word_type = type { [2 x %struct.LIST], %struct.FIRST_UNION, %struct.SECOND_UNION, %struct.THIRD_UNION, [4 x i8] } + +define void @InsertSym_bb1163(%struct.rec** %s) { +newFuncRoot: + br label %bb1163 +bb1233.exitStub: ; preds = %bb1163 + ret void +bb1163: ; preds = %newFuncRoot + %tmp1164 = load %struct.rec** %s, align 4 ; <%struct.rec*> [#uses=1] + %tmp1165 = getelementptr %struct.rec* %tmp1164, i32 0, i32 0 ; <%struct.head_type*> [#uses=1] + %tmp11651166 = bitcast %struct.head_type* %tmp1165 to %struct.symbol_type* ; <%struct.symbol_type*> [#uses=1] + %tmp1167 = getelementptr %struct.symbol_type* %tmp11651166, i32 0, i32 3 ; <%struct.rec**> [#uses=1] + %tmp1168 = load %struct.rec** %tmp1167, align 1 ; <%struct.rec*> [#uses=2] + %tmp1169 = load %struct.rec** %s, align 4 ; <%struct.rec*> [#uses=1] + %tmp1170 = getelementptr %struct.rec* %tmp1169, i32 0, i32 0 ; <%struct.head_type*> [#uses=1] + %tmp11701171 = bitcast %struct.head_type* %tmp1170 to %struct.symbol_type* ; 
<%struct.symbol_type*> [#uses=1] + %tmp1172 = getelementptr %struct.symbol_type* %tmp11701171, i32 0, i32 3 ; <%struct.rec**> [#uses=1] + %tmp1173 = load %struct.rec** %tmp1172, align 1 ; <%struct.rec*> [#uses=2] + %tmp1174 = getelementptr %struct.rec* %tmp1173, i32 0, i32 0 ; <%struct.head_type*> [#uses=1] + %tmp11741175 = bitcast %struct.head_type* %tmp1174 to %struct.word_type* ; <%struct.word_type*> [#uses=1] + %tmp1176 = getelementptr %struct.word_type* %tmp11741175, i32 0, i32 2 ; <%struct.SECOND_UNION*> [#uses=1] + %tmp1177 = getelementptr %struct.SECOND_UNION* %tmp1176, i32 0, i32 0 ; <{ i16, i8, i8 }*> [#uses=1] + %tmp11771178 = bitcast { i16, i8, i8 }* %tmp1177 to <{ i8, i8, i8, i8 }>* ; <<{ i8, i8, i8, i8 }>*> [#uses=1] + %tmp1179 = getelementptr <{ i8, i8, i8, i8 }>* %tmp11771178, i32 0, i32 2 ; <i8*> [#uses=2] + %mask1180 = and i8 1, 1 ; <i8> [#uses=2] + %tmp1181 = load i8* %tmp1179, align 1 ; <i8> [#uses=1] + %tmp1182 = shl i8 %mask1180, 7 ; <i8> [#uses=1] + %tmp1183 = and i8 %tmp1181, 127 ; <i8> [#uses=1] + %tmp1184 = or i8 %tmp1183, %tmp1182 ; <i8> [#uses=1] + store i8 %tmp1184, i8* %tmp1179, align 1 + %mask1185 = and i8 %mask1180, 1 ; <i8> [#uses=0] + %tmp1186 = getelementptr %struct.rec* %tmp1173, i32 0, i32 0 ; <%struct.head_type*> [#uses=1] + %tmp11861187 = bitcast %struct.head_type* %tmp1186 to %struct.word_type* ; <%struct.word_type*> [#uses=1] + %tmp1188 = getelementptr %struct.word_type* %tmp11861187, i32 0, i32 2 ; <%struct.SECOND_UNION*> [#uses=1] + %tmp1189 = getelementptr %struct.SECOND_UNION* %tmp1188, i32 0, i32 0 ; <{ i16, i8, i8 }*> [#uses=1] + %tmp11891190 = bitcast { i16, i8, i8 }* %tmp1189 to <{ i8, i8, i8, i8 }>* ; <<{ i8, i8, i8, i8 }>*> [#uses=1] + %tmp1191 = getelementptr <{ i8, i8, i8, i8 }>* %tmp11891190, i32 0, i32 2 ; <i8*> [#uses=1] + %tmp1192 = load i8* %tmp1191, align 1 ; <i8> [#uses=1] + %tmp1193 = lshr i8 %tmp1192, 7 ; <i8> [#uses=1] + %mask1194 = and i8 %tmp1193, 1 ; <i8> [#uses=2] + %mask1195 = and i8 %mask1194, 1 ; 
<i8> [#uses=0] + %tmp1196 = getelementptr %struct.rec* %tmp1168, i32 0, i32 0 ; <%struct.head_type*> [#uses=1] + %tmp11961197 = bitcast %struct.head_type* %tmp1196 to %struct.word_type* ; <%struct.word_type*> [#uses=1] + %tmp1198 = getelementptr %struct.word_type* %tmp11961197, i32 0, i32 2 ; <%struct.SECOND_UNION*> [#uses=1] + %tmp1199 = getelementptr %struct.SECOND_UNION* %tmp1198, i32 0, i32 0 ; <{ i16, i8, i8 }*> [#uses=1] + %tmp11991200 = bitcast { i16, i8, i8 }* %tmp1199 to <{ i8, i8, i8, i8 }>* ; <<{ i8, i8, i8, i8 }>*> [#uses=1] + %tmp1201 = getelementptr <{ i8, i8, i8, i8 }>* %tmp11991200, i32 0, i32 1 ; <i8*> [#uses=2] + %mask1202 = and i8 %mask1194, 1 ; <i8> [#uses=2] + %tmp1203 = load i8* %tmp1201, align 1 ; <i8> [#uses=1] + %tmp1204 = shl i8 %mask1202, 1 ; <i8> [#uses=1] + %tmp1205 = and i8 %tmp1204, 2 ; <i8> [#uses=1] + %tmp1206 = and i8 %tmp1203, -3 ; <i8> [#uses=1] + %tmp1207 = or i8 %tmp1206, %tmp1205 ; <i8> [#uses=1] + store i8 %tmp1207, i8* %tmp1201, align 1 + %mask1208 = and i8 %mask1202, 1 ; <i8> [#uses=0] + %tmp1209 = getelementptr %struct.rec* %tmp1168, i32 0, i32 0 ; <%struct.head_type*> [#uses=1] + %tmp12091210 = bitcast %struct.head_type* %tmp1209 to %struct.word_type* ; <%struct.word_type*> [#uses=1] + %tmp1211 = getelementptr %struct.word_type* %tmp12091210, i32 0, i32 2 ; <%struct.SECOND_UNION*> [#uses=1] + %tmp1212 = getelementptr %struct.SECOND_UNION* %tmp1211, i32 0, i32 0 ; <{ i16, i8, i8 }*> [#uses=1] + %tmp12121213 = bitcast { i16, i8, i8 }* %tmp1212 to <{ i8, i8, i8, i8 }>* ; <<{ i8, i8, i8, i8 }>*> [#uses=1] + %tmp1214 = getelementptr <{ i8, i8, i8, i8 }>* %tmp12121213, i32 0, i32 1 ; <i8*> [#uses=1] + %tmp1215 = load i8* %tmp1214, align 1 ; <i8> [#uses=1] + %tmp1216 = shl i8 %tmp1215, 6 ; <i8> [#uses=1] + %tmp1217 = lshr i8 %tmp1216, 7 ; <i8> [#uses=1] + %mask1218 = and i8 %tmp1217, 1 ; <i8> [#uses=2] + %mask1219 = and i8 %mask1218, 1 ; <i8> [#uses=0] + %tmp1220 = load %struct.rec** %s, align 4 ; <%struct.rec*> [#uses=1] + 
%tmp1221 = getelementptr %struct.rec* %tmp1220, i32 0, i32 0 ; <%struct.head_type*> [#uses=1] + %tmp12211222 = bitcast %struct.head_type* %tmp1221 to %struct.word_type* ; <%struct.word_type*> [#uses=1] + %tmp1223 = getelementptr %struct.word_type* %tmp12211222, i32 0, i32 2 ; <%struct.SECOND_UNION*> [#uses=1] + %tmp1224 = getelementptr %struct.SECOND_UNION* %tmp1223, i32 0, i32 0 ; <{ i16, i8, i8 }*> [#uses=1] + %tmp12241225 = bitcast { i16, i8, i8 }* %tmp1224 to <{ i8, i8, i8, i8 }>* ; <<{ i8, i8, i8, i8 }>*> [#uses=1] + %tmp1226 = getelementptr <{ i8, i8, i8, i8 }>* %tmp12241225, i32 0, i32 1 ; <i8*> [#uses=2] + %mask1227 = and i8 %mask1218, 1 ; <i8> [#uses=2] + %tmp1228 = load i8* %tmp1226, align 1 ; <i8> [#uses=1] + %tmp1229 = and i8 %mask1227, 1 ; <i8> [#uses=1] + %tmp1230 = and i8 %tmp1228, -2 ; <i8> [#uses=1] + %tmp1231 = or i8 %tmp1230, %tmp1229 ; <i8> [#uses=1] + store i8 %tmp1231, i8* %tmp1226, align 1 + %mask1232 = and i8 %mask1227, 1 ; <i8> [#uses=0] + br label %bb1233.exitStub +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-03-10-RegAllocInfLoop.ll b/src/LLVM/test/CodeGen/X86/2008-03-10-RegAllocInfLoop.ll new file mode 100644 index 0000000..40aafb4 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-03-10-RegAllocInfLoop.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -mtriple=i386-pc-linux-gnu -relocation-model=pic -disable-fp-elim +; PR2134 + +declare fastcc i8* @w_addchar(i8*, i32*, i32*, i8 signext ) nounwind + +define x86_stdcallcc i32 @parse_backslash(i8** inreg %word, i32* inreg %word_length, i32* inreg %max_length) nounwind { +entry: + %tmp6 = load i8* null, align 1 ; <i8> [#uses=1] + br label %bb13 +bb13: ; preds = %entry + %tmp26 = call fastcc i8* @w_addchar( i8* null, i32* %word_length, i32* %max_length, i8 signext %tmp6 ) nounwind ; <i8*> [#uses=1] + store i8* %tmp26, i8** %word, align 4 + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll b/src/LLVM/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll new file mode 100644 index 0000000..e673d31 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll
@@ -0,0 +1,37 @@ +; RUN: llc < %s -relocation-model=pic | grep TLSGD | count 2 +; PR2137 + +; ModuleID = '1.c' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" +target triple = "i386-pc-linux-gnu" + %struct.__res_state = type { i32 } +@__resp = thread_local global %struct.__res_state* @_res ; <%struct.__res_state**> [#uses=1] +@_res = global %struct.__res_state zeroinitializer, section ".bss" ; <%struct.__res_state*> [#uses=1] + +@__libc_resp = hidden alias %struct.__res_state** @__resp ; <%struct.__res_state**> [#uses=2] + +define i32 @foo() { +entry: + %retval = alloca i32 ; <i32*> [#uses=1] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + %tmp = load %struct.__res_state** @__libc_resp, align 4 ; <%struct.__res_state*> [#uses=1] + %tmp1 = getelementptr %struct.__res_state* %tmp, i32 0, i32 0 ; <i32*> [#uses=1] + store i32 0, i32* %tmp1, align 4 + br label %return +return: ; preds = %entry + %retval2 = load i32* %retval ; <i32> [#uses=1] + ret i32 %retval2 +} + +define i32 @bar() { +entry: + %retval = alloca i32 ; <i32*> [#uses=1] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + %tmp = load %struct.__res_state** @__libc_resp, align 4 ; <%struct.__res_state*> [#uses=1] + %tmp1 = getelementptr %struct.__res_state* %tmp, i32 0, i32 0 ; <i32*> [#uses=1] + store i32 1, i32* %tmp1, align 4 + br label %return +return: ; preds = %entry + %retval2 = load i32* %retval ; <i32> [#uses=1] + ret i32 %retval2 +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-03-13-TwoAddrPassCrash.ll b/src/LLVM/test/CodeGen/X86/2008-03-13-TwoAddrPassCrash.ll new file mode 100644 index 0000000..19d49b2 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-03-13-TwoAddrPassCrash.ll
@@ -0,0 +1,68 @@ +; RUN: llc < %s -march=x86 + +define signext i16 @t(i32 %depth) nounwind { +entry: + br i1 false, label %bb74, label %bb +bb: ; preds = %entry + ret i16 0 +bb74: ; preds = %entry + switch i32 0, label %bail [ + i32 17, label %bb84 + i32 18, label %bb81 + i32 33, label %bb80 + i32 34, label %bb84 + ] +bb80: ; preds = %bb74 + switch i32 %depth, label %bb103 [ + i32 16, label %bb96 + i32 32, label %bb91 + i32 846624121, label %bb96 + i32 1094862674, label %bb91 + i32 1096368963, label %bb91 + i32 1111970369, label %bb91 + i32 1278555445, label %bb96 + i32 1278555701, label %bb96 + i32 1380401729, label %bb91 + i32 1668118891, label %bb91 + i32 1916022840, label %bb91 + i32 1983131704, label %bb91 + i32 2037741171, label %bb96 + i32 2037741173, label %bb96 + ] +bb81: ; preds = %bb74 + ret i16 0 +bb84: ; preds = %bb74, %bb74 + switch i32 %depth, label %bb103 [ + i32 16, label %bb96 + i32 32, label %bb91 + i32 846624121, label %bb96 + i32 1094862674, label %bb91 + i32 1096368963, label %bb91 + i32 1111970369, label %bb91 + i32 1278555445, label %bb96 + i32 1278555701, label %bb96 + i32 1380401729, label %bb91 + i32 1668118891, label %bb91 + i32 1916022840, label %bb91 + i32 1983131704, label %bb91 + i32 2037741171, label %bb96 + i32 2037741173, label %bb96 + ] +bb91: ; preds = %bb84, %bb84, %bb84, %bb84, %bb84, %bb84, %bb84, %bb84, %bb80, %bb80, %bb80, %bb80, %bb80, %bb80, %bb80, %bb80 + %wMB.0.reg2mem.0 = phi i16 [ 16, %bb80 ], [ 16, %bb80 ], [ 16, %bb80 ], [ 16, %bb80 ], [ 16, %bb80 ], [ 16, %bb80 ], [ 16, %bb80 ], [ 16, %bb80 ], [ 0, %bb84 ], [ 0, %bb84 ], [ 0, %bb84 ], [ 0, %bb84 ], [ 0, %bb84 ], [ 0, %bb84 ], [ 0, %bb84 ], [ 0, %bb84 ] ; <i16> [#uses=2] + %tmp941478 = shl i16 %wMB.0.reg2mem.0, 2 ; <i16> [#uses=1] + br label %bb103 +bb96: ; preds = %bb84, %bb84, %bb84, %bb84, %bb84, %bb84, %bb80, %bb80, %bb80, %bb80, %bb80, %bb80 + ret i16 0 +bb103: ; preds = %bb91, %bb84, %bb80 + %wMB.0.reg2mem.2 = phi i16 [ %wMB.0.reg2mem.0, %bb91 ], [ 16, %bb80 
], [ 0, %bb84 ] ; <i16> [#uses=1] + %bBump.0 = phi i16 [ %tmp941478, %bb91 ], [ 16, %bb80 ], [ 0, %bb84 ] ; <i16> [#uses=0] + br i1 false, label %bb164, label %UnifiedReturnBlock +bb164: ; preds = %bb103 + %tmp167168 = sext i16 %wMB.0.reg2mem.2 to i32 ; <i32> [#uses=0] + ret i16 0 +bail: ; preds = %bb74 + ret i16 0 +UnifiedReturnBlock: ; preds = %bb103 + ret i16 0 +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-03-14-SpillerCrash.ll b/src/LLVM/test/CodeGen/X86/2008-03-14-SpillerCrash.ll new file mode 100644 index 0000000..8946415 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-03-14-SpillerCrash.ll
@@ -0,0 +1,48 @@ +; RUN: llc < %s -mtriple=i386-pc-linux-gnu +; PR2138 + + %struct.__locale_struct = type { [13 x %struct.locale_data*], i16*, i32*, i32*, [13 x i8*] } + %struct.anon = type { i8* } + %struct.locale_data = type { i8*, i8*, i32, i32, { void (%struct.locale_data*)*, %struct.anon }, i32, i32, i32, [0 x %struct.locale_data_value] } + %struct.locale_data_value = type { i32* } + +@wcstoll_l = alias i64 (i32*, i32**, i32, %struct.__locale_struct*)* @__wcstoll_l ; <i64 (i32*, i32**, i32, %struct.__locale_struct*)*> [#uses=0] + +define i64 @____wcstoll_l_internal(i32* %nptr, i32** %endptr, i32 %base, i32 %group, %struct.__locale_struct* %loc) nounwind { +entry: + %tmp27 = load i32* null, align 4 ; <i32> [#uses=1] + %tmp83 = getelementptr i32* %nptr, i32 1 ; <i32*> [#uses=1] + %tmp233 = add i32 0, -48 ; <i32> [#uses=1] + br label %bb271.us +bb271.us: ; preds = %entry + br label %bb374.outer +bb311.split: ; preds = %bb305.us + %tmp313 = add i32 %tmp378.us, -48 ; <i32> [#uses=1] + br i1 false, label %bb374.outer, label %bb383 +bb327.split: ; preds = %bb314.us + ret i64 0 +bb374.outer: ; preds = %bb311.split, %bb271.us + %tmp370371552.pn.in = phi i32 [ %tmp233, %bb271.us ], [ %tmp313, %bb311.split ] ; <i32> [#uses=1] + %tmp278279.pn = phi i64 [ 0, %bb271.us ], [ %tmp373.reg2mem.0.ph, %bb311.split ] ; <i64> [#uses=1] + %s.5.ph = phi i32* [ null, %bb271.us ], [ %tmp376.us, %bb311.split ] ; <i32*> [#uses=1] + %tmp366367550.pn = sext i32 %base to i64 ; <i64> [#uses=1] + %tmp370371552.pn = zext i32 %tmp370371552.pn.in to i64 ; <i64> [#uses=1] + %tmp369551.pn = mul i64 %tmp278279.pn, %tmp366367550.pn ; <i64> [#uses=1] + %tmp373.reg2mem.0.ph = add i64 %tmp370371552.pn, %tmp369551.pn ; <i64> [#uses=1] + br label %bb374.us +bb374.us: ; preds = %bb314.us, %bb374.outer + %tmp376.us = getelementptr i32* %s.5.ph, i32 0 ; <i32*> [#uses=3] + %tmp378.us = load i32* %tmp376.us, align 4 ; <i32> [#uses=2] + %tmp302.us = icmp eq i32* %tmp376.us, %tmp83 ; <i1> [#uses=1] + 
%bothcond484.us = or i1 false, %tmp302.us ; <i1> [#uses=1] + br i1 %bothcond484.us, label %bb383, label %bb305.us +bb305.us: ; preds = %bb374.us + br i1 false, label %bb311.split, label %bb314.us +bb314.us: ; preds = %bb305.us + %tmp320.us = icmp eq i32 %tmp378.us, %tmp27 ; <i1> [#uses=1] + br i1 %tmp320.us, label %bb374.us, label %bb327.split +bb383: ; preds = %bb374.us, %bb311.split + ret i64 0 +} + +declare i64 @__wcstoll_l(i32*, i32**, i32, %struct.__locale_struct*) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/2008-03-18-CoalescerBug.ll b/src/LLVM/test/CodeGen/X86/2008-03-18-CoalescerBug.ll new file mode 100644 index 0000000..33d658c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-03-18-CoalescerBug.ll
@@ -0,0 +1,51 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -disable-fp-elim -regalloc=linearscan | grep movss | count 1 +; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -disable-fp-elim -regalloc=linearscan -stats |& grep {Number of re-materialization} | grep 1 + + %struct..0objc_object = type opaque + %struct.OhBoy = type { } + %struct.BooHoo = type { i32 } + %struct.objc_selector = type opaque +@llvm.used = appending global [1 x i8*] [ i8* bitcast (void (%struct.OhBoy*, %struct.objc_selector*, i32, %struct.BooHoo*)* @"-[MessageHeaderDisplay adjustFontSizeBy:viewingState:]" to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +define void @"-[MessageHeaderDisplay adjustFontSizeBy:viewingState:]"(%struct.OhBoy* %self, %struct.objc_selector* %_cmd, i32 %delta, %struct.BooHoo* %viewingState) nounwind { +entry: + %tmp19 = load i32* null, align 4 ; <i32> [#uses=1] + %tmp24 = tail call float bitcast (void (%struct..0objc_object*, ...)* @objc_msgSend_fpret to float (%struct..0objc_object*, %struct.objc_selector*)*)( %struct..0objc_object* null, %struct.objc_selector* null ) nounwind ; <float> [#uses=2] + %tmp30 = icmp sgt i32 %delta, 0 ; <i1> [#uses=1] + br i1 %tmp30, label %bb33, label %bb87.preheader +bb33: ; preds = %entry + %tmp28 = fadd float 0.000000e+00, %tmp24 ; <float> [#uses=1] + %tmp35 = fcmp ogt float %tmp28, 1.800000e+01 ; <i1> [#uses=1] + br i1 %tmp35, label %bb38, label %bb87.preheader +bb38: ; preds = %bb33 + %tmp53 = add i32 %tmp19, %delta ; <i32> [#uses=2] + br label %bb43 +bb43: ; preds = %bb38 + store i32 %tmp53, i32* null, align 4 + ret void +bb50: ; preds = %bb38 + %tmp56 = fsub float 1.800000e+01, %tmp24 ; <float> [#uses=1] + %tmp57 = fcmp ugt float 0.000000e+00, %tmp56 ; <i1> [#uses=1] + br i1 %tmp57, label %bb64, label %bb87.preheader +bb64: ; preds = %bb50 + ret void +bb87.preheader: ; preds = %bb50, %bb33, %entry + %usableDelta.0 = phi i32 [ %delta, %entry ], [ %delta, %bb33 ], [ %tmp53, %bb50 ] ; <i32> [#uses=1] 
+ %tmp100 = tail call %struct..0objc_object* (%struct..0objc_object*, %struct.objc_selector*, ...)* @objc_msgSend( %struct..0objc_object* null, %struct.objc_selector* null, %struct..0objc_object* null ) nounwind ; <%struct..0objc_object*> [#uses=2] + %tmp106 = tail call %struct..0objc_object* (%struct..0objc_object*, %struct.objc_selector*, ...)* @objc_msgSend( %struct..0objc_object* %tmp100, %struct.objc_selector* null ) nounwind ; <%struct..0objc_object*> [#uses=0] + %umax = select i1 false, i32 1, i32 0 ; <i32> [#uses=1] + br label %bb108 +bb108: ; preds = %bb108, %bb87.preheader + %attachmentIndex.0.reg2mem.0 = phi i32 [ 0, %bb87.preheader ], [ %indvar.next, %bb108 ] ; <i32> [#uses=2] + %tmp114 = tail call %struct..0objc_object* (%struct..0objc_object*, %struct.objc_selector*, ...)* @objc_msgSend( %struct..0objc_object* %tmp100, %struct.objc_selector* null, i32 %attachmentIndex.0.reg2mem.0 ) nounwind ; <%struct..0objc_object*> [#uses=1] + %tmp121 = tail call %struct..0objc_object* (%struct..0objc_object*, %struct.objc_selector*, ...)* @objc_msgSend( %struct..0objc_object* %tmp114, %struct.objc_selector* null, i32 %usableDelta.0 ) nounwind ; <%struct..0objc_object*> [#uses=0] + %indvar.next = add i32 %attachmentIndex.0.reg2mem.0, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %indvar.next, %umax ; <i1> [#uses=1] + br i1 %exitcond, label %bb130, label %bb108 +bb130: ; preds = %bb108 + ret void +} + +declare %struct..0objc_object* @objc_msgSend(%struct..0objc_object*, %struct.objc_selector*, ...) + +declare void @objc_msgSend_fpret(%struct..0objc_object*, ...)
diff --git a/src/LLVM/test/CodeGen/X86/2008-03-19-DAGCombinerBug.ll b/src/LLVM/test/CodeGen/X86/2008-03-19-DAGCombinerBug.ll new file mode 100644 index 0000000..eaa883c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-03-19-DAGCombinerBug.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=x86 + +define i32 @t() nounwind { +entry: + %tmp54 = add i32 0, 1 ; <i32> [#uses=1] + br i1 false, label %bb71, label %bb77 +bb71: ; preds = %entry + %tmp74 = shl i32 %tmp54, 1 ; <i32> [#uses=1] + %tmp76 = ashr i32 %tmp74, 3 ; <i32> [#uses=1] + br label %bb77 +bb77: ; preds = %bb71, %entry + %payLoadSize.0 = phi i32 [ %tmp76, %bb71 ], [ 0, %entry ] ; <i32> [#uses=0] + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll b/src/LLVM/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll new file mode 100644 index 0000000..4dc3a10 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll
@@ -0,0 +1,49 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin -asm-verbose | grep {#} | not grep -v {##} + + %struct.AGenericCall = type { %struct.AGenericManager*, %struct.ComponentParameters*, i32* } + %struct.AGenericManager = type <{ i8 }> + %struct.ComponentInstanceRecord = type opaque + %struct.ComponentParameters = type { [1 x i64] } + +define i32 @_ZN12AGenericCall10MapIDPtrAtEsRP23ComponentInstanceRecord(%struct.AGenericCall* %this, i16 signext %param, %struct.ComponentInstanceRecord** %instance) { +entry: + %tmp4 = icmp slt i16 %param, 0 ; <i1> [#uses=1] + br i1 %tmp4, label %cond_true, label %cond_next + +cond_true: ; preds = %entry + %tmp1415 = shl i16 %param, 3 ; <i16> [#uses=1] + %tmp17 = getelementptr %struct.AGenericCall* %this, i32 0, i32 1 ; <%struct.ComponentParameters**> [#uses=1] + %tmp18 = load %struct.ComponentParameters** %tmp17, align 8 ; <%struct.ComponentParameters*> [#uses=1] + %tmp1920 = bitcast %struct.ComponentParameters* %tmp18 to i8* ; <i8*> [#uses=1] + %tmp212223 = sext i16 %tmp1415 to i64 ; <i64> [#uses=1] + %tmp24 = getelementptr i8* %tmp1920, i64 %tmp212223 ; <i8*> [#uses=1] + %tmp2425 = bitcast i8* %tmp24 to i64* ; <i64*> [#uses=1] + %tmp28 = load i64* %tmp2425, align 8 ; <i64> [#uses=1] + %tmp2829 = inttoptr i64 %tmp28 to i32* ; <i32*> [#uses=1] + %tmp31 = getelementptr %struct.AGenericCall* %this, i32 0, i32 2 ; <i32**> [#uses=1] + store i32* %tmp2829, i32** %tmp31, align 8 + br label %cond_next + +cond_next: ; preds = %cond_true, %entry + %tmp4243 = shl i16 %param, 3 ; <i16> [#uses=1] + %tmp46 = getelementptr %struct.AGenericCall* %this, i32 0, i32 1 ; <%struct.ComponentParameters**> [#uses=1] + %tmp47 = load %struct.ComponentParameters** %tmp46, align 8 ; <%struct.ComponentParameters*> [#uses=1] + %tmp4849 = bitcast %struct.ComponentParameters* %tmp47 to i8* ; <i8*> [#uses=1] + %tmp505152 = sext i16 %tmp4243 to i64 ; <i64> [#uses=1] + %tmp53 = getelementptr i8* %tmp4849, i64 %tmp505152 ; <i8*> [#uses=1] + %tmp5354 = bitcast 
i8* %tmp53 to i64* ; <i64*> [#uses=1] + %tmp58 = load i64* %tmp5354, align 8 ; <i64> [#uses=1] + %tmp59 = icmp eq i64 %tmp58, 0 ; <i1> [#uses=1] + br i1 %tmp59, label %UnifiedReturnBlock, label %cond_true63 + +cond_true63: ; preds = %cond_next + %tmp65 = getelementptr %struct.AGenericCall* %this, i32 0, i32 0 ; <%struct.AGenericManager**> [#uses=1] + %tmp66 = load %struct.AGenericManager** %tmp65, align 8 ; <%struct.AGenericManager*> [#uses=1] + %tmp69 = tail call i32 @_ZN15AGenericManager24DefaultComponentInstanceERP23ComponentInstanceRecord( %struct.AGenericManager* %tmp66, %struct.ComponentInstanceRecord** %instance ) ; <i32> [#uses=1] + ret i32 %tmp69 + +UnifiedReturnBlock: ; preds = %cond_next + ret i32 undef +} + +declare i32 @_ZN15AGenericManager24DefaultComponentInstanceERP23ComponentInstanceRecord(%struct.AGenericManager*, %struct.ComponentInstanceRecord**)
diff --git a/src/LLVM/test/CodeGen/X86/2008-03-25-TwoAddrPassBug.ll b/src/LLVM/test/CodeGen/X86/2008-03-25-TwoAddrPassBug.ll new file mode 100644 index 0000000..2d868e0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-03-25-TwoAddrPassBug.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 + +define void @t() { +entry: + %tmp455 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> < i32 1, i32 0, i32 3, i32 2 > ; <<4 x float>> [#uses=1] + %tmp457 = fmul <4 x float> zeroinitializer, %tmp455 ; <<4 x float>> [#uses=2] + %tmp461 = shufflevector <4 x float> %tmp457, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] + %tmp465 = shufflevector <4 x float> %tmp457, <4 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > ; <<4 x float>> [#uses=1] + %tmp466 = fsub <4 x float> %tmp461, %tmp465 ; <<4 x float>> [#uses=1] + %tmp536 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp466, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=1] + %tmp542 = shufflevector <4 x float> %tmp536, <4 x float> zeroinitializer, <4 x i32> < i32 6, i32 7, i32 2, i32 3 > ; <<4 x float>> [#uses=1] + %tmp580 = bitcast <4 x float> %tmp542 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp582 = and <4 x i32> %tmp580, zeroinitializer ; <<4 x i32>> [#uses=1] + %tmp591 = or <4 x i32> %tmp582, zeroinitializer ; <<4 x i32>> [#uses=1] + %tmp592 = bitcast <4 x i32> %tmp591 to <4 x float> ; <<4 x float>> [#uses=1] + %tmp609 = fdiv <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, %tmp592 ; <<4 x float>> [#uses=1] + %tmp652 = shufflevector <4 x float> %tmp609, <4 x float> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x float>> [#uses=1] + %tmp662 = fmul <4 x float> zeroinitializer, %tmp652 ; <<4 x float>> [#uses=1] + %tmp678 = shufflevector <4 x float> %tmp662, <4 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > ; <<4 x float>> [#uses=1] + %tmp753 = fmul <4 x float> zeroinitializer, %tmp678 ; <<4 x float>> [#uses=1] + %tmp754 = fsub <4 x float> zeroinitializer, %tmp753 ; <<4 x float>> [#uses=1] + store <4 x float> %tmp754, <4 x float>* null, align 16 + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll b/src/LLVM/test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll new file mode 100644 index 0000000..305968a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll
@@ -0,0 +1,40 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic -disable-fp-elim | grep add | grep 12 | not grep non_lazy_ptr +; Don't fold re-materialized load into a two address instruction + + %"struct.Smarts::Runnable" = type { i32 (...)**, i32 } + %struct.__sbuf = type { i8*, i32 } + %"struct.std::ios_base" = type { i32 (...)**, i32, i32, i32, i32, i32, %"struct.std::ios_base::_Callback_list"*, %struct.__sbuf, [8 x %struct.__sbuf], i32, %struct.__sbuf*, %"struct.std::locale" } + %"struct.std::ios_base::_Callback_list" = type { %"struct.std::ios_base::_Callback_list"*, void (i32, %"struct.std::ios_base"*, i32)*, i32, i32 } + %"struct.std::locale" = type { %"struct.std::locale::_Impl"* } + %"struct.std::locale::_Impl" = type { i32, %"struct.Smarts::Runnable"**, i32, %"struct.Smarts::Runnable"**, i8** } +@_ZTVSt9basic_iosIcSt11char_traitsIcEE = external constant [4 x i32 (...)*] ; <[4 x i32 (...)*]*> [#uses=1] +@_ZTTSt19basic_ostringstreamIcSt11char_traitsIcESaIcEE = external constant [4 x i8*] ; <[4 x i8*]*> [#uses=1] +@_ZTVSt19basic_ostringstreamIcSt11char_traitsIcESaIcEE = external constant [10 x i32 (...)*] ; <[10 x i32 (...)*]*> [#uses=2] +@_ZTVSt15basic_streambufIcSt11char_traitsIcEE = external constant [16 x i32 (...)*] ; <[16 x i32 (...)*]*> [#uses=1] +@_ZTVSt15basic_stringbufIcSt11char_traitsIcESaIcEE = external constant [16 x i32 (...)*] ; <[16 x i32 (...)*]*> [#uses=1] + +define void @_GLOBAL__I__ZN5Pooma5pinfoE() nounwind { +entry: + store i32 (...)** getelementptr ([10 x i32 (...)*]* @_ZTVSt19basic_ostringstreamIcSt11char_traitsIcESaIcEE, i32 0, i32 8), i32 (...)*** null, align 4 + %tmp96.i.i142.i = call i8* @_Znwm( i32 180 ) nounwind ; <i8*> [#uses=2] + call void @_ZNSt8ios_baseC2Ev( %"struct.std::ios_base"* null ) nounwind + store i32 (...)** getelementptr ([4 x i32 (...)*]* @_ZTVSt9basic_iosIcSt11char_traitsIcEE, i32 0, i32 2), i32 (...)*** null, align 4 + store i32 (...)** null, i32 (...)*** null, align 4 + %ctg2242.i.i163.i = 
getelementptr i8* %tmp96.i.i142.i, i32 0 ; <i8*> [#uses=1] + %tmp150.i.i164.i = load i8** getelementptr ([4 x i8*]* @_ZTTSt19basic_ostringstreamIcSt11char_traitsIcESaIcEE, i32 0, i64 2), align 4 ; <i8*> [#uses=1] + %tmp150151.i.i165.i = bitcast i8* %tmp150.i.i164.i to i32 (...)** ; <i32 (...)**> [#uses=1] + %tmp153.i.i166.i = bitcast i8* %ctg2242.i.i163.i to i32 (...)*** ; <i32 (...)***> [#uses=1] + store i32 (...)** %tmp150151.i.i165.i, i32 (...)*** %tmp153.i.i166.i, align 4 + %tmp159.i.i167.i = bitcast i8* %tmp96.i.i142.i to i32 (...)*** ; <i32 (...)***> [#uses=1] + store i32 (...)** getelementptr ([10 x i32 (...)*]* @_ZTVSt19basic_ostringstreamIcSt11char_traitsIcESaIcEE, i32 0, i32 3), i32 (...)*** %tmp159.i.i167.i, align 4 + store i32 (...)** getelementptr ([16 x i32 (...)*]* @_ZTVSt15basic_streambufIcSt11char_traitsIcEE, i32 0, i32 2), i32 (...)*** null, align 4 + call void @_ZNSt6localeC1Ev( %"struct.std::locale"* null ) nounwind + store i32 (...)** getelementptr ([16 x i32 (...)*]* @_ZTVSt15basic_stringbufIcSt11char_traitsIcESaIcEE, i32 0, i32 2), i32 (...)*** null, align 4 + unreachable +} + +declare i8* @_Znwm(i32) + +declare void @_ZNSt8ios_baseC2Ev(%"struct.std::ios_base"*) + +declare void @_ZNSt6localeC1Ev(%"struct.std::locale"*) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/2008-04-02-unnamedEH.ll b/src/LLVM/test/CodeGen/X86/2008-04-02-unnamedEH.ll new file mode 100644 index 0000000..ab8ec80 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-04-02-unnamedEH.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -disable-cfi | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin8" + +define void @_Z3bazv() { + call void @0( ) ; <i32>:1 [#uses=0] + ret void +} + +define internal void @""() { + call i32 @_Z3barv( ) ; <i32>:4 [#uses=1] + ret void +} +; CHECK: unnamed_1.eh + +declare i32 @_Z3barv()
diff --git a/src/LLVM/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll b/src/LLVM/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll new file mode 100644 index 0000000..5089e8c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -march=x86 -mattr=+mmx + +define i32 @t2() nounwind { +entry: + tail call void asm sideeffect "# top of block", "~{dirflag},~{fpsr},~{flags},~{di},~{si},~{dx},~{cx},~{ax}"( ) nounwind + tail call void asm sideeffect ".file \224443946.c\22", "~{dirflag},~{fpsr},~{flags}"( ) nounwind + tail call void asm sideeffect ".line 8", "~{dirflag},~{fpsr},~{flags}"( ) nounwind + %tmp1 = tail call x86_mmx asm sideeffect "movd $1, $0", "=={mm4},{bp},~{dirflag},~{fpsr},~{flags},~{memory}"( i32 undef ) nounwind ; <x86_mmx> [#uses=1] + tail call void asm sideeffect ".file \224443946.c\22", "~{dirflag},~{fpsr},~{flags}"( ) nounwind + tail call void asm sideeffect ".line 9", "~{dirflag},~{fpsr},~{flags}"( ) nounwind + %tmp3 = tail call i32 asm sideeffect "movd $1, $0", "=={bp},{mm3},~{dirflag},~{fpsr},~{flags},~{memory}"( x86_mmx undef ) nounwind ; <i32> [#uses=1] + tail call void asm sideeffect ".file \224443946.c\22", "~{dirflag},~{fpsr},~{flags}"( ) nounwind + tail call void asm sideeffect ".line 10", "~{dirflag},~{fpsr},~{flags}"( ) nounwind + tail call void asm sideeffect "movntq $0, 0($1,$2)", "{mm0},{di},{bp},~{dirflag},~{fpsr},~{flags},~{memory}"( x86_mmx undef, i32 undef, i32 %tmp3 ) nounwind + tail call void asm sideeffect ".file \224443946.c\22", "~{dirflag},~{fpsr},~{flags}"( ) nounwind + tail call void asm sideeffect ".line 11", "~{dirflag},~{fpsr},~{flags}"( ) nounwind + %tmp8 = tail call i32 asm sideeffect "movd $1, $0", "=={bp},{mm4},~{dirflag},~{fpsr},~{flags},~{memory}"( x86_mmx %tmp1 ) nounwind ; <i32> [#uses=0] + ret i32 undef +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-04-09-BranchFolding.ll b/src/LLVM/test/CodeGen/X86/2008-04-09-BranchFolding.ll new file mode 100644 index 0000000..f4b2d71 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-04-09-BranchFolding.ll
@@ -0,0 +1,48 @@ +; RUN: llc < %s -march=x86 | not grep jmp + + %struct..0anon = type { i32 } + %struct.binding_level = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.binding_level*, i8, i8, i8, i8, i8, i32, %struct.tree_node* } + %struct.lang_decl = type opaque + %struct.rtx_def = type { i16, i8, i8, [1 x %struct..0anon] } + %struct.tree_decl = type { [12 x i8], i8*, i32, %struct.tree_node*, i32, i8, i8, i8, i8, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct..0anon, { %struct.rtx_def* }, %struct.tree_node*, %struct.lang_decl* } + %struct.tree_node = type { %struct.tree_decl } + +define fastcc %struct.tree_node* @pushdecl(%struct.tree_node* %x) nounwind { +entry: + %tmp3.i40 = icmp eq %struct.binding_level* null, null ; <i1> [#uses=2] + br label %bb140 +bb140: ; preds = %entry + br i1 %tmp3.i40, label %bb160, label %bb17.i +bb17.i: ; preds = %bb140 + ret %struct.tree_node* null +bb143: ; preds = %entry + %tmp8.i43 = load %struct.tree_node** null, align 4 ; <%struct.tree_node*> [#uses=1] + br i1 %tmp3.i40, label %bb160, label %bb9.i48 +bb9.i48: ; preds = %bb143 + ret %struct.tree_node* null +bb160: ; preds = %bb143, %bb140 + %t.0.reg2mem.0 = phi %struct.tree_node* [ null, %bb140 ], [ %tmp8.i43, %bb143 ] ; <%struct.tree_node*> [#uses=1] + %tmp162 = icmp eq %struct.tree_node* %t.0.reg2mem.0, null ; <i1> [#uses=2] + br i1 %tmp162, label %bb174, label %bb165 +bb165: ; preds = %bb160 + br label %bb174 +bb174: ; preds = %bb165, %bb160 + %line.0 = phi i32 [ 0, %bb165 ], [ undef, %bb160 ] ; <i32> [#uses=1] + %file.0 = phi i8* [ null, %bb165 ], [ undef, %bb160 ] ; <i8*> [#uses=1] + br i1 %tmp162, label %bb344, label %bb73.i +bb73.i: ; preds = %bb174 + br i1 false, label %bb226.i, label %bb220.i +bb220.i: ; preds = %bb73.i + ret %struct.tree_node* null 
+bb226.i: ; preds = %bb73.i + br i1 false, label %bb260, label %bb273.i +bb273.i: ; preds = %bb226.i + ret %struct.tree_node* null +bb260: ; preds = %bb226.i + tail call void (i8*, i32, ...)* @pedwarn_with_file_and_line( i8* %file.0, i32 %line.0, i8* null ) nounwind + ret %struct.tree_node* null +bb344: ; preds = %bb174 + ret %struct.tree_node* null +} + +declare void @pedwarn_with_file_and_line(i8*, i32, ...) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll b/src/LLVM/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll new file mode 100644 index 0000000..0742371 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll
@@ -0,0 +1,50 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin +; RUN: llc < %s -mtriple=x86_64-apple-darwin -relocation-model=pic -disable-fp-elim -O0 -regalloc=fast +; PR5534 + + %struct.CGPoint = type { double, double } + %struct.NSArray = type { %struct.NSObject } + %struct.NSAssertionHandler = type { %struct.NSObject, i8* } + %struct.NSDockTile = type { %struct.NSObject, %struct.NSObject*, i8*, %struct.NSView*, %struct.NSView*, %struct.NSView*, %struct.NSArray*, %struct._SPFlags, %struct.CGPoint, [5 x %struct.NSObject*] } + %struct.NSDocument = type { %struct.NSObject, %struct.NSWindow*, %struct.NSObject*, %struct.NSURL*, %struct.NSArray*, %struct.NSPrintInfo*, i64, %struct.NSView*, %struct.NSObject*, %struct.NSObject*, %struct.NSUndoManager*, %struct._BCFlags2, %struct.NSArray* } + %struct.AA = type { %struct.NSObject, %struct.NSDocument*, %struct.NSURL*, %struct.NSArray*, %struct.NSArray* } + %struct.NSError = type { %struct.NSObject, i8*, i64, %struct.NSArray*, %struct.NSArray* } + %struct.NSImage = type { %struct.NSObject, %struct.NSArray*, %struct.CGPoint, %struct._BCFlags2, %struct.NSObject*, %struct._NSImageAuxiliary* } + %struct.NSMutableArray = type { %struct.NSArray } + %struct.NSObject = type { %struct.NSObject* } + %struct.NSPrintInfo = type { %struct.NSObject, %struct.NSMutableArray*, %struct.NSObject* } + %struct.NSRect = type { %struct.CGPoint, %struct.CGPoint } + %struct.NSRegion = type opaque + %struct.NSResponder = type { %struct.NSObject, %struct.NSObject* } + %struct.NSToolbar = type { %struct.NSObject, %struct.NSArray*, %struct.NSMutableArray*, %struct.NSMutableArray*, %struct.NSArray*, %struct.NSObject*, %struct.NSArray*, i8*, %struct.NSObject*, %struct.NSWindow*, %struct.NSObject*, %struct.NSObject*, i64, %struct._BCFlags2, i64, %struct.NSObject* } + %struct.NSURL = type { %struct.NSObject, %struct.NSArray*, %struct.NSURL*, i8*, i8* } + %struct.NSUndoManager = type { %struct.NSObject, %struct.NSObject*, %struct.NSObject*, 
%struct.NSArray*, i64, %struct._SPFlags, %struct.NSObject*, i8*, i8*, i8* } + %struct.NSView = type { %struct.NSResponder, %struct.NSRect, %struct.NSRect, %struct.NSObject*, %struct.NSObject*, %struct.NSWindow*, %struct.NSObject*, %struct.NSObject*, %struct.NSObject*, %struct.NSObject*, %struct._NSViewAuxiliary*, %struct._BCFlags, %struct._SPFlags } + %struct.NSWindow = type { %struct.NSResponder, %struct.NSRect, %struct.NSObject*, %struct.NSObject*, %struct.NSResponder*, %struct.NSView*, %struct.NSView*, %struct.NSObject*, %struct.NSObject*, i32, i64, i32, %struct.NSArray*, %struct.NSObject*, i8, i8, i8, i8, i8*, i8*, %struct.NSImage*, i32, %struct.NSMutableArray*, %struct.NSURL*, %struct.CGPoint*, %struct.NSArray*, %struct.NSArray*, %struct.__wFlags, %struct.NSObject*, %struct.NSView*, %struct.NSWindowAuxiliary* } + %struct.NSWindowAuxiliary = type { %struct.NSObject, %struct.NSArray*, %struct.NSDockTile*, %struct._NSWindowAnimator*, %struct.NSRect, i32, %struct.NSAssertionHandler*, %struct.NSUndoManager*, %struct.NSWindowController*, %struct.NSAssertionHandler*, %struct.NSObject*, i32, %struct.__CFRunLoopObserver*, %struct.__CFRunLoopObserver*, %struct.NSArray*, %struct.NSArray*, %struct.NSView*, %struct.NSRegion*, %struct.NSWindow*, %struct.NSWindow*, %struct.NSArray*, %struct.NSMutableArray*, %struct.NSArray*, %struct.NSWindow*, %struct.CGPoint, %struct.NSObject*, i8*, i8*, i32, %struct.NSObject*, %struct.NSArray*, double, %struct.CGPoint, %struct.NSArray*, %struct.NSMutableArray*, %struct.NSMutableArray*, %struct.NSWindow*, %struct.NSView*, %struct.NSArray*, %struct.__auxWFlags, i32, i8*, double, %struct.NSObject*, %struct.NSObject*, %struct.__CFArray*, %struct.NSRegion*, %struct.NSArray*, %struct.NSRect, %struct.NSToolbar*, %struct.NSRect, %struct.NSMutableArray* } + %struct.NSWindowController = type { %struct.NSResponder, %struct.NSWindow*, %struct.NSArray*, %struct.NSDocument*, %struct.NSArray*, %struct.NSObject*, %struct._SPFlags, %struct.NSArray*, 
%struct.NSObject* } + %struct._BCFlags = type <{ i8, i8, i8, i8 }> + %struct._BCFlags2 = type <{ i8, [3 x i8] }> + %struct._NSImageAuxiliary = type opaque + %struct._NSViewAuxiliary = type opaque + %struct._NSWindowAnimator = type opaque + %struct._SPFlags = type <{ i32 }> + %struct.__CFArray = type opaque + %struct.__CFRunLoopObserver = type opaque + %struct.__auxWFlags = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i16 } + %struct.__wFlags = type <{ i8, i8, i8, i8, i8, i8, i8, i8 }> + %struct._message_ref_t = type { %struct.NSObject* (%struct.NSObject*, %struct._message_ref_t*, ...)*, %struct.objc_selector* } + %struct.objc_selector = type opaque +@"\01L_OBJC_MESSAGE_REF_228" = internal global %struct._message_ref_t zeroinitializer ; <%struct._message_ref_t*> [#uses=1] +@llvm.used1 = appending global [1 x i8*] [ i8* bitcast (void (%struct.AA*, %struct._message_ref_t*, %struct.NSError*, i64, %struct.NSObject*, %struct.objc_selector*, i8*)* @"-[AA BB:optionIndex:delegate:CC:contextInfo:]" to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +define void @"-[AA BB:optionIndex:delegate:CC:contextInfo:]"(%struct.AA* %self, %struct._message_ref_t* %_cmd, %struct.NSError* %inError, i64 %inOptionIndex, %struct.NSObject* %inDelegate, %struct.objc_selector* %inDidRecoverSelector, i8* %inContextInfo) { +entry: + %tmp105 = load %struct.NSArray** null, align 8 ; <%struct.NSArray*> [#uses=1] + %tmp107 = load %struct.NSObject** null, align 8 ; <%struct.NSObject*> [#uses=1] + call void null( %struct.NSObject* %tmp107, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_228", %struct.NSArray* %tmp105, i8 signext 0 ) + %tmp111 = call %struct.NSObject* (%struct.NSObject*, %struct.objc_selector*, ...)* @objc_msgSend( %struct.NSObject* null, %struct.objc_selector* null, i32 0, i8* null ) ; <%struct.NSObject*> [#uses=0] + ret void +} + +declare %struct.NSObject* @objc_msgSend(%struct.NSObject*, %struct.objc_selector*, ...)
diff --git a/src/LLVM/test/CodeGen/X86/2008-04-16-CoalescerBug.ll b/src/LLVM/test/CodeGen/X86/2008-04-16-CoalescerBug.ll new file mode 100644 index 0000000..3ccc0fe --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-04-16-CoalescerBug.ll
@@ -0,0 +1,33 @@ +; RUN: llc < %s -march=x86 + +define void @Hubba(i8* %saveunder, i32 %firstBlob, i32 %select) nounwind { +entry: + br i1 false, label %bb53.us, label %bb53 +bb53.us: ; preds = %bb94.us, %bb53.us, %entry + switch i8 1, label %bb71.us [ + i8 0, label %bb53.us + i8 1, label %bb94.us + ] +bb94.us: ; preds = %bb71.us, %bb53.us + %result.0.us = phi i32 [ %tmp93.us, %bb71.us ], [ 0, %bb53.us ] ; <i32> [#uses=2] + %tmp101.us = lshr i32 %result.0.us, 3 ; <i32> [#uses=1] + %result.0163.us = trunc i32 %result.0.us to i16 ; <i16> [#uses=2] + shl i16 %result.0163.us, 7 ; <i16>:0 [#uses=1] + %tmp106.us = and i16 %0, -1024 ; <i16> [#uses=1] + shl i16 %result.0163.us, 2 ; <i16>:1 [#uses=1] + %tmp109.us = and i16 %1, -32 ; <i16> [#uses=1] + %tmp111112.us = trunc i32 %tmp101.us to i16 ; <i16> [#uses=1] + %tmp110.us = or i16 %tmp109.us, %tmp111112.us ; <i16> [#uses=1] + %tmp113.us = or i16 %tmp110.us, %tmp106.us ; <i16> [#uses=1] + store i16 %tmp113.us, i16* null, align 2 + br label %bb53.us +bb71.us: ; preds = %bb53.us + %tmp80.us = load i8* null, align 1 ; <i8> [#uses=1] + %tmp8081.us = zext i8 %tmp80.us to i32 ; <i32> [#uses=1] + %tmp87.us = mul i32 %tmp8081.us, 0 ; <i32> [#uses=1] + %tmp92.us = add i32 0, %tmp87.us ; <i32> [#uses=1] + %tmp93.us = udiv i32 %tmp92.us, 255 ; <i32> [#uses=1] + br label %bb94.us +bb53: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-04-16-ReMatBug.ll b/src/LLVM/test/CodeGen/X86/2008-04-16-ReMatBug.ll new file mode 100644 index 0000000..109069e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-04-16-ReMatBug.ll
@@ -0,0 +1,46 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin -disable-cgp-branch-opts | grep movw | not grep {, %e} + + %struct.DBC_t = type { i32, i8*, i16, %struct.DBC_t*, i8*, i8*, i8*, i8*, i8*, %struct.DBC_t*, i32, i32, i32, i32, i8*, i8*, i8*, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i16, i16, i32*, i8, i16, %struct.DRVOPT*, i16 } + %struct.DRVOPT = type { i16, i32, i8, %struct.DRVOPT* } + %struct.GENV_t = type { i32, i8*, i16, i8*, i8*, i32, i32, i32, i32, %struct.DBC_t*, i16 } + %struct.pthread_mutex_t = type { i32, [40 x i8] } +@iodbcdm_global_lock = external global %struct.pthread_mutex_t ; <%struct.pthread_mutex_t*> [#uses=1] + +define i16 @SQLDriversW(i8* %henv, i16 zeroext %fDir, i32* %szDrvDesc, i16 signext %cbDrvDescMax, i16* %pcbDrvDesc, i32* %szDrvAttr, i16 signext %cbDrvAttrMax, i16* %pcbDrvAttr) nounwind { +entry: + %tmp12 = bitcast i8* %henv to %struct.GENV_t* ; <%struct.GENV_t*> [#uses=1] + br i1 true, label %bb28, label %bb +bb: ; preds = %entry + ret i16 0 +bb28: ; preds = %entry + br i1 false, label %bb37, label %done +bb37: ; preds = %bb28 + %tmp46 = getelementptr %struct.GENV_t* %tmp12, i32 0, i32 10 ; <i16*> [#uses=1] + store i16 0, i16* %tmp46, align 4 + br i1 false, label %bb74, label %bb92 +bb74: ; preds = %bb37 + br label %bb92 +bb92: ; preds = %bb74, %bb37 + %tmp95180 = shl i16 %cbDrvAttrMax, 2 ; <i16> [#uses=1] + %tmp100178 = shl i16 %cbDrvDescMax, 2 ; <i16> [#uses=1] + %tmp113 = tail call i16 @SQLDrivers_Internal( i8* %henv, i16 zeroext %fDir, i8* null, i16 signext %tmp100178, i16* %pcbDrvDesc, i8* null, i16 signext %tmp95180, i16* %pcbDrvAttr, i8 zeroext 87 ) nounwind ; <i16> [#uses=1] + br i1 false, label %done, label %bb137 +bb137: ; preds = %bb92 + ret i16 0 +done: ; preds = %bb92, %bb28 + %retcode.0 = phi i16 [ -2, %bb28 ], [ %tmp113, %bb92 ] ; <i16> [#uses=2] + br i1 false, label %bb167, label %bb150 +bb150: ; preds = %done + %tmp157158 = sext i16 %retcode.0 to i32 ; <i32> [#uses=1] + tail call void 
@trace_SQLDriversW( i32 1, i32 %tmp157158, i8* %henv, i16 zeroext %fDir, i32* %szDrvDesc, i16 signext %cbDrvDescMax, i16* %pcbDrvDesc, i32* %szDrvAttr, i16 signext %cbDrvAttrMax, i16* %pcbDrvAttr ) nounwind + ret i16 0 +bb167: ; preds = %done + %tmp168 = tail call i32 @pthread_mutex_unlock( %struct.pthread_mutex_t* @iodbcdm_global_lock ) nounwind ; <i32> [#uses=0] + ret i16 %retcode.0 +} + +declare i32 @pthread_mutex_unlock(%struct.pthread_mutex_t*) + +declare i16 @SQLDrivers_Internal(i8*, i16 zeroext , i8*, i16 signext , i16*, i8*, i16 signext , i16*, i8 zeroext ) nounwind + +declare void @trace_SQLDriversW(i32, i32, i8*, i16 zeroext , i32*, i16 signext , i16*, i32*, i16 signext , i16*)
diff --git a/src/LLVM/test/CodeGen/X86/2008-04-17-CoalescerBug.ll b/src/LLVM/test/CodeGen/X86/2008-04-17-CoalescerBug.ll new file mode 100644 index 0000000..859041e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
@@ -0,0 +1,177 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin | grep xorl | grep {%e} +; Make sure xorl operands are 32-bit registers. + + %struct.tm = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8* } + %struct.wxDateTime = type { %struct.wxLongLong } + %"struct.wxDateTime::TimeZone" = type { i32 } + %struct.wxLongLong = type { i64 } + %struct.wxString = type { %struct.wxStringBase } + %struct.wxStringBase = type { i32* } +@.str = external constant [27 x i32] ; <[27 x i32]*> [#uses=1] +@.str4 = external constant [14 x i32] ; <[14 x i32]*> [#uses=1] +@_ZZNK10wxDateTime5GetTmERKNS_8TimeZoneEE12__FUNCTION__ = external constant [6 x i8] ; <[6 x i8]*> [#uses=1] +@.str33 = external constant [29 x i32] ; <[29 x i32]*> [#uses=1] +@.str89 = external constant [5 x i32] ; <[5 x i32]*> [#uses=1] + +define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(%struct.wxString* noalias sret %agg.result, %struct.wxDateTime* %this, i32* %format, %"struct.wxDateTime::TimeZone"* %tz, i1 %foo) { +entry: + br i1 %foo, label %bb116.i, label %bb115.critedge.i +bb115.critedge.i: ; preds = %entry + ret void +bb116.i: ; preds = %entry + br i1 %foo, label %bb52.i.i, label %bb3118 +bb3118: ; preds = %bb116.i + ret void +bb52.i.i: ; preds = %bb116.i + br i1 %foo, label %bb142.i, label %bb115.critedge.i.i +bb115.critedge.i.i: ; preds = %bb52.i.i + ret void +bb142.i: ; preds = %bb52.i.i + br i1 %foo, label %bb161.i, label %bb182.i +bb161.i: ; preds = %bb142.i + br label %bb3261 +bb182.i: ; preds = %bb142.i + ret void +bb3261: ; preds = %bb7834, %bb161.i + %tmp3263 = load i32* null, align 4 ; <i32> [#uses=1] + %tmp3264 = icmp eq i32 %tmp3263, 37 ; <i1> [#uses=1] + br i1 %tmp3264, label %bb3306, label %bb3267 +bb3267: ; preds = %bb3261 + ret void +bb3306: ; preds = %bb3261 + %tmp3310 = invoke %struct.wxStringBase* @_ZN12wxStringBaseaSEPKw( %struct.wxStringBase* null, i32* getelementptr ([5 x i32]* @.str89, i32 0, i32 0) ) + to label %bb3314 unwind label %lpad ; <%struct.wxStringBase*> 
[#uses=0] +bb3314: ; preds = %bb3306 + %tmp3316 = load i32* null, align 4 ; <i32> [#uses=1] + switch i32 %tmp3316, label %bb7595 [ + i32 0, label %bb7819 + i32 37, label %bb7806 + i32 66, label %bb3477 + i32 72, label %bb5334 + i32 73, label %bb5484 + i32 77, label %bb6118 + i32 83, label %bb6406 + i32 85, label %bb6556 + i32 87, label %bb6708 + i32 89, label %bb7308 + i32 98, label %bb3477 + i32 99, label %bb3626 + i32 100, label %bb5184 + i32 106, label %bb5657 + i32 108, label %bb5809 + i32 109, label %bb5968 + i32 119, label %bb6860 + i32 120, label %bb3626 + i32 121, label %bb7158 + ] +bb3477: ; preds = %bb3314, %bb3314 + ret void +bb3626: ; preds = %bb3314, %bb3314 + ret void +bb5184: ; preds = %bb3314 + ret void +bb5334: ; preds = %bb3314 + ret void +bb5484: ; preds = %bb3314 + ret void +bb5657: ; preds = %bb3314 + %tmp5661 = invoke zeroext i16 @_ZNK10wxDateTime12GetDayOfYearERKNS_8TimeZoneE( %struct.wxDateTime* %this, %"struct.wxDateTime::TimeZone"* %tz ) + to label %invcont5660 unwind label %lpad ; <i16> [#uses=0] +invcont5660: ; preds = %bb5657 + ret void +bb5809: ; preds = %bb3314 + %tmp61.i.i8486 = icmp sgt i64 0, -1 ; <i1> [#uses=1] + %tmp95.i.i8490 = icmp slt i64 0, 2147483647000 ; <i1> [#uses=1] + %bothcond9308 = and i1 %tmp61.i.i8486, %tmp95.i.i8490 ; <i1> [#uses=1] + br i1 %bothcond9308, label %bb91.i8504, label %bb115.critedge.i.i8492 +bb115.critedge.i.i8492: ; preds = %bb5809 + ret void +bb91.i8504: ; preds = %bb5809 + br i1 %foo, label %bb155.i8541, label %bb182.i8560 +bb155.i8541: ; preds = %bb91.i8504 + %tmp156.i85398700 = invoke %struct.tm* @gmtime_r( i32* null, %struct.tm* null ) + to label %bb182.i8560 unwind label %lpad ; <%struct.tm*> [#uses=1] +bb182.i8560: ; preds = %bb155.i8541, %bb91.i8504 + %tm48.0.i8558 = phi %struct.tm* [ null, %bb91.i8504 ], [ %tmp156.i85398700, %bb155.i8541 ] ; <%struct.tm*> [#uses=0] + br i1 %foo, label %bb278.i8617, label %bb187.i8591 +bb187.i8591: ; preds = %bb182.i8560 + %tmp245.i8588 = srem i64 0, 86400000 ; 
<i64> [#uses=1] + br i1 %foo, label %bb264.i8592, label %bb265.i8606 +bb264.i8592: ; preds = %bb187.i8591 + ret void +bb265.i8606: ; preds = %bb187.i8591 + %tmp268269.i8593 = trunc i64 %tmp245.i8588 to i32 ; <i32> [#uses=1] + %tmp273.i8594 = srem i32 %tmp268269.i8593, 1000 ; <i32> [#uses=1] + %tmp273274.i8595 = trunc i32 %tmp273.i8594 to i16 ; <i16> [#uses=1] + br label %invcont5814 +bb278.i8617: ; preds = %bb182.i8560 + %timeOnly50.0.i8622 = add i32 0, 0 ; <i32> [#uses=1] + br i1 %foo, label %bb440.i8663, label %bb448.i8694 +bb440.i8663: ; preds = %bb278.i8617 + invoke void @_Z10wxOnAssertPKwiPKcS0_S0_( i32* getelementptr ([27 x i32]* @.str, i32 0, i32 0), i32 1717, i8* getelementptr ([6 x i8]* @_ZZNK10wxDateTime5GetTmERKNS_8TimeZoneEE12__FUNCTION__, i32 0, i32 0), i32* getelementptr ([29 x i32]* @.str33, i32 0, i32 0), i32* getelementptr ([14 x i32]* @.str4, i32 0, i32 0) ) + to label %bb448.i8694 unwind label %lpad +bb448.i8694: ; preds = %bb440.i8663, %bb278.i8617 + %tmp477.i8669 = srem i32 %timeOnly50.0.i8622, 1000 ; <i32> [#uses=1] + %tmp477478.i8670 = trunc i32 %tmp477.i8669 to i16 ; <i16> [#uses=1] + br label %invcont5814 +invcont5814: ; preds = %bb448.i8694, %bb265.i8606 + %tmp812.0.0 = phi i16 [ %tmp477478.i8670, %bb448.i8694 ], [ %tmp273274.i8595, %bb265.i8606 ] ; <i16> [#uses=1] + %tmp58165817 = zext i16 %tmp812.0.0 to i32 ; <i32> [#uses=1] + invoke void (%struct.wxString*, i32*, ...)* @_ZN8wxString6FormatEPKwz( %struct.wxString* noalias sret null, i32* null, i32 %tmp58165817 ) + to label %invcont5831 unwind label %lpad +invcont5831: ; preds = %invcont5814 + %tmp5862 = invoke zeroext i8 @_ZN12wxStringBase10ConcatSelfEmPKwm( %struct.wxStringBase* null, i32 0, i32* null, i32 0 ) + to label %bb7834 unwind label %lpad8185 ; <i8> [#uses=0] +bb5968: ; preds = %bb3314 + invoke void (%struct.wxString*, i32*, ...)* @_ZN8wxString6FormatEPKwz( %struct.wxString* noalias sret null, i32* null, i32 0 ) + to label %invcont5981 unwind label %lpad +invcont5981: ; preds = 
%bb5968 + ret void +bb6118: ; preds = %bb3314 + ret void +bb6406: ; preds = %bb3314 + ret void +bb6556: ; preds = %bb3314 + ret void +bb6708: ; preds = %bb3314 + ret void +bb6860: ; preds = %bb3314 + ret void +bb7158: ; preds = %bb3314 + ret void +bb7308: ; preds = %bb3314 + ret void +bb7595: ; preds = %bb3314 + ret void +bb7806: ; preds = %bb3314 + %tmp7814 = invoke %struct.wxStringBase* @_ZN12wxStringBase6appendEmw( %struct.wxStringBase* null, i32 1, i32 0 ) + to label %bb7834 unwind label %lpad ; <%struct.wxStringBase*> [#uses=0] +bb7819: ; preds = %bb3314 + ret void +bb7834: ; preds = %bb7806, %invcont5831 + br label %bb3261 +lpad: ; preds = %bb7806, %bb5968, %invcont5814, %bb440.i8663, %bb155.i8541, %bb5657, %bb3306 + %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + ret void +lpad8185: ; preds = %invcont5831 + %exn8185 = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + ret void +} + +declare void @_Z10wxOnAssertPKwiPKcS0_S0_(i32*, i32, i8*, i32*, i32*) + +declare zeroext i8 @_ZN12wxStringBase10ConcatSelfEmPKwm(%struct.wxStringBase*, i32, i32*, i32) + +declare %struct.tm* @gmtime_r(i32*, %struct.tm*) + +declare zeroext i16 @_ZNK10wxDateTime12GetDayOfYearERKNS_8TimeZoneE(%struct.wxDateTime*, %"struct.wxDateTime::TimeZone"*) + +declare %struct.wxStringBase* @_ZN12wxStringBase6appendEmw(%struct.wxStringBase*, i32, i32) + +declare %struct.wxStringBase* @_ZN12wxStringBaseaSEPKw(%struct.wxStringBase*, i32*) + +declare void @_ZN8wxString6FormatEPKwz(%struct.wxString* noalias sret , i32*, ...) + +declare i32 @__gxx_personality_v0(...)
diff --git a/src/LLVM/test/CodeGen/X86/2008-04-24-MemCpyBug.ll b/src/LLVM/test/CodeGen/X86/2008-04-24-MemCpyBug.ll new file mode 100644 index 0000000..6389267 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-04-24-MemCpyBug.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86 | not grep 120 +; Don't accidentally add the offset twice for trailing bytes. + + %struct.S63 = type { [63 x i8] } +@g1s63 = external global %struct.S63 ; <%struct.S63*> [#uses=1] + +declare void @test63(%struct.S63* byval align 4 ) nounwind + +define void @testit63_entry_2E_ce() nounwind { + tail call void @test63( %struct.S63* byval align 4 @g1s63 ) nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll b/src/LLVM/test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll new file mode 100644 index 0000000..4eaca17 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -mattr=+sse41 +; rdar://5886601 +; gcc testsuite: gcc.target/i386/sse4_1-pblendw.c +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin8" + +define i32 @main() nounwind { +entry: + %tmp122 = load <2 x i64>* null, align 16 ; <<2 x i64>> [#uses=1] + %tmp126 = bitcast <2 x i64> %tmp122 to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp129 = call <8 x i16> @llvm.x86.sse41.pblendw( <8 x i16> zeroinitializer, <8 x i16> %tmp126, i32 2 ) nounwind ; <<8 x i16>> [#uses=0] + ret i32 0 +} + +declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/2008-04-26-Asm-Optimize-Imm.ll b/src/LLVM/test/CodeGen/X86/2008-04-26-Asm-Optimize-Imm.ll new file mode 100644 index 0000000..6e9a629 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-04-26-Asm-Optimize-Imm.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s | FileCheck %s +; rdar://5720231 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin8" + +define void @test() nounwind { +; CHECK: test: +; CHECK-NOT: ret +; CHECK: 1 $2 3 +; CHECK: ret + + tail call void asm sideeffect " ${0:c} $1 ${2:c} ", "imr,imr,i,~{dirflag},~{fpsr},~{flags}"( i32 1, i32 2, i32 3 ) nounwind + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/2008-04-28-CoalescerBug.ll b/src/LLVM/test/CodeGen/X86/2008-04-28-CoalescerBug.ll new file mode 100644 index 0000000..5b97eb7 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-04-28-CoalescerBug.ll
@@ -0,0 +1,167 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movl > %t +; RUN: not grep {r\[abcd\]x} %t +; RUN: not grep {r\[ds\]i} %t +; RUN: not grep {r\[bs\]p} %t + + %struct.BITMAP = type { i16, i16, i32, i32, i32, i32, i32, i32, i8*, i8* } + %struct.BltData = type { float, float, float, float } + %struct.BltDepth = type { i32, i8**, i32, %struct.BITMAP* (%struct.BltDepth**, %struct.BITMAP*, i32, i32, float*, float, i32)*, i32 (%struct.BltDepth**, %struct.BltOp*)*, i32 (%struct.BltDepth**, %struct.BltOp*, %struct.BltImg*)*, i32 (%struct.BltDepth**, %struct.BltOp*, %struct.BltSh*)*, [28 x [2 x [2 x i32]]]*, %struct.BltData* } + %struct.BltImg = type { i32, i8, i8, i8, float, float*, float*, i32, i32, float*, i32 (i8*, i8*, i8**, i32*, i8**, i32*)*, i8* } + %struct.BltOp = type { i8, i8, i8, i8, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i8* } + %struct.BltSh = type { i8, i8, i8, i8, float, float*, float*, float*, float*, i32, i32, float*, float*, float* } + +define void @t(%struct.BltDepth* %depth, %struct.BltOp* %bop, i32 %mode) nounwind { +entry: + switch i32 %mode, label %return [ + i32 1, label %bb2898.us + i32 18, label %bb13086.preheader + ] + +bb13086.preheader: ; preds = %entry + %tmp13098 = icmp eq i32 0, 0 ; <i1> [#uses=1] + %tmp13238 = icmp eq i32 0, 0 ; <i1> [#uses=1] + br label %bb13088 + +bb2898.us: ; preds = %bb2898.us, %entry + br label %bb2898.us + +bb13088: ; preds = %bb13572, %bb13567, %bb13107, %bb13086.preheader + br i1 %tmp13098, label %bb13107, label %bb13101 + +bb13101: ; preds = %bb13088 + br label %bb13107 + +bb13107: ; preds = %bb13101, %bb13088 + %iftmp.684.0 = phi i32 [ 0, %bb13101 ], [ 65535, %bb13088 ] ; <i32> [#uses=2] + %tmp13111 = load i64* null, align 8 ; <i64> [#uses=3] + %tmp13116 = lshr i64 %tmp13111, 16 ; <i64> [#uses=1] + %tmp1311613117 = trunc i64 %tmp13116 to i32 ; <i32> [#uses=1] + %tmp13118 = and i32 %tmp1311613117, 65535 ; 
<i32> [#uses=1] + %tmp13120 = lshr i64 %tmp13111, 32 ; <i64> [#uses=1] + %tmp1312013121 = trunc i64 %tmp13120 to i32 ; <i32> [#uses=1] + %tmp13122 = and i32 %tmp1312013121, 65535 ; <i32> [#uses=2] + %tmp13124 = lshr i64 %tmp13111, 48 ; <i64> [#uses=1] + %tmp1312413125 = trunc i64 %tmp13124 to i32 ; <i32> [#uses=2] + %tmp1314013141not = xor i16 0, -1 ; <i16> [#uses=1] + %tmp1314013141not13142 = zext i16 %tmp1314013141not to i32 ; <i32> [#uses=3] + %tmp13151 = mul i32 %tmp13122, %tmp1314013141not13142 ; <i32> [#uses=1] + %tmp13154 = mul i32 %tmp1312413125, %tmp1314013141not13142 ; <i32> [#uses=1] + %tmp13157 = mul i32 %iftmp.684.0, %tmp1314013141not13142 ; <i32> [#uses=1] + %tmp13171 = add i32 %tmp13151, 1 ; <i32> [#uses=1] + %tmp13172 = add i32 %tmp13171, 0 ; <i32> [#uses=1] + %tmp13176 = add i32 %tmp13154, 1 ; <i32> [#uses=1] + %tmp13177 = add i32 %tmp13176, 0 ; <i32> [#uses=1] + %tmp13181 = add i32 %tmp13157, 1 ; <i32> [#uses=1] + %tmp13182 = add i32 %tmp13181, 0 ; <i32> [#uses=1] + %tmp13188 = lshr i32 %tmp13172, 16 ; <i32> [#uses=1] + %tmp13190 = lshr i32 %tmp13177, 16 ; <i32> [#uses=1] + %tmp13192 = lshr i32 %tmp13182, 16 ; <i32> [#uses=1] + %tmp13198 = sub i32 %tmp13118, 0 ; <i32> [#uses=1] + %tmp13201 = sub i32 %tmp13122, %tmp13188 ; <i32> [#uses=1] + %tmp13204 = sub i32 %tmp1312413125, %tmp13190 ; <i32> [#uses=1] + %tmp13207 = sub i32 %iftmp.684.0, %tmp13192 ; <i32> [#uses=1] + %tmp1320813209 = zext i32 %tmp13204 to i64 ; <i64> [#uses=1] + %tmp13211 = shl i64 %tmp1320813209, 48 ; <i64> [#uses=1] + %tmp1321213213 = zext i32 %tmp13201 to i64 ; <i64> [#uses=1] + %tmp13214 = shl i64 %tmp1321213213, 32 ; <i64> [#uses=1] + %tmp13215 = and i64 %tmp13214, 281470681743360 ; <i64> [#uses=1] + %tmp1321713218 = zext i32 %tmp13198 to i64 ; <i64> [#uses=1] + %tmp13219 = shl i64 %tmp1321713218, 16 ; <i64> [#uses=1] + %tmp13220 = and i64 %tmp13219, 4294901760 ; <i64> [#uses=1] + %tmp13216 = or i64 %tmp13211, 0 ; <i64> [#uses=1] + %tmp13221 = or i64 %tmp13216, %tmp13215 ; 
<i64> [#uses=1] + %tmp13225 = or i64 %tmp13221, %tmp13220 ; <i64> [#uses=4] + %tmp1322713228 = trunc i32 %tmp13207 to i16 ; <i16> [#uses=4] + %tmp13233 = icmp eq i16 %tmp1322713228, 0 ; <i1> [#uses=1] + br i1 %tmp13233, label %bb13088, label %bb13236 + +bb13236: ; preds = %bb13107 + br i1 false, label %bb13567, label %bb13252 + +bb13252: ; preds = %bb13236 + %tmp1329013291 = zext i16 %tmp1322713228 to i64 ; <i64> [#uses=8] + %tmp13296 = lshr i64 %tmp13225, 16 ; <i64> [#uses=1] + %tmp13297 = and i64 %tmp13296, 65535 ; <i64> [#uses=1] + %tmp13299 = lshr i64 %tmp13225, 32 ; <i64> [#uses=1] + %tmp13300 = and i64 %tmp13299, 65535 ; <i64> [#uses=1] + %tmp13302 = lshr i64 %tmp13225, 48 ; <i64> [#uses=1] + %tmp13306 = sub i64 %tmp1329013291, 0 ; <i64> [#uses=0] + %tmp13309 = sub i64 %tmp1329013291, %tmp13297 ; <i64> [#uses=1] + %tmp13312 = sub i64 %tmp1329013291, %tmp13300 ; <i64> [#uses=1] + %tmp13315 = sub i64 %tmp1329013291, %tmp13302 ; <i64> [#uses=1] + %tmp13318 = mul i64 %tmp1329013291, %tmp1329013291 ; <i64> [#uses=1] + br i1 false, label %bb13339, label %bb13324 + +bb13324: ; preds = %bb13252 + br i1 false, label %bb13339, label %bb13330 + +bb13330: ; preds = %bb13324 + %tmp13337 = sdiv i64 0, 0 ; <i64> [#uses=1] + br label %bb13339 + +bb13339: ; preds = %bb13330, %bb13324, %bb13252 + %r0120.0 = phi i64 [ %tmp13337, %bb13330 ], [ 0, %bb13252 ], [ 4294836225, %bb13324 ] ; <i64> [#uses=1] + br i1 false, label %bb13360, label %bb13345 + +bb13345: ; preds = %bb13339 + br i1 false, label %bb13360, label %bb13351 + +bb13351: ; preds = %bb13345 + %tmp13354 = mul i64 0, %tmp13318 ; <i64> [#uses=1] + %tmp13357 = sub i64 %tmp1329013291, %tmp13309 ; <i64> [#uses=1] + %tmp13358 = sdiv i64 %tmp13354, %tmp13357 ; <i64> [#uses=1] + br label %bb13360 + +bb13360: ; preds = %bb13351, %bb13345, %bb13339 + %r1121.0 = phi i64 [ %tmp13358, %bb13351 ], [ 0, %bb13339 ], [ 4294836225, %bb13345 ] ; <i64> [#uses=1] + br i1 false, label %bb13402, label %bb13387 + +bb13387: ; preds = %bb13360 
+ br label %bb13402 + +bb13402: ; preds = %bb13387, %bb13360 + %r3123.0 = phi i64 [ 0, %bb13360 ], [ 4294836225, %bb13387 ] ; <i64> [#uses=1] + %tmp13404 = icmp eq i16 %tmp1322713228, -1 ; <i1> [#uses=1] + br i1 %tmp13404, label %bb13435, label %bb13407 + +bb13407: ; preds = %bb13402 + br label %bb13435 + +bb13435: ; preds = %bb13407, %bb13402 + %r0120.1 = phi i64 [ 0, %bb13407 ], [ %r0120.0, %bb13402 ] ; <i64> [#uses=0] + %r1121.1 = phi i64 [ 0, %bb13407 ], [ %r1121.0, %bb13402 ] ; <i64> [#uses=0] + %r3123.1 = phi i64 [ 0, %bb13407 ], [ %r3123.0, %bb13402 ] ; <i64> [#uses=0] + %tmp13450 = mul i64 0, %tmp13312 ; <i64> [#uses=0] + %tmp13455 = mul i64 0, %tmp13315 ; <i64> [#uses=0] + %tmp13461 = add i64 0, %tmp1329013291 ; <i64> [#uses=1] + %tmp13462 = mul i64 %tmp13461, 65535 ; <i64> [#uses=1] + %tmp13466 = sub i64 %tmp13462, 0 ; <i64> [#uses=1] + %tmp13526 = add i64 %tmp13466, 1 ; <i64> [#uses=1] + %tmp13527 = add i64 %tmp13526, 0 ; <i64> [#uses=1] + %tmp13528 = ashr i64 %tmp13527, 16 ; <i64> [#uses=4] + %tmp13536 = sub i64 %tmp13528, 0 ; <i64> [#uses=1] + %tmp13537 = shl i64 %tmp13536, 32 ; <i64> [#uses=1] + %tmp13538 = and i64 %tmp13537, 281470681743360 ; <i64> [#uses=1] + %tmp13542 = sub i64 %tmp13528, 0 ; <i64> [#uses=1] + %tmp13543 = shl i64 %tmp13542, 16 ; <i64> [#uses=1] + %tmp13544 = and i64 %tmp13543, 4294901760 ; <i64> [#uses=1] + %tmp13548 = sub i64 %tmp13528, 0 ; <i64> [#uses=1] + %tmp13549 = and i64 %tmp13548, 65535 ; <i64> [#uses=1] + %tmp13539 = or i64 %tmp13538, 0 ; <i64> [#uses=1] + %tmp13545 = or i64 %tmp13539, %tmp13549 ; <i64> [#uses=1] + %tmp13550 = or i64 %tmp13545, %tmp13544 ; <i64> [#uses=1] + %tmp1355213553 = trunc i64 %tmp13528 to i16 ; <i16> [#uses=1] + br label %bb13567 + +bb13567: ; preds = %bb13435, %bb13236 + %tsp1040.0.0 = phi i64 [ %tmp13550, %bb13435 ], [ %tmp13225, %bb13236 ] ; <i64> [#uses=0] + %tsp1040.1.0 = phi i16 [ %tmp1355213553, %bb13435 ], [ %tmp1322713228, %bb13236 ] ; <i16> [#uses=1] + br i1 %tmp13238, label %bb13088, 
label %bb13572 + +bb13572: ; preds = %bb13567 + store i16 %tsp1040.1.0, i16* null, align 2 + br label %bb13088 + +return: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-04-28-CyclicSchedUnit.ll b/src/LLVM/test/CodeGen/X86/2008-04-28-CyclicSchedUnit.ll new file mode 100644 index 0000000..6e8e98d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-04-28-CyclicSchedUnit.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=x86 + +define i64 @t(i64 %maxIdleDuration) nounwind { + call void asm sideeffect "wrmsr", "{cx},A,~{dirflag},~{fpsr},~{flags}"( i32 416, i64 0 ) nounwind + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-05-01-InvalidOrdCompare.ll b/src/LLVM/test/CodeGen/X86/2008-05-01-InvalidOrdCompare.ll new file mode 100644 index 0000000..a708224 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-05-01-InvalidOrdCompare.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -enable-unsafe-fp-math -march=x86 | grep jnp +; rdar://5902801 + +declare void @test2() + +define i32 @test(double %p) nounwind { + %tmp5 = fcmp uno double %p, 0.000000e+00 + br i1 %tmp5, label %bb, label %UnifiedReturnBlock +bb: + call void @test2() + ret i32 17 +UnifiedReturnBlock: + ret i32 42 +} +
diff --git a/src/LLVM/test/CodeGen/X86/2008-05-09-PHIElimBug.ll b/src/LLVM/test/CodeGen/X86/2008-05-09-PHIElimBug.ll new file mode 100644 index 0000000..cea0076 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-05-09-PHIElimBug.ll
@@ -0,0 +1,25 @@ +; RUN: llc < %s -march=x86 + + %struct.V = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x i32>, float*, float*, float*, float*, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, i32, i32, i32, i32, i32, i32, i32, i32 } + +define fastcc void @t() nounwind { +entry: + br i1 false, label %bb23816.preheader, label %bb23821 + +bb23816.preheader: ; preds = %entry + %tmp23735 = and i32 0, 2 ; <i32> [#uses=0] + br label %bb23830 + +bb23821: ; preds = %entry + br i1 false, label %bb23830, label %bb23827 + +bb23827: ; preds = %bb23821 + %tmp23829 = getelementptr %struct.V* null, i32 0, i32 42 ; <i32*> [#uses=0] + br label %bb23830 + +bb23830: ; preds = %bb23827, %bb23821, %bb23816.preheader + %scaledInDst.2.reg2mem.5 = phi i8 [ undef, %bb23827 ], [ undef, %bb23821 ], [ undef, %bb23816.preheader ] ; <i8> [#uses=1] + %toBool35047 = icmp eq i8 %scaledInDst.2.reg2mem.5, 0 ; <i1> [#uses=1] + %bothcond39107 = or i1 %toBool35047, false ; <i1> [#uses=0] + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-05-09-ShuffleLoweringBug.ll b/src/LLVM/test/CodeGen/X86/2008-05-09-ShuffleLoweringBug.ll new file mode 100644 index 0000000..5ceb546 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-05-09-ShuffleLoweringBug.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 + +define fastcc void @glgVectorFloatConversion() nounwind { + %tmp12745 = load <4 x float>* null, align 16 ; <<4 x float>> [#uses=1] + %tmp12773 = insertelement <4 x float> %tmp12745, float 1.000000e+00, i32 1 ; <<4 x float>> [#uses=1] + %tmp12774 = insertelement <4 x float> %tmp12773, float 0.000000e+00, i32 2 ; <<4 x float>> [#uses=1] + %tmp12775 = insertelement <4 x float> %tmp12774, float 1.000000e+00, i32 3 ; <<4 x float>> [#uses=1] + store <4 x float> %tmp12775, <4 x float>* null, align 16 + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-05-12-tailmerge-5.ll b/src/LLVM/test/CodeGen/X86/2008-05-12-tailmerge-5.ll new file mode 100644 index 0000000..4852e89 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-05-12-tailmerge-5.ll
@@ -0,0 +1,145 @@ +; RUN: llc < %s | grep abort | count 1 +; Calls to abort should all be merged + +; ModuleID = '5898899.c' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-apple-darwin8" + %struct.BoundaryAlignment = type { [3 x i8], i8, i16, i16, i8, [2 x i8] } + +define void @passing2(i64 %str.0, i64 %str.1, i16 signext %s, i32 %j, i8 signext %c, i16 signext %t, i16 signext %u, i8 signext %d) nounwind optsize { +entry: + %str_addr = alloca %struct.BoundaryAlignment ; <%struct.BoundaryAlignment*> [#uses=7] + %s_addr = alloca i16 ; <i16*> [#uses=1] + %j_addr = alloca i32 ; <i32*> [#uses=2] + %c_addr = alloca i8 ; <i8*> [#uses=2] + %t_addr = alloca i16 ; <i16*> [#uses=2] + %u_addr = alloca i16 ; <i16*> [#uses=2] + %d_addr = alloca i8 ; <i8*> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + %tmp = bitcast %struct.BoundaryAlignment* %str_addr to { i64, i64 }* ; <{ i64, i64 }*> [#uses=1] + %tmp1 = getelementptr { i64, i64 }* %tmp, i32 0, i32 0 ; <i64*> [#uses=1] + store i64 %str.0, i64* %tmp1 + %tmp2 = bitcast %struct.BoundaryAlignment* %str_addr to { i64, i64 }* ; <{ i64, i64 }*> [#uses=1] + %tmp3 = getelementptr { i64, i64 }* %tmp2, i32 0, i32 1 ; <i64*> [#uses=1] + %bc = bitcast i64* %tmp3 to i8* ; <i8*> [#uses=2] + %byte = trunc i64 %str.1 to i8 ; <i8> [#uses=1] + store i8 %byte, i8* %bc + %shft = lshr i64 %str.1, 8 ; <i64> [#uses=2] + %Loc = getelementptr i8* %bc, i32 1 ; <i8*> [#uses=2] + %byte4 = trunc i64 %shft to i8 ; <i8> [#uses=1] + store i8 %byte4, i8* %Loc + %shft5 = lshr i64 %shft, 8 ; <i64> [#uses=2] + %Loc6 = getelementptr i8* %Loc, i32 1 ; <i8*> [#uses=2] + %byte7 = trunc i64 %shft5 to i8 ; <i8> [#uses=1] + store i8 %byte7, i8* %Loc6 + %shft8 = lshr i64 %shft5, 8 ; <i64> [#uses=2] + %Loc9 = getelementptr i8* %Loc6, i32 1 ; <i8*> [#uses=2] + %byte10 = trunc i64 %shft8 to i8 ; <i8> [#uses=1] + store i8 
%byte10, i8* %Loc9 + %shft11 = lshr i64 %shft8, 8 ; <i64> [#uses=0] + %Loc12 = getelementptr i8* %Loc9, i32 1 ; <i8*> [#uses=0] + store i16 %s, i16* %s_addr + store i32 %j, i32* %j_addr + store i8 %c, i8* %c_addr + store i16 %t, i16* %t_addr + store i16 %u, i16* %u_addr + store i8 %d, i8* %d_addr + %tmp13 = getelementptr %struct.BoundaryAlignment* %str_addr, i32 0, i32 0 ; <[3 x i8]*> [#uses=1] + %tmp1314 = bitcast [3 x i8]* %tmp13 to i32* ; <i32*> [#uses=1] + %tmp15 = load i32* %tmp1314, align 4 ; <i32> [#uses=1] + %tmp16 = shl i32 %tmp15, 14 ; <i32> [#uses=1] + %tmp17 = ashr i32 %tmp16, 23 ; <i32> [#uses=1] + %tmp1718 = trunc i32 %tmp17 to i16 ; <i16> [#uses=1] + %sextl = shl i16 %tmp1718, 7 ; <i16> [#uses=1] + %sextr = ashr i16 %sextl, 7 ; <i16> [#uses=2] + %sextl19 = shl i16 %sextr, 7 ; <i16> [#uses=1] + %sextr20 = ashr i16 %sextl19, 7 ; <i16> [#uses=0] + %sextl21 = shl i16 %sextr, 7 ; <i16> [#uses=1] + %sextr22 = ashr i16 %sextl21, 7 ; <i16> [#uses=1] + %sextr2223 = sext i16 %sextr22 to i32 ; <i32> [#uses=1] + %tmp24 = load i32* %j_addr, align 4 ; <i32> [#uses=1] + %tmp25 = icmp ne i32 %sextr2223, %tmp24 ; <i1> [#uses=1] + %tmp2526 = zext i1 %tmp25 to i8 ; <i8> [#uses=1] + %toBool = icmp ne i8 %tmp2526, 0 ; <i1> [#uses=1] + br i1 %toBool, label %bb, label %bb27 + +bb: ; preds = %entry + call void (...)* @abort( ) noreturn nounwind + unreachable + +bb27: ; preds = %entry + %tmp28 = getelementptr %struct.BoundaryAlignment* %str_addr, i32 0, i32 1 ; <i8*> [#uses=1] + %tmp29 = load i8* %tmp28, align 4 ; <i8> [#uses=1] + %tmp30 = load i8* %c_addr, align 1 ; <i8> [#uses=1] + %tmp31 = icmp ne i8 %tmp29, %tmp30 ; <i1> [#uses=1] + %tmp3132 = zext i1 %tmp31 to i8 ; <i8> [#uses=1] + %toBool33 = icmp ne i8 %tmp3132, 0 ; <i1> [#uses=1] + br i1 %toBool33, label %bb34, label %bb35 + +bb34: ; preds = %bb27 + call void (...)* @abort( ) noreturn nounwind + unreachable + +bb35: ; preds = %bb27 + %tmp36 = getelementptr %struct.BoundaryAlignment* %str_addr, i32 0, i32 2 ; <i16*> 
[#uses=1] + %tmp37 = load i16* %tmp36, align 4 ; <i16> [#uses=1] + %tmp38 = shl i16 %tmp37, 7 ; <i16> [#uses=1] + %tmp39 = ashr i16 %tmp38, 7 ; <i16> [#uses=1] + %sextl40 = shl i16 %tmp39, 7 ; <i16> [#uses=1] + %sextr41 = ashr i16 %sextl40, 7 ; <i16> [#uses=2] + %sextl42 = shl i16 %sextr41, 7 ; <i16> [#uses=1] + %sextr43 = ashr i16 %sextl42, 7 ; <i16> [#uses=0] + %sextl44 = shl i16 %sextr41, 7 ; <i16> [#uses=1] + %sextr45 = ashr i16 %sextl44, 7 ; <i16> [#uses=1] + %tmp46 = load i16* %t_addr, align 2 ; <i16> [#uses=1] + %tmp47 = icmp ne i16 %sextr45, %tmp46 ; <i1> [#uses=1] + %tmp4748 = zext i1 %tmp47 to i8 ; <i8> [#uses=1] + %toBool49 = icmp ne i8 %tmp4748, 0 ; <i1> [#uses=1] + br i1 %toBool49, label %bb50, label %bb51 + +bb50: ; preds = %bb35 + call void (...)* @abort( ) noreturn nounwind + unreachable + +bb51: ; preds = %bb35 + %tmp52 = getelementptr %struct.BoundaryAlignment* %str_addr, i32 0, i32 3 ; <i16*> [#uses=1] + %tmp53 = load i16* %tmp52, align 4 ; <i16> [#uses=1] + %tmp54 = shl i16 %tmp53, 7 ; <i16> [#uses=1] + %tmp55 = ashr i16 %tmp54, 7 ; <i16> [#uses=1] + %sextl56 = shl i16 %tmp55, 7 ; <i16> [#uses=1] + %sextr57 = ashr i16 %sextl56, 7 ; <i16> [#uses=2] + %sextl58 = shl i16 %sextr57, 7 ; <i16> [#uses=1] + %sextr59 = ashr i16 %sextl58, 7 ; <i16> [#uses=0] + %sextl60 = shl i16 %sextr57, 7 ; <i16> [#uses=1] + %sextr61 = ashr i16 %sextl60, 7 ; <i16> [#uses=1] + %tmp62 = load i16* %u_addr, align 2 ; <i16> [#uses=1] + %tmp63 = icmp ne i16 %sextr61, %tmp62 ; <i1> [#uses=1] + %tmp6364 = zext i1 %tmp63 to i8 ; <i8> [#uses=1] + %toBool65 = icmp ne i8 %tmp6364, 0 ; <i1> [#uses=1] + br i1 %toBool65, label %bb66, label %bb67 + +bb66: ; preds = %bb51 + call void (...)* @abort( ) noreturn nounwind + unreachable + +bb67: ; preds = %bb51 + %tmp68 = getelementptr %struct.BoundaryAlignment* %str_addr, i32 0, i32 4 ; <i8*> [#uses=1] + %tmp69 = load i8* %tmp68, align 4 ; <i8> [#uses=1] + %tmp70 = load i8* %d_addr, align 1 ; <i8> [#uses=1] + %tmp71 = icmp ne i8 %tmp69, 
%tmp70 ; <i1> [#uses=1] + %tmp7172 = zext i1 %tmp71 to i8 ; <i8> [#uses=1] + %toBool73 = icmp ne i8 %tmp7172, 0 ; <i1> [#uses=1] + br i1 %toBool73, label %bb74, label %bb75 + +bb74: ; preds = %bb67 + call void (...)* @abort( ) noreturn nounwind + unreachable + +bb75: ; preds = %bb67 + br label %return + +return: ; preds = %bb75 + ret void +} + +declare void @abort(...) noreturn nounwind
diff --git a/src/LLVM/test/CodeGen/X86/2008-05-21-CoalescerBug.ll b/src/LLVM/test/CodeGen/X86/2008-05-21-CoalescerBug.ll new file mode 100644 index 0000000..e5dda4a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-05-21-CoalescerBug.ll
@@ -0,0 +1,98 @@ +; RUN: llc < %s -march=x86 -O0 -fast-isel=false -regalloc=linearscan | grep mov | count 5 +; PR2343 + + %llvm.dbg.anchor.type = type { i32, i32 } + %struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } + %struct.VEC_basic_block_base = type { i32, i32, [1 x %struct.basic_block_def*] } + %struct.VEC_basic_block_gc = type { %struct.VEC_basic_block_base } + %struct.VEC_edge_base = type { i32, i32, [1 x %struct.edge_def*] } + %struct.VEC_edge_gc = type { %struct.VEC_edge_base } + %struct.VEC_rtx_base = type { i32, i32, [1 x %struct.rtx_def*] } + %struct.VEC_rtx_gc = type { %struct.VEC_rtx_base } + %struct.VEC_temp_slot_p_base = type { i32, i32, [1 x %struct.temp_slot*] } + %struct.VEC_temp_slot_p_gc = type { %struct.VEC_temp_slot_p_base } + %struct.VEC_tree_base = type { i32, i32, [1 x %struct.tree_node*] } + %struct.VEC_tree_gc = type { %struct.VEC_tree_base } + %struct.__sbuf = type { i8*, i32 } + %struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] } + %struct.basic_block_def = type { %struct.tree_node*, %struct.VEC_edge_gc*, %struct.VEC_edge_gc*, i8*, %struct.loop*, [2 x %struct.et_node*], %struct.basic_block_def*, %struct.basic_block_def*, %struct.basic_block_il_dependent, %struct.tree_node*, %struct.edge_prediction*, i64, i32, i32, i32, i32 } + %struct.basic_block_il_dependent = type { %struct.rtl_bb_info* } + %struct.bitmap_element_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, [4 x i32] } + %struct.bitmap_head_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, %struct.bitmap_obstack* } + %struct.bitmap_obstack = type { %struct.bitmap_element_def*, %struct.bitmap_head_def*, %struct.obstack } + %struct.block_symbol = type { [3 x %struct.cfg_stats_d], %struct.object_block*, i64 } + %struct.cfg_stats_d = type { i32 } + %struct.control_flow_graph = type { %struct.basic_block_def*, %struct.basic_block_def*, %struct.VEC_basic_block_gc*, i32, 
i32, i32, %struct.VEC_basic_block_gc*, i32 } + %struct.def_optype_d = type { %struct.def_optype_d*, %struct.tree_node** } + %struct.edge_def = type { %struct.basic_block_def*, %struct.basic_block_def*, %struct.edge_def_insns, i8*, %struct.__sbuf*, i32, i32, i64, i32 } + %struct.edge_def_insns = type { %struct.rtx_def* } + %struct.edge_prediction = type { %struct.edge_prediction*, %struct.edge_def*, i32, i32 } + %struct.eh_status = type opaque + %struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack*, i32, %struct.__sbuf, i32, i8*, %struct.rtx_def** } + %struct.et_node = type opaque + %struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* } + %struct.function = type { %struct.eh_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, %struct.control_flow_graph*, %struct.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.CUMULATIVE_ARGS, %struct.rtx_def*, %struct.rtx_def*, %struct.initial_value_struct*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i8, i32, i64, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.VEC_temp_slot_p_gc*, %struct.temp_slot*, %struct.var_refs_queue*, i32, i32, i32, i32, %struct.machine_function*, i32, i32, %struct.language_function*, %struct.htab*, %struct.rtx_def*, i32, i32, i32, %struct.__sbuf, %struct.VEC_tree_gc*, %struct.tree_node*, i8*, i8*, i8*, i8*, i8*, %struct.tree_node*, i8, i8, i8, i8, i8, i8 } + %struct.htab = type { i32 (i8*)*, i32 (i8*, i8*)*, void (i8*)*, i8**, i32, i32, i32, i32, i32, i8* (i32, i32)*, void (i8*)*, i8*, i8* (i8*, i32, i32)*, void (i8*, i8*)*, i32 } + %struct.initial_value_struct = type opaque + %struct.lang_decl = type opaque + %struct.language_function = type opaque + %struct.loop = type { i32, %struct.basic_block_def*, %struct.basic_block_def*, %llvm.dbg.anchor.type, i32, i32, i32, i32, 
%struct.loop**, i32, %struct.loop*, %struct.loop*, %struct.loop*, %struct.loop*, i8*, %struct.tree_node*, %struct.tree_node*, %struct.nb_iter_bound*, %struct.edge_def*, i32 } + %struct.machine_function = type opaque + %struct.maydef_optype_d = type { %struct.maydef_optype_d*, %struct.tree_node*, %struct.tree_node*, %struct.ssa_use_operand_d } + %struct.nb_iter_bound = type { %struct.tree_node*, %struct.tree_node*, %struct.nb_iter_bound* } + %struct.object_block = type { %struct.section*, i32, i64, %struct.VEC_rtx_gc*, %struct.VEC_rtx_gc* } + %struct.obstack = type { i32, %struct._obstack_chunk*, i8*, i8*, i8*, i32, i32, %struct._obstack_chunk* (i8*, i32)*, void (i8*, %struct._obstack_chunk*)*, i8*, i8 } + %struct.rtl_bb_info = type { %struct.rtx_def*, %struct.rtx_def*, %struct.bitmap_head_def*, %struct.bitmap_head_def*, %struct.rtx_def*, %struct.rtx_def*, i32 } + %struct.rtx_def = type { i16, i8, i8, %struct.u } + %struct.section = type { %struct.unnamed_section } + %struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack* } + %struct.ssa_use_operand_d = type { %struct.ssa_use_operand_d*, %struct.ssa_use_operand_d*, %struct.tree_node*, %struct.tree_node** } + %struct.stmt_ann_d = type { %struct.tree_ann_common_d, i8, %struct.basic_block_def*, %struct.stmt_operands_d, %struct.bitmap_head_def*, i32, i8* } + %struct.stmt_operands_d = type { %struct.def_optype_d*, %struct.use_optype_d*, %struct.maydef_optype_d*, %struct.vuse_optype_d*, %struct.maydef_optype_d* } + %struct.temp_slot = type opaque + %struct.tree_ann_common_d = type { i32, i8*, %struct.tree_node* } + %struct.tree_ann_d = type { %struct.stmt_ann_d } + %struct.tree_common = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_ann_d*, i8, i8, i8, i8, i8 } + %struct.tree_decl_common = type { %struct.tree_decl_minimal, %struct.tree_node*, i8, i8, i8, i8, i8, i32, %struct.tree_decl_u1, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i64, 
%struct.lang_decl* } + %struct.tree_decl_minimal = type { %struct.tree_common, %struct.__sbuf, i32, %struct.tree_node*, %struct.tree_node* } + %struct.tree_decl_non_common = type { %struct.tree_decl_with_vis, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node* } + %struct.tree_decl_u1 = type { i64 } + %struct.tree_decl_with_rtl = type { %struct.tree_decl_common, %struct.rtx_def*, i32 } + %struct.tree_decl_with_vis = type { %struct.tree_decl_with_rtl, %struct.tree_node*, %struct.tree_node*, i8, i8, i8 } + %struct.tree_function_decl = type { %struct.tree_decl_non_common, i8, i8, i64, %struct.function* } + %struct.tree_node = type { %struct.tree_function_decl } + %struct.u = type { %struct.block_symbol } + %struct.unnamed_section = type { %struct.cfg_stats_d, void (i8*)*, i8*, %struct.section* } + %struct.use_optype_d = type { %struct.use_optype_d*, %struct.ssa_use_operand_d } + %struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* } + %struct.varasm_status = type opaque + %struct.vuse_optype_d = type { %struct.vuse_optype_d*, %struct.tree_node*, %struct.ssa_use_operand_d } +@llvm.used = appending global [1 x i8*] [ i8* bitcast (%struct.edge_def* (%struct.edge_def*, %struct.basic_block_def*)* @tree_redirect_edge_and_branch to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +define %struct.edge_def* @tree_redirect_edge_and_branch(%struct.edge_def* %e1, %struct.basic_block_def* %dest2) nounwind { +entry: + br label %bb497 + +bb483: ; preds = %bb497 + %tmp496 = load %struct.tree_node** null, align 4 ; <%struct.tree_node*> [#uses=1] + br label %bb497 + +bb497: ; preds = %bb483, %entry + %cases.0 = phi %struct.tree_node* [ %tmp496, %bb483 ], [ null, %entry ] ; <%struct.tree_node*> [#uses=1] + %last.0 = phi %struct.tree_node* [ %cases.0, %bb483 ], [ undef, %entry ] ; <%struct.tree_node*> [#uses=1] + %foo = phi i1 [ 0, %bb483 ], [ 1, %entry ] + br i1 %foo, label %bb483, label %bb502 + +bb502: ; preds = 
%bb497 + br i1 %foo, label %bb507, label %bb841 + +bb507: ; preds = %bb502 + %tmp517 = getelementptr %struct.tree_node* %last.0, i32 0, i32 0 ; <%struct.tree_function_decl*> [#uses=1] + %tmp517518 = bitcast %struct.tree_function_decl* %tmp517 to %struct.tree_common* ; <%struct.tree_common*> [#uses=1] + %tmp519 = getelementptr %struct.tree_common* %tmp517518, i32 0, i32 0 ; <%struct.tree_node**> [#uses=1] + store %struct.tree_node* null, %struct.tree_node** %tmp519, align 4 + br label %bb841 + +bb841: ; preds = %bb507, %bb502 + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll b/src/LLVM/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll new file mode 100644 index 0000000..19a7354 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movups | count 2 + +define void @a(<4 x float>* %x) nounwind { +entry: + %tmp2 = load <4 x float>* %x, align 1 + %inv = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %tmp2) + store <4 x float> %inv, <4 x float>* %x, align 1 + ret void +} + +declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>)
diff --git a/src/LLVM/test/CodeGen/X86/2008-05-28-CoalescerBug.ll b/src/LLVM/test/CodeGen/X86/2008-05-28-CoalescerBug.ll new file mode 100644 index 0000000..32bf8d4 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-05-28-CoalescerBug.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu +; PR2289 + +define void @_ada_ca11001() { +entry: + %tmp59 = call i16 @ca11001_0__cartesian_assign( i8 zeroext 0, i8 zeroext 0, i16 undef ) ; <i16> [#uses=0] + unreachable +} + +declare i16 @ca11001_0__cartesian_assign(i8 zeroext , i8 zeroext , i16)
diff --git a/src/LLVM/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll b/src/LLVM/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll new file mode 100644 index 0000000..0d11546 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
@@ -0,0 +1,34 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin -regalloc=fast + +@_ZTVN10Evaluation10GridOutputILi3EEE = external constant [5 x i32 (...)*] ; <[5 x i32 (...)*]*> [#uses=1] + +declare i8* @llvm.eh.exception() nounwind + +declare i8* @_Znwm(i32) + +declare i8* @__cxa_begin_catch(i8*) nounwind + +define i32 @main(i32 %argc, i8** %argv) { +entry: + br i1 false, label %bb37, label %bb34 + +bb34: ; preds = %entry + ret i32 1 + +bb37: ; preds = %entry + %tmp12.i.i.i.i.i66 = invoke i8* @_Znwm( i32 12 ) + to label %tmp12.i.i.i.i.i.noexc65 unwind label %lpad243 ; <i8*> [#uses=0] + +tmp12.i.i.i.i.i.noexc65: ; preds = %bb37 + unreachable + +lpad243: ; preds = %bb37 + %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + %eh_ptr244 = extractvalue { i8*, i32 } %exn, 0 + store i32 (...)** getelementptr ([5 x i32 (...)*]* @_ZTVN10Evaluation10GridOutputILi3EEE, i32 0, i32 2), i32 (...)*** null, align 8 + %tmp133 = call i8* @__cxa_begin_catch( i8* %eh_ptr244 ) nounwind ; <i8*> [#uses=0] + unreachable +} + +declare i32 @__gxx_personality_v0(...)
diff --git a/src/LLVM/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll b/src/LLVM/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll new file mode 100644 index 0000000..90af387 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -march=x86 | not grep movsd +; RUN: llc < %s -march=x86 | grep movw +; RUN: llc < %s -march=x86 | grep addw +; These transforms are turned off for volatile loads and stores. +; Check that they weren't turned off for all loads and stores! + +@atomic = global double 0.000000e+00 ; <double*> [#uses=1] +@atomic2 = global double 0.000000e+00 ; <double*> [#uses=1] +@ioport = global i32 0 ; <i32*> [#uses=1] +@ioport2 = global i32 0 ; <i32*> [#uses=1] + +define i16 @f(i64 %x) { + %b = bitcast i64 %x to double ; <double> [#uses=1] + store double %b, double* @atomic + store double 0.000000e+00, double* @atomic2 + %l = load i32* @ioport ; <i32> [#uses=1] + %t = trunc i32 %l to i16 ; <i16> [#uses=1] + %l2 = load i32* @ioport2 ; <i32> [#uses=1] + %tmp = lshr i32 %l2, 16 ; <i32> [#uses=1] + %t2 = trunc i32 %tmp to i16 ; <i16> [#uses=1] + %f = add i16 %t, %t2 ; <i16> [#uses=1] + ret i16 %f +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll b/src/LLVM/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll new file mode 100644 index 0000000..8665282 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -mtriple=i686-linux -mattr=+sse2 | grep movsd | count 5 +; RUN: llc < %s -mtriple=i686-linux -mattr=+sse2 | grep movl | count 2 + +@atomic = global double 0.000000e+00 ; <double*> [#uses=1] +@atomic2 = global double 0.000000e+00 ; <double*> [#uses=1] +@anything = global i64 0 ; <i64*> [#uses=1] +@ioport = global i32 0 ; <i32*> [#uses=2] + +define i16 @f(i64 %x, double %y) { + %b = bitcast i64 %x to double ; <double> [#uses=1] + volatile store double %b, double* @atomic ; one processor operation only + volatile store double 0.000000e+00, double* @atomic2 ; one processor operation only + %b2 = bitcast double %y to i64 ; <i64> [#uses=1] + volatile store i64 %b2, i64* @anything ; may transform to store of double + %l = volatile load i32* @ioport ; must not narrow + %t = trunc i32 %l to i16 ; <i16> [#uses=1] + %l2 = volatile load i32* @ioport ; must not narrow + %tmp = lshr i32 %l2, 16 ; <i32> [#uses=1] + %t2 = trunc i32 %tmp to i16 ; <i16> [#uses=1] + %f = add i16 %t, %t2 ; <i16> [#uses=1] + ret i16 %f +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-06-16-SubregsBug.ll b/src/LLVM/test/CodeGen/X86/2008-06-16-SubregsBug.ll new file mode 100644 index 0000000..4d4819a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-06-16-SubregsBug.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin | grep mov | count 4 + +define i16 @test(i16* %tmp179) nounwind { + %tmp180 = load i16* %tmp179, align 2 ; <i16> [#uses=2] + %tmp184 = and i16 %tmp180, -1024 ; <i16> [#uses=1] + %tmp186 = icmp eq i16 %tmp184, -32768 ; <i1> [#uses=1] + br i1 %tmp186, label %bb189, label %bb288 + +bb189: ; preds = %0 + ret i16 %tmp180 + +bb288: ; preds = %0 + ret i16 32 +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-06-18-BadShuffle.ll b/src/LLVM/test/CodeGen/X86/2008-06-18-BadShuffle.ll new file mode 100644 index 0000000..66f9065 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-06-18-BadShuffle.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=x86 -mcpu=i386 -mattr=+sse2 | grep pinsrw + +; Test to make sure we actually insert the bottom element of the vector +define <8 x i16> @a(<8 x i16> %a) nounwind { +entry: + shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> < i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8 > + %add = add <8 x i16> %0, %a + ret <8 x i16> %add +} +
diff --git a/src/LLVM/test/CodeGen/X86/2008-06-25-VecISelBug.ll b/src/LLVM/test/CodeGen/X86/2008-06-25-VecISelBug.ll new file mode 100644 index 0000000..72d1907 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-06-25-VecISelBug.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep pslldq + +define void @t() nounwind { +entry: + %tmp1 = shufflevector <4 x float> zeroinitializer, <4 x float> < float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 0, i32 1, i32 4, i32 5 > + %tmp2 = insertelement <4 x float> %tmp1, float 1.000000e+00, i32 3 + store <4 x float> %tmp2, <4 x float>* null, align 16 + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-07-07-DanglingDeadInsts.ll b/src/LLVM/test/CodeGen/X86/2008-07-07-DanglingDeadInsts.ll new file mode 100644 index 0000000..46341fc --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-07-07-DanglingDeadInsts.ll
@@ -0,0 +1,99 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin9 + + %struct.ogg_stream_state = type { i8*, i32, i32, i32, i32*, i64*, i32, i32, i32, i32, [282 x i8], i32, i32, i32, i32, i32, i64, i64 } + %struct.res_state = type { i32, i32, i32, i32, float*, float*, i32, i32 } + %struct.vorbis_comment = type { i8**, i32*, i32, i8* } + +declare i32 @strlen(i8*) nounwind readonly + +define i32 @res_init(%struct.res_state* %state, i32 %channels, i32 %outfreq, i32 %infreq, i32 %op1, ...) nounwind { +entry: + br i1 false, label %bb95, label %bb + +bb: ; preds = %entry + br i1 false, label %bb95, label %bb24 + +bb24: ; preds = %bb + br i1 false, label %bb40.preheader, label %bb26 + +bb26: ; preds = %bb24 + ret i32 -1 + +bb40.preheader: ; preds = %bb24 + br i1 false, label %bb39, label %bb49.outer + +bb39: ; preds = %bb39, %bb40.preheader + shl i32 0, 1 ; <i32>:0 [#uses=0] + br i1 false, label %bb39, label %bb49.outer + +bb49.outer: ; preds = %bb39, %bb40.preheader + getelementptr %struct.res_state* %state, i32 0, i32 3 ; <i32*>:1 [#uses=0] + getelementptr %struct.res_state* %state, i32 0, i32 7 ; <i32*>:2 [#uses=0] + %base10.1 = select i1 false, float* null, float* null ; <float*> [#uses=1] + br label %bb74 + +bb69: ; preds = %bb74 + br label %bb71 + +bb71: ; preds = %bb74, %bb69 + store float 0.000000e+00, float* null, align 4 + add i32 0, 1 ; <i32>:3 [#uses=1] + %indvar.next137 = add i32 %indvar136, 1 ; <i32> [#uses=1] + br i1 false, label %bb74, label %bb73 + +bb73: ; preds = %bb71 + %.rec = add i32 %base10.2.ph.rec, 1 ; <i32> [#uses=2] + getelementptr float* %base10.1, i32 %.rec ; <float*>:4 [#uses=1] + br label %bb74 + +bb74: ; preds = %bb73, %bb71, %bb49.outer + %N13.1.ph = phi i32 [ 0, %bb49.outer ], [ 0, %bb73 ], [ %N13.1.ph, %bb71 ] ; <i32> [#uses=1] + %dest12.2.ph = phi float* [ null, %bb49.outer ], [ %4, %bb73 ], [ %dest12.2.ph, %bb71 ] ; <float*> [#uses=1] + %x8.0.ph = phi i32 [ 0, %bb49.outer ], [ %3, %bb73 ], [ %x8.0.ph, %bb71 ] ; <i32> [#uses=1] + 
%base10.2.ph.rec = phi i32 [ 0, %bb49.outer ], [ %.rec, %bb73 ], [ %base10.2.ph.rec, %bb71 ] ; <i32> [#uses=2] + %indvar136 = phi i32 [ %indvar.next137, %bb71 ], [ 0, %bb73 ], [ 0, %bb49.outer ] ; <i32> [#uses=1] + br i1 false, label %bb71, label %bb69 + +bb95: ; preds = %bb, %entry + ret i32 -1 +} + +define i32 @read_resampled(i8* %d, float** %buffer, i32 %samples) nounwind { +entry: + br i1 false, label %bb17.preheader, label %bb30 + +bb17.preheader: ; preds = %entry + load i32* null, align 4 ; <i32>:0 [#uses=0] + br label %bb16 + +bb16: ; preds = %bb16, %bb17.preheader + %i1.036 = phi i32 [ 0, %bb17.preheader ], [ %1, %bb16 ] ; <i32> [#uses=1] + add i32 %i1.036, 1 ; <i32>:1 [#uses=2] + icmp ult i32 %1, 0 ; <i1>:2 [#uses=0] + br label %bb16 + +bb30: ; preds = %entry + ret i32 0 +} + +define i32 @ogg_stream_reset_serialno(%struct.ogg_stream_state* %os, i32 %serialno) nounwind { +entry: + unreachable +} + +define void @vorbis_lsp_to_curve(float* %curve, i32* %map, i32 %n, i32 %ln, float* %lsp, i32 %m, float %amp, float %ampoffset) nounwind { +entry: + unreachable +} + +define i32 @vorbis_comment_query_count(%struct.vorbis_comment* %vc, i8* %tag) nounwind { +entry: + %strlen = call i32 @strlen( i8* null ) ; <i32> [#uses=1] + %endptr = getelementptr i8* null, i32 %strlen ; <i8*> [#uses=0] + unreachable +} + +define fastcc i32 @push(%struct.res_state* %state, float* %pool, i32* %poolfill, i32* %offset, float* %dest, i32 %dststep, float* %source, i32 %srcstep, i32 %srclen) nounwind { +entry: + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-07-09-ELFSectionAttributes.ll b/src/LLVM/test/CodeGen/X86/2008-07-09-ELFSectionAttributes.ll new file mode 100644 index 0000000..1a786ef --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-07-09-ELFSectionAttributes.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s | grep ax +; PR2024 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" +target triple = "i386-pc-linux-gnu" + +define i32 @foo(i32 %A, i32 %B) nounwind section ".init.text" { +entry: + tail call i32 @bar( i32 %A, i32 %B ) nounwind ; <i32>:0 [#uses=1] + ret i32 %0 +} + +declare i32 @bar(i32, i32)
diff --git a/src/LLVM/test/CodeGen/X86/2008-07-11-SHLBy1.ll b/src/LLVM/test/CodeGen/X86/2008-07-11-SHLBy1.ll new file mode 100644 index 0000000..ff2b05f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-07-11-SHLBy1.ll
@@ -0,0 +1,5 @@ +; RUN: llc < %s -march=x86-64 -o - | not grep shr +define i128 @sl(i128 %x) { + %t = shl i128 %x, 1 + ret i128 %t +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-07-16-CoalescerCrash.ll b/src/LLVM/test/CodeGen/X86/2008-07-16-CoalescerCrash.ll new file mode 100644 index 0000000..f56604b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-07-16-CoalescerCrash.ll
@@ -0,0 +1,34 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin + + %struct.SV = type { i8*, i64, i64 } +@"\01LC25" = external constant [8 x i8] ; <[8 x i8]*> [#uses=1] + +declare void @Perl_sv_catpvf(%struct.SV*, i8*, ...) nounwind + +declare fastcc i64 @Perl_utf8n_to_uvuni(i8*, i64, i64*, i64) nounwind + +define fastcc i8* @Perl_pv_uni_display(%struct.SV* %dsv, i8* %spv, i64 %len, i64 %pvlim, i64 %flags) nounwind { +entry: + br i1 false, label %bb, label %bb40 + +bb: ; preds = %entry + tail call fastcc i64 @Perl_utf8n_to_uvuni( i8* null, i64 13, i64* null, i64 255 ) nounwind ; <i64>:0 [#uses=1] + br i1 false, label %bb6, label %bb33 + +bb6: ; preds = %bb + br i1 false, label %bb30, label %bb31 + +bb30: ; preds = %bb6 + unreachable + +bb31: ; preds = %bb6 + icmp eq i8 0, 0 ; <i1>:1 [#uses=0] + br label %bb33 + +bb33: ; preds = %bb31, %bb + tail call void (%struct.SV*, i8*, ...)* @Perl_sv_catpvf( %struct.SV* %dsv, i8* getelementptr ([8 x i8]* @"\01LC25", i32 0, i64 0), i64 %0 ) nounwind + unreachable + +bb40: ; preds = %entry + ret i8* null +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-07-19-movups-spills.ll b/src/LLVM/test/CodeGen/X86/2008-07-19-movups-spills.ll new file mode 100644 index 0000000..368af6d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-07-19-movups-spills.ll
@@ -0,0 +1,639 @@ +; RUN: llc < %s -mtriple=i686-pc-linux -realign-stack=1 -mattr=sse2 | grep movups | count 33 +; RUN: llc < %s -mtriple=i686-pc-linux -realign-stack=0 -mattr=sse2 | grep movups | count 33 +; PR2539 +; PR8969 - make 32-bit linux have a 16-byte aligned stack +; Verify that movups is still generated with an aligned stack for the globals +; that must be accessed unaligned + +external global <4 x float>, align 1 ; <<4 x float>*>:0 [#uses=2] +external global <4 x float>, align 1 ; <<4 x float>*>:1 [#uses=1] +external global <4 x float>, align 1 ; <<4 x float>*>:2 [#uses=1] +external global <4 x float>, align 1 ; <<4 x float>*>:3 [#uses=1] +external global <4 x float>, align 1 ; <<4 x float>*>:4 [#uses=1] +external global <4 x float>, align 1 ; <<4 x float>*>:5 [#uses=1] +external global <4 x float>, align 1 ; <<4 x float>*>:6 [#uses=1] +external global <4 x float>, align 1 ; <<4 x float>*>:7 [#uses=1] +external global <4 x float>, align 1 ; <<4 x float>*>:8 [#uses=1] +external global <4 x float>, align 1 ; <<4 x float>*>:9 [#uses=1] +external global <4 x float>, align 1 ; <<4 x float>*>:10 [#uses=1] +external global <4 x float>, align 1 ; <<4 x float>*>:11 [#uses=1] +external global <4 x float>, align 1 ; <<4 x float>*>:12 [#uses=1] +external global <4 x float>, align 1 ; <<4 x float>*>:13 [#uses=1] +external global <4 x float>, align 1 ; <<4 x float>*>:14 [#uses=1] +external global <4 x float>, align 1 ; <<4 x float>*>:15 [#uses=1] +external global <4 x float>, align 1 ; <<4 x float>*>:16 [#uses=1] +external global <4 x float>, align 1 ; <<4 x float>*>:17 [#uses=1] +external global <4 x float>, align 1 ; <<4 x float>*>:18 [#uses=1] +external global <4 x float>, align 1 ; <<4 x float>*>:19 [#uses=1] +external global <4 x float>, align 1 ; <<4 x float>*>:20 [#uses=1] +external global <4 x float>, align 1 ; <<4 x float>*>:21 [#uses=1] +external global <4 x float>, align 1 ; <<4 x float>*>:22 [#uses=1] +external global <4 x float>, align 1 ; <<4 x 
float>*>:23 [#uses=1] +external global <4 x float>, align 1 ; <<4 x float>*>:24 [#uses=1] +external global <4 x float>, align 1 ; <<4 x float>*>:25 [#uses=1] +external global <4 x float>, align 1 ; <<4 x float>*>:26 [#uses=1] +external global <4 x float>, align 1 ; <<4 x float>*>:27 [#uses=1] +external global <4 x float>, align 1 ; <<4 x float>*>:28 [#uses=1] +external global <4 x float>, align 1 ; <<4 x float>*>:29 [#uses=1] +external global <4 x float>, align 1 ; <<4 x float>*>:30 [#uses=1] +external global <4 x float>, align 1 ; <<4 x float>*>:31 [#uses=1] + +declare void @abort() + +define void @""() { + load <4 x float>* @0, align 1 ; <<4 x float>>:1 [#uses=2] + load <4 x float>* @1, align 1 ; <<4 x float>>:2 [#uses=3] + load <4 x float>* @2, align 1 ; <<4 x float>>:3 [#uses=4] + load <4 x float>* @3, align 1 ; <<4 x float>>:4 [#uses=5] + load <4 x float>* @4, align 1 ; <<4 x float>>:5 [#uses=6] + load <4 x float>* @5, align 1 ; <<4 x float>>:6 [#uses=7] + load <4 x float>* @6, align 1 ; <<4 x float>>:7 [#uses=8] + load <4 x float>* @7, align 1 ; <<4 x float>>:8 [#uses=9] + load <4 x float>* @8, align 1 ; <<4 x float>>:9 [#uses=10] + load <4 x float>* @9, align 1 ; <<4 x float>>:10 [#uses=11] + load <4 x float>* @10, align 1 ; <<4 x float>>:11 [#uses=12] + load <4 x float>* @11, align 1 ; <<4 x float>>:12 [#uses=13] + load <4 x float>* @12, align 1 ; <<4 x float>>:13 [#uses=14] + load <4 x float>* @13, align 1 ; <<4 x float>>:14 [#uses=15] + load <4 x float>* @14, align 1 ; <<4 x float>>:15 [#uses=16] + load <4 x float>* @15, align 1 ; <<4 x float>>:16 [#uses=17] + load <4 x float>* @16, align 1 ; <<4 x float>>:17 [#uses=18] + load <4 x float>* @17, align 1 ; <<4 x float>>:18 [#uses=19] + load <4 x float>* @18, align 1 ; <<4 x float>>:19 [#uses=20] + load <4 x float>* @19, align 1 ; <<4 x float>>:20 [#uses=21] + load <4 x float>* @20, align 1 ; <<4 x float>>:21 [#uses=22] + load <4 x float>* @21, align 1 ; <<4 x float>>:22 [#uses=23] + load <4 x float>* @22, 
align 1 ; <<4 x float>>:23 [#uses=24] + load <4 x float>* @23, align 1 ; <<4 x float>>:24 [#uses=25] + load <4 x float>* @24, align 1 ; <<4 x float>>:25 [#uses=26] + load <4 x float>* @25, align 1 ; <<4 x float>>:26 [#uses=27] + load <4 x float>* @26, align 1 ; <<4 x float>>:27 [#uses=28] + load <4 x float>* @27, align 1 ; <<4 x float>>:28 [#uses=29] + load <4 x float>* @28, align 1 ; <<4 x float>>:29 [#uses=30] + load <4 x float>* @29, align 1 ; <<4 x float>>:30 [#uses=31] + load <4 x float>* @30, align 1 ; <<4 x float>>:31 [#uses=32] + load <4 x float>* @31, align 1 ; <<4 x float>>:32 [#uses=33] + fmul <4 x float> %1, %1 ; <<4 x float>>:33 [#uses=1] + fmul <4 x float> %33, %2 ; <<4 x float>>:34 [#uses=1] + fmul <4 x float> %34, %3 ; <<4 x float>>:35 [#uses=1] + fmul <4 x float> %35, %4 ; <<4 x float>>:36 [#uses=1] + fmul <4 x float> %36, %5 ; <<4 x float>>:37 [#uses=1] + fmul <4 x float> %37, %6 ; <<4 x float>>:38 [#uses=1] + fmul <4 x float> %38, %7 ; <<4 x float>>:39 [#uses=1] + fmul <4 x float> %39, %8 ; <<4 x float>>:40 [#uses=1] + fmul <4 x float> %40, %9 ; <<4 x float>>:41 [#uses=1] + fmul <4 x float> %41, %10 ; <<4 x float>>:42 [#uses=1] + fmul <4 x float> %42, %11 ; <<4 x float>>:43 [#uses=1] + fmul <4 x float> %43, %12 ; <<4 x float>>:44 [#uses=1] + fmul <4 x float> %44, %13 ; <<4 x float>>:45 [#uses=1] + fmul <4 x float> %45, %14 ; <<4 x float>>:46 [#uses=1] + fmul <4 x float> %46, %15 ; <<4 x float>>:47 [#uses=1] + fmul <4 x float> %47, %16 ; <<4 x float>>:48 [#uses=1] + fmul <4 x float> %48, %17 ; <<4 x float>>:49 [#uses=1] + fmul <4 x float> %49, %18 ; <<4 x float>>:50 [#uses=1] + fmul <4 x float> %50, %19 ; <<4 x float>>:51 [#uses=1] + fmul <4 x float> %51, %20 ; <<4 x float>>:52 [#uses=1] + fmul <4 x float> %52, %21 ; <<4 x float>>:53 [#uses=1] + fmul <4 x float> %53, %22 ; <<4 x float>>:54 [#uses=1] + fmul <4 x float> %54, %23 ; <<4 x float>>:55 [#uses=1] + fmul <4 x float> %55, %24 ; <<4 x float>>:56 [#uses=1] + fmul <4 x float> %56, %25 ; <<4 x 
float>>:57 [#uses=1] + fmul <4 x float> %57, %26 ; <<4 x float>>:58 [#uses=1] + fmul <4 x float> %58, %27 ; <<4 x float>>:59 [#uses=1] + fmul <4 x float> %59, %28 ; <<4 x float>>:60 [#uses=1] + fmul <4 x float> %60, %29 ; <<4 x float>>:61 [#uses=1] + fmul <4 x float> %61, %30 ; <<4 x float>>:62 [#uses=1] + fmul <4 x float> %62, %31 ; <<4 x float>>:63 [#uses=1] + fmul <4 x float> %63, %32 ; <<4 x float>>:64 [#uses=3] + fmul <4 x float> %2, %2 ; <<4 x float>>:65 [#uses=1] + fmul <4 x float> %65, %3 ; <<4 x float>>:66 [#uses=1] + fmul <4 x float> %66, %4 ; <<4 x float>>:67 [#uses=1] + fmul <4 x float> %67, %5 ; <<4 x float>>:68 [#uses=1] + fmul <4 x float> %68, %6 ; <<4 x float>>:69 [#uses=1] + fmul <4 x float> %69, %7 ; <<4 x float>>:70 [#uses=1] + fmul <4 x float> %70, %8 ; <<4 x float>>:71 [#uses=1] + fmul <4 x float> %71, %9 ; <<4 x float>>:72 [#uses=1] + fmul <4 x float> %72, %10 ; <<4 x float>>:73 [#uses=1] + fmul <4 x float> %73, %11 ; <<4 x float>>:74 [#uses=1] + fmul <4 x float> %74, %12 ; <<4 x float>>:75 [#uses=1] + fmul <4 x float> %75, %13 ; <<4 x float>>:76 [#uses=1] + fmul <4 x float> %76, %14 ; <<4 x float>>:77 [#uses=1] + fmul <4 x float> %77, %15 ; <<4 x float>>:78 [#uses=1] + fmul <4 x float> %78, %16 ; <<4 x float>>:79 [#uses=1] + fmul <4 x float> %79, %17 ; <<4 x float>>:80 [#uses=1] + fmul <4 x float> %80, %18 ; <<4 x float>>:81 [#uses=1] + fmul <4 x float> %81, %19 ; <<4 x float>>:82 [#uses=1] + fmul <4 x float> %82, %20 ; <<4 x float>>:83 [#uses=1] + fmul <4 x float> %83, %21 ; <<4 x float>>:84 [#uses=1] + fmul <4 x float> %84, %22 ; <<4 x float>>:85 [#uses=1] + fmul <4 x float> %85, %23 ; <<4 x float>>:86 [#uses=1] + fmul <4 x float> %86, %24 ; <<4 x float>>:87 [#uses=1] + fmul <4 x float> %87, %25 ; <<4 x float>>:88 [#uses=1] + fmul <4 x float> %88, %26 ; <<4 x float>>:89 [#uses=1] + fmul <4 x float> %89, %27 ; <<4 x float>>:90 [#uses=1] + fmul <4 x float> %90, %28 ; <<4 x float>>:91 [#uses=1] + fmul <4 x float> %91, %29 ; <<4 x float>>:92 
[#uses=1] + fmul <4 x float> %92, %30 ; <<4 x float>>:93 [#uses=1] + fmul <4 x float> %93, %31 ; <<4 x float>>:94 [#uses=1] + fmul <4 x float> %94, %32 ; <<4 x float>>:95 [#uses=1] + fmul <4 x float> %3, %3 ; <<4 x float>>:96 [#uses=1] + fmul <4 x float> %96, %4 ; <<4 x float>>:97 [#uses=1] + fmul <4 x float> %97, %5 ; <<4 x float>>:98 [#uses=1] + fmul <4 x float> %98, %6 ; <<4 x float>>:99 [#uses=1] + fmul <4 x float> %99, %7 ; <<4 x float>>:100 [#uses=1] + fmul <4 x float> %100, %8 ; <<4 x float>>:101 [#uses=1] + fmul <4 x float> %101, %9 ; <<4 x float>>:102 [#uses=1] + fmul <4 x float> %102, %10 ; <<4 x float>>:103 [#uses=1] + fmul <4 x float> %103, %11 ; <<4 x float>>:104 [#uses=1] + fmul <4 x float> %104, %12 ; <<4 x float>>:105 [#uses=1] + fmul <4 x float> %105, %13 ; <<4 x float>>:106 [#uses=1] + fmul <4 x float> %106, %14 ; <<4 x float>>:107 [#uses=1] + fmul <4 x float> %107, %15 ; <<4 x float>>:108 [#uses=1] + fmul <4 x float> %108, %16 ; <<4 x float>>:109 [#uses=1] + fmul <4 x float> %109, %17 ; <<4 x float>>:110 [#uses=1] + fmul <4 x float> %110, %18 ; <<4 x float>>:111 [#uses=1] + fmul <4 x float> %111, %19 ; <<4 x float>>:112 [#uses=1] + fmul <4 x float> %112, %20 ; <<4 x float>>:113 [#uses=1] + fmul <4 x float> %113, %21 ; <<4 x float>>:114 [#uses=1] + fmul <4 x float> %114, %22 ; <<4 x float>>:115 [#uses=1] + fmul <4 x float> %115, %23 ; <<4 x float>>:116 [#uses=1] + fmul <4 x float> %116, %24 ; <<4 x float>>:117 [#uses=1] + fmul <4 x float> %117, %25 ; <<4 x float>>:118 [#uses=1] + fmul <4 x float> %118, %26 ; <<4 x float>>:119 [#uses=1] + fmul <4 x float> %119, %27 ; <<4 x float>>:120 [#uses=1] + fmul <4 x float> %120, %28 ; <<4 x float>>:121 [#uses=1] + fmul <4 x float> %121, %29 ; <<4 x float>>:122 [#uses=1] + fmul <4 x float> %122, %30 ; <<4 x float>>:123 [#uses=1] + fmul <4 x float> %123, %31 ; <<4 x float>>:124 [#uses=1] + fmul <4 x float> %124, %32 ; <<4 x float>>:125 [#uses=1] + fmul <4 x float> %4, %4 ; <<4 x float>>:126 [#uses=1] + fmul <4 
x float> %126, %5 ; <<4 x float>>:127 [#uses=1] + fmul <4 x float> %127, %6 ; <<4 x float>>:128 [#uses=1] + fmul <4 x float> %128, %7 ; <<4 x float>>:129 [#uses=1] + fmul <4 x float> %129, %8 ; <<4 x float>>:130 [#uses=1] + fmul <4 x float> %130, %9 ; <<4 x float>>:131 [#uses=1] + fmul <4 x float> %131, %10 ; <<4 x float>>:132 [#uses=1] + fmul <4 x float> %132, %11 ; <<4 x float>>:133 [#uses=1] + fmul <4 x float> %133, %12 ; <<4 x float>>:134 [#uses=1] + fmul <4 x float> %134, %13 ; <<4 x float>>:135 [#uses=1] + fmul <4 x float> %135, %14 ; <<4 x float>>:136 [#uses=1] + fmul <4 x float> %136, %15 ; <<4 x float>>:137 [#uses=1] + fmul <4 x float> %137, %16 ; <<4 x float>>:138 [#uses=1] + fmul <4 x float> %138, %17 ; <<4 x float>>:139 [#uses=1] + fmul <4 x float> %139, %18 ; <<4 x float>>:140 [#uses=1] + fmul <4 x float> %140, %19 ; <<4 x float>>:141 [#uses=1] + fmul <4 x float> %141, %20 ; <<4 x float>>:142 [#uses=1] + fmul <4 x float> %142, %21 ; <<4 x float>>:143 [#uses=1] + fmul <4 x float> %143, %22 ; <<4 x float>>:144 [#uses=1] + fmul <4 x float> %144, %23 ; <<4 x float>>:145 [#uses=1] + fmul <4 x float> %145, %24 ; <<4 x float>>:146 [#uses=1] + fmul <4 x float> %146, %25 ; <<4 x float>>:147 [#uses=1] + fmul <4 x float> %147, %26 ; <<4 x float>>:148 [#uses=1] + fmul <4 x float> %148, %27 ; <<4 x float>>:149 [#uses=1] + fmul <4 x float> %149, %28 ; <<4 x float>>:150 [#uses=1] + fmul <4 x float> %150, %29 ; <<4 x float>>:151 [#uses=1] + fmul <4 x float> %151, %30 ; <<4 x float>>:152 [#uses=1] + fmul <4 x float> %152, %31 ; <<4 x float>>:153 [#uses=1] + fmul <4 x float> %153, %32 ; <<4 x float>>:154 [#uses=1] + fmul <4 x float> %5, %5 ; <<4 x float>>:155 [#uses=1] + fmul <4 x float> %155, %6 ; <<4 x float>>:156 [#uses=1] + fmul <4 x float> %156, %7 ; <<4 x float>>:157 [#uses=1] + fmul <4 x float> %157, %8 ; <<4 x float>>:158 [#uses=1] + fmul <4 x float> %158, %9 ; <<4 x float>>:159 [#uses=1] + fmul <4 x float> %159, %10 ; <<4 x float>>:160 [#uses=1] + fmul <4 x 
float> %160, %11 ; <<4 x float>>:161 [#uses=1] + fmul <4 x float> %161, %12 ; <<4 x float>>:162 [#uses=1] + fmul <4 x float> %162, %13 ; <<4 x float>>:163 [#uses=1] + fmul <4 x float> %163, %14 ; <<4 x float>>:164 [#uses=1] + fmul <4 x float> %164, %15 ; <<4 x float>>:165 [#uses=1] + fmul <4 x float> %165, %16 ; <<4 x float>>:166 [#uses=1] + fmul <4 x float> %166, %17 ; <<4 x float>>:167 [#uses=1] + fmul <4 x float> %167, %18 ; <<4 x float>>:168 [#uses=1] + fmul <4 x float> %168, %19 ; <<4 x float>>:169 [#uses=1] + fmul <4 x float> %169, %20 ; <<4 x float>>:170 [#uses=1] + fmul <4 x float> %170, %21 ; <<4 x float>>:171 [#uses=1] + fmul <4 x float> %171, %22 ; <<4 x float>>:172 [#uses=1] + fmul <4 x float> %172, %23 ; <<4 x float>>:173 [#uses=1] + fmul <4 x float> %173, %24 ; <<4 x float>>:174 [#uses=1] + fmul <4 x float> %174, %25 ; <<4 x float>>:175 [#uses=1] + fmul <4 x float> %175, %26 ; <<4 x float>>:176 [#uses=1] + fmul <4 x float> %176, %27 ; <<4 x float>>:177 [#uses=1] + fmul <4 x float> %177, %28 ; <<4 x float>>:178 [#uses=1] + fmul <4 x float> %178, %29 ; <<4 x float>>:179 [#uses=1] + fmul <4 x float> %179, %30 ; <<4 x float>>:180 [#uses=1] + fmul <4 x float> %180, %31 ; <<4 x float>>:181 [#uses=1] + fmul <4 x float> %181, %32 ; <<4 x float>>:182 [#uses=1] + fmul <4 x float> %6, %6 ; <<4 x float>>:183 [#uses=1] + fmul <4 x float> %183, %7 ; <<4 x float>>:184 [#uses=1] + fmul <4 x float> %184, %8 ; <<4 x float>>:185 [#uses=1] + fmul <4 x float> %185, %9 ; <<4 x float>>:186 [#uses=1] + fmul <4 x float> %186, %10 ; <<4 x float>>:187 [#uses=1] + fmul <4 x float> %187, %11 ; <<4 x float>>:188 [#uses=1] + fmul <4 x float> %188, %12 ; <<4 x float>>:189 [#uses=1] + fmul <4 x float> %189, %13 ; <<4 x float>>:190 [#uses=1] + fmul <4 x float> %190, %14 ; <<4 x float>>:191 [#uses=1] + fmul <4 x float> %191, %15 ; <<4 x float>>:192 [#uses=1] + fmul <4 x float> %192, %16 ; <<4 x float>>:193 [#uses=1] + fmul <4 x float> %193, %17 ; <<4 x float>>:194 [#uses=1] + fmul <4 x 
float> %194, %18 ; <<4 x float>>:195 [#uses=1] + fmul <4 x float> %195, %19 ; <<4 x float>>:196 [#uses=1] + fmul <4 x float> %196, %20 ; <<4 x float>>:197 [#uses=1] + fmul <4 x float> %197, %21 ; <<4 x float>>:198 [#uses=1] + fmul <4 x float> %198, %22 ; <<4 x float>>:199 [#uses=1] + fmul <4 x float> %199, %23 ; <<4 x float>>:200 [#uses=1] + fmul <4 x float> %200, %24 ; <<4 x float>>:201 [#uses=1] + fmul <4 x float> %201, %25 ; <<4 x float>>:202 [#uses=1] + fmul <4 x float> %202, %26 ; <<4 x float>>:203 [#uses=1] + fmul <4 x float> %203, %27 ; <<4 x float>>:204 [#uses=1] + fmul <4 x float> %204, %28 ; <<4 x float>>:205 [#uses=1] + fmul <4 x float> %205, %29 ; <<4 x float>>:206 [#uses=1] + fmul <4 x float> %206, %30 ; <<4 x float>>:207 [#uses=1] + fmul <4 x float> %207, %31 ; <<4 x float>>:208 [#uses=1] + fmul <4 x float> %208, %32 ; <<4 x float>>:209 [#uses=1] + fmul <4 x float> %7, %7 ; <<4 x float>>:210 [#uses=1] + fmul <4 x float> %210, %8 ; <<4 x float>>:211 [#uses=1] + fmul <4 x float> %211, %9 ; <<4 x float>>:212 [#uses=1] + fmul <4 x float> %212, %10 ; <<4 x float>>:213 [#uses=1] + fmul <4 x float> %213, %11 ; <<4 x float>>:214 [#uses=1] + fmul <4 x float> %214, %12 ; <<4 x float>>:215 [#uses=1] + fmul <4 x float> %215, %13 ; <<4 x float>>:216 [#uses=1] + fmul <4 x float> %216, %14 ; <<4 x float>>:217 [#uses=1] + fmul <4 x float> %217, %15 ; <<4 x float>>:218 [#uses=1] + fmul <4 x float> %218, %16 ; <<4 x float>>:219 [#uses=1] + fmul <4 x float> %219, %17 ; <<4 x float>>:220 [#uses=1] + fmul <4 x float> %220, %18 ; <<4 x float>>:221 [#uses=1] + fmul <4 x float> %221, %19 ; <<4 x float>>:222 [#uses=1] + fmul <4 x float> %222, %20 ; <<4 x float>>:223 [#uses=1] + fmul <4 x float> %223, %21 ; <<4 x float>>:224 [#uses=1] + fmul <4 x float> %224, %22 ; <<4 x float>>:225 [#uses=1] + fmul <4 x float> %225, %23 ; <<4 x float>>:226 [#uses=1] + fmul <4 x float> %226, %24 ; <<4 x float>>:227 [#uses=1] + fmul <4 x float> %227, %25 ; <<4 x float>>:228 [#uses=1] + fmul <4 
x float> %228, %26 ; <<4 x float>>:229 [#uses=1] + fmul <4 x float> %229, %27 ; <<4 x float>>:230 [#uses=1] + fmul <4 x float> %230, %28 ; <<4 x float>>:231 [#uses=1] + fmul <4 x float> %231, %29 ; <<4 x float>>:232 [#uses=1] + fmul <4 x float> %232, %30 ; <<4 x float>>:233 [#uses=1] + fmul <4 x float> %233, %31 ; <<4 x float>>:234 [#uses=1] + fmul <4 x float> %234, %32 ; <<4 x float>>:235 [#uses=1] + fmul <4 x float> %8, %8 ; <<4 x float>>:236 [#uses=1] + fmul <4 x float> %236, %9 ; <<4 x float>>:237 [#uses=1] + fmul <4 x float> %237, %10 ; <<4 x float>>:238 [#uses=1] + fmul <4 x float> %238, %11 ; <<4 x float>>:239 [#uses=1] + fmul <4 x float> %239, %12 ; <<4 x float>>:240 [#uses=1] + fmul <4 x float> %240, %13 ; <<4 x float>>:241 [#uses=1] + fmul <4 x float> %241, %14 ; <<4 x float>>:242 [#uses=1] + fmul <4 x float> %242, %15 ; <<4 x float>>:243 [#uses=1] + fmul <4 x float> %243, %16 ; <<4 x float>>:244 [#uses=1] + fmul <4 x float> %244, %17 ; <<4 x float>>:245 [#uses=1] + fmul <4 x float> %245, %18 ; <<4 x float>>:246 [#uses=1] + fmul <4 x float> %246, %19 ; <<4 x float>>:247 [#uses=1] + fmul <4 x float> %247, %20 ; <<4 x float>>:248 [#uses=1] + fmul <4 x float> %248, %21 ; <<4 x float>>:249 [#uses=1] + fmul <4 x float> %249, %22 ; <<4 x float>>:250 [#uses=1] + fmul <4 x float> %250, %23 ; <<4 x float>>:251 [#uses=1] + fmul <4 x float> %251, %24 ; <<4 x float>>:252 [#uses=1] + fmul <4 x float> %252, %25 ; <<4 x float>>:253 [#uses=1] + fmul <4 x float> %253, %26 ; <<4 x float>>:254 [#uses=1] + fmul <4 x float> %254, %27 ; <<4 x float>>:255 [#uses=1] + fmul <4 x float> %255, %28 ; <<4 x float>>:256 [#uses=1] + fmul <4 x float> %256, %29 ; <<4 x float>>:257 [#uses=1] + fmul <4 x float> %257, %30 ; <<4 x float>>:258 [#uses=1] + fmul <4 x float> %258, %31 ; <<4 x float>>:259 [#uses=1] + fmul <4 x float> %259, %32 ; <<4 x float>>:260 [#uses=1] + fmul <4 x float> %9, %9 ; <<4 x float>>:261 [#uses=1] + fmul <4 x float> %261, %10 ; <<4 x float>>:262 [#uses=1] + fmul <4 
x float> %262, %11 ; <<4 x float>>:263 [#uses=1] + fmul <4 x float> %263, %12 ; <<4 x float>>:264 [#uses=1] + fmul <4 x float> %264, %13 ; <<4 x float>>:265 [#uses=1] + fmul <4 x float> %265, %14 ; <<4 x float>>:266 [#uses=1] + fmul <4 x float> %266, %15 ; <<4 x float>>:267 [#uses=1] + fmul <4 x float> %267, %16 ; <<4 x float>>:268 [#uses=1] + fmul <4 x float> %268, %17 ; <<4 x float>>:269 [#uses=1] + fmul <4 x float> %269, %18 ; <<4 x float>>:270 [#uses=1] + fmul <4 x float> %270, %19 ; <<4 x float>>:271 [#uses=1] + fmul <4 x float> %271, %20 ; <<4 x float>>:272 [#uses=1] + fmul <4 x float> %272, %21 ; <<4 x float>>:273 [#uses=1] + fmul <4 x float> %273, %22 ; <<4 x float>>:274 [#uses=1] + fmul <4 x float> %274, %23 ; <<4 x float>>:275 [#uses=1] + fmul <4 x float> %275, %24 ; <<4 x float>>:276 [#uses=1] + fmul <4 x float> %276, %25 ; <<4 x float>>:277 [#uses=1] + fmul <4 x float> %277, %26 ; <<4 x float>>:278 [#uses=1] + fmul <4 x float> %278, %27 ; <<4 x float>>:279 [#uses=1] + fmul <4 x float> %279, %28 ; <<4 x float>>:280 [#uses=1] + fmul <4 x float> %280, %29 ; <<4 x float>>:281 [#uses=1] + fmul <4 x float> %281, %30 ; <<4 x float>>:282 [#uses=1] + fmul <4 x float> %282, %31 ; <<4 x float>>:283 [#uses=1] + fmul <4 x float> %283, %32 ; <<4 x float>>:284 [#uses=1] + fmul <4 x float> %10, %10 ; <<4 x float>>:285 [#uses=1] + fmul <4 x float> %285, %11 ; <<4 x float>>:286 [#uses=1] + fmul <4 x float> %286, %12 ; <<4 x float>>:287 [#uses=1] + fmul <4 x float> %287, %13 ; <<4 x float>>:288 [#uses=1] + fmul <4 x float> %288, %14 ; <<4 x float>>:289 [#uses=1] + fmul <4 x float> %289, %15 ; <<4 x float>>:290 [#uses=1] + fmul <4 x float> %290, %16 ; <<4 x float>>:291 [#uses=1] + fmul <4 x float> %291, %17 ; <<4 x float>>:292 [#uses=1] + fmul <4 x float> %292, %18 ; <<4 x float>>:293 [#uses=1] + fmul <4 x float> %293, %19 ; <<4 x float>>:294 [#uses=1] + fmul <4 x float> %294, %20 ; <<4 x float>>:295 [#uses=1] + fmul <4 x float> %295, %21 ; <<4 x float>>:296 [#uses=1] + 
fmul <4 x float> %296, %22 ; <<4 x float>>:297 [#uses=1] + fmul <4 x float> %297, %23 ; <<4 x float>>:298 [#uses=1] + fmul <4 x float> %298, %24 ; <<4 x float>>:299 [#uses=1] + fmul <4 x float> %299, %25 ; <<4 x float>>:300 [#uses=1] + fmul <4 x float> %300, %26 ; <<4 x float>>:301 [#uses=1] + fmul <4 x float> %301, %27 ; <<4 x float>>:302 [#uses=1] + fmul <4 x float> %302, %28 ; <<4 x float>>:303 [#uses=1] + fmul <4 x float> %303, %29 ; <<4 x float>>:304 [#uses=1] + fmul <4 x float> %304, %30 ; <<4 x float>>:305 [#uses=1] + fmul <4 x float> %305, %31 ; <<4 x float>>:306 [#uses=1] + fmul <4 x float> %306, %32 ; <<4 x float>>:307 [#uses=1] + fmul <4 x float> %11, %11 ; <<4 x float>>:308 [#uses=1] + fmul <4 x float> %308, %12 ; <<4 x float>>:309 [#uses=1] + fmul <4 x float> %309, %13 ; <<4 x float>>:310 [#uses=1] + fmul <4 x float> %310, %14 ; <<4 x float>>:311 [#uses=1] + fmul <4 x float> %311, %15 ; <<4 x float>>:312 [#uses=1] + fmul <4 x float> %312, %16 ; <<4 x float>>:313 [#uses=1] + fmul <4 x float> %313, %17 ; <<4 x float>>:314 [#uses=1] + fmul <4 x float> %314, %18 ; <<4 x float>>:315 [#uses=1] + fmul <4 x float> %315, %19 ; <<4 x float>>:316 [#uses=1] + fmul <4 x float> %316, %20 ; <<4 x float>>:317 [#uses=1] + fmul <4 x float> %317, %21 ; <<4 x float>>:318 [#uses=1] + fmul <4 x float> %318, %22 ; <<4 x float>>:319 [#uses=1] + fmul <4 x float> %319, %23 ; <<4 x float>>:320 [#uses=1] + fmul <4 x float> %320, %24 ; <<4 x float>>:321 [#uses=1] + fmul <4 x float> %321, %25 ; <<4 x float>>:322 [#uses=1] + fmul <4 x float> %322, %26 ; <<4 x float>>:323 [#uses=1] + fmul <4 x float> %323, %27 ; <<4 x float>>:324 [#uses=1] + fmul <4 x float> %324, %28 ; <<4 x float>>:325 [#uses=1] + fmul <4 x float> %325, %29 ; <<4 x float>>:326 [#uses=1] + fmul <4 x float> %326, %30 ; <<4 x float>>:327 [#uses=1] + fmul <4 x float> %327, %31 ; <<4 x float>>:328 [#uses=1] + fmul <4 x float> %328, %32 ; <<4 x float>>:329 [#uses=1] + fmul <4 x float> %12, %12 ; <<4 x float>>:330 
[#uses=1] + fmul <4 x float> %330, %13 ; <<4 x float>>:331 [#uses=1] + fmul <4 x float> %331, %14 ; <<4 x float>>:332 [#uses=1] + fmul <4 x float> %332, %15 ; <<4 x float>>:333 [#uses=1] + fmul <4 x float> %333, %16 ; <<4 x float>>:334 [#uses=1] + fmul <4 x float> %334, %17 ; <<4 x float>>:335 [#uses=1] + fmul <4 x float> %335, %18 ; <<4 x float>>:336 [#uses=1] + fmul <4 x float> %336, %19 ; <<4 x float>>:337 [#uses=1] + fmul <4 x float> %337, %20 ; <<4 x float>>:338 [#uses=1] + fmul <4 x float> %338, %21 ; <<4 x float>>:339 [#uses=1] + fmul <4 x float> %339, %22 ; <<4 x float>>:340 [#uses=1] + fmul <4 x float> %340, %23 ; <<4 x float>>:341 [#uses=1] + fmul <4 x float> %341, %24 ; <<4 x float>>:342 [#uses=1] + fmul <4 x float> %342, %25 ; <<4 x float>>:343 [#uses=1] + fmul <4 x float> %343, %26 ; <<4 x float>>:344 [#uses=1] + fmul <4 x float> %344, %27 ; <<4 x float>>:345 [#uses=1] + fmul <4 x float> %345, %28 ; <<4 x float>>:346 [#uses=1] + fmul <4 x float> %346, %29 ; <<4 x float>>:347 [#uses=1] + fmul <4 x float> %347, %30 ; <<4 x float>>:348 [#uses=1] + fmul <4 x float> %348, %31 ; <<4 x float>>:349 [#uses=1] + fmul <4 x float> %349, %32 ; <<4 x float>>:350 [#uses=1] + fmul <4 x float> %13, %13 ; <<4 x float>>:351 [#uses=1] + fmul <4 x float> %351, %14 ; <<4 x float>>:352 [#uses=1] + fmul <4 x float> %352, %15 ; <<4 x float>>:353 [#uses=1] + fmul <4 x float> %353, %16 ; <<4 x float>>:354 [#uses=1] + fmul <4 x float> %354, %17 ; <<4 x float>>:355 [#uses=1] + fmul <4 x float> %355, %18 ; <<4 x float>>:356 [#uses=1] + fmul <4 x float> %356, %19 ; <<4 x float>>:357 [#uses=1] + fmul <4 x float> %357, %20 ; <<4 x float>>:358 [#uses=1] + fmul <4 x float> %358, %21 ; <<4 x float>>:359 [#uses=1] + fmul <4 x float> %359, %22 ; <<4 x float>>:360 [#uses=1] + fmul <4 x float> %360, %23 ; <<4 x float>>:361 [#uses=1] + fmul <4 x float> %361, %24 ; <<4 x float>>:362 [#uses=1] + fmul <4 x float> %362, %25 ; <<4 x float>>:363 [#uses=1] + fmul <4 x float> %363, %26 ; <<4 x 
float>>:364 [#uses=1] + fmul <4 x float> %364, %27 ; <<4 x float>>:365 [#uses=1] + fmul <4 x float> %365, %28 ; <<4 x float>>:366 [#uses=1] + fmul <4 x float> %366, %29 ; <<4 x float>>:367 [#uses=1] + fmul <4 x float> %367, %30 ; <<4 x float>>:368 [#uses=1] + fmul <4 x float> %368, %31 ; <<4 x float>>:369 [#uses=1] + fmul <4 x float> %369, %32 ; <<4 x float>>:370 [#uses=1] + fmul <4 x float> %14, %14 ; <<4 x float>>:371 [#uses=1] + fmul <4 x float> %371, %15 ; <<4 x float>>:372 [#uses=1] + fmul <4 x float> %372, %16 ; <<4 x float>>:373 [#uses=1] + fmul <4 x float> %373, %17 ; <<4 x float>>:374 [#uses=1] + fmul <4 x float> %374, %18 ; <<4 x float>>:375 [#uses=1] + fmul <4 x float> %375, %19 ; <<4 x float>>:376 [#uses=1] + fmul <4 x float> %376, %20 ; <<4 x float>>:377 [#uses=1] + fmul <4 x float> %377, %21 ; <<4 x float>>:378 [#uses=1] + fmul <4 x float> %378, %22 ; <<4 x float>>:379 [#uses=1] + fmul <4 x float> %379, %23 ; <<4 x float>>:380 [#uses=1] + fmul <4 x float> %380, %24 ; <<4 x float>>:381 [#uses=1] + fmul <4 x float> %381, %25 ; <<4 x float>>:382 [#uses=1] + fmul <4 x float> %382, %26 ; <<4 x float>>:383 [#uses=1] + fmul <4 x float> %383, %27 ; <<4 x float>>:384 [#uses=1] + fmul <4 x float> %384, %28 ; <<4 x float>>:385 [#uses=1] + fmul <4 x float> %385, %29 ; <<4 x float>>:386 [#uses=1] + fmul <4 x float> %386, %30 ; <<4 x float>>:387 [#uses=1] + fmul <4 x float> %387, %31 ; <<4 x float>>:388 [#uses=1] + fmul <4 x float> %388, %32 ; <<4 x float>>:389 [#uses=1] + fmul <4 x float> %15, %15 ; <<4 x float>>:390 [#uses=1] + fmul <4 x float> %390, %16 ; <<4 x float>>:391 [#uses=1] + fmul <4 x float> %391, %17 ; <<4 x float>>:392 [#uses=1] + fmul <4 x float> %392, %18 ; <<4 x float>>:393 [#uses=1] + fmul <4 x float> %393, %19 ; <<4 x float>>:394 [#uses=1] + fmul <4 x float> %394, %20 ; <<4 x float>>:395 [#uses=1] + fmul <4 x float> %395, %21 ; <<4 x float>>:396 [#uses=1] + fmul <4 x float> %396, %22 ; <<4 x float>>:397 [#uses=1] + fmul <4 x float> %397, %23 ; 
<<4 x float>>:398 [#uses=1] + fmul <4 x float> %398, %24 ; <<4 x float>>:399 [#uses=1] + fmul <4 x float> %399, %25 ; <<4 x float>>:400 [#uses=1] + fmul <4 x float> %400, %26 ; <<4 x float>>:401 [#uses=1] + fmul <4 x float> %401, %27 ; <<4 x float>>:402 [#uses=1] + fmul <4 x float> %402, %28 ; <<4 x float>>:403 [#uses=1] + fmul <4 x float> %403, %29 ; <<4 x float>>:404 [#uses=1] + fmul <4 x float> %404, %30 ; <<4 x float>>:405 [#uses=1] + fmul <4 x float> %405, %31 ; <<4 x float>>:406 [#uses=1] + fmul <4 x float> %406, %32 ; <<4 x float>>:407 [#uses=1] + fmul <4 x float> %16, %16 ; <<4 x float>>:408 [#uses=1] + fmul <4 x float> %408, %17 ; <<4 x float>>:409 [#uses=1] + fmul <4 x float> %409, %18 ; <<4 x float>>:410 [#uses=1] + fmul <4 x float> %410, %19 ; <<4 x float>>:411 [#uses=1] + fmul <4 x float> %411, %20 ; <<4 x float>>:412 [#uses=1] + fmul <4 x float> %412, %21 ; <<4 x float>>:413 [#uses=1] + fmul <4 x float> %413, %22 ; <<4 x float>>:414 [#uses=1] + fmul <4 x float> %414, %23 ; <<4 x float>>:415 [#uses=1] + fmul <4 x float> %415, %24 ; <<4 x float>>:416 [#uses=1] + fmul <4 x float> %416, %25 ; <<4 x float>>:417 [#uses=1] + fmul <4 x float> %417, %26 ; <<4 x float>>:418 [#uses=1] + fmul <4 x float> %418, %27 ; <<4 x float>>:419 [#uses=1] + fmul <4 x float> %419, %28 ; <<4 x float>>:420 [#uses=1] + fmul <4 x float> %420, %29 ; <<4 x float>>:421 [#uses=1] + fmul <4 x float> %421, %30 ; <<4 x float>>:422 [#uses=1] + fmul <4 x float> %422, %31 ; <<4 x float>>:423 [#uses=1] + fmul <4 x float> %423, %32 ; <<4 x float>>:424 [#uses=1] + fmul <4 x float> %17, %17 ; <<4 x float>>:425 [#uses=1] + fmul <4 x float> %425, %18 ; <<4 x float>>:426 [#uses=1] + fmul <4 x float> %426, %19 ; <<4 x float>>:427 [#uses=1] + fmul <4 x float> %427, %20 ; <<4 x float>>:428 [#uses=1] + fmul <4 x float> %428, %21 ; <<4 x float>>:429 [#uses=1] + fmul <4 x float> %429, %22 ; <<4 x float>>:430 [#uses=1] + fmul <4 x float> %430, %23 ; <<4 x float>>:431 [#uses=1] + fmul <4 x float> %431, 
%24 ; <<4 x float>>:432 [#uses=1] + fmul <4 x float> %432, %25 ; <<4 x float>>:433 [#uses=1] + fmul <4 x float> %433, %26 ; <<4 x float>>:434 [#uses=1] + fmul <4 x float> %434, %27 ; <<4 x float>>:435 [#uses=1] + fmul <4 x float> %435, %28 ; <<4 x float>>:436 [#uses=1] + fmul <4 x float> %436, %29 ; <<4 x float>>:437 [#uses=1] + fmul <4 x float> %437, %30 ; <<4 x float>>:438 [#uses=1] + fmul <4 x float> %438, %31 ; <<4 x float>>:439 [#uses=1] + fmul <4 x float> %439, %32 ; <<4 x float>>:440 [#uses=1] + fmul <4 x float> %18, %18 ; <<4 x float>>:441 [#uses=1] + fmul <4 x float> %441, %19 ; <<4 x float>>:442 [#uses=1] + fmul <4 x float> %442, %20 ; <<4 x float>>:443 [#uses=1] + fmul <4 x float> %443, %21 ; <<4 x float>>:444 [#uses=1] + fmul <4 x float> %444, %22 ; <<4 x float>>:445 [#uses=1] + fmul <4 x float> %445, %23 ; <<4 x float>>:446 [#uses=1] + fmul <4 x float> %446, %24 ; <<4 x float>>:447 [#uses=1] + fmul <4 x float> %447, %25 ; <<4 x float>>:448 [#uses=1] + fmul <4 x float> %448, %26 ; <<4 x float>>:449 [#uses=1] + fmul <4 x float> %449, %27 ; <<4 x float>>:450 [#uses=1] + fmul <4 x float> %450, %28 ; <<4 x float>>:451 [#uses=1] + fmul <4 x float> %451, %29 ; <<4 x float>>:452 [#uses=1] + fmul <4 x float> %452, %30 ; <<4 x float>>:453 [#uses=1] + fmul <4 x float> %453, %31 ; <<4 x float>>:454 [#uses=1] + fmul <4 x float> %454, %32 ; <<4 x float>>:455 [#uses=1] + fmul <4 x float> %19, %19 ; <<4 x float>>:456 [#uses=1] + fmul <4 x float> %456, %20 ; <<4 x float>>:457 [#uses=1] + fmul <4 x float> %457, %21 ; <<4 x float>>:458 [#uses=1] + fmul <4 x float> %458, %22 ; <<4 x float>>:459 [#uses=1] + fmul <4 x float> %459, %23 ; <<4 x float>>:460 [#uses=1] + fmul <4 x float> %460, %24 ; <<4 x float>>:461 [#uses=1] + fmul <4 x float> %461, %25 ; <<4 x float>>:462 [#uses=1] + fmul <4 x float> %462, %26 ; <<4 x float>>:463 [#uses=1] + fmul <4 x float> %463, %27 ; <<4 x float>>:464 [#uses=1] + fmul <4 x float> %464, %28 ; <<4 x float>>:465 [#uses=1] + fmul <4 x float> 
%465, %29 ; <<4 x float>>:466 [#uses=1] + fmul <4 x float> %466, %30 ; <<4 x float>>:467 [#uses=1] + fmul <4 x float> %467, %31 ; <<4 x float>>:468 [#uses=1] + fmul <4 x float> %468, %32 ; <<4 x float>>:469 [#uses=1] + fmul <4 x float> %20, %20 ; <<4 x float>>:470 [#uses=1] + fmul <4 x float> %470, %21 ; <<4 x float>>:471 [#uses=1] + fmul <4 x float> %471, %22 ; <<4 x float>>:472 [#uses=1] + fmul <4 x float> %472, %23 ; <<4 x float>>:473 [#uses=1] + fmul <4 x float> %473, %24 ; <<4 x float>>:474 [#uses=1] + fmul <4 x float> %474, %25 ; <<4 x float>>:475 [#uses=1] + fmul <4 x float> %475, %26 ; <<4 x float>>:476 [#uses=1] + fmul <4 x float> %476, %27 ; <<4 x float>>:477 [#uses=1] + fmul <4 x float> %477, %28 ; <<4 x float>>:478 [#uses=1] + fmul <4 x float> %478, %29 ; <<4 x float>>:479 [#uses=1] + fmul <4 x float> %479, %30 ; <<4 x float>>:480 [#uses=1] + fmul <4 x float> %480, %31 ; <<4 x float>>:481 [#uses=1] + fmul <4 x float> %481, %32 ; <<4 x float>>:482 [#uses=1] + fmul <4 x float> %21, %21 ; <<4 x float>>:483 [#uses=1] + fmul <4 x float> %483, %22 ; <<4 x float>>:484 [#uses=1] + fmul <4 x float> %484, %23 ; <<4 x float>>:485 [#uses=1] + fmul <4 x float> %485, %24 ; <<4 x float>>:486 [#uses=1] + fmul <4 x float> %486, %25 ; <<4 x float>>:487 [#uses=1] + fmul <4 x float> %487, %26 ; <<4 x float>>:488 [#uses=1] + fmul <4 x float> %488, %27 ; <<4 x float>>:489 [#uses=1] + fmul <4 x float> %489, %28 ; <<4 x float>>:490 [#uses=1] + fmul <4 x float> %490, %29 ; <<4 x float>>:491 [#uses=1] + fmul <4 x float> %491, %30 ; <<4 x float>>:492 [#uses=1] + fmul <4 x float> %492, %31 ; <<4 x float>>:493 [#uses=1] + fmul <4 x float> %493, %32 ; <<4 x float>>:494 [#uses=1] + fmul <4 x float> %22, %22 ; <<4 x float>>:495 [#uses=1] + fmul <4 x float> %495, %23 ; <<4 x float>>:496 [#uses=1] + fmul <4 x float> %496, %24 ; <<4 x float>>:497 [#uses=1] + fmul <4 x float> %497, %25 ; <<4 x float>>:498 [#uses=1] + fmul <4 x float> %498, %26 ; <<4 x float>>:499 [#uses=1] + fmul <4 x 
float> %499, %27 ; <<4 x float>>:500 [#uses=1] + fmul <4 x float> %500, %28 ; <<4 x float>>:501 [#uses=1] + fmul <4 x float> %501, %29 ; <<4 x float>>:502 [#uses=1] + fmul <4 x float> %502, %30 ; <<4 x float>>:503 [#uses=1] + fmul <4 x float> %503, %31 ; <<4 x float>>:504 [#uses=1] + fmul <4 x float> %504, %32 ; <<4 x float>>:505 [#uses=1] + fmul <4 x float> %23, %23 ; <<4 x float>>:506 [#uses=1] + fmul <4 x float> %506, %24 ; <<4 x float>>:507 [#uses=1] + fmul <4 x float> %507, %25 ; <<4 x float>>:508 [#uses=1] + fmul <4 x float> %508, %26 ; <<4 x float>>:509 [#uses=1] + fmul <4 x float> %509, %27 ; <<4 x float>>:510 [#uses=1] + fmul <4 x float> %510, %28 ; <<4 x float>>:511 [#uses=1] + fmul <4 x float> %511, %29 ; <<4 x float>>:512 [#uses=1] + fmul <4 x float> %512, %30 ; <<4 x float>>:513 [#uses=1] + fmul <4 x float> %513, %31 ; <<4 x float>>:514 [#uses=1] + fmul <4 x float> %514, %32 ; <<4 x float>>:515 [#uses=1] + fmul <4 x float> %24, %24 ; <<4 x float>>:516 [#uses=1] + fmul <4 x float> %516, %25 ; <<4 x float>>:517 [#uses=1] + fmul <4 x float> %517, %26 ; <<4 x float>>:518 [#uses=1] + fmul <4 x float> %518, %27 ; <<4 x float>>:519 [#uses=1] + fmul <4 x float> %519, %28 ; <<4 x float>>:520 [#uses=1] + fmul <4 x float> %520, %29 ; <<4 x float>>:521 [#uses=1] + fmul <4 x float> %521, %30 ; <<4 x float>>:522 [#uses=1] + fmul <4 x float> %522, %31 ; <<4 x float>>:523 [#uses=1] + fmul <4 x float> %523, %32 ; <<4 x float>>:524 [#uses=1] + fmul <4 x float> %25, %25 ; <<4 x float>>:525 [#uses=1] + fmul <4 x float> %525, %26 ; <<4 x float>>:526 [#uses=1] + fmul <4 x float> %526, %27 ; <<4 x float>>:527 [#uses=1] + fmul <4 x float> %527, %28 ; <<4 x float>>:528 [#uses=1] + fmul <4 x float> %528, %29 ; <<4 x float>>:529 [#uses=1] + fmul <4 x float> %529, %30 ; <<4 x float>>:530 [#uses=1] + fmul <4 x float> %530, %31 ; <<4 x float>>:531 [#uses=1] + fmul <4 x float> %531, %32 ; <<4 x float>>:532 [#uses=1] + fmul <4 x float> %26, %26 ; <<4 x float>>:533 [#uses=1] + fmul <4 
x float> %533, %27 ; <<4 x float>>:534 [#uses=1] + fmul <4 x float> %534, %28 ; <<4 x float>>:535 [#uses=1] + fmul <4 x float> %535, %29 ; <<4 x float>>:536 [#uses=1] + fmul <4 x float> %536, %30 ; <<4 x float>>:537 [#uses=1] + fmul <4 x float> %537, %31 ; <<4 x float>>:538 [#uses=1] + fmul <4 x float> %538, %32 ; <<4 x float>>:539 [#uses=1] + fmul <4 x float> %27, %27 ; <<4 x float>>:540 [#uses=1] + fmul <4 x float> %540, %28 ; <<4 x float>>:541 [#uses=1] + fmul <4 x float> %541, %29 ; <<4 x float>>:542 [#uses=1] + fmul <4 x float> %542, %30 ; <<4 x float>>:543 [#uses=1] + fmul <4 x float> %543, %31 ; <<4 x float>>:544 [#uses=1] + fmul <4 x float> %544, %32 ; <<4 x float>>:545 [#uses=1] + fmul <4 x float> %28, %28 ; <<4 x float>>:546 [#uses=1] + fmul <4 x float> %546, %29 ; <<4 x float>>:547 [#uses=1] + fmul <4 x float> %547, %30 ; <<4 x float>>:548 [#uses=1] + fmul <4 x float> %548, %31 ; <<4 x float>>:549 [#uses=1] + fmul <4 x float> %549, %32 ; <<4 x float>>:550 [#uses=1] + fmul <4 x float> %29, %29 ; <<4 x float>>:551 [#uses=1] + fmul <4 x float> %551, %30 ; <<4 x float>>:552 [#uses=1] + fmul <4 x float> %552, %31 ; <<4 x float>>:553 [#uses=1] + fmul <4 x float> %553, %32 ; <<4 x float>>:554 [#uses=1] + fmul <4 x float> %30, %30 ; <<4 x float>>:555 [#uses=1] + fmul <4 x float> %555, %31 ; <<4 x float>>:556 [#uses=1] + fmul <4 x float> %556, %32 ; <<4 x float>>:557 [#uses=1] + fmul <4 x float> %31, %31 ; <<4 x float>>:558 [#uses=1] + fmul <4 x float> %558, %32 ; <<4 x float>>:559 [#uses=1] + fmul <4 x float> %32, %32 ; <<4 x float>>:560 [#uses=1] + fadd <4 x float> %64, %64 ; <<4 x float>>:561 [#uses=1] + fadd <4 x float> %561, %64 ; <<4 x float>>:562 [#uses=1] + fadd <4 x float> %562, %95 ; <<4 x float>>:563 [#uses=1] + fadd <4 x float> %563, %125 ; <<4 x float>>:564 [#uses=1] + fadd <4 x float> %564, %154 ; <<4 x float>>:565 [#uses=1] + fadd <4 x float> %565, %182 ; <<4 x float>>:566 [#uses=1] + fadd <4 x float> %566, %209 ; <<4 x float>>:567 [#uses=1] + fadd 
<4 x float> %567, %235 ; <<4 x float>>:568 [#uses=1] + fadd <4 x float> %568, %260 ; <<4 x float>>:569 [#uses=1] + fadd <4 x float> %569, %284 ; <<4 x float>>:570 [#uses=1] + fadd <4 x float> %570, %307 ; <<4 x float>>:571 [#uses=1] + fadd <4 x float> %571, %329 ; <<4 x float>>:572 [#uses=1] + fadd <4 x float> %572, %350 ; <<4 x float>>:573 [#uses=1] + fadd <4 x float> %573, %370 ; <<4 x float>>:574 [#uses=1] + fadd <4 x float> %574, %389 ; <<4 x float>>:575 [#uses=1] + fadd <4 x float> %575, %407 ; <<4 x float>>:576 [#uses=1] + fadd <4 x float> %576, %424 ; <<4 x float>>:577 [#uses=1] + fadd <4 x float> %577, %440 ; <<4 x float>>:578 [#uses=1] + fadd <4 x float> %578, %455 ; <<4 x float>>:579 [#uses=1] + fadd <4 x float> %579, %469 ; <<4 x float>>:580 [#uses=1] + fadd <4 x float> %580, %482 ; <<4 x float>>:581 [#uses=1] + fadd <4 x float> %581, %494 ; <<4 x float>>:582 [#uses=1] + fadd <4 x float> %582, %505 ; <<4 x float>>:583 [#uses=1] + fadd <4 x float> %583, %515 ; <<4 x float>>:584 [#uses=1] + fadd <4 x float> %584, %524 ; <<4 x float>>:585 [#uses=1] + fadd <4 x float> %585, %532 ; <<4 x float>>:586 [#uses=1] + fadd <4 x float> %586, %539 ; <<4 x float>>:587 [#uses=1] + fadd <4 x float> %587, %545 ; <<4 x float>>:588 [#uses=1] + fadd <4 x float> %588, %550 ; <<4 x float>>:589 [#uses=1] + fadd <4 x float> %589, %554 ; <<4 x float>>:590 [#uses=1] + fadd <4 x float> %590, %557 ; <<4 x float>>:591 [#uses=1] + fadd <4 x float> %591, %559 ; <<4 x float>>:592 [#uses=1] + fadd <4 x float> %592, %560 ; <<4 x float>>:593 [#uses=1] + store <4 x float> %593, <4 x float>* @0, align 1 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-07-22-CombinerCrash.ll b/src/LLVM/test/CodeGen/X86/2008-07-22-CombinerCrash.ll new file mode 100644 index 0000000..0f67145 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-07-22-CombinerCrash.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 +; PR2566 + +external global i16 ; <i16*>:0 [#uses=1] +external global <4 x i16> ; <<4 x i16>*>:1 [#uses=1] + +declare void @abort() + +define void @t() nounwind { + load i16* @0 ; <i16>:1 [#uses=1] + zext i16 %1 to i64 ; <i64>:2 [#uses=1] + bitcast i64 %2 to <4 x i16> ; <<4 x i16>>:3 [#uses=1] + shufflevector <4 x i16> %3, <4 x i16> undef, <4 x i32> zeroinitializer ; <<4 x i16>>:4 [#uses=1] + store <4 x i16> %4, <4 x i16>* @1 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-07-23-VSetCC.ll b/src/LLVM/test/CodeGen/X86/2008-07-23-VSetCC.ll new file mode 100644 index 0000000..684ca5c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-07-23-VSetCC.ll
@@ -0,0 +1,32 @@ +; RUN: llc < %s -march=x86 -mcpu=pentium +; PR2575 + +define void @entry(i32 %m_task_id, i32 %start_x, i32 %end_x) nounwind { + br i1 false, label %bb.nph, label %._crit_edge + +bb.nph: ; preds = %bb.nph, %0 + %X = icmp sgt <4 x i32> zeroinitializer, < i32 -128, i32 -128, i32 -128, i32 -128 > ; <<4 x i32>>:1 [#uses=1] + sext <4 x i1> %X to <4 x i32> + extractelement <4 x i32> %1, i32 3 ; <i32>:2 [#uses=1] + lshr i32 %2, 31 ; <i32>:3 [#uses=1] + trunc i32 %3 to i1 ; <i1>:4 [#uses=1] + select i1 %4, i32 -1, i32 0 ; <i32>:5 [#uses=1] + insertelement <4 x i32> zeroinitializer, i32 %5, i32 3 ; <<4 x i32>>:6 [#uses=1] + and <4 x i32> zeroinitializer, %6 ; <<4 x i32>>:7 [#uses=1] + bitcast <4 x i32> %7 to <4 x float> ; <<4 x float>>:8 [#uses=1] + fmul <4 x float> zeroinitializer, %8 ; <<4 x float>>:9 [#uses=1] + bitcast <4 x float> %9 to <4 x i32> ; <<4 x i32>>:10 [#uses=1] + or <4 x i32> %10, zeroinitializer ; <<4 x i32>>:11 [#uses=1] + bitcast <4 x i32> %11 to <4 x float> ; <<4 x float>>:12 [#uses=1] + fmul <4 x float> %12, < float 1.000000e+02, float 1.000000e+02, float 1.000000e+02, float 1.000000e+02 > ; <<4 x float>>:13 [#uses=1] + fsub <4 x float> %13, < float 1.000000e+02, float 1.000000e+02, float 1.000000e+02, float 1.000000e+02 > ; <<4 x float>>:14 [#uses=1] + extractelement <4 x float> %14, i32 3 ; <float>:15 [#uses=1] + call float @fmaxf( float 0.000000e+00, float %15 ) ; <float>:16 [#uses=0] + br label %bb.nph + +._crit_edge: ; preds = %0 + ret void +} + + +declare float @fmaxf(float, float)
diff --git a/src/LLVM/test/CodeGen/X86/2008-08-06-CmpStride.ll b/src/LLVM/test/CodeGen/X86/2008-08-06-CmpStride.ll new file mode 100644 index 0000000..99cb856 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-08-06-CmpStride.ll
@@ -0,0 +1,23 @@ +; RUN: llc -march=x86-64 < %s -o - | grep {cmpl \\$\[1\], %} + +@.str = internal constant [4 x i8] c"%d\0A\00" + +declare i32 @printf(i8* noalias , ...) nounwind + +define i32 @main() nounwind { +entry: + br label %forbody + +forbody: + %i.0 = phi i32 [ 0, %entry ], [ %inc, %forbody ] ; <i32>[#uses=3] + %sub14 = sub i32 1027, %i.0 ; <i32> [#uses=1] + %mul15 = mul i32 %sub14, 10 ; <i32> [#uses=1] + %add166 = or i32 %mul15, 1 ; <i32> [#uses=1] * + call i32 (i8*, ...)* @printf( i8* noalias getelementptr ([4 x i8]* @.str, i32 0, i32 0), i32 %add166 ) nounwind + %inc = add i32 %i.0, 1 ; <i32> [#uses=3] + %cmp = icmp ne i32 %inc, 1027 ; <i1> [#uses=1] + br i1 %cmp, label %forbody, label %afterfor + +afterfor: ; preds = %forcond + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-08-06-RewriterBug.ll b/src/LLVM/test/CodeGen/X86/2008-08-06-RewriterBug.ll new file mode 100644 index 0000000..4428035 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-08-06-RewriterBug.ll
@@ -0,0 +1,40 @@ +; RUN: llc < %s -march=x86 +; PR2596 + +@data = external global [400 x i64] ; <[400 x i64]*> [#uses=5] + +define void @foo(double* noalias, double* noalias) { + load i64* getelementptr ([400 x i64]* @data, i32 0, i64 200), align 4 ; <i64>:3 [#uses=1] + load i64* getelementptr ([400 x i64]* @data, i32 0, i64 199), align 4 ; <i64>:4 [#uses=1] + load i64* getelementptr ([400 x i64]* @data, i32 0, i64 198), align 4 ; <i64>:5 [#uses=2] + load i64* getelementptr ([400 x i64]* @data, i32 0, i64 197), align 4 ; <i64>:6 [#uses=1] + br i1 false, label %28, label %7 + +; <label>:7 ; preds = %2 + load double** getelementptr (double** bitcast ([400 x i64]* @data to double**), i64 180), align 8 ; <double*>:8 [#uses=1] + bitcast double* %8 to double* ; <double*>:9 [#uses=1] + ptrtoint double* %9 to i64 ; <i64>:10 [#uses=1] + mul i64 %4, %3 ; <i64>:11 [#uses=1] + add i64 0, %11 ; <i64>:12 [#uses=1] + shl i64 %12, 3 ; <i64>:13 [#uses=1] + sub i64 %10, %13 ; <i64>:14 [#uses=1] + add i64 %5, 0 ; <i64>:15 [#uses=1] + shl i64 %15, 3 ; <i64>:16 [#uses=1] + bitcast i64 %16 to i64 ; <i64>:17 [#uses=1] + mul i64 %6, %5 ; <i64>:18 [#uses=1] + add i64 0, %18 ; <i64>:19 [#uses=1] + shl i64 %19, 3 ; <i64>:20 [#uses=1] + sub i64 %17, %20 ; <i64>:21 [#uses=1] + add i64 0, %21 ; <i64>:22 [#uses=1] + add i64 0, %14 ; <i64>:23 [#uses=1] + br label %24 + +; <label>:24 ; preds = %24, %7 + phi i64 [ 0, %24 ], [ %22, %7 ] ; <i64>:25 [#uses=1] + phi i64 [ 0, %24 ], [ %23, %7 ] ; <i64>:26 [#uses=0] + add i64 %25, 24 ; <i64>:27 [#uses=0] + br label %24 + +; <label>:28 ; preds = %2 + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-08-17-UComiCodeGenBug.ll b/src/LLVM/test/CodeGen/X86/2008-08-17-UComiCodeGenBug.ll new file mode 100644 index 0000000..32f6ca0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-08-17-UComiCodeGenBug.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movzbl + +define i32 @foo(<4 x float> %a, <4 x float> %b) nounwind { +entry: + tail call i32 @llvm.x86.sse.ucomige.ss( <4 x float> %a, <4 x float> %b ) nounwind readnone + ret i32 %0 +} + +declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/X86/2008-08-19-SubAndFetch.ll b/src/LLVM/test/CodeGen/X86/2008-08-19-SubAndFetch.ll new file mode 100644 index 0000000..360ec73 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-08-19-SubAndFetch.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s + +@var = external global i64 ; <i64*> [#uses=1] + +define i32 @main() nounwind { +entry: +; CHECK: main: +; CHECK: lock +; CHECK: decq + atomicrmw sub i64* @var, i64 1 monotonic + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll b/src/LLVM/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll new file mode 100644 index 0000000..53402c0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s -march=x86-64 + + %struct.DrawHelper = type { void (i32, %struct.QT_FT_Span*, i8*)*, void (i32, %struct.QT_FT_Span*, i8*)*, void (%struct.QRasterBuffer*, i32, i32, i32, i8*, i32, i32, i32)*, void (%struct.QRasterBuffer*, i32, i32, i32, i8*, i32, i32, i32)*, void (%struct.QRasterBuffer*, i32, i32, i32, i32, i32)* } + %struct.QBasicAtomic = type { i32 } + %struct.QClipData = type { i32, %"struct.QClipData::ClipLine"*, i32, i32, %struct.QT_FT_Span*, i32, i32, i32, i32 } + %"struct.QClipData::ClipLine" = type { i32, %struct.QT_FT_Span* } + %struct.QRasterBuffer = type { %struct.QRect, %struct.QRect, %struct.QRegion, %struct.QRegion, %struct.QClipData*, %struct.QClipData*, i8, i8, i32, i32, i32, i32, %struct.DrawHelper*, i32, i32, i32, i8* } + %struct.QRect = type { i32, i32, i32, i32 } + %struct.QRegion = type { %"struct.QRegion::QRegionData"* } + %"struct.QRegion::QRegionData" = type { %struct.QBasicAtomic, %struct._XRegion*, i8*, %struct.QRegionPrivate* } + %struct.QRegionPrivate = type opaque + %struct.QT_FT_Span = type { i16, i16, i16, i8 } + %struct._XRegion = type opaque + +define hidden void @_Z24qt_bitmapblit16_sse3dnowP13QRasterBufferiijPKhiii(%struct.QRasterBuffer* %rasterBuffer, i32 %x, i32 %y, i32 %color, i8* %src, i32 %width, i32 %height, i32 %stride) nounwind { +entry: + br i1 false, label %bb.nph144.split, label %bb133 + +bb.nph144.split: ; preds = %entry + %tmp = bitcast <8 x i8> zeroinitializer to x86_mmx + %tmp2 = bitcast <8 x i8> zeroinitializer to x86_mmx + tail call void @llvm.x86.mmx.maskmovq( x86_mmx %tmp, x86_mmx %tmp2, i8* null ) nounwind + unreachable + +bb133: ; preds = %entry + ret void +} + +declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll b/src/LLVM/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll new file mode 100644 index 0000000..101b3c5 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -mcpu=core2 | grep pxor | count 2 +; RUN: llc < %s -mcpu=core2 | not grep movapd +; PR2715 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + %struct.XPTTypeDescriptorPrefix = type { i8 } + %struct.nsISupports = type { i32 (...)** } + %struct.nsXPTCMiniVariant = type { %"struct.nsXPTCMiniVariant::._39" } + %"struct.nsXPTCMiniVariant::._39" = type { i64 } + %struct.nsXPTCVariant = type { %struct.nsXPTCMiniVariant, i8*, %struct.nsXPTType, i8 } + %struct.nsXPTType = type { %struct.XPTTypeDescriptorPrefix } + +define i32 @XPTC_InvokeByIndex(%struct.nsISupports* %that, i32 %methodIndex, i32 %paramCount, %struct.nsXPTCVariant* %params) nounwind { +entry: + call void asm sideeffect "", "{xmm0},{xmm1},{xmm2},{xmm3},{xmm4},{xmm5},{xmm6},{xmm7},~{dirflag},~{fpsr},~{flags}"( double undef, double undef, double undef, double 1.0, double undef, double 0.0, double undef, double 0.0 ) nounwind + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll b/src/LLVM/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll new file mode 100644 index 0000000..1d27fc5 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll
@@ -0,0 +1,17 @@ +; Check that eh_return & unwind_init were properly lowered +; RUN: llc < %s | grep %ebp | count 9 +; RUN: llc < %s | grep %ecx | count 5 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "i386-pc-linux" + +define i8* @test(i32 %a, i8* %b) { +entry: + call void @llvm.eh.unwind.init() + %foo = alloca i32 + call void @llvm.eh.return.i32(i32 %a, i8* %b) + unreachable +} + +declare void @llvm.eh.return.i32(i32, i8*) +declare void @llvm.eh.unwind.init()
diff --git a/src/LLVM/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll b/src/LLVM/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll new file mode 100644 index 0000000..d423bfc --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll
@@ -0,0 +1,17 @@ +; Check that eh_return & unwind_init were properly lowered +; RUN: llc < %s | grep %rbp | count 7 +; RUN: llc < %s | grep %rcx | count 3 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define i8* @test(i64 %a, i8* %b) { +entry: + call void @llvm.eh.unwind.init() + %foo = alloca i32 + call void @llvm.eh.return.i64(i64 %a, i8* %b) + unreachable +} + +declare void @llvm.eh.return.i64(i64, i8*) +declare void @llvm.eh.unwind.init()
diff --git a/src/LLVM/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll b/src/LLVM/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll new file mode 100644 index 0000000..2dc1dea --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
@@ -0,0 +1,35 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | grep unpcklpd +; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | grep unpckhpd +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep cvttpd2pi | count 1 +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep cvtpi2pd | count 1 +; originally from PR2687, but things don't work that way any more. +; there are no MMX instructions here; we use XMM. + +define <2 x double> @a(<2 x i32> %x) nounwind { +entry: + %y = sitofp <2 x i32> %x to <2 x double> + ret <2 x double> %y +} + +define <2 x i32> @b(<2 x double> %x) nounwind { +entry: + %y = fptosi <2 x double> %x to <2 x i32> + ret <2 x i32> %y +} + +; This is how to get MMX instructions. + +define <2 x double> @a2(x86_mmx %x) nounwind { +entry: + %y = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %x) + ret <2 x double> %y +} + +define x86_mmx @b2(<2 x double> %x) nounwind { +entry: + %y = tail call x86_mmx @llvm.x86.sse.cvttpd2pi (<2 x double> %x) + ret x86_mmx %y +} + +declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) +declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>)
diff --git a/src/LLVM/test/CodeGen/X86/2008-09-09-LinearScanBug.ll b/src/LLVM/test/CodeGen/X86/2008-09-09-LinearScanBug.ll new file mode 100644 index 0000000..b3312d9 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-09-09-LinearScanBug.ll
@@ -0,0 +1,65 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin +; PR2757 + +@g_3 = external global i32 ; <i32*> [#uses=1] + +define i32 @func_125(i32 %p_126, i32 %p_128, i32 %p_129) nounwind { +entry: + %tmp2.i = load i32* @g_3 ; <i32> [#uses=2] + %conv = trunc i32 %tmp2.i to i16 ; <i16> [#uses=3] + br label %forcond1.preheader.i.i7 + +forcond1.preheader.i.i7: ; preds = %forinc6.i.i25, %entry + %p_86.addr.06.i.i4 = phi i32 [ 0, %entry ], [ %sub.i.i.i23, %forinc6.i.i25 ] ; <i32> [#uses=1] + %p_87.addr.15.i.i5 = phi i32 [ 0, %entry ], [ %p_87.addr.0.lcssa.i.i21, %forinc6.i.i25 ] ; <i32> [#uses=2] + br i1 false, label %forinc6.i.i25, label %forinc.i.i11 + +forinc.i.i11: ; preds = %forcond1.backedge.i.i20, %forcond1.preheader.i.i7 + %p_87.addr.02.i.i8 = phi i32 [ %p_87.addr.15.i.i5, %forcond1.preheader.i.i7 ], [ %p_87.addr.0.be.i.i18, %forcond1.backedge.i.i20 ] ; <i32> [#uses=1] + %conv.i.i9 = trunc i32 %p_87.addr.02.i.i8 to i8 ; <i8> [#uses=1] + br i1 false, label %land_rhs3.i.i.i14, label %lor_rhs.i.i.i17 + +land_rhs3.i.i.i14: ; preds = %forinc.i.i11 + br i1 false, label %forcond1.backedge.i.i20, label %lor_rhs.i.i.i17 + +lor_rhs.i.i.i17: ; preds = %land_rhs3.i.i.i14, %forinc.i.i11 + %conv29.i.i.i15 = sext i8 %conv.i.i9 to i32 ; <i32> [#uses=1] + %add.i.i.i16 = add i32 %conv29.i.i.i15, 1 ; <i32> [#uses=1] + br label %forcond1.backedge.i.i20 + +forcond1.backedge.i.i20: ; preds = %lor_rhs.i.i.i17, %land_rhs3.i.i.i14 + %p_87.addr.0.be.i.i18 = phi i32 [ %add.i.i.i16, %lor_rhs.i.i.i17 ], [ 0, %land_rhs3.i.i.i14 ] ; <i32> [#uses=3] + %tobool3.i.i19 = icmp eq i32 %p_87.addr.0.be.i.i18, 0 ; <i1> [#uses=1] + br i1 %tobool3.i.i19, label %forinc6.i.i25, label %forinc.i.i11 + +forinc6.i.i25: ; preds = %forcond1.backedge.i.i20, %forcond1.preheader.i.i7 + %p_87.addr.0.lcssa.i.i21 = phi i32 [ %p_87.addr.15.i.i5, %forcond1.preheader.i.i7 ], [ %p_87.addr.0.be.i.i18, %forcond1.backedge.i.i20 ] ; <i32> [#uses=1] + %conv.i.i.i22 = and i32 %p_86.addr.06.i.i4, 255 ; <i32> [#uses=1] + 
%sub.i.i.i23 = add i32 %conv.i.i.i22, -1 ; <i32> [#uses=2] + %phitmp.i.i24 = icmp eq i32 %sub.i.i.i23, 0 ; <i1> [#uses=1] + br i1 %phitmp.i.i24, label %func_106.exit27, label %forcond1.preheader.i.i7 + +func_106.exit27: ; preds = %forinc6.i.i25 + %cmp = icmp ne i32 %tmp2.i, 1 ; <i1> [#uses=3] + %cmp.ext = zext i1 %cmp to i32 ; <i32> [#uses=1] + br i1 %cmp, label %safe_mod_int16_t_s_s.exit, label %lor_rhs.i + +lor_rhs.i: ; preds = %func_106.exit27 + %tobool.i = xor i1 %cmp, true ; <i1> [#uses=1] + %or.cond.i = or i1 false, %tobool.i ; <i1> [#uses=1] + br i1 %or.cond.i, label %ifend.i, label %safe_mod_int16_t_s_s.exit + +ifend.i: ; preds = %lor_rhs.i + %conv6.i = sext i16 %conv to i32 ; <i32> [#uses=1] + %rem.i = urem i32 %conv6.i, %cmp.ext ; <i32> [#uses=1] + %conv8.i = trunc i32 %rem.i to i16 ; <i16> [#uses=1] + br label %safe_mod_int16_t_s_s.exit + +safe_mod_int16_t_s_s.exit: ; preds = %ifend.i, %lor_rhs.i, %func_106.exit27 + %call31 = phi i16 [ %conv8.i, %ifend.i ], [ %conv, %func_106.exit27 ], [ %conv, %lor_rhs.i ] ; <i16> [#uses=1] + %conv4 = sext i16 %call31 to i32 ; <i32> [#uses=1] + %call5 = tail call i32 (...)* @func_104( i32 %conv4 ) ; <i32> [#uses=0] + ret i32 undef +} + +declare i32 @func_104(...)
diff --git a/src/LLVM/test/CodeGen/X86/2008-09-11-CoalescerBug.ll b/src/LLVM/test/CodeGen/X86/2008-09-11-CoalescerBug.ll new file mode 100644 index 0000000..108f243 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-09-11-CoalescerBug.ll
@@ -0,0 +1,38 @@ +; RUN: llc < %s -march=x86 +; PR2783 + +@g_15 = external global i16 ; <i16*> [#uses=2] + +define i32 @func_3(i32 %p_5) nounwind { +entry: + %0 = srem i32 1, 0 ; <i32> [#uses=2] + %1 = load i16* @g_15, align 2 ; <i16> [#uses=1] + %2 = zext i16 %1 to i32 ; <i32> [#uses=1] + %3 = and i32 %2, 1 ; <i32> [#uses=1] + %4 = tail call i32 (...)* @rshift_u_s( i32 1 ) nounwind ; <i32> [#uses=1] + %5 = icmp slt i32 %4, 2 ; <i1> [#uses=1] + %6 = zext i1 %5 to i32 ; <i32> [#uses=1] + %7 = icmp sge i32 %3, %6 ; <i1> [#uses=1] + %8 = zext i1 %7 to i32 ; <i32> [#uses=1] + %9 = load i16* @g_15, align 2 ; <i16> [#uses=1] + %10 = icmp eq i16 %9, 0 ; <i1> [#uses=1] + %11 = zext i1 %10 to i32 ; <i32> [#uses=1] + %12 = tail call i32 (...)* @func_20( i32 1 ) nounwind ; <i32> [#uses=1] + %13 = icmp sge i32 %11, %12 ; <i1> [#uses=1] + %14 = zext i1 %13 to i32 ; <i32> [#uses=1] + %15 = sub i32 %8, %14 ; <i32> [#uses=1] + %16 = icmp ult i32 %15, 2 ; <i1> [#uses=1] + %17 = zext i1 %16 to i32 ; <i32> [#uses=1] + %18 = icmp ugt i32 %0, 3 ; <i1> [#uses=1] + %or.cond = or i1 false, %18 ; <i1> [#uses=1] + %19 = select i1 %or.cond, i32 0, i32 %0 ; <i32> [#uses=1] + %.0 = lshr i32 %17, %19 ; <i32> [#uses=1] + %20 = tail call i32 (...)* @func_7( i32 %.0 ) nounwind ; <i32> [#uses=0] + ret i32 undef +} + +declare i32 @rshift_u_s(...) + +declare i32 @func_20(...) + +declare i32 @func_7(...)
diff --git a/src/LLVM/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll b/src/LLVM/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll new file mode 100644 index 0000000..534f990 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll
@@ -0,0 +1,33 @@ +; RUN: llc < %s -march=x86 +; PR2748 + +@g_73 = external global i32 ; <i32*> [#uses=1] +@g_5 = external global i32 ; <i32*> [#uses=1] + +define i32 @func_44(i16 signext %p_46) nounwind { +entry: + %0 = load i32* @g_5, align 4 ; <i32> [#uses=1] + %1 = ashr i32 %0, 1 ; <i32> [#uses=1] + %2 = icmp sgt i32 %1, 1 ; <i1> [#uses=1] + %3 = zext i1 %2 to i32 ; <i32> [#uses=1] + %4 = load i32* @g_73, align 4 ; <i32> [#uses=1] + %5 = zext i16 %p_46 to i64 ; <i64> [#uses=1] + %6 = sub i64 0, %5 ; <i64> [#uses=1] + %7 = trunc i64 %6 to i8 ; <i8> [#uses=2] + %8 = trunc i32 %4 to i8 ; <i8> [#uses=2] + %9 = icmp eq i8 %8, 0 ; <i1> [#uses=1] + br i1 %9, label %bb11, label %bb12 + +bb11: ; preds = %entry + %10 = urem i8 %7, %8 ; <i8> [#uses=1] + br label %bb12 + +bb12: ; preds = %bb11, %entry + %.014.in = phi i8 [ %10, %bb11 ], [ %7, %entry ] ; <i8> [#uses=1] + %11 = icmp ne i8 %.014.in, 0 ; <i1> [#uses=1] + %12 = zext i1 %11 to i32 ; <i32> [#uses=1] + %13 = tail call i32 (...)* @func_48( i32 %12, i32 %3, i32 0 ) nounwind ; <i32> [#uses=0] + ret i32 undef +} + +declare i32 @func_48(...)
diff --git a/src/LLVM/test/CodeGen/X86/2008-09-17-inline-asm-1.ll b/src/LLVM/test/CodeGen/X86/2008-09-17-inline-asm-1.ll new file mode 100644 index 0000000..86e50c9 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-09-17-inline-asm-1.ll
@@ -0,0 +1,30 @@ +; RUN: llc < %s -march=x86 | FileCheck %s +; RUN: llc < %s -march=x86 -regalloc=fast | FileCheck %s + +; %0 must not be put in EAX or EDX. +; In the first asm, $0 and $2 must not be put in EAX. +; CHECK: InlineAsm Start +; CHECK-NOT: movl %eax, %eax +; CHECK-NOT: movl (%eax), %eax +; CHECK: InlineAsm End +; In the second asm, $0 and $2 must not be put in EDX. +; CHECK: InlineAsm Start +; CHECK-NOT: movl %edx, %edx +; CHECK-NOT: movl (%edx), %edx +; CHECK: InlineAsm End + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin8" +@x = common global i32 0 + +define i32 @aci(i32* %pw) nounwind { +entry: + %0 = load i32* @x, align 4 + %asmtmp = tail call { i32, i32 } asm "movl $0, %eax\0A\090:\0A\09test %eax, %eax\0A\09je 1f\0A\09movl %eax, $2\0A\09incl $2\0A\09lock\0A\09cmpxchgl $2, $0\0A\09jne 0b\0A\091:", "=*m,=&{ax},=&r,*m,~{dirflag},~{fpsr},~{flags},~{memory},~{cc}"(i32* %pw, i32* %pw) nounwind + %asmtmp2 = tail call { i32, i32 } asm "movl $0, %edx\0A\090:\0A\09test %edx, %edx\0A\09je 1f\0A\09movl %edx, $2\0A\09incl $2\0A\09lock\0A\09cmpxchgl $2, $0\0A\09jne 0b\0A\091:", "=*m,=&{dx},=&r,*m,~{dirflag},~{fpsr},~{flags},~{memory},~{cc}"(i32* %pw, i32* %pw) nounwind + %asmresult2 = extractvalue { i32, i32 } %asmtmp, 0 + %asmresult3 = extractvalue { i32, i32 } %asmtmp2, 0 + %1 = add i32 %asmresult2, %asmresult3 + %2 = add i32 %0, %1 + ret i32 %2 +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-09-18-inline-asm-2.ll b/src/LLVM/test/CodeGen/X86/2008-09-18-inline-asm-2.ll new file mode 100644 index 0000000..511c7b5 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
@@ -0,0 +1,49 @@ +; RUN: llc < %s -march=x86 -regalloc=linearscan | FileCheck %s +; RUN: llc < %s -march=x86 -regalloc=fast | FileCheck %s +; RUN: llc < %s -march=x86 -regalloc=basic | FileCheck %s +; RUN: llc < %s -march=x86 -regalloc=greedy | FileCheck %s + +; The 1st, 2nd, 3rd and 5th registers must all be different. The registers +; referenced in the 4th and 6th operands must not be the same as the 1st or 5th +; operand. +; +; CHECK: 1st=[[A1:%...]] +; CHECK-NOT: [[A1]] +; CHECK: 2nd=[[A2:%...]] +; CHECK-NOT: [[A1]] +; CHECK-NOT: [[A2]] +; CHECK: 3rd=[[A3:%...]] +; CHECK-NOT: [[A1]] +; CHECK-NOT: [[A2]] +; CHECK-NOT: [[A3]] +; CHECK: 5th=[[A5:%...]] +; CHECK-NOT: [[A1]] +; CHECK-NOT: [[A5]] +; CHECK: =4th + +; The 6th operand is an 8-bit register, and it mustn't alias the 1st and 5th. +; CHECK: 1%e[[S1:.]]x +; CHECK: 5%e[[S5:.]]x +; CHECK-NOT: %[[S1]] +; CHECK-NOT: %[[S5]] + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin8" + %struct.foo = type { i32, i32, i8* } + +define i32 @get(%struct.foo* %c, i8* %state) nounwind { +entry: + %0 = getelementptr %struct.foo* %c, i32 0, i32 0 ; <i32*> [#uses=2] + %1 = getelementptr %struct.foo* %c, i32 0, i32 1 ; <i32*> [#uses=2] + %2 = getelementptr %struct.foo* %c, i32 0, i32 2 ; <i8**> [#uses=2] + %3 = load i32* %0, align 4 ; <i32> [#uses=1] + %4 = load i32* %1, align 4 ; <i32> [#uses=1] + %5 = load i8* %state, align 1 ; <i8> [#uses=1] + %asmtmp = tail call { i32, i32, i32, i32 } asm sideeffect "#1st=$0 $1 2nd=$1 $2 3rd=$2 $4 5th=$4 $3=4th 1$0 1%eXx 5$4 5%eXx 6th=$5", "=&r,=r,=r,=*m,=&q,=*imr,1,2,*m,5,~{dirflag},~{fpsr},~{flags},~{cx}"(i8** %2, i8* %state, i32 %3, i32 %4, i8** %2, i8 %5) nounwind ; <{ i32, i32, i32, i32 }> [#uses=3] + %asmresult = extractvalue { i32, i32, i32, i32 } %asmtmp, 0 ; <i32> [#uses=1] + %asmresult1 = extractvalue { i32, i32, i32, i32 } %asmtmp, 1 ; <i32> [#uses=1] + 
store i32 %asmresult1, i32* %0 + %asmresult2 = extractvalue { i32, i32, i32, i32 } %asmtmp, 2 ; <i32> [#uses=1] + store i32 %asmresult2, i32* %1 + ret i32 %asmresult +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-09-19-RegAllocBug.ll b/src/LLVM/test/CodeGen/X86/2008-09-19-RegAllocBug.ll new file mode 100644 index 0000000..a8f2912 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-09-19-RegAllocBug.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin +; PR2808 + +@g_3 = external global i32 ; <i32*> [#uses=1] + +define i32 @func_4() nounwind { +entry: + %0 = load i32* @g_3, align 4 ; <i32> [#uses=2] + %1 = trunc i32 %0 to i8 ; <i8> [#uses=1] + %2 = sub i8 1, %1 ; <i8> [#uses=1] + %3 = sext i8 %2 to i32 ; <i32> [#uses=1] + %.0 = ashr i32 %3, select (i1 icmp ne (i8 zext (i1 icmp ugt (i32 ptrtoint (i32 ()* @func_4 to i32), i32 3) to i8), i8 0), i32 0, i32 ptrtoint (i32 ()* @func_4 to i32)) ; <i32> [#uses=1] + %4 = urem i32 %0, %.0 ; <i32> [#uses=1] + %5 = icmp eq i32 %4, 0 ; <i1> [#uses=1] + br i1 %5, label %return, label %bb4 + +bb4: ; preds = %entry + ret i32 undef + +return: ; preds = %entry + ret i32 undef +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-09-25-sseregparm-1.ll b/src/LLVM/test/CodeGen/X86/2008-09-25-sseregparm-1.ll new file mode 100644 index 0000000..fc3e35e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-09-25-sseregparm-1.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movs | count 2 +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep fld | count 2 +; check 'inreg' attribute for sse_regparm + +define inreg double @foo1() nounwind { + ret double 1.0 +} + +define inreg float @foo2() nounwind { + ret float 1.0 +} + +define double @bar() nounwind { + ret double 1.0 +} + +define float @bar2() nounwind { + ret float 1.0 +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-09-26-FrameAddrBug.ll b/src/LLVM/test/CodeGen/X86/2008-09-26-FrameAddrBug.ll new file mode 100644 index 0000000..f1ada28 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-09-26-FrameAddrBug.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin9 + + %struct._Unwind_Context = type { [18 x i8*], i8*, i8*, i8*, %struct.dwarf_eh_bases, i32, i32, i32, [18 x i8] } + %struct._Unwind_Exception = type { i64, void (i32, %struct._Unwind_Exception*)*, i32, i32, [3 x i32] } + %struct.dwarf_eh_bases = type { i8*, i8*, i8* } + +declare fastcc void @uw_init_context_1(%struct._Unwind_Context*, i8*, i8*) + +declare i8* @llvm.eh.dwarf.cfa(i32) nounwind + +define hidden void @_Unwind_Resume(%struct._Unwind_Exception* %exc) noreturn noreturn { +entry: + %0 = call i8* @llvm.eh.dwarf.cfa(i32 0) ; <i8*> [#uses=1] + call fastcc void @uw_init_context_1(%struct._Unwind_Context* null, i8* %0, i8* null) + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-09-29-ReMatBug.ll b/src/LLVM/test/CodeGen/X86/2008-09-29-ReMatBug.ll new file mode 100644 index 0000000..c36cf39 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-09-29-ReMatBug.ll
@@ -0,0 +1,85 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic -disable-fp-elim + + %struct..0objc_selector = type opaque + %struct.NSString = type opaque + %struct.XCStringList = type { i32, %struct._XCStringListNode* } + %struct._XCStringListNode = type { [3 x i8], [0 x i8], i8 } + %struct.__builtin_CFString = type { i32*, i32, i8*, i32 } +internal constant %struct.__builtin_CFString { i32* getelementptr ([0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 1992, i8* getelementptr ([3 x i8]* @"\01LC", i32 0, i32 0), i32 2 } ; <%struct.__builtin_CFString*>:0 [#uses=1] +@__CFConstantStringClassReference = external global [0 x i32] ; <[0 x i32]*> [#uses=1] +@"\01LC" = internal constant [3 x i8] c"NO\00" ; <[3 x i8]*> [#uses=1] +@"\01LC1" = internal constant [1 x i8] zeroinitializer ; <[1 x i8]*> [#uses=1] +@llvm.used1 = appending global [1 x i8*] [ i8* bitcast (%struct.NSString* (%struct.XCStringList*, %struct..0objc_selector*)* @"-[XCStringList stringRepresentation]" to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +define %struct.NSString* @"-[XCStringList stringRepresentation]"(%struct.XCStringList* %self, %struct..0objc_selector* %_cmd) nounwind { +entry: + %0 = load i32* null, align 4 ; <i32> [#uses=1] + %1 = and i32 %0, 16777215 ; <i32> [#uses=1] + %2 = icmp eq i32 %1, 0 ; <i1> [#uses=1] + br i1 %2, label %bb44, label %bb4 + +bb4: ; preds = %entry + %3 = load %struct._XCStringListNode** null, align 4 ; <%struct._XCStringListNode*> [#uses=2] + %4 = icmp eq %struct._XCStringListNode* %3, null ; <i1> [#uses=1] + %5 = bitcast %struct._XCStringListNode* %3 to i32* ; <i32*> [#uses=1] + br label %bb37.outer + +bb6: ; preds = %bb37 + br label %bb19 + +bb19: ; preds = %bb37, %bb6 + %.rle = phi i32 [ 0, %bb6 ], [ %10, %bb37 ] ; <i32> [#uses=1] + %bufptr.0.lcssa = phi i8* [ null, %bb6 ], [ null, %bb37 ] ; <i8*> [#uses=2] + %6 = and i32 %.rle, 16777215 ; <i32> [#uses=1] + %7 = icmp eq i32 %6, 0 ; <i1> [#uses=1] + br i1 %7, 
label %bb25.split, label %bb37 + +bb25.split: ; preds = %bb19 + call void @foo(i8* getelementptr ([1 x i8]* @"\01LC1", i32 0, i32 0)) nounwind nounwind + br label %bb35.outer + +bb34: ; preds = %bb35, %bb35, %bb35, %bb35 + %8 = getelementptr i8* %bufptr.0.lcssa, i32 %totalLength.0.ph ; <i8*> [#uses=1] + store i8 92, i8* %8, align 1 + br label %bb35.outer + +bb35.outer: ; preds = %bb34, %bb25.split + %totalLength.0.ph = add i32 0, %totalLength.1.ph ; <i32> [#uses=2] + br label %bb35 + +bb35: ; preds = %bb35, %bb35.outer + %9 = load i8* null, align 1 ; <i8> [#uses=1] + switch i8 %9, label %bb35 [ + i8 0, label %bb37.outer + i8 32, label %bb34 + i8 92, label %bb34 + i8 34, label %bb34 + i8 39, label %bb34 + ] + +bb37.outer: ; preds = %bb35, %bb4 + %totalLength.1.ph = phi i32 [ 0, %bb4 ], [ %totalLength.0.ph, %bb35 ] ; <i32> [#uses=1] + %bufptr.1.ph = phi i8* [ null, %bb4 ], [ %bufptr.0.lcssa, %bb35 ] ; <i8*> [#uses=2] + br i1 %4, label %bb39.split, label %bb37 + +bb37: ; preds = %bb37.outer, %bb19 + %10 = load i32* %5, align 4 ; <i32> [#uses=1] + br i1 false, label %bb6, label %bb19 + +bb39.split: ; preds = %bb37.outer + %11 = bitcast i8* null to %struct.NSString* ; <%struct.NSString*> [#uses=2] + %12 = icmp eq i8* null, %bufptr.1.ph ; <i1> [#uses=1] + br i1 %12, label %bb44, label %bb42 + +bb42: ; preds = %bb39.split + call void @quux(i8* %bufptr.1.ph) nounwind nounwind + ret %struct.NSString* %11 + +bb44: ; preds = %bb39.split, %entry + %.0 = phi %struct.NSString* [ bitcast (%struct.__builtin_CFString* @0 to %struct.NSString*), %entry ], [ %11, %bb39.split ] ; <%struct.NSString*> [#uses=1] + ret %struct.NSString* %.0 +} + +declare void @foo(i8*) + +declare void @quux(i8*)
diff --git a/src/LLVM/test/CodeGen/X86/2008-09-29-VolatileBug.ll b/src/LLVM/test/CodeGen/X86/2008-09-29-VolatileBug.ll new file mode 100644 index 0000000..935c4c5 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-09-29-VolatileBug.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=x86 | not grep movz +; PR2835 + +@g_407 = internal global i32 0 ; <i32*> [#uses=1] +@llvm.used = appending global [1 x i8*] [ i8* bitcast (i32 ()* @main to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +define i32 @main() nounwind { +entry: + %0 = volatile load i32* @g_407, align 4 ; <i32> [#uses=1] + %1 = trunc i32 %0 to i8 ; <i8> [#uses=1] + %2 = tail call i32 @func_45(i8 zeroext %1) nounwind ; <i32> [#uses=0] + ret i32 0 +} + +declare i32 @func_45(i8 zeroext) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/2008-10-06-MMXISelBug.ll b/src/LLVM/test/CodeGen/X86/2008-10-06-MMXISelBug.ll new file mode 100644 index 0000000..7f7b1a4 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-10-06-MMXISelBug.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 +; PR2850 + +@tmp_V2i = common global <2 x i32> zeroinitializer ; <<2 x i32>*> [#uses=2] + +define void @f0() nounwind { +entry: + %0 = load <2 x i32>* @tmp_V2i, align 8 ; <<2 x i32>> [#uses=1] + %1 = shufflevector <2 x i32> %0, <2 x i32> undef, <2 x i32> zeroinitializer ; <<2 x i32>> [#uses=1] + store <2 x i32> %1, <2 x i32>* @tmp_V2i, align 8 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-10-06-x87ld-nan-1.ll b/src/LLVM/test/CodeGen/X86/2008-10-06-x87ld-nan-1.ll new file mode 100644 index 0000000..a135cd4 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-10-06-x87ld-nan-1.ll
@@ -0,0 +1,13 @@ +; ModuleID = 'nan.bc' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-f80:32:32-v64:64:64-v128:128:128-a0:0:64" +target triple = "i686-apple-darwin8" +; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3,-sse | grep fldl +; This NaN should be shortened to a double (not a float). + +declare x86_stdcallcc void @_D3nan5printFeZv(x86_fp80 %f) + +define i32 @main() { +entry_nan.main: + call x86_stdcallcc void @_D3nan5printFeZv(x86_fp80 0xK7FFFC001234000000800) + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-10-06-x87ld-nan-2.ll b/src/LLVM/test/CodeGen/X86/2008-10-06-x87ld-nan-2.ll new file mode 100644 index 0000000..bd48105 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-10-06-x87ld-nan-2.ll
@@ -0,0 +1,18 @@ +; ModuleID = 'nan.bc' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-f80:32:32-v64:64:64-v128:128:128-a0:0:64" +target triple = "i686-apple-darwin8" +; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3,-sse | grep fldt | count 3 +; it is not safe to shorten any of these NaNs. + +declare x86_stdcallcc void @_D3nan5printFeZv(x86_fp80 %f) + +@_D3nan4rvale = global x86_fp80 0xK7FFF8001234000000000 ; <x86_fp80*> [#uses=1] + +define i32 @main() { +entry_nan.main: + %tmp = load x86_fp80* @_D3nan4rvale ; <x86_fp80> [#uses=1] + call x86_stdcallcc void @_D3nan5printFeZv(x86_fp80 %tmp) + call x86_stdcallcc void @_D3nan5printFeZv(x86_fp80 0xK7FFF8001234000000000) + call x86_stdcallcc void @_D3nan5printFeZv(x86_fp80 0xK7FFFC001234000000400) + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-10-07-SSEISelBug.ll b/src/LLVM/test/CodeGen/X86/2008-10-07-SSEISelBug.ll new file mode 100644 index 0000000..bc57612 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-10-07-SSEISelBug.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -march=x86 -mattr=+sse,-sse2 + +define <4 x float> @f(float %w) nounwind { +entry: + %retval = alloca <4 x float> ; <<4 x float>*> [#uses=2] + %w.addr = alloca float ; <float*> [#uses=2] + %.compoundliteral = alloca <4 x float> ; <<4 x float>*> [#uses=2] + store float %w, float* %w.addr + %tmp = load float* %w.addr ; <float> [#uses=1] + %0 = insertelement <4 x float> undef, float %tmp, i32 0 ; <<4 x float>> [#uses=1] + %1 = insertelement <4 x float> %0, float 0.000000e+00, i32 1 ; <<4 x float>> [#uses=1] + %2 = insertelement <4 x float> %1, float 0.000000e+00, i32 2 ; <<4 x float>> [#uses=1] + %3 = insertelement <4 x float> %2, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=1] + store <4 x float> %3, <4 x float>* %.compoundliteral + %tmp1 = load <4 x float>* %.compoundliteral ; <<4 x float>> [#uses=1] + store <4 x float> %tmp1, <4 x float>* %retval + br label %return + +return: ; preds = %entry + %4 = load <4 x float>* %retval ; <<4 x float>> [#uses=1] + ret <4 x float> %4 +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-10-11-CallCrash.ll b/src/LLVM/test/CodeGen/X86/2008-10-11-CallCrash.ll new file mode 100644 index 0000000..efc6125 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-10-11-CallCrash.ll
@@ -0,0 +1,21 @@ +; RUN: llc < %s +; PR2735 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin7" +@g_385 = external global i32 ; <i32*> [#uses=1] + +define i32 @func_45(i64 %p_46, i32 %p_48) nounwind { +entry: + %0 = tail call i32 (...)* @lshift_s_u(i64 %p_46, i64 0) nounwind ; <i32> [#uses=0] + %1 = load i32* @g_385, align 4 ; <i32> [#uses=1] + %2 = shl i32 %1, 1 ; <i32> [#uses=1] + %3 = and i32 %2, 32 ; <i32> [#uses=1] + %4 = tail call i32 (...)* @func_87(i32 undef, i32 %p_48, i32 1) nounwind ; <i32> [#uses=1] + %5 = add i32 %3, %4 ; <i32> [#uses=1] + %6 = tail call i32 (...)* @div_rhs(i32 %5) nounwind ; <i32> [#uses=0] + ret i32 undef +} + +declare i32 @lshift_s_u(...) +declare i32 @func_87(...) +declare i32 @div_rhs(...)
diff --git a/src/LLVM/test/CodeGen/X86/2008-10-13-CoalescerBug.ll b/src/LLVM/test/CodeGen/X86/2008-10-13-CoalescerBug.ll new file mode 100644 index 0000000..4d3f8c2 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-10-13-CoalescerBug.ll
@@ -0,0 +1,42 @@ +; RUN: llc < %s -march=x86 +; PR2775 + +define i32 @func_77(i8 zeroext %p_79) nounwind { +entry: + %0 = tail call i32 (...)* @func_43(i32 1) nounwind ; <i32> [#uses=1] + %1 = icmp eq i32 %0, 0 ; <i1> [#uses=1] + br i1 %1, label %bb3, label %bb + +bb: ; preds = %entry + br label %bb3 + +bb3: ; preds = %bb, %entry + %p_79_addr.0 = phi i8 [ 0, %bb ], [ %p_79, %entry ] ; <i8> [#uses=1] + %2 = zext i8 %p_79_addr.0 to i32 ; <i32> [#uses=2] + %3 = zext i1 false to i32 ; <i32> [#uses=2] + %4 = tail call i32 (...)* @rshift_u_s(i32 1) nounwind ; <i32> [#uses=0] + %5 = lshr i32 %2, %2 ; <i32> [#uses=3] + %6 = icmp eq i32 0, 0 ; <i1> [#uses=1] + br i1 %6, label %bb6, label %bb9 + +bb6: ; preds = %bb3 + %7 = ashr i32 %5, %3 ; <i32> [#uses=1] + %8 = icmp eq i32 %7, 0 ; <i1> [#uses=1] + %9 = select i1 %8, i32 %3, i32 0 ; <i32> [#uses=1] + %. = shl i32 %5, %9 ; <i32> [#uses=1] + br label %bb9 + +bb9: ; preds = %bb6, %bb3 + %.0 = phi i32 [ %., %bb6 ], [ %5, %bb3 ] ; <i32> [#uses=0] + br i1 false, label %return, label %bb10 + +bb10: ; preds = %bb9 + ret i32 undef + +return: ; preds = %bb9 + ret i32 undef +} + +declare i32 @func_43(...) + +declare i32 @rshift_u_s(...)
diff --git a/src/LLVM/test/CodeGen/X86/2008-10-16-VecUnaryOp.ll b/src/LLVM/test/CodeGen/X86/2008-10-16-VecUnaryOp.ll new file mode 100644 index 0000000..de4c1e7 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-10-16-VecUnaryOp.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 +; PR2762 +define void @foo(<4 x i32>* %p, <4 x double>* %q) { + %n = load <4 x i32>* %p + %z = sitofp <4 x i32> %n to <4 x double> + store <4 x double> %z, <4 x double>* %q + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-10-17-Asm64bitRConstraint.ll b/src/LLVM/test/CodeGen/X86/2008-10-17-Asm64bitRConstraint.ll new file mode 100644 index 0000000..b2e6061 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-10-17-Asm64bitRConstraint.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=x86 +; RUN: llc < %s -march=x86-64 + +define void @test(i64 %x) nounwind { +entry: + tail call void asm sideeffect "ASM: $0", "r,~{dirflag},~{fpsr},~{flags}"(i64 %x) nounwind + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/2008-10-20-AsmDoubleInI32.ll b/src/LLVM/test/CodeGen/X86/2008-10-20-AsmDoubleInI32.ll new file mode 100644 index 0000000..353d1c7 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-10-20-AsmDoubleInI32.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=x86 +; RUN: llc < %s -march=x86-64 + +; from gcc.c-torture/compile/920520-1.c + +define i32 @g() nounwind { +entry: + call void asm sideeffect "$0", "r"(double 1.500000e+00) nounwind + ret i32 0 +} +
diff --git a/src/LLVM/test/CodeGen/X86/2008-10-24-FlippedCompare.ll b/src/LLVM/test/CodeGen/X86/2008-10-24-FlippedCompare.ll new file mode 100644 index 0000000..421b931 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-10-24-FlippedCompare.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 -o - | not grep {ucomiss\[^,\]*esp} + +define void @f(float %wt) { +entry: + %0 = fcmp ogt float %wt, 0.000000e+00 ; <i1> [#uses=1] + %1 = tail call i32 @g(i32 44) ; <i32> [#uses=3] + %2 = inttoptr i32 %1 to i8* ; <i8*> [#uses=2] + br i1 %0, label %bb, label %bb1 + +bb: ; preds = %entry + ret void + +bb1: ; preds = %entry + ret void +} + +declare i32 @g(i32)
diff --git a/src/LLVM/test/CodeGen/X86/2008-10-27-CoalescerBug.ll b/src/LLVM/test/CodeGen/X86/2008-10-27-CoalescerBug.ll new file mode 100644 index 0000000..9d144a4 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
@@ -0,0 +1,52 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -stats |& FileCheck %s +; Now this test spills one register. But a reload in the loop is cheaper than +; the divsd so it's a win. + +define fastcc void @fourn(double* %data, i32 %isign) nounwind { +; CHECK: fourn +entry: + br label %bb + +bb: ; preds = %bb, %entry + %indvar93 = phi i32 [ 0, %entry ], [ %idim.030, %bb ] ; <i32> [#uses=2] + %idim.030 = add i32 %indvar93, 1 ; <i32> [#uses=1] + %0 = add i32 %indvar93, 2 ; <i32> [#uses=1] + %1 = icmp sgt i32 %0, 2 ; <i1> [#uses=1] + br i1 %1, label %bb30.loopexit, label %bb + +; CHECK: %bb30.loopexit +; CHECK: divsd %xmm0 +; CHECK: movsd %xmm0, 16(%esp) +; CHECK: .align +; CHECK-NEXT: %bb3 +bb3: ; preds = %bb30.loopexit, %bb25, %bb3 + %2 = load i32* null, align 4 ; <i32> [#uses=1] + %3 = mul i32 %2, 0 ; <i32> [#uses=1] + %4 = icmp slt i32 0, %3 ; <i1> [#uses=1] + br i1 %4, label %bb18, label %bb3 + +bb18: ; preds = %bb3 + %5 = fdiv double %11, 0.000000e+00 ; <double> [#uses=1] + %6 = tail call double @sin(double %5) nounwind readonly ; <double> [#uses=1] + br label %bb24.preheader + +bb22.preheader: ; preds = %bb24.preheader, %bb22.preheader + br label %bb22.preheader + +bb25: ; preds = %bb24.preheader + %7 = fmul double 0.000000e+00, %6 ; <double> [#uses=0] + %8 = add i32 %i3.122100, 0 ; <i32> [#uses=1] + %9 = icmp sgt i32 %8, 0 ; <i1> [#uses=1] + br i1 %9, label %bb3, label %bb24.preheader + +bb24.preheader: ; preds = %bb25, %bb18 + %i3.122100 = or i32 0, 1 ; <i32> [#uses=2] + %10 = icmp slt i32 0, %i3.122100 ; <i1> [#uses=1] + br i1 %10, label %bb25, label %bb22.preheader + +bb30.loopexit: ; preds = %bb + %11 = fmul double 0.000000e+00, 0x401921FB54442D1C ; <double> [#uses=1] + br label %bb3 +} + +declare double @sin(double) nounwind readonly
diff --git a/src/LLVM/test/CodeGen/X86/2008-10-27-StackRealignment.ll b/src/LLVM/test/CodeGen/X86/2008-10-27-StackRealignment.ll new file mode 100644 index 0000000..a57f716 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-10-27-StackRealignment.ll
@@ -0,0 +1,22 @@ +; Linux doesn't support stack realignment for functions with allocas (PR2888). +; Until it does, we shouldn't use movaps to access the stack. On targets with +; sufficiently aligned stack (e.g. darwin) we should. +; PR8969 - make 32-bit linux have a 16-byte aligned stack +; RUN: llc < %s -mtriple=i386-pc-linux-gnu -mcpu=yonah | grep movaps | count 2 +; RUN: llc < %s -mtriple=i686-apple-darwin9 -mcpu=yonah | grep movaps | count 2 + + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" +target triple = "i386-pc-linux-gnu" + +define void @foo(i32 %t) nounwind { + %tmp1210 = alloca i8, i32 32, align 4 + call void @llvm.memset.p0i8.i64(i8* %tmp1210, i8 0, i64 32, i32 4, i1 false) + %x = alloca i8, i32 %t + call void @dummy(i8* %x) + ret void +} + +declare void @dummy(i8*) + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/2008-10-29-ExpandVAARG.ll b/src/LLVM/test/CodeGen/X86/2008-10-29-ExpandVAARG.ll new file mode 100644 index 0000000..7ad94f1 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-10-29-ExpandVAARG.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=x86 +; PR2977 +define i8* @ap_php_conv_p2(){ +entry: + %ap.addr = alloca i8* ; <i8**> [#uses=36] + br label %sw.bb301 +sw.bb301: + %0 = va_arg i8** %ap.addr, i64 ; <i64> [#uses=1] + br label %sw.bb301 +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-11-03-F80VAARG.ll b/src/LLVM/test/CodeGen/X86/2008-11-03-F80VAARG.ll new file mode 100644 index 0000000..507799b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-11-03-F80VAARG.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=x86 -o - | not grep 10 + +declare void @llvm.va_start(i8*) nounwind + +declare void @llvm.va_copy(i8*, i8*) nounwind + +declare void @llvm.va_end(i8*) nounwind + +define x86_fp80 @test(...) nounwind { + %ap = alloca i8* ; <i8**> [#uses=3] + %v1 = bitcast i8** %ap to i8* ; <i8*> [#uses=1] + call void @llvm.va_start(i8* %v1) + %t1 = va_arg i8** %ap, x86_fp80 ; <x86_fp80> [#uses=1] + %t2 = va_arg i8** %ap, x86_fp80 ; <x86_fp80> [#uses=1] + %t = fadd x86_fp80 %t1, %t2 ; <x86_fp80> [#uses=1] + ret x86_fp80 %t +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-11-06-testb.ll b/src/LLVM/test/CodeGen/X86/2008-11-06-testb.ll new file mode 100644 index 0000000..f8f317c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-11-06-testb.ll
@@ -0,0 +1,26 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin | grep testb + +; ModuleID = '<stdin>' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin9.5" + %struct.x = type <{ i8, i8, i16 }> + +define i32 @foo(%struct.x* %p) nounwind { +entry: + %0 = getelementptr %struct.x* %p, i32 0, i32 0 ; <i8*> [#uses=1] + store i8 55, i8* %0, align 1 + %1 = bitcast %struct.x* %p to i32* ; <i32*> [#uses=1] + %2 = load i32* %1, align 1 ; <i32> [#uses=1] + %3 = and i32 %2, 512 ; <i32> [#uses=1] + %4 = icmp eq i32 %3, 0 ; <i1> [#uses=1] + br i1 %4, label %bb5, label %bb + +bb: ; preds = %entry + %5 = tail call i32 (...)* @xx() nounwind ; <i32> [#uses=1] + ret i32 %5 + +bb5: ; preds = %entry + ret i32 0 +} + +declare i32 @xx(...)
diff --git a/src/LLVM/test/CodeGen/X86/2008-11-13-inlineasm-3.ll b/src/LLVM/test/CodeGen/X86/2008-11-13-inlineasm-3.ll new file mode 100644 index 0000000..1dc97fc --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-11-13-inlineasm-3.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -mtriple=i686-pc-linux-gnu +; PR 1779 +; Using 'A' constraint and a tied constraint together used to crash. +; ModuleID = '<stdin>' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" +target triple = "i686-pc-linux-gnu" + %struct.linux_dirent64 = type { i64, i64, i16, i8, [0 x i8] } + +define i32 @sys_getdents64(i32 %fd, %struct.linux_dirent64* %dirent, i32 %count) { +entry: + br i1 true, label %cond_next29, label %UnifiedReturnBlock + +cond_next29: ; preds = %entry + %tmp83 = call i32 asm sideeffect "1:\09movl %eax,0($2)\0A2:\09movl %edx,4($2)\0A3:\0A.section .fixup,\22ax\22\0A4:\09movl $3,$0\0A\09jmp 3b\0A.previous\0A .section __ex_table,\22a\22\0A .balign 4 \0A .long 1b,4b\0A .previous\0A .section __ex_table,\22a\22\0A .balign 4 \0A .long 2b,4b\0A .previous\0A", "=r,A,r,i,0,~{dirflag},~{fpsr},~{flags}"(i64 0, i64* null, i32 -14, i32 0) nounwind ; <i32> [#uses=0] + br label %UnifiedReturnBlock + +UnifiedReturnBlock: ; preds = %entry + ret i32 -14 +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-11-29-ULT-Sign.ll b/src/LLVM/test/CodeGen/X86/2008-11-29-ULT-Sign.ll new file mode 100644 index 0000000..6dca141 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-11-29-ULT-Sign.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -mtriple=i686-pc-linux-gnu | grep "jns" | count 1 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" +target triple = "i686-pc-linux-gnu" + +define i32 @a(i32 %x) nounwind { +entry: + %cmp = icmp ult i32 %x, -2147483648 ; <i1> [#uses=1] + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry + %call = call i32 (...)* @b() ; <i32> [#uses=0] + br label %if.end + +if.end: ; preds = %if.then, %entry + br label %return + +return: ; preds = %if.end + ret i32 undef +} + +declare i32 @b(...) +
diff --git a/src/LLVM/test/CodeGen/X86/2008-12-01-SpillerAssert.ll b/src/LLVM/test/CodeGen/X86/2008-12-01-SpillerAssert.ll new file mode 100644 index 0000000..d96d806 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-12-01-SpillerAssert.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu +; PR3124 + + %struct.cpuinfo_x86 = type { i8, i8, i8, i8, i32, i8, i8, i8, i32, i32, [9 x i32], [16 x i8], [64 x i8], i32, i32, i32, i64, %struct.cpumask_t, i16, i16, i16, i16, i16, i16, i16, i16, i32 } + %struct.cpumask_t = type { [1 x i64] } +@.str10 = external constant [70 x i8] ; <[70 x i8]*> [#uses=1] + +declare i32 @printk(i8*, ...) + +define void @display_cacheinfo(%struct.cpuinfo_x86* %c) nounwind section ".cpuinit.text" { +entry: + %asmtmp = tail call { i32, i32, i32, i32 } asm "cpuid", "={ax},={bx},={cx},={dx},0,2,~{dirflag},~{fpsr},~{flags}"(i32 -2147483643, i32 0) nounwind ; <{ i32, i32, i32, i32 }> [#uses=0] + %0 = tail call i32 (i8*, ...)* @printk(i8* getelementptr ([70 x i8]* @.str10, i32 0, i64 0), i32 0, i32 0, i32 0, i32 0) nounwind ; <i32> [#uses=0] + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll b/src/LLVM/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll new file mode 100644 index 0000000..1f8bd45 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll
@@ -0,0 +1,30 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin | not grep lea +; The inner loop should use [reg] addressing, not [reg+reg] addressing. +; rdar://6403965 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin9.5" + +define i8* @test(i8* %Q, i32* %L) nounwind { +entry: + br label %bb1 + +bb: ; preds = %bb1, %bb1 + %indvar.next = add i32 %P.0.rec, 1 ; <i32> [#uses=1] + br label %bb1 + +bb1: ; preds = %bb, %entry + %P.0.rec = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=3] + %P.0 = getelementptr i8* %Q, i32 %P.0.rec ; <i8*> [#uses=2] + %0 = load i8* %P.0, align 1 ; <i8> [#uses=1] + switch i8 %0, label %bb3 [ + i8 12, label %bb + i8 42, label %bb + ] + +bb3: ; preds = %bb1 + %P.0.sum = add i32 %P.0.rec, 2 ; <i32> [#uses=1] + %1 = getelementptr i8* %Q, i32 %P.0.sum ; <i8*> [#uses=1] + store i8 4, i8* %1, align 1 + ret i8* %P.0 +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-12-02-IllegalResultType.ll b/src/LLVM/test/CodeGen/X86/2008-12-02-IllegalResultType.ll new file mode 100644 index 0000000..4b72cb9 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-12-02-IllegalResultType.ll
@@ -0,0 +1,37 @@ +; RUN: llc < %s +; PR3117 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" +target triple = "i386-pc-linux-gnu" +@g_118 = external global i8 ; <i8*> [#uses=1] +@g_7 = external global i32 ; <i32*> [#uses=1] + +define i32 @func_73(i32 %p_74) nounwind { +entry: + %0 = load i32* @g_7, align 4 ; <i32> [#uses=1] + %1 = or i8 0, 118 ; <i8> [#uses=1] + %2 = zext i8 %1 to i64 ; <i64> [#uses=1] + %3 = icmp ne i32 %0, 0 ; <i1> [#uses=1] + %4 = zext i1 %3 to i64 ; <i64> [#uses=1] + %5 = or i64 %4, -758998846 ; <i64> [#uses=3] + %6 = icmp sle i64 %2, %5 ; <i1> [#uses=1] + %7 = zext i1 %6 to i8 ; <i8> [#uses=1] + %8 = or i8 %7, 118 ; <i8> [#uses=1] + %9 = zext i8 %8 to i64 ; <i64> [#uses=1] + %10 = icmp sle i64 %9, 0 ; <i1> [#uses=1] + %11 = zext i1 %10 to i8 ; <i8> [#uses=1] + %12 = or i8 %11, 118 ; <i8> [#uses=1] + %13 = zext i8 %12 to i64 ; <i64> [#uses=1] + %14 = icmp sle i64 %13, %5 ; <i1> [#uses=1] + %15 = zext i1 %14 to i8 ; <i8> [#uses=1] + %16 = or i8 %15, 118 ; <i8> [#uses=1] + %17 = zext i8 %16 to i64 ; <i64> [#uses=1] + %18 = icmp sle i64 %17, 0 ; <i1> [#uses=1] + %19 = zext i1 %18 to i8 ; <i8> [#uses=1] + %20 = or i8 %19, 118 ; <i8> [#uses=1] + %21 = zext i8 %20 to i64 ; <i64> [#uses=1] + %22 = icmp sle i64 %21, %5 ; <i1> [#uses=1] + %23 = zext i1 %22 to i8 ; <i8> [#uses=1] + %24 = or i8 %23, 118 ; <i8> [#uses=1] + store i8 %24, i8* @g_118, align 1 + ret i32 undef +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-12-02-dagcombine-1.ll b/src/LLVM/test/CodeGen/X86/2008-12-02-dagcombine-1.ll new file mode 100644 index 0000000..fe5bff3 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-12-02-dagcombine-1.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -march=x86 | grep "(%esp)" | count 2 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin9.5" +; a - a should be found and removed, leaving refs to only L and P +define i8* @test(i8* %a, i8* %L, i8* %P) nounwind { +entry: + %0 = ptrtoint i8* %a to i32 + %1 = sub i32 -2, %0 + %2 = ptrtoint i8* %P to i32 + %3 = sub i32 0, %2 + %4 = ptrtoint i8* %L to i32 + %5 = add i32 %4, %3 + %6 = add i32 %5, %1 ; <i32> [#uses=1] + %7 = getelementptr i8* %a, i32 %6 ; <i8*> [#uses=1] + br label %return + +return: ; preds = %bb3 + ret i8* %7 +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-12-02-dagcombine-2.ll b/src/LLVM/test/CodeGen/X86/2008-12-02-dagcombine-2.ll new file mode 100644 index 0000000..4cb1b42 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-12-02-dagcombine-2.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=x86 | grep "(%esp)" | count 2 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin9.5" +; a - a should be found and removed, leaving refs to only L and P +define i8* @test(i8* %a, i8* %L, i8* %P) nounwind { +entry: + %0 = ptrtoint i8* %a to i32 + %1 = ptrtoint i8* %P to i32 + %2 = sub i32 %1, %0 + %3 = ptrtoint i8* %L to i32 + %4 = sub i32 %2, %3 ; <i32> [#uses=1] + %5 = getelementptr i8* %a, i32 %4 ; <i8*> [#uses=1] + br label %return + +return: ; preds = %bb3 + ret i8* %5 +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-12-02-dagcombine-3.ll b/src/LLVM/test/CodeGen/X86/2008-12-02-dagcombine-3.ll new file mode 100644 index 0000000..d5a676a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-12-02-dagcombine-3.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=x86 | grep add | count 2 +; RUN: llc < %s -march=x86 | grep sub | grep -v subsections | count 1 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin9.5" +; this should be rearranged to have two +s and one - +define i32 @test(i8* %a, i8* %L, i8* %P) nounwind { +entry: + %0 = ptrtoint i8* %P to i32 + %1 = sub i32 -2, %0 + %2 = ptrtoint i8* %L to i32 + %3 = ptrtoint i8* %a to i32 + %4 = sub i32 %2, %3 ; <i32> [#uses=1] + %5 = add i32 %1, %4 ; <i32> [#uses=1] + br label %return + +return: ; preds = %bb3 + ret i32 %5 +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-12-12-PrivateEHSymbol.ll b/src/LLVM/test/CodeGen/X86/2008-12-12-PrivateEHSymbol.ll new file mode 100644 index 0000000..2e27811 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-12-12-PrivateEHSymbol.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -disable-cfi -march=x86-64 -mtriple=x86_64-apple-darwin9 | grep ^__Z1fv.eh +; RUN: llc < %s -disable-cfi -march=x86 -mtriple=i386-apple-darwin9 | grep ^__Z1fv.eh + +define void @_Z1fv() { +entry: + br label %return + +return: + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-12-16-BadShift.ll b/src/LLVM/test/CodeGen/X86/2008-12-16-BadShift.ll new file mode 100644 index 0000000..6c70c5b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-12-16-BadShift.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s | not grep shrl +; Note: this test is really trying to make sure that the shift +; returns the right result; shrl is most likely wrong, +; but if CodeGen starts legitimately using an shrl here, +; please adjust the test appropriately. + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" +target triple = "i386-pc-linux-gnu" +@.str = internal constant [6 x i8] c"%lld\0A\00" ; <[6 x i8]*> [#uses=1] + +define i64 @mebbe_shift(i32 %xx, i32 %test) nounwind { +entry: + %conv = zext i32 %xx to i64 ; <i64> [#uses=1] + %tobool = icmp ne i32 %test, 0 ; <i1> [#uses=1] + %shl = select i1 %tobool, i64 3, i64 0 ; <i64> [#uses=1] + %x.0 = shl i64 %conv, %shl ; <i64> [#uses=1] + ret i64 %x.0 +} +
diff --git a/src/LLVM/test/CodeGen/X86/2008-12-16-dagcombine-4.ll b/src/LLVM/test/CodeGen/X86/2008-12-16-dagcombine-4.ll new file mode 100644 index 0000000..3080d08 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-12-16-dagcombine-4.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=x86 | grep "(%esp)" | count 2 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin9.5" +; a - a should be found and removed, leaving refs to only L and P +define i32 @test(i32 %a, i32 %L, i32 %P) nounwind { +entry: + %0 = sub i32 %a, %L + %1 = add i32 %P, %0 + %2 = sub i32 %1, %a + br label %return + +return: ; preds = %bb3 + ret i32 %2 +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll b/src/LLVM/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll new file mode 100644 index 0000000..75e0b8a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
@@ -0,0 +1,38 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin -asm-verbose=0 | FileCheck %s +; PR3149 +; Make sure the copy after inline asm is not coalesced away. + +; CHECK: ## InlineAsm End +; CHECK-NEXT: BB0_2: +; CHECK-NEXT: {{movl %esi, %eax|addl %edi, %esi}} + + +@"\01LC" = internal constant [7 x i8] c"n0=%d\0A\00" ; <[7 x i8]*> [#uses=1] +@llvm.used = appending global [1 x i8*] [ i8* bitcast (i32 (i64, i64)* @umoddi3 to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +define i32 @umoddi3(i64 %u, i64 %v) nounwind noinline { +entry: + %0 = trunc i64 %v to i32 ; <i32> [#uses=2] + %1 = trunc i64 %u to i32 ; <i32> [#uses=4] + %2 = lshr i64 %u, 32 ; <i64> [#uses=1] + %3 = trunc i64 %2 to i32 ; <i32> [#uses=2] + %4 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([7 x i8]* @"\01LC", i32 0, i32 0), i32 %1) nounwind ; <i32> [#uses=0] + %5 = icmp ult i32 %1, %0 ; <i1> [#uses=1] + br i1 %5, label %bb2, label %bb + +bb: ; preds = %entry + %6 = lshr i64 %v, 32 ; <i64> [#uses=1] + %7 = trunc i64 %6 to i32 ; <i32> [#uses=1] + %asmtmp = tail call { i32, i32 } asm "subl $5,$1\0A\09sbbl $3,$0", "=r,=&r,0,imr,1,imr,~{dirflag},~{fpsr},~{flags}"(i32 %3, i32 %7, i32 %1, i32 %0) nounwind ; <{ i32, i32 }> [#uses=2] + %asmresult = extractvalue { i32, i32 } %asmtmp, 0 ; <i32> [#uses=1] + %asmresult1 = extractvalue { i32, i32 } %asmtmp, 1 ; <i32> [#uses=1] + br label %bb2 + +bb2: ; preds = %bb, %entry + %n1.0 = phi i32 [ %asmresult, %bb ], [ %3, %entry ] ; <i32> [#uses=1] + %n0.0 = phi i32 [ %asmresult1, %bb ], [ %1, %entry ] ; <i32> [#uses=1] + %8 = add i32 %n0.0, %n1.0 ; <i32> [#uses=1] + ret i32 %8 +} + +declare i32 @printf(i8*, ...) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/2008-12-22-dagcombine-5.ll b/src/LLVM/test/CodeGen/X86/2008-12-22-dagcombine-5.ll new file mode 100644 index 0000000..75773e0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-12-22-dagcombine-5.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=x86 | grep "(%esp)" | count 2 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin9.5" +; -(-a) - a should be found and removed, leaving refs to only L and P +define i32 @test(i32 %a, i32 %L, i32 %P) nounwind { +entry: + %0 = sub i32 %L, %a + %1 = sub i32 %P, %0 + %2 = sub i32 %1, %a + br label %return + +return: ; preds = %bb3 + ret i32 %2 +}
diff --git a/src/LLVM/test/CodeGen/X86/2008-12-23-crazy-address.ll b/src/LLVM/test/CodeGen/X86/2008-12-23-crazy-address.ll new file mode 100644 index 0000000..2edcaea --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-12-23-crazy-address.ll
@@ -0,0 +1,33 @@ +; RUN: llc < %s -march=x86 -relocation-model=static | grep {lea.*X.*esp} | count 2 + +@X = external global [0 x i32] + +define void @foo() nounwind { +entry: + %Y = alloca i32 + call void @frob(i32* %Y) nounwind + %Y3 = bitcast i32* %Y to i8* + %ctg2 = getelementptr i8* %Y3, i32 ptrtoint ([0 x i32]* @X to i32) + %0 = ptrtoint i8* %ctg2 to i32 + call void @borf(i32 %0) nounwind + ret void +} + +define void @bar(i32 %i) nounwind { +entry: + %Y = alloca [10 x i32] + %0 = getelementptr [10 x i32]* %Y, i32 0, i32 0 + call void @frob(i32* %0) nounwind + %1 = getelementptr [0 x i32]* @X, i32 0, i32 %i + %2 = getelementptr [10 x i32]* %Y, i32 0, i32 0 + %3 = ptrtoint i32* %2 to i32 + %4 = bitcast i32* %1 to i8* + %ctg2 = getelementptr i8* %4, i32 %3 + %5 = ptrtoint i8* %ctg2 to i32 + call void @borf(i32 %5) nounwind + ret void +} + +declare void @frob(i32*) + +declare void @borf(i32)
diff --git a/src/LLVM/test/CodeGen/X86/2008-12-23-dagcombine-6.ll b/src/LLVM/test/CodeGen/X86/2008-12-23-dagcombine-6.ll new file mode 100644 index 0000000..bae9283 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2008-12-23-dagcombine-6.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -march=x86 | grep "(%esp)" | count 4 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin9.5" +; a - a should be found and removed, leaving refs to only L and P +define i32 @test(i32 %a, i32 %L, i32 %P) nounwind { +entry: + %0 = add i32 %a, %L + %1 = add i32 %P, %0 + %2 = sub i32 %1, %a + br label %return + +return: ; preds = %bb3 + ret i32 %2 +} +define i32 @test2(i32 %a, i32 %L, i32 %P) nounwind { +entry: + %0 = add i32 %L, %a + %1 = add i32 %P, %0 + %2 = sub i32 %1, %a + br label %return + +return: ; preds = %bb3 + ret i32 %2 +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-01-13-DoubleUpdate.ll b/src/LLVM/test/CodeGen/X86/2009-01-13-DoubleUpdate.ll new file mode 100644 index 0000000..4feb764 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-01-13-DoubleUpdate.ll
@@ -0,0 +1,21 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 -enable-legalize-types-checking + +declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone + +define void @__mindd16(<16 x double>* sret %vec.result, <16 x double> %x, double %y) nounwind { +entry: + %tmp3.i = shufflevector <16 x double> zeroinitializer, <16 x double> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 > ; <<8 x double>> [#uses=1] + %tmp10.i.i = shufflevector <8 x double> %tmp3.i, <8 x double> undef, <4 x i32> < i32 4, i32 5, i32 6, i32 7 > ; <<4 x double>> [#uses=1] + %tmp3.i2.i.i = shufflevector <4 x double> %tmp10.i.i, <4 x double> undef, <2 x i32> < i32 0, i32 1 > ; <<2 x double>> [#uses=1] + %0 = tail call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> zeroinitializer, <2 x double> %tmp3.i2.i.i) nounwind ; <<2 x double>> [#uses=1] + %tmp5.i3.i.i = shufflevector <2 x double> %0, <2 x double> undef, <4 x i32> < i32 0, i32 1, i32 undef, i32 undef > ; <<4 x double>> [#uses=1] + %tmp6.i4.i.i = shufflevector <4 x double> zeroinitializer, <4 x double> %tmp5.i3.i.i, <4 x i32> < i32 4, i32 5, i32 2, i32 3 > ; <<4 x double>> [#uses=1] + %tmp14.i8.i.i = shufflevector <4 x double> %tmp6.i4.i.i, <4 x double> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 4, i32 5 > ; <<4 x double>> [#uses=1] + %tmp13.i.i = shufflevector <4 x double> %tmp14.i8.i.i, <4 x double> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef > ; <<8 x double>> [#uses=1] + %tmp14.i.i = shufflevector <8 x double> zeroinitializer, <8 x double> %tmp13.i.i, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11 > ; <<8 x double>> [#uses=1] + %tmp5.i = shufflevector <8 x double> %tmp14.i.i, <8 x double> undef, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef > ; <<16 x double>> [#uses=1] + %tmp6.i = shufflevector <16 x 
double> %x, <16 x double> %tmp5.i, <16 x i32> < i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15 > ; <<16 x double>> [#uses=1] + %tmp14.i = shufflevector <16 x double> %tmp6.i, <16 x double> zeroinitializer, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23 > ; <<16 x double>> [#uses=1] + store <16 x double> %tmp14.i, <16 x double>* %vec.result + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-01-16-SchedulerBug.ll b/src/LLVM/test/CodeGen/X86/2009-01-16-SchedulerBug.ll new file mode 100644 index 0000000..99bef6c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-01-16-SchedulerBug.ll
@@ -0,0 +1,50 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin +; rdar://6501631 + + %CF = type { %Register } + %XXV = type { i32 (...)** } + %Register = type { %"struct.XXC::BCFs", i32 } + %"struct.XXC::BCFs" = type { i32 } + +declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind + +define fastcc %XXV* @bar(%CF* %call_frame, %XXV** %exception) nounwind { +prologue: + %param_x = load %XXV** null ; <%XXV*> [#uses=1] + %unique_1.i = ptrtoint %XXV* %param_x to i1 ; <i1> [#uses=1] + br i1 %unique_1.i, label %NextVerify42, label %FailedVerify + +NextVerify42: ; preds = %prologue + %param_y = load %XXV** null ; <%XXV*> [#uses=1] + %unique_1.i58 = ptrtoint %XXV* %param_y to i1 ; <i1> [#uses=1] + br i1 %unique_1.i58, label %function_setup.cont, label %FailedVerify + +function_setup.cont: ; preds = %NextVerify42 + br i1 false, label %label13, label %label + +label: ; preds = %function_setup.cont + %has_exn = icmp eq %XXV* null, null ; <i1> [#uses=1] + br i1 %has_exn, label %kjsNumberLiteral.exit, label %handle_exception + +kjsNumberLiteral.exit: ; preds = %label + %0 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 0, i32 0) ; <{ i32, i1 }> [#uses=2] + %intAdd = extractvalue { i32, i1 } %0, 0 ; <i32> [#uses=2] + %intAddOverflow = extractvalue { i32, i1 } %0, 1 ; <i1> [#uses=1] + %toint56 = ashr i32 %intAdd, 1 ; <i32> [#uses=1] + %toFP57 = sitofp i32 %toint56 to double ; <double> [#uses=1] + br i1 %intAddOverflow, label %rematerializeAdd, label %label13 + +label13: ; preds = %kjsNumberLiteral.exit, %function_setup.cont + %var_lr1.0 = phi double [ %toFP57, %kjsNumberLiteral.exit ], [ 0.000000e+00, %function_setup.cont ] ; <double> [#uses=0] + unreachable + +FailedVerify: ; preds = %NextVerify42, %prologue + ret %XXV* null + +rematerializeAdd: ; preds = %kjsNumberLiteral.exit + %rematerializedInt = sub i32 %intAdd, 0 ; <i32> [#uses=0] + ret %XXV* null + +handle_exception: ; preds = %label + ret %XXV* undef +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-01-16-UIntToFP.ll b/src/LLVM/test/CodeGen/X86/2009-01-16-UIntToFP.ll new file mode 100644 index 0000000..2eab5f1 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-01-16-UIntToFP.ll
@@ -0,0 +1,31 @@ +; RUN: llc < %s -march=x86 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin8" + +define hidden float @__floatundisf(i64 %u) nounwind readnone { +entry: + %0 = icmp ugt i64 %u, 9007199254740991 ; <i1> [#uses=1] + br i1 %0, label %bb, label %bb2 + +bb: ; preds = %entry + %1 = and i64 %u, 2047 ; <i64> [#uses=1] + %2 = icmp eq i64 %1, 0 ; <i1> [#uses=1] + br i1 %2, label %bb2, label %bb1 + +bb1: ; preds = %bb + %3 = or i64 %u, 2048 ; <i64> [#uses=1] + %4 = and i64 %3, -2048 ; <i64> [#uses=1] + br label %bb2 + +bb2: ; preds = %bb1, %bb, %entry + %u_addr.0 = phi i64 [ %4, %bb1 ], [ %u, %entry ], [ %u, %bb ] ; <i64> [#uses=2] + %5 = lshr i64 %u_addr.0, 32 ; <i64> [#uses=1] + %6 = trunc i64 %5 to i32 ; <i32> [#uses=1] + %7 = uitofp i32 %6 to double ; <double> [#uses=1] + %8 = fmul double %7, 0x41F0000000000000 ; <double> [#uses=1] + %9 = trunc i64 %u_addr.0 to i32 ; <i32> [#uses=1] + %10 = uitofp i32 %9 to double ; <double> [#uses=1] + %11 = fadd double %10, %8 ; <double> [#uses=1] + %12 = fptrunc double %11 to float ; <float> [#uses=1] + ret float %12 +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-01-18-ConstantExprCrash.ll b/src/LLVM/test/CodeGen/X86/2009-01-18-ConstantExprCrash.ll new file mode 100644 index 0000000..f895336 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-01-18-ConstantExprCrash.ll
@@ -0,0 +1,36 @@ +; RUN: llc < %s +; rdar://6505632 +; reduced from 483.xalancbmk + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin7" + %"struct.std::basic_ostream<char,std::char_traits<char> >.base" = type { i32 (...)** } + %"struct.xercesc_2_5::ASCIIRangeFactory" = type { %"struct.std::basic_ostream<char,std::char_traits<char> >.base", i8, i8 } +@_ZN11xercesc_2_5L17gIdeographicCharsE = external constant [7 x i16] ; <[7 x i16]*> [#uses=3] + +define void @_ZN11xercesc_2_515XMLRangeFactory11buildRangesEv(%"struct.xercesc_2_5::ASCIIRangeFactory"* %this) { +entry: + br i1 false, label %bb5, label %return + +bb5: ; preds = %entry + br label %bb4.i.i + +bb4.i.i: ; preds = %bb4.i.i, %bb5 + br i1 false, label %bb.i51, label %bb4.i.i + +bb.i51: ; preds = %bb.i51, %bb4.i.i + br i1 false, label %bb4.i.i70, label %bb.i51 + +bb4.i.i70: ; preds = %bb4.i.i70, %bb.i51 + br i1 false, label %_ZN11xercesc_2_59XMLString9stringLenEPKt.exit.i73, label %bb4.i.i70 + +_ZN11xercesc_2_59XMLString9stringLenEPKt.exit.i73: ; preds = %bb4.i.i70 + %0 = load i16* getelementptr ([7 x i16]* @_ZN11xercesc_2_5L17gIdeographicCharsE, i32 0, i32 add (i32 ashr (i32 sub (i32 ptrtoint (i16* getelementptr ([7 x i16]* @_ZN11xercesc_2_5L17gIdeographicCharsE, i32 0, i32 4) to i32), i32 ptrtoint ([7 x i16]* @_ZN11xercesc_2_5L17gIdeographicCharsE to i32)), i32 1), i32 1)), align 4 ; <i16> [#uses=0] + br label %bb4.i5.i141 + +bb4.i5.i141: ; preds = %bb4.i5.i141, %_ZN11xercesc_2_59XMLString9stringLenEPKt.exit.i73 + br label %bb4.i5.i141 + +return: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-01-25-NoSSE.ll b/src/LLVM/test/CodeGen/X86/2009-01-25-NoSSE.ll new file mode 100644 index 0000000..8406c4a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-01-25-NoSSE.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -march=x86-64 -mattr=-sse,-sse2 | not grep xmm +; PR3402 +target datalayout = +"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.ktermios = type { i32, i32, i32, i32, i8, [19 x i8], i32, i32 } + +define void @foo() nounwind { +entry: + %termios = alloca %struct.ktermios, align 8 + %termios1 = bitcast %struct.ktermios* %termios to i8* + call void @llvm.memset.p0i8.i64(i8* %termios1, i8 0, i64 44, i32 8, i1 false) + call void @bar(%struct.ktermios* %termios) nounwind + ret void +} + +declare void @bar(%struct.ktermios*) + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/2009-01-26-WrongCheck.ll b/src/LLVM/test/CodeGen/X86/2009-01-26-WrongCheck.ll new file mode 100644 index 0000000..117ff47 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-01-26-WrongCheck.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=x86 -enable-legalize-types-checking +; PR3393 + +define void @foo(i32 inreg %x) { + %t709 = select i1 false, i32 0, i32 %x ; <i32> [#uses=1] + %t711 = add i32 %t709, 1 ; <i32> [#uses=4] + %t801 = icmp slt i32 %t711, 0 ; <i1> [#uses=1] + %t712 = zext i32 %t711 to i64 ; <i64> [#uses=1] + %t804 = select i1 %t801, i64 0, i64 %t712 ; <i64> [#uses=1] + store i64 %t804, i64* null + %t815 = icmp slt i32 %t711, 0 ; <i1> [#uses=1] + %t814 = sext i32 %t711 to i64 ; <i64> [#uses=1] + %t816 = select i1 %t815, i64 0, i64 %t814 ; <i64> [#uses=1] + store i64 %t816, i64* null + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-01-27-NullStrings.ll b/src/LLVM/test/CodeGen/X86/2009-01-27-NullStrings.ll new file mode 100644 index 0000000..8b3094b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-01-27-NullStrings.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -mtriple=i686-apple-darwin | FileCheck %s +; CHECK: .section __TEXT,__cstring,cstring_literals + +@x = internal unnamed_addr constant [1 x i8] zeroinitializer ; <[1 x i8]*> [#uses=1] + +@y = global [1 x i8]* @x +
diff --git a/src/LLVM/test/CodeGen/X86/2009-01-31-BigShift.ll b/src/LLVM/test/CodeGen/X86/2009-01-31-BigShift.ll new file mode 100644 index 0000000..4eb0ec1 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-01-31-BigShift.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=x86 | not grep and +; PR3401 + +define void @x(i288 %i) nounwind { + call void @add(i288 %i) + ret void +} + +declare void @add(i288)
diff --git a/src/LLVM/test/CodeGen/X86/2009-01-31-BigShift2.ll b/src/LLVM/test/CodeGen/X86/2009-01-31-BigShift2.ll new file mode 100644 index 0000000..9d24084 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-01-31-BigShift2.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=x86 | grep {mov.*56} +; PR3449 + +define void @test(<8 x double>* %P, i64* %Q) nounwind { + %A = load <8 x double>* %P ; <<8 x double>> [#uses=1] + %B = bitcast <8 x double> %A to i512 ; <i512> [#uses=1] + %C = lshr i512 %B, 448 ; <i512> [#uses=1] + %D = trunc i512 %C to i64 ; <i64> [#uses=1] + volatile store i64 %D, i64* %Q + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-01-31-BigShift3.ll b/src/LLVM/test/CodeGen/X86/2009-01-31-BigShift3.ll new file mode 100644 index 0000000..1b531e3 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-01-31-BigShift3.ll
@@ -0,0 +1,31 @@ +; RUN: llc < %s -march=x86 +; PR3450 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin7" + %struct.BitMap = type { i8* } + %struct.BitMapListStruct = type { %struct.BitMap, %struct.BitMapListStruct*, %struct.BitMapListStruct* } + %struct.Material = type { float, float, float, %struct.Material*, %struct.Material* } + %struct.ObjPoint = type { double, double, double, double, double, double } + %struct.ObjectStruct = type { [57 x i8], %struct.PointListStruct*, %struct.Poly3Struct*, %struct.Poly4Struct*, %struct.Texture*, %struct.Material*, %struct.Point, i32, i32, %struct.Point, %struct.Point, %struct.Point, %struct.ObjectStruct*, %struct.ObjectStruct*, i32, i32, i32, i32, i32, i32, i32, %struct.ObjectStruct*, %struct.ObjectStruct* } + %struct.Point = type { double, double, double } + %struct.PointListStruct = type { %struct.ObjPoint*, %struct.PointListStruct*, %struct.PointListStruct* } + %struct.Poly3Struct = type { [3 x %struct.ObjPoint*], %struct.Material*, %struct.Texture*, %struct.Poly3Struct*, %struct.Poly3Struct* } + %struct.Poly4Struct = type { [4 x %struct.ObjPoint*], %struct.Material*, %struct.Texture*, %struct.Poly4Struct*, %struct.Poly4Struct* } + %struct.Texture = type { %struct.Point, %struct.BitMapListStruct*, %struct.Point, %struct.Point, %struct.Point, %struct.Texture*, %struct.Texture* } + +define fastcc void @ScaleObjectAdd(%struct.ObjectStruct* %o, double %sx, double %sy, double %sz) nounwind { +entry: + %sz101112.ins = or i960 0, 0 ; <i960> [#uses=1] + br i1 false, label %return, label %bb1.preheader + +bb1.preheader: ; preds = %entry + %0 = lshr i960 %sz101112.ins, 640 ; <i960> [#uses=0] + br label %bb1 + +bb1: ; preds = %bb1, %bb1.preheader + br label %bb1 + +return: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-02-01-LargeMask.ll b/src/LLVM/test/CodeGen/X86/2009-02-01-LargeMask.ll new file mode 100644 index 0000000..c4042e6 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-02-01-LargeMask.ll
@@ -0,0 +1,32 @@ +; RUN: llc < %s -march=x86 +; PR3453 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" +target triple = "i386-pc-linux-gnu" + %struct.cl_engine = type { i32, i16, i32, i8**, i8**, i8*, i8*, i8*, i8*, i8*, i8*, i8* } + %struct.cl_limits = type { i32, i32, i32, i32, i16, i32 } + %struct.cli_ac_alt = type { i8, i8*, i16, i16, %struct.cli_ac_alt* } + %struct.cli_ac_node = type { i8, i8, %struct.cli_ac_patt*, %struct.cli_ac_node**, %struct.cli_ac_node* } + %struct.cli_ac_patt = type { i16*, i16*, i16, i16, i8, i32, i32, i8*, i8*, i32, i16, i16, i16, i16, %struct.cli_ac_alt**, i8, i16, %struct.cli_ac_patt*, %struct.cli_ac_patt* } + %struct.cli_bm_patt = type { i8*, i8*, i16, i16, i8*, i8*, i8, %struct.cli_bm_patt*, i16 } + %struct.cli_ctx = type { i8**, i32*, %struct.cli_matcher*, %struct.cl_engine*, %struct.cl_limits*, i32, i32, i32, i32, %struct.cli_dconf* } + %struct.cli_dconf = type { i32, i32, i32, i32, i32, i32, i32 } + %struct.cli_matcher = type { i16, i8, i8*, %struct.cli_bm_patt**, i32*, i32, i8, i8, %struct.cli_ac_node*, %struct.cli_ac_node**, %struct.cli_ac_patt**, i32, i32, i32 } + +define fastcc i32 @cli_scanautoit(i32 %desc, %struct.cli_ctx* %ctx, i32 %offset) nounwind { +entry: + br i1 false, label %bb.i49.i72, label %bb14 + +bb.i49.i72: ; preds = %bb.i49.i72, %entry + %UNP.i1482.0 = phi i288 [ %.ins659, %bb.i49.i72 ], [ undef, %entry ] ; <i288> [#uses=1] + %0 = load i32* null, align 4 ; <i32> [#uses=1] + %1 = xor i32 %0, 17834 ; <i32> [#uses=1] + %2 = zext i32 %1 to i288 ; <i288> [#uses=1] + %3 = shl i288 %2, 160 ; <i288> [#uses=1] + %UNP.i1482.in658.mask = and i288 %UNP.i1482.0, -6277101733925179126504886505003981583386072424808101969921 ; <i288> [#uses=1] + %.ins659 = or i288 %3, %UNP.i1482.in658.mask ; <i288> [#uses=1] + br label %bb.i49.i72 + +bb14: ; preds = %entry + ret i32 -123 +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-02-03-AnalyzedTwice.ll b/src/LLVM/test/CodeGen/X86/2009-02-03-AnalyzedTwice.ll new file mode 100644 index 0000000..e75af13 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-02-03-AnalyzedTwice.ll
@@ -0,0 +1,30 @@ +; RUN: llc < %s -march=x86 +; PR3411 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" +target triple = "i386-pc-linux-gnu" +@g_3 = external global i32 ; <i32*> [#uses=1] + +define void @bar(i64 %p_66) nounwind { +entry: + br i1 false, label %bb, label %bb1 + +bb: ; preds = %entry + unreachable + +bb1: ; preds = %entry + %0 = load i32* @g_3, align 4 ; <i32> [#uses=2] + %1 = sext i32 %0 to i64 ; <i64> [#uses=1] + %2 = or i64 %1, %p_66 ; <i64> [#uses=1] + %3 = shl i64 %2, 0 ; <i64> [#uses=1] + %4 = and i64 %3, %p_66 ; <i64> [#uses=1] + %5 = icmp eq i64 %4, 1 ; <i1> [#uses=1] + %6 = trunc i64 %p_66 to i32 ; <i32> [#uses=2] + %7 = or i32 %0, %6 ; <i32> [#uses=2] + %8 = sub i32 %7, %6 ; <i32> [#uses=1] + %iftmp.0.0 = select i1 %5, i32 %8, i32 %7 ; <i32> [#uses=1] + %9 = tail call i32 @foo(i32 %iftmp.0.0) nounwind ; <i32> [#uses=0] + ret void +} + +declare i32 @foo(i32)
diff --git a/src/LLVM/test/CodeGen/X86/2009-02-04-sext-i64-gep.ll b/src/LLVM/test/CodeGen/X86/2009-02-04-sext-i64-gep.ll new file mode 100644 index 0000000..4880f62 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-02-04-sext-i64-gep.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s | grep p-92 +; PR3481 +; The offset should print as -92, not +17179869092 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" +target triple = "i386-pc-linux-gnu" +@p = common global [10 x i32] zeroinitializer, align 4 ; <[10 x i32]*> +@g = global [1 x i32*] [ i32* bitcast (i8* getelementptr (i8* bitcast +([10 x i32]* @p to i8*), i64 17179869092) to i32*) ], align 4
diff --git a/src/LLVM/test/CodeGen/X86/2009-02-05-CoalescerBug.ll b/src/LLVM/test/CodeGen/X86/2009-02-05-CoalescerBug.ll new file mode 100644 index 0000000..a46a20b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-02-05-CoalescerBug.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 -o %t +; RUN: grep movss %t | count 2 +; RUN: grep movaps %t | count 2 +; RUN: grep movdqa %t | count 2 + +define i1 @t([2 x float]* %y, [2 x float]* %w, i32, [2 x float]* %x.pn59, i32 %smax190, i32 %j.1180, <4 x float> %wu.2179, <4 x float> %wr.2178, <4 x float>* %tmp89.out, <4 x float>* %tmp107.out, i32* %indvar.next218.out) nounwind { +newFuncRoot: + %tmp82 = insertelement <4 x float> %wr.2178, float 0.000000e+00, i32 0 ; <<4 x float>> [#uses=1] + %tmp85 = insertelement <4 x float> %tmp82, float 0.000000e+00, i32 1 ; <<4 x float>> [#uses=1] + %tmp87 = insertelement <4 x float> %tmp85, float 0.000000e+00, i32 2 ; <<4 x float>> [#uses=1] + %tmp89 = insertelement <4 x float> %tmp87, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=1] + store <4 x float> %tmp89, <4 x float>* %tmp89.out + ret i1 false +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-02-08-CoalescerBug.ll b/src/LLVM/test/CodeGen/X86/2009-02-08-CoalescerBug.ll new file mode 100644 index 0000000..908cc08 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-02-08-CoalescerBug.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -march=x86 +; PR3486 + +define i32 @foo(i8 signext %p_26) nounwind { +entry: + %0 = icmp eq i8 %p_26, 0 ; <i1> [#uses=2] + %or.cond = or i1 false, %0 ; <i1> [#uses=2] + %iftmp.1.0 = zext i1 %or.cond to i16 ; <i16> [#uses=1] + br i1 %0, label %bb.i, label %bar.exit + +bb.i: ; preds = %entry + %1 = zext i1 %or.cond to i32 ; <i32> [#uses=1] + %2 = sdiv i32 %1, 0 ; <i32> [#uses=1] + %3 = trunc i32 %2 to i16 ; <i16> [#uses=1] + br label %bar.exit + +bar.exit: ; preds = %bb.i, %entry + %4 = phi i16 [ %3, %bb.i ], [ %iftmp.1.0, %entry ] ; <i16> [#uses=1] + %5 = trunc i16 %4 to i8 ; <i8> [#uses=1] + %6 = sext i8 %5 to i32 ; <i32> [#uses=1] + ret i32 %6 +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-02-09-ivs-different-sizes.ll b/src/LLVM/test/CodeGen/X86/2009-02-09-ivs-different-sizes.ll new file mode 100644 index 0000000..36cc535 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-02-09-ivs-different-sizes.ll
@@ -0,0 +1,33 @@ +; RUN: llc < %s +; This used to crash. +; ModuleID = 'bugpoint-reduced-simplified.bc' +target datalayout ="e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define void @parse_number(i8* nocapture %p) nounwind { +entry: + %shift.0 = select i1 false, i32 4, i32 2 ; <i32> [#uses=1] + br label %bb47 + +bb47: ; preds = %bb47, %entry + br i1 false, label %bb54, label %bb47 + +bb54: ; preds = %bb47 + br i1 false, label %bb56, label %bb66 + +bb56: ; preds = %bb62, %bb54 + %p_addr.0.pn.rec = phi i64 [ %p_addr.6.rec, %bb62 ], [ 0, %bb54 ] ; <i64> [#uses=2] + %ch.6.in.in = phi i8* [ %p_addr.6, %bb62 ], [ null, %bb54 ] ; <i8*> [#uses=0] + %indvar202 = trunc i64 %p_addr.0.pn.rec to i32 ; <i32>[#uses=1] + %frac_bits.0 = mul i32 %indvar202, %shift.0 ; <i32>[#uses=1] + %p_addr.6.rec = add i64 %p_addr.0.pn.rec, 1 ; <i64>[#uses=2] + %p_addr.6 = getelementptr i8* null, i64 %p_addr.6.rec ; <i8*>[#uses=1] + br i1 false, label %bb66, label %bb62 + +bb62: ; preds = %bb56 + br label %bb56 + +bb66: ; preds = %bb56, %bb54 + %frac_bits.1 = phi i32 [ 0, %bb54 ], [ %frac_bits.0, %bb56 ] ; <i32> [#uses=0] + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-02-11-codegenprepare-reuse.ll b/src/LLVM/test/CodeGen/X86/2009-02-11-codegenprepare-reuse.ll new file mode 100644 index 0000000..1284b0d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-02-11-codegenprepare-reuse.ll
@@ -0,0 +1,35 @@ +; RUN: llc < %s +; PR3537 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin9.6" + %struct.GetBitContext = type <{ i8*, i8*, i32, i32 }> + +define i32 @alac_decode_frame() nounwind { +entry: + %tmp2 = load i8** null ; <i8*> [#uses=2] + %tmp34 = getelementptr i8* %tmp2, i32 4 ; <i8*> [#uses=2] + %tmp5.i424 = bitcast i8* %tmp34 to i8** ; <i8**> [#uses=2] + %tmp15.i = getelementptr i8* %tmp2, i32 12 ; <i8*> [#uses=1] + %0 = bitcast i8* %tmp15.i to i32* ; <i32*> [#uses=1] + br i1 false, label %if.then43, label %if.end47 + +if.then43: ; preds = %entry + ret i32 0 + +if.end47: ; preds = %entry + %tmp5.i590 = load i8** %tmp5.i424 ; <i8*> [#uses=0] + store i32 19, i32* %0 + %tmp6.i569 = load i8** %tmp5.i424 ; <i8*> [#uses=0] + %1 = call i32 asm "bswap $0", "=r,0,~{dirflag},~{fpsr},~{flags}"(i32 0) nounwind ; <i32> [#uses=0] + br i1 false, label %bb.nph, label %if.then63 + +if.then63: ; preds = %if.end47 + unreachable + +bb.nph: ; preds = %if.end47 + %2 = bitcast i8* %tmp34 to %struct.GetBitContext* ; <%struct.GetBitContext*> [#uses=1] + %call9.i = call fastcc i32 @decode_scalar(%struct.GetBitContext* %2, i32 0, i32 0, i32 0) nounwind ; <i32> [#uses=0] + unreachable +} + +declare fastcc i32 @decode_scalar(%struct.GetBitContext* nocapture, i32, i32, i32) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll b/src/LLVM/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll new file mode 100644 index 0000000..0dca14d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll
@@ -0,0 +1,85 @@ +; RUN: llc < %s +; RUN: llc < %s -march=x86-64 +; PR3538 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin9" +define signext i8 @foo(i8* %s1) nounwind ssp { +entry: + %s1_addr = alloca i8* ; <i8**> [#uses=2] + %retval = alloca i32 ; <i32*> [#uses=2] + %saved_stack.1 = alloca i8* ; <i8**> [#uses=2] + %0 = alloca i32 ; <i32*> [#uses=2] + %str.0 = alloca [0 x i8]* ; <[0 x i8]**> [#uses=3] + %1 = alloca i64 ; <i64*> [#uses=2] + %2 = alloca i64 ; <i64*> [#uses=1] + %3 = alloca i64 ; <i64*> [#uses=6] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + call void @llvm.dbg.declare(metadata !{i8** %s1_addr}, metadata !0), !dbg !7 + store i8* %s1, i8** %s1_addr + call void @llvm.dbg.declare(metadata !{[0 x i8]** %str.0}, metadata !8), !dbg !7 + %4 = call i8* @llvm.stacksave(), !dbg !7 ; <i8*> [#uses=1] + store i8* %4, i8** %saved_stack.1, align 8, !dbg !7 + %5 = load i8** %s1_addr, align 8, !dbg !13 ; <i8*> [#uses=1] + %6 = call i64 @strlen(i8* %5) nounwind readonly, !dbg !13 ; <i64> [#uses=1] + %7 = add i64 %6, 1, !dbg !13 ; <i64> [#uses=1] + store i64 %7, i64* %3, align 8, !dbg !13 + %8 = load i64* %3, align 8, !dbg !13 ; <i64> [#uses=1] + %9 = sub nsw i64 %8, 1, !dbg !13 ; <i64> [#uses=0] + %10 = load i64* %3, align 8, !dbg !13 ; <i64> [#uses=1] + %11 = mul i64 %10, 8, !dbg !13 ; <i64> [#uses=0] + %12 = load i64* %3, align 8, !dbg !13 ; <i64> [#uses=1] + store i64 %12, i64* %2, align 8, !dbg !13 + %13 = load i64* %3, align 8, !dbg !13 ; <i64> [#uses=1] + %14 = mul i64 %13, 8, !dbg !13 ; <i64> [#uses=0] + %15 = load i64* %3, align 8, !dbg !13 ; <i64> [#uses=1] + store i64 %15, i64* %1, align 8, !dbg !13 + %16 = load i64* %1, align 8, !dbg !13 ; <i64> [#uses=1] + %17 = trunc i64 %16 to i32, !dbg !13 ; <i32> [#uses=1] + %18 = alloca i8, i32 %17, !dbg !13 ; <i8*> [#uses=1] + %19 = bitcast i8* %18 to [0 x i8]*, !dbg !13 ; 
<[0 x i8]*> [#uses=1] + store [0 x i8]* %19, [0 x i8]** %str.0, align 8, !dbg !13 + %20 = load [0 x i8]** %str.0, align 8, !dbg !15 ; <[0 x i8]*> [#uses=1] + %21 = getelementptr inbounds [0 x i8]* %20, i64 0, i64 0, !dbg !15 ; <i8*> [#uses=1] + store i8 0, i8* %21, align 1, !dbg !15 + %22 = load [0 x i8]** %str.0, align 8, !dbg !16 ; <[0 x i8]*> [#uses=1] + %23 = getelementptr inbounds [0 x i8]* %22, i64 0, i64 0, !dbg !16 ; <i8*> [#uses=1] + %24 = load i8* %23, align 1, !dbg !16 ; <i8> [#uses=1] + %25 = sext i8 %24 to i32, !dbg !16 ; <i32> [#uses=1] + store i32 %25, i32* %0, align 4, !dbg !16 + %26 = load i8** %saved_stack.1, align 8, !dbg !16 ; <i8*> [#uses=1] + call void @llvm.stackrestore(i8* %26), !dbg !16 + %27 = load i32* %0, align 4, !dbg !16 ; <i32> [#uses=1] + store i32 %27, i32* %retval, align 4, !dbg !16 + br label %return, !dbg !16 + +return: ; preds = %entry + %retval1 = load i32* %retval, !dbg !16 ; <i32> [#uses=1] + %retval12 = trunc i32 %retval1 to i8, !dbg !16 ; <i8> [#uses=1] + ret i8 %retval12, !dbg !16 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +declare i8* @llvm.stacksave() nounwind + +declare i64 @strlen(i8*) nounwind readonly + +declare void @llvm.stackrestore(i8*) nounwind + +!0 = metadata !{i32 459009, metadata !1, metadata !"s1", metadata !2, i32 2, metadata !6} ; [ DW_TAG_arg_variable ] +!1 = metadata !{i32 458798, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 458769, i32 0, i32 1, metadata !"vla.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 458773, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5, metadata !6} +!5 = metadata !{i32 458788, metadata !2, metadata !"char", metadata !2, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 458767, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_pointer_type ] +!7 = metadata !{i32 2, i32 0, metadata !1, null} +!8 = metadata !{i32 459008, metadata !1, metadata !"str.0", metadata !2, i32 3, metadata !9} ; [ DW_TAG_auto_variable ] +!9 = metadata !{i32 458767, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ] +!10 = metadata !{i32 458753, metadata !2, metadata !"", metadata !2, i32 0, i64 8, i64 8, i64 0, i32 0, metadata !5, metadata !11, i32 0, null} ; [ DW_TAG_array_type ] +!11 = metadata !{metadata !12} +!12 = metadata !{i32 458785, i64 0, i64 0} ; [ DW_TAG_subrange_type ] +!13 = metadata !{i32 3, i32 0, metadata !14, null} +!14 = metadata !{i32 458763, metadata !1, i32 0, i32 0} ; [ DW_TAG_lexical_block ] +!15 = metadata !{i32 4, i32 0, metadata !14, null} +!16 = metadata !{i32 5, i32 0, metadata !14, null}
diff --git a/src/LLVM/test/CodeGen/X86/2009-02-12-InlineAsm-nieZ-constraints.ll b/src/LLVM/test/CodeGen/X86/2009-02-12-InlineAsm-nieZ-constraints.ll new file mode 100644 index 0000000..d64c966 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-02-12-InlineAsm-nieZ-constraints.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -march=x86 | FileCheck %s + +; ModuleID = 'shant.c' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin9.6" + +define void @f() nounwind { +; CHECK: f: +; CHECK-NOT: ret +; CHECK: foo $-81920 +; CHECK-NOT: ret +; CHECK: foo $-81920 +; CHECK-NOT: ret +; CHECK: foo $-81920 +; CHECK-NOT: ret +; CHECK: foo $4294885376 +; CHECK: ret + + call void asm sideeffect "foo $0", "n,~{dirflag},~{fpsr},~{flags}"(i32 -81920) nounwind + call void asm sideeffect "foo $0", "i,~{dirflag},~{fpsr},~{flags}"(i32 -81920) nounwind + call void asm sideeffect "foo $0", "e,~{dirflag},~{fpsr},~{flags}"(i32 -81920) nounwind + call void asm sideeffect "foo $0", "Z,~{dirflag},~{fpsr},~{flags}"(i64 4294885376) nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-02-12-SpillerBug.ll b/src/LLVM/test/CodeGen/X86/2009-02-12-SpillerBug.ll new file mode 100644 index 0000000..4f8a5e7 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-02-12-SpillerBug.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin8 +; PR3561 + +define hidden void @__mulxc3({ x86_fp80, x86_fp80 }* noalias nocapture sret %agg.result, x86_fp80 %a, x86_fp80 %b, x86_fp80 %c, x86_fp80 %d) nounwind { +entry: + %0 = fmul x86_fp80 %b, %d ; <x86_fp80> [#uses=1] + %1 = fsub x86_fp80 0xK00000000000000000000, %0 ; <x86_fp80> [#uses=1] + %2 = fadd x86_fp80 0xK00000000000000000000, 0xK00000000000000000000 ; <x86_fp80> [#uses=1] + %3 = fcmp uno x86_fp80 %1, 0xK00000000000000000000 ; <i1> [#uses=1] + %4 = fcmp uno x86_fp80 %2, 0xK00000000000000000000 ; <i1> [#uses=1] + %or.cond = and i1 %3, %4 ; <i1> [#uses=1] + br i1 %or.cond, label %bb47, label %bb71 + +bb47: ; preds = %entry + %5 = fcmp uno x86_fp80 %a, 0xK00000000000000000000 ; <i1> [#uses=1] + br i1 %5, label %bb60, label %bb62 + +bb60: ; preds = %bb47 + %6 = tail call x86_fp80 @copysignl(x86_fp80 0xK00000000000000000000, x86_fp80 %a) nounwind readnone ; <x86_fp80> [#uses=0] + br label %bb62 + +bb62: ; preds = %bb60, %bb47 + unreachable + +bb71: ; preds = %entry + ret void +} + +declare x86_fp80 @copysignl(x86_fp80, x86_fp80) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/X86/2009-02-21-ExtWeakInitializer.ll b/src/LLVM/test/CodeGen/X86/2009-02-21-ExtWeakInitializer.ll new file mode 100644 index 0000000..b3dd13c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-02-21-ExtWeakInitializer.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s | grep weak | count 3 +; PR3629 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "x86_64-unknown-freebsd7.1" +module asm ".ident\09\22$FreeBSD$\22" + %struct.anon = type <{ %struct.uart_devinfo* }> + %struct.lock_object = type <{ i8*, i32, i32, %struct.witness* }> + %struct.mtx = type <{ %struct.lock_object, i64 }> + %struct.uart_bas = type <{ i64, i64, i32, i32, i32, i8, i8, i8, i8 }> + %struct.uart_class = type opaque + %struct.uart_devinfo = type <{ %struct.anon, %struct.uart_ops*, %struct.uart_bas, i32, i32, i32, i32, i32, i8, i8, i8, i8, i32 (%struct.uart_softc*)*, i32 (%struct.uart_softc*)*, i8*, %struct.mtx* }> + %struct.uart_ops = type <{ i32 (%struct.uart_bas*)*, void (%struct.uart_bas*, i32, i32, i32, i32)*, void (%struct.uart_bas*)*, void (%struct.uart_bas*, i32)*, i32 (%struct.uart_bas*)*, i32 (%struct.uart_bas*, %struct.mtx*)* }> + %struct.uart_softc = type opaque + %struct.witness = type opaque + +@uart_classes = internal global [3 x %struct.uart_class*] [%struct.uart_class* @uart_ns8250_class, %struct.uart_class* @uart_sab82532_class, %struct.uart_class* @uart_z8530_class], align 8 ; <[3 x %struct.uart_class*]*> [#uses=1] +@uart_ns8250_class = extern_weak global %struct.uart_class ; <%struct.uart_class*> [#uses=1] +@uart_sab82532_class = extern_weak global %struct.uart_class ; <%struct.uart_class*> [#uses=1] +@uart_z8530_class = extern_weak global %struct.uart_class ; <%struct.uart_class*> [#uses=1]
diff --git a/src/LLVM/test/CodeGen/X86/2009-02-25-CommuteBug.ll b/src/LLVM/test/CodeGen/X86/2009-02-25-CommuteBug.ll new file mode 100644 index 0000000..7ea6998 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-02-25-CommuteBug.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& not grep commuted +; rdar://6608609 + +define <2 x double> @t(<2 x double> %A, <2 x double> %B, <2 x double> %C) nounwind readnone { +entry: + %tmp.i2 = bitcast <2 x double> %B to <2 x i64> ; <<2 x i64>> [#uses=1] + %tmp2.i = or <2 x i64> %tmp.i2, <i64 4607632778762754458, i64 4607632778762754458> ; <<2 x i64>> [#uses=1] + %tmp3.i = bitcast <2 x i64> %tmp2.i to <2 x double> ; <<2 x double>> [#uses=1] + %0 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %A, <2 x double> %tmp3.i) nounwind readnone ; <<2 x double>> [#uses=1] + %tmp.i = fadd <2 x double> %0, %C ; <<2 x double>> [#uses=1] + ret <2 x double> %tmp.i +} + +declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/src/LLVM/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll new file mode 100644 index 0000000..0b5b7bd --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
@@ -0,0 +1,56 @@ +; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -stats |& grep {8 machine-licm} +; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 | FileCheck %s +; rdar://6627786 +; rdar://7792037 + +target triple = "x86_64-apple-darwin10.0" + %struct.Key = type { i64 } + %struct.__Rec = type opaque + %struct.__vv = type { } + +define %struct.__vv* @t(%struct.Key* %desc, i64 %p) nounwind ssp { +entry: + br label %bb4 + +bb4: ; preds = %bb.i, %bb26, %bb4, %entry +; CHECK: %bb4 +; CHECK: xorb +; CHECK: callq +; CHECK: movq +; CHECK: xorl +; CHECK: xorb + + %0 = call i32 (...)* @xxGetOffsetForCode(i32 undef) nounwind ; <i32> [#uses=0] + %ins = or i64 %p, 2097152 ; <i64> [#uses=1] + %1 = call i32 (...)* @xxCalculateMidType(%struct.Key* %desc, i32 0) nounwind ; <i32> [#uses=1] + %cond = icmp eq i32 %1, 1 ; <i1> [#uses=1] + br i1 %cond, label %bb26, label %bb4 + +bb26: ; preds = %bb4 + %2 = and i64 %ins, 15728640 ; <i64> [#uses=1] + %cond.i = icmp eq i64 %2, 1048576 ; <i1> [#uses=1] + br i1 %cond.i, label %bb.i, label %bb4 + +bb.i: ; preds = %bb26 + %3 = load i32* null, align 4 ; <i32> [#uses=1] + %4 = uitofp i32 %3 to float ; <float> [#uses=1] + %.sum13.i = add i64 0, 4 ; <i64> [#uses=1] + %5 = getelementptr i8* null, i64 %.sum13.i ; <i8*> [#uses=1] + %6 = bitcast i8* %5 to i32* ; <i32*> [#uses=1] + %7 = load i32* %6, align 4 ; <i32> [#uses=1] + %8 = uitofp i32 %7 to float ; <float> [#uses=1] + %.sum.i = add i64 0, 8 ; <i64> [#uses=1] + %9 = getelementptr i8* null, i64 %.sum.i ; <i8*> [#uses=1] + %10 = bitcast i8* %9 to i32* ; <i32*> [#uses=1] + %11 = load i32* %10, align 4 ; <i32> [#uses=1] + %12 = uitofp i32 %11 to float ; <float> [#uses=1] + %13 = insertelement <4 x float> undef, float %4, i32 0 ; <<4 x float>> [#uses=1] + %14 = insertelement <4 x float> %13, float %8, i32 1 ; <<4 x float>> [#uses=1] + %15 = insertelement <4 x float> %14, float %12, i32 2 ; <<4 x float>> [#uses=1] + store <4 x float> %15, <4 x float>* null, align 16 + br label %bb4 +} + +declare 
i32 @xxGetOffsetForCode(...) + +declare i32 @xxCalculateMidType(...)
diff --git a/src/LLVM/test/CodeGen/X86/2009-03-03-BTHang.ll b/src/LLVM/test/CodeGen/X86/2009-03-03-BTHang.ll new file mode 100644 index 0000000..bb95925 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-03-03-BTHang.ll
@@ -0,0 +1,37 @@ +; RUN: llc < %s -march=x86 +; rdar://6642541 + + %struct.HandleBlock = type { [30 x i32], [990 x i8*], %struct.HandleBlockTrailer } + %struct.HandleBlockTrailer = type { %struct.HandleBlock* } + +define hidden zeroext i8 @IsHandleAllocatedFromPool(i8** %h) nounwind optsize { +entry: + %0 = ptrtoint i8** %h to i32 ; <i32> [#uses=2] + %1 = and i32 %0, -4096 ; <i32> [#uses=1] + %2 = inttoptr i32 %1 to %struct.HandleBlock* ; <%struct.HandleBlock*> [#uses=3] + %3 = getelementptr %struct.HandleBlock* %2, i32 0, i32 0, i32 0 ; <i32*> [#uses=1] + %4 = load i32* %3, align 4096 ; <i32> [#uses=1] + %5 = icmp eq i32 %4, 1751280747 ; <i1> [#uses=1] + br i1 %5, label %bb, label %bb1 + +bb: ; preds = %entry + %6 = getelementptr %struct.HandleBlock* %2, i32 0, i32 1 ; <[990 x i8*]*> [#uses=1] + %7 = ptrtoint [990 x i8*]* %6 to i32 ; <i32> [#uses=1] + %8 = sub i32 %0, %7 ; <i32> [#uses=2] + %9 = lshr i32 %8, 2 ; <i32> [#uses=1] + %10 = ashr i32 %8, 7 ; <i32> [#uses=1] + %11 = and i32 %10, 134217727 ; <i32> [#uses=1] + %12 = getelementptr %struct.HandleBlock* %2, i32 0, i32 0, i32 %11 ; <i32*> [#uses=1] + %not.i = and i32 %9, 31 ; <i32> [#uses=1] + %13 = xor i32 %not.i, 31 ; <i32> [#uses=1] + %14 = shl i32 1, %13 ; <i32> [#uses=1] + %15 = load i32* %12, align 4 ; <i32> [#uses=1] + %16 = and i32 %15, %14 ; <i32> [#uses=1] + %17 = icmp eq i32 %16, 0 ; <i1> [#uses=1] + %tmp = zext i1 %17 to i8 ; <i8> [#uses=1] + ret i8 %tmp + +bb1: ; preds = %entry + ret i8 0 +} +
diff --git a/src/LLVM/test/CodeGen/X86/2009-03-03-BitcastLongDouble.ll b/src/LLVM/test/CodeGen/X86/2009-03-03-BitcastLongDouble.ll new file mode 100644 index 0000000..9deeceb --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-03-03-BitcastLongDouble.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=x86 +; PR3686 +; rdar://6661799 + +define i32 @x(i32 %y) nounwind readnone { +entry: + %tmp14 = zext i32 %y to i80 ; <i80> [#uses=1] + %tmp15 = bitcast i80 %tmp14 to x86_fp80 ; <x86_fp80> [#uses=1] + %add = fadd x86_fp80 %tmp15, 0xK3FFF8000000000000000 ; <x86_fp80> [#uses=1] + %tmp11 = bitcast x86_fp80 %add to i80 ; <i80> [#uses=1] + %tmp10 = trunc i80 %tmp11 to i32 ; <i32> [#uses=1] + ret i32 %tmp10 +} +
diff --git a/src/LLVM/test/CodeGen/X86/2009-03-05-burr-list-crash.ll b/src/LLVM/test/CodeGen/X86/2009-03-05-burr-list-crash.ll new file mode 100644 index 0000000..411a0c9 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-03-05-burr-list-crash.ll
@@ -0,0 +1,35 @@ +; RUN: llc < %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" +external global i32 ; <i32*>:0 [#uses=1] + +declare i64 @strlen(i8* nocapture) nounwind readonly + +define fastcc i8* @1(i8*) nounwind { + br i1 false, label %3, label %2 + +; <label>:2 ; preds = %1 + ret i8* %0 + +; <label>:3 ; preds = %1 + %4 = call i64 @strlen(i8* %0) nounwind readonly ; <i64> [#uses=1] + %5 = trunc i64 %4 to i32 ; <i32> [#uses=2] + %6 = load i32* @0, align 4 ; <i32> [#uses=1] + %7 = sub i32 %5, %6 ; <i32> [#uses=2] + %8 = sext i32 %5 to i64 ; <i64> [#uses=1] + %9 = sext i32 %7 to i64 ; <i64> [#uses=1] + %10 = sub i64 %8, %9 ; <i64> [#uses=1] + %11 = getelementptr i8* %0, i64 %10 ; <i8*> [#uses=1] + %12 = icmp sgt i32 %7, 0 ; <i1> [#uses=1] + br i1 %12, label %13, label %14 + +; <label>:13 ; preds = %13, %3 + br label %13 + +; <label>:14 ; preds = %3 + %15 = call noalias i8* @make_temp_file(i8* %11) nounwind ; <i8*> [#uses=0] + unreachable +} + +declare noalias i8* @make_temp_file(i8*)
diff --git a/src/LLVM/test/CodeGen/X86/2009-03-07-FPConstSelect.ll b/src/LLVM/test/CodeGen/X86/2009-03-07-FPConstSelect.ll new file mode 100644 index 0000000..39caddc --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-03-07-FPConstSelect.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86 -mcpu=yonah | not grep xmm +; This should do a single load into the fp stack for the return, not diddle with xmm registers. + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin7" + +define float @f(i32 %x) nounwind readnone { +entry: + %0 = icmp eq i32 %x, 0 ; <i1> [#uses=1] + %iftmp.0.0 = select i1 %0, float 4.200000e+01, float 2.300000e+01 + ret float %iftmp.0.0 +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-03-09-APIntCrash.ll b/src/LLVM/test/CodeGen/X86/2009-03-09-APIntCrash.ll new file mode 100644 index 0000000..896c968 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-03-09-APIntCrash.ll
@@ -0,0 +1,25 @@ +; RUN: llc < %s -march=x86-64 +; PR3763 + %struct.__block_descriptor = type { i64, i64 } + +define %struct.__block_descriptor @evUTCTime() nounwind { +entry: + br i1 false, label %if.then, label %return + +if.then: ; preds = %entry + %srcval18 = load i128* null, align 8 ; <i128> [#uses=1] + %tmp15 = lshr i128 %srcval18, 64 ; <i128> [#uses=1] + %tmp9 = mul i128 %tmp15, 18446744073709551616000 ; <i128> [#uses=1] + br label %return + +return: ; preds = %if.then, %entry + %retval.0 = phi i128 [ %tmp9, %if.then ], [ undef, %entry ] ; <i128> [#uses=0] + ret %struct.__block_descriptor undef +} + +define i128 @test(i128 %arg) nounwind { + %A = shl i128 1, 92 + %B = sub i128 0, %A + %C = mul i128 %arg, %B + ret i128 %C ;; should codegen to neg(shift) +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-03-09-SpillerBug.ll b/src/LLVM/test/CodeGen/X86/2009-03-09-SpillerBug.ll new file mode 100644 index 0000000..4224210 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-03-09-SpillerBug.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -mtriple=i386-pc-linux-gnu +; PR3706 + +define void @__mulxc3(x86_fp80 %b) nounwind { +entry: + %call = call x86_fp80 @y(x86_fp80* null, x86_fp80* null) ; <x86_fp80> [#uses=0] + %cmp = fcmp ord x86_fp80 %b, 0xK00000000000000000000 ; <i1> [#uses=1] + %sub = fsub x86_fp80 %b, %b ; <x86_fp80> [#uses=1] + %cmp7 = fcmp uno x86_fp80 %sub, 0xK00000000000000000000 ; <i1> [#uses=1] + %and12 = and i1 %cmp7, %cmp ; <i1> [#uses=1] + %and = zext i1 %and12 to i32 ; <i32> [#uses=1] + %conv9 = sitofp i32 %and to x86_fp80 ; <x86_fp80> [#uses=1] + store x86_fp80 %conv9, x86_fp80* null + store x86_fp80 %b, x86_fp80* null + ret void +} + +declare x86_fp80 @y(x86_fp80*, x86_fp80*)
diff --git a/src/LLVM/test/CodeGen/X86/2009-03-10-CoalescerBug.ll b/src/LLVM/test/CodeGen/X86/2009-03-10-CoalescerBug.ll new file mode 100644 index 0000000..90dff88 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-03-10-CoalescerBug.ll
@@ -0,0 +1,28 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin +; rdar://r6661945 + + %struct.WINDOW = type { i16, i16, i16, i16, i16, i16, i16, i32, i32, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, %struct.ldat*, i16, i16, i32, i32, %struct.WINDOW*, %struct.pdat, i16, %struct.cchar_t } + %struct.cchar_t = type { i32, [5 x i32] } + %struct.ldat = type { %struct.cchar_t*, i16, i16, i16 } + %struct.pdat = type { i16, i16, i16, i16, i16, i16 } + +define i32 @pnoutrefresh(%struct.WINDOW* %win, i32 %pminrow, i32 %pmincol, i32 %sminrow, i32 %smincol, i32 %smaxrow, i32 %smaxcol) nounwind optsize ssp { +entry: + %0 = load i16* null, align 4 ; <i16> [#uses=2] + %1 = icmp sgt i16 0, %0 ; <i1> [#uses=1] + br i1 %1, label %bb12, label %bb13 + +bb12: ; preds = %entry + %2 = sext i16 %0 to i32 ; <i32> [#uses=1] + %3 = sub i32 %2, 0 ; <i32> [#uses=1] + %4 = add i32 %3, %smaxrow ; <i32> [#uses=2] + %5 = trunc i32 %4 to i16 ; <i16> [#uses=1] + %6 = add i16 0, %5 ; <i16> [#uses=1] + br label %bb13 + +bb13: ; preds = %bb12, %entry + %pmaxrow.0 = phi i16 [ %6, %bb12 ], [ 0, %entry ] ; <i16> [#uses=0] + %smaxrow_addr.0 = phi i32 [ %4, %bb12 ], [ %smaxrow, %entry ] ; <i32> [#uses=1] + %7 = trunc i32 %smaxrow_addr.0 to i16 ; <i16> [#uses=0] + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-03-12-CPAlignBug.ll b/src/LLVM/test/CodeGen/X86/2009-03-12-CPAlignBug.ll new file mode 100644 index 0000000..3564f01 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-03-12-CPAlignBug.ll
@@ -0,0 +1,37 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 | not grep {.space} +; rdar://6668548 + +declare double @llvm.sqrt.f64(double) nounwind readonly + +declare double @fabs(double) + +declare double @llvm.pow.f64(double, double) nounwind readonly + +define void @SolveCubic_bb1(i32* %solutions, double* %x, x86_fp80 %.reload, x86_fp80 %.reload5, x86_fp80 %.reload6, double %.reload8) nounwind { +newFuncRoot: + br label %bb1 + +bb1.ret.exitStub: ; preds = %bb1 + ret void + +bb1: ; preds = %newFuncRoot + store i32 1, i32* %solutions, align 4 + %0 = tail call double @llvm.sqrt.f64(double %.reload8) ; <double> [#uses=1] + %1 = fptrunc x86_fp80 %.reload6 to double ; <double> [#uses=1] + %2 = tail call double @fabs(double %1) nounwind readnone ; <double> [#uses=1] + %3 = fadd double %0, %2 ; <double> [#uses=1] + %4 = tail call double @llvm.pow.f64(double %3, double 0x3FD5555555555555) ; <double> [#uses=1] + %5 = fpext double %4 to x86_fp80 ; <x86_fp80> [#uses=2] + %6 = fdiv x86_fp80 %.reload5, %5 ; <x86_fp80> [#uses=1] + %7 = fadd x86_fp80 %5, %6 ; <x86_fp80> [#uses=1] + %8 = fptrunc x86_fp80 %7 to double ; <double> [#uses=1] + %9 = fcmp olt x86_fp80 %.reload6, 0xK00000000000000000000 ; <i1> [#uses=1] + %iftmp.6.0 = select i1 %9, double 1.000000e+00, double -1.000000e+00 ; <double> [#uses=1] + %10 = fmul double %8, %iftmp.6.0 ; <double> [#uses=1] + %11 = fpext double %10 to x86_fp80 ; <x86_fp80> [#uses=1] + %12 = fdiv x86_fp80 %.reload, 0xKC000C000000000000000 ; <x86_fp80> [#uses=1] + %13 = fadd x86_fp80 %11, %12 ; <x86_fp80> [#uses=1] + %14 = fptrunc x86_fp80 %13 to double ; <double> [#uses=1] + store double %14, double* %x, align 1 + br label %bb1.ret.exitStub +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-03-13-PHIElimBug.ll b/src/LLVM/test/CodeGen/X86/2009-03-13-PHIElimBug.ll new file mode 100644 index 0000000..e14c30a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-03-13-PHIElimBug.ll
@@ -0,0 +1,36 @@ +; RUN: llc < %s -mtriple=i686-linux | FileCheck %s +; Check the register copy comes after the call to f and before the call to g +; PR3784 + +declare i32 @f() + +declare i32 @g() + +define i32 @phi() { +entry: + %a = call i32 @f() ; <i32> [#uses=1] + %b = invoke i32 @g() + to label %cont unwind label %lpad ; <i32> [#uses=1] + +cont: ; preds = %entry + %x = phi i32 [ %b, %entry ] ; <i32> [#uses=0] + %aa = call i32 @g() ; <i32> [#uses=1] + %bb = invoke i32 @g() + to label %cont2 unwind label %lpad ; <i32> [#uses=1] + +cont2: ; preds = %cont + %xx = phi i32 [ %bb, %cont ] ; <i32> [#uses=1] + ret i32 %xx + +lpad: ; preds = %cont, %entry + %y = phi i32 [ %a, %entry ], [ %aa, %cont ] ; <i32> [#uses=1] + %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + ret i32 %y +} + +; CHECK: call{{.*}}f +; CHECK: movl %eax, %esi +; CHECK: call{{.*}}g + +declare i32 @__gxx_personality_v0(...)
diff --git a/src/LLVM/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll b/src/LLVM/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll new file mode 100644 index 0000000..f8c7a15 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll
@@ -0,0 +1,28 @@ +; RUN: llc < %s -mtriple=i686-linux -asm-verbose | FileCheck %s +; Check that register copies in the landing pad come after the EH_LABEL + +declare i32 @f() + +define i32 @phi(i32 %x) { +entry: + %a = invoke i32 @f() + to label %cont unwind label %lpad ; <i32> [#uses=1] + +cont: ; preds = %entry + %b = invoke i32 @f() + to label %cont2 unwind label %lpad ; <i32> [#uses=1] + +cont2: ; preds = %cont + ret i32 %b + +lpad: ; preds = %cont, %entry + %v = phi i32 [ %x, %entry ], [ %a, %cont ] ; <i32> [#uses=1] + %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + ret i32 %v +} + +; CHECK: lpad +; CHECK-NEXT: Ltmp + +declare i32 @__gxx_personality_v0(...)
diff --git a/src/LLVM/test/CodeGen/X86/2009-03-16-SpillerBug.ll b/src/LLVM/test/CodeGen/X86/2009-03-16-SpillerBug.ll new file mode 100644 index 0000000..951e191 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-03-16-SpillerBug.ll
@@ -0,0 +1,167 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin -regalloc=linearscan -stats |& grep virtregrewriter | not grep {stores unfolded} +; rdar://6682365 + +; Do not clobber a register if another spill slot is available in it and it's marked "do not clobber". + + %struct.CAST_KEY = type { [32 x i32], i32 } +@CAST_S_table0 = constant [2 x i32] [i32 821772500, i32 -1616838901], align 32 ; <[2 x i32]*> [#uses=0] +@CAST_S_table4 = constant [2 x i32] [i32 2127105028, i32 745436345], align 32 ; <[2 x i32]*> [#uses=6] +@CAST_S_table5 = constant [2 x i32] [i32 -151351395, i32 749497569], align 32 ; <[2 x i32]*> [#uses=5] +@CAST_S_table6 = constant [2 x i32] [i32 -2048901095, i32 858518887], align 32 ; <[2 x i32]*> [#uses=4] +@CAST_S_table7 = constant [2 x i32] [i32 -501862387, i32 -1143078916], align 32 ; <[2 x i32]*> [#uses=5] +@CAST_S_table1 = constant [2 x i32] [i32 522195092, i32 -284448933], align 32 ; <[2 x i32]*> [#uses=0] +@CAST_S_table2 = constant [2 x i32] [i32 -1913667008, i32 637164959], align 32 ; <[2 x i32]*> [#uses=0] +@CAST_S_table3 = constant [2 x i32] [i32 -1649212384, i32 532081118], align 32 ; <[2 x i32]*> [#uses=0] + +define void @CAST_set_key(%struct.CAST_KEY* nocapture %key, i32 %len, i8* nocapture %data) nounwind ssp { +bb1.thread: + %0 = getelementptr [16 x i32]* null, i32 0, i32 5 ; <i32*> [#uses=1] + %1 = getelementptr [16 x i32]* null, i32 0, i32 8 ; <i32*> [#uses=1] + %2 = load i32* null, align 4 ; <i32> [#uses=1] + %3 = shl i32 %2, 24 ; <i32> [#uses=1] + %4 = load i32* null, align 4 ; <i32> [#uses=1] + %5 = shl i32 %4, 16 ; <i32> [#uses=1] + %6 = load i32* null, align 4 ; <i32> [#uses=1] + %7 = or i32 %5, %3 ; <i32> [#uses=1] + %8 = or i32 %7, %6 ; <i32> [#uses=1] + %9 = or i32 %8, 0 ; <i32> [#uses=1] + %10 = load i32* null, align 4 ; <i32> [#uses=1] + %11 = shl i32 %10, 24 ; <i32> [#uses=1] + %12 = load i32* %0, align 4 ; <i32> [#uses=1] + %13 = shl i32 %12, 16 ; <i32> [#uses=1] + %14 = load i32* null, align 4 ; <i32> [#uses=1] + %15 = 
or i32 %13, %11 ; <i32> [#uses=1] + %16 = or i32 %15, %14 ; <i32> [#uses=1] + %17 = or i32 %16, 0 ; <i32> [#uses=1] + br label %bb11 + +bb11: ; preds = %bb11, %bb1.thread + %18 = phi i32 [ %110, %bb11 ], [ 0, %bb1.thread ] ; <i32> [#uses=1] + %19 = phi i32 [ %112, %bb11 ], [ 0, %bb1.thread ] ; <i32> [#uses=0] + %20 = phi i32 [ 0, %bb11 ], [ 0, %bb1.thread ] ; <i32> [#uses=0] + %21 = phi i32 [ %113, %bb11 ], [ 0, %bb1.thread ] ; <i32> [#uses=1] + %X.0.0 = phi i32 [ %9, %bb1.thread ], [ %92, %bb11 ] ; <i32> [#uses=0] + %X.1.0 = phi i32 [ %17, %bb1.thread ], [ 0, %bb11 ] ; <i32> [#uses=0] + %22 = getelementptr [2 x i32]* @CAST_S_table6, i32 0, i32 %21 ; <i32*> [#uses=0] + %23 = getelementptr [2 x i32]* @CAST_S_table5, i32 0, i32 %18 ; <i32*> [#uses=0] + %24 = load i32* null, align 4 ; <i32> [#uses=1] + %25 = xor i32 0, %24 ; <i32> [#uses=1] + %26 = xor i32 %25, 0 ; <i32> [#uses=1] + %27 = xor i32 %26, 0 ; <i32> [#uses=4] + %28 = and i32 %27, 255 ; <i32> [#uses=2] + %29 = lshr i32 %27, 8 ; <i32> [#uses=1] + %30 = and i32 %29, 255 ; <i32> [#uses=2] + %31 = lshr i32 %27, 16 ; <i32> [#uses=1] + %32 = and i32 %31, 255 ; <i32> [#uses=1] + %33 = getelementptr [2 x i32]* @CAST_S_table4, i32 0, i32 %28 ; <i32*> [#uses=1] + %34 = load i32* %33, align 4 ; <i32> [#uses=2] + %35 = getelementptr [2 x i32]* @CAST_S_table5, i32 0, i32 %30 ; <i32*> [#uses=1] + %36 = load i32* %35, align 4 ; <i32> [#uses=2] + %37 = xor i32 %34, 0 ; <i32> [#uses=1] + %38 = xor i32 %37, %36 ; <i32> [#uses=1] + %39 = xor i32 %38, 0 ; <i32> [#uses=1] + %40 = xor i32 %39, 0 ; <i32> [#uses=1] + %41 = xor i32 %40, 0 ; <i32> [#uses=3] + %42 = lshr i32 %41, 8 ; <i32> [#uses=1] + %43 = and i32 %42, 255 ; <i32> [#uses=2] + %44 = lshr i32 %41, 16 ; <i32> [#uses=1] + %45 = and i32 %44, 255 ; <i32> [#uses=1] + %46 = getelementptr [2 x i32]* @CAST_S_table4, i32 0, i32 %43 ; <i32*> [#uses=1] + %47 = load i32* %46, align 4 ; <i32> [#uses=1] + %48 = load i32* null, align 4 ; <i32> [#uses=1] + %49 = xor i32 %47, 0 ; 
<i32> [#uses=1] + %50 = xor i32 %49, %48 ; <i32> [#uses=1] + %51 = xor i32 %50, 0 ; <i32> [#uses=1] + %52 = xor i32 %51, 0 ; <i32> [#uses=1] + %53 = xor i32 %52, 0 ; <i32> [#uses=2] + %54 = and i32 %53, 255 ; <i32> [#uses=1] + %55 = lshr i32 %53, 24 ; <i32> [#uses=1] + %56 = getelementptr [2 x i32]* @CAST_S_table6, i32 0, i32 %55 ; <i32*> [#uses=1] + %57 = load i32* %56, align 4 ; <i32> [#uses=1] + %58 = xor i32 0, %57 ; <i32> [#uses=1] + %59 = xor i32 %58, 0 ; <i32> [#uses=1] + %60 = xor i32 %59, 0 ; <i32> [#uses=1] + store i32 %60, i32* null, align 4 + %61 = getelementptr [2 x i32]* @CAST_S_table4, i32 0, i32 0 ; <i32*> [#uses=1] + %62 = load i32* %61, align 4 ; <i32> [#uses=1] + %63 = getelementptr [2 x i32]* @CAST_S_table7, i32 0, i32 %54 ; <i32*> [#uses=1] + %64 = load i32* %63, align 4 ; <i32> [#uses=1] + %65 = xor i32 0, %64 ; <i32> [#uses=1] + %66 = xor i32 %65, 0 ; <i32> [#uses=1] + store i32 %66, i32* null, align 4 + %67 = getelementptr [2 x i32]* @CAST_S_table7, i32 0, i32 %45 ; <i32*> [#uses=1] + %68 = load i32* %67, align 4 ; <i32> [#uses=1] + %69 = xor i32 %36, %34 ; <i32> [#uses=1] + %70 = xor i32 %69, 0 ; <i32> [#uses=1] + %71 = xor i32 %70, %68 ; <i32> [#uses=1] + %72 = xor i32 %71, 0 ; <i32> [#uses=1] + store i32 %72, i32* null, align 4 + %73 = getelementptr [2 x i32]* @CAST_S_table4, i32 0, i32 %32 ; <i32*> [#uses=1] + %74 = load i32* %73, align 4 ; <i32> [#uses=2] + %75 = load i32* null, align 4 ; <i32> [#uses=1] + %76 = getelementptr [2 x i32]* @CAST_S_table6, i32 0, i32 %43 ; <i32*> [#uses=1] + %77 = load i32* %76, align 4 ; <i32> [#uses=1] + %78 = getelementptr [2 x i32]* @CAST_S_table7, i32 0, i32 0 ; <i32*> [#uses=1] + %79 = load i32* %78, align 4 ; <i32> [#uses=1] + %80 = getelementptr [2 x i32]* @CAST_S_table7, i32 0, i32 %30 ; <i32*> [#uses=1] + %81 = load i32* %80, align 4 ; <i32> [#uses=2] + %82 = xor i32 %75, %74 ; <i32> [#uses=1] + %83 = xor i32 %82, %77 ; <i32> [#uses=1] + %84 = xor i32 %83, %79 ; <i32> [#uses=1] + %85 = xor i32 
%84, %81 ; <i32> [#uses=1] + store i32 %85, i32* null, align 4 + %86 = getelementptr [2 x i32]* @CAST_S_table5, i32 0, i32 %28 ; <i32*> [#uses=1] + %87 = load i32* %86, align 4 ; <i32> [#uses=1] + %88 = xor i32 %74, %41 ; <i32> [#uses=1] + %89 = xor i32 %88, %87 ; <i32> [#uses=1] + %90 = xor i32 %89, 0 ; <i32> [#uses=1] + %91 = xor i32 %90, %81 ; <i32> [#uses=1] + %92 = xor i32 %91, 0 ; <i32> [#uses=3] + %93 = lshr i32 %92, 16 ; <i32> [#uses=1] + %94 = and i32 %93, 255 ; <i32> [#uses=1] + store i32 %94, i32* null, align 4 + %95 = lshr i32 %92, 24 ; <i32> [#uses=2] + %96 = getelementptr [2 x i32]* @CAST_S_table4, i32 0, i32 %95 ; <i32*> [#uses=1] + %97 = load i32* %96, align 4 ; <i32> [#uses=1] + %98 = getelementptr [2 x i32]* @CAST_S_table5, i32 0, i32 0 ; <i32*> [#uses=1] + %99 = load i32* %98, align 4 ; <i32> [#uses=1] + %100 = load i32* null, align 4 ; <i32> [#uses=0] + %101 = xor i32 %97, 0 ; <i32> [#uses=1] + %102 = xor i32 %101, %99 ; <i32> [#uses=1] + %103 = xor i32 %102, 0 ; <i32> [#uses=1] + %104 = xor i32 %103, 0 ; <i32> [#uses=0] + store i32 0, i32* null, align 4 + %105 = xor i32 0, %27 ; <i32> [#uses=1] + %106 = xor i32 %105, 0 ; <i32> [#uses=1] + %107 = xor i32 %106, 0 ; <i32> [#uses=1] + %108 = xor i32 %107, 0 ; <i32> [#uses=1] + %109 = xor i32 %108, %62 ; <i32> [#uses=3] + %110 = and i32 %109, 255 ; <i32> [#uses=1] + %111 = lshr i32 %109, 16 ; <i32> [#uses=1] + %112 = and i32 %111, 255 ; <i32> [#uses=1] + %113 = lshr i32 %109, 24 ; <i32> [#uses=3] + store i32 %113, i32* %1, align 4 + %114 = load i32* null, align 4 ; <i32> [#uses=1] + %115 = xor i32 0, %114 ; <i32> [#uses=1] + %116 = xor i32 %115, 0 ; <i32> [#uses=1] + %117 = xor i32 %116, 0 ; <i32> [#uses=1] + %K.0.sum42 = or i32 0, 12 ; <i32> [#uses=1] + %118 = getelementptr [32 x i32]* null, i32 0, i32 %K.0.sum42 ; <i32*> [#uses=1] + store i32 %117, i32* %118, align 4 + %119 = getelementptr [2 x i32]* @CAST_S_table5, i32 0, i32 0 ; <i32*> [#uses=0] + store i32 0, i32* null, align 4 + %120 = 
getelementptr [2 x i32]* @CAST_S_table6, i32 0, i32 %113 ; <i32*> [#uses=1] + %121 = load i32* %120, align 4 ; <i32> [#uses=1] + %122 = xor i32 0, %121 ; <i32> [#uses=1] + store i32 %122, i32* null, align 4 + %123 = getelementptr [2 x i32]* @CAST_S_table4, i32 0, i32 0 ; <i32*> [#uses=1] + %124 = load i32* %123, align 4 ; <i32> [#uses=1] + %125 = getelementptr [2 x i32]* @CAST_S_table7, i32 0, i32 %95 ; <i32*> [#uses=1] + %126 = load i32* %125, align 4 ; <i32> [#uses=1] + %127 = xor i32 0, %124 ; <i32> [#uses=1] + %128 = xor i32 %127, 0 ; <i32> [#uses=1] + %129 = xor i32 %128, %126 ; <i32> [#uses=1] + %130 = xor i32 %129, 0 ; <i32> [#uses=1] + store i32 %130, i32* null, align 4 + br label %bb11 +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-03-23-LinearScanBug.ll b/src/LLVM/test/CodeGen/X86/2009-03-23-LinearScanBug.ll new file mode 100644 index 0000000..06dfdc0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-03-23-LinearScanBug.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin -O0 + +define fastcc void @optimize_bit_field() nounwind { +bb4: + %a = load i32* null ; <i32> [#uses=1] + %s = load i32* getelementptr (i32* null, i32 1) ; <i32> [#uses=1] + %z = load i32* getelementptr (i32* null, i32 2) ; <i32> [#uses=1] + %r = bitcast i32 0 to i32 ; <i32> [#uses=1] + %q = trunc i32 %z to i8 ; <i8> [#uses=1] + %b = icmp eq i8 0, %q ; <i1> [#uses=1] + br i1 %b, label %bb73, label %bb72 + +bb72: ; preds = %bb4 + %f = tail call fastcc i32 @gen_lowpart(i32 %r, i32 %a) nounwind ; <i32> [#uses=1] + br label %bb73 + +bb73: ; preds = %bb72, %bb4 + %y = phi i32 [ %f, %bb72 ], [ %s, %bb4 ] ; <i32> [#uses=1] + store i32 %y, i32* getelementptr (i32* null, i32 3) + unreachable +} + +declare fastcc i32 @gen_lowpart(i32, i32) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/2009-03-23-MultiUseSched.ll b/src/LLVM/test/CodeGen/X86/2009-03-23-MultiUseSched.ll new file mode 100644 index 0000000..90dabb8 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
@@ -0,0 +1,242 @@ +; RUN: llc < %s -mtriple=x86_64-linux -relocation-model=static -o /dev/null -stats -info-output-file - > %t +; RUN: not grep spill %t +; RUN: not grep {%rsp} %t +; RUN: not grep {%rbp} %t + +; The register-pressure scheduler should be able to schedule this in a +; way that does not require spills. + +@X = external global i64 ; <i64*> [#uses=25] + +define fastcc i64 @foo() nounwind { + %tmp = volatile load i64* @X ; <i64> [#uses=7] + %tmp1 = volatile load i64* @X ; <i64> [#uses=5] + %tmp2 = volatile load i64* @X ; <i64> [#uses=3] + %tmp3 = volatile load i64* @X ; <i64> [#uses=1] + %tmp4 = volatile load i64* @X ; <i64> [#uses=5] + %tmp5 = volatile load i64* @X ; <i64> [#uses=3] + %tmp6 = volatile load i64* @X ; <i64> [#uses=2] + %tmp7 = volatile load i64* @X ; <i64> [#uses=1] + %tmp8 = volatile load i64* @X ; <i64> [#uses=1] + %tmp9 = volatile load i64* @X ; <i64> [#uses=1] + %tmp10 = volatile load i64* @X ; <i64> [#uses=1] + %tmp11 = volatile load i64* @X ; <i64> [#uses=1] + %tmp12 = volatile load i64* @X ; <i64> [#uses=1] + %tmp13 = volatile load i64* @X ; <i64> [#uses=1] + %tmp14 = volatile load i64* @X ; <i64> [#uses=1] + %tmp15 = volatile load i64* @X ; <i64> [#uses=1] + %tmp16 = volatile load i64* @X ; <i64> [#uses=1] + %tmp17 = volatile load i64* @X ; <i64> [#uses=1] + %tmp18 = volatile load i64* @X ; <i64> [#uses=1] + %tmp19 = volatile load i64* @X ; <i64> [#uses=1] + %tmp20 = volatile load i64* @X ; <i64> [#uses=1] + %tmp21 = volatile load i64* @X ; <i64> [#uses=1] + %tmp22 = volatile load i64* @X ; <i64> [#uses=1] + %tmp23 = volatile load i64* @X ; <i64> [#uses=1] + %tmp24 = call i64 @llvm.bswap.i64(i64 %tmp8) ; <i64> [#uses=1] + %tmp25 = add i64 %tmp6, %tmp5 ; <i64> [#uses=1] + %tmp26 = add i64 %tmp25, %tmp4 ; <i64> [#uses=1] + %tmp27 = add i64 %tmp7, %tmp4 ; <i64> [#uses=1] + %tmp28 = add i64 %tmp27, %tmp26 ; <i64> [#uses=1] + %tmp29 = add i64 %tmp28, %tmp24 ; <i64> [#uses=2] + %tmp30 = add i64 %tmp2, %tmp1 ; <i64> [#uses=1] + %tmp31 = 
add i64 %tmp30, %tmp ; <i64> [#uses=1] + %tmp32 = add i64 %tmp2, %tmp1 ; <i64> [#uses=1] + %tmp33 = add i64 %tmp31, %tmp32 ; <i64> [#uses=1] + %tmp34 = add i64 %tmp29, %tmp3 ; <i64> [#uses=5] + %tmp35 = add i64 %tmp33, %tmp ; <i64> [#uses=1] + %tmp36 = add i64 %tmp35, %tmp29 ; <i64> [#uses=7] + %tmp37 = call i64 @llvm.bswap.i64(i64 %tmp9) ; <i64> [#uses=1] + %tmp38 = add i64 %tmp4, %tmp5 ; <i64> [#uses=1] + %tmp39 = add i64 %tmp38, %tmp34 ; <i64> [#uses=1] + %tmp40 = add i64 %tmp6, %tmp37 ; <i64> [#uses=1] + %tmp41 = add i64 %tmp40, %tmp39 ; <i64> [#uses=1] + %tmp42 = add i64 %tmp41, %tmp34 ; <i64> [#uses=2] + %tmp43 = add i64 %tmp1, %tmp ; <i64> [#uses=1] + %tmp44 = add i64 %tmp36, %tmp43 ; <i64> [#uses=1] + %tmp45 = add i64 %tmp1, %tmp ; <i64> [#uses=1] + %tmp46 = add i64 %tmp44, %tmp45 ; <i64> [#uses=1] + %tmp47 = add i64 %tmp42, %tmp2 ; <i64> [#uses=5] + %tmp48 = add i64 %tmp36, %tmp46 ; <i64> [#uses=1] + %tmp49 = add i64 %tmp48, %tmp42 ; <i64> [#uses=7] + %tmp50 = call i64 @llvm.bswap.i64(i64 %tmp10) ; <i64> [#uses=1] + %tmp51 = add i64 %tmp34, %tmp4 ; <i64> [#uses=1] + %tmp52 = add i64 %tmp51, %tmp47 ; <i64> [#uses=1] + %tmp53 = add i64 %tmp5, %tmp50 ; <i64> [#uses=1] + %tmp54 = add i64 %tmp53, %tmp52 ; <i64> [#uses=1] + %tmp55 = add i64 %tmp54, %tmp47 ; <i64> [#uses=2] + %tmp56 = add i64 %tmp36, %tmp ; <i64> [#uses=1] + %tmp57 = add i64 %tmp49, %tmp56 ; <i64> [#uses=1] + %tmp58 = add i64 %tmp36, %tmp ; <i64> [#uses=1] + %tmp59 = add i64 %tmp57, %tmp58 ; <i64> [#uses=1] + %tmp60 = add i64 %tmp55, %tmp1 ; <i64> [#uses=5] + %tmp61 = add i64 %tmp49, %tmp59 ; <i64> [#uses=1] + %tmp62 = add i64 %tmp61, %tmp55 ; <i64> [#uses=7] + %tmp63 = call i64 @llvm.bswap.i64(i64 %tmp11) ; <i64> [#uses=1] + %tmp64 = add i64 %tmp47, %tmp34 ; <i64> [#uses=1] + %tmp65 = add i64 %tmp64, %tmp60 ; <i64> [#uses=1] + %tmp66 = add i64 %tmp4, %tmp63 ; <i64> [#uses=1] + %tmp67 = add i64 %tmp66, %tmp65 ; <i64> [#uses=1] + %tmp68 = add i64 %tmp67, %tmp60 ; <i64> [#uses=2] + %tmp69 = add i64 
%tmp49, %tmp36 ; <i64> [#uses=1] + %tmp70 = add i64 %tmp62, %tmp69 ; <i64> [#uses=1] + %tmp71 = add i64 %tmp49, %tmp36 ; <i64> [#uses=1] + %tmp72 = add i64 %tmp70, %tmp71 ; <i64> [#uses=1] + %tmp73 = add i64 %tmp68, %tmp ; <i64> [#uses=5] + %tmp74 = add i64 %tmp62, %tmp72 ; <i64> [#uses=1] + %tmp75 = add i64 %tmp74, %tmp68 ; <i64> [#uses=7] + %tmp76 = call i64 @llvm.bswap.i64(i64 %tmp12) ; <i64> [#uses=1] + %tmp77 = add i64 %tmp60, %tmp47 ; <i64> [#uses=1] + %tmp78 = add i64 %tmp77, %tmp73 ; <i64> [#uses=1] + %tmp79 = add i64 %tmp34, %tmp76 ; <i64> [#uses=1] + %tmp80 = add i64 %tmp79, %tmp78 ; <i64> [#uses=1] + %tmp81 = add i64 %tmp80, %tmp73 ; <i64> [#uses=2] + %tmp82 = add i64 %tmp62, %tmp49 ; <i64> [#uses=1] + %tmp83 = add i64 %tmp75, %tmp82 ; <i64> [#uses=1] + %tmp84 = add i64 %tmp62, %tmp49 ; <i64> [#uses=1] + %tmp85 = add i64 %tmp83, %tmp84 ; <i64> [#uses=1] + %tmp86 = add i64 %tmp81, %tmp36 ; <i64> [#uses=5] + %tmp87 = add i64 %tmp75, %tmp85 ; <i64> [#uses=1] + %tmp88 = add i64 %tmp87, %tmp81 ; <i64> [#uses=7] + %tmp89 = call i64 @llvm.bswap.i64(i64 %tmp13) ; <i64> [#uses=1] + %tmp90 = add i64 %tmp73, %tmp60 ; <i64> [#uses=1] + %tmp91 = add i64 %tmp90, %tmp86 ; <i64> [#uses=1] + %tmp92 = add i64 %tmp47, %tmp89 ; <i64> [#uses=1] + %tmp93 = add i64 %tmp92, %tmp91 ; <i64> [#uses=1] + %tmp94 = add i64 %tmp93, %tmp86 ; <i64> [#uses=2] + %tmp95 = add i64 %tmp75, %tmp62 ; <i64> [#uses=1] + %tmp96 = add i64 %tmp88, %tmp95 ; <i64> [#uses=1] + %tmp97 = add i64 %tmp75, %tmp62 ; <i64> [#uses=1] + %tmp98 = add i64 %tmp96, %tmp97 ; <i64> [#uses=1] + %tmp99 = add i64 %tmp94, %tmp49 ; <i64> [#uses=5] + %tmp100 = add i64 %tmp88, %tmp98 ; <i64> [#uses=1] + %tmp101 = add i64 %tmp100, %tmp94 ; <i64> [#uses=7] + %tmp102 = call i64 @llvm.bswap.i64(i64 %tmp14) ; <i64> [#uses=1] + %tmp103 = add i64 %tmp86, %tmp73 ; <i64> [#uses=1] + %tmp104 = add i64 %tmp103, %tmp99 ; <i64> [#uses=1] + %tmp105 = add i64 %tmp102, %tmp60 ; <i64> [#uses=1] + %tmp106 = add i64 %tmp105, %tmp104 ; <i64> 
[#uses=1] + %tmp107 = add i64 %tmp106, %tmp99 ; <i64> [#uses=2] + %tmp108 = add i64 %tmp88, %tmp75 ; <i64> [#uses=1] + %tmp109 = add i64 %tmp101, %tmp108 ; <i64> [#uses=1] + %tmp110 = add i64 %tmp88, %tmp75 ; <i64> [#uses=1] + %tmp111 = add i64 %tmp109, %tmp110 ; <i64> [#uses=1] + %tmp112 = add i64 %tmp107, %tmp62 ; <i64> [#uses=5] + %tmp113 = add i64 %tmp101, %tmp111 ; <i64> [#uses=1] + %tmp114 = add i64 %tmp113, %tmp107 ; <i64> [#uses=7] + %tmp115 = call i64 @llvm.bswap.i64(i64 %tmp15) ; <i64> [#uses=1] + %tmp116 = add i64 %tmp99, %tmp86 ; <i64> [#uses=1] + %tmp117 = add i64 %tmp116, %tmp112 ; <i64> [#uses=1] + %tmp118 = add i64 %tmp115, %tmp73 ; <i64> [#uses=1] + %tmp119 = add i64 %tmp118, %tmp117 ; <i64> [#uses=1] + %tmp120 = add i64 %tmp119, %tmp112 ; <i64> [#uses=2] + %tmp121 = add i64 %tmp101, %tmp88 ; <i64> [#uses=1] + %tmp122 = add i64 %tmp114, %tmp121 ; <i64> [#uses=1] + %tmp123 = add i64 %tmp101, %tmp88 ; <i64> [#uses=1] + %tmp124 = add i64 %tmp122, %tmp123 ; <i64> [#uses=1] + %tmp125 = add i64 %tmp120, %tmp75 ; <i64> [#uses=5] + %tmp126 = add i64 %tmp114, %tmp124 ; <i64> [#uses=1] + %tmp127 = add i64 %tmp126, %tmp120 ; <i64> [#uses=7] + %tmp128 = call i64 @llvm.bswap.i64(i64 %tmp16) ; <i64> [#uses=1] + %tmp129 = add i64 %tmp112, %tmp99 ; <i64> [#uses=1] + %tmp130 = add i64 %tmp129, %tmp125 ; <i64> [#uses=1] + %tmp131 = add i64 %tmp128, %tmp86 ; <i64> [#uses=1] + %tmp132 = add i64 %tmp131, %tmp130 ; <i64> [#uses=1] + %tmp133 = add i64 %tmp132, %tmp125 ; <i64> [#uses=2] + %tmp134 = add i64 %tmp114, %tmp101 ; <i64> [#uses=1] + %tmp135 = add i64 %tmp127, %tmp134 ; <i64> [#uses=1] + %tmp136 = add i64 %tmp114, %tmp101 ; <i64> [#uses=1] + %tmp137 = add i64 %tmp135, %tmp136 ; <i64> [#uses=1] + %tmp138 = add i64 %tmp133, %tmp88 ; <i64> [#uses=5] + %tmp139 = add i64 %tmp127, %tmp137 ; <i64> [#uses=1] + %tmp140 = add i64 %tmp139, %tmp133 ; <i64> [#uses=7] + %tmp141 = call i64 @llvm.bswap.i64(i64 %tmp17) ; <i64> [#uses=1] + %tmp142 = add i64 %tmp125, %tmp112 ; 
<i64> [#uses=1] + %tmp143 = add i64 %tmp142, %tmp138 ; <i64> [#uses=1] + %tmp144 = add i64 %tmp141, %tmp99 ; <i64> [#uses=1] + %tmp145 = add i64 %tmp144, %tmp143 ; <i64> [#uses=1] + %tmp146 = add i64 %tmp145, %tmp138 ; <i64> [#uses=2] + %tmp147 = add i64 %tmp127, %tmp114 ; <i64> [#uses=1] + %tmp148 = add i64 %tmp140, %tmp147 ; <i64> [#uses=1] + %tmp149 = add i64 %tmp127, %tmp114 ; <i64> [#uses=1] + %tmp150 = add i64 %tmp148, %tmp149 ; <i64> [#uses=1] + %tmp151 = add i64 %tmp146, %tmp101 ; <i64> [#uses=5] + %tmp152 = add i64 %tmp140, %tmp150 ; <i64> [#uses=1] + %tmp153 = add i64 %tmp152, %tmp146 ; <i64> [#uses=7] + %tmp154 = call i64 @llvm.bswap.i64(i64 %tmp18) ; <i64> [#uses=1] + %tmp155 = add i64 %tmp138, %tmp125 ; <i64> [#uses=1] + %tmp156 = add i64 %tmp155, %tmp151 ; <i64> [#uses=1] + %tmp157 = add i64 %tmp154, %tmp112 ; <i64> [#uses=1] + %tmp158 = add i64 %tmp157, %tmp156 ; <i64> [#uses=1] + %tmp159 = add i64 %tmp158, %tmp151 ; <i64> [#uses=2] + %tmp160 = add i64 %tmp140, %tmp127 ; <i64> [#uses=1] + %tmp161 = add i64 %tmp153, %tmp160 ; <i64> [#uses=1] + %tmp162 = add i64 %tmp140, %tmp127 ; <i64> [#uses=1] + %tmp163 = add i64 %tmp161, %tmp162 ; <i64> [#uses=1] + %tmp164 = add i64 %tmp159, %tmp114 ; <i64> [#uses=5] + %tmp165 = add i64 %tmp153, %tmp163 ; <i64> [#uses=1] + %tmp166 = add i64 %tmp165, %tmp159 ; <i64> [#uses=7] + %tmp167 = call i64 @llvm.bswap.i64(i64 %tmp19) ; <i64> [#uses=1] + %tmp168 = add i64 %tmp151, %tmp138 ; <i64> [#uses=1] + %tmp169 = add i64 %tmp168, %tmp164 ; <i64> [#uses=1] + %tmp170 = add i64 %tmp167, %tmp125 ; <i64> [#uses=1] + %tmp171 = add i64 %tmp170, %tmp169 ; <i64> [#uses=1] + %tmp172 = add i64 %tmp171, %tmp164 ; <i64> [#uses=2] + %tmp173 = add i64 %tmp153, %tmp140 ; <i64> [#uses=1] + %tmp174 = add i64 %tmp166, %tmp173 ; <i64> [#uses=1] + %tmp175 = add i64 %tmp153, %tmp140 ; <i64> [#uses=1] + %tmp176 = add i64 %tmp174, %tmp175 ; <i64> [#uses=1] + %tmp177 = add i64 %tmp172, %tmp127 ; <i64> [#uses=5] + %tmp178 = add i64 %tmp166, 
%tmp176 ; <i64> [#uses=1] + %tmp179 = add i64 %tmp178, %tmp172 ; <i64> [#uses=6] + %tmp180 = call i64 @llvm.bswap.i64(i64 %tmp20) ; <i64> [#uses=1] + %tmp181 = add i64 %tmp164, %tmp151 ; <i64> [#uses=1] + %tmp182 = add i64 %tmp181, %tmp177 ; <i64> [#uses=1] + %tmp183 = add i64 %tmp180, %tmp138 ; <i64> [#uses=1] + %tmp184 = add i64 %tmp183, %tmp182 ; <i64> [#uses=1] + %tmp185 = add i64 %tmp184, %tmp177 ; <i64> [#uses=2] + %tmp186 = add i64 %tmp166, %tmp153 ; <i64> [#uses=1] + %tmp187 = add i64 %tmp179, %tmp186 ; <i64> [#uses=1] + %tmp188 = add i64 %tmp166, %tmp153 ; <i64> [#uses=1] + %tmp189 = add i64 %tmp187, %tmp188 ; <i64> [#uses=1] + %tmp190 = add i64 %tmp185, %tmp140 ; <i64> [#uses=4] + %tmp191 = add i64 %tmp179, %tmp189 ; <i64> [#uses=1] + %tmp192 = add i64 %tmp191, %tmp185 ; <i64> [#uses=4] + %tmp193 = call i64 @llvm.bswap.i64(i64 %tmp21) ; <i64> [#uses=1] + %tmp194 = add i64 %tmp177, %tmp164 ; <i64> [#uses=1] + %tmp195 = add i64 %tmp194, %tmp190 ; <i64> [#uses=1] + %tmp196 = add i64 %tmp193, %tmp151 ; <i64> [#uses=1] + %tmp197 = add i64 %tmp196, %tmp195 ; <i64> [#uses=1] + %tmp198 = add i64 %tmp197, %tmp190 ; <i64> [#uses=2] + %tmp199 = add i64 %tmp179, %tmp166 ; <i64> [#uses=1] + %tmp200 = add i64 %tmp192, %tmp199 ; <i64> [#uses=1] + %tmp201 = add i64 %tmp179, %tmp166 ; <i64> [#uses=1] + %tmp202 = add i64 %tmp200, %tmp201 ; <i64> [#uses=1] + %tmp203 = add i64 %tmp198, %tmp153 ; <i64> [#uses=3] + %tmp204 = add i64 %tmp192, %tmp202 ; <i64> [#uses=1] + %tmp205 = add i64 %tmp204, %tmp198 ; <i64> [#uses=2] + %tmp206 = call i64 @llvm.bswap.i64(i64 %tmp22) ; <i64> [#uses=1] + %tmp207 = add i64 %tmp190, %tmp177 ; <i64> [#uses=1] + %tmp208 = add i64 %tmp207, %tmp203 ; <i64> [#uses=1] + %tmp209 = add i64 %tmp206, %tmp164 ; <i64> [#uses=1] + %tmp210 = add i64 %tmp209, %tmp208 ; <i64> [#uses=1] + %tmp211 = add i64 %tmp210, %tmp203 ; <i64> [#uses=2] + %tmp212 = add i64 %tmp192, %tmp179 ; <i64> [#uses=1] + %tmp213 = add i64 %tmp205, %tmp212 ; <i64> [#uses=1] + %tmp214 = 
add i64 %tmp192, %tmp179 ; <i64> [#uses=1] + %tmp215 = add i64 %tmp213, %tmp214 ; <i64> [#uses=1] + %tmp216 = add i64 %tmp211, %tmp166 ; <i64> [#uses=2] + %tmp217 = add i64 %tmp205, %tmp215 ; <i64> [#uses=1] + %tmp218 = add i64 %tmp217, %tmp211 ; <i64> [#uses=1] + %tmp219 = call i64 @llvm.bswap.i64(i64 %tmp23) ; <i64> [#uses=2] + volatile store i64 %tmp219, i64* @X, align 8 + %tmp220 = add i64 %tmp203, %tmp190 ; <i64> [#uses=1] + %tmp221 = add i64 %tmp220, %tmp216 ; <i64> [#uses=1] + %tmp222 = add i64 %tmp219, %tmp177 ; <i64> [#uses=1] + %tmp223 = add i64 %tmp222, %tmp221 ; <i64> [#uses=1] + %tmp224 = add i64 %tmp223, %tmp216 ; <i64> [#uses=1] + %tmp225 = add i64 %tmp224, %tmp218 ; <i64> [#uses=1] + ret i64 %tmp225 +} + +declare i64 @llvm.bswap.i64(i64) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/X86/2009-03-23-i80-fp80.ll b/src/LLVM/test/CodeGen/X86/2009-03-23-i80-fp80.ll new file mode 100644 index 0000000..e542325 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-03-23-i80-fp80.ll
@@ -0,0 +1,14 @@ +; RUN: opt < %s -instcombine -S | grep 302245289961712575840256 +; RUN: opt < %s -instcombine -S | grep K40018000000000000000 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i686-apple-darwin9" + +define i80 @from() { + %tmp = bitcast x86_fp80 0xK4000C000000000000000 to i80 + ret i80 %tmp +} + +define x86_fp80 @to() { + %tmp = bitcast i80 302259125019767858003968 to x86_fp80 + ret x86_fp80 %tmp +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-03-25-TestBug.ll b/src/LLVM/test/CodeGen/X86/2009-03-25-TestBug.ll new file mode 100644 index 0000000..f40fddc --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-03-25-TestBug.ll
@@ -0,0 +1,25 @@ +; RUN: llc < %s -march=x86 -o %t +; RUN: not grep and %t +; RUN: not grep shr %t +; rdar://6661955 + +@hello = internal constant [7 x i8] c"hello\0A\00" +@world = internal constant [7 x i8] c"world\0A\00" + +define void @func(i32* %b) nounwind { +bb1579.i.i: ; preds = %bb1514.i.i, %bb191.i.i + %tmp176 = load i32* %b, align 4 + %tmp177 = and i32 %tmp176, 2 + %tmp178 = icmp eq i32 %tmp177, 0 + br i1 %tmp178, label %hello, label %world + +hello: + %h = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([7 x i8]* @hello, i32 0, i32 0)) + ret void + +world: + %w = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([7 x i8]* @world, i32 0, i32 0)) + ret void +} + +declare i32 @printf(i8*, ...) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/2009-03-26-NoImplicitFPBug.ll b/src/LLVM/test/CodeGen/X86/2009-03-26-NoImplicitFPBug.ll new file mode 100644 index 0000000..f486479 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-03-26-NoImplicitFPBug.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 + +define double @t(double %x) nounwind ssp noimplicitfloat { +entry: + br i1 false, label %return, label %bb3 + +bb3: ; preds = %entry + ret double 0.000000e+00 + +return: ; preds = %entry + ret double undef +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll b/src/LLVM/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll new file mode 100644 index 0000000..b1222d1 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll
@@ -0,0 +1,21 @@ +; RUN: llc < %s -fast-isel +; radr://6772169 +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-apple-darwin10" + %0 = type { i32, i1 } ; type %0 + +declare %0 @llvm.sadd.with.overflow.i32(i32, i32) nounwind + +define fastcc i32 @test() nounwind { +entry: + %tmp1 = call %0 @llvm.sadd.with.overflow.i32(i32 1, i32 0) + %tmp2 = extractvalue %0 %tmp1, 1 + br i1 %tmp2, label %.backedge, label %BB3 + +BB3: + %tmp4 = extractvalue %0 %tmp1, 0 + br label %.backedge + +.backedge: + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-04-12-picrel.ll b/src/LLVM/test/CodeGen/X86/2009-04-12-picrel.ll new file mode 100644 index 0000000..f194280 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-04-12-picrel.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small > %t +; RUN: grep leaq %t | count 1 + +@dst = external global [131072 x i32] +@ptr = external global i32* + +define void @off01(i64 %i) nounwind { +entry: + %.sum = add i64 %i, 16 + %0 = getelementptr [131072 x i32]* @dst, i64 0, i64 %.sum + store i32* %0, i32** @ptr, align 8 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-04-13-2AddrAssert-2.ll b/src/LLVM/test/CodeGen/X86/2009-04-13-2AddrAssert-2.ll new file mode 100644 index 0000000..3d70b58 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-04-13-2AddrAssert-2.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin +; rdar://6781755 +; PR3934 + + %0 = type { i32, i32 } ; type %0 + +define void @bn_sqr_comba8(i32* nocapture %r, i32* %a) nounwind { +entry: + %asmtmp23 = tail call %0 asm "mulq $3", "={ax},={dx},{ax},*m,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 0, i32* %a) nounwind ; <%0> [#uses=1] + %asmresult25 = extractvalue %0 %asmtmp23, 1 ; <i32> [#uses=1] + %asmtmp26 = tail call %0 asm "addq $0,$0; adcq $2,$1", "={dx},=r,imr,0,1,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 0, i32 %asmresult25, i32 0) nounwind ; <%0> [#uses=1] + %asmresult27 = extractvalue %0 %asmtmp26, 0 ; <i32> [#uses=1] + %asmtmp29 = tail call %0 asm "addq $0,$0; adcq $2,$1", "={ax},={dx},imr,0,1,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 0, i32 0, i32 %asmresult27) nounwind ; <%0> [#uses=0] + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-04-13-2AddrAssert.ll b/src/LLVM/test/CodeGen/X86/2009-04-13-2AddrAssert.ll new file mode 100644 index 0000000..4362ba4 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-04-13-2AddrAssert.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s +; rdar://6781755 +; PR3934 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "x86_64-undermydesk-freebsd8.0" + +define i32 @main(i32 %argc, i8** nocapture %argv) nounwind { +entry: + %call = tail call i32 (...)* @getpid() ; <i32> [#uses=1] + %conv = trunc i32 %call to i16 ; <i16> [#uses=1] + %0 = tail call i16 asm "xchgb ${0:h}, ${0:b}","=Q,0,~{dirflag},~{fpsr},~{flags}"(i16 %conv) nounwind ; <i16> [#uses=0] + ret i32 undef +} + +declare i32 @getpid(...)
diff --git a/src/LLVM/test/CodeGen/X86/2009-04-14-IllegalRegs.ll b/src/LLVM/test/CodeGen/X86/2009-04-14-IllegalRegs.ll new file mode 100644 index 0000000..bed863e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-04-14-IllegalRegs.ll
@@ -0,0 +1,35 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin -O0 -regalloc=fast | not grep sil +; rdar://6787136 + + %struct.X = type { i8, [32 x i8] } +@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 ()* @z to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +define i32 @z() nounwind ssp { +entry: + %retval = alloca i32 ; <i32*> [#uses=2] + %xxx = alloca %struct.X ; <%struct.X*> [#uses=6] + %0 = alloca i32 ; <i32*> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + %1 = getelementptr %struct.X* %xxx, i32 0, i32 1 ; <[32 x i8]*> [#uses=1] + %2 = getelementptr [32 x i8]* %1, i32 0, i32 31 ; <i8*> [#uses=1] + store i8 48, i8* %2, align 1 + %3 = getelementptr %struct.X* %xxx, i32 0, i32 1 ; <[32 x i8]*> [#uses=1] + %4 = getelementptr [32 x i8]* %3, i32 0, i32 31 ; <i8*> [#uses=1] + %5 = load i8* %4, align 1 ; <i8> [#uses=1] + %6 = getelementptr %struct.X* %xxx, i32 0, i32 1 ; <[32 x i8]*> [#uses=1] + %7 = getelementptr [32 x i8]* %6, i32 0, i32 0 ; <i8*> [#uses=1] + store i8 %5, i8* %7, align 1 + %8 = getelementptr %struct.X* %xxx, i32 0, i32 0 ; <i8*> [#uses=1] + store i8 15, i8* %8, align 1 + %9 = call i32 (...)* bitcast (i32 (%struct.X*, %struct.X*)* @f to i32 (...)*)(%struct.X* byval align 4 %xxx, %struct.X* byval align 4 %xxx) nounwind ; <i32> [#uses=1] + store i32 %9, i32* %0, align 4 + %10 = load i32* %0, align 4 ; <i32> [#uses=1] + store i32 %10, i32* %retval, align 4 + br label %return + +return: ; preds = %entry + %retval1 = load i32* %retval ; <i32> [#uses=1] + ret i32 %retval1 +} + +declare i32 @f(%struct.X* byval align 4, %struct.X* byval align 4) nounwind ssp
diff --git a/src/LLVM/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll b/src/LLVM/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll new file mode 100644 index 0000000..f46eed4 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll
@@ -0,0 +1,141 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of modref unfolded} +; XFAIL: * +; 69408 removed the opportunity for this optimization to work + + %struct.SHA512_CTX = type { [8 x i64], i64, i64, %struct.anon, i32, i32 } + %struct.anon = type { [16 x i64] } +@K512 = external constant [80 x i64], align 32 ; <[80 x i64]*> [#uses=2] + +define fastcc void @sha512_block_data_order(%struct.SHA512_CTX* nocapture %ctx, i8* nocapture %in, i64 %num) nounwind ssp { +entry: + br label %bb349 + +bb349: ; preds = %bb349, %entry + %e.0489 = phi i64 [ 0, %entry ], [ %e.0, %bb349 ] ; <i64> [#uses=3] + %b.0472 = phi i64 [ 0, %entry ], [ %87, %bb349 ] ; <i64> [#uses=2] + %asmtmp356 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 41, i64 %e.0489) nounwind ; <i64> [#uses=1] + %0 = xor i64 0, %asmtmp356 ; <i64> [#uses=1] + %1 = add i64 0, %0 ; <i64> [#uses=1] + %2 = add i64 %1, 0 ; <i64> [#uses=1] + %3 = add i64 %2, 0 ; <i64> [#uses=1] + %4 = add i64 %3, 0 ; <i64> [#uses=5] + %asmtmp372 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 34, i64 %4) nounwind ; <i64> [#uses=1] + %asmtmp373 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 39, i64 %4) nounwind ; <i64> [#uses=0] + %5 = xor i64 %asmtmp372, 0 ; <i64> [#uses=0] + %6 = xor i64 0, %b.0472 ; <i64> [#uses=1] + %7 = and i64 %4, %6 ; <i64> [#uses=1] + %8 = xor i64 %7, 0 ; <i64> [#uses=1] + %9 = add i64 0, %8 ; <i64> [#uses=1] + %10 = add i64 %9, 0 ; <i64> [#uses=2] + %asmtmp377 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 61, i64 0) nounwind ; <i64> [#uses=1] + %11 = xor i64 0, %asmtmp377 ; <i64> [#uses=1] + %12 = add i64 0, %11 ; <i64> [#uses=1] + %13 = add i64 %12, 0 ; <i64> [#uses=1] + %not381 = xor i64 0, -1 ; <i64> [#uses=1] + %14 = and i64 %e.0489, %not381 ; <i64> [#uses=1] + %15 = xor i64 0, %14 ; <i64> [#uses=1] + %16 = add i64 %15, 0 ; 
<i64> [#uses=1] + %17 = add i64 %16, %13 ; <i64> [#uses=1] + %18 = add i64 %17, 0 ; <i64> [#uses=1] + %19 = add i64 %18, 0 ; <i64> [#uses=2] + %20 = add i64 %19, %b.0472 ; <i64> [#uses=3] + %21 = add i64 %19, 0 ; <i64> [#uses=1] + %22 = add i64 %21, 0 ; <i64> [#uses=1] + %23 = add i32 0, 12 ; <i32> [#uses=1] + %24 = and i32 %23, 12 ; <i32> [#uses=1] + %25 = zext i32 %24 to i64 ; <i64> [#uses=1] + %26 = getelementptr [16 x i64]* null, i64 0, i64 %25 ; <i64*> [#uses=0] + %27 = add i64 0, %e.0489 ; <i64> [#uses=1] + %28 = add i64 %27, 0 ; <i64> [#uses=1] + %29 = add i64 %28, 0 ; <i64> [#uses=1] + %30 = add i64 %29, 0 ; <i64> [#uses=2] + %31 = and i64 %10, %4 ; <i64> [#uses=1] + %32 = xor i64 0, %31 ; <i64> [#uses=1] + %33 = add i64 %30, 0 ; <i64> [#uses=3] + %34 = add i64 %30, %32 ; <i64> [#uses=1] + %35 = add i64 %34, 0 ; <i64> [#uses=1] + %36 = and i64 %33, %20 ; <i64> [#uses=1] + %37 = xor i64 %36, 0 ; <i64> [#uses=1] + %38 = add i64 %37, 0 ; <i64> [#uses=1] + %39 = add i64 %38, 0 ; <i64> [#uses=1] + %40 = add i64 %39, 0 ; <i64> [#uses=1] + %41 = add i64 %40, 0 ; <i64> [#uses=1] + %42 = add i64 %41, %4 ; <i64> [#uses=3] + %43 = or i32 0, 6 ; <i32> [#uses=1] + %44 = and i32 %43, 14 ; <i32> [#uses=1] + %45 = zext i32 %44 to i64 ; <i64> [#uses=1] + %46 = getelementptr [16 x i64]* null, i64 0, i64 %45 ; <i64*> [#uses=1] + %not417 = xor i64 %42, -1 ; <i64> [#uses=1] + %47 = and i64 %20, %not417 ; <i64> [#uses=1] + %48 = xor i64 0, %47 ; <i64> [#uses=1] + %49 = getelementptr [80 x i64]* @K512, i64 0, i64 0 ; <i64*> [#uses=1] + %50 = load i64* %49, align 8 ; <i64> [#uses=1] + %51 = add i64 %48, 0 ; <i64> [#uses=1] + %52 = add i64 %51, 0 ; <i64> [#uses=1] + %53 = add i64 %52, 0 ; <i64> [#uses=1] + %54 = add i64 %53, %50 ; <i64> [#uses=2] + %asmtmp420 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 34, i64 0) nounwind ; <i64> [#uses=1] + %asmtmp421 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 39, i64 0) nounwind ; 
<i64> [#uses=1] + %55 = xor i64 %asmtmp420, 0 ; <i64> [#uses=1] + %56 = xor i64 %55, %asmtmp421 ; <i64> [#uses=1] + %57 = add i64 %54, %10 ; <i64> [#uses=5] + %58 = add i64 %54, 0 ; <i64> [#uses=1] + %59 = add i64 %58, %56 ; <i64> [#uses=2] + %60 = or i32 0, 7 ; <i32> [#uses=1] + %61 = and i32 %60, 15 ; <i32> [#uses=1] + %62 = zext i32 %61 to i64 ; <i64> [#uses=1] + %63 = getelementptr [16 x i64]* null, i64 0, i64 %62 ; <i64*> [#uses=2] + %64 = load i64* null, align 8 ; <i64> [#uses=1] + %65 = lshr i64 %64, 6 ; <i64> [#uses=1] + %66 = xor i64 0, %65 ; <i64> [#uses=1] + %67 = xor i64 %66, 0 ; <i64> [#uses=1] + %68 = load i64* %46, align 8 ; <i64> [#uses=1] + %69 = load i64* null, align 8 ; <i64> [#uses=1] + %70 = add i64 %68, 0 ; <i64> [#uses=1] + %71 = add i64 %70, %67 ; <i64> [#uses=1] + %72 = add i64 %71, %69 ; <i64> [#uses=1] + %asmtmp427 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 18, i64 %57) nounwind ; <i64> [#uses=1] + %asmtmp428 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 41, i64 %57) nounwind ; <i64> [#uses=1] + %73 = xor i64 %asmtmp427, 0 ; <i64> [#uses=1] + %74 = xor i64 %73, %asmtmp428 ; <i64> [#uses=1] + %75 = and i64 %57, %42 ; <i64> [#uses=1] + %not429 = xor i64 %57, -1 ; <i64> [#uses=1] + %76 = and i64 %33, %not429 ; <i64> [#uses=1] + %77 = xor i64 %75, %76 ; <i64> [#uses=1] + %78 = getelementptr [80 x i64]* @K512, i64 0, i64 0 ; <i64*> [#uses=1] + %79 = load i64* %78, align 16 ; <i64> [#uses=1] + %80 = add i64 %77, %20 ; <i64> [#uses=1] + %81 = add i64 %80, %72 ; <i64> [#uses=1] + %82 = add i64 %81, %74 ; <i64> [#uses=1] + %83 = add i64 %82, %79 ; <i64> [#uses=1] + %asmtmp432 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 34, i64 %59) nounwind ; <i64> [#uses=1] + %asmtmp433 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 39, i64 %59) nounwind ; <i64> [#uses=1] + %84 = xor i64 %asmtmp432, 0 ; <i64> [#uses=1] + %85 = xor i64 %84, 
%asmtmp433 ; <i64> [#uses=1] + %86 = add i64 %83, %22 ; <i64> [#uses=2] + %87 = add i64 0, %85 ; <i64> [#uses=1] + %asmtmp435 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 8, i64 0) nounwind ; <i64> [#uses=1] + %88 = xor i64 0, %asmtmp435 ; <i64> [#uses=1] + %89 = load i64* null, align 8 ; <i64> [#uses=3] + %asmtmp436 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 19, i64 %89) nounwind ; <i64> [#uses=1] + %asmtmp437 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 61, i64 %89) nounwind ; <i64> [#uses=1] + %90 = lshr i64 %89, 6 ; <i64> [#uses=1] + %91 = xor i64 %asmtmp436, %90 ; <i64> [#uses=1] + %92 = xor i64 %91, %asmtmp437 ; <i64> [#uses=1] + %93 = load i64* %63, align 8 ; <i64> [#uses=1] + %94 = load i64* null, align 8 ; <i64> [#uses=1] + %95 = add i64 %93, %88 ; <i64> [#uses=1] + %96 = add i64 %95, %92 ; <i64> [#uses=1] + %97 = add i64 %96, %94 ; <i64> [#uses=2] + store i64 %97, i64* %63, align 8 + %98 = and i64 %86, %57 ; <i64> [#uses=1] + %not441 = xor i64 %86, -1 ; <i64> [#uses=1] + %99 = and i64 %42, %not441 ; <i64> [#uses=1] + %100 = xor i64 %98, %99 ; <i64> [#uses=1] + %101 = add i64 %100, %33 ; <i64> [#uses=1] + %102 = add i64 %101, %97 ; <i64> [#uses=1] + %103 = add i64 %102, 0 ; <i64> [#uses=1] + %104 = add i64 %103, 0 ; <i64> [#uses=1] + %e.0 = add i64 %104, %35 ; <i64> [#uses=1] + br label %bb349 +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll b/src/LLVM/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll new file mode 100644 index 0000000..620e0f3 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
@@ -0,0 +1,31 @@ +; RUN: llc -mtriple=i386-apple-darwin10.0 -relocation-model=pic -asm-verbose=false \ +; RUN: -disable-fp-elim -mattr=-sse41,-sse3,+sse2 -post-RA-scheduler=false -regalloc=linearscan < %s | \ +; RUN: FileCheck %s +; rdar://6808032 + +; CHECK: pextrw $14 +; CHECK-NEXT: shrl $8 +; CHECK-NEXT: (%ebp) +; CHECK-NEXT: pinsrw + +define void @update(i8** %args_list) nounwind { +entry: + %cmp.i = icmp eq i32 0, 0 ; <i1> [#uses=1] + br i1 %cmp.i, label %if.then.i, label %test_cl.exit + +if.then.i: ; preds = %entry + %val = load <16 x i8> addrspace(1)* null ; <<16 x i8>> [#uses=8] + %tmp10.i = shufflevector <16 x i8> <i8 0, i8 0, i8 0, i8 undef, i8 0, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef>, <16 x i8> %val, <16 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 4, i32 undef, i32 6, i32 undef, i32 29, i32 undef, i32 10, i32 11, i32 12, i32 undef, i32 undef, i32 undef> ; <<16 x i8>> [#uses=1] + %tmp17.i = shufflevector <16 x i8> %tmp10.i, <16 x i8> %val, <16 x i32> <i32 0, i32 1, i32 2, i32 18, i32 4, i32 undef, i32 6, i32 undef, i32 8, i32 undef, i32 10, i32 11, i32 12, i32 undef, i32 undef, i32 undef> ; <<16 x i8>> [#uses=1] + %tmp24.i = shufflevector <16 x i8> %tmp17.i, <16 x i8> %val, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 24, i32 6, i32 undef, i32 8, i32 undef, i32 10, i32 11, i32 12, i32 undef, i32 undef, i32 undef> ; <<16 x i8>> [#uses=1] + %tmp31.i = shufflevector <16 x i8> %tmp24.i, <16 x i8> %val, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 undef, i32 8, i32 undef, i32 10, i32 11, i32 12, i32 21, i32 undef, i32 undef> ; <<16 x i8>> [#uses=1] + %tmp38.i = shufflevector <16 x i8> %tmp31.i, <16 x i8> %val, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 27, i32 8, i32 undef, i32 10, i32 11, i32 12, i32 13, i32 undef, i32 undef> ; <<16 x i8>> [#uses=1] + %tmp45.i = shufflevector <16 x i8> %tmp38.i, <16 x i8> %val, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, 
i32 5, i32 6, i32 7, i32 8, i32 undef, i32 10, i32 11, i32 12, i32 13, i32 29, i32 undef> ; <<16 x i8>> [#uses=1] + %tmp52.i = shufflevector <16 x i8> %tmp45.i, <16 x i8> %val, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 21, i32 10, i32 11, i32 12, i32 13, i32 14, i32 undef> ; <<16 x i8>> [#uses=1] + %tmp59.i = shufflevector <16 x i8> %tmp52.i, <16 x i8> %val, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 20> ; <<16 x i8>> [#uses=1] + store <16 x i8> %tmp59.i, <16 x i8> addrspace(1)* null + ret void + +test_cl.exit: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-04-24.ll b/src/LLVM/test/CodeGen/X86/2009-04-24.ll new file mode 100644 index 0000000..d6ed0c4 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-04-24.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -regalloc=fast -relocation-model=pic > %t2 +; RUN: grep {leaq.*TLSGD} %t2 +; RUN: grep {__tls_get_addr} %t2 +; PR4004 + +@i = thread_local global i32 15 + +define i32 @f() { +entry: + %tmp1 = load i32* @i + ret i32 %tmp1 +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-04-25-CoalescerBug.ll b/src/LLVM/test/CodeGen/X86/2009-04-25-CoalescerBug.ll new file mode 100644 index 0000000..94d3eb2 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-04-25-CoalescerBug.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -march=x86-64 | grep mov | count 2 +; rdar://6806252 + +define i64 @test(i32* %tmp13) nounwind { +entry: + br label %while.cond + +while.cond: ; preds = %while.cond, %entry + %tmp15 = load i32* %tmp13 ; <i32> [#uses=2] + %bf.lo = lshr i32 %tmp15, 1 ; <i32> [#uses=1] + %bf.lo.cleared = and i32 %bf.lo, 2147483647 ; <i32> [#uses=1] + %conv = zext i32 %bf.lo.cleared to i64 ; <i64> [#uses=1] + %bf.lo.cleared25 = and i32 %tmp15, 1 ; <i32> [#uses=1] + %tobool = icmp ne i32 %bf.lo.cleared25, 0 ; <i1> [#uses=1] + br i1 %tobool, label %while.cond, label %while.end + +while.end: ; preds = %while.cond + ret i64 %conv +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-04-27-CoalescerAssert.ll b/src/LLVM/test/CodeGen/X86/2009-04-27-CoalescerAssert.ll new file mode 100644 index 0000000..7981a52 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-04-27-CoalescerAssert.ll
@@ -0,0 +1,1457 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu +; PR4034 + + %struct.BiContextType = type { i16, i8 } + %struct.Bitstream = type { i32, i32, i32, i32, i8*, i32 } + %struct.DataPartition = type { %struct.Bitstream*, %struct.DecodingEnvironment, i32 (%struct.SyntaxElement*, %struct.ImageParameters*, %struct.DataPartition*)* } + %struct.DecRefPicMarking_t = type { i32, i32, i32, i32, i32, %struct.DecRefPicMarking_t* } + %struct.DecodingEnvironment = type { i32, i32, i32, i32, i32, i8*, i32* } + %struct.ImageParameters = type { i32, i32, i32, i32, i32*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [16 x [16 x i16]], [6 x [32 x i32]], [16 x [16 x i32]], [4 x [12 x [4 x [4 x i32]]]], [16 x i32], i8**, i32*, i32***, i32**, i32, i32, i32, i32, %struct.Slice*, %struct.Macroblock*, i32, i32, i32, i32, i32, i32, %struct.DecRefPicMarking_t*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [3 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32***, i32***, i32****, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [3 x [2 x i32]], [3 x [2 x i32]], i32, i32, i64, i64, %struct.timeb, %struct.timeb, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } + %struct.Macroblock = type { i32, [2 x i32], i32, i32, %struct.Macroblock*, %struct.Macroblock*, i32, [2 x [4 x [4 x [2 x i32]]]], i32, i64, i64, i32, i32, [4 x i8], [4 x i8], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } + %struct.MotionInfoContexts = type { [4 x [11 x %struct.BiContextType]], [2 x [9 x %struct.BiContextType]], [2 x [10 x %struct.BiContextType]], [2 x [6 x %struct.BiContextType]], [4 x %struct.BiContextType], [4 x %struct.BiContextType], [3 x %struct.BiContextType] } + %struct.PixelPos = 
type { i32, i32, i32, i32, i32, i32 } + %struct.Slice = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.DataPartition*, %struct.MotionInfoContexts*, %struct.TextureInfoContexts*, i32, i32*, i32*, i32*, i32, i32*, i32*, i32*, i32 (%struct.ImageParameters*, %struct.inp_par*)*, i32, i32, i32, i32 } + %struct.SyntaxElement = type { i32, i32, i32, i32, i32, i32, i32, i32, void (i32, i32, i32*, i32*)*, void (%struct.SyntaxElement*, %struct.ImageParameters*, %struct.DecodingEnvironment*)* } + %struct.TextureInfoContexts = type { [2 x %struct.BiContextType], [4 x %struct.BiContextType], [3 x [4 x %struct.BiContextType]], [10 x [4 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]], [10 x [5 x %struct.BiContextType]], [10 x [5 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]] } + %struct.inp_par = type { [1000 x i8], [1000 x i8], [1000 x i8], i32, i32, i32, i32, i32, i32, i32, i32 } + %struct.timeb = type { i64, i16, i16, i16 } +@get_mb_block_pos = external global void (i32, i32*, i32*)* ; <void (i32, i32*, i32*)**> [#uses=1] +@img = external global %struct.ImageParameters* ; <%struct.ImageParameters**> [#uses=14] +@llvm.used = appending global [1 x i8*] [i8* bitcast (void (i32, i32, i32, i32, %struct.PixelPos*)* @getAffNeighbour to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +define void @getAffNeighbour(i32 %curr_mb_nr, i32 %xN, i32 %yN, i32 %is_chroma, %struct.PixelPos* %pix) nounwind { +entry: + %Opq.sa.calc = add i32 0, 2 ; <i32> [#uses=2] + %0 = load %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=3] + %1 = getelementptr %struct.ImageParameters* %0, i64 0, i32 39 ; <%struct.Macroblock**> [#uses=1] + %2 = load %struct.Macroblock** %1, align 8 ; <%struct.Macroblock*> [#uses=24] + %3 = zext i32 %curr_mb_nr to i64 ; <i64> [#uses=24] + %4 = sext i32 %is_chroma to i64 ; <i64> [#uses=8] + br label %meshBB392 + 
+entry.fragment: ; preds = %meshBB392 + %Opq.sa.calc747 = add i32 %Opq.sa.calc921, 70 ; <i32> [#uses=0] + %5 = getelementptr %struct.ImageParameters* %0, i64 0, i32 119, i64 %4, i64 0 ; <i32*> [#uses=1] + %6 = load i32* %5, align 4 ; <i32> [#uses=2] + %7 = getelementptr %struct.ImageParameters* %0, i64 0, i32 119, i64 %4, i64 1 ; <i32*> [#uses=1] + %8 = load i32* %7, align 4 ; <i32> [#uses=5] + br label %entry.fragment181 + +entry.fragment181: ; preds = %entry.fragment + %Opq.sa.calc863 = add i32 %Opq.sa.calc921, -50 ; <i32> [#uses=4] + %9 = getelementptr %struct.PixelPos* %pix, i64 0, i32 0 ; <i32*> [#uses=4] + store i32 0, i32* %9, align 4 + %10 = add i32 %8, -1 ; <i32> [#uses=6] + %11 = icmp slt i32 %10, %yN ; <i1> [#uses=1] + br i1 %11, label %meshBB448, label %bb + +bb: ; preds = %entry.fragment181 + %Opq.sa.calc460 = add i32 %Opq.sa.calc863, 50 ; <i32> [#uses=0] + %12 = add i32 %6, -1 ; <i32> [#uses=5] + %13 = icmp slt i32 %12, %xN ; <i1> [#uses=1] + br label %bb.fragment + +bb.fragment: ; preds = %bb + %Opq.sa.calc976 = add i32 %Opq.sa.calc863, 13 ; <i32> [#uses=3] + %.not8 = icmp sgt i32 %yN, -1 ; <i1> [#uses=1] + %14 = icmp sgt i32 %8, %yN ; <i1> [#uses=1] + %or.cond.not = and i1 %14, %.not8 ; <i1> [#uses=3] + %or.cond1 = and i1 %or.cond.not, %13 ; <i1> [#uses=1] + br i1 %or.cond1, label %meshBB396, label %bb3 + +bb3: ; preds = %bb.fragment + %Opq.sa.calc462 = sub i32 %Opq.sa.calc976, -152 ; <i32> [#uses=5] + %Opq.sa.calc461 = sub i32 %Opq.sa.calc462, 168 ; <i32> [#uses=2] + %15 = icmp slt i32 %xN, 0 ; <i1> [#uses=1] + br i1 %15, label %bb4, label %meshBB404 + +bb4: ; preds = %bb3 + %Opq.sa.calc467 = xor i32 %Opq.sa.calc462, 171 ; <i32> [#uses=2] + %Opq.sa.calc465 = sub i32 %Opq.sa.calc467, %Opq.sa.calc462 ; <i32> [#uses=1] + %Opq.sa.calc466 = xor i32 %Opq.sa.calc465, -164 ; <i32> [#uses=1] + %16 = icmp slt i32 %yN, 0 ; <i1> [#uses=1] + br i1 %16, label %meshBB428, label %meshBB392 + +bb5: ; preds = %meshBB428 + %Opq.sa.calc470 = sub i32 %Opq.sa.calc897, 
-49 ; <i32> [#uses=1] + %17 = getelementptr %struct.Macroblock* %2, i64 %3, i32 20 ; <i32*> [#uses=1] + %18 = load i32* %17, align 4 ; <i32> [#uses=1] + br label %bb5.fragment + +bb5.fragment: ; preds = %bb5 + %Opq.sa.calc873 = sub i32 %Opq.sa.calc470, 169 ; <i32> [#uses=7] + %19 = icmp eq i32 %18, 0 ; <i1> [#uses=1] + %20 = and i32 %curr_mb_nr, 1 ; <i32> [#uses=1] + %21 = icmp eq i32 %20, 0 ; <i1> [#uses=2] + br i1 %19, label %bb6, label %bb13 + +bb6: ; preds = %bb5.fragment + %Opq.sa.calc473 = xor i32 %Opq.sa.calc873, 81 ; <i32> [#uses=1] + br i1 %21, label %bb7, label %meshBB348 + +bb7: ; preds = %bb6 + %Opq.sa.calc476 = add i32 %Opq.sa.calc873, -58 ; <i32> [#uses=1] + %22 = getelementptr %struct.Macroblock* %2, i64 %3, i32 25 ; <i32*> [#uses=1] + %23 = load i32* %22, align 8 ; <i32> [#uses=1] + %24 = add i32 %23, 1 ; <i32> [#uses=1] + %25 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=1] + br label %meshBB388 + +bb7.fragment: ; preds = %meshBB388 + %Opq.sa.calc709 = sub i32 %Opq.sa.calc886, 143 ; <i32> [#uses=1] + %Opq.sa.calc707 = add i32 %Opq.sa.calc709, %Opq.sa.calc886 ; <i32> [#uses=1] + %Opq.sa.calc708 = xor i32 %Opq.sa.calc707, 474 ; <i32> [#uses=0] + store i32 %.SV194.phi, i32* %.SV196.phi, align 4 + %26 = getelementptr %struct.Macroblock* %.load17.SV.phi, i64 %.load36.SV.phi, i32 29 ; <i32*> [#uses=1] + %27 = load i32* %26, align 8 ; <i32> [#uses=2] + store i32 %27, i32* %.load67.SV.phi, align 4 + br label %bb96 + +bb8: ; preds = %meshBB348 + %Opq.sa.calc479 = sub i32 %Opq.sa.calc805, 141 ; <i32> [#uses=1] + %28 = getelementptr %struct.Macroblock* %2, i64 %3, i32 22 ; <i32*> [#uses=2] + %29 = load i32* %28, align 4 ; <i32> [#uses=2] + %30 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=2] + br label %meshBB368 + +bb8.fragment: ; preds = %meshBB368 + %Opq.sa.calc765 = sub i32 %Opq.sa.calc768, -115 ; <i32> [#uses=2] + store i32 %.SV198.phi, i32* %.SV200.phi, align 4 + %31 = getelementptr %struct.Macroblock* 
%.load16.SV.phi, i64 %.load35.SV.phi, i32 26 ; <i32*> [#uses=2] + %32 = load i32* %31, align 4 ; <i32> [#uses=4] + store i32 %32, i32* %.load66.SV.phi, align 4 + %33 = load i32* %31, align 4 ; <i32> [#uses=1] + %34 = icmp eq i32 %33, 0 ; <i1> [#uses=1] + br i1 %34, label %bb96, label %bb9 + +bb9: ; preds = %bb8.fragment + %Opq.sa.calc482 = xor i32 %Opq.sa.calc765, 163 ; <i32> [#uses=0] + %35 = load %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=1] + %36 = getelementptr %struct.ImageParameters* %35, i64 0, i32 39 ; <%struct.Macroblock**> [#uses=1] + %37 = load %struct.Macroblock** %36, align 8 ; <%struct.Macroblock*> [#uses=1] + %38 = load i32* %.SV76.phi, align 4 ; <i32> [#uses=1] + br label %bb9.fragment + +bb9.fragment: ; preds = %bb9 + %Opq.sa.calc999 = add i32 %Opq.sa.calc765, -44 ; <i32> [#uses=1] + %39 = sext i32 %38 to i64 ; <i64> [#uses=1] + %40 = getelementptr %struct.Macroblock* %37, i64 %39, i32 20 ; <i32*> [#uses=1] + %41 = load i32* %40, align 4 ; <i32> [#uses=1] + %42 = icmp eq i32 %41, 0 ; <i1> [#uses=1] + br i1 %42, label %bb96, label %bb11 + +bb11: ; preds = %bb9.fragment + %Opq.sa.calc485 = sub i32 %Opq.sa.calc999, 200 ; <i32> [#uses=2] + %43 = add i32 %.SV78.phi, 1 ; <i32> [#uses=1] + br label %meshBB332 + +bb11.fragment: ; preds = %meshBB332 + %Opq.sa.calc954 = xor i32 %Opq.link.mask859, 233 ; <i32> [#uses=0] + store i32 %.SV206.phi, i32* %.load81.SV.phi, align 4 + %44 = add i32 %.load50.SV.phi, %yN ; <i32> [#uses=1] + %45 = ashr i32 %44, 1 ; <i32> [#uses=1] + br label %bb96 + +bb13: ; preds = %bb5.fragment + %Opq.sa.calc490 = xor i32 %Opq.sa.calc873, 175 ; <i32> [#uses=1] + %Opq.sa.calc488 = sub i32 %Opq.sa.calc490, %Opq.sa.calc873 ; <i32> [#uses=1] + %Opq.sa.calc489 = sub i32 %Opq.sa.calc488, 133 ; <i32> [#uses=1] + %46 = getelementptr %struct.Macroblock* %2, i64 %3, i32 25 ; <i32*> [#uses=1] + br label %meshBB360 + +bb13.fragment: ; preds = %meshBB360 + %Opq.sa.calc870 = add i32 %Opq.sa.calc866, -129 ; <i32> 
[#uses=3] + %47 = load i32* %.SV208.phi, align 8 ; <i32> [#uses=3] + br i1 %.load74.SV.phi, label %bb14, label %meshBB412 + +bb14: ; preds = %bb13.fragment + %Opq.sa.calc493 = add i32 %Opq.sa.calc870, 103 ; <i32> [#uses=1] + %48 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=2] + store i32 %47, i32* %48, align 4 + %49 = getelementptr %struct.Macroblock* %2, i64 %3, i32 29 ; <i32*> [#uses=2] + br label %bb14.fragment + +bb14.fragment: ; preds = %bb14 + %Opq.sa.calc723 = sub i32 %Opq.sa.calc493, 117 ; <i32> [#uses=4] + %50 = load i32* %49, align 8 ; <i32> [#uses=4] + store i32 %50, i32* %.SV52.phi1113, align 4 + %51 = load i32* %49, align 8 ; <i32> [#uses=1] + %52 = icmp eq i32 %51, 0 ; <i1> [#uses=1] + br i1 %52, label %meshBB, label %bb15 + +bb15: ; preds = %bb14.fragment + %Opq.sa.calc496 = sub i32 %Opq.sa.calc723, -8 ; <i32> [#uses=1] + %53 = load %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=1] + %54 = getelementptr %struct.ImageParameters* %53, i64 0, i32 39 ; <%struct.Macroblock**> [#uses=1] + %55 = load %struct.Macroblock** %54, align 8 ; <%struct.Macroblock*> [#uses=1] + %56 = load i32* %.SV208.phi, align 8 ; <i32> [#uses=1] + br label %meshBB324 + +bb15.fragment: ; preds = %meshBB324 + %Opq.sa.calc925 = xor i32 %Opq.sa.calc750, 215 ; <i32> [#uses=2] + %57 = sext i32 %.SV214.phi to i64 ; <i64> [#uses=1] + %58 = getelementptr %struct.Macroblock* %.SV212.phi, i64 %57, i32 20 ; <i32*> [#uses=1] + %59 = load i32* %58, align 4 ; <i32> [#uses=1] + %60 = icmp eq i32 %59, 0 ; <i1> [#uses=1] + br i1 %60, label %bb16, label %bb96 + +bb16: ; preds = %bb15.fragment + %Opq.sa.calc499 = sub i32 %Opq.sa.calc925, -140 ; <i32> [#uses=0] + %61 = add i32 %.SV87.phi, 1 ; <i32> [#uses=1] + br label %bb16.fragment + +bb16.fragment: ; preds = %bb16 + %Opq.sa.calc968 = add i32 %Opq.sa.calc925, 129 ; <i32> [#uses=0] + store i32 %61, i32* %.SV91.phi, align 4 + %62 = shl i32 %yN, 1 ; <i32> [#uses=1] + br label %bb96 + +bb19: ; preds 
= %meshBB412 + %Opq.sa.calc502 = sub i32 %Opq.sa.calc932, -94 ; <i32> [#uses=0] + %63 = add i32 %.SV87.phi1030, 1 ; <i32> [#uses=1] + %64 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=1] + br label %bb19.fragment + +bb19.fragment: ; preds = %bb19 + %Opq.sa.calc880 = xor i32 %Opq.sa.calc932, 246 ; <i32> [#uses=0] + store i32 %63, i32* %64, align 4 + %65 = getelementptr %struct.Macroblock* %2, i64 %3, i32 29 ; <i32*> [#uses=1] + %66 = load i32* %65, align 8 ; <i32> [#uses=2] + store i32 %66, i32* %.SV52.phi1186, align 4 + br label %bb96 + +bb21: ; preds = %meshBB392 + %Opq.sa.calc505 = add i32 %Opq.sa.calc921, -40 ; <i32> [#uses=2] + br i1 %or.cond.not.SV.phi, label %meshBB360, label %bb97 + +bb23: ; preds = %meshBB360 + %Opq.sa.calc509 = xor i32 %Opq.sa.calc866, 70 ; <i32> [#uses=1] + %Opq.sa.calc508 = sub i32 %Opq.sa.calc509, -19 ; <i32> [#uses=0] + %67 = getelementptr %struct.Macroblock* %2, i64 %3, i32 20 ; <i32*> [#uses=1] + %68 = load i32* %67, align 4 ; <i32> [#uses=1] + %69 = icmp eq i32 %68, 0 ; <i1> [#uses=1] + %70 = and i32 %curr_mb_nr, 1 ; <i32> [#uses=1] + %71 = icmp eq i32 %70, 0 ; <i1> [#uses=2] + br label %bb23.fragment + +bb23.fragment: ; preds = %bb23 + %Opq.sa.calc847 = sub i32 %Opq.sa.calc866, -9 ; <i32> [#uses=2] + %72 = getelementptr %struct.Macroblock* %2, i64 %3, i32 22 ; <i32*> [#uses=3] + %73 = load i32* %72, align 4 ; <i32> [#uses=3] + %74 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=3] + store i32 %73, i32* %74, align 4 + br label %bb23.fragment182 + +bb23.fragment182: ; preds = %bb23.fragment + %Opq.sa.calc744 = xor i32 %Opq.sa.calc847, 152 ; <i32> [#uses=4] + %Opq.sa.calc742 = add i32 %Opq.sa.calc744, %Opq.sa.calc847 ; <i32> [#uses=1] + %Opq.sa.calc743 = add i32 %Opq.sa.calc742, -149 ; <i32> [#uses=2] + %75 = getelementptr %struct.Macroblock* %2, i64 %3, i32 26 ; <i32*> [#uses=2] + %76 = load i32* %75, align 4 ; <i32> [#uses=3] + store i32 %76, i32* %.SV52.phi1113, align 4 + %77 = load i32* 
%75, align 4 ; <i32> [#uses=1] + %78 = icmp ne i32 %77, 0 ; <i1> [#uses=2] + br i1 %69, label %meshBB344, label %meshBB432 + +bb24: ; preds = %meshBB344 + %Opq.sa.calc512 = add i32 %Opq.sa.calc716, -55 ; <i32> [#uses=3] + br i1 %.SV96.phi, label %bb25, label %bb32 + +bb25: ; preds = %bb24 + %Opq.sa.calc515 = sub i32 %Opq.sa.calc716, 18 ; <i32> [#uses=1] + br i1 %.SV135.phi, label %bb26, label %bb96 + +bb26: ; preds = %bb25 + %Opq.sa.calc519 = xor i32 %Opq.sa.calc515, 23 ; <i32> [#uses=2] + %Opq.sa.calc518 = xor i32 %Opq.sa.calc519, 84 ; <i32> [#uses=1] + %79 = load %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=1] + %80 = getelementptr %struct.ImageParameters* %79, i64 0, i32 39 ; <%struct.Macroblock**> [#uses=1] + %81 = load %struct.Macroblock** %80, align 8 ; <%struct.Macroblock*> [#uses=1] + %82 = load i32* %.SV99.phi, align 4 ; <i32> [#uses=1] + br label %meshBB340 + +bb26.fragment: ; preds = %meshBB340 + %Opq.sa.calc918 = xor i32 %Opq.sa.calc754, 228 ; <i32> [#uses=4] + %Opq.sa.calc916 = add i32 %Opq.sa.calc918, %Opq.sa.calc754 ; <i32> [#uses=1] + %Opq.sa.calc917 = add i32 %Opq.sa.calc916, -237 ; <i32> [#uses=1] + %83 = sext i32 %.SV230.phi to i64 ; <i64> [#uses=1] + %84 = getelementptr %struct.Macroblock* %.SV228.phi, i64 %83, i32 20 ; <i32*> [#uses=1] + %85 = load i32* %84, align 4 ; <i32> [#uses=1] + %86 = icmp eq i32 %85, 0 ; <i1> [#uses=1] + br i1 %86, label %meshBB420, label %meshBB356 + +bb28: ; preds = %meshBB356 + %Opq.sa.calc522 = xor i32 %Opq.sa.calc983, 107 ; <i32> [#uses=2] + %87 = and i32 %yN, 1 ; <i32> [#uses=1] + %88 = icmp eq i32 %87, 0 ; <i1> [#uses=1] + br i1 %88, label %bb29, label %bb30 + +bb29: ; preds = %bb28 + %Opq.sa.calc525 = xor i32 %Opq.sa.calc522, 151 ; <i32> [#uses=2] + %89 = ashr i32 %yN, 1 ; <i32> [#uses=1] + br label %meshBB340 + +bb30: ; preds = %bb28 + %Opq.sa.calc528 = sub i32 %Opq.sa.calc522, -64 ; <i32> [#uses=1] + %90 = add i32 %.SV104.phi1160, 1 ; <i32> [#uses=1] + br label %bb30.fragment + 
+bb30.fragment: ; preds = %bb30 + %Opq.sa.calc791 = add i32 %Opq.sa.calc528, -14 ; <i32> [#uses=0] + store i32 %90, i32* %.SV111.phi1159, align 4 + %91 = ashr i32 %yN, 1 ; <i32> [#uses=1] + br label %bb96 + +bb32: ; preds = %bb24 + %Opq.sa.calc531 = xor i32 %Opq.sa.calc512, 50 ; <i32> [#uses=1] + br i1 %.SV135.phi, label %bb33, label %meshBB324 + +bb33: ; preds = %bb32 + %Opq.sa.calc534 = sub i32 %Opq.sa.calc512, -75 ; <i32> [#uses=2] + %92 = load %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=1] + %93 = getelementptr %struct.ImageParameters* %92, i64 0, i32 39 ; <%struct.Macroblock**> [#uses=1] + %94 = load %struct.Macroblock** %93, align 8 ; <%struct.Macroblock*> [#uses=1] + %95 = load i32* %.SV99.phi, align 4 ; <i32> [#uses=1] + br label %bb33.fragment + +bb33.fragment: ; preds = %bb33 + %Opq.sa.calc712 = add i32 %Opq.sa.calc534, -109 ; <i32> [#uses=3] + %96 = sext i32 %95 to i64 ; <i64> [#uses=1] + %97 = getelementptr %struct.Macroblock* %94, i64 %96, i32 20 ; <i32*> [#uses=1] + %98 = load i32* %97, align 4 ; <i32> [#uses=1] + %99 = icmp eq i32 %98, 0 ; <i1> [#uses=1] + br i1 %99, label %bb34, label %meshBB + +bb34: ; preds = %bb33.fragment + %Opq.sa.calc537 = add i32 %Opq.sa.calc712, 8 ; <i32> [#uses=1] + %100 = add i32 %.SV104.phi, 1 ; <i32> [#uses=1] + br label %meshBB328 + +bb34.fragment: ; preds = %meshBB328 + %Opq.sa.calc965 = xor i32 %Opq.sa.calc787, 251 ; <i32> [#uses=0] + store i32 %.SV238.phi, i32* %.load116.SV.phi, align 4 + br label %bb96 + +bb35: ; preds = %meshBB + %Opq.sa.calc541 = add i32 %Opq.sa.calc828, -112 ; <i32> [#uses=3] + %Opq.sa.calc540 = xor i32 %Opq.sa.calc541, 3 ; <i32> [#uses=1] + %101 = and i32 %yN, 1 ; <i32> [#uses=1] + %102 = icmp eq i32 %101, 0 ; <i1> [#uses=1] + br i1 %102, label %meshBB372, label %meshBB448 + +bb36: ; preds = %meshBB372 + %Opq.sa.calc544 = sub i32 %Opq.sa.calc812, -10 ; <i32> [#uses=0] + %103 = add i32 %.SV43.phi1015, %yN ; <i32> [#uses=1] + br label %bb36.fragment + 
+bb36.fragment: ; preds = %bb36 + %Opq.sa.calc762 = add i32 %Opq.sa.calc812, -69 ; <i32> [#uses=0] + %104 = ashr i32 %103, 1 ; <i32> [#uses=1] + br label %bb96 + +bb37: ; preds = %meshBB448 + %Opq.sa.calc547 = add i32 %Opq.sa.calc958, -49 ; <i32> [#uses=1] + %105 = add i32 %.SV104.phi1157, 1 ; <i32> [#uses=1] + br label %meshBB348 + +bb37.fragment: ; preds = %meshBB348 + %Opq.sa.calc728 = add i32 %Opq.sa.calc805, -5 ; <i32> [#uses=0] + store i32 %.SV242.phi, i32* %.load115.SV.phi, align 4 + %106 = add i32 %.load48.SV.phi, %yN ; <i32> [#uses=1] + %107 = ashr i32 %106, 1 ; <i32> [#uses=1] + br label %bb96 + +bb39: ; preds = %meshBB432 + %Opq.sa.calc550 = sub i32 %Opq.sa.calc798, -214 ; <i32> [#uses=0] + br i1 %.SV96.phi1038, label %bb40, label %bb48 + +bb40: ; preds = %bb39 + %Opq.sa.calc554 = xor i32 %Opq.sa.calc798, 14 ; <i32> [#uses=4] + %Opq.sa.calc553 = sub i32 %Opq.sa.calc554, 7 ; <i32> [#uses=1] + br i1 %.SV135.phi1039, label %meshBB336, label %meshBB444 + +bb41: ; preds = %meshBB336 + %Opq.sa.calc557 = sub i32 %Opq.sa.calc979, 143 ; <i32> [#uses=1] + %108 = load %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=1] + %109 = getelementptr %struct.ImageParameters* %108, i64 0, i32 39 ; <%struct.Macroblock**> [#uses=1] + %110 = load %struct.Macroblock** %109, align 8 ; <%struct.Macroblock*> [#uses=1] + %111 = load i32* %.SV99.phi1128, align 4 ; <i32> [#uses=1] + br label %bb41.fragment + +bb41.fragment: ; preds = %bb41 + %Opq.sa.calc987 = xor i32 %Opq.sa.calc557, 213 ; <i32> [#uses=4] + %112 = sext i32 %111 to i64 ; <i64> [#uses=1] + %113 = getelementptr %struct.Macroblock* %110, i64 %112, i32 20 ; <i32*> [#uses=1] + %114 = load i32* %113, align 4 ; <i32> [#uses=1] + %115 = icmp eq i32 %114, 0 ; <i1> [#uses=1] + br i1 %115, label %bb42, label %bb96 + +bb42: ; preds = %bb41.fragment + %Opq.sa.calc560 = add i32 %Opq.sa.calc987, -221 ; <i32> [#uses=1] + %116 = ashr i32 %.SV43.phi1230, 1 ; <i32> [#uses=1] + %117 = icmp sgt i32 %116, %yN ; 
<i1> [#uses=1] + br i1 %117, label %meshBB432, label %bb44 + +bb43: ; preds = %meshBB432 + %Opq.sa.calc563 = xor i32 %Opq.sa.calc798, 31 ; <i32> [#uses=0] + %118 = shl i32 %yN, 1 ; <i32> [#uses=1] + br label %bb96 + +bb44: ; preds = %bb42 + %Opq.sa.calc566 = sub i32 %Opq.sa.calc987, 217 ; <i32> [#uses=1] + %119 = add i32 %.SV104.phi1127, 1 ; <i32> [#uses=1] + br label %meshBB332 + +bb44.fragment: ; preds = %meshBB332 + %Opq.sa.calc894 = add i32 %Opq.sa.calc856, -200 ; <i32> [#uses=1] + store i32 %.SV248.phi, i32* %.load114.SV.phi, align 4 + %120 = shl i32 %yN, 1 ; <i32> [#uses=1] + %121 = sub i32 %120, %.load46.SV.phi ; <i32> [#uses=1] + br label %meshBB376 + +bb48: ; preds = %bb39 + %Opq.sa.calc569 = sub i32 %Opq.sa.calc798, -110 ; <i32> [#uses=1] + br i1 %.SV135.phi1039, label %bb49, label %bb96 + +bb49: ; preds = %bb48 + %Opq.sa.calc572 = add i32 %Opq.sa.calc798, 84 ; <i32> [#uses=0] + %122 = load %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=1] + %123 = getelementptr %struct.ImageParameters* %122, i64 0, i32 39 ; <%struct.Macroblock**> [#uses=1] + %124 = load %struct.Macroblock** %123, align 8 ; <%struct.Macroblock*> [#uses=1] + %125 = load i32* %.SV99.phi1037, align 4 ; <i32> [#uses=1] + br label %bb49.fragment + +bb49.fragment: ; preds = %bb49 + %Opq.sa.calc860 = sub i32 %Opq.sa.calc569, 114 ; <i32> [#uses=5] + %126 = sext i32 %125 to i64 ; <i64> [#uses=1] + %127 = getelementptr %struct.Macroblock* %124, i64 %126, i32 20 ; <i32*> [#uses=1] + %128 = load i32* %127, align 4 ; <i32> [#uses=1] + %129 = icmp eq i32 %128, 0 ; <i1> [#uses=1] + br i1 %129, label %bb50, label %meshBB380 + +bb50: ; preds = %bb49.fragment + %Opq.sa.calc577 = add i32 %Opq.sa.calc860, 12 ; <i32> [#uses=2] + %130 = ashr i32 %.SV43.phi1178, 1 ; <i32> [#uses=1] + %131 = icmp sgt i32 %130, %yN ; <i1> [#uses=1] + br i1 %131, label %meshBB328, label %bb52 + +bb51: ; preds = %meshBB328 + %Opq.sa.calc580 = xor i32 %Opq.sa.calc787, 194 ; <i32> [#uses=0] + %132 = shl 
i32 %yN, 1 ; <i32> [#uses=1] + %133 = or i32 %132, 1 ; <i32> [#uses=1] + br label %bb96 + +bb52: ; preds = %bb50 + %Opq.sa.calc584 = sub i32 %Opq.sa.calc860, -65 ; <i32> [#uses=2] + %Opq.sa.calc583 = sub i32 %Opq.sa.calc584, 50 ; <i32> [#uses=1] + %134 = add i32 %.SV104.phi1036, 1 ; <i32> [#uses=1] + store i32 %134, i32* %.SV111.phi1035, align 4 + br label %meshBB384 + +bb52.fragment: ; preds = %meshBB384 + %Opq.sa.calc844 = add i32 %Opq.sa.calc901, -214 ; <i32> [#uses=1] + %135 = shl i32 %yN, 1 ; <i32> [#uses=1] + %136 = or i32 %135, 1 ; <i32> [#uses=1] + %137 = sub i32 %136, %.load44.SV.phi ; <i32> [#uses=1] + br label %meshBB388 + +bb54: ; preds = %meshBB380 + %Opq.sa.calc589 = add i32 %Opq.sa.calc946, 108 ; <i32> [#uses=1] + %138 = add i32 %.SV104.phi1124, 1 ; <i32> [#uses=1] + br label %bb54.fragment + +bb54.fragment: ; preds = %bb54 + %Opq.sa.calc883 = xor i32 %Opq.sa.calc589, 119 ; <i32> [#uses=2] + store i32 %138, i32* %.SV111.phi1123, align 4 + br label %meshBB440 + +bb56: ; preds = %meshBB404 + %Opq.sa.calc592 = sub i32 %Opq.sa.calc939, 87 ; <i32> [#uses=2] + %.not4 = icmp sgt i32 %xN, -1 ; <i1> [#uses=1] + %139 = icmp sgt i32 %.SV40.phi, %xN ; <i1> [#uses=1] + br label %meshBB364 + +bb56.fragment: ; preds = %meshBB364 + %Opq.sa.calc1002 = xor i32 %Opq.link.mask737, 77 ; <i32> [#uses=6] + %or.cond5 = and i1 %.SV256.phi, %.not4.SV.phi ; <i1> [#uses=1] + %140 = icmp slt i32 %yN, 0 ; <i1> [#uses=2] + br i1 %or.cond5, label %bb58, label %bb83 + +bb58: ; preds = %bb56.fragment + %Opq.sa.calc596 = xor i32 %Opq.sa.calc1002, 73 ; <i32> [#uses=1] + %Opq.sa.calc595 = add i32 %Opq.sa.calc596, 147 ; <i32> [#uses=0] + br i1 %140, label %bb59, label %bb76 + +bb59: ; preds = %bb58 + %Opq.sa.calc599 = add i32 %Opq.sa.calc1002, 151 ; <i32> [#uses=0] + %141 = getelementptr %struct.Macroblock* %2, i64 %3, i32 20 ; <i32*> [#uses=1] + %142 = load i32* %141, align 4 ; <i32> [#uses=1] + br label %bb59.fragment + +bb59.fragment: ; preds = %bb59 + %Opq.sa.calc731 = sub i32 
%Opq.sa.calc1002, -161 ; <i32> [#uses=3] + %143 = icmp eq i32 %142, 0 ; <i1> [#uses=1] + %144 = and i32 %curr_mb_nr, 1 ; <i32> [#uses=1] + %145 = icmp eq i32 %144, 0 ; <i1> [#uses=2] + br i1 %143, label %bb60, label %bb68 + +bb60: ; preds = %bb59.fragment + %Opq.sa.calc602 = xor i32 %Opq.sa.calc731, 1 ; <i32> [#uses=2] + br i1 %145, label %bb61, label %bb66 + +bb61: ; preds = %bb60 + %Opq.sa.calc605 = xor i32 %Opq.sa.calc731, 57 ; <i32> [#uses=1] + %146 = getelementptr %struct.Macroblock* %2, i64 %3, i32 23 ; <i32*> [#uses=2] + %147 = load i32* %146, align 8 ; <i32> [#uses=3] + %148 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=3] + br label %bb61.fragment + +bb61.fragment: ; preds = %bb61 + %Opq.sa.calc700 = sub i32 %Opq.sa.calc605, 108 ; <i32> [#uses=3] + store i32 %147, i32* %148, align 4 + %149 = getelementptr %struct.Macroblock* %2, i64 %3, i32 27 ; <i32*> [#uses=4] + %150 = load i32* %149, align 8 ; <i32> [#uses=1] + %151 = icmp eq i32 %150, 0 ; <i1> [#uses=1] + br i1 %151, label %bb65, label %bb62 + +bb62: ; preds = %bb61.fragment + %Opq.sa.calc608 = add i32 %Opq.sa.calc700, -94 ; <i32> [#uses=1] + %152 = load %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=2] + %153 = getelementptr %struct.ImageParameters* %152, i64 0, i32 45 ; <i32*> [#uses=1] + %154 = load i32* %153, align 4 ; <i32> [#uses=1] + %155 = icmp eq i32 %154, 1 ; <i1> [#uses=1] + br i1 %155, label %bb63, label %bb64 + +bb63: ; preds = %bb62 + %Opq.sa.calc611 = add i32 %Opq.sa.calc700, -101 ; <i32> [#uses=2] + %156 = getelementptr %struct.ImageParameters* %152, i64 0, i32 39 ; <%struct.Macroblock**> [#uses=1] + %157 = load %struct.Macroblock** %156, align 8 ; <%struct.Macroblock*> [#uses=1] + %158 = load i32* %146, align 8 ; <i32> [#uses=1] + br label %meshBB452 + +bb63.fragment: ; preds = %meshBB452 + %Opq.sa.calc891 = add i32 %Opq.link.mask823, 18 ; <i32> [#uses=2] + %Opq.sa.calc890 = add i32 %Opq.sa.calc891, -3 ; <i32> [#uses=2] + %159 = sext 
i32 %.SV266.phi to i64 ; <i64> [#uses=1] + %160 = getelementptr %struct.Macroblock* %.SV264.phi, i64 %159, i32 20 ; <i32*> [#uses=1] + %161 = load i32* %160, align 4 ; <i32> [#uses=1] + %162 = icmp eq i32 %161, 0 ; <i1> [#uses=1] + br i1 %162, label %bb64, label %meshBB456 + +bb64: ; preds = %bb63.fragment, %bb62 + %.SV38.phi1132 = phi i64 [ %.SV38.phi1110, %bb63.fragment ], [ %.SV38.phi1098, %bb62 ] ; <i64> [#uses=1] + %.SV52.phi1131 = phi i32* [ %.SV52.phi1109, %bb63.fragment ], [ %.SV52.phi1097, %bb62 ] ; <i32*> [#uses=1] + %.SV68.phi1130 = phi i32 [ %.SV68.phi1108, %bb63.fragment ], [ %.SV68.phi1096, %bb62 ] ; <i32> [#uses=1] + %.SV70.phi1129 = phi i32 [ %.SV70.phi1107, %bb63.fragment ], [ %.SV70.phi1095, %bb62 ] ; <i32> [#uses=1] + %Opq.link.SV615.phi = phi i32 [ %Opq.sa.calc890, %bb63.fragment ], [ %Opq.sa.calc608, %bb62 ] ; <i32> [#uses=1] + %.SV150.phi = phi i32* [ %.SV150.phi1060, %bb63.fragment ], [ %148, %bb62 ] ; <i32*> [#uses=1] + %.SV152.phi = phi i32* [ %.SV152.phi1059, %bb63.fragment ], [ %149, %bb62 ] ; <i32*> [#uses=1] + %.SV148.phi = phi i32 [ %.SV148.phi1057, %bb63.fragment ], [ %147, %bb62 ] ; <i32> [#uses=1] + %Opq.link.mask = and i32 %Opq.link.SV615.phi, 1 ; <i32> [#uses=1] + %Opq.sa.calc614 = add i32 %Opq.link.mask, 189 ; <i32> [#uses=1] + %163 = add i32 %.SV148.phi, 1 ; <i32> [#uses=1] + store i32 %163, i32* %.SV150.phi, align 4 + br label %bb65 + +bb65: ; preds = %meshBB456, %bb64, %bb61.fragment + %.SV38.phi1144 = phi i64 [ %.SV38.phi1137, %meshBB456 ], [ %.SV38.phi1098, %bb61.fragment ], [ %.SV38.phi1132, %bb64 ] ; <i64> [#uses=1] + %.SV52.phi1143 = phi i32* [ %.SV52.phi1136, %meshBB456 ], [ %.SV52.phi1097, %bb61.fragment ], [ %.SV52.phi1131, %bb64 ] ; <i32*> [#uses=1] + %.SV68.phi1142 = phi i32 [ %.SV68.phi1135, %meshBB456 ], [ %.SV68.phi1096, %bb61.fragment ], [ %.SV68.phi1130, %bb64 ] ; <i32> [#uses=1] + %.SV70.phi1141 = phi i32 [ %.SV70.phi1134, %meshBB456 ], [ %.SV70.phi1095, %bb61.fragment ], [ %.SV70.phi1129, %bb64 ] ; <i32> 
[#uses=1] + %.SV152.phi1058 = phi i32* [ %.SV152.phi1133, %meshBB456 ], [ %149, %bb61.fragment ], [ %.SV152.phi, %bb64 ] ; <i32*> [#uses=1] + %Opq.link.SV618.phi = phi i32 [ %Opq.sa.calc816, %meshBB456 ], [ %Opq.sa.calc700, %bb61.fragment ], [ %Opq.sa.calc614, %bb64 ] ; <i32> [#uses=1] + %Opq.link.mask620 = and i32 %Opq.link.SV618.phi, 40 ; <i32> [#uses=1] + %Opq.sa.calc617 = add i32 %Opq.link.mask620, -35 ; <i32> [#uses=2] + %164 = load i32* %.SV152.phi1058, align 8 ; <i32> [#uses=1] + br label %meshBB436 + +bb65.fragment: ; preds = %meshBB436 + %Opq.sa.calc832 = add i32 %Opq.link.mask706, 1 ; <i32> [#uses=2] + store i32 %.SV268.phi, i32* %.load62.SV.phi, align 4 + br label %meshBB364 + +bb66: ; preds = %bb60 + %Opq.sa.calc621 = add i32 %Opq.sa.calc602, -217 ; <i32> [#uses=1] + %165 = add i32 %curr_mb_nr, -1 ; <i32> [#uses=1] + %166 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=1] + br label %meshBB420 + +bb66.fragment: ; preds = %meshBB420 + %Opq.sa.calc795 = xor i32 %Opq.sa.calc837, 105 ; <i32> [#uses=2] + %Opq.sa.calc794 = sub i32 %Opq.sa.calc795, 167 ; <i32> [#uses=1] + store i32 %.SV270.phi, i32* %.SV272.phi, align 4 + store i32 1, i32* %.load61.SV.phi, align 4 + br label %meshBB444 + +bb68: ; preds = %bb59.fragment + %Opq.sa.calc624 = sub i32 %Opq.sa.calc731, 229 ; <i32> [#uses=3] + %167 = getelementptr %struct.Macroblock* %2, i64 %3, i32 23 ; <i32*> [#uses=1] + br label %meshBB344 + +bb68.fragment: ; preds = %meshBB344 + %Opq.sa.calc784 = sub i32 %Opq.link.mask722, 3 ; <i32> [#uses=5] + %168 = load i32* %.SV274.phi, align 8 ; <i32> [#uses=3] + br i1 %.load144.SV.phi, label %bb69, label %meshBB412 + +bb69: ; preds = %bb68.fragment + %Opq.sa.calc627 = add i32 %Opq.sa.calc784, 163 ; <i32> [#uses=0] + %169 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=2] + store i32 %168, i32* %169, align 4 + %170 = getelementptr %struct.Macroblock* %2, i64 %3, i32 27 ; <i32*> [#uses=2] + br label %bb69.fragment + +bb69.fragment: ; 
preds = %bb69 + %Opq.sa.calc996 = sub i32 %Opq.sa.calc784, -9 ; <i32> [#uses=3] + %Opq.sa.calc994 = sub i32 %Opq.sa.calc996, %Opq.sa.calc784 ; <i32> [#uses=1] + %Opq.sa.calc995 = sub i32 %Opq.sa.calc994, 3 ; <i32> [#uses=2] + %171 = load i32* %170, align 8 ; <i32> [#uses=3] + store i32 %171, i32* %.SV52.phi1170, align 4 + %172 = load i32* %170, align 8 ; <i32> [#uses=1] + %173 = icmp eq i32 %172, 0 ; <i1> [#uses=1] + br i1 %173, label %meshBB396, label %meshBB400 + +bb70: ; preds = %meshBB400 + %Opq.sa.calc630 = add i32 %Opq.sa.calc824, -203 ; <i32> [#uses=2] + %174 = load %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=1] + %175 = getelementptr %struct.ImageParameters* %174, i64 0, i32 39 ; <%struct.Macroblock**> [#uses=1] + %176 = load %struct.Macroblock** %175, align 8 ; <%struct.Macroblock*> [#uses=1] + %177 = load i32* %.SV156.phi, align 8 ; <i32> [#uses=1] + br label %meshBB428 + +bb70.fragment: ; preds = %meshBB428 + %Opq.sa.calc739 = xor i32 %Opq.sa.calc897, 213 ; <i32> [#uses=2] + %Opq.sa.calc738 = sub i32 %Opq.sa.calc739, 1 ; <i32> [#uses=2] + %178 = sext i32 %.SV280.phi to i64 ; <i64> [#uses=1] + %179 = getelementptr %struct.Macroblock* %.SV278.phi, i64 %178, i32 20 ; <i32*> [#uses=1] + %180 = load i32* %179, align 4 ; <i32> [#uses=1] + %181 = icmp eq i32 %180, 0 ; <i1> [#uses=1] + br i1 %181, label %meshBB452, label %meshBB356 + +bb71: ; preds = %meshBB452 + %Opq.sa.calc633 = xor i32 %Opq.sa.calc820, 118 ; <i32> [#uses=1] + %182 = add i32 %.SV158.phi1106, 1 ; <i32> [#uses=1] + br label %meshBB352 + +bb71.fragment: ; preds = %meshBB352 + %Opq.sa.calc809 = sub i32 %Opq.sa.calc876, 17 ; <i32> [#uses=2] + store i32 %.SV282.phi, i32* %.load163.SV.phi, align 4 + %183 = shl i32 %yN, 1 ; <i32> [#uses=1] + br label %meshBB436 + +bb74: ; preds = %meshBB412 + %Opq.sa.calc636 = xor i32 %Opq.sa.calc932, 233 ; <i32> [#uses=1] + %184 = add i32 %.SV158.phi1063, 1 ; <i32> [#uses=1] + %185 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1 ; 
<i32*> [#uses=1] + br label %bb74.fragment + +bb74.fragment: ; preds = %bb74 + %Opq.sa.calc1011 = sub i32 %Opq.sa.calc636, -19 ; <i32> [#uses=0] + store i32 %184, i32* %185, align 4 + %186 = getelementptr %struct.Macroblock* %2, i64 %3, i32 27 ; <i32*> [#uses=1] + %187 = load i32* %186, align 8 ; <i32> [#uses=2] + store i32 %187, i32* %.SV52.phi1186, align 4 + br label %bb96 + +bb76: ; preds = %bb58 + %Opq.sa.calc640 = xor i32 %Opq.sa.calc1002, 71 ; <i32> [#uses=4] + %Opq.sa.calc639 = xor i32 %Opq.sa.calc640, 219 ; <i32> [#uses=0] + %188 = icmp eq i32 %yN, 0 ; <i1> [#uses=1] + br i1 %188, label %bb77, label %bb79 + +bb77: ; preds = %bb76 + %Opq.sa.calc643 = add i32 %Opq.sa.calc640, 2 ; <i32> [#uses=2] + %189 = load %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=1] + %190 = getelementptr %struct.ImageParameters* %189, i64 0, i32 45 ; <i32*> [#uses=1] + %191 = load i32* %190, align 4 ; <i32> [#uses=1] + %192 = icmp eq i32 %191, 2 ; <i1> [#uses=1] + br i1 %192, label %meshBB416, label %bb79 + +bb78: ; preds = %meshBB416 + %Opq.sa.calc647 = xor i32 %Opq.sa.calc971, 25 ; <i32> [#uses=2] + %Opq.sa.calc646 = sub i32 %Opq.sa.calc647, 29 ; <i32> [#uses=0] + %193 = getelementptr %struct.Macroblock* %2, i64 %3, i32 23 ; <i32*> [#uses=1] + %194 = load i32* %193, align 8 ; <i32> [#uses=1] + %195 = add i32 %194, 1 ; <i32> [#uses=1] + br label %bb78.fragment + +bb78.fragment: ; preds = %bb78 + %Opq.sa.calc850 = sub i32 %Opq.sa.calc647, -93 ; <i32> [#uses=0] + %196 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=1] + store i32 %195, i32* %196, align 4 + store i32 1, i32* %.SV52.phi1200, align 4 + %197 = add i32 %yN, -1 ; <i32> [#uses=1] + br label %bb98 + +bb79: ; preds = %bb77, %bb76 + %Opq.link.SV652.phi = phi i32 [ %Opq.sa.calc643, %bb77 ], [ %Opq.sa.calc640, %bb76 ] ; <i32> [#uses=1] + %Opq.link.mask654 = and i32 %Opq.link.SV652.phi, 8 ; <i32> [#uses=1] + %Opq.sa.calc651 = sub i32 %Opq.link.mask654, -2 ; <i32> [#uses=3] + 
%Opq.sa.calc650 = xor i32 %Opq.sa.calc651, 1 ; <i32> [#uses=2] + br i1 %or.cond.not.SV.phi1094, label %meshBB456, label %meshBB352 + +bb81: ; preds = %meshBB456 + %Opq.sa.calc655 = add i32 %Opq.sa.calc816, 56 ; <i32> [#uses=0] + %198 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=1] + store i32 %curr_mb_nr, i32* %198, align 4 + store i32 1, i32* %.SV52.phi1136, align 4 + br label %bb98 + +bb83: ; preds = %bb56.fragment + %Opq.sa.calc658 = sub i32 %Opq.sa.calc1002, 73 ; <i32> [#uses=3] + br i1 %140, label %bb84, label %meshBB424 + +bb84: ; preds = %bb83 + %Opq.sa.calc661 = xor i32 %Opq.sa.calc658, 22 ; <i32> [#uses=1] + %199 = getelementptr %struct.Macroblock* %2, i64 %3, i32 20 ; <i32*> [#uses=1] + %200 = load i32* %199, align 4 ; <i32> [#uses=1] + br label %meshBB400 + +bb84.fragment: ; preds = %meshBB400 + %Opq.sa.calc802 = xor i32 %Opq.sa.calc824, 240 ; <i32> [#uses=3] + %201 = icmp eq i32 %.SV290.phi, 0 ; <i1> [#uses=1] + %202 = and i32 %curr_mb_nr, 1 ; <i32> [#uses=1] + %203 = icmp eq i32 %202, 0 ; <i1> [#uses=2] + br i1 %201, label %meshBB372, label %bb89 + +bb85: ; preds = %meshBB372 + %Opq.sa.calc667 = sub i32 %Opq.sa.calc812, 20 ; <i32> [#uses=3] + %Opq.sa.calc666 = sub i32 %Opq.sa.calc667, 84 ; <i32> [#uses=2] + %Opq.sa.calc664 = add i32 %Opq.sa.calc666, %Opq.sa.calc667 ; <i32> [#uses=1] + %Opq.sa.calc665 = add i32 %Opq.sa.calc664, -112 ; <i32> [#uses=2] + br i1 %.SV167.phi, label %meshBB336, label %meshBB440 + +bb86: ; preds = %meshBB336 + %Opq.sa.calc670 = sub i32 %Opq.sa.calc979, 35 ; <i32> [#uses=1] + %204 = getelementptr %struct.Macroblock* %2, i64 %3, i32 24 ; <i32*> [#uses=1] + %205 = load i32* %204, align 4 ; <i32> [#uses=1] + %206 = add i32 %205, 1 ; <i32> [#uses=1] + %207 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=1] + br label %bb86.fragment + +bb86.fragment: ; preds = %bb86 + %Opq.sa.calc943 = xor i32 %Opq.sa.calc670, 123 ; <i32> [#uses=2] + store i32 %206, i32* %207, align 4 + %208 = 
getelementptr %struct.Macroblock* %2, i64 %3, i32 28 ; <i32*> [#uses=1] + %209 = load i32* %208, align 4 ; <i32> [#uses=2] + store i32 %209, i32* %.SV52.phi1234, align 4 + br label %meshBB424 + +bb87: ; preds = %meshBB440 + %Opq.sa.calc674 = xor i32 %Opq.sa.calc990, 44 ; <i32> [#uses=1] + %Opq.sa.calc673 = xor i32 %Opq.sa.calc674, 160 ; <i32> [#uses=1] + store i32 0, i32* %.SV52.phi1235, align 4 + br label %meshBB408 + +bb89: ; preds = %bb84.fragment + %Opq.sa.calc677 = sub i32 %Opq.sa.calc802, -183 ; <i32> [#uses=1] + %210 = getelementptr %struct.Macroblock* %2, i64 %3, i32 24 ; <i32*> [#uses=2] + br label %bb89.fragment + +bb89.fragment: ; preds = %bb89 + %Opq.sa.calc962 = add i32 %Opq.sa.calc677, -188 ; <i32> [#uses=3] + %211 = load i32* %210, align 4 ; <i32> [#uses=3] + br i1 %203, label %bb90, label %meshBB408 + +bb90: ; preds = %bb89.fragment + %Opq.sa.calc680 = xor i32 %Opq.sa.calc962, 92 ; <i32> [#uses=1] + %212 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=2] + store i32 %211, i32* %212, align 4 + %213 = getelementptr %struct.Macroblock* %2, i64 %3, i32 28 ; <i32*> [#uses=2] + br label %bb90.fragment + +bb90.fragment: ; preds = %bb90 + %Opq.sa.calc773 = sub i32 %Opq.sa.calc680, 60 ; <i32> [#uses=3] + %Opq.sa.calc772 = add i32 %Opq.sa.calc773, -25 ; <i32> [#uses=2] + %214 = load i32* %213, align 4 ; <i32> [#uses=3] + store i32 %214, i32* %.SV52.phi1190, align 4 + %215 = load i32* %213, align 4 ; <i32> [#uses=1] + %216 = icmp eq i32 %215, 0 ; <i1> [#uses=1] + br i1 %216, label %meshBB416, label %meshBB368 + +bb91: ; preds = %meshBB368 + %Opq.sa.calc683 = sub i32 %Opq.sa.calc768, -7 ; <i32> [#uses=0] + %217 = load %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=1] + %218 = getelementptr %struct.ImageParameters* %217, i64 0, i32 39 ; <%struct.Macroblock**> [#uses=1] + %219 = load %struct.Macroblock** %218, align 8 ; <%struct.Macroblock*> [#uses=1] + %220 = load i32* %.SV170.phi, align 4 ; <i32> [#uses=1] + br 
label %bb91.fragment + +bb91.fragment: ; preds = %bb91 + %Opq.sa.calc853 = xor i32 %Opq.sa.calc768, 8 ; <i32> [#uses=1] + %221 = sext i32 %220 to i64 ; <i64> [#uses=1] + %222 = getelementptr %struct.Macroblock* %219, i64 %221, i32 20 ; <i32*> [#uses=1] + %223 = load i32* %222, align 4 ; <i32> [#uses=1] + %224 = icmp eq i32 %223, 0 ; <i1> [#uses=1] + br i1 %224, label %bb92, label %bb96 + +bb92: ; preds = %bb91.fragment + %Opq.sa.calc686 = xor i32 %Opq.sa.calc853, 2 ; <i32> [#uses=1] + %225 = add i32 %.SV172.phi, 1 ; <i32> [#uses=1] + br label %bb92.fragment + +bb92.fragment: ; preds = %bb92 + %Opq.sa.calc1005 = xor i32 %Opq.sa.calc686, 130 ; <i32> [#uses=2] + store i32 %225, i32* %.SV176.phi, align 4 + %226 = shl i32 %yN, 1 ; <i32> [#uses=1] + br label %meshBB380 + +bb95: ; preds = %meshBB408 + %Opq.sa.calc689 = xor i32 %Opq.sa.calc912, 207 ; <i32> [#uses=3] + %227 = add i32 %.SV172.phi1074, 1 ; <i32> [#uses=1] + %228 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=1] + br label %meshBB384 + +bb95.fragment: ; preds = %meshBB384 + %Opq.sa.calc841 = sub i32 %Opq.sa.calc901, 76 ; <i32> [#uses=0] + store i32 %.SV306.phi, i32* %.SV308.phi, align 4 + %229 = getelementptr %struct.Macroblock* %.load.SV.phi, i64 %.load20.SV.phi, i32 28 ; <i32*> [#uses=1] + %230 = load i32* %229, align 4 ; <i32> [#uses=2] + store i32 %230, i32* %.load53.SV.phi, align 4 + br label %bb96 + +bb96: ; preds = %meshBB444, %meshBB440, %meshBB436, %meshBB424, %meshBB420, %meshBB416, %meshBB396, %meshBB388, %meshBB380, %meshBB376, %meshBB364, %meshBB356, %meshBB340, %meshBB324, %meshBB, %bb95.fragment, %bb91.fragment, %bb74.fragment, %bb51, %bb48, %bb43, %bb41.fragment, %bb37.fragment, %bb36.fragment, %bb34.fragment, %bb30.fragment, %bb25, %bb19.fragment, %bb16.fragment, %bb15.fragment, %bb11.fragment, %bb9.fragment, %bb8.fragment, %bb7.fragment + %.SV38.phi1087 = phi i64 [ %.SV38.phi1224, %meshBB444 ], [ %.SV38.phi1210, %meshBB440 ], [ %.SV38.phi1147, %meshBB436 ], [ 
%.SV38.phi1197, %meshBB424 ], [ %.SV38.phi1194, %meshBB420 ], [ %.SV38.phi1201, %meshBB416 ], [ %.SV38.phi, %meshBB396 ], [ %.SV38.phi1118, %meshBB388 ], [ %.SV38.phi1207, %meshBB380 ], [ %.SV38.phi1153, %meshBB376 ], [ %.SV38.phi1098, %meshBB364 ], [ %.SV38.phi1121, %meshBB356 ], [ %.SV38.phi1167, %meshBB340 ], [ %.SV38.phi1175, %meshBB324 ], [ %.SV38.phi1183, %meshBB ], [ %.SV38.phi1164, %bb91.fragment ], [ %.SV38.phi1179, %bb48 ], [ %.SV38.phi1231, %bb41.fragment ], [ %.SV38.phi1172, %bb25 ], [ %.SV38.phi1175, %bb15.fragment ], [ %.SV38.phi1164, %bb9.fragment ], [ %.SV38.phi1164, %bb8.fragment ], [ %.SV38.phi1221, %bb95.fragment ], [ %.SV38.phi1187, %bb74.fragment ], [ %.SV38.phi1227, %bb51 ], [ %.SV38.phi1179, %bb43 ], [ %.SV38.phi1103, %bb37.fragment ], [ %.SV38.phi1214, %bb36.fragment ], [ %.SV38.phi1227, %bb34.fragment ], [ %.SV38.phi1121, %bb30.fragment ], [ %.SV38.phi1187, %bb19.fragment ], [ %.SV38.phi1175, %bb16.fragment ], [ %.SV38.phi1204, %bb11.fragment ], [ %.SV38.phi1118, %bb7.fragment ] ; <i64> [#uses=2] + %.SV68.phi1086 = phi i32 [ %.SV68.phi1223, %meshBB444 ], [ %.SV68.phi1209, %meshBB440 ], [ %.SV68.phi1146, %meshBB436 ], [ %.SV68.phi1196, %meshBB424 ], [ %.SV68.phi1193, %meshBB420 ], [ %.SV68.phi1199, %meshBB416 ], [ %.SV68.phi, %meshBB396 ], [ %.SV68.phi1117, %meshBB388 ], [ %.SV68.phi1206, %meshBB380 ], [ %.SV68.phi1152, %meshBB376 ], [ %.SV68.phi1096, %meshBB364 ], [ %.SV68.phi1120, %meshBB356 ], [ %.SV68.phi1166, %meshBB340 ], [ %.SV68.phi1174, %meshBB324 ], [ %.SV68.phi1181, %meshBB ], [ %.SV68.phi1162, %bb91.fragment ], [ %.SV68.phi1177, %bb48 ], [ %.SV68.phi1229, %bb41.fragment ], [ %.SV68.phi1169, %bb25 ], [ %.SV68.phi1174, %bb15.fragment ], [ %.SV68.phi1162, %bb9.fragment ], [ %.SV68.phi1162, %bb8.fragment ], [ %.SV68.phi1220, %bb95.fragment ], [ %.SV68.phi1185, %bb74.fragment ], [ %.SV68.phi1226, %bb51 ], [ %.SV68.phi1177, %bb43 ], [ %.SV68.phi1100, %bb37.fragment ], [ %.SV68.phi1212, %bb36.fragment ], [ %.SV68.phi1226, %bb34.fragment 
], [ %.SV68.phi1120, %bb30.fragment ], [ %.SV68.phi1185, %bb19.fragment ], [ %.SV68.phi1174, %bb16.fragment ], [ %.SV68.phi1203, %bb11.fragment ], [ %.SV68.phi1117, %bb7.fragment ] ; <i32> [#uses=2] + %.SV70.phi1085 = phi i32 [ %.SV70.phi1222, %meshBB444 ], [ %.SV70.phi1208, %meshBB440 ], [ %.SV70.phi1145, %meshBB436 ], [ %.SV70.phi1195, %meshBB424 ], [ %.SV70.phi1192, %meshBB420 ], [ %.SV70.phi1198, %meshBB416 ], [ %.SV70.phi, %meshBB396 ], [ %.SV70.phi1116, %meshBB388 ], [ %.SV70.phi1205, %meshBB380 ], [ %.SV70.phi1151, %meshBB376 ], [ %.SV70.phi1095, %meshBB364 ], [ %.SV70.phi1119, %meshBB356 ], [ %.SV70.phi1165, %meshBB340 ], [ %.SV70.phi1173, %meshBB324 ], [ %.SV70.phi1180, %meshBB ], [ %.SV70.phi1161, %bb91.fragment ], [ %.SV70.phi1176, %bb48 ], [ %.SV70.phi1228, %bb41.fragment ], [ %.SV70.phi1168, %bb25 ], [ %.SV70.phi1173, %bb15.fragment ], [ %.SV70.phi1161, %bb9.fragment ], [ %.SV70.phi1161, %bb8.fragment ], [ %.SV70.phi1219, %bb95.fragment ], [ %.SV70.phi1184, %bb74.fragment ], [ %.SV70.phi1225, %bb51 ], [ %.SV70.phi1176, %bb43 ], [ %.SV70.phi1099, %bb37.fragment ], [ %.SV70.phi1211, %bb36.fragment ], [ %.SV70.phi1225, %bb34.fragment ], [ %.SV70.phi1119, %bb30.fragment ], [ %.SV70.phi1184, %bb19.fragment ], [ %.SV70.phi1173, %bb16.fragment ], [ %.SV70.phi1202, %bb11.fragment ], [ %.SV70.phi1116, %bb7.fragment ] ; <i32> [#uses=2] + %.SV.phi = phi i32 [ %.SV.phi1048, %meshBB444 ], [ %.SV.phi1056, %meshBB440 ], [ %.SV.phi1067, %meshBB436 ], [ %.SV.phi1072, %meshBB424 ], [ %.SV.phi1044, %meshBB420 ], [ %.SV.phi1076, %meshBB416 ], [ %.SV.phi1065, %meshBB396 ], [ %.SV.phi1054, %meshBB388 ], [ %.SV.phi1052, %meshBB380 ], [ %.SV.phi1050, %meshBB376 ], [ %.SV.phi1062, %meshBB364 ], [ %.SV.phi1046, %meshBB356 ], [ %.SV.phi1042, %meshBB340 ], [ %.SV.phi1032, %meshBB324 ], [ %.SV.phi1034, %meshBB ], [ %.SV178.phi, %bb91.fragment ], [ %.SV118.phi1040, %bb48 ], [ %.SV118.phi1125, %bb41.fragment ], [ %.SV118.phi, %bb25 ], [ %.load94.SV.phi, %bb15.fragment ], [ %32, 
%bb9.fragment ], [ %32, %bb8.fragment ], [ %230, %bb95.fragment ], [ %187, %bb74.fragment ], [ %.SV118.phi1081, %bb51 ], [ %.SV118.phi1040, %bb43 ], [ %.load131.SV.phi, %bb37.fragment ], [ %.SV118.phi1154, %bb36.fragment ], [ %.load129.SV.phi, %bb34.fragment ], [ %.SV118.phi1158, %bb30.fragment ], [ %66, %bb19.fragment ], [ %.SV93.phi, %bb16.fragment ], [ %.load84.SV.phi, %bb11.fragment ], [ %27, %bb7.fragment ] ; <i32> [#uses=1] + %yM.0.SV.phi = phi i32 [ -1, %meshBB444 ], [ %yN, %meshBB440 ], [ %yM.0.SV.phi1066, %meshBB436 ], [ %yN, %meshBB424 ], [ %yN, %meshBB420 ], [ -1, %meshBB416 ], [ -1, %meshBB396 ], [ %yM.0.SV.phi1053, %meshBB388 ], [ %yM.0.SV.phi1051, %meshBB380 ], [ %yM.0.SV.phi1049, %meshBB376 ], [ %yN, %meshBB364 ], [ %yN, %meshBB356 ], [ %yM.0.SV.phi1041, %meshBB340 ], [ -1, %meshBB324 ], [ -1, %meshBB ], [ %yN, %bb91.fragment ], [ -1, %bb48 ], [ %yN, %bb41.fragment ], [ -1, %bb25 ], [ %yN, %bb15.fragment ], [ %yN, %bb9.fragment ], [ -1, %bb8.fragment ], [ %yN, %bb95.fragment ], [ %yN, %bb74.fragment ], [ %133, %bb51 ], [ %118, %bb43 ], [ %107, %bb37.fragment ], [ %104, %bb36.fragment ], [ %yN, %bb34.fragment ], [ %91, %bb30.fragment ], [ %yN, %bb19.fragment ], [ %62, %bb16.fragment ], [ %45, %bb11.fragment ], [ %yN, %bb7.fragment ] ; <i32> [#uses=2] + %Opq.sa.calc693 = add i32 0, 15 ; <i32> [#uses=2] + %Opq.sa.calc692 = xor i32 %Opq.sa.calc693, 8 ; <i32> [#uses=1] + %231 = icmp eq i32 %.SV.phi, 0 ; <i1> [#uses=1] + br i1 %231, label %bb97, label %meshBB404 + +bb97: ; preds = %meshBB424, %meshBB408, %meshBB352, %bb96, %bb21 + %.SV38.phi1150 = phi i64 [ %.SV38.phi1197, %meshBB424 ], [ %.SV38.phi1218, %meshBB408 ], [ %.SV38.phi1140, %meshBB352 ], [ %.SV38.phi1087, %bb96 ], [ %4, %bb21 ] ; <i64> [#uses=1] + %.SV68.phi1149 = phi i32 [ %.SV68.phi1196, %meshBB424 ], [ %.SV68.phi1216, %meshBB408 ], [ %.SV68.phi1139, %meshBB352 ], [ %.SV68.phi1086, %bb96 ], [ %.SV68.phi1021, %bb21 ] ; <i32> [#uses=1] + %.SV70.phi1148 = phi i32 [ %.SV70.phi1195, %meshBB424 ], 
[ %.SV70.phi1215, %meshBB408 ], [ %.SV70.phi1138, %meshBB352 ], [ %.SV70.phi1085, %bb96 ], [ %.SV70.phi1027, %bb21 ] ; <i32> [#uses=1] + %yM.0.reg2mem.0.SV.phi = phi i32 [ -1, %meshBB424 ], [ -1, %meshBB408 ], [ -1, %meshBB352 ], [ %yM.0.SV.phi, %bb96 ], [ -1, %bb21 ] ; <i32> [#uses=1] + %Opq.sa.calc694 = xor i32 0, 243 ; <i32> [#uses=1] + %232 = load %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=1] + %233 = getelementptr %struct.ImageParameters* %232, i64 0, i32 45 ; <i32*> [#uses=1] + br label %bb97.fragment + +bb97.fragment: ; preds = %bb97 + %Opq.sa.calc928 = xor i32 %Opq.sa.calc694, 128 ; <i32> [#uses=1] + %234 = load i32* %233, align 4 ; <i32> [#uses=1] + %235 = icmp eq i32 %234, 0 ; <i1> [#uses=1] + br i1 %235, label %return, label %bb98 + +bb98: ; preds = %meshBB444, %meshBB404, %bb97.fragment, %bb81, %bb78.fragment + %.SV38.phi1093 = phi i64 [ %.SV38.phi1224, %meshBB444 ], [ %.SV38.phi1017, %meshBB404 ], [ %.SV38.phi1150, %bb97.fragment ], [ %.SV38.phi1137, %bb81 ], [ %.SV38.phi1201, %bb78.fragment ] ; <i64> [#uses=2] + %.SV68.phi1092 = phi i32 [ %.SV68.phi1223, %meshBB444 ], [ %.SV68.phi1023, %meshBB404 ], [ %.SV68.phi1149, %bb97.fragment ], [ %.SV68.phi1135, %bb81 ], [ %.SV68.phi1199, %bb78.fragment ] ; <i32> [#uses=2] + %.SV70.phi1091 = phi i32 [ %.SV70.phi1222, %meshBB444 ], [ %.SV70.phi1028, %meshBB404 ], [ %.SV70.phi1148, %bb97.fragment ], [ %.SV70.phi1134, %bb81 ], [ %.SV70.phi1198, %bb78.fragment ] ; <i32> [#uses=2] + %yM.0.reg2mem.1.SV.phi1068 = phi i32 [ %yN, %meshBB444 ], [ %yM.0.reg2mem.1.SV.phi1077, %meshBB404 ], [ %yM.0.reg2mem.0.SV.phi, %bb97.fragment ], [ %yN, %bb81 ], [ %197, %bb78.fragment ] ; <i32> [#uses=1] + %Opq.sa.calc695 = xor i32 0, 23 ; <i32> [#uses=2] + %236 = and i32 %.SV70.phi1091, %xN ; <i32> [#uses=1] + %237 = getelementptr %struct.PixelPos* %pix, i64 0, i32 2 ; <i32*> [#uses=2] + store i32 %236, i32* %237, align 4 + %238 = and i32 %yM.0.reg2mem.1.SV.phi1068, %.SV68.phi1092 ; <i32> [#uses=1] + 
%239 = getelementptr %struct.PixelPos* %pix, i64 0, i32 3 ; <i32*> [#uses=2] + store i32 %238, i32* %239, align 4 + %240 = getelementptr %struct.PixelPos* %pix, i64 0, i32 5 ; <i32*> [#uses=1] + br label %meshBB376 + +bb98.fragment: ; preds = %meshBB376 + %Opq.sa.calc1008 = sub i32 %Opq.link.mask911, 13 ; <i32> [#uses=1] + %241 = getelementptr %struct.PixelPos* %pix, i64 0, i32 4 ; <i32*> [#uses=4] + %242 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=1] + %243 = load i32* %242, align 4 ; <i32> [#uses=1] + %244 = load void (i32, i32*, i32*)** @get_mb_block_pos, align 8 ; <void (i32, i32*, i32*)*> [#uses=1] + tail call void %244(i32 %243, i32* %241, i32* %.SV317.phi) nounwind + %245 = load i32* %241, align 4 ; <i32> [#uses=1] + %246 = load %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=1] + %247 = getelementptr %struct.ImageParameters* %246, i64 0, i32 119, i64 %.load39.SV.phi, i64 0 ; <i32*> [#uses=1] + %248 = load i32* %247, align 4 ; <i32> [#uses=1] + %249 = mul i32 %248, %245 ; <i32> [#uses=2] + store i32 %249, i32* %241, align 4 + br label %bb98.fragment183 + +bb98.fragment183: ; preds = %bb98.fragment + %Opq.sa.calc777 = sub i32 %Opq.sa.calc1008, -158 ; <i32> [#uses=1] + %Opq.sa.calc776 = sub i32 %Opq.sa.calc777, 46 ; <i32> [#uses=0] + %250 = load i32* %.SV317.phi, align 4 ; <i32> [#uses=1] + %251 = load %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=1] + %252 = getelementptr %struct.ImageParameters* %251, i64 0, i32 119, i64 %.load39.SV.phi, i64 1 ; <i32*> [#uses=1] + %253 = load i32* %252, align 4 ; <i32> [#uses=1] + %254 = mul i32 %253, %250 ; <i32> [#uses=1] + %255 = load i32* %.SV313.phi, align 4 ; <i32> [#uses=1] + %256 = add i32 %255, %249 ; <i32> [#uses=1] + store i32 %256, i32* %241, align 4 + %257 = load i32* %.SV315.phi, align 4 ; <i32> [#uses=1] + %258 = add i32 %257, %254 ; <i32> [#uses=1] + store i32 %258, i32* %.SV317.phi, align 4 + ret void + +return: ; preds = 
%meshBB448, %meshBB396, %bb97.fragment + %Opq.link.SV697.phi = phi i32 [ %Opq.sa.calc957, %meshBB448 ], [ %Opq.sa.calc758, %meshBB396 ], [ %Opq.sa.calc928, %bb97.fragment ] ; <i32> [#uses=1] + %Opq.link.mask699 = and i32 %Opq.link.SV697.phi, 0 ; <i32> [#uses=1] + %Opq.sa.calc696 = add i32 %Opq.link.mask699, 238 ; <i32> [#uses=0] + ret void + +meshBB: ; preds = %bb33.fragment, %bb14.fragment + %.SV38.phi1183 = phi i64 [ %.SV38.phi1115, %bb14.fragment ], [ %.SV38.phi1172, %bb33.fragment ] ; <i64> [#uses=3] + %.SV68.phi1181 = phi i32 [ %.SV68.phi1112, %bb14.fragment ], [ %.SV68.phi1169, %bb33.fragment ] ; <i32> [#uses=3] + %.SV70.phi1180 = phi i32 [ %.SV70.phi1111, %bb14.fragment ], [ %.SV70.phi1168, %bb33.fragment ] ; <i32> [#uses=3] + %.SV104.phi1084 = phi i32 [ undef, %bb14.fragment ], [ %.SV104.phi, %bb33.fragment ] ; <i32> [#uses=1] + %.SV111.phi1083 = phi i32* [ undef, %bb14.fragment ], [ %.SV111.phi, %bb33.fragment ] ; <i32*> [#uses=1] + %.SV118.phi1082 = phi i32 [ undef, %bb14.fragment ], [ %.SV118.phi, %bb33.fragment ] ; <i32> [#uses=2] + %.SV.phi1034 = phi i32 [ %50, %bb14.fragment ], [ undef, %bb33.fragment ] ; <i32> [#uses=1] + %meshStackVariable.phi = phi i32 [ %Opq.sa.calc723, %bb14.fragment ], [ %Opq.sa.calc712, %bb33.fragment ] ; <i32> [#uses=1] + %Opq.link.SV829.phi = phi i32 [ %Opq.sa.calc723, %bb14.fragment ], [ %Opq.sa.calc534, %bb33.fragment ] ; <i32> [#uses=1] + %Opq.link.mask831 = and i32 %Opq.link.SV829.phi, 0 ; <i32> [#uses=1] + %Opq.sa.calc828 = sub i32 %Opq.link.mask831, -117 ; <i32> [#uses=2] + %meshCmp = icmp eq i32 %meshStackVariable.phi, 3 ; <i1> [#uses=1] + br i1 %meshCmp, label %bb35, label %bb96 + +meshBB324: ; preds = %bb32, %bb15 + %.SV38.phi1175 = phi i64 [ %.SV38.phi1172, %bb32 ], [ %.SV38.phi1115, %bb15 ] ; <i64> [#uses=3] + %.SV68.phi1174 = phi i32 [ %.SV68.phi1169, %bb32 ], [ %.SV68.phi1112, %bb15 ] ; <i32> [#uses=3] + %.SV70.phi1173 = phi i32 [ %.SV70.phi1168, %bb32 ], [ %.SV70.phi1111, %bb15 ] ; <i32> [#uses=3] + 
%.load94.SV.phi = phi i32 [ undef, %bb32 ], [ %50, %bb15 ] ; <i32> [#uses=1] + %.SV212.phi = phi %struct.Macroblock* [ undef, %bb32 ], [ %55, %bb15 ] ; <%struct.Macroblock*> [#uses=1] + %.SV214.phi = phi i32 [ undef, %bb32 ], [ %56, %bb15 ] ; <i32> [#uses=1] + %meshStackVariable325.phi = phi i32 [ %Opq.sa.calc531, %bb32 ], [ %Opq.sa.calc496, %bb15 ] ; <i32> [#uses=1] + %Opq.link.SV751.phi = phi i32 [ %Opq.sa.calc512, %bb32 ], [ %Opq.sa.calc723, %bb15 ] ; <i32> [#uses=1] + %.SV.phi1032 = phi i32 [ %.SV118.phi, %bb32 ], [ undef, %bb15 ] ; <i32> [#uses=1] + %.SV93.phi = phi i32 [ undef, %bb32 ], [ %50, %bb15 ] ; <i32> [#uses=1] + %.SV91.phi = phi i32* [ undef, %bb32 ], [ %48, %bb15 ] ; <i32*> [#uses=1] + %.SV87.phi = phi i32 [ undef, %bb32 ], [ %47, %bb15 ] ; <i32> [#uses=1] + %Opq.link.mask753 = and i32 %Opq.link.SV751.phi, 4 ; <i32> [#uses=1] + %Opq.sa.calc750 = add i32 %Opq.link.mask753, 203 ; <i32> [#uses=1] + %meshCmp327 = icmp eq i32 %meshStackVariable325.phi, 14 ; <i1> [#uses=1] + br i1 %meshCmp327, label %bb15.fragment, label %bb96 + +meshBB328: ; preds = %bb50, %bb34 + %.SV38.phi1227 = phi i64 [ %.SV38.phi1179, %bb50 ], [ %.SV38.phi1172, %bb34 ] ; <i64> [#uses=2] + %.SV68.phi1226 = phi i32 [ %.SV68.phi1177, %bb50 ], [ %.SV68.phi1169, %bb34 ] ; <i32> [#uses=2] + %.SV70.phi1225 = phi i32 [ %.SV70.phi1176, %bb50 ], [ %.SV70.phi1168, %bb34 ] ; <i32> [#uses=2] + %.SV118.phi1081 = phi i32 [ %.SV118.phi1040, %bb50 ], [ %.SV118.phi, %bb34 ] ; <i32> [#uses=1] + %.load129.SV.phi = phi i32 [ undef, %bb50 ], [ %.SV118.phi, %bb34 ] ; <i32> [#uses=1] + %.load116.SV.phi = phi i32* [ undef, %bb50 ], [ %.SV111.phi, %bb34 ] ; <i32*> [#uses=1] + %.SV238.phi = phi i32 [ undef, %bb50 ], [ %100, %bb34 ] ; <i32> [#uses=1] + %meshStackVariable329.phi = phi i32 [ %Opq.sa.calc577, %bb50 ], [ %Opq.sa.calc537, %bb34 ] ; <i32> [#uses=1] + %Opq.link.SV788.phi = phi i32 [ %Opq.sa.calc577, %bb50 ], [ %Opq.sa.calc712, %bb34 ] ; <i32> [#uses=1] + %Opq.link.mask790 = and i32 
%Opq.link.SV788.phi, 1 ; <i32> [#uses=1] + %Opq.sa.calc787 = sub i32 %Opq.link.mask790, -227 ; <i32> [#uses=2] + %meshCmp331 = icmp eq i32 %meshStackVariable329.phi, 11 ; <i1> [#uses=1] + br i1 %meshCmp331, label %bb34.fragment, label %bb51 + +meshBB332: ; preds = %bb44, %bb11 + %.SV38.phi1204 = phi i64 [ %.SV38.phi1231, %bb44 ], [ %.SV38.phi1164, %bb11 ] ; <i64> [#uses=2] + %.SV68.phi1203 = phi i32 [ %.SV68.phi1229, %bb44 ], [ %.SV68.phi1162, %bb11 ] ; <i32> [#uses=2] + %.SV70.phi1202 = phi i32 [ %.SV70.phi1228, %bb44 ], [ %.SV70.phi1161, %bb11 ] ; <i32> [#uses=2] + %.load127.SV.phi = phi i32 [ %.SV118.phi1125, %bb44 ], [ undef, %bb11 ] ; <i32> [#uses=1] + %.load114.SV.phi = phi i32* [ %.SV111.phi1126, %bb44 ], [ undef, %bb11 ] ; <i32*> [#uses=1] + %.load46.SV.phi = phi i32 [ %.SV43.phi1230, %bb44 ], [ undef, %bb11 ] ; <i32> [#uses=1] + %.SV248.phi = phi i32 [ %119, %bb44 ], [ undef, %bb11 ] ; <i32> [#uses=1] + %.load84.SV.phi = phi i32 [ undef, %bb44 ], [ %32, %bb11 ] ; <i32> [#uses=1] + %.load81.SV.phi = phi i32* [ undef, %bb44 ], [ %.SV80.phi, %bb11 ] ; <i32*> [#uses=1] + %.load50.SV.phi = phi i32 [ undef, %bb44 ], [ %.SV43.phi1163, %bb11 ] ; <i32> [#uses=1] + %.SV206.phi = phi i32 [ undef, %bb44 ], [ %43, %bb11 ] ; <i32> [#uses=1] + %meshStackVariable333.phi = phi i32 [ %Opq.sa.calc566, %bb44 ], [ %Opq.sa.calc485, %bb11 ] ; <i32> [#uses=1] + %Opq.link.SV857.phi = phi i32 [ %Opq.sa.calc987, %bb44 ], [ %Opq.sa.calc485, %bb11 ] ; <i32> [#uses=1] + %Opq.link.mask859 = and i32 %Opq.link.SV857.phi, 4 ; <i32> [#uses=2] + %Opq.sa.calc856 = add i32 %Opq.link.mask859, 204 ; <i32> [#uses=2] + %meshCmp335 = icmp eq i32 %meshStackVariable333.phi, 4 ; <i1> [#uses=1] + br i1 %meshCmp335, label %bb11.fragment, label %bb44.fragment + +meshBB336: ; preds = %bb85, %bb40 + %.SV52.phi1234 = phi i32* [ %.SV52.phi1213, %bb85 ], [ undef, %bb40 ] ; <i32*> [#uses=1] + %.SV38.phi1231 = phi i64 [ %.SV38.phi1214, %bb85 ], [ %.SV38.phi1179, %bb40 ] ; <i64> [#uses=4] + %.SV43.phi1230 = phi 
i32 [ undef, %bb85 ], [ %.SV43.phi1178, %bb40 ] ; <i32> [#uses=3] + %.SV68.phi1229 = phi i32 [ %.SV68.phi1212, %bb85 ], [ %.SV68.phi1177, %bb40 ] ; <i32> [#uses=4] + %.SV70.phi1228 = phi i32 [ %.SV70.phi1211, %bb85 ], [ %.SV70.phi1176, %bb40 ] ; <i32> [#uses=4] + %.SV99.phi1128 = phi i32* [ undef, %bb85 ], [ %.SV99.phi1037, %bb40 ] ; <i32*> [#uses=1] + %.SV104.phi1127 = phi i32 [ undef, %bb85 ], [ %.SV104.phi1036, %bb40 ] ; <i32> [#uses=2] + %.SV111.phi1126 = phi i32* [ undef, %bb85 ], [ %.SV111.phi1035, %bb40 ] ; <i32*> [#uses=2] + %.SV118.phi1125 = phi i32 [ undef, %bb85 ], [ %.SV118.phi1040, %bb40 ] ; <i32> [#uses=3] + %meshStackVariable337.phi = phi i32 [ %Opq.sa.calc665, %bb85 ], [ %Opq.sa.calc553, %bb40 ] ; <i32> [#uses=1] + %Opq.link.SV980.phi = phi i32 [ %Opq.sa.calc667, %bb85 ], [ %Opq.sa.calc554, %bb40 ] ; <i32> [#uses=1] + %Opq.link.mask982 = and i32 %Opq.link.SV980.phi, 1 ; <i32> [#uses=1] + %Opq.sa.calc979 = sub i32 %Opq.link.mask982, -153 ; <i32> [#uses=2] + %meshCmp339 = icmp eq i32 %meshStackVariable337.phi, 4 ; <i1> [#uses=1] + br i1 %meshCmp339, label %bb41, label %bb86 + +meshBB340: ; preds = %bb29, %bb26 + %.SV38.phi1167 = phi i64 [ %.SV38.phi1121, %bb29 ], [ %.SV38.phi1172, %bb26 ] ; <i64> [#uses=3] + %.SV68.phi1166 = phi i32 [ %.SV68.phi1120, %bb29 ], [ %.SV68.phi1169, %bb26 ] ; <i32> [#uses=3] + %.SV70.phi1165 = phi i32 [ %.SV70.phi1119, %bb29 ], [ %.SV70.phi1168, %bb26 ] ; <i32> [#uses=3] + %.SV104.phi1080 = phi i32 [ undef, %bb29 ], [ %.SV104.phi, %bb26 ] ; <i32> [#uses=1] + %.SV111.phi1079 = phi i32* [ undef, %bb29 ], [ %.SV111.phi, %bb26 ] ; <i32*> [#uses=1] + %.SV118.phi1078 = phi i32 [ %.SV118.phi1158, %bb29 ], [ %.SV118.phi, %bb26 ] ; <i32> [#uses=1] + %.load123.SV.phi = phi i32 [ undef, %bb29 ], [ %.SV118.phi, %bb26 ] ; <i32> [#uses=2] + %.SV228.phi = phi %struct.Macroblock* [ undef, %bb29 ], [ %81, %bb26 ] ; <%struct.Macroblock*> [#uses=1] + %.SV230.phi = phi i32 [ undef, %bb29 ], [ %82, %bb26 ] ; <i32> [#uses=1] + 
%meshStackVariable341.phi = phi i32 [ %Opq.sa.calc525, %bb29 ], [ %Opq.sa.calc518, %bb26 ] ; <i32> [#uses=1] + %Opq.link.SV755.phi = phi i32 [ %Opq.sa.calc525, %bb29 ], [ %Opq.sa.calc519, %bb26 ] ; <i32> [#uses=1] + %.SV.phi1042 = phi i32 [ %.SV118.phi1158, %bb29 ], [ undef, %bb26 ] ; <i32> [#uses=1] + %yM.0.SV.phi1041 = phi i32 [ %89, %bb29 ], [ undef, %bb26 ] ; <i32> [#uses=1] + %Opq.link.mask757 = and i32 %Opq.link.SV755.phi, 12 ; <i32> [#uses=1] + %Opq.sa.calc754 = add i32 %Opq.link.mask757, 225 ; <i32> [#uses=2] + %meshCmp343 = icmp eq i32 %meshStackVariable341.phi, 9 ; <i1> [#uses=1] + br i1 %meshCmp343, label %bb26.fragment, label %bb96 + +meshBB344: ; preds = %bb68, %bb23.fragment182 + %.SV38.phi1172 = phi i64 [ %.SV38.phi1115, %bb23.fragment182 ], [ %.SV38.phi1098, %bb68 ] ; <i64> [#uses=8] + %.SV52.phi1170 = phi i32* [ undef, %bb23.fragment182 ], [ %.SV52.phi1097, %bb68 ] ; <i32*> [#uses=2] + %.SV68.phi1169 = phi i32 [ %.SV68.phi1112, %bb23.fragment182 ], [ %.SV68.phi1096, %bb68 ] ; <i32> [#uses=8] + %.SV70.phi1168 = phi i32 [ %.SV70.phi1111, %bb23.fragment182 ], [ %.SV70.phi1095, %bb68 ] ; <i32> [#uses=8] + %.load144.SV.phi = phi i1 [ undef, %bb23.fragment182 ], [ %145, %bb68 ] ; <i1> [#uses=1] + %.SV274.phi = phi i32* [ undef, %bb23.fragment182 ], [ %167, %bb68 ] ; <i32*> [#uses=2] + %.SV118.phi = phi i32 [ %76, %bb23.fragment182 ], [ undef, %bb68 ] ; <i32> [#uses=7] + %.SV135.phi = phi i1 [ %78, %bb23.fragment182 ], [ undef, %bb68 ] ; <i1> [#uses=2] + %meshStackVariable345.phi = phi i32 [ %Opq.sa.calc743, %bb23.fragment182 ], [ %Opq.sa.calc624, %bb68 ] ; <i32> [#uses=1] + %Opq.link.SV717.phi = phi i32 [ %Opq.sa.calc744, %bb23.fragment182 ], [ %Opq.sa.calc624, %bb68 ] ; <i32> [#uses=1] + %Opq.link.SV720.phi = phi i32 [ %Opq.sa.calc743, %bb23.fragment182 ], [ %Opq.sa.calc624, %bb68 ] ; <i32> [#uses=1] + %.SV96.phi = phi i1 [ %71, %bb23.fragment182 ], [ undef, %bb68 ] ; <i1> [#uses=1] + %.SV99.phi = phi i32* [ %72, %bb23.fragment182 ], [ undef, %bb68 ] ; 
<i32*> [#uses=2] + %.SV104.phi = phi i32 [ %73, %bb23.fragment182 ], [ undef, %bb68 ] ; <i32> [#uses=3] + %.SV111.phi = phi i32* [ %74, %bb23.fragment182 ], [ undef, %bb68 ] ; <i32*> [#uses=3] + %Opq.link.mask722 = and i32 %Opq.link.SV720.phi, 9 ; <i32> [#uses=3] + %Opq.link.mask719 = and i32 %Opq.link.SV717.phi, 0 ; <i32> [#uses=1] + %Opq.sa.calc715 = sub i32 %Opq.link.mask719, %Opq.link.mask722 ; <i32> [#uses=1] + %Opq.sa.calc716 = sub i32 %Opq.sa.calc715, -101 ; <i32> [#uses=2] + %meshCmp347 = icmp eq i32 %meshStackVariable345.phi, 9 ; <i1> [#uses=1] + br i1 %meshCmp347, label %bb68.fragment, label %bb24 + +meshBB348: ; preds = %bb37, %bb6 + %.SV38.phi1103 = phi i64 [ %.SV38.phi1014, %bb6 ], [ %.SV38.phi1019, %bb37 ] ; <i64> [#uses=2] + %.SV43.phi1102 = phi i32 [ %.SV43.phi, %bb6 ], [ %.SV43.phi1018, %bb37 ] ; <i32> [#uses=1] + %.SV52.phi1101 = phi i32* [ %.SV52.phi, %bb6 ], [ undef, %bb37 ] ; <i32*> [#uses=1] + %.SV68.phi1100 = phi i32 [ %.SV68.phi1020, %bb6 ], [ %.SV68.phi1025, %bb37 ] ; <i32> [#uses=2] + %.SV70.phi1099 = phi i32 [ %.SV70.phi1026, %bb6 ], [ %.SV70.phi1233, %bb37 ] ; <i32> [#uses=2] + %.load131.SV.phi = phi i32 [ undef, %bb6 ], [ %.SV118.phi1155, %bb37 ] ; <i32> [#uses=1] + %.load115.SV.phi = phi i32* [ undef, %bb6 ], [ %.SV111.phi1156, %bb37 ] ; <i32*> [#uses=1] + %.load48.SV.phi = phi i32 [ undef, %bb6 ], [ %.SV43.phi1018, %bb37 ] ; <i32> [#uses=1] + %.SV242.phi = phi i32 [ undef, %bb6 ], [ %105, %bb37 ] ; <i32> [#uses=1] + %meshStackVariable349.phi = phi i32 [ %Opq.sa.calc473, %bb6 ], [ %Opq.sa.calc547, %bb37 ] ; <i32> [#uses=1] + %Opq.link.SV806.phi = phi i32 [ %Opq.sa.calc873, %bb6 ], [ %Opq.sa.calc958, %bb37 ] ; <i32> [#uses=1] + %Opq.link.mask808 = and i32 %Opq.link.SV806.phi, 12 ; <i32> [#uses=1] + %Opq.sa.calc805 = sub i32 %Opq.link.mask808, -147 ; <i32> [#uses=3] + %meshCmp351 = icmp eq i32 %meshStackVariable349.phi, 13 ; <i1> [#uses=1] + br i1 %meshCmp351, label %bb37.fragment, label %bb8 + +meshBB352: ; preds = %bb79, %bb71 + 
%.SV38.phi1140 = phi i64 [ %.SV38.phi1110, %bb71 ], [ %.SV38.phi1098, %bb79 ] ; <i64> [#uses=2] + %.SV68.phi1139 = phi i32 [ %.SV68.phi1108, %bb71 ], [ %.SV68.phi1096, %bb79 ] ; <i32> [#uses=2] + %.SV70.phi1138 = phi i32 [ %.SV70.phi1107, %bb71 ], [ %.SV70.phi1095, %bb79 ] ; <i32> [#uses=2] + %.load166.SV.phi = phi i32 [ %.SV164.phi1104, %bb71 ], [ undef, %bb79 ] ; <i32> [#uses=1] + %.load163.SV.phi = phi i32* [ %.SV162.phi1105, %bb71 ], [ undef, %bb79 ] ; <i32*> [#uses=1] + %.SV282.phi = phi i32 [ %182, %bb71 ], [ undef, %bb79 ] ; <i32> [#uses=1] + %meshStackVariable353.phi = phi i32 [ %Opq.sa.calc633, %bb71 ], [ %Opq.sa.calc650, %bb79 ] ; <i32> [#uses=1] + %Opq.link.SV877.phi = phi i32 [ %Opq.sa.calc820, %bb71 ], [ %Opq.sa.calc650, %bb79 ] ; <i32> [#uses=1] + %Opq.link.mask879 = and i32 %Opq.link.SV877.phi, 1 ; <i32> [#uses=1] + %Opq.sa.calc876 = add i32 %Opq.link.mask879, 18 ; <i32> [#uses=1] + %meshCmp355 = icmp eq i32 %meshStackVariable353.phi, 11 ; <i1> [#uses=1] + br i1 %meshCmp355, label %bb97, label %bb71.fragment + +meshBB356: ; preds = %bb70.fragment, %bb26.fragment + %.SV104.phi1160 = phi i32 [ undef, %bb70.fragment ], [ %.SV104.phi1080, %bb26.fragment ] ; <i32> [#uses=1] + %.SV111.phi1159 = phi i32* [ undef, %bb70.fragment ], [ %.SV111.phi1079, %bb26.fragment ] ; <i32*> [#uses=1] + %.SV118.phi1158 = phi i32 [ undef, %bb70.fragment ], [ %.SV118.phi1078, %bb26.fragment ] ; <i32> [#uses=3] + %.SV38.phi1121 = phi i64 [ %.SV38.phi1014, %bb70.fragment ], [ %.SV38.phi1167, %bb26.fragment ] ; <i64> [#uses=3] + %.SV68.phi1120 = phi i32 [ %.SV68.phi1020, %bb70.fragment ], [ %.SV68.phi1166, %bb26.fragment ] ; <i32> [#uses=3] + %.SV70.phi1119 = phi i32 [ %.SV70.phi1026, %bb70.fragment ], [ %.SV70.phi1165, %bb26.fragment ] ; <i32> [#uses=3] + %.SV.phi1046 = phi i32 [ %.load165.SV.phi, %bb70.fragment ], [ %.load123.SV.phi, %bb26.fragment ] ; <i32> [#uses=1] + %meshStackVariable357.phi = phi i32 [ %Opq.sa.calc738, %bb70.fragment ], [ %Opq.sa.calc917, %bb26.fragment ] 
; <i32> [#uses=1] + %Opq.link.SV984.phi = phi i32 [ %Opq.sa.calc738, %bb70.fragment ], [ %Opq.sa.calc918, %bb26.fragment ] ; <i32> [#uses=1] + %Opq.link.mask986 = and i32 %Opq.link.SV984.phi, 9 ; <i32> [#uses=1] + %Opq.sa.calc983 = xor i32 %Opq.link.mask986, 251 ; <i32> [#uses=1] + %meshCmp359 = icmp eq i32 %meshStackVariable357.phi, 9 ; <i1> [#uses=1] + br i1 %meshCmp359, label %bb28, label %bb96 + +meshBB360: ; preds = %bb21, %bb13 + %.SV38.phi1115 = phi i64 [ %4, %bb21 ], [ %.SV38.phi1014, %bb13 ] ; <i64> [#uses=5] + %.SV52.phi1113 = phi i32* [ %.SV52.phi1022, %bb21 ], [ %.SV52.phi, %bb13 ] ; <i32*> [#uses=3] + %.SV68.phi1112 = phi i32 [ %.SV68.phi1021, %bb21 ], [ %.SV68.phi1020, %bb13 ] ; <i32> [#uses=5] + %.SV70.phi1111 = phi i32 [ %.SV70.phi1027, %bb21 ], [ %.SV70.phi1026, %bb13 ] ; <i32> [#uses=5] + %.load74.SV.phi = phi i1 [ undef, %bb21 ], [ %21, %bb13 ] ; <i1> [#uses=1] + %.SV208.phi = phi i32* [ undef, %bb21 ], [ %46, %bb13 ] ; <i32*> [#uses=2] + %meshStackVariable361.phi = phi i32 [ %Opq.sa.calc505, %bb21 ], [ %Opq.sa.calc489, %bb13 ] ; <i32> [#uses=1] + %Opq.link.SV867.phi = phi i32 [ %Opq.sa.calc505, %bb21 ], [ %Opq.sa.calc873, %bb13 ] ; <i32> [#uses=1] + %Opq.link.mask869 = and i32 %Opq.link.SV867.phi, 1 ; <i32> [#uses=1] + %Opq.sa.calc866 = add i32 %Opq.link.mask869, 148 ; <i32> [#uses=4] + %meshCmp363 = icmp eq i32 %meshStackVariable361.phi, 16 ; <i1> [#uses=1] + br i1 %meshCmp363, label %bb13.fragment, label %bb23 + +meshBB364: ; preds = %bb65.fragment, %bb56 + %.SV38.phi1098 = phi i64 [ %.SV38.phi1017, %bb56 ], [ %.SV38.phi1147, %bb65.fragment ] ; <i64> [#uses=11] + %.SV52.phi1097 = phi i32* [ %.SV52.phi1024, %bb56 ], [ undef, %bb65.fragment ] ; <i32*> [#uses=8] + %.SV68.phi1096 = phi i32 [ %.SV68.phi1023, %bb56 ], [ %.SV68.phi1146, %bb65.fragment ] ; <i32> [#uses=11] + %.SV70.phi1095 = phi i32 [ %.SV70.phi1028, %bb56 ], [ %.SV70.phi1145, %bb65.fragment ] ; <i32> [#uses=11] + %or.cond.not.SV.phi1094 = phi i1 [ %or.cond.not.SV.phi1029, %bb56 ], [ 
undef, %bb65.fragment ] ; <i1> [#uses=1] + %.SV.phi1062 = phi i32 [ undef, %bb56 ], [ %.SV268.phi, %bb65.fragment ] ; <i32> [#uses=1] + %.not4.SV.phi = phi i1 [ %.not4, %bb56 ], [ undef, %bb65.fragment ] ; <i1> [#uses=1] + %.SV256.phi = phi i1 [ %139, %bb56 ], [ undef, %bb65.fragment ] ; <i1> [#uses=1] + %meshStackVariable365.phi = phi i32 [ %Opq.sa.calc592, %bb56 ], [ %Opq.sa.calc832, %bb65.fragment ] ; <i32> [#uses=1] + %Opq.link.SV735.phi = phi i32 [ %Opq.sa.calc592, %bb56 ], [ %Opq.sa.calc832, %bb65.fragment ] ; <i32> [#uses=1] + %Opq.link.mask737 = and i32 %Opq.link.SV735.phi, 0 ; <i32> [#uses=2] + %Opq.sa.calc734 = sub i32 %Opq.link.mask737, -242 ; <i32> [#uses=0] + %meshCmp367 = icmp eq i32 %meshStackVariable365.phi, 1 ; <i1> [#uses=1] + br i1 %meshCmp367, label %bb96, label %bb56.fragment + +meshBB368: ; preds = %bb90.fragment, %bb8 + %.SV38.phi1164 = phi i64 [ %.SV38.phi1103, %bb8 ], [ %.SV38.phi1191, %bb90.fragment ] ; <i64> [#uses=5] + %.SV43.phi1163 = phi i32 [ %.SV43.phi1102, %bb8 ], [ undef, %bb90.fragment ] ; <i32> [#uses=1] + %.SV68.phi1162 = phi i32 [ %.SV68.phi1100, %bb8 ], [ %.SV68.phi1189, %bb90.fragment ] ; <i32> [#uses=5] + %.SV70.phi1161 = phi i32 [ %.SV70.phi1099, %bb8 ], [ %.SV70.phi1188, %bb90.fragment ] ; <i32> [#uses=5] + %.SV178.phi = phi i32 [ undef, %bb8 ], [ %214, %bb90.fragment ] ; <i32> [#uses=2] + %.SV176.phi = phi i32* [ undef, %bb8 ], [ %212, %bb90.fragment ] ; <i32*> [#uses=1] + %.SV170.phi = phi i32* [ undef, %bb8 ], [ %210, %bb90.fragment ] ; <i32*> [#uses=1] + %.SV172.phi = phi i32 [ undef, %bb8 ], [ %211, %bb90.fragment ] ; <i32> [#uses=1] + %.SV76.phi = phi i32* [ %28, %bb8 ], [ undef, %bb90.fragment ] ; <i32*> [#uses=1] + %.SV78.phi = phi i32 [ %29, %bb8 ], [ undef, %bb90.fragment ] ; <i32> [#uses=1] + %.SV80.phi = phi i32* [ %30, %bb8 ], [ undef, %bb90.fragment ] ; <i32*> [#uses=1] + %.load66.SV.phi = phi i32* [ %.SV52.phi1101, %bb8 ], [ undef, %bb90.fragment ] ; <i32*> [#uses=1] + %.load35.SV.phi = phi i64 [ %3, %bb8 ], 
[ undef, %bb90.fragment ] ; <i64> [#uses=1] + %.load16.SV.phi = phi %struct.Macroblock* [ %2, %bb8 ], [ undef, %bb90.fragment ] ; <%struct.Macroblock*> [#uses=1] + %.SV198.phi = phi i32 [ %29, %bb8 ], [ undef, %bb90.fragment ] ; <i32> [#uses=1] + %.SV200.phi = phi i32* [ %30, %bb8 ], [ undef, %bb90.fragment ] ; <i32*> [#uses=1] + %meshStackVariable369.phi = phi i32 [ %Opq.sa.calc479, %bb8 ], [ %Opq.sa.calc772, %bb90.fragment ] ; <i32> [#uses=1] + %Opq.link.SV769.phi = phi i32 [ %Opq.sa.calc805, %bb8 ], [ %Opq.sa.calc772, %bb90.fragment ] ; <i32> [#uses=1] + %Opq.link.mask771 = and i32 %Opq.link.SV769.phi, 2 ; <i32> [#uses=1] + %Opq.sa.calc768 = xor i32 %Opq.link.mask771, 135 ; <i32> [#uses=3] + %meshCmp371 = icmp eq i32 %meshStackVariable369.phi, 2 ; <i1> [#uses=1] + br i1 %meshCmp371, label %bb91, label %bb8.fragment + +meshBB372: ; preds = %bb84.fragment, %bb35 + %.SV38.phi1214 = phi i64 [ %.SV38.phi1191, %bb84.fragment ], [ %.SV38.phi1183, %bb35 ] ; <i64> [#uses=3] + %.SV52.phi1213 = phi i32* [ %.SV52.phi1190, %bb84.fragment ], [ undef, %bb35 ] ; <i32*> [#uses=2] + %.SV68.phi1212 = phi i32 [ %.SV68.phi1189, %bb84.fragment ], [ %.SV68.phi1181, %bb35 ] ; <i32> [#uses=3] + %.SV70.phi1211 = phi i32 [ %.SV70.phi1188, %bb84.fragment ], [ %.SV70.phi1180, %bb35 ] ; <i32> [#uses=3] + %.SV118.phi1154 = phi i32 [ undef, %bb84.fragment ], [ %.SV118.phi1082, %bb35 ] ; <i32> [#uses=1] + %.SV167.phi = phi i1 [ %203, %bb84.fragment ], [ undef, %bb35 ] ; <i1> [#uses=1] + %meshStackVariable373.phi = phi i32 [ %Opq.sa.calc802, %bb84.fragment ], [ %Opq.sa.calc540, %bb35 ] ; <i32> [#uses=1] + %Opq.link.SV813.phi = phi i32 [ %Opq.sa.calc802, %bb84.fragment ], [ %Opq.sa.calc541, %bb35 ] ; <i32> [#uses=1] + %Opq.link.mask815 = and i32 %Opq.link.SV813.phi, 0 ; <i32> [#uses=1] + %Opq.sa.calc812 = sub i32 %Opq.link.mask815, -121 ; <i32> [#uses=3] + %meshCmp375 = icmp eq i32 %meshStackVariable373.phi, 6 ; <i1> [#uses=1] + br i1 %meshCmp375, label %bb36, label %bb85 + +meshBB376: ; preds = 
%bb98, %bb44.fragment + %.SV38.phi1153 = phi i64 [ %.SV38.phi1093, %bb98 ], [ %.SV38.phi1204, %bb44.fragment ] ; <i64> [#uses=1] + %.SV68.phi1152 = phi i32 [ %.SV68.phi1092, %bb98 ], [ %.SV68.phi1203, %bb44.fragment ] ; <i32> [#uses=1] + %.SV70.phi1151 = phi i32 [ %.SV70.phi1091, %bb98 ], [ %.SV70.phi1202, %bb44.fragment ] ; <i32> [#uses=1] + %.load39.SV.phi = phi i64 [ %.SV38.phi1093, %bb98 ], [ undef, %bb44.fragment ] ; <i64> [#uses=2] + %.SV313.phi = phi i32* [ %237, %bb98 ], [ undef, %bb44.fragment ] ; <i32*> [#uses=1] + %.SV315.phi = phi i32* [ %239, %bb98 ], [ undef, %bb44.fragment ] ; <i32*> [#uses=1] + %.SV317.phi = phi i32* [ %240, %bb98 ], [ undef, %bb44.fragment ] ; <i32*> [#uses=3] + %.SV.phi1050 = phi i32 [ undef, %bb98 ], [ %.load127.SV.phi, %bb44.fragment ] ; <i32> [#uses=1] + %yM.0.SV.phi1049 = phi i32 [ undef, %bb98 ], [ %121, %bb44.fragment ] ; <i32> [#uses=1] + %meshStackVariable377.phi = phi i32 [ %Opq.sa.calc695, %bb98 ], [ %Opq.sa.calc894, %bb44.fragment ] ; <i32> [#uses=1] + %Opq.link.SV909.phi = phi i32 [ %Opq.sa.calc695, %bb98 ], [ %Opq.sa.calc856, %bb44.fragment ] ; <i32> [#uses=1] + %Opq.link.mask911 = and i32 %Opq.link.SV909.phi, 16 ; <i32> [#uses=2] + %Opq.sa.calc908 = add i32 %Opq.link.mask911, -11 ; <i32> [#uses=0] + %meshCmp379 = icmp eq i32 %meshStackVariable377.phi, 8 ; <i1> [#uses=1] + br i1 %meshCmp379, label %bb96, label %bb98.fragment + +meshBB380: ; preds = %bb92.fragment, %bb49.fragment + %.SV38.phi1207 = phi i64 [ %.SV38.phi1164, %bb92.fragment ], [ %.SV38.phi1179, %bb49.fragment ] ; <i64> [#uses=2] + %.SV68.phi1206 = phi i32 [ %.SV68.phi1162, %bb92.fragment ], [ %.SV68.phi1177, %bb49.fragment ] ; <i32> [#uses=2] + %.SV70.phi1205 = phi i32 [ %.SV70.phi1161, %bb92.fragment ], [ %.SV70.phi1176, %bb49.fragment ] ; <i32> [#uses=2] + %.SV104.phi1124 = phi i32 [ undef, %bb92.fragment ], [ %.SV104.phi1036, %bb49.fragment ] ; <i32> [#uses=1] + %.SV111.phi1123 = phi i32* [ undef, %bb92.fragment ], [ %.SV111.phi1035, %bb49.fragment ] 
; <i32*> [#uses=1] + %.SV118.phi1122 = phi i32 [ undef, %bb92.fragment ], [ %.SV118.phi1040, %bb49.fragment ] ; <i32> [#uses=1] + %meshStackVariable381.phi = phi i32 [ %Opq.sa.calc1005, %bb92.fragment ], [ %Opq.sa.calc860, %bb49.fragment ] ; <i32> [#uses=1] + %Opq.link.SV947.phi = phi i32 [ %Opq.sa.calc1005, %bb92.fragment ], [ %Opq.sa.calc860, %bb49.fragment ] ; <i32> [#uses=1] + %.SV.phi1052 = phi i32 [ %.SV178.phi, %bb92.fragment ], [ undef, %bb49.fragment ] ; <i32> [#uses=1] + %yM.0.SV.phi1051 = phi i32 [ %226, %bb92.fragment ], [ undef, %bb49.fragment ] ; <i32> [#uses=1] + %Opq.link.mask949 = and i32 %Opq.link.SV947.phi, 1 ; <i32> [#uses=1] + %Opq.sa.calc946 = sub i32 %Opq.link.mask949, -4 ; <i32> [#uses=1] + %meshCmp383 = icmp eq i32 %meshStackVariable381.phi, 1 ; <i1> [#uses=1] + br i1 %meshCmp383, label %bb54, label %bb96 + +meshBB384: ; preds = %bb95, %bb52 + %.SV38.phi1221 = phi i64 [ %.SV38.phi1179, %bb52 ], [ %.SV38.phi1218, %bb95 ] ; <i64> [#uses=2] + %.SV68.phi1220 = phi i32 [ %.SV68.phi1177, %bb52 ], [ %.SV68.phi1216, %bb95 ] ; <i32> [#uses=2] + %.SV70.phi1219 = phi i32 [ %.SV70.phi1176, %bb52 ], [ %.SV70.phi1215, %bb95 ] ; <i32> [#uses=2] + %.load53.SV.phi = phi i32* [ undef, %bb52 ], [ %.SV52.phi1217, %bb95 ] ; <i32*> [#uses=1] + %.load20.SV.phi = phi i64 [ undef, %bb52 ], [ %3, %bb95 ] ; <i64> [#uses=1] + %.load.SV.phi = phi %struct.Macroblock* [ undef, %bb52 ], [ %2, %bb95 ] ; <%struct.Macroblock*> [#uses=1] + %.SV306.phi = phi i32 [ undef, %bb52 ], [ %227, %bb95 ] ; <i32> [#uses=1] + %.SV308.phi = phi i32* [ undef, %bb52 ], [ %228, %bb95 ] ; <i32*> [#uses=1] + %.load126.SV.phi = phi i32 [ %.SV118.phi1040, %bb52 ], [ undef, %bb95 ] ; <i32> [#uses=1] + %.load44.SV.phi = phi i32 [ %.SV43.phi1178, %bb52 ], [ undef, %bb95 ] ; <i32> [#uses=1] + %meshStackVariable385.phi = phi i32 [ %Opq.sa.calc583, %bb52 ], [ %Opq.sa.calc689, %bb95 ] ; <i32> [#uses=1] + %Opq.link.SV902.phi = phi i32 [ %Opq.sa.calc860, %bb52 ], [ %Opq.sa.calc689, %bb95 ] ; <i32> 
[#uses=1] + %Opq.link.SV905.phi = phi i32 [ %Opq.sa.calc584, %bb52 ], [ %Opq.sa.calc689, %bb95 ] ; <i32> [#uses=1] + %Opq.link.mask907 = and i32 %Opq.link.SV905.phi, 0 ; <i32> [#uses=0] + %Opq.link.mask904 = and i32 %Opq.link.SV902.phi, 1 ; <i32> [#uses=1] + %Opq.sa.calc901 = xor i32 %Opq.link.mask904, 227 ; <i32> [#uses=3] + %meshCmp387 = icmp eq i32 %meshStackVariable385.phi, 5 ; <i1> [#uses=1] + br i1 %meshCmp387, label %bb95.fragment, label %bb52.fragment + +meshBB388: ; preds = %bb52.fragment, %bb7 + %.SV38.phi1118 = phi i64 [ %.SV38.phi1014, %bb7 ], [ %.SV38.phi1221, %bb52.fragment ] ; <i64> [#uses=2] + %.SV68.phi1117 = phi i32 [ %.SV68.phi1020, %bb7 ], [ %.SV68.phi1220, %bb52.fragment ] ; <i32> [#uses=2] + %.SV70.phi1116 = phi i32 [ %.SV70.phi1026, %bb7 ], [ %.SV70.phi1219, %bb52.fragment ] ; <i32> [#uses=2] + %.SV.phi1054 = phi i32 [ undef, %bb7 ], [ %.load126.SV.phi, %bb52.fragment ] ; <i32> [#uses=1] + %yM.0.SV.phi1053 = phi i32 [ undef, %bb7 ], [ %137, %bb52.fragment ] ; <i32> [#uses=1] + %.load67.SV.phi = phi i32* [ %.SV52.phi, %bb7 ], [ undef, %bb52.fragment ] ; <i32*> [#uses=1] + %.load36.SV.phi = phi i64 [ %3, %bb7 ], [ undef, %bb52.fragment ] ; <i64> [#uses=1] + %.load17.SV.phi = phi %struct.Macroblock* [ %2, %bb7 ], [ undef, %bb52.fragment ] ; <%struct.Macroblock*> [#uses=1] + %.SV194.phi = phi i32 [ %24, %bb7 ], [ undef, %bb52.fragment ] ; <i32> [#uses=1] + %.SV196.phi = phi i32* [ %25, %bb7 ], [ undef, %bb52.fragment ] ; <i32*> [#uses=1] + %meshStackVariable389.phi = phi i32 [ %Opq.sa.calc476, %bb7 ], [ %Opq.sa.calc844, %bb52.fragment ] ; <i32> [#uses=1] + %Opq.link.SV887.phi = phi i32 [ %Opq.sa.calc873, %bb7 ], [ %Opq.sa.calc901, %bb52.fragment ] ; <i32> [#uses=1] + %Opq.link.mask889 = and i32 %Opq.link.SV887.phi, 64 ; <i32> [#uses=1] + %Opq.sa.calc886 = sub i32 %Opq.link.mask889, -170 ; <i32> [#uses=2] + %meshCmp391 = icmp eq i32 %meshStackVariable389.phi, 12 ; <i1> [#uses=1] + br i1 %meshCmp391, label %bb96, label %bb7.fragment + +meshBB392: ; 
preds = %bb4, %entry + %meshStackVariable393.phi = phi i32 [ %Opq.sa.calc466, %bb4 ], [ %Opq.sa.calc, %entry ] ; <i32> [#uses=1] + %Opq.link.SV922.phi = phi i32 [ %Opq.sa.calc462, %bb4 ], [ %Opq.sa.calc, %entry ] ; <i32> [#uses=1] + %or.cond.not.SV.phi = phi i1 [ %or.cond.not, %bb4 ], [ undef, %entry ] ; <i1> [#uses=1] + %.SV70.phi1027 = phi i32 [ %12, %bb4 ], [ undef, %entry ] ; <i32> [#uses=2] + %.SV52.phi1022 = phi i32* [ %9, %bb4 ], [ undef, %entry ] ; <i32*> [#uses=1] + %.SV68.phi1021 = phi i32 [ %10, %bb4 ], [ undef, %entry ] ; <i32> [#uses=2] + %.SV43.phi1015 = phi i32 [ %8, %bb4 ], [ undef, %entry ] ; <i32> [#uses=3] + %Opq.link.mask924 = and i32 %Opq.link.SV922.phi, 2 ; <i32> [#uses=1] + %Opq.sa.calc921 = add i32 %Opq.link.mask924, 57 ; <i32> [#uses=3] + %meshCmp395 = icmp eq i32 %meshStackVariable393.phi, 2 ; <i1> [#uses=1] + br i1 %meshCmp395, label %entry.fragment, label %bb21 + +meshBB396: ; preds = %bb69.fragment, %bb.fragment + %.SV.phi1065 = phi i32 [ undef, %bb.fragment ], [ %171, %bb69.fragment ] ; <i32> [#uses=1] + %meshStackVariable397.phi = phi i32 [ %Opq.sa.calc976, %bb.fragment ], [ %Opq.sa.calc995, %bb69.fragment ] ; <i32> [#uses=1] + %Opq.link.SV759.phi = phi i32 [ %Opq.sa.calc976, %bb.fragment ], [ %Opq.sa.calc995, %bb69.fragment ] ; <i32> [#uses=1] + %.SV70.phi = phi i32 [ %12, %bb.fragment ], [ %.SV70.phi1168, %bb69.fragment ] ; <i32> [#uses=1] + %.SV68.phi = phi i32 [ %10, %bb.fragment ], [ %.SV68.phi1169, %bb69.fragment ] ; <i32> [#uses=1] + %.SV38.phi = phi i64 [ %4, %bb.fragment ], [ %.SV38.phi1172, %bb69.fragment ] ; <i64> [#uses=1] + %Opq.link.mask761 = and i32 %Opq.link.SV759.phi, 6 ; <i32> [#uses=1] + %Opq.sa.calc758 = add i32 %Opq.link.mask761, 53 ; <i32> [#uses=1] + %meshCmp399 = icmp eq i32 %meshStackVariable397.phi, 6 ; <i1> [#uses=1] + br i1 %meshCmp399, label %bb96, label %return + +meshBB400: ; preds = %bb84, %bb69.fragment + %.SV38.phi1191 = phi i64 [ %.SV38.phi1098, %bb84 ], [ %.SV38.phi1172, %bb69.fragment ] ; <i64> 
[#uses=5] + %.SV52.phi1190 = phi i32* [ %.SV52.phi1097, %bb84 ], [ undef, %bb69.fragment ] ; <i32*> [#uses=3] + %.SV68.phi1189 = phi i32 [ %.SV68.phi1096, %bb84 ], [ %.SV68.phi1169, %bb69.fragment ] ; <i32> [#uses=5] + %.SV70.phi1188 = phi i32 [ %.SV70.phi1095, %bb84 ], [ %.SV70.phi1168, %bb69.fragment ] ; <i32> [#uses=5] + %.SV290.phi = phi i32 [ %200, %bb84 ], [ undef, %bb69.fragment ] ; <i32> [#uses=1] + %.SV164.phi = phi i32 [ undef, %bb84 ], [ %171, %bb69.fragment ] ; <i32> [#uses=2] + %meshStackVariable401.phi = phi i32 [ %Opq.sa.calc661, %bb84 ], [ %Opq.sa.calc996, %bb69.fragment ] ; <i32> [#uses=1] + %Opq.link.SV825.phi = phi i32 [ %Opq.sa.calc658, %bb84 ], [ %Opq.sa.calc996, %bb69.fragment ] ; <i32> [#uses=1] + %.SV162.phi = phi i32* [ undef, %bb84 ], [ %169, %bb69.fragment ] ; <i32*> [#uses=1] + %.SV156.phi = phi i32* [ undef, %bb84 ], [ %.SV274.phi, %bb69.fragment ] ; <i32*> [#uses=1] + %.SV158.phi = phi i32 [ undef, %bb84 ], [ %168, %bb69.fragment ] ; <i32> [#uses=1] + %Opq.link.mask827 = and i32 %Opq.link.SV825.phi, 4 ; <i32> [#uses=1] + %Opq.sa.calc824 = xor i32 %Opq.link.mask827, 228 ; <i32> [#uses=2] + %meshCmp403 = icmp eq i32 %meshStackVariable401.phi, 15 ; <i1> [#uses=1] + br i1 %meshCmp403, label %bb70, label %bb84.fragment + +meshBB404: ; preds = %bb96, %bb3 + %yM.0.reg2mem.1.SV.phi1077 = phi i32 [ %yM.0.SV.phi, %bb96 ], [ undef, %bb3 ] ; <i32> [#uses=1] + %meshStackVariable405.phi = phi i32 [ %Opq.sa.calc692, %bb96 ], [ %Opq.sa.calc461, %bb3 ] ; <i32> [#uses=1] + %Opq.link.SV940.phi = phi i32 [ %Opq.sa.calc693, %bb96 ], [ %Opq.sa.calc461, %bb3 ] ; <i32> [#uses=1] + %or.cond.not.SV.phi1029 = phi i1 [ undef, %bb96 ], [ %or.cond.not, %bb3 ] ; <i1> [#uses=1] + %.SV70.phi1028 = phi i32 [ %.SV70.phi1085, %bb96 ], [ %12, %bb3 ] ; <i32> [#uses=2] + %.SV52.phi1024 = phi i32* [ undef, %bb96 ], [ %9, %bb3 ] ; <i32*> [#uses=1] + %.SV68.phi1023 = phi i32 [ %.SV68.phi1086, %bb96 ], [ %10, %bb3 ] ; <i32> [#uses=2] + %.SV38.phi1017 = phi i64 [ %.SV38.phi1087, 
%bb96 ], [ %4, %bb3 ] ; <i64> [#uses=2] + %.SV40.phi = phi i32 [ undef, %bb96 ], [ %6, %bb3 ] ; <i32> [#uses=1] + %Opq.link.mask942 = and i32 %Opq.link.SV940.phi, 6 ; <i32> [#uses=1] + %Opq.sa.calc939 = sub i32 %Opq.link.mask942, -87 ; <i32> [#uses=1] + %meshCmp407 = icmp eq i32 %meshStackVariable405.phi, 6 ; <i1> [#uses=1] + br i1 %meshCmp407, label %bb56, label %bb98 + +meshBB408: ; preds = %bb89.fragment, %bb87 + %.SV38.phi1218 = phi i64 [ %.SV38.phi1191, %bb89.fragment ], [ %.SV38.phi1210, %bb87 ] ; <i64> [#uses=2] + %.SV52.phi1217 = phi i32* [ %.SV52.phi1190, %bb89.fragment ], [ %.SV52.phi1235, %bb87 ] ; <i32*> [#uses=1] + %.SV68.phi1216 = phi i32 [ %.SV68.phi1189, %bb89.fragment ], [ %.SV68.phi1209, %bb87 ] ; <i32> [#uses=2] + %.SV70.phi1215 = phi i32 [ %.SV70.phi1188, %bb89.fragment ], [ %.SV70.phi1208, %bb87 ] ; <i32> [#uses=2] + %.SV172.phi1074 = phi i32 [ %211, %bb89.fragment ], [ undef, %bb87 ] ; <i32> [#uses=1] + %meshStackVariable409.phi = phi i32 [ %Opq.sa.calc962, %bb89.fragment ], [ %Opq.sa.calc673, %bb87 ] ; <i32> [#uses=1] + %Opq.link.SV913.phi = phi i32 [ %Opq.sa.calc962, %bb89.fragment ], [ %Opq.sa.calc990, %bb87 ] ; <i32> [#uses=1] + %Opq.link.mask915 = and i32 %Opq.link.SV913.phi, 9 ; <i32> [#uses=1] + %Opq.sa.calc912 = xor i32 %Opq.link.mask915, 195 ; <i32> [#uses=1] + %meshCmp411 = icmp eq i32 %meshStackVariable409.phi, 1 ; <i1> [#uses=1] + br i1 %meshCmp411, label %bb97, label %bb95 + +meshBB412: ; preds = %bb68.fragment, %bb13.fragment + %.SV38.phi1187 = phi i64 [ %.SV38.phi1115, %bb13.fragment ], [ %.SV38.phi1172, %bb68.fragment ] ; <i64> [#uses=2] + %.SV52.phi1186 = phi i32* [ %.SV52.phi1113, %bb13.fragment ], [ %.SV52.phi1170, %bb68.fragment ] ; <i32*> [#uses=2] + %.SV68.phi1185 = phi i32 [ %.SV68.phi1112, %bb13.fragment ], [ %.SV68.phi1169, %bb68.fragment ] ; <i32> [#uses=2] + %.SV70.phi1184 = phi i32 [ %.SV70.phi1111, %bb13.fragment ], [ %.SV70.phi1168, %bb68.fragment ] ; <i32> [#uses=2] + %.SV158.phi1063 = phi i32 [ undef, 
%bb13.fragment ], [ %168, %bb68.fragment ] ; <i32> [#uses=1] + %.SV87.phi1030 = phi i32 [ %47, %bb13.fragment ], [ undef, %bb68.fragment ] ; <i32> [#uses=1] + %meshStackVariable413.phi = phi i32 [ %Opq.sa.calc870, %bb13.fragment ], [ %Opq.sa.calc784, %bb68.fragment ] ; <i32> [#uses=1] + %Opq.link.SV933.phi = phi i32 [ %Opq.sa.calc870, %bb13.fragment ], [ %Opq.link.mask722, %bb68.fragment ] ; <i32> [#uses=1] + %Opq.link.SV936.phi = phi i32 [ %Opq.sa.calc866, %bb13.fragment ], [ %Opq.sa.calc784, %bb68.fragment ] ; <i32> [#uses=1] + %Opq.link.mask938 = and i32 %Opq.link.SV936.phi, 4 ; <i32> [#uses=1] + %Opq.link.mask935 = and i32 %Opq.link.SV933.phi, 0 ; <i32> [#uses=1] + %Opq.sa.calc931 = sub i32 %Opq.link.mask935, %Opq.link.mask938 ; <i32> [#uses=1] + %Opq.sa.calc932 = xor i32 %Opq.sa.calc931, -51 ; <i32> [#uses=3] + %meshCmp415 = icmp eq i32 %meshStackVariable413.phi, 6 ; <i1> [#uses=1] + br i1 %meshCmp415, label %bb74, label %bb19 + +meshBB416: ; preds = %bb90.fragment, %bb77 + %.SV38.phi1201 = phi i64 [ %.SV38.phi1191, %bb90.fragment ], [ %.SV38.phi1098, %bb77 ] ; <i64> [#uses=2] + %.SV52.phi1200 = phi i32* [ undef, %bb90.fragment ], [ %.SV52.phi1097, %bb77 ] ; <i32*> [#uses=1] + %.SV68.phi1199 = phi i32 [ %.SV68.phi1189, %bb90.fragment ], [ %.SV68.phi1096, %bb77 ] ; <i32> [#uses=2] + %.SV70.phi1198 = phi i32 [ %.SV70.phi1188, %bb90.fragment ], [ %.SV70.phi1095, %bb77 ] ; <i32> [#uses=2] + %.SV.phi1076 = phi i32 [ %214, %bb90.fragment ], [ undef, %bb77 ] ; <i32> [#uses=1] + %meshStackVariable417.phi = phi i32 [ %Opq.sa.calc773, %bb90.fragment ], [ %Opq.sa.calc643, %bb77 ] ; <i32> [#uses=1] + %Opq.link.SV973.phi = phi i32 [ %Opq.sa.calc773, %bb90.fragment ], [ %Opq.sa.calc640, %bb77 ] ; <i32> [#uses=1] + %Opq.link.mask975 = and i32 %Opq.link.SV973.phi, 10 ; <i32> [#uses=1] + %Opq.sa.calc972 = xor i32 %Opq.link.mask975, 110 ; <i32> [#uses=1] + %Opq.sa.calc971 = add i32 %Opq.sa.calc972, -19 ; <i32> [#uses=1] + %meshCmp419 = icmp eq i32 %meshStackVariable417.phi, 12 
; <i1> [#uses=1] + br i1 %meshCmp419, label %bb78, label %bb96 + +meshBB420: ; preds = %bb66, %bb26.fragment + %.SV38.phi1194 = phi i64 [ %.SV38.phi1098, %bb66 ], [ %.SV38.phi1167, %bb26.fragment ] ; <i64> [#uses=2] + %.SV68.phi1193 = phi i32 [ %.SV68.phi1096, %bb66 ], [ %.SV68.phi1166, %bb26.fragment ] ; <i32> [#uses=2] + %.SV70.phi1192 = phi i32 [ %.SV70.phi1095, %bb66 ], [ %.SV70.phi1165, %bb26.fragment ] ; <i32> [#uses=2] + %.load61.SV.phi = phi i32* [ %.SV52.phi1097, %bb66 ], [ undef, %bb26.fragment ] ; <i32*> [#uses=1] + %.SV270.phi = phi i32 [ %165, %bb66 ], [ undef, %bb26.fragment ] ; <i32> [#uses=1] + %.SV272.phi = phi i32* [ %166, %bb66 ], [ undef, %bb26.fragment ] ; <i32*> [#uses=1] + %.SV.phi1044 = phi i32 [ undef, %bb66 ], [ %.load123.SV.phi, %bb26.fragment ] ; <i32> [#uses=1] + %meshStackVariable421.phi = phi i32 [ %Opq.sa.calc621, %bb66 ], [ %Opq.sa.calc918, %bb26.fragment ] ; <i32> [#uses=1] + %Opq.link.SV838.phi = phi i32 [ %Opq.sa.calc602, %bb66 ], [ %Opq.sa.calc918, %bb26.fragment ] ; <i32> [#uses=1] + %Opq.link.mask840 = and i32 %Opq.link.SV838.phi, 9 ; <i32> [#uses=2] + %Opq.sa.calc837 = sub i32 %Opq.link.mask840, -202 ; <i32> [#uses=2] + %Opq.sa.calc835 = sub i32 %Opq.sa.calc837, %Opq.link.mask840 ; <i32> [#uses=1] + %Opq.sa.calc836 = xor i32 %Opq.sa.calc835, 176 ; <i32> [#uses=0] + %meshCmp423 = icmp eq i32 %meshStackVariable421.phi, 9 ; <i1> [#uses=1] + br i1 %meshCmp423, label %bb96, label %bb66.fragment + +meshBB424: ; preds = %bb86.fragment, %bb83 + %.SV38.phi1197 = phi i64 [ %.SV38.phi1231, %bb86.fragment ], [ %.SV38.phi1098, %bb83 ] ; <i64> [#uses=2] + %.SV68.phi1196 = phi i32 [ %.SV68.phi1229, %bb86.fragment ], [ %.SV68.phi1096, %bb83 ] ; <i32> [#uses=2] + %.SV70.phi1195 = phi i32 [ %.SV70.phi1228, %bb86.fragment ], [ %.SV70.phi1095, %bb83 ] ; <i32> [#uses=2] + %.SV.phi1072 = phi i32 [ %209, %bb86.fragment ], [ undef, %bb83 ] ; <i32> [#uses=1] + %meshStackVariable425.phi = phi i32 [ %Opq.sa.calc943, %bb86.fragment ], [ %Opq.sa.calc658, 
%bb83 ] ; <i32> [#uses=1] + %Opq.link.SV951.phi = phi i32 [ %Opq.sa.calc943, %bb86.fragment ], [ %Opq.sa.calc1002, %bb83 ] ; <i32> [#uses=1] + %Opq.link.mask953 = and i32 %Opq.link.SV951.phi, 12 ; <i32> [#uses=1] + %Opq.sa.calc950 = sub i32 %Opq.link.mask953, -208 ; <i32> [#uses=0] + %meshCmp427 = icmp eq i32 %meshStackVariable425.phi, 4 ; <i1> [#uses=1] + br i1 %meshCmp427, label %bb97, label %bb96 + +meshBB428: ; preds = %bb70, %bb4 + %.SV158.phi1090 = phi i32 [ %.SV158.phi, %bb70 ], [ undef, %bb4 ] ; <i32> [#uses=1] + %.SV162.phi1089 = phi i32* [ %.SV162.phi, %bb70 ], [ undef, %bb4 ] ; <i32*> [#uses=1] + %.SV164.phi1088 = phi i32 [ %.SV164.phi, %bb70 ], [ undef, %bb4 ] ; <i32> [#uses=1] + %.load165.SV.phi = phi i32 [ %.SV164.phi, %bb70 ], [ undef, %bb4 ] ; <i32> [#uses=1] + %.SV278.phi = phi %struct.Macroblock* [ %176, %bb70 ], [ undef, %bb4 ] ; <%struct.Macroblock*> [#uses=1] + %.SV280.phi = phi i32 [ %177, %bb70 ], [ undef, %bb4 ] ; <i32> [#uses=1] + %meshStackVariable429.phi = phi i32 [ %Opq.sa.calc630, %bb70 ], [ %Opq.sa.calc467, %bb4 ] ; <i32> [#uses=1] + %Opq.link.SV898.phi = phi i32 [ %Opq.sa.calc630, %bb70 ], [ %Opq.sa.calc462, %bb4 ] ; <i32> [#uses=1] + %.SV70.phi1026 = phi i32 [ %.SV70.phi1188, %bb70 ], [ %12, %bb4 ] ; <i32> [#uses=5] + %.SV52.phi = phi i32* [ undef, %bb70 ], [ %9, %bb4 ] ; <i32*> [#uses=3] + %.SV68.phi1020 = phi i32 [ %.SV68.phi1189, %bb70 ], [ %10, %bb4 ] ; <i32> [#uses=5] + %.SV38.phi1014 = phi i64 [ %.SV38.phi1191, %bb70 ], [ %4, %bb4 ] ; <i64> [#uses=5] + %.SV43.phi = phi i32 [ undef, %bb70 ], [ %8, %bb4 ] ; <i32> [#uses=1] + %Opq.link.mask900 = and i32 %Opq.link.SV898.phi, 4 ; <i32> [#uses=1] + %Opq.sa.calc897 = xor i32 %Opq.link.mask900, 193 ; <i32> [#uses=3] + %meshCmp431 = icmp eq i32 %meshStackVariable429.phi, 5 ; <i1> [#uses=1] + br i1 %meshCmp431, label %bb5, label %bb70.fragment + +meshBB432: ; preds = %bb42, %bb23.fragment182 + %.SV38.phi1179 = phi i64 [ %.SV38.phi1115, %bb23.fragment182 ], [ %.SV38.phi1231, %bb42 ] ; 
<i64> [#uses=7] + %.SV43.phi1178 = phi i32 [ %.SV43.phi1015, %bb23.fragment182 ], [ %.SV43.phi1230, %bb42 ] ; <i32> [#uses=3] + %.SV68.phi1177 = phi i32 [ %.SV68.phi1112, %bb23.fragment182 ], [ %.SV68.phi1229, %bb42 ] ; <i32> [#uses=7] + %.SV70.phi1176 = phi i32 [ %.SV70.phi1111, %bb23.fragment182 ], [ %.SV70.phi1228, %bb42 ] ; <i32> [#uses=7] + %.SV118.phi1040 = phi i32 [ %76, %bb23.fragment182 ], [ %.SV118.phi1125, %bb42 ] ; <i32> [#uses=7] + %.SV135.phi1039 = phi i1 [ %78, %bb23.fragment182 ], [ undef, %bb42 ] ; <i1> [#uses=2] + %meshStackVariable433.phi = phi i32 [ %Opq.sa.calc744, %bb23.fragment182 ], [ %Opq.sa.calc560, %bb42 ] ; <i32> [#uses=1] + %Opq.link.SV799.phi = phi i32 [ %Opq.sa.calc744, %bb23.fragment182 ], [ %Opq.sa.calc987, %bb42 ] ; <i32> [#uses=1] + %.SV96.phi1038 = phi i1 [ %71, %bb23.fragment182 ], [ undef, %bb42 ] ; <i1> [#uses=1] + %.SV99.phi1037 = phi i32* [ %72, %bb23.fragment182 ], [ undef, %bb42 ] ; <i32*> [#uses=2] + %.SV104.phi1036 = phi i32 [ %73, %bb23.fragment182 ], [ %.SV104.phi1127, %bb42 ] ; <i32> [#uses=3] + %.SV111.phi1035 = phi i32* [ %74, %bb23.fragment182 ], [ %.SV111.phi1126, %bb42 ] ; <i32*> [#uses=3] + %Opq.link.mask801 = and i32 %Opq.link.SV799.phi, 6 ; <i32> [#uses=1] + %Opq.sa.calc798 = xor i32 %Opq.link.mask801, 3 ; <i32> [#uses=5] + %meshCmp435 = icmp eq i32 %meshStackVariable433.phi, 1 ; <i1> [#uses=1] + br i1 %meshCmp435, label %bb43, label %bb39 + +meshBB436: ; preds = %bb71.fragment, %bb65 + %.SV38.phi1147 = phi i64 [ %.SV38.phi1144, %bb65 ], [ %.SV38.phi1140, %bb71.fragment ] ; <i64> [#uses=2] + %.SV68.phi1146 = phi i32 [ %.SV68.phi1142, %bb65 ], [ %.SV68.phi1139, %bb71.fragment ] ; <i32> [#uses=2] + %.SV70.phi1145 = phi i32 [ %.SV70.phi1141, %bb65 ], [ %.SV70.phi1138, %bb71.fragment ] ; <i32> [#uses=2] + %.SV.phi1067 = phi i32 [ undef, %bb65 ], [ %.load166.SV.phi, %bb71.fragment ] ; <i32> [#uses=1] + %yM.0.SV.phi1066 = phi i32 [ undef, %bb65 ], [ %183, %bb71.fragment ] ; <i32> [#uses=1] + %.load62.SV.phi = phi 
i32* [ %.SV52.phi1143, %bb65 ], [ undef, %bb71.fragment ] ; <i32*> [#uses=1] + %.SV268.phi = phi i32 [ %164, %bb65 ], [ undef, %bb71.fragment ] ; <i32> [#uses=2] + %meshStackVariable437.phi = phi i32 [ %Opq.sa.calc617, %bb65 ], [ %Opq.sa.calc809, %bb71.fragment ] ; <i32> [#uses=1] + %Opq.link.SV704.phi = phi i32 [ %Opq.sa.calc617, %bb65 ], [ %Opq.sa.calc809, %bb71.fragment ] ; <i32> [#uses=1] + %Opq.link.mask706 = and i32 %Opq.link.SV704.phi, 0 ; <i32> [#uses=2] + %Opq.sa.calc703 = add i32 %Opq.link.mask706, 216 ; <i32> [#uses=0] + %meshCmp439 = icmp eq i32 %meshStackVariable437.phi, 2 ; <i1> [#uses=1] + br i1 %meshCmp439, label %bb96, label %bb65.fragment + +meshBB440: ; preds = %bb85, %bb54.fragment + %.SV52.phi1235 = phi i32* [ %.SV52.phi1213, %bb85 ], [ undef, %bb54.fragment ] ; <i32*> [#uses=2] + %.SV38.phi1210 = phi i64 [ %.SV38.phi1214, %bb85 ], [ %.SV38.phi1207, %bb54.fragment ] ; <i64> [#uses=2] + %.SV68.phi1209 = phi i32 [ %.SV68.phi1212, %bb85 ], [ %.SV68.phi1206, %bb54.fragment ] ; <i32> [#uses=2] + %.SV70.phi1208 = phi i32 [ %.SV70.phi1211, %bb85 ], [ %.SV70.phi1205, %bb54.fragment ] ; <i32> [#uses=2] + %.SV.phi1056 = phi i32 [ undef, %bb85 ], [ %.SV118.phi1122, %bb54.fragment ] ; <i32> [#uses=1] + %meshStackVariable441.phi = phi i32 [ %Opq.sa.calc666, %bb85 ], [ %Opq.sa.calc883, %bb54.fragment ] ; <i32> [#uses=1] + %Opq.link.SV991.phi = phi i32 [ %Opq.sa.calc665, %bb85 ], [ %Opq.sa.calc883, %bb54.fragment ] ; <i32> [#uses=1] + %Opq.link.mask993 = and i32 %Opq.link.SV991.phi, 6 ; <i32> [#uses=1] + %Opq.sa.calc990 = xor i32 %Opq.link.mask993, 139 ; <i32> [#uses=2] + %meshCmp443 = icmp eq i32 %meshStackVariable441.phi, 6 ; <i1> [#uses=1] + br i1 %meshCmp443, label %bb96, label %bb87 + +meshBB444: ; preds = %bb66.fragment, %bb40 + %.SV38.phi1224 = phi i64 [ %.SV38.phi1194, %bb66.fragment ], [ %.SV38.phi1179, %bb40 ] ; <i64> [#uses=2] + %.SV68.phi1223 = phi i32 [ %.SV68.phi1193, %bb66.fragment ], [ %.SV68.phi1177, %bb40 ] ; <i32> [#uses=2] + %.SV70.phi1222 
= phi i32 [ %.SV70.phi1192, %bb66.fragment ], [ %.SV70.phi1176, %bb40 ] ; <i32> [#uses=2] + %.SV.phi1048 = phi i32 [ undef, %bb66.fragment ], [ %.SV118.phi1040, %bb40 ] ; <i32> [#uses=1] + %meshStackVariable445.phi = phi i32 [ %Opq.sa.calc794, %bb66.fragment ], [ %Opq.sa.calc554, %bb40 ] ; <i32> [#uses=1] + %Opq.link.SV781.phi = phi i32 [ %Opq.sa.calc795, %bb66.fragment ], [ %Opq.sa.calc554, %bb40 ] ; <i32> [#uses=1] + %Opq.link.mask783 = and i32 %Opq.link.SV781.phi, 10 ; <i32> [#uses=1] + %Opq.sa.calc780 = add i32 %Opq.link.mask783, 1 ; <i32> [#uses=0] + %meshCmp447 = icmp eq i32 %meshStackVariable445.phi, 11 ; <i1> [#uses=1] + br i1 %meshCmp447, label %bb96, label %bb98 + +meshBB448: ; preds = %bb35, %entry.fragment181 + %.SV70.phi1233 = phi i32 [ undef, %entry.fragment181 ], [ %.SV70.phi1180, %bb35 ] ; <i32> [#uses=1] + %.SV104.phi1157 = phi i32 [ undef, %entry.fragment181 ], [ %.SV104.phi1084, %bb35 ] ; <i32> [#uses=1] + %.SV111.phi1156 = phi i32* [ undef, %entry.fragment181 ], [ %.SV111.phi1083, %bb35 ] ; <i32*> [#uses=1] + %.SV118.phi1155 = phi i32 [ undef, %entry.fragment181 ], [ %.SV118.phi1082, %bb35 ] ; <i32> [#uses=1] + %.SV68.phi1025 = phi i32 [ %10, %entry.fragment181 ], [ %.SV68.phi1181, %bb35 ] ; <i32> [#uses=1] + %meshStackVariable449.phi = phi i32 [ %Opq.sa.calc863, %entry.fragment181 ], [ %Opq.sa.calc541, %bb35 ] ; <i32> [#uses=1] + %Opq.link.SV959.phi = phi i32 [ %Opq.sa.calc863, %entry.fragment181 ], [ %Opq.sa.calc828, %bb35 ] ; <i32> [#uses=1] + %.SV38.phi1019 = phi i64 [ %4, %entry.fragment181 ], [ %.SV38.phi1183, %bb35 ] ; <i64> [#uses=1] + %.SV43.phi1018 = phi i32 [ %8, %entry.fragment181 ], [ %.SV43.phi1015, %bb35 ] ; <i32> [#uses=2] + %Opq.link.mask961 = and i32 %Opq.link.SV959.phi, 1 ; <i32> [#uses=1] + %Opq.sa.calc958 = xor i32 %Opq.link.mask961, 63 ; <i32> [#uses=3] + %Opq.sa.calc957 = xor i32 %Opq.sa.calc958, 126 ; <i32> [#uses=1] + %meshCmp451 = icmp eq i32 %meshStackVariable449.phi, 5 ; <i1> [#uses=1] + br i1 %meshCmp451, label 
%bb37, label %return + +meshBB452: ; preds = %bb70.fragment, %bb63 + %.SV38.phi1110 = phi i64 [ %.SV38.phi1014, %bb70.fragment ], [ %.SV38.phi1098, %bb63 ] ; <i64> [#uses=3] + %.SV52.phi1109 = phi i32* [ undef, %bb70.fragment ], [ %.SV52.phi1097, %bb63 ] ; <i32*> [#uses=2] + %.SV68.phi1108 = phi i32 [ %.SV68.phi1020, %bb70.fragment ], [ %.SV68.phi1096, %bb63 ] ; <i32> [#uses=3] + %.SV70.phi1107 = phi i32 [ %.SV70.phi1026, %bb70.fragment ], [ %.SV70.phi1095, %bb63 ] ; <i32> [#uses=3] + %.SV158.phi1106 = phi i32 [ %.SV158.phi1090, %bb70.fragment ], [ undef, %bb63 ] ; <i32> [#uses=1] + %.SV162.phi1105 = phi i32* [ %.SV162.phi1089, %bb70.fragment ], [ undef, %bb63 ] ; <i32*> [#uses=1] + %.SV164.phi1104 = phi i32 [ %.SV164.phi1088, %bb70.fragment ], [ undef, %bb63 ] ; <i32> [#uses=1] + %.SV264.phi = phi %struct.Macroblock* [ undef, %bb70.fragment ], [ %157, %bb63 ] ; <%struct.Macroblock*> [#uses=1] + %.SV266.phi = phi i32 [ undef, %bb70.fragment ], [ %158, %bb63 ] ; <i32> [#uses=1] + %meshStackVariable453.phi = phi i32 [ %Opq.sa.calc739, %bb70.fragment ], [ %Opq.sa.calc611, %bb63 ] ; <i32> [#uses=1] + %Opq.link.SV821.phi = phi i32 [ %Opq.sa.calc897, %bb70.fragment ], [ %Opq.sa.calc611, %bb63 ] ; <i32> [#uses=1] + %.SV150.phi1060 = phi i32* [ undef, %bb70.fragment ], [ %148, %bb63 ] ; <i32*> [#uses=1] + %.SV152.phi1059 = phi i32* [ undef, %bb70.fragment ], [ %149, %bb63 ] ; <i32*> [#uses=2] + %.SV148.phi1057 = phi i32 [ undef, %bb70.fragment ], [ %147, %bb63 ] ; <i32> [#uses=1] + %Opq.link.mask823 = and i32 %Opq.link.SV821.phi, 4 ; <i32> [#uses=2] + %Opq.sa.calc820 = sub i32 %Opq.link.mask823, -97 ; <i32> [#uses=2] + %meshCmp455 = icmp eq i32 %meshStackVariable453.phi, 6 ; <i1> [#uses=1] + br i1 %meshCmp455, label %bb63.fragment, label %bb71 + +meshBB456: ; preds = %bb79, %bb63.fragment + %.SV38.phi1137 = phi i64 [ %.SV38.phi1110, %bb63.fragment ], [ %.SV38.phi1098, %bb79 ] ; <i64> [#uses=2] + %.SV52.phi1136 = phi i32* [ %.SV52.phi1109, %bb63.fragment ], [ 
%.SV52.phi1097, %bb79 ] ; <i32*> [#uses=2] + %.SV68.phi1135 = phi i32 [ %.SV68.phi1108, %bb63.fragment ], [ %.SV68.phi1096, %bb79 ] ; <i32> [#uses=2] + %.SV70.phi1134 = phi i32 [ %.SV70.phi1107, %bb63.fragment ], [ %.SV70.phi1095, %bb79 ] ; <i32> [#uses=2] + %.SV152.phi1133 = phi i32* [ %.SV152.phi1059, %bb63.fragment ], [ undef, %bb79 ] ; <i32*> [#uses=1] + %meshStackVariable457.phi = phi i32 [ %Opq.sa.calc890, %bb63.fragment ], [ %Opq.sa.calc651, %bb79 ] ; <i32> [#uses=1] + %Opq.link.SV817.phi = phi i32 [ %Opq.sa.calc891, %bb63.fragment ], [ %Opq.sa.calc651, %bb79 ] ; <i32> [#uses=1] + %Opq.link.mask819 = and i32 %Opq.link.SV817.phi, 2 ; <i32> [#uses=1] + %Opq.sa.calc816 = add i32 %Opq.link.mask819, 186 ; <i32> [#uses=2] + %meshCmp459 = icmp eq i32 %meshStackVariable457.phi, 10 ; <i1> [#uses=1] + br i1 %meshCmp459, label %bb81, label %bb65 +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert.ll b/src/LLVM/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert.ll new file mode 100644 index 0000000..d77e528 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin9 +; PR4056 + +define void @int163(i32 %p_4, i32 %p_5) nounwind { +entry: + %0 = tail call i32 @bar(i32 1) nounwind ; <i32> [#uses=2] + %1 = icmp sgt i32 %0, 7 ; <i1> [#uses=1] + br i1 %1, label %foo.exit, label %bb.i + +bb.i: ; preds = %entry + %2 = lshr i32 1, %0 ; <i32> [#uses=1] + %3 = icmp eq i32 %2, 0 ; <i1> [#uses=1] + %4 = zext i1 %3 to i32 ; <i32> [#uses=1] + %.p_5 = shl i32 %p_5, %4 ; <i32> [#uses=1] + br label %foo.exit + +foo.exit: ; preds = %bb.i, %entry + %5 = phi i32 [ %.p_5, %bb.i ], [ %p_5, %entry ] ; <i32> [#uses=1] + %6 = icmp eq i32 %5, 0 ; <i1> [#uses=0] + %7 = tail call i32 @bar(i32 %p_5) nounwind ; <i32> [#uses=0] + ret void +} + +declare i32 @bar(i32) ; NOTE(review): crash/assert regression test for PR4056 (LiveIntervals assertion in llc); the IR is a minimized reproducer and must stay byte-identical — "cleaning it up" (e.g. removing the dead %6) would likely stop triggering the original bug
diff --git a/src/LLVM/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert2.ll b/src/LLVM/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert2.ll new file mode 100644 index 0000000..f025654 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert2.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin9 +; PR4051 + +define void @int163(i32 %p_4, i32 %p_5) nounwind { +entry: + %0 = tail call i32 @foo(i32 1) nounwind ; <i32> [#uses=2] + %1 = icmp eq i32 %0, 0 ; <i1> [#uses=1] + br i1 %1, label %bb.i, label %bar.exit + +bb.i: ; preds = %entry + %2 = lshr i32 1, %0 ; <i32> [#uses=1] + %3 = icmp eq i32 %2, 0 ; <i1> [#uses=1] + %retval.i = select i1 %3, i32 1, i32 %p_5 ; <i32> [#uses=1] + br label %bar.exit + +bar.exit: ; preds = %bb.i, %entry + %4 = phi i32 [ %retval.i, %bb.i ], [ %p_5, %entry ] ; <i32> [#uses=1] + %5 = icmp eq i32 %4, 0 ; <i1> [#uses=0] + %6 = tail call i32 @foo(i32 %p_5) nounwind ; <i32> [#uses=0] + ret void +} + +declare i32 @foo(i32)
diff --git a/src/LLVM/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll b/src/LLVM/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll new file mode 100644 index 0000000..a2fd2e4 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s | grep {movl.*%ebx, 8(%esi)} +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin9.0" + +define void @cpuid(i32* %data) nounwind { +entry: + %arrayidx = getelementptr i32* %data, i32 1 ; <i32*> [#uses=1] + %arrayidx2 = getelementptr i32* %data, i32 2 ; <i32*> [#uses=1] + %arrayidx4 = getelementptr i32* %data, i32 3 ; <i32*> [#uses=1] + %arrayidx6 = getelementptr i32* %data, i32 4 ; <i32*> [#uses=1] + %arrayidx8 = getelementptr i32* %data, i32 5 ; <i32*> [#uses=1] + %tmp9 = load i32* %arrayidx8 ; <i32> [#uses=1] + %arrayidx11 = getelementptr i32* %data, i32 6 ; <i32*> [#uses=1] + %tmp12 = load i32* %arrayidx11 ; <i32> [#uses=1] + %arrayidx14 = getelementptr i32* %data, i32 7 ; <i32*> [#uses=1] + %tmp15 = load i32* %arrayidx14 ; <i32> [#uses=1] + %arrayidx17 = getelementptr i32* %data, i32 8 ; <i32*> [#uses=1] + %tmp18 = load i32* %arrayidx17 ; <i32> [#uses=1] + %0 = call i32 asm "cpuid", "={ax},=*{bx},=*{cx},=*{dx},{ax},{bx},{cx},{dx},~{dirflag},~{fpsr},~{flags}"(i32* %arrayidx2, i32* %arrayidx4, i32* %arrayidx6, i32 %tmp9, i32 %tmp12, i32 %tmp15, i32 %tmp18) nounwind ; <i32> [#uses=1] + store i32 %0, i32* %arrayidx + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-04-29-LinearScanBug.ll b/src/LLVM/test/CodeGen/X86/2009-04-29-LinearScanBug.ll new file mode 100644 index 0000000..2fbf7aa --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-04-29-LinearScanBug.ll
@@ -0,0 +1,215 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin10 +; rdar://6837009 + + %0 = type { %struct.pf_state*, %struct.pf_state*, %struct.pf_state*, i32 } + %1 = type { %2 } + %2 = type { %struct.pf_addr, %struct.pf_addr } + %3 = type { %struct.in6_addr } + %4 = type { [4 x i32] } + %5 = type { %struct.pfi_dynaddr*, [4 x i8] } + %6 = type { %struct.pfi_dynaddr*, %struct.pfi_dynaddr** } + %7 = type { %struct.pfr_ktable*, %struct.pfr_ktable*, %struct.pfr_ktable*, i32 } + %8 = type { %struct.pfr_ktable* } + %9 = type { i8* } + %10 = type { %11 } + %11 = type { i8*, i8*, %struct.radix_node* } + %12 = type { [2 x %struct.pf_rulequeue], %13, %13 } + %13 = type { %struct.pf_rulequeue*, %struct.pf_rule**, i32, i32, i32 } + %14 = type { %struct.pf_anchor*, %struct.pf_anchor*, %struct.pf_anchor*, i32 } + %15 = type { %struct.pfi_kif*, %struct.pfi_kif*, %struct.pfi_kif*, i32 } + %16 = type { %struct.ifnet*, %struct.ifnet** } + %17 = type { %18 } + %18 = type { %struct.pkthdr, %19 } + %19 = type { %struct.m_ext, [176 x i8] } + %20 = type { %struct.ifmultiaddr*, %struct.ifmultiaddr** } + %21 = type { i32, %22 } + %22 = type { i8*, [4 x i8] } + %23 = type { %struct.tcphdr* } + %24 = type { %struct.pf_ike_state } + %25 = type { %struct.pf_state_key*, %struct.pf_state_key*, %struct.pf_state_key*, i32 } + %26 = type { %struct.pf_src_node*, %struct.pf_src_node*, %struct.pf_src_node*, i32 } + %struct.anon = type { %struct.pf_state*, %struct.pf_state** } + %struct.au_mask_t = type { i32, i32 } + %struct.bpf_if = type opaque + %struct.dlil_threading_info = type opaque + %struct.ether_header = type { [6 x i8], [6 x i8], i16 } + %struct.ext_refsq = type { %struct.ext_refsq*, %struct.ext_refsq* } + %struct.hook_desc = type { %struct.hook_desc_head, void (i8*)*, i8* } + %struct.hook_desc_head = type { %struct.hook_desc*, %struct.hook_desc** } + %struct.if_data_internal = type { i8, i8, i8, i8, i8, i8, i8, i8, i32, i32, i32, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i32, 
i32, %struct.au_mask_t, i32, i32, i32 } + %struct.ifaddr = type { %struct.sockaddr*, %struct.sockaddr*, %struct.sockaddr*, %struct.ifnet*, %struct.ifaddrhead, void (i32, %struct.rtentry*, %struct.sockaddr*)*, i32, i32, i32, void (%struct.ifaddr*)*, void (%struct.ifaddr*, i32)*, i32 } + %struct.ifaddrhead = type { %struct.ifaddr*, %struct.ifaddr** } + %struct.ifmultiaddr = type { %20, %struct.sockaddr*, %struct.ifmultiaddr*, %struct.ifnet*, i32, i8*, i32, void (i8*)* } + %struct.ifmultihead = type { %struct.ifmultiaddr* } + %struct.ifnet = type { i8*, i8*, %16, %struct.ifaddrhead, i32, i32 (%struct.ifnet*, %struct.sockaddr*)*, i32, %struct.bpf_if*, i16, i16, i16, i16, i32, i8*, i32, %struct.if_data_internal, i32, i32 (%struct.ifnet*, %struct.mbuf*)*, i32 (%struct.ifnet*, i32, i8*)*, i32 (%struct.ifnet*, i32, i32 (%struct.ifnet*, %struct.mbuf*)*)*, void (%struct.ifnet*)*, i32 (%struct.ifnet*, %struct.mbuf*, i8*, i32*)*, void (%struct.ifnet*, %struct.kev_msg*)*, i32 (%struct.ifnet*, %struct.mbuf**, %struct.sockaddr*, i8*, i8*)*, i32, %struct.ifnet_filter_head, i32, i8*, i32, %struct.ifmultihead, i32, i32 (%struct.ifnet*, i32, %struct.ifnet_demux_desc*, i32)*, i32 (%struct.ifnet*, i32)*, %struct.proto_hash_entry*, i8*, %struct.dlil_threading_info*, i8*, %struct.ifqueue, [1 x i32], i32, %struct.ifprefixhead, %struct.lck_rw_t*, %21, i32, %struct.thread*, %struct.pfi_kif*, %struct.lck_mtx_t*, %struct.route } + %struct.ifnet_demux_desc = type { i32, i8*, i32 } + %struct.ifnet_filter = type opaque + %struct.ifnet_filter_head = type { %struct.ifnet_filter*, %struct.ifnet_filter** } + %struct.ifprefix = type { %struct.sockaddr*, %struct.ifnet*, %struct.ifprefixhead, i8, i8 } + %struct.ifprefixhead = type { %struct.ifprefix*, %struct.ifprefix** } + %struct.ifqueue = type { i8*, i8*, i32, i32, i32 } + %struct.in6_addr = type { %4 } + %struct.in_addr = type { i32 } + %struct.kev_d_vectors = type { i32, i8* } + %struct.kev_msg = type { i32, i32, i32, i32, [5 x 
%struct.kev_d_vectors] } + %struct.lck_mtx_t = type { [3 x i32] } + %struct.lck_rw_t = type <{ [3 x i32] }> + %struct.m_ext = type { i8*, void (i8*, i32, i8*)*, i32, i8*, %struct.ext_refsq, %struct.au_mask_t* } + %struct.m_hdr = type { %struct.mbuf*, %struct.mbuf*, i32, i8*, i16, i16 } + %struct.m_tag = type { %struct.packet_tags, i16, i16, i32 } + %struct.mbuf = type { %struct.m_hdr, %17 } + %struct.packet_tags = type { %struct.m_tag* } + %struct.pf_addr = type { %3 } + %struct.pf_addr_wrap = type <{ %1, %5, i8, i8, [6 x i8] }> + %struct.pf_anchor = type { %14, %14, %struct.pf_anchor*, %struct.pf_anchor_node, [64 x i8], [1024 x i8], %struct.pf_ruleset, i32, i32 } + %struct.pf_anchor_node = type { %struct.pf_anchor* } + %struct.pf_app_state = type { void (%struct.pf_state*, i32, i32, %struct.pf_pdesc*, %struct.pfi_kif*)*, i32 (%struct.pf_app_state*, %struct.pf_app_state*)*, i32 (%struct.pf_app_state*, %struct.pf_app_state*)*, %24 } + %struct.pf_ike_state = type { i64 } + %struct.pf_mtag = type { i8*, i32, i32, i16, i8, i8 } + %struct.pf_palist = type { %struct.pf_pooladdr*, %struct.pf_pooladdr** } + %struct.pf_pdesc = type { %struct.pf_threshold, i64, %23, %struct.pf_addr, %struct.pf_addr, %struct.pf_rule*, %struct.pf_addr*, %struct.pf_addr*, %struct.ether_header*, %struct.mbuf*, i32, %struct.pf_mtag*, i16*, i32, i16, i8, i8, i8, i8 } + %struct.pf_pool = type { %struct.pf_palist, [2 x i32], %struct.pf_pooladdr*, [4 x i8], %struct.in6_addr, %struct.pf_addr, i32, [2 x i16], i8, i8, [1 x i32] } + %struct.pf_pooladdr = type <{ %struct.pf_addr_wrap, %struct.pf_palist, [2 x i32], [16 x i8], %struct.pfi_kif*, [1 x i32] }> + %struct.pf_rule = type <{ %struct.pf_rule_addr, %struct.pf_rule_addr, [8 x %struct.pf_rule_ptr], [64 x i8], [16 x i8], [64 x i8], [64 x i8], [64 x i8], [64 x i8], [32 x i8], %struct.pf_rulequeue, [2 x i32], %struct.pf_pool, i64, [2 x i64], [2 x i64], %struct.pfi_kif*, [4 x i8], %struct.pf_anchor*, [4 x i8], %struct.pfr_ktable*, [4 x i8], i32, i32, [26 
x i32], i32, i32, i32, i32, i32, i32, %struct.au_mask_t, i32, i32, i32, i32, i32, i32, i32, i16, i16, i16, i16, i16, [2 x i8], %struct.pf_rule_gid, %struct.pf_rule_gid, i32, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, [2 x i8] }> + %struct.pf_rule_addr = type <{ %struct.pf_addr_wrap, %struct.pf_rule_xport, i8, [7 x i8] }> + %struct.pf_rule_gid = type { [2 x i32], i8, [3 x i8] } + %struct.pf_rule_ptr = type { %struct.pf_rule*, [4 x i8] } + %struct.pf_rule_xport = type { i32, [4 x i8] } + %struct.pf_rulequeue = type { %struct.pf_rule*, %struct.pf_rule** } + %struct.pf_ruleset = type { [5 x %12], %struct.pf_anchor*, i32, i32, i32 } + %struct.pf_src_node = type <{ %26, %struct.pf_addr, %struct.pf_addr, %struct.pf_rule_ptr, %struct.pfi_kif*, [2 x i64], [2 x i64], i32, i32, %struct.pf_threshold, i64, i64, i8, i8, [2 x i8] }> + %struct.pf_state = type <{ i64, i32, i32, %struct.anon, %struct.anon, %0, %struct.pf_state_peer, %struct.pf_state_peer, %struct.pf_rule_ptr, %struct.pf_rule_ptr, %struct.pf_rule_ptr, %struct.pf_addr, %struct.hook_desc_head, %struct.pf_state_key*, %struct.pfi_kif*, %struct.pfi_kif*, %struct.pf_src_node*, %struct.pf_src_node*, [2 x i64], [2 x i64], i64, i64, i64, i16, i8, i8, i8, i8, [6 x i8] }> + %struct.pf_state_host = type { %struct.pf_addr, %struct.in_addr } + %struct.pf_state_key = type { %struct.pf_state_host, %struct.pf_state_host, %struct.pf_state_host, i8, i8, i8, i8, %struct.pf_app_state*, %25, %25, %struct.anon, i16 } + %struct.pf_state_peer = type { i32, i32, i32, i16, i8, i8, i16, i8, %struct.pf_state_scrub*, [3 x i8] } + %struct.pf_state_scrub = type { %struct.au_mask_t, i32, i32, i32, i16, i8, i8, i32 } + %struct.pf_threshold = type { i32, i32, i32, i32 } + %struct.pfi_dynaddr = type { %6, %struct.pf_addr, %struct.pf_addr, %struct.pf_addr, %struct.pf_addr, %struct.pfr_ktable*, %struct.pfi_kif*, i8*, i32, i32, i32, i8, i8 } + %struct.pfi_kif = type { [16 x i8], %15, [2 x [2 x 
[2 x i64]]], [2 x [2 x [2 x i64]]], i64, i32, i8*, %struct.ifnet*, i32, i32, %6 } + %struct.pfr_ktable = type { %struct.pfr_tstats, %7, %8, %struct.radix_node_head*, %struct.radix_node_head*, %struct.pfr_ktable*, %struct.pfr_ktable*, %struct.pf_ruleset*, i64, i32 } + %struct.pfr_table = type { [1024 x i8], [32 x i8], i32, i8 } + %struct.pfr_tstats = type { %struct.pfr_table, [2 x [3 x i64]], [2 x [3 x i64]], i64, i64, i64, i32, [2 x i32] } + %struct.pkthdr = type { i32, %struct.ifnet*, i8*, i32, i32, i32, i16, i16, %struct.packet_tags } + %struct.proto_hash_entry = type opaque + %struct.radix_mask = type { i16, i8, i8, %struct.radix_mask*, %9, i32 } + %struct.radix_node = type { %struct.radix_mask*, %struct.radix_node*, i16, i8, i8, %10 } + %struct.radix_node_head = type { %struct.radix_node*, i32, i32, %struct.radix_node* (i8*, i8*, %struct.radix_node_head*, %struct.radix_node*)*, %struct.radix_node* (i8*, i8*, %struct.radix_node_head*, %struct.radix_node*)*, %struct.radix_node* (i8*, i8*, %struct.radix_node_head*)*, %struct.radix_node* (i8*, i8*, %struct.radix_node_head*)*, %struct.radix_node* (i8*, %struct.radix_node_head*)*, %struct.radix_node* (i8*, %struct.radix_node_head*, i32 (%struct.radix_node*, i8*)*, i8*)*, %struct.radix_node* (i8*, i8*, %struct.radix_node_head*)*, %struct.radix_node* (i8*, i8*, %struct.radix_node_head*, i32 (%struct.radix_node*, i8*)*, i8*)*, %struct.radix_node* (i8*, %struct.radix_node_head*)*, i32 (%struct.radix_node_head*, i32 (%struct.radix_node*, i8*)*, i8*)*, i32 (%struct.radix_node_head*, i8*, i8*, i32 (%struct.radix_node*, i8*)*, i8*)*, void (%struct.radix_node*, %struct.radix_node_head*)*, [3 x %struct.radix_node], i32 } + %struct.route = type { %struct.rtentry*, i32, %struct.sockaddr } + %struct.rt_metrics = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [4 x i32] } + %struct.rtentry = type { [2 x %struct.radix_node], %struct.sockaddr*, i32, i32, %struct.ifnet*, %struct.ifaddr*, %struct.sockaddr*, i8*, void (i8*)*, 
%struct.rt_metrics, %struct.rtentry*, %struct.rtentry*, i32, %struct.lck_mtx_t } + %struct.sockaddr = type { i8, i8, [14 x i8] } + %struct.tcphdr = type { i16, i16, i32, i32, i8, i8, i16, i16, i16 } + %struct.thread = type opaque +@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (%struct.pf_state_key*, %struct.pf_state_key*)* @pf_state_compare_ext_gwy to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +define fastcc i32 @pf_state_compare_ext_gwy(%struct.pf_state_key* nocapture %a, %struct.pf_state_key* nocapture %b) nounwind optsize ssp { +entry: + %0 = zext i8 0 to i32 ; <i32> [#uses=2] + %1 = load i8* null, align 1 ; <i8> [#uses=2] + %2 = zext i8 %1 to i32 ; <i32> [#uses=1] + %3 = sub i32 %0, %2 ; <i32> [#uses=1] + %4 = icmp eq i8 0, %1 ; <i1> [#uses=1] + br i1 %4, label %bb1, label %bb79 + +bb1: ; preds = %entry + %5 = load i8* null, align 4 ; <i8> [#uses=2] + %6 = zext i8 %5 to i32 ; <i32> [#uses=2] + %7 = getelementptr %struct.pf_state_key* %b, i32 0, i32 3 ; <i8*> [#uses=1] + %8 = load i8* %7, align 4 ; <i8> [#uses=2] + %9 = zext i8 %8 to i32 ; <i32> [#uses=1] + %10 = sub i32 %6, %9 ; <i32> [#uses=1] + %11 = icmp eq i8 %5, %8 ; <i1> [#uses=1] + br i1 %11, label %bb3, label %bb79 + +bb3: ; preds = %bb1 + switch i32 %0, label %bb23 [ + i32 1, label %bb4 + i32 6, label %bb6 + i32 17, label %bb10 + i32 47, label %bb17 + i32 50, label %bb21 + i32 58, label %bb4 + ] + +bb4: ; preds = %bb3, %bb3 + %12 = load i16* null, align 4 ; <i16> [#uses=1] + %13 = zext i16 %12 to i32 ; <i32> [#uses=1] + %14 = sub i32 0, %13 ; <i32> [#uses=1] + br i1 false, label %bb23, label %bb79 + +bb6: ; preds = %bb3 + %15 = load i16* null, align 4 ; <i16> [#uses=1] + %16 = zext i16 %15 to i32 ; <i32> [#uses=1] + %17 = sub i32 0, %16 ; <i32> [#uses=1] + ret i32 %17 + +bb10: ; preds = %bb3 + %18 = load i8* null, align 1 ; <i8> [#uses=2] + %19 = zext i8 %18 to i32 ; <i32> [#uses=1] + %20 = sub i32 0, %19 ; <i32> [#uses=1] + %21 = icmp eq i8 0, %18 ; <i1> [#uses=1] + br i1 
%21, label %bb12, label %bb79 + +bb12: ; preds = %bb10 + %22 = load i16* null, align 4 ; <i16> [#uses=1] + %23 = zext i16 %22 to i32 ; <i32> [#uses=1] + %24 = sub i32 0, %23 ; <i32> [#uses=1] + ret i32 %24 + +bb17: ; preds = %bb3 + %25 = load i8* null, align 1 ; <i8> [#uses=2] + %26 = icmp eq i8 %25, 1 ; <i1> [#uses=1] + br i1 %26, label %bb18, label %bb23 + +bb18: ; preds = %bb17 + %27 = icmp eq i8 %25, 0 ; <i1> [#uses=1] + br i1 %27, label %bb19, label %bb23 + +bb19: ; preds = %bb18 + %28 = load i16* null, align 4 ; <i16> [#uses=1] + %29 = zext i16 %28 to i32 ; <i32> [#uses=1] + %30 = sub i32 0, %29 ; <i32> [#uses=1] + br i1 false, label %bb23, label %bb79 + +bb21: ; preds = %bb3 + %31 = getelementptr %struct.pf_state_key* %a, i32 0, i32 1, i32 1, i32 0 ; <i32*> [#uses=1] + %32 = load i32* %31, align 4 ; <i32> [#uses=2] + %33 = getelementptr %struct.pf_state_key* %b, i32 0, i32 1, i32 1, i32 0 ; <i32*> [#uses=1] + %34 = load i32* %33, align 4 ; <i32> [#uses=2] + %35 = sub i32 %32, %34 ; <i32> [#uses=1] + %36 = icmp eq i32 %32, %34 ; <i1> [#uses=1] + br i1 %36, label %bb23, label %bb79 + +bb23: ; preds = %bb21, %bb19, %bb18, %bb17, %bb4, %bb3 + %cond = icmp eq i32 %6, 2 ; <i1> [#uses=1] + br i1 %cond, label %bb24, label %bb70 + +bb24: ; preds = %bb23 + ret i32 1 + +bb70: ; preds = %bb23 + %37 = load i32 (%struct.pf_app_state*, %struct.pf_app_state*)** null, align 4 ; <i32 (%struct.pf_app_state*, %struct.pf_app_state*)*> [#uses=3] + br i1 false, label %bb78, label %bb73 + +bb73: ; preds = %bb70 + %38 = load i32 (%struct.pf_app_state*, %struct.pf_app_state*)** null, align 4 ; <i32 (%struct.pf_app_state*, %struct.pf_app_state*)*> [#uses=2] + %39 = icmp eq i32 (%struct.pf_app_state*, %struct.pf_app_state*)* %38, null ; <i1> [#uses=1] + br i1 %39, label %bb78, label %bb74 + +bb74: ; preds = %bb73 + %40 = ptrtoint i32 (%struct.pf_app_state*, %struct.pf_app_state*)* %37 to i32 ; <i32> [#uses=1] + %41 = sub i32 0, %40 ; <i32> [#uses=1] + %42 = icmp eq i32 
(%struct.pf_app_state*, %struct.pf_app_state*)* %38, %37 ; <i1> [#uses=1] + br i1 %42, label %bb76, label %bb79 + +bb76: ; preds = %bb74 + %43 = tail call i32 %37(%struct.pf_app_state* null, %struct.pf_app_state* null) nounwind ; <i32> [#uses=1] + ret i32 %43 + +bb78: ; preds = %bb73, %bb70 + ret i32 0 + +bb79: ; preds = %bb74, %bb21, %bb19, %bb10, %bb4, %bb1, %entry + %.0 = phi i32 [ %3, %entry ], [ %10, %bb1 ], [ %14, %bb4 ], [ %20, %bb10 ], [ %30, %bb19 ], [ %35, %bb21 ], [ %41, %bb74 ] ; <i32> [#uses=1] + ret i32 %.0 +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-04-29-RegAllocAssert.ll b/src/LLVM/test/CodeGen/X86/2009-04-29-RegAllocAssert.ll new file mode 100644 index 0000000..e803d6b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-04-29-RegAllocAssert.ll
@@ -0,0 +1,117 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -disable-fp-elim -relocation-model=pic +; PR4099 + + %0 = type { [62 x %struct.Bitvec*] } ; type %0 + %1 = type { i8* } ; type %1 + %2 = type { double } ; type %2 + %struct..5sPragmaType = type { i8*, i32 } + %struct.AggInfo = type { i8, i8, i32, %struct.ExprList*, i32, %struct.AggInfo_col*, i32, i32, i32, %struct.AggInfo_func*, i32, i32 } + %struct.AggInfo_col = type { %struct.Table*, i32, i32, i32, i32, %struct.Expr* } + %struct.AggInfo_func = type { %struct.Expr*, %struct.FuncDef*, i32, i32 } + %struct.AuxData = type { i8*, void (i8*)* } + %struct.Bitvec = type { i32, i32, i32, %0 } + %struct.BtCursor = type { %struct.Btree*, %struct.BtShared*, %struct.BtCursor*, %struct.BtCursor*, i32 (i8*, i32, i8*, i32, i8*)*, i8*, i32, %struct.MemPage*, i32, %struct.CellInfo, i8, i8, i8*, i64, i32, i8, i32* } + %struct.BtLock = type { %struct.Btree*, i32, i8, %struct.BtLock* } + %struct.BtShared = type { %struct.Pager*, %struct.sqlite3*, %struct.BtCursor*, %struct.MemPage*, i8, i8, i8, i8, i8, i8, i8, i8, i32, i16, i16, i32, i32, i32, i32, i8, i32, i8*, void (i8*)*, %struct.sqlite3_mutex*, %struct.BusyHandler, i32, %struct.BtShared*, %struct.BtLock*, %struct.Btree* } + %struct.Btree = type { %struct.sqlite3*, %struct.BtShared*, i8, i8, i8, i32, %struct.Btree*, %struct.Btree* } + %struct.BtreeMutexArray = type { i32, [11 x %struct.Btree*] } + %struct.BusyHandler = type { i32 (i8*, i32)*, i8*, i32 } + %struct.CellInfo = type { i8*, i64, i32, i32, i16, i16, i16, i16 } + %struct.CollSeq = type { i8*, i8, i8, i8*, i32 (i8*, i32, i8*, i32, i8*)*, void (i8*)* } + %struct.Column = type { i8*, %struct.Expr*, i8*, i8*, i8, i8, i8, i8 } + %struct.Context = type { i64, i32, %struct.Fifo } + %struct.CountCtx = type { i64 } + %struct.Cursor = type { %struct.BtCursor*, i32, i64, i64, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i64, %struct.Btree*, i32, i8*, i64, i8*, %struct.KeyInfo*, i32, i64, %struct.sqlite3_vtab_cursor*, 
%struct.sqlite3_module*, i32, i32, i32*, i32*, i8* } + %struct.Db = type { i8*, %struct.Btree*, i8, i8, i8*, void (i8*)*, %struct.Schema* } + %struct.DbPage = type { %struct.Pager*, i32, %struct.DbPage*, %struct.DbPage*, %struct.PagerLruLink, %struct.DbPage*, i8, i8, i8, i8, i8, i16, %struct.DbPage*, %struct.DbPage*, i8* } + %struct.Expr = type { i8, i8, i16, %struct.CollSeq*, %struct.Expr*, %struct.Expr*, %struct.ExprList*, %struct..5sPragmaType, %struct..5sPragmaType, i32, i32, %struct.AggInfo*, i32, i32, %struct.Select*, %struct.Table*, i32 } + %struct.ExprList = type { i32, i32, i32, %struct.ExprList_item* } + %struct.ExprList_item = type { %struct.Expr*, i8*, i8, i8, i8 } + %struct.FKey = type { %struct.Table*, %struct.FKey*, i8*, %struct.FKey*, i32, %struct.sColMap*, i8, i8, i8, i8 } + %struct.Fifo = type { i32, %struct.FifoPage*, %struct.FifoPage* } + %struct.FifoPage = type { i32, i32, i32, %struct.FifoPage*, [1 x i64] } + %struct.FuncDef = type { i16, i8, i8, i8, i8*, %struct.FuncDef*, void (%struct.sqlite3_context*, i32, %struct.Mem**)*, void (%struct.sqlite3_context*, i32, %struct.Mem**)*, void (%struct.sqlite3_context*)*, [1 x i8] } + %struct.Hash = type { i8, i8, i32, i32, %struct.HashElem*, %struct._ht* } + %struct.HashElem = type { %struct.HashElem*, %struct.HashElem*, i8*, i8*, i32 } + %struct.IdList = type { %struct..5sPragmaType*, i32, i32 } + %struct.Index = type { i8*, i32, i32*, i32*, %struct.Table*, i32, i8, i8, i8*, %struct.Index*, %struct.Schema*, i8*, i8** } + %struct.KeyInfo = type { %struct.sqlite3*, i8, i8, i8, i32, i8*, [1 x %struct.CollSeq*] } + %struct.Mem = type { %struct.CountCtx, double, %struct.sqlite3*, i8*, i32, i16, i8, i8, void (i8*)* } + %struct.MemPage = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i16, i16, i16, i16, i16, i16, [5 x %struct._OvflCell], %struct.BtShared*, i8*, %struct.DbPage*, i32, %struct.MemPage* } + %struct.Module = type { %struct.sqlite3_module*, i8*, i8*, void (i8*)* } + %struct.Op = type { i8, i8, i8, 
i8, i32, i32, i32, %1 } + %struct.Pager = type { %struct.sqlite3_vfs*, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.Bitvec*, %struct.Bitvec*, i8*, i8*, i8*, i8*, %struct.sqlite3_file*, %struct.sqlite3_file*, %struct.sqlite3_file*, %struct.BusyHandler*, %struct.PagerLruList, %struct.DbPage*, %struct.DbPage*, %struct.DbPage*, i64, i64, i64, i64, i64, i32, void (%struct.DbPage*, i32)*, void (%struct.DbPage*, i32)*, i32, %struct.DbPage**, i8*, [16 x i8] } + %struct.PagerLruLink = type { %struct.DbPage*, %struct.DbPage* } + %struct.PagerLruList = type { %struct.DbPage*, %struct.DbPage*, %struct.DbPage* } + %struct.Schema = type { i32, %struct.Hash, %struct.Hash, %struct.Hash, %struct.Hash, %struct.Table*, i8, i8, i16, i32, %struct.sqlite3* } + %struct.Select = type { %struct.ExprList*, i8, i8, i8, i8, i8, i8, i8, %struct.SrcList*, %struct.Expr*, %struct.ExprList*, %struct.Expr*, %struct.ExprList*, %struct.Select*, %struct.Select*, %struct.Select*, %struct.Expr*, %struct.Expr*, i32, i32, [3 x i32] } + %struct.SrcList = type { i16, i16, [1 x %struct.SrcList_item] } + %struct.SrcList_item = type { i8*, i8*, i8*, %struct.Table*, %struct.Select*, i8, i8, i32, %struct.Expr*, %struct.IdList*, i64 } + %struct.Table = type { i8*, i32, %struct.Column*, i32, %struct.Index*, i32, %struct.Select*, i32, %struct.Trigger*, %struct.FKey*, i8*, %struct.Expr*, i32, i8, i8, i8, i8, i8, i8, i8, %struct.Module*, %struct.sqlite3_vtab*, i32, i8**, %struct.Schema* } + %struct.Trigger = type { i8*, i8*, i8, i8, %struct.Expr*, %struct.IdList*, %struct..5sPragmaType, %struct.Schema*, %struct.Schema*, %struct.TriggerStep*, %struct.Trigger* } + %struct.TriggerStep = type { i32, i32, %struct.Trigger*, %struct.Select*, %struct..5sPragmaType, %struct.Expr*, %struct.ExprList*, %struct.IdList*, %struct.TriggerStep*, %struct.TriggerStep* } + %struct.Vdbe = type { %struct.sqlite3*, 
%struct.Vdbe*, %struct.Vdbe*, i32, i32, %struct.Op*, i32, i32, i32*, %struct.Mem**, %struct.Mem*, i32, %struct.Cursor**, i32, %struct.Mem*, i8**, i32, i32, i32, %struct.Mem*, i32, i32, %struct.Fifo, i32, i32, %struct.Context*, i32, i32, i32, i32, i32, [25 x i32], i32, i32, i8**, i8*, %struct.Mem*, i8, i8, i8, i8, i8, i8, i32, i64, i32, %struct.BtreeMutexArray, i32, i8*, i32 } + %struct.VdbeFunc = type { %struct.FuncDef*, i32, [1 x %struct.AuxData] } + %struct._OvflCell = type { i8*, i16 } + %struct._ht = type { i32, %struct.HashElem* } + %struct.sColMap = type { i32, i8* } + %struct.sqlite3 = type { %struct.sqlite3_vfs*, i32, %struct.Db*, i32, i32, i32, i32, i8, i8, i8, i8, i32, %struct.CollSeq*, i64, i64, i32, i32, i32, %struct.sqlite3_mutex*, %struct.sqlite3InitInfo, i32, i8**, %struct.Vdbe*, i32, void (i8*, i8*)*, i8*, void (i8*, i8*, i64)*, i8*, i8*, i32 (i8*)*, i8*, void (i8*)*, i8*, void (i8*, i32, i8*, i8*, i64)*, void (i8*, %struct.sqlite3*, i32, i8*)*, void (i8*, %struct.sqlite3*, i32, i8*)*, i8*, %struct.Mem*, i8*, i8*, %2, i32 (i8*, i32, i8*, i8*, i8*, i8*)*, i8*, i32 (i8*)*, i8*, i32, %struct.Hash, %struct.Table*, %struct.sqlite3_vtab**, i32, %struct.Hash, %struct.Hash, %struct.BusyHandler, i32, [2 x %struct.Db], i8 } + %struct.sqlite3InitInfo = type { i32, i32, i8 } + %struct.sqlite3_context = type { %struct.FuncDef*, %struct.VdbeFunc*, %struct.Mem, %struct.Mem*, i32, %struct.CollSeq* } + %struct.sqlite3_file = type { %struct.sqlite3_io_methods* } + %struct.sqlite3_index_constraint = type { i32, i8, i8, i32 } + %struct.sqlite3_index_constraint_usage = type { i32, i8 } + %struct.sqlite3_index_info = type { i32, %struct.sqlite3_index_constraint*, i32, %struct.sqlite3_index_constraint_usage*, %struct.sqlite3_index_constraint_usage*, i32, i8*, i32, i32, double } + %struct.sqlite3_io_methods = type { i32, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*, i8*, i32, i64)*, i32 (%struct.sqlite3_file*, i8*, i32, i64)*, i32 (%struct.sqlite3_file*, i64)*, 
i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*, i64*)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*, i32, i8*)*, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*)* } + %struct.sqlite3_module = type { i32, i32 (%struct.sqlite3*, i8*, i32, i8**, %struct.sqlite3_vtab**, i8**)*, i32 (%struct.sqlite3*, i8*, i32, i8**, %struct.sqlite3_vtab**, i8**)*, i32 (%struct.sqlite3_vtab*, %struct.sqlite3_index_info*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*, %struct.sqlite3_vtab_cursor**)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*, i32, i8*, i32, %struct.Mem**)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*, %struct.sqlite3_context*, i32)*, i32 (%struct.sqlite3_vtab_cursor*, i64*)*, i32 (%struct.sqlite3_vtab*, i32, %struct.Mem**, i64*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*, i32, i8*, void (%struct.sqlite3_context*, i32, %struct.Mem**)**, i8**)*, i32 (%struct.sqlite3_vtab*, i8*)* } + %struct.sqlite3_mutex = type opaque + %struct.sqlite3_vfs = type { i32, i32, i32, %struct.sqlite3_vfs*, i8*, i8*, i32 (%struct.sqlite3_vfs*, i8*, %struct.sqlite3_file*, i32, i32*)*, i32 (%struct.sqlite3_vfs*, i8*, i32)*, i32 (%struct.sqlite3_vfs*, i8*, i32)*, i32 (%struct.sqlite3_vfs*, i32, i8*)*, i32 (%struct.sqlite3_vfs*, i8*, i32, i8*)*, i8* (%struct.sqlite3_vfs*, i8*)*, void (%struct.sqlite3_vfs*, i32, i8*)*, i8* (%struct.sqlite3_vfs*, i8*, i8*)*, void (%struct.sqlite3_vfs*, i8*)*, i32 (%struct.sqlite3_vfs*, i32, i8*)*, i32 (%struct.sqlite3_vfs*, i32)*, i32 (%struct.sqlite3_vfs*, double*)* } + %struct.sqlite3_vtab = type { %struct.sqlite3_module*, i32, i8* } + %struct.sqlite3_vtab_cursor = type { %struct.sqlite3_vtab* } + +define fastcc void 
@dropCell(%struct.MemPage* nocapture %pPage, i32 %idx, i32 %sz) nounwind ssp { +entry: + %0 = load i8** null, align 8 ; <i8*> [#uses=4] + %1 = or i32 0, 0 ; <i32> [#uses=1] + %2 = icmp slt i32 %sz, 4 ; <i1> [#uses=1] + %size_addr.0.i = select i1 %2, i32 4, i32 %sz ; <i32> [#uses=1] + br label %bb3.i + +bb3.i: ; preds = %bb3.i, %entry + %3 = icmp eq i32 0, 0 ; <i1> [#uses=1] + %or.cond.i = or i1 %3, false ; <i1> [#uses=1] + br i1 %or.cond.i, label %bb5.i, label %bb3.i + +bb5.i: ; preds = %bb3.i + %4 = getelementptr i8* %0, i64 0 ; <i8*> [#uses=1] + store i8 0, i8* %4, align 1 + %5 = getelementptr i8* %0, i64 0 ; <i8*> [#uses=1] + store i8 0, i8* %5, align 1 + %6 = add i32 %1, 2 ; <i32> [#uses=1] + %7 = zext i32 %6 to i64 ; <i64> [#uses=2] + %8 = getelementptr i8* %0, i64 %7 ; <i8*> [#uses=1] + %9 = lshr i32 %size_addr.0.i, 8 ; <i32> [#uses=1] + %10 = trunc i32 %9 to i8 ; <i8> [#uses=1] + store i8 %10, i8* %8, align 1 + %.sum31.i = add i64 %7, 1 ; <i64> [#uses=1] + %11 = getelementptr i8* %0, i64 %.sum31.i ; <i8*> [#uses=1] + store i8 0, i8* %11, align 1 + br label %bb11.outer.i + +bb11.outer.i: ; preds = %bb11.outer.i, %bb5.i + %12 = icmp eq i32 0, 0 ; <i1> [#uses=1] + br i1 %12, label %bb12.i, label %bb11.outer.i + +bb12.i: ; preds = %bb11.outer.i + %i.08 = add i32 %idx, 1 ; <i32> [#uses=1] + %13 = icmp sgt i32 0, %i.08 ; <i1> [#uses=1] + br i1 %13, label %bb, label %bb2 + +bb: ; preds = %bb12.i + br label %bb2 + +bb2: ; preds = %bb, %bb12.i + %14 = getelementptr %struct.MemPage* %pPage, i64 0, i32 1 ; <i8*> [#uses=1] + store i8 1, i8* %14, align 1 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-04-scale.ll b/src/LLVM/test/CodeGen/X86/2009-04-scale.ll new file mode 100644 index 0000000..e4c756c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-04-scale.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -march=x86 -mtriple=i386-unknown-linux-gnu +; PR3995 + + %struct.vtable = type { i32 (...)** } + %struct.array = type { %struct.impl, [256 x %struct.pair], [256 x %struct.pair], [256 x %struct.pair], [256 x %struct.pair], [256 x %struct.pair], [256 x %struct.pair] } + %struct.impl = type { %struct.vtable, i8, %struct.impl*, i32, i32, i64, i64 } + %struct.pair = type { i64, i64 } + +define void @test() { +entry: + %0 = load i32* null, align 4 ; <i32> [#uses=1] + %1 = lshr i32 %0, 8 ; <i32> [#uses=1] + %2 = and i32 %1, 255 ; <i32> [#uses=1] + %3 = getelementptr %struct.array* null, i32 0, i32 3 ; <[256 x %struct.pair]*> [#uses=1] + %4 = getelementptr [256 x %struct.pair]* %3, i32 0, i32 %2 ; <%struct.pair*> [#uses=1] + %5 = getelementptr %struct.pair* %4, i32 0, i32 1 ; <i64*> [#uses=1] + %6 = load i64* %5, align 4 ; <i64> [#uses=1] + %7 = xor i64 0, %6 ; <i64> [#uses=1] + %8 = xor i64 %7, 0 ; <i64> [#uses=1] + %9 = xor i64 %8, 0 ; <i64> [#uses=1] + store i64 %9, i64* null, align 8 + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-05-08-InlineAsmIOffset.ll b/src/LLVM/test/CodeGen/X86/2009-05-08-InlineAsmIOffset.ll new file mode 100644 index 0000000..738b5fb --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-05-08-InlineAsmIOffset.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -relocation-model=static > %t +; RUN: grep "1: ._pv_cpu_ops+8" %t +; RUN: grep "2: ._G" %t +; PR4152 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin9.6" + %struct.pv_cpu_ops = type { i32, [2 x i32] } +@pv_cpu_ops = external global %struct.pv_cpu_ops ; <%struct.pv_cpu_ops*> [#uses=1] +@G = external global i32 ; <i32*> [#uses=1] + +define void @x() nounwind { +entry: + tail call void asm sideeffect "1: $0", "i,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr (%struct.pv_cpu_ops* @pv_cpu_ops, i32 0, i32 1, i32 1)) nounwind + tail call void asm sideeffect "2: $0", "i,~{dirflag},~{fpsr},~{flags}"(i32* @G) nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll b/src/LLVM/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll new file mode 100644 index 0000000..a5e28c0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -march=x86 +; PR4188 +; ModuleID = '<stdin>' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin9.6" +@g_9 = external global i32 ; <i32*> [#uses=1] + +define i32 @int86(i32 %p_87) nounwind { +entry: + %0 = trunc i32 %p_87 to i8 ; <i8> [#uses=1] + %1 = icmp ne i8 %0, 0 ; <i1> [#uses=1] + br label %bb + +bb: ; preds = %bb.i, %bb, %entry + %2 = volatile load i32* @g_9, align 4 ; <i32> [#uses=2] + %3 = icmp sgt i32 %2, 1 ; <i1> [#uses=1] + %4 = and i1 %3, %1 ; <i1> [#uses=1] + br i1 %4, label %bb.i, label %bb + +bb.i: ; preds = %bb + %5 = icmp sgt i32 0, %2 ; <i1> [#uses=0] + br label %bb +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-05-19-SingleElementExtractElement.ll b/src/LLVM/test/CodeGen/X86/2009-05-19-SingleElementExtractElement.ll new file mode 100644 index 0000000..6e062fb --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-05-19-SingleElementExtractElement.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=x86-64 +; PR3886 + +define i32 @main(i32 %argc, i8** nocapture %argv) nounwind { +entry: + %a = call <1 x i64> @bar() + %tmp5.i = extractelement <1 x i64> %a, i32 0 + %tmp11 = bitcast i64 %tmp5.i to <1 x i64> + %tmp8 = extractelement <1 x i64> %tmp11, i32 0 + %call6 = call i32 (i64)* @foo(i64 %tmp8) + ret i32 undef +} + +declare i32 @foo(i64) + +declare <1 x i64> @bar()
diff --git a/src/LLVM/test/CodeGen/X86/2009-05-23-available_externally.ll b/src/LLVM/test/CodeGen/X86/2009-05-23-available_externally.ll new file mode 100644 index 0000000..94773d9 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-05-23-available_externally.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -relocation-model=pic | grep atoi | grep PLT +; PR4253 +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @foo(i8* %x) nounwind readonly { +entry: + %call = tail call fastcc i32 @atoi(i8* %x) nounwind readonly ; <i32> [#uses=1] + ret i32 %call +} + +define available_externally fastcc i32 @atoi(i8* %__nptr) nounwind readonly { +entry: + %call = tail call i64 @strtol(i8* nocapture %__nptr, i8** null, i32 10) nounwind readonly ; <i64> [#uses=1] + %conv = trunc i64 %call to i32 ; <i32> [#uses=1] + ret i32 %conv +} + +declare i64 @strtol(i8*, i8** nocapture, i32) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll b/src/LLVM/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll new file mode 100644 index 0000000..3cd5416 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s | FileCheck %s + +; Check that the shr(shl X, 56), 48) is not mistakenly turned into +; a shr (X, -8) that gets subsequently "optimized away" as undef +; PR4254 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define i64 @foo(i64 %b) nounwind readnone { +entry: +; CHECK: foo: +; CHECK: shlq $56, %rdi +; CHECK: sarq $48, %rdi +; CHECK: leaq 1(%rdi), %rax + %shl = shl i64 %b, 56 ; <i64> [#uses=1] + %shr = ashr i64 %shl, 48 ; <i64> [#uses=1] + %add5 = or i64 %shr, 1 ; <i64> [#uses=1] + ret i64 %add5 +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-05-28-DAGCombineCrash.ll b/src/LLVM/test/CodeGen/X86/2009-05-28-DAGCombineCrash.ll new file mode 100644 index 0000000..1d14620 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-05-28-DAGCombineCrash.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=x86-64 + +define fastcc void @S_next_symbol(i448* %P) nounwind ssp { +entry: + br label %bb14 + +bb14: ; preds = %bb + %srcval16 = load i448* %P, align 8 ; <i448> [#uses=1] + %tmp = zext i32 undef to i448 ; <i448> [#uses=1] + %tmp15 = shl i448 %tmp, 288 ; <i448> [#uses=1] + %mask = and i448 %srcval16, -2135987035423586845985235064014169866455883682256196619149693890381755748887481053010428711403521 ; <i448> [#uses=1] + %ins = or i448 %tmp15, %mask ; <i448> [#uses=1] + store i448 %ins, i448* %P, align 8 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-05-30-ISelBug.ll b/src/LLVM/test/CodeGen/X86/2009-05-30-ISelBug.ll new file mode 100644 index 0000000..af552d4 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-05-30-ISelBug.ll
@@ -0,0 +1,28 @@ +; RUN: llc < %s -march=x86-64 | not grep {movzbl %\[abcd\]h,} + +define void @BZ2_bzDecompress_bb5_2E_outer_bb35_2E_i_bb54_2E_i(i32*, i32 %c_nblock_used.2.i, i32 %.reload51, i32* %.out, i32* %.out1, i32* %.out2, i32* %.out3) nounwind { +newFuncRoot: + br label %bb54.i + +bb35.i.backedge.exitStub: ; preds = %bb54.i + store i32 %6, i32* %.out + store i32 %10, i32* %.out1 + store i32 %11, i32* %.out2 + store i32 %12, i32* %.out3 + ret void + +bb54.i: ; preds = %newFuncRoot + %1 = zext i32 %.reload51 to i64 ; <i64> [#uses=1] + %2 = getelementptr i32* %0, i64 %1 ; <i32*> [#uses=1] + %3 = load i32* %2, align 4 ; <i32> [#uses=2] + %4 = lshr i32 %3, 8 ; <i32> [#uses=1] + %5 = and i32 %3, 255 ; <i32> [#uses=1] + %6 = add i32 %5, 4 ; <i32> [#uses=1] + %7 = zext i32 %4 to i64 ; <i64> [#uses=1] + %8 = getelementptr i32* %0, i64 %7 ; <i32*> [#uses=1] + %9 = load i32* %8, align 4 ; <i32> [#uses=2] + %10 = and i32 %9, 255 ; <i32> [#uses=1] + %11 = lshr i32 %9, 8 ; <i32> [#uses=1] + %12 = add i32 %c_nblock_used.2.i, 5 ; <i32> [#uses=1] + br label %bb35.i.backedge.exitStub +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-06-02-RewriterBug.ll b/src/LLVM/test/CodeGen/X86/2009-06-02-RewriterBug.ll new file mode 100644 index 0000000..779f985 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-06-02-RewriterBug.ll
@@ -0,0 +1,362 @@ +; RUN: llc < %s -mtriple=x86_64-undermydesk-freebsd8.0 -relocation-model=pic -disable-fp-elim +; PR4225 + +define void @sha256_block1(i32* nocapture %arr, i8* nocapture %in, i64 %num) nounwind { +entry: + br i1 undef, label %while.end, label %bb.nph + +bb.nph: ; preds = %entry + br label %while.body + +while.body: ; preds = %for.end, %bb.nph + %indvar2787 = phi i64 [ 0, %bb.nph ], [ %indvar.next2788, %for.end ] ; <i64> [#uses=2] + %tmp2791 = mul i64 %indvar2787, 44 ; <i64> [#uses=0] + %ctg22996 = getelementptr i8* %in, i64 0 ; <i8*> [#uses=1] + %conv = zext i32 undef to i64 ; <i64> [#uses=1] + %conv11 = zext i32 undef to i64 ; <i64> [#uses=1] + %tmp18 = load i32* undef ; <i32> [#uses=1] + %conv19 = zext i32 %tmp18 to i64 ; <i64> [#uses=1] + %tmp30 = load i32* undef ; <i32> [#uses=1] + %conv31 = zext i32 %tmp30 to i64 ; <i64> [#uses=4] + %ptrincdec3065 = load i8* null ; <i8> [#uses=1] + %conv442709 = zext i8 %ptrincdec3065 to i64 ; <i64> [#uses=1] + %shl45 = shl i64 %conv442709, 16 ; <i64> [#uses=1] + %conv632707 = zext i8 undef to i64 ; <i64> [#uses=1] + %or = or i64 %shl45, 0 ; <i64> [#uses=1] + %or55 = or i64 %or, %conv632707 ; <i64> [#uses=1] + %or64 = or i64 %or55, 0 ; <i64> [#uses=1] + %shr85 = lshr i64 %conv31, 25 ; <i64> [#uses=0] + %add = add i64 %conv11, 1508970993 ; <i64> [#uses=1] + %add95 = add i64 %add, 0 ; <i64> [#uses=1] + %add98 = add i64 %add95, 0 ; <i64> [#uses=1] + %add99 = add i64 %add98, %or64 ; <i64> [#uses=1] + %add134 = add i64 %add99, 0 ; <i64> [#uses=4] + store i32 undef, i32* undef + %shl187 = shl i64 %add134, 21 ; <i64> [#uses=0] + %and203 = and i64 %add134, %conv31 ; <i64> [#uses=1] + %xor208 = xor i64 0, %and203 ; <i64> [#uses=1] + %add212 = add i64 0, %xor208 ; <i64> [#uses=1] + %add213 = add i64 %add212, 0 ; <i64> [#uses=1] + %add248 = add i64 %add213, 0 ; <i64> [#uses=3] + %conv2852690 = zext i8 undef to i64 ; <i64> [#uses=1] + %or277 = or i64 0, %conv2852690 ; <i64> [#uses=1] + %or286 = or i64 %or277, 0 ; <i64> 
[#uses=1] + %neg319 = xor i64 %add248, 4294967295 ; <i64> [#uses=1] + %and321 = and i64 %neg319, %conv31 ; <i64> [#uses=1] + %xor322 = xor i64 %and321, 0 ; <i64> [#uses=1] + %add314 = add i64 %conv, 2870763221 ; <i64> [#uses=1] + %add323 = add i64 %add314, %or286 ; <i64> [#uses=1] + %add326 = add i64 %add323, %xor322 ; <i64> [#uses=1] + %add327 = add i64 %add326, 0 ; <i64> [#uses=2] + %add362 = add i64 %add327, %conv19 ; <i64> [#uses=4] + %add365 = add i64 0, %add327 ; <i64> [#uses=3] + %shl409 = shl i64 %add362, 26 ; <i64> [#uses=0] + %and431 = and i64 %add362, %add248 ; <i64> [#uses=1] + %neg433 = xor i64 %add362, -1 ; <i64> [#uses=1] + %and435 = and i64 %add134, %neg433 ; <i64> [#uses=1] + %xor436 = xor i64 %and431, %and435 ; <i64> [#uses=1] + %add428 = add i64 %conv31, 3624381080 ; <i64> [#uses=1] + %add437 = add i64 %add428, 0 ; <i64> [#uses=1] + %add440 = add i64 %add437, %xor436 ; <i64> [#uses=1] + %add441 = add i64 %add440, 0 ; <i64> [#uses=1] + %shl443 = shl i64 %add365, 30 ; <i64> [#uses=1] + %and445 = lshr i64 %add365, 2 ; <i64> [#uses=1] + %shr446 = and i64 %and445, 1073741823 ; <i64> [#uses=1] + %or447 = or i64 %shr446, %shl443 ; <i64> [#uses=1] + %xor461 = xor i64 0, %or447 ; <i64> [#uses=1] + %add473 = add i64 %xor461, 0 ; <i64> [#uses=1] + %add479 = add i64 %add473, %add441 ; <i64> [#uses=3] + %conv4932682 = zext i8 undef to i64 ; <i64> [#uses=1] + %shl494 = shl i64 %conv4932682, 16 ; <i64> [#uses=1] + %ptrincdec4903012 = load i8* null ; <i8> [#uses=1] + %conv5032681 = zext i8 %ptrincdec4903012 to i64 ; <i64> [#uses=1] + %shl504 = shl i64 %conv5032681, 8 ; <i64> [#uses=1] + %ptrincdec5003009 = load i8* null ; <i8> [#uses=1] + %conv5132680 = zext i8 %ptrincdec5003009 to i64 ; <i64> [#uses=1] + %or495 = or i64 %shl494, 0 ; <i64> [#uses=1] + %or505 = or i64 %or495, %conv5132680 ; <i64> [#uses=1] + %or514 = or i64 %or505, %shl504 ; <i64> [#uses=1] + store i32 undef, i32* undef + %or540 = or i64 undef, 0 ; <i64> [#uses=0] + %add542 = add i64 %add134, 
310598401 ; <i64> [#uses=1] + %add551 = add i64 %add542, %or514 ; <i64> [#uses=1] + %add554 = add i64 %add551, 0 ; <i64> [#uses=1] + %add555 = add i64 %add554, 0 ; <i64> [#uses=1] + %or561 = or i64 undef, undef ; <i64> [#uses=1] + %or567 = or i64 undef, undef ; <i64> [#uses=1] + %and572 = lshr i64 %add479, 22 ; <i64> [#uses=1] + %shr573 = and i64 %and572, 1023 ; <i64> [#uses=1] + %or574 = or i64 %shr573, 0 ; <i64> [#uses=1] + %xor568 = xor i64 %or567, %or574 ; <i64> [#uses=1] + %xor575 = xor i64 %xor568, %or561 ; <i64> [#uses=1] + %add587 = add i64 %xor575, 0 ; <i64> [#uses=1] + %add593 = add i64 %add587, %add555 ; <i64> [#uses=1] + %ptrincdec6043000 = load i8* null ; <i8> [#uses=1] + %conv6172676 = zext i8 %ptrincdec6043000 to i64 ; <i64> [#uses=1] + %shl618 = shl i64 %conv6172676, 8 ; <i64> [#uses=1] + %ptrincdec6142997 = load i8* %ctg22996 ; <i8> [#uses=1] + %conv6272675 = zext i8 %ptrincdec6142997 to i64 ; <i64> [#uses=1] + %or619 = or i64 0, %conv6272675 ; <i64> [#uses=1] + %or628 = or i64 %or619, %shl618 ; <i64> [#uses=1] + %add656 = add i64 %add248, 607225278 ; <i64> [#uses=1] + %add665 = add i64 %add656, %or628 ; <i64> [#uses=1] + %add668 = add i64 %add665, 0 ; <i64> [#uses=1] + %add669 = add i64 %add668, 0 ; <i64> [#uses=1] + %and699 = and i64 %add479, %add365 ; <i64> [#uses=1] + %xor700 = xor i64 0, %and699 ; <i64> [#uses=1] + %add701 = add i64 0, %xor700 ; <i64> [#uses=1] + %add707 = add i64 %add701, %add669 ; <i64> [#uses=4] + %ptrincdec6242994 = load i8* null ; <i8> [#uses=1] + %conv7122673 = zext i8 %ptrincdec6242994 to i64 ; <i64> [#uses=1] + %shl713 = shl i64 %conv7122673, 24 ; <i64> [#uses=1] + %conv7412670 = zext i8 undef to i64 ; <i64> [#uses=1] + %or723 = or i64 0, %shl713 ; <i64> [#uses=1] + %or733 = or i64 %or723, %conv7412670 ; <i64> [#uses=1] + %or742 = or i64 %or733, 0 ; <i64> [#uses=2] + %conv743 = trunc i64 %or742 to i32 ; <i32> [#uses=1] + store i32 %conv743, i32* undef + %xor762 = xor i64 undef, 0 ; <i64> [#uses=0] + %add770 = add i64 
%add362, 1426881987 ; <i64> [#uses=1] + %add779 = add i64 %add770, %or742 ; <i64> [#uses=1] + %add782 = add i64 %add779, 0 ; <i64> [#uses=1] + %add783 = add i64 %add782, 0 ; <i64> [#uses=1] + %shl785 = shl i64 %add707, 30 ; <i64> [#uses=1] + %and787 = lshr i64 %add707, 2 ; <i64> [#uses=1] + %shr788 = and i64 %and787, 1073741823 ; <i64> [#uses=1] + %or789 = or i64 %shr788, %shl785 ; <i64> [#uses=1] + %shl791 = shl i64 %add707, 19 ; <i64> [#uses=0] + %xor803 = xor i64 0, %or789 ; <i64> [#uses=1] + %and813 = and i64 %add593, %add479 ; <i64> [#uses=1] + %xor814 = xor i64 0, %and813 ; <i64> [#uses=1] + %add815 = add i64 %xor803, %xor814 ; <i64> [#uses=1] + %add821 = add i64 %add815, %add783 ; <i64> [#uses=1] + %add1160 = add i64 0, %add707 ; <i64> [#uses=0] + %add1157 = add i64 undef, undef ; <i64> [#uses=0] + %ptrincdec11742940 = load i8* null ; <i8> [#uses=1] + %conv11872651 = zext i8 %ptrincdec11742940 to i64 ; <i64> [#uses=1] + %shl1188 = shl i64 %conv11872651, 8 ; <i64> [#uses=1] + %or1198 = or i64 0, %shl1188 ; <i64> [#uses=1] + store i32 undef, i32* undef + %add1226 = add i64 %or1198, 3248222580 ; <i64> [#uses=1] + %add1235 = add i64 %add1226, 0 ; <i64> [#uses=1] + %add1238 = add i64 %add1235, 0 ; <i64> [#uses=1] + %add1239 = add i64 %add1238, 0 ; <i64> [#uses=1] + br label %for.cond + +for.cond: ; preds = %for.body, %while.body + %add821.pn = phi i64 [ %add821, %while.body ], [ undef, %for.body ] ; <i64> [#uses=0] + %add1239.pn = phi i64 [ %add1239, %while.body ], [ 0, %for.body ] ; <i64> [#uses=0] + br i1 undef, label %for.end, label %for.body + +for.body: ; preds = %for.cond + br label %for.cond + +for.end: ; preds = %for.cond + %indvar.next2788 = add i64 %indvar2787, 1 ; <i64> [#uses=1] + br i1 undef, label %while.end, label %while.body + +while.end: ; preds = %for.end, %entry + ret void +} + +define void @sha256_block2(i32* nocapture %arr, i8* nocapture %in, i64 %num) nounwind { +entry: + br i1 undef, label %while.end, label %bb.nph + +bb.nph: ; preds = 
%entry + %arrayidx5 = getelementptr i32* %arr, i64 1 ; <i32*> [#uses=1] + %arrayidx9 = getelementptr i32* %arr, i64 2 ; <i32*> [#uses=2] + %arrayidx13 = getelementptr i32* %arr, i64 3 ; <i32*> [#uses=2] + %arrayidx25 = getelementptr i32* %arr, i64 6 ; <i32*> [#uses=1] + %arrayidx29 = getelementptr i32* %arr, i64 7 ; <i32*> [#uses=1] + br label %while.body + +while.body: ; preds = %for.end, %bb.nph + %tmp3 = load i32* %arr ; <i32> [#uses=2] + %conv = zext i32 %tmp3 to i64 ; <i64> [#uses=1] + %tmp10 = load i32* %arrayidx9 ; <i32> [#uses=1] + %conv11 = zext i32 %tmp10 to i64 ; <i64> [#uses=1] + %tmp14 = load i32* %arrayidx13 ; <i32> [#uses=3] + %conv15 = zext i32 %tmp14 to i64 ; <i64> [#uses=2] + %tmp18 = load i32* undef ; <i32> [#uses=2] + %conv19 = zext i32 %tmp18 to i64 ; <i64> [#uses=1] + %conv23 = zext i32 undef to i64 ; <i64> [#uses=1] + %tmp26 = load i32* %arrayidx25 ; <i32> [#uses=1] + %conv27 = zext i32 %tmp26 to i64 ; <i64> [#uses=1] + %tmp30 = load i32* %arrayidx29 ; <i32> [#uses=2] + %conv31 = zext i32 %tmp30 to i64 ; <i64> [#uses=5] + %shl72 = shl i64 %conv31, 26 ; <i64> [#uses=1] + %shr = lshr i64 %conv31, 6 ; <i64> [#uses=1] + %or74 = or i64 %shl72, %shr ; <i64> [#uses=1] + %shr85 = lshr i64 %conv31, 25 ; <i64> [#uses=0] + %xor87 = xor i64 0, %or74 ; <i64> [#uses=1] + %and902706 = and i32 %tmp30, %tmp3 ; <i32> [#uses=1] + %and90 = zext i32 %and902706 to i64 ; <i64> [#uses=1] + %xor94 = xor i64 0, %and90 ; <i64> [#uses=1] + %add = add i64 %conv11, 1508970993 ; <i64> [#uses=1] + %add95 = add i64 %add, %xor94 ; <i64> [#uses=1] + %add98 = add i64 %add95, %xor87 ; <i64> [#uses=1] + %add99 = add i64 %add98, 0 ; <i64> [#uses=2] + %xor130 = zext i32 undef to i64 ; <i64> [#uses=1] + %add134 = add i64 %add99, %conv27 ; <i64> [#uses=2] + %add131 = add i64 %xor130, 0 ; <i64> [#uses=1] + %add137 = add i64 %add131, %add99 ; <i64> [#uses=5] + %conv1422700 = zext i8 undef to i64 ; <i64> [#uses=1] + %shl143 = shl i64 %conv1422700, 24 ; <i64> [#uses=1] + 
%ptrincdec1393051 = load i8* undef ; <i8> [#uses=1] + %conv1512699 = zext i8 %ptrincdec1393051 to i64 ; <i64> [#uses=1] + %shl152 = shl i64 %conv1512699, 16 ; <i64> [#uses=1] + %conv1712697 = zext i8 undef to i64 ; <i64> [#uses=1] + %or153 = or i64 %shl152, %shl143 ; <i64> [#uses=1] + %or163 = or i64 %or153, %conv1712697 ; <i64> [#uses=1] + %or172 = or i64 %or163, 0 ; <i64> [#uses=1] + %and203 = and i64 %add134, %conv31 ; <i64> [#uses=1] + %xor208 = xor i64 0, %and203 ; <i64> [#uses=1] + %add200 = add i64 0, 2453635748 ; <i64> [#uses=1] + %add209 = add i64 %add200, %or172 ; <i64> [#uses=1] + %add212 = add i64 %add209, %xor208 ; <i64> [#uses=1] + %add213 = add i64 %add212, 0 ; <i64> [#uses=2] + %shl228 = shl i64 %add137, 10 ; <i64> [#uses=1] + %and230 = lshr i64 %add137, 22 ; <i64> [#uses=1] + %shr231 = and i64 %and230, 1023 ; <i64> [#uses=1] + %or232 = or i64 %shr231, %shl228 ; <i64> [#uses=1] + %xor226 = xor i64 0, %or232 ; <i64> [#uses=1] + %xor233 = xor i64 %xor226, 0 ; <i64> [#uses=1] + %and2362695 = zext i32 undef to i64 ; <i64> [#uses=1] + %xor240 = and i64 %add137, %and2362695 ; <i64> [#uses=1] + %and2432694 = and i32 %tmp18, %tmp14 ; <i32> [#uses=1] + %and243 = zext i32 %and2432694 to i64 ; <i64> [#uses=1] + %xor244 = xor i64 %xor240, %and243 ; <i64> [#uses=1] + %add248 = add i64 %add213, %conv23 ; <i64> [#uses=2] + %add245 = add i64 %xor233, %xor244 ; <i64> [#uses=1] + %add251 = add i64 %add245, %add213 ; <i64> [#uses=1] + %conv2752691 = zext i8 undef to i64 ; <i64> [#uses=1] + %shl276 = shl i64 %conv2752691, 8 ; <i64> [#uses=0] + %and317 = and i64 %add248, %add134 ; <i64> [#uses=1] + %neg319 = xor i64 %add248, 4294967295 ; <i64> [#uses=1] + %and321 = and i64 %neg319, %conv31 ; <i64> [#uses=1] + %xor322 = xor i64 %and321, %and317 ; <i64> [#uses=1] + %add314 = add i64 %conv, 2870763221 ; <i64> [#uses=1] + %add323 = add i64 %add314, 0 ; <i64> [#uses=1] + %add326 = add i64 %add323, %xor322 ; <i64> [#uses=1] + %add327 = add i64 %add326, 0 ; <i64> [#uses=2] + 
%and3502689 = xor i64 %add137, %conv15 ; <i64> [#uses=1] + %xor354 = and i64 %add251, %and3502689 ; <i64> [#uses=1] + %and357 = and i64 %add137, %conv15 ; <i64> [#uses=1] + %xor358 = xor i64 %xor354, %and357 ; <i64> [#uses=1] + %add362 = add i64 %add327, %conv19 ; <i64> [#uses=1] + %add359 = add i64 0, %xor358 ; <i64> [#uses=1] + %add365 = add i64 %add359, %add327 ; <i64> [#uses=1] + %add770 = add i64 %add362, 1426881987 ; <i64> [#uses=1] + %add779 = add i64 %add770, 0 ; <i64> [#uses=1] + %add782 = add i64 %add779, 0 ; <i64> [#uses=1] + %add783 = add i64 %add782, 0 ; <i64> [#uses=2] + %add818 = add i64 %add783, %add365 ; <i64> [#uses=1] + %add821 = add i64 0, %add783 ; <i64> [#uses=1] + store i32 undef, i32* undef + %add1046 = add i64 undef, undef ; <i64> [#uses=1] + %add1160 = add i64 undef, undef ; <i64> [#uses=1] + store i32 0, i32* undef + %add1235 = add i64 0, %add818 ; <i64> [#uses=1] + %add1238 = add i64 %add1235, 0 ; <i64> [#uses=1] + %add1239 = add i64 %add1238, 0 ; <i64> [#uses=1] + br label %for.cond + +for.cond: ; preds = %for.body, %while.body + %h.0 = phi i64 [ undef, %while.body ], [ %add2035, %for.body ] ; <i64> [#uses=1] + %g.0 = phi i64 [ %add1046, %while.body ], [ undef, %for.body ] ; <i64> [#uses=1] + %f.0 = phi i64 [ %add1160, %while.body ], [ undef, %for.body ] ; <i64> [#uses=1] + %add821.pn = phi i64 [ %add821, %while.body ], [ undef, %for.body ] ; <i64> [#uses=0] + %add1239.pn2648 = phi i64 [ %add1239, %while.body ], [ undef, %for.body ] ; <i64> [#uses=0] + %d.0 = phi i64 [ undef, %while.body ], [ %add2038, %for.body ] ; <i64> [#uses=2] + br i1 undef, label %for.end, label %for.body + +for.body: ; preds = %for.cond + %conv1390 = zext i32 undef to i64 ; <i64> [#uses=1] + %add1375 = add i64 0, %h.0 ; <i64> [#uses=1] + %add1384 = add i64 %add1375, 0 ; <i64> [#uses=1] + %add1391 = add i64 %add1384, %conv1390 ; <i64> [#uses=1] + %add1392 = add i64 %add1391, 0 ; <i64> [#uses=2] + %or1411 = or i64 0, undef ; <i64> [#uses=1] + %xor1405 = xor i64 0, 
%or1411 ; <i64> [#uses=1] + %xor1412 = xor i64 %xor1405, 0 ; <i64> [#uses=1] + %add1427 = add i64 %add1392, %d.0 ; <i64> [#uses=1] + %add1424 = add i64 %xor1412, 0 ; <i64> [#uses=1] + %add1430 = add i64 %add1424, %add1392 ; <i64> [#uses=5] + %tmp1438 = load i32* undef ; <i32> [#uses=1] + %conv1439 = zext i32 %tmp1438 to i64 ; <i64> [#uses=4] + %shl1441 = shl i64 %conv1439, 25 ; <i64> [#uses=1] + %shr1444 = lshr i64 %conv1439, 7 ; <i64> [#uses=1] + %or1445 = or i64 %shl1441, %shr1444 ; <i64> [#uses=1] + %shr1450 = lshr i64 %conv1439, 18 ; <i64> [#uses=1] + %or1451 = or i64 0, %shr1450 ; <i64> [#uses=1] + %shr1454 = lshr i64 %conv1439, 3 ; <i64> [#uses=1] + %xor1452 = xor i64 %or1451, %shr1454 ; <i64> [#uses=1] + %xor1455 = xor i64 %xor1452, %or1445 ; <i64> [#uses=1] + %conv1464 = zext i32 undef to i64 ; <i64> [#uses=4] + %shl1466 = shl i64 %conv1464, 15 ; <i64> [#uses=1] + %shr1469 = lshr i64 %conv1464, 17 ; <i64> [#uses=1] + %or1470 = or i64 %shl1466, %shr1469 ; <i64> [#uses=1] + %shr1475 = lshr i64 %conv1464, 19 ; <i64> [#uses=1] + %or1476 = or i64 0, %shr1475 ; <i64> [#uses=1] + %shr1479 = lshr i64 %conv1464, 10 ; <i64> [#uses=1] + %xor1477 = xor i64 %or1476, %shr1479 ; <i64> [#uses=1] + %xor1480 = xor i64 %xor1477, %or1470 ; <i64> [#uses=1] + %tmp1499 = load i32* null ; <i32> [#uses=1] + %conv1500 = zext i32 %tmp1499 to i64 ; <i64> [#uses=1] + %add1491 = add i64 %conv1500, 0 ; <i64> [#uses=1] + %add1501 = add i64 %add1491, %xor1455 ; <i64> [#uses=1] + %add1502 = add i64 %add1501, %xor1480 ; <i64> [#uses=1] + %conv1504 = and i64 %add1502, 4294967295 ; <i64> [#uses=1] + %tmp1541 = load i32* undef ; <i32> [#uses=1] + %conv1542 = zext i32 %tmp1541 to i64 ; <i64> [#uses=1] + %add1527 = add i64 %conv1542, %g.0 ; <i64> [#uses=1] + %add1536 = add i64 %add1527, 0 ; <i64> [#uses=1] + %add1543 = add i64 %add1536, %conv1504 ; <i64> [#uses=1] + %add1544 = add i64 %add1543, 0 ; <i64> [#uses=1] + %shl1546 = shl i64 %add1430, 30 ; <i64> [#uses=1] + %and1548 = lshr i64 %add1430, 
2 ; <i64> [#uses=1] + %shr1549 = and i64 %and1548, 1073741823 ; <i64> [#uses=1] + %or1550 = or i64 %shr1549, %shl1546 ; <i64> [#uses=1] + %shl1552 = shl i64 %add1430, 19 ; <i64> [#uses=1] + %or1556 = or i64 0, %shl1552 ; <i64> [#uses=1] + %shl1559 = shl i64 %add1430, 10 ; <i64> [#uses=1] + %or1563 = or i64 0, %shl1559 ; <i64> [#uses=1] + %xor1557 = xor i64 %or1556, %or1563 ; <i64> [#uses=1] + %xor1564 = xor i64 %xor1557, %or1550 ; <i64> [#uses=1] + %add1576 = add i64 %xor1564, 0 ; <i64> [#uses=1] + %add1582 = add i64 %add1576, %add1544 ; <i64> [#uses=3] + store i32 undef, i32* undef + %tmp1693 = load i32* undef ; <i32> [#uses=1] + %conv1694 = zext i32 %tmp1693 to i64 ; <i64> [#uses=1] + %add1679 = add i64 %conv1694, %f.0 ; <i64> [#uses=1] + %add1688 = add i64 %add1679, 0 ; <i64> [#uses=1] + %add1695 = add i64 %add1688, 0 ; <i64> [#uses=1] + %add1696 = add i64 %add1695, 0 ; <i64> [#uses=1] + %shl1698 = shl i64 %add1582, 30 ; <i64> [#uses=0] + %shl1704 = shl i64 %add1582, 19 ; <i64> [#uses=0] + %add1734 = add i64 0, %add1696 ; <i64> [#uses=1] + %add1983 = add i64 0, %add1427 ; <i64> [#uses=1] + %add1992 = add i64 %add1983, 0 ; <i64> [#uses=1] + %add1999 = add i64 %add1992, 0 ; <i64> [#uses=1] + %add2000 = add i64 %add1999, 0 ; <i64> [#uses=2] + %and2030 = and i64 %add1734, %add1582 ; <i64> [#uses=1] + %xor2031 = xor i64 0, %and2030 ; <i64> [#uses=1] + %add2035 = add i64 %add2000, %add1430 ; <i64> [#uses=1] + %add2032 = add i64 0, %xor2031 ; <i64> [#uses=1] + %add2038 = add i64 %add2032, %add2000 ; <i64> [#uses=1] + store i32 0, i32* undef + br label %for.cond + +for.end: ; preds = %for.cond + store i32 undef, i32* %arrayidx5 + store i32 undef, i32* %arrayidx9 + %d.02641 = trunc i64 %d.0 to i32 ; <i32> [#uses=1] + %conv2524 = add i32 %tmp14, %d.02641 ; <i32> [#uses=1] + store i32 %conv2524, i32* %arrayidx13 + %exitcond2789 = icmp eq i64 undef, %num ; <i1> [#uses=1] + br i1 %exitcond2789, label %while.end, label %while.body + +while.end: ; preds = %for.end, %entry + 
ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll b/src/LLVM/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll new file mode 100644 index 0000000..98b1e0e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll
@@ -0,0 +1,8 @@ +; RUN: llc -mtriple=x86_64-mingw32 < %s | FileCheck %s +; CHECK-NOT: -{{[1-9][0-9]*}}(%rsp) + +define x86_fp80 @a(i64 %x) nounwind readnone { +entry: + %conv = sitofp i64 %x to x86_fp80 ; <x86_fp80> [#uses=1] + ret x86_fp80 %conv +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll b/src/LLVM/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll new file mode 100644 index 0000000..12bd285 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
@@ -0,0 +1,10 @@ +; RUN: llc -mtriple=x86_64-mingw32 < %s | FileCheck %s +; CHECK: subq $40, %rsp +; CHECK: movaps %xmm8, (%rsp) +; CHECK: movaps %xmm7, 16(%rsp) + +define i32 @a() nounwind { +entry: + tail call void asm sideeffect "", "~{xmm7},~{xmm8},~{dirflag},~{fpsr},~{flags}"() nounwind + ret i32 undef +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll b/src/LLVM/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll new file mode 100644 index 0000000..3dcc0d4 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll
@@ -0,0 +1,48 @@ +; RUN: llc < %s -march=x86 + + %0 = type { %struct.GAP } ; type %0 + %1 = type { i16, i8, i8 } ; type %1 + %2 = type { [2 x i32], [2 x i32] } ; type %2 + %3 = type { %struct.rec* } ; type %3 + %struct.FILE_POS = type { i8, i8, i16, i32 } + %struct.FIRST_UNION = type { %struct.FILE_POS } + %struct.FOURTH_UNION = type { %struct.STYLE } + %struct.GAP = type { i8, i8, i16 } + %struct.LIST = type { %struct.rec*, %struct.rec* } + %struct.SECOND_UNION = type { %1 } + %struct.STYLE = type { %0, %0, i16, i16, i32 } + %struct.THIRD_UNION = type { %2 } + %struct.head_type = type { [2 x %struct.LIST], %struct.FIRST_UNION, %struct.SECOND_UNION, %struct.THIRD_UNION, %struct.FOURTH_UNION, %struct.rec*, %3, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, i32 } + %struct.rec = type { %struct.head_type } + +define fastcc void @MinSize(%struct.rec* %x) nounwind { +entry: + %tmp13 = load i8* undef, align 4 ; <i8> [#uses=3] + %tmp14 = zext i8 %tmp13 to i32 ; <i32> [#uses=2] + switch i32 %tmp14, label %bb1109 [ + i32 42, label %bb246 + ] + +bb246: ; preds = %entry, %entry + switch i8 %tmp13, label %bb249 [ + i8 42, label %bb269 + i8 44, label %bb269 + ] + +bb249: ; preds = %bb246 + %tmp3240 = icmp eq i8 %tmp13, 0 ; <i1> [#uses=1] + br i1 %tmp3240, label %bb974, label %bb269 + +bb269: + %tmp3424 = getelementptr %struct.rec* %x, i32 0, i32 0, i32 0, i32 0, i32 1 ; <%struct.rec**> [#uses=0] + unreachable + +bb974: + unreachable + +bb1109: ; preds = %entry + call fastcc void @Image(i32 %tmp14) nounwind ; <i8*> [#uses=0] + unreachable +} + +declare fastcc void @Image(i32) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll b/src/LLVM/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll new file mode 100644 index 0000000..3076322 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mattr=+mmx,+sse2 | not grep movl + +define <8 x i8> @a(i8 zeroext %x) nounwind { + %r = insertelement <8 x i8> undef, i8 %x, i32 0 + ret <8 x i8> %r +} +
diff --git a/src/LLVM/test/CodeGen/X86/2009-06-05-VZextByteShort.ll b/src/LLVM/test/CodeGen/X86/2009-06-05-VZextByteShort.ll new file mode 100644 index 0000000..5c51480 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
@@ -0,0 +1,37 @@ +; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 > %t1 +; RUN: grep movzwl %t1 | count 2 +; RUN: grep movzbl %t1 | count 2 +; RUN: grep movd %t1 | count 4 + +define <4 x i16> @a(i32* %x1) nounwind { + %x2 = load i32* %x1 + %x3 = lshr i32 %x2, 1 + %x = trunc i32 %x3 to i16 + %r = insertelement <4 x i16> zeroinitializer, i16 %x, i32 0 + ret <4 x i16> %r +} + +define <8 x i16> @b(i32* %x1) nounwind { + %x2 = load i32* %x1 + %x3 = lshr i32 %x2, 1 + %x = trunc i32 %x3 to i16 + %r = insertelement <8 x i16> zeroinitializer, i16 %x, i32 0 + ret <8 x i16> %r +} + +define <8 x i8> @c(i32* %x1) nounwind { + %x2 = load i32* %x1 + %x3 = lshr i32 %x2, 1 + %x = trunc i32 %x3 to i8 + %r = insertelement <8 x i8> zeroinitializer, i8 %x, i32 0 + ret <8 x i8> %r +} + +define <16 x i8> @d(i32* %x1) nounwind { + %x2 = load i32* %x1 + %x3 = lshr i32 %x2, 1 + %x = trunc i32 %x3 to i8 + %r = insertelement <16 x i8> zeroinitializer, i8 %x, i32 0 + ret <16 x i8> %r +} +
diff --git a/src/LLVM/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll b/src/LLVM/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll new file mode 100644 index 0000000..8bb3dc6 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s + +define <2 x i64> @_mm_insert_epi16(<2 x i64> %a, i32 %b, i32 %imm) nounwind readnone { +entry: + %conv = bitcast <2 x i64> %a to <8 x i16> ; <<8 x i16>> [#uses=1] + %conv2 = trunc i32 %b to i16 ; <i16> [#uses=1] + %and = and i32 %imm, 7 ; <i32> [#uses=1] + %vecins = insertelement <8 x i16> %conv, i16 %conv2, i32 %and ; <<8 x i16>> [#uses=1] + %conv6 = bitcast <8 x i16> %vecins to <2 x i64> ; <<2 x i64>> [#uses=1] + ret <2 x i64> %conv6 +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-06-05-sitofpCrash.ll b/src/LLVM/test/CodeGen/X86/2009-06-05-sitofpCrash.ll new file mode 100644 index 0000000..e361804 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-06-05-sitofpCrash.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=x86 -mattr=+sse +; PR2598 + +define <2 x float> @a(<2 x i32> %i) nounwind { + %r = sitofp <2 x i32> %i to <2 x float> + ret <2 x float> %r +} + +define <2 x i32> @b(<2 x float> %i) nounwind { + %r = fptosi <2 x float> %i to <2 x i32> + ret <2 x i32> %r +} +
diff --git a/src/LLVM/test/CodeGen/X86/2009-06-06-ConcatVectors.ll b/src/LLVM/test/CodeGen/X86/2009-06-06-ConcatVectors.ll new file mode 100644 index 0000000..92419fc --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-06-06-ConcatVectors.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s + +define <2 x i64> @_mm_movpi64_pi64(<1 x i64> %a, <1 x i64> %b) nounwind readnone { +entry: + %0 = shufflevector <1 x i64> %a, <1 x i64> %b, <2 x i32> <i32 0, i32 1> + ret <2 x i64> %0 +} +
diff --git a/src/LLVM/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll b/src/LLVM/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll new file mode 100644 index 0000000..07ef53e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=x86 -mattr=+mmx | grep movl | count 2 + +define i64 @a(i32 %a, i32 %b) nounwind readnone { +entry: + %0 = insertelement <2 x i32> undef, i32 %a, i32 0 ; <<2 x i32>> [#uses=1] + %1 = insertelement <2 x i32> %0, i32 %b, i32 1 ; <<2 x i32>> [#uses=1] + %conv = bitcast <2 x i32> %1 to i64 ; <i64> [#uses=1] + ret i64 %conv +} +
diff --git a/src/LLVM/test/CodeGen/X86/2009-06-12-x86_64-tail-call-conv-out-of-sync-bug.ll b/src/LLVM/test/CodeGen/X86/2009-06-12-x86_64-tail-call-conv-out-of-sync-bug.ll new file mode 100644 index 0000000..673e936 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-06-12-x86_64-tail-call-conv-out-of-sync-bug.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -tailcallopt -march=x86-64 -mattr=+sse2 -mtriple=x86_64-apple-darwin | grep fstpt +; RUN: llc < %s -tailcallopt -march=x86-64 -mattr=+sse2 -mtriple=x86_64-apple-darwin | grep xmm + +; Check that x86-64 tail calls support x86_fp80 and v2f32 types. (Tail call +; calling convention out of sync with standard c calling convention on x86_64) +; Bug 4278. + +declare fastcc double @tailcallee(x86_fp80, <2 x float>) + +define fastcc double @tailcall() { +entry: + %tmp = fpext float 1.000000e+00 to x86_fp80 + %tmp2 = tail call fastcc double @tailcallee( x86_fp80 %tmp, <2 x float> <float 1.000000e+00, float 1.000000e+00>) + ret double %tmp2 +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-06-15-not-a-tail-call.ll b/src/LLVM/test/CodeGen/X86/2009-06-15-not-a-tail-call.ll new file mode 100644 index 0000000..feb5780 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-06-15-not-a-tail-call.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=x86 -tailcallopt | not grep TAILCALL + +; Bug 4396. This tail call can NOT be optimized. + +declare fastcc i8* @_D3gcx2GC12mallocNoSyncMFmkZPv() nounwind + +define fastcc i8* @_D3gcx2GC12callocNoSyncMFmkZPv() nounwind { +entry: + %tmp6 = tail call fastcc i8* @_D3gcx2GC12mallocNoSyncMFmkZPv() ; <i8*> [#uses=2] + %tmp9 = tail call i8* @memset(i8* %tmp6, i32 0, i64 2) ; <i8*> [#uses=0] + ret i8* %tmp6 +} + +declare i8* @memset(i8*, i32, i64)
diff --git a/src/LLVM/test/CodeGen/X86/2009-06-18-movlp-shuffle-register.ll b/src/LLVM/test/CodeGen/X86/2009-06-18-movlp-shuffle-register.ll new file mode 100644 index 0000000..8ea70b4 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-06-18-movlp-shuffle-register.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=x86 -mattr=+sse,-sse2 | FileCheck %s +; PR2484 + +define <4 x float> @f4523(<4 x float> %a,<4 x float> %b) nounwind { +entry: +; CHECK: shufps $-28, %xmm +%shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4,i32 +5,i32 2,i32 3> +ret <4 x float> %shuffle +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-07-06-TwoAddrAssert.ll b/src/LLVM/test/CodeGen/X86/2009-07-06-TwoAddrAssert.ll new file mode 100644 index 0000000..fcc71ae --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-07-06-TwoAddrAssert.ll
@@ -0,0 +1,137 @@ +; RUN: llc < %s -march=x86 -mtriple=x86_64-unknown-freebsd7.2 +; PR4478 + + %struct.sockaddr = type <{ i8, i8, [14 x i8] }> + +define i32 @main(i32 %argc, i8** %argv) nounwind { +entry: + br label %while.cond + +while.cond: ; preds = %sw.bb6, %entry + switch i32 undef, label %sw.default [ + i32 -1, label %while.end + i32 119, label %sw.bb6 + ] + +sw.bb6: ; preds = %while.cond + br i1 undef, label %if.then, label %while.cond + +if.then: ; preds = %sw.bb6 + ret i32 1 + +sw.default: ; preds = %while.cond + ret i32 1 + +while.end: ; preds = %while.cond + br i1 undef, label %if.then15, label %if.end16 + +if.then15: ; preds = %while.end + ret i32 1 + +if.end16: ; preds = %while.end + br i1 undef, label %lor.lhs.false, label %if.then21 + +lor.lhs.false: ; preds = %if.end16 + br i1 undef, label %if.end22, label %if.then21 + +if.then21: ; preds = %lor.lhs.false, %if.end16 + ret i32 1 + +if.end22: ; preds = %lor.lhs.false + br i1 undef, label %lor.lhs.false27, label %if.then51 + +lor.lhs.false27: ; preds = %if.end22 + br i1 undef, label %lor.lhs.false39, label %if.then51 + +lor.lhs.false39: ; preds = %lor.lhs.false27 + br i1 undef, label %if.end52, label %if.then51 + +if.then51: ; preds = %lor.lhs.false39, %lor.lhs.false27, %if.end22 + ret i32 1 + +if.end52: ; preds = %lor.lhs.false39 + br i1 undef, label %if.then57, label %if.end58 + +if.then57: ; preds = %if.end52 + ret i32 1 + +if.end58: ; preds = %if.end52 + br i1 undef, label %if.then64, label %if.end65 + +if.then64: ; preds = %if.end58 + ret i32 1 + +if.end65: ; preds = %if.end58 + br i1 undef, label %if.then71, label %if.end72 + +if.then71: ; preds = %if.end65 + ret i32 1 + +if.end72: ; preds = %if.end65 + br i1 undef, label %if.then83, label %if.end84 + +if.then83: ; preds = %if.end72 + ret i32 1 + +if.end84: ; preds = %if.end72 + br i1 undef, label %if.then101, label %if.end102 + +if.then101: ; preds = %if.end84 + ret i32 1 + +if.end102: ; preds = %if.end84 + br i1 undef, label %if.then113, label 
%if.end114 + +if.then113: ; preds = %if.end102 + ret i32 1 + +if.end114: ; preds = %if.end102 + br i1 undef, label %if.then209, label %if.end210 + +if.then209: ; preds = %if.end114 + ret i32 1 + +if.end210: ; preds = %if.end114 + br i1 undef, label %if.then219, label %if.end220 + +if.then219: ; preds = %if.end210 + ret i32 1 + +if.end220: ; preds = %if.end210 + br i1 undef, label %if.end243, label %lor.lhs.false230 + +lor.lhs.false230: ; preds = %if.end220 + unreachable + +if.end243: ; preds = %if.end220 + br i1 undef, label %if.then249, label %if.end250 + +if.then249: ; preds = %if.end243 + ret i32 1 + +if.end250: ; preds = %if.end243 + br i1 undef, label %if.end261, label %if.then260 + +if.then260: ; preds = %if.end250 + ret i32 1 + +if.end261: ; preds = %if.end250 + br i1 undef, label %if.then270, label %if.end271 + +if.then270: ; preds = %if.end261 + ret i32 1 + +if.end271: ; preds = %if.end261 + %call.i = call i32 @arc4random() nounwind ; <i32> [#uses=1] + %rem.i = urem i32 %call.i, 16383 ; <i32> [#uses=1] + %rem1.i = trunc i32 %rem.i to i16 ; <i16> [#uses=1] + %conv2.i = or i16 %rem1.i, -16384 ; <i16> [#uses=1] + %0 = call i16 asm "xchgb ${0:h}, ${0:b}", "=Q,0,~{dirflag},~{fpsr},~{flags}"(i16 %conv2.i) nounwind ; <i16> [#uses=1] + store i16 %0, i16* undef + %call281 = call i32 @bind(i32 undef, %struct.sockaddr* undef, i32 16) nounwind ; <i32> [#uses=0] + unreachable +} + +declare i32 @bind(i32, %struct.sockaddr*, i32) + +declare i32 @arc4random()
diff --git a/src/LLVM/test/CodeGen/X86/2009-07-07-SplitICmp.ll b/src/LLVM/test/CodeGen/X86/2009-07-07-SplitICmp.ll new file mode 100644 index 0000000..3669856 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-07-07-SplitICmp.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=x86 + +define void @test2(<2 x i32> %A, <2 x i32> %B, <2 x i32>* %C) nounwind { + %D = icmp sgt <2 x i32> %A, %B + %E = zext <2 x i1> %D to <2 x i32> + store <2 x i32> %E, <2 x i32>* %C + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-07-09-ExtractBoolFromVector.ll b/src/LLVM/test/CodeGen/X86/2009-07-09-ExtractBoolFromVector.ll new file mode 100644 index 0000000..0fdfdcb --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-07-09-ExtractBoolFromVector.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=x86 +; PR3037 + +define void @entry(<4 x i8>* %dest) { + %1 = xor <4 x i1> zeroinitializer, < i1 true, i1 true, i1 true, i1 true > + %2 = extractelement <4 x i1> %1, i32 3 + %3 = zext i1 %2 to i8 + %4 = insertelement <4 x i8> zeroinitializer, i8 %3, i32 3 + store <4 x i8> %4, <4 x i8>* %dest, align 1 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-07-15-CoalescerBug.ll b/src/LLVM/test/CodeGen/X86/2009-07-15-CoalescerBug.ll new file mode 100644 index 0000000..eabaf77 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-07-15-CoalescerBug.ll
@@ -0,0 +1,958 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 + + %struct.ANY = type { i8* } + %struct.AV = type { %struct.XPVAV*, i32, i32 } + %struct.CLONE_PARAMS = type { %struct.AV*, i64, %struct.PerlInterpreter* } + %struct.CV = type { %struct.XPVCV*, i32, i32 } + %struct.DIR = type { i32, i64, i64, i8*, i32, i64, i64, i32, %struct.__darwin_pthread_mutex_t, %struct._telldir* } + %struct.GP = type { %struct.SV*, i32, %struct.io*, %struct.CV*, %struct.AV*, %struct.HV*, %struct.GV*, %struct.CV*, i32, i32, i32, i8* } + %struct.GV = type { %struct.XPVGV*, i32, i32 } + %struct.HE = type { %struct.HE*, %struct.HEK*, %struct.SV* } + %struct.HEK = type { i32, i32, [1 x i8] } + %struct.HV = type { %struct.XPVHV*, i32, i32 } + %struct.MAGIC = type { %struct.MAGIC*, %struct.MGVTBL*, i16, i8, i8, %struct.SV*, i8*, i32 } + %struct.MGVTBL = type { i32 (%struct.SV*, %struct.MAGIC*)*, i32 (%struct.SV*, %struct.MAGIC*)*, i32 (%struct.SV*, %struct.MAGIC*)*, i32 (%struct.SV*, %struct.MAGIC*)*, i32 (%struct.SV*, %struct.MAGIC*)*, i32 (%struct.SV*, %struct.MAGIC*, %struct.SV*, i8*, i32)*, i32 (%struct.MAGIC*, %struct.CLONE_PARAMS*)* } + %struct.OP = type { %struct.OP*, %struct.OP*, %struct.OP* ()*, i64, i16, i16, i8, i8 } + %struct.PMOP = type { %struct.OP*, %struct.OP*, %struct.OP* ()*, i64, i16, i16, i8, i8, %struct.OP*, %struct.OP*, %struct.OP*, %struct.OP*, %struct.PMOP*, %struct.REGEXP*, i32, i32, i8, %struct.HV* } + %struct.PerlIO_funcs = type { i64, i8*, i64, i32, i64 (%struct.PerlIOl**, i8*, %struct.SV*, %struct.PerlIO_funcs*)*, i64 (%struct.PerlIOl**)*, %struct.PerlIOl** (%struct.PerlIO_funcs*, %struct.PerlIO_list_t*, i64, i8*, i32, i32, i32, %struct.PerlIOl**, i32, %struct.SV**)*, i64 (%struct.PerlIOl**)*, %struct.SV* (%struct.PerlIOl**, %struct.CLONE_PARAMS*, i32)*, i64 (%struct.PerlIOl**)*, %struct.PerlIOl** (%struct.PerlIOl**, %struct.PerlIOl**, %struct.CLONE_PARAMS*, i32)*, i64 (%struct.PerlIOl**, i8*, i64)*, i64 (%struct.PerlIOl**, i8*, i64)*, i64 
(%struct.PerlIOl**, i8*, i64)*, i64 (%struct.PerlIOl**, i64, i32)*, i64 (%struct.PerlIOl**)*, i64 (%struct.PerlIOl**)*, i64 (%struct.PerlIOl**)*, i64 (%struct.PerlIOl**)*, i64 (%struct.PerlIOl**)*, i64 (%struct.PerlIOl**)*, void (%struct.PerlIOl**)*, void (%struct.PerlIOl**)*, i8* (%struct.PerlIOl**)*, i64 (%struct.PerlIOl**)*, i8* (%struct.PerlIOl**)*, i64 (%struct.PerlIOl**)*, void (%struct.PerlIOl**, i8*, i64)* } + %struct.PerlIO_list_t = type { i64, i64, i64, %struct.PerlIO_pair_t* } + %struct.PerlIO_pair_t = type { %struct.PerlIO_funcs*, %struct.SV* } + %struct.PerlIOl = type { %struct.PerlIOl*, %struct.PerlIO_funcs*, i32 } + %struct.PerlInterpreter = type { i8 } + %struct.REGEXP = type { i32*, i32*, %struct.regnode*, %struct.reg_substr_data*, i8*, %struct.reg_data*, i8*, i32*, i32, i32, i32, i32, i32, i32, i32, i32, [1 x %struct.regnode] } + %struct.SV = type { i8*, i32, i32 } + %struct.XPVAV = type { i8*, i64, i64, i64, double, %struct.MAGIC*, %struct.HV*, %struct.SV**, %struct.SV*, i8 } + %struct.XPVCV = type { i8*, i64, i64, i64, double, %struct.MAGIC*, %struct.HV*, %struct.HV*, %struct.OP*, %struct.OP*, void (%struct.CV*)*, %struct.ANY, %struct.GV*, i8*, i64, %struct.AV*, %struct.CV*, i16, i32 } + %struct.XPVGV = type { i8*, i64, i64, i64, double, %struct.MAGIC*, %struct.HV*, %struct.GP*, i8*, i64, %struct.HV*, i8 } + %struct.XPVHV = type { i8*, i64, i64, i64, double, %struct.MAGIC*, %struct.HV*, i32, %struct.HE*, %struct.PMOP*, i8* } + %struct.XPVIO = type { i8*, i64, i64, i64, double, %struct.MAGIC*, %struct.HV*, %struct.PerlIOl**, %struct.PerlIOl**, %struct.anon, i64, i64, i64, i64, i8*, %struct.GV*, i8*, %struct.GV*, i8*, %struct.GV*, i16, i8, i8 } + %struct.__darwin_pthread_mutex_t = type { i64, [56 x i8] } + %struct._telldir = type opaque + %struct.anon = type { %struct.DIR* } + %struct.io = type { %struct.XPVIO*, i32, i32 } + %struct.reg_data = type { i32, i8*, [1 x i8*] } + %struct.reg_substr_data = type { [3 x %struct.reg_substr_datum] } + 
%struct.reg_substr_datum = type { i32, i32, %struct.SV*, %struct.SV* } + %struct.regnode = type { i8, i8, i16 } + +define i32 @Perl_yylex() nounwind ssp { +entry: + br i1 undef, label %bb21, label %bb + +bb: ; preds = %entry + unreachable + +bb21: ; preds = %entry + switch i32 undef, label %bb103 [ + i32 1, label %bb101 + i32 4, label %bb75 + i32 6, label %bb68 + i32 7, label %bb67 + i32 8, label %bb25 + ] + +bb25: ; preds = %bb21 + ret i32 41 + +bb67: ; preds = %bb21 + ret i32 40 + +bb68: ; preds = %bb21 + br i1 undef, label %bb69, label %bb70 + +bb69: ; preds = %bb68 + ret i32 undef + +bb70: ; preds = %bb68 + unreachable + +bb75: ; preds = %bb21 + unreachable + +bb101: ; preds = %bb21 + unreachable + +bb103: ; preds = %bb21 + switch i32 undef, label %bb104 [ + i32 0, label %bb126 + i32 4, label %fake_eof + i32 26, label %fake_eof + i32 34, label %bb1423 + i32 36, label %bb1050 + i32 37, label %bb534 + i32 39, label %bb1412 + i32 41, label %bb643 + i32 44, label %bb544 + i32 48, label %bb1406 + i32 49, label %bb1406 + i32 50, label %bb1406 + i32 51, label %bb1406 + i32 52, label %bb1406 + i32 53, label %bb1406 + i32 54, label %bb1406 + i32 55, label %bb1406 + i32 56, label %bb1406 + i32 57, label %bb1406 + i32 59, label %bb639 + i32 65, label %keylookup + i32 66, label %keylookup + i32 67, label %keylookup + i32 68, label %keylookup + i32 69, label %keylookup + i32 70, label %keylookup + i32 71, label %keylookup + i32 72, label %keylookup + i32 73, label %keylookup + i32 74, label %keylookup + i32 75, label %keylookup + i32 76, label %keylookup + i32 77, label %keylookup + i32 78, label %keylookup + i32 79, label %keylookup + i32 80, label %keylookup + i32 81, label %keylookup + i32 82, label %keylookup + i32 83, label %keylookup + i32 84, label %keylookup + i32 85, label %keylookup + i32 86, label %keylookup + i32 87, label %keylookup + i32 88, label %keylookup + i32 89, label %keylookup + i32 90, label %keylookup + i32 92, label %bb1455 + i32 95, label 
%keylookup + i32 96, label %bb1447 + i32 97, label %keylookup + i32 98, label %keylookup + i32 99, label %keylookup + i32 100, label %keylookup + i32 101, label %keylookup + i32 102, label %keylookup + i32 103, label %keylookup + i32 104, label %keylookup + i32 105, label %keylookup + i32 106, label %keylookup + i32 107, label %keylookup + i32 108, label %keylookup + i32 109, label %keylookup + i32 110, label %keylookup + i32 111, label %keylookup + i32 112, label %keylookup + i32 113, label %keylookup + i32 114, label %keylookup + i32 115, label %keylookup + i32 116, label %keylookup + i32 117, label %keylookup + i32 118, label %keylookup + i32 119, label %keylookup + i32 120, label %keylookup + i32 121, label %keylookup + i32 122, label %keylookup + i32 126, label %bb544 + ] + +bb104: ; preds = %bb103 + unreachable + +bb126: ; preds = %bb103 + ret i32 0 + +fake_eof: ; preds = %bb1841, %bb103, %bb103 + unreachable + +bb534: ; preds = %bb103 + unreachable + +bb544: ; preds = %bb103, %bb103 + ret i32 undef + +bb639: ; preds = %bb103 + unreachable + +bb643: ; preds = %bb103 + unreachable + +bb1050: ; preds = %bb103 + unreachable + +bb1406: ; preds = %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103 + unreachable + +bb1412: ; preds = %bb103 + unreachable + +bb1423: ; preds = %bb103 + unreachable + +bb1447: ; preds = %bb103 + unreachable + +bb1455: ; preds = %bb103 + unreachable + +keylookup: ; preds = %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103 + br i1 undef, label %bb1498, label %bb1496 + +bb1496: ; preds = %keylookup + br i1 undef, label %bb1498, label 
%bb1510.preheader + +bb1498: ; preds = %bb1496, %keylookup + unreachable + +bb1510.preheader: ; preds = %bb1496 + br i1 undef, label %bb1511, label %bb1518 + +bb1511: ; preds = %bb1510.preheader + br label %bb1518 + +bb1518: ; preds = %bb1511, %bb1510.preheader + switch i32 undef, label %bb741.i4285 [ + i32 95, label %bb744.i4287 + i32 115, label %bb852.i4394 + ] + +bb741.i4285: ; preds = %bb1518 + br label %Perl_keyword.exit4735 + +bb744.i4287: ; preds = %bb1518 + br label %Perl_keyword.exit4735 + +bb852.i4394: ; preds = %bb1518 + br i1 undef, label %bb861.i4404, label %bb856.i4399 + +bb856.i4399: ; preds = %bb852.i4394 + br label %Perl_keyword.exit4735 + +bb861.i4404: ; preds = %bb852.i4394 + br label %Perl_keyword.exit4735 + +Perl_keyword.exit4735: ; preds = %bb861.i4404, %bb856.i4399, %bb744.i4287, %bb741.i4285 + br i1 undef, label %bb1544, label %reserved_word + +bb1544: ; preds = %Perl_keyword.exit4735 + br i1 undef, label %bb1565, label %bb1545 + +bb1545: ; preds = %bb1544 + br i1 undef, label %bb1563, label %bb1558 + +bb1558: ; preds = %bb1545 + %0 = load %struct.SV** undef ; <%struct.SV*> [#uses=1] + %1 = bitcast %struct.SV* %0 to %struct.GV* ; <%struct.GV*> [#uses=5] + br i1 undef, label %bb1563, label %bb1559 + +bb1559: ; preds = %bb1558 + br i1 undef, label %bb1560, label %bb1563 + +bb1560: ; preds = %bb1559 + br i1 undef, label %bb1563, label %bb1561 + +bb1561: ; preds = %bb1560 + br i1 undef, label %bb1562, label %bb1563 + +bb1562: ; preds = %bb1561 + br label %bb1563 + +bb1563: ; preds = %bb1562, %bb1561, %bb1560, %bb1559, %bb1558, %bb1545 + %gv19.3 = phi %struct.GV* [ %1, %bb1562 ], [ undef, %bb1545 ], [ %1, %bb1558 ], [ %1, %bb1559 ], [ %1, %bb1560 ], [ %1, %bb1561 ] ; <%struct.GV*> [#uses=0] + br i1 undef, label %bb1565, label %reserved_word + +bb1565: ; preds = %bb1563, %bb1544 + br i1 undef, label %bb1573, label %bb1580 + +bb1573: ; preds = %bb1565 + br label %bb1580 + +bb1580: ; preds = %bb1573, %bb1565 + br i1 undef, label %bb1595, label 
%reserved_word + +bb1595: ; preds = %bb1580 + br i1 undef, label %reserved_word, label %bb1597 + +bb1597: ; preds = %bb1595 + br i1 undef, label %reserved_word, label %bb1602 + +bb1602: ; preds = %bb1597 + br label %reserved_word + +reserved_word: ; preds = %bb1602, %bb1597, %bb1595, %bb1580, %bb1563, %Perl_keyword.exit4735 + switch i32 undef, label %bb2012 [ + i32 1, label %bb1819 + i32 2, label %bb1830 + i32 4, label %bb1841 + i32 5, label %bb1841 + i32 8, label %bb1880 + i32 14, label %bb1894 + i32 16, label %bb1895 + i32 17, label %bb1896 + i32 18, label %bb1897 + i32 19, label %bb1898 + i32 20, label %bb1899 + i32 22, label %bb1906 + i32 23, label %bb1928 + i32 24, label %bb2555 + i32 26, label %bb1929 + i32 31, label %bb1921 + i32 32, label %bb1930 + i32 33, label %bb1905 + i32 34, label %bb1936 + i32 35, label %bb1927 + i32 37, label %bb1962 + i32 40, label %bb1951 + i32 41, label %bb1946 + i32 42, label %bb1968 + i32 44, label %bb1969 + i32 45, label %bb1970 + i32 46, label %bb2011 + i32 47, label %bb2006 + i32 48, label %bb2007 + i32 49, label %bb2009 + i32 50, label %bb2010 + i32 51, label %bb2008 + i32 53, label %bb1971 + i32 54, label %bb1982 + i32 55, label %bb2005 + i32 59, label %bb2081 + i32 61, label %bb2087 + i32 64, label %bb2080 + i32 65, label %really_sub + i32 66, label %bb2079 + i32 67, label %bb2089 + i32 69, label %bb2155 + i32 72, label %bb2137 + i32 74, label %bb2138 + i32 75, label %bb2166 + i32 76, label %bb2144 + i32 78, label %bb2145 + i32 81, label %bb2102 + i32 82, label %bb2108 + i32 84, label %bb2114 + i32 85, label %bb2115 + i32 86, label %bb2116 + i32 89, label %bb2146 + i32 90, label %bb2147 + i32 91, label %bb2148 + i32 93, label %bb2154 + i32 94, label %bb2167 + i32 96, label %bb2091 + i32 97, label %bb2090 + i32 98, label %bb2088 + i32 100, label %bb2173 + i32 101, label %bb2174 + i32 102, label %bb2175 + i32 103, label %bb2180 + i32 104, label %bb2181 + i32 106, label %bb2187 + i32 107, label %bb2188 + i32 110, label 
%bb2206 + i32 112, label %bb2217 + i32 113, label %bb2218 + i32 114, label %bb2199 + i32 119, label %bb2205 + i32 120, label %bb2229 + i32 121, label %bb2233 + i32 122, label %bb2234 + i32 123, label %bb2235 + i32 124, label %bb2236 + i32 125, label %bb2237 + i32 126, label %bb2238 + i32 127, label %bb2239 + i32 128, label %bb2268 + i32 129, label %bb2267 + i32 133, label %bb2276 + i32 134, label %bb2348 + i32 135, label %bb2337 + i32 137, label %bb2239 + i32 138, label %bb2367 + i32 139, label %bb2368 + i32 140, label %bb2369 + i32 141, label %bb2357 + i32 143, label %bb2349 + i32 144, label %bb2350 + i32 146, label %bb2356 + i32 147, label %bb2370 + i32 148, label %bb2445 + i32 149, label %bb2453 + i32 151, label %bb2381 + i32 152, label %bb2457 + i32 154, label %bb2516 + i32 156, label %bb2522 + i32 158, label %bb2527 + i32 159, label %bb2537 + i32 160, label %bb2503 + i32 162, label %bb2504 + i32 163, label %bb2464 + i32 165, label %bb2463 + i32 166, label %bb2538 + i32 168, label %bb2515 + i32 170, label %bb2549 + i32 172, label %bb2566 + i32 173, label %bb2595 + i32 174, label %bb2565 + i32 175, label %bb2567 + i32 176, label %bb2568 + i32 177, label %bb2569 + i32 178, label %bb2570 + i32 179, label %bb2594 + i32 182, label %bb2571 + i32 183, label %bb2572 + i32 185, label %bb2593 + i32 186, label %bb2583 + i32 187, label %bb2596 + i32 189, label %bb2602 + i32 190, label %bb2603 + i32 191, label %bb2604 + i32 192, label %bb2605 + i32 193, label %bb2606 + i32 196, label %bb2617 + i32 197, label %bb2618 + i32 198, label %bb2619 + i32 199, label %bb2627 + i32 200, label %bb2625 + i32 201, label %bb2626 + i32 206, label %really_sub + i32 207, label %bb2648 + i32 208, label %bb2738 + i32 209, label %bb2739 + i32 210, label %bb2740 + i32 211, label %bb2742 + i32 212, label %bb2741 + i32 213, label %bb2737 + i32 214, label %bb2743 + i32 217, label %bb2758 + i32 219, label %bb2764 + i32 220, label %bb2765 + i32 221, label %bb2744 + i32 222, label %bb2766 + i32 226, 
label %bb2785 + i32 227, label %bb2783 + i32 228, label %bb2784 + i32 229, label %bb2790 + i32 230, label %bb2797 + i32 232, label %bb2782 + i32 234, label %bb2791 + i32 236, label %bb2815 + i32 237, label %bb2818 + i32 238, label %bb2819 + i32 239, label %bb2820 + i32 240, label %bb2817 + i32 241, label %bb2816 + i32 242, label %bb2821 + i32 243, label %bb2826 + i32 244, label %bb2829 + i32 245, label %bb2830 + ] + +bb1819: ; preds = %reserved_word + unreachable + +bb1830: ; preds = %reserved_word + unreachable + +bb1841: ; preds = %reserved_word, %reserved_word + br i1 undef, label %fake_eof, label %bb1842 + +bb1842: ; preds = %bb1841 + unreachable + +bb1880: ; preds = %reserved_word + unreachable + +bb1894: ; preds = %reserved_word + ret i32 undef + +bb1895: ; preds = %reserved_word + ret i32 301 + +bb1896: ; preds = %reserved_word + ret i32 undef + +bb1897: ; preds = %reserved_word + ret i32 undef + +bb1898: ; preds = %reserved_word + ret i32 undef + +bb1899: ; preds = %reserved_word + ret i32 undef + +bb1905: ; preds = %reserved_word + ret i32 278 + +bb1906: ; preds = %reserved_word + unreachable + +bb1921: ; preds = %reserved_word + ret i32 288 + +bb1927: ; preds = %reserved_word + ret i32 undef + +bb1928: ; preds = %reserved_word + ret i32 undef + +bb1929: ; preds = %reserved_word + ret i32 undef + +bb1930: ; preds = %reserved_word + ret i32 undef + +bb1936: ; preds = %reserved_word + br i1 undef, label %bb2834, label %bb1937 + +bb1937: ; preds = %bb1936 + ret i32 undef + +bb1946: ; preds = %reserved_word + unreachable + +bb1951: ; preds = %reserved_word + ret i32 undef + +bb1962: ; preds = %reserved_word + ret i32 undef + +bb1968: ; preds = %reserved_word + ret i32 280 + +bb1969: ; preds = %reserved_word + ret i32 276 + +bb1970: ; preds = %reserved_word + ret i32 277 + +bb1971: ; preds = %reserved_word + ret i32 288 + +bb1982: ; preds = %reserved_word + br i1 undef, label %bb2834, label %bb1986 + +bb1986: ; preds = %bb1982 + ret i32 undef + +bb2005: ; preds 
= %reserved_word + ret i32 undef + +bb2006: ; preds = %reserved_word + ret i32 282 + +bb2007: ; preds = %reserved_word + ret i32 282 + +bb2008: ; preds = %reserved_word + ret i32 282 + +bb2009: ; preds = %reserved_word + ret i32 282 + +bb2010: ; preds = %reserved_word + ret i32 282 + +bb2011: ; preds = %reserved_word + ret i32 282 + +bb2012: ; preds = %reserved_word + unreachable + +bb2079: ; preds = %reserved_word + ret i32 undef + +bb2080: ; preds = %reserved_word + ret i32 282 + +bb2081: ; preds = %reserved_word + ret i32 undef + +bb2087: ; preds = %reserved_word + ret i32 undef + +bb2088: ; preds = %reserved_word + ret i32 287 + +bb2089: ; preds = %reserved_word + ret i32 287 + +bb2090: ; preds = %reserved_word + ret i32 undef + +bb2091: ; preds = %reserved_word + ret i32 280 + +bb2102: ; preds = %reserved_word + ret i32 282 + +bb2108: ; preds = %reserved_word + ret i32 undef + +bb2114: ; preds = %reserved_word + ret i32 undef + +bb2115: ; preds = %reserved_word + ret i32 282 + +bb2116: ; preds = %reserved_word + ret i32 282 + +bb2137: ; preds = %reserved_word + ret i32 undef + +bb2138: ; preds = %reserved_word + ret i32 282 + +bb2144: ; preds = %reserved_word + ret i32 undef + +bb2145: ; preds = %reserved_word + ret i32 282 + +bb2146: ; preds = %reserved_word + ret i32 undef + +bb2147: ; preds = %reserved_word + ret i32 undef + +bb2148: ; preds = %reserved_word + ret i32 282 + +bb2154: ; preds = %reserved_word + ret i32 undef + +bb2155: ; preds = %reserved_word + ret i32 282 + +bb2166: ; preds = %reserved_word + ret i32 282 + +bb2167: ; preds = %reserved_word + ret i32 undef + +bb2173: ; preds = %reserved_word + ret i32 274 + +bb2174: ; preds = %reserved_word + ret i32 undef + +bb2175: ; preds = %reserved_word + br i1 undef, label %bb2834, label %bb2176 + +bb2176: ; preds = %bb2175 + ret i32 undef + +bb2180: ; preds = %reserved_word + ret i32 undef + +bb2181: ; preds = %reserved_word + ret i32 undef + +bb2187: ; preds = %reserved_word + ret i32 undef + 
+bb2188: ; preds = %reserved_word + ret i32 280 + +bb2199: ; preds = %reserved_word + ret i32 295 + +bb2205: ; preds = %reserved_word + ret i32 287 + +bb2206: ; preds = %reserved_word + ret i32 287 + +bb2217: ; preds = %reserved_word + ret i32 undef + +bb2218: ; preds = %reserved_word + ret i32 undef + +bb2229: ; preds = %reserved_word + unreachable + +bb2233: ; preds = %reserved_word + ret i32 undef + +bb2234: ; preds = %reserved_word + ret i32 undef + +bb2235: ; preds = %reserved_word + ret i32 undef + +bb2236: ; preds = %reserved_word + ret i32 undef + +bb2237: ; preds = %reserved_word + ret i32 undef + +bb2238: ; preds = %reserved_word + ret i32 undef + +bb2239: ; preds = %reserved_word, %reserved_word + unreachable + +bb2267: ; preds = %reserved_word + ret i32 280 + +bb2268: ; preds = %reserved_word + ret i32 288 + +bb2276: ; preds = %reserved_word + unreachable + +bb2337: ; preds = %reserved_word + ret i32 300 + +bb2348: ; preds = %reserved_word + ret i32 undef + +bb2349: ; preds = %reserved_word + ret i32 undef + +bb2350: ; preds = %reserved_word + ret i32 undef + +bb2356: ; preds = %reserved_word + ret i32 undef + +bb2357: ; preds = %reserved_word + br i1 undef, label %bb2834, label %bb2358 + +bb2358: ; preds = %bb2357 + ret i32 undef + +bb2367: ; preds = %reserved_word + ret i32 undef + +bb2368: ; preds = %reserved_word + ret i32 270 + +bb2369: ; preds = %reserved_word + ret i32 undef + +bb2370: ; preds = %reserved_word + unreachable + +bb2381: ; preds = %reserved_word + unreachable + +bb2445: ; preds = %reserved_word + unreachable + +bb2453: ; preds = %reserved_word + unreachable + +bb2457: ; preds = %reserved_word + unreachable + +bb2463: ; preds = %reserved_word + ret i32 286 + +bb2464: ; preds = %reserved_word + unreachable + +bb2503: ; preds = %reserved_word + ret i32 280 + +bb2504: ; preds = %reserved_word + ret i32 undef + +bb2515: ; preds = %reserved_word + ret i32 undef + +bb2516: ; preds = %reserved_word + ret i32 undef + +bb2522: ; preds = 
%reserved_word + unreachable + +bb2527: ; preds = %reserved_word + unreachable + +bb2537: ; preds = %reserved_word + ret i32 undef + +bb2538: ; preds = %reserved_word + ret i32 undef + +bb2549: ; preds = %reserved_word + unreachable + +bb2555: ; preds = %reserved_word + br i1 undef, label %bb2834, label %bb2556 + +bb2556: ; preds = %bb2555 + ret i32 undef + +bb2565: ; preds = %reserved_word + ret i32 undef + +bb2566: ; preds = %reserved_word + ret i32 undef + +bb2567: ; preds = %reserved_word + ret i32 undef + +bb2568: ; preds = %reserved_word + ret i32 undef + +bb2569: ; preds = %reserved_word + ret i32 undef + +bb2570: ; preds = %reserved_word + ret i32 undef + +bb2571: ; preds = %reserved_word + ret i32 undef + +bb2572: ; preds = %reserved_word + ret i32 undef + +bb2583: ; preds = %reserved_word + br i1 undef, label %bb2834, label %bb2584 + +bb2584: ; preds = %bb2583 + ret i32 undef + +bb2593: ; preds = %reserved_word + ret i32 282 + +bb2594: ; preds = %reserved_word + ret i32 282 + +bb2595: ; preds = %reserved_word + ret i32 undef + +bb2596: ; preds = %reserved_word + ret i32 undef + +bb2602: ; preds = %reserved_word + ret i32 undef + +bb2603: ; preds = %reserved_word + ret i32 undef + +bb2604: ; preds = %reserved_word + ret i32 undef + +bb2605: ; preds = %reserved_word + ret i32 undef + +bb2606: ; preds = %reserved_word + ret i32 undef + +bb2617: ; preds = %reserved_word + ret i32 undef + +bb2618: ; preds = %reserved_word + ret i32 undef + +bb2619: ; preds = %reserved_word + unreachable + +bb2625: ; preds = %reserved_word + ret i32 undef + +bb2626: ; preds = %reserved_word + ret i32 undef + +bb2627: ; preds = %reserved_word + ret i32 undef + +bb2648: ; preds = %reserved_word + ret i32 undef + +really_sub: ; preds = %reserved_word, %reserved_word + unreachable + +bb2737: ; preds = %reserved_word + ret i32 undef + +bb2738: ; preds = %reserved_word + ret i32 undef + +bb2739: ; preds = %reserved_word + ret i32 undef + +bb2740: ; preds = %reserved_word + ret i32 
undef + +bb2741: ; preds = %reserved_word + ret i32 undef + +bb2742: ; preds = %reserved_word + ret i32 undef + +bb2743: ; preds = %reserved_word + ret i32 undef + +bb2744: ; preds = %reserved_word + unreachable + +bb2758: ; preds = %reserved_word + ret i32 undef + +bb2764: ; preds = %reserved_word + ret i32 282 + +bb2765: ; preds = %reserved_word + ret i32 282 + +bb2766: ; preds = %reserved_word + ret i32 undef + +bb2782: ; preds = %reserved_word + ret i32 273 + +bb2783: ; preds = %reserved_word + ret i32 275 + +bb2784: ; preds = %reserved_word + ret i32 undef + +bb2785: ; preds = %reserved_word + br i1 undef, label %bb2834, label %bb2786 + +bb2786: ; preds = %bb2785 + ret i32 undef + +bb2790: ; preds = %reserved_word + ret i32 undef + +bb2791: ; preds = %reserved_word + ret i32 undef + +bb2797: ; preds = %reserved_word + ret i32 undef + +bb2815: ; preds = %reserved_word + ret i32 undef + +bb2816: ; preds = %reserved_word + ret i32 272 + +bb2817: ; preds = %reserved_word + ret i32 undef + +bb2818: ; preds = %reserved_word + ret i32 282 + +bb2819: ; preds = %reserved_word + ret i32 undef + +bb2820: ; preds = %reserved_word + ret i32 282 + +bb2821: ; preds = %reserved_word + unreachable + +bb2826: ; preds = %reserved_word + unreachable + +bb2829: ; preds = %reserved_word + ret i32 300 + +bb2830: ; preds = %reserved_word + unreachable + +bb2834: ; preds = %bb2785, %bb2583, %bb2555, %bb2357, %bb2175, %bb1982, %bb1936 + ret i32 283 +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-07-16-CoalescerBug.ll b/src/LLVM/test/CodeGen/X86/2009-07-16-CoalescerBug.ll new file mode 100644 index 0000000..48af440 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-07-16-CoalescerBug.ll
@@ -0,0 +1,210 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 +; rdar://7059496 + + %struct.brinfo = type <{ %struct.brinfo*, %struct.brinfo*, i8*, i32, i32, i32, i8, i8, i8, i8 }> + %struct.cadata = type <{ i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i32, i32, %struct.cmatcher*, i8*, i8*, i8*, i8*, i8*, i8*, i32, i8, i8, i8, i8 }> + %struct.cline = type <{ %struct.cline*, i32, i8, i8, i8, i8, i8*, i32, i8, i8, i8, i8, i8*, i32, i8, i8, i8, i8, i8*, i32, i32, %struct.cline*, %struct.cline*, i32, i32 }> + %struct.cmatch = type <{ i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i32, i8, i8, i8, i8, i32*, i32*, i8*, i8*, i32, i32, i32, i32, i16, i8, i8, i16, i8, i8 }> + %struct.cmatcher = type <{ i32, i8, i8, i8, i8, %struct.cmatcher*, i32, i8, i8, i8, i8, %struct.cpattern*, i32, i8, i8, i8, i8, %struct.cpattern*, i32, i8, i8, i8, i8, %struct.cpattern*, i32, i8, i8, i8, i8, %struct.cpattern*, i32, i8, i8, i8, i8 }> + %struct.cpattern = type <{ %struct.cpattern*, i32, i8, i8, i8, i8, %union.anon }> + %struct.patprog = type <{ i64, i64, i64, i64, i32, i32, i32, i32, i8, i8, i8, i8, i8, i8, i8, i8 }> + %union.anon = type <{ [8 x i8] }> + +define i32 @addmatches(%struct.cadata* %dat, i8** nocapture %argv) nounwind ssp { +entry: + br i1 undef, label %if.else, label %if.then91 + +if.then91: ; preds = %entry + br label %if.end96 + +if.else: ; preds = %entry + br label %if.end96 + +if.end96: ; preds = %if.else, %if.then91 + br i1 undef, label %lor.lhs.false, label %if.then105 + +lor.lhs.false: ; preds = %if.end96 + br i1 undef, label %if.else139, label %if.then105 + +if.then105: ; preds = %lor.lhs.false, %if.end96 + unreachable + +if.else139: ; preds = %lor.lhs.false + br i1 undef, label %land.end, label %land.rhs + +land.rhs: ; preds = %if.else139 + unreachable + +land.end: ; preds = %if.else139 + br i1 undef, label %land.lhs.true285, label %if.then315 + +land.lhs.true285: ; preds = %land.end + br i1 undef, label %if.end324, label %if.then322 + 
+if.then315: ; preds = %land.end + unreachable + +if.then322: ; preds = %land.lhs.true285 + unreachable + +if.end324: ; preds = %land.lhs.true285 + br i1 undef, label %if.end384, label %if.then358 + +if.then358: ; preds = %if.end324 + unreachable + +if.end384: ; preds = %if.end324 + br i1 undef, label %if.end394, label %land.lhs.true387 + +land.lhs.true387: ; preds = %if.end384 + unreachable + +if.end394: ; preds = %if.end384 + br i1 undef, label %if.end498, label %land.lhs.true399 + +land.lhs.true399: ; preds = %if.end394 + br i1 undef, label %if.end498, label %if.then406 + +if.then406: ; preds = %land.lhs.true399 + unreachable + +if.end498: ; preds = %land.lhs.true399, %if.end394 + br i1 undef, label %if.end514, label %if.then503 + +if.then503: ; preds = %if.end498 + unreachable + +if.end514: ; preds = %if.end498 + br i1 undef, label %if.end585, label %if.then520 + +if.then520: ; preds = %if.end514 + br i1 undef, label %lor.lhs.false547, label %if.then560 + +lor.lhs.false547: ; preds = %if.then520 + unreachable + +if.then560: ; preds = %if.then520 + br i1 undef, label %if.end585, label %land.lhs.true566 + +land.lhs.true566: ; preds = %if.then560 + br i1 undef, label %if.end585, label %if.then573 + +if.then573: ; preds = %land.lhs.true566 + unreachable + +if.end585: ; preds = %land.lhs.true566, %if.then560, %if.end514 + br i1 undef, label %cond.true593, label %cond.false599 + +cond.true593: ; preds = %if.end585 + unreachable + +cond.false599: ; preds = %if.end585 + br i1 undef, label %if.end647, label %if.then621 + +if.then621: ; preds = %cond.false599 + br i1 undef, label %cond.true624, label %cond.false630 + +cond.true624: ; preds = %if.then621 + br label %if.end647 + +cond.false630: ; preds = %if.then621 + unreachable + +if.end647: ; preds = %cond.true624, %cond.false599 + br i1 undef, label %if.end723, label %if.then701 + +if.then701: ; preds = %if.end647 + br label %if.end723 + +if.end723: ; preds = %if.then701, %if.end647 + br i1 undef, label %if.else1090, 
label %if.then729 + +if.then729: ; preds = %if.end723 + br i1 undef, label %if.end887, label %if.then812 + +if.then812: ; preds = %if.then729 + unreachable + +if.end887: ; preds = %if.then729 + br i1 undef, label %if.end972, label %if.then893 + +if.then893: ; preds = %if.end887 + br i1 undef, label %if.end919, label %if.then903 + +if.then903: ; preds = %if.then893 + unreachable + +if.end919: ; preds = %if.then893 + br label %if.end972 + +if.end972: ; preds = %if.end919, %if.end887 + %sline.0 = phi %struct.cline* [ undef, %if.end919 ], [ null, %if.end887 ] ; <%struct.cline*> [#uses=5] + %bcs.0 = phi i32 [ undef, %if.end919 ], [ 0, %if.end887 ] ; <i32> [#uses=5] + br i1 undef, label %if.end1146, label %land.lhs.true975 + +land.lhs.true975: ; preds = %if.end972 + br i1 undef, label %if.end1146, label %if.then980 + +if.then980: ; preds = %land.lhs.true975 + br i1 undef, label %cond.false1025, label %cond.false1004 + +cond.false1004: ; preds = %if.then980 + unreachable + +cond.false1025: ; preds = %if.then980 + br i1 undef, label %if.end1146, label %if.then1071 + +if.then1071: ; preds = %cond.false1025 + br i1 undef, label %if.then1074, label %if.end1081 + +if.then1074: ; preds = %if.then1071 + br label %if.end1081 + +if.end1081: ; preds = %if.then1074, %if.then1071 + %call1083 = call %struct.patprog* @patcompile(i8* undef, i32 0, i8** null) nounwind ssp ; <%struct.patprog*> [#uses=2] + br i1 undef, label %if.end1146, label %if.then1086 + +if.then1086: ; preds = %if.end1081 + br label %if.end1146 + +if.else1090: ; preds = %if.end723 + br i1 undef, label %if.end1146, label %land.lhs.true1093 + +land.lhs.true1093: ; preds = %if.else1090 + br i1 undef, label %if.end1146, label %if.then1098 + +if.then1098: ; preds = %land.lhs.true1093 + unreachable + +if.end1146: ; preds = %land.lhs.true1093, %if.else1090, %if.then1086, %if.end1081, %cond.false1025, %land.lhs.true975, %if.end972 + %cp.0 = phi %struct.patprog* [ %call1083, %if.then1086 ], [ null, %if.end972 ], [ null, 
%land.lhs.true975 ], [ null, %cond.false1025 ], [ %call1083, %if.end1081 ], [ null, %if.else1090 ], [ null, %land.lhs.true1093 ] ; <%struct.patprog*> [#uses=1] + %sline.1 = phi %struct.cline* [ %sline.0, %if.then1086 ], [ %sline.0, %if.end972 ], [ %sline.0, %land.lhs.true975 ], [ %sline.0, %cond.false1025 ], [ %sline.0, %if.end1081 ], [ null, %if.else1090 ], [ null, %land.lhs.true1093 ] ; <%struct.cline*> [#uses=1] + %bcs.1 = phi i32 [ %bcs.0, %if.then1086 ], [ %bcs.0, %if.end972 ], [ %bcs.0, %land.lhs.true975 ], [ %bcs.0, %cond.false1025 ], [ %bcs.0, %if.end1081 ], [ 0, %if.else1090 ], [ 0, %land.lhs.true1093 ] ; <i32> [#uses=1] + br i1 undef, label %if.end1307, label %do.body1270 + +do.body1270: ; preds = %if.end1146 + unreachable + +if.end1307: ; preds = %if.end1146 + br i1 undef, label %if.end1318, label %if.then1312 + +if.then1312: ; preds = %if.end1307 + unreachable + +if.end1318: ; preds = %if.end1307 + br i1 undef, label %for.cond1330.preheader, label %if.then1323 + +if.then1323: ; preds = %if.end1318 + unreachable + +for.cond1330.preheader: ; preds = %if.end1318 + %call1587 = call i8* @comp_match(i8* undef, i8* undef, i8* undef, %struct.patprog* %cp.0, %struct.cline** undef, i32 0, %struct.brinfo** undef, i32 0, %struct.brinfo** undef, i32 %bcs.1, i32* undef) nounwind ssp ; <i8*> [#uses=0] + %call1667 = call %struct.cmatch* @add_match_data(i32 0, i8* undef, i8* undef, %struct.cline* undef, i8* undef, i8* null, i8* undef, i8* undef, i8* undef, i8* undef, %struct.cline* null, i8* undef, %struct.cline* %sline.1, i8* undef, i32 undef, i32 undef) ssp ; <%struct.cmatch*> [#uses=0] + unreachable +} + +declare %struct.patprog* @patcompile(i8*, i32, i8**) ssp + +declare i8* @comp_match(i8*, i8*, i8*, %struct.patprog*, %struct.cline**, i32, %struct.brinfo**, i32, %struct.brinfo**, i32, i32*) ssp + +declare %struct.cmatch* @add_match_data(i32, i8*, i8*, %struct.cline*, i8*, i8*, i8*, i8*, i8*, i8*, %struct.cline*, i8*, %struct.cline*, i8*, i32, i32) nounwind ssp
diff --git a/src/LLVM/test/CodeGen/X86/2009-07-17-StackColoringBug.ll b/src/LLVM/test/CodeGen/X86/2009-07-17-StackColoringBug.ll new file mode 100644 index 0000000..3e5bd34 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-07-17-StackColoringBug.ll
@@ -0,0 +1,55 @@ +; RUN: llc < %s -mtriple=i386-pc-linux-gnu -disable-fp-elim -color-ss-with-regs | not grep dil +; PR4552 + +target triple = "i386-pc-linux-gnu" +@g_8 = internal global i32 0 ; <i32*> [#uses=1] +@g_72 = internal global i32 0 ; <i32*> [#uses=1] +@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (i32, i8, i8)* @uint84 to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +define i32 @uint84(i32 %p_15, i8 signext %p_17, i8 signext %p_19) nounwind { +entry: + %g_72.promoted = load i32* @g_72 ; <i32> [#uses=1] + %g_8.promoted = load i32* @g_8 ; <i32> [#uses=1] + br label %bb + +bb: ; preds = %func_40.exit, %entry + %g_8.tmp.1 = phi i32 [ %g_8.promoted, %entry ], [ %g_8.tmp.0, %func_40.exit ] ; <i32> [#uses=3] + %g_72.tmp.1 = phi i32 [ %g_72.promoted, %entry ], [ %g_72.tmp.0, %func_40.exit ] ; <i32> [#uses=3] + %retval12.i4.i.i = trunc i32 %g_8.tmp.1 to i8 ; <i8> [#uses=2] + %0 = trunc i32 %g_72.tmp.1 to i8 ; <i8> [#uses=2] + %1 = mul i8 %retval12.i4.i.i, %0 ; <i8> [#uses=1] + %2 = icmp eq i8 %1, 0 ; <i1> [#uses=1] + br i1 %2, label %bb2.i.i, label %bb.i.i + +bb.i.i: ; preds = %bb + %3 = sext i8 %0 to i32 ; <i32> [#uses=1] + %4 = and i32 %3, 50295 ; <i32> [#uses=1] + %5 = icmp eq i32 %4, 0 ; <i1> [#uses=1] + br i1 %5, label %bb2.i.i, label %func_55.exit.i + +bb2.i.i: ; preds = %bb.i.i, %bb + br label %func_55.exit.i + +func_55.exit.i: ; preds = %bb2.i.i, %bb.i.i + %g_72.tmp.2 = phi i32 [ 1, %bb2.i.i ], [ %g_72.tmp.1, %bb.i.i ] ; <i32> [#uses=1] + %6 = phi i32 [ 1, %bb2.i.i ], [ %g_72.tmp.1, %bb.i.i ] ; <i32> [#uses=1] + %7 = trunc i32 %6 to i8 ; <i8> [#uses=2] + %8 = mul i8 %7, %retval12.i4.i.i ; <i8> [#uses=1] + %9 = icmp eq i8 %8, 0 ; <i1> [#uses=1] + br i1 %9, label %bb2.i4.i, label %bb.i3.i + +bb.i3.i: ; preds = %func_55.exit.i + %10 = sext i8 %7 to i32 ; <i32> [#uses=1] + %11 = and i32 %10, 50295 ; <i32> [#uses=1] + %12 = icmp eq i32 %11, 0 ; <i1> [#uses=1] + br i1 %12, label %bb2.i4.i, label %func_40.exit + +bb2.i4.i: ; preds = 
%bb.i3.i, %func_55.exit.i + br label %func_40.exit + +func_40.exit: ; preds = %bb2.i4.i, %bb.i3.i + %g_72.tmp.0 = phi i32 [ 1, %bb2.i4.i ], [ %g_72.tmp.2, %bb.i3.i ] ; <i32> [#uses=1] + %phitmp = icmp sgt i32 %g_8.tmp.1, 0 ; <i1> [#uses=1] + %g_8.tmp.0 = select i1 %phitmp, i32 %g_8.tmp.1, i32 1 ; <i32> [#uses=1] + br label %bb +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-07-19-AsmExtraOperands.ll b/src/LLVM/test/CodeGen/X86/2009-07-19-AsmExtraOperands.ll new file mode 100644 index 0000000..a0095ab --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-07-19-AsmExtraOperands.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=x86-64 +; PR4583 + +define i32 @atomic_cmpset_long(i64* %dst, i64 %exp, i64 %src) nounwind ssp noredzone noimplicitfloat { +entry: + %0 = call i8 asm sideeffect "\09lock ; \09\09\09cmpxchgq $2,$1 ;\09 sete\09$0 ;\09\091:\09\09\09\09# atomic_cmpset_long", "={ax},=*m,r,{ax},*m,~{memory},~{dirflag},~{fpsr},~{flags}"(i64* undef, i64 undef, i64 undef, i64* undef) nounwind ; <i8> [#uses=0] + br label %1 + +; <label>:1 ; preds = %entry + ret i32 undef +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-07-20-CoalescerBug.ll b/src/LLVM/test/CodeGen/X86/2009-07-20-CoalescerBug.ll new file mode 100644 index 0000000..e99edd6 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-07-20-CoalescerBug.ll
@@ -0,0 +1,165 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 +; PR4587 +; rdar://7072590 + + %struct.re_pattern_buffer = type <{ i8*, i64, i64, i64, i8*, i64, i64, i8, i8, i8, i8, i8, i8, i8, i8 }> + +define fastcc i32 @regex_compile(i8* %pattern, i64 %size, i64 %syntax, %struct.re_pattern_buffer* nocapture %bufp) nounwind ssp { +entry: + br i1 undef, label %return, label %if.end + +if.end: ; preds = %entry + %tmp35 = getelementptr %struct.re_pattern_buffer* %bufp, i64 0, i32 3 ; <i64*> [#uses=1] + store i64 %syntax, i64* %tmp35 + store i32 undef, i32* undef + br i1 undef, label %if.then66, label %if.end102 + +if.then66: ; preds = %if.end + br i1 false, label %if.else, label %if.then70 + +if.then70: ; preds = %if.then66 + %call74 = call i8* @xrealloc(i8* undef, i64 32) nounwind ssp ; <i8*> [#uses=0] + unreachable + +if.else: ; preds = %if.then66 + br i1 false, label %do.body86, label %if.end99 + +do.body86: ; preds = %if.else + br i1 false, label %do.end, label %if.then90 + +if.then90: ; preds = %do.body86 + unreachable + +do.end: ; preds = %do.body86 + ret i32 12 + +if.end99: ; preds = %if.else + br label %if.end102 + +if.end102: ; preds = %if.end99, %if.end + br label %while.body + +while.body: ; preds = %if.end1126, %sw.bb532, %while.body, %if.end102 + %laststart.2 = phi i8* [ null, %if.end102 ], [ %laststart.7.ph, %if.end1126 ], [ %laststart.2, %sw.bb532 ], [ %laststart.2, %while.body ] ; <i8*> [#uses=6] + %b.1 = phi i8* [ undef, %if.end102 ], [ %ctg29688, %if.end1126 ], [ %b.1, %sw.bb532 ], [ %b.1, %while.body ] ; <i8*> [#uses=5] + br i1 undef, label %while.body, label %if.end127 + +if.end127: ; preds = %while.body + switch i32 undef, label %sw.bb532 [ + i32 123, label %handle_interval + i32 92, label %do.body3527 + ] + +sw.bb532: ; preds = %if.end127 + br i1 undef, label %while.body, label %if.end808 + +if.end808: ; preds = %sw.bb532 + br i1 undef, label %while.cond1267.preheader, label %if.then811 + +while.cond1267.preheader: ; preds = %if.end808 + br 
i1 false, label %return, label %if.end1294 + +if.then811: ; preds = %if.end808 + %call817 = call fastcc i8* @skip_one_char(i8* %laststart.2) ssp ; <i8*> [#uses=0] + br i1 undef, label %cond.end834, label %lor.lhs.false827 + +lor.lhs.false827: ; preds = %if.then811 + br label %cond.end834 + +cond.end834: ; preds = %lor.lhs.false827, %if.then811 + br i1 undef, label %land.lhs.true838, label %while.cond979.preheader + +land.lhs.true838: ; preds = %cond.end834 + br i1 undef, label %if.then842, label %while.cond979.preheader + +if.then842: ; preds = %land.lhs.true838 + %conv851 = trunc i64 undef to i32 ; <i32> [#uses=1] + br label %while.cond979.preheader + +while.cond979.preheader: ; preds = %if.then842, %land.lhs.true838, %cond.end834 + %startoffset.0.ph = phi i32 [ 0, %cond.end834 ], [ 0, %land.lhs.true838 ], [ %conv851, %if.then842 ] ; <i32> [#uses=2] + %laststart.7.ph = phi i8* [ %laststart.2, %cond.end834 ], [ %laststart.2, %land.lhs.true838 ], [ %laststart.2, %if.then842 ] ; <i8*> [#uses=3] + %b.4.ph = phi i8* [ %b.1, %cond.end834 ], [ %b.1, %land.lhs.true838 ], [ %b.1, %if.then842 ] ; <i8*> [#uses=3] + %ctg29688 = getelementptr i8* %b.4.ph, i64 6 ; <i8*> [#uses=1] + br label %while.cond979 + +while.cond979: ; preds = %if.end1006, %while.cond979.preheader + %cmp991 = icmp ugt i64 undef, 0 ; <i1> [#uses=1] + br i1 %cmp991, label %do.body994, label %while.end1088 + +do.body994: ; preds = %while.cond979 + br i1 undef, label %return, label %if.end1006 + +if.end1006: ; preds = %do.body994 + %cmp1014 = icmp ugt i64 undef, 32768 ; <i1> [#uses=1] + %storemerge10953 = select i1 %cmp1014, i64 32768, i64 undef ; <i64> [#uses=1] + store i64 %storemerge10953, i64* undef + br i1 false, label %return, label %while.cond979 + +while.end1088: ; preds = %while.cond979 + br i1 undef, label %if.then1091, label %if.else1101 + +if.then1091: ; preds = %while.end1088 + store i8 undef, i8* undef + %idx.ext1132.pre = zext i32 %startoffset.0.ph to i64 ; <i64> [#uses=1] + %add.ptr1133.pre = 
getelementptr i8* %laststart.7.ph, i64 %idx.ext1132.pre ; <i8*> [#uses=1] + %sub.ptr.lhs.cast1135.pre = ptrtoint i8* %add.ptr1133.pre to i64 ; <i64> [#uses=1] + br label %if.end1126 + +if.else1101: ; preds = %while.end1088 + %cond1109 = select i1 undef, i32 18, i32 14 ; <i32> [#uses=1] + %idx.ext1112 = zext i32 %startoffset.0.ph to i64 ; <i64> [#uses=1] + %add.ptr1113 = getelementptr i8* %laststart.7.ph, i64 %idx.ext1112 ; <i8*> [#uses=2] + %sub.ptr.rhs.cast1121 = ptrtoint i8* %add.ptr1113 to i64 ; <i64> [#uses=1] + call fastcc void @insert_op1(i32 %cond1109, i8* %add.ptr1113, i32 undef, i8* %b.4.ph) ssp + br label %if.end1126 + +if.end1126: ; preds = %if.else1101, %if.then1091 + %sub.ptr.lhs.cast1135.pre-phi = phi i64 [ %sub.ptr.rhs.cast1121, %if.else1101 ], [ %sub.ptr.lhs.cast1135.pre, %if.then1091 ] ; <i64> [#uses=1] + %add.ptr1128 = getelementptr i8* %b.4.ph, i64 3 ; <i8*> [#uses=1] + %sub.ptr.rhs.cast1136 = ptrtoint i8* %add.ptr1128 to i64 ; <i64> [#uses=1] + %sub.ptr.sub1137 = sub i64 %sub.ptr.lhs.cast1135.pre-phi, %sub.ptr.rhs.cast1136 ; <i64> [#uses=1] + %sub.ptr.sub11378527 = trunc i64 %sub.ptr.sub1137 to i32 ; <i32> [#uses=1] + %conv1139 = add i32 %sub.ptr.sub11378527, -3 ; <i32> [#uses=1] + store i8 undef, i8* undef + %shr10.i8599 = lshr i32 %conv1139, 8 ; <i32> [#uses=1] + %conv6.i8600 = trunc i32 %shr10.i8599 to i8 ; <i8> [#uses=1] + store i8 %conv6.i8600, i8* undef + br label %while.body + +if.end1294: ; preds = %while.cond1267.preheader + ret i32 12 + +do.body3527: ; preds = %if.end127 + br i1 undef, label %do.end3536, label %if.then3531 + +if.then3531: ; preds = %do.body3527 + unreachable + +do.end3536: ; preds = %do.body3527 + ret i32 5 + +handle_interval: ; preds = %if.end127 + br i1 undef, label %do.body4547, label %cond.false4583 + +do.body4547: ; preds = %handle_interval + br i1 undef, label %do.end4556, label %if.then4551 + +if.then4551: ; preds = %do.body4547 + unreachable + +do.end4556: ; preds = %do.body4547 + ret i32 9 + +cond.false4583: ; 
preds = %handle_interval + unreachable + +return: ; preds = %if.end1006, %do.body994, %while.cond1267.preheader, %entry + ret i32 undef +} + +declare i8* @xrealloc(i8*, i64) ssp + +declare fastcc i8* @skip_one_char(i8*) nounwind readonly ssp + +declare fastcc void @insert_op1(i32, i8*, i32, i8*) nounwind ssp
diff --git a/src/LLVM/test/CodeGen/X86/2009-07-20-DAGCombineBug.ll b/src/LLVM/test/CodeGen/X86/2009-07-20-DAGCombineBug.ll new file mode 100644 index 0000000..e83b3a7 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-07-20-DAGCombineBug.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s -march=x86 + +@bsBuff = internal global i32 0 ; <i32*> [#uses=1] +@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 ()* @bsGetUInt32 to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +define fastcc i32 @bsGetUInt32() nounwind ssp { +entry: + %bsBuff.promoted44 = load i32* @bsBuff ; <i32> [#uses=1] + %0 = add i32 0, -8 ; <i32> [#uses=1] + %1 = lshr i32 %bsBuff.promoted44, %0 ; <i32> [#uses=1] + %2 = shl i32 %1, 8 ; <i32> [#uses=1] + br label %bb3.i17 + +bb3.i9: ; preds = %bb3.i17 + br i1 false, label %bb2.i16, label %bb1.i15 + +bb1.i15: ; preds = %bb3.i9 + unreachable + +bb2.i16: ; preds = %bb3.i9 + br label %bb3.i17 + +bb3.i17: ; preds = %bb2.i16, %entry + br i1 false, label %bb3.i9, label %bsR.exit18 + +bsR.exit18: ; preds = %bb3.i17 + %3 = or i32 0, %2 ; <i32> [#uses=0] + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll b/src/LLVM/test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll new file mode 100644 index 0000000..288eef4 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86-64 +; PR4669 +declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) + +define <1 x i64> @test(i64 %t) { +entry: + %t1 = insertelement <1 x i64> undef, i64 %t, i32 0 + %t0 = bitcast <1 x i64> %t1 to x86_mmx + %t2 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %t0, i32 48) + %t3 = bitcast x86_mmx %t2 to <1 x i64> + ret <1 x i64> %t3 +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-08-06-branchfolder-crash.ll b/src/LLVM/test/CodeGen/X86/2009-08-06-branchfolder-crash.ll new file mode 100644 index 0000000..2080c0a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-08-06-branchfolder-crash.ll
@@ -0,0 +1,140 @@ +; RUN: llc < %s -O3 +; PR4626 +; ModuleID = '<stdin>' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" +target triple = "i386-pc-linux-gnu" +@g_3 = common global i8 0, align 1 ; <i8*> [#uses=2] + +define signext i8 @safe_mul_func_int16_t_s_s(i32 %_si1, i8 signext %_si2) nounwind readnone { +entry: + %tobool = icmp eq i32 %_si1, 0 ; <i1> [#uses=1] + %cmp = icmp sgt i8 %_si2, 0 ; <i1> [#uses=2] + %or.cond = or i1 %cmp, %tobool ; <i1> [#uses=1] + br i1 %or.cond, label %lor.rhs, label %land.lhs.true3 + +land.lhs.true3: ; preds = %entry + %conv5 = sext i8 %_si2 to i32 ; <i32> [#uses=1] + %cmp7 = icmp slt i32 %conv5, %_si1 ; <i1> [#uses=1] + br i1 %cmp7, label %cond.end, label %lor.rhs + +lor.rhs: ; preds = %land.lhs.true3, %entry + %cmp10.not = icmp slt i32 %_si1, 1 ; <i1> [#uses=1] + %or.cond23 = and i1 %cmp, %cmp10.not ; <i1> [#uses=1] + br i1 %or.cond23, label %lor.end, label %cond.false + +lor.end: ; preds = %lor.rhs + %tobool19 = icmp ne i8 %_si2, 0 ; <i1> [#uses=2] + %lor.ext = zext i1 %tobool19 to i32 ; <i32> [#uses=1] + br i1 %tobool19, label %cond.end, label %cond.false + +cond.false: ; preds = %lor.end, %lor.rhs + %conv21 = sext i8 %_si2 to i32 ; <i32> [#uses=1] + br label %cond.end + +cond.end: ; preds = %cond.false, %lor.end, %land.lhs.true3 + %cond = phi i32 [ %conv21, %cond.false ], [ 1, %land.lhs.true3 ], [ %lor.ext, %lor.end ] ; <i32> [#uses=1] + %conv22 = trunc i32 %cond to i8 ; <i8> [#uses=1] + ret i8 %conv22 +} + +define i32 @func_34(i8 signext %p_35) nounwind readonly { +entry: + %tobool = icmp eq i8 %p_35, 0 ; <i1> [#uses=1] + br i1 %tobool, label %lor.lhs.false, label %if.then + +lor.lhs.false: ; preds = %entry + %tmp1 = load i8* @g_3 ; <i8> [#uses=1] + %tobool3 = icmp eq i8 %tmp1, 0 ; <i1> [#uses=1] + br i1 %tobool3, label %return, label %if.then + +if.then: ; preds = %lor.lhs.false, %entry + %tmp4 = load i8* @g_3 ; <i8> [#uses=1] + 
%conv5 = sext i8 %tmp4 to i32 ; <i32> [#uses=1] + ret i32 %conv5 + +return: ; preds = %lor.lhs.false + ret i32 0 +} + +define void @foo(i32 %p_5) noreturn nounwind { +entry: + %cmp = icmp sgt i32 %p_5, 0 ; <i1> [#uses=2] + %call = tail call i32 @safe() nounwind ; <i32> [#uses=1] + %conv1 = trunc i32 %call to i8 ; <i8> [#uses=3] + %tobool.i = xor i1 %cmp, true ; <i1> [#uses=3] + %cmp.i = icmp sgt i8 %conv1, 0 ; <i1> [#uses=3] + %or.cond.i = or i1 %cmp.i, %tobool.i ; <i1> [#uses=1] + br i1 %or.cond.i, label %lor.rhs.i, label %land.lhs.true3.i + +land.lhs.true3.i: ; preds = %entry + %xor = zext i1 %cmp to i32 ; <i32> [#uses=1] + %conv5.i = sext i8 %conv1 to i32 ; <i32> [#uses=1] + %cmp7.i = icmp slt i32 %conv5.i, %xor ; <i1> [#uses=1] + %cmp7.i.not = xor i1 %cmp7.i, true ; <i1> [#uses=1] + %or.cond23.i = and i1 %cmp.i, %tobool.i ; <i1> [#uses=1] + %or.cond = and i1 %cmp7.i.not, %or.cond23.i ; <i1> [#uses=1] + br i1 %or.cond, label %lor.end.i, label %for.inc + +lor.rhs.i: ; preds = %entry + %or.cond23.i.old = and i1 %cmp.i, %tobool.i ; <i1> [#uses=1] + br i1 %or.cond23.i.old, label %lor.end.i, label %for.inc + +lor.end.i: ; preds = %lor.rhs.i, %land.lhs.true3.i + %tobool19.i = icmp eq i8 %conv1, 0 ; <i1> [#uses=0] + br label %for.inc + +for.inc: ; preds = %for.inc, %lor.end.i, %lor.rhs.i, %land.lhs.true3.i + br label %for.inc +} + +define i32 @func_35(i8 signext %p_35) nounwind readonly { +entry: + %tobool = icmp eq i8 %p_35, 0 ; <i1> [#uses=1] + br i1 %tobool, label %lor.lhs.false, label %if.then + +lor.lhs.false: ; preds = %entry + %tmp1 = load i8* @g_3 ; <i8> [#uses=1] + %tobool3 = icmp eq i8 %tmp1, 0 ; <i1> [#uses=1] + br i1 %tobool3, label %return, label %if.then + +if.then: ; preds = %lor.lhs.false, %entry + %tmp4 = load i8* @g_3 ; <i8> [#uses=1] + %conv5 = sext i8 %tmp4 to i32 ; <i32> [#uses=1] + ret i32 %conv5 + +return: ; preds = %lor.lhs.false + ret i32 0 +} + +define void @bar(i32 %p_5) noreturn nounwind { +entry: + %cmp = icmp sgt i32 %p_5, 0 ; <i1> 
[#uses=2] + %call = tail call i32 @safe() nounwind ; <i32> [#uses=1] + %conv1 = trunc i32 %call to i8 ; <i8> [#uses=3] + %tobool.i = xor i1 %cmp, true ; <i1> [#uses=3] + %cmp.i = icmp sgt i8 %conv1, 0 ; <i1> [#uses=3] + %or.cond.i = or i1 %cmp.i, %tobool.i ; <i1> [#uses=1] + br i1 %or.cond.i, label %lor.rhs.i, label %land.lhs.true3.i + +land.lhs.true3.i: ; preds = %entry + %xor = zext i1 %cmp to i32 ; <i32> [#uses=1] + %conv5.i = sext i8 %conv1 to i32 ; <i32> [#uses=1] + %cmp7.i = icmp slt i32 %conv5.i, %xor ; <i1> [#uses=1] + %cmp7.i.not = xor i1 %cmp7.i, true ; <i1> [#uses=1] + %or.cond23.i = and i1 %cmp.i, %tobool.i ; <i1> [#uses=1] + %or.cond = and i1 %cmp7.i.not, %or.cond23.i ; <i1> [#uses=1] + br i1 %or.cond, label %lor.end.i, label %for.inc + +lor.rhs.i: ; preds = %entry + %or.cond23.i.old = and i1 %cmp.i, %tobool.i ; <i1> [#uses=1] + br i1 %or.cond23.i.old, label %lor.end.i, label %for.inc + +lor.end.i: ; preds = %lor.rhs.i, %land.lhs.true3.i + %tobool19.i = icmp eq i8 %conv1, 0 ; <i1> [#uses=0] + br label %for.inc + +for.inc: ; preds = %for.inc, %lor.end.i, %lor.rhs.i, %land.lhs.true3.i + br label %for.inc +} + +declare i32 @safe()
diff --git a/src/LLVM/test/CodeGen/X86/2009-08-06-inlineasm.ll b/src/LLVM/test/CodeGen/X86/2009-08-06-inlineasm.ll new file mode 100644 index 0000000..f9b5f9e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-08-06-inlineasm.ll
@@ -0,0 +1,30 @@ +; RUN: false +; XRUN: llc -mtriple=i386-pc-linux-gnu < %s +; PR4668 +; XFAIL: * +; FIXME: If the coalescer happens to coalesce %level.1 with the copy to EAX +; (for ret) then this will fail to compile. The fundamental problem is +; once the coalescer fixes a virtual register to physical register we can't +; evict it. This started passing again due to the changes for PR8969 +; so I've disabled it with a bigger stick. + +define i32 @x(i32 %qscale) nounwind { +entry: + %temp_block = alloca [64 x i16], align 16 ; <[64 x i16]*> [#uses=0] + %tmp = call i32 asm sideeffect "xor %edx, %edx", "={dx},~{dirflag},~{fpsr},~{flags}"() nounwind ; <i32> [#uses=1] + br i1 undef, label %if.end78, label %if.then28 + +if.then28: ; preds = %entry + br label %if.end78 + +if.end78: ; preds = %if.then28, %entry + %level.1 = phi i32 [ %tmp, %if.then28 ], [ 0, %entry ] ; <i32> [#uses=1] + %add.ptr1 = getelementptr [64 x i16]* null, i32 0, i32 %qscale ; <i16*> [#uses=1] + %add.ptr2 = getelementptr [64 x i16]* null, i32 1, i32 %qscale ; <i16*> [#uses=1] + %add.ptr3 = getelementptr [64 x i16]* null, i32 2, i32 %qscale ; <i16*> [#uses=1] + %add.ptr4 = getelementptr [64 x i16]* null, i32 3, i32 %qscale ; <i16*> [#uses=1] + %add.ptr5 = getelementptr [64 x i16]* null, i32 4, i32 %qscale ; <i16*> [#uses=1] + %add.ptr6 = getelementptr [64 x i16]* null, i32 5, i32 %qscale ; <i16*> [#uses=1] + %tmp1 = call i32 asm sideeffect "nop", "={ax},r,r,r,r,r,0,~{dirflag},~{fpsr},~{flags}"(i16* %add.ptr6, i16* %add.ptr5, i16* %add.ptr4, i16* %add.ptr3, i16* %add.ptr2, i16* %add.ptr1) nounwind ; <i32> [#uses=0] + ret i32 %level.1 +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-08-08-CastError.ll b/src/LLVM/test/CodeGen/X86/2009-08-08-CastError.ll new file mode 100644 index 0000000..2dc812d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-08-08-CastError.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -mtriple=x86_64-pc-mingw64 | grep movabsq + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" + +define <4 x float> @RecursiveTestFunc1(i8*) { +EntryBlock: + %1 = call <4 x float> inttoptr (i64 5367207198 to <4 x float> (i8*, float, float, float, float)*)(i8* %0, float 8.000000e+00, float 5.000000e+00, float 3.000000e+00, float 4.000000e+00) ; <<4 x float>> [#uses=1] + ret <4 x float> %1 +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-08-12-badswitch.ll b/src/LLVM/test/CodeGen/X86/2009-08-12-badswitch.ll new file mode 100644 index 0000000..a94fce0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-08-12-badswitch.ll
@@ -0,0 +1,176 @@ +; RUN: llc < %s | grep LJT +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-apple-darwin10" + +declare void @f1() nounwind readnone +declare void @f2() nounwind readnone +declare void @f3() nounwind readnone +declare void @f4() nounwind readnone +declare void @f5() nounwind readnone +declare void @f6() nounwind readnone +declare void @f7() nounwind readnone +declare void @f8() nounwind readnone +declare void @f9() nounwind readnone +declare void @f10() nounwind readnone +declare void @f11() nounwind readnone +declare void @f12() nounwind readnone +declare void @f13() nounwind readnone +declare void @f14() nounwind readnone +declare void @f15() nounwind readnone +declare void @f16() nounwind readnone +declare void @f17() nounwind readnone +declare void @f18() nounwind readnone +declare void @f19() nounwind readnone +declare void @f20() nounwind readnone +declare void @f21() nounwind readnone +declare void @f22() nounwind readnone +declare void @f23() nounwind readnone +declare void @f24() nounwind readnone +declare void @f25() nounwind readnone +declare void @f26() nounwind readnone + +define internal fastcc i32 @foo(i64 %bar) nounwind ssp { +entry: + br label %bb49 + +bb49: + switch i64 %bar, label %RETURN [ + i64 2, label %RRETURN_2 + i64 3, label %RRETURN_6 + i64 4, label %RRETURN_7 + i64 5, label %RRETURN_14 + i64 6, label %RRETURN_15 + i64 7, label %RRETURN_16 + i64 8, label %RRETURN_17 + i64 9, label %RRETURN_18 + i64 10, label %RRETURN_19 + i64 11, label %RRETURN_20 + i64 12, label %RRETURN_21 + i64 13, label %RRETURN_22 + i64 14, label %RRETURN_24 + i64 15, label %RRETURN_26 + i64 16, label %RRETURN_27 + i64 17, label %RRETURN_28 + i64 18, label %RRETURN_29 + i64 19, label %RRETURN_30 + i64 20, label %RRETURN_31 + i64 21, label %RRETURN_38 + i64 22, label %RRETURN_40 + i64 23, label %RRETURN_42 + i64 24, label 
%RRETURN_44 + i64 25, label %RRETURN_48 + i64 26, label %RRETURN_52 + i64 27, label %RRETURN_1 + ] + +RETURN: + call void @f1() + br label %EXIT + +RRETURN_2: ; preds = %bb49 + call void @f2() + br label %EXIT + +RRETURN_6: ; preds = %bb49 + call void @f2() + br label %EXIT + +RRETURN_7: ; preds = %bb49 + call void @f3() + br label %EXIT + +RRETURN_14: ; preds = %bb49 + call void @f4() + br label %EXIT + +RRETURN_15: ; preds = %bb49 + call void @f5() + br label %EXIT + +RRETURN_16: ; preds = %bb49 + call void @f6() + br label %EXIT + +RRETURN_17: ; preds = %bb49 + call void @f7() + br label %EXIT + +RRETURN_18: ; preds = %bb49 + call void @f8() + br label %EXIT + +RRETURN_19: ; preds = %bb49 + call void @f9() + br label %EXIT + +RRETURN_20: ; preds = %bb49 + call void @f10() + br label %EXIT + +RRETURN_21: ; preds = %bb49 + call void @f11() + br label %EXIT + +RRETURN_22: ; preds = %bb49 + call void @f12() + br label %EXIT + +RRETURN_24: ; preds = %bb49 + call void @f13() + br label %EXIT + +RRETURN_26: ; preds = %bb49 + call void @f14() + br label %EXIT + +RRETURN_27: ; preds = %bb49 + call void @f15() + br label %EXIT + +RRETURN_28: ; preds = %bb49 + call void @f16() + br label %EXIT + +RRETURN_29: ; preds = %bb49 + call void @f17() + br label %EXIT + +RRETURN_30: ; preds = %bb49 + call void @f18() + br label %EXIT + +RRETURN_31: ; preds = %bb49 + call void @f19() + br label %EXIT + +RRETURN_38: ; preds = %bb49 + call void @f20() + br label %EXIT + +RRETURN_40: ; preds = %bb49 + call void @f21() + br label %EXIT + +RRETURN_42: ; preds = %bb49 + call void @f22() + br label %EXIT + +RRETURN_44: ; preds = %bb49 + call void @f23() + br label %EXIT + +RRETURN_48: ; preds = %bb49 + call void @f24() + br label %EXIT + +RRETURN_52: ; preds = %bb49 + call void @f25() + br label %EXIT + +RRETURN_1: ; preds = %bb49 + call void @f26() + br label %EXIT + +EXIT: + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-08-14-Win64MemoryIndirectArg.ll b/src/LLVM/test/CodeGen/X86/2009-08-14-Win64MemoryIndirectArg.ll new file mode 100644 index 0000000..bf668e3 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-08-14-Win64MemoryIndirectArg.ll
@@ -0,0 +1,57 @@ +; RUN: llc < %s +target triple = "x86_64-mingw" + +; ModuleID = 'mm.bc' + %0 = type opaque ; type %0 + %1 = type opaque ; type %1 + +define internal fastcc float @computeMipmappingRho(%0* %shaderExecutionStatePtr, i32 %index, <4 x float> %texCoord, <4 x float> %texCoordDX, <4 x float> %texCoordDY) readonly { +indexCheckBlock: + %indexCmp = icmp ugt i32 %index, 16 ; <i1> [#uses=1] + br i1 %indexCmp, label %zeroReturnBlock, label %primitiveTextureFetchBlock + +primitiveTextureFetchBlock: ; preds = %indexCheckBlock + %pointerArithmeticTmp = bitcast %0* %shaderExecutionStatePtr to i8* ; <i8*> [#uses=1] + %pointerArithmeticTmp1 = getelementptr i8* %pointerArithmeticTmp, i64 1808 ; <i8*> [#uses=1] + %pointerArithmeticTmp2 = bitcast i8* %pointerArithmeticTmp1 to %1** ; <%1**> [#uses=1] + %primitivePtr = load %1** %pointerArithmeticTmp2 ; <%1*> [#uses=1] + %pointerArithmeticTmp3 = bitcast %1* %primitivePtr to i8* ; <i8*> [#uses=1] + %pointerArithmeticTmp4 = getelementptr i8* %pointerArithmeticTmp3, i64 19408 ; <i8*> [#uses=1] + %pointerArithmeticTmp5 = bitcast i8* %pointerArithmeticTmp4 to %1** ; <%1**> [#uses=1] + %primitiveTexturePtr = getelementptr %1** %pointerArithmeticTmp5, i32 %index ; <%1**> [#uses=1] + %primitiveTexturePtr6 = load %1** %primitiveTexturePtr ; <%1*> [#uses=2] + br label %textureCheckBlock + +textureCheckBlock: ; preds = %primitiveTextureFetchBlock + %texturePtrInt = ptrtoint %1* %primitiveTexturePtr6 to i64 ; <i64> [#uses=1] + %testTextureNULL = icmp eq i64 %texturePtrInt, 0 ; <i1> [#uses=1] + br i1 %testTextureNULL, label %zeroReturnBlock, label %rhoCalculateBlock + +rhoCalculateBlock: ; preds = %textureCheckBlock + %pointerArithmeticTmp7 = bitcast %1* %primitiveTexturePtr6 to i8* ; <i8*> [#uses=1] + %pointerArithmeticTmp8 = getelementptr i8* %pointerArithmeticTmp7, i64 640 ; <i8*> [#uses=1] + %pointerArithmeticTmp9 = bitcast i8* %pointerArithmeticTmp8 to <4 x float>* ; <<4 x float>*> [#uses=1] + %dimensionsPtr = load <4 x float>* 
%pointerArithmeticTmp9, align 1 ; <<4 x float>> [#uses=2] + %texDiffDX = fsub <4 x float> %texCoordDX, %texCoord ; <<4 x float>> [#uses=1] + %texDiffDY = fsub <4 x float> %texCoordDY, %texCoord ; <<4 x float>> [#uses=1] + %ddx = fmul <4 x float> %texDiffDX, %dimensionsPtr ; <<4 x float>> [#uses=2] + %ddx10 = fmul <4 x float> %texDiffDY, %dimensionsPtr ; <<4 x float>> [#uses=2] + %ddxSquared = fmul <4 x float> %ddx, %ddx ; <<4 x float>> [#uses=3] + %0 = shufflevector <4 x float> %ddxSquared, <4 x float> %ddxSquared, <4 x i32> <i32 1, i32 0, i32 0, i32 0> ; <<4 x float>> [#uses=1] + %dxSquared = fadd <4 x float> %ddxSquared, %0 ; <<4 x float>> [#uses=1] + %1 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %dxSquared) ; <<4 x float>> [#uses=1] + %ddySquared = fmul <4 x float> %ddx10, %ddx10 ; <<4 x float>> [#uses=3] + %2 = shufflevector <4 x float> %ddySquared, <4 x float> %ddySquared, <4 x i32> <i32 1, i32 0, i32 0, i32 0> ; <<4 x float>> [#uses=1] + %dySquared = fadd <4 x float> %ddySquared, %2 ; <<4 x float>> [#uses=1] + %3 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %dySquared) ; <<4 x float>> [#uses=1] + %4 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %1, <4 x float> %3) ; <<4 x float>> [#uses=1] + %rho = extractelement <4 x float> %4, i32 0 ; <float> [#uses=1] + ret float %rho + +zeroReturnBlock: ; preds = %textureCheckBlock, %indexCheckBlock + ret float 0.000000e+00 +} + +declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone + +declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/X86/2009-08-19-LoadNarrowingMiscompile.ll b/src/LLVM/test/CodeGen/X86/2009-08-19-LoadNarrowingMiscompile.ll new file mode 100644 index 0000000..5f6cf3b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-08-19-LoadNarrowingMiscompile.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=x86 -mtriple=i386-pc-linux | FileCheck %s + +@a = external global i96, align 4 +@b = external global i64, align 8 + +define void @c() nounwind { +; CHECK: movl a+8, %eax + %srcval1 = load i96* @a, align 4 + %sroa.store.elt2 = lshr i96 %srcval1, 64 + %tmp = trunc i96 %sroa.store.elt2 to i64 +; CHECK: movl %eax, b +; CHECK: movl $0, b+4 + store i64 %tmp, i64* @b, align 8 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll b/src/LLVM/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll new file mode 100644 index 0000000..790fd88 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll
@@ -0,0 +1,69 @@ +; RUN: llc < %s -march=x86 +; PR4753 + +; This function has a sub-register reuse undone. + +@uint8 = external global i32 ; <i32*> [#uses=3] + +declare signext i8 @foo(i32, i8 signext) nounwind readnone + +declare signext i8 @bar(i32, i8 signext) nounwind readnone + +define i32 @uint80(i8 signext %p_52) nounwind { +entry: + %0 = sext i8 %p_52 to i16 ; <i16> [#uses=1] + %1 = tail call i32 @func_24(i16 zeroext %0, i8 signext ptrtoint (i8 (i32, i8)* @foo to i8)) nounwind; <i32> [#uses=1] + %2 = trunc i32 %1 to i8 ; <i8> [#uses=1] + %3 = or i8 %2, 1 ; <i8> [#uses=1] + %4 = tail call i32 @safe(i32 1) nounwind ; <i32> [#uses=0] + %5 = tail call i32 @func_24(i16 zeroext 0, i8 signext undef) nounwind; <i32> [#uses=1] + %6 = trunc i32 %5 to i8 ; <i8> [#uses=1] + %7 = xor i8 %3, %p_52 ; <i8> [#uses=1] + %8 = xor i8 %7, %6 ; <i8> [#uses=1] + %9 = icmp ne i8 %p_52, 0 ; <i1> [#uses=1] + %10 = zext i1 %9 to i8 ; <i8> [#uses=1] + %11 = tail call i32 @func_24(i16 zeroext ptrtoint (i8 (i32, i8)* @bar to i16), i8 signext %10) nounwind; <i32> [#uses=1] + %12 = tail call i32 @func_24(i16 zeroext 0, i8 signext 1) nounwind; <i32> [#uses=0] + br i1 undef, label %bb2, label %bb + +bb: ; preds = %entry + br i1 undef, label %bb2, label %bb3 + +bb2: ; preds = %bb, %entry + br label %bb3 + +bb3: ; preds = %bb2, %bb + %iftmp.2.0 = phi i32 [ 0, %bb2 ], [ 1, %bb ] ; <i32> [#uses=1] + %13 = icmp ne i32 %11, %iftmp.2.0 ; <i1> [#uses=1] + %14 = tail call i32 @safe(i32 -2) nounwind ; <i32> [#uses=0] + %15 = zext i1 %13 to i8 ; <i8> [#uses=1] + %16 = tail call signext i8 @func_53(i8 signext undef, i8 signext 1, i8 signext %15, i8 signext %8) nounwind; <i8> [#uses=0] + br i1 undef, label %bb5, label %bb4 + +bb4: ; preds = %bb3 + %17 = volatile load i32* @uint8, align 4 ; <i32> [#uses=0] + br label %bb5 + +bb5: ; preds = %bb4, %bb3 + %18 = volatile load i32* @uint8, align 4 ; <i32> [#uses=0] + %19 = sext i8 undef to i16 ; <i16> [#uses=1] + %20 = tail call i32 @func_24(i16 zeroext %19, 
i8 signext 1) nounwind; <i32> [#uses=0] + br i1 undef, label %return, label %bb6.preheader + +bb6.preheader: ; preds = %bb5 + %21 = sext i8 %p_52 to i32 ; <i32> [#uses=1] + %22 = volatile load i32* @uint8, align 4 ; <i32> [#uses=0] + %23 = tail call i32 (...)* @safefuncts(i32 %21, i32 1) nounwind; <i32> [#uses=0] + unreachable + +return: ; preds = %bb5 + ret i32 undef +} + +declare i32 @func_24(i16 zeroext, i8 signext) + +declare i32 @safe(i32) + +declare signext i8 @func_53(i8 signext, i8 signext, i8 signext, i8 signext) + +declare i32 @safefuncts(...)
diff --git a/src/LLVM/test/CodeGen/X86/2009-08-23-linkerprivate.ll b/src/LLVM/test/CodeGen/X86/2009-08-23-linkerprivate.ll new file mode 100644 index 0000000..90fac15 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-08-23-linkerprivate.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin | FileCheck %s + +; ModuleID = '/Volumes/MacOS9/tests/WebKit/JavaScriptCore/profiler/ProfilerServer.mm' + +@"\01l_objc_msgSend_fixup_alloc" = linker_private_weak hidden global i32 0, section "__DATA, __objc_msgrefs, coalesced", align 16 + +; CHECK: .globl l_objc_msgSend_fixup_alloc +; CHECK: .weak_definition l_objc_msgSend_fixup_alloc
diff --git a/src/LLVM/test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll b/src/LLVM/test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll new file mode 100644 index 0000000..5483b73 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll
@@ -0,0 +1,51 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim | FileCheck %s + +; It's not legal to fold a load from 32-bit stack slot into a 64-bit +; instruction. If done, the instruction does a 64-bit load and that's not +; safe. This can happen we a subreg_to_reg 0 has been coalesced. One +; exception is when the instruction that folds the load is a move, then we +; can simply turn it into a 32-bit load from the stack slot. +; rdar://7170444 + +%struct.ComplexType = type { i32 } + +define i32 @t(i32 %clientPort, i32 %pluginID, i32 %requestID, i32 %objectID, i64 %serverIdentifier, i64 %argumentsData, i32 %argumentsLength) ssp { +entry: +; CHECK: _t: +; CHECK: movl 16(%rbp), + %0 = zext i32 %argumentsLength to i64 ; <i64> [#uses=1] + %1 = zext i32 %clientPort to i64 ; <i64> [#uses=1] + %2 = inttoptr i64 %1 to %struct.ComplexType* ; <%struct.ComplexType*> [#uses=1] + %3 = invoke i8* @pluginInstance(i8* undef, i32 %pluginID) + to label %invcont unwind label %lpad ; <i8*> [#uses=1] + +invcont: ; preds = %entry + %4 = add i32 %requestID, %pluginID ; <i32> [#uses=0] + %5 = invoke zeroext i8 @invoke(i8* %3, i32 %objectID, i8* undef, i64 %argumentsData, i32 %argumentsLength, i64* undef, i32* undef) + to label %invcont1 unwind label %lpad ; <i8> [#uses=0] + +invcont1: ; preds = %invcont + %6 = getelementptr inbounds %struct.ComplexType* %2, i64 0, i32 0 ; <i32*> [#uses=1] + %7 = load i32* %6, align 4 ; <i32> [#uses=1] + invoke void @booleanAndDataReply(i32 %7, i32 undef, i32 %requestID, i32 undef, i64 undef, i32 undef) + to label %invcont2 unwind label %lpad + +invcont2: ; preds = %invcont1 + ret i32 0 + +lpad: ; preds = %invcont1, %invcont, %entry + %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + %8 = call i32 @vm_deallocate(i32 undef, i64 0, i64 %0) ; <i32> [#uses=0] + unreachable +} + +declare i32 @vm_deallocate(i32, i64, i64) + +declare i8* @pluginInstance(i8*, i32) + +declare zeroext 
i8 @invoke(i8*, i32, i8*, i64, i32, i64*, i32*) + +declare void @booleanAndDataReply(i32, i32, i32, i32, i64, i32) + +declare i32 @__gxx_personality_v0(...)
diff --git a/src/LLVM/test/CodeGen/X86/2009-09-10-SpillComments.ll b/src/LLVM/test/CodeGen/X86/2009-09-10-SpillComments.ll new file mode 100644 index 0000000..adac203 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-09-10-SpillComments.ll
@@ -0,0 +1,108 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-linux | FileCheck %s + +; This test shouldn't require spills. + +; CHECK: pushq +; CHECK-NOT: $rsp +; CHECK: popq + + %struct..0anon = type { i32 } + %struct.rtvec_def = type { i32, [1 x %struct..0anon] } + %struct.rtx_def = type { i16, i8, i8, [1 x %struct..0anon] } +@rtx_format = external global [116 x i8*] ; <[116 x i8*]*> [#uses=1] +@rtx_length = external global [117 x i32] ; <[117 x i32]*> [#uses=1] + +declare %struct.rtx_def* @fixup_memory_subreg(%struct.rtx_def*, %struct.rtx_def*, i32) + +define %struct.rtx_def* @walk_fixup_memory_subreg(%struct.rtx_def* %x, %struct.rtx_def* %insn) { +entry: + %tmp2 = icmp eq %struct.rtx_def* %x, null ; <i1> [#uses=1] + br i1 %tmp2, label %UnifiedReturnBlock, label %cond_next + +cond_next: ; preds = %entry + %tmp6 = getelementptr %struct.rtx_def* %x, i32 0, i32 0 ; <i16*> [#uses=1] + %tmp7 = load i16* %tmp6 ; <i16> [#uses=2] + %tmp78 = zext i16 %tmp7 to i32 ; <i32> [#uses=2] + %tmp10 = icmp eq i16 %tmp7, 54 ; <i1> [#uses=1] + br i1 %tmp10, label %cond_true13, label %cond_next32 + +cond_true13: ; preds = %cond_next + %tmp15 = getelementptr %struct.rtx_def* %x, i32 0, i32 3 ; <[1 x %struct..0anon]*> [#uses=1] + %tmp1718 = bitcast [1 x %struct..0anon]* %tmp15 to %struct.rtx_def** ; <%struct.rtx_def**> [#uses=1] + %tmp19 = load %struct.rtx_def** %tmp1718 ; <%struct.rtx_def*> [#uses=1] + %tmp20 = getelementptr %struct.rtx_def* %tmp19, i32 0, i32 0 ; <i16*> [#uses=1] + %tmp21 = load i16* %tmp20 ; <i16> [#uses=1] + %tmp22 = icmp eq i16 %tmp21, 57 ; <i1> [#uses=1] + br i1 %tmp22, label %cond_true25, label %cond_next32 + +cond_true25: ; preds = %cond_true13 + %tmp29 = tail call %struct.rtx_def* @fixup_memory_subreg( %struct.rtx_def* %x, %struct.rtx_def* %insn, i32 1 ) nounwind ; <%struct.rtx_def*> [#uses=1] + ret %struct.rtx_def* %tmp29 + +cond_next32: ; preds = %cond_true13, %cond_next + %tmp34 = getelementptr [116 x i8*]* @rtx_format, i32 0, i32 %tmp78 ; <i8**> [#uses=1] + 
%tmp35 = load i8** %tmp34, align 4 ; <i8*> [#uses=1] + %tmp37 = getelementptr [117 x i32]* @rtx_length, i32 0, i32 %tmp78 ; <i32*> [#uses=1] + %tmp38 = load i32* %tmp37, align 4 ; <i32> [#uses=1] + %i.011 = add i32 %tmp38, -1 ; <i32> [#uses=2] + %tmp12513 = icmp sgt i32 %i.011, -1 ; <i1> [#uses=1] + br i1 %tmp12513, label %bb, label %UnifiedReturnBlock + +bb: ; preds = %bb123, %cond_next32 + %indvar = phi i32 [ %indvar.next26, %bb123 ], [ 0, %cond_next32 ] ; <i32> [#uses=2] + %i.01.0 = sub i32 %i.011, %indvar ; <i32> [#uses=5] + %tmp42 = getelementptr i8* %tmp35, i32 %i.01.0 ; <i8*> [#uses=2] + %tmp43 = load i8* %tmp42 ; <i8> [#uses=1] + switch i8 %tmp43, label %bb123 [ + i8 101, label %cond_true47 + i8 69, label %bb105.preheader + ] + +cond_true47: ; preds = %bb + %tmp52 = getelementptr %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0 ; <%struct..0anon*> [#uses=1] + %tmp5354 = bitcast %struct..0anon* %tmp52 to %struct.rtx_def** ; <%struct.rtx_def**> [#uses=1] + %tmp55 = load %struct.rtx_def** %tmp5354 ; <%struct.rtx_def*> [#uses=1] + %tmp58 = tail call %struct.rtx_def* @walk_fixup_memory_subreg( %struct.rtx_def* %tmp55, %struct.rtx_def* %insn ) nounwind ; <%struct.rtx_def*> [#uses=1] + %tmp62 = getelementptr %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0, i32 0 ; <i32*> [#uses=1] + %tmp58.c = ptrtoint %struct.rtx_def* %tmp58 to i32 ; <i32> [#uses=1] + store i32 %tmp58.c, i32* %tmp62 + %tmp6816 = load i8* %tmp42 ; <i8> [#uses=1] + %tmp6917 = icmp eq i8 %tmp6816, 69 ; <i1> [#uses=1] + br i1 %tmp6917, label %bb105.preheader, label %bb123 + +bb105.preheader: ; preds = %cond_true47, %bb + %tmp11020 = getelementptr %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0 ; <%struct..0anon*> [#uses=1] + %tmp11111221 = bitcast %struct..0anon* %tmp11020 to %struct.rtvec_def** ; <%struct.rtvec_def**> [#uses=3] + %tmp11322 = load %struct.rtvec_def** %tmp11111221 ; <%struct.rtvec_def*> [#uses=1] + %tmp11423 = getelementptr %struct.rtvec_def* %tmp11322, i32 0, i32 0 ; <i32*> [#uses=1] + 
%tmp11524 = load i32* %tmp11423 ; <i32> [#uses=1] + %tmp11625 = icmp eq i32 %tmp11524, 0 ; <i1> [#uses=1] + br i1 %tmp11625, label %bb123, label %bb73 + +bb73: ; preds = %bb73, %bb105.preheader + %j.019 = phi i32 [ %tmp104, %bb73 ], [ 0, %bb105.preheader ] ; <i32> [#uses=3] + %tmp81 = load %struct.rtvec_def** %tmp11111221 ; <%struct.rtvec_def*> [#uses=2] + %tmp92 = getelementptr %struct.rtvec_def* %tmp81, i32 0, i32 1, i32 %j.019 ; <%struct..0anon*> [#uses=1] + %tmp9394 = bitcast %struct..0anon* %tmp92 to %struct.rtx_def** ; <%struct.rtx_def**> [#uses=1] + %tmp95 = load %struct.rtx_def** %tmp9394 ; <%struct.rtx_def*> [#uses=1] + %tmp98 = tail call %struct.rtx_def* @walk_fixup_memory_subreg( %struct.rtx_def* %tmp95, %struct.rtx_def* %insn ) nounwind ; <%struct.rtx_def*> [#uses=1] + %tmp101 = getelementptr %struct.rtvec_def* %tmp81, i32 0, i32 1, i32 %j.019, i32 0 ; <i32*> [#uses=1] + %tmp98.c = ptrtoint %struct.rtx_def* %tmp98 to i32 ; <i32> [#uses=1] + store i32 %tmp98.c, i32* %tmp101 + %tmp104 = add i32 %j.019, 1 ; <i32> [#uses=2] + %tmp113 = load %struct.rtvec_def** %tmp11111221 ; <%struct.rtvec_def*> [#uses=1] + %tmp114 = getelementptr %struct.rtvec_def* %tmp113, i32 0, i32 0 ; <i32*> [#uses=1] + %tmp115 = load i32* %tmp114 ; <i32> [#uses=1] + %tmp116 = icmp ult i32 %tmp104, %tmp115 ; <i1> [#uses=1] + br i1 %tmp116, label %bb73, label %bb123 + +bb123: ; preds = %bb73, %bb105.preheader, %cond_true47, %bb + %i.0 = add i32 %i.01.0, -1 ; <i32> [#uses=1] + %tmp125 = icmp sgt i32 %i.0, -1 ; <i1> [#uses=1] + %indvar.next26 = add i32 %indvar, 1 ; <i32> [#uses=1] + br i1 %tmp125, label %bb, label %UnifiedReturnBlock + +UnifiedReturnBlock: ; preds = %bb123, %cond_next32, %entry + %UnifiedRetVal = phi %struct.rtx_def* [ null, %entry ], [ %x, %cond_next32 ], [ %x, %bb123 ] ; <%struct.rtx_def*> [#uses=1] + ret %struct.rtx_def* %UnifiedRetVal +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-09-16-CoalescerBug.ll b/src/LLVM/test/CodeGen/X86/2009-09-16-CoalescerBug.ll new file mode 100644 index 0000000..18b5a17 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-09-16-CoalescerBug.ll
@@ -0,0 +1,64 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin10 +; PR4910 + +%0 = type { i32, i32, i32, i32 } + +@boot_cpu_id = external global i32 ; <i32*> [#uses=1] +@cpu_logical = common global i32 0, align 4 ; <i32*> [#uses=1] + +define void @topo_probe_0xb() nounwind ssp { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc38, %entry + %0 = phi i32 [ 0, %entry ], [ %inc40, %for.inc38 ] ; <i32> [#uses=3] + %cmp = icmp slt i32 %0, 3 ; <i1> [#uses=1] + br i1 %cmp, label %for.body, label %for.end41 + +for.body: ; preds = %for.cond + %1 = tail call %0 asm sideeffect "cpuid", "={ax},={bx},={cx},={dx},0,{cx},~{dirflag},~{fpsr},~{flags}"(i32 11, i32 %0) nounwind ; <%0> [#uses=3] + %asmresult.i = extractvalue %0 %1, 0 ; <i32> [#uses=1] + %asmresult10.i = extractvalue %0 %1, 2 ; <i32> [#uses=1] + %and = and i32 %asmresult.i, 31 ; <i32> [#uses=2] + %shr42 = lshr i32 %asmresult10.i, 8 ; <i32> [#uses=1] + %and12 = and i32 %shr42, 255 ; <i32> [#uses=2] + %cmp14 = icmp eq i32 %and12, 0 ; <i1> [#uses=1] + br i1 %cmp14, label %for.end41, label %lor.lhs.false + +lor.lhs.false: ; preds = %for.body + %asmresult9.i = extractvalue %0 %1, 1 ; <i32> [#uses=1] + %and7 = and i32 %asmresult9.i, 65535 ; <i32> [#uses=1] + %cmp16 = icmp eq i32 %and7, 0 ; <i1> [#uses=1] + br i1 %cmp16, label %for.end41, label %for.cond17.preheader + +for.cond17.preheader: ; preds = %lor.lhs.false + %tmp24 = load i32* @boot_cpu_id ; <i32> [#uses=1] + %shr26 = ashr i32 %tmp24, %and ; <i32> [#uses=1] + br label %for.body20 + +for.body20: ; preds = %for.body20, %for.cond17.preheader + %2 = phi i32 [ 0, %for.cond17.preheader ], [ %inc32, %for.body20 ] ; <i32> [#uses=2] + %cnt.143 = phi i32 [ 0, %for.cond17.preheader ], [ %inc.cnt.1, %for.body20 ] ; <i32> [#uses=1] + %shr23 = ashr i32 %2, %and ; <i32> [#uses=1] + %cmp27 = icmp eq i32 %shr23, %shr26 ; <i1> [#uses=1] + %inc = zext i1 %cmp27 to i32 ; <i32> [#uses=1] + %inc.cnt.1 = add i32 %inc, %cnt.143 ; <i32> [#uses=2] + %inc32 = add nsw i32 %2, 1 ; <i32> 
[#uses=2] + %exitcond = icmp eq i32 %inc32, 255 ; <i1> [#uses=1] + br i1 %exitcond, label %for.end, label %for.body20 + +for.end: ; preds = %for.body20 + %cmp34 = icmp eq i32 %and12, 1 ; <i1> [#uses=1] + br i1 %cmp34, label %if.then35, label %for.inc38 + +if.then35: ; preds = %for.end + store i32 %inc.cnt.1, i32* @cpu_logical + br label %for.inc38 + +for.inc38: ; preds = %for.end, %if.then35 + %inc40 = add nsw i32 %0, 1 ; <i32> [#uses=1] + br label %for.cond + +for.end41: ; preds = %lor.lhs.false, %for.body, %for.cond + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-09-19-earlyclobber.ll b/src/LLVM/test/CodeGen/X86/2009-09-19-earlyclobber.ll new file mode 100644 index 0000000..66f5118 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-09-19-earlyclobber.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s | FileCheck %s +; ModuleID = '4964.c' +; PR 4964 +; Registers other than RAX, RCX are OK, but they must be different. +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-apple-darwin10.0" + %0 = type { i64, i64 } ; type %0 + +define i64 @flsst(i64 %find) nounwind ssp { +entry: +; CHECK: FOO %rax %rcx + %asmtmp = tail call %0 asm sideeffect "FOO $0 $1 $2", "=r,=&r,rm,~{dirflag},~{fpsr},~{flags},~{cc}"(i64 %find) nounwind ; <%0> [#uses=1] + %asmresult = extractvalue %0 %asmtmp, 0 ; <i64> [#uses=1] + ret i64 %asmresult +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll b/src/LLVM/test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll new file mode 100644 index 0000000..80b8835 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll
@@ -0,0 +1,36 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin10.0 -relocation-model=pic | FileCheck %s + +define void @dot(i16* nocapture %A, i32 %As, i16* nocapture %B, i32 %Bs, i16* nocapture %C, i32 %N) nounwind ssp { +; CHECK: dot: +; CHECK: decl % +; CHECK-NEXT: jne +entry: + %0 = icmp sgt i32 %N, 0 ; <i1> [#uses=1] + br i1 %0, label %bb, label %bb2 + +bb: ; preds = %bb, %entry + %i.03 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=3] + %sum.04 = phi i32 [ 0, %entry ], [ %10, %bb ] ; <i32> [#uses=1] + %1 = mul i32 %i.03, %As ; <i32> [#uses=1] + %2 = getelementptr i16* %A, i32 %1 ; <i16*> [#uses=1] + %3 = load i16* %2, align 2 ; <i16> [#uses=1] + %4 = sext i16 %3 to i32 ; <i32> [#uses=1] + %5 = mul i32 %i.03, %Bs ; <i32> [#uses=1] + %6 = getelementptr i16* %B, i32 %5 ; <i16*> [#uses=1] + %7 = load i16* %6, align 2 ; <i16> [#uses=1] + %8 = sext i16 %7 to i32 ; <i32> [#uses=1] + %9 = mul i32 %8, %4 ; <i32> [#uses=1] + %10 = add i32 %9, %sum.04 ; <i32> [#uses=2] + %indvar.next = add i32 %i.03, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %indvar.next, %N ; <i1> [#uses=1] + br i1 %exitcond, label %bb1.bb2_crit_edge, label %bb + +bb1.bb2_crit_edge: ; preds = %bb + %phitmp = trunc i32 %10 to i16 ; <i16> [#uses=1] + br label %bb2 + +bb2: ; preds = %entry, %bb1.bb2_crit_edge + %sum.0.lcssa = phi i16 [ %phitmp, %bb1.bb2_crit_edge ], [ 0, %entry ] ; <i16> [#uses=1] + store i16 %sum.0.lcssa, i16* %C, align 2 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-09-22-CoalescerBug.ll b/src/LLVM/test/CodeGen/X86/2009-09-22-CoalescerBug.ll new file mode 100644 index 0000000..33f35f8 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-09-22-CoalescerBug.ll
@@ -0,0 +1,124 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 + +define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp { +entry: + br i1 undef, label %bb, label %bb1 + +bb: ; preds = %entry + ret i32 3 + +bb1: ; preds = %entry + br i1 undef, label %bb3, label %bb2 + +bb2: ; preds = %bb1 + ret i32 3 + +bb3: ; preds = %bb1 + br i1 undef, label %bb.i18, label %quantum_getwidth.exit + +bb.i18: ; preds = %bb.i18, %bb3 + br i1 undef, label %bb.i18, label %quantum_getwidth.exit + +quantum_getwidth.exit: ; preds = %bb.i18, %bb3 + br i1 undef, label %bb4, label %bb6.preheader + +bb4: ; preds = %quantum_getwidth.exit + unreachable + +bb6.preheader: ; preds = %quantum_getwidth.exit + br i1 undef, label %bb.i1, label %bb1.i2 + +bb.i1: ; preds = %bb6.preheader + unreachable + +bb1.i2: ; preds = %bb6.preheader + br i1 undef, label %bb2.i, label %bb3.i4 + +bb2.i: ; preds = %bb1.i2 + unreachable + +bb3.i4: ; preds = %bb1.i2 + br i1 undef, label %quantum_new_qureg.exit, label %bb4.i + +bb4.i: ; preds = %bb3.i4 + unreachable + +quantum_new_qureg.exit: ; preds = %bb3.i4 + br i1 undef, label %bb9, label %bb11.thread + +bb11.thread: ; preds = %quantum_new_qureg.exit + %.cast.i = zext i32 undef to i64 ; <i64> [#uses=1] + br label %bb.i37 + +bb9: ; preds = %quantum_new_qureg.exit + unreachable + +bb.i37: ; preds = %bb.i37, %bb11.thread + %0 = load i64* undef, align 8 ; <i64> [#uses=1] + %1 = shl i64 %0, %.cast.i ; <i64> [#uses=1] + store i64 %1, i64* undef, align 8 + br i1 undef, label %bb.i37, label %quantum_addscratch.exit + +quantum_addscratch.exit: ; preds = %bb.i37 + br i1 undef, label %bb12.preheader, label %bb14 + +bb12.preheader: ; preds = %quantum_addscratch.exit + unreachable + +bb14: ; preds = %quantum_addscratch.exit + br i1 undef, label %bb17, label %bb.nph + +bb.nph: ; preds = %bb14 + unreachable + +bb17: ; preds = %bb14 + br i1 undef, label %bb1.i7, label %quantum_measure.exit + +bb1.i7: ; preds = %bb17 + br label %quantum_measure.exit + +quantum_measure.exit: 
; preds = %bb1.i7, %bb17 + switch i32 undef, label %bb21 [ + i32 -1, label %bb18 + i32 0, label %bb20 + ] + +bb18: ; preds = %quantum_measure.exit + unreachable + +bb20: ; preds = %quantum_measure.exit + unreachable + +bb21: ; preds = %quantum_measure.exit + br i1 undef, label %quantum_frac_approx.exit, label %bb1.i + +bb1.i: ; preds = %bb21 + unreachable + +quantum_frac_approx.exit: ; preds = %bb21 + br i1 undef, label %bb25, label %bb26 + +bb25: ; preds = %quantum_frac_approx.exit + unreachable + +bb26: ; preds = %quantum_frac_approx.exit + br i1 undef, label %quantum_gcd.exit, label %bb.i + +bb.i: ; preds = %bb.i, %bb26 + br i1 undef, label %quantum_gcd.exit, label %bb.i + +quantum_gcd.exit: ; preds = %bb.i, %bb26 + br i1 undef, label %bb32, label %bb33 + +bb32: ; preds = %quantum_gcd.exit + br i1 undef, label %bb.i.i, label %quantum_delete_qureg.exit + +bb.i.i: ; preds = %bb32 + ret i32 0 + +quantum_delete_qureg.exit: ; preds = %bb32 + ret i32 0 + +bb33: ; preds = %quantum_gcd.exit + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-09-23-LiveVariablesBug.ll b/src/LLVM/test/CodeGen/X86/2009-09-23-LiveVariablesBug.ll new file mode 100644 index 0000000..d37d4b8 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-09-23-LiveVariablesBug.ll
@@ -0,0 +1,91 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 + +; rdar://7247745 + +%struct._lck_mtx_ = type { %union.anon } +%struct._lck_rw_t_internal_ = type <{ i16, i8, i8, i32, i32, i32 }> +%struct.anon = type { i64, i64, [2 x i8], i8, i8, i32 } +%struct.memory_object = type { i32, i32, %struct.memory_object_pager_ops* } +%struct.memory_object_control = type { i32, i32, %struct.vm_object* } +%struct.memory_object_pager_ops = type { void (%struct.memory_object*)*, void (%struct.memory_object*)*, i32 (%struct.memory_object*, %struct.memory_object_control*, i32)*, i32 (%struct.memory_object*)*, i32 (%struct.memory_object*, i64, i32, i32, i32*)*, i32 (%struct.memory_object*, i64, i32, i64*, i32*, i32, i32, i32)*, i32 (%struct.memory_object*, i64, i32)*, i32 (%struct.memory_object*, i64, i64, i32)*, i32 (%struct.memory_object*, i64, i64, i32)*, i32 (%struct.memory_object*, i32)*, i32 (%struct.memory_object*)*, i8* } +%struct.queue_entry = type { %struct.queue_entry*, %struct.queue_entry* } +%struct.upl = type { %struct._lck_mtx_, i32, i32, %struct.vm_object*, i64, i32, i64, %struct.vm_object*, i32, i8* } +%struct.upl_page_info = type <{ i32, i8, [3 x i8] }> +%struct.vm_object = type { %struct.queue_entry, %struct._lck_rw_t_internal_, i64, %struct.vm_page*, i32, i32, i32, i32, %struct.vm_object*, %struct.vm_object*, i64, %struct.memory_object*, i64, %struct.memory_object_control*, i32, i16, i16, [2 x i8], i8, i8, %struct.queue_entry, %struct.queue_entry, i64, i32, i32, i32, i8*, i64, i8, i8, [2 x i8], %struct.queue_entry } +%struct.vm_page = type { %struct.queue_entry, %struct.queue_entry, %struct.vm_page*, %struct.vm_object*, i64, [2 x i8], i8, i8, i32, i8, i8, i8, i8, i32 } +%union.anon = type { %struct.anon } + +declare i64 @OSAddAtomic64(i64, i64*) noredzone noimplicitfloat + +define i32 @upl_commit_range(%struct.upl* %upl, i32 %offset, i32 %size, i32 %flags, %struct.upl_page_info* %page_list, i32 %count, i32* nocapture %empty) nounwind noredzone 
noimplicitfloat { +entry: + br i1 undef, label %if.then, label %if.end + +if.end: ; preds = %entry + br i1 undef, label %if.end143, label %if.then136 + +if.then136: ; preds = %if.end + unreachable + +if.end143: ; preds = %if.end + br i1 undef, label %if.else155, label %if.then153 + +if.then153: ; preds = %if.end143 + br label %while.cond + +if.else155: ; preds = %if.end143 + unreachable + +while.cond: ; preds = %if.end1039, %if.then153 + br i1 undef, label %if.then1138, label %while.body + +while.body: ; preds = %while.cond + br i1 undef, label %if.end260, label %if.then217 + +if.then217: ; preds = %while.body + br i1 undef, label %if.end260, label %if.then230 + +if.then230: ; preds = %if.then217 + br i1 undef, label %if.then246, label %if.end260 + +if.then246: ; preds = %if.then230 + br label %if.end260 + +if.end260: ; preds = %if.then246, %if.then230, %if.then217, %while.body + br i1 undef, label %if.end296, label %if.then266 + +if.then266: ; preds = %if.end260 + unreachable + +if.end296: ; preds = %if.end260 + br i1 undef, label %if.end1039, label %if.end306 + +if.end306: ; preds = %if.end296 + br i1 undef, label %if.end796, label %if.then616 + +if.then616: ; preds = %if.end306 + br i1 undef, label %commit_next_page, label %do.body716 + +do.body716: ; preds = %if.then616 + %call721 = call i64 @OSAddAtomic64(i64 1, i64* undef) nounwind noredzone noimplicitfloat ; <i64> [#uses=0] + call void asm sideeffect "movq\090x0($0),%rdi\0A\09movq\090x8($0),%rsi\0A\09.section __DATA, __data\0A\09.globl __dtrace_probeDOLLAR${:uid}4794___vminfo____pgrec\0A\09__dtrace_probeDOLLAR${:uid}4794___vminfo____pgrec:.quad 1f\0A\09.text\0A\091:nop\0A\09nop\0A\09nop\0A\09", "r,~{memory},~{di},~{si},~{dirflag},~{fpsr},~{flags}"(i64* undef) nounwind + br label %commit_next_page + +if.end796: ; preds = %if.end306 + unreachable + +commit_next_page: ; preds = %do.body716, %if.then616 + br i1 undef, label %if.end1039, label %if.then1034 + +if.then1034: ; preds = %commit_next_page + br label 
%if.end1039 + +if.end1039: ; preds = %if.then1034, %commit_next_page, %if.end296 + br label %while.cond + +if.then1138: ; preds = %while.cond + unreachable + +if.then: ; preds = %entry + ret i32 4 +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-10-14-LiveVariablesBug.ll b/src/LLVM/test/CodeGen/X86/2009-10-14-LiveVariablesBug.ll new file mode 100644 index 0000000..c1aa17c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-10-14-LiveVariablesBug.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin +; rdar://7299435 + +@i = internal global i32 0 ; <i32*> [#uses=1] +@llvm.used = appending global [1 x i8*] [i8* bitcast (void (i16)* @foo to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +define void @foo(i16 signext %source) nounwind ssp { +entry: + %source_addr = alloca i16, align 2 ; <i16*> [#uses=2] + store i16 %source, i16* %source_addr + store i32 4, i32* @i, align 4 + call void asm sideeffect "# top of block", "~{dirflag},~{fpsr},~{flags},~{edi},~{esi},~{edx},~{ecx},~{eax}"() nounwind + %asmtmp = call i16 asm sideeffect "movw $1, $0", "=={ax},*m,~{dirflag},~{fpsr},~{flags},~{memory}"(i16* %source_addr) nounwind ; <i16> [#uses=0] + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-10-16-Scope.ll b/src/LLVM/test/CodeGen/X86/2009-10-16-Scope.ll new file mode 100644 index 0000000..e41038d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-10-16-Scope.ll
@@ -0,0 +1,32 @@ +; RUN: llc %s -O0 -o /dev/null -mtriple=x86_64-apple-darwin +; PR 5197 +; There is not any llvm instruction assocated with !5. The code generator +; should be able to handle this. + +define void @bar() nounwind ssp { +entry: + %count_ = alloca i32, align 4 ; <i32*> [#uses=2] + br label %do.body, !dbg !0 + +do.body: ; preds = %entry + call void @llvm.dbg.declare(metadata !{i32* %count_}, metadata !4) + %conv = ptrtoint i32* %count_ to i32, !dbg !0 ; <i32> [#uses=1] + %call = call i32 @foo(i32 %conv) ssp, !dbg !0 ; <i32> [#uses=0] + br label %do.end, !dbg !0 + +do.end: ; preds = %do.body + ret void, !dbg !7 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +declare i32 @foo(i32) ssp + +!0 = metadata !{i32 5, i32 2, metadata !1, null} +!1 = metadata !{i32 458763, metadata !2, i32 1, i32 1}; [DW_TAG_lexical_block ] +!2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"bar", metadata !"bar", metadata !"bar", metadata !3, i32 4, null, i1 false, i1 true}; [DW_TAG_subprogram ] +!3 = metadata !{i32 458769, i32 0, i32 12, metadata !"genmodes.i", metadata !"/Users/yash/Downloads", metadata !"clang 1.1", i1 true, i1 false, metadata !"", i32 0}; [DW_TAG_compile_unit ] +!4 = metadata !{i32 459008, metadata !5, metadata !"count_", metadata !3, i32 5, metadata !6}; [ DW_TAG_auto_variable ] +!5 = metadata !{i32 458763, metadata !1, i32 1, i32 1}; [DW_TAG_lexical_block ] +!6 = metadata !{i32 458788, metadata !3, metadata !"int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}; [DW_TAG_base_type ] +!7 = metadata !{i32 6, i32 1, metadata !2, null}
diff --git a/src/LLVM/test/CodeGen/X86/2009-10-19-EmergencySpill.ll b/src/LLVM/test/CodeGen/X86/2009-10-19-EmergencySpill.ll new file mode 100644 index 0000000..ba44a2e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-10-19-EmergencySpill.ll
@@ -0,0 +1,54 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -disable-fp-elim +; rdar://7291624 + +%union.RtreeCoord = type { float } +%struct.RtreeCell = type { i64, [10 x %union.RtreeCoord] } +%struct.Rtree = type { i32, i32*, i32, i32, i32, i32, i8*, i8* } +%struct.RtreeNode = type { i32*, i64, i32, i32, i8*, i32* } + +define fastcc void @nodeOverwriteCell(%struct.Rtree* nocapture %pRtree, %struct.RtreeNode* nocapture %pNode, %struct.RtreeCell* nocapture %pCell, i32 %iCell) nounwind ssp { +entry: + %0 = load i8** undef, align 8 ; <i8*> [#uses=2] + %1 = load i32* undef, align 8 ; <i32> [#uses=1] + %2 = mul i32 %1, %iCell ; <i32> [#uses=1] + %3 = add nsw i32 %2, 4 ; <i32> [#uses=1] + %4 = sext i32 %3 to i64 ; <i64> [#uses=2] + %5 = load i64* null, align 8 ; <i64> [#uses=2] + %6 = lshr i64 %5, 48 ; <i64> [#uses=1] + %7 = trunc i64 %6 to i8 ; <i8> [#uses=1] + store i8 %7, i8* undef, align 1 + %8 = lshr i64 %5, 8 ; <i64> [#uses=1] + %9 = trunc i64 %8 to i8 ; <i8> [#uses=1] + %.sum4 = add i64 %4, 6 ; <i64> [#uses=1] + %10 = getelementptr inbounds i8* %0, i64 %.sum4 ; <i8*> [#uses=1] + store i8 %9, i8* %10, align 1 + %11 = getelementptr inbounds %struct.Rtree* %pRtree, i64 0, i32 3 ; <i32*> [#uses=1] + br i1 undef, label %bb.nph, label %bb2 + +bb.nph: ; preds = %entry + %tmp25 = add i64 %4, 11 ; <i64> [#uses=1] + br label %bb + +bb: ; preds = %bb, %bb.nph + %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; <i64> [#uses=3] + %scevgep = getelementptr %struct.RtreeCell* %pCell, i64 0, i32 1, i64 %indvar ; <%union.RtreeCoord*> [#uses=1] + %scevgep12 = bitcast %union.RtreeCoord* %scevgep to i32* ; <i32*> [#uses=1] + %tmp = shl i64 %indvar, 2 ; <i64> [#uses=1] + %tmp26 = add i64 %tmp, %tmp25 ; <i64> [#uses=1] + %scevgep27 = getelementptr i8* %0, i64 %tmp26 ; <i8*> [#uses=1] + %12 = load i32* %scevgep12, align 4 ; <i32> [#uses=1] + %13 = lshr i32 %12, 24 ; <i32> [#uses=1] + %14 = trunc i32 %13 to i8 ; <i8> [#uses=1] + store i8 %14, i8* undef, align 1 + store i8 
undef, i8* %scevgep27, align 1 + %15 = load i32* %11, align 4 ; <i32> [#uses=1] + %16 = shl i32 %15, 1 ; <i32> [#uses=1] + %17 = icmp sgt i32 %16, undef ; <i1> [#uses=1] + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1] + br i1 %17, label %bb, label %bb2 + +bb2: ; preds = %bb, %entry + %18 = getelementptr inbounds %struct.RtreeNode* %pNode, i64 0, i32 3 ; <i32*> [#uses=1] + store i32 1, i32* %18, align 4 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-10-19-atomic-cmp-eflags.ll b/src/LLVM/test/CodeGen/X86/2009-10-19-atomic-cmp-eflags.ll new file mode 100644 index 0000000..006a02a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-10-19-atomic-cmp-eflags.ll
@@ -0,0 +1,67 @@ +; RUN: llvm-as <%s | llc | FileCheck %s +; PR 5247 +; check that cmp is not scheduled before the add +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +@.str76843 = external constant [45 x i8] ; <[45 x i8]*> [#uses=1] +@__profiling_callsite_timestamps_live = external global [1216 x i64] ; <[1216 x i64]*> [#uses=2] + +define i32 @cl_init(i32 %initoptions) nounwind { +entry: + %retval.i = alloca i32 ; <i32*> [#uses=3] + %retval = alloca i32 ; <i32*> [#uses=2] + %initoptions.addr = alloca i32 ; <i32*> [#uses=2] + tail call void asm sideeffect "cpuid", "~{ax},~{bx},~{cx},~{dx},~{memory},~{dirflag},~{fpsr},~{flags}"() nounwind + %0 = tail call i64 @llvm.readcyclecounter() nounwind ; <i64> [#uses=1] + store i32 %initoptions, i32* %initoptions.addr + %1 = bitcast i32* %initoptions.addr to { }* ; <{ }*> [#uses=0] + call void asm sideeffect "cpuid", "~{ax},~{bx},~{cx},~{dx},~{memory},~{dirflag},~{fpsr},~{flags}"() nounwind + %2 = call i64 @llvm.readcyclecounter() nounwind ; <i64> [#uses=1] + %call.i = call i32 @lt_dlinit() nounwind ; <i32> [#uses=1] + %tobool.i = icmp ne i32 %call.i, 0 ; <i1> [#uses=1] + br i1 %tobool.i, label %if.then.i, label %if.end.i + +if.then.i: ; preds = %entry + %call1.i = call i32 @warn_dlerror(i8* getelementptr inbounds ([45 x i8]* @.str76843, i32 0, i32 0)) nounwind ; <i32> [#uses=0] + store i32 -1, i32* %retval.i + br label %lt_init.exit + +if.end.i: ; preds = %entry + store i32 0, i32* %retval.i + br label %lt_init.exit + +lt_init.exit: ; preds = %if.end.i, %if.then.i + %3 = load i32* %retval.i ; <i32> [#uses=1] + call void asm sideeffect "cpuid", "~{ax},~{bx},~{cx},~{dx},~{memory},~{dirflag},~{fpsr},~{flags}"() nounwind + %4 = call i64 @llvm.readcyclecounter() nounwind ; <i64> [#uses=1] + %5 = sub i64 %4, %2 ; <i64> [#uses=1] + %6 = atomicrmw add i64* getelementptr inbounds ([1216 
x i64]* @__profiling_callsite_timestamps_live, i32 0, i32 51), i64 %5 monotonic +;CHECK: lock +;CHECK-NEXT: {{xadd|addq}} %rdx, __profiling_callsite_timestamps_live +;CHECK-NEXT: cmpl $0, +;CHECK-NEXT: jne + %cmp = icmp eq i32 %3, 0 ; <i1> [#uses=1] + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %lt_init.exit + call void @cli_rarload() + br label %if.end + +if.end: ; preds = %if.then, %lt_init.exit + store i32 0, i32* %retval + %7 = load i32* %retval ; <i32> [#uses=1] + tail call void asm sideeffect "cpuid", "~{ax},~{bx},~{cx},~{dx},~{memory},~{dirflag},~{fpsr},~{flags}"() nounwind + %8 = tail call i64 @llvm.readcyclecounter() nounwind ; <i64> [#uses=1] + %9 = sub i64 %8, %0 ; <i64> [#uses=1] + %10 = atomicrmw add i64* getelementptr inbounds ([1216 x i64]* @__profiling_callsite_timestamps_live, i32 0, i32 50), i64 %9 monotonic + ret i32 %7 +} + +declare void @cli_rarload() nounwind + +declare i32 @lt_dlinit() + +declare i32 @warn_dlerror(i8*) nounwind + +declare i64 @llvm.readcyclecounter() nounwind
diff --git a/src/LLVM/test/CodeGen/X86/2009-10-25-RewriterBug.ll b/src/LLVM/test/CodeGen/X86/2009-10-25-RewriterBug.ll new file mode 100644 index 0000000..5b4e818 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-10-25-RewriterBug.ll
@@ -0,0 +1,171 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -relocation-model=pic -disable-fp-elim + +%struct.DecRefPicMarking_t = type { i32, i32, i32, i32, i32, %struct.DecRefPicMarking_t* } +%struct.FrameStore = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.StorablePicture*, %struct.StorablePicture*, %struct.StorablePicture* } +%struct.StorablePicture = type { i32, i32, i32, i32, i32, [50 x [6 x [33 x i64]]], [50 x [6 x [33 x i64]]], [50 x [6 x [33 x i64]]], [50 x [6 x [33 x i64]]], i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16**, i16***, i8*, i16**, i8***, i64***, i64***, i16****, i8**, i8**, %struct.StorablePicture*, %struct.StorablePicture*, %struct.StorablePicture*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [2 x i32], i32, %struct.DecRefPicMarking_t*, i32 } + +define fastcc void @insert_picture_in_dpb(%struct.FrameStore* nocapture %fs, %struct.StorablePicture* %p) nounwind ssp { +entry: + %0 = getelementptr inbounds %struct.FrameStore* %fs, i64 0, i32 12 ; <%struct.StorablePicture**> [#uses=1] + %1 = icmp eq i32 undef, 0 ; <i1> [#uses=1] + br i1 %1, label %bb.i, label %bb36.i + +bb.i: ; preds = %entry + br i1 undef, label %bb3.i, label %bb14.preheader.i + +bb3.i: ; preds = %bb.i + unreachable + +bb14.preheader.i: ; preds = %bb.i + br i1 undef, label %bb9.i, label %bb20.preheader.i + +bb9.i: ; preds = %bb9.i, %bb14.preheader.i + br i1 undef, label %bb9.i, label %bb20.preheader.i + +bb20.preheader.i: ; preds = %bb9.i, %bb14.preheader.i + br i1 undef, label %bb18.i, label %bb29.preheader.i + +bb18.i: ; preds = %bb20.preheader.i + unreachable + +bb29.preheader.i: ; preds = %bb20.preheader.i + br i1 undef, label %bb24.i, label %bb30.i + +bb24.i: ; preds = %bb29.preheader.i + unreachable + +bb30.i: ; preds = %bb29.preheader.i + store i32 undef, i32* undef, align 8 + br label %bb67.preheader.i + +bb36.i: ; preds = %entry + br label 
%bb67.preheader.i + +bb67.preheader.i: ; preds = %bb36.i, %bb30.i + %2 = phi %struct.StorablePicture* [ null, %bb36.i ], [ undef, %bb30.i ] ; <%struct.StorablePicture*> [#uses=2] + %3 = phi %struct.StorablePicture* [ null, %bb36.i ], [ undef, %bb30.i ] ; <%struct.StorablePicture*> [#uses=2] + %4 = phi %struct.StorablePicture* [ null, %bb36.i ], [ undef, %bb30.i ] ; <%struct.StorablePicture*> [#uses=2] + %5 = phi %struct.StorablePicture* [ null, %bb36.i ], [ undef, %bb30.i ] ; <%struct.StorablePicture*> [#uses=1] + %6 = phi %struct.StorablePicture* [ null, %bb36.i ], [ undef, %bb30.i ] ; <%struct.StorablePicture*> [#uses=1] + %7 = phi %struct.StorablePicture* [ null, %bb36.i ], [ undef, %bb30.i ] ; <%struct.StorablePicture*> [#uses=1] + %8 = phi %struct.StorablePicture* [ null, %bb36.i ], [ undef, %bb30.i ] ; <%struct.StorablePicture*> [#uses=1] + %9 = phi %struct.StorablePicture* [ null, %bb36.i ], [ undef, %bb30.i ] ; <%struct.StorablePicture*> [#uses=1] + %10 = phi %struct.StorablePicture* [ null, %bb36.i ], [ undef, %bb30.i ] ; <%struct.StorablePicture*> [#uses=1] + %11 = phi %struct.StorablePicture* [ null, %bb36.i ], [ undef, %bb30.i ] ; <%struct.StorablePicture*> [#uses=1] + %12 = phi %struct.StorablePicture* [ null, %bb36.i ], [ undef, %bb30.i ] ; <%struct.StorablePicture*> [#uses=1] + br i1 undef, label %bb38.i, label %bb68.i + +bb38.i: ; preds = %bb66.i, %bb67.preheader.i + %13 = phi %struct.StorablePicture* [ %37, %bb66.i ], [ %2, %bb67.preheader.i ] ; <%struct.StorablePicture*> [#uses=1] + %14 = phi %struct.StorablePicture* [ %38, %bb66.i ], [ %3, %bb67.preheader.i ] ; <%struct.StorablePicture*> [#uses=1] + %15 = phi %struct.StorablePicture* [ %39, %bb66.i ], [ %4, %bb67.preheader.i ] ; <%struct.StorablePicture*> [#uses=1] + %16 = phi %struct.StorablePicture* [ %40, %bb66.i ], [ %5, %bb67.preheader.i ] ; <%struct.StorablePicture*> [#uses=1] + %17 = phi %struct.StorablePicture* [ %40, %bb66.i ], [ %6, %bb67.preheader.i ] ; <%struct.StorablePicture*> 
[#uses=1] + %18 = phi %struct.StorablePicture* [ %40, %bb66.i ], [ %7, %bb67.preheader.i ] ; <%struct.StorablePicture*> [#uses=1] + %19 = phi %struct.StorablePicture* [ %40, %bb66.i ], [ %8, %bb67.preheader.i ] ; <%struct.StorablePicture*> [#uses=1] + %20 = phi %struct.StorablePicture* [ %40, %bb66.i ], [ %9, %bb67.preheader.i ] ; <%struct.StorablePicture*> [#uses=1] + %21 = phi %struct.StorablePicture* [ %40, %bb66.i ], [ %10, %bb67.preheader.i ] ; <%struct.StorablePicture*> [#uses=1] + %22 = phi %struct.StorablePicture* [ %40, %bb66.i ], [ %11, %bb67.preheader.i ] ; <%struct.StorablePicture*> [#uses=1] + %23 = phi %struct.StorablePicture* [ %40, %bb66.i ], [ %12, %bb67.preheader.i ] ; <%struct.StorablePicture*> [#uses=1] + %indvar248.i = phi i64 [ %indvar.next249.i, %bb66.i ], [ 0, %bb67.preheader.i ] ; <i64> [#uses=3] + %storemerge52.i = trunc i64 %indvar248.i to i32 ; <i32> [#uses=1] + %24 = getelementptr inbounds %struct.StorablePicture* %23, i64 0, i32 19 ; <i32*> [#uses=0] + br i1 undef, label %bb.nph51.i, label %bb66.i + +bb.nph51.i: ; preds = %bb38.i + %25 = sdiv i32 %storemerge52.i, 8 ; <i32> [#uses=0] + br label %bb39.i + +bb39.i: ; preds = %bb64.i, %bb.nph51.i + %26 = phi %struct.StorablePicture* [ %17, %bb.nph51.i ], [ null, %bb64.i ] ; <%struct.StorablePicture*> [#uses=1] + %27 = phi %struct.StorablePicture* [ %18, %bb.nph51.i ], [ null, %bb64.i ] ; <%struct.StorablePicture*> [#uses=0] + %28 = phi %struct.StorablePicture* [ %19, %bb.nph51.i ], [ null, %bb64.i ] ; <%struct.StorablePicture*> [#uses=0] + %29 = phi %struct.StorablePicture* [ %20, %bb.nph51.i ], [ null, %bb64.i ] ; <%struct.StorablePicture*> [#uses=0] + %30 = phi %struct.StorablePicture* [ %21, %bb.nph51.i ], [ null, %bb64.i ] ; <%struct.StorablePicture*> [#uses=0] + %31 = phi %struct.StorablePicture* [ %22, %bb.nph51.i ], [ null, %bb64.i ] ; <%struct.StorablePicture*> [#uses=0] + br i1 undef, label %bb57.i, label %bb40.i + +bb40.i: ; preds = %bb39.i + br i1 undef, label %bb57.i, label 
%bb41.i + +bb41.i: ; preds = %bb40.i + %storemerge10.i = select i1 undef, i32 2, i32 4 ; <i32> [#uses=1] + %32 = zext i32 %storemerge10.i to i64 ; <i64> [#uses=1] + br i1 undef, label %bb45.i, label %bb47.i + +bb45.i: ; preds = %bb41.i + %33 = getelementptr inbounds %struct.StorablePicture* %26, i64 0, i32 5, i64 undef, i64 %32, i64 undef ; <i64*> [#uses=1] + %34 = load i64* %33, align 8 ; <i64> [#uses=1] + br label %bb47.i + +bb47.i: ; preds = %bb45.i, %bb41.i + %storemerge11.i = phi i64 [ %34, %bb45.i ], [ 0, %bb41.i ] ; <i64> [#uses=0] + %scevgep246.i = getelementptr i64* undef, i64 undef ; <i64*> [#uses=0] + br label %bb64.i + +bb57.i: ; preds = %bb40.i, %bb39.i + br i1 undef, label %bb58.i, label %bb60.i + +bb58.i: ; preds = %bb57.i + br label %bb60.i + +bb60.i: ; preds = %bb58.i, %bb57.i + %35 = load i64*** undef, align 8 ; <i64**> [#uses=1] + %scevgep256.i = getelementptr i64** %35, i64 %indvar248.i ; <i64**> [#uses=1] + %36 = load i64** %scevgep256.i, align 8 ; <i64*> [#uses=1] + %scevgep243.i = getelementptr i64* %36, i64 undef ; <i64*> [#uses=1] + store i64 -1, i64* %scevgep243.i, align 8 + br label %bb64.i + +bb64.i: ; preds = %bb60.i, %bb47.i + br i1 undef, label %bb39.i, label %bb66.i + +bb66.i: ; preds = %bb64.i, %bb38.i + %37 = phi %struct.StorablePicture* [ %13, %bb38.i ], [ null, %bb64.i ] ; <%struct.StorablePicture*> [#uses=2] + %38 = phi %struct.StorablePicture* [ %14, %bb38.i ], [ null, %bb64.i ] ; <%struct.StorablePicture*> [#uses=2] + %39 = phi %struct.StorablePicture* [ %15, %bb38.i ], [ null, %bb64.i ] ; <%struct.StorablePicture*> [#uses=2] + %40 = phi %struct.StorablePicture* [ %16, %bb38.i ], [ null, %bb64.i ] ; <%struct.StorablePicture*> [#uses=8] + %indvar.next249.i = add i64 %indvar248.i, 1 ; <i64> [#uses=1] + br i1 undef, label %bb38.i, label %bb68.i + +bb68.i: ; preds = %bb66.i, %bb67.preheader.i + %41 = phi %struct.StorablePicture* [ %2, %bb67.preheader.i ], [ %37, %bb66.i ] ; <%struct.StorablePicture*> [#uses=0] + %42 = phi 
%struct.StorablePicture* [ %3, %bb67.preheader.i ], [ %38, %bb66.i ] ; <%struct.StorablePicture*> [#uses=1] + %43 = phi %struct.StorablePicture* [ %4, %bb67.preheader.i ], [ %39, %bb66.i ] ; <%struct.StorablePicture*> [#uses=1] + br i1 undef, label %bb.nph48.i, label %bb108.i + +bb.nph48.i: ; preds = %bb68.i + br label %bb80.i + +bb80.i: ; preds = %bb104.i, %bb.nph48.i + %44 = phi %struct.StorablePicture* [ %42, %bb.nph48.i ], [ null, %bb104.i ] ; <%struct.StorablePicture*> [#uses=1] + %45 = phi %struct.StorablePicture* [ %43, %bb.nph48.i ], [ null, %bb104.i ] ; <%struct.StorablePicture*> [#uses=1] + br i1 undef, label %bb.nph39.i, label %bb104.i + +bb.nph39.i: ; preds = %bb80.i + br label %bb81.i + +bb81.i: ; preds = %bb102.i, %bb.nph39.i + %46 = phi %struct.StorablePicture* [ %44, %bb.nph39.i ], [ %48, %bb102.i ] ; <%struct.StorablePicture*> [#uses=0] + %47 = phi %struct.StorablePicture* [ %45, %bb.nph39.i ], [ %48, %bb102.i ] ; <%struct.StorablePicture*> [#uses=0] + br i1 undef, label %bb83.i, label %bb82.i + +bb82.i: ; preds = %bb81.i + br i1 undef, label %bb83.i, label %bb101.i + +bb83.i: ; preds = %bb82.i, %bb81.i + br label %bb102.i + +bb101.i: ; preds = %bb82.i + br label %bb102.i + +bb102.i: ; preds = %bb101.i, %bb83.i + %48 = load %struct.StorablePicture** %0, align 8 ; <%struct.StorablePicture*> [#uses=2] + br i1 undef, label %bb81.i, label %bb104.i + +bb104.i: ; preds = %bb102.i, %bb80.i + br label %bb80.i + +bb108.i: ; preds = %bb68.i + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-11-04-SubregCoalescingBug.ll b/src/LLVM/test/CodeGen/X86/2009-11-04-SubregCoalescingBug.ll new file mode 100644 index 0000000..b5be65f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-11-04-SubregCoalescingBug.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin11 | FileCheck %s +; rdar://7362871 + +define void @bar(i32 %b, i32 %a) nounwind optsize ssp { +entry: +; CHECK: leal 15(%rsi), %edi +; CHECK-NOT: movl +; CHECK: _foo + %0 = add i32 %a, 15 ; <i32> [#uses=1] + %1 = zext i32 %0 to i64 ; <i64> [#uses=1] + tail call void @foo(i64 %1) nounwind + ret void +} + +declare void @foo(i64)
diff --git a/src/LLVM/test/CodeGen/X86/2009-11-13-VirtRegRewriterBug.ll b/src/LLVM/test/CodeGen/X86/2009-11-13-VirtRegRewriterBug.ll new file mode 100644 index 0000000..5398eef --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-11-13-VirtRegRewriterBug.ll
@@ -0,0 +1,133 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic -disable-fp-elim +; rdar://7394770 + +%struct.JVTLib_100487 = type <{ i8 }> + +define i32 @_Z13JVTLib_10335613JVTLib_10266513JVTLib_100579S_S_S_jPhj(i16* nocapture %ResidualX_Array.0, %struct.JVTLib_100487* nocapture byval align 4 %xqp, i16* nocapture %ResidualL_Array.0, i16* %ResidualDCZ_Array.0, i16* nocapture %ResidualACZ_FOArray.0, i32 %useFRextDequant, i8* nocapture %JVTLib_103357, i32 %use_field_scan) ssp { +bb.nph: + %0 = shl i32 undef, 1 ; <i32> [#uses=2] + %mask133.masked.masked.masked.masked.masked.masked = or i640 undef, undef ; <i640> [#uses=1] + br label %bb + +bb: ; preds = %_ZL13JVTLib_105204PKsPK13JVTLib_105184PsPhjS5_j.exit, %bb.nph + br i1 undef, label %bb2, label %bb1 + +bb1: ; preds = %bb + br i1 undef, label %bb.i, label %bb1.i + +bb2: ; preds = %bb + unreachable + +bb.i: ; preds = %bb1 + br label %_ZL13JVTLib_105204PKsPK13JVTLib_105184PsPhjS5_j.exit + +bb1.i: ; preds = %bb1 + br label %_ZL13JVTLib_105204PKsPK13JVTLib_105184PsPhjS5_j.exit + +_ZL13JVTLib_105204PKsPK13JVTLib_105184PsPhjS5_j.exit: ; preds = %bb1.i, %bb.i + br i1 undef, label %bb5, label %bb + +bb5: ; preds = %_ZL13JVTLib_105204PKsPK13JVTLib_105184PsPhjS5_j.exit + %mask271.masked.masked.masked.masked.masked.masked.masked = or i256 0, undef ; <i256> [#uses=2] + %mask266.masked.masked.masked.masked.masked.masked = or i256 %mask271.masked.masked.masked.masked.masked.masked.masked, undef ; <i256> [#uses=1] + %mask241.masked = or i256 undef, undef ; <i256> [#uses=1] + %ins237 = or i256 undef, 0 ; <i256> [#uses=1] + br i1 undef, label %bb9, label %bb10 + +bb9: ; preds = %bb5 + br i1 undef, label %bb12.i, label %_ZL13JVTLib_105255PKsPK13JVTLib_105184Psj.exit + +bb12.i: ; preds = %bb9 + br label %_ZL13JVTLib_105255PKsPK13JVTLib_105184Psj.exit + +_ZL13JVTLib_105255PKsPK13JVTLib_105184Psj.exit: ; preds = %bb12.i, %bb9 + ret i32 undef + +bb10: ; preds = %bb5 + %1 = sext i16 undef to i32 ; <i32> [#uses=1] + %2 
= sext i16 undef to i32 ; <i32> [#uses=1] + %3 = sext i16 undef to i32 ; <i32> [#uses=1] + %4 = sext i16 undef to i32 ; <i32> [#uses=1] + %5 = sext i16 undef to i32 ; <i32> [#uses=1] + %6 = sext i16 undef to i32 ; <i32> [#uses=1] + %tmp211 = lshr i256 %mask271.masked.masked.masked.masked.masked.masked.masked, 112 ; <i256> [#uses=0] + %7 = sext i16 undef to i32 ; <i32> [#uses=1] + %tmp208 = lshr i256 %mask266.masked.masked.masked.masked.masked.masked, 128 ; <i256> [#uses=1] + %tmp209 = trunc i256 %tmp208 to i16 ; <i16> [#uses=1] + %8 = sext i16 %tmp209 to i32 ; <i32> [#uses=1] + %9 = sext i16 undef to i32 ; <i32> [#uses=1] + %10 = sext i16 undef to i32 ; <i32> [#uses=1] + %tmp193 = lshr i256 %mask241.masked, 208 ; <i256> [#uses=1] + %tmp194 = trunc i256 %tmp193 to i16 ; <i16> [#uses=1] + %11 = sext i16 %tmp194 to i32 ; <i32> [#uses=1] + %tmp187 = lshr i256 %ins237, 240 ; <i256> [#uses=1] + %tmp188 = trunc i256 %tmp187 to i16 ; <i16> [#uses=1] + %12 = sext i16 %tmp188 to i32 ; <i32> [#uses=1] + %13 = add nsw i32 %4, %1 ; <i32> [#uses=1] + %14 = add nsw i32 %5, 0 ; <i32> [#uses=1] + %15 = add nsw i32 %6, %2 ; <i32> [#uses=1] + %16 = add nsw i32 %7, %3 ; <i32> [#uses=1] + %17 = add nsw i32 0, %8 ; <i32> [#uses=1] + %18 = add nsw i32 %11, %9 ; <i32> [#uses=1] + %19 = add nsw i32 0, %10 ; <i32> [#uses=1] + %20 = add nsw i32 %12, 0 ; <i32> [#uses=1] + %21 = add nsw i32 %17, %13 ; <i32> [#uses=2] + %22 = add nsw i32 %18, %14 ; <i32> [#uses=2] + %23 = add nsw i32 %19, %15 ; <i32> [#uses=2] + %24 = add nsw i32 %20, %16 ; <i32> [#uses=2] + %25 = add nsw i32 %22, %21 ; <i32> [#uses=2] + %26 = add nsw i32 %24, %23 ; <i32> [#uses=2] + %27 = sub i32 %21, %22 ; <i32> [#uses=1] + %28 = sub i32 %23, %24 ; <i32> [#uses=1] + %29 = add nsw i32 %26, %25 ; <i32> [#uses=1] + %30 = sub i32 %25, %26 ; <i32> [#uses=1] + %31 = sub i32 %27, %28 ; <i32> [#uses=1] + %32 = ashr i32 %29, 1 ; <i32> [#uses=2] + %33 = ashr i32 %30, 1 ; <i32> [#uses=2] + %34 = ashr i32 %31, 1 ; <i32> [#uses=2] + %35 = 
icmp sgt i32 %32, 32767 ; <i1> [#uses=1] + %o0_0.0.i = select i1 %35, i32 32767, i32 %32 ; <i32> [#uses=2] + %36 = icmp slt i32 %o0_0.0.i, -32768 ; <i1> [#uses=1] + %37 = icmp sgt i32 %33, 32767 ; <i1> [#uses=1] + %o1_0.0.i = select i1 %37, i32 32767, i32 %33 ; <i32> [#uses=2] + %38 = icmp slt i32 %o1_0.0.i, -32768 ; <i1> [#uses=1] + %39 = icmp sgt i32 %34, 32767 ; <i1> [#uses=1] + %o2_0.0.i = select i1 %39, i32 32767, i32 %34 ; <i32> [#uses=2] + %40 = icmp slt i32 %o2_0.0.i, -32768 ; <i1> [#uses=1] + %tmp101 = lshr i640 %mask133.masked.masked.masked.masked.masked.masked, 256 ; <i640> [#uses=1] + %41 = trunc i32 %o0_0.0.i to i16 ; <i16> [#uses=1] + %tmp358 = select i1 %36, i16 -32768, i16 %41 ; <i16> [#uses=2] + %42 = trunc i32 %o1_0.0.i to i16 ; <i16> [#uses=1] + %tmp347 = select i1 %38, i16 -32768, i16 %42 ; <i16> [#uses=1] + %43 = trunc i32 %o2_0.0.i to i16 ; <i16> [#uses=1] + %tmp335 = select i1 %40, i16 -32768, i16 %43 ; <i16> [#uses=1] + %44 = icmp sgt i16 %tmp358, -1 ; <i1> [#uses=2] + %..i24 = select i1 %44, i16 %tmp358, i16 undef ; <i16> [#uses=1] + %45 = icmp sgt i16 %tmp347, -1 ; <i1> [#uses=1] + %46 = icmp sgt i16 %tmp335, -1 ; <i1> [#uses=1] + %47 = zext i16 %..i24 to i32 ; <i32> [#uses=1] + %tmp = trunc i640 %tmp101 to i32 ; <i32> [#uses=1] + %48 = and i32 %tmp, 65535 ; <i32> [#uses=2] + %49 = mul i32 %47, %48 ; <i32> [#uses=1] + %50 = zext i16 undef to i32 ; <i32> [#uses=1] + %51 = mul i32 %50, %48 ; <i32> [#uses=1] + %52 = add i32 %49, %0 ; <i32> [#uses=1] + %53 = add i32 %51, %0 ; <i32> [#uses=1] + %54 = lshr i32 %52, undef ; <i32> [#uses=1] + %55 = lshr i32 %53, undef ; <i32> [#uses=1] + %56 = trunc i32 %54 to i16 ; <i16> [#uses=1] + %57 = trunc i32 %55 to i16 ; <i16> [#uses=1] + %vs16Out0_0.0.i = select i1 %44, i16 %56, i16 undef ; <i16> [#uses=1] + %vs16Out0_4.0.i = select i1 %45, i16 0, i16 undef ; <i16> [#uses=1] + %vs16Out1_0.0.i = select i1 %46, i16 %57, i16 undef ; <i16> [#uses=1] + br i1 undef, label %bb129.i, label 
%_ZL13JVTLib_105207PKsPK13JVTLib_105184Psj.exit + +bb129.i: ; preds = %bb10 + br label %_ZL13JVTLib_105207PKsPK13JVTLib_105184Psj.exit + +_ZL13JVTLib_105207PKsPK13JVTLib_105184Psj.exit: ; preds = %bb129.i, %bb10 + %58 = phi i16 [ %vs16Out0_4.0.i, %bb129.i ], [ undef, %bb10 ] ; <i16> [#uses=0] + %59 = phi i16 [ undef, %bb129.i ], [ %vs16Out1_0.0.i, %bb10 ] ; <i16> [#uses=0] + store i16 %vs16Out0_0.0.i, i16* %ResidualDCZ_Array.0, align 2 + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-11-16-MachineLICM.ll b/src/LLVM/test/CodeGen/X86/2009-11-16-MachineLICM.ll new file mode 100644 index 0000000..2ac688f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-11-16-MachineLICM.ll
@@ -0,0 +1,42 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s +; rdar://7395200 + +@g = common global [4 x float] zeroinitializer, align 16 ; <[4 x float]*> [#uses=4] + +define void @foo(i32 %n, float* nocapture %x) nounwind ssp { +entry: +; CHECK: foo: + %0 = icmp sgt i32 %n, 0 ; <i1> [#uses=1] + br i1 %0, label %bb.nph, label %return + +bb.nph: ; preds = %entry +; CHECK: movq _g@GOTPCREL(%rip), [[REG:%[a-z]+]] + %tmp = zext i32 %n to i64 ; <i64> [#uses=1] + br label %bb + +bb: ; preds = %bb, %bb.nph +; CHECK: LBB0_2: + %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; <i64> [#uses=2] + %tmp9 = shl i64 %indvar, 2 ; <i64> [#uses=4] + %tmp1016 = or i64 %tmp9, 1 ; <i64> [#uses=1] + %scevgep = getelementptr float* %x, i64 %tmp1016 ; <float*> [#uses=1] + %tmp1117 = or i64 %tmp9, 2 ; <i64> [#uses=1] + %scevgep12 = getelementptr float* %x, i64 %tmp1117 ; <float*> [#uses=1] + %tmp1318 = or i64 %tmp9, 3 ; <i64> [#uses=1] + %scevgep14 = getelementptr float* %x, i64 %tmp1318 ; <float*> [#uses=1] + %x_addr.03 = getelementptr float* %x, i64 %tmp9 ; <float*> [#uses=1] + %1 = load float* getelementptr inbounds ([4 x float]* @g, i64 0, i64 0), align 16 ; <float> [#uses=1] + store float %1, float* %x_addr.03, align 4 + %2 = load float* getelementptr inbounds ([4 x float]* @g, i64 0, i64 1), align 4 ; <float> [#uses=1] + store float %2, float* %scevgep, align 4 + %3 = load float* getelementptr inbounds ([4 x float]* @g, i64 0, i64 2), align 8 ; <float> [#uses=1] + store float %3, float* %scevgep12, align 4 + %4 = load float* getelementptr inbounds ([4 x float]* @g, i64 0, i64 3), align 4 ; <float> [#uses=1] + store float %4, float* %scevgep14, align 4 + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %indvar.next, %tmp ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll b/src/LLVM/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll new file mode 100644 index 0000000..94075e7 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll
@@ -0,0 +1,28 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck %s +; rdar://7396984 + +@str = private constant [28 x i8] c"xxxxxxxxxxxxxxxxxxxxxxxxxxx\00", align 1 + +define void @t(i32 %count) ssp nounwind { +entry: +; CHECK: t: +; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip) +; CHECK: movups L_str(%rip), %xmm0 + %tmp0 = alloca [60 x i8], align 1 + %tmp1 = getelementptr inbounds [60 x i8]* %tmp0, i64 0, i64 0 + br label %bb1 + +bb1: +; CHECK: LBB0_1: +; CHECK: movaps %xmm0, (%rsp) + %tmp2 = phi i32 [ %tmp3, %bb1 ], [ 0, %entry ] + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* getelementptr inbounds ([28 x i8]* @str, i64 0, i64 0), i64 28, i32 1, i1 false) + %tmp3 = add i32 %tmp2, 1 + %tmp4 = icmp eq i32 %tmp3, %count + br i1 %tmp4, label %bb2, label %bb1 + +bb2: + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/2009-11-17-UpdateTerminator.ll b/src/LLVM/test/CodeGen/X86/2009-11-17-UpdateTerminator.ll new file mode 100644 index 0000000..5c1a2bc --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-11-17-UpdateTerminator.ll
@@ -0,0 +1,52 @@ +; RUN: llc -O3 < %s +; This test fails with: +; Assertion failed: (!B && "UpdateTerminators requires analyzable predecessors!"), function updateTerminator, MachineBasicBlock.cpp, line 255. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.2" + +%"struct.llvm::InlineAsm::ConstraintInfo" = type { i32, i8, i8, i8, i8, %"struct.std::vector<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" } +%"struct.std::_Vector_base<llvm::InlineAsm::ConstraintInfo,std::allocator<llvm::InlineAsm::ConstraintInfo> >" = type { %"struct.std::_Vector_base<llvm::InlineAsm::ConstraintInfo,std::allocator<llvm::InlineAsm::ConstraintInfo> >::_Vector_impl" } +%"struct.std::_Vector_base<llvm::InlineAsm::ConstraintInfo,std::allocator<llvm::InlineAsm::ConstraintInfo> >::_Vector_impl" = type { %"struct.llvm::InlineAsm::ConstraintInfo"*, %"struct.llvm::InlineAsm::ConstraintInfo"*, %"struct.llvm::InlineAsm::ConstraintInfo"* } +%"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" = type { %"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >::_Vector_impl" } +%"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >::_Vector_impl" = type { %"struct.std::string"*, %"struct.std::string"*, %"struct.std::string"* } +%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Alloc_hider" = type { i8* } 
+%"struct.std::string" = type { %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Alloc_hider" } +%"struct.std::vector<llvm::InlineAsm::ConstraintInfo,std::allocator<llvm::InlineAsm::ConstraintInfo> >" = type { %"struct.std::_Vector_base<llvm::InlineAsm::ConstraintInfo,std::allocator<llvm::InlineAsm::ConstraintInfo> >" } +%"struct.std::vector<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" = type { %"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" } + +define zeroext i8 @_ZN4llvm9InlineAsm14ConstraintInfo5ParseENS_9StringRefERSt6vectorIS1_SaIS1_EE(%"struct.llvm::InlineAsm::ConstraintInfo"* nocapture %this, i64 %Str.0, i64 %Str.1, %"struct.std::vector<llvm::InlineAsm::ConstraintInfo,std::allocator<llvm::InlineAsm::ConstraintInfo> >"* nocapture %ConstraintsSoFar) nounwind ssp align 2 { +entry: + br i1 undef, label %bb56, label %bb27.outer + +bb8: ; preds = %bb27.outer108, %bb13 + switch i8 undef, label %bb27.outer [ + i8 35, label %bb56 + i8 37, label %bb14 + i8 38, label %bb10 + i8 42, label %bb56 + ] + +bb27.outer: ; preds = %bb8, %entry + %I.2.ph = phi i8* [ undef, %entry ], [ %I.2.ph109, %bb8 ] ; <i8*> [#uses=2] + br label %bb27.outer108 + +bb10: ; preds = %bb8 + %toBool = icmp eq i8 0, 0 ; <i1> [#uses=1] + %or.cond = and i1 undef, %toBool ; <i1> [#uses=1] + br i1 %or.cond, label %bb13, label %bb56 + +bb13: ; preds = %bb10 + br i1 undef, label %bb27.outer108, label %bb8 + +bb14: ; preds = %bb8 + ret i8 1 + +bb27.outer108: ; preds = %bb13, %bb27.outer + %I.2.ph109 = getelementptr i8* %I.2.ph, i64 undef ; <i8*> [#uses=1] + %scevgep = getelementptr i8* %I.2.ph, i64 undef ; <i8*> [#uses=0] + br label %bb8 + +bb56: ; preds = %bb10, %bb8, %bb8, %entry + ret i8 1 +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-11-18-TwoAddrKill.ll b/src/LLVM/test/CodeGen/X86/2009-11-18-TwoAddrKill.ll new file mode 100644 index 0000000..0edaa70 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-11-18-TwoAddrKill.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s +; PR 5300 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +target triple = "i386-pc-linux-gnu" + +@g_296 = external global i8, align 1 ; <i8*> [#uses=1] + +define noalias i8** @func_31(i32** nocapture %int8p_33, i8** nocapture %p_34, i8* nocapture %p_35) nounwind { +entry: + %cmp.i = icmp sgt i16 undef, 234 ; <i1> [#uses=1] + %tmp17 = select i1 %cmp.i, i16 undef, i16 0 ; <i16> [#uses=2] + %conv8 = trunc i16 %tmp17 to i8 ; <i8> [#uses=3] + br i1 undef, label %cond.false.i29, label %land.lhs.true.i + +land.lhs.true.i: ; preds = %entry + %tobool5.i = icmp eq i32 undef, undef ; <i1> [#uses=1] + br i1 %tobool5.i, label %cond.false.i29, label %bar.exit + +cond.false.i29: ; preds = %land.lhs.true.i, %entry + %tmp = sub i8 0, %conv8 ; <i8> [#uses=1] + %mul.i = and i8 %conv8, %tmp ; <i8> [#uses=1] + br label %bar.exit + +bar.exit: ; preds = %cond.false.i29, %land.lhs.true.i + %call1231 = phi i8 [ %mul.i, %cond.false.i29 ], [ %conv8, %land.lhs.true.i ] ; <i8> [#uses=0] + %conv21 = trunc i16 %tmp17 to i8 ; <i8> [#uses=1] + store i8 %conv21, i8* @g_296 + ret i8** undef +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-11-25-ImpDefBug.ll b/src/LLVM/test/CodeGen/X86/2009-11-25-ImpDefBug.ll new file mode 100644 index 0000000..396638f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-11-25-ImpDefBug.ll
@@ -0,0 +1,120 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu +; pr5600 + +%struct..0__pthread_mutex_s = type { i32, i32, i32, i32, i32, i32, %struct.__pthread_list_t } +%struct.ASN1ObjHeader = type { i8, %"struct.__gmp_expr<__mpz_struct [1],__mpz_struct [1]>", i64, i32, i32, i32 } +%struct.ASN1Object = type { i32 (...)**, i32, i32, i64 } +%struct.ASN1Unit = type { [4 x i32 (%struct.ASN1ObjHeader*, %struct.ASN1Object**)*], %"struct.std::ASN1ObjList" } +%"struct.__gmp_expr<__mpz_struct [1],__mpz_struct [1]>" = type { [1 x %struct.__mpz_struct] } +%struct.__mpz_struct = type { i32, i32, i64* } +%struct.__pthread_list_t = type { %struct.__pthread_list_t*, %struct.__pthread_list_t* } +%struct.pthread_attr_t = type { i64, [48 x i8] } +%struct.pthread_mutex_t = type { %struct..0__pthread_mutex_s } +%struct.pthread_mutexattr_t = type { i32 } +%"struct.std::ASN1ObjList" = type { %"struct.std::_Vector_base<ASN1Object*,std::allocator<ASN1Object*> >" } +%"struct.std::_Vector_base<ASN1Object*,std::allocator<ASN1Object*> >" = type { %"struct.std::_Vector_base<ASN1Object*,std::allocator<ASN1Object*> >::_Vector_impl" } +%"struct.std::_Vector_base<ASN1Object*,std::allocator<ASN1Object*> >::_Vector_impl" = type { %struct.ASN1Object**, %struct.ASN1Object**, %struct.ASN1Object** } +%struct.xmstream = type { i8*, i64, i64, i64, i8 } + +declare void @_ZNSt6vectorIP10ASN1ObjectSaIS1_EE13_M_insert_auxEN9__gnu_cxx17__normal_iteratorIPS1_S3_EERKS1_(%"struct.std::ASN1ObjList"* nocapture, i64, %struct.ASN1Object** nocapture) + +declare i32 @_Z17LoadObjectFromBERR8xmstreamPP10ASN1ObjectPPF10ASN1StatusP13ASN1ObjHeaderS3_E(%struct.xmstream*, %struct.ASN1Object**, i32 (%struct.ASN1ObjHeader*, %struct.ASN1Object**)**) + +define i32 @_ZN8ASN1Unit4loadER8xmstreamjm18ASN1LengthEncoding(%struct.ASN1Unit* %this, %struct.xmstream* nocapture %stream, i32 %numObjects, i64 %size, i32 %lEncoding) { +entry: + br label %meshBB85 + +bb5: ; preds = %bb13.fragment.cl135, %bb13.fragment.cl, 
%bb.i.i.bbcl.disp, %bb13.fragment + %0 = invoke i32 @_Z17LoadObjectFromBERR8xmstreamPP10ASN1ObjectPPF10ASN1StatusP13ASN1ObjHeaderS3_E(%struct.xmstream* undef, %struct.ASN1Object** undef, i32 (%struct.ASN1ObjHeader*, %struct.ASN1Object**)** undef) + to label %meshBB81.bbcl.disp unwind label %lpad ; <i32> [#uses=0] + +bb10.fragment: ; preds = %bb13.fragment.bbcl.disp + br i1 undef, label %bb1.i.fragment.bbcl.disp, label %bb.i.i.bbcl.disp + +bb1.i.fragment: ; preds = %bb1.i.fragment.bbcl.disp + invoke void @_ZNSt6vectorIP10ASN1ObjectSaIS1_EE13_M_insert_auxEN9__gnu_cxx17__normal_iteratorIPS1_S3_EERKS1_(%"struct.std::ASN1ObjList"* undef, i64 undef, %struct.ASN1Object** undef) + to label %meshBB81.bbcl.disp unwind label %lpad + +bb13.fragment: ; preds = %bb13.fragment.bbcl.disp + br i1 undef, label %meshBB81.bbcl.disp, label %bb5 + +bb.i4: ; preds = %bb.i4.bbcl.disp, %bb1.i.fragment.bbcl.disp + ret i32 undef + +bb1.i5: ; preds = %bb.i1 + ret i32 undef + +lpad: ; preds = %bb1.i.fragment.cl, %bb1.i.fragment, %bb5 + %.SV10.phi807 = phi i8* [ undef, %bb1.i.fragment.cl ], [ undef, %bb1.i.fragment ], [ undef, %bb5 ] ; <i8*> [#uses=1] + %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + %1 = load i8* %.SV10.phi807, align 8 ; <i8> [#uses=0] + br i1 undef, label %meshBB81.bbcl.disp, label %bb13.fragment.bbcl.disp + +bb.i1: ; preds = %bb.i.i.bbcl.disp + br i1 undef, label %meshBB81.bbcl.disp, label %bb1.i5 + +meshBB81: ; preds = %meshBB81.bbcl.disp, %bb.i.i.bbcl.disp + br i1 undef, label %meshBB81.bbcl.disp, label %bb.i4.bbcl.disp + +meshBB85: ; preds = %meshBB81.bbcl.disp, %bb.i4.bbcl.disp, %bb1.i.fragment.bbcl.disp, %bb.i.i.bbcl.disp, %entry + br i1 undef, label %meshBB81.bbcl.disp, label %bb13.fragment.bbcl.disp + +bb.i.i.bbcl.disp: ; preds = %bb10.fragment + switch i8 undef, label %meshBB85 [ + i8 123, label %bb.i1 + i8 97, label %bb5 + i8 44, label %meshBB81 + i8 1, label %meshBB81.cl + i8 51, label %meshBB81.cl141 + ] + +bb1.i.fragment.cl: 
; preds = %bb1.i.fragment.bbcl.disp + invoke void @_ZNSt6vectorIP10ASN1ObjectSaIS1_EE13_M_insert_auxEN9__gnu_cxx17__normal_iteratorIPS1_S3_EERKS1_(%"struct.std::ASN1ObjList"* undef, i64 undef, %struct.ASN1Object** undef) + to label %meshBB81.bbcl.disp unwind label %lpad + +bb1.i.fragment.bbcl.disp: ; preds = %bb10.fragment + switch i8 undef, label %bb.i4 [ + i8 97, label %bb1.i.fragment + i8 7, label %bb1.i.fragment.cl + i8 35, label %bb.i4.cl + i8 77, label %meshBB85 + ] + +bb13.fragment.cl: ; preds = %bb13.fragment.bbcl.disp + br i1 undef, label %meshBB81.bbcl.disp, label %bb5 + +bb13.fragment.cl135: ; preds = %bb13.fragment.bbcl.disp + br i1 undef, label %meshBB81.bbcl.disp, label %bb5 + +bb13.fragment.bbcl.disp: ; preds = %meshBB85, %lpad + switch i8 undef, label %bb10.fragment [ + i8 67, label %bb13.fragment.cl + i8 108, label %bb13.fragment + i8 58, label %bb13.fragment.cl135 + ] + +bb.i4.cl: ; preds = %bb.i4.bbcl.disp, %bb1.i.fragment.bbcl.disp + ret i32 undef + +bb.i4.bbcl.disp: ; preds = %meshBB81.cl141, %meshBB81.cl, %meshBB81 + switch i8 undef, label %bb.i4 [ + i8 35, label %bb.i4.cl + i8 77, label %meshBB85 + ] + +meshBB81.cl: ; preds = %meshBB81.bbcl.disp, %bb.i.i.bbcl.disp + br i1 undef, label %meshBB81.bbcl.disp, label %bb.i4.bbcl.disp + +meshBB81.cl141: ; preds = %meshBB81.bbcl.disp, %bb.i.i.bbcl.disp + br i1 undef, label %meshBB81.bbcl.disp, label %bb.i4.bbcl.disp + +meshBB81.bbcl.disp: ; preds = %meshBB81.cl141, %meshBB81.cl, %bb13.fragment.cl135, %bb13.fragment.cl, %bb1.i.fragment.cl, %meshBB85, %meshBB81, %bb.i1, %lpad, %bb13.fragment, %bb1.i.fragment, %bb5 + switch i8 undef, label %meshBB85 [ + i8 44, label %meshBB81 + i8 1, label %meshBB81.cl + i8 51, label %meshBB81.cl141 + ] +} + +declare i32 @__gxx_personality_v0(...)
diff --git a/src/LLVM/test/CodeGen/X86/2009-12-01-EarlyClobberBug.ll b/src/LLVM/test/CodeGen/X86/2009-12-01-EarlyClobberBug.ll new file mode 100644 index 0000000..0700323 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-12-01-EarlyClobberBug.ll
@@ -0,0 +1,41 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s +; pr5391 + +define void @t() nounwind ssp { +entry: +; CHECK: t: +; CHECK: movl %ecx, %eax +; CHECK: %eax = foo (%eax, %ecx) + %b = alloca i32 ; <i32*> [#uses=2] + %a = alloca i32 ; <i32*> [#uses=1] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + %0 = load i32* %b, align 4 ; <i32> [#uses=1] + %1 = load i32* %b, align 4 ; <i32> [#uses=1] + %asmtmp = call i32 asm "$0 = foo ($1, $2)", "=&{ax},%0,r,~{dirflag},~{fpsr},~{flags}"(i32 %0, i32 %1) nounwind ; <i32> [#uses=1] + store i32 %asmtmp, i32* %a + br label %return + +return: ; preds = %entry + ret void +} + +define void @t2() nounwind ssp { +entry: +; CHECK: t2: +; CHECK: movl +; CHECK: [[D2:%e.x]] = foo +; CHECK: ([[D2]], +; CHECK-NOT: [[D2]] +; CHECK: ) + %b = alloca i32 ; <i32*> [#uses=2] + %a = alloca i32 ; <i32*> [#uses=1] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + %0 = load i32* %b, align 4 ; <i32> [#uses=1] + %1 = load i32* %b, align 4 ; <i32> [#uses=1] + %asmtmp = call i32 asm "$0 = foo ($1, $2)", "=&r,%0,r,~{dirflag},~{fpsr},~{flags}"(i32 %0, i32 %1) nounwind ; <i32> [#uses=1] + store i32 %asmtmp, i32* %a + br label %return + +return: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2009-12-11-TLSNoRedZone.ll b/src/LLVM/test/CodeGen/X86/2009-12-11-TLSNoRedZone.ll new file mode 100644 index 0000000..823e0ca --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2009-12-11-TLSNoRedZone.ll
@@ -0,0 +1,63 @@ +; RUN: llc -relocation-model=pic < %s | FileCheck %s +; PR5723 +target datalayout = "e-p:64:64" +target triple = "x86_64-unknown-linux-gnu" + +%0 = type { [1 x i64] } +%link = type { %0* } +%test = type { i32, %link } + +@data = global [2 x i64] zeroinitializer, align 64 ; <[2 x i64]*> [#uses=1] +@ptr = linkonce thread_local global [1 x i64] [i64 ptrtoint ([2 x i64]* @data to i64)], align 64 ; <[1 x i64]*> [#uses=1] +@link_ptr = linkonce thread_local global [1 x i64] zeroinitializer, align 64 ; <[1 x i64]*> [#uses=1] +@_dm_my_pe = external global [1 x i64], align 64 ; <[1 x i64]*> [#uses=0] +@_dm_pes_in_prog = external global [1 x i64], align 64 ; <[1 x i64]*> [#uses=0] +@_dm_npes_div_mult = external global [1 x i64], align 64 ; <[1 x i64]*> [#uses=0] +@_dm_npes_div_shift = external global [1 x i64], align 64 ; <[1 x i64]*> [#uses=0] +@_dm_pe_addr_loc = external global [1 x i64], align 64 ; <[1 x i64]*> [#uses=0] +@_dm_offset_addr_mask = external global [1 x i64], align 64 ; <[1 x i64]*> [#uses=0] + +define void @leaf() nounwind { +; CHECK: leaf: +; CHECK-NOT: -8(%rsp) +; CHECK: leaq link_ptr@TLSGD +; CHECK: callq __tls_get_addr@PLT +"file foo2.c, line 14, bb1": + %p = alloca %test*, align 8 ; <%test**> [#uses=4] + br label %"file foo2.c, line 14, bb2" + +"file foo2.c, line 14, bb2": ; preds = %"file foo2.c, line 14, bb1" + br label %"@CFE_debug_label_0" + +"@CFE_debug_label_0": ; preds = %"file foo2.c, line 14, bb2" + %r = load %test** bitcast ([1 x i64]* @ptr to %test**), align 8 ; <%test*> [#uses=1] + store %test* %r, %test** %p, align 8 + br label %"@CFE_debug_label_2" + +"@CFE_debug_label_2": ; preds = %"@CFE_debug_label_0" + %r1 = load %link** bitcast ([1 x i64]* @link_ptr to %link**), align 8 ; <%link*> [#uses=1] + %r2 = load %test** %p, align 8 ; <%test*> [#uses=1] + %r3 = ptrtoint %test* %r2 to i64 ; <i64> [#uses=1] + %r4 = inttoptr i64 %r3 to %link** ; <%link**> [#uses=1] + %r5 = getelementptr %link** %r4, i64 1 ; <%link**> [#uses=1] + 
store %link* %r1, %link** %r5, align 8 + br label %"@CFE_debug_label_3" + +"@CFE_debug_label_3": ; preds = %"@CFE_debug_label_2" + %r6 = load %test** %p, align 8 ; <%test*> [#uses=1] + %r7 = ptrtoint %test* %r6 to i64 ; <i64> [#uses=1] + %r8 = inttoptr i64 %r7 to %link* ; <%link*> [#uses=1] + %r9 = getelementptr %link* %r8, i64 1 ; <%link*> [#uses=1] + store %link* %r9, %link** bitcast ([1 x i64]* @link_ptr to %link**), align 8 + br label %"@CFE_debug_label_4" + +"@CFE_debug_label_4": ; preds = %"@CFE_debug_label_3" + %r10 = load %test** %p, align 8 ; <%test*> [#uses=1] + %r11 = ptrtoint %test* %r10 to i64 ; <i64> [#uses=1] + %r12 = inttoptr i64 %r11 to i32* ; <i32*> [#uses=1] + store i32 1, i32* %r12, align 4 + br label %"@CFE_debug_label_5" + +"@CFE_debug_label_5": ; preds = %"@CFE_debug_label_4" + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/20090313-signext.ll b/src/LLVM/test/CodeGen/X86/20090313-signext.ll new file mode 100644 index 0000000..de930d5 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/20090313-signext.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -march=x86-64 -relocation-model=pic > %t +; RUN: grep {movswl %ax, %edi} %t +; RUN: grep {movw (%rax), %ax} %t +; XFAIL: * + +@x = common global i16 0 + +define signext i16 @f() nounwind { +entry: + %0 = tail call signext i16 @h() nounwind + %1 = sext i16 %0 to i32 + tail call void @g(i32 %1) nounwind + %2 = load i16* @x, align 2 + ret i16 %2 +} + +declare signext i16 @h() + +declare void @g(i32)
diff --git a/src/LLVM/test/CodeGen/X86/2010-01-05-ZExt-Shl.ll b/src/LLVM/test/CodeGen/X86/2010-01-05-ZExt-Shl.ll new file mode 100644 index 0000000..e7004e2 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-01-05-ZExt-Shl.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=x86-64 +; <rdar://problem/7499313> +target triple = "i686-apple-darwin8" + +declare void @func2(i16 zeroext) + +define void @func1() nounwind { +entry: + %t1 = icmp ne i8 undef, 0 + %t2 = icmp eq i8 undef, 14 + %t3 = and i1 %t1, %t2 + %t4 = select i1 %t3, i16 0, i16 128 + call void @func2(i16 zeroext %t4) nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2010-01-07-ISelBug.ll b/src/LLVM/test/CodeGen/X86/2010-01-07-ISelBug.ll new file mode 100644 index 0000000..081fab7 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-01-07-ISelBug.ll
@@ -0,0 +1,27 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 +; rdar://r7519827 + +define i32 @t() nounwind ssp { +entry: + br label %if.end.i11 + +if.end.i11: ; preds = %lor.lhs.false.i10, %lor.lhs.false.i10, %lor.lhs.false.i10 + br i1 undef, label %for.body161, label %for.end197 + +for.body161: ; preds = %if.end.i11 + br label %for.end197 + +for.end197: ; preds = %for.body161, %if.end.i11 + %mlucEntry.4 = phi i96 [ undef, %for.body161 ], [ undef, %if.end.i11 ] ; <i96> [#uses=2] + store i96 %mlucEntry.4, i96* undef, align 8 + %tmp172 = lshr i96 %mlucEntry.4, 64 ; <i96> [#uses=1] + %tmp173 = trunc i96 %tmp172 to i32 ; <i32> [#uses=1] + %tmp1.i1.i = call i32 @llvm.bswap.i32(i32 %tmp173) nounwind ; <i32> [#uses=1] + store i32 %tmp1.i1.i, i32* undef, align 8 + unreachable + +if.then283: ; preds = %lor.lhs.false.i10, %do.end105, %for.end + ret i32 undef +} + +declare i32 @llvm.bswap.i32(i32) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/X86/2010-01-07-UAMemFeature.ll b/src/LLVM/test/CodeGen/X86/2010-01-07-UAMemFeature.ll new file mode 100644 index 0000000..bb24adb --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-01-07-UAMemFeature.ll
@@ -0,0 +1,11 @@ +; RUN: llc -mcpu=yonah -mattr=vector-unaligned-mem -march=x86 < %s | FileCheck %s +; CHECK: addps ( + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define <4 x float> @foo(<4 x float>* %P, <4 x float> %In) nounwind { + %A = load <4 x float>* %P, align 4 + %B = fadd <4 x float> %A, %In + ret <4 x float> %B +}
diff --git a/src/LLVM/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll b/src/LLVM/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll new file mode 100644 index 0000000..8b55bd7 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s +; rdar://r7512579 + +; PHI defs in the atomic loop should be used by the add / adc +; instructions. They should not be dead. + +define void @t(i64* nocapture %p) nounwind ssp { +entry: +; CHECK: t: +; CHECK: movl $1 +; CHECK: movl (%ebp), %eax +; CHECK: movl 4(%ebp), %edx +; CHECK: LBB0_1: +; CHECK-NOT: movl $1 +; CHECK-NOT: movl $0 +; CHECK: addl +; CHECK: adcl +; CHECK: lock +; CHECK: cmpxchg8b +; CHECK: jne + %0 = atomicrmw add i64* %p, i64 1 seq_cst + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2010-01-11-ExtraPHIArg.ll b/src/LLVM/test/CodeGen/X86/2010-01-11-ExtraPHIArg.ll new file mode 100644 index 0000000..db98eef --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-01-11-ExtraPHIArg.ll
@@ -0,0 +1,97 @@ +; RUN: llc -verify-machineinstrs < %s +; +; The lowering of a switch combined with constand folding would leave spurious extra arguments on a PHI instruction. +; +target triple = "x86_64-apple-darwin10" + +define void @foo() { + br label %cond_true813.i + +cond_true813.i: ; preds = %0 + br i1 false, label %cond_true818.i, label %cond_next1146.i + +cond_true818.i: ; preds = %cond_true813.i + br i1 false, label %recog_memoized.exit52, label %cond_next1146.i + +recog_memoized.exit52: ; preds = %cond_true818.i + switch i32 0, label %bb886.i.preheader [ + i32 0, label %bb907.i + i32 44, label %bb866.i + i32 103, label %bb874.i + i32 114, label %bb874.i + ] + +bb857.i: ; preds = %bb886.i, %bb866.i + %tmp862.i494.24 = phi i8* [ null, %bb866.i ], [ %tmp862.i494.26, %bb886.i ] ; <i8*> [#uses=1] + switch i32 0, label %bb886.i.preheader [ + i32 0, label %bb907.i + i32 44, label %bb866.i + i32 103, label %bb874.i + i32 114, label %bb874.i + ] + +bb866.i.loopexit: ; preds = %bb874.i + br label %bb866.i + +bb866.i.loopexit31: ; preds = %cond_true903.i + br label %bb866.i + +bb866.i: ; preds = %bb866.i.loopexit31, %bb866.i.loopexit, %bb857.i, %recog_memoized.exit52 + br i1 false, label %bb907.i, label %bb857.i + +bb874.i.preheader.loopexit: ; preds = %cond_true903.i, %cond_true903.i + ret void + +bb874.i: ; preds = %bb857.i, %bb857.i, %recog_memoized.exit52, %recog_memoized.exit52 + switch i32 0, label %bb886.i.preheader.loopexit [ + i32 0, label %bb907.i + i32 44, label %bb866.i.loopexit + i32 103, label %bb874.i.backedge + i32 114, label %bb874.i.backedge + ] + +bb874.i.backedge: ; preds = %bb874.i, %bb874.i + ret void + +bb886.i.preheader.loopexit: ; preds = %bb874.i + ret void + +bb886.i.preheader: ; preds = %bb857.i, %recog_memoized.exit52 + %tmp862.i494.26 = phi i8* [ undef, %recog_memoized.exit52 ], [ %tmp862.i494.24, %bb857.i ] ; <i8*> [#uses=1] + br label %bb886.i + +bb886.i: ; preds = %cond_true903.i, %bb886.i.preheader + br i1 false, label %bb857.i, 
label %cond_true903.i + +cond_true903.i: ; preds = %bb886.i + switch i32 0, label %bb886.i [ + i32 0, label %bb907.i + i32 44, label %bb866.i.loopexit31 + i32 103, label %bb874.i.preheader.loopexit + i32 114, label %bb874.i.preheader.loopexit + ] + +bb907.i: ; preds = %cond_true903.i, %bb874.i, %bb866.i, %bb857.i, %recog_memoized.exit52 + br i1 false, label %cond_next1146.i, label %cond_true910.i + +cond_true910.i: ; preds = %bb907.i + ret void + +cond_next1146.i: ; preds = %bb907.i, %cond_true818.i, %cond_true813.i + ret void + +bb2060.i: ; No predecessors! + br i1 false, label %cond_true2064.i, label %bb2067.i + +cond_true2064.i: ; preds = %bb2060.i + unreachable + +bb2067.i: ; preds = %bb2060.i + ret void + +cond_next3473: ; No predecessors! + ret void + +cond_next3521: ; No predecessors! + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2010-01-13-OptExtBug.ll b/src/LLVM/test/CodeGen/X86/2010-01-13-OptExtBug.ll new file mode 100644 index 0000000..d49e2a8 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-01-13-OptExtBug.ll
@@ -0,0 +1,46 @@ +; RUN: llc < %s -mtriple=i386-pc-linux-gnu +; PR6027 + +%class.OlsonTimeZone = type { i16, i32*, i8*, i16 } + +define void @XX(%class.OlsonTimeZone* %this) align 2 { +entry: + %call = tail call i8* @_Z15uprv_malloc_4_2v() + %0 = bitcast i8* %call to double* + %tmp = getelementptr inbounds %class.OlsonTimeZone* %this, i32 0, i32 3 + %tmp2 = load i16* %tmp + %tmp525 = getelementptr inbounds %class.OlsonTimeZone* %this, i32 0, i32 0 + %tmp626 = load i16* %tmp525 + %cmp27 = icmp slt i16 %tmp2, %tmp626 + br i1 %cmp27, label %bb.nph, label %for.end + +for.cond: + %tmp6 = load i16* %tmp5 + %cmp = icmp slt i16 %inc, %tmp6 + %indvar.next = add i32 %indvar, 1 + br i1 %cmp, label %for.body, label %for.end + +bb.nph: + %tmp10 = getelementptr inbounds %class.OlsonTimeZone* %this, i32 0, i32 2 + %tmp17 = getelementptr inbounds %class.OlsonTimeZone* %this, i32 0, i32 1 + %tmp5 = getelementptr inbounds %class.OlsonTimeZone* %this, i32 0, i32 0 + %tmp29 = sext i16 %tmp2 to i32 + %tmp31 = add i16 %tmp2, 1 + %tmp32 = zext i16 %tmp31 to i32 + br label %for.body + +for.body: + %indvar = phi i32 [ 0, %bb.nph ], [ %indvar.next, %for.cond ] + %tmp30 = add i32 %indvar, %tmp29 + %tmp33 = add i32 %indvar, %tmp32 + %inc = trunc i32 %tmp33 to i16 + %tmp11 = load i8** %tmp10 + %arrayidx = getelementptr i8* %tmp11, i32 %tmp30 + %tmp12 = load i8* %arrayidx + br label %for.cond + +for.end: + ret void +} + +declare i8* @_Z15uprv_malloc_4_2v()
diff --git a/src/LLVM/test/CodeGen/X86/2010-01-15-SelectionDAGCycle.ll b/src/LLVM/test/CodeGen/X86/2010-01-15-SelectionDAGCycle.ll new file mode 100644 index 0000000..5d96e4a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-01-15-SelectionDAGCycle.ll
@@ -0,0 +1,28 @@ +; RUN: llc < %s -march=x86-64 +; ModuleID = 'bugpoint-reduced-simplified.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define void @numvec_(i32* noalias %ncelet, i32* noalias %ncel, i32* noalias %nfac, i32* noalias %nfabor, i32* noalias %lregis, i32* noalias %irveci, i32* noalias %irvecb, [0 x [2 x i32]]* noalias %ifacel, [0 x i32]* noalias %ifabor, [0 x i32]* noalias %inumfi, [0 x i32]* noalias %inumfb, [1 x i32]* noalias %iworkf, [0 x i32]* noalias %ismbs) { +"file bug754399.f90, line 1, bb1": + %r1037 = bitcast <2 x double> zeroinitializer to <4 x i32> ; <<4 x i32>> [#uses=1] + br label %"file bug754399.f90, line 184, in inner vector loop at depth 0, bb164" + +"file bug754399.f90, line 184, in inner vector loop at depth 0, bb164": ; preds = %"file bug754399.f90, line 184, in inner vector loop at depth 0, bb164", %"file bug754399.f90, line 1, bb1" + %tmp641 = add i64 0, 48 ; <i64> [#uses=1] + %tmp641642 = inttoptr i64 %tmp641 to <4 x i32>* ; <<4 x i32>*> [#uses=1] + %r1258 = load <4 x i32>* %tmp641642, align 4 ; <<4 x i32>> [#uses=2] + %r1295 = extractelement <4 x i32> %r1258, i32 3 ; <i32> [#uses=1] + %r1296 = sext i32 %r1295 to i64 ; <i64> [#uses=1] + %r1297 = add i64 %r1296, -1 ; <i64> [#uses=1] + %r1298183 = getelementptr [0 x i32]* %ismbs, i64 0, i64 %r1297 ; <i32*> [#uses=1] + %r1298184 = load i32* %r1298183, align 4 ; <i32> [#uses=1] + %r1301 = extractelement <4 x i32> %r1037, i32 3 ; <i32> [#uses=1] + %r1302 = mul i32 %r1298184, %r1301 ; <i32> [#uses=1] + %r1306 = insertelement <4 x i32> zeroinitializer, i32 %r1302, i32 3 ; <<4 x i32>> [#uses=1] + %r1321 = add <4 x i32> %r1306, %r1258 ; <<4 x i32>> [#uses=1] + %tmp643 = add i64 0, 48 ; <i64> [#uses=1] + %tmp643644 = inttoptr i64 %tmp643 to <4 x i32>* ; <<4 x i32>*> [#uses=1] + store <4 x i32> %r1321, <4 x i32>* %tmp643644, align 4 + 
br label %"file bug754399.f90, line 184, in inner vector loop at depth 0, bb164" +}
diff --git a/src/LLVM/test/CodeGen/X86/2010-01-18-DbgValue.ll b/src/LLVM/test/CodeGen/X86/2010-01-18-DbgValue.ll new file mode 100644 index 0000000..85ee091 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-01-18-DbgValue.ll
@@ -0,0 +1,48 @@ +; RUN: llc -march=x86 -O0 < %s | FileCheck %s +; Currently, dbg.declare generates a DEBUG_VALUE comment. Eventually it will +; generate DWARF and this test will need to be modified or removed. + + +%struct.Pt = type { double, double } +%struct.Rect = type { %struct.Pt, %struct.Pt } + +define double @foo(%struct.Rect* byval %my_r0) nounwind ssp { +entry: +;CHECK: DEBUG_VALUE + %retval = alloca double ; <double*> [#uses=2] + %0 = alloca double ; <double*> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + call void @llvm.dbg.declare(metadata !{%struct.Rect* %my_r0}, metadata !0), !dbg !15 + %1 = getelementptr inbounds %struct.Rect* %my_r0, i32 0, i32 0, !dbg !16 ; <%struct.Pt*> [#uses=1] + %2 = getelementptr inbounds %struct.Pt* %1, i32 0, i32 0, !dbg !16 ; <double*> [#uses=1] + %3 = load double* %2, align 8, !dbg !16 ; <double> [#uses=1] + store double %3, double* %0, align 8, !dbg !16 + %4 = load double* %0, align 8, !dbg !16 ; <double> [#uses=1] + store double %4, double* %retval, align 8, !dbg !16 + br label %return, !dbg !16 + +return: ; preds = %entry + %retval1 = load double* %retval, !dbg !16 ; <double> [#uses=1] + ret double %retval1, !dbg !16 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +!0 = metadata !{i32 524545, metadata !1, metadata !"my_r0", metadata !2, i32 11, metadata !7} ; [ DW_TAG_arg_variable ] +!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 11, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 524329, metadata !"b2.c", metadata !"/tmp/", metadata !3} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 524305, i32 0, i32 1, metadata !"b2.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] +!5 = metadata !{metadata !6, metadata !7} +!6 = metadata !{i32 524324, metadata !2, metadata !"double", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] +!7 = metadata !{i32 524307, metadata !2, metadata !"Rect", metadata !2, i32 6, i64 256, i64 64, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_structure_type ] +!8 = metadata !{metadata !9, metadata !14} +!9 = metadata !{i32 524301, metadata !7, metadata !"P1", metadata !2, i32 7, i64 128, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ] +!10 = metadata !{i32 524307, metadata !2, metadata !"Pt", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ] +!11 = metadata !{metadata !12, metadata !13} +!12 = metadata !{i32 524301, metadata !10, metadata !"x", metadata !2, i32 2, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ] +!13 = metadata !{i32 524301, metadata !10, metadata !"y", metadata !2, i32 3, i64 64, i64 64, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ] +!14 = metadata !{i32 524301, metadata !7, metadata !"P2", metadata !2, i32 8, i64 128, i64 64, i64 128, i32 0, metadata !10} ; [ DW_TAG_member ] +!15 = metadata !{i32 11, i32 0, metadata !1, null} +!16 = metadata !{i32 12, i32 0, metadata !17, null} +!17 = metadata !{i32 524299, metadata !1, i32 11, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/src/LLVM/test/CodeGen/X86/2010-01-19-OptExtBug.ll b/src/LLVM/test/CodeGen/X86/2010-01-19-OptExtBug.ll new file mode 100644 index 0000000..cd8960b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-01-19-OptExtBug.ll
@@ -0,0 +1,57 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin11 -relocation-model=pic -disable-fp-elim -stats |& not grep ext-opt + +define fastcc i8* @S_scan_str(i8* %start, i32 %keep_quoted, i32 %keep_delims) nounwind ssp { +entry: + switch i8 undef, label %bb6 [ + i8 9, label %bb5 + i8 32, label %bb5 + i8 10, label %bb5 + i8 13, label %bb5 + i8 12, label %bb5 + ] + +bb5: ; preds = %entry, %entry, %entry, %entry, %entry + br label %bb6 + +bb6: ; preds = %bb5, %entry + br i1 undef, label %bb7, label %bb9 + +bb7: ; preds = %bb6 + unreachable + +bb9: ; preds = %bb6 + %0 = load i8* undef, align 1 ; <i8> [#uses=3] + br i1 undef, label %bb12, label %bb10 + +bb10: ; preds = %bb9 + br i1 undef, label %bb12, label %bb11 + +bb11: ; preds = %bb10 + unreachable + +bb12: ; preds = %bb10, %bb9 + br i1 undef, label %bb13, label %bb14 + +bb13: ; preds = %bb12 + store i8 %0, i8* undef, align 1 + %1 = zext i8 %0 to i32 ; <i32> [#uses=1] + br label %bb18 + +bb14: ; preds = %bb12 + br label %bb18 + +bb18: ; preds = %bb14, %bb13 + %termcode.0 = phi i32 [ %1, %bb13 ], [ undef, %bb14 ] ; <i32> [#uses=2] + %2 = icmp eq i8 %0, 0 ; <i1> [#uses=1] + br i1 %2, label %bb21, label %bb19 + +bb19: ; preds = %bb18 + br i1 undef, label %bb21, label %bb20 + +bb20: ; preds = %bb19 + br label %bb21 + +bb21: ; preds = %bb20, %bb19, %bb18 + %termcode.1 = phi i32 [ %termcode.0, %bb18 ], [ %termcode.0, %bb19 ], [ undef, %bb20 ] ; <i32> [#uses=0] + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll b/src/LLVM/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll new file mode 100644 index 0000000..2113263 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll
@@ -0,0 +1,33 @@ +; RUN: llc -O1 < %s +; ModuleID = 'pr6157.bc' +; formerly crashed in SelectionDAGBuilder + +%tart.reflect.ComplexType = type { double, double } + +@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 } + +define i32 @"main(tart.core.String[])->int32"(i32 %args) { +entry: + tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8) + tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2] + ret i32 3 +} + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone +declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone + +!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ] +!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ] +!3 = metadata !{metadata !4, metadata !6, metadata !7} +!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ] +!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ] +!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ] +!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; 
[ DW_TAG_auto_variable ] +!9 = metadata !{i32 458763, metadata !10} ; [ DW_TAG_lexical_block ] +!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ] +!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ] +!12 = metadata !{metadata !13} +!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
diff --git a/src/LLVM/test/CodeGen/X86/2010-02-01-TaillCallCrash.ll b/src/LLVM/test/CodeGen/X86/2010-02-01-TaillCallCrash.ll new file mode 100644 index 0000000..2751174 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-02-01-TaillCallCrash.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu +; PR6196 + +%"char[]" = type [1 x i8] + +@.str = external constant %"char[]", align 1 ; <%"char[]"*> [#uses=1] + +define i32 @regex_subst() nounwind { +entry: + %0 = tail call i32 bitcast (%"char[]"* @.str to i32 (i32)*)(i32 0) nounwind ; <i32> [#uses=1] + ret i32 %0 +}
diff --git a/src/LLVM/test/CodeGen/X86/2010-02-03-DualUndef.ll b/src/LLVM/test/CodeGen/X86/2010-02-03-DualUndef.ll new file mode 100644 index 0000000..d116ecc --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-02-03-DualUndef.ll
@@ -0,0 +1,27 @@ +; RUN: llc < %s -march=x86-64 +; PR6086 +define fastcc void @prepOutput() nounwind { +bb: ; preds = %output.exit + br label %bb.i1 + +bb.i1: ; preds = %bb7.i, %bb + br i1 undef, label %bb7.i, label %bb.nph.i + +bb.nph.i: ; preds = %bb.i1 + br label %bb3.i + +bb3.i: ; preds = %bb5.i6, %bb.nph.i + %tmp10.i = trunc i64 undef to i32 ; <i32> [#uses=1] + br i1 undef, label %bb4.i, label %bb5.i6 + +bb4.i: ; preds = %bb3.i + br label %bb5.i6 + +bb5.i6: ; preds = %bb4.i, %bb3.i + %0 = phi i32 [ undef, %bb4.i ], [ undef, %bb3.i ] ; <i32> [#uses=1] + %1 = icmp slt i32 %0, %tmp10.i ; <i1> [#uses=1] + br i1 %1, label %bb7.i, label %bb3.i + +bb7.i: ; preds = %bb5.i6, %bb.i1 + br label %bb.i1 +}
diff --git a/src/LLVM/test/CodeGen/X86/2010-02-04-SchedulerBug.ll b/src/LLVM/test/CodeGen/X86/2010-02-04-SchedulerBug.ll new file mode 100644 index 0000000..c966e21 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-02-04-SchedulerBug.ll
@@ -0,0 +1,28 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin11 +; rdar://7604000 + +%struct.a_t = type { i8*, i64*, i8*, i32, i32, i64*, i64*, i64* } +%struct.b_t = type { i32, i32, i32, i32, i64, i64, i64, i64 } + +define void @t(i32 %cNum, i64 %max) nounwind optsize ssp noimplicitfloat { +entry: + %0 = load %struct.b_t** null, align 4 ; <%struct.b_t*> [#uses=1] + %1 = getelementptr inbounds %struct.b_t* %0, i32 %cNum, i32 5 ; <i64*> [#uses=1] + %2 = load i64* %1, align 4 ; <i64> [#uses=1] + %3 = icmp ult i64 %2, %max ; <i1> [#uses=1] + %4 = getelementptr inbounds %struct.a_t* null, i32 0, i32 7 ; <i64**> [#uses=1] + %5 = load i64** %4, align 4 ; <i64*> [#uses=0] + %6 = load i64* null, align 4 ; <i64> [#uses=1] + br i1 %3, label %bb2, label %bb + +bb: ; preds = %entry + br label %bb3 + +bb2: ; preds = %entry + %7 = or i64 %6, undef ; <i64> [#uses=1] + br label %bb3 + +bb3: ; preds = %bb2, %bb + %misc_enables.0 = phi i64 [ undef, %bb ], [ %7, %bb2 ] ; <i64> [#uses=0] + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2010-02-11-NonTemporal.ll b/src/LLVM/test/CodeGen/X86/2010-02-11-NonTemporal.ll new file mode 100644 index 0000000..5789a0b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-02-11-NonTemporal.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s +; CHECK: movnt +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +!0 = metadata !{ i32 1 } + +define void @sub_(i32* noalias %n) { +"file movnt.f90, line 2, bb1": + %n1 = alloca i32*, align 8 + %i = alloca i32, align 4 + %"$LCS_0" = alloca i64, align 8 + %"$LCS_S2" = alloca <2 x double>, align 16 + %r9 = load <2 x double>* %"$LCS_S2", align 8 + %r10 = load i64* %"$LCS_0", align 8 + %r11 = inttoptr i64 %r10 to <2 x double>* + store <2 x double> %r9, <2 x double>* %r11, align 16, !nontemporal !0 + br label %"file movnt.f90, line 18, bb5" + +"file movnt.f90, line 18, bb5": + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2010-02-12-CoalescerBug-Impdef.ll b/src/LLVM/test/CodeGen/X86/2010-02-12-CoalescerBug-Impdef.ll new file mode 100644 index 0000000..739a27a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-02-12-CoalescerBug-Impdef.ll
@@ -0,0 +1,260 @@ +; RUN: llc < %s > %t +; PR6283 + +; Tricky coalescer bug: +; After coalescing %RAX with a virtual register, this instruction was rematted: +; +; %EAX<def> = MOV32rr %reg1070<kill> +; +; This instruction silently defined %RAX, and when rematting removed the +; instruction, the live interval for %RAX was not properly updated. The valno +; referred to a deleted instruction and bad things happened. +; +; The fix is to implicitly define %RAX when coalescing: +; +; %EAX<def> = MOV32rr %reg1070<kill>, %RAX<imp-def> +; + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +module asm "\09.ident\09\22GCC: (GNU) 4.5.0 20100212 (experimental) LLVM: 95975\22" + +%0 = type { %"union gimple_statement_d"* } +%"BITMAP_WORD[]" = type [2 x i64] +%"uchar[]" = type [1 x i8] +%"char[]" = type [4 x i8] +%"enum dom_state[]" = type [2 x i32] +%"int[]" = type [4 x i32] +%"struct VEC_basic_block_base" = type { i32, i32, [1 x %"struct basic_block_def"*] } +%"struct VEC_basic_block_gc" = type { %"struct VEC_basic_block_base" } +%"struct VEC_edge_base" = type { i32, i32, [1 x %"struct edge_def"*] } +%"struct VEC_edge_gc" = type { %"struct VEC_edge_base" } +%"struct VEC_gimple_base" = type { i32, i32, [1 x %"union gimple_statement_d"*] } +%"struct VEC_gimple_gc" = type { %"struct VEC_gimple_base" } +%"struct VEC_iv_cand_p_base" = type { i32, i32, [1 x %"struct iv_cand"*] } +%"struct VEC_iv_cand_p_heap" = type { %"struct VEC_iv_cand_p_base" } +%"struct VEC_iv_use_p_base" = type { i32, i32, [1 x %"struct iv_use"*] } +%"struct VEC_iv_use_p_heap" = type { %"struct VEC_iv_use_p_base" } +%"struct VEC_loop_p_base" = type { i32, i32, [1 x %"struct loop"*] } +%"struct VEC_loop_p_gc" = type { %"struct VEC_loop_p_base" } +%"struct VEC_rtx_base" = type { i32, i32, [1 x %"struct rtx_def"*] } +%"struct VEC_rtx_gc" = type { %"struct 
VEC_rtx_base" } +%"struct VEC_tree_base" = type { i32, i32, [1 x %"union tree_node"*] } +%"struct VEC_tree_gc" = type { %"struct VEC_tree_base" } +%"struct _obstack_chunk" = type { i8*, %"struct _obstack_chunk"*, %"char[]" } +%"struct basic_block_def" = type { %"struct VEC_edge_gc"*, %"struct VEC_edge_gc"*, i8*, %"struct loop"*, [2 x %"struct et_node"*], %"struct basic_block_def"*, %"struct basic_block_def"*, %"union basic_block_il_dependent", i64, i32, i32, i32, i32, i32 } +%"struct bitmap_element" = type { %"struct bitmap_element"*, %"struct bitmap_element"*, i32, %"BITMAP_WORD[]" } +%"struct bitmap_head_def" = type { %"struct bitmap_element"*, %"struct bitmap_element"*, i32, %"struct bitmap_obstack"* } +%"struct bitmap_obstack" = type { %"struct bitmap_element"*, %"struct bitmap_head_def"*, %"struct obstack" } +%"struct block_symbol" = type { [3 x %"union rtunion"], %"struct object_block"*, i64 } +%"struct comp_cost" = type { i32, i32 } +%"struct control_flow_graph" = type { %"struct basic_block_def"*, %"struct basic_block_def"*, %"struct VEC_basic_block_gc"*, i32, i32, i32, %"struct VEC_basic_block_gc"*, i32, %"enum dom_state[]", %"enum dom_state[]", i32, i32 } +%"struct cost_pair" = type { %"struct iv_cand"*, %"struct comp_cost", %"struct bitmap_head_def"*, %"union tree_node"* } +%"struct def_optype_d" = type { %"struct def_optype_d"*, %"union tree_node"** } +%"struct double_int" = type { i64, i64 } +%"struct edge_def" = type { %"struct basic_block_def"*, %"struct basic_block_def"*, %"union edge_def_insns", i8*, %"union tree_node"*, i32, i32, i32, i32, i64 } +%"struct eh_status" = type opaque +%"struct et_node" = type opaque +%"struct function" = type { %"struct eh_status"*, %"struct control_flow_graph"*, %"struct gimple_seq_d"*, %"struct gimple_df"*, %"struct loops"*, %"struct htab"*, %"union tree_node"*, %"union tree_node"*, %"union tree_node"*, %"union tree_node"*, %"struct machine_function"*, %"struct language_function"*, %"struct htab"*, i32, i32, i32, 
i32, i32, i32, i8*, i8, i8, i8, i8 } +%"struct gimple_bb_info" = type { %"struct gimple_seq_d"*, %"struct gimple_seq_d"* } +%"struct gimple_df" = type { %"struct htab"*, %"struct VEC_gimple_gc"*, %"struct VEC_tree_gc"*, %"union tree_node"*, %"struct pt_solution", %"struct pt_solution", %"struct pointer_map_t"*, %"union tree_node"*, %"struct htab"*, %"struct bitmap_head_def"*, i8, %"struct ssa_operands" } +%"struct gimple_seq_d" = type { %"struct gimple_seq_node_d"*, %"struct gimple_seq_node_d"*, %"struct gimple_seq_d"* } +%"struct gimple_seq_node_d" = type { %"union gimple_statement_d"*, %"struct gimple_seq_node_d"*, %"struct gimple_seq_node_d"* } +%"struct gimple_statement_base" = type { i8, i8, i16, i32, i32, i32, %"struct basic_block_def"*, %"union tree_node"* } +%"struct phi_arg_d[]" = type [1 x %"struct phi_arg_d"] +%"struct gimple_statement_phi" = type { %"struct gimple_statement_base", i32, i32, %"union tree_node"*, %"struct phi_arg_d[]" } +%"struct htab" = type { i32 (i8*)*, i32 (i8*, i8*)*, void (i8*)*, i8**, i64, i64, i64, i32, i32, i8* (i64, i64)*, void (i8*)*, i8*, i8* (i8*, i64, i64)*, void (i8*, i8*)*, i32 } +%"struct iv" = type { %"union tree_node"*, %"union tree_node"*, %"union tree_node"*, %"union tree_node"*, i8, i8, i32 } +%"struct iv_cand" = type { i32, i8, i32, %"union gimple_statement_d"*, %"union tree_node"*, %"union tree_node"*, %"struct iv"*, i32, i32, %"struct iv_use"*, %"struct bitmap_head_def"* } +%"struct iv_use" = type { i32, i32, %"struct iv"*, %"union gimple_statement_d"*, %"union tree_node"**, %"struct bitmap_head_def"*, i32, %"struct cost_pair"*, %"struct iv_cand"* } +%"struct ivopts_data" = type { %"struct loop"*, %"struct pointer_map_t"*, i32, i32, %"struct version_info"*, %"struct bitmap_head_def"*, %"struct VEC_iv_use_p_heap"*, %"struct VEC_iv_cand_p_heap"*, %"struct bitmap_head_def"*, i32, i8, i8 } +%"struct lang_decl" = type opaque +%"struct language_function" = type opaque +%"struct loop" = type { i32, i32, %"struct 
basic_block_def"*, %"struct basic_block_def"*, %"struct comp_cost", i32, i32, %"struct VEC_loop_p_gc"*, %"struct loop"*, %"struct loop"*, i8*, %"union tree_node"*, %"struct double_int", %"struct double_int", i8, i8, i32, %"struct nb_iter_bound"*, %"struct loop_exit"*, i8, %"union tree_node"* } +%"struct loop_exit" = type { %"struct edge_def"*, %"struct loop_exit"*, %"struct loop_exit"*, %"struct loop_exit"* } +%"struct loops" = type { i32, %"struct VEC_loop_p_gc"*, %"struct htab"*, %"struct loop"* } +%"struct machine_cfa_state" = type { %"struct rtx_def"*, i64 } +%"struct machine_function" = type { %"struct stack_local_entry"*, i8*, i32, i32, %"int[]", i32, %"struct machine_cfa_state", i32, i8 } +%"struct nb_iter_bound" = type { %"union gimple_statement_d"*, %"struct double_int", i8, %"struct nb_iter_bound"* } +%"struct object_block" = type { %"union section"*, i32, i64, %"struct VEC_rtx_gc"*, %"struct VEC_rtx_gc"* } +%"struct obstack" = type { i64, %"struct _obstack_chunk"*, i8*, i8*, i8*, i64, i32, %"struct _obstack_chunk"* (i8*, i64)*, void (i8*, %"struct _obstack_chunk"*)*, i8*, i8 } +%"struct phi_arg_d" = type { %"struct ssa_use_operand_d", %"union tree_node"*, i32 } +%"struct pointer_map_t" = type opaque +%"struct pt_solution" = type { i8, %"struct bitmap_head_def"* } +%"struct rtx_def" = type { i16, i8, i8, %"union u" } +%"struct section_common" = type { i32 } +%"struct ssa_operand_memory_d" = type { %"struct ssa_operand_memory_d"*, %"uchar[]" } +%"struct ssa_operands" = type { %"struct ssa_operand_memory_d"*, i32, i32, i8, %"struct def_optype_d"*, %"struct use_optype_d"* } +%"struct ssa_use_operand_d" = type { %"struct ssa_use_operand_d"*, %"struct ssa_use_operand_d"*, %0, %"union tree_node"** } +%"struct stack_local_entry" = type opaque +%"struct tree_base" = type <{ i16, i8, i8, i8, [2 x i8], i8 }> +%"struct tree_common" = type { %"struct tree_base", %"union tree_node"*, %"union tree_node"* } +%"struct tree_decl_common" = type { %"struct 
tree_decl_minimal", %"union tree_node"*, i8, i8, i8, i8, i8, i32, %"union tree_node"*, %"union tree_node"*, %"union tree_node"*, %"union tree_node"*, %"struct lang_decl"* } +%"struct tree_decl_minimal" = type { %"struct tree_common", i32, i32, %"union tree_node"*, %"union tree_node"* } +%"struct tree_decl_non_common" = type { %"struct tree_decl_with_vis", %"union tree_node"*, %"union tree_node"*, %"union tree_node"*, %"union tree_node"* } +%"struct tree_decl_with_rtl" = type { %"struct tree_decl_common", %"struct rtx_def"* } +%"struct tree_decl_with_vis" = type { %"struct tree_decl_with_rtl", %"union tree_node"*, %"union tree_node"*, %"union tree_node"*, i8, i8, i8 } +%"struct tree_function_decl" = type { %"struct tree_decl_non_common", %"struct function"*, %"union tree_node"*, %"union tree_node"*, %"union tree_node"*, i16, i8, i8 } +%"struct unnamed_section" = type { %"struct section_common", void (i8*)*, i8*, %"union section"* } +%"struct use_optype_d" = type { %"struct use_optype_d"*, %"struct ssa_use_operand_d" } +%"struct version_info" = type { %"union tree_node"*, %"struct iv"*, i8, i32, i8 } +%"union basic_block_il_dependent" = type { %"struct gimple_bb_info"* } +%"union edge_def_insns" = type { %"struct gimple_seq_d"* } +%"union gimple_statement_d" = type { %"struct gimple_statement_phi" } +%"union rtunion" = type { i8* } +%"union section" = type { %"struct unnamed_section" } +%"union tree_node" = type { %"struct tree_function_decl" } +%"union u" = type { %"struct block_symbol" } + +declare fastcc %"union tree_node"* @get_computation_at(%"struct loop"*, %"struct iv_use"* nocapture, %"struct iv_cand"* nocapture, %"union gimple_statement_d"*) nounwind + +declare fastcc i32 @computation_cost(%"union tree_node"*, i8 zeroext) nounwind + +define fastcc i64 @get_computation_cost_at(%"struct ivopts_data"* %data, %"struct iv_use"* nocapture %use, %"struct iv_cand"* nocapture %cand, i8 zeroext %address_p, %"struct bitmap_head_def"** %depends_on, %"union 
gimple_statement_d"* %at, i8* %can_autoinc) nounwind { +entry: + br i1 undef, label %"100", label %"4" + +"4": ; preds = %entry + br i1 undef, label %"6", label %"5" + +"5": ; preds = %"4" + unreachable + +"6": ; preds = %"4" + br i1 undef, label %"8", label %"7" + +"7": ; preds = %"6" + unreachable + +"8": ; preds = %"6" + br i1 undef, label %"100", label %"10" + +"10": ; preds = %"8" + br i1 undef, label %"17", label %"16" + +"16": ; preds = %"10" + unreachable + +"17": ; preds = %"10" + br i1 undef, label %"19", label %"18" + +"18": ; preds = %"17" + unreachable + +"19": ; preds = %"17" + br i1 undef, label %"93", label %"20" + +"20": ; preds = %"19" + br i1 undef, label %"23", label %"21" + +"21": ; preds = %"20" + unreachable + +"23": ; preds = %"20" + br i1 undef, label %"100", label %"25" + +"25": ; preds = %"23" + br i1 undef, label %"100", label %"26" + +"26": ; preds = %"25" + br i1 undef, label %"30", label %"28" + +"28": ; preds = %"26" + unreachable + +"30": ; preds = %"26" + br i1 undef, label %"59", label %"51" + +"51": ; preds = %"30" + br i1 undef, label %"55", label %"52" + +"52": ; preds = %"51" + unreachable + +"55": ; preds = %"51" + %0 = icmp ugt i32 0, undef ; <i1> [#uses=1] + br i1 %0, label %"50.i", label %"9.i" + +"9.i": ; preds = %"55" + unreachable + +"50.i": ; preds = %"55" + br i1 undef, label %"55.i", label %"54.i" + +"54.i": ; preds = %"50.i" + br i1 undef, label %"57.i", label %"55.i" + +"55.i": ; preds = %"54.i", %"50.i" + unreachable + +"57.i": ; preds = %"54.i" + br label %"63.i" + +"61.i": ; preds = %"63.i" + br i1 undef, label %"64.i", label %"62.i" + +"62.i": ; preds = %"61.i" + br label %"63.i" + +"63.i": ; preds = %"62.i", %"57.i" + br i1 undef, label %"61.i", label %"64.i" + +"64.i": ; preds = %"63.i", %"61.i" + unreachable + +"59": ; preds = %"30" + br i1 undef, label %"60", label %"82" + +"60": ; preds = %"59" + br i1 undef, label %"61", label %"82" + +"61": ; preds = %"60" + br i1 undef, label %"62", label %"82" + +"62": 
; preds = %"61" + br i1 undef, label %"100", label %"63" + +"63": ; preds = %"62" + br i1 undef, label %"65", label %"64" + +"64": ; preds = %"63" + unreachable + +"65": ; preds = %"63" + br i1 undef, label %"66", label %"67" + +"66": ; preds = %"65" + unreachable + +"67": ; preds = %"65" + %1 = load i32* undef, align 4 ; <i32> [#uses=0] + br label %"100" + +"82": ; preds = %"61", %"60", %"59" + unreachable + +"93": ; preds = %"19" + %2 = call fastcc %"union tree_node"* @get_computation_at(%"struct loop"* undef, %"struct iv_use"* %use, %"struct iv_cand"* %cand, %"union gimple_statement_d"* %at) nounwind ; <%"union tree_node"*> [#uses=1] + br i1 undef, label %"100", label %"97" + +"97": ; preds = %"93" + br i1 undef, label %"99", label %"98" + +"98": ; preds = %"97" + br label %"99" + +"99": ; preds = %"98", %"97" + %3 = phi %"union tree_node"* [ undef, %"98" ], [ %2, %"97" ] ; <%"union tree_node"*> [#uses=1] + %4 = call fastcc i32 @computation_cost(%"union tree_node"* %3, i8 zeroext undef) nounwind ; <i32> [#uses=1] + br label %"100" + +"100": ; preds = %"99", %"93", %"67", %"62", %"25", %"23", %"8", %entry + %memtmp1.1.0 = phi i32 [ 0, %"99" ], [ 10000000, %entry ], [ 10000000, %"8" ], [ 10000000, %"23" ], [ 10000000, %"25" ], [ undef, %"62" ], [ undef, %"67" ], [ 10000000, %"93" ] ; <i32> [#uses=1] + %memtmp1.0.0 = phi i32 [ %4, %"99" ], [ 10000000, %entry ], [ 10000000, %"8" ], [ 10000000, %"23" ], [ 10000000, %"25" ], [ undef, %"62" ], [ undef, %"67" ], [ 10000000, %"93" ] ; <i32> [#uses=1] + %5 = zext i32 %memtmp1.0.0 to i64 ; <i64> [#uses=1] + %6 = zext i32 %memtmp1.1.0 to i64 ; <i64> [#uses=1] + %7 = shl i64 %6, 32 ; <i64> [#uses=1] + %8 = or i64 %7, %5 ; <i64> [#uses=1] + ret i64 %8 +}
diff --git a/src/LLVM/test/CodeGen/X86/2010-02-15-ImplicitDefBug.ll b/src/LLVM/test/CodeGen/X86/2010-02-15-ImplicitDefBug.ll new file mode 100644 index 0000000..c429172 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-02-15-ImplicitDefBug.ll
@@ -0,0 +1,80 @@ +; RUN: llc < %s > %t +; PR6300 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +target triple = "i386-pc-linux-gnu" + +; When the "154" loops back onto itself, it defines a register after using it. +; The first value of the register is implicit-def. + +%"struct location_chain_def" = type { %"struct location_chain_def"*, %"struct rtx_def"*, %"struct rtx_def"*, i32 } +%"struct real_value" = type { i32, [5 x i32] } +%"struct rtx_def" = type { i16, i8, i8, %"union u" } +%"union u" = type { %"struct real_value" } + +define i32 @variable_union(i8** nocapture %slot, i8* nocapture %data) nounwind { +entry: + br i1 undef, label %"4.thread", label %"3" + +"4.thread": ; preds = %entry + unreachable + +"3": ; preds = %entry + br i1 undef, label %"19", label %"20" + +"19": ; preds = %"3" + unreachable + +"20": ; preds = %"3" + br i1 undef, label %"56.preheader", label %dv_onepart_p.exit + +dv_onepart_p.exit: ; preds = %"20" + unreachable + +"56.preheader": ; preds = %"20" + br label %"56" + +"50": ; preds = %"57" + br label %"56" + +"56": ; preds = %"50", %"56.preheader" + br i1 undef, label %"57", label %"58" + +"57": ; preds = %"56" + br i1 undef, label %"50", label %"58" + +"58": ; preds = %"57", %"56" + br i1 undef, label %"62", label %"63" + +"62": ; preds = %"58" + unreachable + +"63": ; preds = %"58" + br i1 undef, label %"67", label %"66" + +"66": ; preds = %"63" + br label %"67" + +"67": ; preds = %"66", %"63" + br label %"68" + +"68": ; preds = %"161", %"67" + br i1 undef, label %"153", label %"161" + +"153": ; preds = %"68" + br i1 undef, label %"160", label %bb.nph46 + +bb.nph46: ; preds = %"153" + br label %"154" + +"154": ; preds = %"154", %bb.nph46 + %0 = phi %"struct location_chain_def"** [ undef, %bb.nph46 ], [ %1, %"154" ] ; <%"struct location_chain_def"**> [#uses=1] + %1 = bitcast i8* undef to %"struct location_chain_def"** ; <%"struct 
location_chain_def"**> [#uses=1] + store %"struct location_chain_def"* undef, %"struct location_chain_def"** %0, align 4 + br i1 undef, label %"160", label %"154" + +"160": ; preds = %"154", %"153" + br label %"161" + +"161": ; preds = %"160", %"68" + br label %"68" +}
diff --git a/src/LLVM/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll b/src/LLVM/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll new file mode 100644 index 0000000..f6ac2ba --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll
@@ -0,0 +1,55 @@ +; RUN: llc -mtriple=i386-apple-darwin -tailcallopt < %s | FileCheck %s +; Check that lowered argumens do not overwrite the return address before it is moved. +; Bug 6225 +; +; If a call is a fastcc tail call and tail call optimization is enabled, the +; caller frame is replaced by the callee frame. This can require that arguments are +; placed on the former return address stack slot. Special care needs to be taken +; taken that the return address is moved / or stored in a register before +; lowering of arguments potentially overwrites the value. +; +; Move return address (76(%esp)) to a temporary register (%ebp) +; CHECK: movl 76(%esp), [[REGISTER:%[a-z]+]] +; Overwrite return addresss +; CHECK: movl [[EBX:%[a-z]+]], 76(%esp) +; Move return address from temporary register (%ebp) to new stack location (60(%esp)) +; CHECK: movl [[REGISTER]], 60(%esp) + +%tupl_p = type [9 x i32]* + +declare fastcc void @l297(i32 %r10, i32 %r9, i32 %r8, i32 %r7, i32 %r6, i32 %r5, i32 %r3, i32 %r2) noreturn nounwind +declare fastcc void @l298(i32 %r10, i32 %r9, i32 %r4) noreturn nounwind + +define fastcc void @l186(%tupl_p %r1) noreturn nounwind { +entry: + %ptr1 = getelementptr %tupl_p %r1, i32 0, i32 0 + %r2 = load i32* %ptr1 + %ptr3 = getelementptr %tupl_p %r1, i32 0, i32 1 + %r3 = load i32* %ptr3 + %ptr5 = getelementptr %tupl_p %r1, i32 0, i32 2 + %r4 = load i32* %ptr5 + %ptr7 = getelementptr %tupl_p %r1, i32 0, i32 3 + %r5 = load i32* %ptr7 + %ptr9 = getelementptr %tupl_p %r1, i32 0, i32 4 + %r6 = load i32* %ptr9 + %ptr11 = getelementptr %tupl_p %r1, i32 0, i32 5 + %r7 = load i32* %ptr11 + %ptr13 = getelementptr %tupl_p %r1, i32 0, i32 6 + %r8 = load i32* %ptr13 + %ptr15 = getelementptr %tupl_p %r1, i32 0, i32 7 + %r9 = load i32* %ptr15 + %ptr17 = getelementptr %tupl_p %r1, i32 0, i32 8 + %r10 = load i32* %ptr17 + %cond = icmp eq i32 %r10, 3 + br i1 %cond, label %true, label %false + +true: + tail call fastcc void @l297(i32 %r10, i32 %r9, i32 %r8, i32 %r7, i32 
%r6, i32 %r5, i32 %r3, i32 %r2) noreturn nounwind + ret void + +false: + tail call fastcc void @l298(i32 %r10, i32 %r9, i32 %r4) noreturn nounwind + ret void +} + +
diff --git a/src/LLVM/test/CodeGen/X86/2010-02-23-DAGCombineBug.ll b/src/LLVM/test/CodeGen/X86/2010-02-23-DAGCombineBug.ll new file mode 100644 index 0000000..6a58e9e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-02-23-DAGCombineBug.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=x86 | FileCheck %s + +define i32* @t() nounwind optsize ssp { +entry: +; CHECK: t: +; CHECK: testl %eax, %eax +; CHECK: js + %cmp = icmp slt i32 undef, 0 ; <i1> [#uses=1] + %outsearch.0 = select i1 %cmp, i1 false, i1 true ; <i1> [#uses=1] + br i1 %outsearch.0, label %if.then27, label %if.else29 + +if.then27: ; preds = %entry + ret i32* undef + +if.else29: ; preds = %entry + unreachable +} +
diff --git a/src/LLVM/test/CodeGen/X86/2010-02-23-DIV8rDefinesAX.ll b/src/LLVM/test/CodeGen/X86/2010-02-23-DIV8rDefinesAX.ll new file mode 100644 index 0000000..8543c80 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-02-23-DIV8rDefinesAX.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s +; PR6374 +; +; This test produces a DIV8r instruction and uses %AX instead of %AH and %AL. +; The DIV8r must have the right imp-defs for that to work. +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +%struct._i386_state = type { %union.anon } +%union.anon = type { [0 x i8] } + +define void @i386_aam(%struct._i386_state* nocapture %cpustate) nounwind ssp { +entry: + %call = tail call fastcc signext i8 @FETCH() ; <i8> [#uses=1] + %rem = urem i8 0, %call ; <i8> [#uses=1] + store i8 %rem, i8* undef + ret void +} + +declare fastcc signext i8 @FETCH() nounwind readnone ssp
diff --git a/src/LLVM/test/CodeGen/X86/2010-02-23-RematImplicitSubreg.ll b/src/LLVM/test/CodeGen/X86/2010-02-23-RematImplicitSubreg.ll new file mode 100644 index 0000000..4a26ba0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-02-23-RematImplicitSubreg.ll
@@ -0,0 +1,49 @@ +; RUN: llc < %s +; PR6372 +; +; This test produces a move instruction with an implicitly defined super-register: +; +; %DL<def> = MOV8rr %reg1038<kill>, %RDX<imp-def> +; +; When %DL is rematerialized, we must remember to update live intervals for +; sub-registers %DX and %EDX. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +define noalias i8* @foo() nounwind ssp { +entry: + br i1 undef, label %for.end, label %for.body + +for.body: ; preds = %if.end40, %entry + %tmp6 = load i8* undef, align 2 ; <i8> [#uses=3] + %conv11 = sext i8 %tmp6 to i64 ; <i64> [#uses=1] + %cmp15 = icmp slt i64 %conv11, undef ; <i1> [#uses=1] + br i1 %cmp15, label %if.end, label %if.then + +if.then: ; preds = %for.body + %conv18 = sext i8 %tmp6 to i32 ; <i32> [#uses=1] + %call = tail call i32 (...)* @invalid(i32 0, i32 0, i32 %conv18) nounwind ; <i32> [#uses=0] + br label %if.end + +if.end: ; preds = %if.then, %for.body + %index.0 = phi i8 [ 0, %if.then ], [ %tmp6, %for.body ] ; <i8> [#uses=1] + store i8 %index.0, i8* undef + %tmp24 = load i8* undef ; <i8> [#uses=2] + br i1 undef, label %if.end40, label %if.then36 + +if.then36: ; preds = %if.end + %conv38 = sext i8 %tmp24 to i32 ; <i32> [#uses=1] + %call39 = tail call i32 (...)* @invalid(i32 0, i32 0, i32 %conv38) nounwind ; <i32> [#uses=0] + br label %if.end40 + +if.end40: ; preds = %if.then36, %if.end + %index.1 = phi i8 [ 0, %if.then36 ], [ %tmp24, %if.end ] ; <i8> [#uses=1] + store i8 %index.1, i8* undef + br i1 false, label %for.body, label %for.end + +for.end: ; preds = %if.end40, %entry + ret i8* undef +} + +declare i32 @invalid(...)
diff --git a/src/LLVM/test/CodeGen/X86/2010-02-23-SingleDefPhiJoin.ll b/src/LLVM/test/CodeGen/X86/2010-02-23-SingleDefPhiJoin.ll new file mode 100644 index 0000000..aeed401 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-02-23-SingleDefPhiJoin.ll
@@ -0,0 +1,146 @@ +; RUN: llc < %s +; PR6363 +; +; This test case creates a phi join register with a single definition. The other +; predecessor blocks are implicit-def. +; +; If LiveIntervalAnalysis fails to recognize this as a phi join, the coalescer +; will detect an infinity valno loop. +; +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @decode(i8* nocapture %input, i32 %offset, i8* nocapture %output) nounwind { +entry: + br i1 undef, label %meshBB86, label %meshBB102 + +bb: ; preds = %meshBB106, %meshBB102 + br i1 false, label %bb9, label %meshBB90 + +bb.nph: ; preds = %meshBB90 + br label %meshBB114 + +bb.nph.fragment: ; preds = %meshBB114 + br label %meshBB118 + +bb1.fragment: ; preds = %meshBB118 + br i1 false, label %bb2, label %bb3 + +bb2: ; preds = %bb1.fragment + br label %meshBB74 + +bb2.fragment15: ; preds = %meshBB74 + br label %meshBB98 + +bb3: ; preds = %bb1.fragment + br i1 undef, label %meshBB, label %meshBB102 + +bb4: ; preds = %meshBB + br label %meshBB118 + +bb4.fragment: ; preds = %meshBB118 + br label %meshBB82 + +bb5: ; preds = %meshBB102, %meshBB82 + br i1 false, label %bb6, label %bb7 + +bb6: ; preds = %bb5 + br label %bb7 + +bb7: ; preds = %meshBB98, %bb6, %bb5 + br label %meshBB114 + +bb7.fragment: ; preds = %meshBB114 + br i1 undef, label %meshBB74, label %bb9 + +bb9: ; preds = %bb7.fragment, %bb + br label %bb1.i23 + +bb1.i23: ; preds = %meshBB110, %bb9 + br i1 undef, label %meshBB106, label %meshBB110 + +skip_to_newline.exit26: ; preds = %meshBB106 + br label %meshBB86 + +skip_to_newline.exit26.fragment: ; preds = %meshBB86 + br i1 false, label %meshBB90, label %meshBB106 + +bb11.fragment: ; preds = %meshBB90, %meshBB86 + br label %meshBB122 + +bb1.i: ; preds = %meshBB122, %meshBB + %ooffset.2.lcssa.phi.SV.phi203 = phi i32 [ 0, %meshBB122 ], [ %ooffset.2.lcssa.phi.SV.phi233, 
%meshBB ] ; <i32> [#uses=1] + br label %meshBB98 + +bb1.i.fragment: ; preds = %meshBB98 + br i1 undef, label %meshBB78, label %meshBB + +skip_to_newline.exit: ; preds = %meshBB78 + br i1 undef, label %bb12, label %meshBB110 + +bb12: ; preds = %skip_to_newline.exit + br label %meshBB94 + +bb12.fragment: ; preds = %meshBB94 + br i1 false, label %bb13, label %meshBB78 + +bb13: ; preds = %bb12.fragment + br label %meshBB82 + +bb13.fragment: ; preds = %meshBB82 + br i1 undef, label %meshBB94, label %meshBB122 + +bb14: ; preds = %meshBB94 + ret i32 %ooffset.2.lcssa.phi.SV.phi250 + +bb15: ; preds = %meshBB122, %meshBB110, %meshBB78 + unreachable + +meshBB: ; preds = %bb1.i.fragment, %bb3 + %ooffset.2.lcssa.phi.SV.phi233 = phi i32 [ undef, %bb3 ], [ %ooffset.2.lcssa.phi.SV.phi209, %bb1.i.fragment ] ; <i32> [#uses=1] + br i1 undef, label %bb1.i, label %bb4 + +meshBB74: ; preds = %bb7.fragment, %bb2 + br i1 false, label %meshBB118, label %bb2.fragment15 + +meshBB78: ; preds = %bb12.fragment, %bb1.i.fragment + %ooffset.2.lcssa.phi.SV.phi239 = phi i32 [ %ooffset.2.lcssa.phi.SV.phi209, %bb1.i.fragment ], [ %ooffset.2.lcssa.phi.SV.phi250, %bb12.fragment ] ; <i32> [#uses=1] + br i1 false, label %bb15, label %skip_to_newline.exit + +meshBB82: ; preds = %bb13, %bb4.fragment + br i1 false, label %bb5, label %bb13.fragment + +meshBB86: ; preds = %skip_to_newline.exit26, %entry + br i1 undef, label %skip_to_newline.exit26.fragment, label %bb11.fragment + +meshBB90: ; preds = %skip_to_newline.exit26.fragment, %bb + br i1 false, label %bb11.fragment, label %bb.nph + +meshBB94: ; preds = %bb13.fragment, %bb12 + %ooffset.2.lcssa.phi.SV.phi250 = phi i32 [ 0, %bb13.fragment ], [ %ooffset.2.lcssa.phi.SV.phi239, %bb12 ] ; <i32> [#uses=2] + br i1 false, label %bb12.fragment, label %bb14 + +meshBB98: ; preds = %bb1.i, %bb2.fragment15 + %ooffset.2.lcssa.phi.SV.phi209 = phi i32 [ undef, %bb2.fragment15 ], [ %ooffset.2.lcssa.phi.SV.phi203, %bb1.i ] ; <i32> [#uses=2] + br i1 undef, label 
%bb1.i.fragment, label %bb7 + +meshBB102: ; preds = %bb3, %entry + br i1 undef, label %bb5, label %bb + +meshBB106: ; preds = %skip_to_newline.exit26.fragment, %bb1.i23 + br i1 undef, label %bb, label %skip_to_newline.exit26 + +meshBB110: ; preds = %skip_to_newline.exit, %bb1.i23 + br i1 false, label %bb15, label %bb1.i23 + +meshBB114: ; preds = %bb7, %bb.nph + %meshStackVariable115.phi = phi i32 [ 19, %bb7 ], [ 8, %bb.nph ] ; <i32> [#uses=0] + br i1 undef, label %bb.nph.fragment, label %bb7.fragment + +meshBB118: ; preds = %meshBB74, %bb4, %bb.nph.fragment + %meshCmp121 = icmp eq i32 undef, 10 ; <i1> [#uses=1] + br i1 %meshCmp121, label %bb4.fragment, label %bb1.fragment + +meshBB122: ; preds = %bb13.fragment, %bb11.fragment + br i1 false, label %bb1.i, label %bb15 +}
diff --git a/src/LLVM/test/CodeGen/X86/2010-03-04-Mul8Bug.ll b/src/LLVM/test/CodeGen/X86/2010-03-04-Mul8Bug.ll new file mode 100644 index 0000000..48e75e9 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-03-04-Mul8Bug.ll
@@ -0,0 +1,25 @@ +; RUN: llc < %s +; PR6489 +; +; This test case produces a MUL8 instruction and then tries to read the result +; from the AX register instead of AH/AL. That confuses live interval analysis. +; +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +define void @func_56(i64 %p_57, i32*** %p_58) nounwind ssp { +for.end: + %conv49 = trunc i32 undef to i8 ; <i8> [#uses=1] + %div.i = udiv i8 %conv49, 5 ; <i8> [#uses=1] + %conv51 = zext i8 %div.i to i32 ; <i32> [#uses=1] + %call55 = call i32 @qux(i32 undef, i32 -2) nounwind ; <i32> [#uses=1] + %rem.i = urem i32 %call55, -1 ; <i32> [#uses=1] + %cmp57 = icmp uge i32 %conv51, %rem.i ; <i1> [#uses=1] + %conv58 = zext i1 %cmp57 to i32 ; <i32> [#uses=1] + %call85 = call i32 @func_35(i32*** undef, i32 undef, i32 %conv58, i32 1247, i32 0) nounwind ; <i32> [#uses=0] + ret void +} + +declare i32 @func_35(i32***, i32, i32, i32, i32) + +declare i32 @qux(i32, i32)
diff --git a/src/LLVM/test/CodeGen/X86/2010-03-05-ConstantFoldCFG.ll b/src/LLVM/test/CodeGen/X86/2010-03-05-ConstantFoldCFG.ll new file mode 100644 index 0000000..5de1966 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-03-05-ConstantFoldCFG.ll
@@ -0,0 +1,42 @@ +; RUN: llc < %s -verify-machineinstrs +; +; When BRCOND is constant-folded to BR, make sure that PHI nodes don't get +; spurious operands when the CFG is trimmed. +; +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.2" + +define fastcc void @_ZSt16__introsort_loopIPdl17less_than_functorEvT_S2_T0_T1_(double* %__first, double* %__last, i64 %__depth_limit) nounwind ssp { +entry: + br i1 undef, label %bb1, label %bb2 + +bb1: ; preds = %entry + ret void + +bb2: ; preds = %entry + br label %bb2.outer.i + +bb2.outer.i: ; preds = %bb9.i, %bb2 + br i1 undef, label %bb1.i, label %bb5.preheader.i + +bb1.i: ; preds = %bb1.i, %bb2.outer.i + %indvar5.i = phi i64 [ %tmp, %bb1.i ], [ 0, %bb2.outer.i ] ; <i64> [#uses=1] + %tmp = add i64 %indvar5.i, 1 ; <i64> [#uses=2] + %scevgep.i = getelementptr double* undef, i64 %tmp ; <double*> [#uses=0] + br i1 undef, label %bb1.i, label %bb5.preheader.i + +bb5.preheader.i: ; preds = %bb1.i, %bb2.outer.i + br label %bb5.i + +bb5.i: ; preds = %bb5.i, %bb5.preheader.i + br i1 undef, label %bb5.i, label %bb7.i6 + +bb7.i6: ; preds = %bb5.i + br i1 undef, label %bb9.i, label %_ZSt21__unguarded_partitionIPdd17less_than_functorET_S2_S2_T0_T1_.exit + +bb9.i: ; preds = %bb7.i6 + br label %bb2.outer.i + +_ZSt21__unguarded_partitionIPdd17less_than_functorET_S2_S2_T0_T1_.exit: ; preds = %bb7.i6 + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/2010-03-05-EFLAGS-Redef.ll b/src/LLVM/test/CodeGen/X86/2010-03-05-EFLAGS-Redef.ll new file mode 100644 index 0000000..3cca10e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-03-05-EFLAGS-Redef.ll
@@ -0,0 +1,49 @@ +; RUN: llc < %s -verify-machineinstrs +; +; This test case is transformed into a single basic block by the machine +; branch folding pass. That makes a complete mess of the %EFLAGS liveness, but +; we don't care about liveness this late anyway. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.2" + +define i32 @main(i32 %argc, i8** nocapture %argv) ssp { +entry: + br i1 undef, label %bb, label %bb2 + +bb: ; preds = %entry + br label %bb2 + +bb2: ; preds = %bb, %entry + br i1 undef, label %bb3, label %bb5 + +bb3: ; preds = %bb2 + br label %bb5 + +bb5: ; preds = %bb3, %bb2 + br i1 undef, label %bb.nph239, label %bb8 + +bb.nph239: ; preds = %bb5 + unreachable + +bb8: ; preds = %bb5 + br i1 undef, label %bb.nph237, label %bb47 + +bb.nph237: ; preds = %bb8 + unreachable + +bb47: ; preds = %bb8 + br i1 undef, label %bb49, label %bb48 + +bb48: ; preds = %bb47 + unreachable + +bb49: ; preds = %bb47 + br i1 undef, label %bb51, label %bb50 + +bb50: ; preds = %bb49 + ret i32 0 + +bb51: ; preds = %bb49 + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/X86/2010-03-17-ISelBug.ll b/src/LLVM/test/CodeGen/X86/2010-03-17-ISelBug.ll new file mode 100644 index 0000000..ba21902 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-03-17-ISelBug.ll
@@ -0,0 +1,67 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin5 + +; rdar://7761790 + +%"struct..0$_485" = type { i16, i16, i32 } +%union.PPToken = type { %"struct..0$_485" } +%struct.PPOperation = type { %union.PPToken, %union.PPToken, [6 x %union.PPToken], i32, i32, i32, [1 x i32], [0 x i8] } + +define i32* @t() align 2 nounwind { +entry: + %operation = alloca %struct.PPOperation, align 8 ; <%struct.PPOperation*> [#uses=2] + %0 = load i32*** null, align 4 ; [#uses=1] + %1 = ptrtoint i32** %0 to i32 ; <i32> [#uses=1] + %2 = sub nsw i32 %1, undef ; <i32> [#uses=2] + br i1 false, label %bb20, label %bb.nph380 + +bb20: ; preds = %entry + ret i32* null + +bb.nph380: ; preds = %entry + %scevgep403 = getelementptr %struct.PPOperation* %operation, i32 0, i32 1, i32 0, i32 2 ; <i32*> [#uses=1] + %3 = ashr i32 %2, 1 ; <i32> [#uses=1] + %tmp405 = and i32 %3, -2 ; <i32> [#uses=1] + %scevgep408 = getelementptr %struct.PPOperation* %operation, i32 0, i32 1, i32 0, i32 1 ; <i16*> [#uses=1] + %tmp410 = and i32 %2, -4 ; <i32> [#uses=1] + br label %bb169 + +bb169: ; preds = %bb169, %bb.nph380 + %index.6379 = phi i32 [ 0, %bb.nph380 ], [ %4, %bb169 ] ; <i32> [#uses=3] + %tmp404 = mul i32 %index.6379, -2 ; <i32> [#uses=1] + %tmp406 = add i32 %tmp405, %tmp404 ; <i32> [#uses=1] + %scevgep407 = getelementptr i32* %scevgep403, i32 %tmp406 ; <i32*> [#uses=1] + %tmp409 = mul i32 %index.6379, -4 ; <i32> [#uses=1] + %tmp411 = add i32 %tmp410, %tmp409 ; <i32> [#uses=1] + %scevgep412 = getelementptr i16* %scevgep408, i32 %tmp411 ; <i16*> [#uses=1] + store i16 undef, i16* %scevgep412, align 2 + store i32 undef, i32* %scevgep407, align 4 + %4 = add nsw i32 %index.6379, 1 ; <i32> [#uses=1] + br label %bb169 +} + +; PR7368 + +%struct.bufBit_s = type { i8*, i8 } + +define fastcc void @printSwipe([2 x [256 x %struct.bufBit_s]]* nocapture %colourLines) nounwind { +entry: + br label %for.body190 + +for.body261.i: ; preds = %for.body261.i, %for.body190 + %line.3300.i = phi i32 [ undef, %for.body190 ], [ 
%add292.i, %for.body261.i ] ; <i32> [#uses=3] + %conv268.i = and i32 %line.3300.i, 255 ; <i32> [#uses=1] + %tmp278.i = getelementptr [2 x [256 x %struct.bufBit_s]]* %colourLines, i32 undef, i32 %pen.1100, i32 %conv268.i, i32 0 ; <i8**> [#uses=1] + store i8* undef, i8** %tmp278.i + %tmp338 = shl i32 %line.3300.i, 3 ; <i32> [#uses=1] + %tmp339 = and i32 %tmp338, 2040 ; <i32> [#uses=1] + %tmp285.i = getelementptr i8* %scevgep328, i32 %tmp339 ; <i8*> [#uses=1] + store i8 undef, i8* %tmp285.i + %add292.i = add nsw i32 0, %line.3300.i ; <i32> [#uses=1] + br i1 undef, label %for.body190, label %for.body261.i + +for.body190: ; preds = %for.body261.i, %for.body190, %bb.nph104 + %pen.1100 = phi i32 [ 0, %entry ], [ %inc230, %for.body261.i ], [ %inc230, %for.body190 ] ; <i32> [#uses=3] + %scevgep328 = getelementptr [2 x [256 x %struct.bufBit_s]]* %colourLines, i32 undef, i32 %pen.1100, i32 0, i32 1 ; <i8*> [#uses=1] + %inc230 = add i32 %pen.1100, 1 ; <i32> [#uses=2] + br i1 undef, label %for.body190, label %for.body261.i +}
diff --git a/src/LLVM/test/CodeGen/X86/2010-04-06-SSEDomainFixCrash.ll b/src/LLVM/test/CodeGen/X86/2010-04-06-SSEDomainFixCrash.ll new file mode 100644 index 0000000..864ebf1 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-04-06-SSEDomainFixCrash.ll
@@ -0,0 +1,68 @@ +; RUN: llc < %s -O3 -relocation-model=pic -disable-fp-elim -mcpu=nocona +; +; This test case is reduced from Bullet. It crashes SSEDomainFix. +; +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" +target triple = "i386-apple-darwin10.0" + +declare i32 @_ZN11HullLibrary16CreateConvexHullERK8HullDescR10HullResult(i8*, i8* nocapture, i8* nocapture) ssp align 2 + +define void @_ZN17btSoftBodyHelpers4DrawEP10btSoftBodyP12btIDebugDrawi(i8* %psb, i8* %idraw, i32 %drawflags) ssp align 2 { +entry: + br i1 undef, label %bb92, label %bb58 + +bb58: ; preds = %entry + %0 = invoke i32 @_ZN11HullLibrary16CreateConvexHullERK8HullDescR10HullResult(i8* undef, i8* undef, i8* undef) + to label %invcont64 unwind label %lpad159 ; <i32> [#uses=0] + +invcont64: ; preds = %bb58 + br i1 undef, label %invcont65, label %bb.i.i + +bb.i.i: ; preds = %invcont64 + %1 = load <4 x float>* undef, align 16 ; <<4 x float>> [#uses=5] + br i1 undef, label %bb.nph.i.i, label %invcont65 + +bb.nph.i.i: ; preds = %bb.i.i + %tmp22.i.i = bitcast <4 x float> %1 to i128 ; <i128> [#uses=1] + %tmp23.i.i = trunc i128 %tmp22.i.i to i32 ; <i32> [#uses=1] + %2 = bitcast i32 %tmp23.i.i to float ; <float> [#uses=1] + %tmp6.i = extractelement <4 x float> %1, i32 1 ; <float> [#uses=1] + %tmp2.i = extractelement <4 x float> %1, i32 2 ; <float> [#uses=1] + br label %bb1.i.i + +bb1.i.i: ; preds = %bb1.i.i, %bb.nph.i.i + %.tmp6.0.i.i = phi float [ %tmp2.i, %bb.nph.i.i ], [ %5, %bb1.i.i ] ; <float> [#uses=1] + %.tmp5.0.i.i = phi float [ %tmp6.i, %bb.nph.i.i ], [ %4, %bb1.i.i ] ; <float> [#uses=1] + %.tmp.0.i.i = phi float [ %2, %bb.nph.i.i ], [ %3, %bb1.i.i ] ; <float> [#uses=1] + %3 = fadd float %.tmp.0.i.i, undef ; <float> [#uses=2] + %4 = fadd float %.tmp5.0.i.i, undef ; <float> [#uses=2] + %5 = fadd float %.tmp6.0.i.i, undef ; <float> [#uses=2] + br i1 undef, label %bb2.return.loopexit_crit_edge.i.i, 
label %bb1.i.i + +bb2.return.loopexit_crit_edge.i.i: ; preds = %bb1.i.i + %tmp8.i = insertelement <4 x float> %1, float %3, i32 0 ; <<4 x float>> [#uses=1] + %tmp4.i = insertelement <4 x float> %tmp8.i, float %4, i32 1 ; <<4 x float>> [#uses=1] + %tmp.i = insertelement <4 x float> %tmp4.i, float %5, i32 2 ; <<4 x float>> [#uses=1] + br label %invcont65 + +invcont65: ; preds = %bb2.return.loopexit_crit_edge.i.i, %bb.i.i, %invcont64 + %.0.i = phi <4 x float> [ %tmp.i, %bb2.return.loopexit_crit_edge.i.i ], [ undef, %invcont64 ], [ %1, %bb.i.i ] ; <<4 x float>> [#uses=1] + %tmp15.i = extractelement <4 x float> %.0.i, i32 2 ; <float> [#uses=1] + %6 = fmul float %tmp15.i, undef ; <float> [#uses=1] + br label %bb.i265 + +bb.i265: ; preds = %bb.i265, %invcont65 + %7 = fsub float 0.000000e+00, %6 ; <float> [#uses=1] + store float %7, float* undef, align 4 + br label %bb.i265 + +bb92: ; preds = %entry + unreachable + +lpad159: ; preds = %bb58 + %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + unreachable +} + +declare i32 @__gxx_personality_v0(...)
diff --git a/src/LLVM/test/CodeGen/X86/2010-04-07-DbgValueOtherTargets.ll b/src/LLVM/test/CodeGen/X86/2010-04-07-DbgValueOtherTargets.ll new file mode 100644 index 0000000..42f19b3 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-04-07-DbgValueOtherTargets.ll
@@ -0,0 +1,28 @@ +; RUN: llc -O0 -march=x86 -asm-verbose < %s | FileCheck %s +; RUN: llc -O0 -march=x86-64 -asm-verbose < %s | FileCheck %s +; Check that DEBUG_VALUE comments come through on a variety of targets. + +define i32 @main() nounwind ssp { +entry: +; CHECK: DEBUG_VALUE + call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9 + ret i32 0, !dbg !10 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.sp = !{!0} + +!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5} +!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 0} +!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] +!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!9 = metadata !{i32 3, i32 11, metadata !8, null} +!10 = metadata !{i32 4, i32 2, metadata !8, null}
diff --git a/src/LLVM/test/CodeGen/X86/2010-04-08-CoalescerBug.ll b/src/LLVM/test/CodeGen/X86/2010-04-08-CoalescerBug.ll new file mode 100644 index 0000000..9a5958e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-04-08-CoalescerBug.ll
@@ -0,0 +1,26 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | FileCheck %s +; rdar://7842028 + +; Do not delete partially dead copy instructions. +; %RDI<def,dead> = MOV64rr %RAX<kill>, %EDI<imp-def> +; REP_MOVSD %ECX<imp-def,dead>, %EDI<imp-def,dead>, %ESI<imp-def,dead>, %ECX<imp-use,kill>, %EDI<imp-use,kill>, %ESI<imp-use,kill> + + +%struct.F = type { %struct.FC*, i32, i32, i8, i32, i32, i32 } +%struct.FC = type { [10 x i8], [32 x i32], %struct.FC*, i32 } + +define void @t(%struct.F* %this) nounwind { +entry: +; CHECK: t: +; CHECK: addq $12, %rsi + %BitValueArray = alloca [32 x i32], align 4 + %tmp2 = getelementptr inbounds %struct.F* %this, i64 0, i32 0 + %tmp3 = load %struct.FC** %tmp2, align 8 + %tmp4 = getelementptr inbounds %struct.FC* %tmp3, i64 0, i32 1, i64 0 + %tmp5 = bitcast [32 x i32]* %BitValueArray to i8* + %tmp6 = bitcast i32* %tmp4 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp5, i8* %tmp6, i64 128, i32 4, i1 false) + unreachable +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/2010-04-13-AnalyzeBranchCrash.ll b/src/LLVM/test/CodeGen/X86/2010-04-13-AnalyzeBranchCrash.ll new file mode 100644 index 0000000..fadbd21 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-04-13-AnalyzeBranchCrash.ll
@@ -0,0 +1,42 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=core2 +; rdar://7857830 + +%0 = type opaque +%1 = type opaque + +define void @t(%0* %self, i8* nocapture %_cmd, %1* %scroller, i32 %hitPart, float %multiplier) nounwind optsize ssp { +entry: + switch i32 %hitPart, label %if.else [ + i32 7, label %if.then + i32 8, label %if.then + ] + +if.then: ; preds = %entry, %entry + %tmp69 = load float* null, align 4 ; <float> [#uses=1] + %cmp19 = icmp eq %1* null, %scroller ; <i1> [#uses=2] + %cond = select i1 %cmp19, float %tmp69, float 0.000000e+00 ; <float> [#uses=1] + %call36 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*)*)(i8* undef, i8* undef) nounwind optsize ; <i64> [#uses=2] + br i1 %cmp19, label %cond.true32, label %cond.false39 + +cond.true32: ; preds = %if.then + %sroa.store.elt68 = lshr i64 %call36, 32 ; <i64> [#uses=1] + %0 = trunc i64 %sroa.store.elt68 to i32 ; <i32> [#uses=1] + br label %cond.end47 + +cond.false39: ; preds = %if.then + %1 = trunc i64 %call36 to i32 ; <i32> [#uses=1] + br label %cond.end47 + +cond.end47: ; preds = %cond.false39, %cond.true32 + %cond48.in = phi i32 [ %0, %cond.true32 ], [ %1, %cond.false39 ] ; <i32> [#uses=1] + %cond48 = bitcast i32 %cond48.in to float ; <float> [#uses=1] + %div = fdiv float %cond, undef ; <float> [#uses=1] + %div58 = fdiv float %div, %cond48 ; <float> [#uses=1] + call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, float)*)(i8* undef, i8* undef, float %div58) nounwind optsize + ret void + +if.else: ; preds = %entry + ret void +} + +declare i8* @objc_msgSend(i8*, i8*, ...)
diff --git a/src/LLVM/test/CodeGen/X86/2010-04-21-CoalescerBug.ll b/src/LLVM/test/CodeGen/X86/2010-04-21-CoalescerBug.ll new file mode 100644 index 0000000..d598764 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-04-21-CoalescerBug.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin +; rdar://7886733 + +%struct.CMTime = type <{ i64, i32, i32, i64 }> +%struct.CMTimeMapping = type { %struct.CMTimeRange, %struct.CMTimeRange } +%struct.CMTimeRange = type { %struct.CMTime, %struct.CMTime } + +define void @t(%struct.CMTimeMapping* noalias nocapture sret %agg.result) nounwind optsize ssp { +entry: + %agg.result1 = bitcast %struct.CMTimeMapping* %agg.result to i8* ; <i8*> [#uses=1] + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %agg.result1, i8* null, i64 96, i32 4, i1 false) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll b/src/LLVM/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll new file mode 100644 index 0000000..69787c7 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
@@ -0,0 +1,94 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | FileCheck %s +; There are no MMX operations here, so we use XMM or i64. + +define void @ti8(double %a, double %b) nounwind { +entry: + %tmp1 = bitcast double %a to <8 x i8> + %tmp2 = bitcast double %b to <8 x i8> + %tmp3 = add <8 x i8> %tmp1, %tmp2 +; CHECK: paddb %xmm1, %xmm0 + store <8 x i8> %tmp3, <8 x i8>* null + ret void +} + +define void @ti16(double %a, double %b) nounwind { +entry: + %tmp1 = bitcast double %a to <4 x i16> + %tmp2 = bitcast double %b to <4 x i16> + %tmp3 = add <4 x i16> %tmp1, %tmp2 +; CHECK: paddw %xmm1, %xmm0 + store <4 x i16> %tmp3, <4 x i16>* null + ret void +} + +define void @ti32(double %a, double %b) nounwind { +entry: + %tmp1 = bitcast double %a to <2 x i32> + %tmp2 = bitcast double %b to <2 x i32> + %tmp3 = add <2 x i32> %tmp1, %tmp2 +; CHECK: paddd %xmm1, %xmm0 + store <2 x i32> %tmp3, <2 x i32>* null + ret void +} + +; CHECK: ti64 +define void @ti64(double %a, double %b) nounwind { +entry: + %tmp1 = bitcast double %a to <1 x i64> + %tmp2 = bitcast double %b to <1 x i64> + %tmp3 = add <1 x i64> %tmp1, %tmp2 +; CHECK: addq + store <1 x i64> %tmp3, <1 x i64>* null + ret void +} + +; MMX intrinsics calls get us MMX instructions. 
+; CHECK: ti8a +define void @ti8a(double %a, double %b) nounwind { +entry: + %tmp1 = bitcast double %a to x86_mmx +; CHECK: movdq2q + %tmp2 = bitcast double %b to x86_mmx +; CHECK: movdq2q + %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %tmp1, x86_mmx %tmp2) + store x86_mmx %tmp3, x86_mmx* null + ret void +} + +define void @ti16a(double %a, double %b) nounwind { +entry: + %tmp1 = bitcast double %a to x86_mmx +; CHECK: movdq2q + %tmp2 = bitcast double %b to x86_mmx +; CHECK: movdq2q + %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %tmp1, x86_mmx %tmp2) + store x86_mmx %tmp3, x86_mmx* null + ret void +} + +define void @ti32a(double %a, double %b) nounwind { +entry: + %tmp1 = bitcast double %a to x86_mmx +; CHECK: movdq2q + %tmp2 = bitcast double %b to x86_mmx +; CHECK: movdq2q + %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %tmp1, x86_mmx %tmp2) + store x86_mmx %tmp3, x86_mmx* null + ret void +} + +define void @ti64a(double %a, double %b) nounwind { +entry: + %tmp1 = bitcast double %a to x86_mmx +; CHECK: movdq2q + %tmp2 = bitcast double %b to x86_mmx +; CHECK: movdq2q + %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %tmp1, x86_mmx %tmp2) + store x86_mmx %tmp3, x86_mmx* null + ret void +} + +declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) +declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) +declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) +declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx)
diff --git a/src/LLVM/test/CodeGen/X86/2010-04-29-CoalescerCrash.ll b/src/LLVM/test/CodeGen/X86/2010-04-29-CoalescerCrash.ll new file mode 100644 index 0000000..a22f38a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-04-29-CoalescerCrash.ll
@@ -0,0 +1,142 @@ +; RUN: llc < %s -relocation-model=pic -disable-fp-elim -verify-machineinstrs +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +define void @_ZN12_GLOBAL__N_113SPUAsmPrinter15EmitInstructionEPKN4llvm12MachineInstrE(i8* %this, i8* %MI) nounwind inlinehint align 2 { +entry: + br i1 undef, label %"3.i", label %"4.i" + +"3.i": ; preds = %entry + unreachable + +"4.i": ; preds = %entry + switch i32 undef, label %_ZN12_GLOBAL__N_113SPUAsmPrinter16printInstructionEPKN4llvm12MachineInstrERNS1_11raw_ostreamE.exit [ + i32 1, label %"5.i" + i32 2, label %"6.i" + i32 3, label %"7.i" + i32 4, label %"8.i" + i32 5, label %"9.i" + ] + +"5.i": ; preds = %"4.i" + unreachable + +"6.i": ; preds = %"4.i" + switch i32 undef, label %"11.i" [ + i32 1, label %"12.i" + i32 2, label %"13.i" + i32 3, label %_ZN12_GLOBAL__N_113SPUAsmPrinter16printInstructionEPKN4llvm12MachineInstrERNS1_11raw_ostreamE.exit + i32 4, label %"14.i" + ] + +"7.i": ; preds = %"4.i" + unreachable + +"8.i": ; preds = %"4.i" + unreachable + +"9.i": ; preds = %"4.i" + unreachable + +"11.i": ; preds = %"6.i" + switch i32 undef, label %"15.i" [ + i32 1, label %"16.i" + i32 2, label %"17.i" + i32 3, label %"18.i" + i32 4, label %"19.i" + i32 5, label %"20.i" + i32 6, label %"21.i" + i32 7, label %"24.i" + i32 8, label %"27.i" + i32 9, label %"28.i" + i32 10, label %"29.i" + i32 11, label %"30.i" + i32 12, label %"31.i" + i32 13, label %"32.i" + i32 14, label %"39.i" + ] + +"12.i": ; preds = %"6.i" + unreachable + +"13.i": ; preds = %"6.i" + unreachable + +"14.i": ; preds = %"6.i" + unreachable + +"15.i": ; preds = %"11.i" + unreachable + +"16.i": ; preds = %"11.i" + unreachable + +"17.i": ; preds = %"11.i" + unreachable + +"18.i": ; preds = %"11.i" + unreachable + +"19.i": ; preds = %"11.i" + unreachable + +"20.i": ; preds = %"11.i" + unreachable + 
+"21.i": ; preds = %"11.i" + br i1 undef, label %"22.i", label %"23.i" + +"22.i": ; preds = %"21.i" + unreachable + +"23.i": ; preds = %"21.i" + unreachable + +"24.i": ; preds = %"11.i" + unreachable + +"27.i": ; preds = %"11.i" + unreachable + +"28.i": ; preds = %"11.i" + unreachable + +"29.i": ; preds = %"11.i" + unreachable + +"30.i": ; preds = %"11.i" + unreachable + +"31.i": ; preds = %"11.i" + unreachable + +"32.i": ; preds = %"11.i" + unreachable + +"39.i": ; preds = %"11.i" + br i1 undef, label %"41.i", label %"40.i" + +"40.i": ; preds = %"39.i" + unreachable + +"41.i": ; preds = %"39.i" + %0 = call i64 @_ZNK4llvm14MachineOperand6getImmEv(i8 undef) nounwind inlinehint ; <i64> [#uses=1] + %1 = trunc i64 %0 to i16 ; <i16> [#uses=1] + br i1 undef, label %"42.i", label %"43.i" + +"42.i": ; preds = %"41.i" + unreachable + +"43.i": ; preds = %"41.i" + %2 = and i16 %1, -16 ; <i16> [#uses=1] + %3 = sext i16 %2 to i64 ; <i64> [#uses=1] + %4 = call i8 @_ZN4llvm11raw_ostreamlsEl(i8 undef, i64 %3) nounwind ; <i8> [#uses=0] + unreachable + +_ZN12_GLOBAL__N_113SPUAsmPrinter16printInstructionEPKN4llvm12MachineInstrERNS1_11raw_ostreamE.exit: ; preds = %"6.i", %"4.i" + ret void +} + +declare i64 @_ZNK4llvm14MachineOperand6getImmEv(i8) nounwind inlinehint align 2 + +declare i8 @_ZN4llvm11raw_ostreamlsEl(i8, i64)
diff --git a/src/LLVM/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll b/src/LLVM/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll new file mode 100644 index 0000000..7af58dc --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll
@@ -0,0 +1,141 @@ +; RUN: llc < %s -O0 -regalloc=fast -relocation-model=pic -disable-fp-elim | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" +target triple = "i386-apple-darwin10.0.0" + +%struct.S = type { [2 x i8*] } + +@_ZTIi = external constant i8* ; <i8**> [#uses=1] +@.str = internal constant [4 x i8] c"%p\0A\00" ; <[4 x i8]*> [#uses=1] +@llvm.used = appending global [1 x i8*] [i8* bitcast (i8* (%struct.S*, i32, %struct.S*)* @_Z4test1SiS_ to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +; Verify that %esi gets spilled before the call. +; CHECK: Z4test1SiS +; CHECK: movl %esi,{{.*}}(%ebp) +; CHECK: calll __Z6throwsv + +define i8* @_Z4test1SiS_(%struct.S* byval %s1, i32 %n, %struct.S* byval %s2) ssp { +entry: + %retval = alloca i8*, align 4 ; <i8**> [#uses=2] + %n.addr = alloca i32, align 4 ; <i32*> [#uses=1] + %_rethrow = alloca i8* ; <i8**> [#uses=4] + %0 = alloca i32, align 4 ; <i32*> [#uses=1] + %cleanup.dst = alloca i32 ; <i32*> [#uses=3] + %cleanup.dst7 = alloca i32 ; <i32*> [#uses=6] + store i32 %n, i32* %n.addr + invoke void @_Z6throwsv() + to label %invoke.cont unwind label %try.handler + +invoke.cont: ; preds = %entry + store i32 1, i32* %cleanup.dst7 + br label %finally + +terminate.handler: ; preds = %match.end + %exc = call i8* @llvm.eh.exception() ; <i8*> [#uses=1] + %1 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exc, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i32 1) ; <i32> [#uses=0] + call void @_ZSt9terminatev() noreturn nounwind + unreachable + +try.handler: ; preds = %entry + %exc1 = call i8* @llvm.eh.exception() ; <i8*> [#uses=3] + %selector = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exc1, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i8* bitcast (i8** @_ZTIi to i8*), i8* null) ; <i32> [#uses=1] + %2 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) ; <i32> 
[#uses=1] + %3 = icmp eq i32 %selector, %2 ; <i1> [#uses=1] + br i1 %3, label %match, label %catch.next + +match: ; preds = %try.handler + %4 = call i8* @__cxa_begin_catch(i8* %exc1) ; <i8*> [#uses=1] + %5 = bitcast i8* %4 to i32* ; <i32*> [#uses=1] + %6 = load i32* %5 ; <i32> [#uses=1] + store i32 %6, i32* %0 + %call = invoke i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), %struct.S* %s2) + to label %invoke.cont2 unwind label %match.handler ; <i32> [#uses=0] + +invoke.cont2: ; preds = %match + store i32 1, i32* %cleanup.dst + br label %match.end + +match.handler: ; preds = %match + %exc3 = call i8* @llvm.eh.exception() ; <i8*> [#uses=2] + %7 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exc3, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i32 0) ; <i32> [#uses=0] + store i8* %exc3, i8** %_rethrow + store i32 2, i32* %cleanup.dst + br label %match.end + +cleanup.pad: ; preds = %cleanup.switch + store i32 1, i32* %cleanup.dst7 + br label %finally + +cleanup.pad4: ; preds = %cleanup.switch + store i32 2, i32* %cleanup.dst7 + br label %finally + +match.end: ; preds = %match.handler, %invoke.cont2 + invoke void @__cxa_end_catch() + to label %invoke.cont5 unwind label %terminate.handler + +invoke.cont5: ; preds = %match.end + br label %cleanup.switch + +cleanup.switch: ; preds = %invoke.cont5 + %tmp = load i32* %cleanup.dst ; <i32> [#uses=1] + switch i32 %tmp, label %cleanup.end [ + i32 1, label %cleanup.pad + i32 2, label %cleanup.pad4 + ] + +cleanup.end: ; preds = %cleanup.switch + store i32 2, i32* %cleanup.dst7 + br label %finally + +catch.next: ; preds = %try.handler + store i8* %exc1, i8** %_rethrow + store i32 2, i32* %cleanup.dst7 + br label %finally + +finally: ; preds = %catch.next, %cleanup.end, %cleanup.pad4, %cleanup.pad, %invoke.cont + br label %cleanup.switch9 + +cleanup.switch9: ; preds = %finally + %tmp8 = load i32* %cleanup.dst7 ; <i32> [#uses=1] + switch i32 %tmp8, label %cleanup.end10 [ + i32 1, label 
%finally.end + i32 2, label %finally.throw + ] + +cleanup.end10: ; preds = %cleanup.switch9 + br label %finally.end + +finally.throw: ; preds = %cleanup.switch9 + %8 = load i8** %_rethrow ; <i8*> [#uses=1] + call void @_Unwind_Resume_or_Rethrow(i8* %8) + unreachable + +finally.end: ; preds = %cleanup.end10, %cleanup.switch9 + %tmp11 = getelementptr inbounds %struct.S* %s1, i32 0, i32 0 ; <[2 x i8*]*> [#uses=1] + %arraydecay = getelementptr inbounds [2 x i8*]* %tmp11, i32 0, i32 0 ; <i8**> [#uses=1] + %arrayidx = getelementptr inbounds i8** %arraydecay, i32 1 ; <i8**> [#uses=1] + %tmp12 = load i8** %arrayidx ; <i8*> [#uses=1] + store i8* %tmp12, i8** %retval + %9 = load i8** %retval ; <i8*> [#uses=1] + ret i8* %9 +} + +declare void @_Z6throwsv() ssp + +declare i32 @__gxx_personality_v0(...) + +declare i8* @llvm.eh.exception() nounwind readonly + +declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind + +declare void @_ZSt9terminatev() + +declare void @_Unwind_Resume_or_Rethrow(i8*) + +declare i32 @llvm.eh.typeid.for(i8*) nounwind + +declare i8* @__cxa_begin_catch(i8*) + +declare i32 @printf(i8*, ...) + +declare void @__cxa_end_catch()
diff --git a/src/LLVM/test/CodeGen/X86/2010-05-03-CoalescerSubRegClobber.ll b/src/LLVM/test/CodeGen/X86/2010-05-03-CoalescerSubRegClobber.ll new file mode 100644 index 0000000..5accfd7 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-05-03-CoalescerSubRegClobber.ll
@@ -0,0 +1,33 @@ +; RUN: llc < %s | FileCheck %s +; PR6941 +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +define i32 @snd_xbytes(i32 %v, i32 %from, i32 %to) nounwind readnone ssp { +entry: + %cmp19 = icmp eq i32 %to, 0 ; <i1> [#uses=1] + br i1 %cmp19, label %while.end, label %while.cond + +while.cond: ; preds = %entry, %while.cond + %y.021 = phi i32 [ %rem, %while.cond ], [ %to, %entry ] ; <i32> [#uses=3] + %x.020 = phi i32 [ %y.021, %while.cond ], [ %from, %entry ] ; <i32> [#uses=1] + %rem = urem i32 %x.020, %y.021 ; <i32> [#uses=2] + %cmp = icmp eq i32 %rem, 0 ; <i1> [#uses=1] + br i1 %cmp, label %while.end, label %while.cond + +while.end: ; preds = %while.cond, %entry + %x.0.lcssa = phi i32 [ %from, %entry ], [ %y.021, %while.cond ] ; <i32> [#uses=2] + %div = udiv i32 %from, %x.0.lcssa ; <i32> [#uses=1] + %div11 = udiv i32 %to, %x.0.lcssa ; <i32> [#uses=1] + %conv = zext i32 %v to i64 ; <i64> [#uses=1] + %conv14 = zext i32 %div11 to i64 ; <i64> [#uses=1] +; Verify that we don't clobber %eax after putting the imulq result in %rax +; CHECK: imulq %r{{.}}x, %r[[RES:..]] +; CHECK-NOT: movl {{.*}}, %e[[RES]] +; CHECK: div + %mul = mul i64 %conv14, %conv ; <i64> [#uses=1] + %conv16 = zext i32 %div to i64 ; <i64> [#uses=1] + %div17 = udiv i64 %mul, %conv16 ; <i64> [#uses=1] + %conv18 = trunc i64 %div17 to i32 ; <i32> [#uses=1] + ret i32 %conv18 +}
diff --git a/src/LLVM/test/CodeGen/X86/2010-05-05-LocalAllocEarlyClobber.ll b/src/LLVM/test/CodeGen/X86/2010-05-05-LocalAllocEarlyClobber.ll new file mode 100644 index 0000000..74a5ec2 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-05-05-LocalAllocEarlyClobber.ll
@@ -0,0 +1,32 @@ +; RUN: llc < %s -O0 -regalloc=fast | FileCheck %s +; PR6520 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" +target triple = "i386-apple-darwin10.0.0" + +%0 = type { i8*, i8*, i32 } + +define i8* @func() nounwind ssp { +entry: + %retval = alloca i8*, align 4 ; <i8**> [#uses=2] + %ret = alloca i8*, align 4 ; <i8**> [#uses=2] + %p = alloca i8*, align 4 ; <i8**> [#uses=1] + %t = alloca i32, align 4 ; <i32*> [#uses=1] +; The earlyclobber $1 should only appear once. It should not be shared. +; CHECK: deafbeef, [[REG:%e.x]] +; CHECK-NOT: [[REG]] +; CHECK: InlineAsm End + %0 = call %0 asm "mov $$0xdeafbeef, $1\0A\09mov $$0xcafebabe, $0\0A\09mov $0, $2\0A\09", "=&r,=&r,=&{cx},~{dirflag},~{fpsr},~{flags}"() nounwind, !srcloc !0 ; <%0> [#uses=3] + %asmresult = extractvalue %0 %0, 0 ; <i8*> [#uses=1] + %asmresult1 = extractvalue %0 %0, 1 ; <i8*> [#uses=1] + %asmresult2 = extractvalue %0 %0, 2 ; <i32> [#uses=1] + store i8* %asmresult, i8** %ret + store i8* %asmresult1, i8** %p + store i32 %asmresult2, i32* %t + %tmp = load i8** %ret ; <i8*> [#uses=1] + store i8* %tmp, i8** %retval + %1 = load i8** %retval ; <i8*> [#uses=1] + ret i8* %1 +} + +!0 = metadata !{i32 79}
diff --git a/src/LLVM/test/CodeGen/X86/2010-05-06-LocalInlineAsmClobber.ll b/src/LLVM/test/CodeGen/X86/2010-05-06-LocalInlineAsmClobber.ll new file mode 100644 index 0000000..90eb84d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-05-06-LocalInlineAsmClobber.ll
@@ -0,0 +1,10 @@ +; RUN: llc -regalloc=fast %s -o %t +; PR7066 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @sys_clone(i32 (i8*)* %fn, i8* %child_stack, i32 %flags, i8* %arg, i32* %parent_tidptr, i8* %newtls, i32* %child_tidptr) nounwind { + call i64 asm sideeffect "", "={ax},0,i,i,r,{si},{di},r,{dx},imr,imr,~{sp},~{memory},~{r8},~{r10},~{r11},~{cx},~{dirflag},~{fpsr},~{flags}"(i64 4294967274, i32 56, i32 60, i32 (i8*)* undef, i8* undef, i32 undef, i8* undef, i32* undef, i8* undef, i32* undef) nounwind ; <i64> [#uses=0] + ret i32 undef +}
diff --git a/src/LLVM/test/CodeGen/X86/2010-05-07-ldconvert.ll b/src/LLVM/test/CodeGen/X86/2010-05-07-ldconvert.ll new file mode 100644 index 0000000..0ba6a8f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-05-07-ldconvert.ll
@@ -0,0 +1,27 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin11 +; PR 7087 - used to crash + +define i32 @main() ssp { +entry: + %retval = alloca i32, align 4 ; <i32*> [#uses=2] + %r = alloca i32, align 4 ; <i32*> [#uses=2] + store i32 0, i32* %retval + %tmp = call x86_fp80 @llvm.powi.f80(x86_fp80 0xK3FFF8000000000000000, i32 -64) ; <x86_fp80> [#uses=1] + %conv = fptosi x86_fp80 %tmp to i32 ; <i32> [#uses=1] + store i32 %conv, i32* %r + %tmp1 = load i32* %r ; <i32> [#uses=1] + %tobool = icmp ne i32 %tmp1, 0 ; <i1> [#uses=1] + br i1 %tobool, label %if.then, label %if.end + +if.then: ; preds = %entry + call void @_Z1fv() + br label %if.end + +if.end: ; preds = %if.then, %entry + %0 = load i32* %retval ; <i32> [#uses=1] + ret i32 %0 +} + +declare x86_fp80 @llvm.powi.f80(x86_fp80, i32) nounwind readonly + +declare void @_Z1fv()
diff --git a/src/LLVM/test/CodeGen/X86/2010-05-10-DAGCombinerBug.ll b/src/LLVM/test/CodeGen/X86/2010-05-10-DAGCombinerBug.ll new file mode 100644 index 0000000..e719da3 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-05-10-DAGCombinerBug.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin10 +; PR7018 +; rdar://7939869 + +define i32 @CXB30130(i32 %num1, i16* nocapture %num2, float* nocapture %num3, double* nocapture %num4) nounwind ssp { +entry: + %0 = load i16* %num2, align 2 ; <i16> [#uses=2] + %1 = mul nsw i16 %0, %0 ; <i16> [#uses=1] + store i16 %1, i16* %num2, align 2 + ret i32 undef +}
diff --git a/src/LLVM/test/CodeGen/X86/2010-05-12-FastAllocKills.ll b/src/LLVM/test/CodeGen/X86/2010-05-12-FastAllocKills.ll new file mode 100644 index 0000000..36a99d6 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-05-12-FastAllocKills.ll
@@ -0,0 +1,59 @@ +; RUN: llc -regalloc=fast -verify-machineinstrs < %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin" + +; This test causes a virtual FP register to be redefined while it is live: +;BB#5: derived from LLVM BB %bb10 +; Predecessors according to CFG: BB#4 BB#5 +; %reg1024<def> = MOV_Fp8080 %reg1034 +; %reg1025<def> = MUL_Fp80m32 %reg1024, %RIP, 1, %reg0, <cp#0>, %reg0; mem:LD4[ConstantPool] +; %reg1034<def> = MOV_Fp8080 %reg1025 +; FP_REG_KILL %FP0<imp-def>, %FP1<imp-def>, %FP2<imp-def>, %FP3<imp-def>, %FP4<imp-def>, %FP5<imp-def>, %FP6<imp-def> +; JMP_4 <BB#5> +; Successors according to CFG: BB#5 +; +; The X86FP pass needs good kill flags, like on %FP0 representing %reg1034: +;BB#5: derived from LLVM BB %bb10 +; Predecessors according to CFG: BB#4 BB#5 +; %FP0<def> = LD_Fp80m <fi#3>, 1, %reg0, 0, %reg0; mem:LD10[FixedStack3](align=4) +; %FP1<def> = MOV_Fp8080 %FP0<kill> +; %FP2<def> = MUL_Fp80m32 %FP1, %RIP, 1, %reg0, <cp#0>, %reg0; mem:LD4[ConstantPool] +; %FP0<def> = MOV_Fp8080 %FP2 +; ST_FpP80m <fi#3>, 1, %reg0, 0, %reg0, %FP0<kill>; mem:ST10[FixedStack3](align=4) +; ST_FpP80m <fi#4>, 1, %reg0, 0, %reg0, %FP1<kill>; mem:ST10[FixedStack4](align=4) +; ST_FpP80m <fi#5>, 1, %reg0, 0, %reg0, %FP2<kill>; mem:ST10[FixedStack5](align=4) +; FP_REG_KILL %FP0<imp-def>, %FP1<imp-def>, %FP2<imp-def>, %FP3<imp-def>, %FP4<imp-def>, %FP5<imp-def>, %FP6<imp-def> +; JMP_4 <BB#5> +; Successors according to CFG: BB#5 + +define fastcc i32 @sqlite3AtoF(i8* %z, double* nocapture %pResult) nounwind ssp { +entry: + br i1 undef, label %bb2, label %bb1.i.i + +bb1.i.i: ; preds = %entry + unreachable + +bb2: ; preds = %entry + br i1 undef, label %isdigit339.exit11.preheader, label %bb13 + +isdigit339.exit11.preheader: ; preds = %bb2 + br i1 undef, label %bb12, label %bb10 + +bb10: ; preds = %bb10, 
%isdigit339.exit11.preheader + %divisor.041 = phi x86_fp80 [ %0, %bb10 ], [ 0xK3FFF8000000000000000, %isdigit339.exit11.preheader ] ; <x86_fp80> [#uses=1] + %0 = fmul x86_fp80 %divisor.041, 0xK4002A000000000000000 ; <x86_fp80> [#uses=2] + br i1 false, label %bb12, label %bb10 + +bb12: ; preds = %bb10, %isdigit339.exit11.preheader + %divisor.0.lcssa = phi x86_fp80 [ 0xK3FFF8000000000000000, %isdigit339.exit11.preheader ], [ %0, %bb10 ] ; <x86_fp80> [#uses=0] + br label %bb13 + +bb13: ; preds = %bb12, %bb2 + br i1 undef, label %bb34, label %bb36 + +bb34: ; preds = %bb13 + br label %bb36 + +bb36: ; preds = %bb34, %bb13 + ret i32 undef +}
diff --git a/src/LLVM/test/CodeGen/X86/2010-05-16-nosseconversion.ll b/src/LLVM/test/CodeGen/X86/2010-05-16-nosseconversion.ll new file mode 100644 index 0000000..889575c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-05-16-nosseconversion.ll
@@ -0,0 +1,12 @@ +; RUN: llc -mtriple=x86_64-apple-darwin -mattr=-sse < %s +; PR 7135 + +@x = common global i64 0 ; <i64*> [#uses=1] + +define i32 @foo() nounwind readonly ssp { +entry: + %0 = load i64* @x, align 8 ; <i64> [#uses=1] + %1 = uitofp i64 %0 to double ; <double> [#uses=1] + %2 = fptosi double %1 to i32 ; <i32> [#uses=1] + ret i32 %2 +}
diff --git a/src/LLVM/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll b/src/LLVM/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll new file mode 100644 index 0000000..2fceab6 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
@@ -0,0 +1,245 @@ +; RUN: llc -mtriple=x86_64-pc-linux -O2 < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-pc-linux -O2 -regalloc=basic < %s | FileCheck %s +; Test to check .debug_loc support. This test case emits many debug_loc entries. + +; CHECK: Loc expr size +; CHECK-NEXT: .short +; CHECK-NEXT: .Ltmp +; CHECK-NEXT: DW_OP_reg + +%0 = type { double } + +define hidden %0 @__divsc3(float %a, float %b, float %c, float %d) nounwind readnone { +entry: + tail call void @llvm.dbg.value(metadata !{float %a}, i64 0, metadata !0) + tail call void @llvm.dbg.value(metadata !{float %b}, i64 0, metadata !11) + tail call void @llvm.dbg.value(metadata !{float %c}, i64 0, metadata !12) + tail call void @llvm.dbg.value(metadata !{float %d}, i64 0, metadata !13) + %0 = tail call float @fabsf(float %c) nounwind readnone, !dbg !19 ; <float> [#uses=1] + %1 = tail call float @fabsf(float %d) nounwind readnone, !dbg !19 ; <float> [#uses=1] + %2 = fcmp olt float %0, %1, !dbg !19 ; <i1> [#uses=1] + br i1 %2, label %bb, label %bb1, !dbg !19 + +bb: ; preds = %entry + %3 = fdiv float %c, %d, !dbg !20 ; <float> [#uses=3] + tail call void @llvm.dbg.value(metadata !{float %3}, i64 0, metadata !16), !dbg !20 + %4 = fmul float %3, %c, !dbg !21 ; <float> [#uses=1] + %5 = fadd float %4, %d, !dbg !21 ; <float> [#uses=2] + tail call void @llvm.dbg.value(metadata !{float %5}, i64 0, metadata !14), !dbg !21 + %6 = fmul float %3, %a, !dbg !22 ; <float> [#uses=1] + %7 = fadd float %6, %b, !dbg !22 ; <float> [#uses=1] + %8 = fdiv float %7, %5, !dbg !22 ; <float> [#uses=1] + tail call void @llvm.dbg.value(metadata !{float %8}, i64 0, metadata !17), !dbg !22 + %9 = fmul float %3, %b, !dbg !23 ; <float> [#uses=1] + %10 = fsub float %9, %a, !dbg !23 ; <float> [#uses=1] + %11 = fdiv float %10, %5, !dbg !23 ; <float> [#uses=1] + tail call void @llvm.dbg.value(metadata !{float %11}, i64 0, metadata !18), !dbg !23 + br label %bb2, !dbg !23 + +bb1: ; preds = %entry + %12 = fdiv float %d, %c, !dbg !24 ; <float> 
[#uses=3] + tail call void @llvm.dbg.value(metadata !{float %12}, i64 0, metadata !16), !dbg !24 + %13 = fmul float %12, %d, !dbg !25 ; <float> [#uses=1] + %14 = fadd float %13, %c, !dbg !25 ; <float> [#uses=2] + tail call void @llvm.dbg.value(metadata !{float %14}, i64 0, metadata !14), !dbg !25 + %15 = fmul float %12, %b, !dbg !26 ; <float> [#uses=1] + %16 = fadd float %15, %a, !dbg !26 ; <float> [#uses=1] + %17 = fdiv float %16, %14, !dbg !26 ; <float> [#uses=1] + tail call void @llvm.dbg.value(metadata !{float %17}, i64 0, metadata !17), !dbg !26 + %18 = fmul float %12, %a, !dbg !27 ; <float> [#uses=1] + %19 = fsub float %b, %18, !dbg !27 ; <float> [#uses=1] + %20 = fdiv float %19, %14, !dbg !27 ; <float> [#uses=1] + tail call void @llvm.dbg.value(metadata !{float %20}, i64 0, metadata !18), !dbg !27 + br label %bb2, !dbg !27 + +bb2: ; preds = %bb1, %bb + %y.0 = phi float [ %11, %bb ], [ %20, %bb1 ] ; <float> [#uses=5] + %x.0 = phi float [ %8, %bb ], [ %17, %bb1 ] ; <float> [#uses=5] + %21 = fcmp uno float %x.0, 0.000000e+00, !dbg !28 ; <i1> [#uses=1] + %22 = fcmp uno float %y.0, 0.000000e+00, !dbg !28 ; <i1> [#uses=1] + %or.cond = and i1 %21, %22 ; <i1> [#uses=1] + br i1 %or.cond, label %bb4, label %bb46, !dbg !28 + +bb4: ; preds = %bb2 + %23 = fcmp une float %c, 0.000000e+00, !dbg !29 ; <i1> [#uses=1] + %24 = fcmp une float %d, 0.000000e+00, !dbg !29 ; <i1> [#uses=1] + %or.cond93 = or i1 %23, %24 ; <i1> [#uses=1] + br i1 %or.cond93, label %bb9, label %bb6, !dbg !29 + +bb6: ; preds = %bb4 + %25 = fcmp uno float %a, 0.000000e+00, !dbg !29 ; <i1> [#uses=1] + %26 = fcmp uno float %b, 0.000000e+00, !dbg !29 ; <i1> [#uses=1] + %or.cond94 = and i1 %25, %26 ; <i1> [#uses=1] + br i1 %or.cond94, label %bb9, label %bb8, !dbg !29 + +bb8: ; preds = %bb6 + %27 = tail call float @copysignf(float 0x7FF0000000000000, float %c) nounwind readnone, !dbg !30 ; <float> [#uses=2] + %28 = fmul float %27, %a, !dbg !30 ; <float> [#uses=1] + tail call void @llvm.dbg.value(metadata 
!{float %28}, i64 0, metadata !17), !dbg !30 + %29 = fmul float %27, %b, !dbg !31 ; <float> [#uses=1] + tail call void @llvm.dbg.value(metadata !{float %29}, i64 0, metadata !18), !dbg !31 + br label %bb46, !dbg !31 + +bb9: ; preds = %bb6, %bb4 + %30 = fcmp ord float %a, 0.000000e+00 ; <i1> [#uses=1] + %31 = fsub float %a, %a, !dbg !32 ; <float> [#uses=3] + %32 = fcmp uno float %31, 0.000000e+00 ; <i1> [#uses=1] + %33 = and i1 %30, %32, !dbg !32 ; <i1> [#uses=2] + br i1 %33, label %bb14, label %bb11, !dbg !32 + +bb11: ; preds = %bb9 + %34 = fcmp ord float %b, 0.000000e+00 ; <i1> [#uses=1] + %35 = fsub float %b, %b, !dbg !32 ; <float> [#uses=1] + %36 = fcmp uno float %35, 0.000000e+00 ; <i1> [#uses=1] + %37 = and i1 %34, %36, !dbg !32 ; <i1> [#uses=1] + br i1 %37, label %bb14, label %bb27, !dbg !32 + +bb14: ; preds = %bb11, %bb9 + %38 = fsub float %c, %c, !dbg !32 ; <float> [#uses=1] + %39 = fcmp ord float %38, 0.000000e+00 ; <i1> [#uses=1] + br i1 %39, label %bb15, label %bb27, !dbg !32 + +bb15: ; preds = %bb14 + %40 = fsub float %d, %d, !dbg !32 ; <float> [#uses=1] + %41 = fcmp ord float %40, 0.000000e+00 ; <i1> [#uses=1] + br i1 %41, label %bb16, label %bb27, !dbg !32 + +bb16: ; preds = %bb15 + %iftmp.0.0 = select i1 %33, float 1.000000e+00, float 0.000000e+00 ; <float> [#uses=1] + %42 = tail call float @copysignf(float %iftmp.0.0, float %a) nounwind readnone, !dbg !33 ; <float> [#uses=2] + tail call void @llvm.dbg.value(metadata !{float %42}, i64 0, metadata !0), !dbg !33 + %43 = fcmp ord float %b, 0.000000e+00 ; <i1> [#uses=1] + %44 = fsub float %b, %b, !dbg !34 ; <float> [#uses=1] + %45 = fcmp uno float %44, 0.000000e+00 ; <i1> [#uses=1] + %46 = and i1 %43, %45, !dbg !34 ; <i1> [#uses=1] + %iftmp.1.0 = select i1 %46, float 1.000000e+00, float 0.000000e+00 ; <float> [#uses=1] + %47 = tail call float @copysignf(float %iftmp.1.0, float %b) nounwind readnone, !dbg !34 ; <float> [#uses=2] + tail call void @llvm.dbg.value(metadata !{float %47}, i64 0, metadata !11), 
!dbg !34 + %48 = fmul float %42, %c, !dbg !35 ; <float> [#uses=1] + %49 = fmul float %47, %d, !dbg !35 ; <float> [#uses=1] + %50 = fadd float %48, %49, !dbg !35 ; <float> [#uses=1] + %51 = fmul float %50, 0x7FF0000000000000, !dbg !35 ; <float> [#uses=1] + tail call void @llvm.dbg.value(metadata !{float %51}, i64 0, metadata !17), !dbg !35 + %52 = fmul float %47, %c, !dbg !36 ; <float> [#uses=1] + %53 = fmul float %42, %d, !dbg !36 ; <float> [#uses=1] + %54 = fsub float %52, %53, !dbg !36 ; <float> [#uses=1] + %55 = fmul float %54, 0x7FF0000000000000, !dbg !36 ; <float> [#uses=1] + tail call void @llvm.dbg.value(metadata !{float %55}, i64 0, metadata !18), !dbg !36 + br label %bb46, !dbg !36 + +bb27: ; preds = %bb15, %bb14, %bb11 + %56 = fcmp ord float %c, 0.000000e+00 ; <i1> [#uses=1] + %57 = fsub float %c, %c, !dbg !37 ; <float> [#uses=1] + %58 = fcmp uno float %57, 0.000000e+00 ; <i1> [#uses=1] + %59 = and i1 %56, %58, !dbg !37 ; <i1> [#uses=2] + br i1 %59, label %bb33, label %bb30, !dbg !37 + +bb30: ; preds = %bb27 + %60 = fcmp ord float %d, 0.000000e+00 ; <i1> [#uses=1] + %61 = fsub float %d, %d, !dbg !37 ; <float> [#uses=1] + %62 = fcmp uno float %61, 0.000000e+00 ; <i1> [#uses=1] + %63 = and i1 %60, %62, !dbg !37 ; <i1> [#uses=1] + %64 = fcmp ord float %31, 0.000000e+00 ; <i1> [#uses=1] + %or.cond95 = and i1 %63, %64 ; <i1> [#uses=1] + br i1 %or.cond95, label %bb34, label %bb46, !dbg !37 + +bb33: ; preds = %bb27 + %.old = fcmp ord float %31, 0.000000e+00 ; <i1> [#uses=1] + br i1 %.old, label %bb34, label %bb46, !dbg !37 + +bb34: ; preds = %bb33, %bb30 + %65 = fsub float %b, %b, !dbg !37 ; <float> [#uses=1] + %66 = fcmp ord float %65, 0.000000e+00 ; <i1> [#uses=1] + br i1 %66, label %bb35, label %bb46, !dbg !37 + +bb35: ; preds = %bb34 + %iftmp.2.0 = select i1 %59, float 1.000000e+00, float 0.000000e+00 ; <float> [#uses=1] + %67 = tail call float @copysignf(float %iftmp.2.0, float %c) nounwind readnone, !dbg !38 ; <float> [#uses=2] + tail call void 
@llvm.dbg.value(metadata !{float %67}, i64 0, metadata !12), !dbg !38 + %68 = fcmp ord float %d, 0.000000e+00 ; <i1> [#uses=1] + %69 = fsub float %d, %d, !dbg !39 ; <float> [#uses=1] + %70 = fcmp uno float %69, 0.000000e+00 ; <i1> [#uses=1] + %71 = and i1 %68, %70, !dbg !39 ; <i1> [#uses=1] + %iftmp.3.0 = select i1 %71, float 1.000000e+00, float 0.000000e+00 ; <float> [#uses=1] + %72 = tail call float @copysignf(float %iftmp.3.0, float %d) nounwind readnone, !dbg !39 ; <float> [#uses=2] + tail call void @llvm.dbg.value(metadata !{float %72}, i64 0, metadata !13), !dbg !39 + %73 = fmul float %67, %a, !dbg !40 ; <float> [#uses=1] + %74 = fmul float %72, %b, !dbg !40 ; <float> [#uses=1] + %75 = fadd float %73, %74, !dbg !40 ; <float> [#uses=1] + %76 = fmul float %75, 0.000000e+00, !dbg !40 ; <float> [#uses=1] + tail call void @llvm.dbg.value(metadata !{float %76}, i64 0, metadata !17), !dbg !40 + %77 = fmul float %67, %b, !dbg !41 ; <float> [#uses=1] + %78 = fmul float %72, %a, !dbg !41 ; <float> [#uses=1] + %79 = fsub float %77, %78, !dbg !41 ; <float> [#uses=1] + %80 = fmul float %79, 0.000000e+00, !dbg !41 ; <float> [#uses=1] + tail call void @llvm.dbg.value(metadata !{float %80}, i64 0, metadata !18), !dbg !41 + br label %bb46, !dbg !41 + +bb46: ; preds = %bb35, %bb34, %bb33, %bb30, %bb16, %bb8, %bb2 + %y.1 = phi float [ %80, %bb35 ], [ %y.0, %bb34 ], [ %y.0, %bb33 ], [ %y.0, %bb30 ], [ %55, %bb16 ], [ %29, %bb8 ], [ %y.0, %bb2 ] ; <float> [#uses=2] + %x.1 = phi float [ %76, %bb35 ], [ %x.0, %bb34 ], [ %x.0, %bb33 ], [ %x.0, %bb30 ], [ %51, %bb16 ], [ %28, %bb8 ], [ %x.0, %bb2 ] ; <float> [#uses=1] + %81 = fmul float %y.1, 0.000000e+00, !dbg !42 ; <float> [#uses=1] + %82 = fadd float %y.1, 0.000000e+00, !dbg !42 ; <float> [#uses=1] + %tmpr = fadd float %x.1, %81, !dbg !42 ; <float> [#uses=1] + %tmp89 = bitcast float %tmpr to i32 ; <i32> [#uses=1] + %tmp90 = zext i32 %tmp89 to i64 ; <i64> [#uses=1] + %tmp85 = bitcast float %82 to i32 ; <i32> [#uses=1] + %tmp86 = 
zext i32 %tmp85 to i64 ; <i64> [#uses=1] + %tmp87 = shl i64 %tmp86, 32 ; <i64> [#uses=1] + %ins = or i64 %tmp90, %tmp87 ; <i64> [#uses=1] + %tmp84 = bitcast i64 %ins to double ; <double> [#uses=1] + %mrv75 = insertvalue %0 undef, double %tmp84, 0, !dbg !42 ; <%0> [#uses=1] + ret %0 %mrv75, !dbg !42 +} + +declare float @fabsf(float) + +declare float @copysignf(float, float) nounwind readnone + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.lv = !{!0, !11, !12, !13, !14, !16, !17, !18} + +!0 = metadata !{i32 524545, metadata !1, metadata !"a", metadata !2, i32 1921, metadata !9} ; [ DW_TAG_arg_variable ] +!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"__divsc3", metadata !"__divsc3", metadata !"__divsc3", metadata !2, i32 1922, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 524329, metadata !"libgcc2.c", metadata !"/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc", metadata !3} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 524305, i32 0, i32 1, metadata !"libgcc2.c", metadata !"/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc", metadata !"4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] +!5 = metadata !{metadata !6, metadata !9, metadata !9, metadata !9, metadata !9} +!6 = metadata !{i32 524310, metadata !7, metadata !"SCtype", metadata !7, i32 170, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_typedef ] +!7 = metadata !{i32 524329, metadata !"libgcc2.h", metadata !"/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc", metadata !3} ; [ DW_TAG_file_type ] +!8 = metadata !{i32 524324, metadata !2, metadata !"complex float", metadata !2, i32 0, i64 64, i64 32, i64 0, i32 0, i32 3} ; [ DW_TAG_base_type ] +!9 = metadata !{i32 524310, metadata !7, metadata !"SFtype", metadata !7, i32 167, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ] +!10 = metadata !{i32 524324, metadata !2, metadata !"float", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] +!11 = metadata !{i32 524545, metadata !1, metadata !"b", metadata !2, i32 1921, metadata !9} ; [ DW_TAG_arg_variable ] +!12 = metadata !{i32 524545, metadata !1, metadata !"c", metadata !2, i32 1921, metadata !9} ; [ DW_TAG_arg_variable ] +!13 = metadata !{i32 524545, metadata !1, metadata !"d", metadata !2, i32 1921, metadata !9} ; [ DW_TAG_arg_variable ] +!14 = metadata !{i32 524544, metadata !15, metadata !"denom", metadata !2, i32 1923, metadata !9} ; [ DW_TAG_auto_variable ] +!15 = metadata !{i32 524299, metadata !1, i32 1922, i32 0} ; [ DW_TAG_lexical_block ] +!16 = metadata !{i32 524544, metadata !15, metadata !"ratio", metadata !2, i32 1923, metadata !9} ; [ DW_TAG_auto_variable ] +!17 = metadata !{i32 524544, metadata !15, metadata !"x", metadata !2, i32 1923, metadata !9} ; [ DW_TAG_auto_variable ] +!18 = metadata !{i32 524544, metadata !15, metadata !"y", metadata !2, i32 1923, metadata !9} ; [ 
DW_TAG_auto_variable ] +!19 = metadata !{i32 1929, i32 0, metadata !15, null} +!20 = metadata !{i32 1931, i32 0, metadata !15, null} +!21 = metadata !{i32 1932, i32 0, metadata !15, null} +!22 = metadata !{i32 1933, i32 0, metadata !15, null} +!23 = metadata !{i32 1934, i32 0, metadata !15, null} +!24 = metadata !{i32 1938, i32 0, metadata !15, null} +!25 = metadata !{i32 1939, i32 0, metadata !15, null} +!26 = metadata !{i32 1940, i32 0, metadata !15, null} +!27 = metadata !{i32 1941, i32 0, metadata !15, null} +!28 = metadata !{i32 1946, i32 0, metadata !15, null} +!29 = metadata !{i32 1948, i32 0, metadata !15, null} +!30 = metadata !{i32 1950, i32 0, metadata !15, null} +!31 = metadata !{i32 1951, i32 0, metadata !15, null} +!32 = metadata !{i32 1953, i32 0, metadata !15, null} +!33 = metadata !{i32 1955, i32 0, metadata !15, null} +!34 = metadata !{i32 1956, i32 0, metadata !15, null} +!35 = metadata !{i32 1957, i32 0, metadata !15, null} +!36 = metadata !{i32 1958, i32 0, metadata !15, null} +!37 = metadata !{i32 1960, i32 0, metadata !15, null} +!38 = metadata !{i32 1962, i32 0, metadata !15, null} +!39 = metadata !{i32 1963, i32 0, metadata !15, null} +!40 = metadata !{i32 1964, i32 0, metadata !15, null} +!41 = metadata !{i32 1965, i32 0, metadata !15, null} +!42 = metadata !{i32 1969, i32 0, metadata !15, null}
diff --git a/src/LLVM/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll b/src/LLVM/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll new file mode 100644 index 0000000..7909d27 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
@@ -0,0 +1,82 @@ +; RUN: llc -O2 < %s | FileCheck %s +; RUN: llc -O2 -regalloc=basic < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10" + +%struct.a = type { i32, %struct.a* } + +@llvm.used = appending global [1 x i8*] [i8* bitcast (i8* (%struct.a*)* @bar to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +define i8* @bar(%struct.a* %myvar) nounwind optsize noinline ssp { +entry: + tail call void @llvm.dbg.value(metadata !{%struct.a* %myvar}, i64 0, metadata !8) + %0 = getelementptr inbounds %struct.a* %myvar, i64 0, i32 0, !dbg !28 ; <i32*> [#uses=1] + %1 = load i32* %0, align 8, !dbg !28 ; <i32> [#uses=1] + tail call void @foo(i32 %1) nounwind optsize noinline ssp, !dbg !28 + %2 = bitcast %struct.a* %myvar to i8*, !dbg !30 ; <i8*> [#uses=1] + ret i8* %2, !dbg !30 +} + +declare void @foo(i32) nounwind optsize noinline ssp + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.gv = !{!0} +!llvm.dbg.lv = !{!4, !8, !18, !25, !26} + +!0 = metadata !{i32 524340, i32 0, metadata !1, metadata !"ret", metadata !"ret", metadata !"", metadata !1, i32 7, metadata !3, i1 false, i1 true, null} ; [ DW_TAG_variable ] +!1 = metadata !{i32 524329, metadata !"foo.c", metadata !"/tmp/", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 524305, i32 0, i32 1, metadata !"foo.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 524324, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!4 = metadata !{i32 524545, metadata !5, metadata !"x", metadata !1, i32 12, metadata !3} ; [ DW_TAG_arg_variable ] +!5 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 13, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true} ; [ DW_TAG_subprogram ] +!6 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{null, metadata !3} +!8 = metadata !{i32 524545, metadata !9, metadata !"myvar", metadata !1, i32 17, metadata !13} ; [ DW_TAG_arg_variable ] +!9 = metadata !{i32 524334, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", metadata !1, i32 17, metadata !10, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true} ; [ DW_TAG_subprogram ] +!10 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ] +!11 = metadata !{metadata !12, metadata !13} +!12 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] +!13 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ] +!14 = metadata !{i32 524307, metadata !1, metadata !"a", metadata !1, i32 2, i64 128, i64 64, i64 0, i32 0, null, metadata !15, i32 0, null} ; [ DW_TAG_structure_type ] +!15 = metadata !{metadata !16, metadata !17} +!16 = metadata !{i32 524301, metadata !14, metadata !"c", metadata !1, i32 3, i64 32, i64 32, i64 0, i32 0, metadata !3} ; [ DW_TAG_member ] +!17 = metadata !{i32 524301, metadata 
!14, metadata !"d", metadata !1, i32 4, i64 64, i64 64, i64 64, i32 0, metadata !13} ; [ DW_TAG_member ] +!18 = metadata !{i32 524545, metadata !19, metadata !"argc", metadata !1, i32 22, metadata !3} ; [ DW_TAG_arg_variable ] +!19 = metadata !{i32 524334, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 22, metadata !20, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true} ; [ DW_TAG_subprogram ] +!20 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !21, i32 0, null} ; [ DW_TAG_subroutine_type ] +!21 = metadata !{metadata !3, metadata !3, metadata !22} +!22 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ] +!23 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !24} ; [ DW_TAG_pointer_type ] +!24 = metadata !{i32 524324, metadata !1, metadata !"char", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] +!25 = metadata !{i32 524545, metadata !19, metadata !"argv", metadata !1, i32 22, metadata !22} ; [ DW_TAG_arg_variable ] +!26 = metadata !{i32 524544, metadata !27, metadata !"e", metadata !1, i32 23, metadata !14} ; [ DW_TAG_auto_variable ] +!27 = metadata !{i32 524299, metadata !19, i32 22, i32 0} ; [ DW_TAG_lexical_block ] +!28 = metadata !{i32 18, i32 0, metadata !29, null} +!29 = metadata !{i32 524299, metadata !9, i32 17, i32 0} ; [ DW_TAG_lexical_block ] +!30 = metadata !{i32 19, i32 0, metadata !29, null} + +; The variable bar:myvar changes registers after the first movq. 
+; It is cobbered by popq %rbx +; CHECK: movq +; CHECK-NEXT: [[LABEL:Ltmp[0-9]*]] +; CHECK: .loc 1 19 0 +; CHECK: popq +; CHECK-NEXT: [[CLOBBER:Ltmp[0-9]*]] + + +; CHECK: Ldebug_loc0: +; CHECK-NEXT: .quad Lfunc_begin0 +; CHECK-NEXT: .quad [[LABEL]] +; CHECK-NEXT: Lset{{.*}} = Ltmp{{.*}}-Ltmp{{.*}} ## Loc expr size +; CHECK-NEXT: .short Lset{{.*}} +; CHECK-NEXT: Ltmp{{.*}}: +; CHECK-NEXT: .byte 85 +; CHECK-NEXT: Ltmp{{.*}}: +; CHECK-NEXT: .quad [[LABEL]] +; CHECK-NEXT: .quad [[CLOBBER]] +; CHECK-NEXT: Lset{{.*}} = Ltmp{{.*}}-Ltmp{{.*}} ## Loc expr size +; CHECK-NEXT: .short Lset{{.*}} +; CHECK-NEXT: Ltmp{{.*}}: +; CHECK-NEXT: .byte 83 +; CHECK-NEXT: Ltmp{{.*}}: \ No newline at end of file
diff --git a/src/LLVM/test/CodeGen/X86/2010-05-26-FP_TO_INT-crash.ll b/src/LLVM/test/CodeGen/X86/2010-05-26-FP_TO_INT-crash.ll new file mode 100644 index 0000000..38dcb80 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-05-26-FP_TO_INT-crash.ll
@@ -0,0 +1,16 @@ +; RUN: llc -O0 -mcpu=i386 -mattr=-sse,-mmx < %s +; ModuleID = '<stdin>' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +target triple = "i386-pc-linux-gnu" + +module asm "\09.ident\09\22GCC: (GNU) 4.5.1 20100510 (prerelease) LLVM: 104604:104605\22" + +define i32 @f2(double %x) nounwind { +entry: + %0 = load double* undef, align 64 ; <double> [#uses=1] + %1 = fptoui double %0 to i16 ; <i16> [#uses=1] + %2 = zext i16 %1 to i32 ; <i32> [#uses=1] + %3 = add nsw i32 0, %2 ; <i32> [#uses=1] + store i32 %3, i32* undef, align 1 + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/X86/2010-05-28-Crash.ll b/src/LLVM/test/CodeGen/X86/2010-05-28-Crash.ll new file mode 100644 index 0000000..1a0da31 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-05-28-Crash.ll
@@ -0,0 +1,45 @@ +; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-apple-darwin -regalloc=basic < %s | FileCheck %s +; Test to check separate label for inlined function argument. + +define i32 @foo(i32 %y) nounwind optsize ssp { +entry: + tail call void @llvm.dbg.value(metadata !{i32 %y}, i64 0, metadata !0) + %0 = tail call i32 (...)* @zoo(i32 %y) nounwind, !dbg !9 ; <i32> [#uses=1] + ret i32 %0, !dbg !9 +} + +declare i32 @zoo(...) + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +define i32 @bar(i32 %x) nounwind optsize ssp { +entry: + tail call void @llvm.dbg.value(metadata !{i32 %x}, i64 0, metadata !7) + tail call void @llvm.dbg.value(metadata !11, i64 0, metadata !0) nounwind + %0 = tail call i32 (...)* @zoo(i32 1) nounwind, !dbg !12 ; <i32> [#uses=1] + %1 = add nsw i32 %0, %x, !dbg !13 ; <i32> [#uses=1] + ret i32 %1, !dbg !13 +} + +!llvm.dbg.lv = !{!0, !7} + +!0 = metadata !{i32 524545, metadata !1, metadata !"y", metadata !2, i32 2, metadata !6} ; [ DW_TAG_arg_variable ] +!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 524329, metadata !"f.c", metadata !"/tmp", metadata !3} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 524305, i32 0, i32 1, metadata !"f.c", metadata !"/tmp", metadata !"4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] +!5 = metadata !{metadata !6, metadata !6} +!6 = metadata !{i32 524324, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!7 = metadata !{i32 524545, metadata !8, metadata !"x", metadata !2, i32 6, metadata !6} ; [ DW_TAG_arg_variable ] +!8 = metadata !{i32 524334, i32 0, metadata !2, metadata !"bar", metadata !"bar", metadata !"bar", metadata !2, i32 6, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true} ; [ DW_TAG_subprogram ] +!9 = metadata !{i32 3, i32 0, metadata !10, null} +!10 = metadata !{i32 524299, metadata !1, i32 2, i32 0} ; [ DW_TAG_lexical_block ] +!11 = metadata !{i32 1} +!12 = metadata !{i32 3, i32 0, metadata !10, metadata !13} +!13 = metadata !{i32 7, i32 0, metadata !14, null} +!14 = metadata !{i32 524299, metadata !8, i32 6, i32 0} ; [ DW_TAG_lexical_block ] + +;CHECK: DEBUG_VALUE: bar:x <- E +;CHECK: Ltmp +;CHECK: DEBUG_VALUE: foo:y <- 1+0
diff --git a/src/LLVM/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll b/src/LLVM/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll new file mode 100644 index 0000000..a9c03ee --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll
@@ -0,0 +1,54 @@ +; RUN: llc -O2 < %s | FileCheck %s +; RUN: llc -O2 -regalloc=basic < %s | FileCheck %s +; Test to check that unused argument 'this' is not undefined in debug info. + +target triple = "x86_64-apple-darwin10.2" +%struct.foo = type { i32 } + +@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (%struct.foo*, i32)* @_ZN3foo3bazEi to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +define i32 @_ZN3foo3bazEi(%struct.foo* nocapture %this, i32 %x) nounwind readnone optsize noinline ssp align 2 { +;CHECK: DEBUG_VALUE: baz:this <- RDI+0 +entry: + tail call void @llvm.dbg.value(metadata !{%struct.foo* %this}, i64 0, metadata !15) + tail call void @llvm.dbg.value(metadata !{i32 %x}, i64 0, metadata !16) + %0 = mul nsw i32 %x, 7, !dbg !29 ; <i32> [#uses=1] + %1 = add nsw i32 %0, 1, !dbg !29 ; <i32> [#uses=1] + ret i32 %1, !dbg !29 +} + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.lv = !{!0, !14, !15, !16, !17, !24, !25, !28} + +!0 = metadata !{i32 524545, metadata !1, metadata !"this", metadata !3, i32 11, metadata !12} ; [ DW_TAG_arg_variable ] +!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEi", metadata !3, i32 11, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 524307, metadata !3, metadata !"foo", metadata !3, i32 3, i64 32, i64 32, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_structure_type ] +!3 = metadata !{i32 524329, metadata !"foo.cp", metadata !"/tmp/", metadata !4} ; [ DW_TAG_file_type ] +!4 = metadata !{i32 524305, i32 0, i32 4, metadata !"foo.cp", metadata !"/tmp/", metadata !"4.2.1 LLVM build", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!5 = metadata !{metadata !6, metadata !1, metadata !8} +!6 = metadata !{i32 524301, metadata !2, metadata !"y", metadata !3, i32 8, i64 32, i64 32, i64 0, i32 0, metadata !7} ; [ DW_TAG_member ] +!7 
= metadata !{i32 524324, metadata !3, metadata !"int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!8 = metadata !{i32 524334, i32 0, metadata !2, metadata !"baz", metadata !"baz", metadata !"_ZN3foo3bazEi", metadata !3, i32 15, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true} ; [ DW_TAG_subprogram ] +!9 = metadata !{i32 524309, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !10, i32 0, null} ; [ DW_TAG_subroutine_type ] +!10 = metadata !{metadata !7, metadata !11, metadata !7} +!11 = metadata !{i32 524303, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !2} ; [ DW_TAG_pointer_type ] +!12 = metadata !{i32 524326, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !13} ; [ DW_TAG_const_type ] +!13 = metadata !{i32 524303, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_pointer_type ] +!14 = metadata !{i32 524545, metadata !1, metadata !"x", metadata !3, i32 11, metadata !7} ; [ DW_TAG_arg_variable ] +!15 = metadata !{i32 524545, metadata !8, metadata !"this", metadata !3, i32 15, metadata !12} ; [ DW_TAG_arg_variable ] +!16 = metadata !{i32 524545, metadata !8, metadata !"x", metadata !3, i32 15, metadata !7} ; [ DW_TAG_arg_variable ] +!17 = metadata !{i32 524545, metadata !18, metadata !"argc", metadata !3, i32 19, metadata !7} ; [ DW_TAG_arg_variable ] +!18 = metadata !{i32 524334, i32 0, metadata !3, metadata !"main", metadata !"main", metadata !"main", metadata !3, i32 19, metadata !19, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true} ; [ DW_TAG_subprogram ] +!19 = metadata !{i32 524309, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !20, i32 0, null} ; [ DW_TAG_subroutine_type ] +!20 = metadata !{metadata !7, metadata !7, metadata !21} +!21 = metadata !{i32 524303, metadata !3, 
metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ] +!22 = metadata !{i32 524303, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ] +!23 = metadata !{i32 524324, metadata !3, metadata !"char", metadata !3, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] +!24 = metadata !{i32 524545, metadata !18, metadata !"argv", metadata !3, i32 19, metadata !21} ; [ DW_TAG_arg_variable ] +!25 = metadata !{i32 524544, metadata !26, metadata !"a", metadata !3, i32 20, metadata !2} ; [ DW_TAG_auto_variable ] +!26 = metadata !{i32 524299, metadata !27, i32 19, i32 0} ; [ DW_TAG_lexical_block ] +!27 = metadata !{i32 524299, metadata !18, i32 19, i32 0} ; [ DW_TAG_lexical_block ] +!28 = metadata !{i32 524544, metadata !26, metadata !"b", metadata !3, i32 21, metadata !7} ; [ DW_TAG_auto_variable ] +!29 = metadata !{i32 16, i32 0, metadata !30, null} +!30 = metadata !{i32 524299, metadata !8, i32 15, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/src/LLVM/test/CodeGen/X86/2010-06-09-FastAllocRegisters.ll b/src/LLVM/test/CodeGen/X86/2010-06-09-FastAllocRegisters.ll new file mode 100644 index 0000000..7c7792a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-06-09-FastAllocRegisters.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -O0 -disable-fp-elim -relocation-model=pic +; PR7313 +; +; The inline asm in this function clobbers almost all allocatable registers. +; Make sure that the register allocator recovers. +; +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +declare void @snapshot() + +define void @test_too_many_longs() nounwind { +entry: + call void asm sideeffect "xor %rax, %rax\0A\09xor %rbx, %rbx\0A\09xor %rcx, %rcx\0A\09xor %rdx, %rdx\0A\09xor %rsi, %rsi\0A\09xor %rdi, %rdi\0A\09xor %r8, %r8\0A\09xor %r9, %r9\0A\09xor %r10, %r10\0A\09xor %r11, %r11\0A\09xor %r12, %r12\0A\09xor %r13, %r13\0A\09xor %r14, %r14\0A\09xor %r15, %r15\0A\09", "~{fpsr},~{flags},~{r15},~{r14},~{r13},~{r12},~{r11},~{r10},~{r9},~{r8},~{rdi},~{rsi},~{rdx},~{rcx},~{rbx},~{rax}"() nounwind + call void bitcast (void ()* @snapshot to void (i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64)*)(i64 32, i64 33, i64 34, i64 35, i64 36, i64 37, i64 38, i64 39, i64 40, i64 41, i64 42, i64 43) nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2010-06-14-fast-isel-fs-load.ll b/src/LLVM/test/CodeGen/X86/2010-06-14-fast-isel-fs-load.ll new file mode 100644 index 0000000..b22a391 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-06-14-fast-isel-fs-load.ll
@@ -0,0 +1,6 @@ +; RUN: llc -fast-isel -march=x86 < %s | grep %fs: + +define i32 @test1(i32 addrspace(257)* %arg) nounwind { + %tmp = load i32 addrspace(257)* %arg + ret i32 %tmp +}
diff --git a/src/LLVM/test/CodeGen/X86/2010-06-15-FastAllocEarlyCLobber.ll b/src/LLVM/test/CodeGen/X86/2010-06-15-FastAllocEarlyCLobber.ll new file mode 100644 index 0000000..4639866 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-06-15-FastAllocEarlyCLobber.ll
@@ -0,0 +1,29 @@ +; RUN: llc -regalloc=fast < %s | FileCheck %s +; PR7382 +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +@.str = private constant [23 x i8] c"This should be -1: %d\0A\00" ; <[23 x i8]*> [#uses=1] + +define i32 @main() { +entry: + %retval = alloca i32, align 4 ; <i32*> [#uses=3] + %v = alloca i32, align 4 ; <i32*> [#uses=3] + store i32 0, i32* %retval + %zero = load i32* %retval +; The earlyclobber register EC0 should not be spilled before the inline asm. +; Yes, check-not can refer to FileCheck variables defined in the future. +; CHECK-NOT: [[EC0]]{{.*}}(%rsp) +; CHECK: bsr {{[^,]*}}, [[EC0:%...]] + %0 = call i32 asm "bsr $1, $0\0A\09cmovz $2, $0", "=&r,ro,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i32 %zero, i32 -1) nounwind, !srcloc !0 ; <i32> [#uses=1] + store i32 %0, i32* %v + %tmp = load i32* %v ; <i32> [#uses=1] + %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([23 x i8]* @.str, i32 0, i32 0), i32 %tmp) ; <i32> [#uses=0] + store i32 0, i32* %retval + %1 = load i32* %retval ; <i32> [#uses=1] + ret i32 %0 +} + +declare i32 @printf(i8*, ...) + +!0 = metadata !{i32 191}
diff --git a/src/LLVM/test/CodeGen/X86/2010-06-24-g-constraint-crash.ll b/src/LLVM/test/CodeGen/X86/2010-06-24-g-constraint-crash.ll new file mode 100644 index 0000000..2a938d9 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-06-24-g-constraint-crash.ll
@@ -0,0 +1,15 @@ +; RUN: llc %s -mtriple=x86_64-apple-darwin10 -disable-fp-elim -o /dev/null +; Formerly crashed, rdar://8015842 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +%0 = type { i64, i64, i64, i64, i64 } + +@utcbs.1559 = internal global [3 x i64] zeroinitializer ; <[3 x i64]*> [#uses=1] + +define void @bar() nounwind ssp { +entry: + %asmtmp.i.i = tail call %0 asm sideeffect "push %rbp; syscall; pop %rbp\0A", "={ax},={di},={si},={dx},={bx},{ax},{di},{si},{dx},{bx},~{dirflag},~{fpsr},~{flags},~{memory},~{r15},~{r14},~{r13},~{r12},~{r11},~{r10},~{r9},~{r8},~{rcx}"(i32 7, i64 -1, i64 0, i64 -1, i64 -1) nounwind ; <%0> [#uses=0] + %asmtmp.i1.i = tail call %0 asm sideeffect "mov $10, %r8;\0Amov $11, %r9;\0Amov $12, %r10;\0Apush %rbp; syscall; pop %rbp\0A", "={ax},={di},={si},={dx},={bx},{ax},{di},{si},{dx},{bx},imr,imr,imr,~{dirflag},~{fpsr},~{flags},~{memory},~{r15},~{r14},~{r13},~{r12},~{r11},~{r10},~{r9},~{r8},~{rcx}"(i32 8, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 0, i8* bitcast (i64* getelementptr inbounds ([3 x i64]* @utcbs.1559, i64 0, i64 2) to i8*)) nounwind ; <%0> [#uses=0] + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll b/src/LLVM/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll new file mode 100644 index 0000000..05f581a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll
@@ -0,0 +1,39 @@ +; RUN: llc -O1 -mtriple=x86_64-unknown-linux-gnu -mcpu=core2 -relocation-model=pic -disable-fp-elim < %s | FileCheck %s +; <rdar://problem/8124405> + +%struct.type = type { %struct.subtype*, i32, i8, i32, i8, i32, i32, i32, i32, i32, i8, i32, i32, i32, i32, i32, [256 x i32], i32, [257 x i32], [257 x i32], i32*, i16*, i8*, i32, i32, i32, i32, i32, [256 x i8], [16 x i8], [256 x i8], [4096 x i8], [16 x i32], [18002 x i8], [18002 x i8], [6 x [258 x i8]], [6 x [258 x i32]], [6 x [258 x i32]], [6 x [258 x i32]], [6 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32*, i32*, i32* } +%struct.subtype = type { i8*, i32, i32, i32, i8*, i32, i32, i32, i8*, i8* (i8*, i32, i32)*, void (i8*, i8*)*, i8* } + +define i32 @func(%struct.type* %s) nounwind optsize ssp { +entry: + %tmp1 = getelementptr inbounds %struct.type* %s, i32 0, i32 1 + %tmp2 = load i32* %tmp1, align 8 + %tmp3 = icmp eq i32 %tmp2, 10 + %tmp4 = getelementptr inbounds %struct.type* %s, i32 0, i32 40 + br i1 %tmp3, label %bb, label %entry.bb1_crit_edge + +entry.bb1_crit_edge: + br label %bb1 + +bb: + +; The point of this code is that %rdi is set to %rdi+64036 for the rep;stosl +; statement. It can be an ADD or LEA instruction, it's not important which one +; it is. +; +; CHECK: # %bb +; CHECK: leaq 64036(%rdx), %rdi +; CHECK: rep;stosl + + %tmp5 = bitcast i32* %tmp4 to i8* + call void @llvm.memset.p0i8.i64(i8* %tmp5, i8 0, i64 84, i32 4, i1 false) + %tmp6 = getelementptr inbounds %struct.type* %s, i32 0, i32 62 + store i32* null, i32** %tmp6, align 8 + br label %bb1 + +bb1: + store i32 10, i32* %tmp1, align 8 + ret i32 42 +} + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/2010-06-25-asm-RA-crash.ll b/src/LLVM/test/CodeGen/X86/2010-06-25-asm-RA-crash.ll new file mode 100644 index 0000000..68a6a13 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-06-25-asm-RA-crash.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -disable-fp-elim -mtriple=i686-pc-mingw32 + +%struct.__SEH2Frame = type {} + +define void @_SEH2FrameHandler() nounwind { +entry: + %target.addr.i = alloca i8*, align 4 ; <i8**> [#uses=2] + %frame = alloca %struct.__SEH2Frame*, align 4 ; <%struct.__SEH2Frame**> [#uses=1] + %tmp = load %struct.__SEH2Frame** %frame ; <%struct.__SEH2Frame*> [#uses=1] + %conv = bitcast %struct.__SEH2Frame* %tmp to i8* ; <i8*> [#uses=1] + store i8* %conv, i8** %target.addr.i + %tmp.i = load i8** %target.addr.i ; <i8*> [#uses=1] + call void asm sideeffect "push %ebp\0Apush $$0\0Apush $$0\0Apush $$Return${:uid}\0Apush $0\0Acall ${1:c}\0AReturn${:uid}: pop %ebp\0A", "imr,imr,~{ax},~{bx},~{cx},~{dx},~{si},~{di},~{flags},~{memory},~{dirflag},~{fpsr},~{flags}"(i8* %tmp.i, void (...)* @RtlUnwind) nounwind, !srcloc !0 + ret void +} + +declare x86_stdcallcc void @RtlUnwind(...) + +!0 = metadata !{i32 215}
diff --git a/src/LLVM/test/CodeGen/X86/2010-06-28-DbgEntryPC.ll b/src/LLVM/test/CodeGen/X86/2010-06-28-DbgEntryPC.ll new file mode 100644 index 0000000..2ba12df --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-06-28-DbgEntryPC.ll
@@ -0,0 +1,108 @@ +; RUN: llc -O2 -mtriple=i386-apple-darwin <%s | FileCheck %s +; Use DW_FORM_addr for DW_AT_entry_pc. +; Radar 8094785 + +; CHECK: .byte 17 ## DW_TAG_compile_unit +; CHECK-NEXT: .byte 1 ## DW_CHILDREN_yes +; CHECK-NEXT: .byte 37 ## DW_AT_producer +; CHECK-NEXT: .byte 8 ## DW_FORM_string +; CHECK-NEXT: .byte 19 ## DW_AT_language +; CHECK-NEXT: .byte 5 ## DW_FORM_data2 +; CHECK-NEXT: .byte 3 ## DW_AT_name +; CHECK-NEXT: .byte 8 ## DW_FORM_string +; CHECK-NEXT: .byte 82 ## DW_AT_entry_pc +; CHECK-NEXT: .byte 1 ## DW_FORM_addr +; CHECK-NEXT: .byte 16 ## DW_AT_stmt_list +; CHECK-NEXT: .byte 6 ## DW_FORM_data4 +; CHECK-NEXT: .byte 27 ## DW_AT_comp_dir +; CHECK-NEXT: .byte 8 ## DW_FORM_string +; CHECK-NEXT: .byte 225 ## DW_AT_APPLE_optimized + +%struct.a = type { i32, %struct.a* } + +@ret = common global i32 0 ; <i32*> [#uses=2] + +define void @foo(i32 %x) nounwind noinline ssp { +entry: + tail call void @llvm.dbg.value(metadata !{i32 %x}, i64 0, metadata !21), !dbg !28 + store i32 %x, i32* @ret, align 4, !dbg !29 + ret void, !dbg !31 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +define i8* @bar(%struct.a* %b) nounwind noinline ssp { +entry: + tail call void @llvm.dbg.value(metadata !{%struct.a* %b}, i64 0, metadata !22), !dbg !32 + %0 = getelementptr inbounds %struct.a* %b, i64 0, i32 0, !dbg !33 ; <i32*> [#uses=1] + %1 = load i32* %0, align 8, !dbg !33 ; <i32> [#uses=1] + tail call void @foo(i32 %1) nounwind noinline ssp, !dbg !33 + %2 = bitcast %struct.a* %b to i8*, !dbg !35 ; <i8*> [#uses=1] + ret i8* %2, !dbg !35 +} + +define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp { +entry: + %e = alloca %struct.a, align 8 ; <%struct.a*> [#uses=4] + call void @llvm.dbg.value(metadata !{i32 %argc}, i64 0, metadata !23), !dbg !36 + call void @llvm.dbg.value(metadata !{i8** %argv}, i64 0, metadata !24), !dbg !36 + call void 
@llvm.dbg.declare(metadata !{%struct.a* %e}, metadata !25), !dbg !37 + %0 = getelementptr inbounds %struct.a* %e, i64 0, i32 0, !dbg !38 ; <i32*> [#uses=1] + store i32 4, i32* %0, align 8, !dbg !38 + %1 = getelementptr inbounds %struct.a* %e, i64 0, i32 1, !dbg !39 ; <%struct.a**> [#uses=1] + store %struct.a* %e, %struct.a** %1, align 8, !dbg !39 + %2 = call i8* @bar(%struct.a* %e) nounwind noinline ssp, !dbg !40 ; <i8*> [#uses=0] + %3 = load i32* @ret, align 4, !dbg !41 ; <i32> [#uses=1] + ret i32 %3, !dbg !41 +} + +!llvm.dbg.sp = !{!0, !6, !15} +!llvm.dbg.lv.foo = !{!21} +!llvm.dbg.lv.bar = !{!22} +!llvm.dbg.lv.main = !{!23, !24, !25} +!llvm.dbg.gv = !{!27} + +!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 34, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, void (i32)* @foo} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 524329, metadata !"2010-06-28-DbgEntryPC.c", metadata !"/Users/yash/clean/llvm/test/FrontendC", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 524305, i32 0, i32 1, metadata !"2010-06-28-DbgEntryPC.c", metadata !"/Users/yash/clean/llvm/test/FrontendC", metadata !"4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{null, metadata !5} +!5 = metadata !{i32 524324, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 524334, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", metadata !1, i32 38, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i8* (%struct.a*)* @bar} ; [ DW_TAG_subprogram ] +!7 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_subroutine_type ] +!8 = metadata !{metadata !9, metadata !10} +!9 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] +!10 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] +!11 = metadata !{i32 524307, metadata !1, metadata !"a", metadata !1, i32 23, i64 128, i64 64, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_structure_type ] +!12 = metadata !{metadata !13, metadata !14} +!13 = metadata !{i32 524301, metadata !11, metadata !"c", metadata !1, i32 24, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ] +!14 = metadata !{i32 524301, metadata !11, metadata !"d", metadata !1, i32 25, i64 64, i64 64, i64 64, i32 0, metadata !10} ; [ DW_TAG_member ] +!15 = metadata !{i32 524334, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 43, metadata !16, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 (i32, i8**)* @main} ; [ DW_TAG_subprogram ] +!16 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, 
i64 0, i64 0, i32 0, null, metadata !17, i32 0, null} ; [ DW_TAG_subroutine_type ] +!17 = metadata !{metadata !5, metadata !5, metadata !18} +!18 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !19} ; [ DW_TAG_pointer_type ] +!19 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ] +!20 = metadata !{i32 524324, metadata !1, metadata !"char", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] +!21 = metadata !{i32 524545, metadata !0, metadata !"x", metadata !1, i32 33, metadata !5} ; [ DW_TAG_arg_variable ] +!22 = metadata !{i32 524545, metadata !6, metadata !"b", metadata !1, i32 38, metadata !10} ; [ DW_TAG_arg_variable ] +!23 = metadata !{i32 524545, metadata !15, metadata !"argc", metadata !1, i32 43, metadata !5} ; [ DW_TAG_arg_variable ] +!24 = metadata !{i32 524545, metadata !15, metadata !"argv", metadata !1, i32 43, metadata !18} ; [ DW_TAG_arg_variable ] +!25 = metadata !{i32 524544, metadata !26, metadata !"e", metadata !1, i32 44, metadata !11} ; [ DW_TAG_auto_variable ] +!26 = metadata !{i32 524299, metadata !15, i32 43, i32 0} ; [ DW_TAG_lexical_block ] +!27 = metadata !{i32 524340, i32 0, metadata !1, metadata !"ret", metadata !"ret", metadata !"", metadata !1, i32 28, metadata !5, i1 false, i1 true, i32* @ret} ; [ DW_TAG_variable ] +!28 = metadata !{i32 33, i32 0, metadata !0, null} +!29 = metadata !{i32 35, i32 0, metadata !30, null} +!30 = metadata !{i32 524299, metadata !0, i32 34, i32 0} ; [ DW_TAG_lexical_block ] +!31 = metadata !{i32 36, i32 0, metadata !30, null} +!32 = metadata !{i32 38, i32 0, metadata !6, null} +!33 = metadata !{i32 39, i32 0, metadata !34, null} +!34 = metadata !{i32 524299, metadata !6, i32 38, i32 0} ; [ DW_TAG_lexical_block ] +!35 = metadata !{i32 40, i32 0, metadata !34, null} +!36 = metadata !{i32 43, i32 0, metadata !15, null} +!37 = 
metadata !{i32 44, i32 0, metadata !26, null} +!38 = metadata !{i32 45, i32 0, metadata !26, null} +!39 = metadata !{i32 46, i32 0, metadata !26, null} +!40 = metadata !{i32 48, i32 0, metadata !26, null} +!41 = metadata !{i32 49, i32 0, metadata !26, null}
diff --git a/src/LLVM/test/CodeGen/X86/2010-06-28-FastAllocTiedOperand.ll b/src/LLVM/test/CodeGen/X86/2010-06-28-FastAllocTiedOperand.ll new file mode 100644 index 0000000..e1491a0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-06-28-FastAllocTiedOperand.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -march=x86 -O0 | FileCheck %s +; PR7509 +target triple = "i386-apple-darwin10" +%asmtype = type { i32, i8*, i32, i32 } + +; Arguments 1 and 4 must be the same. No other output arguments may be +; allocated %eax. + +; CHECK: InlineAsm Start +; CHECK: arg1 %[[A1:...]] +; CHECK-NOT: ax +; CHECK: arg4 %[[A1]] +; CHECK: InlineAsm End + +define i32 @func(i8* %s) nounwind ssp { +entry: + %0 = tail call %asmtype asm "arg0 $0\0A\09arg1 $1\0A\09arg2 $2\0A\09arg3 $3\0A\09arg4 $4", "={ax},=r,=r,=r,1,~{dirflag},~{fpsr},~{flags}"(i8* %s) nounwind, !srcloc !0 ; <%0> [#uses=1] + %asmresult = extractvalue %asmtype %0, 0 ; <i64> [#uses=1] + ret i32 %asmresult +} + +!0 = metadata !{i32 108}
diff --git a/src/LLVM/test/CodeGen/X86/2010-06-28-matched-g-constraint.ll b/src/LLVM/test/CodeGen/X86/2010-06-28-matched-g-constraint.ll new file mode 100644 index 0000000..82dac9d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-06-28-matched-g-constraint.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin11 | FileCheck %s +; Any register is OK for %0, but it must be a register, not memory. + +define i32 @foo() nounwind ssp { +entry: +; CHECK: GCROOT %eax + %_r = alloca i32, align 4 ; <i32*> [#uses=2] + call void asm "/* GCROOT $0 */", "=*imr,0,~{dirflag},~{fpsr},~{flags}"(i32* %_r, i32 4) nounwind + %0 = load i32* %_r, align 4 ; <i32> [#uses=1] + ret i32 %0 +}
diff --git a/src/LLVM/test/CodeGen/X86/2010-07-02-UnfoldBug.ll b/src/LLVM/test/CodeGen/X86/2010-07-02-UnfoldBug.ll new file mode 100644 index 0000000..79219dc --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-07-02-UnfoldBug.ll
@@ -0,0 +1,99 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin +; rdar://8154265 + +declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone + +declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone + +define void @_ZN2CA3OGL20fill_surface_mesh_3dERNS0_7ContextEPKNS_6Render13MeshTransformEPKNS0_5LayerEPNS0_7SurfaceEfNS0_13TextureFilterESC_f() nounwind optsize ssp { +entry: + br i1 undef, label %bb2.thread, label %bb2 + +bb2.thread: ; preds = %entry + br i1 undef, label %bb41, label %bb10.preheader + +bb2: ; preds = %entry + unreachable + +bb10.preheader: ; preds = %bb2.thread + br i1 undef, label %bb9, label %bb12 + +bb9: ; preds = %bb9, %bb10.preheader + br i1 undef, label %bb9, label %bb12 + +bb12: ; preds = %bb9, %bb10.preheader + br i1 undef, label %bb4.i.i, label %bb3.i.i + +bb3.i.i: ; preds = %bb12 + unreachable + +bb4.i.i: ; preds = %bb12 + br i1 undef, label %bb8.i.i, label %_ZN2CA3OGL12_GLOBAL__N_16LightsC1ERNS0_7ContextEPKNS0_5LayerEPKNS_6Render13MeshTransformERKNS_4Vec3IfEESF_.exit + +bb8.i.i: ; preds = %bb4.i.i + br i1 undef, label %_ZN2CA3OGL12_GLOBAL__N_16LightsC1ERNS0_7ContextEPKNS0_5LayerEPKNS_6Render13MeshTransformERKNS_4Vec3IfEESF_.exit, label %bb9.i.i + +bb9.i.i: ; preds = %bb8.i.i + br i1 undef, label %bb11.i.i, label %bb10.i.i + +bb10.i.i: ; preds = %bb9.i.i + unreachable + +bb11.i.i: ; preds = %bb9.i.i + unreachable + +_ZN2CA3OGL12_GLOBAL__N_16LightsC1ERNS0_7ContextEPKNS0_5LayerEPKNS_6Render13MeshTransformERKNS_4Vec3IfEESF_.exit: ; preds = %bb8.i.i, %bb4.i.i + br i1 undef, label %bb19, label %bb14 + +bb14: ; preds = %_ZN2CA3OGL12_GLOBAL__N_16LightsC1ERNS0_7ContextEPKNS0_5LayerEPKNS_6Render13MeshTransformERKNS_4Vec3IfEESF_.exit + unreachable + +bb19: ; preds = %_ZN2CA3OGL12_GLOBAL__N_16LightsC1ERNS0_7ContextEPKNS0_5LayerEPKNS_6Render13MeshTransformERKNS_4Vec3IfEESF_.exit + br i1 undef, label %bb.i50, label %bb6.i + +bb.i50: ; preds = %bb19 + unreachable + +bb6.i: ; preds = %bb19 + br i1 
undef, label %bb28, label %bb.nph106 + +bb22: ; preds = %bb24.preheader + br i1 undef, label %bb2.i.i, label %bb.i.i49 + +bb.i.i49: ; preds = %bb22 + %0 = load float* undef, align 4 ; <float> [#uses=1] + %1 = insertelement <4 x float> undef, float %0, i32 0 ; <<4 x float>> [#uses=1] + %2 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x float> %1) nounwind readnone ; <<4 x float>> [#uses=1] + %3 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %2, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>) nounwind readnone ; <<4 x float>> [#uses=1] + %4 = extractelement <4 x float> %3, i32 0 ; <float> [#uses=1] + store float %4, float* undef, align 4 + %5 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x float> undef) nounwind readnone ; <<4 x float>> [#uses=1] + %6 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %5, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>) nounwind readnone ; <<4 x float>> [#uses=1] + %7 = extractelement <4 x float> %6, i32 0 ; <float> [#uses=1] + store float %7, float* undef, align 4 + unreachable + +bb2.i.i: ; preds = %bb22 + unreachable + +bb26.loopexit: ; preds = %bb24.preheader + br i1 undef, label %bb28, label %bb24.preheader + +bb.nph106: ; preds = %bb6.i + br label %bb24.preheader + +bb24.preheader: ; preds = %bb.nph106, %bb26.loopexit + br i1 undef, label %bb22, label %bb26.loopexit + +bb28: ; preds = %bb26.loopexit, %bb6.i + unreachable + +bb41: ; preds = %bb2.thread + br i1 undef, label %return, label %bb46 + +bb46: ; preds = %bb41 + ret void + +return: ; preds = %bb41 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2010-07-02-asm-alignstack.ll b/src/LLVM/test/CodeGen/X86/2010-07-02-asm-alignstack.ll new file mode 100644 index 0000000..0bbb24f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-07-02-asm-alignstack.ll
@@ -0,0 +1,31 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s + +define void @foo() nounwind ssp { +entry: +; CHECK: foo +; CHECK: pushq +; CHECK: int $3 + call void asm sideeffect alignstack "# top of block", "~{dirflag},~{fpsr},~{flags},~{edi},~{esi},~{edx},~{ecx},~{eax}"() nounwind + call void asm sideeffect alignstack ".file \22small.c\22", "~{dirflag},~{fpsr},~{flags}"() nounwind + call void asm sideeffect alignstack ".line 3", "~{dirflag},~{fpsr},~{flags}"() nounwind + call void asm sideeffect alignstack "int $$3", "~{dirflag},~{fpsr},~{flags},~{memory}"() nounwind + br label %return + +return: ; preds = %entry + ret void +} + +define void @bar() nounwind ssp { +entry: +; CHECK: bar +; CHECK-NOT: pushq +; CHECK: int $3 + call void asm sideeffect "# top of block", "~{dirflag},~{fpsr},~{flags},~{edi},~{esi},~{edx},~{ecx},~{eax}"() nounwind + call void asm sideeffect ".file \22small.c\22", "~{dirflag},~{fpsr},~{flags}"() nounwind + call void asm sideeffect ".line 3", "~{dirflag},~{fpsr},~{flags}"() nounwind + call void asm sideeffect "int $$3", "~{dirflag},~{fpsr},~{flags},~{memory}"() nounwind + br label %return + +return: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2010-07-06-DbgCrash.ll b/src/LLVM/test/CodeGen/X86/2010-07-06-DbgCrash.ll new file mode 100644 index 0000000..edd6015 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-07-06-DbgCrash.ll
@@ -0,0 +1,29 @@ +; RUN: llc -O0 -relocation-model pic < %s -o /dev/null +; PR7545 +@.str = private constant [4 x i8] c"one\00", align 1 ; <[4 x i8]*> [#uses=1] +@.str1 = private constant [4 x i8] c"two\00", align 1 ; <[5 x i8]*> [#uses=1] +@C.9.2167 = internal constant [2 x i8*] [i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str1, i64 0, i64 0)] +!38 = metadata !{i32 524329, metadata !"pbmsrch.c", metadata !"/Users/grawp/LLVM/test-suite/MultiSource/Benchmarks/MiBench/office-stringsearch", metadata !39} ; [ DW_TAG_file_type ] +!39 = metadata !{i32 524305, i32 0, i32 1, metadata !"pbmsrch.c", metadata !"/Users/grawp/LLVM/test-suite/MultiSource/Benchmarks/MiBench/office-stringsearch", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 9999)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!46 = metadata !{i32 524303, metadata !38, metadata !"", metadata !38, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !47} ; [ DW_TAG_pointer_type ]!97 = metadata !{i32 524334, i32 0, metadata !38, metadata !"main", metadata !"main", metadata !"main", metadata !38, i32 73, metadata !98, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]!101 = metadata !{[2 x i8*]* @C.9.2167} +!47 = metadata !{i32 524324, metadata !38, metadata !"char", metadata !38, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] +!97 = metadata !{i32 524334, i32 0, metadata !38, metadata !"main", metadata !"main", metadata !"main", metadata !38, i32 73, metadata !98, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!98 = metadata !{i32 524309, metadata !38, metadata !"", metadata !38, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !99, i32 0, null} ; [ DW_TAG_subroutine_type ] +!99 = metadata !{metadata !100} +!100 = metadata !{i32 524324, metadata !38, metadata !"int", metadata !38, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ 
DW_TAG_base_type ] +!101 = metadata !{[2 x i8*]* @C.9.2167} +!102 = metadata !{i32 524544, metadata !103, metadata !"find_strings", metadata !38, i32 75, metadata !104} ; [ DW_TAG_auto_variable ] +!103 = metadata !{i32 524299, metadata !97, i32 73, i32 0} ; [ DW_TAG_lexical_block ] +!104 = metadata !{i32 524289, metadata !38, metadata !"", metadata !38, i32 0, i64 85312, i64 64, i64 0, i32 0, metadata !46, metadata !105, i32 0, null} ; [ DW_TAG_array_type ] +!105 = metadata !{metadata !106} +!106 = metadata !{i32 524321, i64 0, i64 1332} ; [ DW_TAG_subrange_type ] +!107 = metadata !{i32 73, i32 0, metadata !103, null} + +define i32 @main() nounwind ssp { +bb.nph: + tail call void @llvm.dbg.declare(metadata !101, metadata !102), !dbg !107 + ret i32 0, !dbg !107 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone +
diff --git a/src/LLVM/test/CodeGen/X86/2010-07-06-asm-RIP.ll b/src/LLVM/test/CodeGen/X86/2010-07-06-asm-RIP.ll new file mode 100644 index 0000000..9526b8d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-07-06-asm-RIP.ll
@@ -0,0 +1,21 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s +; PR 4752 + +@n = global i32 0 ; <i32*> [#uses=2] + +define void @f(i32*) nounwind ssp { + ret void +} + +define void @g() nounwind ssp { +entry: +; CHECK: _g: +; CHECK: push $_f$_f +; CHECK: call _f(%rip) + call void asm sideeffect "push\09$1$1\0A\09call\09${1:a}\0A\09pop\09%edx", "imr,i,~{dirflag},~{fpsr},~{flags},~{memory},~{cc},~{edi},~{esi},~{edx},~{ecx},~{ebx},~{eax}"(i32* @n, void (i32*)* @f) nounwind + br label %return + +return: ; preds = %entry + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/2010-07-11-FPStackLoneUse.ll b/src/LLVM/test/CodeGen/X86/2010-07-11-FPStackLoneUse.ll new file mode 100644 index 0000000..e96da94 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-07-11-FPStackLoneUse.ll
@@ -0,0 +1,28 @@ +; RUN: llc < %s -mcpu=core2 +; PR7375 +; +; This function contains a block (while.cond) with a lonely RFP use that is +; not a kill. We still need an FP_REG_KILL for that block since the register +; allocator will insert a reload. +; +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +define void @_ZN7QVectorIdE4fillERKdi(double* nocapture %t) nounwind ssp align 2 { +entry: + %tmp2 = load double* %t ; <double> [#uses=1] + br i1 undef, label %if.end, label %if.then + +if.then: ; preds = %entry + br i1 undef, label %if.end, label %bb.nph + +while.cond: ; preds = %bb.nph, %while.cond + store double %tmp2, double* undef + br i1 undef, label %if.end, label %while.cond + +bb.nph: ; preds = %if.then + br label %while.cond + +if.end: ; preds = %while.cond, %if.then, %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2010-07-13-indirectXconstraint.ll b/src/LLVM/test/CodeGen/X86/2010-07-13-indirectXconstraint.ll new file mode 100644 index 0000000..97cbe3e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-07-13-indirectXconstraint.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s +; PR 7528 +; formerly crashed + +%0 = type { [12 x i16] } +%union..0anon = type { [3 x <1 x i64>] } + +@gsm_H.1466 = internal constant %0 { [12 x i16] [i16 -134, i16 -374, i16 0, i16 2054, i16 5741, i16 8192, i16 5741, i16 2054, i16 0, i16 -374, i16 -134, i16 0] }, align 8 ; <%0*> [#uses=1] + +define void @weighting_filter() nounwind ssp { +entry: +; CHECK: leaq _gsm_H.1466(%rip),%rax; + call void asm sideeffect "leaq $0,%rax;\0A", "*X,~{dirflag},~{fpsr},~{flags},~{memory},~{rax}"(%union..0anon* bitcast (%0* @gsm_H.1466 to %union..0anon*)) nounwind + br label %return + +return: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2010-07-15-Crash.ll b/src/LLVM/test/CodeGen/X86/2010-07-15-Crash.ll new file mode 100644 index 0000000..3ac4cf5 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-07-15-Crash.ll
@@ -0,0 +1,12 @@ +; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s -o /dev/null +; PR7653 + +@__FUNCTION__.1623 = external constant [4 x i8] ; <[4 x i8]*> [#uses=1] + +define void @foo() nounwind { +entry: + tail call void asm sideeffect "", "s,i,~{fpsr},~{flags}"(i8* getelementptr +inbounds ([4 x i8]* @__FUNCTION__.1623, i64 0, i64 0), i8* getelementptr +inbounds ([4 x i8]* @__FUNCTION__.1623, i64 0, i64 0)) nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2010-07-29-SetccSimplify.ll b/src/LLVM/test/CodeGen/X86/2010-07-29-SetccSimplify.ll new file mode 100644 index 0000000..96016cfe --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-07-29-SetccSimplify.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s + +define i32 @extend2bit_v2(i32 %val) { +entry: + %0 = trunc i32 %val to i2 ; <i2> [#uses=1] + %1 = sext i2 %0 to i32 ; <i32> [#uses=1] + %2 = icmp eq i32 %1, 3 ; <i1> [#uses=1] + %3 = zext i1 %2 to i32 ; <i32> [#uses=1] + ret i32 %3 +} + +; CHECK: extend2bit_v2: +; CHECK: xorl %eax, %eax +; CHECK-NEXT: ret
diff --git a/src/LLVM/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll b/src/LLVM/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll new file mode 100644 index 0000000..1919d2e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
@@ -0,0 +1,36 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s +; PR7814 + +@g_16 = global i64 -3738643449681751625, align 8 ; <i64*> [#uses=1] +@g_38 = global i32 0, align 4 ; <i32*> [#uses=2] +@.str = private constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1] + +define i32 @main() nounwind { +entry: + %tmp = load i64* @g_16 ; <i64> [#uses=1] + %not.lnot = icmp ne i64 %tmp, 0 ; <i1> [#uses=1] + %conv = sext i1 %not.lnot to i64 ; <i64> [#uses=1] + %and = and i64 %conv, 150 ; <i64> [#uses=1] + %conv.i = trunc i64 %and to i8 ; <i8> [#uses=1] + %cmp = icmp sgt i8 %conv.i, 0 ; <i1> [#uses=1] + br i1 %cmp, label %if.then, label %entry.if.end_crit_edge + +; CHECK: andl $150 +; CHECK-NEXT: testb +; CHECK-NEXT: jg + +entry.if.end_crit_edge: ; preds = %entry + %tmp4.pre = load i32* @g_38 ; <i32> [#uses=1] + br label %if.end + +if.then: ; preds = %entry + store i32 1, i32* @g_38 + br label %if.end + +if.end: ; preds = %entry.if.end_crit_edge, %if.then + %tmp4 = phi i32 [ %tmp4.pre, %entry.if.end_crit_edge ], [ 1, %if.then ] ; <i32> [#uses=1] + %call5 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %tmp4) nounwind ; <i32> [#uses=0] + ret i32 0 +} + +declare i32 @printf(i8* nocapture, ...) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/2010-08-04-MingWCrash.ll b/src/LLVM/test/CodeGen/X86/2010-08-04-MingWCrash.ll new file mode 100644 index 0000000..98a0887 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-08-04-MingWCrash.ll
@@ -0,0 +1,39 @@ +; RUN: llc < %s -mtriple=i386-pc-mingw32 + +define void @func() nounwind { +invoke.cont: + %call = tail call i8* @malloc() + %a = invoke i32 @bar() + to label %bb1 unwind label %lpad + +bb1: + ret void + +lpad: + %exn = tail call i8* @llvm.eh.exception() nounwind + %eh.selector = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i32 1, i8* null) nounwind + %ehspec.fails = icmp slt i32 %eh.selector, 0 + br i1 %ehspec.fails, label %ehspec.unexpected, label %cleanup + +cleanup: + tail call void @_Unwind_Resume_or_Rethrow(i8* %exn) noreturn nounwind + unreachable + +ehspec.unexpected: + tail call void @__cxa_call_unexpected(i8* %exn) noreturn nounwind + unreachable +} + +declare noalias i8* @malloc() + +declare i8* @llvm.eh.exception() nounwind readonly + +declare i32 @__gxx_personality_v0(...) + +declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind + +declare void @_Unwind_Resume_or_Rethrow(i8*) + +declare void @__cxa_call_unexpected(i8*) + +declare i32 @bar()
diff --git a/src/LLVM/test/CodeGen/X86/2010-08-04-StackVariable.ll b/src/LLVM/test/CodeGen/X86/2010-08-04-StackVariable.ll new file mode 100644 index 0000000..ba36fe7 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-08-04-StackVariable.ll
@@ -0,0 +1,124 @@ +; RUN: llc -O0 -mtriple=x86_64-apple-darwin < %s | grep DW_OP_breg7 +; Use DW_OP_breg7 in variable's location expression if the variable is in a stack slot. + +%struct.SVal = type { i8*, i32 } + +define i32 @_Z3fooi4SVal(i32 %i, %struct.SVal* noalias %location) nounwind ssp { +entry: + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata !23), !dbg !24 + call void @llvm.dbg.value(metadata !{%struct.SVal* %location}, i64 0, metadata !25), !dbg !24 + %0 = icmp ne i32 %i, 0, !dbg !27 ; <i1> [#uses=1] + br i1 %0, label %bb, label %bb1, !dbg !27 + +bb: ; preds = %entry + %1 = getelementptr inbounds %struct.SVal* %location, i32 0, i32 1, !dbg !29 ; <i32*> [#uses=1] + %2 = load i32* %1, align 8, !dbg !29 ; <i32> [#uses=1] + %3 = add i32 %2, %i, !dbg !29 ; <i32> [#uses=1] + br label %bb2, !dbg !29 + +bb1: ; preds = %entry + %4 = getelementptr inbounds %struct.SVal* %location, i32 0, i32 1, !dbg !30 ; <i32*> [#uses=1] + %5 = load i32* %4, align 8, !dbg !30 ; <i32> [#uses=1] + %6 = sub i32 %5, 1, !dbg !30 ; <i32> [#uses=1] + br label %bb2, !dbg !30 + +bb2: ; preds = %bb1, %bb + %.0 = phi i32 [ %3, %bb ], [ %6, %bb1 ] ; <i32> [#uses=1] + br label %return, !dbg !29 + +return: ; preds = %bb2 + ret i32 %.0, !dbg !29 +} + +define linkonce_odr void @_ZN4SValC1Ev(%struct.SVal* %this) nounwind ssp align 2 { +entry: + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + call void @llvm.dbg.value(metadata !{%struct.SVal* %this}, i64 0, metadata !31), !dbg !34 + %0 = getelementptr inbounds %struct.SVal* %this, i32 0, i32 0, !dbg !34 ; <i8**> [#uses=1] + store i8* null, i8** %0, align 8, !dbg !34 + %1 = getelementptr inbounds %struct.SVal* %this, i32 0, i32 1, !dbg !34 ; <i32*> [#uses=1] + store i32 0, i32* %1, align 8, !dbg !34 + br label %return, !dbg !34 + +return: ; preds = %entry + ret void, !dbg !35 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +define i32 
@main() nounwind ssp { +entry: + %0 = alloca %struct.SVal ; <%struct.SVal*> [#uses=3] + %v = alloca %struct.SVal ; <%struct.SVal*> [#uses=4] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + call void @llvm.dbg.declare(metadata !{%struct.SVal* %v}, metadata !38), !dbg !41 + call void @_ZN4SValC1Ev(%struct.SVal* %v) nounwind, !dbg !41 + %1 = getelementptr inbounds %struct.SVal* %v, i32 0, i32 1, !dbg !42 ; <i32*> [#uses=1] + store i32 1, i32* %1, align 8, !dbg !42 + %2 = getelementptr inbounds %struct.SVal* %0, i32 0, i32 0, !dbg !43 ; <i8**> [#uses=1] + %3 = getelementptr inbounds %struct.SVal* %v, i32 0, i32 0, !dbg !43 ; <i8**> [#uses=1] + %4 = load i8** %3, align 8, !dbg !43 ; <i8*> [#uses=1] + store i8* %4, i8** %2, align 8, !dbg !43 + %5 = getelementptr inbounds %struct.SVal* %0, i32 0, i32 1, !dbg !43 ; <i32*> [#uses=1] + %6 = getelementptr inbounds %struct.SVal* %v, i32 0, i32 1, !dbg !43 ; <i32*> [#uses=1] + %7 = load i32* %6, align 8, !dbg !43 ; <i32> [#uses=1] + store i32 %7, i32* %5, align 8, !dbg !43 + %8 = call i32 @_Z3fooi4SVal(i32 2, %struct.SVal* noalias %0) nounwind, !dbg !43 ; <i32> [#uses=0] + call void @llvm.dbg.value(metadata !{i32 %8}, i64 0, metadata !44), !dbg !43 + br label %return, !dbg !45 + +return: ; preds = %entry + ret i32 0, !dbg !45 +} + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.sp = !{!0, !9, !16, !17, !20} + +!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", metadata !2, i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 524307, metadata !2, metadata !"SVal", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_structure_type ] +!2 = metadata !{i32 524329, metadata !"small.cc", metadata !"/Users/manav/R8248330", metadata !3} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 524305, i32 0, i32 4, metadata 
!"small.cc", metadata !"/Users/manav/R8248330", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!4 = metadata !{metadata !5, metadata !7, metadata !0, metadata !9} +!5 = metadata !{i32 524301, metadata !1, metadata !"Data", metadata !2, i32 7, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ] +!6 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] +!7 = metadata !{i32 524301, metadata !1, metadata !"Kind", metadata !2, i32 8, i64 32, i64 32, i64 64, i32 0, metadata !8} ; [ DW_TAG_member ] +!8 = metadata !{i32 524324, metadata !2, metadata !"unsigned int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!9 = metadata !{i32 524334, i32 0, metadata !1, metadata !"~SVal", metadata !"~SVal", metadata !"", metadata !2, i32 12, metadata !10, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!10 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ] +!11 = metadata !{null, metadata !12, metadata !13} +!12 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ] +!13 = metadata !{i32 524324, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!14 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null} ; [ DW_TAG_subroutine_type ] +!15 = metadata !{null, metadata !12} +!16 = metadata !{i32 524334, i32 0, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"_ZN4SValC1Ev", metadata !2, i32 11, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void (%struct.SVal*)* 
@_ZN4SValC1Ev} ; [ DW_TAG_subprogram ] +!17 = metadata !{i32 524334, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3fooi4SVal", metadata !2, i32 16, metadata !18, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, %struct.SVal*)* @_Z3fooi4SVal} ; [ DW_TAG_subprogram ] +!18 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !19, i32 0, null} ; [ DW_TAG_subroutine_type ] +!19 = metadata !{metadata !13, metadata !13, metadata !1} +!20 = metadata !{i32 524334, i32 0, metadata !2, metadata !"main", metadata !"main", metadata !"main", metadata !2, i32 23, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] +!21 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, null} ; [ DW_TAG_subroutine_type ] +!22 = metadata !{metadata !13} +!23 = metadata !{i32 524545, metadata !17, metadata !"i", metadata !2, i32 16, metadata !13} ; [ DW_TAG_arg_variable ] +!24 = metadata !{i32 16, i32 0, metadata !17, null} +!25 = metadata !{i32 524545, metadata !17, metadata !"location", metadata !2, i32 16, metadata !26} ; [ DW_TAG_arg_variable ] +!26 = metadata !{i32 524304, metadata !2, metadata !"SVal", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_reference_type ] +!27 = metadata !{i32 17, i32 0, metadata !28, null} +!28 = metadata !{i32 524299, metadata !17, i32 16, i32 0, metadata !2, i32 2} ; [ DW_TAG_lexical_block ] +!29 = metadata !{i32 18, i32 0, metadata !28, null} +!30 = metadata !{i32 20, i32 0, metadata !28, null} +!31 = metadata !{i32 524545, metadata !16, metadata !"this", metadata !2, i32 11, metadata !32} ; [ DW_TAG_arg_variable ] +!32 = metadata !{i32 524326, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !33} ; [ DW_TAG_const_type ] +!33 = metadata !{i32 524303, metadata !2, 
metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_pointer_type ] +!34 = metadata !{i32 11, i32 0, metadata !16, null} +!35 = metadata !{i32 11, i32 0, metadata !36, null} +!36 = metadata !{i32 524299, metadata !37, i32 11, i32 0, metadata !2, i32 1} ; [ DW_TAG_lexical_block ] +!37 = metadata !{i32 524299, metadata !16, i32 11, i32 0, metadata !2, i32 0} ; [ DW_TAG_lexical_block ] +!38 = metadata !{i32 524544, metadata !39, metadata !"v", metadata !2, i32 24, metadata !1} ; [ DW_TAG_auto_variable ] +!39 = metadata !{i32 524299, metadata !40, i32 23, i32 0, metadata !2, i32 4} ; [ DW_TAG_lexical_block ] +!40 = metadata !{i32 524299, metadata !20, i32 23, i32 0, metadata !2, i32 3} ; [ DW_TAG_lexical_block ] +!41 = metadata !{i32 24, i32 0, metadata !39, null} +!42 = metadata !{i32 25, i32 0, metadata !39, null} +!43 = metadata !{i32 26, i32 0, metadata !39, null} +!44 = metadata !{i32 524544, metadata !39, metadata !"k", metadata !2, i32 26, metadata !13} ; [ DW_TAG_auto_variable ] +!45 = metadata !{i32 27, i32 0, metadata !39, null}
diff --git a/src/LLVM/test/CodeGen/X86/2010-08-10-DbgConstant.ll b/src/LLVM/test/CodeGen/X86/2010-08-10-DbgConstant.ll new file mode 100644 index 0000000..d98ef14 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-08-10-DbgConstant.ll
@@ -0,0 +1,25 @@ +; RUN: llc -march=x86 -O0 < %s | FileCheck %s +; CHECK: DW_TAG_constant +; CHECK-NEXT: ascii "ro" #{{#?}} DW_AT_name + +define void @foo() nounwind ssp { +entry: + call void @bar(i32 201), !dbg !8 + ret void, !dbg !8 +} + +declare void @bar(i32) + +!llvm.dbg.sp = !{!0} +!llvm.dbg.gv = !{!5} + +!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void ()* @foo} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 524329, metadata !"/tmp/l.c", metadata !"/Volumes/Lalgate/clean/D", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 524305, i32 0, i32 12, metadata !"/tmp/l.c", metadata !"/Volumes/Lalgate/clean/D", metadata !"clang 2.8", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{null} +!5 = metadata !{i32 524327, i32 0, metadata !1, metadata !"ro", metadata !"ro", metadata !"ro", metadata !1, i32 1, metadata !6, i1 true, i1 true, i32 201} ; [ DW_TAG_constant ] +!6 = metadata !{i32 524326, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_const_type ] +!7 = metadata !{i32 524324, metadata !1, metadata !"unsigned int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!8 = metadata !{i32 3, i32 14, metadata !9, null} +!9 = metadata !{i32 524299, metadata !0, i32 3, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/src/LLVM/test/CodeGen/X86/2010-09-01-RemoveCopyByCommutingDef.ll b/src/LLVM/test/CodeGen/X86/2010-09-01-RemoveCopyByCommutingDef.ll new file mode 100644 index 0000000..e5542ba --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-09-01-RemoveCopyByCommutingDef.ll
@@ -0,0 +1,28 @@ +; RUN: llc < %s -verify-machineinstrs | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +; This test exercises the alias checking in SimpleRegisterCoalescing::RemoveCopyByCommutingDef. + +define void @f(i32* %w, i32* %h, i8* %_this, i8* %image) nounwind ssp { + %x1 = tail call i64 @g(i8* %_this, i8* %image) nounwind ; <i64> [#uses=3] + %tmp1 = trunc i64 %x1 to i32 ; <i32> [#uses=1] +; CHECK: movl (%r{{.*}}), % + %x4 = load i32* %h, align 4 ; <i32> [#uses=1] + +; The imull clobbers a 32-bit register. +; CHECK: imull %{{...}}, %e[[CLOBBER:..]] + %x5 = mul nsw i32 %x4, %tmp1 ; <i32> [#uses=1] + +; So we cannot use the corresponding 64-bit register anymore. +; CHECK-NOT: shrq $32, %r[[CLOBBER]] + %btmp3 = lshr i64 %x1, 32 ; <i64> [#uses=1] + %btmp4 = trunc i64 %btmp3 to i32 ; <i32> [#uses=1] + +; CHECK: idiv + %x6 = sdiv i32 %x5, %btmp4 ; <i32> [#uses=1] + store i32 %x6, i32* %w, align 4 + ret void +} + +declare i64 @g(i8*, i8*)
diff --git a/src/LLVM/test/CodeGen/X86/2010-09-16-EmptyFilename.ll b/src/LLVM/test/CodeGen/X86/2010-09-16-EmptyFilename.ll new file mode 100644 index 0000000..bed8c8a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-09-16-EmptyFilename.ll
@@ -0,0 +1,29 @@ +; RUN: llc -O0 -mtriple=x86_64-apple-darwin10 < %s - | FileCheck %s +; Radar 8286101 +; CHECK: .file 2 "<stdin>" + +define i32 @foo() nounwind ssp { +entry: + ret i32 42, !dbg !8 +} + +define i32 @bar() nounwind ssp { +entry: + ret i32 21, !dbg !10 +} + +!llvm.dbg.sp = !{!0, !6} + +!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 53, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @foo} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 524329, metadata !"", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 524305, i32 0, i32 12, metadata !"bug.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 114084)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5} +!5 = metadata !{i32 524324, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 524334, i32 0, metadata !7, metadata !"bar", metadata !"bar", metadata !"bar", metadata !7, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @bar} ; [ DW_TAG_subprogram ] +!7 = metadata !{i32 524329, metadata !"bug.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] +!8 = metadata !{i32 53, i32 13, metadata !9, null} +!9 = metadata !{i32 524299, metadata !0, i32 53, i32 11, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!10 = metadata !{i32 4, i32 13, metadata !11, null} +!11 = metadata !{i32 524299, metadata !12, i32 4, i32 13, metadata !7, i32 2} ; [ DW_TAG_lexical_block ] +!12 = metadata !{i32 524299, metadata !6, i32 4, i32 11, metadata !7, i32 1} ; [ DW_TAG_lexical_block ]
diff --git a/src/LLVM/test/CodeGen/X86/2010-09-16-asmcrash.ll b/src/LLVM/test/CodeGen/X86/2010-09-16-asmcrash.ll new file mode 100644 index 0000000..9bbd691 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-09-16-asmcrash.ll
@@ -0,0 +1,56 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-freebsd8.1 -o /dev/null +; This formerly crashed, PR 8154. + +module asm ".weak sem_close" +module asm ".equ sem_close, _sem_close" +module asm ".weak sem_destroy" +module asm ".equ sem_destroy, _sem_destroy" +module asm ".weak sem_getvalue" +module asm ".equ sem_getvalue, _sem_getvalue" +module asm ".weak sem_init" +module asm ".equ sem_init, _sem_init" +module asm ".weak sem_open" +module asm ".equ sem_open, _sem_open" +module asm ".weak sem_post" +module asm ".equ sem_post, _sem_post" +module asm ".weak sem_timedwait" +module asm ".equ sem_timedwait, _sem_timedwait" +module asm ".weak sem_trywait" +module asm ".equ sem_trywait, _sem_trywait" +module asm ".weak sem_unlink" +module asm ".equ sem_unlink, _sem_unlink" +module asm ".weak sem_wait" +module asm ".equ sem_wait, _sem_wait" + +%struct._sem = type { i32, %struct._usem } +%struct._usem = type { i32, i32, i32 } + +define void @_sem_timedwait(%struct._sem* noalias %sem) nounwind ssp { +entry: + br i1 undef, label %while.cond.preheader, label %sem_check_validity.exit + +while.cond.preheader: ; preds = %entry + %tmp4 = getelementptr inbounds %struct._sem* %sem, i64 0, i32 1, i32 1 + br label %while.cond + +sem_check_validity.exit: ; preds = %entry + ret void + +while.cond: ; preds = %while.body, %while.cond.preheader + br i1 undef, label %while.body, label %while.end + +while.body: ; preds = %while.cond + %0 = call i8 asm sideeffect "\09lock ; \09\09\09cmpxchgl $2,$1 ;\09 sete\09$0 ;\09\091:\09\09\09\09# atomic_cmpset_int", "={ax},=*m,r,{ax},*m,~{memory},~{dirflag},~{fpsr},~{flags}"(i32* %tmp4, i32 undef, i32 undef, i32* %tmp4) nounwind, !srcloc !0 + br i1 undef, label %while.cond, label %return + +while.end: ; preds = %while.cond + br i1 undef, label %if.end18, label %return + +if.end18: ; preds = %while.end + unreachable + +return: ; preds = %while.end, %while.body + ret void +} + +!0 = metadata !{i32 158484}
diff --git a/src/LLVM/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll b/src/LLVM/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll new file mode 100644 index 0000000..1b33977 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
@@ -0,0 +1,26 @@ +; RUN: llc < %s -combiner-alias-analysis -march=x86-64 -mcpu=core2 | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.4" +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind + +define fastcc i32 @cli_magic_scandesc(i8* %in) nounwind ssp { +entry: + %a = alloca [64 x i8] + %b = getelementptr inbounds [64 x i8]* %a, i64 0, i32 0 + %c = getelementptr inbounds [64 x i8]* %a, i64 0, i32 30 + %d = load i8* %b, align 8 + %e = load i8* %c, align 8 + %f = bitcast [64 x i8]* %a to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %f, i8* %in, i64 64, i32 8, i1 false) nounwind + store i8 %d, i8* %b, align 8 + store i8 %e, i8* %c, align 8 + ret i32 0 +} + +; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip) +; CHECK: movb 38(%rsp), [[R0:%.+]] +; CHECK: movb 8(%rsp), [[R1:%.+]] +; CHECK: movb [[R1]], 8(%rsp) +; CHECK: movb [[R0]], 38(%rsp) +; CHECK: callq ___stack_chk_fail
diff --git a/src/LLVM/test/CodeGen/X86/2010-09-30-CMOV-JumpTable-PHI.ll b/src/LLVM/test/CodeGen/X86/2010-09-30-CMOV-JumpTable-PHI.ll new file mode 100644 index 0000000..73e996c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-09-30-CMOV-JumpTable-PHI.ll
@@ -0,0 +1,71 @@ +; RUN: llc -verify-machineinstrs -mcpu=i386 < %s +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +target triple = "i386-pc-linux-gnu" + +; The bb.i basic block gets split while emitting the schedule because +; -mcpu=i386 doesn't have CMOV.' +; +; That causes the PHI to be updated wrong because the jumptable data structure is remembering the original MBB. +; +; -cgp-critical-edge-splitting=0 prevents the edge to PHI from being split. + +@.str146 = external constant [4 x i8], align 1 +@.str706 = external constant [4 x i8], align 1 +@.str1189 = external constant [5 x i8], align 1 + +declare i32 @memcmp(i8* nocapture, i8* nocapture, i32) nounwind readonly +declare i32 @strlen(i8* nocapture) nounwind readonly + +define hidden zeroext i8 @f(i8* %this, i8* %Name.0, i32 %Name.1, i8* noalias %NameLoc, i8* %Operands) nounwind align 2 { +bb.i: + %0 = icmp eq i8 undef, 0 + %iftmp.285.0 = select i1 %0, i8* getelementptr inbounds ([5 x i8]* @.str1189, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8]* @.str706, i32 0, i32 0) + %1 = call i32 @strlen(i8* %iftmp.285.0) nounwind readonly + switch i32 %Name.1, label %_ZNK4llvm12StringSwitchINS_9StringRefES1_E7DefaultERKS1_.exit [ + i32 3, label %bb1.i + i32 4, label %bb1.i1237 + i32 5, label %bb1.i1266 + i32 6, label %bb1.i1275 + i32 2, label %bb1.i1434 + i32 8, label %bb1.i1523 + i32 7, label %bb1.i1537 + ] + +bb1.i: ; preds = %bb.i + unreachable + +bb1.i1237: ; preds = %bb.i + br i1 undef, label %bb.i1820, label %bb1.i1241 + +bb1.i1241: ; preds = %bb1.i1237 + unreachable + +bb1.i1266: ; preds = %bb.i + unreachable + +bb1.i1275: ; preds = %bb.i + unreachable + +bb1.i1434: ; preds = %bb.i + unreachable + +bb1.i1523: ; preds = %bb.i + unreachable + +bb1.i1537: ; preds = %bb.i + unreachable + +bb.i1820: ; preds = %bb1.i1237 + br label %_ZNK4llvm12StringSwitchINS_9StringRefES1_E7DefaultERKS1_.exit + 
+_ZNK4llvm12StringSwitchINS_9StringRefES1_E7DefaultERKS1_.exit: ; preds = %bb.i1820, %bb.i + %PatchedName.0.0 = phi i8* [ undef, %bb.i1820 ], [ %Name.0, %bb.i ] + br i1 undef, label %bb141, label %_ZNK4llvm9StringRef10startswithES0_.exit + +_ZNK4llvm9StringRef10startswithES0_.exit: ; preds = %_ZNK4llvm12StringSwitchINS_9StringRefES1_E7DefaultERKS1_.exit + %2 = call i32 @memcmp(i8* %PatchedName.0.0, i8* getelementptr inbounds ([4 x i8]* @.str146, i32 0, i32 0), i32 3) nounwind readonly + unreachable + +bb141: ; preds = %_ZNK4llvm12StringSwitchINS_9StringRefES1_E7DefaultERKS1_.exit + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/2010-10-08-cmpxchg8b.ll b/src/LLVM/test/CodeGen/X86/2010-10-08-cmpxchg8b.ll new file mode 100644 index 0000000..0e4118a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-10-08-cmpxchg8b.ll
@@ -0,0 +1,26 @@ +; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin | FileCheck %s +; PR8297 +; +; On i386, i64 cmpxchg is lowered during legalize types to extract the +; 64-bit result into a pair of fixed regs. So creation of the DAG node +; happens in a different place. See +; X86TargetLowering::ReplaceNodeResults, case ATOMIC_CMP_SWAP. +; +; Neither Atomic-xx.ll nor atomic_op.ll cover this. Those tests were +; autogenerated from C source before 64-bit variants were supported. +; +; Note that this case requires a loop around the cmpxchg to force +; machine licm to query alias anlysis, exposing a bad +; MachineMemOperand. +define void @foo(i64* %ptr) nounwind inlinehint { +entry: + br label %loop +loop: +; CHECK: lock +; CHECK-NEXT: cmpxchg8b + %r = cmpxchg i64* %ptr, i64 0, i64 1 monotonic + %stored1 = icmp eq i64 %r, 0 + br i1 %stored1, label %loop, label %continue +continue: + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2010-11-02-DbgParameter.ll b/src/LLVM/test/CodeGen/X86/2010-11-02-DbgParameter.ll new file mode 100644 index 0000000..79c0cf3 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-11-02-DbgParameter.ll
@@ -0,0 +1,35 @@ +; RUN: llc -O2 -asm-verbose < %s | FileCheck %s +; Radar 8616981 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" +target triple = "i386-apple-darwin11.0.0" + +%struct.bar = type { i32, i32 } + +define i32 @foo(%struct.bar* nocapture %i) nounwind readnone optsize noinline ssp { +; CHECK: TAG_formal_parameter +entry: + tail call void @llvm.dbg.value(metadata !{%struct.bar* %i}, i64 0, metadata !6), !dbg !12 + ret i32 1, !dbg !13 +} + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.sp = !{!0} +!llvm.dbg.lv.foo = !{!6} + +!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (%struct.bar*)* @foo} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 589865, metadata !"one.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"one.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 117922)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5} +!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 590081, metadata !0, metadata !"i", metadata !1, i32 3, metadata !7, i32 0} ; [ DW_TAG_arg_variable ] +!7 = metadata !{i32 589839, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ] +!8 = metadata !{i32 589843, metadata !1, metadata !"bar", metadata !1, i32 2, i64 64, i64 32, i64 0, i32 0, null, metadata !9, i32 0, null} ; [ 
DW_TAG_structure_type ] +!9 = metadata !{metadata !10, metadata !11} +!10 = metadata !{i32 589837, metadata !1, metadata !"x", metadata !1, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ] +!11 = metadata !{i32 589837, metadata !1, metadata !"y", metadata !1, i32 2, i64 32, i64 32, i64 32, i32 0, metadata !5} ; [ DW_TAG_member ] +!12 = metadata !{i32 3, i32 47, metadata !0, null} +!13 = metadata !{i32 4, i32 2, metadata !14, null} +!14 = metadata !{i32 589835, metadata !0, i32 3, i32 50, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/src/LLVM/test/CodeGen/X86/2010-11-09-MOVLPS.ll b/src/LLVM/test/CodeGen/X86/2010-11-09-MOVLPS.ll new file mode 100644 index 0000000..710cb86 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-11-09-MOVLPS.ll
@@ -0,0 +1,66 @@ +; RUN: llc < %s -march=x86-64 -O0 +; PR8211 +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +module asm "\09.ident\09\22GCC: (GNU) 4.5.2 20100914 (prerelease) LLVM: 114628\22" + +%"int[]" = type [4 x i32] +%0 = type { %"int[]" } +%float = type float +%"float[]" = type [4 x float] +%int = type i32 +%"long unsigned int" = type i64 + +define void @swizzle(i8* %a, %0* %b, %0* %c) nounwind { +entry: + %a_addr = alloca i8* + %b_addr = alloca %0* + %c_addr = alloca %0* + %"alloca point" = bitcast i32 0 to i32 + store i8* %a, i8** %a_addr + store %0* %b, %0** %b_addr + store %0* %c, %0** %c_addr + %0 = load i8** %a_addr, align 64 + %1 = load %0** %b_addr, align 64 + %2 = load %0** %c_addr, align 64 + %"ssa point" = bitcast i32 0 to i32 + br label %"2" + +"2": ; preds = %entry + %3 = bitcast i8* %0 to <2 x i32>* + %4 = getelementptr inbounds %0* %1, i32 0, i32 0 + %5 = bitcast %"int[]"* %4 to <4 x float>* + %6 = load <4 x float>* %5, align 16 + %7 = bitcast <2 x i32>* %3 to <2 x float>* + %8 = bitcast <2 x float>* %7 to double* + %9 = load double* %8 + %10 = insertelement <2 x double> undef, double %9, i32 0 + %11 = insertelement <2 x double> %10, double undef, i32 1 + %12 = bitcast <2 x double> %11 to <4 x float> + %13 = shufflevector <4 x float> %6, <4 x float> %12, <4 x i32> <i32 4, i32 5, i32 2, i32 3> + %14 = getelementptr inbounds %0* %1, i32 0, i32 0 + %15 = bitcast %"int[]"* %14 to <4 x float>* + store <4 x float> %13, <4 x float>* %15, align 16 + %16 = bitcast i8* %0 to <2 x i32>* + %17 = bitcast <2 x i32>* %16 to i8* + %18 = getelementptr i8* %17, i64 8 + %19 = bitcast i8* %18 to <2 x i32>* + %20 = getelementptr inbounds %0* %2, i32 0, i32 0 + %21 = bitcast %"int[]"* %20 to <4 x float>* + %22 = load <4 x float>* %21, align 16 + %23 = bitcast <2 x i32>* %19 to <2 x float>* + %24 = bitcast 
<2 x float>* %23 to double* + %25 = load double* %24 + %26 = insertelement <2 x double> undef, double %25, i32 0 + %27 = insertelement <2 x double> %26, double undef, i32 1 + %28 = bitcast <2 x double> %27 to <4 x float> + %29 = shufflevector <4 x float> %22, <4 x float> %28, <4 x i32> <i32 4, i32 5, i32 2, i32 3> + %30 = getelementptr inbounds %0* %2, i32 0, i32 0 + %31 = bitcast %"int[]"* %30 to <4 x float>* + store <4 x float> %29, <4 x float>* %31, align 16 + br label %return + +return: ; preds = %"2" + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2010-11-18-SelectOfExtload.ll b/src/LLVM/test/CodeGen/X86/2010-11-18-SelectOfExtload.ll new file mode 100644 index 0000000..a1074b6 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-11-18-SelectOfExtload.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=x86 | FileCheck %s +; Both values were being zero extended. +@u = external global i8 +@s = external global i8 +define i32 @foo(i1 %cond) { +; CHECK: @foo + %u_base = load i8* @u + %u_val = zext i8 %u_base to i32 +; CHECK: movzbl +; CHECK: movsbl + %s_base = load i8* @s + %s_val = sext i8 %s_base to i32 + %val = select i1 %cond, i32 %u_val, i32 %s_val + ret i32 %val +}
diff --git a/src/LLVM/test/CodeGen/X86/2010-12-02-MC-Set.ll b/src/LLVM/test/CodeGen/X86/2010-12-02-MC-Set.ll new file mode 100644 index 0000000..3144678 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2010-12-02-MC-Set.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -disable-dot-loc -mtriple=x86_64-apple-darwin -O0 | FileCheck %s + + +define void @foo() nounwind ssp { +entry: + ret void, !dbg !5 +} + +!llvm.dbg.sp = !{!0} + +!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @foo} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 589865, metadata !"e.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"e.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 120563)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{null} +!5 = metadata !{i32 5, i32 1, metadata !6, null} +!6 = metadata !{i32 589835, metadata !0, i32 3, i32 16, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] + +; CHECK: .subsections_via_symbols +; CHECK-NEXT: __debug_line +; CHECK-NEXT: Ltmp +; CHECK-NEXT: Ltmp{{[0-9]}} = (Ltmp
diff --git a/src/LLVM/test/CodeGen/X86/2011-01-07-LegalizeTypesCrash.ll b/src/LLVM/test/CodeGen/X86/2011-01-07-LegalizeTypesCrash.ll new file mode 100644 index 0000000..b9cf65b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2011-01-07-LegalizeTypesCrash.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -enable-legalize-types-checking +; PR8582 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +target triple = "i686-pc-win32" + +define void @test() nounwind { + %i17 = icmp eq <4 x i8> undef, zeroinitializer + %cond = extractelement <4 x i1> %i17, i32 0 + %_comp = select i1 %cond, i8 0, i8 undef + %merge = insertelement <4 x i8> undef, i8 %_comp, i32 0 + %cond3 = extractelement <4 x i1> %i17, i32 1 + %_comp4 = select i1 %cond3, i8 0, i8 undef + %merge5 = insertelement <4 x i8> %merge, i8 %_comp4, i32 1 + %cond8 = extractelement <4 x i1> %i17, i32 2 + %_comp9 = select i1 %cond8, i8 0, i8 undef + %m387 = insertelement <4 x i8> %merge5, i8 %_comp9, i32 2 + store <4 x i8> %m387, <4 x i8>* undef + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2011-01-10-DagCombineHang.ll b/src/LLVM/test/CodeGen/X86/2011-01-10-DagCombineHang.ll new file mode 100644 index 0000000..bf438b8 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2011-01-10-DagCombineHang.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 +; This formerly got DagCombine into a loop, PR 8916. + +define i32 @foo(i64 %x, i64 %y, i64 %z, i32 %a, i32 %b) { +entry: + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + %t1 = shl i64 %x, 15 + %t2 = and i64 %t1, 4294934528 + %t3 = or i64 %t2, %y + %t4 = xor i64 %z, %t3 + %t5 = trunc i64 %t4 to i32 + %t6 = add i32 %a, %t5 + %t7 = add i32 %t6, %b + ret i32 %t7 +}
diff --git a/src/LLVM/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll b/src/LLVM/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll new file mode 100644 index 0000000..7f13411 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
@@ -0,0 +1,104 @@ +; RUN: llc < %s | FileCheck %s +; RUN: llc < %s -regalloc=basic | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +; Check debug info for variable z_s +;CHECK: .ascii "z_s" ## DW_AT_name +;CHECK-NEXT: .byte 0 +;CHECK-NEXT: ## DW_AT_decl_file +;CHECK-NEXT: ## DW_AT_decl_line +;CHECK-NEXT: ## DW_AT_type +;CHECK-NEXT: ## DW_AT_location + + +@.str1 = private unnamed_addr constant [14 x i8] c"m=%u, z_s=%d\0A\00" +@str = internal constant [21 x i8] c"Failing test vector:\00" + +define i64 @gcd(i64 %a, i64 %b) nounwind readnone optsize noinline ssp { +entry: + tail call void @llvm.dbg.value(metadata !{i64 %a}, i64 0, metadata !10), !dbg !18 + tail call void @llvm.dbg.value(metadata !{i64 %b}, i64 0, metadata !11), !dbg !19 + br label %while.body, !dbg !20 + +while.body: ; preds = %while.body, %entry + %b.addr.0 = phi i64 [ %b, %entry ], [ %rem, %while.body ] + %a.addr.0 = phi i64 [ %a, %entry ], [ %b.addr.0, %while.body ] + %rem = srem i64 %a.addr.0, %b.addr.0, !dbg !21 + %cmp = icmp eq i64 %rem, 0, !dbg !23 + br i1 %cmp, label %if.then, label %while.body, !dbg !23 + +if.then: ; preds = %while.body + tail call void @llvm.dbg.value(metadata !{i64 %rem}, i64 0, metadata !12), !dbg !21 + ret i64 %b.addr.0, !dbg !23 +} + +define i32 @main() nounwind optsize ssp { +entry: + %call = tail call i32 @rand() nounwind optsize, !dbg !24 + tail call void @llvm.dbg.value(metadata !{i32 %call}, i64 0, metadata !14), !dbg !24 + %cmp = icmp ugt i32 %call, 21, !dbg !25 + br i1 %cmp, label %cond.true, label %cond.end, !dbg !25 + +cond.true: ; preds = %entry + %call1 = tail call i32 @rand() nounwind optsize, !dbg !25 + br label %cond.end, !dbg !25 + +cond.end: ; preds = %entry, %cond.true + %cond = phi i32 [ %call1, %cond.true ], [ %call, %entry ], !dbg !25 + tail call void @llvm.dbg.value(metadata 
!{i32 %cond}, i64 0, metadata !17), !dbg !25 + %conv = sext i32 %cond to i64, !dbg !26 + %conv5 = zext i32 %call to i64, !dbg !26 + %call6 = tail call i64 @gcd(i64 %conv, i64 %conv5) optsize, !dbg !26 + %cmp7 = icmp eq i64 %call6, 0 + br i1 %cmp7, label %return, label %if.then, !dbg !26 + +if.then: ; preds = %cond.end + %puts = tail call i32 @puts(i8* getelementptr inbounds ([21 x i8]* @str, i64 0, i64 0)) + %call12 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([14 x i8]* @.str1, i64 0, i64 0), i32 %call, i32 %cond) nounwind optsize, !dbg !26 + ret i32 1, !dbg !27 + +return: ; preds = %cond.end + ret i32 0, !dbg !27 +} + +declare i32 @rand() optsize + +declare i32 @printf(i8* nocapture, ...) nounwind optsize + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +declare i32 @puts(i8* nocapture) nounwind + +!llvm.dbg.sp = !{!0, !6} +!llvm.dbg.lv.gcd = !{!10, !11, !12} +!llvm.dbg.lv.main = !{!14, !17} + +!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"gcd", metadata !"gcd", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i64 (i64, i64)* @gcd} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 589865, metadata !"rem_small.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"rem_small.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 124117)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5} +!5 = metadata !{i32 589860, metadata !2, metadata !"long int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 25, metadata !7, i1 false, i1 true, 
i32 0, i32 0, i32 0, i32 0, i1 true, i32 ()* @main} ; [ DW_TAG_subprogram ] +!7 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!8 = metadata !{metadata !9} +!9 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!10 = metadata !{i32 590081, metadata !0, metadata !"a", metadata !1, i32 5, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] +!11 = metadata !{i32 590081, metadata !0, metadata !"b", metadata !1, i32 5, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] +!12 = metadata !{i32 590080, metadata !13, metadata !"c", metadata !1, i32 6, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] +!13 = metadata !{i32 589835, metadata !0, i32 5, i32 52, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!14 = metadata !{i32 590080, metadata !15, metadata !"m", metadata !1, i32 26, metadata !16, i32 0} ; [ DW_TAG_auto_variable ] +!15 = metadata !{i32 589835, metadata !6, i32 25, i32 12, metadata !1, i32 2} ; [ DW_TAG_lexical_block ] +!16 = metadata !{i32 589860, metadata !2, metadata !"unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!17 = metadata !{i32 590080, metadata !15, metadata !"z_s", metadata !1, i32 27, metadata !9, i32 0} ; [ DW_TAG_auto_variable ] +!18 = metadata !{i32 5, i32 41, metadata !0, null} +!19 = metadata !{i32 5, i32 49, metadata !0, null} +!20 = metadata !{i32 7, i32 5, metadata !13, null} +!21 = metadata !{i32 8, i32 9, metadata !22, null} +!22 = metadata !{i32 589835, metadata !13, i32 7, i32 14, metadata !1, i32 1} ; [ DW_TAG_lexical_block ] +!23 = metadata !{i32 9, i32 9, metadata !22, null} +!24 = metadata !{i32 26, i32 38, metadata !15, null} +!25 = metadata !{i32 27, i32 38, metadata !15, null} +!26 = metadata !{i32 28, i32 9, metadata !15, null} +!27 = metadata !{i32 30, i32 1, metadata !15, null}
diff --git a/src/LLVM/test/CodeGen/X86/2011-02-04-FastRegallocNoFP.ll b/src/LLVM/test/CodeGen/X86/2011-02-04-FastRegallocNoFP.ll new file mode 100644 index 0000000..cedd6a2 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2011-02-04-FastRegallocNoFP.ll
@@ -0,0 +1,14 @@ +; RUN: llc -O0 < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +declare i32 @foo() + +define i32 @bar() nounwind { +; CHECK: bar +; CHECK-NOT: pop.*ax + %call = call i32 @foo() + ret i32 %call +} +
diff --git a/src/LLVM/test/CodeGen/X86/2011-02-12-shuffle.ll b/src/LLVM/test/CodeGen/X86/2011-02-12-shuffle.ll new file mode 100644 index 0000000..b4d56d1 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2011-02-12-shuffle.ll
@@ -0,0 +1,32 @@ +; RUN: llc < %s +; PR9165 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +target triple = "i686-pc-win32" + +define void @m_387() nounwind { +entry: + br i1 undef, label %if.end, label %UnifiedReturnBlock + +if.end: ; preds = %entry + %tmp1067 = load <16 x i32> addrspace(1)* null, align 64 + %tmp1082 = shufflevector <16 x i32> <i32 0, i32 0, i32 0, i32 undef, i32 undef, i32 0, i32 0, i32 undef, i32 0, i32 0, i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef>, + <16 x i32> %tmp1067, + <16 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 26, i32 5, i32 6, i32 undef, i32 8, i32 9, i32 31, i32 30, i32 12, i32 undef, i32 undef, i32 undef> + + %tmp1100 = shufflevector <16 x i32> %tmp1082, + <16 x i32> %tmp1067, + <16 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 4, i32 5, i32 6, i32 18, i32 8, i32 9, i32 10, i32 11, i32 12, i32 25, i32 undef, i32 17> + + %tmp1112 = shufflevector <16 x i32> %tmp1100, + <16 x i32> %tmp1067, + <16 x i32> <i32 0, i32 1, i32 2, i32 24, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 18, i32 15> + + store <16 x i32> %tmp1112, <16 x i32> addrspace(1)* undef, align 64 + + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/2011-02-21-VirtRegRewriter-KillSubReg.ll b/src/LLVM/test/CodeGen/X86/2011-02-21-VirtRegRewriter-KillSubReg.ll new file mode 100644 index 0000000..f982723 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2011-02-21-VirtRegRewriter-KillSubReg.ll
@@ -0,0 +1,50 @@ +; RUN: llc < %s -O2 -march=x86 -mtriple=i386-pc-linux-gnu -relocation-model=pic | FileCheck %s +; PR9237: Assertion in VirtRegRewriter.cpp, ResurrectConfirmedKill +; `KillOps[*SR] == KillOp && "invalid subreg kill flags"' + +%t = type { i32 } + +define i32 @foo(%t* %s) nounwind { +entry: + br label %if.then735 + +if.then735: + %call747 = call i32 undef(%t* %s, i8* null, i8* undef, i32 128, i8* undef, i32 516) nounwind + br i1 undef, label %if.then751, label %if.then758 + +if.then751: + unreachable + +if.then758: + %add761 = add i32 %call747, 4 + %add763 = add i32 %add761, %call747 + %add.ptr768 = getelementptr inbounds [516 x i8]* null, i32 0, i32 %add761 + br i1 undef, label %cond.false783, label %cond.true771 + +cond.true771: + %call782 = call i8* @__memmove_chk(i8* %add.ptr768, i8* undef, i32 %call747, i32 undef) + br label %cond.end791 + +; CHECK: calll __memmove_chk +cond.false783: + %call.i1035 = call i8* @__memmove_chk(i8* %add.ptr768, i8* undef, i32 %call747, i32 undef) nounwind + br label %cond.end791 + +cond.end791: + %conv801 = trunc i32 %call747 to i8 + %add.ptr822.sum = add i32 %call747, 3 + %arrayidx833 = getelementptr inbounds [516 x i8]* null, i32 0, i32 %add.ptr822.sum + store i8 %conv801, i8* %arrayidx833, align 1 + %cmp841 = icmp eq i8* undef, null + br i1 %cmp841, label %if.end849, label %if.then843 + +if.then843: + unreachable + +if.end849: + %call921 = call i32 undef(%t* %s, i8* undef, i8* undef, i32 %add763) nounwind + unreachable + +} + +declare i8* @__memmove_chk(i8*, i8*, i32, i32) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/2011-02-23-UnfoldBug.ll b/src/LLVM/test/CodeGen/X86/2011-02-23-UnfoldBug.ll new file mode 100644 index 0000000..900106a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2011-02-23-UnfoldBug.ll
@@ -0,0 +1,42 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 +; rdar://9045024 +; PR9305 + +define void @calc_gb_rad_still_sse2_double() nounwind ssp { +entry: + br label %for.cond.outer + +for.cond.outer: ; preds = %if.end71, %entry + %theta.0.ph = phi <2 x double> [ undef, %entry ], [ %theta.1, %if.end71 ] + %mul.i97 = fmul <2 x double> %theta.0.ph, undef + %mul.i96 = fmul <2 x double> %mul.i97, fmul (<2 x double> <double 2.000000e+00, double 2.000000e+00>, <2 x double> undef) + br i1 undef, label %for.body, label %for.end82 + +for.body: ; preds = %for.cond.outer + br i1 undef, label %for.body33.lr.ph, label %for.end + +for.body33.lr.ph: ; preds = %for.body + %dccf.2 = select i1 undef, <2 x double> %mul.i96, <2 x double> undef + unreachable + +for.end: ; preds = %for.body + %vecins.i94 = insertelement <2 x double> undef, double 0.000000e+00, i32 0 + %cmpsd.i = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %vecins.i94, <2 x double> <double 0x3FE984B204153B34, double 0x3FE984B204153B34>, i8 2) nounwind + tail call void (...)* @_mm_movemask_pd(<2 x double> %cmpsd.i) nounwind + br i1 undef, label %if.then67, label %if.end71 + +if.then67: ; preds = %for.end + %vecins.i91 = insertelement <2 x double> %vecins.i94, double undef, i32 0 + br label %if.end71 + +if.end71: ; preds = %if.then67, %for.end + %theta.1 = phi <2 x double> [ %vecins.i91, %if.then67 ], [ %theta.0.ph, %for.end ] + br label %for.cond.outer + +for.end82: ; preds = %for.cond.outer + ret void +} + +declare void @_mm_movemask_pd(...) + +declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/X86/2011-02-27-Fpextend.ll b/src/LLVM/test/CodeGen/X86/2011-02-27-Fpextend.ll new file mode 100644 index 0000000..c12b956 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2011-02-27-Fpextend.ll
@@ -0,0 +1,7 @@ +; RUN: llc -mtriple=x86_64-pc-linux < %s +; PR9309 + +define <4 x double> @f_fu(<4 x float>) nounwind { + %float2double.i = fpext <4 x float> %0 to <4 x double> + ret <4 x double> %float2double.i +}
diff --git a/src/LLVM/test/CodeGen/X86/2011-03-02-DAGCombiner.ll b/src/LLVM/test/CodeGen/X86/2011-03-02-DAGCombiner.ll new file mode 100644 index 0000000..be58ced --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2011-03-02-DAGCombiner.ll
@@ -0,0 +1,51 @@ +; RUN: llc < %s -march=x86-64 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin11.0.0" + +%0 = type { i8, [3 x i8] } +%struct.anon = type { float, x86_fp80 } + +define i32 @main() nounwind ssp { +entry: + %retval = alloca i32, align 4 + %F = alloca %struct.anon, align 16 + %K = alloca %0, align 4 + store i32 0, i32* %retval + %0 = bitcast %0* %K to i32* + %1 = load i32* %0, align 4 + %2 = and i32 %1, -121 + %3 = or i32 %2, 32 + store i32 %3, i32* %0, align 4 + %4 = bitcast %0* %K to i32* + %5 = load i32* %4, align 4 + %6 = lshr i32 %5, 3 + %bf.clear = and i32 %6, 15 + %conv = sitofp i32 %bf.clear to float + %f = getelementptr inbounds %struct.anon* %F, i32 0, i32 0 + %tmp = load float* %f, align 4 + %sub = fsub float %tmp, %conv + store float %sub, float* %f, align 4 + %ld = getelementptr inbounds %struct.anon* %F, i32 0, i32 1 + %tmp1 = load x86_fp80* %ld, align 16 + %7 = bitcast %0* %K to i32* + %8 = load i32* %7, align 4 + %9 = lshr i32 %8, 7 + %bf.clear2 = and i32 %9, 1 + %conv3 = uitofp i32 %bf.clear2 to x86_fp80 + %sub4 = fsub x86_fp80 %conv3, %tmp1 + %conv5 = fptoui x86_fp80 %sub4 to i32 + %bf.value = and i32 %conv5, 1 + %10 = bitcast %0* %K to i32* + %11 = and i32 %bf.value, 1 + %12 = shl i32 %11, 7 + %13 = load i32* %10, align 4 + %14 = and i32 %13, -129 + %15 = or i32 %14, %12 + store i32 %15, i32* %10, align 4 + %call = call i32 (...)* @iequals(i32 1841, i32 %bf.value, i32 0) + %16 = load i32* %retval + ret i32 %16 +} + +declare i32 @iequals(...)
diff --git a/src/LLVM/test/CodeGen/X86/2011-03-08-Sched-crash.ll b/src/LLVM/test/CodeGen/X86/2011-03-08-Sched-crash.ll new file mode 100644 index 0000000..6329ae6 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2011-03-08-Sched-crash.ll
@@ -0,0 +1,56 @@ +; RUN: llc < %s +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" +target triple = "i386-apple-darwin9.0.0" + +%0 = type { i32, i1 } + +declare %0 @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone + +define linkonce_odr hidden void @_ZN2js5QueueINS_7SlotMap8SlotInfoEE6ensureEj(i8* nocapture %this, i32 %size) nounwind align 2 { + br i1 undef, label %14, label %1 + +; <label>:1 ; preds = %0 + br i1 undef, label %2, label %3 + +; <label>:2 ; preds = %1 + br label %3 + +; <label>:3 ; preds = %2, %1 + br i1 undef, label %13, label %4 + +; <label>:4 ; preds = %3 + %5 = tail call %0 @llvm.umul.with.overflow.i32(i32 undef, i32 16) + %6 = extractvalue %0 %5, 1 + %7 = extractvalue %0 %5, 0 + %.op = add i32 %7, 7 + %.op.op = and i32 %.op, -8 + %8 = select i1 %6, i32 0, i32 %.op.op + br i1 undef, label %10, label %9 + +; <label>:9 ; preds = %4 + br label %_ZnamRN7nanojit9AllocatorE.exit + +; <label>:10 ; preds = %4 + %11 = tail call i8* @_ZN7nanojit9Allocator9allocSlowEmb(i8* undef, i32 %8, i1 zeroext false) nounwind + br label %_ZnamRN7nanojit9AllocatorE.exit + +_ZnamRN7nanojit9AllocatorE.exit: ; preds = %10, %9 + br i1 false, label %._crit_edge, label %.lr.ph + +.lr.ph: ; preds = %_ZnamRN7nanojit9AllocatorE.exit + br label %12 + +; <label>:12 ; preds = %12, %.lr.ph + br i1 undef, label %._crit_edge, label %12 + +._crit_edge: ; preds = %12, %_ZnamRN7nanojit9AllocatorE.exit + br label %14 + +; <label>:13 ; preds = %3 + br label %14 + +; <label>:14 ; preds = %13, %._crit_edge, %0 + ret void +} + +declare i8* @_ZN7nanojit9Allocator9allocSlowEmb(i8*, i32, i1 zeroext)
diff --git a/src/LLVM/test/CodeGen/X86/2011-03-09-Physreg-Coalescing.ll b/src/LLVM/test/CodeGen/X86/2011-03-09-Physreg-Coalescing.ll new file mode 100644 index 0000000..e48edf7 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2011-03-09-Physreg-Coalescing.ll
@@ -0,0 +1,22 @@ +; RUN: llc -mcpu=yonah < %s +; PR9438 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +target triple = "i386-unknown-freebsd9.0" + +; The 'call fastcc' ties down %ebx, %ecx, and %edx. +; A MUL8r ties down %al, leaving no GR32_ABCD registers available. +; The coalescer can easily overallocate physical registers, +; and register allocation fails. + +declare fastcc i8* @save_string(i8* %d, i8* nocapture %s) nounwind + +define i32 @cvtchar(i8* nocapture %sp) nounwind { + %temp.i = alloca [2 x i8], align 1 + %tmp1 = load i8* %sp, align 1 + %div = udiv i8 %tmp1, 10 + %rem = urem i8 %div, 10 + %arrayidx.i = getelementptr inbounds [2 x i8]* %temp.i, i32 0, i32 0 + store i8 %rem, i8* %arrayidx.i, align 1 + %call.i = call fastcc i8* @save_string(i8* %sp, i8* %arrayidx.i) nounwind + ret i32 undef +}
diff --git a/src/LLVM/test/CodeGen/X86/2011-03-30-CreateFixedObjCrash.ll b/src/LLVM/test/CodeGen/X86/2011-03-30-CreateFixedObjCrash.ll new file mode 100644 index 0000000..38a9b3d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2011-03-30-CreateFixedObjCrash.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=x86 + +; rdar://7983260 + +%struct.T0 = type {} + +define void @fn4(%struct.T0* byval %arg0) nounwind ssp { +entry: + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll b/src/LLVM/test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll new file mode 100644 index 0000000..c6f4b49 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll
@@ -0,0 +1,65 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | FileCheck %s +; Reduced from JavaScriptCore + +%"class.JSC::CodeLocationCall" = type { [8 x i8] } +%"class.JSC::JSGlobalData" = type { [4 x i8] } +%"class.JSC::FunctionPtr" = type { i8* } +%"class.JSC::Structure" = type { [4 x i8] } +%"class.JSC::UString" = type { i8* } +%"class.JSC::JSString" = type { [16 x i8], i32, %"class.JSC::UString", i32 } + +declare hidden fastcc void @_ZN3JSCL23returnToThrowTrampolineEPNS_12JSGlobalDataENS_16ReturnAddressPtrERS2_(%"class.JSC::JSGlobalData"* nocapture, i8*, %"class.JSC::FunctionPtr"* nocapture) nounwind noinline ssp + +; Avoid hoisting the test above loads or copies +; CHECK: %entry +; CHECK: cmpq +; CHECK-NOT: mov +; CHECK: jb +define i32 @cti_op_eq(i8** nocapture %args) nounwind ssp { +entry: + %0 = load i8** null, align 8 + %tmp13 = bitcast i8* %0 to %"class.JSC::CodeLocationCall"* + %tobool.i.i.i = icmp ugt i8* undef, inttoptr (i64 281474976710655 to i8*) + %or.cond.i = and i1 %tobool.i.i.i, undef + br i1 %or.cond.i, label %if.then.i, label %if.end.i + +if.then.i: ; preds = %entry + br i1 undef, label %if.then.i.i.i, label %_ZN3JSC7JSValue19equalSlowCaseInlineEPNS_9ExecStateES0_S0_.exit + +if.then.i.i.i: ; preds = %if.then.i + %conv.i.i.i.i = trunc i64 undef to i32 + br label %_ZN3JSC7JSValue19equalSlowCaseInlineEPNS_9ExecStateES0_S0_.exit + +if.end.i: ; preds = %entry + br i1 undef, label %land.rhs.i121.i, label %_ZNK3JSC7JSValue8isStringEv.exit122.i + +land.rhs.i121.i: ; preds = %if.end.i + %tmp.i.i117.i = load %"class.JSC::Structure"** undef, align 8 + br label %_ZNK3JSC7JSValue8isStringEv.exit122.i + +_ZNK3JSC7JSValue8isStringEv.exit122.i: ; preds = %land.rhs.i121.i, %if.end.i + %brmerge.i = or i1 undef, false + %or.cond = or i1 false, %brmerge.i + br i1 %or.cond, label %_ZN3JSC7JSValue19equalSlowCaseInlineEPNS_9ExecStateES0_S0_.exit, label %if.then.i92.i + +if.then.i92.i: ; preds = %_ZNK3JSC7JSValue8isStringEv.exit122.i + tail call void 
@_ZNK3JSC8JSString11resolveRopeEPNS_9ExecStateE(%"class.JSC::JSString"* undef, %"class.JSC::CodeLocationCall"* %tmp13) nounwind + unreachable + +_ZN3JSC7JSValue19equalSlowCaseInlineEPNS_9ExecStateES0_S0_.exit: ; preds = %_ZNK3JSC7JSValue8isStringEv.exit122.i, %if.then.i.i.i, %if.then.i + + %1 = load i8** undef, align 8 + br i1 undef, label %do.end39, label %do.body27 + +do.body27: ; preds = %_ZN3JSC7JSValue19equalSlowCaseInlineEPNS_9ExecStateES0_S0_.exit + %tmp30 = bitcast i8* %1 to %"class.JSC::JSGlobalData"* + %2 = getelementptr inbounds i8** %args, i64 -1 + %3 = bitcast i8** %2 to %"class.JSC::FunctionPtr"* + tail call fastcc void @_ZN3JSCL23returnToThrowTrampolineEPNS_12JSGlobalDataENS_16ReturnAddressPtrERS2_(%"class.JSC::JSGlobalData"* %tmp30, i8* undef, %"class.JSC::FunctionPtr"* %3) + unreachable + +do.end39: ; preds = %_ZN3JSC7JSValue19equalSlowCaseInlineEPNS_9ExecStateES0_S0_.exit + ret i32 undef +} + +declare void @_ZNK3JSC8JSString11resolveRopeEPNS_9ExecStateE(%"class.JSC::JSString"*, %"class.JSC::CodeLocationCall"*)
diff --git a/src/LLVM/test/CodeGen/X86/2011-05-09-loaduse.ll b/src/LLVM/test/CodeGen/X86/2011-05-09-loaduse.ll new file mode 100644 index 0000000..8673d74 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2011-05-09-loaduse.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=x86 -mcpu=corei7 | FileCheck %s + +;CHECK: test +;CHECK-not: pshufd +;CHECK: ret +define float @test(<4 x float>* %A) nounwind { +entry: + %T = load <4 x float>* %A + %R = extractelement <4 x float> %T, i32 3 + store <4 x float><float 0.0, float 0.0, float 0.0, float 0.0>, <4 x float>* %A + ret float %R +} +
diff --git a/src/LLVM/test/CodeGen/X86/2011-05-26-UnreachableBlockElim.ll b/src/LLVM/test/CodeGen/X86/2011-05-26-UnreachableBlockElim.ll new file mode 100644 index 0000000..0f18f09 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2011-05-26-UnreachableBlockElim.ll
@@ -0,0 +1,53 @@ +; RUN: llc < %s -verify-coalescing +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-macosx10.6.0" + +%struct.attrib = type { i32, i32 } +%struct.dfa = type { [80 x i8], i32, %struct.state*, i32, i32, %struct.attrib*, i32, i32 } +%struct.state = type { i32, [4 x i32] } + +@aux_temp = external global %struct.dfa, align 8 + +declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readnone + +declare void @__memset_chk() nounwind + +define void @dfa_add_string() nounwind uwtable ssp { +entry: + br label %if.end.i + +if.end.i: ; preds = %entry + %idxprom.i = add i64 0, 1 + br i1 undef, label %land.end.thread.i, label %land.end.i + +land.end.thread.i: ; preds = %if.end.i + %0 = call i64 @llvm.objectsize.i64(i8* undef, i1 false) nounwind + %cmp1710.i = icmp eq i64 %0, -1 + br i1 %cmp1710.i, label %cond.false156.i, label %cond.true138.i + +land.end.i: ; preds = %if.end.i + %1 = call i64 @llvm.objectsize.i64(i8* undef, i1 false) nounwind + %cmp17.i = icmp eq i64 %1, -1 + br i1 %cmp17.i, label %cond.false156.i, label %cond.true138.i + +cond.true138.i: ; preds = %for.end.i, %land.end.thread.i + call void @__memset_chk() nounwind + br label %cond.end166.i + +cond.false156.i: ; preds = %for.end.i, %land.end.thread.i + %idxprom1114.i = phi i64 [ undef, %land.end.thread.i ], [ %idxprom.i, %land.end.i ] + call void @__memset_chk() nounwind + br label %cond.end166.i + +cond.end166.i: ; preds = %cond.false156.i, %cond.true138.i + %idxprom1113.i = phi i64 [ %idxprom1114.i, %cond.false156.i ], [ undef, %cond.true138.i ] + %tmp235.i = load %struct.state** getelementptr inbounds (%struct.dfa* @aux_temp, i64 0, i32 2), align 8, !tbaa !0 + %att.i = getelementptr inbounds %struct.state* %tmp235.i, i64 %idxprom1113.i, i32 0 + store i32 0, i32* %att.i, align 4, !tbaa !3 + ret void +} + +!0 = metadata !{metadata !"any pointer", metadata 
!1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null} +!3 = metadata !{metadata !"int", metadata !1}
diff --git a/src/LLVM/test/CodeGen/X86/2011-05-27-CrossClassCoalescing.ll b/src/LLVM/test/CodeGen/X86/2011-05-27-CrossClassCoalescing.ll new file mode 100644 index 0000000..c595bba --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2011-05-27-CrossClassCoalescing.ll
@@ -0,0 +1,41 @@ +; RUN: llc < %s -verify-coalescing +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-macosx10.6.0" + +@bit_count = external constant [256 x i32], align 16 + +define fastcc void @unate_intersect() nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.inc.i + br label %do.body.i + +do.body.i: ; preds = %do.body.i, %for.body + %exitcond149 = icmp eq i64 undef, undef + br i1 %exitcond149, label %land.lhs.true, label %do.body.i + +land.lhs.true: ; preds = %do.body.i + br label %for.body.i + +for.body.i: ; preds = %for.inc.i, %if.then + %tmp3524.i = phi i32 [ 0, %land.lhs.true ], [ %tmp351.i, %for.inc.i ] + %tmp6.i12 = load i32* undef, align 4 + br i1 undef, label %for.inc.i, label %if.then.i17 + +if.then.i17: ; preds = %for.body.i + %shr.i14 = lshr i32 %tmp6.i12, 8 + %and14.i = and i32 %shr.i14, 255 + %idxprom15.i = zext i32 %and14.i to i64 + %arrayidx16.i = getelementptr inbounds [256 x i32]* @bit_count, i64 0, i64 %idxprom15.i + %tmp17.i15 = load i32* %arrayidx16.i, align 4 + %add.i = add i32 0, %tmp3524.i + %add24.i = add i32 %add.i, %tmp17.i15 + %add31.i = add i32 %add24.i, 0 + %add33.i = add i32 %add31.i, 0 + br label %for.inc.i + +for.inc.i: ; preds = %if.then.i17, %for.body.i + %tmp351.i = phi i32 [ %add33.i, %if.then.i17 ], [ %tmp3524.i, %for.body.i ] + br label %for.body.i +}
diff --git a/src/LLVM/test/CodeGen/X86/2011-06-01-fildll.ll b/src/LLVM/test/CodeGen/X86/2011-06-01-fildll.ll new file mode 100644 index 0000000..3a0b05f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2011-06-01-fildll.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=x86 | FileCheck %s +; ModuleID = '<stdin>' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" +target triple = "i386-apple-macosx10.6.6" + +define float @f(i64* nocapture %x) nounwind readonly ssp { +entry: +; CHECK: movl +; CHECK-NOT: movl + %tmp1 = load i64* %x, align 4 +; CHECK: fildll + %conv = sitofp i64 %tmp1 to float + %add = fadd float %conv, 1.000000e+00 + ret float %add +}
diff --git a/src/LLVM/test/CodeGen/X86/2011-06-03-x87chain.ll b/src/LLVM/test/CodeGen/X86/2011-06-03-x87chain.ll new file mode 100644 index 0000000..bf7f583 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2011-06-03-x87chain.ll
@@ -0,0 +1,31 @@ +; RUN: llc < %s -march=x86 -mattr=+sse | FileCheck %s + +define float @chainfail1(i64* nocapture %a, i64* nocapture %b, i32 %x, i32 %y, float* nocapture %f) nounwind uwtable noinline ssp { +entry: + %tmp1 = load i64* %a, align 8 +; Insure x87 ops are properly chained, order preserved. +; CHECK: fildll + %conv = sitofp i64 %tmp1 to float +; CHECK: fstps + store float %conv, float* %f, align 4 +; CHECK: idivl + %div = sdiv i32 %x, %y + %conv5 = sext i32 %div to i64 + store i64 %conv5, i64* %b, align 8 + ret float %conv +} + +define float @chainfail2(i64* nocapture %a, i64* nocapture %b, i32 %x, i32 %y, float* nocapture %f) nounwind uwtable noinline ssp { +entry: +; CHECK: movl $0, + store i64 0, i64* %b, align 8 + %mul = mul nsw i32 %y, %x + %sub = add nsw i32 %mul, -1 + %idxprom = sext i32 %sub to i64 + %arrayidx = getelementptr inbounds i64* %a, i64 %idxprom + %tmp4 = load i64* %arrayidx, align 8 +; CHECK: fildll + %conv = sitofp i64 %tmp4 to float + store float %conv, float* %f, align 4 + ret float %conv +}
diff --git a/src/LLVM/test/CodeGen/X86/2011-06-06-fgetsign80bit.ll b/src/LLVM/test/CodeGen/X86/2011-06-06-fgetsign80bit.ll new file mode 100644 index 0000000..d934148 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2011-06-06-fgetsign80bit.ll
@@ -0,0 +1,8 @@ +; RUN: llc -march=x86-64 < %s +define i32 @signbitl(x86_fp80 %x) nounwind uwtable readnone { +entry: + %tmp4 = bitcast x86_fp80 %x to i80 + %tmp4.lobit = lshr i80 %tmp4, 79 + %tmp = trunc i80 %tmp4.lobit to i32 + ret i32 %tmp +}
diff --git a/src/LLVM/test/CodeGen/X86/2011-06-12-FastAllocSpill.ll b/src/LLVM/test/CodeGen/X86/2011-06-12-FastAllocSpill.ll new file mode 100644 index 0000000..a51dad0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2011-06-12-FastAllocSpill.ll
@@ -0,0 +1,52 @@ +; RUN: llc < %s -O0 -disable-fp-elim -relocation-model=pic -stats |& FileCheck %s +; +; This test should not cause any spilling with RAFast. +; +; CHECK: Number of copies coalesced +; CHECK-NOT: Number of stores added +; +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +%0 = type { i64, i64, i8*, i8* } +%1 = type opaque +%2 = type opaque +%3 = type <{ i8*, i32, i32, void (%4*)*, i8*, i64 }> +%4 = type { i8**, i32, i32, i8**, %5*, i64 } +%5 = type { i64, i64 } +%6 = type { i8*, i32, i32, i8*, %5* } + +@0 = external hidden constant %0 + +define hidden void @f() ssp { +bb: + %tmp5 = alloca i64, align 8 + %tmp6 = alloca void ()*, align 8 + %tmp7 = alloca %3, align 8 + store i64 0, i64* %tmp5, align 8 + br label %bb8 + +bb8: ; preds = %bb23, %bb + %tmp15 = getelementptr inbounds %3* %tmp7, i32 0, i32 4 + store i8* bitcast (%0* @0 to i8*), i8** %tmp15 + %tmp16 = bitcast %3* %tmp7 to void ()* + store void ()* %tmp16, void ()** %tmp6, align 8 + %tmp17 = load void ()** %tmp6, align 8 + %tmp18 = bitcast void ()* %tmp17 to %6* + %tmp19 = getelementptr inbounds %6* %tmp18, i32 0, i32 3 + %tmp20 = bitcast %6* %tmp18 to i8* + %tmp21 = load i8** %tmp19 + %tmp22 = bitcast i8* %tmp21 to void (i8*)* + call void %tmp22(i8* %tmp20) + br label %bb23 + +bb23: ; preds = %bb8 + %tmp24 = load i64* %tmp5, align 8 + %tmp25 = add i64 %tmp24, 1 + store i64 %tmp25, i64* %tmp5, align 8 + %tmp26 = icmp ult i64 %tmp25, 10 + br i1 %tmp26, label %bb8, label %bb27 + +bb27: ; preds = %bb23 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2011-06-14-PreschedRegalias.ll b/src/LLVM/test/CodeGen/X86/2011-06-14-PreschedRegalias.ll new file mode 100644 index 0000000..114b985 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2011-06-14-PreschedRegalias.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=x86-64 -stress-sched | FileCheck %s +; REQUIRES: asserts +; Test interference between physreg aliases during preRAsched. +; mul wants an operand in AL, but call clobbers it. + +define i8 @f(i8 %v1, i8 %v2) nounwind { +entry: +; CHECK: callq +; CHECK: movb %{{.*}}, %al +; CHECK: mulb +; CHECK: mulb + %rval = tail call i8 @bar() nounwind + %m1 = mul i8 %v1, %v2 + %m2 = mul i8 %m1, %rval + ret i8 %m2 +} + +declare i8 @bar()
diff --git a/src/LLVM/test/CodeGen/X86/2011-06-14-mmx-inlineasm.ll b/src/LLVM/test/CodeGen/X86/2011-06-14-mmx-inlineasm.ll new file mode 100644 index 0000000..445fc01 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2011-06-14-mmx-inlineasm.ll
@@ -0,0 +1,45 @@ +; RUN: llc -mcpu=i686 -mattr=+mmx < %s | FileCheck %s +; ModuleID = 'tq.c' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" +target triple = "i386-apple-macosx10.6.6" + +%0 = type { x86_mmx, x86_mmx, x86_mmx, x86_mmx, x86_mmx, x86_mmx, x86_mmx } + +define i32 @pixman_fill_mmx(i32* nocapture %bits, i32 %stride, i32 %bpp, i32 %x, i32 %y, i32 %width, i32 %height, i32 %xor) nounwind ssp { +entry: + %conv = zext i32 %xor to i64 + %shl = shl nuw i64 %conv, 32 + %or = or i64 %shl, %conv + %0 = bitcast i64 %or to x86_mmx +; CHECK: movq [[MMXR:%mm[0-7],]] {{%mm[0-7]}} +; CHECK-NEXT: movq [[MMXR]] {{%mm[0-7]}} +; CHECK-NEXT: movq [[MMXR]] {{%mm[0-7]}} +; CHECK-NEXT: movq [[MMXR]] {{%mm[0-7]}} +; CHECK-NEXT: movq [[MMXR]] {{%mm[0-7]}} +; CHECK-NEXT: movq [[MMXR]] {{%mm[0-7]}} +; CHECK-NEXT: movq [[MMXR]] {{%mm[0-7]}} + %1 = tail call %0 asm "movq\09\09$7,\09$0\0Amovq\09\09$7,\09$1\0Amovq\09\09$7,\09$2\0Amovq\09\09$7,\09$3\0Amovq\09\09$7,\09$4\0Amovq\09\09$7,\09$5\0Amovq\09\09$7,\09$6\0A", "=&y,=&y,=&y,=&y,=&y,=&y,=y,y,~{dirflag},~{fpsr},~{flags}"(x86_mmx %0) nounwind, !srcloc !0 + %asmresult = extractvalue %0 %1, 0 + %asmresult6 = extractvalue %0 %1, 1 + %asmresult7 = extractvalue %0 %1, 2 + %asmresult8 = extractvalue %0 %1, 3 + %asmresult9 = extractvalue %0 %1, 4 + %asmresult10 = extractvalue %0 %1, 5 + %asmresult11 = extractvalue %0 %1, 6 +; CHECK: movq {{%mm[0-7]}}, +; CHECK-NEXT: movq {{%mm[0-7]}}, +; CHECK-NEXT: movq {{%mm[0-7]}}, +; CHECK-NEXT: movq {{%mm[0-7]}}, +; CHECK-NEXT: movq {{%mm[0-7]}}, +; CHECK-NEXT: movq {{%mm[0-7]}}, +; CHECK-NEXT: movq {{%mm[0-7]}}, +; CHECK-NEXT: movq {{%mm[0-7]}}, + tail call void asm sideeffect "movq\09$1,\09 ($0)\0Amovq\09$2,\09 8($0)\0Amovq\09$3,\0916($0)\0Amovq\09$4,\0924($0)\0Amovq\09$5,\0932($0)\0Amovq\09$6,\0940($0)\0Amovq\09$7,\0948($0)\0Amovq\09$8,\0956($0)\0A", 
"r,y,y,y,y,y,y,y,y,~{memory},~{dirflag},~{fpsr},~{flags}"(i8* undef, x86_mmx %0, x86_mmx %asmresult, x86_mmx %asmresult6, x86_mmx %asmresult7, x86_mmx %asmresult8, x86_mmx %asmresult9, x86_mmx %asmresult10, x86_mmx %asmresult11) nounwind, !srcloc !1 + tail call void @llvm.x86.mmx.emms() nounwind + ret i32 1 +} + +declare void @llvm.x86.mmx.emms() nounwind + +!0 = metadata !{i32 888, i32 917, i32 945, i32 973, i32 1001, i32 1029, i32 1057} +!1 = metadata !{i32 1390, i32 1430, i32 1469, i32 1508, i32 1547, i32 1586, i32 1625, i32 1664}
diff --git a/src/LLVM/test/CodeGen/X86/2011-06-19-QuicksortCoalescerBug.ll b/src/LLVM/test/CodeGen/X86/2011-06-19-QuicksortCoalescerBug.ll new file mode 100644 index 0000000..08178a3 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2011-06-19-QuicksortCoalescerBug.ll
@@ -0,0 +1,31 @@ +; RUN: llc < %s -verify-coalescing +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-macosx10.7.0" + +define void @Quicksort(i32* %a, i32 %l, i32 %r) nounwind ssp { +entry: + br label %tailrecurse + +tailrecurse: ; preds = %do.cond, %entry + %l.tr = phi i32 [ %l, %entry ], [ %i.1, %do.cond ] + %r.tr = phi i32 [ %r, %entry ], [ %l.tr, %do.cond ] + %idxprom12 = sext i32 %r.tr to i64 + %arrayidx14 = getelementptr inbounds i32* %a, i64 %idxprom12 + br label %do.body + +do.body: ; preds = %do.cond, %tailrecurse + %i.0 = phi i32 [ %l.tr, %tailrecurse ], [ %i.1, %do.cond ] + %add7 = add nsw i32 %i.0, 1 + %cmp = icmp sgt i32 %add7, %r.tr + br i1 %cmp, label %do.cond, label %if.then + +if.then: ; preds = %do.body + store i32 %add7, i32* %arrayidx14, align 4 + %add16 = add i32 %i.0, 2 + br label %do.cond + +do.cond: ; preds = %do.body, %if.then + %i.1 = phi i32 [ %add16, %if.then ], [ %add7, %do.body ] + %cmp19 = icmp sgt i32 %i.1, %r.tr + br i1 %cmp19, label %tailrecurse, label %do.body +}
diff --git a/src/LLVM/test/CodeGen/X86/2011-07-13-BadFrameIndexDisplacement.ll b/src/LLVM/test/CodeGen/X86/2011-07-13-BadFrameIndexDisplacement.ll new file mode 100644 index 0000000..7632034 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2011-07-13-BadFrameIndexDisplacement.ll
@@ -0,0 +1,20 @@ +; RUN: llc -march=x86-64 < %s -disable-fp-elim | FileCheck %s + +; This test is checking that we don't crash and we don't incorrectly fold +; a large displacement and a frame index into a single lea. +; <rdar://problem/9763308> + +declare void @bar([39 x i8]*) +define i32 @f(i64 %a, i64 %b) nounwind readnone { +entry: + %stack_main = alloca [39 x i8] + call void @bar([39 x i8]* %stack_main) + %tmp6 = add i64 %a, -2147483647 + %.sum = add i64 %tmp6, %b + %tmp8 = getelementptr inbounds [39 x i8]* %stack_main, i64 0, i64 %.sum + %tmp9 = load i8* %tmp8, align 1 + %tmp10 = sext i8 %tmp9 to i32 + ret i32 %tmp10 +} +; CHECK: f: +; CHECK: movsbl -2147483647
diff --git a/src/LLVM/test/CodeGen/X86/2011-08-23-PerformSubCombine128.ll b/src/LLVM/test/CodeGen/X86/2011-08-23-PerformSubCombine128.ll new file mode 100644 index 0000000..12171ac --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2011-08-23-PerformSubCombine128.ll
@@ -0,0 +1,18 @@ +; RUN: llc -march=x86-64 -O2 < %s + +define void @test(i64 %add127.tr.i2686) { +entry: + %conv143.i2687 = and i64 %add127.tr.i2686, 72057594037927935 + %conv76.i2623 = zext i64 %conv143.i2687 to i128 + %mul148.i2338 = mul i128 0, %conv76.i2623 + %add149.i2339 = add i128 %mul148.i2338, 0 + %add.i2303 = add i128 0, 170141183460469229370468033484042534912 + %add6.i2270 = add i128 %add.i2303, 0 + %sub58.i2271 = sub i128 %add6.i2270, %add149.i2339 + %add71.i2272 = add i128 %sub58.i2271, 0 + %add105.i2273 = add i128 %add71.i2272, 0 + %add116.i2274 = add i128 %add105.i2273, 0 + %shr124.i2277 = lshr i128 %add116.i2274, 56 + %add116.tr.i2280 = trunc i128 %add116.i2274 to i64 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/2011-08-23-Trampoline.ll b/src/LLVM/test/CodeGen/X86/2011-08-23-Trampoline.ll new file mode 100644 index 0000000..7a5a0f8 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2011-08-23-Trampoline.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=x86 +; RUN: llc < %s -march=x86-64 + + %struct.FRAME.gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets = type { i32, i32, void (i32, i32)*, i8 (i32, i32)* } + +define fastcc i32 @gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets.5146(i64 %table.0.0, i64 %table.0.1, i32 %last, i32 %pos) { +entry: + call void @llvm.init.trampoline( i8* null, i8* bitcast (void (%struct.FRAME.gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets*, i32, i32)* @gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets__move.5177 to i8*), i8* null ) ; <i8*> [#uses=0] + %tramp22 = call i8* @llvm.adjust.trampoline( i8* null) + unreachable +} + +declare void @gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets__move.5177(%struct.FRAME.gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets* nest , i32, i32) nounwind + +declare void @llvm.init.trampoline(i8*, i8*, i8*) nounwind +declare i8* @llvm.adjust.trampoline(i8*) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/2011-08-29-BlockConstant.ll b/src/LLVM/test/CodeGen/X86/2011-08-29-BlockConstant.ll new file mode 100644 index 0000000..83e4bcc --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2011-08-29-BlockConstant.ll
@@ -0,0 +1,34 @@ +; RUN: llc -march=x86-64 < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +@x = global [500 x i64] zeroinitializer, align 64 ; <[500 x i64]*> +; CHECK: x: +; CHECK: .zero 4000 + +@y = global [63 x i64] [ + i64 6799976246779207262, i64 6799976246779207262, i64 6799976246779207262, + i64 6799976246779207262, i64 6799976246779207262, i64 6799976246779207262, + i64 6799976246779207262, i64 6799976246779207262, i64 6799976246779207262, + i64 6799976246779207262, i64 6799976246779207262, i64 6799976246779207262, + i64 6799976246779207262, i64 6799976246779207262, i64 6799976246779207262, + i64 6799976246779207262, i64 6799976246779207262, i64 6799976246779207262, + i64 6799976246779207262, i64 6799976246779207262, i64 6799976246779207262, + i64 6799976246779207262, i64 6799976246779207262, i64 6799976246779207262, + i64 6799976246779207262, i64 6799976246779207262, i64 6799976246779207262, + i64 6799976246779207262, i64 6799976246779207262, i64 6799976246779207262, + i64 6799976246779207262, i64 6799976246779207262, i64 6799976246779207262, + i64 6799976246779207262, i64 6799976246779207262, i64 6799976246779207262, + i64 6799976246779207262, i64 6799976246779207262, i64 6799976246779207262, + i64 6799976246779207262, i64 6799976246779207262, i64 6799976246779207262, + i64 6799976246779207262, i64 6799976246779207262, i64 6799976246779207262, + i64 6799976246779207262, i64 6799976246779207262, i64 6799976246779207262, + i64 6799976246779207262, i64 6799976246779207262, i64 6799976246779207262, + i64 6799976246779207262, i64 6799976246779207262, i64 6799976246779207262, + i64 6799976246779207262, i64 6799976246779207262, i64 6799976246779207262, + i64 6799976246779207262, i64 6799976246779207262, i64 6799976246779207262, + i64 6799976246779207262, i64 6799976246779207262, 
i64 6799976246779207262], + align 64 ; <[63 x i64]*> 0x5e5e5e5e +; CHECK: y: +; CHECK: .zero 504,94
diff --git a/src/LLVM/test/CodeGen/X86/2011-08-29-InitOrder.ll b/src/LLVM/test/CodeGen/X86/2011-08-29-InitOrder.ll new file mode 100644 index 0000000..72c79d2 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2011-08-29-InitOrder.ll
@@ -0,0 +1,28 @@ +; RUN: llc < %s -mtriple=i386-linux-gnu | FileCheck %s --check-prefix=CHECK-DEFAULT +; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s --check-prefix=CHECK-DARWIN +; PR5329 + +@llvm.global_ctors = appending global [3 x { i32, void ()* }] [{ i32, void ()* } { i32 2000, void ()* @construct_2 }, { i32, void ()* } { i32 3000, void ()* @construct_3 }, { i32, void ()* } { i32 1000, void ()* @construct_1 }] +; CHECK-DEFAULT: construct_3 +; CHECK-DEFAULT: construct_2 +; CHECK-DEFAULT: construct_1 + +; CHECK-DARWIN: construct_1 +; CHECK-DARWIN: construct_2 +; CHECK-DARWIN: construct_3 + +@llvm.global_dtors = appending global [3 x { i32, void ()* }] [{ i32, void ()* } { i32 2000, void ()* @destruct_2 }, { i32, void ()* } { i32 1000, void ()* @destruct_1 }, { i32, void ()* } { i32 3000, void ()* @destruct_3 }] +; CHECK-DEFAULT: destruct_3 +; CHECK-DEFAULT: destruct_2 +; CHECK-DEFAULT: destruct_1 + +; CHECK-DARWIN: destruct_1 +; CHECK-DARWIN: destruct_2 +; CHECK-DARWIN: destruct_3 + +declare void @construct_1() +declare void @construct_2() +declare void @construct_3() +declare void @destruct_1() +declare void @destruct_2() +declare void @destruct_3()
diff --git a/src/LLVM/test/CodeGen/X86/2011-09-14-valcoalesce.ll b/src/LLVM/test/CodeGen/X86/2011-09-14-valcoalesce.ll new file mode 100644 index 0000000..1068d1b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2011-09-14-valcoalesce.ll
@@ -0,0 +1,174 @@ +; RUN: llc < %s -march=x86 | FileCheck %s +; +; Test RegistersDefinedFromSameValue. We have multiple copies of the same vreg: +; while.body85.i: +; vreg1 = copy vreg2 +; vreg2 = add +; critical edge from land.lhs.true.i -> if.end117.i: +; vreg27 = vreg2 +; critical edge from land.lhs.true103.i -> if.end117.i: +; vreg27 = vreg2 +; if.then108.i: +; vreg27 = vreg1 +; +; Prior to fixing PR10920 401.bzip miscompile, the coalescer would +; consider vreg1 and vreg27 to be copies of the same value. It would +; then remove one of the critical edge copes, which cannot safely be removed. +; +; CHECK: # %while.body85.i +; CHECK-NOT: # % +; CHECK-NOT: add +; CHECK: movl %[[POSTR:e[abcdxi]+]], %[[PRER:e[abcdxi]+]] +; CHECK: addl %{{.*}}, %[[POSTR]] +; CHECK: # %while.end.i +; CHECK: movl %[[POSTR]], %[[USER:e[abcdxi]+]] +; CHECK: # %land.lhs.true.i +; CHECK: movl %[[POSTR]], %[[USER]] +; CHECK: # %land.lhs.true103.i +; CHECK: movl %[[POSTR]], %[[USER]] +; CHECK: # %if.then108.i +; [[PRER] live out, so nothing on this path should define it. 
+; CHECK-NOT: , %[[PRER]] +; CHECK: # %if.end117.i + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" + +@.str3 = external unnamed_addr constant [59 x i8], align 1 + +define void @BZ2_compressBlock() nounwind ssp { +entry: + br i1 undef, label %if.then68, label %if.end85 + +if.then68: ; preds = %entry + br label %for.body.i.i + +for.body.i.i: ; preds = %for.inc.i.i, %if.then68 + br i1 undef, label %for.inc.i.i, label %if.then.i.i + +if.then.i.i: ; preds = %for.body.i.i + br label %for.inc.i.i + +for.inc.i.i: ; preds = %if.then.i.i, %for.body.i.i + br i1 undef, label %makeMaps_e.exit.i, label %for.body.i.i + +makeMaps_e.exit.i: ; preds = %for.inc.i.i + br i1 undef, label %for.cond19.preheader.i, label %for.cond.for.cond19.preheader_crit_edge.i + +for.cond.for.cond19.preheader_crit_edge.i: ; preds = %makeMaps_e.exit.i + unreachable + +for.cond19.preheader.i: ; preds = %makeMaps_e.exit.i + br i1 undef, label %for.body25.lr.ph.i, label %for.cond33.preheader.i + +for.body25.lr.ph.i: ; preds = %for.cond19.preheader.i + br label %for.body25.i + +for.cond33.preheader.i: ; preds = %for.body25.i, %for.cond19.preheader.i + br i1 undef, label %if.then.i, label %if.end.i + +for.body25.i: ; preds = %for.body25.i, %for.body25.lr.ph.i + br i1 undef, label %for.body25.i, label %for.cond33.preheader.i + +if.then.i: ; preds = %for.cond33.preheader.i + br label %if.end.i + +if.end.i: ; preds = %if.then.i, %for.cond33.preheader.i + br i1 undef, label %for.inc27.us.5.i, label %for.end30.i + +for.end30.i: ; preds = %for.inc27.us.5.i, %if.end.i + br i1 undef, label %if.end36.i, label %if.then35.i + +if.then35.i: ; preds = %for.end30.i + unreachable + +if.end36.i: ; preds = %for.end30.i + %sub83.i = add nsw i32 undef, 1 + br label %while.body.i188 + +for.cond182.preheader.i: ; preds = %for.end173.i + br i1 undef, label %for.inc220.us.i, label %while.body300.preheader.i + +while.body.i188: 
; preds = %for.end173.i, %if.end36.i + %gs.0526.i = phi i32 [ 0, %if.end36.i ], [ %add177.i, %for.end173.i ] + %or.cond514517.i = and i1 false, undef + br i1 %or.cond514517.i, label %while.body85.i, label %if.end117.i + +while.body85.i: ; preds = %while.body85.i, %while.body.i188 + %aFreq.0518.i = phi i32 [ %add93.i, %while.body85.i ], [ 0, %while.body.i188 ] + %inc87.i = add nsw i32 0, 1 + %tmp91.i = load i32* undef, align 4, !tbaa !0 + %add93.i = add nsw i32 %tmp91.i, %aFreq.0518.i + %or.cond514.i = and i1 undef, false + br i1 %or.cond514.i, label %while.body85.i, label %while.end.i + +while.end.i: ; preds = %while.body85.i + br i1 undef, label %land.lhs.true.i, label %if.end117.i + +land.lhs.true.i: ; preds = %while.end.i + br i1 undef, label %land.lhs.true103.i, label %if.end117.i + +land.lhs.true103.i: ; preds = %land.lhs.true.i + br i1 undef, label %if.then108.i, label %if.end117.i + +if.then108.i: ; preds = %land.lhs.true103.i + br label %if.end117.i + +if.end117.i: ; preds = %if.then108.i, %land.lhs.true103.i, %land.lhs.true.i, %while.end.i, %while.body.i188 + %aFreq.1.i = phi i32 [ %aFreq.0518.i, %if.then108.i ], [ %add93.i, %land.lhs.true103.i ], [ %add93.i, %land.lhs.true.i ], [ %add93.i, %while.end.i ], [ 0, %while.body.i188 ] + %ge.1.i = phi i32 [ 0, %if.then108.i ], [ %inc87.i, %land.lhs.true103.i ], [ %inc87.i, %land.lhs.true.i ], [ %inc87.i, %while.end.i ], [ 0, %while.body.i188 ] + br i1 undef, label %if.then122.i, label %for.cond138.preheader.i + +if.then122.i: ; preds = %if.end117.i + call void (...)* @fprintf(i32 undef, i32 %gs.0526.i, i32 %ge.1.i, i32 %aFreq.1.i, double undef) nounwind + br label %for.cond138.preheader.i + +for.cond138.preheader.i: ; preds = %if.then122.i, %if.end117.i + br i1 undef, label %for.body143.lr.ph.i, label %for.end173.i + +for.body143.lr.ph.i: ; preds = %for.cond138.preheader.i + br label %for.body143.i + +for.body143.i: ; preds = %for.body143.i, %for.body143.lr.ph.i + br i1 undef, label %for.end173.i, label 
%for.body143.i + +for.end173.i: ; preds = %for.body143.i, %for.cond138.preheader.i + %add177.i = add nsw i32 %ge.1.i, 1 + %cmp73.i = icmp sgt i32 undef, 0 + br i1 %cmp73.i, label %while.body.i188, label %for.cond182.preheader.i + +for.inc220.us.i: ; preds = %for.cond182.preheader.i + unreachable + +while.body300.preheader.i: ; preds = %for.cond182.preheader.i + br i1 undef, label %for.end335.i, label %while.end2742.i + +for.end335.i: ; preds = %for.end2039.i, %while.body300.preheader.i + br label %for.body2021.i + +for.body2021.i: ; preds = %for.body2021.i, %for.end335.i + br i1 undef, label %for.body2021.i, label %for.end2039.i + +for.end2039.i: ; preds = %for.body2021.i + br label %for.end335.i + +while.end2742.i: ; preds = %while.body300.preheader.i + br i1 undef, label %if.then2748.i, label %for.body2778.i + +if.then2748.i: ; preds = %while.end2742.i + unreachable + +for.body2778.i: ; preds = %while.end2742.i + unreachable + +for.inc27.us.5.i: ; preds = %if.end.i + br label %for.end30.i + +if.end85: ; preds = %entry + ret void +} + +declare void @fprintf(...) nounwind + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/src/LLVM/test/CodeGen/X86/2011-09-18-sse2cmp.ll b/src/LLVM/test/CodeGen/X86/2011-09-18-sse2cmp.ll new file mode 100644 index 0000000..844d674 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2011-09-18-sse2cmp.ll
@@ -0,0 +1,12 @@ +;RUN: llc < %s -march=x86 -mcpu=yonah -promote-elements -mattr=+sse2,-sse41 | FileCheck %s + +;CHECK: @max +;CHECK: cmplepd +;CHECK: ret + +define <2 x double> @max(<2 x double> %x, <2 x double> %y) { + %max_is_x = fcmp oge <2 x double> %x, %y + %max = select <2 x i1> %max_is_x, <2 x double> %x, <2 x double> %y + ret <2 x double> %max +} +
diff --git a/src/LLVM/test/CodeGen/X86/2011-09-21-setcc-bug.ll b/src/LLVM/test/CodeGen/X86/2011-09-21-setcc-bug.ll new file mode 100644 index 0000000..ed5649c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2011-09-21-setcc-bug.ll
@@ -0,0 +1,27 @@ +; RUN: llc < %s -march=x86-64 -mcpu=corei7 -promote-elements -mattr=+sse41 + +; Make sure we are not crashing on this code. + +define void @load_4_i8(<4 x i8>* %k, <4 x i8>* %y, <4 x double>* %A1, <4 x double>* %A0) { + %A = load <4 x i8>* %k + %B = load <4 x i8>* %y + %C = load <4 x double>* %A0 + %D= load <4 x double>* %A1 + %M = icmp uge <4 x i8> %A, %B + %T = select <4 x i1> %M, <4 x double> %C, <4 x double> %D + store <4 x double> %T, <4 x double>* undef + ret void +} + + +define void @load_256_i8(<256 x i8>* %k, <256 x i8>* %y, <256 x double>* %A1, <256 x double>* %A0) { + %A = load <256 x i8>* %k + %B = load <256 x i8>* %y + %C = load <256 x double>* %A0 + %D= load <256 x double>* %A1 + %M = icmp uge <256 x i8> %A, %B + %T = select <256 x i1> %M, <256 x double> %C, <256 x double> %D + store <256 x double> %T, <256 x double>* undef + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/2011-10-11-SpillDead.ll b/src/LLVM/test/CodeGen/X86/2011-10-11-SpillDead.ll new file mode 100644 index 0000000..8e70d65 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2011-10-11-SpillDead.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -verify-regalloc +; PR11125 +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.7" + +; The inline asm takes %x as a GR32_ABCD virtual register. +; The call to @g forces a spill of that register. +; +; The asm has a dead output tied to %x. +; Verify that the spiller creates a value number for that dead def. +; +define void @f(i32 %x) nounwind uwtable ssp { +entry: + tail call void @g() nounwind + %0 = tail call i32 asm sideeffect "foo $0", "=Q,0,~{ebx},~{dirflag},~{fpsr},~{flags}"(i32 %x) nounwind + ret void +} + +declare void @g()
diff --git a/src/LLVM/test/CodeGen/X86/2011-10-11-srl.ll b/src/LLVM/test/CodeGen/X86/2011-10-11-srl.ll new file mode 100644 index 0000000..cf9d36f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2011-10-11-srl.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -promote-elements -mattr=-sse41 + +target triple = "x86_64-unknown-linux-gnu" + +define void @m387(<2 x i8>* %p, <2 x i16>* %q) { + %t = load <2 x i8>* %p + %r = sext <2 x i8> %t to <2 x i16> + store <2 x i16> %r, <2 x i16>* %q + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/2011-10-12-MachineCSE.ll b/src/LLVM/test/CodeGen/X86/2011-10-12-MachineCSE.ll new file mode 100644 index 0000000..cd15f84 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/2011-10-12-MachineCSE.ll
@@ -0,0 +1,116 @@ +; RUN: llc -verify-machineinstrs < %s +; <rdar://problem/10270968> +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.7.2" + +%struct.optab = type { i32, [59 x %struct.anon.3] } +%struct.anon.3 = type { i32, %struct.rtx_def* } +%struct.rtx_def = type { [2 x i8], i8, i8, [1 x %union.rtunion_def] } +%union.rtunion_def = type { i64 } +%struct.insn_data = type { i8*, i8*, %struct.rtx_def* (%struct.rtx_def*, ...)*, %struct.insn_operand_data*, i8, i8, i8, i8 } +%struct.insn_operand_data = type { i32 (%struct.rtx_def*, i32)*, i8*, [2 x i8], i8, i8 } + +@optab_table = external global [49 x %struct.optab*], align 16 +@insn_data = external constant [0 x %struct.insn_data] + +define %struct.rtx_def* @gen_add3_insn(%struct.rtx_def* %r0, %struct.rtx_def* %r1, %struct.rtx_def* %c) nounwind uwtable ssp { +entry: + %0 = bitcast %struct.rtx_def* %r0 to i32* + %1 = load i32* %0, align 8 + %2 = lshr i32 %1, 16 + %bf.clear = and i32 %2, 255 + %idxprom = sext i32 %bf.clear to i64 + %3 = load %struct.optab** getelementptr inbounds ([49 x %struct.optab*]* @optab_table, i32 0, i64 0), align 8, !tbaa !0 + %handlers = getelementptr inbounds %struct.optab* %3, i32 0, i32 1 + %arrayidx = getelementptr inbounds [59 x %struct.anon.3]* %handlers, i32 0, i64 %idxprom + %insn_code = getelementptr inbounds %struct.anon.3* %arrayidx, i32 0, i32 0 + %4 = load i32* %insn_code, align 4, !tbaa !3 + %cmp = icmp eq i32 %4, 1317 + br i1 %cmp, label %if.then, label %lor.lhs.false + +lor.lhs.false: ; preds = %entry + %idxprom1 = sext i32 %4 to i64 + %arrayidx2 = getelementptr inbounds [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom1 + %operand = getelementptr inbounds %struct.insn_data* %arrayidx2, i32 0, i32 3 + %5 = load %struct.insn_operand_data** %operand, align 8, !tbaa !0 + %arrayidx3 = getelementptr inbounds 
%struct.insn_operand_data* %5, i64 0 + %predicate = getelementptr inbounds %struct.insn_operand_data* %arrayidx3, i32 0, i32 0 + %6 = load i32 (%struct.rtx_def*, i32)** %predicate, align 8, !tbaa !0 + %idxprom4 = sext i32 %4 to i64 + %arrayidx5 = getelementptr inbounds [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom4 + %operand6 = getelementptr inbounds %struct.insn_data* %arrayidx5, i32 0, i32 3 + %7 = load %struct.insn_operand_data** %operand6, align 8, !tbaa !0 + %arrayidx7 = getelementptr inbounds %struct.insn_operand_data* %7, i64 0 + %8 = bitcast %struct.insn_operand_data* %arrayidx7 to i8* + %bf.field.offs = getelementptr i8* %8, i32 16 + %9 = bitcast i8* %bf.field.offs to i32* + %10 = load i32* %9, align 8 + %bf.clear8 = and i32 %10, 65535 + %call = tail call i32 %6(%struct.rtx_def* %r0, i32 %bf.clear8) + %tobool = icmp ne i32 %call, 0 + br i1 %tobool, label %lor.lhs.false9, label %if.then + +lor.lhs.false9: ; preds = %lor.lhs.false + %idxprom10 = sext i32 %4 to i64 + %arrayidx11 = getelementptr inbounds [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom10 + %operand12 = getelementptr inbounds %struct.insn_data* %arrayidx11, i32 0, i32 3 + %11 = load %struct.insn_operand_data** %operand12, align 8, !tbaa !0 + %arrayidx13 = getelementptr inbounds %struct.insn_operand_data* %11, i64 1 + %predicate14 = getelementptr inbounds %struct.insn_operand_data* %arrayidx13, i32 0, i32 0 + %12 = load i32 (%struct.rtx_def*, i32)** %predicate14, align 8, !tbaa !0 + %idxprom15 = sext i32 %4 to i64 + %arrayidx16 = getelementptr inbounds [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom15 + %operand17 = getelementptr inbounds %struct.insn_data* %arrayidx16, i32 0, i32 3 + %13 = load %struct.insn_operand_data** %operand17, align 8, !tbaa !0 + %arrayidx18 = getelementptr inbounds %struct.insn_operand_data* %13, i64 1 + %14 = bitcast %struct.insn_operand_data* %arrayidx18 to i8* + %bf.field.offs19 = getelementptr i8* %14, i32 16 + %15 = bitcast i8* 
%bf.field.offs19 to i32* + %16 = load i32* %15, align 8 + %bf.clear20 = and i32 %16, 65535 + %call21 = tail call i32 %12(%struct.rtx_def* %r1, i32 %bf.clear20) + %tobool22 = icmp ne i32 %call21, 0 + br i1 %tobool22, label %lor.lhs.false23, label %if.then + +lor.lhs.false23: ; preds = %lor.lhs.false9 + %idxprom24 = sext i32 %4 to i64 + %arrayidx25 = getelementptr inbounds [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom24 + %operand26 = getelementptr inbounds %struct.insn_data* %arrayidx25, i32 0, i32 3 + %17 = load %struct.insn_operand_data** %operand26, align 8, !tbaa !0 + %arrayidx27 = getelementptr inbounds %struct.insn_operand_data* %17, i64 2 + %predicate28 = getelementptr inbounds %struct.insn_operand_data* %arrayidx27, i32 0, i32 0 + %18 = load i32 (%struct.rtx_def*, i32)** %predicate28, align 8, !tbaa !0 + %idxprom29 = sext i32 %4 to i64 + %arrayidx30 = getelementptr inbounds [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom29 + %operand31 = getelementptr inbounds %struct.insn_data* %arrayidx30, i32 0, i32 3 + %19 = load %struct.insn_operand_data** %operand31, align 8, !tbaa !0 + %arrayidx32 = getelementptr inbounds %struct.insn_operand_data* %19, i64 2 + %20 = bitcast %struct.insn_operand_data* %arrayidx32 to i8* + %bf.field.offs33 = getelementptr i8* %20, i32 16 + %21 = bitcast i8* %bf.field.offs33 to i32* + %22 = load i32* %21, align 8 + %bf.clear34 = and i32 %22, 65535 + %call35 = tail call i32 %18(%struct.rtx_def* %c, i32 %bf.clear34) + %tobool36 = icmp ne i32 %call35, 0 + br i1 %tobool36, label %if.end, label %if.then + +if.then: ; preds = %lor.lhs.false23, %lor.lhs.false9, %lor.lhs.false, %entry + br label %return + +if.end: ; preds = %lor.lhs.false23 + %idxprom37 = sext i32 %4 to i64 + %arrayidx38 = getelementptr inbounds [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom37 + %genfun = getelementptr inbounds %struct.insn_data* %arrayidx38, i32 0, i32 2 + %23 = load %struct.rtx_def* (%struct.rtx_def*, ...)** %genfun, align 8, 
!tbaa !0 + %call39 = tail call %struct.rtx_def* (%struct.rtx_def*, ...)* %23(%struct.rtx_def* %r0, %struct.rtx_def* %r1, %struct.rtx_def* %c) + br label %return + +return: ; preds = %if.end, %if.then + %24 = phi %struct.rtx_def* [ %call39, %if.end ], [ null, %if.then ] + ret %struct.rtx_def* %24 +} + +!0 = metadata !{metadata !"any pointer", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null} +!3 = metadata !{metadata !"_ZTS9insn_code", metadata !1}
diff --git a/src/LLVM/test/CodeGen/X86/3addr-16bit.ll b/src/LLVM/test/CodeGen/X86/3addr-16bit.ll new file mode 100644 index 0000000..c51247a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/3addr-16bit.ll
@@ -0,0 +1,95 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -asm-verbose=false | FileCheck %s -check-prefix=64BIT +; rdar://7329206 + +; In 32-bit the partial register stall would degrade performance. + +define zeroext i16 @t1(i16 zeroext %c, i16 zeroext %k) nounwind ssp { +entry: +; 32BIT: t1: +; 32BIT: movw 20(%esp), %ax +; 32BIT-NOT: movw %ax, %cx +; 32BIT: leal 1(%eax), %ecx + +; 64BIT: t1: +; 64BIT-NOT: movw %si, %ax +; 64BIT: leal 1(%rsi), %eax + %0 = icmp eq i16 %k, %c ; <i1> [#uses=1] + %1 = add i16 %k, 1 ; <i16> [#uses=3] + br i1 %0, label %bb, label %bb1 + +bb: ; preds = %entry + tail call void @foo(i16 zeroext %1) nounwind + ret i16 %1 + +bb1: ; preds = %entry + ret i16 %1 +} + +define zeroext i16 @t2(i16 zeroext %c, i16 zeroext %k) nounwind ssp { +entry: +; 32BIT: t2: +; 32BIT: movw 20(%esp), %ax +; 32BIT-NOT: movw %ax, %cx +; 32BIT: leal -1(%eax), %ecx + +; 64BIT: t2: +; 64BIT-NOT: movw %si, %ax +; 64BIT: leal -1(%rsi), %eax + %0 = icmp eq i16 %k, %c ; <i1> [#uses=1] + %1 = add i16 %k, -1 ; <i16> [#uses=3] + br i1 %0, label %bb, label %bb1 + +bb: ; preds = %entry + tail call void @foo(i16 zeroext %1) nounwind + ret i16 %1 + +bb1: ; preds = %entry + ret i16 %1 +} + +declare void @foo(i16 zeroext) + +define zeroext i16 @t3(i16 zeroext %c, i16 zeroext %k) nounwind ssp { +entry: +; 32BIT: t3: +; 32BIT: movw 20(%esp), %ax +; 32BIT-NOT: movw %ax, %cx +; 32BIT: leal 2(%eax), %ecx + +; 64BIT: t3: +; 64BIT-NOT: movw %si, %ax +; 64BIT: leal 2(%rsi), %eax + %0 = add i16 %k, 2 ; <i16> [#uses=3] + %1 = icmp eq i16 %k, %c ; <i1> [#uses=1] + br i1 %1, label %bb, label %bb1 + +bb: ; preds = %entry + tail call void @foo(i16 zeroext %0) nounwind + ret i16 %0 + +bb1: ; preds = %entry + ret i16 %0 +} + +define zeroext i16 @t4(i16 zeroext %c, i16 zeroext %k) nounwind ssp { +entry: +; 32BIT: t4: +; 32BIT: movw 16(%esp), %ax +; 32BIT: movw 20(%esp), %cx +; 32BIT-NOT: movw %cx, %dx +; 32BIT: leal (%ecx,%eax), %edx + +; 64BIT: t4: +; 64BIT-NOT: movw %si, %ax +; 64BIT: leal 
(%rsi,%rdi), %eax + %0 = add i16 %k, %c ; <i16> [#uses=3] + %1 = icmp eq i16 %k, %c ; <i1> [#uses=1] + br i1 %1, label %bb, label %bb1 + +bb: ; preds = %entry + tail call void @foo(i16 zeroext %0) nounwind + ret i16 %0 + +bb1: ; preds = %entry + ret i16 %0 +}
diff --git a/src/LLVM/test/CodeGen/X86/3addr-or.ll b/src/LLVM/test/CodeGen/X86/3addr-or.ll new file mode 100644 index 0000000..912bdc2 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/3addr-or.ll
@@ -0,0 +1,61 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s +; rdar://7527734 + +define i32 @test1(i32 %x) nounwind readnone ssp { +entry: +; CHECK: test1: +; CHECK: leal 3(%rdi), %eax + %0 = shl i32 %x, 5 ; <i32> [#uses=1] + %1 = or i32 %0, 3 ; <i32> [#uses=1] + ret i32 %1 +} + +define i64 @test2(i8 %A, i8 %B) nounwind { +; CHECK: test2: +; CHECK: shrq $4 +; CHECK-NOT: movq +; CHECK-NOT: orq +; CHECK: leaq +; CHECK: ret + %C = zext i8 %A to i64 ; <i64> [#uses=1] + %D = shl i64 %C, 4 ; <i64> [#uses=1] + %E = and i64 %D, 48 ; <i64> [#uses=1] + %F = zext i8 %B to i64 ; <i64> [#uses=1] + %G = lshr i64 %F, 4 ; <i64> [#uses=1] + %H = or i64 %G, %E ; <i64> [#uses=1] + ret i64 %H +} + +;; Test that OR is only emitted as LEA, not as ADD. + +define void @test3(i32 %x, i32* %P) nounwind readnone ssp { +entry: +; No reason to emit an add here, should be an or. +; CHECK: test3: +; CHECK: orl $3, %edi + %0 = shl i32 %x, 5 + %1 = or i32 %0, 3 + store i32 %1, i32* %P + ret void +} + +define i32 @test4(i32 %a, i32 %b) nounwind readnone ssp { +entry: + %and = and i32 %a, 6 + %and2 = and i32 %b, 16 + %or = or i32 %and2, %and + ret i32 %or +; CHECK: test4: +; CHECK: leal (%rsi,%rdi), %eax +} + +define void @test5(i32 %a, i32 %b, i32* nocapture %P) nounwind ssp { +entry: + %and = and i32 %a, 6 + %and2 = and i32 %b, 16 + %or = or i32 %and2, %and + store i32 %or, i32* %P, align 4 + ret void +; CHECK: test5: +; CHECK: orl +}
diff --git a/src/LLVM/test/CodeGen/X86/3dnow-intrinsics.ll b/src/LLVM/test/CodeGen/X86/3dnow-intrinsics.ll new file mode 100644 index 0000000..0b27bf2 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/3dnow-intrinsics.ll
@@ -0,0 +1,297 @@ +; RUN: llc < %s -march=x86 -mattr=+3dnow | FileCheck %s + +define <8 x i8> @test_pavgusb(x86_mmx %a.coerce, x86_mmx %b.coerce) nounwind readnone { +; CHECK: pavgusb +entry: + %0 = bitcast x86_mmx %a.coerce to <8 x i8> + %1 = bitcast x86_mmx %b.coerce to <8 x i8> + %2 = bitcast <8 x i8> %0 to x86_mmx + %3 = bitcast <8 x i8> %1 to x86_mmx + %4 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %2, x86_mmx %3) + %5 = bitcast x86_mmx %4 to <8 x i8> + ret <8 x i8> %5 +} + +declare x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx, x86_mmx) nounwind readnone + +define <2 x i32> @test_pf2id(<2 x float> %a) nounwind readnone { +; CHECK: pf2id +entry: + %0 = bitcast <2 x float> %a to x86_mmx + %1 = tail call x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx %0) + %2 = bitcast x86_mmx %1 to <2 x i32> + ret <2 x i32> %2 +} + +declare x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx) nounwind readnone + +define <2 x float> @test_pfacc(<2 x float> %a, <2 x float> %b) nounwind readnone { +; CHECK: pfacc +entry: + %0 = bitcast <2 x float> %a to x86_mmx + %1 = bitcast <2 x float> %b to x86_mmx + %2 = tail call x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx %0, x86_mmx %1) + %3 = bitcast x86_mmx %2 to <2 x float> + ret <2 x float> %3 +} + +declare x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx, x86_mmx) nounwind readnone + +define <2 x float> @test_pfadd(<2 x float> %a, <2 x float> %b) nounwind readnone { +; CHECK: pfadd +entry: + %0 = bitcast <2 x float> %a to x86_mmx + %1 = bitcast <2 x float> %b to x86_mmx + %2 = tail call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %0, x86_mmx %1) + %3 = bitcast x86_mmx %2 to <2 x float> + ret <2 x float> %3 +} + +declare x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx, x86_mmx) nounwind readnone + +define <2 x i32> @test_pfcmpeq(<2 x float> %a, <2 x float> %b) nounwind readnone { +; CHECK: pfcmpeq +entry: + %0 = bitcast <2 x float> %a to x86_mmx + %1 = bitcast <2 x float> %b to x86_mmx + %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %0, x86_mmx %1) + %3 = bitcast x86_mmx %2 to <2 x 
i32> + ret <2 x i32> %3 +} + +declare x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx, x86_mmx) nounwind readnone + +define <2 x i32> @test_pfcmpge(<2 x float> %a, <2 x float> %b) nounwind readnone { +; CHECK: pfcmpge +entry: + %0 = bitcast <2 x float> %a to x86_mmx + %1 = bitcast <2 x float> %b to x86_mmx + %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx %0, x86_mmx %1) + %3 = bitcast x86_mmx %2 to <2 x i32> + ret <2 x i32> %3 +} + +declare x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx, x86_mmx) nounwind readnone + +define <2 x i32> @test_pfcmpgt(<2 x float> %a, <2 x float> %b) nounwind readnone { +; CHECK: pfcmpgt +entry: + %0 = bitcast <2 x float> %a to x86_mmx + %1 = bitcast <2 x float> %b to x86_mmx + %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx %0, x86_mmx %1) + %3 = bitcast x86_mmx %2 to <2 x i32> + ret <2 x i32> %3 +} + +declare x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx, x86_mmx) nounwind readnone + +define <2 x float> @test_pfmax(<2 x float> %a, <2 x float> %b) nounwind readnone { +; CHECK: pfmax +entry: + %0 = bitcast <2 x float> %a to x86_mmx + %1 = bitcast <2 x float> %b to x86_mmx + %2 = tail call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %0, x86_mmx %1) + %3 = bitcast x86_mmx %2 to <2 x float> + ret <2 x float> %3 +} + +declare x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx, x86_mmx) nounwind readnone + +define <2 x float> @test_pfmin(<2 x float> %a, <2 x float> %b) nounwind readnone { +; CHECK: pfmin +entry: + %0 = bitcast <2 x float> %a to x86_mmx + %1 = bitcast <2 x float> %b to x86_mmx + %2 = tail call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %0, x86_mmx %1) + %3 = bitcast x86_mmx %2 to <2 x float> + ret <2 x float> %3 +} + +declare x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx, x86_mmx) nounwind readnone + +define <2 x float> @test_pfmul(<2 x float> %a, <2 x float> %b) nounwind readnone { +; CHECK: pfmul +entry: + %0 = bitcast <2 x float> %a to x86_mmx + %1 = bitcast <2 x float> %b to x86_mmx + %2 = tail call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %0, x86_mmx %1) + %3 = 
bitcast x86_mmx %2 to <2 x float> + ret <2 x float> %3 +} + +declare x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx, x86_mmx) nounwind readnone + +define <2 x float> @test_pfrcp(<2 x float> %a) nounwind readnone { +; CHECK: pfrcp +entry: + %0 = bitcast <2 x float> %a to x86_mmx + %1 = tail call x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx %0) + %2 = bitcast x86_mmx %1 to <2 x float> + ret <2 x float> %2 +} + +declare x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx) nounwind readnone + +define <2 x float> @test_pfrcpit1(<2 x float> %a, <2 x float> %b) nounwind readnone { +; CHECK: pfrcpit1 +entry: + %0 = bitcast <2 x float> %a to x86_mmx + %1 = bitcast <2 x float> %b to x86_mmx + %2 = tail call x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx %0, x86_mmx %1) + %3 = bitcast x86_mmx %2 to <2 x float> + ret <2 x float> %3 +} + +declare x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx, x86_mmx) nounwind readnone + +define <2 x float> @test_pfrcpit2(<2 x float> %a, <2 x float> %b) nounwind readnone { +; CHECK: pfrcpit2 +entry: + %0 = bitcast <2 x float> %a to x86_mmx + %1 = bitcast <2 x float> %b to x86_mmx + %2 = tail call x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx %0, x86_mmx %1) + %3 = bitcast x86_mmx %2 to <2 x float> + ret <2 x float> %3 +} + +declare x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx, x86_mmx) nounwind readnone + +define <2 x float> @test_pfrsqrt(<2 x float> %a) nounwind readnone { +; CHECK: pfrsqrt +entry: + %0 = bitcast <2 x float> %a to x86_mmx + %1 = tail call x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx %0) + %2 = bitcast x86_mmx %1 to <2 x float> + ret <2 x float> %2 +} + +declare x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx) nounwind readnone + +define <2 x float> @test_pfrsqit1(<2 x float> %a, <2 x float> %b) nounwind readnone { +; CHECK: pfrsqit1 +entry: + %0 = bitcast <2 x float> %a to x86_mmx + %1 = bitcast <2 x float> %b to x86_mmx + %2 = tail call x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx %0, x86_mmx %1) + %3 = bitcast x86_mmx %2 to <2 x float> + ret <2 x float> %3 +} + +declare x86_mmx 
@llvm.x86.3dnow.pfrsqit1(x86_mmx, x86_mmx) nounwind readnone + +define <2 x float> @test_pfsub(<2 x float> %a, <2 x float> %b) nounwind readnone { +; CHECK: pfsub +entry: + %0 = bitcast <2 x float> %a to x86_mmx + %1 = bitcast <2 x float> %b to x86_mmx + %2 = tail call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %0, x86_mmx %1) + %3 = bitcast x86_mmx %2 to <2 x float> + ret <2 x float> %3 +} + +declare x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx, x86_mmx) nounwind readnone + +define <2 x float> @test_pfsubr(<2 x float> %a, <2 x float> %b) nounwind readnone { +; CHECK: pfsubr +entry: + %0 = bitcast <2 x float> %a to x86_mmx + %1 = bitcast <2 x float> %b to x86_mmx + %2 = tail call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %0, x86_mmx %1) + %3 = bitcast x86_mmx %2 to <2 x float> + ret <2 x float> %3 +} + +declare x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx, x86_mmx) nounwind readnone + +define <2 x float> @test_pi2fd(x86_mmx %a.coerce) nounwind readnone { +; CHECK: pi2fd +entry: + %0 = bitcast x86_mmx %a.coerce to <2 x i32> + %1 = bitcast <2 x i32> %0 to x86_mmx + %2 = call x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx %1) + %3 = bitcast x86_mmx %2 to <2 x float> + ret <2 x float> %3 +} + +declare x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx) nounwind readnone + +define <4 x i16> @test_pmulhrw(x86_mmx %a.coerce, x86_mmx %b.coerce) nounwind readnone { +; CHECK: pmulhrw +entry: + %0 = bitcast x86_mmx %a.coerce to <4 x i16> + %1 = bitcast x86_mmx %b.coerce to <4 x i16> + %2 = bitcast <4 x i16> %0 to x86_mmx + %3 = bitcast <4 x i16> %1 to x86_mmx + %4 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %2, x86_mmx %3) + %5 = bitcast x86_mmx %4 to <4 x i16> + ret <4 x i16> %5 +} + +declare x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx, x86_mmx) nounwind readnone + +define <2 x i32> @test_pf2iw(<2 x float> %a) nounwind readnone { +; CHECK: pf2iw +entry: + %0 = bitcast <2 x float> %a to x86_mmx + %1 = tail call x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx %0) + %2 = bitcast x86_mmx %1 to <2 x i32> + ret <2 x i32> %2 +} + 
+declare x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx) nounwind readnone + +define <2 x float> @test_pfnacc(<2 x float> %a, <2 x float> %b) nounwind readnone { +; CHECK: pfnacc +entry: + %0 = bitcast <2 x float> %a to x86_mmx + %1 = bitcast <2 x float> %b to x86_mmx + %2 = tail call x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx %0, x86_mmx %1) + %3 = bitcast x86_mmx %2 to <2 x float> + ret <2 x float> %3 +} + +declare x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx, x86_mmx) nounwind readnone + +define <2 x float> @test_pfpnacc(<2 x float> %a, <2 x float> %b) nounwind readnone { +; CHECK: pfpnacc +entry: + %0 = bitcast <2 x float> %a to x86_mmx + %1 = bitcast <2 x float> %b to x86_mmx + %2 = tail call x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx %0, x86_mmx %1) + %3 = bitcast x86_mmx %2 to <2 x float> + ret <2 x float> %3 +} + +declare x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx, x86_mmx) nounwind readnone + +define <2 x float> @test_pi2fw(x86_mmx %a.coerce) nounwind readnone { +; CHECK: pi2fw +entry: + %0 = bitcast x86_mmx %a.coerce to <2 x i32> + %1 = bitcast <2 x i32> %0 to x86_mmx + %2 = call x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx %1) + %3 = bitcast x86_mmx %2 to <2 x float> + ret <2 x float> %3 +} + +declare x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx) nounwind readnone + +define <2 x float> @test_pswapdsf(<2 x float> %a) nounwind readnone { +; CHECK: pswapd +entry: + %0 = bitcast <2 x float> %a to x86_mmx + %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0) + %2 = bitcast x86_mmx %1 to <2 x float> + ret <2 x float> %2 +} + +define <2 x i32> @test_pswapdsi(<2 x i32> %a) nounwind readnone { +; CHECK: pswapd +entry: + %0 = bitcast <2 x i32> %a to x86_mmx + %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0) + %2 = bitcast x86_mmx %1 to <2 x i32> + ret <2 x i32> %2 +} + +declare x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/X86/4char-promote.ll b/src/LLVM/test/CodeGen/X86/4char-promote.ll new file mode 100644 index 0000000..386057f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/4char-promote.ll
@@ -0,0 +1,17 @@ +; A test for checking PR 9623 +;RUN: llc -march=x86-64 -mcpu=corei7 -promote-elements < %s | FileCheck %s + +target triple = "x86_64-apple-darwin" + +; CHECK: pmulld +; CHECK: paddd +; CHECK: movdqa + +define <4 x i8> @foo(<4 x i8> %x, <4 x i8> %y) { +entry: + %binop = mul <4 x i8> %x, %y + %binop6 = add <4 x i8> %binop, %x + ret <4 x i8> %binop6 +} + +
diff --git a/src/LLVM/test/CodeGen/X86/9601.ll b/src/LLVM/test/CodeGen/X86/9601.ll new file mode 100644 index 0000000..cd65a03 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/9601.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu +; PR9601 +; Previously we'd crash trying to put a 32-bit float into a constraint +; for a normal 'r' register. +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +define void @test() { +entry: + %0 = call float asm sideeffect "xchg $0, $1", "=r,*m,0,~{memory},~{dirflag},~{fpsr},~{flags}"(i32* undef, float 2.000000e+00) nounwind + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/Atomics-64.ll b/src/LLVM/test/CodeGen/X86/Atomics-64.ll new file mode 100644 index 0000000..8e93762 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/Atomics-64.ll
@@ -0,0 +1,950 @@ +; RUN: llc < %s -march=x86-64 > %t +; RUN: llc < %s -march=x86 > %t +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-apple-darwin8" + +@sc = common global i8 0 +@uc = common global i8 0 +@ss = common global i16 0 +@us = common global i16 0 +@si = common global i32 0 +@ui = common global i32 0 +@sl = common global i64 0 +@ul = common global i64 0 +@sll = common global i64 0 +@ull = common global i64 0 + +define void @test_op_ignore() nounwind { +entry: + %0 = atomicrmw add i8* @sc, i8 1 monotonic + %1 = atomicrmw add i8* @uc, i8 1 monotonic + %2 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %3 = atomicrmw add i16* %2, i16 1 monotonic + %4 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %5 = atomicrmw add i16* %4, i16 1 monotonic + %6 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %7 = atomicrmw add i32* %6, i32 1 monotonic + %8 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %9 = atomicrmw add i32* %8, i32 1 monotonic + %10 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %11 = atomicrmw add i64* %10, i64 1 monotonic + %12 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %13 = atomicrmw add i64* %12, i64 1 monotonic + %14 = bitcast i8* bitcast (i64* @sll to i8*) to i64* + %15 = atomicrmw add i64* %14, i64 1 monotonic + %16 = bitcast i8* bitcast (i64* @ull to i8*) to i64* + %17 = atomicrmw add i64* %16, i64 1 monotonic + %18 = atomicrmw sub i8* @sc, i8 1 monotonic + %19 = atomicrmw sub i8* @uc, i8 1 monotonic + %20 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %21 = atomicrmw sub i16* %20, i16 1 monotonic + %22 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %23 = atomicrmw sub i16* %22, i16 1 monotonic + %24 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %25 = atomicrmw sub i32* %24, i32 1 monotonic + %26 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %27 = atomicrmw sub i32* %26, i32 1 monotonic 
+ %28 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %29 = atomicrmw sub i64* %28, i64 1 monotonic + %30 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %31 = atomicrmw sub i64* %30, i64 1 monotonic + %32 = bitcast i8* bitcast (i64* @sll to i8*) to i64* + %33 = atomicrmw sub i64* %32, i64 1 monotonic + %34 = bitcast i8* bitcast (i64* @ull to i8*) to i64* + %35 = atomicrmw sub i64* %34, i64 1 monotonic + %36 = atomicrmw or i8* @sc, i8 1 monotonic + %37 = atomicrmw or i8* @uc, i8 1 monotonic + %38 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %39 = atomicrmw or i16* %38, i16 1 monotonic + %40 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %41 = atomicrmw or i16* %40, i16 1 monotonic + %42 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %43 = atomicrmw or i32* %42, i32 1 monotonic + %44 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %45 = atomicrmw or i32* %44, i32 1 monotonic + %46 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %47 = atomicrmw or i64* %46, i64 1 monotonic + %48 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %49 = atomicrmw or i64* %48, i64 1 monotonic + %50 = bitcast i8* bitcast (i64* @sll to i8*) to i64* + %51 = atomicrmw or i64* %50, i64 1 monotonic + %52 = bitcast i8* bitcast (i64* @ull to i8*) to i64* + %53 = atomicrmw or i64* %52, i64 1 monotonic + %54 = atomicrmw xor i8* @sc, i8 1 monotonic + %55 = atomicrmw xor i8* @uc, i8 1 monotonic + %56 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %57 = atomicrmw xor i16* %56, i16 1 monotonic + %58 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %59 = atomicrmw xor i16* %58, i16 1 monotonic + %60 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %61 = atomicrmw xor i32* %60, i32 1 monotonic + %62 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %63 = atomicrmw xor i32* %62, i32 1 monotonic + %64 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %65 = atomicrmw xor i64* %64, i64 1 monotonic + %66 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %67 = atomicrmw xor i64* %66, 
i64 1 monotonic + %68 = bitcast i8* bitcast (i64* @sll to i8*) to i64* + %69 = atomicrmw xor i64* %68, i64 1 monotonic + %70 = bitcast i8* bitcast (i64* @ull to i8*) to i64* + %71 = atomicrmw xor i64* %70, i64 1 monotonic + %72 = atomicrmw and i8* @sc, i8 1 monotonic + %73 = atomicrmw and i8* @uc, i8 1 monotonic + %74 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %75 = atomicrmw and i16* %74, i16 1 monotonic + %76 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %77 = atomicrmw and i16* %76, i16 1 monotonic + %78 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %79 = atomicrmw and i32* %78, i32 1 monotonic + %80 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %81 = atomicrmw and i32* %80, i32 1 monotonic + %82 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %83 = atomicrmw and i64* %82, i64 1 monotonic + %84 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %85 = atomicrmw and i64* %84, i64 1 monotonic + %86 = bitcast i8* bitcast (i64* @sll to i8*) to i64* + %87 = atomicrmw and i64* %86, i64 1 monotonic + %88 = bitcast i8* bitcast (i64* @ull to i8*) to i64* + %89 = atomicrmw and i64* %88, i64 1 monotonic + %90 = atomicrmw nand i8* @sc, i8 1 monotonic + %91 = atomicrmw nand i8* @uc, i8 1 monotonic + %92 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %93 = atomicrmw nand i16* %92, i16 1 monotonic + %94 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %95 = atomicrmw nand i16* %94, i16 1 monotonic + %96 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %97 = atomicrmw nand i32* %96, i32 1 monotonic + %98 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %99 = atomicrmw nand i32* %98, i32 1 monotonic + %100 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %101 = atomicrmw nand i64* %100, i64 1 monotonic + %102 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %103 = atomicrmw nand i64* %102, i64 1 monotonic + %104 = bitcast i8* bitcast (i64* @sll to i8*) to i64* + %105 = atomicrmw nand i64* %104, i64 1 monotonic + %106 = bitcast i8* bitcast (i64* @ull to 
i8*) to i64* + %107 = atomicrmw nand i64* %106, i64 1 monotonic + br label %return + +return: ; preds = %entry + ret void +} + +define void @test_fetch_and_op() nounwind { +entry: + %0 = atomicrmw add i8* @sc, i8 11 monotonic + store i8 %0, i8* @sc, align 1 + %1 = atomicrmw add i8* @uc, i8 11 monotonic + store i8 %1, i8* @uc, align 1 + %2 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %3 = atomicrmw add i16* %2, i16 11 monotonic + store i16 %3, i16* @ss, align 2 + %4 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %5 = atomicrmw add i16* %4, i16 11 monotonic + store i16 %5, i16* @us, align 2 + %6 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %7 = atomicrmw add i32* %6, i32 11 monotonic + store i32 %7, i32* @si, align 4 + %8 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %9 = atomicrmw add i32* %8, i32 11 monotonic + store i32 %9, i32* @ui, align 4 + %10 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %11 = atomicrmw add i64* %10, i64 11 monotonic + store i64 %11, i64* @sl, align 8 + %12 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %13 = atomicrmw add i64* %12, i64 11 monotonic + store i64 %13, i64* @ul, align 8 + %14 = bitcast i8* bitcast (i64* @sll to i8*) to i64* + %15 = atomicrmw add i64* %14, i64 11 monotonic + store i64 %15, i64* @sll, align 8 + %16 = bitcast i8* bitcast (i64* @ull to i8*) to i64* + %17 = atomicrmw add i64* %16, i64 11 monotonic + store i64 %17, i64* @ull, align 8 + %18 = atomicrmw sub i8* @sc, i8 11 monotonic + store i8 %18, i8* @sc, align 1 + %19 = atomicrmw sub i8* @uc, i8 11 monotonic + store i8 %19, i8* @uc, align 1 + %20 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %21 = atomicrmw sub i16* %20, i16 11 monotonic + store i16 %21, i16* @ss, align 2 + %22 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %23 = atomicrmw sub i16* %22, i16 11 monotonic + store i16 %23, i16* @us, align 2 + %24 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %25 = atomicrmw sub i32* %24, i32 11 monotonic + store i32 %25, i32* @si, align 
4 + %26 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %27 = atomicrmw sub i32* %26, i32 11 monotonic + store i32 %27, i32* @ui, align 4 + %28 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %29 = atomicrmw sub i64* %28, i64 11 monotonic + store i64 %29, i64* @sl, align 8 + %30 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %31 = atomicrmw sub i64* %30, i64 11 monotonic + store i64 %31, i64* @ul, align 8 + %32 = bitcast i8* bitcast (i64* @sll to i8*) to i64* + %33 = atomicrmw sub i64* %32, i64 11 monotonic + store i64 %33, i64* @sll, align 8 + %34 = bitcast i8* bitcast (i64* @ull to i8*) to i64* + %35 = atomicrmw sub i64* %34, i64 11 monotonic + store i64 %35, i64* @ull, align 8 + %36 = atomicrmw or i8* @sc, i8 11 monotonic + store i8 %36, i8* @sc, align 1 + %37 = atomicrmw or i8* @uc, i8 11 monotonic + store i8 %37, i8* @uc, align 1 + %38 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %39 = atomicrmw or i16* %38, i16 11 monotonic + store i16 %39, i16* @ss, align 2 + %40 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %41 = atomicrmw or i16* %40, i16 11 monotonic + store i16 %41, i16* @us, align 2 + %42 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %43 = atomicrmw or i32* %42, i32 11 monotonic + store i32 %43, i32* @si, align 4 + %44 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %45 = atomicrmw or i32* %44, i32 11 monotonic + store i32 %45, i32* @ui, align 4 + %46 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %47 = atomicrmw or i64* %46, i64 11 monotonic + store i64 %47, i64* @sl, align 8 + %48 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %49 = atomicrmw or i64* %48, i64 11 monotonic + store i64 %49, i64* @ul, align 8 + %50 = bitcast i8* bitcast (i64* @sll to i8*) to i64* + %51 = atomicrmw or i64* %50, i64 11 monotonic + store i64 %51, i64* @sll, align 8 + %52 = bitcast i8* bitcast (i64* @ull to i8*) to i64* + %53 = atomicrmw or i64* %52, i64 11 monotonic + store i64 %53, i64* @ull, align 8 + %54 = atomicrmw xor i8* @sc, i8 11 
monotonic + store i8 %54, i8* @sc, align 1 + %55 = atomicrmw xor i8* @uc, i8 11 monotonic + store i8 %55, i8* @uc, align 1 + %56 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %57 = atomicrmw xor i16* %56, i16 11 monotonic + store i16 %57, i16* @ss, align 2 + %58 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %59 = atomicrmw xor i16* %58, i16 11 monotonic + store i16 %59, i16* @us, align 2 + %60 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %61 = atomicrmw xor i32* %60, i32 11 monotonic + store i32 %61, i32* @si, align 4 + %62 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %63 = atomicrmw xor i32* %62, i32 11 monotonic + store i32 %63, i32* @ui, align 4 + %64 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %65 = atomicrmw xor i64* %64, i64 11 monotonic + store i64 %65, i64* @sl, align 8 + %66 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %67 = atomicrmw xor i64* %66, i64 11 monotonic + store i64 %67, i64* @ul, align 8 + %68 = bitcast i8* bitcast (i64* @sll to i8*) to i64* + %69 = atomicrmw xor i64* %68, i64 11 monotonic + store i64 %69, i64* @sll, align 8 + %70 = bitcast i8* bitcast (i64* @ull to i8*) to i64* + %71 = atomicrmw xor i64* %70, i64 11 monotonic + store i64 %71, i64* @ull, align 8 + %72 = atomicrmw and i8* @sc, i8 11 monotonic + store i8 %72, i8* @sc, align 1 + %73 = atomicrmw and i8* @uc, i8 11 monotonic + store i8 %73, i8* @uc, align 1 + %74 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %75 = atomicrmw and i16* %74, i16 11 monotonic + store i16 %75, i16* @ss, align 2 + %76 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %77 = atomicrmw and i16* %76, i16 11 monotonic + store i16 %77, i16* @us, align 2 + %78 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %79 = atomicrmw and i32* %78, i32 11 monotonic + store i32 %79, i32* @si, align 4 + %80 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %81 = atomicrmw and i32* %80, i32 11 monotonic + store i32 %81, i32* @ui, align 4 + %82 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + 
%83 = atomicrmw and i64* %82, i64 11 monotonic + store i64 %83, i64* @sl, align 8 + %84 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %85 = atomicrmw and i64* %84, i64 11 monotonic + store i64 %85, i64* @ul, align 8 + %86 = bitcast i8* bitcast (i64* @sll to i8*) to i64* + %87 = atomicrmw and i64* %86, i64 11 monotonic + store i64 %87, i64* @sll, align 8 + %88 = bitcast i8* bitcast (i64* @ull to i8*) to i64* + %89 = atomicrmw and i64* %88, i64 11 monotonic + store i64 %89, i64* @ull, align 8 + %90 = atomicrmw nand i8* @sc, i8 11 monotonic + store i8 %90, i8* @sc, align 1 + %91 = atomicrmw nand i8* @uc, i8 11 monotonic + store i8 %91, i8* @uc, align 1 + %92 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %93 = atomicrmw nand i16* %92, i16 11 monotonic + store i16 %93, i16* @ss, align 2 + %94 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %95 = atomicrmw nand i16* %94, i16 11 monotonic + store i16 %95, i16* @us, align 2 + %96 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %97 = atomicrmw nand i32* %96, i32 11 monotonic + store i32 %97, i32* @si, align 4 + %98 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %99 = atomicrmw nand i32* %98, i32 11 monotonic + store i32 %99, i32* @ui, align 4 + %100 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %101 = atomicrmw nand i64* %100, i64 11 monotonic + store i64 %101, i64* @sl, align 8 + %102 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %103 = atomicrmw nand i64* %102, i64 11 monotonic + store i64 %103, i64* @ul, align 8 + %104 = bitcast i8* bitcast (i64* @sll to i8*) to i64* + %105 = atomicrmw nand i64* %104, i64 11 monotonic + store i64 %105, i64* @sll, align 8 + %106 = bitcast i8* bitcast (i64* @ull to i8*) to i64* + %107 = atomicrmw nand i64* %106, i64 11 monotonic + store i64 %107, i64* @ull, align 8 + br label %return + +return: ; preds = %entry + ret void +} + +define void @test_op_and_fetch() nounwind { +entry: + %0 = load i8* @uc, align 1 + %1 = zext i8 %0 to i32 + %2 = trunc i32 %1 to i8 + %3 = 
atomicrmw add i8* @sc, i8 %2 monotonic + %4 = add i8 %3, %2 + store i8 %4, i8* @sc, align 1 + %5 = load i8* @uc, align 1 + %6 = zext i8 %5 to i32 + %7 = trunc i32 %6 to i8 + %8 = atomicrmw add i8* @uc, i8 %7 monotonic + %9 = add i8 %8, %7 + store i8 %9, i8* @uc, align 1 + %10 = load i8* @uc, align 1 + %11 = zext i8 %10 to i32 + %12 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %13 = trunc i32 %11 to i16 + %14 = atomicrmw add i16* %12, i16 %13 monotonic + %15 = add i16 %14, %13 + store i16 %15, i16* @ss, align 2 + %16 = load i8* @uc, align 1 + %17 = zext i8 %16 to i32 + %18 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %19 = trunc i32 %17 to i16 + %20 = atomicrmw add i16* %18, i16 %19 monotonic + %21 = add i16 %20, %19 + store i16 %21, i16* @us, align 2 + %22 = load i8* @uc, align 1 + %23 = zext i8 %22 to i32 + %24 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %25 = atomicrmw add i32* %24, i32 %23 monotonic + %26 = add i32 %25, %23 + store i32 %26, i32* @si, align 4 + %27 = load i8* @uc, align 1 + %28 = zext i8 %27 to i32 + %29 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %30 = atomicrmw add i32* %29, i32 %28 monotonic + %31 = add i32 %30, %28 + store i32 %31, i32* @ui, align 4 + %32 = load i8* @uc, align 1 + %33 = zext i8 %32 to i64 + %34 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %35 = atomicrmw add i64* %34, i64 %33 monotonic + %36 = add i64 %35, %33 + store i64 %36, i64* @sl, align 8 + %37 = load i8* @uc, align 1 + %38 = zext i8 %37 to i64 + %39 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %40 = atomicrmw add i64* %39, i64 %38 monotonic + %41 = add i64 %40, %38 + store i64 %41, i64* @ul, align 8 + %42 = load i8* @uc, align 1 + %43 = zext i8 %42 to i64 + %44 = bitcast i8* bitcast (i64* @sll to i8*) to i64* + %45 = atomicrmw add i64* %44, i64 %43 monotonic + %46 = add i64 %45, %43 + store i64 %46, i64* @sll, align 8 + %47 = load i8* @uc, align 1 + %48 = zext i8 %47 to i64 + %49 = bitcast i8* bitcast (i64* @ull to i8*) to i64* + %50 = 
atomicrmw add i64* %49, i64 %48 monotonic + %51 = add i64 %50, %48 + store i64 %51, i64* @ull, align 8 + %52 = load i8* @uc, align 1 + %53 = zext i8 %52 to i32 + %54 = trunc i32 %53 to i8 + %55 = atomicrmw sub i8* @sc, i8 %54 monotonic + %56 = sub i8 %55, %54 + store i8 %56, i8* @sc, align 1 + %57 = load i8* @uc, align 1 + %58 = zext i8 %57 to i32 + %59 = trunc i32 %58 to i8 + %60 = atomicrmw sub i8* @uc, i8 %59 monotonic + %61 = sub i8 %60, %59 + store i8 %61, i8* @uc, align 1 + %62 = load i8* @uc, align 1 + %63 = zext i8 %62 to i32 + %64 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %65 = trunc i32 %63 to i16 + %66 = atomicrmw sub i16* %64, i16 %65 monotonic + %67 = sub i16 %66, %65 + store i16 %67, i16* @ss, align 2 + %68 = load i8* @uc, align 1 + %69 = zext i8 %68 to i32 + %70 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %71 = trunc i32 %69 to i16 + %72 = atomicrmw sub i16* %70, i16 %71 monotonic + %73 = sub i16 %72, %71 + store i16 %73, i16* @us, align 2 + %74 = load i8* @uc, align 1 + %75 = zext i8 %74 to i32 + %76 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %77 = atomicrmw sub i32* %76, i32 %75 monotonic + %78 = sub i32 %77, %75 + store i32 %78, i32* @si, align 4 + %79 = load i8* @uc, align 1 + %80 = zext i8 %79 to i32 + %81 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %82 = atomicrmw sub i32* %81, i32 %80 monotonic + %83 = sub i32 %82, %80 + store i32 %83, i32* @ui, align 4 + %84 = load i8* @uc, align 1 + %85 = zext i8 %84 to i64 + %86 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %87 = atomicrmw sub i64* %86, i64 %85 monotonic + %88 = sub i64 %87, %85 + store i64 %88, i64* @sl, align 8 + %89 = load i8* @uc, align 1 + %90 = zext i8 %89 to i64 + %91 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %92 = atomicrmw sub i64* %91, i64 %90 monotonic + %93 = sub i64 %92, %90 + store i64 %93, i64* @ul, align 8 + %94 = load i8* @uc, align 1 + %95 = zext i8 %94 to i64 + %96 = bitcast i8* bitcast (i64* @sll to i8*) to i64* + %97 = atomicrmw 
sub i64* %96, i64 %95 monotonic + %98 = sub i64 %97, %95 + store i64 %98, i64* @sll, align 8 + %99 = load i8* @uc, align 1 + %100 = zext i8 %99 to i64 + %101 = bitcast i8* bitcast (i64* @ull to i8*) to i64* + %102 = atomicrmw sub i64* %101, i64 %100 monotonic + %103 = sub i64 %102, %100 + store i64 %103, i64* @ull, align 8 + %104 = load i8* @uc, align 1 + %105 = zext i8 %104 to i32 + %106 = trunc i32 %105 to i8 + %107 = atomicrmw or i8* @sc, i8 %106 monotonic + %108 = or i8 %107, %106 + store i8 %108, i8* @sc, align 1 + %109 = load i8* @uc, align 1 + %110 = zext i8 %109 to i32 + %111 = trunc i32 %110 to i8 + %112 = atomicrmw or i8* @uc, i8 %111 monotonic + %113 = or i8 %112, %111 + store i8 %113, i8* @uc, align 1 + %114 = load i8* @uc, align 1 + %115 = zext i8 %114 to i32 + %116 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %117 = trunc i32 %115 to i16 + %118 = atomicrmw or i16* %116, i16 %117 monotonic + %119 = or i16 %118, %117 + store i16 %119, i16* @ss, align 2 + %120 = load i8* @uc, align 1 + %121 = zext i8 %120 to i32 + %122 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %123 = trunc i32 %121 to i16 + %124 = atomicrmw or i16* %122, i16 %123 monotonic + %125 = or i16 %124, %123 + store i16 %125, i16* @us, align 2 + %126 = load i8* @uc, align 1 + %127 = zext i8 %126 to i32 + %128 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %129 = atomicrmw or i32* %128, i32 %127 monotonic + %130 = or i32 %129, %127 + store i32 %130, i32* @si, align 4 + %131 = load i8* @uc, align 1 + %132 = zext i8 %131 to i32 + %133 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %134 = atomicrmw or i32* %133, i32 %132 monotonic + %135 = or i32 %134, %132 + store i32 %135, i32* @ui, align 4 + %136 = load i8* @uc, align 1 + %137 = zext i8 %136 to i64 + %138 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %139 = atomicrmw or i64* %138, i64 %137 monotonic + %140 = or i64 %139, %137 + store i64 %140, i64* @sl, align 8 + %141 = load i8* @uc, align 1 + %142 = zext i8 %141 to i64 + 
%143 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %144 = atomicrmw or i64* %143, i64 %142 monotonic + %145 = or i64 %144, %142 + store i64 %145, i64* @ul, align 8 + %146 = load i8* @uc, align 1 + %147 = zext i8 %146 to i64 + %148 = bitcast i8* bitcast (i64* @sll to i8*) to i64* + %149 = atomicrmw or i64* %148, i64 %147 monotonic + %150 = or i64 %149, %147 + store i64 %150, i64* @sll, align 8 + %151 = load i8* @uc, align 1 + %152 = zext i8 %151 to i64 + %153 = bitcast i8* bitcast (i64* @ull to i8*) to i64* + %154 = atomicrmw or i64* %153, i64 %152 monotonic + %155 = or i64 %154, %152 + store i64 %155, i64* @ull, align 8 + %156 = load i8* @uc, align 1 + %157 = zext i8 %156 to i32 + %158 = trunc i32 %157 to i8 + %159 = atomicrmw xor i8* @sc, i8 %158 monotonic + %160 = xor i8 %159, %158 + store i8 %160, i8* @sc, align 1 + %161 = load i8* @uc, align 1 + %162 = zext i8 %161 to i32 + %163 = trunc i32 %162 to i8 + %164 = atomicrmw xor i8* @uc, i8 %163 monotonic + %165 = xor i8 %164, %163 + store i8 %165, i8* @uc, align 1 + %166 = load i8* @uc, align 1 + %167 = zext i8 %166 to i32 + %168 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %169 = trunc i32 %167 to i16 + %170 = atomicrmw xor i16* %168, i16 %169 monotonic + %171 = xor i16 %170, %169 + store i16 %171, i16* @ss, align 2 + %172 = load i8* @uc, align 1 + %173 = zext i8 %172 to i32 + %174 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %175 = trunc i32 %173 to i16 + %176 = atomicrmw xor i16* %174, i16 %175 monotonic + %177 = xor i16 %176, %175 + store i16 %177, i16* @us, align 2 + %178 = load i8* @uc, align 1 + %179 = zext i8 %178 to i32 + %180 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %181 = atomicrmw xor i32* %180, i32 %179 monotonic + %182 = xor i32 %181, %179 + store i32 %182, i32* @si, align 4 + %183 = load i8* @uc, align 1 + %184 = zext i8 %183 to i32 + %185 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %186 = atomicrmw xor i32* %185, i32 %184 monotonic + %187 = xor i32 %186, %184 + store i32 
%187, i32* @ui, align 4 + %188 = load i8* @uc, align 1 + %189 = zext i8 %188 to i64 + %190 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %191 = atomicrmw xor i64* %190, i64 %189 monotonic + %192 = xor i64 %191, %189 + store i64 %192, i64* @sl, align 8 + %193 = load i8* @uc, align 1 + %194 = zext i8 %193 to i64 + %195 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %196 = atomicrmw xor i64* %195, i64 %194 monotonic + %197 = xor i64 %196, %194 + store i64 %197, i64* @ul, align 8 + %198 = load i8* @uc, align 1 + %199 = zext i8 %198 to i64 + %200 = bitcast i8* bitcast (i64* @sll to i8*) to i64* + %201 = atomicrmw xor i64* %200, i64 %199 monotonic + %202 = xor i64 %201, %199 + store i64 %202, i64* @sll, align 8 + %203 = load i8* @uc, align 1 + %204 = zext i8 %203 to i64 + %205 = bitcast i8* bitcast (i64* @ull to i8*) to i64* + %206 = atomicrmw xor i64* %205, i64 %204 monotonic + %207 = xor i64 %206, %204 + store i64 %207, i64* @ull, align 8 + %208 = load i8* @uc, align 1 + %209 = zext i8 %208 to i32 + %210 = trunc i32 %209 to i8 + %211 = atomicrmw and i8* @sc, i8 %210 monotonic + %212 = and i8 %211, %210 + store i8 %212, i8* @sc, align 1 + %213 = load i8* @uc, align 1 + %214 = zext i8 %213 to i32 + %215 = trunc i32 %214 to i8 + %216 = atomicrmw and i8* @uc, i8 %215 monotonic + %217 = and i8 %216, %215 + store i8 %217, i8* @uc, align 1 + %218 = load i8* @uc, align 1 + %219 = zext i8 %218 to i32 + %220 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %221 = trunc i32 %219 to i16 + %222 = atomicrmw and i16* %220, i16 %221 monotonic + %223 = and i16 %222, %221 + store i16 %223, i16* @ss, align 2 + %224 = load i8* @uc, align 1 + %225 = zext i8 %224 to i32 + %226 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %227 = trunc i32 %225 to i16 + %228 = atomicrmw and i16* %226, i16 %227 monotonic + %229 = and i16 %228, %227 + store i16 %229, i16* @us, align 2 + %230 = load i8* @uc, align 1 + %231 = zext i8 %230 to i32 + %232 = bitcast i8* bitcast (i32* @si to i8*) to i32* 
+ %233 = atomicrmw and i32* %232, i32 %231 monotonic + %234 = and i32 %233, %231 + store i32 %234, i32* @si, align 4 + %235 = load i8* @uc, align 1 + %236 = zext i8 %235 to i32 + %237 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %238 = atomicrmw and i32* %237, i32 %236 monotonic + %239 = and i32 %238, %236 + store i32 %239, i32* @ui, align 4 + %240 = load i8* @uc, align 1 + %241 = zext i8 %240 to i64 + %242 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %243 = atomicrmw and i64* %242, i64 %241 monotonic + %244 = and i64 %243, %241 + store i64 %244, i64* @sl, align 8 + %245 = load i8* @uc, align 1 + %246 = zext i8 %245 to i64 + %247 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %248 = atomicrmw and i64* %247, i64 %246 monotonic + %249 = and i64 %248, %246 + store i64 %249, i64* @ul, align 8 + %250 = load i8* @uc, align 1 + %251 = zext i8 %250 to i64 + %252 = bitcast i8* bitcast (i64* @sll to i8*) to i64* + %253 = atomicrmw and i64* %252, i64 %251 monotonic + %254 = and i64 %253, %251 + store i64 %254, i64* @sll, align 8 + %255 = load i8* @uc, align 1 + %256 = zext i8 %255 to i64 + %257 = bitcast i8* bitcast (i64* @ull to i8*) to i64* + %258 = atomicrmw and i64* %257, i64 %256 monotonic + %259 = and i64 %258, %256 + store i64 %259, i64* @ull, align 8 + %260 = load i8* @uc, align 1 + %261 = zext i8 %260 to i32 + %262 = trunc i32 %261 to i8 + %263 = atomicrmw nand i8* @sc, i8 %262 monotonic + %264 = xor i8 %263, -1 + %265 = and i8 %264, %262 + store i8 %265, i8* @sc, align 1 + %266 = load i8* @uc, align 1 + %267 = zext i8 %266 to i32 + %268 = trunc i32 %267 to i8 + %269 = atomicrmw nand i8* @uc, i8 %268 monotonic + %270 = xor i8 %269, -1 + %271 = and i8 %270, %268 + store i8 %271, i8* @uc, align 1 + %272 = load i8* @uc, align 1 + %273 = zext i8 %272 to i32 + %274 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %275 = trunc i32 %273 to i16 + %276 = atomicrmw nand i16* %274, i16 %275 monotonic + %277 = xor i16 %276, -1 + %278 = and i16 %277, %275 + store 
i16 %278, i16* @ss, align 2 + %279 = load i8* @uc, align 1 + %280 = zext i8 %279 to i32 + %281 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %282 = trunc i32 %280 to i16 + %283 = atomicrmw nand i16* %281, i16 %282 monotonic + %284 = xor i16 %283, -1 + %285 = and i16 %284, %282 + store i16 %285, i16* @us, align 2 + %286 = load i8* @uc, align 1 + %287 = zext i8 %286 to i32 + %288 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %289 = atomicrmw nand i32* %288, i32 %287 monotonic + %290 = xor i32 %289, -1 + %291 = and i32 %290, %287 + store i32 %291, i32* @si, align 4 + %292 = load i8* @uc, align 1 + %293 = zext i8 %292 to i32 + %294 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %295 = atomicrmw nand i32* %294, i32 %293 monotonic + %296 = xor i32 %295, -1 + %297 = and i32 %296, %293 + store i32 %297, i32* @ui, align 4 + %298 = load i8* @uc, align 1 + %299 = zext i8 %298 to i64 + %300 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %301 = atomicrmw nand i64* %300, i64 %299 monotonic + %302 = xor i64 %301, -1 + %303 = and i64 %302, %299 + store i64 %303, i64* @sl, align 8 + %304 = load i8* @uc, align 1 + %305 = zext i8 %304 to i64 + %306 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %307 = atomicrmw nand i64* %306, i64 %305 monotonic + %308 = xor i64 %307, -1 + %309 = and i64 %308, %305 + store i64 %309, i64* @ul, align 8 + %310 = load i8* @uc, align 1 + %311 = zext i8 %310 to i64 + %312 = bitcast i8* bitcast (i64* @sll to i8*) to i64* + %313 = atomicrmw nand i64* %312, i64 %311 monotonic + %314 = xor i64 %313, -1 + %315 = and i64 %314, %311 + store i64 %315, i64* @sll, align 8 + %316 = load i8* @uc, align 1 + %317 = zext i8 %316 to i64 + %318 = bitcast i8* bitcast (i64* @ull to i8*) to i64* + %319 = atomicrmw nand i64* %318, i64 %317 monotonic + %320 = xor i64 %319, -1 + %321 = and i64 %320, %317 + store i64 %321, i64* @ull, align 8 + br label %return + +return: ; preds = %entry + ret void +} + +define void @test_compare_and_swap() nounwind { +entry: + 
%0 = load i8* @sc, align 1 + %1 = zext i8 %0 to i32 + %2 = load i8* @uc, align 1 + %3 = zext i8 %2 to i32 + %4 = trunc i32 %3 to i8 + %5 = trunc i32 %1 to i8 + %6 = cmpxchg i8* @sc, i8 %4, i8 %5 monotonic + store i8 %6, i8* @sc, align 1 + %7 = load i8* @sc, align 1 + %8 = zext i8 %7 to i32 + %9 = load i8* @uc, align 1 + %10 = zext i8 %9 to i32 + %11 = trunc i32 %10 to i8 + %12 = trunc i32 %8 to i8 + %13 = cmpxchg i8* @uc, i8 %11, i8 %12 monotonic + store i8 %13, i8* @uc, align 1 + %14 = load i8* @sc, align 1 + %15 = sext i8 %14 to i16 + %16 = zext i16 %15 to i32 + %17 = load i8* @uc, align 1 + %18 = zext i8 %17 to i32 + %19 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %20 = trunc i32 %18 to i16 + %21 = trunc i32 %16 to i16 + %22 = cmpxchg i16* %19, i16 %20, i16 %21 monotonic + store i16 %22, i16* @ss, align 2 + %23 = load i8* @sc, align 1 + %24 = sext i8 %23 to i16 + %25 = zext i16 %24 to i32 + %26 = load i8* @uc, align 1 + %27 = zext i8 %26 to i32 + %28 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %29 = trunc i32 %27 to i16 + %30 = trunc i32 %25 to i16 + %31 = cmpxchg i16* %28, i16 %29, i16 %30 monotonic + store i16 %31, i16* @us, align 2 + %32 = load i8* @sc, align 1 + %33 = sext i8 %32 to i32 + %34 = load i8* @uc, align 1 + %35 = zext i8 %34 to i32 + %36 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %37 = cmpxchg i32* %36, i32 %35, i32 %33 monotonic + store i32 %37, i32* @si, align 4 + %38 = load i8* @sc, align 1 + %39 = sext i8 %38 to i32 + %40 = load i8* @uc, align 1 + %41 = zext i8 %40 to i32 + %42 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %43 = cmpxchg i32* %42, i32 %41, i32 %39 monotonic + store i32 %43, i32* @ui, align 4 + %44 = load i8* @sc, align 1 + %45 = sext i8 %44 to i64 + %46 = load i8* @uc, align 1 + %47 = zext i8 %46 to i64 + %48 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %49 = cmpxchg i64* %48, i64 %47, i64 %45 monotonic + store i64 %49, i64* @sl, align 8 + %50 = load i8* @sc, align 1 + %51 = sext i8 %50 to i64 + %52 
= load i8* @uc, align 1 + %53 = zext i8 %52 to i64 + %54 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %55 = cmpxchg i64* %54, i64 %53, i64 %51 monotonic + store i64 %55, i64* @ul, align 8 + %56 = load i8* @sc, align 1 + %57 = sext i8 %56 to i64 + %58 = load i8* @uc, align 1 + %59 = zext i8 %58 to i64 + %60 = bitcast i8* bitcast (i64* @sll to i8*) to i64* + %61 = cmpxchg i64* %60, i64 %59, i64 %57 monotonic + store i64 %61, i64* @sll, align 8 + %62 = load i8* @sc, align 1 + %63 = sext i8 %62 to i64 + %64 = load i8* @uc, align 1 + %65 = zext i8 %64 to i64 + %66 = bitcast i8* bitcast (i64* @ull to i8*) to i64* + %67 = cmpxchg i64* %66, i64 %65, i64 %63 monotonic + store i64 %67, i64* @ull, align 8 + %68 = load i8* @sc, align 1 + %69 = zext i8 %68 to i32 + %70 = load i8* @uc, align 1 + %71 = zext i8 %70 to i32 + %72 = trunc i32 %71 to i8 + %73 = trunc i32 %69 to i8 + %74 = cmpxchg i8* @sc, i8 %72, i8 %73 monotonic + %75 = icmp eq i8 %74, %72 + %76 = zext i1 %75 to i8 + %77 = zext i8 %76 to i32 + store i32 %77, i32* @ui, align 4 + %78 = load i8* @sc, align 1 + %79 = zext i8 %78 to i32 + %80 = load i8* @uc, align 1 + %81 = zext i8 %80 to i32 + %82 = trunc i32 %81 to i8 + %83 = trunc i32 %79 to i8 + %84 = cmpxchg i8* @uc, i8 %82, i8 %83 monotonic + %85 = icmp eq i8 %84, %82 + %86 = zext i1 %85 to i8 + %87 = zext i8 %86 to i32 + store i32 %87, i32* @ui, align 4 + %88 = load i8* @sc, align 1 + %89 = sext i8 %88 to i16 + %90 = zext i16 %89 to i32 + %91 = load i8* @uc, align 1 + %92 = zext i8 %91 to i32 + %93 = trunc i32 %92 to i8 + %94 = trunc i32 %90 to i8 + %95 = cmpxchg i8* bitcast (i16* @ss to i8*), i8 %93, i8 %94 monotonic + %96 = icmp eq i8 %95, %93 + %97 = zext i1 %96 to i8 + %98 = zext i8 %97 to i32 + store i32 %98, i32* @ui, align 4 + %99 = load i8* @sc, align 1 + %100 = sext i8 %99 to i16 + %101 = zext i16 %100 to i32 + %102 = load i8* @uc, align 1 + %103 = zext i8 %102 to i32 + %104 = trunc i32 %103 to i8 + %105 = trunc i32 %101 to i8 + %106 = cmpxchg i8* 
bitcast (i16* @us to i8*), i8 %104, i8 %105 monotonic + %107 = icmp eq i8 %106, %104 + %108 = zext i1 %107 to i8 + %109 = zext i8 %108 to i32 + store i32 %109, i32* @ui, align 4 + %110 = load i8* @sc, align 1 + %111 = sext i8 %110 to i32 + %112 = load i8* @uc, align 1 + %113 = zext i8 %112 to i32 + %114 = trunc i32 %113 to i8 + %115 = trunc i32 %111 to i8 + %116 = cmpxchg i8* bitcast (i32* @si to i8*), i8 %114, i8 %115 monotonic + %117 = icmp eq i8 %116, %114 + %118 = zext i1 %117 to i8 + %119 = zext i8 %118 to i32 + store i32 %119, i32* @ui, align 4 + %120 = load i8* @sc, align 1 + %121 = sext i8 %120 to i32 + %122 = load i8* @uc, align 1 + %123 = zext i8 %122 to i32 + %124 = trunc i32 %123 to i8 + %125 = trunc i32 %121 to i8 + %126 = cmpxchg i8* bitcast (i32* @ui to i8*), i8 %124, i8 %125 monotonic + %127 = icmp eq i8 %126, %124 + %128 = zext i1 %127 to i8 + %129 = zext i8 %128 to i32 + store i32 %129, i32* @ui, align 4 + %130 = load i8* @sc, align 1 + %131 = sext i8 %130 to i64 + %132 = load i8* @uc, align 1 + %133 = zext i8 %132 to i64 + %134 = trunc i64 %133 to i8 + %135 = trunc i64 %131 to i8 + %136 = cmpxchg i8* bitcast (i64* @sl to i8*), i8 %134, i8 %135 monotonic + %137 = icmp eq i8 %136, %134 + %138 = zext i1 %137 to i8 + %139 = zext i8 %138 to i32 + store i32 %139, i32* @ui, align 4 + %140 = load i8* @sc, align 1 + %141 = sext i8 %140 to i64 + %142 = load i8* @uc, align 1 + %143 = zext i8 %142 to i64 + %144 = trunc i64 %143 to i8 + %145 = trunc i64 %141 to i8 + %146 = cmpxchg i8* bitcast (i64* @ul to i8*), i8 %144, i8 %145 monotonic + %147 = icmp eq i8 %146, %144 + %148 = zext i1 %147 to i8 + %149 = zext i8 %148 to i32 + store i32 %149, i32* @ui, align 4 + %150 = load i8* @sc, align 1 + %151 = sext i8 %150 to i64 + %152 = load i8* @uc, align 1 + %153 = zext i8 %152 to i64 + %154 = trunc i64 %153 to i8 + %155 = trunc i64 %151 to i8 + %156 = cmpxchg i8* bitcast (i64* @sll to i8*), i8 %154, i8 %155 monotonic + %157 = icmp eq i8 %156, %154 + %158 = zext i1 
%157 to i8 + %159 = zext i8 %158 to i32 + store i32 %159, i32* @ui, align 4 + %160 = load i8* @sc, align 1 + %161 = sext i8 %160 to i64 + %162 = load i8* @uc, align 1 + %163 = zext i8 %162 to i64 + %164 = trunc i64 %163 to i8 + %165 = trunc i64 %161 to i8 + %166 = cmpxchg i8* bitcast (i64* @ull to i8*), i8 %164, i8 %165 monotonic + %167 = icmp eq i8 %166, %164 + %168 = zext i1 %167 to i8 + %169 = zext i8 %168 to i32 + store i32 %169, i32* @ui, align 4 + br label %return + +return: ; preds = %entry + ret void +} + +define void @test_lock() nounwind { +entry: + %0 = atomicrmw xchg i8* @sc, i8 1 monotonic + store i8 %0, i8* @sc, align 1 + %1 = atomicrmw xchg i8* @uc, i8 1 monotonic + store i8 %1, i8* @uc, align 1 + %2 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + %3 = atomicrmw xchg i16* %2, i16 1 monotonic + store i16 %3, i16* @ss, align 2 + %4 = bitcast i8* bitcast (i16* @us to i8*) to i16* + %5 = atomicrmw xchg i16* %4, i16 1 monotonic + store i16 %5, i16* @us, align 2 + %6 = bitcast i8* bitcast (i32* @si to i8*) to i32* + %7 = atomicrmw xchg i32* %6, i32 1 monotonic + store i32 %7, i32* @si, align 4 + %8 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + %9 = atomicrmw xchg i32* %8, i32 1 monotonic + store i32 %9, i32* @ui, align 4 + %10 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + %11 = atomicrmw xchg i64* %10, i64 1 monotonic + store i64 %11, i64* @sl, align 8 + %12 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + %13 = atomicrmw xchg i64* %12, i64 1 monotonic + store i64 %13, i64* @ul, align 8 + %14 = bitcast i8* bitcast (i64* @sll to i8*) to i64* + %15 = atomicrmw xchg i64* %14, i64 1 monotonic + store i64 %15, i64* @sll, align 8 + %16 = bitcast i8* bitcast (i64* @ull to i8*) to i64* + %17 = atomicrmw xchg i64* %16, i64 1 monotonic + store i64 %17, i64* @ull, align 8 + fence seq_cst + store volatile i8 0, i8* @sc, align 1 + store volatile i8 0, i8* @uc, align 1 + %18 = bitcast i8* bitcast (i16* @ss to i8*) to i16* + store volatile i16 0, i16* %18, 
align 2 + %19 = bitcast i8* bitcast (i16* @us to i8*) to i16* + store volatile i16 0, i16* %19, align 2 + %20 = bitcast i8* bitcast (i32* @si to i8*) to i32* + store volatile i32 0, i32* %20, align 4 + %21 = bitcast i8* bitcast (i32* @ui to i8*) to i32* + store volatile i32 0, i32* %21, align 4 + %22 = bitcast i8* bitcast (i64* @sl to i8*) to i64* + store volatile i64 0, i64* %22, align 8 + %23 = bitcast i8* bitcast (i64* @ul to i8*) to i64* + store volatile i64 0, i64* %23, align 8 + %24 = bitcast i8* bitcast (i64* @sll to i8*) to i64* + store volatile i64 0, i64* %24, align 8 + %25 = bitcast i8* bitcast (i64* @ull to i8*) to i64* + store volatile i64 0, i64* %25, align 8 + br label %return + +return: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/GC/alloc_loop.ll b/src/LLVM/test/CodeGen/X86/GC/alloc_loop.ll new file mode 100644 index 0000000..c830e11 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/GC/alloc_loop.ll
@@ -0,0 +1,53 @@ +; RUN: llc < %s + + +declare i8* @llvm_gc_allocate(i32) +declare void @llvm_gc_initialize(i32) + +declare void @llvm.gcroot(i8**, i8*) +declare void @llvm.gcwrite(i8*, i8*, i8**) + +define i32 @main() gc "shadow-stack" { +entry: + %A = alloca i8* + %B = alloca i8** + + call void @llvm_gc_initialize(i32 1048576) ; Start with 1MB heap + + ;; void *A; + call void @llvm.gcroot(i8** %A, i8* null) + + ;; A = gcalloc(10); + %Aptr = call i8* @llvm_gc_allocate(i32 10) + store i8* %Aptr, i8** %A + + ;; void **B; + %tmp.1 = bitcast i8*** %B to i8** + call void @llvm.gcroot(i8** %tmp.1, i8* null) + + ;; B = gcalloc(4); + %B.upgrd.1 = call i8* @llvm_gc_allocate(i32 8) + %tmp.2 = bitcast i8* %B.upgrd.1 to i8** + store i8** %tmp.2, i8*** %B + + ;; *B = A; + %B.1 = load i8*** %B + %A.1 = load i8** %A + call void @llvm.gcwrite(i8* %A.1, i8* %B.upgrd.1, i8** %B.1) + + br label %AllocLoop + +AllocLoop: + %i = phi i32 [ 0, %entry ], [ %indvar.next, %AllocLoop ] + ;; Allocated mem: allocated memory is immediately dead. + call i8* @llvm_gc_allocate(i32 100) + + %indvar.next = add i32 %i, 1 + %exitcond = icmp eq i32 %indvar.next, 10000000 + br i1 %exitcond, label %Exit, label %AllocLoop + +Exit: + ret i32 0 +} + +declare void @__main()
diff --git a/src/LLVM/test/CodeGen/X86/GC/argpromotion.ll b/src/LLVM/test/CodeGen/X86/GC/argpromotion.ll new file mode 100644 index 0000000..c63ce22 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/GC/argpromotion.ll
@@ -0,0 +1,19 @@ +; RUN: opt < %s -argpromotion + +declare void @llvm.gcroot(i8**, i8*) + +define i32 @g() { +entry: + %var = alloca i32 + store i32 1, i32* %var + %x = call i32 @f(i32* %var) + ret i32 %x +} + +define internal i32 @f(i32* %xp) gc "example" { +entry: + %var = alloca i8* + call void @llvm.gcroot(i8** %var, i8* null) + %x = load i32* %xp + ret i32 %x +}
diff --git a/src/LLVM/test/CodeGen/X86/GC/badreadproto.ll b/src/LLVM/test/CodeGen/X86/GC/badreadproto.ll new file mode 100644 index 0000000..4fe90b9 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/GC/badreadproto.ll
@@ -0,0 +1,13 @@ +; RUN: not llvm-as < %s >& /dev/null + + %list = type { i32, %list* } + +; This usage is invalid now; instead, objects must be bitcast to i8* for input +; to the gc intrinsics. +declare %list* @llvm.gcread(%list*, %list**) + +define %list* @tl(%list* %l) gc "example" { + %hd.ptr = getelementptr %list* %l, i32 0, i32 0 + %hd = call %list* @llvm.gcread(%list* %l, %list** %hd.ptr) + ret i32 %tmp +}
diff --git a/src/LLVM/test/CodeGen/X86/GC/badrootproto.ll b/src/LLVM/test/CodeGen/X86/GC/badrootproto.ll new file mode 100644 index 0000000..ff86d03 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/GC/badrootproto.ll
@@ -0,0 +1,13 @@ +; RUN: not llvm-as < %s >& /dev/null + + %list = type { i32, %list* } + %meta = type opaque + +; This usage is invalid now; instead, objects must be bitcast to i8* for input +; to the gc intrinsics. +declare void @llvm.gcroot(%list*, %meta*) + +define void @root() gc "example" { + %x.var = alloca i8* + call void @llvm.gcroot(i8** %x.var, %meta* null) +}
diff --git a/src/LLVM/test/CodeGen/X86/GC/badwriteproto.ll b/src/LLVM/test/CodeGen/X86/GC/badwriteproto.ll new file mode 100644 index 0000000..be81f84 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/GC/badwriteproto.ll
@@ -0,0 +1,22 @@ +; RUN: not llvm-as < %s >& /dev/null + + %list = type { i32, %list* } + +; This usage is invalid now; instead, objects must be bitcast to i8* for input +; to the gc intrinsics. +declare void @llvm.gcwrite(%list*, %list*, %list**) + +define %list* @cons(i32 %hd, %list* %tl) gc "example" { + %tmp = call i8* @gcalloc(i32 bitcast(%list* getelementptr(%list* null, i32 1) to i32)) + %cell = bitcast i8* %tmp to %list* + + %hd.ptr = getelementptr %list* %cell, i32 0, i32 0 + store i32 %hd, i32* %hd.ptr + + %tl.ptr = getelementptr %list* %cell, i32 0, i32 0 + call void @llvm.gcwrite(%list* %tl, %list* %cell, %list** %tl.ptr) + + ret %cell.2 +} + +declare i8* @gcalloc(i32)
diff --git a/src/LLVM/test/CodeGen/X86/GC/deadargelim.ll b/src/LLVM/test/CodeGen/X86/GC/deadargelim.ll new file mode 100644 index 0000000..1760190 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/GC/deadargelim.ll
@@ -0,0 +1,16 @@ +; RUN: opt < %s -deadargelim + +declare void @llvm.gcroot(i8**, i8*) + +define void @g() { +entry: + call void @f(i32 0) + ret void +} + +define internal void @f(i32 %unused) gc "example" { +entry: + %var = alloca i8* + call void @llvm.gcroot(i8** %var, i8* null) + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/GC/dg.exp b/src/LLVM/test/CodeGen/X86/GC/dg.exp new file mode 100644 index 0000000..0b301a8 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/GC/dg.exp
@@ -0,0 +1,5 @@ +load_lib llvm.exp + +if { [llvm_supports_target X86] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] +}
diff --git a/src/LLVM/test/CodeGen/X86/GC/fat.ll b/src/LLVM/test/CodeGen/X86/GC/fat.ll new file mode 100644 index 0000000..d05ca3d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/GC/fat.ll
@@ -0,0 +1,10 @@ +; RUN: not llvm-as < %s >& /dev/null + +declare void @llvm.gcroot(i8**, i8*) nounwind + +define void @f() gc "x" { + %st = alloca { i8*, i1 } ; <{ i8*, i1 }*> [#uses=1] + %st_ptr = bitcast { i8*, i1 }* %st to i8** ; <i8**> [#uses=1] + call void @llvm.gcroot(i8** %st_ptr, i8* null) + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/GC/inline.ll b/src/LLVM/test/CodeGen/X86/GC/inline.ll new file mode 100644 index 0000000..9da33ae --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/GC/inline.ll
@@ -0,0 +1,23 @@ +; RUN: opt < %s -inline -S | grep example + + %IntArray = type { i32, [0 x i32*] } + +declare void @llvm.gcroot(i8**, i8*) nounwind + +define i32 @f() { + %x = call i32 @g( ) ; <i32> [#uses=1] + ret i32 %x +} + +define internal i32 @g() gc "example" { + %root = alloca i8* ; <i8**> [#uses=2] + call void @llvm.gcroot( i8** %root, i8* null ) + %obj = call %IntArray* @h( ) ; <%IntArray*> [#uses=2] + %obj.2 = bitcast %IntArray* %obj to i8* ; <i8*> [#uses=1] + store i8* %obj.2, i8** %root + %Length.ptr = getelementptr %IntArray* %obj, i32 0, i32 0 ; <i32*> [#uses=1] + %Length = load i32* %Length.ptr ; <i32> [#uses=1] + ret i32 %Length +} + +declare %IntArray* @h()
diff --git a/src/LLVM/test/CodeGen/X86/GC/inline2.ll b/src/LLVM/test/CodeGen/X86/GC/inline2.ll new file mode 100644 index 0000000..1594705 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/GC/inline2.ll
@@ -0,0 +1,24 @@ +; RUN: opt < %s -inline -S | grep sample +; RUN: opt < %s -inline -S | grep example + + %IntArray = type { i32, [0 x i32*] } + +declare void @llvm.gcroot(i8**, i8*) nounwind + +define i32 @f() gc "sample" { + %x = call i32 @g( ) ; <i32> [#uses=1] + ret i32 %x +} + +define internal i32 @g() gc "example" { + %root = alloca i8* ; <i8**> [#uses=2] + call void @llvm.gcroot( i8** %root, i8* null ) + %obj = call %IntArray* @h( ) ; <%IntArray*> [#uses=2] + %obj.2 = bitcast %IntArray* %obj to i8* ; <i8*> [#uses=1] + store i8* %obj.2, i8** %root + %Length.ptr = getelementptr %IntArray* %obj, i32 0, i32 0 ; <i32*> [#uses=1] + %Length = load i32* %Length.ptr ; <i32> [#uses=1] + ret i32 %Length +} + +declare %IntArray* @h()
diff --git a/src/LLVM/test/CodeGen/X86/GC/lower_gcroot.ll b/src/LLVM/test/CodeGen/X86/GC/lower_gcroot.ll new file mode 100644 index 0000000..c2d418a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/GC/lower_gcroot.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s + + %Env = type i8* + +define void @.main(%Env) gc "shadow-stack" { + %Root = alloca %Env + call void @llvm.gcroot( %Env* %Root, %Env null ) + unreachable +} + +declare void @llvm.gcroot(%Env*, %Env)
diff --git a/src/LLVM/test/CodeGen/X86/GC/outside.ll b/src/LLVM/test/CodeGen/X86/GC/outside.ll new file mode 100644 index 0000000..2968c69 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/GC/outside.ll
@@ -0,0 +1,10 @@ +; RUN: not llvm-as < %s >& /dev/null + +declare void @llvm.gcroot(i8**, i8*) + +define void @f(i8* %x) { + %root = alloca i8* + call void @llvm.gcroot(i8** %root, i8* null) + store i8* %x, i8** %root + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/MachineSink-CritEdge.ll b/src/LLVM/test/CodeGen/X86/MachineSink-CritEdge.ll new file mode 100644 index 0000000..74a1049 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/MachineSink-CritEdge.ll
@@ -0,0 +1,58 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +define i32 @f(i32 %x) nounwind ssp { +entry: + %shl.i = shl i32 %x, 12 + %neg.i = xor i32 %shl.i, -1 + %add.i = add nsw i32 %neg.i, %x + %shr.i = ashr i32 %add.i, 22 + %xor.i = xor i32 %shr.i, %add.i + %shl5.i = shl i32 %xor.i, 13 + %neg6.i = xor i32 %shl5.i, -1 + %add8.i = add nsw i32 %xor.i, %neg6.i + %shr10.i = ashr i32 %add8.i, 8 + %xor12.i = xor i32 %shr10.i, %add8.i + %add16.i = mul i32 %xor12.i, 9 + %shr18.i = ashr i32 %add16.i, 15 + %xor20.i = xor i32 %shr18.i, %add16.i + %shl22.i = shl i32 %xor20.i, 27 + %neg23.i = xor i32 %shl22.i, -1 + %add25.i = add nsw i32 %xor20.i, %neg23.i + %shr27.i = ashr i32 %add25.i, 31 + %rem = srem i32 %x, 7 + %cmp = icmp eq i32 %rem, 3 + br i1 %cmp, label %land.lhs.true, label %do.body.preheader + +land.lhs.true: + %call3 = tail call i32 @g(i32 %x) nounwind + %cmp4 = icmp eq i32 %call3, 10 + br i1 %cmp4, label %do.body.preheader, label %if.then + +; %shl.i should be sinked all the way down to do.body.preheader, but not into the loop. +; CHECK: do.body.preheader +; CHECK-NOT: do.body +; CHECK: shll $12 + +do.body.preheader: + %xor29.i = xor i32 %shr27.i, %add25.i + br label %do.body + +if.then: + %add = add nsw i32 %x, 11 + ret i32 %add + +do.body: + %x.addr.1 = phi i32 [ %add9, %do.body ], [ %x, %do.body.preheader ] + %xor = xor i32 %xor29.i, %x.addr.1 + %add9 = add nsw i32 %xor, %x.addr.1 + %and = and i32 %add9, 13 + %tobool = icmp eq i32 %and, 0 + br i1 %tobool, label %if.end, label %do.body + +if.end: + ret i32 %add9 +} + +declare i32 @g(i32)
diff --git a/src/LLVM/test/CodeGen/X86/MachineSink-DbgValue.ll b/src/LLVM/test/CodeGen/X86/MachineSink-DbgValue.ll new file mode 100644 index 0000000..ea791a3 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/MachineSink-DbgValue.ll
@@ -0,0 +1,49 @@ +; RUN: llc < %s | FileCheck %s +; Should sink matching DBG_VALUEs also. +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-macosx10.7.0" + +define i32 @foo(i32 %i, i32* nocapture %c) nounwind uwtable readonly ssp { + tail call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata !6), !dbg !12 + %ab = load i32* %c, align 1, !dbg !14 + tail call void @llvm.dbg.value(metadata !{i32* %c}, i64 0, metadata !7), !dbg !13 + tail call void @llvm.dbg.value(metadata !{i32 %ab}, i64 0, metadata !10), !dbg !14 + %cd = icmp eq i32 %i, 42, !dbg !15 + br i1 %cd, label %bb1, label %bb2, !dbg !15 + +bb1: ; preds = %0 +;CHECK: DEBUG_VALUE: a +;CHECK-NEXT: .loc 1 5 5 +;CHECK-NEXT: addl + %gh = add nsw i32 %ab, 2, !dbg !16 + br label %bb2, !dbg !16 + +bb2: + %.0 = phi i32 [ %gh, %bb1 ], [ 0, %0 ] + ret i32 %.0, !dbg !17 +} + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.cu = !{!0} +!llvm.dbg.sp = !{!1} +!llvm.dbg.lv.foo = !{!6, !7, !10} + +!0 = metadata !{i32 589841, i32 0, i32 12, metadata !"a.c", metadata !"/private/tmp", metadata !"Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!1 = metadata !{i32 589870, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i32*)* @foo, null, null} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 589865, metadata !"a.c", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 589845, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5} +!5 = metadata !{i32 589860, metadata !0, metadata !"int", null, i32 
0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 590081, metadata !1, metadata !"i", metadata !2, i32 16777218, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] +!7 = metadata !{i32 590081, metadata !1, metadata !"c", metadata !2, i32 33554434, metadata !8, i32 0} ; [ DW_TAG_arg_variable ] +!8 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] +!9 = metadata !{i32 589860, metadata !0, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] +!10 = metadata !{i32 590080, metadata !11, metadata !"a", metadata !2, i32 3, metadata !9, i32 0} ; [ DW_TAG_auto_variable ] +!11 = metadata !{i32 589835, metadata !1, i32 2, i32 25, metadata !2, i32 0} ; [ DW_TAG_lexical_block ] +!12 = metadata !{i32 2, i32 13, metadata !1, null} +!13 = metadata !{i32 2, i32 22, metadata !1, null} +!14 = metadata !{i32 3, i32 14, metadata !11, null} +!15 = metadata !{i32 4, i32 3, metadata !11, null} +!16 = metadata !{i32 5, i32 5, metadata !11, null} +!17 = metadata !{i32 7, i32 1, metadata !11, null}
diff --git a/src/LLVM/test/CodeGen/X86/MachineSink-PHIUse.ll b/src/LLVM/test/CodeGen/X86/MachineSink-PHIUse.ll new file mode 100644 index 0000000..3758fd8 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/MachineSink-PHIUse.ll
@@ -0,0 +1,39 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -disable-cgp-branch-opts -stats |& grep {machine-sink} + +define fastcc void @t() nounwind ssp { +entry: + br i1 undef, label %bb, label %bb4 + +bb: ; preds = %entry + br i1 undef, label %return, label %bb3 + +bb3: ; preds = %bb + unreachable + +bb4: ; preds = %entry + br i1 undef, label %bb.nph, label %return + +bb.nph: ; preds = %bb4 + br label %bb5 + +bb5: ; preds = %bb9, %bb.nph + %indvar = phi i64 [ 0, %bb.nph ], [ %tmp12, %bb9 ] ; <i64> [#uses=1] + %tmp12 = add i64 %indvar, 1 ; <i64> [#uses=2] + %tmp13 = trunc i64 %tmp12 to i32 ; <i32> [#uses=0] + br i1 undef, label %bb9, label %bb6 + +bb6: ; preds = %bb5 + br i1 undef, label %bb9, label %bb7 + +bb7: ; preds = %bb6 + br i1 undef, label %bb9, label %bb8 + +bb8: ; preds = %bb7 + unreachable + +bb9: ; preds = %bb7, %bb6, %bb5 + br i1 undef, label %bb5, label %return + +return: ; preds = %bb9, %bb4, %bb + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/MachineSink-eflags.ll b/src/LLVM/test/CodeGen/X86/MachineSink-eflags.ll new file mode 100644 index 0000000..5b8c7b2 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/MachineSink-eflags.ll
@@ -0,0 +1,74 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-pc-linux" + + +%0 = type <{ i64, i64, %1, %1, [21 x %2] }> +%1 = type <{ i64, i64, i64 }> +%2 = type <{ i32, i32, i8 addrspace(2)* }> +%3 = type { i8*, i8*, i8*, i8*, i32 } +%4 = type <{ %5*, i8*, i32, i32, [4 x i64], [4 x i64], [4 x i64], [4 x i64], [4 x i64] }> +%5 = type <{ void (i32)*, i8*, i32 (i8*, ...)* }> + +define void @foo(i8* nocapture %_stubArgs) nounwind { +entry: + %i0 = alloca i8*, align 8 + %i2 = alloca i8*, align 8 + %b.i = alloca [16 x <2 x double>], align 16 + %conv = bitcast i8* %_stubArgs to i32* + %tmp1 = load i32* %conv, align 4 + %ptr8 = getelementptr i8* %_stubArgs, i64 16 + %i4 = bitcast i8* %ptr8 to <2 x double>* + %ptr20 = getelementptr i8* %_stubArgs, i64 48 + %i7 = bitcast i8* %ptr20 to <2 x double> addrspace(1)** + %tmp21 = load <2 x double> addrspace(1)** %i7, align 8 + %ptr28 = getelementptr i8* %_stubArgs, i64 64 + %i9 = bitcast i8* %ptr28 to i32* + %tmp29 = load i32* %i9, align 4 + %ptr32 = getelementptr i8* %_stubArgs, i64 68 + %i10 = bitcast i8* %ptr32 to i32* + %tmp33 = load i32* %i10, align 4 + %tmp17.i = mul i32 10, 20 + %tmp19.i = add i32 %tmp17.i, %tmp33 + %conv21.i = zext i32 %tmp19.i to i64 + %tmp6.i = and i32 42, -32 + %tmp42.i = add i32 %tmp6.i, 17 + %tmp44.i = insertelement <2 x i32> undef, i32 %tmp42.i, i32 1 + %tmp96676677.i = or i32 17, -4 + %ptr4438.i = getelementptr inbounds [16 x <2 x double>]* %b.i, i64 0, i64 0 + %arrayidx4506.i = getelementptr [16 x <2 x double>]* %b.i, i64 0, i64 4 + %tmp52.i = insertelement <2 x i32> %tmp44.i, i32 0, i32 0 + %tmp78.i = extractelement <2 x i32> %tmp44.i, i32 1 + %tmp97.i = add i32 %tmp78.i, %tmp96676677.i + %tmp99.i = insertelement <2 x i32> %tmp52.i, i32 %tmp97.i, i32 1 + %tmp154.i = extractelement <2 x i32> %tmp99.i, i32 1 + %tmp156.i = 
extractelement <2 x i32> %tmp52.i, i32 0 + %tmp158.i = urem i32 %tmp156.i, %tmp1 + %i38 = mul i32 %tmp154.i, %tmp29 + %i39 = add i32 %tmp158.i, %i38 + %conv160.i = zext i32 %i39 to i64 + %tmp22.sum652.i = add i64 %conv160.i, %conv21.i + %arrayidx161.i = getelementptr <2 x double> addrspace(1)* %tmp21, i64 %tmp22.sum652.i + %tmp162.i = load <2 x double> addrspace(1)* %arrayidx161.i, align 16 + %tmp222.i = add i32 %tmp154.i, 1 + %i43 = mul i32 %tmp222.i, %tmp29 + %i44 = add i32 %tmp158.i, %i43 + %conv228.i = zext i32 %i44 to i64 + %tmp22.sum656.i = add i64 %conv228.i, %conv21.i + %arrayidx229.i = getelementptr <2 x double> addrspace(1)* %tmp21, i64 %tmp22.sum656.i + %tmp230.i = load <2 x double> addrspace(1)* %arrayidx229.i, align 16 + %cmp432.i = icmp ult i32 %tmp156.i, %tmp1 + +; %shl.i should not be sinked below the compare. +; CHECK: cmpl +; CHECK-NOT: shlq + + %cond.i = select i1 %cmp432.i, <2 x double> %tmp162.i, <2 x double> zeroinitializer + store <2 x double> %cond.i, <2 x double>* %ptr4438.i, align 16 + %cond448.i = select i1 %cmp432.i, <2 x double> %tmp230.i, <2 x double> zeroinitializer + store <2 x double> %cond448.i, <2 x double>* %arrayidx4506.i, align 16 + ret void +} + + +
diff --git a/src/LLVM/test/CodeGen/X86/SwitchLowering.ll b/src/LLVM/test/CodeGen/X86/SwitchLowering.ll new file mode 100644 index 0000000..e06fa30 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/SwitchLowering.ll
@@ -0,0 +1,28 @@ +; RUN: llc < %s -march=x86 | grep cmp | count 1 +; PR964 + +define i8* @FindChar(i8* %CurPtr) { +entry: + br label %bb + +bb: ; preds = %bb, %entry + %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=3] + %CurPtr_addr.0.rec = bitcast i32 %indvar to i32 ; <i32> [#uses=1] + %gep.upgrd.1 = zext i32 %indvar to i64 ; <i64> [#uses=1] + %CurPtr_addr.0 = getelementptr i8* %CurPtr, i64 %gep.upgrd.1 ; <i8*> [#uses=1] + %tmp = load i8* %CurPtr_addr.0 ; <i8> [#uses=3] + %tmp2.rec = add i32 %CurPtr_addr.0.rec, 1 ; <i32> [#uses=1] + %tmp2 = getelementptr i8* %CurPtr, i32 %tmp2.rec ; <i8*> [#uses=1] + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1] + switch i8 %tmp, label %bb [ + i8 0, label %bb7 + i8 120, label %bb7 + ] + +bb7: ; preds = %bb, %bb + tail call void @foo( i8 %tmp ) + ret i8* %tmp2 +} + +declare void @foo(i8) +
diff --git a/src/LLVM/test/CodeGen/X86/abi-isel.ll b/src/LLVM/test/CodeGen/X86/abi-isel.ll new file mode 100644 index 0000000..5068d29 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/abi-isel.ll
@@ -0,0 +1,9660 @@ +; RUN: llc < %s -asm-verbose=0 -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-STATIC +; RUN: llc < %s -asm-verbose=0 -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-PIC + +; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-64-STATIC +; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=LINUX-64-PIC + +; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-32-STATIC +; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC +; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-PIC + +; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-64-STATIC +; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC +; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-PIC + +@src = external global [131072 x i32] +@dst = external global [131072 x i32] +@xsrc = external global [32 x i32] +@xdst = external global [32 x i32] +@ptr = external global i32* +@dsrc = global [131072 x i32] zeroinitializer, align 32 +@ddst = global [131072 x i32] zeroinitializer, align 32 +@dptr = global i32* null +@lsrc = internal global 
[131072 x i32] zeroinitializer +@ldst = internal global [131072 x i32] zeroinitializer +@lptr = internal global i32* null +@ifunc = external global void ()* +@difunc = global void ()* null +@lifunc = internal global void ()* null +@lxsrc = internal global [32 x i32] zeroinitializer, align 32 +@lxdst = internal global [32 x i32] zeroinitializer, align 32 +@dxsrc = global [32 x i32] zeroinitializer, align 32 +@dxdst = global [32 x i32] zeroinitializer, align 32 + +define void @foo00() nounwind { +entry: + %0 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 0), align 4 + store i32 %0, i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 0), align 4 + ret void + +; LINUX-64-STATIC: foo00: +; LINUX-64-STATIC: movl src(%rip), [[EAX:%e.x]] +; LINUX-64-STATIC: movl [[EAX]], dst +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: foo00: +; LINUX-32-STATIC: movl src, [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[EAX]], dst +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: foo00: +; LINUX-32-PIC: movl src, [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[EAX]], dst +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: foo00: +; LINUX-64-PIC: movq src@GOTPCREL(%rip), [[RAX:%r..]] +; LINUX-64-PIC-NEXT: movl ([[RAX]]), [[EAX:%e..]] +; LINUX-64-PIC-NEXT: movq dst@GOTPCREL(%rip), [[RCX:%r..]] +; LINUX-64-PIC-NEXT: movl [[EAX]], ([[RCX]]) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _foo00: +; DARWIN-32-STATIC: movl _src, [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[EAX]], _dst +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _foo00: +; DARWIN-32-DYNAMIC: movl L_src$non_lazy_ptr, [[EAX:%e..]] +; DARWIN-32-DYNAMIC-NEXT: movl ([[EAX]]), [[EAX:%e..]] +; DARWIN-32-DYNAMIC-NEXT: movl L_dst$non_lazy_ptr, [[ECX:%e..]] +; DARWIN-32-DYNAMIC-NEXT: movl [[EAX]], ([[ECX]]) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _foo00: +; DARWIN-32-PIC: calll L0$pb +; DARWIN-32-PIC-NEXT: L0$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e..]] +; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L0$pb([[EAX]]), 
[[ECX:%e..]] +; DARWIN-32-PIC-NEXT: movl ([[ECX]]), [[ECX:%e..]] +; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L0$pb([[EAX]]), [[EAX:%e..]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], ([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _foo00: +; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), [[RAX:%r..]] +; DARWIN-64-STATIC-NEXT: movl ([[RAX]]), [[EAX:%e..]] +; DARWIN-64-STATIC-NEXT: movq _dst@GOTPCREL(%rip), [[RCX:%r..]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], ([[RCX]]) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _foo00: +; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), [[RAX:%r..]] +; DARWIN-64-DYNAMIC-NEXT: movl ([[RAX]]), [[EAX:%e..]] +; DARWIN-64-DYNAMIC-NEXT: movq _dst@GOTPCREL(%rip), [[RCX:%r..]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], ([[RCX]]) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _foo00: +; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), [[RAX:%r..]] +; DARWIN-64-PIC-NEXT: movl ([[RAX]]), [[EAX:%e..]] +; DARWIN-64-PIC-NEXT: movq _dst@GOTPCREL(%rip), [[RCX:%r..]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], ([[RCX]]) +; DARWIN-64-PIC-NEXT: ret +} + +define void @fxo00() nounwind { +entry: + %0 = load i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 0), align 4 + store i32 %0, i32* getelementptr ([32 x i32]* @xdst, i32 0, i64 0), align 4 + ret void + +; LINUX-64-STATIC: fxo00: +; LINUX-64-STATIC: movl xsrc(%rip), [[EAX:%e.x]] +; LINUX-64-STATIC: movl [[EAX]], xdst +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: fxo00: +; LINUX-32-STATIC: movl xsrc, [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[EAX]], xdst +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: fxo00: +; LINUX-32-PIC: movl xsrc, [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[EAX]], xdst +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: fxo00: +; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movl ([[RAX]]), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movq xdst@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], ([[RCX]]) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: 
_fxo00: +; DARWIN-32-STATIC: movl _xsrc, [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[EAX]], _xdst +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _fxo00: +; DARWIN-32-DYNAMIC: movl L_xsrc$non_lazy_ptr, [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl ([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_xdst$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[EAX]], ([[ECX]]) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _fxo00: +; DARWIN-32-PIC: calll L1$pb +; DARWIN-32-PIC-NEXT: L1$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L1$pb([[EAX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl ([[ECX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L1$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], ([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _fxo00: +; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl ([[RAX]]), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: movq _xdst@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], ([[RCX]]) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _fxo00: +; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl ([[RAX]]), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _xdst@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], ([[RCX]]) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _fxo00: +; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movl ([[RAX]]), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: movq _xdst@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], ([[RCX]]) +; DARWIN-64-PIC-NEXT: ret +} + +define void @foo01() nounwind { +entry: + store i32* getelementptr ([131072 x i32]* @dst, i32 0, i32 0), i32** @ptr, align 8 + ret void +; LINUX-64-STATIC: foo01: +; LINUX-64-STATIC: movq $dst, ptr +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: foo01: +; LINUX-32-STATIC: movl $dst, ptr +; 
LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: foo01: +; LINUX-32-PIC: movl $dst, ptr +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: foo01: +; LINUX-64-PIC: movq dst@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movq [[RAX]], ([[RCX]]) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _foo01: +; DARWIN-32-STATIC: movl $_dst, _ptr +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _foo01: +; DARWIN-32-DYNAMIC: movl L_dst$non_lazy_ptr, [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[EAX]], ([[ECX]]) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _foo01: +; DARWIN-32-PIC: calll L2$pb +; DARWIN-32-PIC-NEXT: L2$pb: +; DARWIN-32-PIC-NEXT: popl +; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L2$pb( +; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L2$pb( +; DARWIN-32-PIC-NEXT: movl +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _foo01: +; DARWIN-64-STATIC: movq _dst@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq [[RAX]], ([[RCX]]) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _foo01: +; DARWIN-64-DYNAMIC: movq _dst@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq [[RAX]], ([[RCX]]) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _foo01: +; DARWIN-64-PIC: movq _dst@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movq [[RAX]], ([[RCX]]) +; DARWIN-64-PIC-NEXT: ret +} + +define void @fxo01() nounwind { +entry: + store i32* getelementptr ([32 x i32]* @xdst, i32 0, i32 0), i32** @ptr, align 8 + ret void +; LINUX-64-STATIC: fxo01: +; LINUX-64-STATIC: movq $xdst, ptr +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: fxo01: +; LINUX-32-STATIC: movl $xdst, ptr +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: fxo01: +; 
LINUX-32-PIC: movl $xdst, ptr +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: fxo01: +; LINUX-64-PIC: movq xdst@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movq [[RAX]], ([[RCX]]) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _fxo01: +; DARWIN-32-STATIC: movl $_xdst, _ptr +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _fxo01: +; DARWIN-32-DYNAMIC: movl L_xdst$non_lazy_ptr, [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[EAX]], ([[ECX]]) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _fxo01: +; DARWIN-32-PIC: calll L3$pb +; DARWIN-32-PIC-NEXT: L3$pb: +; DARWIN-32-PIC-NEXT: popl [[R0:%e..]] +; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L3$pb([[R0]]), [[R1:%e..]] +; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L3$pb([[R0]]), [[R2:%e..]] +; DARWIN-32-PIC-NEXT: movl [[R1:%e..]], ([[R2]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _fxo01: +; DARWIN-64-STATIC: movq _xdst@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq [[RAX]], ([[RCX]]) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _fxo01: +; DARWIN-64-DYNAMIC: movq _xdst@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq [[RAX]], ([[RCX]]) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _fxo01: +; DARWIN-64-PIC: movq _xdst@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movq [[RAX]], ([[RCX]]) +; DARWIN-64-PIC-NEXT: ret +} + +define void @foo02() nounwind { +entry: + %0 = load i32** @ptr, align 8 + %1 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 0), align 4 + store i32 %1, i32* %0, align 4 + ret void +; LINUX-64-STATIC: foo02: +; LINUX-64-STATIC: movl src(%rip), % +; LINUX-64-STATIC: movq ptr(%rip), % +; LINUX-64-STATIC: movl +; 
LINUX-64-STATIC: ret + +; LINUX-32-STATIC: foo02: +; LINUX-32-STATIC: movl src, [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl ptr, [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[EAX]], ([[ECX]]) +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: foo02: +; LINUX-32-PIC: movl src, [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl ptr, [[ECX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[EAX]], ([[ECX]]) +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: foo02: +; LINUX-64-PIC: movq src@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movl ([[RAX]]), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], ([[RCX]]) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _foo02: +; DARWIN-32-STATIC: movl _src, [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _ptr, [[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[EAX]], ([[ECX]]) +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _foo02: +; DARWIN-32-DYNAMIC: movl L_src$non_lazy_ptr, [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl ([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl ([[ECX]]), [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[EAX]], ([[ECX]]) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _foo02: +; DARWIN-32-PIC: calll L4$pb +; DARWIN-32-PIC-NEXT: L4$pb: +; DARWIN-32-PIC-NEXT: popl [[R0:%e..]] +; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L4$pb([[R0]]), [[R1:%e..]] +; DARWIN-32-PIC-NEXT: movl ([[R1]]), [[R2:%e..]] +; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L4$pb([[R0]]), [[R3:%e..]] +; DARWIN-32-PIC-NEXT: movl ([[R3]]), [[R4:%e..]] +; DARWIN-32-PIC-NEXT: movl [[R2]], ([[R4]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _foo02: +; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl ([[RAX]]), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; 
DARWIN-64-STATIC-NEXT: movl [[EAX]], ([[RCX]]) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _foo02: +; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl ([[RAX]]), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], ([[RCX]]) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _foo02: +; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movl ([[RAX]]), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], ([[RCX]]) +; DARWIN-64-PIC-NEXT: ret +} + +define void @fxo02() nounwind { +entry: + %0 = load i32** @ptr, align 8 + %1 = load i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 0), align 4 + store i32 %1, i32* %0, align 4 +; LINUX-64-STATIC: fxo02: +; LINUX-64-STATIC: movl xsrc(%rip), % +; LINUX-64-STATIC: movq ptr(%rip), % +; LINUX-64-STATIC: movl +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: fxo02: +; LINUX-32-STATIC: movl xsrc, [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl ptr, [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[EAX]], ([[ECX]]) +; LINUX-32-STATIC-NEXT: ret + ret void + +; LINUX-32-PIC: fxo02: +; LINUX-32-PIC: movl xsrc, [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl ptr, [[ECX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[EAX]], ([[ECX]]) +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: fxo02: +; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movl ([[RAX]]), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], ([[RCX]]) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _fxo02: +; DARWIN-32-STATIC: movl _xsrc, [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _ptr, [[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[EAX]], ([[ECX]]) +; 
DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _fxo02: +; DARWIN-32-DYNAMIC: movl L_xsrc$non_lazy_ptr, [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl ([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl ([[ECX]]), [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[EAX]], ([[ECX]]) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _fxo02: +; DARWIN-32-PIC: calll L5$pb +; DARWIN-32-PIC-NEXT: L5$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L5$pb([[EAX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl ([[ECX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L5$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl ([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], ([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _fxo02: +; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl ([[RAX]]), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], ([[RCX]]) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _fxo02: +; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl ([[RAX]]), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], ([[RCX]]) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _fxo02: +; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movl ([[RAX]]), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], ([[RCX]]) +; DARWIN-64-PIC-NEXT: ret +} + +define void @foo03() nounwind { +entry: + %0 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 0), align 32 + store 
i32 %0, i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 0), align 32 + ret void +; LINUX-64-STATIC: foo03: +; LINUX-64-STATIC: movl dsrc(%rip), [[EAX:%e.x]] +; LINUX-64-STATIC: movl [[EAX]], ddst +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: foo03: +; LINUX-32-STATIC: movl dsrc, [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[EAX]], ddst +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: foo03: +; LINUX-32-PIC: movl dsrc, [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[EAX]], ddst +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: foo03: +; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movl ([[RAX]]), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movq ddst@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], ([[RCX]]) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _foo03: +; DARWIN-32-STATIC: movl _dsrc, [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[EAX]], _ddst +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _foo03: +; DARWIN-32-DYNAMIC: movl _dsrc, [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[EAX]], _ddst +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _foo03: +; DARWIN-32-PIC: calll L6$pb +; DARWIN-32-PIC-NEXT: L6$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl _dsrc-L6$pb([[EAX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], _ddst-L6$pb([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _foo03: +; DARWIN-64-STATIC: movl _dsrc(%rip), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], _ddst(%rip) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _foo03: +; DARWIN-64-DYNAMIC: movl _dsrc(%rip), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], _ddst(%rip) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _foo03: +; DARWIN-64-PIC: movl _dsrc(%rip), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], _ddst(%rip) +; DARWIN-64-PIC-NEXT: ret +} + +define void @foo04() nounwind { +entry: + store i32* getelementptr ([131072 x i32]* @ddst, i32 0, i32 0), i32** @dptr, align 8 + ret void +; 
LINUX-64-STATIC: foo04: +; LINUX-64-STATIC: movq $ddst, dptr +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: foo04: +; LINUX-32-STATIC: movl $ddst, dptr +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: foo04: +; LINUX-32-PIC: movl $ddst, dptr +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: foo04: +; LINUX-64-PIC: movq ddst@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movq [[RAX]], ([[RCX]]) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _foo04: +; DARWIN-32-STATIC: movl $_ddst, _dptr +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _foo04: +; DARWIN-32-DYNAMIC: movl $_ddst, _dptr +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _foo04: +; DARWIN-32-PIC: calll L7$pb +; DARWIN-32-PIC-NEXT: L7$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: leal _ddst-L7$pb([[EAX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], _dptr-L7$pb([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _foo04: +; DARWIN-64-STATIC: leaq _ddst(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq [[RAX]], _dptr(%rip) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _foo04: +; DARWIN-64-DYNAMIC: leaq _ddst(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq [[RAX]], _dptr(%rip) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _foo04: +; DARWIN-64-PIC: leaq _ddst(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movq [[RAX]], _dptr(%rip) +; DARWIN-64-PIC-NEXT: ret +} + +define void @foo05() nounwind { +entry: + %0 = load i32** @dptr, align 8 + %1 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 0), align 32 + store i32 %1, i32* %0, align 4 + ret void +; LINUX-64-STATIC: foo05: +; LINUX-64-STATIC: movl dsrc(%rip), % +; LINUX-64-STATIC: movq dptr(%rip), % +; LINUX-64-STATIC: movl +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: foo05: +; LINUX-32-STATIC: movl dsrc, [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl dptr, [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[EAX]], ([[ECX]]) +; 
LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: foo05: +; LINUX-32-PIC: movl dsrc, [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl dptr, [[ECX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[EAX]], ([[ECX]]) +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: foo05: +; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movl ([[RAX]]), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], ([[RCX]]) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _foo05: +; DARWIN-32-STATIC: movl _dsrc, [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _dptr, [[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[EAX]], ([[ECX]]) +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _foo05: +; DARWIN-32-DYNAMIC: movl _dsrc, [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl _dptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[EAX]], ([[ECX]]) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _foo05: +; DARWIN-32-PIC: calll L8$pb +; DARWIN-32-PIC-NEXT: L8$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl _dsrc-L8$pb([[EAX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl _dptr-L8$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], ([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _foo05: +; DARWIN-64-STATIC: movl _dsrc(%rip), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: movq _dptr(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], ([[RCX]]) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _foo05: +; DARWIN-64-DYNAMIC: movl _dsrc(%rip), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _dptr(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], ([[RCX]]) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _foo05: +; DARWIN-64-PIC: movl _dsrc(%rip), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: movq _dptr(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], ([[RCX]]) +; DARWIN-64-PIC-NEXT: ret +} + +define void @foo06() nounwind { +entry: + %0 = load i32* 
getelementptr ([131072 x i32]* @lsrc, i32 0, i64 0), align 4 + store i32 %0, i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 0), align 4 + ret void +; LINUX-64-STATIC: foo06: +; LINUX-64-STATIC: movl lsrc(%rip), [[EAX:%e.x]] +; LINUX-64-STATIC: movl [[EAX]], ldst(%rip) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: foo06: +; LINUX-32-STATIC: movl lsrc, [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[EAX]], ldst +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: foo06: +; LINUX-32-PIC: movl lsrc, [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[EAX]], ldst +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: foo06: +; LINUX-64-PIC: movl lsrc(%rip), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], ldst(%rip) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _foo06: +; DARWIN-32-STATIC: movl _lsrc, [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[EAX]], _ldst +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _foo06: +; DARWIN-32-DYNAMIC: movl _lsrc, [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[EAX]], _ldst +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _foo06: +; DARWIN-32-PIC: calll L9$pb +; DARWIN-32-PIC-NEXT: L9$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl _lsrc-L9$pb([[EAX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], _ldst-L9$pb([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _foo06: +; DARWIN-64-STATIC: movl _lsrc(%rip), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], _ldst(%rip) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _foo06: +; DARWIN-64-DYNAMIC: movl _lsrc(%rip), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], _ldst(%rip) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _foo06: +; DARWIN-64-PIC: movl _lsrc(%rip), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], _ldst(%rip) +; DARWIN-64-PIC-NEXT: ret +} + +define void @foo07() nounwind { +entry: + store i32* getelementptr ([131072 x i32]* @ldst, i32 0, i32 0), i32** @lptr, align 8 + ret void +; LINUX-64-STATIC: foo07: +; LINUX-64-STATIC: movq 
$ldst, lptr +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: foo07: +; LINUX-32-STATIC: movl $ldst, lptr +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: foo07: +; LINUX-32-PIC: movl $ldst, lptr +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: foo07: +; LINUX-64-PIC: leaq ldst(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movq [[RAX]], lptr(%rip) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _foo07: +; DARWIN-32-STATIC: movl $_ldst, _lptr +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _foo07: +; DARWIN-32-DYNAMIC: movl $_ldst, _lptr +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _foo07: +; DARWIN-32-PIC: calll L10$pb +; DARWIN-32-PIC-NEXT: L10$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: leal _ldst-L10$pb([[EAX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], _lptr-L10$pb([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _foo07: +; DARWIN-64-STATIC: leaq _ldst(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq [[RAX]], _lptr(%rip) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _foo07: +; DARWIN-64-DYNAMIC: leaq _ldst(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq [[RAX]], _lptr(%rip) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _foo07: +; DARWIN-64-PIC: leaq _ldst(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movq [[RAX]], _lptr(%rip) +; DARWIN-64-PIC-NEXT: ret +} + +define void @foo08() nounwind { +entry: + %0 = load i32** @lptr, align 8 + %1 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 0), align 4 + store i32 %1, i32* %0, align 4 + ret void +; LINUX-64-STATIC: foo08: +; LINUX-64-STATIC: movl lsrc(%rip), % +; LINUX-64-STATIC: movq lptr(%rip), % +; LINUX-64-STATIC: movl +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: foo08: +; LINUX-32-STATIC: movl lsrc, [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl lptr, [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[EAX]], ([[ECX]]) +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: foo08: +; LINUX-32-PIC: movl lsrc, [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl lptr, 
[[ECX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[EAX]], ([[ECX]]) +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: foo08: +; LINUX-64-PIC: movl lsrc(%rip), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movq lptr(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], ([[RCX]]) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _foo08: +; DARWIN-32-STATIC: movl _lsrc, [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _lptr, [[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[EAX]], ([[ECX]]) +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _foo08: +; DARWIN-32-DYNAMIC: movl _lsrc, [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl _lptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[EAX]], ([[ECX]]) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _foo08: +; DARWIN-32-PIC: calll L11$pb +; DARWIN-32-PIC-NEXT: L11$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl _lsrc-L11$pb([[EAX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl _lptr-L11$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], ([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _foo08: +; DARWIN-64-STATIC: movl _lsrc(%rip), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: movq _lptr(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], ([[RCX]]) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _foo08: +; DARWIN-64-DYNAMIC: movl _lsrc(%rip), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _lptr(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], ([[RCX]]) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _foo08: +; DARWIN-64-PIC: movl _lsrc(%rip), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: movq _lptr(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], ([[RCX]]) +; DARWIN-64-PIC-NEXT: ret +} + +define void @qux00() nounwind { +entry: + %0 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 16), align 4 + store i32 %0, i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 16), align 4 + ret void +; LINUX-64-STATIC: qux00: +; LINUX-64-STATIC: movl src+64(%rip), [[EAX:%e.x]] +; 
LINUX-64-STATIC: movl [[EAX]], dst+64(%rip) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: qux00: +; LINUX-32-STATIC: movl src+64, [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[EAX]], dst+64 +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: qux00: +; LINUX-32-PIC: movl src+64, [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[EAX]], dst+64 +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: qux00: +; LINUX-64-PIC: movq src@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movl 64([[RAX]]), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movq dst@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], 64([[RCX]]) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _qux00: +; DARWIN-32-STATIC: movl _src+64, [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[EAX]], _dst+64 +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _qux00: +; DARWIN-32-DYNAMIC: movl L_src$non_lazy_ptr, [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl 64([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_dst$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[EAX]], 64([[ECX]]) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _qux00: +; DARWIN-32-PIC: calll L12$pb +; DARWIN-32-PIC-NEXT: L12$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L12$pb([[EAX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 64([[ECX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L12$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], 64([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _qux00: +; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl 64([[RAX]]), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: movq _dst@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], 64([[RCX]]) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _qux00: +; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl 64([[RAX]]), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _dst@GOTPCREL(%rip), 
[[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], 64([[RCX]]) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _qux00: +; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movl 64([[RAX]]), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: movq _dst@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], 64([[RCX]]) +; DARWIN-64-PIC-NEXT: ret +} + +define void @qxx00() nounwind { +entry: + %0 = load i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 16), align 4 + store i32 %0, i32* getelementptr ([32 x i32]* @xdst, i32 0, i64 16), align 4 + ret void +; LINUX-64-STATIC: qxx00: +; LINUX-64-STATIC: movl xsrc+64(%rip), [[EAX:%e.x]] +; LINUX-64-STATIC: movl [[EAX]], xdst+64(%rip) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: qxx00: +; LINUX-32-STATIC: movl xsrc+64, [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[EAX]], xdst+64 +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: qxx00: +; LINUX-32-PIC: movl xsrc+64, [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[EAX]], xdst+64 +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: qxx00: +; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movl 64([[RAX]]), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movq xdst@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], 64([[RCX]]) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _qxx00: +; DARWIN-32-STATIC: movl _xsrc+64, [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[EAX]], _xdst+64 +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _qxx00: +; DARWIN-32-DYNAMIC: movl L_xsrc$non_lazy_ptr, [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl 64([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_xdst$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[EAX]], 64([[ECX]]) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _qxx00: +; DARWIN-32-PIC: calll L13$pb +; DARWIN-32-PIC-NEXT: L13$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L13$pb([[EAX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 
64([[ECX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L13$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], 64([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _qxx00: +; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl 64([[RAX]]), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: movq _xdst@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], 64([[RCX]]) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _qxx00: +; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl 64([[RAX]]), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _xdst@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], 64([[RCX]]) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _qxx00: +; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movl 64([[RAX]]), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: movq _xdst@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], 64([[RCX]]) +; DARWIN-64-PIC-NEXT: ret +} + +define void @qux01() nounwind { +entry: + store i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 16), i32** @ptr, align 8 + ret void +; LINUX-64-STATIC: qux01: +; LINUX-64-STATIC: movq $dst+64, ptr +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: qux01: +; LINUX-32-STATIC: movl $dst+64, ptr +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: qux01: +; LINUX-32-PIC: movl $dst+64, ptr +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: qux01: +; LINUX-64-PIC: movq dst@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: addq $64, [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movq [[RAX]], ([[RCX]]) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _qux01: +; DARWIN-32-STATIC: movl $_dst+64, _ptr +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _qux01: +; DARWIN-32-DYNAMIC: movl L_dst$non_lazy_ptr, [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: addl $64, [[EAX:%e.x]] +; 
DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[EAX]], ([[ECX]]) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _qux01: +; DARWIN-32-PIC: calll L14$pb +; DARWIN-32-PIC-NEXT: L14$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L14$pb([[EAX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: addl $64, [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L14$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], ([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _qux01: +; DARWIN-64-STATIC: movq _dst@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: addq $64, [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq [[RAX]], ([[RCX]]) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _qux01: +; DARWIN-64-DYNAMIC: movq _dst@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: addq $64, [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq [[RAX]], ([[RCX]]) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _qux01: +; DARWIN-64-PIC: movq _dst@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: addq $64, [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movq [[RAX]], ([[RCX]]) +; DARWIN-64-PIC-NEXT: ret +} + +define void @qxx01() nounwind { +entry: + store i32* getelementptr ([32 x i32]* @xdst, i32 0, i64 16), i32** @ptr, align 8 + ret void +; LINUX-64-STATIC: qxx01: +; LINUX-64-STATIC: movq $xdst+64, ptr +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: qxx01: +; LINUX-32-STATIC: movl $xdst+64, ptr +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: qxx01: +; LINUX-32-PIC: movl $xdst+64, ptr +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: qxx01: +; LINUX-64-PIC: movq xdst@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: addq $64, [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; 
LINUX-64-PIC-NEXT: movq [[RAX]], ([[RCX]]) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _qxx01: +; DARWIN-32-STATIC: movl $_xdst+64, _ptr +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _qxx01: +; DARWIN-32-DYNAMIC: movl L_xdst$non_lazy_ptr, [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: addl $64, [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[EAX]], ([[ECX]]) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _qxx01: +; DARWIN-32-PIC: calll L15$pb +; DARWIN-32-PIC-NEXT: L15$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L15$pb([[EAX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: addl $64, [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L15$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], ([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _qxx01: +; DARWIN-64-STATIC: movq _xdst@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: addq $64, [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq [[RAX]], ([[RCX]]) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _qxx01: +; DARWIN-64-DYNAMIC: movq _xdst@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: addq $64, [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq [[RAX]], ([[RCX]]) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _qxx01: +; DARWIN-64-PIC: movq _xdst@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: addq $64, [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movq [[RAX]], ([[RCX]]) +; DARWIN-64-PIC-NEXT: ret +} + +define void @qux02() nounwind { +entry: + %0 = load i32** @ptr, align 8 + %1 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 16), align 4 + %2 = getelementptr i32* %0, i64 16 + store i32 %1, i32* %2, align 4 +; LINUX-64-STATIC: qux02: +; LINUX-64-STATIC: movl 
src+64(%rip), [[EAX:%e.x]] +; LINUX-64-STATIC: movq ptr(%rip), [[RCX:%r.x]] +; LINUX-64-STATIC: movl [[EAX]], 64([[RCX]]) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: qux02: +; LINUX-32-STATIC: movl src+64, [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl ptr, [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[EAX]], 64([[ECX]]) +; LINUX-32-STATIC-NEXT: ret + ret void + +; LINUX-32-PIC: qux02: +; LINUX-32-PIC: movl src+64, [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl ptr, [[ECX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[EAX]], 64([[ECX]]) +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: qux02: +; LINUX-64-PIC: movq src@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movl 64([[RAX]]), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], 64([[RCX]]) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _qux02: +; DARWIN-32-STATIC: movl _src+64, [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _ptr, [[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[EAX]], 64([[ECX]]) +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _qux02: +; DARWIN-32-DYNAMIC: movl L_src$non_lazy_ptr, [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl 64([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl ([[ECX]]), [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[EAX]], 64([[ECX]]) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _qux02: +; DARWIN-32-PIC: calll L16$pb +; DARWIN-32-PIC-NEXT: L16$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L16$pb([[EAX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 64([[ECX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L16$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl ([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], 64([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _qux02: +; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; 
DARWIN-64-STATIC-NEXT: movl 64([[RAX]]), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], 64([[RCX]]) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _qux02: +; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl 64([[RAX]]), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], 64([[RCX]]) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _qux02: +; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movl 64([[RAX]]), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], 64([[RCX]]) +; DARWIN-64-PIC-NEXT: ret +} + +define void @qxx02() nounwind { +entry: + %0 = load i32** @ptr, align 8 + %1 = load i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 16), align 4 + %2 = getelementptr i32* %0, i64 16 + store i32 %1, i32* %2, align 4 +; LINUX-64-STATIC: qxx02: +; LINUX-64-STATIC: movl xsrc+64(%rip), [[EAX:%e.x]] +; LINUX-64-STATIC: movq ptr(%rip), [[RCX:%r.x]] +; LINUX-64-STATIC: movl [[EAX]], 64([[RCX]]) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: qxx02: +; LINUX-32-STATIC: movl xsrc+64, [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl ptr, [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[EAX]], 64([[ECX]]) +; LINUX-32-STATIC-NEXT: ret + ret void + +; LINUX-32-PIC: qxx02: +; LINUX-32-PIC: movl xsrc+64, [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl ptr, [[ECX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[EAX]], 64([[ECX]]) +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: qxx02: +; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movl 64([[RAX]]), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movq 
([[RCX]]), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], 64([[RCX]]) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _qxx02: +; DARWIN-32-STATIC: movl _xsrc+64, [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _ptr, [[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[EAX]], 64([[ECX]]) +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _qxx02: +; DARWIN-32-DYNAMIC: movl L_xsrc$non_lazy_ptr, [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl 64([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl ([[ECX]]), [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[EAX]], 64([[ECX]]) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _qxx02: +; DARWIN-32-PIC: calll L17$pb +; DARWIN-32-PIC-NEXT: L17$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L17$pb([[EAX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 64([[ECX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L17$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl ([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], 64([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _qxx02: +; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl 64([[RAX]]), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], 64([[RCX]]) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _qxx02: +; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl 64([[RAX]]), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], 64([[RCX]]) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _qxx02: +; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movl 64([[RAX]]), [[EAX:%e.x]] +; 
DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], 64([[RCX]]) +; DARWIN-64-PIC-NEXT: ret +} + +define void @qux03() nounwind { +entry: + %0 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 16), align 32 + store i32 %0, i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 16), align 32 + ret void +; LINUX-64-STATIC: qux03: +; LINUX-64-STATIC: movl dsrc+64(%rip), [[EAX:%e.x]] +; LINUX-64-STATIC: movl [[EAX]], ddst+64(%rip) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: qux03: +; LINUX-32-STATIC: movl dsrc+64, [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[EAX]], ddst+64 +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: qux03: +; LINUX-32-PIC: movl dsrc+64, [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[EAX]], ddst+64 +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: qux03: +; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movl 64([[RAX]]), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movq ddst@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], 64([[RCX]]) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _qux03: +; DARWIN-32-STATIC: movl _dsrc+64, [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[EAX]], _ddst+64 +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _qux03: +; DARWIN-32-DYNAMIC: movl _dsrc+64, [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[EAX]], _ddst+64 +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _qux03: +; DARWIN-32-PIC: calll L18$pb +; DARWIN-32-PIC-NEXT: L18$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl (_dsrc-L18$pb)+64([[EAX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], (_ddst-L18$pb)+64([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _qux03: +; DARWIN-64-STATIC: movl _dsrc+64(%rip), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], _ddst+64(%rip) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _qux03: +; DARWIN-64-DYNAMIC: movl _dsrc+64(%rip), [[EAX:%e.x]] +; 
DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], _ddst+64(%rip) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _qux03: +; DARWIN-64-PIC: movl _dsrc+64(%rip), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], _ddst+64(%rip) +; DARWIN-64-PIC-NEXT: ret +} + +define void @qux04() nounwind { +entry: + store i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 16), i32** @dptr, align 8 + ret void +; LINUX-64-STATIC: qux04: +; LINUX-64-STATIC: movq $ddst+64, dptr(%rip) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: qux04: +; LINUX-32-STATIC: movl $ddst+64, dptr +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: qux04: +; LINUX-32-PIC: movl $ddst+64, dptr +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: qux04: +; LINUX-64-PIC: movq ddst@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: addq $64, [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movq [[RAX]], ([[RCX]]) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _qux04: +; DARWIN-32-STATIC: movl $_ddst+64, _dptr +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _qux04: +; DARWIN-32-DYNAMIC: movl $_ddst+64, _dptr +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _qux04: +; DARWIN-32-PIC: calll L19$pb +; DARWIN-32-PIC-NEXT: L19$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: leal (_ddst-L19$pb)+64([[EAX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], _dptr-L19$pb([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _qux04: +; DARWIN-64-STATIC: leaq _ddst+64(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq [[RAX]], _dptr(%rip) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _qux04: +; DARWIN-64-DYNAMIC: leaq _ddst+64(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq [[RAX]], _dptr(%rip) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _qux04: +; DARWIN-64-PIC: leaq _ddst+64(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movq [[RAX]], _dptr(%rip) +; DARWIN-64-PIC-NEXT: ret +} + +define void @qux05() nounwind { +entry: + %0 = load i32** @dptr, 
align 8 + %1 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 16), align 32 + %2 = getelementptr i32* %0, i64 16 + store i32 %1, i32* %2, align 4 +; LINUX-64-STATIC: qux05: +; LINUX-64-STATIC: movl dsrc+64(%rip), [[EAX:%e.x]] +; LINUX-64-STATIC: movq dptr(%rip), [[RCX:%r.x]] +; LINUX-64-STATIC: movl [[EAX]], 64([[RCX]]) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: qux05: +; LINUX-32-STATIC: movl dsrc+64, [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl dptr, [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[EAX]], 64([[ECX]]) +; LINUX-32-STATIC-NEXT: ret + ret void + +; LINUX-32-PIC: qux05: +; LINUX-32-PIC: movl dsrc+64, [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl dptr, [[ECX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[EAX]], 64([[ECX]]) +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: qux05: +; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movl 64([[RAX]]), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], 64([[RCX]]) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _qux05: +; DARWIN-32-STATIC: movl _dsrc+64, [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _dptr, [[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[EAX]], 64([[ECX]]) +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _qux05: +; DARWIN-32-DYNAMIC: movl _dsrc+64, [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl _dptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[EAX]], 64([[ECX]]) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _qux05: +; DARWIN-32-PIC: calll L20$pb +; DARWIN-32-PIC-NEXT: L20$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl (_dsrc-L20$pb)+64([[EAX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl _dptr-L20$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], 64([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _qux05: +; DARWIN-64-STATIC: movl _dsrc+64(%rip), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: movq _dptr(%rip), [[RCX:%r.x]] 
+; DARWIN-64-STATIC-NEXT: movl [[EAX]], 64([[RCX]]) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _qux05: +; DARWIN-64-DYNAMIC: movl _dsrc+64(%rip), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _dptr(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], 64([[RCX]]) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _qux05: +; DARWIN-64-PIC: movl _dsrc+64(%rip), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: movq _dptr(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], 64([[RCX]]) +; DARWIN-64-PIC-NEXT: ret +} + +define void @qux06() nounwind { +entry: + %0 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 16), align 4 + store i32 %0, i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 16), align 4 + ret void +; LINUX-64-STATIC: qux06: +; LINUX-64-STATIC: movl lsrc+64(%rip), [[EAX:%e.x]] +; LINUX-64-STATIC: movl [[EAX]], ldst+64 +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: qux06: +; LINUX-32-STATIC: movl lsrc+64, [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[EAX]], ldst+64 +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: qux06: +; LINUX-32-PIC: movl lsrc+64, [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[EAX]], ldst+64 +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: qux06: +; LINUX-64-PIC: movl lsrc+64(%rip), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], ldst+64(%rip) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _qux06: +; DARWIN-32-STATIC: movl _lsrc+64, [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[EAX]], _ldst+64 +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _qux06: +; DARWIN-32-DYNAMIC: movl _lsrc+64, [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[EAX]], _ldst+64 +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _qux06: +; DARWIN-32-PIC: calll L21$pb +; DARWIN-32-PIC-NEXT: L21$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl (_lsrc-L21$pb)+64([[EAX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], (_ldst-L21$pb)+64([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _qux06: +; DARWIN-64-STATIC: 
movl _lsrc+64(%rip), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], _ldst+64(%rip) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _qux06: +; DARWIN-64-DYNAMIC: movl _lsrc+64(%rip), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], _ldst+64(%rip) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _qux06: +; DARWIN-64-PIC: movl _lsrc+64(%rip), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], _ldst+64(%rip) +; DARWIN-64-PIC-NEXT: ret +} + +define void @qux07() nounwind { +entry: + store i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 16), i32** @lptr, align 8 + ret void +; LINUX-64-STATIC: qux07: +; LINUX-64-STATIC: movq $ldst+64, lptr +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: qux07: +; LINUX-32-STATIC: movl $ldst+64, lptr +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: qux07: +; LINUX-32-PIC: movl $ldst+64, lptr +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: qux07: +; LINUX-64-PIC: leaq ldst+64(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movq [[RAX]], lptr(%rip) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _qux07: +; DARWIN-32-STATIC: movl $_ldst+64, _lptr +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _qux07: +; DARWIN-32-DYNAMIC: movl $_ldst+64, _lptr +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _qux07: +; DARWIN-32-PIC: calll L22$pb +; DARWIN-32-PIC-NEXT: L22$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: leal (_ldst-L22$pb)+64([[EAX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], _lptr-L22$pb([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _qux07: +; DARWIN-64-STATIC: leaq _ldst+64(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq [[RAX]], _lptr(%rip) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _qux07: +; DARWIN-64-DYNAMIC: leaq _ldst+64(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq [[RAX]], _lptr(%rip) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _qux07: +; DARWIN-64-PIC: leaq _ldst+64(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movq [[RAX]], _lptr(%rip) +; 
DARWIN-64-PIC-NEXT: ret +} + +define void @qux08() nounwind { +entry: + %0 = load i32** @lptr, align 8 + %1 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 16), align 4 + %2 = getelementptr i32* %0, i64 16 + store i32 %1, i32* %2, align 4 +; LINUX-64-STATIC: qux08: +; LINUX-64-STATIC: movl lsrc+64(%rip), [[EAX:%e.x]] +; LINUX-64-STATIC: movq lptr(%rip), [[RCX:%r.x]] +; LINUX-64-STATIC: movl [[EAX]], 64([[RCX]]) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: qux08: +; LINUX-32-STATIC: movl lsrc+64, [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl lptr, [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[EAX]], 64([[ECX]]) +; LINUX-32-STATIC-NEXT: ret + ret void + +; LINUX-32-PIC: qux08: +; LINUX-32-PIC: movl lsrc+64, [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl lptr, [[ECX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[EAX]], 64([[ECX]]) +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: qux08: +; LINUX-64-PIC: movl lsrc+64(%rip), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movq lptr(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], 64([[RCX]]) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _qux08: +; DARWIN-32-STATIC: movl _lsrc+64, [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _lptr, [[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[EAX]], 64([[ECX]]) +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _qux08: +; DARWIN-32-DYNAMIC: movl _lsrc+64, [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl _lptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[EAX]], 64([[ECX]]) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _qux08: +; DARWIN-32-PIC: calll L23$pb +; DARWIN-32-PIC-NEXT: L23$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl (_lsrc-L23$pb)+64([[EAX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl _lptr-L23$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], 64([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _qux08: +; DARWIN-64-STATIC: movl _lsrc+64(%rip), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: movq _lptr(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: 
movl [[EAX]], 64([[RCX]]) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _qux08: +; DARWIN-64-DYNAMIC: movl _lsrc+64(%rip), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _lptr(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], 64([[RCX]]) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _qux08: +; DARWIN-64-PIC: movl _lsrc+64(%rip), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: movq _lptr(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], 64([[RCX]]) +; DARWIN-64-PIC-NEXT: ret +} + +define void @ind00(i64 %i) nounwind { +entry: + %0 = getelementptr [131072 x i32]* @src, i64 0, i64 %i + %1 = load i32* %0, align 4 + %2 = getelementptr [131072 x i32]* @dst, i64 0, i64 %i + store i32 %1, i32* %2, align 4 + ret void +; LINUX-64-STATIC: ind00: +; LINUX-64-STATIC: movl src(,%rdi,4), [[EAX:%e.x]] +; LINUX-64-STATIC: movl [[EAX]], dst(,%rdi,4) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: ind00: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl src(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[ECX]], dst(,[[EAX]],4) +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: ind00: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl src(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[ECX]], dst(,[[EAX]],4) +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: ind00: +; LINUX-64-PIC: movq src@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movl ([[RAX]],%rdi,4), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movq dst@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], ([[RCX]],%rdi,4) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _ind00: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _src(,[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[ECX]], _dst(,[[EAX]],4) +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _ind00: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_src$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl 
([[ECX]],[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_dst$non_lazy_ptr, [[EDX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[ECX]], ([[EDX]],[[EAX]],4) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _ind00: +; DARWIN-32-PIC: calll L24$pb +; DARWIN-32-PIC-NEXT: L24$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L24$pb([[EAX]]), [[EDX:%e.x]] +; DARWIN-32-PIC-NEXT: movl ([[EDX]],[[ECX]],4), [[EDX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L24$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[EDX]], ([[EAX]],[[ECX]],4) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _ind00: +; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl ([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: movq _dst@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], ([[RCX]],%rdi,4) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _ind00: +; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl ([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _dst@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], ([[RCX]],%rdi,4) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _ind00: +; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movl ([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: movq _dst@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], ([[RCX]],%rdi,4) +; DARWIN-64-PIC-NEXT: ret +} + +define void @ixd00(i64 %i) nounwind { +entry: + %0 = getelementptr [32 x i32]* @xsrc, i64 0, i64 %i + %1 = load i32* %0, align 4 + %2 = getelementptr [32 x i32]* @xdst, i64 0, i64 %i + store i32 %1, i32* %2, align 4 + ret void +; LINUX-64-STATIC: ixd00: +; LINUX-64-STATIC: movl xsrc(,%rdi,4), [[EAX:%e.x]] +; LINUX-64-STATIC: movl [[EAX]], xdst(,%rdi,4) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: ixd00: +; 
LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl xsrc(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[ECX]], xdst(,[[EAX]],4) +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: ixd00: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl xsrc(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[ECX]], xdst(,[[EAX]],4) +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: ixd00: +; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movl ([[RAX]],%rdi,4), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movq xdst@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], ([[RCX]],%rdi,4) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _ixd00: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _xsrc(,[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[ECX]], _xdst(,[[EAX]],4) +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _ixd00: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_xsrc$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl ([[ECX]],[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_xdst$non_lazy_ptr, [[EDX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[ECX]], ([[EDX]],[[EAX]],4) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _ixd00: +; DARWIN-32-PIC: calll L25$pb +; DARWIN-32-PIC-NEXT: L25$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L25$pb([[EAX]]), [[EDX:%e.x]] +; DARWIN-32-PIC-NEXT: movl ([[EDX]],[[ECX]],4), [[EDX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L25$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[EDX]], ([[EAX]],[[ECX]],4) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _ixd00: +; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl ([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: movq _xdst@GOTPCREL(%rip), [[RCX:%r.x]] +; 
DARWIN-64-STATIC-NEXT: movl [[EAX]], ([[RCX]],%rdi,4) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _ixd00: +; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl ([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _xdst@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], ([[RCX]],%rdi,4) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _ixd00: +; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movl ([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: movq _xdst@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], ([[RCX]],%rdi,4) +; DARWIN-64-PIC-NEXT: ret +} + +define void @ind01(i64 %i) nounwind { +entry: + %0 = getelementptr [131072 x i32]* @dst, i64 0, i64 %i + store i32* %0, i32** @ptr, align 8 + ret void +; LINUX-64-STATIC: ind01: +; LINUX-64-STATIC: leaq dst(,%rdi,4), [[RAX:%r.x]] +; LINUX-64-STATIC: movq [[RAX]], ptr +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: ind01: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: leal dst(,[[EAX]],4), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[EAX]], ptr +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: ind01: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: leal dst(,[[EAX]],4), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[EAX]], ptr +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: ind01: +; LINUX-64-PIC: shlq $2, %rdi +; LINUX-64-PIC-NEXT: addq dst@GOTPCREL(%rip), %rdi +; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movq %rdi, ([[RAX]]) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _ind01: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: leal _dst(,[[EAX]],4), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[EAX]], _ptr +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _ind01: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: shll $2, [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: 
addl L_dst$non_lazy_ptr, [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[EAX]], ([[ECX]]) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _ind01: +; DARWIN-32-PIC: calll L26$pb +; DARWIN-32-PIC-NEXT: L26$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: shll $2, [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: addl L_dst$non_lazy_ptr-L26$pb([[EAX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L26$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], ([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _ind01: +; DARWIN-64-STATIC: shlq $2, %rdi +; DARWIN-64-STATIC-NEXT: addq _dst@GOTPCREL(%rip), %rdi +; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq %rdi, ([[RAX]]) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _ind01: +; DARWIN-64-DYNAMIC: shlq $2, %rdi +; DARWIN-64-DYNAMIC-NEXT: addq _dst@GOTPCREL(%rip), %rdi +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq %rdi, ([[RAX]]) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _ind01: +; DARWIN-64-PIC: shlq $2, %rdi +; DARWIN-64-PIC-NEXT: addq _dst@GOTPCREL(%rip), %rdi +; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movq %rdi, ([[RAX]]) +; DARWIN-64-PIC-NEXT: ret +} + +define void @ixd01(i64 %i) nounwind { +entry: + %0 = getelementptr [32 x i32]* @xdst, i64 0, i64 %i + store i32* %0, i32** @ptr, align 8 + ret void +; LINUX-64-STATIC: ixd01: +; LINUX-64-STATIC: leaq xdst(,%rdi,4), [[RAX:%r.x]] +; LINUX-64-STATIC: movq [[RAX]], ptr +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: ixd01: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: leal xdst(,[[EAX]],4), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[EAX]], ptr +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: ixd01: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; 
LINUX-32-PIC-NEXT: leal xdst(,[[EAX]],4), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[EAX]], ptr +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: ixd01: +; LINUX-64-PIC: shlq $2, %rdi +; LINUX-64-PIC-NEXT: addq xdst@GOTPCREL(%rip), %rdi +; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movq %rdi, ([[RAX]]) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _ixd01: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: leal _xdst(,[[EAX]],4), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[EAX]], _ptr +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _ixd01: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: shll $2, [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: addl L_xdst$non_lazy_ptr, [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[EAX]], ([[ECX]]) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _ixd01: +; DARWIN-32-PIC: calll L27$pb +; DARWIN-32-PIC-NEXT: L27$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: shll $2, [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: addl L_xdst$non_lazy_ptr-L27$pb([[EAX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L27$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], ([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _ixd01: +; DARWIN-64-STATIC: shlq $2, %rdi +; DARWIN-64-STATIC-NEXT: addq _xdst@GOTPCREL(%rip), %rdi +; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq %rdi, ([[RAX]]) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _ixd01: +; DARWIN-64-DYNAMIC: shlq $2, %rdi +; DARWIN-64-DYNAMIC-NEXT: addq _xdst@GOTPCREL(%rip), %rdi +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq %rdi, ([[RAX]]) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _ixd01: +; DARWIN-64-PIC: shlq $2, %rdi +; DARWIN-64-PIC-NEXT: 
addq _xdst@GOTPCREL(%rip), %rdi +; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movq %rdi, ([[RAX]]) +; DARWIN-64-PIC-NEXT: ret +} + +define void @ind02(i64 %i) nounwind { +entry: + %0 = load i32** @ptr, align 8 + %1 = getelementptr [131072 x i32]* @src, i64 0, i64 %i + %2 = load i32* %1, align 4 + %3 = getelementptr i32* %0, i64 %i + store i32 %2, i32* %3, align 4 + ret void +; LINUX-64-STATIC: ind02: +; LINUX-64-STATIC: movl src(,%rdi,4), [[EAX:%e.x]] +; LINUX-64-STATIC: movq ptr(%rip), [[RCX:%r.x]] +; LINUX-64-STATIC: movl [[EAX]], ([[RCX]],%rdi,4) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: ind02: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl src(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: movl ptr, [[EDX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[ECX]], ([[EDX]],[[EAX]],4) +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: ind02: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl src(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-PIC-NEXT: movl ptr, [[EDX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[ECX]], ([[EDX]],[[EAX]],4) +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: ind02: +; LINUX-64-PIC: movq src@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movl ([[RAX]],%rdi,4), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], ([[RCX]],%rdi,4) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _ind02: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _src(,[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _ptr, [[EDX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[ECX]], ([[EDX]],[[EAX]],4) +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _ind02: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_src$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl ([[ECX]],[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl 
L_ptr$non_lazy_ptr, [[EDX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl ([[EDX]]), [[EDX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[ECX]], ([[EDX]],[[EAX]],4) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _ind02: +; DARWIN-32-PIC: calll L28$pb +; DARWIN-32-PIC-NEXT: L28$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L28$pb([[EAX]]), [[EDX:%e.x]] +; DARWIN-32-PIC-NEXT: movl ([[EDX]],[[ECX]],4), [[EDX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L28$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl ([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[EDX]], ([[EAX]],[[ECX]],4) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _ind02: +; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl ([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], ([[RCX]],%rdi,4) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _ind02: +; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl ([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], ([[RCX]],%rdi,4) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _ind02: +; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movl ([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], ([[RCX]],%rdi,4) +; DARWIN-64-PIC-NEXT: ret +} + +define void @ixd02(i64 %i) nounwind { +entry: + %0 = load i32** @ptr, align 8 + %1 = getelementptr [32 x i32]* @xsrc, i64 0, i64 %i + %2 = load i32* %1, align 4 + %3 = getelementptr i32* %0, i64 %i + store i32 %2, 
i32* %3, align 4 + ret void +; LINUX-64-STATIC: ixd02: +; LINUX-64-STATIC: movl xsrc(,%rdi,4), [[EAX:%e.x]] +; LINUX-64-STATIC: movq ptr(%rip), [[RCX:%r.x]] +; LINUX-64-STATIC: movl [[EAX]], ([[RCX]],%rdi,4) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: ixd02: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl xsrc(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: movl ptr, [[EDX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[ECX]], ([[EDX]],[[EAX]],4) +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: ixd02: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl xsrc(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-PIC-NEXT: movl ptr, [[EDX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[ECX]], ([[EDX]],[[EAX]],4) +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: ixd02: +; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movl ([[RAX]],%rdi,4), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], ([[RCX]],%rdi,4) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _ixd02: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _xsrc(,[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _ptr, [[EDX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[ECX]], ([[EDX]],[[EAX]],4) +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _ixd02: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_xsrc$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl ([[ECX]],[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, [[EDX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl ([[EDX]]), [[EDX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[ECX]], ([[EDX]],[[EAX]],4) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _ixd02: +; DARWIN-32-PIC: calll L29$pb +; DARWIN-32-PIC-NEXT: L29$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; 
DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L29$pb([[EAX]]), [[EDX:%e.x]] +; DARWIN-32-PIC-NEXT: movl ([[EDX]],[[ECX]],4), [[EDX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L29$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl ([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[EDX]], ([[EAX]],[[ECX]],4) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _ixd02: +; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl ([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], ([[RCX]],%rdi,4) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _ixd02: +; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl ([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], ([[RCX]],%rdi,4) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _ixd02: +; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movl ([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], ([[RCX]],%rdi,4) +; DARWIN-64-PIC-NEXT: ret +} + +define void @ind03(i64 %i) nounwind { +entry: + %0 = getelementptr [131072 x i32]* @dsrc, i64 0, i64 %i + %1 = load i32* %0, align 4 + %2 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %i + store i32 %1, i32* %2, align 4 + ret void +; LINUX-64-STATIC: ind03: +; LINUX-64-STATIC: movl dsrc(,%rdi,4), [[EAX:%e.x]] +; LINUX-64-STATIC: movl [[EAX]], ddst(,%rdi,4) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: ind03: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl dsrc(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[ECX]], 
ddst(,[[EAX]],4) +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: ind03: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl dsrc(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[ECX]], ddst(,[[EAX]],4) +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: ind03: +; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movl ([[RAX]],%rdi,4), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movq ddst@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], ([[RCX]],%rdi,4) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _ind03: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _dsrc(,[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[ECX]], _ddst(,[[EAX]],4) +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _ind03: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl _dsrc(,[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[ECX]], _ddst(,[[EAX]],4) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _ind03: +; DARWIN-32-PIC: calll L30$pb +; DARWIN-32-PIC-NEXT: L30$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl _dsrc-L30$pb([[EAX]],[[ECX]],4), [[EDX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[EDX]], _ddst-L30$pb([[EAX]],[[ECX]],4) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _ind03: +; DARWIN-64-STATIC: leaq _dsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl ([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: leaq _ddst(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], ([[RCX]],%rdi,4) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _ind03: +; DARWIN-64-DYNAMIC: leaq _dsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl ([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: leaq _ddst(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], ([[RCX]],%rdi,4) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _ind03: +; DARWIN-64-PIC: leaq _dsrc(%rip), 
[[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movl ([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: leaq _ddst(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], ([[RCX]],%rdi,4) +; DARWIN-64-PIC-NEXT: ret +} + +define void @ind04(i64 %i) nounwind { +entry: + %0 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %i + store i32* %0, i32** @dptr, align 8 + ret void +; LINUX-64-STATIC: ind04: +; LINUX-64-STATIC: leaq ddst(,%rdi,4), [[RAX:%r.x]] +; LINUX-64-STATIC: movq [[RAX]], dptr +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: ind04: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: leal ddst(,[[EAX]],4), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[EAX]], dptr +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: ind04: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: leal ddst(,[[EAX]],4), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[EAX]], dptr +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: ind04: +; LINUX-64-PIC: shlq $2, %rdi +; LINUX-64-PIC-NEXT: addq ddst@GOTPCREL(%rip), %rdi +; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movq %rdi, ([[RAX]]) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _ind04: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: leal _ddst(,[[EAX]],4), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[EAX]], _dptr +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _ind04: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: leal _ddst(,[[EAX]],4), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[EAX]], _dptr +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _ind04: +; DARWIN-32-PIC: calll L31$pb +; DARWIN-32-PIC-NEXT: L31$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: leal _ddst-L31$pb([[EAX]],[[ECX]],4), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], _dptr-L31$pb([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _ind04: +; DARWIN-64-STATIC: leaq 
_ddst(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: leaq ([[RAX]],%rdi,4), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq [[RAX]], _dptr(%rip) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _ind04: +; DARWIN-64-DYNAMIC: leaq _ddst(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: leaq ([[RAX]],%rdi,4), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq [[RAX]], _dptr(%rip) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _ind04: +; DARWIN-64-PIC: leaq _ddst(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: leaq ([[RAX]],%rdi,4), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movq [[RAX]], _dptr(%rip) +; DARWIN-64-PIC-NEXT: ret +} + +define void @ind05(i64 %i) nounwind { +entry: + %0 = load i32** @dptr, align 8 + %1 = getelementptr [131072 x i32]* @dsrc, i64 0, i64 %i + %2 = load i32* %1, align 4 + %3 = getelementptr i32* %0, i64 %i + store i32 %2, i32* %3, align 4 + ret void +; LINUX-64-STATIC: ind05: +; LINUX-64-STATIC: movl dsrc(,%rdi,4), [[EAX:%e.x]] +; LINUX-64-STATIC: movq dptr(%rip), [[RCX:%r.x]] +; LINUX-64-STATIC: movl [[EAX]], ([[RCX]],%rdi,4) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: ind05: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl dsrc(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: movl dptr, [[EDX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[ECX]], ([[EDX]],[[EAX]],4) +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: ind05: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl dsrc(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-PIC-NEXT: movl dptr, [[EDX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[ECX]], ([[EDX]],[[EAX]],4) +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: ind05: +; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movl ([[RAX]],%rdi,4), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], ([[RCX]],%rdi,4) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _ind05: +; DARWIN-32-STATIC: movl 4(%esp), 
[[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _dsrc(,[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _dptr, [[EDX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[ECX]], ([[EDX]],[[EAX]],4) +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _ind05: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl _dsrc(,[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl _dptr, [[EDX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[ECX]], ([[EDX]],[[EAX]],4) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _ind05: +; DARWIN-32-PIC: calll L32$pb +; DARWIN-32-PIC-NEXT: L32$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl _dsrc-L32$pb([[EAX]],[[ECX]],4), [[EDX:%e.x]] +; DARWIN-32-PIC-NEXT: movl _dptr-L32$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[EDX]], ([[EAX]],[[ECX]],4) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _ind05: +; DARWIN-64-STATIC: leaq _dsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl ([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: movq _dptr(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], ([[RCX]],%rdi,4) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _ind05: +; DARWIN-64-DYNAMIC: leaq _dsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl ([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _dptr(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], ([[RCX]],%rdi,4) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _ind05: +; DARWIN-64-PIC: leaq _dsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movl ([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: movq _dptr(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], ([[RCX]],%rdi,4) +; DARWIN-64-PIC-NEXT: ret +} + +define void @ind06(i64 %i) nounwind { +entry: + %0 = getelementptr [131072 x i32]* @lsrc, i64 0, i64 %i + %1 = load i32* %0, align 4 + %2 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %i + store i32 %1, i32* %2, align 4 + ret 
void +; LINUX-64-STATIC: ind06: +; LINUX-64-STATIC: movl lsrc(,%rdi,4), [[EAX:%e.x]] +; LINUX-64-STATIC: movl [[EAX]], ldst(,%rdi,4) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: ind06: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl lsrc(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[ECX]], ldst(,[[EAX]],4) +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: ind06: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl lsrc(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[ECX]], ldst(,[[EAX]],4) +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: ind06: +; LINUX-64-PIC: leaq lsrc(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movl ([[RAX]],%rdi,4), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: leaq ldst(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], ([[RCX]],%rdi,4) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _ind06: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _lsrc(,[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[ECX]], _ldst(,[[EAX]],4) +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _ind06: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl _lsrc(,[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[ECX]], _ldst(,[[EAX]],4) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _ind06: +; DARWIN-32-PIC: calll L33$pb +; DARWIN-32-PIC-NEXT: L33$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl _lsrc-L33$pb([[EAX]],[[ECX]],4), [[EDX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[EDX]], _ldst-L33$pb([[EAX]],[[ECX]],4) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _ind06: +; DARWIN-64-STATIC: leaq _lsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl ([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: leaq _ldst(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], ([[RCX]],%rdi,4) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _ind06: +; 
DARWIN-64-DYNAMIC: leaq _lsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl ([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: leaq _ldst(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], ([[RCX]],%rdi,4) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _ind06: +; DARWIN-64-PIC: leaq _lsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movl ([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: leaq _ldst(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], ([[RCX]],%rdi,4) +; DARWIN-64-PIC-NEXT: ret +} + +define void @ind07(i64 %i) nounwind { +entry: + %0 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %i + store i32* %0, i32** @lptr, align 8 + ret void +; LINUX-64-STATIC: ind07: +; LINUX-64-STATIC: leaq ldst(,%rdi,4), [[RAX:%r.x]] +; LINUX-64-STATIC: movq [[RAX]], lptr +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: ind07: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: leal ldst(,[[EAX]],4), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[EAX]], lptr +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: ind07: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: leal ldst(,[[EAX]],4), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[EAX]], lptr +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: ind07: +; LINUX-64-PIC: leaq ldst(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: leaq ([[RAX]],%rdi,4), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movq [[RAX]], lptr(%rip) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _ind07: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: leal _ldst(,[[EAX]],4), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[EAX]], _lptr +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _ind07: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: leal _ldst(,[[EAX]],4), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[EAX]], _lptr +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _ind07: +; DARWIN-32-PIC: calll L34$pb +; DARWIN-32-PIC-NEXT: L34$pb: +; DARWIN-32-PIC-NEXT: 
popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: leal _ldst-L34$pb([[EAX]],[[ECX]],4), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], _lptr-L34$pb([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _ind07: +; DARWIN-64-STATIC: leaq _ldst(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: leaq ([[RAX]],%rdi,4), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq [[RAX]], _lptr(%rip) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _ind07: +; DARWIN-64-DYNAMIC: leaq _ldst(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: leaq ([[RAX]],%rdi,4), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq [[RAX]], _lptr(%rip) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _ind07: +; DARWIN-64-PIC: leaq _ldst(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: leaq ([[RAX]],%rdi,4), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movq [[RAX]], _lptr(%rip) +; DARWIN-64-PIC-NEXT: ret +} + +define void @ind08(i64 %i) nounwind { +entry: + %0 = load i32** @lptr, align 8 + %1 = getelementptr [131072 x i32]* @lsrc, i64 0, i64 %i + %2 = load i32* %1, align 4 + %3 = getelementptr i32* %0, i64 %i + store i32 %2, i32* %3, align 4 + ret void +; LINUX-64-STATIC: ind08: +; LINUX-64-STATIC: movl lsrc(,%rdi,4), [[EAX:%e.x]] +; LINUX-64-STATIC: movq lptr(%rip), [[RCX:%r.x]] +; LINUX-64-STATIC: movl [[EAX]], ([[RCX]],%rdi,4) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: ind08: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl lsrc(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: movl lptr, [[EDX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[ECX]], ([[EDX]],[[EAX]],4) +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: ind08: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl lsrc(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-PIC-NEXT: movl lptr, [[EDX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[ECX]], ([[EDX]],[[EAX]],4) +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: ind08: +; LINUX-64-PIC: leaq lsrc(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movl 
([[RAX]],%rdi,4), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movq lptr(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], ([[RCX]],%rdi,4) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _ind08: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _lsrc(,[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _lptr, [[EDX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[ECX]], ([[EDX]],[[EAX]],4) +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _ind08: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl _lsrc(,[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl _lptr, [[EDX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[ECX]], ([[EDX]],[[EAX]],4) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _ind08: +; DARWIN-32-PIC: calll L35$pb +; DARWIN-32-PIC-NEXT: L35$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl _lsrc-L35$pb([[EAX]],[[ECX]],4), [[EDX:%e.x]] +; DARWIN-32-PIC-NEXT: movl _lptr-L35$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[EDX]], ([[EAX]],[[ECX]],4) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _ind08: +; DARWIN-64-STATIC: leaq _lsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl ([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: movq _lptr(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], ([[RCX]],%rdi,4) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _ind08: +; DARWIN-64-DYNAMIC: leaq _lsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl ([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _lptr(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], ([[RCX]],%rdi,4) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _ind08: +; DARWIN-64-PIC: leaq _lsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movl ([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: movq _lptr(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], ([[RCX]],%rdi,4) +; DARWIN-64-PIC-NEXT: ret +} + 
+define void @off00(i64 %i) nounwind { +entry: + %0 = add i64 %i, 16 + %1 = getelementptr [131072 x i32]* @src, i64 0, i64 %0 + %2 = load i32* %1, align 4 + %3 = getelementptr [131072 x i32]* @dst, i64 0, i64 %0 + store i32 %2, i32* %3, align 4 + ret void +; LINUX-64-STATIC: off00: +; LINUX-64-STATIC: movl src+64(,%rdi,4), [[EAX:%e.x]] +; LINUX-64-STATIC: movl [[EAX]], dst+64(,%rdi,4) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: off00: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl src+64(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[ECX]], dst+64(,[[EAX]],4) +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: off00: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl src+64(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[ECX]], dst+64(,[[EAX]],4) +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: off00: +; LINUX-64-PIC: movq src@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movl 64([[RAX]],%rdi,4), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movq dst@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], 64([[RCX]],%rdi,4) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _off00: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _src+64(,[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[ECX]], _dst+64(,[[EAX]],4) +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _off00: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_src$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl 64([[ECX]],[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_dst$non_lazy_ptr, [[EDX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[ECX]], 64([[EDX]],[[EAX]],4) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _off00: +; DARWIN-32-PIC: calll L36$pb +; DARWIN-32-PIC-NEXT: L36$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L36$pb([[EAX]]), [[EDX:%e.x]] +; 
DARWIN-32-PIC-NEXT: movl 64([[EDX]],[[ECX]],4), [[EDX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L36$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[EDX]], 64([[EAX]],[[ECX]],4) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _off00: +; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl 64([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: movq _dst@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], 64([[RCX]],%rdi,4) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _off00: +; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl 64([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _dst@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], 64([[RCX]],%rdi,4) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _off00: +; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movl 64([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: movq _dst@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], 64([[RCX]],%rdi,4) +; DARWIN-64-PIC-NEXT: ret +} + +define void @oxf00(i64 %i) nounwind { +entry: + %0 = add i64 %i, 16 + %1 = getelementptr [32 x i32]* @xsrc, i64 0, i64 %0 + %2 = load i32* %1, align 4 + %3 = getelementptr [32 x i32]* @xdst, i64 0, i64 %0 + store i32 %2, i32* %3, align 4 + ret void +; LINUX-64-STATIC: oxf00: +; LINUX-64-STATIC: movl xsrc+64(,%rdi,4), [[EAX:%e.x]] +; LINUX-64-STATIC: movl [[EAX]], xdst+64(,%rdi,4) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: oxf00: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl xsrc+64(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[ECX]], xdst+64(,[[EAX]],4) +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: oxf00: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl xsrc+64(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[ECX]], xdst+64(,[[EAX]],4) +; LINUX-32-PIC-NEXT: ret + +; 
LINUX-64-PIC: oxf00: +; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movl 64([[RAX]],%rdi,4), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movq xdst@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], 64([[RCX]],%rdi,4) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _oxf00: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _xsrc+64(,[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[ECX]], _xdst+64(,[[EAX]],4) +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _oxf00: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_xsrc$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl 64([[ECX]],[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_xdst$non_lazy_ptr, [[EDX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[ECX]], 64([[EDX]],[[EAX]],4) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _oxf00: +; DARWIN-32-PIC: calll L37$pb +; DARWIN-32-PIC-NEXT: L37$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L37$pb([[EAX]]), [[EDX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 64([[EDX]],[[ECX]],4), [[EDX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L37$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[EDX]], 64([[EAX]],[[ECX]],4) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _oxf00: +; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl 64([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: movq _xdst@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], 64([[RCX]],%rdi,4) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _oxf00: +; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl 64([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _xdst@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], 64([[RCX]],%rdi,4) +; 
DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _oxf00: +; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movl 64([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: movq _xdst@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], 64([[RCX]],%rdi,4) +; DARWIN-64-PIC-NEXT: ret +} + +define void @off01(i64 %i) nounwind { +entry: + %.sum = add i64 %i, 16 + %0 = getelementptr [131072 x i32]* @dst, i64 0, i64 %.sum + store i32* %0, i32** @ptr, align 8 + ret void +; LINUX-64-STATIC: off01: +; LINUX-64-STATIC: leaq dst+64(,%rdi,4), [[RAX:%r.x]] +; LINUX-64-STATIC: movq [[RAX]], ptr +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: off01: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: leal dst+64(,[[EAX]],4), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[EAX]], ptr +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: off01: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: leal dst+64(,[[EAX]],4), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[EAX]], ptr +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: off01: +; LINUX-64-PIC: movq dst@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: leaq 64([[RAX]],%rdi,4), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movq [[RAX]], ([[RCX]]) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _off01: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: leal _dst+64(,[[EAX]],4), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[EAX]], _ptr +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _off01: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_dst$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: leal 64([[ECX]],[[EAX]],4), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[EAX]], ([[ECX]]) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _off01: +; DARWIN-32-PIC: calll L38$pb +; DARWIN-32-PIC-NEXT: L38$pb: 
+; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L38$pb([[EAX]]), [[EDX:%e.x]] +; DARWIN-32-PIC-NEXT: leal 64([[EDX]],[[ECX]],4), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L38$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], ([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _off01: +; DARWIN-64-STATIC: movq _dst@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: leaq 64([[RAX]],%rdi,4), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq [[RAX]], ([[RCX]]) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _off01: +; DARWIN-64-DYNAMIC: movq _dst@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: leaq 64([[RAX]],%rdi,4), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq [[RAX]], ([[RCX]]) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _off01: +; DARWIN-64-PIC: movq _dst@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: leaq 64([[RAX]],%rdi,4), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movq [[RAX]], ([[RCX]]) +; DARWIN-64-PIC-NEXT: ret +} + +define void @oxf01(i64 %i) nounwind { +entry: + %.sum = add i64 %i, 16 + %0 = getelementptr [32 x i32]* @xdst, i64 0, i64 %.sum + store i32* %0, i32** @ptr, align 8 + ret void +; LINUX-64-STATIC: oxf01: +; LINUX-64-STATIC: leaq xdst+64(,%rdi,4), [[RAX:%r.x]] +; LINUX-64-STATIC: movq [[RAX]], ptr +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: oxf01: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: leal xdst+64(,[[EAX]],4), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[EAX]], ptr +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: oxf01: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: leal xdst+64(,[[EAX]],4), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[EAX]], ptr +; 
LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: oxf01: +; LINUX-64-PIC: movq xdst@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: leaq 64([[RAX]],%rdi,4), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movq [[RAX]], ([[RCX]]) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _oxf01: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: leal _xdst+64(,[[EAX]],4), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[EAX]], _ptr +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _oxf01: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_xdst$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: leal 64([[ECX]],[[EAX]],4), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[EAX]], ([[ECX]]) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _oxf01: +; DARWIN-32-PIC: calll L39$pb +; DARWIN-32-PIC-NEXT: L39$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L39$pb([[EAX]]), [[EDX:%e.x]] +; DARWIN-32-PIC-NEXT: leal 64([[EDX]],[[ECX]],4), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L39$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], ([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _oxf01: +; DARWIN-64-STATIC: movq _xdst@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: leaq 64([[RAX]],%rdi,4), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq [[RAX]], ([[RCX]]) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _oxf01: +; DARWIN-64-DYNAMIC: movq _xdst@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: leaq 64([[RAX]],%rdi,4), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq [[RAX]], ([[RCX]]) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _oxf01: +; 
DARWIN-64-PIC: movq _xdst@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: leaq 64([[RAX]],%rdi,4), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movq [[RAX]], ([[RCX]]) +; DARWIN-64-PIC-NEXT: ret +} + +define void @off02(i64 %i) nounwind { +entry: + %0 = load i32** @ptr, align 8 + %1 = add i64 %i, 16 + %2 = getelementptr [131072 x i32]* @src, i64 0, i64 %1 + %3 = load i32* %2, align 4 + %4 = getelementptr i32* %0, i64 %1 + store i32 %3, i32* %4, align 4 + ret void +; LINUX-64-STATIC: off02: +; LINUX-64-STATIC: movl src+64(,%rdi,4), [[EAX:%e.x]] +; LINUX-64-STATIC: movq ptr(%rip), [[RCX:%r.x]] +; LINUX-64-STATIC: movl [[EAX]], 64([[RCX]],%rdi,4) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: off02: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl src+64(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: movl ptr, [[EDX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[ECX]], 64([[EDX]],[[EAX]],4) +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: off02: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl src+64(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-PIC-NEXT: movl ptr, [[EDX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[ECX]], 64([[EDX]],[[EAX]],4) +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: off02: +; LINUX-64-PIC: movq src@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movl 64([[RAX]],%rdi,4), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], 64([[RCX]],%rdi,4) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _off02: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _src+64(,[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _ptr, [[EDX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[ECX]], 64([[EDX]],[[EAX]],4) +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _off02: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: 
movl L_src$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl 64([[ECX]],[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, [[EDX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl ([[EDX]]), [[EDX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[ECX]], 64([[EDX]],[[EAX]],4) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _off02: +; DARWIN-32-PIC: calll L40$pb +; DARWIN-32-PIC-NEXT: L40$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L40$pb([[EAX]]), [[EDX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 64([[EDX]],[[ECX]],4), [[EDX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L40$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl ([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[EDX]], 64([[EAX]],[[ECX]],4) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _off02: +; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl 64([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], 64([[RCX]],%rdi,4) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _off02: +; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl 64([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], 64([[RCX]],%rdi,4) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _off02: +; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movl 64([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], 64([[RCX]],%rdi,4) +; DARWIN-64-PIC-NEXT: ret +} + +define void @oxf02(i64 %i) nounwind { +entry: + %0 = load 
i32** @ptr, align 8 + %1 = add i64 %i, 16 + %2 = getelementptr [32 x i32]* @xsrc, i64 0, i64 %1 + %3 = load i32* %2, align 4 + %4 = getelementptr i32* %0, i64 %1 + store i32 %3, i32* %4, align 4 + ret void +; LINUX-64-STATIC: oxf02: +; LINUX-64-STATIC: movl xsrc+64(,%rdi,4), [[EAX:%e.x]] +; LINUX-64-STATIC: movq ptr(%rip), [[RCX:%r.x]] +; LINUX-64-STATIC: movl [[EAX]], 64([[RCX]],%rdi,4) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: oxf02: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl xsrc+64(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: movl ptr, [[EDX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[ECX]], 64([[EDX]],[[EAX]],4) +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: oxf02: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl xsrc+64(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-PIC-NEXT: movl ptr, [[EDX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[ECX]], 64([[EDX]],[[EAX]],4) +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: oxf02: +; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movl 64([[RAX]],%rdi,4), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], 64([[RCX]],%rdi,4) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _oxf02: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _xsrc+64(,[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _ptr, [[EDX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[ECX]], 64([[EDX]],[[EAX]],4) +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _oxf02: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_xsrc$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl 64([[ECX]],[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, [[EDX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl ([[EDX]]), [[EDX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[ECX]], 64([[EDX]],[[EAX]],4) +; DARWIN-32-DYNAMIC-NEXT: 
ret + +; DARWIN-32-PIC: _oxf02: +; DARWIN-32-PIC: calll L41$pb +; DARWIN-32-PIC-NEXT: L41$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L41$pb([[EAX]]), [[EDX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 64([[EDX]],[[ECX]],4), [[EDX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L41$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl ([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[EDX]], 64([[EAX]],[[ECX]],4) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _oxf02: +; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl 64([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], 64([[RCX]],%rdi,4) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _oxf02: +; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl 64([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], 64([[RCX]],%rdi,4) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _oxf02: +; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movl 64([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], 64([[RCX]],%rdi,4) +; DARWIN-64-PIC-NEXT: ret +} + +define void @off03(i64 %i) nounwind { +entry: + %0 = add i64 %i, 16 + %1 = getelementptr [131072 x i32]* @dsrc, i64 0, i64 %0 + %2 = load i32* %1, align 4 + %3 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %0 + store i32 %2, i32* %3, align 4 + ret void +; LINUX-64-STATIC: off03: +; LINUX-64-STATIC: movl dsrc+64(,%rdi,4), [[EAX:%e.x]] +; LINUX-64-STATIC: movl 
[[EAX]], ddst+64(,%rdi,4) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: off03: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl dsrc+64(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[ECX]], ddst+64(,[[EAX]],4) +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: off03: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl dsrc+64(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[ECX]], ddst+64(,[[EAX]],4) +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: off03: +; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movl 64([[RAX]],%rdi,4), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movq ddst@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], 64([[RCX]],%rdi,4) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _off03: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _dsrc+64(,[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[ECX]], _ddst+64(,[[EAX]],4) +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _off03: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl _dsrc+64(,[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[ECX]], _ddst+64(,[[EAX]],4) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _off03: +; DARWIN-32-PIC: calll L42$pb +; DARWIN-32-PIC-NEXT: L42$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl (_dsrc-L42$pb)+64([[EAX]],[[ECX]],4), [[EDX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[EDX]], (_ddst-L42$pb)+64([[EAX]],[[ECX]],4) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _off03: +; DARWIN-64-STATIC: leaq _dsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl 64([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: leaq _ddst(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], 64([[RCX]],%rdi,4) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _off03: +; DARWIN-64-DYNAMIC: leaq _dsrc(%rip), [[RAX:%r.x]] +; 
DARWIN-64-DYNAMIC-NEXT: movl 64([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: leaq _ddst(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], 64([[RCX]],%rdi,4) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _off03: +; DARWIN-64-PIC: leaq _dsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movl 64([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: leaq _ddst(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], 64([[RCX]],%rdi,4) +; DARWIN-64-PIC-NEXT: ret +} + +define void @off04(i64 %i) nounwind { +entry: + %.sum = add i64 %i, 16 + %0 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %.sum + store i32* %0, i32** @dptr, align 8 + ret void +; LINUX-64-STATIC: off04: +; LINUX-64-STATIC: leaq ddst+64(,%rdi,4), [[RAX:%r.x]] +; LINUX-64-STATIC: movq [[RAX]], dptr +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: off04: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: leal ddst+64(,[[EAX]],4), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[EAX]], dptr +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: off04: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: leal ddst+64(,[[EAX]],4), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[EAX]], dptr +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: off04: +; LINUX-64-PIC: movq ddst@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: leaq 64([[RAX]],%rdi,4), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movq [[RAX]], ([[RCX]]) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _off04: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: leal _ddst+64(,[[EAX]],4), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[EAX]], _dptr +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _off04: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: leal _ddst+64(,[[EAX]],4), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[EAX]], _dptr +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _off04: +; DARWIN-32-PIC: 
calll L43$pb +; DARWIN-32-PIC-NEXT: L43$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: leal (_ddst-L43$pb)+64([[EAX]],[[ECX]],4), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], _dptr-L43$pb([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _off04: +; DARWIN-64-STATIC: leaq _ddst(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: leaq 64([[RAX]],%rdi,4), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq [[RAX]], _dptr(%rip) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _off04: +; DARWIN-64-DYNAMIC: leaq _ddst(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: leaq 64([[RAX]],%rdi,4), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq [[RAX]], _dptr(%rip) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _off04: +; DARWIN-64-PIC: leaq _ddst(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: leaq 64([[RAX]],%rdi,4), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movq [[RAX]], _dptr(%rip) +; DARWIN-64-PIC-NEXT: ret +} + +define void @off05(i64 %i) nounwind { +entry: + %0 = load i32** @dptr, align 8 + %1 = add i64 %i, 16 + %2 = getelementptr [131072 x i32]* @dsrc, i64 0, i64 %1 + %3 = load i32* %2, align 4 + %4 = getelementptr i32* %0, i64 %1 + store i32 %3, i32* %4, align 4 + ret void +; LINUX-64-STATIC: off05: +; LINUX-64-STATIC: movl dsrc+64(,%rdi,4), [[EAX:%e.x]] +; LINUX-64-STATIC: movq dptr(%rip), [[RCX:%r.x]] +; LINUX-64-STATIC: movl [[EAX]], 64([[RCX]],%rdi,4) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: off05: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl dsrc+64(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: movl dptr, [[EDX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[ECX]], 64([[EDX]],[[EAX]],4) +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: off05: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl dsrc+64(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-PIC-NEXT: movl dptr, [[EDX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[ECX]], 64([[EDX]],[[EAX]],4) +; LINUX-32-PIC-NEXT: 
ret + +; LINUX-64-PIC: off05: +; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movl 64([[RAX]],%rdi,4), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], 64([[RCX]],%rdi,4) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _off05: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _dsrc+64(,[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _dptr, [[EDX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[ECX]], 64([[EDX]],[[EAX]],4) +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _off05: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl _dsrc+64(,[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl _dptr, [[EDX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[ECX]], 64([[EDX]],[[EAX]],4) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _off05: +; DARWIN-32-PIC: calll L44$pb +; DARWIN-32-PIC-NEXT: L44$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl (_dsrc-L44$pb)+64([[EAX]],[[ECX]],4), [[EDX:%e.x]] +; DARWIN-32-PIC-NEXT: movl _dptr-L44$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[EDX]], 64([[EAX]],[[ECX]],4) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _off05: +; DARWIN-64-STATIC: leaq _dsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl 64([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: movq _dptr(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], 64([[RCX]],%rdi,4) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _off05: +; DARWIN-64-DYNAMIC: leaq _dsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl 64([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _dptr(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], 64([[RCX]],%rdi,4) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _off05: +; DARWIN-64-PIC: leaq _dsrc(%rip), [[RAX:%r.x]] 
+; DARWIN-64-PIC-NEXT: movl 64([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: movq _dptr(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], 64([[RCX]],%rdi,4) +; DARWIN-64-PIC-NEXT: ret +} + +define void @off06(i64 %i) nounwind { +entry: + %0 = add i64 %i, 16 + %1 = getelementptr [131072 x i32]* @lsrc, i64 0, i64 %0 + %2 = load i32* %1, align 4 + %3 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %0 + store i32 %2, i32* %3, align 4 + ret void +; LINUX-64-STATIC: off06: +; LINUX-64-STATIC: movl lsrc+64(,%rdi,4), [[EAX:%e.x]] +; LINUX-64-STATIC: movl [[EAX]], ldst+64(,%rdi,4) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: off06: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl lsrc+64(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[ECX]], ldst+64(,[[EAX]],4) +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: off06: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl lsrc+64(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[ECX]], ldst+64(,[[EAX]],4) +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: off06: +; LINUX-64-PIC: leaq lsrc(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movl 64([[RAX]],%rdi,4), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: leaq ldst(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], 64([[RCX]],%rdi,4) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _off06: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _lsrc+64(,[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[ECX]], _ldst+64(,[[EAX]],4) +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _off06: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl _lsrc+64(,[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[ECX]], _ldst+64(,[[EAX]],4) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _off06: +; DARWIN-32-PIC: calll L45$pb +; DARWIN-32-PIC-NEXT: L45$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; 
DARWIN-32-PIC-NEXT: movl (_lsrc-L45$pb)+64([[EAX]],[[ECX]],4), [[EDX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[EDX]], (_ldst-L45$pb)+64([[EAX]],[[ECX]],4) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _off06: +; DARWIN-64-STATIC: leaq _lsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl 64([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: leaq _ldst(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], 64([[RCX]],%rdi,4) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _off06: +; DARWIN-64-DYNAMIC: leaq _lsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl 64([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: leaq _ldst(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], 64([[RCX]],%rdi,4) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _off06: +; DARWIN-64-PIC: leaq _lsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movl 64([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: leaq _ldst(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], 64([[RCX]],%rdi,4) +; DARWIN-64-PIC-NEXT: ret +} + +define void @off07(i64 %i) nounwind { +entry: + %.sum = add i64 %i, 16 + %0 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %.sum + store i32* %0, i32** @lptr, align 8 + ret void +; LINUX-64-STATIC: off07: +; LINUX-64-STATIC: leaq ldst+64(,%rdi,4), [[RAX:%r.x]] +; LINUX-64-STATIC: movq [[RAX]], lptr +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: off07: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: leal ldst+64(,[[EAX]],4), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[EAX]], lptr +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: off07: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: leal ldst+64(,[[EAX]],4), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[EAX]], lptr +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: off07: +; LINUX-64-PIC: leaq ldst(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: leaq 64([[RAX]],%rdi,4), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movq [[RAX]], lptr(%rip) +; LINUX-64-PIC-NEXT: ret + +; 
DARWIN-32-STATIC: _off07: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: leal _ldst+64(,[[EAX]],4), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[EAX]], _lptr +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _off07: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: leal _ldst+64(,[[EAX]],4), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[EAX]], _lptr +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _off07: +; DARWIN-32-PIC: calll L46$pb +; DARWIN-32-PIC-NEXT: L46$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: leal (_ldst-L46$pb)+64([[EAX]],[[ECX]],4), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], _lptr-L46$pb([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _off07: +; DARWIN-64-STATIC: leaq _ldst(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: leaq 64([[RAX]],%rdi,4), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq [[RAX]], _lptr(%rip) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _off07: +; DARWIN-64-DYNAMIC: leaq _ldst(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: leaq 64([[RAX]],%rdi,4), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq [[RAX]], _lptr(%rip) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _off07: +; DARWIN-64-PIC: leaq _ldst(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: leaq 64([[RAX]],%rdi,4), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movq [[RAX]], _lptr(%rip) +; DARWIN-64-PIC-NEXT: ret +} + +define void @off08(i64 %i) nounwind { +entry: + %0 = load i32** @lptr, align 8 + %1 = add i64 %i, 16 + %2 = getelementptr [131072 x i32]* @lsrc, i64 0, i64 %1 + %3 = load i32* %2, align 4 + %4 = getelementptr i32* %0, i64 %1 + store i32 %3, i32* %4, align 4 + ret void +; LINUX-64-STATIC: off08: +; LINUX-64-STATIC: movl lsrc+64(,%rdi,4), [[EAX:%e.x]] +; LINUX-64-STATIC: movq lptr(%rip), [[RCX:%r.x]] +; LINUX-64-STATIC: movl [[EAX]], 64([[RCX]],%rdi,4) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: off08: +; LINUX-32-STATIC: 
movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl lsrc+64(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: movl lptr, [[EDX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[ECX]], 64([[EDX]],[[EAX]],4) +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: off08: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl lsrc+64(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-PIC-NEXT: movl lptr, [[EDX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[ECX]], 64([[EDX]],[[EAX]],4) +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: off08: +; LINUX-64-PIC: leaq lsrc(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movl 64([[RAX]],%rdi,4), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movq lptr(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], 64([[RCX]],%rdi,4) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _off08: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _lsrc+64(,[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _lptr, [[EDX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[ECX]], 64([[EDX]],[[EAX]],4) +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _off08: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl _lsrc+64(,[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl _lptr, [[EDX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[ECX]], 64([[EDX]],[[EAX]],4) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _off08: +; DARWIN-32-PIC: calll L47$pb +; DARWIN-32-PIC-NEXT: L47$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl (_lsrc-L47$pb)+64([[EAX]],[[ECX]],4), [[EDX:%e.x]] +; DARWIN-32-PIC-NEXT: movl _lptr-L47$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[EDX]], 64([[EAX]],[[ECX]],4) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _off08: +; DARWIN-64-STATIC: leaq _lsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl 64([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: movq _lptr(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], 
64([[RCX]],%rdi,4) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _off08: +; DARWIN-64-DYNAMIC: leaq _lsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl 64([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _lptr(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], 64([[RCX]],%rdi,4) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _off08: +; DARWIN-64-PIC: leaq _lsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movl 64([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: movq _lptr(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], 64([[RCX]],%rdi,4) +; DARWIN-64-PIC-NEXT: ret +} + +define void @moo00(i64 %i) nounwind { +entry: + %0 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 65536), align 4 + store i32 %0, i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 65536), align 4 + ret void +; LINUX-64-STATIC: moo00: +; LINUX-64-STATIC: movl src+262144(%rip), [[EAX:%e.x]] +; LINUX-64-STATIC: movl [[EAX]], dst+262144(%rip) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: moo00: +; LINUX-32-STATIC: movl src+262144, [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[EAX]], dst+262144 +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: moo00: +; LINUX-32-PIC: movl src+262144, [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[EAX]], dst+262144 +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: moo00: +; LINUX-64-PIC: movq src@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movl 262144([[RAX]]), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movq dst@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], 262144([[RCX]]) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _moo00: +; DARWIN-32-STATIC: movl _src+262144, [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[EAX]], _dst+262144 +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _moo00: +; DARWIN-32-DYNAMIC: movl L_src$non_lazy_ptr, [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl 262144([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_dst$non_lazy_ptr, [[ECX:%e.x]] +; 
DARWIN-32-DYNAMIC-NEXT: movl [[EAX]], 262144([[ECX]]) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _moo00: +; DARWIN-32-PIC: calll L48$pb +; DARWIN-32-PIC-NEXT: L48$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L48$pb([[EAX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 262144([[ECX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L48$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], 262144([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _moo00: +; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl 262144([[RAX]]), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: movq _dst@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], 262144([[RCX]]) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _moo00: +; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl 262144([[RAX]]), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _dst@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], 262144([[RCX]]) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _moo00: +; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movl 262144([[RAX]]), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: movq _dst@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], 262144([[RCX]]) +; DARWIN-64-PIC-NEXT: ret +} + +define void @moo01(i64 %i) nounwind { +entry: + store i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 65536), i32** @ptr, align 8 + ret void +; LINUX-64-STATIC: moo01: +; LINUX-64-STATIC: movq $dst+262144, ptr(%rip) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: moo01: +; LINUX-32-STATIC: movl $dst+262144, ptr +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: moo01: +; LINUX-32-PIC: movl $dst+262144, ptr +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: moo01: +; LINUX-64-PIC: movl $262144, [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: addq dst@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: 
movq ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movq [[RAX]], ([[RCX]]) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _moo01: +; DARWIN-32-STATIC: movl $_dst+262144, _ptr +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _moo01: +; DARWIN-32-DYNAMIC: movl $262144, [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: addl L_dst$non_lazy_ptr, [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[EAX]], ([[ECX]]) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _moo01: +; DARWIN-32-PIC: calll L49$pb +; DARWIN-32-PIC-NEXT: L49$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl $262144, [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: addl L_dst$non_lazy_ptr-L49$pb([[EAX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L49$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], ([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _moo01: +; DARWIN-64-STATIC: movl $262144, [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: addq _dst@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq [[RAX]], ([[RCX]]) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _moo01: +; DARWIN-64-DYNAMIC: movl $262144, [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: addq _dst@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq [[RAX]], ([[RCX]]) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _moo01: +; DARWIN-64-PIC: movl $262144, [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: addq _dst@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movq [[RAX]], ([[RCX]]) +; DARWIN-64-PIC-NEXT: ret +} + +define void @moo02(i64 %i) nounwind { +entry: + %0 = load i32** @ptr, align 8 + %1 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 65536), align 4 + %2 = getelementptr i32* %0, i64 65536 + store i32 %1, i32* 
%2, align 4 + ret void +; LINUX-64-STATIC: moo02: +; LINUX-64-STATIC: movl src+262144(%rip), [[EAX:%e.x]] +; LINUX-64-STATIC: movq ptr(%rip), [[RCX:%r.x]] +; LINUX-64-STATIC: movl [[EAX]], 262144([[RCX]]) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: moo02: +; LINUX-32-STATIC: movl src+262144, [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl ptr, [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[EAX]], 262144([[ECX]]) +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: moo02: +; LINUX-32-PIC: movl src+262144, [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl ptr, [[ECX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[EAX]], 262144([[ECX]]) +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: moo02: +; LINUX-64-PIC: movq src@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movl 262144([[RAX]]), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], 262144([[RCX]]) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _moo02: +; DARWIN-32-STATIC: movl _src+262144, [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _ptr, [[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[EAX]], 262144([[ECX]]) +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _moo02: +; DARWIN-32-DYNAMIC: movl L_src$non_lazy_ptr, [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl 262144([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl ([[ECX]]), [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[EAX]], 262144([[ECX]]) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _moo02: +; DARWIN-32-PIC: calll L50$pb +; DARWIN-32-PIC-NEXT: L50$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L50$pb([[EAX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 262144([[ECX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L50$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl ([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], 262144([[EAX]]) +; 
DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _moo02: +; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl 262144([[RAX]]), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], 262144([[RCX]]) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _moo02: +; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl 262144([[RAX]]), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], 262144([[RCX]]) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _moo02: +; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movl 262144([[RAX]]), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], 262144([[RCX]]) +; DARWIN-64-PIC-NEXT: ret +} + +define void @moo03(i64 %i) nounwind { +entry: + %0 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 65536), align 32 + store i32 %0, i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 65536), align 32 + ret void +; LINUX-64-STATIC: moo03: +; LINUX-64-STATIC: movl dsrc+262144(%rip), [[EAX:%e.x]] +; LINUX-64-STATIC: movl [[EAX]], ddst+262144(%rip) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: moo03: +; LINUX-32-STATIC: movl dsrc+262144, [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[EAX]], ddst+262144 +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: moo03: +; LINUX-32-PIC: movl dsrc+262144, [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[EAX]], ddst+262144 +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: moo03: +; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movl 262144([[RAX]]), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movq ddst@GOTPCREL(%rip), [[RCX:%r.x]] +; 
LINUX-64-PIC-NEXT: movl [[EAX]], 262144([[RCX]]) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _moo03: +; DARWIN-32-STATIC: movl _dsrc+262144, [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[EAX]], _ddst+262144 +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _moo03: +; DARWIN-32-DYNAMIC: movl _dsrc+262144, [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[EAX]], _ddst+262144 +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _moo03: +; DARWIN-32-PIC: calll L51$pb +; DARWIN-32-PIC-NEXT: L51$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl (_dsrc-L51$pb)+262144([[EAX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], (_ddst-L51$pb)+262144([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _moo03: +; DARWIN-64-STATIC: movl _dsrc+262144(%rip), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], _ddst+262144(%rip) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _moo03: +; DARWIN-64-DYNAMIC: movl _dsrc+262144(%rip), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], _ddst+262144(%rip) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _moo03: +; DARWIN-64-PIC: movl _dsrc+262144(%rip), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], _ddst+262144(%rip) +; DARWIN-64-PIC-NEXT: ret +} + +define void @moo04(i64 %i) nounwind { +entry: + store i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 65536), i32** @dptr, align 8 + ret void +; LINUX-64-STATIC: moo04: +; LINUX-64-STATIC: movq $ddst+262144, dptr +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: moo04: +; LINUX-32-STATIC: movl $ddst+262144, dptr +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: moo04: +; LINUX-32-PIC: movl $ddst+262144, dptr +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: moo04: +; LINUX-64-PIC: movl $262144, [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: addq ddst@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movq [[RAX]], ([[RCX]]) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _moo04: +; 
DARWIN-32-STATIC: movl $_ddst+262144, _dptr +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _moo04: +; DARWIN-32-DYNAMIC: movl $_ddst+262144, _dptr +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _moo04: +; DARWIN-32-PIC: calll L52$pb +; DARWIN-32-PIC-NEXT: L52$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: leal (_ddst-L52$pb)+262144([[EAX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], _dptr-L52$pb([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _moo04: +; DARWIN-64-STATIC: leaq _ddst+262144(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq [[RAX]], _dptr(%rip) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _moo04: +; DARWIN-64-DYNAMIC: leaq _ddst+262144(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq [[RAX]], _dptr(%rip) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _moo04: +; DARWIN-64-PIC: leaq _ddst+262144(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movq [[RAX]], _dptr(%rip) +; DARWIN-64-PIC-NEXT: ret +} + +define void @moo05(i64 %i) nounwind { +entry: + %0 = load i32** @dptr, align 8 + %1 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 65536), align 32 + %2 = getelementptr i32* %0, i64 65536 + store i32 %1, i32* %2, align 4 + ret void +; LINUX-64-STATIC: moo05: +; LINUX-64-STATIC: movl dsrc+262144(%rip), [[EAX:%e.x]] +; LINUX-64-STATIC: movq dptr(%rip), [[RCX:%r.x]] +; LINUX-64-STATIC: movl [[EAX]], 262144([[RCX]]) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: moo05: +; LINUX-32-STATIC: movl dsrc+262144, [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl dptr, [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[EAX]], 262144([[ECX]]) +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: moo05: +; LINUX-32-PIC: movl dsrc+262144, [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl dptr, [[ECX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[EAX]], 262144([[ECX]]) +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: moo05: +; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movl 262144([[RAX]]), [[EAX:%e.x]] +; 
LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], 262144([[RCX]]) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _moo05: +; DARWIN-32-STATIC: movl _dsrc+262144, [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _dptr, [[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[EAX]], 262144([[ECX]]) +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _moo05: +; DARWIN-32-DYNAMIC: movl _dsrc+262144, [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl _dptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[EAX]], 262144([[ECX]]) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _moo05: +; DARWIN-32-PIC: calll L53$pb +; DARWIN-32-PIC-NEXT: L53$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl (_dsrc-L53$pb)+262144([[EAX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl _dptr-L53$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], 262144([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _moo05: +; DARWIN-64-STATIC: movl _dsrc+262144(%rip), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: movq _dptr(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], 262144([[RCX]]) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _moo05: +; DARWIN-64-DYNAMIC: movl _dsrc+262144(%rip), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _dptr(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], 262144([[RCX]]) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _moo05: +; DARWIN-64-PIC: movl _dsrc+262144(%rip), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: movq _dptr(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], 262144([[RCX]]) +; DARWIN-64-PIC-NEXT: ret +} + +define void @moo06(i64 %i) nounwind { +entry: + %0 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 65536), align 4 + store i32 %0, i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 65536), align 4 + ret void +; LINUX-64-STATIC: moo06: +; LINUX-64-STATIC: movl lsrc+262144(%rip), [[EAX:%e.x]] +; 
LINUX-64-STATIC: movl [[EAX]], ldst+262144(%rip) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: moo06: +; LINUX-32-STATIC: movl lsrc+262144, [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[EAX]], ldst+262144 +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: moo06: +; LINUX-32-PIC: movl lsrc+262144, [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[EAX]], ldst+262144 +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: moo06: +; LINUX-64-PIC: movl lsrc+262144(%rip), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], ldst+262144(%rip) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _moo06: +; DARWIN-32-STATIC: movl _lsrc+262144, [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[EAX]], _ldst+262144 +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _moo06: +; DARWIN-32-DYNAMIC: movl _lsrc+262144, [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[EAX]], _ldst+262144 +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _moo06: +; DARWIN-32-PIC: calll L54$pb +; DARWIN-32-PIC-NEXT: L54$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl (_lsrc-L54$pb)+262144([[EAX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], (_ldst-L54$pb)+262144([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _moo06: +; DARWIN-64-STATIC: movl _lsrc+262144(%rip), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], _ldst+262144(%rip) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _moo06: +; DARWIN-64-DYNAMIC: movl _lsrc+262144(%rip), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], _ldst+262144(%rip) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _moo06: +; DARWIN-64-PIC: movl _lsrc+262144(%rip), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], _ldst+262144(%rip) +; DARWIN-64-PIC-NEXT: ret +} + +define void @moo07(i64 %i) nounwind { +entry: + store i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 65536), i32** @lptr, align 8 + ret void +; LINUX-64-STATIC: moo07: +; LINUX-64-STATIC: movq $ldst+262144, lptr +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: moo07: +; 
LINUX-32-STATIC: movl $ldst+262144, lptr +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: moo07: +; LINUX-32-PIC: movl $ldst+262144, lptr +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: moo07: +; LINUX-64-PIC: leaq ldst+262144(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movq [[RAX]], lptr(%rip) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _moo07: +; DARWIN-32-STATIC: movl $_ldst+262144, _lptr +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _moo07: +; DARWIN-32-DYNAMIC: movl $_ldst+262144, _lptr +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _moo07: +; DARWIN-32-PIC: calll L55$pb +; DARWIN-32-PIC-NEXT: L55$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: leal (_ldst-L55$pb)+262144([[EAX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], _lptr-L55$pb([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _moo07: +; DARWIN-64-STATIC: leaq _ldst+262144(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq [[RAX]], _lptr(%rip) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _moo07: +; DARWIN-64-DYNAMIC: leaq _ldst+262144(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq [[RAX]], _lptr(%rip) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _moo07: +; DARWIN-64-PIC: leaq _ldst+262144(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movq [[RAX]], _lptr(%rip) +; DARWIN-64-PIC-NEXT: ret +} + +define void @moo08(i64 %i) nounwind { +entry: + %0 = load i32** @lptr, align 8 + %1 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 65536), align 4 + %2 = getelementptr i32* %0, i64 65536 + store i32 %1, i32* %2, align 4 + ret void +; LINUX-64-STATIC: moo08: +; LINUX-64-STATIC: movl lsrc+262144(%rip), [[EAX:%e.x]] +; LINUX-64-STATIC: movq lptr(%rip), [[RCX:%r.x]] +; LINUX-64-STATIC: movl [[EAX]], 262144([[RCX]]) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: moo08: +; LINUX-32-STATIC: movl lsrc+262144, [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl lptr, [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[EAX]], 262144([[ECX]]) +; 
LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: moo08: +; LINUX-32-PIC: movl lsrc+262144, [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl lptr, [[ECX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[EAX]], 262144([[ECX]]) +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: moo08: +; LINUX-64-PIC: movl lsrc+262144(%rip), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movq lptr(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], 262144([[RCX]]) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _moo08: +; DARWIN-32-STATIC: movl _lsrc+262144, [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _lptr, [[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[EAX]], 262144([[ECX]]) +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _moo08: +; DARWIN-32-DYNAMIC: movl _lsrc+262144, [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl _lptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[EAX]], 262144([[ECX]]) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _moo08: +; DARWIN-32-PIC: calll L56$pb +; DARWIN-32-PIC-NEXT: L56$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl (_lsrc-L56$pb)+262144([[EAX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl _lptr-L56$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], 262144([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _moo08: +; DARWIN-64-STATIC: movl _lsrc+262144(%rip), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: movq _lptr(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], 262144([[RCX]]) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _moo08: +; DARWIN-64-DYNAMIC: movl _lsrc+262144(%rip), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _lptr(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], 262144([[RCX]]) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _moo08: +; DARWIN-64-PIC: movl _lsrc+262144(%rip), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: movq _lptr(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], 262144([[RCX]]) +; DARWIN-64-PIC-NEXT: ret +} + +define void @big00(i64 %i) nounwind { +entry: + %0 = add i64 %i, 65536 + %1 
= getelementptr [131072 x i32]* @src, i64 0, i64 %0 + %2 = load i32* %1, align 4 + %3 = getelementptr [131072 x i32]* @dst, i64 0, i64 %0 + store i32 %2, i32* %3, align 4 + ret void +; LINUX-64-STATIC: big00: +; LINUX-64-STATIC: movl src+262144(,%rdi,4), [[EAX:%e.x]] +; LINUX-64-STATIC: movl [[EAX]], dst+262144(,%rdi,4) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: big00: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl src+262144(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[ECX]], dst+262144(,[[EAX]],4) +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: big00: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl src+262144(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[ECX]], dst+262144(,[[EAX]],4) +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: big00: +; LINUX-64-PIC: movq src@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movl 262144([[RAX]],%rdi,4), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movq dst@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], 262144([[RCX]],%rdi,4) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _big00: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _src+262144(,[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[ECX]], _dst+262144(,[[EAX]],4) +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _big00: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_src$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl 262144([[ECX]],[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_dst$non_lazy_ptr, [[EDX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[ECX]], 262144([[EDX]],[[EAX]],4) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _big00: +; DARWIN-32-PIC: calll L57$pb +; DARWIN-32-PIC-NEXT: L57$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L57$pb([[EAX]]), [[EDX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 
262144([[EDX]],[[ECX]],4), [[EDX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L57$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[EDX]], 262144([[EAX]],[[ECX]],4) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _big00: +; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl 262144([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: movq _dst@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], 262144([[RCX]],%rdi,4) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _big00: +; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl 262144([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _dst@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], 262144([[RCX]],%rdi,4) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _big00: +; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movl 262144([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: movq _dst@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], 262144([[RCX]],%rdi,4) +; DARWIN-64-PIC-NEXT: ret +} + +define void @big01(i64 %i) nounwind { +entry: + %.sum = add i64 %i, 65536 + %0 = getelementptr [131072 x i32]* @dst, i64 0, i64 %.sum + store i32* %0, i32** @ptr, align 8 + ret void +; LINUX-64-STATIC: big01: +; LINUX-64-STATIC: leaq dst+262144(,%rdi,4), [[RAX:%r.x]] +; LINUX-64-STATIC: movq [[RAX]], ptr(%rip) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: big01: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: leal dst+262144(,[[EAX]],4), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[EAX]], ptr +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: big01: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: leal dst+262144(,[[EAX]],4), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[EAX]], ptr +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: big01: +; LINUX-64-PIC: movq dst@GOTPCREL(%rip), [[RAX:%r.x]] +; 
LINUX-64-PIC-NEXT: leaq 262144([[RAX]],%rdi,4), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movq [[RAX]], ([[RCX]]) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _big01: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: leal _dst+262144(,[[EAX]],4), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[EAX]], _ptr +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _big01: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_dst$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: leal 262144([[ECX]],[[EAX]],4), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[EAX]], ([[ECX]]) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _big01: +; DARWIN-32-PIC: calll L58$pb +; DARWIN-32-PIC-NEXT: L58$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L58$pb([[EAX]]), [[EDX:%e.x]] +; DARWIN-32-PIC-NEXT: leal 262144([[EDX]],[[ECX]],4), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L58$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], ([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _big01: +; DARWIN-64-STATIC: movq _dst@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: leaq 262144([[RAX]],%rdi,4), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq [[RAX]], ([[RCX]]) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _big01: +; DARWIN-64-DYNAMIC: movq _dst@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: leaq 262144([[RAX]],%rdi,4), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq [[RAX]], ([[RCX]]) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _big01: +; DARWIN-64-PIC: movq _dst@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: leaq 
262144([[RAX]],%rdi,4), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movq [[RAX]], ([[RCX]]) +; DARWIN-64-PIC-NEXT: ret +} + +define void @big02(i64 %i) nounwind { +entry: + %0 = load i32** @ptr, align 8 + %1 = add i64 %i, 65536 + %2 = getelementptr [131072 x i32]* @src, i64 0, i64 %1 + %3 = load i32* %2, align 4 + %4 = getelementptr i32* %0, i64 %1 + store i32 %3, i32* %4, align 4 + ret void +; LINUX-64-STATIC: big02: +; LINUX-64-STATIC: movl src+262144(,%rdi,4), [[EAX:%e.x]] +; LINUX-64-STATIC: movq ptr(%rip), [[RCX:%r.x]] +; LINUX-64-STATIC: movl [[EAX]], 262144([[RCX]],%rdi,4) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: big02: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl src+262144(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: movl ptr, [[EDX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[ECX]], 262144([[EDX]],[[EAX]],4) +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: big02: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl src+262144(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-PIC-NEXT: movl ptr, [[EDX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[ECX]], 262144([[EDX]],[[EAX]],4) +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: big02: +; LINUX-64-PIC: movq src@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movl 262144([[RAX]],%rdi,4), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], 262144([[RCX]],%rdi,4) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _big02: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _src+262144(,[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _ptr, [[EDX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[ECX]], 262144([[EDX]],[[EAX]],4) +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _big02: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_src$non_lazy_ptr, [[ECX:%e.x]] 
+; DARWIN-32-DYNAMIC-NEXT: movl 262144([[ECX]],[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_ptr$non_lazy_ptr, [[EDX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl ([[EDX]]), [[EDX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[ECX]], 262144([[EDX]],[[EAX]],4) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _big02: +; DARWIN-32-PIC: calll L59$pb +; DARWIN-32-PIC-NEXT: L59$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L59$pb([[EAX]]), [[EDX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 262144([[EDX]],[[ECX]],4), [[EDX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L59$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl ([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[EDX]], 262144([[EAX]],[[ECX]],4) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _big02: +; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl 262144([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], 262144([[RCX]],%rdi,4) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _big02: +; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl 262144([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], 262144([[RCX]],%rdi,4) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _big02: +; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movl 262144([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], 262144([[RCX]],%rdi,4) +; DARWIN-64-PIC-NEXT: ret +} + +define void @big03(i64 %i) nounwind { +entry: + %0 = 
add i64 %i, 65536 + %1 = getelementptr [131072 x i32]* @dsrc, i64 0, i64 %0 + %2 = load i32* %1, align 4 + %3 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %0 + store i32 %2, i32* %3, align 4 + ret void +; LINUX-64-STATIC: big03: +; LINUX-64-STATIC: movl dsrc+262144(,%rdi,4), [[EAX:%e.x]] +; LINUX-64-STATIC: movl [[EAX]], ddst+262144(,%rdi,4) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: big03: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl dsrc+262144(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[ECX]], ddst+262144(,[[EAX]],4) +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: big03: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl dsrc+262144(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[ECX]], ddst+262144(,[[EAX]],4) +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: big03: +; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movl 262144([[RAX]],%rdi,4), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movq ddst@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], 262144([[RCX]],%rdi,4) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _big03: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _dsrc+262144(,[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[ECX]], _ddst+262144(,[[EAX]],4) +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _big03: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl _dsrc+262144(,[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[ECX]], _ddst+262144(,[[EAX]],4) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _big03: +; DARWIN-32-PIC: calll L60$pb +; DARWIN-32-PIC-NEXT: L60$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl (_dsrc-L60$pb)+262144([[EAX]],[[ECX]],4), [[EDX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[EDX]], (_ddst-L60$pb)+262144([[EAX]],[[ECX]],4) +; DARWIN-32-PIC-NEXT: ret + +; 
DARWIN-64-STATIC: _big03: +; DARWIN-64-STATIC: leaq _dsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl 262144([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: leaq _ddst(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], 262144([[RCX]],%rdi,4) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _big03: +; DARWIN-64-DYNAMIC: leaq _dsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl 262144([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: leaq _ddst(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], 262144([[RCX]],%rdi,4) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _big03: +; DARWIN-64-PIC: leaq _dsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movl 262144([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: leaq _ddst(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], 262144([[RCX]],%rdi,4) +; DARWIN-64-PIC-NEXT: ret +} + +define void @big04(i64 %i) nounwind { +entry: + %.sum = add i64 %i, 65536 + %0 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %.sum + store i32* %0, i32** @dptr, align 8 + ret void +; LINUX-64-STATIC: big04: +; LINUX-64-STATIC: leaq ddst+262144(,%rdi,4), [[RAX:%r.x]] +; LINUX-64-STATIC: movq [[RAX]], dptr +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: big04: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: leal ddst+262144(,[[EAX]],4), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[EAX]], dptr +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: big04: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: leal ddst+262144(,[[EAX]],4), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[EAX]], dptr +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: big04: +; LINUX-64-PIC: movq ddst@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: leaq 262144([[RAX]],%rdi,4), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movq [[RAX]], ([[RCX]]) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _big04: +; DARWIN-32-STATIC: movl 4(%esp), 
[[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: leal _ddst+262144(,[[EAX]],4), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[EAX]], _dptr +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _big04: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: leal _ddst+262144(,[[EAX]],4), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[EAX]], _dptr +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _big04: +; DARWIN-32-PIC: calll L61$pb +; DARWIN-32-PIC-NEXT: L61$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: leal (_ddst-L61$pb)+262144([[EAX]],[[ECX]],4), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], _dptr-L61$pb([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _big04: +; DARWIN-64-STATIC: leaq _ddst(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: leaq 262144([[RAX]],%rdi,4), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq [[RAX]], _dptr(%rip) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _big04: +; DARWIN-64-DYNAMIC: leaq _ddst(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: leaq 262144([[RAX]],%rdi,4), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq [[RAX]], _dptr(%rip) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _big04: +; DARWIN-64-PIC: leaq _ddst(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: leaq 262144([[RAX]],%rdi,4), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movq [[RAX]], _dptr(%rip) +; DARWIN-64-PIC-NEXT: ret +} + +define void @big05(i64 %i) nounwind { +entry: + %0 = load i32** @dptr, align 8 + %1 = add i64 %i, 65536 + %2 = getelementptr [131072 x i32]* @dsrc, i64 0, i64 %1 + %3 = load i32* %2, align 4 + %4 = getelementptr i32* %0, i64 %1 + store i32 %3, i32* %4, align 4 + ret void +; LINUX-64-STATIC: big05: +; LINUX-64-STATIC: movl dsrc+262144(,%rdi,4), [[EAX:%e.x]] +; LINUX-64-STATIC: movq dptr(%rip), [[RCX:%r.x]] +; LINUX-64-STATIC: movl [[EAX]], 262144([[RCX]],%rdi,4) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: big05: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] 
+; LINUX-32-STATIC-NEXT: movl dsrc+262144(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: movl dptr, [[EDX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[ECX]], 262144([[EDX]],[[EAX]],4) +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: big05: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl dsrc+262144(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-PIC-NEXT: movl dptr, [[EDX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[ECX]], 262144([[EDX]],[[EAX]],4) +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: big05: +; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movl 262144([[RAX]],%rdi,4), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movq ([[RCX]]), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], 262144([[RCX]],%rdi,4) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _big05: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _dsrc+262144(,[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _dptr, [[EDX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[ECX]], 262144([[EDX]],[[EAX]],4) +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _big05: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl _dsrc+262144(,[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl _dptr, [[EDX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[ECX]], 262144([[EDX]],[[EAX]],4) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _big05: +; DARWIN-32-PIC: calll L62$pb +; DARWIN-32-PIC-NEXT: L62$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl (_dsrc-L62$pb)+262144([[EAX]],[[ECX]],4), [[EDX:%e.x]] +; DARWIN-32-PIC-NEXT: movl _dptr-L62$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[EDX]], 262144([[EAX]],[[ECX]],4) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _big05: +; DARWIN-64-STATIC: leaq _dsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl 262144([[RAX]],%rdi,4), [[EAX:%e.x]] +; 
DARWIN-64-STATIC-NEXT: movq _dptr(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], 262144([[RCX]],%rdi,4) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _big05: +; DARWIN-64-DYNAMIC: leaq _dsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl 262144([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _dptr(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], 262144([[RCX]],%rdi,4) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _big05: +; DARWIN-64-PIC: leaq _dsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movl 262144([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: movq _dptr(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], 262144([[RCX]],%rdi,4) +; DARWIN-64-PIC-NEXT: ret +} + +define void @big06(i64 %i) nounwind { +entry: + %0 = add i64 %i, 65536 + %1 = getelementptr [131072 x i32]* @lsrc, i64 0, i64 %0 + %2 = load i32* %1, align 4 + %3 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %0 + store i32 %2, i32* %3, align 4 + ret void +; LINUX-64-STATIC: big06: +; LINUX-64-STATIC: movl lsrc+262144(,%rdi,4), [[EAX:%e.x]] +; LINUX-64-STATIC: movl [[EAX]], ldst+262144(,%rdi,4) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: big06: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl lsrc+262144(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[ECX]], ldst+262144(,[[EAX]],4) +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: big06: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl lsrc+262144(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[ECX]], ldst+262144(,[[EAX]],4) +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: big06: +; LINUX-64-PIC: leaq lsrc(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movl 262144([[RAX]],%rdi,4), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: leaq ldst(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], 262144([[RCX]],%rdi,4) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _big06: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; 
DARWIN-32-STATIC-NEXT: movl _lsrc+262144(,[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[ECX]], _ldst+262144(,[[EAX]],4) +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _big06: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl _lsrc+262144(,[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[ECX]], _ldst+262144(,[[EAX]],4) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _big06: +; DARWIN-32-PIC: calll L63$pb +; DARWIN-32-PIC-NEXT: L63$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl (_lsrc-L63$pb)+262144([[EAX]],[[ECX]],4), [[EDX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[EDX]], (_ldst-L63$pb)+262144([[EAX]],[[ECX]],4) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _big06: +; DARWIN-64-STATIC: leaq _lsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl 262144([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: leaq _ldst(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], 262144([[RCX]],%rdi,4) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _big06: +; DARWIN-64-DYNAMIC: leaq _lsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl 262144([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: leaq _ldst(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], 262144([[RCX]],%rdi,4) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _big06: +; DARWIN-64-PIC: leaq _lsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movl 262144([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: leaq _ldst(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], 262144([[RCX]],%rdi,4) +; DARWIN-64-PIC-NEXT: ret +} + +define void @big07(i64 %i) nounwind { +entry: + %.sum = add i64 %i, 65536 + %0 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %.sum + store i32* %0, i32** @lptr, align 8 + ret void +; LINUX-64-STATIC: big07: +; LINUX-64-STATIC: leaq ldst+262144(,%rdi,4), [[RAX:%r.x]] +; LINUX-64-STATIC: movq [[RAX]], lptr +; 
LINUX-64-STATIC: ret + +; LINUX-32-STATIC: big07: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: leal ldst+262144(,[[EAX]],4), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[EAX]], lptr +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: big07: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: leal ldst+262144(,[[EAX]],4), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[EAX]], lptr +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: big07: +; LINUX-64-PIC: leaq ldst(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: leaq 262144([[RAX]],%rdi,4), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movq [[RAX]], lptr(%rip) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _big07: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: leal _ldst+262144(,[[EAX]],4), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[EAX]], _lptr +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _big07: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: leal _ldst+262144(,[[EAX]],4), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl [[EAX]], _lptr +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _big07: +; DARWIN-32-PIC: calll L64$pb +; DARWIN-32-PIC-NEXT: L64$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: leal (_ldst-L64$pb)+262144([[EAX]],[[ECX]],4), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[ECX]], _lptr-L64$pb([[EAX]]) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _big07: +; DARWIN-64-STATIC: leaq _ldst(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: leaq 262144([[RAX]],%rdi,4), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq [[RAX]], _lptr(%rip) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _big07: +; DARWIN-64-DYNAMIC: leaq _ldst(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: leaq 262144([[RAX]],%rdi,4), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq [[RAX]], _lptr(%rip) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _big07: +; DARWIN-64-PIC: leaq _ldst(%rip), 
[[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: leaq 262144([[RAX]],%rdi,4), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movq [[RAX]], _lptr(%rip) +; DARWIN-64-PIC-NEXT: ret +} + +define void @big08(i64 %i) nounwind { +entry: + %0 = load i32** @lptr, align 8 + %1 = add i64 %i, 65536 + %2 = getelementptr [131072 x i32]* @lsrc, i64 0, i64 %1 + %3 = load i32* %2, align 4 + %4 = getelementptr i32* %0, i64 %1 + store i32 %3, i32* %4, align 4 + ret void +; LINUX-64-STATIC: big08: +; LINUX-64-STATIC: movl lsrc+262144(,%rdi,4), [[EAX:%e.x]] +; LINUX-64-STATIC: movq lptr(%rip), [[RCX:%r.x]] +; LINUX-64-STATIC: movl [[EAX]], 262144([[RCX]],%rdi,4) +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: big08: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl lsrc+262144(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: movl lptr, [[EDX:%e.x]] +; LINUX-32-STATIC-NEXT: movl [[ECX]], 262144([[EDX]],[[EAX]],4) +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: big08: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl lsrc+262144(,[[EAX]],4), [[ECX:%e.x]] +; LINUX-32-PIC-NEXT: movl lptr, [[EDX:%e.x]] +; LINUX-32-PIC-NEXT: movl [[ECX]], 262144([[EDX]],[[EAX]],4) +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: big08: +; LINUX-64-PIC: leaq lsrc(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movl 262144([[RAX]],%rdi,4), [[EAX:%e.x]] +; LINUX-64-PIC-NEXT: movq lptr(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl [[EAX]], 262144([[RCX]],%rdi,4) +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _big08: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _lsrc+262144(,[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _lptr, [[EDX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl [[ECX]], 262144([[EDX]],[[EAX]],4) +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _big08: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl _lsrc+262144(,[[EAX]],4), [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl _lptr, [[EDX:%e.x]] +; 
DARWIN-32-DYNAMIC-NEXT: movl [[ECX]], 262144([[EDX]],[[EAX]],4) +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _big08: +; DARWIN-32-PIC: calll L65$pb +; DARWIN-32-PIC-NEXT: L65$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl (_lsrc-L65$pb)+262144([[EAX]],[[ECX]],4), [[EDX:%e.x]] +; DARWIN-32-PIC-NEXT: movl _lptr-L65$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl [[EDX]], 262144([[EAX]],[[ECX]],4) +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _big08: +; DARWIN-64-STATIC: leaq _lsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl 262144([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-STATIC-NEXT: movq _lptr(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl [[EAX]], 262144([[RCX]],%rdi,4) +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _big08: +; DARWIN-64-DYNAMIC: leaq _lsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl 262144([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _lptr(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl [[EAX]], 262144([[RCX]],%rdi,4) +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _big08: +; DARWIN-64-PIC: leaq _lsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movl 262144([[RAX]],%rdi,4), [[EAX:%e.x]] +; DARWIN-64-PIC-NEXT: movq _lptr(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movl [[EAX]], 262144([[RCX]],%rdi,4) +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @bar00() nounwind { +entry: + ret i8* bitcast ([131072 x i32]* @src to i8*) +; LINUX-64-STATIC: bar00: +; LINUX-64-STATIC: movl $src, %eax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: bar00: +; LINUX-32-STATIC: movl $src, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: bar00: +; LINUX-32-PIC: movl $src, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: bar00: +; LINUX-64-PIC: movq src@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _bar00: +; DARWIN-32-STATIC: movl $_src, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _bar00: +; 
DARWIN-32-DYNAMIC: movl L_src$non_lazy_ptr, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _bar00: +; DARWIN-32-PIC: calll L66$pb +; DARWIN-32-PIC-NEXT: L66$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L66$pb([[EAX]]), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _bar00: +; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _bar00: +; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _bar00: +; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @bxr00() nounwind { +entry: + ret i8* bitcast ([32 x i32]* @xsrc to i8*) +; LINUX-64-STATIC: bxr00: +; LINUX-64-STATIC: movl $xsrc, %eax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: bxr00: +; LINUX-32-STATIC: movl $xsrc, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: bxr00: +; LINUX-32-PIC: movl $xsrc, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: bxr00: +; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _bxr00: +; DARWIN-32-STATIC: movl $_xsrc, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _bxr00: +; DARWIN-32-DYNAMIC: movl L_xsrc$non_lazy_ptr, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _bxr00: +; DARWIN-32-PIC: calll L67$pb +; DARWIN-32-PIC-NEXT: L67$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L67$pb([[EAX]]), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _bxr00: +; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _bxr00: +; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _bxr00: +; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @bar01() nounwind { +entry: + ret i8* bitcast ([131072 x i32]* @dst to i8*) +; LINUX-64-STATIC: bar01: 
+; LINUX-64-STATIC: movl $dst, %eax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: bar01: +; LINUX-32-STATIC: movl $dst, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: bar01: +; LINUX-32-PIC: movl $dst, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: bar01: +; LINUX-64-PIC: movq dst@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _bar01: +; DARWIN-32-STATIC: movl $_dst, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _bar01: +; DARWIN-32-DYNAMIC: movl L_dst$non_lazy_ptr, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _bar01: +; DARWIN-32-PIC: calll L68$pb +; DARWIN-32-PIC-NEXT: L68$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L68$pb([[EAX]]), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _bar01: +; DARWIN-64-STATIC: movq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _bar01: +; DARWIN-64-DYNAMIC: movq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _bar01: +; DARWIN-64-PIC: movq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @bxr01() nounwind { +entry: + ret i8* bitcast ([32 x i32]* @xdst to i8*) +; LINUX-64-STATIC: bxr01: +; LINUX-64-STATIC: movl $xdst, %eax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: bxr01: +; LINUX-32-STATIC: movl $xdst, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: bxr01: +; LINUX-32-PIC: movl $xdst, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: bxr01: +; LINUX-64-PIC: movq xdst@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _bxr01: +; DARWIN-32-STATIC: movl $_xdst, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _bxr01: +; DARWIN-32-DYNAMIC: movl L_xdst$non_lazy_ptr, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _bxr01: +; DARWIN-32-PIC: calll L69$pb +; DARWIN-32-PIC-NEXT: L69$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L69$pb([[EAX]]), %eax +; 
DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _bxr01: +; DARWIN-64-STATIC: movq _xdst@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _bxr01: +; DARWIN-64-DYNAMIC: movq _xdst@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _bxr01: +; DARWIN-64-PIC: movq _xdst@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @bar02() nounwind { +entry: + ret i8* bitcast (i32** @ptr to i8*) +; LINUX-64-STATIC: bar02: +; LINUX-64-STATIC: movl $ptr, %eax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: bar02: +; LINUX-32-STATIC: movl $ptr, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: bar02: +; LINUX-32-PIC: movl $ptr, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: bar02: +; LINUX-64-PIC: movq ptr@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _bar02: +; DARWIN-32-STATIC: movl $_ptr, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _bar02: +; DARWIN-32-DYNAMIC: movl L_ptr$non_lazy_ptr, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _bar02: +; DARWIN-32-PIC: calll L70$pb +; DARWIN-32-PIC-NEXT: L70$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L70$pb([[EAX]]), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _bar02: +; DARWIN-64-STATIC: movq _ptr@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _bar02: +; DARWIN-64-DYNAMIC: movq _ptr@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _bar02: +; DARWIN-64-PIC: movq _ptr@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @bar03() nounwind { +entry: + ret i8* bitcast ([131072 x i32]* @dsrc to i8*) +; LINUX-64-STATIC: bar03: +; LINUX-64-STATIC: movl $dsrc, %eax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: bar03: +; LINUX-32-STATIC: movl $dsrc, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: bar03: +; LINUX-32-PIC: movl $dsrc, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: bar03: +; LINUX-64-PIC: movq 
dsrc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _bar03: +; DARWIN-32-STATIC: movl $_dsrc, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _bar03: +; DARWIN-32-DYNAMIC: movl $_dsrc, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _bar03: +; DARWIN-32-PIC: calll L71$pb +; DARWIN-32-PIC-NEXT: L71$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: leal _dsrc-L71$pb([[EAX]]), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _bar03: +; DARWIN-64-STATIC: leaq _dsrc(%rip), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _bar03: +; DARWIN-64-DYNAMIC: leaq _dsrc(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _bar03: +; DARWIN-64-PIC: leaq _dsrc(%rip), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @bar04() nounwind { +entry: + ret i8* bitcast ([131072 x i32]* @ddst to i8*) +; LINUX-64-STATIC: bar04: +; LINUX-64-STATIC: movl $ddst, %eax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: bar04: +; LINUX-32-STATIC: movl $ddst, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: bar04: +; LINUX-32-PIC: movl $ddst, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: bar04: +; LINUX-64-PIC: movq ddst@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _bar04: +; DARWIN-32-STATIC: movl $_ddst, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _bar04: +; DARWIN-32-DYNAMIC: movl $_ddst, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _bar04: +; DARWIN-32-PIC: calll L72$pb +; DARWIN-32-PIC-NEXT: L72$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: leal _ddst-L72$pb([[EAX]]), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _bar04: +; DARWIN-64-STATIC: leaq _ddst(%rip), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _bar04: +; DARWIN-64-DYNAMIC: leaq _ddst(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _bar04: +; DARWIN-64-PIC: leaq _ddst(%rip), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @bar05() 
nounwind { +entry: + ret i8* bitcast (i32** @dptr to i8*) +; LINUX-64-STATIC: bar05: +; LINUX-64-STATIC: movl $dptr, %eax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: bar05: +; LINUX-32-STATIC: movl $dptr, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: bar05: +; LINUX-32-PIC: movl $dptr, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: bar05: +; LINUX-64-PIC: movq dptr@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _bar05: +; DARWIN-32-STATIC: movl $_dptr, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _bar05: +; DARWIN-32-DYNAMIC: movl $_dptr, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _bar05: +; DARWIN-32-PIC: calll L73$pb +; DARWIN-32-PIC-NEXT: L73$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: leal _dptr-L73$pb([[EAX]]), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _bar05: +; DARWIN-64-STATIC: leaq _dptr(%rip), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _bar05: +; DARWIN-64-DYNAMIC: leaq _dptr(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _bar05: +; DARWIN-64-PIC: leaq _dptr(%rip), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @bar06() nounwind { +entry: + ret i8* bitcast ([131072 x i32]* @lsrc to i8*) +; LINUX-64-STATIC: bar06: +; LINUX-64-STATIC: movl $lsrc, %eax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: bar06: +; LINUX-32-STATIC: movl $lsrc, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: bar06: +; LINUX-32-PIC: movl $lsrc, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: bar06: +; LINUX-64-PIC: leaq lsrc(%rip), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _bar06: +; DARWIN-32-STATIC: movl $_lsrc, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _bar06: +; DARWIN-32-DYNAMIC: movl $_lsrc, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _bar06: +; DARWIN-32-PIC: calll L74$pb +; DARWIN-32-PIC-NEXT: L74$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: leal _lsrc-L74$pb([[EAX]]), %eax +; 
DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _bar06: +; DARWIN-64-STATIC: leaq _lsrc(%rip), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _bar06: +; DARWIN-64-DYNAMIC: leaq _lsrc(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _bar06: +; DARWIN-64-PIC: leaq _lsrc(%rip), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @bar07() nounwind { +entry: + ret i8* bitcast ([131072 x i32]* @ldst to i8*) +; LINUX-64-STATIC: bar07: +; LINUX-64-STATIC: movl $ldst, %eax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: bar07: +; LINUX-32-STATIC: movl $ldst, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: bar07: +; LINUX-32-PIC: movl $ldst, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: bar07: +; LINUX-64-PIC: leaq ldst(%rip), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _bar07: +; DARWIN-32-STATIC: movl $_ldst, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _bar07: +; DARWIN-32-DYNAMIC: movl $_ldst, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _bar07: +; DARWIN-32-PIC: calll L75$pb +; DARWIN-32-PIC-NEXT: L75$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: leal _ldst-L75$pb([[EAX]]), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _bar07: +; DARWIN-64-STATIC: leaq _ldst(%rip), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _bar07: +; DARWIN-64-DYNAMIC: leaq _ldst(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _bar07: +; DARWIN-64-PIC: leaq _ldst(%rip), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @bar08() nounwind { +entry: + ret i8* bitcast (i32** @lptr to i8*) +; LINUX-64-STATIC: bar08: +; LINUX-64-STATIC: movl $lptr, %eax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: bar08: +; LINUX-32-STATIC: movl $lptr, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: bar08: +; LINUX-32-PIC: movl $lptr, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: bar08: +; LINUX-64-PIC: leaq lptr(%rip), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _bar08: +; 
DARWIN-32-STATIC: movl $_lptr, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _bar08: +; DARWIN-32-DYNAMIC: movl $_lptr, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _bar08: +; DARWIN-32-PIC: calll L76$pb +; DARWIN-32-PIC-NEXT: L76$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: leal _lptr-L76$pb([[EAX]]), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _bar08: +; DARWIN-64-STATIC: leaq _lptr(%rip), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _bar08: +; DARWIN-64-DYNAMIC: leaq _lptr(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _bar08: +; DARWIN-64-PIC: leaq _lptr(%rip), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @har00() nounwind { +entry: + ret i8* bitcast ([131072 x i32]* @src to i8*) +; LINUX-64-STATIC: har00: +; LINUX-64-STATIC: movl $src, %eax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: har00: +; LINUX-32-STATIC: movl $src, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: har00: +; LINUX-32-PIC: movl $src, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: har00: +; LINUX-64-PIC: movq src@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _har00: +; DARWIN-32-STATIC: movl $_src, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _har00: +; DARWIN-32-DYNAMIC: movl L_src$non_lazy_ptr, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _har00: +; DARWIN-32-PIC: calll L77$pb +; DARWIN-32-PIC-NEXT: L77$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L77$pb([[EAX]]), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _har00: +; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _har00: +; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _har00: +; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @hxr00() nounwind { +entry: + ret i8* bitcast ([32 x i32]* 
@xsrc to i8*) +; LINUX-64-STATIC: hxr00: +; LINUX-64-STATIC: movl $xsrc, %eax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: hxr00: +; LINUX-32-STATIC: movl $xsrc, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: hxr00: +; LINUX-32-PIC: movl $xsrc, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: hxr00: +; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _hxr00: +; DARWIN-32-STATIC: movl $_xsrc, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _hxr00: +; DARWIN-32-DYNAMIC: movl L_xsrc$non_lazy_ptr, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _hxr00: +; DARWIN-32-PIC: calll L78$pb +; DARWIN-32-PIC-NEXT: L78$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L78$pb([[EAX]]), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _hxr00: +; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _hxr00: +; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _hxr00: +; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @har01() nounwind { +entry: + ret i8* bitcast ([131072 x i32]* @dst to i8*) +; LINUX-64-STATIC: har01: +; LINUX-64-STATIC: movl $dst, %eax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: har01: +; LINUX-32-STATIC: movl $dst, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: har01: +; LINUX-32-PIC: movl $dst, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: har01: +; LINUX-64-PIC: movq dst@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _har01: +; DARWIN-32-STATIC: movl $_dst, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _har01: +; DARWIN-32-DYNAMIC: movl L_dst$non_lazy_ptr, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _har01: +; DARWIN-32-PIC: calll L79$pb +; DARWIN-32-PIC-NEXT: L79$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 
L_dst$non_lazy_ptr-L79$pb([[EAX]]), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _har01: +; DARWIN-64-STATIC: movq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _har01: +; DARWIN-64-DYNAMIC: movq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _har01: +; DARWIN-64-PIC: movq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @hxr01() nounwind { +entry: + ret i8* bitcast ([32 x i32]* @xdst to i8*) +; LINUX-64-STATIC: hxr01: +; LINUX-64-STATIC: movl $xdst, %eax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: hxr01: +; LINUX-32-STATIC: movl $xdst, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: hxr01: +; LINUX-32-PIC: movl $xdst, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: hxr01: +; LINUX-64-PIC: movq xdst@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _hxr01: +; DARWIN-32-STATIC: movl $_xdst, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _hxr01: +; DARWIN-32-DYNAMIC: movl L_xdst$non_lazy_ptr, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _hxr01: +; DARWIN-32-PIC: calll L80$pb +; DARWIN-32-PIC-NEXT: L80$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L80$pb([[EAX]]), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _hxr01: +; DARWIN-64-STATIC: movq _xdst@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _hxr01: +; DARWIN-64-DYNAMIC: movq _xdst@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _hxr01: +; DARWIN-64-PIC: movq _xdst@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @har02() nounwind { +entry: + %0 = load i32** @ptr, align 8 + %1 = bitcast i32* %0 to i8* + ret i8* %1 +; LINUX-64-STATIC: har02: +; LINUX-64-STATIC: movq ptr(%rip), %rax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: har02: +; LINUX-32-STATIC: movl ptr, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: har02: +; LINUX-32-PIC: movl ptr, 
%eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: har02: +; LINUX-64-PIC: movq ptr@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movq ([[RAX]]), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _har02: +; DARWIN-32-STATIC: movl _ptr, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _har02: +; DARWIN-32-DYNAMIC: movl L_ptr$non_lazy_ptr, [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl ([[EAX]]), %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _har02: +; DARWIN-32-PIC: calll L81$pb +; DARWIN-32-PIC-NEXT: L81$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L81$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl ([[EAX]]), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _har02: +; DARWIN-64-STATIC: movq _ptr@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq ([[RAX]]), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _har02: +; DARWIN-64-DYNAMIC: movq _ptr@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq ([[RAX]]), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _har02: +; DARWIN-64-PIC: movq _ptr@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movq ([[RAX]]), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @har03() nounwind { +entry: + ret i8* bitcast ([131072 x i32]* @dsrc to i8*) +; LINUX-64-STATIC: har03: +; LINUX-64-STATIC: movl $dsrc, %eax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: har03: +; LINUX-32-STATIC: movl $dsrc, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: har03: +; LINUX-32-PIC: movl $dsrc, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: har03: +; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _har03: +; DARWIN-32-STATIC: movl $_dsrc, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _har03: +; DARWIN-32-DYNAMIC: movl $_dsrc, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _har03: +; DARWIN-32-PIC: calll L82$pb +; DARWIN-32-PIC-NEXT: L82$pb: +; 
DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: leal _dsrc-L82$pb([[EAX]]), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _har03: +; DARWIN-64-STATIC: leaq _dsrc(%rip), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _har03: +; DARWIN-64-DYNAMIC: leaq _dsrc(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _har03: +; DARWIN-64-PIC: leaq _dsrc(%rip), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @har04() nounwind { +entry: + ret i8* bitcast ([131072 x i32]* @ddst to i8*) +; LINUX-64-STATIC: har04: +; LINUX-64-STATIC: movl $ddst, %eax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: har04: +; LINUX-32-STATIC: movl $ddst, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: har04: +; LINUX-32-PIC: movl $ddst, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: har04: +; LINUX-64-PIC: movq ddst@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _har04: +; DARWIN-32-STATIC: movl $_ddst, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _har04: +; DARWIN-32-DYNAMIC: movl $_ddst, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _har04: +; DARWIN-32-PIC: calll L83$pb +; DARWIN-32-PIC-NEXT: L83$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: leal _ddst-L83$pb([[EAX]]), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _har04: +; DARWIN-64-STATIC: leaq _ddst(%rip), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _har04: +; DARWIN-64-DYNAMIC: leaq _ddst(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _har04: +; DARWIN-64-PIC: leaq _ddst(%rip), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @har05() nounwind { +entry: + %0 = load i32** @dptr, align 8 + %1 = bitcast i32* %0 to i8* + ret i8* %1 +; LINUX-64-STATIC: har05: +; LINUX-64-STATIC: movq dptr(%rip), %rax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: har05: +; LINUX-32-STATIC: movl dptr, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: har05: +; LINUX-32-PIC: movl dptr, %eax +; 
LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: har05: +; LINUX-64-PIC: movq dptr@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movq ([[RAX]]), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _har05: +; DARWIN-32-STATIC: movl _dptr, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _har05: +; DARWIN-32-DYNAMIC: movl _dptr, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _har05: +; DARWIN-32-PIC: calll L84$pb +; DARWIN-32-PIC-NEXT: L84$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl _dptr-L84$pb([[EAX]]), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _har05: +; DARWIN-64-STATIC: movq _dptr(%rip), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _har05: +; DARWIN-64-DYNAMIC: movq _dptr(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _har05: +; DARWIN-64-PIC: movq _dptr(%rip), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @har06() nounwind { +entry: + ret i8* bitcast ([131072 x i32]* @lsrc to i8*) +; LINUX-64-STATIC: har06: +; LINUX-64-STATIC: movl $lsrc, %eax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: har06: +; LINUX-32-STATIC: movl $lsrc, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: har06: +; LINUX-32-PIC: movl $lsrc, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: har06: +; LINUX-64-PIC: leaq lsrc(%rip), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _har06: +; DARWIN-32-STATIC: movl $_lsrc, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _har06: +; DARWIN-32-DYNAMIC: movl $_lsrc, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _har06: +; DARWIN-32-PIC: calll L85$pb +; DARWIN-32-PIC-NEXT: L85$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: leal _lsrc-L85$pb([[EAX]]), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _har06: +; DARWIN-64-STATIC: leaq _lsrc(%rip), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _har06: +; DARWIN-64-DYNAMIC: leaq _lsrc(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; 
DARWIN-64-PIC: _har06: +; DARWIN-64-PIC: leaq _lsrc(%rip), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @har07() nounwind { +entry: + ret i8* bitcast ([131072 x i32]* @ldst to i8*) +; LINUX-64-STATIC: har07: +; LINUX-64-STATIC: movl $ldst, %eax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: har07: +; LINUX-32-STATIC: movl $ldst, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: har07: +; LINUX-32-PIC: movl $ldst, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: har07: +; LINUX-64-PIC: leaq ldst(%rip), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _har07: +; DARWIN-32-STATIC: movl $_ldst, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _har07: +; DARWIN-32-DYNAMIC: movl $_ldst, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _har07: +; DARWIN-32-PIC: calll L86$pb +; DARWIN-32-PIC-NEXT: L86$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: leal _ldst-L86$pb([[EAX]]), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _har07: +; DARWIN-64-STATIC: leaq _ldst(%rip), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _har07: +; DARWIN-64-DYNAMIC: leaq _ldst(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _har07: +; DARWIN-64-PIC: leaq _ldst(%rip), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @har08() nounwind { +entry: + %0 = load i32** @lptr, align 8 + %1 = bitcast i32* %0 to i8* + ret i8* %1 +; LINUX-64-STATIC: har08: +; LINUX-64-STATIC: movq lptr(%rip), %rax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: har08: +; LINUX-32-STATIC: movl lptr, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: har08: +; LINUX-32-PIC: movl lptr, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: har08: +; LINUX-64-PIC: movq lptr(%rip), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _har08: +; DARWIN-32-STATIC: movl _lptr, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _har08: +; DARWIN-32-DYNAMIC: movl _lptr, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _har08: +; 
DARWIN-32-PIC: calll L87$pb +; DARWIN-32-PIC-NEXT: L87$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl _lptr-L87$pb([[EAX]]), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _har08: +; DARWIN-64-STATIC: movq _lptr(%rip), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _har08: +; DARWIN-64-DYNAMIC: movq _lptr(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _har08: +; DARWIN-64-PIC: movq _lptr(%rip), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @bat00() nounwind { +entry: + ret i8* bitcast (i32* getelementptr ([131072 x i32]* @src, i32 0, i64 16) to i8*) +; LINUX-64-STATIC: bat00: +; LINUX-64-STATIC: movl $src+64, %eax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: bat00: +; LINUX-32-STATIC: movl $src+64, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: bat00: +; LINUX-32-PIC: movl $src+64, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: bat00: +; LINUX-64-PIC: movq src@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: addq $64, %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _bat00: +; DARWIN-32-STATIC: movl $_src+64, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _bat00: +; DARWIN-32-DYNAMIC: movl L_src$non_lazy_ptr, %eax +; DARWIN-32-DYNAMIC-NEXT: addl $64, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _bat00: +; DARWIN-32-PIC: calll L88$pb +; DARWIN-32-PIC-NEXT: L88$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L88$pb([[EAX]]), %eax +; DARWIN-32-PIC-NEXT: addl $64, %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _bat00: +; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: addq $64, %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _bat00: +; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: addq $64, %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _bat00: +; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: addq $64, %rax +; 
DARWIN-64-PIC-NEXT: ret +} + +define i8* @bxt00() nounwind { +entry: + ret i8* bitcast (i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 16) to i8*) +; LINUX-64-STATIC: bxt00: +; LINUX-64-STATIC: movl $xsrc+64, %eax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: bxt00: +; LINUX-32-STATIC: movl $xsrc+64, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: bxt00: +; LINUX-32-PIC: movl $xsrc+64, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: bxt00: +; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: addq $64, %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _bxt00: +; DARWIN-32-STATIC: movl $_xsrc+64, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _bxt00: +; DARWIN-32-DYNAMIC: movl L_xsrc$non_lazy_ptr, %eax +; DARWIN-32-DYNAMIC-NEXT: addl $64, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _bxt00: +; DARWIN-32-PIC: calll L89$pb +; DARWIN-32-PIC-NEXT: L89$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L89$pb([[EAX]]), %eax +; DARWIN-32-PIC-NEXT: addl $64, %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _bxt00: +; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: addq $64, %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _bxt00: +; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: addq $64, %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _bxt00: +; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: addq $64, %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @bat01() nounwind { +entry: + ret i8* bitcast (i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 16) to i8*) +; LINUX-64-STATIC: bat01: +; LINUX-64-STATIC: movl $dst+64, %eax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: bat01: +; LINUX-32-STATIC: movl $dst+64, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: bat01: +; LINUX-32-PIC: movl $dst+64, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: bat01: +; LINUX-64-PIC: 
movq dst@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: addq $64, %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _bat01: +; DARWIN-32-STATIC: movl $_dst+64, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _bat01: +; DARWIN-32-DYNAMIC: movl L_dst$non_lazy_ptr, %eax +; DARWIN-32-DYNAMIC-NEXT: addl $64, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _bat01: +; DARWIN-32-PIC: calll L90$pb +; DARWIN-32-PIC-NEXT: L90$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L90$pb([[EAX]]), %eax +; DARWIN-32-PIC-NEXT: addl $64, %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _bat01: +; DARWIN-64-STATIC: movq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: addq $64, %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _bat01: +; DARWIN-64-DYNAMIC: movq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: addq $64, %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _bat01: +; DARWIN-64-PIC: movq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: addq $64, %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @bxt01() nounwind { +entry: + ret i8* bitcast (i32* getelementptr ([32 x i32]* @xdst, i32 0, i64 16) to i8*) +; LINUX-64-STATIC: bxt01: +; LINUX-64-STATIC: movl $xdst+64, %eax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: bxt01: +; LINUX-32-STATIC: movl $xdst+64, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: bxt01: +; LINUX-32-PIC: movl $xdst+64, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: bxt01: +; LINUX-64-PIC: movq xdst@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: addq $64, %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _bxt01: +; DARWIN-32-STATIC: movl $_xdst+64, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _bxt01: +; DARWIN-32-DYNAMIC: movl L_xdst$non_lazy_ptr, %eax +; DARWIN-32-DYNAMIC-NEXT: addl $64, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _bxt01: +; DARWIN-32-PIC: calll L91$pb +; DARWIN-32-PIC-NEXT: L91$pb: +; DARWIN-32-PIC-NEXT: popl 
[[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L91$pb([[EAX]]), %eax +; DARWIN-32-PIC-NEXT: addl $64, %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _bxt01: +; DARWIN-64-STATIC: movq _xdst@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: addq $64, %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _bxt01: +; DARWIN-64-DYNAMIC: movq _xdst@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: addq $64, %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _bxt01: +; DARWIN-64-PIC: movq _xdst@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: addq $64, %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @bat02() nounwind { +entry: + %0 = load i32** @ptr, align 8 + %1 = getelementptr i32* %0, i64 16 + %2 = bitcast i32* %1 to i8* + ret i8* %2 +; LINUX-64-STATIC: bat02: +; LINUX-64-STATIC: movq ptr(%rip), %rax +; LINUX-64-STATIC: addq $64, %rax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: bat02: +; LINUX-32-STATIC: movl ptr, %eax +; LINUX-32-STATIC-NEXT: addl $64, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: bat02: +; LINUX-32-PIC: movl ptr, %eax +; LINUX-32-PIC-NEXT: addl $64, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: bat02: +; LINUX-64-PIC: movq ptr@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movq ([[RAX]]), %rax +; LINUX-64-PIC-NEXT: addq $64, %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _bat02: +; DARWIN-32-STATIC: movl _ptr, %eax +; DARWIN-32-STATIC-NEXT: addl $64, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _bat02: +; DARWIN-32-DYNAMIC: movl L_ptr$non_lazy_ptr, [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl ([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: addl $64, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _bat02: +; DARWIN-32-PIC: calll L92$pb +; DARWIN-32-PIC-NEXT: L92$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L92$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl ([[EAX]]), %eax +; DARWIN-32-PIC-NEXT: addl $64, %eax +; 
DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _bat02: +; DARWIN-64-STATIC: movq _ptr@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq ([[RAX]]), %rax +; DARWIN-64-STATIC-NEXT: addq $64, %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _bat02: +; DARWIN-64-DYNAMIC: movq _ptr@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq ([[RAX]]), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: addq $64, %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _bat02: +; DARWIN-64-PIC: movq _ptr@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movq ([[RAX]]), %rax +; DARWIN-64-PIC-NEXT: addq $64, %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @bat03() nounwind { +entry: + ret i8* bitcast (i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 16) to i8*) +; LINUX-64-STATIC: bat03: +; LINUX-64-STATIC: movl $dsrc+64, %eax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: bat03: +; LINUX-32-STATIC: movl $dsrc+64, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: bat03: +; LINUX-32-PIC: movl $dsrc+64, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: bat03: +; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: addq $64, %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _bat03: +; DARWIN-32-STATIC: movl $_dsrc+64, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _bat03: +; DARWIN-32-DYNAMIC: movl $_dsrc+64, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _bat03: +; DARWIN-32-PIC: calll L93$pb +; DARWIN-32-PIC-NEXT: L93$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: leal (_dsrc-L93$pb)+64([[EAX]]), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _bat03: +; DARWIN-64-STATIC: leaq _dsrc+64(%rip), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _bat03: +; DARWIN-64-DYNAMIC: leaq _dsrc+64(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _bat03: +; DARWIN-64-PIC: leaq _dsrc+64(%rip), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @bat04() nounwind { +entry: + ret i8* 
bitcast (i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 16) to i8*) +; LINUX-64-STATIC: bat04: +; LINUX-64-STATIC: movl $ddst+64, %eax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: bat04: +; LINUX-32-STATIC: movl $ddst+64, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: bat04: +; LINUX-32-PIC: movl $ddst+64, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: bat04: +; LINUX-64-PIC: movq ddst@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: addq $64, %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _bat04: +; DARWIN-32-STATIC: movl $_ddst+64, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _bat04: +; DARWIN-32-DYNAMIC: movl $_ddst+64, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _bat04: +; DARWIN-32-PIC: calll L94$pb +; DARWIN-32-PIC-NEXT: L94$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: leal (_ddst-L94$pb)+64([[EAX]]), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _bat04: +; DARWIN-64-STATIC: leaq _ddst+64(%rip), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _bat04: +; DARWIN-64-DYNAMIC: leaq _ddst+64(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _bat04: +; DARWIN-64-PIC: leaq _ddst+64(%rip), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @bat05() nounwind { +entry: + %0 = load i32** @dptr, align 8 + %1 = getelementptr i32* %0, i64 16 + %2 = bitcast i32* %1 to i8* + ret i8* %2 +; LINUX-64-STATIC: bat05: +; LINUX-64-STATIC: movq dptr(%rip), %rax +; LINUX-64-STATIC: addq $64, %rax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: bat05: +; LINUX-32-STATIC: movl dptr, %eax +; LINUX-32-STATIC-NEXT: addl $64, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: bat05: +; LINUX-32-PIC: movl dptr, %eax +; LINUX-32-PIC-NEXT: addl $64, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: bat05: +; LINUX-64-PIC: movq dptr@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movq ([[RAX]]), %rax +; LINUX-64-PIC-NEXT: addq $64, %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _bat05: +; 
DARWIN-32-STATIC: movl _dptr, %eax +; DARWIN-32-STATIC-NEXT: addl $64, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _bat05: +; DARWIN-32-DYNAMIC: movl _dptr, %eax +; DARWIN-32-DYNAMIC-NEXT: addl $64, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _bat05: +; DARWIN-32-PIC: calll L95$pb +; DARWIN-32-PIC-NEXT: L95$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl _dptr-L95$pb([[EAX]]), %eax +; DARWIN-32-PIC-NEXT: addl $64, %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _bat05: +; DARWIN-64-STATIC: movq _dptr(%rip), %rax +; DARWIN-64-STATIC-NEXT: addq $64, %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _bat05: +; DARWIN-64-DYNAMIC: movq _dptr(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: addq $64, %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _bat05: +; DARWIN-64-PIC: movq _dptr(%rip), %rax +; DARWIN-64-PIC-NEXT: addq $64, %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @bat06() nounwind { +entry: + ret i8* bitcast (i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 16) to i8*) +; LINUX-64-STATIC: bat06: +; LINUX-64-STATIC: movl $lsrc+64, %eax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: bat06: +; LINUX-32-STATIC: movl $lsrc+64, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: bat06: +; LINUX-32-PIC: movl $lsrc+64, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: bat06: +; LINUX-64-PIC: leaq lsrc+64(%rip), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _bat06: +; DARWIN-32-STATIC: movl $_lsrc+64, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _bat06: +; DARWIN-32-DYNAMIC: movl $_lsrc+64, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _bat06: +; DARWIN-32-PIC: calll L96$pb +; DARWIN-32-PIC-NEXT: L96$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: leal (_lsrc-L96$pb)+64([[EAX]]), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _bat06: +; DARWIN-64-STATIC: leaq _lsrc+64(%rip), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: 
_bat06: +; DARWIN-64-DYNAMIC: leaq _lsrc+64(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _bat06: +; DARWIN-64-PIC: leaq _lsrc+64(%rip), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @bat07() nounwind { +entry: + ret i8* bitcast (i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 16) to i8*) +; LINUX-64-STATIC: bat07: +; LINUX-64-STATIC: movl $ldst+64, %eax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: bat07: +; LINUX-32-STATIC: movl $ldst+64, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: bat07: +; LINUX-32-PIC: movl $ldst+64, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: bat07: +; LINUX-64-PIC: leaq ldst+64(%rip), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _bat07: +; DARWIN-32-STATIC: movl $_ldst+64, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _bat07: +; DARWIN-32-DYNAMIC: movl $_ldst+64, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _bat07: +; DARWIN-32-PIC: calll L97$pb +; DARWIN-32-PIC-NEXT: L97$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: leal (_ldst-L97$pb)+64([[EAX]]), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _bat07: +; DARWIN-64-STATIC: leaq _ldst+64(%rip), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _bat07: +; DARWIN-64-DYNAMIC: leaq _ldst+64(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _bat07: +; DARWIN-64-PIC: leaq _ldst+64(%rip), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @bat08() nounwind { +entry: + %0 = load i32** @lptr, align 8 + %1 = getelementptr i32* %0, i64 16 + %2 = bitcast i32* %1 to i8* + ret i8* %2 +; LINUX-64-STATIC: bat08: +; LINUX-64-STATIC: movq lptr(%rip), %rax +; LINUX-64-STATIC: addq $64, %rax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: bat08: +; LINUX-32-STATIC: movl lptr, %eax +; LINUX-32-STATIC-NEXT: addl $64, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: bat08: +; LINUX-32-PIC: movl lptr, %eax +; LINUX-32-PIC-NEXT: addl $64, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: 
bat08: +; LINUX-64-PIC: movq lptr(%rip), %rax +; LINUX-64-PIC-NEXT: addq $64, %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _bat08: +; DARWIN-32-STATIC: movl _lptr, %eax +; DARWIN-32-STATIC-NEXT: addl $64, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _bat08: +; DARWIN-32-DYNAMIC: movl _lptr, %eax +; DARWIN-32-DYNAMIC-NEXT: addl $64, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _bat08: +; DARWIN-32-PIC: calll L98$pb +; DARWIN-32-PIC-NEXT: L98$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl _lptr-L98$pb([[EAX]]), %eax +; DARWIN-32-PIC-NEXT: addl $64, %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _bat08: +; DARWIN-64-STATIC: movq _lptr(%rip), %rax +; DARWIN-64-STATIC-NEXT: addq $64, %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _bat08: +; DARWIN-64-DYNAMIC: movq _lptr(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: addq $64, %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _bat08: +; DARWIN-64-PIC: movq _lptr(%rip), %rax +; DARWIN-64-PIC-NEXT: addq $64, %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @bam00() nounwind { +entry: + ret i8* bitcast (i32* getelementptr ([131072 x i32]* @src, i32 0, i64 65536) to i8*) +; LINUX-64-STATIC: bam00: +; LINUX-64-STATIC: movl $src+262144, %eax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: bam00: +; LINUX-32-STATIC: movl $src+262144, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: bam00: +; LINUX-32-PIC: movl $src+262144, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: bam00: +; LINUX-64-PIC: movl $262144, %eax +; LINUX-64-PIC-NEXT: addq src@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _bam00: +; DARWIN-32-STATIC: movl $_src+262144, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _bam00: +; DARWIN-32-DYNAMIC: movl $262144, %eax +; DARWIN-32-DYNAMIC-NEXT: addl L_src$non_lazy_ptr, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _bam00: +; DARWIN-32-PIC: calll L99$pb +; DARWIN-32-PIC-NEXT: L99$pb: +; 
DARWIN-32-PIC-NEXT: popl [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl $262144, %eax +; DARWIN-32-PIC-NEXT: addl L_src$non_lazy_ptr-L99$pb([[ECX]]), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _bam00: +; DARWIN-64-STATIC: movl $262144, %eax +; DARWIN-64-STATIC-NEXT: addq _src@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _bam00: +; DARWIN-64-DYNAMIC: movl $262144, %eax +; DARWIN-64-DYNAMIC-NEXT: addq _src@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _bam00: +; DARWIN-64-PIC: movl $262144, %eax +; DARWIN-64-PIC-NEXT: addq _src@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @bam01() nounwind { +entry: + ret i8* bitcast (i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 65536) to i8*) +; LINUX-64-STATIC: bam01: +; LINUX-64-STATIC: movl $dst+262144, %eax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: bam01: +; LINUX-32-STATIC: movl $dst+262144, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: bam01: +; LINUX-32-PIC: movl $dst+262144, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: bam01: +; LINUX-64-PIC: movl $262144, %eax +; LINUX-64-PIC-NEXT: addq dst@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _bam01: +; DARWIN-32-STATIC: movl $_dst+262144, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _bam01: +; DARWIN-32-DYNAMIC: movl $262144, %eax +; DARWIN-32-DYNAMIC-NEXT: addl L_dst$non_lazy_ptr, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _bam01: +; DARWIN-32-PIC: calll L100$pb +; DARWIN-32-PIC-NEXT: L100$pb: +; DARWIN-32-PIC-NEXT: popl [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl $262144, %eax +; DARWIN-32-PIC-NEXT: addl L_dst$non_lazy_ptr-L100$pb([[ECX]]), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _bam01: +; DARWIN-64-STATIC: movl $262144, %eax +; DARWIN-64-STATIC-NEXT: addq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _bam01: +; DARWIN-64-DYNAMIC: movl $262144, %eax +; DARWIN-64-DYNAMIC-NEXT: 
addq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _bam01: +; DARWIN-64-PIC: movl $262144, %eax +; DARWIN-64-PIC-NEXT: addq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @bxm01() nounwind { +entry: + ret i8* bitcast (i32* getelementptr ([32 x i32]* @xdst, i32 0, i64 65536) to i8*) +; LINUX-64-STATIC: bxm01: +; LINUX-64-STATIC: movl $xdst+262144, %eax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: bxm01: +; LINUX-32-STATIC: movl $xdst+262144, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: bxm01: +; LINUX-32-PIC: movl $xdst+262144, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: bxm01: +; LINUX-64-PIC: movl $262144, %eax +; LINUX-64-PIC-NEXT: addq xdst@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _bxm01: +; DARWIN-32-STATIC: movl $_xdst+262144, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _bxm01: +; DARWIN-32-DYNAMIC: movl $262144, %eax +; DARWIN-32-DYNAMIC-NEXT: addl L_xdst$non_lazy_ptr, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _bxm01: +; DARWIN-32-PIC: calll L101$pb +; DARWIN-32-PIC-NEXT: L101$pb: +; DARWIN-32-PIC-NEXT: popl [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl $262144, %eax +; DARWIN-32-PIC-NEXT: addl L_xdst$non_lazy_ptr-L101$pb([[ECX]]), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _bxm01: +; DARWIN-64-STATIC: movl $262144, %eax +; DARWIN-64-STATIC-NEXT: addq _xdst@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _bxm01: +; DARWIN-64-DYNAMIC: movl $262144, %eax +; DARWIN-64-DYNAMIC-NEXT: addq _xdst@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _bxm01: +; DARWIN-64-PIC: movl $262144, %eax +; DARWIN-64-PIC-NEXT: addq _xdst@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @bam02() nounwind { +entry: + %0 = load i32** @ptr, align 8 + %1 = getelementptr i32* %0, i64 65536 + %2 = bitcast i32* %1 to i8* + ret i8* %2 +; LINUX-64-STATIC: bam02: +; LINUX-64-STATIC: movl $262144, 
%eax +; LINUX-64-STATIC: addq ptr(%rip), %rax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: bam02: +; LINUX-32-STATIC: movl $262144, %eax +; LINUX-32-STATIC-NEXT: addl ptr, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: bam02: +; LINUX-32-PIC: movl $262144, %eax +; LINUX-32-PIC-NEXT: addl ptr, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: bam02: +; LINUX-64-PIC: movq ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl $262144, %eax +; LINUX-64-PIC-NEXT: addq ([[RCX]]), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _bam02: +; DARWIN-32-STATIC: movl $262144, %eax +; DARWIN-32-STATIC-NEXT: addl _ptr, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _bam02: +; DARWIN-32-DYNAMIC: movl L_ptr$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl $262144, %eax +; DARWIN-32-DYNAMIC-NEXT: addl ([[ECX]]), %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _bam02: +; DARWIN-32-PIC: calll L102$pb +; DARWIN-32-PIC-NEXT: L102$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L102$pb([[EAX]]), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl $262144, %eax +; DARWIN-32-PIC-NEXT: addl ([[ECX]]), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _bam02: +; DARWIN-64-STATIC: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-STATIC-NEXT: movl $262144, %eax +; DARWIN-64-STATIC-NEXT: addq ([[RCX]]), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _bam02: +; DARWIN-64-DYNAMIC: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movl $262144, %eax +; DARWIN-64-DYNAMIC-NEXT: addq ([[RCX]]), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _bam02: +; DARWIN-64-PIC: movq _ptr@GOTPCREL(%rip), [[RCX:%r.x]] +; DARWIN-64-PIC-NEXT: movl $262144, %eax +; DARWIN-64-PIC-NEXT: addq ([[RCX]]), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @bam03() nounwind { +entry: + ret i8* bitcast (i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 65536) to i8*) +; LINUX-64-STATIC: bam03: +; 
LINUX-64-STATIC: movl $dsrc+262144, %eax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: bam03: +; LINUX-32-STATIC: movl $dsrc+262144, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: bam03: +; LINUX-32-PIC: movl $dsrc+262144, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: bam03: +; LINUX-64-PIC: movl $262144, %eax +; LINUX-64-PIC-NEXT: addq dsrc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _bam03: +; DARWIN-32-STATIC: movl $_dsrc+262144, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _bam03: +; DARWIN-32-DYNAMIC: movl $_dsrc+262144, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _bam03: +; DARWIN-32-PIC: calll L103$pb +; DARWIN-32-PIC-NEXT: L103$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: leal (_dsrc-L103$pb)+262144([[EAX]]), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _bam03: +; DARWIN-64-STATIC: leaq _dsrc+262144(%rip), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _bam03: +; DARWIN-64-DYNAMIC: leaq _dsrc+262144(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _bam03: +; DARWIN-64-PIC: leaq _dsrc+262144(%rip), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @bam04() nounwind { +entry: + ret i8* bitcast (i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 65536) to i8*) +; LINUX-64-STATIC: bam04: +; LINUX-64-STATIC: movl $ddst+262144, %eax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: bam04: +; LINUX-32-STATIC: movl $ddst+262144, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: bam04: +; LINUX-32-PIC: movl $ddst+262144, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: bam04: +; LINUX-64-PIC: movl $262144, %eax +; LINUX-64-PIC-NEXT: addq ddst@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _bam04: +; DARWIN-32-STATIC: movl $_ddst+262144, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _bam04: +; DARWIN-32-DYNAMIC: movl $_ddst+262144, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _bam04: +; 
DARWIN-32-PIC: calll L104$pb +; DARWIN-32-PIC-NEXT: L104$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: leal (_ddst-L104$pb)+262144([[EAX]]), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _bam04: +; DARWIN-64-STATIC: leaq _ddst+262144(%rip), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _bam04: +; DARWIN-64-DYNAMIC: leaq _ddst+262144(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _bam04: +; DARWIN-64-PIC: leaq _ddst+262144(%rip), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @bam05() nounwind { +entry: + %0 = load i32** @dptr, align 8 + %1 = getelementptr i32* %0, i64 65536 + %2 = bitcast i32* %1 to i8* + ret i8* %2 +; LINUX-64-STATIC: bam05: +; LINUX-64-STATIC: movl $262144, %eax +; LINUX-64-STATIC: addq dptr(%rip), %rax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: bam05: +; LINUX-32-STATIC: movl $262144, %eax +; LINUX-32-STATIC-NEXT: addl dptr, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: bam05: +; LINUX-32-PIC: movl $262144, %eax +; LINUX-32-PIC-NEXT: addl dptr, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: bam05: +; LINUX-64-PIC: movq dptr@GOTPCREL(%rip), [[RCX:%r.x]] +; LINUX-64-PIC-NEXT: movl $262144, %eax +; LINUX-64-PIC-NEXT: addq ([[RCX]]), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _bam05: +; DARWIN-32-STATIC: movl $262144, %eax +; DARWIN-32-STATIC-NEXT: addl _dptr, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _bam05: +; DARWIN-32-DYNAMIC: movl $262144, %eax +; DARWIN-32-DYNAMIC-NEXT: addl _dptr, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _bam05: +; DARWIN-32-PIC: calll L105$pb +; DARWIN-32-PIC-NEXT: L105$pb: +; DARWIN-32-PIC-NEXT: popl [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl $262144, %eax +; DARWIN-32-PIC-NEXT: addl _dptr-L105$pb([[ECX]]), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _bam05: +; DARWIN-64-STATIC: movl $262144, %eax +; DARWIN-64-STATIC-NEXT: addq _dptr(%rip), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: 
_bam05: +; DARWIN-64-DYNAMIC: movl $262144, %eax +; DARWIN-64-DYNAMIC-NEXT: addq _dptr(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _bam05: +; DARWIN-64-PIC: movl $262144, %eax +; DARWIN-64-PIC-NEXT: addq _dptr(%rip), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @bam06() nounwind { +entry: + ret i8* bitcast (i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 65536) to i8*) +; LINUX-64-STATIC: bam06: +; LINUX-64-STATIC: movl $lsrc+262144, %eax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: bam06: +; LINUX-32-STATIC: movl $lsrc+262144, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: bam06: +; LINUX-32-PIC: movl $lsrc+262144, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: bam06: +; LINUX-64-PIC: leaq lsrc+262144(%rip), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _bam06: +; DARWIN-32-STATIC: movl $_lsrc+262144, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _bam06: +; DARWIN-32-DYNAMIC: movl $_lsrc+262144, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _bam06: +; DARWIN-32-PIC: calll L106$pb +; DARWIN-32-PIC-NEXT: L106$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: leal (_lsrc-L106$pb)+262144([[EAX]]), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _bam06: +; DARWIN-64-STATIC: leaq _lsrc+262144(%rip), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _bam06: +; DARWIN-64-DYNAMIC: leaq _lsrc+262144(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _bam06: +; DARWIN-64-PIC: leaq _lsrc+262144(%rip), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @bam07() nounwind { +entry: + ret i8* bitcast (i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 65536) to i8*) +; LINUX-64-STATIC: bam07: +; LINUX-64-STATIC: movl $ldst+262144, %eax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: bam07: +; LINUX-32-STATIC: movl $ldst+262144, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: bam07: +; LINUX-32-PIC: movl $ldst+262144, %eax +; LINUX-32-PIC-NEXT: ret + +; 
LINUX-64-PIC: bam07: +; LINUX-64-PIC: leaq ldst+262144(%rip), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _bam07: +; DARWIN-32-STATIC: movl $_ldst+262144, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _bam07: +; DARWIN-32-DYNAMIC: movl $_ldst+262144, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _bam07: +; DARWIN-32-PIC: calll L107$pb +; DARWIN-32-PIC-NEXT: L107$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: leal (_ldst-L107$pb)+262144([[EAX]]), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _bam07: +; DARWIN-64-STATIC: leaq _ldst+262144(%rip), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _bam07: +; DARWIN-64-DYNAMIC: leaq _ldst+262144(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _bam07: +; DARWIN-64-PIC: leaq _ldst+262144(%rip), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @bam08() nounwind { +entry: + %0 = load i32** @lptr, align 8 + %1 = getelementptr i32* %0, i64 65536 + %2 = bitcast i32* %1 to i8* + ret i8* %2 +; LINUX-64-STATIC: bam08: +; LINUX-64-STATIC: movl $262144, %eax +; LINUX-64-STATIC: addq lptr(%rip), %rax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: bam08: +; LINUX-32-STATIC: movl $262144, %eax +; LINUX-32-STATIC-NEXT: addl lptr, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: bam08: +; LINUX-32-PIC: movl $262144, %eax +; LINUX-32-PIC-NEXT: addl lptr, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: bam08: +; LINUX-64-PIC: movl $262144, %eax +; LINUX-64-PIC-NEXT: addq lptr(%rip), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _bam08: +; DARWIN-32-STATIC: movl $262144, %eax +; DARWIN-32-STATIC-NEXT: addl _lptr, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _bam08: +; DARWIN-32-DYNAMIC: movl $262144, %eax +; DARWIN-32-DYNAMIC-NEXT: addl _lptr, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _bam08: +; DARWIN-32-PIC: calll L108$pb +; DARWIN-32-PIC-NEXT: L108$pb: +; DARWIN-32-PIC-NEXT: popl [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 
$262144, %eax +; DARWIN-32-PIC-NEXT: addl _lptr-L108$pb([[ECX]]), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _bam08: +; DARWIN-64-STATIC: movl $262144, %eax +; DARWIN-64-STATIC-NEXT: addq _lptr(%rip), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _bam08: +; DARWIN-64-DYNAMIC: movl $262144, %eax +; DARWIN-64-DYNAMIC-NEXT: addq _lptr(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _bam08: +; DARWIN-64-PIC: movl $262144, %eax +; DARWIN-64-PIC-NEXT: addq _lptr(%rip), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @cat00(i64 %i) nounwind { +entry: + %0 = add i64 %i, 16 + %1 = getelementptr [131072 x i32]* @src, i64 0, i64 %0 + %2 = bitcast i32* %1 to i8* + ret i8* %2 +; LINUX-64-STATIC: cat00: +; LINUX-64-STATIC: leaq src+64(,%rdi,4), %rax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: cat00: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: leal src+64(,[[EAX]],4), %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: cat00: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: leal src+64(,[[EAX]],4), %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: cat00: +; LINUX-64-PIC: movq src@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _cat00: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: leal _src+64(,[[EAX]],4), %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _cat00: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_src$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: leal 64([[ECX]],[[EAX]],4), %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _cat00: +; DARWIN-32-PIC: calll L109$pb +; DARWIN-32-PIC-NEXT: L109$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L109$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: leal 64([[EAX]],[[ECX]],4), %eax 
+; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _cat00: +; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _cat00: +; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _cat00: +; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @cxt00(i64 %i) nounwind { +entry: + %0 = add i64 %i, 16 + %1 = getelementptr [32 x i32]* @xsrc, i64 0, i64 %0 + %2 = bitcast i32* %1 to i8* + ret i8* %2 +; LINUX-64-STATIC: cxt00: +; LINUX-64-STATIC: leaq xsrc+64(,%rdi,4), %rax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: cxt00: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: leal xsrc+64(,[[EAX]],4), %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: cxt00: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: leal xsrc+64(,[[EAX]],4), %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: cxt00: +; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _cxt00: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: leal _xsrc+64(,[[EAX]],4), %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _cxt00: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_xsrc$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: leal 64([[ECX]],[[EAX]],4), %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _cxt00: +; DARWIN-32-PIC: calll L110$pb +; DARWIN-32-PIC-NEXT: L110$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L110$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: leal 
64([[EAX]],[[ECX]],4), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _cxt00: +; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _cxt00: +; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _cxt00: +; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @cat01(i64 %i) nounwind { +entry: + %0 = add i64 %i, 16 + %1 = getelementptr [131072 x i32]* @dst, i64 0, i64 %0 + %2 = bitcast i32* %1 to i8* + ret i8* %2 +; LINUX-64-STATIC: cat01: +; LINUX-64-STATIC: leaq dst+64(,%rdi,4), %rax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: cat01: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: leal dst+64(,[[EAX]],4), %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: cat01: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: leal dst+64(,[[EAX]],4), %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: cat01: +; LINUX-64-PIC: movq dst@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _cat01: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: leal _dst+64(,[[EAX]],4), %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _cat01: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_dst$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: leal 64([[ECX]],[[EAX]],4), %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _cat01: +; DARWIN-32-PIC: calll L111$pb +; DARWIN-32-PIC-NEXT: L111$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L111$pb([[EAX]]), [[EAX:%e.x]] +; 
DARWIN-32-PIC-NEXT: leal 64([[EAX]],[[ECX]],4), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _cat01: +; DARWIN-64-STATIC: movq _dst@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _cat01: +; DARWIN-64-DYNAMIC: movq _dst@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _cat01: +; DARWIN-64-PIC: movq _dst@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @cxt01(i64 %i) nounwind { +entry: + %0 = add i64 %i, 16 + %1 = getelementptr [32 x i32]* @xdst, i64 0, i64 %0 + %2 = bitcast i32* %1 to i8* + ret i8* %2 +; LINUX-64-STATIC: cxt01: +; LINUX-64-STATIC: leaq xdst+64(,%rdi,4), %rax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: cxt01: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: leal xdst+64(,[[EAX]],4), %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: cxt01: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: leal xdst+64(,[[EAX]],4), %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: cxt01: +; LINUX-64-PIC: movq xdst@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _cxt01: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: leal _xdst+64(,[[EAX]],4), %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _cxt01: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_xdst$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: leal 64([[ECX]],[[EAX]],4), %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _cxt01: +; DARWIN-32-PIC: calll L112$pb +; DARWIN-32-PIC-NEXT: L112$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L112$pb([[EAX]]), 
[[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: leal 64([[EAX]],[[ECX]],4), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _cxt01: +; DARWIN-64-STATIC: movq _xdst@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _cxt01: +; DARWIN-64-DYNAMIC: movq _xdst@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _cxt01: +; DARWIN-64-PIC: movq _xdst@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @cat02(i64 %i) nounwind { +entry: + %0 = load i32** @ptr, align 8 + %1 = add i64 %i, 16 + %2 = getelementptr i32* %0, i64 %1 + %3 = bitcast i32* %2 to i8* + ret i8* %3 +; LINUX-64-STATIC: cat02: +; LINUX-64-STATIC: movq ptr(%rip), [[RAX:%r.x]] +; LINUX-64-STATIC: leaq 64([[RAX]],%rdi,4), %rax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: cat02: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl ptr, [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: leal 64([[ECX]],[[EAX]],4), %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: cat02: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl ptr, [[ECX:%e.x]] +; LINUX-32-PIC-NEXT: leal 64([[ECX]],[[EAX]],4), %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: cat02: +; LINUX-64-PIC: movq ptr@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movq ([[RAX]]), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _cat02: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _ptr, [[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: leal 64([[ECX]],[[EAX]],4), %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _cat02: +; DARWIN-32-DYNAMIC: movl L_ptr$non_lazy_ptr, [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl ([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; 
DARWIN-32-DYNAMIC-NEXT: leal 64([[EAX]],[[ECX]],4), %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _cat02: +; DARWIN-32-PIC: calll L113$pb +; DARWIN-32-PIC-NEXT: L113$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L113$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl ([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: leal 64([[EAX]],[[ECX]],4), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _cat02: +; DARWIN-64-STATIC: movq _ptr@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq ([[RAX]]), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _cat02: +; DARWIN-64-DYNAMIC: movq _ptr@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq ([[RAX]]), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _cat02: +; DARWIN-64-PIC: movq _ptr@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movq ([[RAX]]), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @cat03(i64 %i) nounwind { +entry: + %0 = add i64 %i, 16 + %1 = getelementptr [131072 x i32]* @dsrc, i64 0, i64 %0 + %2 = bitcast i32* %1 to i8* + ret i8* %2 +; LINUX-64-STATIC: cat03: +; LINUX-64-STATIC: leaq dsrc+64(,%rdi,4), %rax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: cat03: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: leal dsrc+64(,[[EAX]],4), %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: cat03: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: leal dsrc+64(,[[EAX]],4), %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: cat03: +; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _cat03: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; 
DARWIN-32-STATIC-NEXT: leal _dsrc+64(,[[EAX]],4), %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _cat03: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: leal _dsrc+64(,[[EAX]],4), %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _cat03: +; DARWIN-32-PIC: calll L114$pb +; DARWIN-32-PIC-NEXT: L114$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: leal (_dsrc-L114$pb)+64([[EAX]],[[ECX]],4), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _cat03: +; DARWIN-64-STATIC: leaq _dsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _cat03: +; DARWIN-64-DYNAMIC: leaq _dsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _cat03: +; DARWIN-64-PIC: leaq _dsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @cat04(i64 %i) nounwind { +entry: + %0 = add i64 %i, 16 + %1 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %0 + %2 = bitcast i32* %1 to i8* + ret i8* %2 +; LINUX-64-STATIC: cat04: +; LINUX-64-STATIC: leaq ddst+64(,%rdi,4), %rax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: cat04: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: leal ddst+64(,[[EAX]],4), %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: cat04: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: leal ddst+64(,[[EAX]],4), %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: cat04: +; LINUX-64-PIC: movq ddst@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _cat04: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: leal _ddst+64(,[[EAX]],4), %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _cat04: +; 
DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: leal _ddst+64(,[[EAX]],4), %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _cat04: +; DARWIN-32-PIC: calll L115$pb +; DARWIN-32-PIC-NEXT: L115$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: leal (_ddst-L115$pb)+64([[EAX]],[[ECX]],4), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _cat04: +; DARWIN-64-STATIC: leaq _ddst(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _cat04: +; DARWIN-64-DYNAMIC: leaq _ddst(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _cat04: +; DARWIN-64-PIC: leaq _ddst(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @cat05(i64 %i) nounwind { +entry: + %0 = load i32** @dptr, align 8 + %1 = add i64 %i, 16 + %2 = getelementptr i32* %0, i64 %1 + %3 = bitcast i32* %2 to i8* + ret i8* %3 +; LINUX-64-STATIC: cat05: +; LINUX-64-STATIC: movq dptr(%rip), [[RAX:%r.x]] +; LINUX-64-STATIC: leaq 64([[RAX]],%rdi,4), %rax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: cat05: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl dptr, [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: leal 64([[ECX]],[[EAX]],4), %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: cat05: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl dptr, [[ECX:%e.x]] +; LINUX-32-PIC-NEXT: leal 64([[ECX]],[[EAX]],4), %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: cat05: +; LINUX-64-PIC: movq dptr@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movq ([[RAX]]), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _cat05: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _dptr, 
[[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: leal 64([[ECX]],[[EAX]],4), %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _cat05: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl _dptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: leal 64([[ECX]],[[EAX]],4), %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _cat05: +; DARWIN-32-PIC: calll L116$pb +; DARWIN-32-PIC-NEXT: L116$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl _dptr-L116$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: leal 64([[EAX]],[[ECX]],4), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _cat05: +; DARWIN-64-STATIC: movq _dptr(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _cat05: +; DARWIN-64-DYNAMIC: movq _dptr(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _cat05: +; DARWIN-64-PIC: movq _dptr(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @cat06(i64 %i) nounwind { +entry: + %0 = add i64 %i, 16 + %1 = getelementptr [131072 x i32]* @lsrc, i64 0, i64 %0 + %2 = bitcast i32* %1 to i8* + ret i8* %2 +; LINUX-64-STATIC: cat06: +; LINUX-64-STATIC: leaq lsrc+64(,%rdi,4), %rax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: cat06: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: leal lsrc+64(,[[EAX]],4), %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: cat06: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: leal lsrc+64(,[[EAX]],4), %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: cat06: +; LINUX-64-PIC: leaq lsrc(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _cat06: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: leal 
_lsrc+64(,[[EAX]],4), %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _cat06: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: leal _lsrc+64(,[[EAX]],4), %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _cat06: +; DARWIN-32-PIC: calll L117$pb +; DARWIN-32-PIC-NEXT: L117$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: leal (_lsrc-L117$pb)+64([[EAX]],[[ECX]],4), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _cat06: +; DARWIN-64-STATIC: leaq _lsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _cat06: +; DARWIN-64-DYNAMIC: leaq _lsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _cat06: +; DARWIN-64-PIC: leaq _lsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @cat07(i64 %i) nounwind { +entry: + %0 = add i64 %i, 16 + %1 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %0 + %2 = bitcast i32* %1 to i8* + ret i8* %2 +; LINUX-64-STATIC: cat07: +; LINUX-64-STATIC: leaq ldst+64(,%rdi,4), %rax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: cat07: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: leal ldst+64(,[[EAX]],4), %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: cat07: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: leal ldst+64(,[[EAX]],4), %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: cat07: +; LINUX-64-PIC: leaq ldst(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _cat07: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: leal _ldst+64(,[[EAX]],4), %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _cat07: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; 
DARWIN-32-DYNAMIC-NEXT: leal _ldst+64(,[[EAX]],4), %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _cat07: +; DARWIN-32-PIC: calll L118$pb +; DARWIN-32-PIC-NEXT: L118$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: leal (_ldst-L118$pb)+64([[EAX]],[[ECX]],4), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _cat07: +; DARWIN-64-STATIC: leaq _ldst(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _cat07: +; DARWIN-64-DYNAMIC: leaq _ldst(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _cat07: +; DARWIN-64-PIC: leaq _ldst(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @cat08(i64 %i) nounwind { +entry: + %0 = load i32** @lptr, align 8 + %1 = add i64 %i, 16 + %2 = getelementptr i32* %0, i64 %1 + %3 = bitcast i32* %2 to i8* + ret i8* %3 +; LINUX-64-STATIC: cat08: +; LINUX-64-STATIC: movq lptr(%rip), [[RAX:%r.x]] +; LINUX-64-STATIC: leaq 64([[RAX]],%rdi,4), %rax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: cat08: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl lptr, [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: leal 64([[ECX]],[[EAX]],4), %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: cat08: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl lptr, [[ECX:%e.x]] +; LINUX-32-PIC-NEXT: leal 64([[ECX]],[[EAX]],4), %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: cat08: +; LINUX-64-PIC: movq lptr(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _cat08: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _lptr, [[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: leal 64([[ECX]],[[EAX]],4), %eax +; DARWIN-32-STATIC-NEXT: ret + +; 
DARWIN-32-DYNAMIC: _cat08: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl _lptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: leal 64([[ECX]],[[EAX]],4), %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _cat08: +; DARWIN-32-PIC: calll L119$pb +; DARWIN-32-PIC-NEXT: L119$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl _lptr-L119$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: leal 64([[EAX]],[[ECX]],4), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _cat08: +; DARWIN-64-STATIC: movq _lptr(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _cat08: +; DARWIN-64-DYNAMIC: movq _lptr(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _cat08: +; DARWIN-64-PIC: movq _lptr(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: leaq 64([[RAX]],%rdi,4), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @cam00(i64 %i) nounwind { +entry: + %0 = add i64 %i, 65536 + %1 = getelementptr [131072 x i32]* @src, i64 0, i64 %0 + %2 = bitcast i32* %1 to i8* + ret i8* %2 +; LINUX-64-STATIC: cam00: +; LINUX-64-STATIC: leaq src+262144(,%rdi,4), %rax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: cam00: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: leal src+262144(,[[EAX]],4), %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: cam00: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: leal src+262144(,[[EAX]],4), %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: cam00: +; LINUX-64-PIC: movq src@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _cam00: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: leal _src+262144(,[[EAX]],4), %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: 
_cam00: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_src$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: leal 262144([[ECX]],[[EAX]],4), %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _cam00: +; DARWIN-32-PIC: calll L120$pb +; DARWIN-32-PIC-NEXT: L120$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L120$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: leal 262144([[EAX]],[[ECX]],4), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _cam00: +; DARWIN-64-STATIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _cam00: +; DARWIN-64-DYNAMIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _cam00: +; DARWIN-64-PIC: movq _src@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @cxm00(i64 %i) nounwind { +entry: + %0 = add i64 %i, 65536 + %1 = getelementptr [32 x i32]* @xsrc, i64 0, i64 %0 + %2 = bitcast i32* %1 to i8* + ret i8* %2 +; LINUX-64-STATIC: cxm00: +; LINUX-64-STATIC: leaq xsrc+262144(,%rdi,4), %rax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: cxm00: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: leal xsrc+262144(,[[EAX]],4), %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: cxm00: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: leal xsrc+262144(,[[EAX]],4), %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: cxm00: +; LINUX-64-PIC: movq xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _cxm00: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: leal _xsrc+262144(,[[EAX]],4), %eax 
+; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _cxm00: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_xsrc$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: leal 262144([[ECX]],[[EAX]],4), %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _cxm00: +; DARWIN-32-PIC: calll L121$pb +; DARWIN-32-PIC-NEXT: L121$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L121$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: leal 262144([[EAX]],[[ECX]],4), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _cxm00: +; DARWIN-64-STATIC: movq _xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _cxm00: +; DARWIN-64-DYNAMIC: movq _xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _cxm00: +; DARWIN-64-PIC: movq _xsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @cam01(i64 %i) nounwind { +entry: + %0 = add i64 %i, 65536 + %1 = getelementptr [131072 x i32]* @dst, i64 0, i64 %0 + %2 = bitcast i32* %1 to i8* + ret i8* %2 +; LINUX-64-STATIC: cam01: +; LINUX-64-STATIC: leaq dst+262144(,%rdi,4), %rax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: cam01: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: leal dst+262144(,[[EAX]],4), %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: cam01: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: leal dst+262144(,[[EAX]],4), %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: cam01: +; LINUX-64-PIC: movq dst@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _cam01: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; 
DARWIN-32-STATIC-NEXT: leal _dst+262144(,[[EAX]],4), %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _cam01: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_dst$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: leal 262144([[ECX]],[[EAX]],4), %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _cam01: +; DARWIN-32-PIC: calll L122$pb +; DARWIN-32-PIC-NEXT: L122$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L122$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: leal 262144([[EAX]],[[ECX]],4), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _cam01: +; DARWIN-64-STATIC: movq _dst@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _cam01: +; DARWIN-64-DYNAMIC: movq _dst@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _cam01: +; DARWIN-64-PIC: movq _dst@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @cxm01(i64 %i) nounwind { +entry: + %0 = add i64 %i, 65536 + %1 = getelementptr [32 x i32]* @xdst, i64 0, i64 %0 + %2 = bitcast i32* %1 to i8* + ret i8* %2 +; LINUX-64-STATIC: cxm01: +; LINUX-64-STATIC: leaq xdst+262144(,%rdi,4), %rax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: cxm01: +; LINUX-32-STATIC: movl 4(%esp), %eax +; LINUX-32-STATIC-NEXT: leal xdst+262144(,[[EAX]],4), %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: cxm01: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: leal xdst+262144(,[[EAX]],4), %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: cxm01: +; LINUX-64-PIC: movq xdst@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _cxm01: +; 
DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: leal _xdst+262144(,[[EAX]],4), %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _cxm01: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl L_xdst$non_lazy_ptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: leal 262144([[ECX]],[[EAX]],4), %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _cxm01: +; DARWIN-32-PIC: calll L123$pb +; DARWIN-32-PIC-NEXT: L123$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L123$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: leal 262144([[EAX]],[[ECX]],4), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _cxm01: +; DARWIN-64-STATIC: movq _xdst@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _cxm01: +; DARWIN-64-DYNAMIC: movq _xdst@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _cxm01: +; DARWIN-64-PIC: movq _xdst@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @cam02(i64 %i) nounwind { +entry: + %0 = load i32** @ptr, align 8 + %1 = add i64 %i, 65536 + %2 = getelementptr i32* %0, i64 %1 + %3 = bitcast i32* %2 to i8* + ret i8* %3 +; LINUX-64-STATIC: cam02: +; LINUX-64-STATIC: movq ptr(%rip), [[RAX:%r.x]] +; LINUX-64-STATIC: leaq 262144([[RAX]],%rdi,4), %rax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: cam02: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl ptr, [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: leal 262144([[ECX]],[[EAX]],4), %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: cam02: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl ptr, [[ECX:%e.x]] +; LINUX-32-PIC-NEXT: leal 262144([[ECX]],[[EAX]],4), %eax +; 
LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: cam02: +; LINUX-64-PIC: movq ptr@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movq ([[RAX]]), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _cam02: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _ptr, [[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: leal 262144([[ECX]],[[EAX]],4), %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _cam02: +; DARWIN-32-DYNAMIC: movl L_ptr$non_lazy_ptr, [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl ([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: leal 262144([[EAX]],[[ECX]],4), %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _cam02: +; DARWIN-32-PIC: calll L124$pb +; DARWIN-32-PIC-NEXT: L124$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L124$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl ([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: leal 262144([[EAX]],[[ECX]],4), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _cam02: +; DARWIN-64-STATIC: movq _ptr@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq ([[RAX]]), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _cam02: +; DARWIN-64-DYNAMIC: movq _ptr@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq ([[RAX]]), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _cam02: +; DARWIN-64-PIC: movq _ptr@GOTPCREL(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: movq ([[RAX]]), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @cam03(i64 %i) nounwind { +entry: + %0 = add i64 %i, 65536 + %1 = getelementptr [131072 x i32]* @dsrc, i64 0, i64 %0 + %2 = 
bitcast i32* %1 to i8* + ret i8* %2 +; LINUX-64-STATIC: cam03: +; LINUX-64-STATIC: leaq dsrc+262144(,%rdi,4), %rax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: cam03: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: leal dsrc+262144(,[[EAX]],4), %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: cam03: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: leal dsrc+262144(,[[EAX]],4), %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: cam03: +; LINUX-64-PIC: movq dsrc@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _cam03: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: leal _dsrc+262144(,[[EAX]],4), %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _cam03: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: leal _dsrc+262144(,[[EAX]],4), %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _cam03: +; DARWIN-32-PIC: calll L125$pb +; DARWIN-32-PIC-NEXT: L125$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: leal (_dsrc-L125$pb)+262144([[EAX]],[[ECX]],4), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _cam03: +; DARWIN-64-STATIC: leaq _dsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _cam03: +; DARWIN-64-DYNAMIC: leaq _dsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _cam03: +; DARWIN-64-PIC: leaq _dsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @cam04(i64 %i) nounwind { +entry: + %0 = add i64 %i, 65536 + %1 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %0 + %2 = bitcast i32* %1 to i8* + ret i8* %2 +; LINUX-64-STATIC: cam04: +; LINUX-64-STATIC: leaq 
ddst+262144(,%rdi,4), %rax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: cam04: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: leal ddst+262144(,[[EAX]],4), %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: cam04: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: leal ddst+262144(,[[EAX]],4), %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: cam04: +; LINUX-64-PIC: movq ddst@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _cam04: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: leal _ddst+262144(,[[EAX]],4), %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _cam04: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: leal _ddst+262144(,[[EAX]],4), %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _cam04: +; DARWIN-32-PIC: calll L126$pb +; DARWIN-32-PIC-NEXT: L126$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: leal (_ddst-L126$pb)+262144([[EAX]],[[ECX]],4), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _cam04: +; DARWIN-64-STATIC: leaq _ddst(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _cam04: +; DARWIN-64-DYNAMIC: leaq _ddst(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _cam04: +; DARWIN-64-PIC: leaq _ddst(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @cam05(i64 %i) nounwind { +entry: + %0 = load i32** @dptr, align 8 + %1 = add i64 %i, 65536 + %2 = getelementptr i32* %0, i64 %1 + %3 = bitcast i32* %2 to i8* + ret i8* %3 +; LINUX-64-STATIC: cam05: +; LINUX-64-STATIC: movq dptr(%rip), [[RAX:%r.x]] +; LINUX-64-STATIC: leaq 262144([[RAX]],%rdi,4), %rax +; 
LINUX-64-STATIC: ret + +; LINUX-32-STATIC: cam05: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl dptr, [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: leal 262144([[ECX]],[[EAX]],4), %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: cam05: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl dptr, [[ECX:%e.x]] +; LINUX-32-PIC-NEXT: leal 262144([[ECX]],[[EAX]],4), %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: cam05: +; LINUX-64-PIC: movq dptr@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: movq ([[RAX]]), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _cam05: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _dptr, [[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: leal 262144([[ECX]],[[EAX]],4), %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _cam05: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl _dptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: leal 262144([[ECX]],[[EAX]],4), %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _cam05: +; DARWIN-32-PIC: calll L127$pb +; DARWIN-32-PIC-NEXT: L127$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl _dptr-L127$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: leal 262144([[EAX]],[[ECX]],4), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _cam05: +; DARWIN-64-STATIC: movq _dptr(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _cam05: +; DARWIN-64-DYNAMIC: movq _dptr(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _cam05: +; DARWIN-64-PIC: movq _dptr(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @cam06(i64 %i) nounwind { 
+entry: + %0 = add i64 %i, 65536 + %1 = getelementptr [131072 x i32]* @lsrc, i64 0, i64 %0 + %2 = bitcast i32* %1 to i8* + ret i8* %2 +; LINUX-64-STATIC: cam06: +; LINUX-64-STATIC: leaq lsrc+262144(,%rdi,4), %rax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: cam06: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: leal lsrc+262144(,[[EAX]],4), %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: cam06: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: leal lsrc+262144(,[[EAX]],4), %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: cam06: +; LINUX-64-PIC: leaq lsrc(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _cam06: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: leal _lsrc+262144(,[[EAX]],4), %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _cam06: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: leal _lsrc+262144(,[[EAX]],4), %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _cam06: +; DARWIN-32-PIC: calll L128$pb +; DARWIN-32-PIC-NEXT: L128$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: leal (_lsrc-L128$pb)+262144([[EAX]],[[ECX]],4), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _cam06: +; DARWIN-64-STATIC: leaq _lsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _cam06: +; DARWIN-64-DYNAMIC: leaq _lsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _cam06: +; DARWIN-64-PIC: leaq _lsrc(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @cam07(i64 %i) nounwind { +entry: + %0 = add i64 %i, 65536 + %1 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %0 + %2 = 
bitcast i32* %1 to i8* + ret i8* %2 +; LINUX-64-STATIC: cam07: +; LINUX-64-STATIC: leaq ldst+262144(,%rdi,4), %rax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: cam07: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: leal ldst+262144(,[[EAX]],4), %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: cam07: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: leal ldst+262144(,[[EAX]],4), %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: cam07: +; LINUX-64-PIC: leaq ldst(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _cam07: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: leal _ldst+262144(,[[EAX]],4), %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _cam07: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: leal _ldst+262144(,[[EAX]],4), %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _cam07: +; DARWIN-32-PIC: calll L129$pb +; DARWIN-32-PIC-NEXT: L129$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: leal (_ldst-L129$pb)+262144([[EAX]],[[ECX]],4), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _cam07: +; DARWIN-64-STATIC: leaq _ldst(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _cam07: +; DARWIN-64-DYNAMIC: leaq _ldst(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _cam07: +; DARWIN-64-PIC: leaq _ldst(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define i8* @cam08(i64 %i) nounwind { +entry: + %0 = load i32** @lptr, align 8 + %1 = add i64 %i, 65536 + %2 = getelementptr i32* %0, i64 %1 + %3 = bitcast i32* %2 to i8* + ret i8* %3 +; LINUX-64-STATIC: cam08: +; LINUX-64-STATIC: movq 
lptr(%rip), [[RAX:%r.x]] +; LINUX-64-STATIC: leaq 262144([[RAX]],%rdi,4), %rax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: cam08: +; LINUX-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-STATIC-NEXT: movl lptr, [[ECX:%e.x]] +; LINUX-32-STATIC-NEXT: leal 262144([[ECX]],[[EAX]],4), %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: cam08: +; LINUX-32-PIC: movl 4(%esp), [[EAX:%e.x]] +; LINUX-32-PIC-NEXT: movl lptr, [[ECX:%e.x]] +; LINUX-32-PIC-NEXT: leal 262144([[ECX]],[[EAX]],4), %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: cam08: +; LINUX-64-PIC: movq lptr(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _cam08: +; DARWIN-32-STATIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-STATIC-NEXT: movl _lptr, [[ECX:%e.x]] +; DARWIN-32-STATIC-NEXT: leal 262144([[ECX]],[[EAX]],4), %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _cam08: +; DARWIN-32-DYNAMIC: movl 4(%esp), [[EAX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: movl _lptr, [[ECX:%e.x]] +; DARWIN-32-DYNAMIC-NEXT: leal 262144([[ECX]],[[EAX]],4), %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _cam08: +; DARWIN-32-PIC: calll L130$pb +; DARWIN-32-PIC-NEXT: L130$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl 4(%esp), [[ECX:%e.x]] +; DARWIN-32-PIC-NEXT: movl _lptr-L130$pb([[EAX]]), [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: leal 262144([[EAX]],[[ECX]],4), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _cam08: +; DARWIN-64-STATIC: movq _lptr(%rip), [[RAX:%r.x]] +; DARWIN-64-STATIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _cam08: +; DARWIN-64-DYNAMIC: movq _lptr(%rip), [[RAX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _cam08: +; DARWIN-64-PIC: movq _lptr(%rip), [[RAX:%r.x]] +; DARWIN-64-PIC-NEXT: leaq 262144([[RAX]],%rdi,4), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define void 
@lcallee() nounwind { +entry: + call void @x() nounwind + call void @x() nounwind + call void @x() nounwind + call void @x() nounwind + call void @x() nounwind + call void @x() nounwind + call void @x() nounwind + ret void +; LINUX-64-STATIC: lcallee: +; LINUX-64-STATIC: callq x +; LINUX-64-STATIC: callq x +; LINUX-64-STATIC: callq x +; LINUX-64-STATIC: callq x +; LINUX-64-STATIC: callq x +; LINUX-64-STATIC: callq x +; LINUX-64-STATIC: callq x +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: lcallee: +; LINUX-32-STATIC: subl +; LINUX-32-STATIC-NEXT: calll x +; LINUX-32-STATIC-NEXT: calll x +; LINUX-32-STATIC-NEXT: calll x +; LINUX-32-STATIC-NEXT: calll x +; LINUX-32-STATIC-NEXT: calll x +; LINUX-32-STATIC-NEXT: calll x +; LINUX-32-STATIC-NEXT: calll x +; LINUX-32-STATIC-NEXT: addl +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: lcallee: +; LINUX-32-PIC: subl +; LINUX-32-PIC-NEXT: calll x +; LINUX-32-PIC-NEXT: calll x +; LINUX-32-PIC-NEXT: calll x +; LINUX-32-PIC-NEXT: calll x +; LINUX-32-PIC-NEXT: calll x +; LINUX-32-PIC-NEXT: calll x +; LINUX-32-PIC-NEXT: calll x +; LINUX-32-PIC-NEXT: addl + +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: lcallee: +; LINUX-64-PIC: pushq +; LINUX-64-PIC-NEXT: callq x@PLT +; LINUX-64-PIC-NEXT: callq x@PLT +; LINUX-64-PIC-NEXT: callq x@PLT +; LINUX-64-PIC-NEXT: callq x@PLT +; LINUX-64-PIC-NEXT: callq x@PLT +; LINUX-64-PIC-NEXT: callq x@PLT +; LINUX-64-PIC-NEXT: callq x@PLT +; LINUX-64-PIC-NEXT: popq +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _lcallee: +; DARWIN-32-STATIC: subl $12, %esp +; DARWIN-32-STATIC-NEXT: calll _x +; DARWIN-32-STATIC-NEXT: calll _x +; DARWIN-32-STATIC-NEXT: calll _x +; DARWIN-32-STATIC-NEXT: calll _x +; DARWIN-32-STATIC-NEXT: calll _x +; DARWIN-32-STATIC-NEXT: calll _x +; DARWIN-32-STATIC-NEXT: calll _x +; DARWIN-32-STATIC-NEXT: addl $12, %esp +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _lcallee: +; DARWIN-32-DYNAMIC: subl $12, %esp +; DARWIN-32-DYNAMIC-NEXT: calll L_x$stub +; DARWIN-32-DYNAMIC-NEXT: 
calll L_x$stub +; DARWIN-32-DYNAMIC-NEXT: calll L_x$stub +; DARWIN-32-DYNAMIC-NEXT: calll L_x$stub +; DARWIN-32-DYNAMIC-NEXT: calll L_x$stub +; DARWIN-32-DYNAMIC-NEXT: calll L_x$stub +; DARWIN-32-DYNAMIC-NEXT: calll L_x$stub +; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _lcallee: +; DARWIN-32-PIC: subl $12, %esp +; DARWIN-32-PIC-NEXT: calll L_x$stub +; DARWIN-32-PIC-NEXT: calll L_x$stub +; DARWIN-32-PIC-NEXT: calll L_x$stub +; DARWIN-32-PIC-NEXT: calll L_x$stub +; DARWIN-32-PIC-NEXT: calll L_x$stub +; DARWIN-32-PIC-NEXT: calll L_x$stub +; DARWIN-32-PIC-NEXT: calll L_x$stub +; DARWIN-32-PIC-NEXT: addl $12, %esp +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _lcallee: +; DARWIN-64-STATIC: pushq +; DARWIN-64-STATIC-NEXT: callq _x +; DARWIN-64-STATIC-NEXT: callq _x +; DARWIN-64-STATIC-NEXT: callq _x +; DARWIN-64-STATIC-NEXT: callq _x +; DARWIN-64-STATIC-NEXT: callq _x +; DARWIN-64-STATIC-NEXT: callq _x +; DARWIN-64-STATIC-NEXT: callq _x +; DARWIN-64-STATIC-NEXT: popq +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _lcallee: +; DARWIN-64-DYNAMIC: pushq +; DARWIN-64-DYNAMIC-NEXT: callq _x +; DARWIN-64-DYNAMIC-NEXT: callq _x +; DARWIN-64-DYNAMIC-NEXT: callq _x +; DARWIN-64-DYNAMIC-NEXT: callq _x +; DARWIN-64-DYNAMIC-NEXT: callq _x +; DARWIN-64-DYNAMIC-NEXT: callq _x +; DARWIN-64-DYNAMIC-NEXT: callq _x +; DARWIN-64-DYNAMIC-NEXT: popq +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _lcallee: +; DARWIN-64-PIC: pushq +; DARWIN-64-PIC-NEXT: callq _x +; DARWIN-64-PIC-NEXT: callq _x +; DARWIN-64-PIC-NEXT: callq _x +; DARWIN-64-PIC-NEXT: callq _x +; DARWIN-64-PIC-NEXT: callq _x +; DARWIN-64-PIC-NEXT: callq _x +; DARWIN-64-PIC-NEXT: callq _x +; DARWIN-64-PIC-NEXT: popq +; DARWIN-64-PIC-NEXT: ret +} + +declare void @x() + +define internal void @dcallee() nounwind { +entry: + call void @y() nounwind + call void @y() nounwind + call void @y() nounwind + call void @y() nounwind + call void @y() nounwind + call void @y() 
nounwind + call void @y() nounwind + ret void +; LINUX-64-STATIC: dcallee: +; LINUX-64-STATIC: callq y +; LINUX-64-STATIC: callq y +; LINUX-64-STATIC: callq y +; LINUX-64-STATIC: callq y +; LINUX-64-STATIC: callq y +; LINUX-64-STATIC: callq y +; LINUX-64-STATIC: callq y +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: dcallee: +; LINUX-32-STATIC: subl +; LINUX-32-STATIC-NEXT: calll y +; LINUX-32-STATIC-NEXT: calll y +; LINUX-32-STATIC-NEXT: calll y +; LINUX-32-STATIC-NEXT: calll y +; LINUX-32-STATIC-NEXT: calll y +; LINUX-32-STATIC-NEXT: calll y +; LINUX-32-STATIC-NEXT: calll y +; LINUX-32-STATIC-NEXT: addl +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: dcallee: +; LINUX-32-PIC: subl +; LINUX-32-PIC-NEXT: calll y +; LINUX-32-PIC-NEXT: calll y +; LINUX-32-PIC-NEXT: calll y +; LINUX-32-PIC-NEXT: calll y +; LINUX-32-PIC-NEXT: calll y +; LINUX-32-PIC-NEXT: calll y +; LINUX-32-PIC-NEXT: calll y +; LINUX-32-PIC-NEXT: addl + +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: dcallee: +; LINUX-64-PIC: pushq +; LINUX-64-PIC-NEXT: callq y@PLT +; LINUX-64-PIC-NEXT: callq y@PLT +; LINUX-64-PIC-NEXT: callq y@PLT +; LINUX-64-PIC-NEXT: callq y@PLT +; LINUX-64-PIC-NEXT: callq y@PLT +; LINUX-64-PIC-NEXT: callq y@PLT +; LINUX-64-PIC-NEXT: callq y@PLT +; LINUX-64-PIC-NEXT: popq +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _dcallee: +; DARWIN-32-STATIC: subl $12, %esp +; DARWIN-32-STATIC-NEXT: calll _y +; DARWIN-32-STATIC-NEXT: calll _y +; DARWIN-32-STATIC-NEXT: calll _y +; DARWIN-32-STATIC-NEXT: calll _y +; DARWIN-32-STATIC-NEXT: calll _y +; DARWIN-32-STATIC-NEXT: calll _y +; DARWIN-32-STATIC-NEXT: calll _y +; DARWIN-32-STATIC-NEXT: addl $12, %esp +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _dcallee: +; DARWIN-32-DYNAMIC: subl $12, %esp +; DARWIN-32-DYNAMIC-NEXT: calll L_y$stub +; DARWIN-32-DYNAMIC-NEXT: calll L_y$stub +; DARWIN-32-DYNAMIC-NEXT: calll L_y$stub +; DARWIN-32-DYNAMIC-NEXT: calll L_y$stub +; DARWIN-32-DYNAMIC-NEXT: calll L_y$stub +; DARWIN-32-DYNAMIC-NEXT: calll 
L_y$stub +; DARWIN-32-DYNAMIC-NEXT: calll L_y$stub +; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _dcallee: +; DARWIN-32-PIC: subl $12, %esp +; DARWIN-32-PIC-NEXT: calll L_y$stub +; DARWIN-32-PIC-NEXT: calll L_y$stub +; DARWIN-32-PIC-NEXT: calll L_y$stub +; DARWIN-32-PIC-NEXT: calll L_y$stub +; DARWIN-32-PIC-NEXT: calll L_y$stub +; DARWIN-32-PIC-NEXT: calll L_y$stub +; DARWIN-32-PIC-NEXT: calll L_y$stub +; DARWIN-32-PIC-NEXT: addl $12, %esp +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _dcallee: +; DARWIN-64-STATIC: pushq +; DARWIN-64-STATIC-NEXT: callq _y +; DARWIN-64-STATIC-NEXT: callq _y +; DARWIN-64-STATIC-NEXT: callq _y +; DARWIN-64-STATIC-NEXT: callq _y +; DARWIN-64-STATIC-NEXT: callq _y +; DARWIN-64-STATIC-NEXT: callq _y +; DARWIN-64-STATIC-NEXT: callq _y +; DARWIN-64-STATIC-NEXT: popq +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _dcallee: +; DARWIN-64-DYNAMIC: pushq +; DARWIN-64-DYNAMIC-NEXT: callq _y +; DARWIN-64-DYNAMIC-NEXT: callq _y +; DARWIN-64-DYNAMIC-NEXT: callq _y +; DARWIN-64-DYNAMIC-NEXT: callq _y +; DARWIN-64-DYNAMIC-NEXT: callq _y +; DARWIN-64-DYNAMIC-NEXT: callq _y +; DARWIN-64-DYNAMIC-NEXT: callq _y +; DARWIN-64-DYNAMIC-NEXT: popq +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _dcallee: +; DARWIN-64-PIC: pushq +; DARWIN-64-PIC-NEXT: callq _y +; DARWIN-64-PIC-NEXT: callq _y +; DARWIN-64-PIC-NEXT: callq _y +; DARWIN-64-PIC-NEXT: callq _y +; DARWIN-64-PIC-NEXT: callq _y +; DARWIN-64-PIC-NEXT: callq _y +; DARWIN-64-PIC-NEXT: callq _y +; DARWIN-64-PIC-NEXT: popq +; DARWIN-64-PIC-NEXT: ret +} + +declare void @y() + +define void ()* @address() nounwind { +entry: + ret void ()* @callee +; LINUX-64-STATIC: address: +; LINUX-64-STATIC: movl $callee, %eax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: address: +; LINUX-32-STATIC: movl $callee, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: address: +; LINUX-32-PIC: movl $callee, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: address: 
+; LINUX-64-PIC: movq callee@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _address: +; DARWIN-32-STATIC: movl $_callee, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _address: +; DARWIN-32-DYNAMIC: movl L_callee$non_lazy_ptr, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _address: +; DARWIN-32-PIC: calll L133$pb +; DARWIN-32-PIC-NEXT: L133$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_callee$non_lazy_ptr-L133$pb([[EAX]]), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _address: +; DARWIN-64-STATIC: movq _callee@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _address: +; DARWIN-64-DYNAMIC: movq _callee@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _address: +; DARWIN-64-PIC: movq _callee@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: ret +} + +declare void @callee() + +define void ()* @laddress() nounwind { +entry: + ret void ()* @lcallee +; LINUX-64-STATIC: laddress: +; LINUX-64-STATIC: movl $lcallee, %eax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: laddress: +; LINUX-32-STATIC: movl $lcallee, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: laddress: +; LINUX-32-PIC: movl $lcallee, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: laddress: +; LINUX-64-PIC: movq lcallee@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _laddress: +; DARWIN-32-STATIC: movl $_lcallee, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _laddress: +; DARWIN-32-DYNAMIC: movl $_lcallee, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _laddress: +; DARWIN-32-PIC: calll L134$pb +; DARWIN-32-PIC-NEXT: L134$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: leal _lcallee-L134$pb([[EAX]]), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _laddress: +; DARWIN-64-STATIC: leaq _lcallee(%rip), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _laddress: +; DARWIN-64-DYNAMIC: leaq 
_lcallee(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _laddress: +; DARWIN-64-PIC: leaq _lcallee(%rip), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define void ()* @daddress() nounwind { +entry: + ret void ()* @dcallee +; LINUX-64-STATIC: daddress: +; LINUX-64-STATIC: movl $dcallee, %eax +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: daddress: +; LINUX-32-STATIC: movl $dcallee, %eax +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: daddress: +; LINUX-32-PIC: movl $dcallee, %eax +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: daddress: +; LINUX-64-PIC: leaq dcallee(%rip), %rax +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _daddress: +; DARWIN-32-STATIC: movl $_dcallee, %eax +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _daddress: +; DARWIN-32-DYNAMIC: movl $_dcallee, %eax +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _daddress: +; DARWIN-32-PIC: calll L135$pb +; DARWIN-32-PIC-NEXT: L135$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: leal _dcallee-L135$pb([[EAX]]), %eax +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _daddress: +; DARWIN-64-STATIC: leaq _dcallee(%rip), %rax +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _daddress: +; DARWIN-64-DYNAMIC: leaq _dcallee(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _daddress: +; DARWIN-64-PIC: leaq _dcallee(%rip), %rax +; DARWIN-64-PIC-NEXT: ret +} + +define void @caller() nounwind { +entry: + call void @callee() nounwind + call void @callee() nounwind + ret void +; LINUX-64-STATIC: caller: +; LINUX-64-STATIC: callq callee +; LINUX-64-STATIC: callq callee +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: caller: +; LINUX-32-STATIC: subl +; LINUX-32-STATIC-NEXT: calll callee +; LINUX-32-STATIC-NEXT: calll callee +; LINUX-32-STATIC-NEXT: addl +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: caller: +; LINUX-32-PIC: subl +; LINUX-32-PIC-NEXT: calll callee +; LINUX-32-PIC-NEXT: calll callee +; LINUX-32-PIC-NEXT: addl + +; LINUX-32-PIC-NEXT: ret + +; 
LINUX-64-PIC: caller: +; LINUX-64-PIC: pushq +; LINUX-64-PIC-NEXT: callq callee@PLT +; LINUX-64-PIC-NEXT: callq callee@PLT +; LINUX-64-PIC-NEXT: popq +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _caller: +; DARWIN-32-STATIC: subl $12, %esp +; DARWIN-32-STATIC-NEXT: calll _callee +; DARWIN-32-STATIC-NEXT: calll _callee +; DARWIN-32-STATIC-NEXT: addl $12, %esp +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _caller: +; DARWIN-32-DYNAMIC: subl $12, %esp +; DARWIN-32-DYNAMIC-NEXT: calll L_callee$stub +; DARWIN-32-DYNAMIC-NEXT: calll L_callee$stub +; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _caller: +; DARWIN-32-PIC: subl $12, %esp +; DARWIN-32-PIC-NEXT: calll L_callee$stub +; DARWIN-32-PIC-NEXT: calll L_callee$stub +; DARWIN-32-PIC-NEXT: addl $12, %esp +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _caller: +; DARWIN-64-STATIC: pushq +; DARWIN-64-STATIC-NEXT: callq _callee +; DARWIN-64-STATIC-NEXT: callq _callee +; DARWIN-64-STATIC-NEXT: popq +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _caller: +; DARWIN-64-DYNAMIC: pushq +; DARWIN-64-DYNAMIC-NEXT: callq _callee +; DARWIN-64-DYNAMIC-NEXT: callq _callee +; DARWIN-64-DYNAMIC-NEXT: popq +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _caller: +; DARWIN-64-PIC: pushq +; DARWIN-64-PIC-NEXT: callq _callee +; DARWIN-64-PIC-NEXT: callq _callee +; DARWIN-64-PIC-NEXT: popq +; DARWIN-64-PIC-NEXT: ret +} + +define void @dcaller() nounwind { +entry: + call void @dcallee() nounwind + call void @dcallee() nounwind + ret void +; LINUX-64-STATIC: dcaller: +; LINUX-64-STATIC: callq dcallee +; LINUX-64-STATIC: callq dcallee +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: dcaller: +; LINUX-32-STATIC: subl +; LINUX-32-STATIC-NEXT: calll dcallee +; LINUX-32-STATIC-NEXT: calll dcallee +; LINUX-32-STATIC-NEXT: addl +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: dcaller: +; LINUX-32-PIC: subl +; LINUX-32-PIC-NEXT: calll dcallee +; LINUX-32-PIC-NEXT: calll dcallee +; 
LINUX-32-PIC-NEXT: addl + +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: dcaller: +; LINUX-64-PIC: pushq +; LINUX-64-PIC-NEXT: callq dcallee +; LINUX-64-PIC-NEXT: callq dcallee +; LINUX-64-PIC-NEXT: popq +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _dcaller: +; DARWIN-32-STATIC: subl $12, %esp +; DARWIN-32-STATIC-NEXT: calll _dcallee +; DARWIN-32-STATIC-NEXT: calll _dcallee +; DARWIN-32-STATIC-NEXT: addl $12, %esp +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _dcaller: +; DARWIN-32-DYNAMIC: subl $12, %esp +; DARWIN-32-DYNAMIC-NEXT: calll _dcallee +; DARWIN-32-DYNAMIC-NEXT: calll _dcallee +; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _dcaller: +; DARWIN-32-PIC: subl $12, %esp +; DARWIN-32-PIC-NEXT: calll _dcallee +; DARWIN-32-PIC-NEXT: calll _dcallee +; DARWIN-32-PIC-NEXT: addl $12, %esp +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _dcaller: +; DARWIN-64-STATIC: pushq +; DARWIN-64-STATIC-NEXT: callq _dcallee +; DARWIN-64-STATIC-NEXT: callq _dcallee +; DARWIN-64-STATIC-NEXT: popq +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _dcaller: +; DARWIN-64-DYNAMIC: pushq +; DARWIN-64-DYNAMIC-NEXT: callq _dcallee +; DARWIN-64-DYNAMIC-NEXT: callq _dcallee +; DARWIN-64-DYNAMIC-NEXT: popq +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _dcaller: +; DARWIN-64-PIC: pushq +; DARWIN-64-PIC-NEXT: callq _dcallee +; DARWIN-64-PIC-NEXT: callq _dcallee +; DARWIN-64-PIC-NEXT: popq +; DARWIN-64-PIC-NEXT: ret +} + +define void @lcaller() nounwind { +entry: + call void @lcallee() nounwind + call void @lcallee() nounwind + ret void +; LINUX-64-STATIC: lcaller: +; LINUX-64-STATIC: callq lcallee +; LINUX-64-STATIC: callq lcallee +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: lcaller: +; LINUX-32-STATIC: subl +; LINUX-32-STATIC-NEXT: calll lcallee +; LINUX-32-STATIC-NEXT: calll lcallee +; LINUX-32-STATIC-NEXT: addl +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: lcaller: +; LINUX-32-PIC: subl +; LINUX-32-PIC-NEXT: calll 
lcallee +; LINUX-32-PIC-NEXT: calll lcallee +; LINUX-32-PIC-NEXT: addl + +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: lcaller: +; LINUX-64-PIC: pushq +; LINUX-64-PIC-NEXT: callq lcallee@PLT +; LINUX-64-PIC-NEXT: callq lcallee@PLT +; LINUX-64-PIC-NEXT: popq +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _lcaller: +; DARWIN-32-STATIC: subl $12, %esp +; DARWIN-32-STATIC-NEXT: calll _lcallee +; DARWIN-32-STATIC-NEXT: calll _lcallee +; DARWIN-32-STATIC-NEXT: addl $12, %esp +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _lcaller: +; DARWIN-32-DYNAMIC: subl $12, %esp +; DARWIN-32-DYNAMIC-NEXT: calll _lcallee +; DARWIN-32-DYNAMIC-NEXT: calll _lcallee +; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _lcaller: +; DARWIN-32-PIC: subl $12, %esp +; DARWIN-32-PIC-NEXT: calll _lcallee +; DARWIN-32-PIC-NEXT: calll _lcallee +; DARWIN-32-PIC-NEXT: addl $12, %esp +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _lcaller: +; DARWIN-64-STATIC: pushq +; DARWIN-64-STATIC-NEXT: callq _lcallee +; DARWIN-64-STATIC-NEXT: callq _lcallee +; DARWIN-64-STATIC-NEXT: popq +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _lcaller: +; DARWIN-64-DYNAMIC: pushq +; DARWIN-64-DYNAMIC-NEXT: callq _lcallee +; DARWIN-64-DYNAMIC-NEXT: callq _lcallee +; DARWIN-64-DYNAMIC-NEXT: popq +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _lcaller: +; DARWIN-64-PIC: pushq +; DARWIN-64-PIC-NEXT: callq _lcallee +; DARWIN-64-PIC-NEXT: callq _lcallee +; DARWIN-64-PIC-NEXT: popq +; DARWIN-64-PIC-NEXT: ret +} + +define void @tailcaller() nounwind { +entry: + call void @callee() nounwind + ret void +; LINUX-64-STATIC: tailcaller: +; LINUX-64-STATIC: callq callee +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: tailcaller: +; LINUX-32-STATIC: subl +; LINUX-32-STATIC-NEXT: calll callee +; LINUX-32-STATIC-NEXT: addl +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: tailcaller: +; LINUX-32-PIC: subl +; LINUX-32-PIC-NEXT: calll callee +; LINUX-32-PIC-NEXT: addl + +; 
LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: tailcaller: +; LINUX-64-PIC: pushq +; LINUX-64-PIC-NEXT: callq callee@PLT +; LINUX-64-PIC-NEXT: popq +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _tailcaller: +; DARWIN-32-STATIC: subl $12, %esp +; DARWIN-32-STATIC-NEXT: calll _callee +; DARWIN-32-STATIC-NEXT: addl $12, %esp +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _tailcaller: +; DARWIN-32-DYNAMIC: subl $12, %esp +; DARWIN-32-DYNAMIC-NEXT: calll L_callee$stub +; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _tailcaller: +; DARWIN-32-PIC: subl $12, %esp +; DARWIN-32-PIC-NEXT: calll L_callee$stub +; DARWIN-32-PIC-NEXT: addl $12, %esp +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _tailcaller: +; DARWIN-64-STATIC: pushq +; DARWIN-64-STATIC-NEXT: callq _callee +; DARWIN-64-STATIC-NEXT: popq +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _tailcaller: +; DARWIN-64-DYNAMIC: pushq +; DARWIN-64-DYNAMIC-NEXT: callq _callee +; DARWIN-64-DYNAMIC-NEXT: popq +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _tailcaller: +; DARWIN-64-PIC: pushq +; DARWIN-64-PIC-NEXT: callq _callee +; DARWIN-64-PIC-NEXT: popq +; DARWIN-64-PIC-NEXT: ret +} + +define void @dtailcaller() nounwind { +entry: + call void @dcallee() nounwind + ret void +; LINUX-64-STATIC: dtailcaller: +; LINUX-64-STATIC: callq dcallee +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: dtailcaller: +; LINUX-32-STATIC: subl +; LINUX-32-STATIC-NEXT: calll dcallee +; LINUX-32-STATIC-NEXT: addl +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: dtailcaller: +; LINUX-32-PIC: subl +; LINUX-32-PIC-NEXT: calll dcallee +; LINUX-32-PIC-NEXT: addl + +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: dtailcaller: +; LINUX-64-PIC: pushq +; LINUX-64-PIC-NEXT: callq dcallee +; LINUX-64-PIC-NEXT: popq +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _dtailcaller: +; DARWIN-32-STATIC: subl $12, %esp +; DARWIN-32-STATIC-NEXT: calll _dcallee +; DARWIN-32-STATIC-NEXT: addl $12, %esp +; 
DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _dtailcaller: +; DARWIN-32-DYNAMIC: subl $12, %esp +; DARWIN-32-DYNAMIC-NEXT: calll _dcallee +; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _dtailcaller: +; DARWIN-32-PIC: subl $12, %esp +; DARWIN-32-PIC-NEXT: calll _dcallee +; DARWIN-32-PIC-NEXT: addl $12, %esp +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _dtailcaller: +; DARWIN-64-STATIC: pushq +; DARWIN-64-STATIC-NEXT: callq _dcallee +; DARWIN-64-STATIC-NEXT: popq +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _dtailcaller: +; DARWIN-64-DYNAMIC: pushq +; DARWIN-64-DYNAMIC-NEXT: callq _dcallee +; DARWIN-64-DYNAMIC-NEXT: popq +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _dtailcaller: +; DARWIN-64-PIC: pushq +; DARWIN-64-PIC-NEXT: callq _dcallee +; DARWIN-64-PIC-NEXT: popq +; DARWIN-64-PIC-NEXT: ret +} + +define void @ltailcaller() nounwind { +entry: + call void @lcallee() nounwind + ret void +; LINUX-64-STATIC: ltailcaller: +; LINUX-64-STATIC: callq lcallee +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: ltailcaller: +; LINUX-32-STATIC: subl +; LINUX-32-STATIC-NEXT: calll lcallee +; LINUX-32-STATIC-NEXT: addl +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: ltailcaller: +; LINUX-32-PIC: subl +; LINUX-32-PIC-NEXT: calll lcallee +; LINUX-32-PIC-NEXT: addl + +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: ltailcaller: +; LINUX-64-PIC: pushq +; LINUX-64-PIC-NEXT: callq lcallee@PLT +; LINUX-64-PIC-NEXT: popq +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _ltailcaller: +; DARWIN-32-STATIC: subl $12, %esp +; DARWIN-32-STATIC-NEXT: calll _lcallee +; DARWIN-32-STATIC-NEXT: addl $12, %esp +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _ltailcaller: +; DARWIN-32-DYNAMIC: subl $12, %esp +; DARWIN-32-DYNAMIC-NEXT: calll _lcallee +; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _ltailcaller: +; DARWIN-32-PIC: subl $12, %esp +; DARWIN-32-PIC-NEXT: calll _lcallee +; 
DARWIN-32-PIC-NEXT: addl $12, %esp +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _ltailcaller: +; DARWIN-64-STATIC: pushq +; DARWIN-64-STATIC-NEXT: callq _lcallee +; DARWIN-64-STATIC-NEXT: popq +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _ltailcaller: +; DARWIN-64-DYNAMIC: pushq +; DARWIN-64-DYNAMIC-NEXT: callq _lcallee +; DARWIN-64-DYNAMIC-NEXT: popq +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _ltailcaller: +; DARWIN-64-PIC: pushq +; DARWIN-64-PIC-NEXT: callq _lcallee +; DARWIN-64-PIC-NEXT: popq +; DARWIN-64-PIC-NEXT: ret +} + +define void @icaller() nounwind { +entry: + %0 = load void ()** @ifunc, align 8 + call void %0() nounwind + %1 = load void ()** @ifunc, align 8 + call void %1() nounwind + ret void +; LINUX-64-STATIC: icaller: +; LINUX-64-STATIC: callq *ifunc +; LINUX-64-STATIC: callq *ifunc +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: icaller: +; LINUX-32-STATIC: subl +; LINUX-32-STATIC-NEXT: calll *ifunc +; LINUX-32-STATIC-NEXT: calll *ifunc +; LINUX-32-STATIC-NEXT: addl +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: icaller: +; LINUX-32-PIC: subl +; LINUX-32-PIC-NEXT: calll *ifunc +; LINUX-32-PIC-NEXT: calll *ifunc +; LINUX-32-PIC-NEXT: addl + +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: icaller: +; LINUX-64-PIC: pushq [[RBX:%r.x]] +; LINUX-64-PIC-NEXT: movq ifunc@GOTPCREL(%rip), [[RBX:%r.x]] +; LINUX-64-PIC-NEXT: callq *([[RBX]]) +; LINUX-64-PIC-NEXT: callq *([[RBX]]) +; LINUX-64-PIC-NEXT: popq [[RBX:%r.x]] +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _icaller: +; DARWIN-32-STATIC: subl $12, %esp +; DARWIN-32-STATIC-NEXT: calll *_ifunc +; DARWIN-32-STATIC-NEXT: calll *_ifunc +; DARWIN-32-STATIC-NEXT: addl $12, %esp +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _icaller: +; DARWIN-32-DYNAMIC: pushl %esi +; DARWIN-32-DYNAMIC-NEXT: subl $8, %esp +; DARWIN-32-DYNAMIC-NEXT: movl L_ifunc$non_lazy_ptr, %esi +; DARWIN-32-DYNAMIC-NEXT: calll *(%esi) +; DARWIN-32-DYNAMIC-NEXT: calll *(%esi) +; DARWIN-32-DYNAMIC-NEXT: addl $8, 
%esp +; DARWIN-32-DYNAMIC-NEXT: popl %esi +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _icaller: +; DARWIN-32-PIC: pushl %esi +; DARWIN-32-PIC-NEXT: subl $8, %esp +; DARWIN-32-PIC-NEXT: calll L142$pb +; DARWIN-32-PIC-NEXT: L142$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: movl L_ifunc$non_lazy_ptr-L142$pb([[EAX]]), %esi +; DARWIN-32-PIC-NEXT: calll *(%esi) +; DARWIN-32-PIC-NEXT: calll *(%esi) +; DARWIN-32-PIC-NEXT: addl $8, %esp +; DARWIN-32-PIC-NEXT: popl %esi +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _icaller: +; DARWIN-64-STATIC: pushq [[RBX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq _ifunc@GOTPCREL(%rip), [[RBX:%r.x]] +; DARWIN-64-STATIC-NEXT: callq *([[RBX]]) +; DARWIN-64-STATIC-NEXT: callq *([[RBX]]) +; DARWIN-64-STATIC-NEXT: popq [[RBX:%r.x]] +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _icaller: +; DARWIN-64-DYNAMIC: pushq [[RBX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _ifunc@GOTPCREL(%rip), [[RBX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: callq *([[RBX]]) +; DARWIN-64-DYNAMIC-NEXT: callq *([[RBX]]) +; DARWIN-64-DYNAMIC-NEXT: popq [[RBX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _icaller: +; DARWIN-64-PIC: pushq [[RBX:%r.x]] +; DARWIN-64-PIC-NEXT: movq _ifunc@GOTPCREL(%rip), [[RBX:%r.x]] +; DARWIN-64-PIC-NEXT: callq *([[RBX]]) +; DARWIN-64-PIC-NEXT: callq *([[RBX]]) +; DARWIN-64-PIC-NEXT: popq [[RBX:%r.x]] +; DARWIN-64-PIC-NEXT: ret +} + +define void @dicaller() nounwind { +entry: + %0 = load void ()** @difunc, align 8 + call void %0() nounwind + %1 = load void ()** @difunc, align 8 + call void %1() nounwind + ret void +; LINUX-64-STATIC: dicaller: +; LINUX-64-STATIC: callq *difunc +; LINUX-64-STATIC: callq *difunc +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: dicaller: +; LINUX-32-STATIC: subl +; LINUX-32-STATIC-NEXT: calll *difunc +; LINUX-32-STATIC-NEXT: calll *difunc +; LINUX-32-STATIC-NEXT: addl +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: dicaller: +; LINUX-32-PIC: subl +; LINUX-32-PIC-NEXT: calll 
*difunc +; LINUX-32-PIC-NEXT: calll *difunc +; LINUX-32-PIC-NEXT: addl + +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: dicaller: +; LINUX-64-PIC: pushq [[RBX:%r.x]] +; LINUX-64-PIC-NEXT: movq difunc@GOTPCREL(%rip), [[RBX:%r.x]] +; LINUX-64-PIC-NEXT: callq *([[RBX]]) +; LINUX-64-PIC-NEXT: callq *([[RBX]]) +; LINUX-64-PIC-NEXT: popq [[RBX:%r.x]] +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _dicaller: +; DARWIN-32-STATIC: subl $12, %esp +; DARWIN-32-STATIC-NEXT: calll *_difunc +; DARWIN-32-STATIC-NEXT: calll *_difunc +; DARWIN-32-STATIC-NEXT: addl $12, %esp +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _dicaller: +; DARWIN-32-DYNAMIC: subl $12, %esp +; DARWIN-32-DYNAMIC-NEXT: calll *_difunc +; DARWIN-32-DYNAMIC-NEXT: calll *_difunc +; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _dicaller: +; DARWIN-32-PIC: pushl %esi +; DARWIN-32-PIC-NEXT: subl $8, %esp +; DARWIN-32-PIC-NEXT: calll L143$pb +; DARWIN-32-PIC-NEXT: L143$pb: +; DARWIN-32-PIC-NEXT: popl %esi +; DARWIN-32-PIC-NEXT: calll *_difunc-L143$pb(%esi) +; DARWIN-32-PIC-NEXT: calll *_difunc-L143$pb(%esi) +; DARWIN-32-PIC-NEXT: addl $8, %esp +; DARWIN-32-PIC-NEXT: popl %esi +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _dicaller: +; DARWIN-64-STATIC: pushq +; DARWIN-64-STATIC-NEXT: callq *_difunc(%rip) +; DARWIN-64-STATIC-NEXT: callq *_difunc(%rip) +; DARWIN-64-STATIC-NEXT: popq +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _dicaller: +; DARWIN-64-DYNAMIC: pushq +; DARWIN-64-DYNAMIC-NEXT: callq *_difunc(%rip) +; DARWIN-64-DYNAMIC-NEXT: callq *_difunc(%rip) +; DARWIN-64-DYNAMIC-NEXT: popq +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _dicaller: +; DARWIN-64-PIC: pushq +; DARWIN-64-PIC-NEXT: callq *_difunc(%rip) +; DARWIN-64-PIC-NEXT: callq *_difunc(%rip) +; DARWIN-64-PIC-NEXT: popq +; DARWIN-64-PIC-NEXT: ret +} + +define void @licaller() nounwind { +entry: + %0 = load void ()** @lifunc, align 8 + call void %0() nounwind + %1 = load void ()** 
@lifunc, align 8 + call void %1() nounwind + ret void +; LINUX-64-STATIC: licaller: +; LINUX-64-STATIC: callq *lifunc +; LINUX-64-STATIC: callq *lifunc +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: licaller: +; LINUX-32-STATIC: subl +; LINUX-32-STATIC-NEXT: calll *lifunc +; LINUX-32-STATIC-NEXT: calll *lifunc +; LINUX-32-STATIC-NEXT: addl +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: licaller: +; LINUX-32-PIC: subl +; LINUX-32-PIC-NEXT: calll *lifunc +; LINUX-32-PIC-NEXT: calll *lifunc +; LINUX-32-PIC-NEXT: addl + +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: licaller: +; LINUX-64-PIC: pushq +; LINUX-64-PIC-NEXT: callq *lifunc(%rip) +; LINUX-64-PIC-NEXT: callq *lifunc(%rip) +; LINUX-64-PIC-NEXT: popq +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _licaller: +; DARWIN-32-STATIC: subl $12, %esp +; DARWIN-32-STATIC-NEXT: calll *_lifunc +; DARWIN-32-STATIC-NEXT: calll *_lifunc +; DARWIN-32-STATIC-NEXT: addl $12, %esp +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _licaller: +; DARWIN-32-DYNAMIC: subl $12, %esp +; DARWIN-32-DYNAMIC-NEXT: calll *_lifunc +; DARWIN-32-DYNAMIC-NEXT: calll *_lifunc +; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _licaller: +; DARWIN-32-PIC: pushl %esi +; DARWIN-32-PIC-NEXT: subl $8, %esp +; DARWIN-32-PIC-NEXT: calll L144$pb +; DARWIN-32-PIC-NEXT: L144$pb: +; DARWIN-32-PIC-NEXT: popl %esi +; DARWIN-32-PIC-NEXT: calll *_lifunc-L144$pb(%esi) +; DARWIN-32-PIC-NEXT: calll *_lifunc-L144$pb(%esi) +; DARWIN-32-PIC-NEXT: addl $8, %esp +; DARWIN-32-PIC-NEXT: popl %esi +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _licaller: +; DARWIN-64-STATIC: pushq +; DARWIN-64-STATIC-NEXT: callq *_lifunc(%rip) +; DARWIN-64-STATIC-NEXT: callq *_lifunc(%rip) +; DARWIN-64-STATIC-NEXT: popq +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _licaller: +; DARWIN-64-DYNAMIC: pushq +; DARWIN-64-DYNAMIC-NEXT: callq *_lifunc(%rip) +; DARWIN-64-DYNAMIC-NEXT: callq *_lifunc(%rip) +; DARWIN-64-DYNAMIC-NEXT: popq +; 
DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _licaller: +; DARWIN-64-PIC: pushq +; DARWIN-64-PIC-NEXT: callq *_lifunc(%rip) +; DARWIN-64-PIC-NEXT: callq *_lifunc(%rip) +; DARWIN-64-PIC-NEXT: popq +; DARWIN-64-PIC-NEXT: ret +} + +define void @itailcaller() nounwind { +entry: + %0 = load void ()** @ifunc, align 8 + call void %0() nounwind + %1 = load void ()** @ifunc, align 8 + call void %1() nounwind + ret void +; LINUX-64-STATIC: itailcaller: +; LINUX-64-STATIC: callq *ifunc +; LINUX-64-STATIC: callq *ifunc +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: itailcaller: +; LINUX-32-STATIC: subl +; LINUX-32-STATIC-NEXT: calll *ifunc +; LINUX-32-STATIC-NEXT: calll *ifunc +; LINUX-32-STATIC-NEXT: addl +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: itailcaller: +; LINUX-32-PIC: subl +; LINUX-32-PIC-NEXT: calll *ifunc +; LINUX-32-PIC-NEXT: calll *ifunc +; LINUX-32-PIC-NEXT: addl + +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: itailcaller: +; LINUX-64-PIC: pushq [[RBX:%r.x]] +; LINUX-64-PIC-NEXT: movq ifunc@GOTPCREL(%rip), [[RBX:%r.x]] +; LINUX-64-PIC-NEXT: callq *([[RBX]]) +; LINUX-64-PIC-NEXT: callq *([[RBX]]) +; LINUX-64-PIC-NEXT: popq [[RBX:%r.x]] +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _itailcaller: +; DARWIN-32-STATIC: subl $12, %esp +; DARWIN-32-STATIC-NEXT: calll *_ifunc +; DARWIN-32-STATIC-NEXT: calll *_ifunc +; DARWIN-32-STATIC-NEXT: addl $12, %esp +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _itailcaller: +; DARWIN-32-DYNAMIC: pushl %esi +; DARWIN-32-DYNAMIC-NEXT: subl $8, %esp +; DARWIN-32-DYNAMIC-NEXT: movl L_ifunc$non_lazy_ptr, %esi +; DARWIN-32-DYNAMIC-NEXT: calll *(%esi) +; DARWIN-32-DYNAMIC-NEXT: calll *(%esi) +; DARWIN-32-DYNAMIC-NEXT: addl $8, %esp +; DARWIN-32-DYNAMIC-NEXT: popl %esi +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _itailcaller: +; DARWIN-32-PIC: pushl %esi +; DARWIN-32-PIC-NEXT: subl $8, %esp +; DARWIN-32-PIC-NEXT: calll L145$pb +; DARWIN-32-PIC-NEXT: L145$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; 
DARWIN-32-PIC-NEXT: movl L_ifunc$non_lazy_ptr-L145$pb([[EAX]]), %esi +; DARWIN-32-PIC-NEXT: calll *(%esi) +; DARWIN-32-PIC-NEXT: calll *(%esi) +; DARWIN-32-PIC-NEXT: addl $8, %esp +; DARWIN-32-PIC-NEXT: popl %esi +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _itailcaller: +; DARWIN-64-STATIC: pushq [[RBX:%r.x]] +; DARWIN-64-STATIC-NEXT: movq _ifunc@GOTPCREL(%rip), [[RBX:%r.x]] +; DARWIN-64-STATIC-NEXT: callq *([[RBX]]) +; DARWIN-64-STATIC-NEXT: callq *([[RBX]]) +; DARWIN-64-STATIC-NEXT: popq [[RBX:%r.x]] +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _itailcaller: +; DARWIN-64-DYNAMIC: pushq [[RBX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: movq _ifunc@GOTPCREL(%rip), [[RBX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: callq *([[RBX]]) +; DARWIN-64-DYNAMIC-NEXT: callq *([[RBX]]) +; DARWIN-64-DYNAMIC-NEXT: popq [[RBX:%r.x]] +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _itailcaller: +; DARWIN-64-PIC: pushq [[RBX:%r.x]] +; DARWIN-64-PIC-NEXT: movq _ifunc@GOTPCREL(%rip), [[RBX:%r.x]] +; DARWIN-64-PIC-NEXT: callq *([[RBX]]) +; DARWIN-64-PIC-NEXT: callq *([[RBX]]) +; DARWIN-64-PIC-NEXT: popq [[RBX:%r.x]] +; DARWIN-64-PIC-NEXT: ret +} + +define void @ditailcaller() nounwind { +entry: + %0 = load void ()** @difunc, align 8 + call void %0() nounwind + ret void +; LINUX-64-STATIC: ditailcaller: +; LINUX-64-STATIC: callq *difunc +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: ditailcaller: +; LINUX-32-STATIC: subl +; LINUX-32-STATIC-NEXT: calll *difunc +; LINUX-32-STATIC-NEXT: addl +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: ditailcaller: +; LINUX-32-PIC: subl +; LINUX-32-PIC-NEXT: calll *difunc +; LINUX-32-PIC-NEXT: addl + +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: ditailcaller: +; LINUX-64-PIC: pushq +; LINUX-64-PIC-NEXT: movq difunc@GOTPCREL(%rip), [[RAX:%r.x]] +; LINUX-64-PIC-NEXT: callq *([[RAX]]) +; LINUX-64-PIC-NEXT: popq +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _ditailcaller: +; DARWIN-32-STATIC: subl $12, %esp +; DARWIN-32-STATIC-NEXT: calll *_difunc +; 
DARWIN-32-STATIC-NEXT: addl $12, %esp +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _ditailcaller: +; DARWIN-32-DYNAMIC: subl $12, %esp +; DARWIN-32-DYNAMIC-NEXT: calll *_difunc +; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _ditailcaller: +; DARWIN-32-PIC: subl $12, %esp +; DARWIN-32-PIC-NEXT: calll L146$pb +; DARWIN-32-PIC-NEXT: L146$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: calll *_difunc-L146$pb([[EAX]]) +; DARWIN-32-PIC-NEXT: addl $12, %esp +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _ditailcaller: +; DARWIN-64-STATIC: pushq +; DARWIN-64-STATIC-NEXT: callq *_difunc(%rip) +; DARWIN-64-STATIC-NEXT: popq +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _ditailcaller: +; DARWIN-64-DYNAMIC: pushq +; DARWIN-64-DYNAMIC-NEXT: callq *_difunc(%rip) +; DARWIN-64-DYNAMIC-NEXT: popq +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _ditailcaller: +; DARWIN-64-PIC: callq *_difunc(%rip) +; DARWIN-64-PIC-NEXT: popq +; DARWIN-64-PIC-NEXT: ret +} + +define void @litailcaller() nounwind { +entry: + %0 = load void ()** @lifunc, align 8 + call void %0() nounwind + ret void +; LINUX-64-STATIC: litailcaller: +; LINUX-64-STATIC: callq *lifunc +; LINUX-64-STATIC: ret + +; LINUX-32-STATIC: litailcaller: +; LINUX-32-STATIC: subl +; LINUX-32-STATIC-NEXT: calll *lifunc +; LINUX-32-STATIC-NEXT: addl +; LINUX-32-STATIC-NEXT: ret + +; LINUX-32-PIC: litailcaller: +; LINUX-32-PIC: subl +; LINUX-32-PIC-NEXT: calll *lifunc +; LINUX-32-PIC-NEXT: addl + +; LINUX-32-PIC-NEXT: ret + +; LINUX-64-PIC: litailcaller: +; LINUX-64-PIC: pushq +; LINUX-64-PIC-NEXT: callq *lifunc(%rip) +; LINUX-64-PIC-NEXT: popq +; LINUX-64-PIC-NEXT: ret + +; DARWIN-32-STATIC: _litailcaller: +; DARWIN-32-STATIC: subl $12, %esp +; DARWIN-32-STATIC-NEXT: calll *_lifunc +; DARWIN-32-STATIC-NEXT: addl $12, %esp +; DARWIN-32-STATIC-NEXT: ret + +; DARWIN-32-DYNAMIC: _litailcaller: +; DARWIN-32-DYNAMIC: subl $12, %esp +; 
DARWIN-32-DYNAMIC-NEXT: calll *_lifunc +; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp +; DARWIN-32-DYNAMIC-NEXT: ret + +; DARWIN-32-PIC: _litailcaller: +; DARWIN-32-PIC: subl $12, %esp +; DARWIN-32-PIC-NEXT: calll L147$pb +; DARWIN-32-PIC-NEXT: L147$pb: +; DARWIN-32-PIC-NEXT: popl [[EAX:%e.x]] +; DARWIN-32-PIC-NEXT: calll *_lifunc-L147$pb([[EAX]]) +; DARWIN-32-PIC-NEXT: addl $12, %esp +; DARWIN-32-PIC-NEXT: ret + +; DARWIN-64-STATIC: _litailcaller: +; DARWIN-64-STATIC: pushq +; DARWIN-64-STATIC-NEXT: callq *_lifunc(%rip) +; DARWIN-64-STATIC-NEXT: popq +; DARWIN-64-STATIC-NEXT: ret + +; DARWIN-64-DYNAMIC: _litailcaller: +; DARWIN-64-DYNAMIC: pushq +; DARWIN-64-DYNAMIC-NEXT: callq *_lifunc(%rip) +; DARWIN-64-DYNAMIC-NEXT: popq +; DARWIN-64-DYNAMIC-NEXT: ret + +; DARWIN-64-PIC: _litailcaller: +; DARWIN-64-PIC: pushq +; DARWIN-64-PIC-NEXT: callq *_lifunc(%rip) +; DARWIN-64-PIC-NEXT: popq +; DARWIN-64-PIC-NEXT: ret +}
diff --git a/src/LLVM/test/CodeGen/X86/add-of-carry.ll b/src/LLVM/test/CodeGen/X86/add-of-carry.ll new file mode 100644 index 0000000..a4abccb --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/add-of-carry.ll
@@ -0,0 +1,33 @@ +; RUN: llc < %s -march=x86 | FileCheck %s +; <rdar://problem/8449754> + +define i32 @test1(i32 %sum, i32 %x) nounwind readnone ssp { +entry: +; CHECK: test1: +; CHECK: cmpl %ecx, %eax +; CHECK-NOT: addl +; CHECK: adcl $0, %eax + %add4 = add i32 %x, %sum + %cmp = icmp ult i32 %add4, %x + %inc = zext i1 %cmp to i32 + %z.0 = add i32 %add4, %inc + ret i32 %z.0 +} + +; Instcombine transforms test1 into test2: +; CHECK: test2: +; CHECK: movl +; CHECK-NEXT: addl +; CHECK-NEXT: adcl $0 +; CHECK-NEXT: ret +define i32 @test2(i32 %sum, i32 %x) nounwind readnone ssp { +entry: + %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %sum) + %0 = extractvalue { i32, i1 } %uadd, 0 + %cmp = extractvalue { i32, i1 } %uadd, 1 + %inc = zext i1 %cmp to i32 + %z.0 = add i32 %0, %inc + ret i32 %z.0 +} + +declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/X86/add.ll b/src/LLVM/test/CodeGen/X86/add.ll new file mode 100644 index 0000000..7bf527a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/add.ll
@@ -0,0 +1,152 @@ +; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32 +; RUN: llc < %s -mtriple=x86_64-linux -join-physregs | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-win32 -join-physregs | FileCheck %s -check-prefix=X64 + +; Some of these tests depend on -join-physregs to commute instructions. + +; The immediate can be encoded in a smaller way if the +; instruction is a sub instead of an add. + +define i32 @test1(i32 inreg %a) nounwind { + %b = add i32 %a, 128 + ret i32 %b +; X32: subl $-128, %eax +; X64: subl $-128, +} +define i64 @test2(i64 inreg %a) nounwind { + %b = add i64 %a, 2147483648 + ret i64 %b +; X32: addl $-2147483648, %eax +; X64: subq $-2147483648, +} +define i64 @test3(i64 inreg %a) nounwind { + %b = add i64 %a, 128 + ret i64 %b + +; X32: addl $128, %eax +; X64: subq $-128, +} + +define i1 @test4(i32 %v1, i32 %v2, i32* %X) nounwind { +entry: + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2) + %sum = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + br i1 %obit, label %overflow, label %normal + +normal: + store i32 0, i32* %X + br label %overflow + +overflow: + ret i1 false + +; X32: test4: +; X32: addl +; X32-NEXT: jo + +; X64: test4: +; X64: addl %e[[A1:si|dx]], %e[[A0:di|cx]] +; X64-NEXT: jo +} + +define i1 @test5(i32 %v1, i32 %v2, i32* %X) nounwind { +entry: + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2) + %sum = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + br i1 %obit, label %carry, label %normal + +normal: + store i32 0, i32* %X + br label %carry + +carry: + ret i1 false + +; X32: test5: +; X32: addl +; X32-NEXT: jb + +; X64: test5: +; X64: addl %e[[A1]], %e[[A0]] +; X64-NEXT: jb +} + +declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) +declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32) + + +define i64 @test6(i64 %A, i32 %B) nounwind { + %tmp12 = zext i32 %B to i64 ; <i64> [#uses=1] + %tmp3 = shl i64 %tmp12, 32 ; 
<i64> [#uses=1] + %tmp5 = add i64 %tmp3, %A ; <i64> [#uses=1] + ret i64 %tmp5 + +; X32: test6: +; X32: movl 12(%esp), %edx +; X32-NEXT: addl 8(%esp), %edx +; X32-NEXT: movl 4(%esp), %eax +; X32-NEXT: ret + +; X64: test6: +; X64: shlq $32, %r[[A1]] +; X64: leaq (%r[[A1]],%r[[A0]]), %rax +; X64: ret +} + +define {i32, i1} @test7(i32 %v1, i32 %v2) nounwind { + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2) + ret {i32, i1} %t +} + +; X64: test7: +; X64: addl %e[[A1]], %eax +; X64-NEXT: setb %dl +; X64-NEXT: ret + +; PR5443 +define {i64, i1} @test8(i64 %left, i64 %right) nounwind { +entry: + %extleft = zext i64 %left to i65 + %extright = zext i64 %right to i65 + %sum = add i65 %extleft, %extright + %res.0 = trunc i65 %sum to i64 + %overflow = and i65 %sum, -18446744073709551616 + %res.1 = icmp ne i65 %overflow, 0 + %final0 = insertvalue {i64, i1} undef, i64 %res.0, 0 + %final1 = insertvalue {i64, i1} %final0, i1 %res.1, 1 + ret {i64, i1} %final1 +} + +; X64: test8: +; X64: addq +; X64-NEXT: sbbq +; X64-NEXT: testb + +define i32 @test9(i32 %x, i32 %y) nounwind readnone { + %cmp = icmp eq i32 %x, 10 + %sub = sext i1 %cmp to i32 + %cond = add i32 %sub, %y + ret i32 %cond +; X64: test9: +; X64: cmpl $10 +; X64: sete +; X64: subl +; X64: ret +} + +define i1 @test10(i32 %x) nounwind { +entry: + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %x, i32 1) + %obit = extractvalue {i32, i1} %t, 1 + ret i1 %obit + +; X32: test10: +; X32: incl +; X32-NEXT: seto + +; X64: test10: +; X64: incl +; X64-NEXT: seto +}
diff --git a/src/LLVM/test/CodeGen/X86/adde-carry.ll b/src/LLVM/test/CodeGen/X86/adde-carry.ll new file mode 100644 index 0000000..e86adf4 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/adde-carry.ll
@@ -0,0 +1,20 @@ +; RUN: llc -march=x86-64 < %s | FileCheck %s -check-prefix=CHECK-64 + +define void @a(i64* nocapture %s, i64* nocapture %t, i64 %a, i64 %b, i64 %c) nounwind { +entry: + %0 = zext i64 %a to i128 + %1 = zext i64 %b to i128 + %2 = add i128 %1, %0 + %3 = zext i64 %c to i128 + %4 = shl i128 %3, 64 + %5 = add i128 %4, %2 + %6 = lshr i128 %5, 64 + %7 = trunc i128 %6 to i64 + store i64 %7, i64* %s, align 8 + %8 = trunc i128 %2 to i64 + store i64 %8, i64* %t, align 8 + ret void + +; CHECK-64: addq +; CHECK-64: adcq $0 +}
diff --git a/src/LLVM/test/CodeGen/X86/addr-label-difference.ll b/src/LLVM/test/CodeGen/X86/addr-label-difference.ll new file mode 100644 index 0000000..49abd8a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/addr-label-difference.ll
@@ -0,0 +1,26 @@ +; RUN: llc %s -o - | grep {__TEXT,__const} +; PR5929 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" +target triple = "i386-apple-darwin10.0" + +; This array should go into the __TEXT,__const section, not into the +; __DATA,__const section, because the elements don't need relocations. +@test.array = internal unnamed_addr constant [3 x i32] [i32 sub (i32 ptrtoint (i8* blockaddress(@test, %foo) to i32), i32 ptrtoint (i8* blockaddress(@test, %foo) to i32)), i32 sub (i32 ptrtoint (i8* blockaddress(@test, %bar) to i32), i32 ptrtoint (i8* blockaddress(@test, %foo) to i32)), i32 sub (i32 ptrtoint (i8* blockaddress(@test, %hack) to i32), i32 ptrtoint (i8* blockaddress(@test, %foo) to i32))] ; <[3 x i32]*> [#uses=1] + +define void @test(i32 %i) nounwind ssp { +entry: + call void @test(i32 1) + br label %foo + +foo: + call void @test(i32 1) + br label %bar + +bar: + call void @test(i32 1) + br label %hack + +hack: + call void @test(i32 1) + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/aliases.ll b/src/LLVM/test/CodeGen/X86/aliases.ll new file mode 100644 index 0000000..b0eed73 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/aliases.ll
@@ -0,0 +1,36 @@ +; RUN: llc < %s -mtriple=i686-pc-linux-gnu -asm-verbose=false -o %t +; RUN: grep globl %t | count 6 +; RUN: grep weak %t | count 1 +; RUN: grep hidden %t | count 1 +; RUN: grep protected %t | count 1 + +@bar = external global i32 +@foo1 = alias i32* @bar +@foo2 = alias i32* @bar + +%FunTy = type i32() + +declare i32 @foo_f() +@bar_f = alias weak %FunTy* @foo_f + +@bar_i = alias internal i32* @bar + +@A = alias bitcast (i32* @bar to i64*) + +@bar_h = hidden alias i32* @bar + +@bar_p = protected alias i32* @bar + +define i32 @test() { +entry: + %tmp = load i32* @foo1 + %tmp1 = load i32* @foo2 + %tmp0 = load i32* @bar_i + %tmp2 = call i32 @foo_f() + %tmp3 = add i32 %tmp, %tmp2 + %tmp4 = call %FunTy* @bar_f() + %tmp5 = add i32 %tmp3, %tmp4 + %tmp6 = add i32 %tmp1, %tmp5 + %tmp7 = add i32 %tmp6, %tmp0 + ret i32 %tmp7 +}
diff --git a/src/LLVM/test/CodeGen/X86/aligned-comm.ll b/src/LLVM/test/CodeGen/X86/aligned-comm.ll new file mode 100644 index 0000000..7715869 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/aligned-comm.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=x86 +; RUN: llc < %s -mtriple=i386-apple-darwin10 | grep {array,16512,7} +; RUN: llc < %s -mtriple=i386-apple-darwin9 | grep {array,16512,7} + +; Darwin 9+ should get alignment on common symbols. +@array = common global [4128 x i32] zeroinitializer, align 128
diff --git a/src/LLVM/test/CodeGen/X86/alignment-2.ll b/src/LLVM/test/CodeGen/X86/alignment-2.ll new file mode 100644 index 0000000..cc709b5 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/alignment-2.ll
@@ -0,0 +1,28 @@ +; RUN: llc < %s -mtriple i386-apple-darwin10 | FileCheck %s +; <rdar://problem/10058036> + +%struct._psqlSettings = type { %struct.pg_conn*, i32, %struct.__sFILE*, i8, %struct.printQueryOpt, i8*, i8, i32, %struct.__sFILE*, i8, i32, i8*, i8*, i8*, i64, i8, %struct.__sFILE*, %struct._variable*, i8, i8, i8, i8, i8, i32, i32, i32, i32, i32, i8*, i8*, i8*, i32 } +%struct.pg_conn = type opaque +%struct.__sFILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } +%struct.__sbuf = type { i8*, i32 } +%struct.__sFILEX = type opaque +%struct.printQueryOpt = type { %struct.printTableOpt, i8*, i8, i8*, i8**, i8, i8, i8* } +%struct.printTableOpt = type { i32, i8, i16, i16, i8, i8, i8, i32, %struct.printTextFormat*, i8*, i8*, i8, i8*, i32, i32, i32 } +%struct.printTextFormat = type { i8*, [4 x %struct.printTextLineFormat], i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8 } +%struct.printTextLineFormat = type { i8*, i8*, i8*, i8* } +%struct._variable = type { i8*, i8*, void (i8*)*, %struct._variable* } +%struct.pg_result = type opaque + +@pset = external global %struct._psqlSettings + +define signext i8 @do_lo_list() nounwind optsize ssp { +bb: +; CHECK: do_lo_list +; CHECK-NOT: movaps + %myopt = alloca %struct.printQueryOpt, align 4 + %tmp = bitcast %struct.printQueryOpt* %myopt to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* bitcast (%struct.printQueryOpt* getelementptr inbounds (%struct._psqlSettings* @pset, i32 0, i32 4) to i8*), i32 76, i32 4, i1 false) + ret i8 0 +} + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/alignment.ll b/src/LLVM/test/CodeGen/X86/alignment.ll new file mode 100644 index 0000000..5908c0c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/alignment.ll
@@ -0,0 +1,43 @@ +; RUN: llc %s -o - -mtriple=x86_64-linux-gnu | FileCheck %s + +; This cannot get rounded up to the preferred alignment (16) if they have an +; explicit alignment specified. +@GlobalA = global { [384 x i8] } zeroinitializer, align 8 + +; CHECK: .bss +; CHECK: .globl GlobalA +; CHECK: .align 8 +; CHECK: GlobalA: +; CHECK: .zero 384 + +; Common variables should not get rounded up to the preferred alignment (16) if +; they have an explicit alignment specified. +; PR6921 +@GlobalB = common global { [384 x i8] } zeroinitializer, align 8 + +; CHECK: .comm GlobalB,384,8 + + +@GlobalC = common global { [384 x i8] } zeroinitializer, align 2 + +; CHECK: .comm GlobalC,384,2 + + + +; This cannot get rounded up to the preferred alignment (16) if they have an +; explicit alignment specified *and* a section specified. +@GlobalAS = global { [384 x i8] } zeroinitializer, align 8, section "foo" + +; CHECK: .globl GlobalAS +; CHECK: .align 8 +; CHECK: GlobalAS: +; CHECK: .zero 384 + +; Common variables should not get rounded up to the preferred alignment (16) if +; they have an explicit alignment specified and a section specified. +; PR6921 +@GlobalBS = common global { [384 x i8] } zeroinitializer, align 8, section "foo" +; CHECK: .comm GlobalBS,384,8 + +@GlobalCS = common global { [384 x i8] } zeroinitializer, align 2, section "foo" +; CHECK: .comm GlobalCS,384,2
diff --git a/src/LLVM/test/CodeGen/X86/all-ones-vector.ll b/src/LLVM/test/CodeGen/X86/all-ones-vector.ll new file mode 100644 index 0000000..10fecad --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/all-ones-vector.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=x86 -mattr=sse2 | grep pcmpeqd | count 4 + +define <4 x i32> @ioo() nounwind { + ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1> +} +define <2 x i64> @loo() nounwind { + ret <2 x i64> <i64 -1, i64 -1> +} +define <2 x double> @doo() nounwind { + ret <2 x double> <double 0xffffffffffffffff, double 0xffffffffffffffff> +} +define <4 x float> @foo() nounwind { + ret <4 x float> <float 0xffffffffe0000000, float 0xffffffffe0000000, float 0xffffffffe0000000, float 0xffffffffe0000000> +}
diff --git a/src/LLVM/test/CodeGen/X86/alldiv-divdi3.ll b/src/LLVM/test/CodeGen/X86/alldiv-divdi3.ll new file mode 100644 index 0000000..86aa1fd --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/alldiv-divdi3.ll
@@ -0,0 +1,17 @@ +; Test that, for a 64 bit signed div, a libcall to alldiv is made on Windows +; unless we have libgcc. + +; RUN: llc < %s -mtriple i386-pc-win32 | FileCheck %s +; RUN: llc < %s -mtriple i386-pc-cygwin | FileCheck %s -check-prefix USEDIVDI +; RUN: llc < %s -mtriple i386-pc-mingw32 | FileCheck %s -check-prefix USEDIVDI + +define i32 @main(i32 %argc, i8** nocapture %argv) nounwind readonly { +entry: + %conv4 = sext i32 %argc to i64 + %div = sdiv i64 84, %conv4 + %conv7 = trunc i64 %div to i32 + ret i32 %conv7 +} + +; CHECK: alldiv +; USEDIVDI: divdi3
diff --git a/src/LLVM/test/CodeGen/X86/alloca-align-rounding-32.ll b/src/LLVM/test/CodeGen/X86/alloca-align-rounding-32.ll new file mode 100644 index 0000000..c0f1a18 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/alloca-align-rounding-32.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin | grep and | count 1 + +declare void @bar(<2 x i64>* %n) + +define void @foo(i32 %h) { + %p = alloca <2 x i64>, i32 %h + call void @bar(<2 x i64>* %p) + ret void +} + +define void @foo2(i32 %h) { + %p = alloca <2 x i64>, i32 %h, align 32 + call void @bar(<2 x i64>* %p) + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/alloca-align-rounding.ll b/src/LLVM/test/CodeGen/X86/alloca-align-rounding.ll new file mode 100644 index 0000000..3c87dbf --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/alloca-align-rounding.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux | grep and | count 1 + +declare void @bar(<2 x i64>* %n) + +define void @foo(i64 %h) { + %p = alloca <2 x i64>, i64 %h + call void @bar(<2 x i64>* %p) + ret void +} + +define void @foo2(i64 %h) { + %p = alloca <2 x i64>, i64 %h, align 32 + call void @bar(<2 x i64>* %p) + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/allrem-moddi3.ll b/src/LLVM/test/CodeGen/X86/allrem-moddi3.ll new file mode 100644 index 0000000..0c3d04f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/allrem-moddi3.ll
@@ -0,0 +1,19 @@ +; Test that, for a 64 bit signed rem, a libcall to allrem is made on Windows +; unless we have libgcc. + +; RUN: llc < %s -mtriple i386-pc-win32 | FileCheck %s +; RUN: llc < %s -mtriple i386-pc-cygwin | FileCheck %s -check-prefix USEMODDI +; RUN: llc < %s -mtriple i386-pc-mingw32 | FileCheck %s -check-prefix USEMODDI +; PR10305 +; END. + +define i32 @main(i32 %argc, i8** nocapture %argv) nounwind readonly { +entry: + %conv4 = sext i32 %argc to i64 + %div = srem i64 84, %conv4 + %conv7 = trunc i64 %div to i32 + ret i32 %conv7 +} + +; CHECK: allrem +; USEMODDI: moddi3
diff --git a/src/LLVM/test/CodeGen/X86/and-or-fold.ll b/src/LLVM/test/CodeGen/X86/and-or-fold.ll new file mode 100644 index 0000000..9171946 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/and-or-fold.ll
@@ -0,0 +1,26 @@ +; RUN: llc < %s -mtriple=i686-apple-darwin | FileCheck -check-prefix=DARWIN %s +; RUN: opt < %s -O2 | llc -mtriple=x86_64-apple-darwin | FileCheck -check-prefix=DARWIN-OPT %s + +; The dag combiner should fold together (x&127)|(y&16711680) -> (x|y)&c1 +; in this case. + +define i32 @test1(i32 %x, i16 %y) { + %tmp1 = zext i16 %y to i32 + %tmp2 = and i32 %tmp1, 127 + %tmp4 = shl i32 %x, 16 + %tmp5 = and i32 %tmp4, 16711680 + %tmp6 = or i32 %tmp2, %tmp5 + ret i32 %tmp6 +; DARWIN: andl $16711807, %eax +} + +; <rdar://problem/7529774> The optimizer shouldn't fold this into (and (or, C), D) +; if (C & D) == 0 +define i64 @test2(i64 %x) nounwind readnone ssp { +entry: + %tmp1 = and i64 %x, 123127 + %tmp2 = or i64 %tmp1, 3 + ret i64 %tmp2 +; DARWIN-OPT: andq $123124 +; DARWIN-OPT-NEXT: leaq 3 +}
diff --git a/src/LLVM/test/CodeGen/X86/and-su.ll b/src/LLVM/test/CodeGen/X86/and-su.ll new file mode 100644 index 0000000..38db88a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/and-su.ll
@@ -0,0 +1,53 @@ +; RUN: llc < %s -march=x86 | FileCheck %s + +; Don't duplicate the load. + +define fastcc i32 @foo(i32* %p) nounwind { +; CHECK: foo: +; CHECK: andl $10, %eax +; CHECK: je + %t0 = load i32* %p + %t2 = and i32 %t0, 10 + %t3 = icmp ne i32 %t2, 0 + br i1 %t3, label %bb63, label %bb76 +bb63: + ret i32 %t2 +bb76: + ret i32 0 +} + +define fastcc double @bar(i32 %hash, double %x, double %y) nounwind { +entry: +; CHECK: bar: + %0 = and i32 %hash, 15 + %1 = icmp ult i32 %0, 8 + br i1 %1, label %bb11, label %bb10 + +bb10: +; CHECK: bb10 +; CHECK: testb $1 + %2 = and i32 %hash, 1 + %3 = icmp eq i32 %2, 0 + br i1 %3, label %bb13, label %bb11 + +bb11: + %4 = fsub double -0.000000e+00, %x + br label %bb13 + +bb13: +; CHECK: bb13 +; CHECK: testb $2 + %iftmp.9.0 = phi double [ %4, %bb11 ], [ %x, %bb10 ] + %5 = and i32 %hash, 2 + %6 = icmp eq i32 %5, 0 + br i1 %6, label %bb16, label %bb14 + +bb14: + %7 = fsub double -0.000000e+00, %y + br label %bb16 + +bb16: + %iftmp.10.0 = phi double [ %7, %bb14 ], [ %y, %bb13 ] + %8 = fadd double %iftmp.9.0, %iftmp.10.0 + ret double %8 +}
diff --git a/src/LLVM/test/CodeGen/X86/andimm8.ll b/src/LLVM/test/CodeGen/X86/andimm8.ll new file mode 100644 index 0000000..a3dc85f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/andimm8.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-linux-gnu -show-mc-encoding -join-physregs | FileCheck %s + +; PR8365 +; CHECK: andl $-64, %edi # encoding: [0x83,0xe7,0xc0] + +define i64 @bra(i32 %zed) nounwind { + %t1 = zext i32 %zed to i64 + %t2 = and i64 %t1, 4294967232 + ret i64 %t2 +} + +; CHECK: orq $2, %rdi # encoding: [0x48,0x83,0xcf,0x02] + +define void @foo(i64 %zed, i64* %x) nounwind { + %t1 = and i64 %zed, -4 + %t2 = or i64 %t1, 2 + store i64 %t2, i64* %x, align 8 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/anyext.ll b/src/LLVM/test/CodeGen/X86/anyext.ll new file mode 100644 index 0000000..106fe83 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/anyext.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=x86-64 | grep movzbl | count 2 + +; Use movzbl to avoid partial-register updates. + +define i32 @foo(i32 %p, i8 zeroext %x) nounwind { + %q = trunc i32 %p to i8 + %r = udiv i8 %q, %x + %s = zext i8 %r to i32 + %t = and i32 %s, 1 + ret i32 %t +} +define i32 @bar(i32 %p, i16 zeroext %x) nounwind { + %q = trunc i32 %p to i16 + %r = udiv i16 %q, %x + %s = zext i16 %r to i32 + %t = and i32 %s, 1 + ret i32 %t +}
diff --git a/src/LLVM/test/CodeGen/X86/apm.ll b/src/LLVM/test/CodeGen/X86/apm.ll new file mode 100644 index 0000000..b514cf6 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/apm.ll
@@ -0,0 +1,35 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64 +; PR8573 + +; CHECK: foo: +; CHECK: leaq (%rdi), %rax +; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: monitor +; WIN64: foo: +; WIN64: leaq (%rcx), %rax +; WIN64-NEXT: movl %edx, %ecx +; WIN64-NEXT: movl %r8d, %edx +; WIN64-NEXT: monitor +define void @foo(i8* %P, i32 %E, i32 %H) nounwind { +entry: + tail call void @llvm.x86.sse3.monitor(i8* %P, i32 %E, i32 %H) + ret void +} + +declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind + +; CHECK: bar: +; CHECK: movl %edi, %ecx +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: mwait +; WIN64: bar: +; WIN64: movl %edx, %eax +; WIN64-NEXT: mwait +define void @bar(i32 %E, i32 %H) nounwind { +entry: + tail call void @llvm.x86.sse3.mwait(i32 %E, i32 %H) + ret void +} + +declare void @llvm.x86.sse3.mwait(i32, i32) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/arg-cast.ll b/src/LLVM/test/CodeGen/X86/arg-cast.ll new file mode 100644 index 0000000..c111514 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/arg-cast.ll
@@ -0,0 +1,18 @@ +; This should compile to movl $2147483647, %eax + andl only. +; RUN: llc < %s | grep andl +; RUN: llc < %s | not grep movsd +; RUN: llc < %s | grep esp | not grep add +; rdar://5736574 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i686-apple-darwin8" + +define i32 @foo(double %x) nounwind { +entry: + %x15 = bitcast double %x to i64 ; <i64> [#uses=1] + %tmp713 = lshr i64 %x15, 32 ; <i64> [#uses=1] + %tmp714 = trunc i64 %tmp713 to i32 ; <i32> [#uses=1] + %tmp8 = and i32 %tmp714, 2147483647 ; <i32> [#uses=1] + ret i32 %tmp8 +} +
diff --git a/src/LLVM/test/CodeGen/X86/asm-block-labels.ll b/src/LLVM/test/CodeGen/X86/asm-block-labels.ll new file mode 100644 index 0000000..a43d430 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/asm-block-labels.ll
@@ -0,0 +1,41 @@ +; RUN: opt < %s -std-compile-opts | llc +; ModuleID = 'block12.c' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i686-apple-darwin8" + +define void @bar() { +entry: + br label %"LASM$foo" + +"LASM$foo": ; preds = %entry + call void asm sideeffect ".file \22block12.c\22", "~{dirflag},~{fpsr},~{flags}"( ) + call void asm sideeffect ".line 1", "~{dirflag},~{fpsr},~{flags}"( ) + call void asm sideeffect "int $$1", "~{dirflag},~{fpsr},~{flags},~{memory}"( ) + call void asm sideeffect ".file \22block12.c\22", "~{dirflag},~{fpsr},~{flags}"( ) + call void asm sideeffect ".line 2", "~{dirflag},~{fpsr},~{flags}"( ) + call void asm sideeffect "brl ${0:l}", "X,~{dirflag},~{fpsr},~{flags},~{memory}"( label %"LASM$foo" ) + br label %return + +return: ; preds = %"LASM$foo" + ret void +} + +define void @baz() { +entry: + call void asm sideeffect ".file \22block12.c\22", "~{dirflag},~{fpsr},~{flags}"( ) + call void asm sideeffect ".line 3", "~{dirflag},~{fpsr},~{flags}"( ) + call void asm sideeffect "brl ${0:l}", "X,~{dirflag},~{fpsr},~{flags},~{memory}"( label %"LASM$foo" ) + call void asm sideeffect ".file \22block12.c\22", "~{dirflag},~{fpsr},~{flags}"( ) + call void asm sideeffect ".line 4", "~{dirflag},~{fpsr},~{flags}"( ) + call void asm sideeffect "int $$1", "~{dirflag},~{fpsr},~{flags},~{memory}"( ) + br label %"LASM$foo" + +"LASM$foo": ; preds = %entry + call void asm sideeffect ".file \22block12.c\22", "~{dirflag},~{fpsr},~{flags}"( ) + call void asm sideeffect ".line 5", "~{dirflag},~{fpsr},~{flags}"( ) + call void asm sideeffect "int $$1", "~{dirflag},~{fpsr},~{flags},~{memory}"( ) + br label %return + +return: ; preds = %"LASM$foo" + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/asm-global-imm.ll b/src/LLVM/test/CodeGen/X86/asm-global-imm.ll new file mode 100644 index 0000000..3e401b9 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/asm-global-imm.ll
@@ -0,0 +1,26 @@ +; RUN: llc < %s -march=x86 -relocation-model=static | FileCheck %s +; PR882 + +target datalayout = "e-p:32:32" +target triple = "i686-apple-darwin9.0.0d2" +@GV = weak global i32 0 ; <i32*> [#uses=2] +@str = external global [12 x i8] ; <[12 x i8]*> [#uses=1] + +define void @foo() { +; CHECK: foo: +; CHECK-NOT: ret +; CHECK: test1 $_GV +; CHECK-NOT: ret +; CHECK: test2 _GV +; CHECK: ret + + tail call void asm sideeffect "test1 $0", "i,~{dirflag},~{fpsr},~{flags}"( i32* @GV ) + tail call void asm sideeffect "test2 ${0:c}", "i,~{dirflag},~{fpsr},~{flags}"( i32* @GV ) + ret void +} + +define void @unknown_bootoption() { +entry: + call void asm sideeffect "ud2\0A\09.word ${0:c}\0A\09.long ${1:c}\0A", "i,i,~{dirflag},~{fpsr},~{flags}"( i32 235, i8* getelementptr ([12 x i8]* @str, i32 0, i64 0) ) + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/asm-indirect-mem.ll b/src/LLVM/test/CodeGen/X86/asm-indirect-mem.ll new file mode 100644 index 0000000..c57aa99 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/asm-indirect-mem.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s +; PR2267 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin8" + +define void @atomic_store_rel_int(i32* %p, i32 %v) nounwind { +entry: + %asmtmp = tail call i32 asm sideeffect "xchgl $1,$0", "=*m,=r,*m,1,~{dirflag},~{fpsr},~{flags}"( i32* %p, i32* %p, i32 %v ) nounwind ; <i32> [#uses=0] + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/asm-label.ll b/src/LLVM/test/CodeGen/X86/asm-label.ll new file mode 100644 index 0000000..1fc6e2e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/asm-label.ll
@@ -0,0 +1,40 @@ +; RUN: llc -mtriple=x86_64-apple-darwin10 -O0 < %s | FileCheck %s + +; test that we print a label that we use. We had a bug where +; we would print the jump, but not the label because it was considered +; a fall through. + +; CHECK: jmp LBB0_9 +; CHECK: LBB0_9: ## %cleanup + +define void @foo() { +entry: + br i1 undef, label %land.lhs.true, label %if.end11 + +land.lhs.true: ; preds = %entry + br i1 undef, label %if.then, label %if.end11 + +if.then: ; preds = %land.lhs.true + br i1 undef, label %if.then9, label %if.end + +if.then9: ; preds = %if.then + br label %cleanup + +if.end: ; preds = %if.then + br label %cleanup + +cleanup: ; preds = %if.end, %if.then9 + switch i32 undef, label %unreachable [ + i32 0, label %cleanup.cont + i32 1, label %if.end11 + ] + +cleanup.cont: ; preds = %cleanup + br label %if.end11 + +if.end11: ; preds = %cleanup.cont, %cleanup, %land.lhs.true, %entry + ret void + +unreachable: ; preds = %cleanup + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/asm-label2.ll b/src/LLVM/test/CodeGen/X86/asm-label2.ll new file mode 100644 index 0000000..8715aa9 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/asm-label2.ll
@@ -0,0 +1,26 @@ +; RUN: llc -mtriple=x86_64-apple-darwin10 -O0 < %s | FileCheck %s + +; test that we print a label that we use. We had a bug where +; we would print the jump, but not the label because it was considered +; a fall through. + +; CHECK: jmp LBB0_1 +; CHECK: LBB0_1: + +define void @foobar() { +entry: + invoke void @_zed() + to label %invoke.cont unwind label %lpad + +invoke.cont: ; preds = %entry + ret void + +lpad: ; preds = %entry + %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + unreachable +} + +declare void @_zed() ssp align 2 + +declare i32 @__gxx_personality_v0(...)
diff --git a/src/LLVM/test/CodeGen/X86/asm-modifier-P.ll b/src/LLVM/test/CodeGen/X86/asm-modifier-P.ll new file mode 100644 index 0000000..6139da8 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/asm-modifier-P.ll
@@ -0,0 +1,79 @@ +; RUN: llc < %s -march=x86 -mtriple=i686-unknown-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=CHECK-PIC-32 +; RUN: llc < %s -march=x86 -mtriple=i686-unknown-linux-gnu -relocation-model=static | FileCheck %s -check-prefix=CHECK-STATIC-32 +; RUN: llc < %s -march=x86-64 -relocation-model=static | FileCheck %s -check-prefix=CHECK-STATIC-64 +; RUN: llc < %s -march=x86-64 -relocation-model=pic | FileCheck %s -check-prefix=CHECK-PIC-64 +; PR3379 +; XFAIL: * + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" +@G = external global i32 ; <i32*> [#uses=1] + +declare void @bar(...) + +; extern int G; +; void test1() { +; asm("frob %0 x" : : "m"(G)); +; asm("frob %P0 x" : : "m"(G)); +;} + +define void @test1() nounwind { +entry: +; P suffix removes (rip) in -static 64-bit mode. + +; CHECK-PIC-64: test1: +; CHECK-PIC-64: movq G@GOTPCREL(%rip), %rax +; CHECK-PIC-64: frob (%rax) x +; CHECK-PIC-64: frob (%rax) x + +; CHECK-STATIC-64: test1: +; CHECK-STATIC-64: frob G(%rip) x +; CHECK-STATIC-64: frob G x + +; CHECK-PIC-32: test1: +; CHECK-PIC-32: frob G x +; CHECK-PIC-32: frob G x + +; CHECK-STATIC-32: test1: +; CHECK-STATIC-32: frob G x +; CHECK-STATIC-32: frob G x + + call void asm "frob $0 x", "*m"(i32* @G) nounwind + call void asm "frob ${0:P} x", "*m"(i32* @G) nounwind + ret void +} + +define void @test3() nounwind { +entry: +; CHECK-STATIC-64: test3: +; CHECK-STATIC-64: call bar +; CHECK-STATIC-64: call test3 +; CHECK-STATIC-64: call $bar +; CHECK-STATIC-64: call $test3 + +; CHECK-STATIC-32: test3: +; CHECK-STATIC-32: call bar +; CHECK-STATIC-32: call test3 +; CHECK-STATIC-32: call $bar +; CHECK-STATIC-32: call $test3 + +; CHECK-PIC-64: test3: +; CHECK-PIC-64: call bar@PLT +; CHECK-PIC-64: call test3@PLT +; CHECK-PIC-64: call $bar +; CHECK-PIC-64: call $test3 + +; CHECK-PIC-32: test3: +; CHECK-PIC-32: call 
bar@PLT +; CHECK-PIC-32: call test3@PLT +; CHECK-PIC-32: call $bar +; CHECK-PIC-32: call $test3 + + +; asm(" blah %P0" : : "X"(bar)); + tail call void asm sideeffect "call ${0:P}", "X"(void (...)* @bar) nounwind + tail call void asm sideeffect "call ${0:P}", "X"(void (...)* bitcast (void ()* @test3 to void (...)*)) nounwind + tail call void asm sideeffect "call $0", "X"(void (...)* @bar) nounwind + tail call void asm sideeffect "call $0", "X"(void (...)* bitcast (void ()* @test3 to void (...)*)) nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/asm-modifier.ll b/src/LLVM/test/CodeGen/X86/asm-modifier.ll new file mode 100644 index 0000000..44f972e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/asm-modifier.ll
@@ -0,0 +1,41 @@ +; RUN: llc < %s | FileCheck %s +; ModuleID = 'asm.c' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin9.6" + +define i32 @test1() nounwind { +entry: +; CHECK: test1: +; CHECK: movw %gs:6, %ax + %asmtmp.i = tail call i16 asm "movw\09%gs:${1:a}, ${0:w}", "=r,ir,~{dirflag},~{fpsr},~{flags}"(i32 6) nounwind ; <i16> [#uses=1] + %0 = zext i16 %asmtmp.i to i32 ; <i32> [#uses=1] + ret i32 %0 +} + +define zeroext i16 @test2(i32 %address) nounwind { +entry: +; CHECK: test2: +; CHECK: movw %gs:(%eax), %ax + %asmtmp = tail call i16 asm "movw\09%gs:${1:a}, ${0:w}", "=r,ir,~{dirflag},~{fpsr},~{flags}"(i32 %address) nounwind ; <i16> [#uses=1] + ret i16 %asmtmp +} + +@n = global i32 42 ; <i32*> [#uses=3] +@y = common global i32 0 ; <i32*> [#uses=3] + +define void @test3() nounwind { +entry: +; CHECK: test3: +; CHECK: movl _n, %eax + call void asm sideeffect "movl ${0:a}, %eax", "ir,~{dirflag},~{fpsr},~{flags},~{eax}"(i32* @n) nounwind + ret void +} + +define void @test4() nounwind { +entry: +; CHECK: test4: +; CHECK: movl L_y$non_lazy_ptr, %ecx +; CHECK: movl (%ecx), %eax + call void asm sideeffect "movl ${0:a}, %eax", "ir,~{dirflag},~{fpsr},~{flags},~{eax}"(i32* @y) nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/atomic-load-store-wide.ll b/src/LLVM/test/CodeGen/X86/atomic-load-store-wide.ll new file mode 100644 index 0000000..a9ebfef --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/atomic-load-store-wide.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -march=x86 | FileCheck %s + +; 64-bit load/store on x86-32 +; FIXME: The generated code can be substantially improved. + +define void @test1(i64* %ptr, i64 %val1) { +; CHECK: test1 +; CHECK: cmpxchg8b +; CHECK-NEXT: jne + store atomic i64 %val1, i64* %ptr seq_cst, align 8 + ret void +} + +define i64 @test2(i64* %ptr) { +; CHECK: test2 +; CHECK: cmpxchg8b + %val = load atomic i64* %ptr seq_cst, align 8 + ret i64 %val +}
diff --git a/src/LLVM/test/CodeGen/X86/atomic-load-store.ll b/src/LLVM/test/CodeGen/X86/atomic-load-store.ll new file mode 100644 index 0000000..fee4585 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/atomic-load-store.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -O0 | FileCheck %s + +define void @test1(i32* %ptr, i32 %val1) { +; CHECK: test1 +; CHECK: xchgl %esi, (%rdi) + store atomic i32 %val1, i32* %ptr seq_cst, align 4 + ret void +} + +define void @test2(i32* %ptr, i32 %val1) { +; CHECK: test2 +; CHECK: movl %esi, (%rdi) + store atomic i32 %val1, i32* %ptr release, align 4 + ret void +} + +define i32 @test3(i32* %ptr) { +; CHECK: test3 +; CHECK: movl (%rdi), %eax + %val = load atomic i32* %ptr seq_cst, align 4 + ret i32 %val +}
diff --git a/src/LLVM/test/CodeGen/X86/atomic-or.ll b/src/LLVM/test/CodeGen/X86/atomic-or.ll new file mode 100644 index 0000000..3f02eaf --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/atomic-or.ll
@@ -0,0 +1,28 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s + +; rdar://9692967 + +define void @t1(i64* %p, i32 %b) nounwind { +entry: + %p.addr = alloca i64*, align 8 + store i64* %p, i64** %p.addr, align 8 + %tmp = load i64** %p.addr, align 8 +; CHECK: t1: +; CHECK: movl $2147483648, %eax +; CHECK: lock +; CHECK-NEXT: orq %r{{.*}}, (%r{{.*}}) + %0 = atomicrmw or i64* %tmp, i64 2147483648 seq_cst + ret void +} + +define void @t2(i64* %p, i32 %b) nounwind { +entry: + %p.addr = alloca i64*, align 8 + store i64* %p, i64** %p.addr, align 8 + %tmp = load i64** %p.addr, align 8 +; CHECK: t2: +; CHECK: lock +; CHECK-NEXT: orq $2147483644, (%r{{.*}}) + %0 = atomicrmw or i64* %tmp, i64 2147483644 seq_cst + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/atomic_add.ll b/src/LLVM/test/CodeGen/X86/atomic_add.ll new file mode 100644 index 0000000..1fce256 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/atomic_add.ll
@@ -0,0 +1,201 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s + +; rdar://7103704 + +define void @sub1(i32* nocapture %p, i32 %v) nounwind ssp { +entry: +; CHECK: sub1: +; CHECK: subl + %0 = atomicrmw sub i32* %p, i32 %v monotonic + ret void +} + +define void @inc4(i64* nocapture %p) nounwind ssp { +entry: +; CHECK: inc4: +; CHECK: incq + %0 = atomicrmw add i64* %p, i64 1 monotonic + ret void +} + +define void @add8(i64* nocapture %p) nounwind ssp { +entry: +; CHECK: add8: +; CHECK: addq $2 + %0 = atomicrmw add i64* %p, i64 2 monotonic + ret void +} + +define void @add4(i64* nocapture %p, i32 %v) nounwind ssp { +entry: +; CHECK: add4: +; CHECK: addq + %0 = sext i32 %v to i64 ; <i64> [#uses=1] + %1 = atomicrmw add i64* %p, i64 %0 monotonic + ret void +} + +define void @inc3(i8* nocapture %p) nounwind ssp { +entry: +; CHECK: inc3: +; CHECK: incb + %0 = atomicrmw add i8* %p, i8 1 monotonic + ret void +} + +define void @add7(i8* nocapture %p) nounwind ssp { +entry: +; CHECK: add7: +; CHECK: addb $2 + %0 = atomicrmw add i8* %p, i8 2 monotonic + ret void +} + +define void @add3(i8* nocapture %p, i32 %v) nounwind ssp { +entry: +; CHECK: add3: +; CHECK: addb + %0 = trunc i32 %v to i8 ; <i8> [#uses=1] + %1 = atomicrmw add i8* %p, i8 %0 monotonic + ret void +} + +define void @inc2(i16* nocapture %p) nounwind ssp { +entry: +; CHECK: inc2: +; CHECK: incw + %0 = atomicrmw add i16* %p, i16 1 monotonic + ret void +} + +define void @add6(i16* nocapture %p) nounwind ssp { +entry: +; CHECK: add6: +; CHECK: addw $2 + %0 = atomicrmw add i16* %p, i16 2 monotonic + ret void +} + +define void @add2(i16* nocapture %p, i32 %v) nounwind ssp { +entry: +; CHECK: add2: +; CHECK: addw + %0 = trunc i32 %v to i16 ; <i16> [#uses=1] + %1 = atomicrmw add i16* %p, i16 %0 monotonic + ret void +} + +define void @inc1(i32* nocapture %p) nounwind ssp { +entry: +; CHECK: inc1: +; CHECK: incl + %0 = atomicrmw add i32* %p, i32 1 monotonic + ret void +} + +define void @add5(i32* nocapture %p) nounwind ssp 
{ +entry: +; CHECK: add5: +; CHECK: addl $2 + %0 = atomicrmw add i32* %p, i32 2 monotonic + ret void +} + +define void @add1(i32* nocapture %p, i32 %v) nounwind ssp { +entry: +; CHECK: add1: +; CHECK: addl + %0 = atomicrmw add i32* %p, i32 %v monotonic + ret void +} + +define void @dec4(i64* nocapture %p) nounwind ssp { +entry: +; CHECK: dec4: +; CHECK: decq + %0 = atomicrmw sub i64* %p, i64 1 monotonic + ret void +} + +define void @sub8(i64* nocapture %p) nounwind ssp { +entry: +; CHECK: sub8: +; CHECK: subq $2 + %0 = atomicrmw sub i64* %p, i64 2 monotonic + ret void +} + +define void @sub4(i64* nocapture %p, i32 %v) nounwind ssp { +entry: +; CHECK: sub4: +; CHECK: subq + %0 = sext i32 %v to i64 ; <i64> [#uses=1] + %1 = atomicrmw sub i64* %p, i64 %0 monotonic + ret void +} + +define void @dec3(i8* nocapture %p) nounwind ssp { +entry: +; CHECK: dec3: +; CHECK: decb + %0 = atomicrmw sub i8* %p, i8 1 monotonic + ret void +} + +define void @sub7(i8* nocapture %p) nounwind ssp { +entry: +; CHECK: sub7: +; CHECK: subb $2 + %0 = atomicrmw sub i8* %p, i8 2 monotonic + ret void +} + +define void @sub3(i8* nocapture %p, i32 %v) nounwind ssp { +entry: +; CHECK: sub3: +; CHECK: subb + %0 = trunc i32 %v to i8 ; <i8> [#uses=1] + %1 = atomicrmw sub i8* %p, i8 %0 monotonic + ret void +} + +define void @dec2(i16* nocapture %p) nounwind ssp { +entry: +; CHECK: dec2: +; CHECK: decw + %0 = atomicrmw sub i16* %p, i16 1 monotonic + ret void +} + +define void @sub6(i16* nocapture %p) nounwind ssp { +entry: +; CHECK: sub6: +; CHECK: subw $2 + %0 = atomicrmw sub i16* %p, i16 2 monotonic + ret void +} + +define void @sub2(i16* nocapture %p, i32 %v) nounwind ssp { +entry: +; CHECK: sub2: +; CHECK: negl + %0 = trunc i32 %v to i16 ; <i16> [#uses=1] + %1 = atomicrmw sub i16* %p, i16 %0 monotonic + ret void +} + +define void @dec1(i32* nocapture %p) nounwind ssp { +entry: +; CHECK: dec1: +; CHECK: decl + %0 = atomicrmw sub i32* %p, i32 1 monotonic + ret void +} + +define void @sub5(i32* 
nocapture %p) nounwind ssp { +entry: +; CHECK: sub5: +; CHECK: subl $2 + %0 = atomicrmw sub i32* %p, i32 2 monotonic + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/atomic_op.ll b/src/LLVM/test/CodeGen/X86/atomic_op.ll new file mode 100644 index 0000000..972dab2 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/atomic_op.ll
@@ -0,0 +1,119 @@ +; RUN: llc < %s -march=x86 | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" + +define void @func(i32 %argc, i8** %argv) nounwind { +entry: + %argc.addr = alloca i32 ; <i32*> [#uses=1] + %argv.addr = alloca i8** ; <i8***> [#uses=1] + %val1 = alloca i32 ; <i32*> [#uses=2] + %val2 = alloca i32 ; <i32*> [#uses=15] + %andt = alloca i32 ; <i32*> [#uses=2] + %ort = alloca i32 ; <i32*> [#uses=2] + %xort = alloca i32 ; <i32*> [#uses=2] + %old = alloca i32 ; <i32*> [#uses=18] + %temp = alloca i32 ; <i32*> [#uses=2] + store i32 %argc, i32* %argc.addr + store i8** %argv, i8*** %argv.addr + store i32 0, i32* %val1 + store i32 31, i32* %val2 + store i32 3855, i32* %andt + store i32 3855, i32* %ort + store i32 3855, i32* %xort + store i32 4, i32* %temp + %tmp = load i32* %temp + ; CHECK: lock + ; CHECK: xaddl + %0 = atomicrmw add i32* %val1, i32 %tmp monotonic + store i32 %0, i32* %old + ; CHECK: lock + ; CHECK: xaddl + %1 = atomicrmw sub i32* %val2, i32 30 monotonic + store i32 %1, i32* %old + ; CHECK: lock + ; CHECK: xaddl + %2 = atomicrmw add i32* %val2, i32 1 monotonic + store i32 %2, i32* %old + ; CHECK: lock + ; CHECK: xaddl + %3 = atomicrmw sub i32* %val2, i32 1 monotonic + store i32 %3, i32* %old + ; CHECK: andl + ; CHECK: lock + ; CHECK: cmpxchgl + %4 = atomicrmw and i32* %andt, i32 4080 monotonic + store i32 %4, i32* %old + ; CHECK: orl + ; CHECK: lock + ; CHECK: cmpxchgl + %5 = atomicrmw or i32* %ort, i32 4080 monotonic + store i32 %5, i32* %old + ; CHECK: xorl + ; CHECK: lock + ; CHECK: cmpxchgl + %6 = atomicrmw xor i32* %xort, i32 4080 monotonic + store i32 %6, i32* %old + ; CHECK: cmov + ; CHECK: lock + ; CHECK: cmpxchgl + %7 = atomicrmw min i32* %val2, i32 16 monotonic + store i32 %7, i32* %old + %neg = sub i32 0, 1 ; <i32> [#uses=1] + ; CHECK: cmov + ; CHECK: lock + ; CHECK: cmpxchgl + %8 = atomicrmw min i32* %val2, i32 %neg monotonic + 
store i32 %8, i32* %old + ; CHECK: cmov + ; CHECK: lock + ; CHECK: cmpxchgl + %9 = atomicrmw max i32* %val2, i32 1 monotonic + store i32 %9, i32* %old + ; CHECK: cmov + ; CHECK: lock + ; CHECK: cmpxchgl + %10 = atomicrmw max i32* %val2, i32 0 monotonic + store i32 %10, i32* %old + ; CHECK: cmov + ; CHECK: lock + ; CHECK: cmpxchgl + %11 = atomicrmw umax i32* %val2, i32 65535 monotonic + store i32 %11, i32* %old + ; CHECK: cmov + ; CHECK: lock + ; CHECK: cmpxchgl + %12 = atomicrmw umax i32* %val2, i32 10 monotonic + store i32 %12, i32* %old + ; CHECK: cmov + ; CHECK: lock + ; CHECK: cmpxchgl + %13 = atomicrmw umin i32* %val2, i32 1 monotonic + store i32 %13, i32* %old + ; CHECK: cmov + ; CHECK: lock + ; CHECK: cmpxchgl + %14 = atomicrmw umin i32* %val2, i32 10 monotonic + store i32 %14, i32* %old + ; CHECK: xchgl %{{.*}}, {{.*}}(%esp) + %15 = atomicrmw xchg i32* %val2, i32 1976 monotonic + store i32 %15, i32* %old + %neg1 = sub i32 0, 10 ; <i32> [#uses=1] + ; CHECK: lock + ; CHECK: cmpxchgl + %16 = cmpxchg i32* %val2, i32 %neg1, i32 1 monotonic + store i32 %16, i32* %old + ; CHECK: lock + ; CHECK: cmpxchgl + %17 = cmpxchg i32* %val2, i32 1976, i32 1 monotonic + store i32 %17, i32* %old + ret void +} + +define void @test2(i32 addrspace(256)* nocapture %P) nounwind { +entry: +; CHECK: lock +; CHECK: cmpxchgl %{{.*}}, %gs:(%{{.*}}) + + %0 = cmpxchg i32 addrspace(256)* %P, i32 0, i32 1 monotonic + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/attribute-sections.ll b/src/LLVM/test/CodeGen/X86/attribute-sections.ll new file mode 100644 index 0000000..3035334 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/attribute-sections.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | FileCheck %s -check-prefix=LINUX + +declare i32 @foo() +@G0 = global i32 ()* @foo, section ".init_array" + +; LINUX: .section .init_array,"aw" +; LINUX: .globl G0 + +@G1 = global i32 ()* @foo, section ".fini_array" + +; LINUX: .section .fini_array,"aw" +; LINUX: .globl G1 + +@G2 = global i32 ()* @foo, section ".preinit_array" + +; LINUX: .section .preinit_array,"aw" +; LINUX: .globl G2 +
diff --git a/src/LLVM/test/CodeGen/X86/avoid-lea-scale2.ll b/src/LLVM/test/CodeGen/X86/avoid-lea-scale2.ll new file mode 100644 index 0000000..cee2ee4 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/avoid-lea-scale2.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s +; CHECK: leal -2({{%rdi,%rdi|%rcx,%rcx}}) + +define i32 @foo(i32 %x) nounwind readnone { + %t0 = shl i32 %x, 1 + %t1 = add i32 %t0, -2 + ret i32 %t1 +} +
diff --git a/src/LLVM/test/CodeGen/X86/avoid-loop-align-2.ll b/src/LLVM/test/CodeGen/X86/avoid-loop-align-2.ll new file mode 100644 index 0000000..fc9d1f0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/avoid-loop-align-2.ll
@@ -0,0 +1,49 @@ +; RUN: llc < %s -march=x86 | grep align | count 4 + +; TODO: Is it a good idea to align inner loops? It's hard to know without +; knowing what their trip counts are, or other dynamic information. For +; now, CodeGen aligns all loops. + +@x = external global i32* ; <i32**> [#uses=1] + +define i32 @t(i32 %a, i32 %b) nounwind readonly ssp { +entry: + %0 = icmp eq i32 %a, 0 ; <i1> [#uses=1] + br i1 %0, label %bb5, label %bb.nph12 + +bb.nph12: ; preds = %entry + %1 = icmp eq i32 %b, 0 ; <i1> [#uses=1] + %2 = load i32** @x, align 8 ; <i32*> [#uses=1] + br i1 %1, label %bb2.preheader, label %bb2.preheader.us + +bb2.preheader.us: ; preds = %bb2.bb3_crit_edge.us, %bb.nph12 + %indvar18 = phi i32 [ 0, %bb.nph12 ], [ %indvar.next19, %bb2.bb3_crit_edge.us ] ; <i32> [#uses=2] + %sum.111.us = phi i32 [ 0, %bb.nph12 ], [ %4, %bb2.bb3_crit_edge.us ] ; <i32> [#uses=0] + %tmp16 = mul i32 %indvar18, %a ; <i32> [#uses=1] + br label %bb1.us + +bb1.us: ; preds = %bb1.us, %bb2.preheader.us + %indvar = phi i32 [ 0, %bb2.preheader.us ], [ %indvar.next, %bb1.us ] ; <i32> [#uses=2] + %tmp17 = add i32 %indvar, %tmp16 ; <i32> [#uses=1] + %tmp. = zext i32 %tmp17 to i64 ; <i64> [#uses=1] + %3 = getelementptr i32* %2, i64 %tmp. 
; <i32*> [#uses=1] + %4 = load i32* %3, align 4 ; <i32> [#uses=2] + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %indvar.next, %b ; <i1> [#uses=1] + br i1 %exitcond, label %bb2.bb3_crit_edge.us, label %bb1.us + +bb2.bb3_crit_edge.us: ; preds = %bb1.us + %indvar.next19 = add i32 %indvar18, 1 ; <i32> [#uses=2] + %exitcond22 = icmp eq i32 %indvar.next19, %a ; <i1> [#uses=1] + br i1 %exitcond22, label %bb5, label %bb2.preheader.us + +bb2.preheader: ; preds = %bb2.preheader, %bb.nph12 + %indvar24 = phi i32 [ %indvar.next25, %bb2.preheader ], [ 0, %bb.nph12 ] ; <i32> [#uses=1] + %indvar.next25 = add i32 %indvar24, 1 ; <i32> [#uses=2] + %exitcond28 = icmp eq i32 %indvar.next25, %a ; <i1> [#uses=1] + br i1 %exitcond28, label %bb5, label %bb2.preheader + +bb5: ; preds = %bb2.preheader, %bb2.bb3_crit_edge.us, %entry + %sum.1.lcssa = phi i32 [ 0, %entry ], [ 0, %bb2.preheader ], [ %4, %bb2.bb3_crit_edge.us ] ; <i32> [#uses=1] + ret i32 %sum.1.lcssa +}
diff --git a/src/LLVM/test/CodeGen/X86/avoid-loop-align.ll b/src/LLVM/test/CodeGen/X86/avoid-loop-align.ll new file mode 100644 index 0000000..7957db7 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/avoid-loop-align.ll
@@ -0,0 +1,39 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s + +; CodeGen should align the top of the loop, which differs from the loop +; header in this case. + +; CHECK: jmp LBB0_2 +; CHECK: .align +; CHECK: LBB0_1: + +@A = common global [100 x i32] zeroinitializer, align 32 ; <[100 x i32]*> [#uses=1] + +define i8* @test(i8* %Q, i32* %L) nounwind { +entry: + %tmp = tail call i32 (...)* @foo() nounwind ; <i32> [#uses=2] + %tmp1 = inttoptr i32 %tmp to i8* ; <i8*> [#uses=1] + br label %bb1 + +bb: ; preds = %bb1, %bb1 + %indvar.next = add i32 %P.0.rec, 1 ; <i32> [#uses=1] + br label %bb1 + +bb1: ; preds = %bb, %entry + %P.0.rec = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2] + %P.0 = getelementptr i8* %tmp1, i32 %P.0.rec ; <i8*> [#uses=3] + %tmp2 = load i8* %P.0, align 1 ; <i8> [#uses=1] + switch i8 %tmp2, label %bb4 [ + i8 12, label %bb + i8 42, label %bb + ] + +bb4: ; preds = %bb1 + %tmp3 = ptrtoint i8* %P.0 to i32 ; <i32> [#uses=1] + %tmp4 = sub i32 %tmp3, %tmp ; <i32> [#uses=1] + %tmp5 = getelementptr [100 x i32]* @A, i32 0, i32 %tmp4 ; <i32*> [#uses=1] + store i32 4, i32* %tmp5, align 4 + ret i8* %P.0 +} + +declare i32 @foo(...)
diff --git a/src/LLVM/test/CodeGen/X86/avx-arith.ll b/src/LLVM/test/CodeGen/X86/avx-arith.ll new file mode 100644 index 0000000..59988ca --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/avx-arith.ll
@@ -0,0 +1,261 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +; CHECK: vaddpd +define <4 x double> @addpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp { +entry: + %add.i = fadd <4 x double> %x, %y + ret <4 x double> %add.i +} + +; CHECK: vaddpd LCP{{.*}}(%rip) +define <4 x double> @addpd256fold(<4 x double> %y) nounwind uwtable readnone ssp { +entry: + %add.i = fadd <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00> + ret <4 x double> %add.i +} + +; CHECK: vaddps +define <8 x float> @addps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp { +entry: + %add.i = fadd <8 x float> %x, %y + ret <8 x float> %add.i +} + +; CHECK: vaddps LCP{{.*}}(%rip) +define <8 x float> @addps256fold(<8 x float> %y) nounwind uwtable readnone ssp { +entry: + %add.i = fadd <8 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000> + ret <8 x float> %add.i +} + +; CHECK: vsubpd +define <4 x double> @subpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp { +entry: + %sub.i = fsub <4 x double> %x, %y + ret <4 x double> %sub.i +} + +; CHECK: vsubpd (% +define <4 x double> @subpd256fold(<4 x double> %y, <4 x double>* nocapture %x) nounwind uwtable readonly ssp { +entry: + %tmp2 = load <4 x double>* %x, align 32 + %sub.i = fsub <4 x double> %y, %tmp2 + ret <4 x double> %sub.i +} + +; CHECK: vsubps +define <8 x float> @subps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp { +entry: + %sub.i = fsub <8 x float> %x, %y + ret <8 x float> %sub.i +} + +; CHECK: vsubps (% +define <8 x float> @subps256fold(<8 x float> %y, <8 x float>* nocapture %x) nounwind uwtable readonly ssp { +entry: + %tmp2 = load <8 x float>* %x, align 32 + %sub.i = fsub <8 x float> %y, %tmp2 + ret <8 x float> 
%sub.i +} + +; CHECK: vmulpd +define <4 x double> @mulpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp { +entry: + %mul.i = fmul <4 x double> %x, %y + ret <4 x double> %mul.i +} + +; CHECK: vmulpd LCP{{.*}}(%rip) +define <4 x double> @mulpd256fold(<4 x double> %y) nounwind uwtable readnone ssp { +entry: + %mul.i = fmul <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00> + ret <4 x double> %mul.i +} + +; CHECK: vmulps +define <8 x float> @mulps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp { +entry: + %mul.i = fmul <8 x float> %x, %y + ret <8 x float> %mul.i +} + +; CHECK: vmulps LCP{{.*}}(%rip) +define <8 x float> @mulps256fold(<8 x float> %y) nounwind uwtable readnone ssp { +entry: + %mul.i = fmul <8 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000> + ret <8 x float> %mul.i +} + +; CHECK: vdivpd +define <4 x double> @divpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp { +entry: + %div.i = fdiv <4 x double> %x, %y + ret <4 x double> %div.i +} + +; CHECK: vdivpd LCP{{.*}}(%rip) +define <4 x double> @divpd256fold(<4 x double> %y) nounwind uwtable readnone ssp { +entry: + %div.i = fdiv <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00> + ret <4 x double> %div.i +} + +; CHECK: vdivps +define <8 x float> @divps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp { +entry: + %div.i = fdiv <8 x float> %x, %y + ret <8 x float> %div.i +} + +; CHECK: vdivps LCP{{.*}}(%rip) +define <8 x float> @divps256fold(<8 x float> %y) nounwind uwtable readnone ssp { +entry: + %div.i = fdiv <8 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 
0x4002666660000000, float 0x3FF3333340000000> + ret <8 x float> %div.i +} + +; CHECK: vsqrtss +define float @sqrtA(float %a) nounwind uwtable readnone ssp { +entry: + %conv1 = tail call float @sqrtf(float %a) nounwind readnone + ret float %conv1 +} + +declare double @sqrt(double) readnone + +; CHECK: vsqrtsd +define double @sqrtB(double %a) nounwind uwtable readnone ssp { +entry: + %call = tail call double @sqrt(double %a) nounwind readnone + ret double %call +} + +declare float @sqrtf(float) readnone + + +; CHECK: vextractf128 $1 +; CHECK-NEXT: vextractf128 $1 +; CHECK-NEXT: vpaddq %xmm +; CHECK-NEXT: vpaddq %xmm +; CHECK-NEXT: vinsertf128 $1 +define <4 x i64> @vpaddq(<4 x i64> %i, <4 x i64> %j) nounwind readnone { + %x = add <4 x i64> %i, %j + ret <4 x i64> %x +} + +; CHECK: vextractf128 $1 +; CHECK-NEXT: vextractf128 $1 +; CHECK-NEXT: vpaddd %xmm +; CHECK-NEXT: vpaddd %xmm +; CHECK-NEXT: vinsertf128 $1 +define <8 x i32> @vpaddd(<8 x i32> %i, <8 x i32> %j) nounwind readnone { + %x = add <8 x i32> %i, %j + ret <8 x i32> %x +} + +; CHECK: vextractf128 $1 +; CHECK-NEXT: vextractf128 $1 +; CHECK-NEXT: vpaddw %xmm +; CHECK-NEXT: vpaddw %xmm +; CHECK-NEXT: vinsertf128 $1 +define <16 x i16> @vpaddw(<16 x i16> %i, <16 x i16> %j) nounwind readnone { + %x = add <16 x i16> %i, %j + ret <16 x i16> %x +} + +; CHECK: vextractf128 $1 +; CHECK-NEXT: vextractf128 $1 +; CHECK-NEXT: vpaddb %xmm +; CHECK-NEXT: vpaddb %xmm +; CHECK-NEXT: vinsertf128 $1 +define <32 x i8> @vpaddb(<32 x i8> %i, <32 x i8> %j) nounwind readnone { + %x = add <32 x i8> %i, %j + ret <32 x i8> %x +} + +; CHECK: vextractf128 $1 +; CHECK-NEXT: vextractf128 $1 +; CHECK-NEXT: vpsubq %xmm +; CHECK-NEXT: vpsubq %xmm +; CHECK-NEXT: vinsertf128 $1 +define <4 x i64> @vpsubq(<4 x i64> %i, <4 x i64> %j) nounwind readnone { + %x = sub <4 x i64> %i, %j + ret <4 x i64> %x +} + +; CHECK: vextractf128 $1 +; CHECK-NEXT: vextractf128 $1 +; CHECK-NEXT: vpsubd %xmm +; CHECK-NEXT: vpsubd %xmm +; CHECK-NEXT: vinsertf128 $1 +define 
<8 x i32> @vpsubd(<8 x i32> %i, <8 x i32> %j) nounwind readnone { + %x = sub <8 x i32> %i, %j + ret <8 x i32> %x +} + +; CHECK: vextractf128 $1 +; CHECK-NEXT: vextractf128 $1 +; CHECK-NEXT: vpsubw %xmm +; CHECK-NEXT: vpsubw %xmm +; CHECK-NEXT: vinsertf128 $1 +define <16 x i16> @vpsubw(<16 x i16> %i, <16 x i16> %j) nounwind readnone { + %x = sub <16 x i16> %i, %j + ret <16 x i16> %x +} + +; CHECK: vextractf128 $1 +; CHECK-NEXT: vextractf128 $1 +; CHECK-NEXT: vpsubb %xmm +; CHECK-NEXT: vpsubb %xmm +; CHECK-NEXT: vinsertf128 $1 +define <32 x i8> @vpsubb(<32 x i8> %i, <32 x i8> %j) nounwind readnone { + %x = sub <32 x i8> %i, %j + ret <32 x i8> %x +} + +; CHECK: vextractf128 $1 +; CHECK-NEXT: vextractf128 $1 +; CHECK-NEXT: vpmulld %xmm +; CHECK-NEXT: vpmulld %xmm +; CHECK-NEXT: vinsertf128 $1 +define <8 x i32> @vpmulld(<8 x i32> %i, <8 x i32> %j) nounwind readnone { + %x = mul <8 x i32> %i, %j + ret <8 x i32> %x +} + +; CHECK: vextractf128 $1 +; CHECK-NEXT: vextractf128 $1 +; CHECK-NEXT: vpmullw %xmm +; CHECK-NEXT: vpmullw %xmm +; CHECK-NEXT: vinsertf128 $1 +define <16 x i16> @vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone { + %x = mul <16 x i16> %i, %j + ret <16 x i16> %x +} + +; CHECK: vextractf128 $1 +; CHECK-NEXT: vextractf128 $1 +; CHECK-NEXT: vpmuludq %xmm +; CHECK-NEXT: vpsrlq $32, %xmm +; CHECK-NEXT: vpmuludq %xmm +; CHECK-NEXT: vpsllq $32, %xmm +; CHECK-NEXT: vpaddq %xmm +; CHECK-NEXT: vpmuludq %xmm +; CHECK-NEXT: vpsrlq $32, %xmm +; CHECK-NEXT: vpmuludq %xmm +; CHECK-NEXT: vpsllq $32, %xmm +; CHECK-NEXT: vpsrlq $32, %xmm +; CHECK-NEXT: vpmuludq %xmm +; CHECK-NEXT: vpsllq $32, %xmm +; CHECK-NEXT: vpaddq %xmm +; CHECK-NEXT: vpaddq %xmm +; CHECK-NEXT: vpsrlq $32, %xmm +; CHECK-NEXT: vpmuludq %xmm +; CHECK-NEXT: vpsllq $32, %xmm +; CHECK-NEXT: vpaddq %xmm +; CHECK-NEXT: vinsertf128 $1 +define <4 x i64> @mul-v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone { + %x = mul <4 x i64> %i, %j + ret <4 x i64> %x +} +
diff --git a/src/LLVM/test/CodeGen/X86/avx-basic.ll b/src/LLVM/test/CodeGen/X86/avx-basic.ll new file mode 100644 index 0000000..0a46b08 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/avx-basic.ll
@@ -0,0 +1,107 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +@x = common global <8 x float> zeroinitializer, align 32 +@y = common global <4 x double> zeroinitializer, align 32 +@z = common global <4 x float> zeroinitializer, align 16 + +define void @zero128() nounwind ssp { +entry: + ; CHECK: vpxor + ; CHECK: vmovaps + store <4 x float> zeroinitializer, <4 x float>* @z, align 16 + ret void +} + +define void @zero256() nounwind ssp { +entry: + ; CHECK: vxorps + ; CHECK: vmovaps + ; CHECK: vmovaps + store <8 x float> zeroinitializer, <8 x float>* @x, align 32 + store <4 x double> zeroinitializer, <4 x double>* @y, align 32 + ret void +} + +; CHECK: vpcmpeqd +; CHECK: vinsertf128 $1 +define void @ones([0 x float]* nocapture %RET, [0 x float]* nocapture %aFOO) nounwind { +allocas: + %ptr2vec615 = bitcast [0 x float]* %RET to <8 x float>* + store <8 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float +0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float +0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000>, <8 x +float>* %ptr2vec615, align 32 + ret void +} + +; CHECK: vpcmpeqd +; CHECK: vinsertf128 $1 +define void @ones2([0 x i32]* nocapture %RET, [0 x i32]* nocapture %aFOO) nounwind { +allocas: + %ptr2vec615 = bitcast [0 x i32]* %RET to <8 x i32>* + store <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <8 x i32>* %ptr2vec615, align 32 + ret void +} + +;;; Just make sure this doesn't crash +; CHECK: _ISelCrash +define <4 x i64> @ISelCrash(<4 x i64> %a) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 4> + ret <4 x i64> %shuffle +} + +;;; +;;; Check that some 256-bit vectors are xformed into 128 ops +; CHECK: _A +; CHECK: vshufpd $1 +; CHECK-NEXT: vextractf128 $1 +; CHECK-NEXT: vshufpd $1 +; CHECK-NEXT: vinsertf128 $1 +define <4 x i64> @A(<4 x i64> 
%a, <4 x i64> %b) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 7, i32 6> + ret <4 x i64> %shuffle +} + +; CHECK: _B +; CHECK: vshufpd $1, %ymm +define <4 x i64> @B(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 undef, i32 undef, i32 6> + ret <4 x i64> %shuffle +} + +; CHECK: movlhps +; CHECK-NEXT: vextractf128 $1 +; CHECK-NEXT: movlhps +; CHECK-NEXT: vinsertf128 $1 +define <4 x i64> @C(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 undef, i32 0, i32 undef, i32 6> + ret <4 x i64> %shuffle +} + +; CHECK: vpshufd $-96 +; CHECK: vpshufd $-6 +; CHECK: vinsertf128 $1 +define <8 x i32> @D(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 10, i32 10, i32 11, i32 11> + ret <8 x i32> %shuffle +} + +;;; Don't crash on movd +; CHECK: _VMOVZQI2PQI +; CHECK: vmovd (% +define <8 x i32> @VMOVZQI2PQI([0 x float]* nocapture %aFOO) nounwind { +allocas: + %ptrcast.i33.i = bitcast [0 x float]* %aFOO to i32* + %val.i34.i = load i32* %ptrcast.i33.i, align 4 + %ptroffset.i22.i992 = getelementptr [0 x float]* %aFOO, i64 0, i64 1 + %ptrcast.i23.i = bitcast float* %ptroffset.i22.i992 to i32* + %val.i24.i = load i32* %ptrcast.i23.i, align 4 + %updatedret.i30.i = insertelement <8 x i32> undef, i32 %val.i34.i, i32 1 + ret <8 x i32> %updatedret.i30.i +} +
diff --git a/src/LLVM/test/CodeGen/X86/avx-bitcast.ll b/src/LLVM/test/CodeGen/X86/avx-bitcast.ll new file mode 100644 index 0000000..ecc71be --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/avx-bitcast.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -O0 -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +; CHECK: vmovsd (% +; CHECK-NEXT: vmovd %xmm +define i64 @bitcasti64tof64() { + %a = load double* undef + %b = bitcast double %a to i64 + ret i64 %b +} +
diff --git a/src/LLVM/test/CodeGen/X86/avx-blend.ll b/src/LLVM/test/CodeGen/X86/avx-blend.ll new file mode 100644 index 0000000..7729491 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/avx-blend.ll
@@ -0,0 +1,104 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -promote-elements -mattr=+avx | FileCheck %s + +; AVX128 tests: + +;CHECK: vsel_float +;CHECK: vblendvps +;CHECK: ret +define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) { + %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %v1, <4 x float> %v2 + ret <4 x float> %vsel +} + + +;CHECK: vsel_i32 +;CHECK: vblendvps +;CHECK: ret +define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) { + %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i32> %v1, <4 x i32> %v2 + ret <4 x i32> %vsel +} + + +;CHECK: vsel_double +;CHECK: vblendvpd +;CHECK: ret +define <2 x double> @vsel_double(<2 x double> %v1, <2 x double> %v2) { + %vsel = select <2 x i1> <i1 true, i1 false>, <2 x double> %v1, <2 x double> %v2 + ret <2 x double> %vsel +} + + +;CHECK: vsel_i64 +;CHECK: vblendvpd +;CHECK: ret +define <2 x i64> @vsel_i64(<2 x i64> %v1, <2 x i64> %v2) { + %vsel = select <2 x i1> <i1 true, i1 false>, <2 x i64> %v1, <2 x i64> %v2 + ret <2 x i64> %vsel +} + + +;CHECK: vsel_i8 +;CHECK: vpblendvb +;CHECK: ret +define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) { + %vsel = select <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <16 x i8> %v1, <16 x i8> %v2 + ret <16 x i8> %vsel +} + + +; AVX256 tests: + + +;CHECK: vsel_float +;CHECK: vblendvps +;CHECK: ret +define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) { + %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x float> %v1, <8 x float> %v2 + ret <8 x float> %vsel +} + +;CHECK: vsel_i32 +;CHECK: vblendvps +;CHECK: ret +define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) { + %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i32> %v1, <8 x i32> %v2 + ret 
<8 x i32> %vsel +} + +;CHECK: vsel_double +;CHECK: vblendvpd +;CHECK: ret +define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) { + %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x double> %v1, <8 x double> %v2 + ret <8 x double> %vsel +} + +;CHECK: vsel_i64 +;CHECK: vblendvpd +;CHECK: ret +define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) { + %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i64> %v1, <8 x i64> %v2 + ret <8 x i64> %vsel +} + +;; TEST blend + compares +; CHECK: A +define <2 x double> @A(<2 x double> %x, <2 x double> %y) { + ; CHECK: vcmplepd + ; CHECK: vblendvpd + %max_is_x = fcmp oge <2 x double> %x, %y + %max = select <2 x i1> %max_is_x, <2 x double> %x, <2 x double> %y + ret <2 x double> %max +} + +; CHECK: B +define <2 x double> @B(<2 x double> %x, <2 x double> %y) { + ; CHECK: vcmpnlepd + ; CHECK: vblendvpd + %min_is_x = fcmp ult <2 x double> %x, %y + %min = select <2 x i1> %min_is_x, <2 x double> %x, <2 x double> %y + ret <2 x double> %min +} + +
diff --git a/src/LLVM/test/CodeGen/X86/avx-cast.ll b/src/LLVM/test/CodeGen/X86/avx-cast.ll new file mode 100644 index 0000000..d6d2415 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/avx-cast.ll
@@ -0,0 +1,47 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +; CHECK: vxorps +; CHECK-NEXT: vinsertf128 $0 +define <8 x float> @castA(<4 x float> %m) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <4 x float> %m, <4 x float> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4> + ret <8 x float> %shuffle.i +} + +; CHECK: vxorps +; CHECK-NEXT: vinsertf128 $0 +define <4 x double> @castB(<2 x double> %m) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <2 x double> %m, <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 2> + ret <4 x double> %shuffle.i +} + +; CHECK: vpxor +; CHECK-NEXT: vinsertf128 $0 +define <4 x i64> @castC(<2 x i64> %m) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <2 x i64> %m, <2 x i64> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 2> + ret <4 x i64> %shuffle.i +} + +; CHECK-NOT: vextractf128 $0 +define <4 x float> @castD(<8 x float> %m) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <8 x float> %m, <8 x float> %m, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + ret <4 x float> %shuffle.i +} + +; CHECK-NOT: vextractf128 $0 +define <2 x i64> @castE(<4 x i64> %m) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <4 x i64> %m, <4 x i64> %m, <2 x i32> <i32 0, i32 1> + ret <2 x i64> %shuffle.i +} + +; CHECK-NOT: vextractf128 $0 +define <2 x double> @castF(<4 x double> %m) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <4 x double> %m, <4 x double> %m, <2 x i32> <i32 0, i32 1> + ret <2 x double> %shuffle.i +} +
diff --git a/src/LLVM/test/CodeGen/X86/avx-cmp.ll b/src/LLVM/test/CodeGen/X86/avx-cmp.ll new file mode 100644 index 0000000..a050d6a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/avx-cmp.ll
@@ -0,0 +1,150 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +; CHECK: vcmpltps %ymm +; CHECK-NOT: vucomiss +define <8 x i32> @cmp00(<8 x float> %a, <8 x float> %b) nounwind readnone { + %bincmp = fcmp olt <8 x float> %a, %b + %s = sext <8 x i1> %bincmp to <8 x i32> + ret <8 x i32> %s +} + +; CHECK: vcmpltpd %ymm +; CHECK-NOT: vucomisd +define <4 x i64> @cmp01(<4 x double> %a, <4 x double> %b) nounwind readnone { + %bincmp = fcmp olt <4 x double> %a, %b + %s = sext <4 x i1> %bincmp to <4 x i64> + ret <4 x i64> %s +} + +declare void @scale() nounwind uwtable + +; CHECK: vucomisd +define void @render() nounwind uwtable { +entry: + br i1 undef, label %for.cond5, label %for.end52 + +for.cond5: + %or.cond = and i1 undef, false + br i1 %or.cond, label %for.body33, label %for.cond5 + +for.cond30: + br i1 false, label %for.body33, label %for.cond5 + +for.body33: + %tobool = fcmp une double undef, 0.000000e+00 + br i1 %tobool, label %if.then, label %for.cond30 + +if.then: + call void @scale() + br label %for.cond30 + +for.end52: + ret void +} + +; CHECK: vextractf128 $1 +; CHECK: vextractf128 $1 +; CHECK-NEXT: vpcmpgtd %xmm +; CHECK-NEXT: vpcmpgtd %xmm +; CHECK-NEXT: vinsertf128 $1 +define <8 x i32> @int256-cmp(<8 x i32> %i, <8 x i32> %j) nounwind readnone { + %bincmp = icmp slt <8 x i32> %i, %j + %x = sext <8 x i1> %bincmp to <8 x i32> + ret <8 x i32> %x +} + +; CHECK: vextractf128 $1 +; CHECK: vextractf128 $1 +; CHECK-NEXT: vpcmpgtq %xmm +; CHECK-NEXT: vpcmpgtq %xmm +; CHECK-NEXT: vinsertf128 $1 +define <4 x i64> @v4i64-cmp(<4 x i64> %i, <4 x i64> %j) nounwind readnone { + %bincmp = icmp slt <4 x i64> %i, %j + %x = sext <4 x i1> %bincmp to <4 x i64> + ret <4 x i64> %x +} + +; CHECK: vextractf128 $1 +; CHECK: vextractf128 $1 +; CHECK-NEXT: vpcmpgtw %xmm +; CHECK-NEXT: vpcmpgtw %xmm +; CHECK-NEXT: vinsertf128 $1 +define <16 x i16> @v16i16-cmp(<16 x i16> %i, <16 x i16> %j) nounwind readnone { + %bincmp = icmp slt <16 x i16> 
%i, %j + %x = sext <16 x i1> %bincmp to <16 x i16> + ret <16 x i16> %x +} + +; CHECK: vextractf128 $1 +; CHECK: vextractf128 $1 +; CHECK-NEXT: vpcmpgtb %xmm +; CHECK-NEXT: vpcmpgtb %xmm +; CHECK-NEXT: vinsertf128 $1 +define <32 x i8> @v32i8-cmp(<32 x i8> %i, <32 x i8> %j) nounwind readnone { + %bincmp = icmp slt <32 x i8> %i, %j + %x = sext <32 x i1> %bincmp to <32 x i8> + ret <32 x i8> %x +} + +; CHECK: vextractf128 $1 +; CHECK: vextractf128 $1 +; CHECK-NEXT: vpcmpeqd %xmm +; CHECK-NEXT: vpcmpeqd %xmm +; CHECK-NEXT: vinsertf128 $1 +define <8 x i32> @int256-cmpeq(<8 x i32> %i, <8 x i32> %j) nounwind readnone { + %bincmp = icmp eq <8 x i32> %i, %j + %x = sext <8 x i1> %bincmp to <8 x i32> + ret <8 x i32> %x +} + +; CHECK: vextractf128 $1 +; CHECK: vextractf128 $1 +; CHECK-NEXT: vpcmpeqq %xmm +; CHECK-NEXT: vpcmpeqq %xmm +; CHECK-NEXT: vinsertf128 $1 +define <4 x i64> @v4i64-cmpeq(<4 x i64> %i, <4 x i64> %j) nounwind readnone { + %bincmp = icmp eq <4 x i64> %i, %j + %x = sext <4 x i1> %bincmp to <4 x i64> + ret <4 x i64> %x +} + +; CHECK: vextractf128 $1 +; CHECK: vextractf128 $1 +; CHECK-NEXT: vpcmpeqw %xmm +; CHECK-NEXT: vpcmpeqw %xmm +; CHECK-NEXT: vinsertf128 $1 +define <16 x i16> @v16i16-cmpeq(<16 x i16> %i, <16 x i16> %j) nounwind readnone { + %bincmp = icmp eq <16 x i16> %i, %j + %x = sext <16 x i1> %bincmp to <16 x i16> + ret <16 x i16> %x +} + +; CHECK: vextractf128 $1 +; CHECK: vextractf128 $1 +; CHECK-NEXT: vpcmpeqb %xmm +; CHECK-NEXT: vpcmpeqb %xmm +; CHECK-NEXT: vinsertf128 $1 +define <32 x i8> @v32i8-cmpeq(<32 x i8> %i, <32 x i8> %j) nounwind readnone { + %bincmp = icmp eq <32 x i8> %i, %j + %x = sext <32 x i1> %bincmp to <32 x i8> + ret <32 x i8> %x +} + +;; Scalar comparison + +; CHECK: scalarcmpA +; CHECK: vcmpeqsd +define i32 @scalarcmpA() uwtable ssp { + %cmp29 = fcmp oeq double undef, 0.000000e+00 + %res = zext i1 %cmp29 to i32 + ret i32 %res +} + +; CHECK: scalarcmpB +; CHECK: vcmpeqss +define i32 @scalarcmpB() uwtable ssp { + %cmp29 = fcmp oeq 
float undef, 0.000000e+00 + %res = zext i1 %cmp29 to i32 + ret i32 %res +} +
diff --git a/src/LLVM/test/CodeGen/X86/avx-cvt.ll b/src/LLVM/test/CodeGen/X86/avx-cvt.ll new file mode 100644 index 0000000..6c0bd58 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/avx-cvt.ll
@@ -0,0 +1,83 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +; CHECK: vcvtdq2ps %ymm +define <8 x float> @sitofp00(<8 x i32> %a) nounwind { + %b = sitofp <8 x i32> %a to <8 x float> + ret <8 x float> %b +} + +; CHECK: vcvttps2dq %ymm +define <8 x i32> @fptosi00(<8 x float> %a) nounwind { + %b = fptosi <8 x float> %a to <8 x i32> + ret <8 x i32> %b +} + +; CHECK: vcvtdq2pd %xmm +define <4 x double> @sitofp01(<4 x i32> %a) { + %b = sitofp <4 x i32> %a to <4 x double> + ret <4 x double> %b +} + +; CHECK: vcvtpd2dqy %ymm +define <4 x i32> @fptosi01(<4 x double> %a) { + %b = fptosi <4 x double> %a to <4 x i32> + ret <4 x i32> %b +} + +; CHECK: vcvtpd2psy %ymm +; CHECK-NEXT: vcvtpd2psy %ymm +; CHECK-NEXT: vinsertf128 $1 +define <8 x float> @fptrunc00(<8 x double> %b) nounwind { + %a = fptrunc <8 x double> %b to <8 x float> + ret <8 x float> %a +} + +; CHECK: vcvtps2pd %xmm +define <4 x double> @fpext00(<4 x float> %b) nounwind { + %a = fpext <4 x float> %b to <4 x double> + ret <4 x double> %a +} + +; CHECK: vcvtsi2sdq (% +define double @funcA(i64* nocapture %e) nounwind uwtable readonly ssp { +entry: + %tmp1 = load i64* %e, align 8 + %conv = sitofp i64 %tmp1 to double + ret double %conv +} + +; CHECK: vcvtsi2sd (% +define double @funcB(i32* nocapture %e) nounwind uwtable readonly ssp { +entry: + %tmp1 = load i32* %e, align 4 + %conv = sitofp i32 %tmp1 to double + ret double %conv +} + +; CHECK: vcvtsi2ss (% +define float @funcC(i32* nocapture %e) nounwind uwtable readonly ssp { +entry: + %tmp1 = load i32* %e, align 4 + %conv = sitofp i32 %tmp1 to float + ret float %conv +} + +; CHECK: vcvtsi2ssq (% +define float @funcD(i64* nocapture %e) nounwind uwtable readonly ssp { +entry: + %tmp1 = load i64* %e, align 8 + %conv = sitofp i64 %tmp1 to float + ret float %conv +} + +; CHECK: vcvtss2sd +define void @fpext() nounwind uwtable { +entry: + %f = alloca float, align 4 + %d = alloca double, align 8 + %tmp = load float* %f, align 
4 + %conv = fpext float %tmp to double + store double %conv, double* %d, align 8 + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/avx-intrinsics-x86.ll b/src/LLVM/test/CodeGen/X86/avx-intrinsics-x86.ll new file mode 100644 index 0000000..5201688 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -0,0 +1,2578 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7 -mattr=avx | FileCheck %s + +define <2 x i64> @test_x86_aesni_aesdec(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vaesdec + %res = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>) nounwind readnone + + +define <2 x i64> @test_x86_aesni_aesdeclast(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vaesdeclast + %res = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>) nounwind readnone + + +define <2 x i64> @test_x86_aesni_aesenc(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vaesenc + %res = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>) nounwind readnone + + +define <2 x i64> @test_x86_aesni_aesenclast(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vaesenclast + %res = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>) nounwind readnone + + +define <2 x i64> @test_x86_aesni_aesimc(<2 x i64> %a0) { + ; CHECK: vaesimc + %res = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>) nounwind readnone + + +define <2 x i64> @test_x86_aesni_aeskeygenassist(<2 x i64> %a0) { + ; CHECK: vaeskeygenassist + %res = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone + + +define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x 
double> %a1) { + ; CHECK: vaddsd + %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vcmpordpd + %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone + + +define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vcmpordsd + %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone + + +define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vcomisd + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vcomisd + ; CHECK: setae + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vcomisd + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_comile_sd(<2 x double> 
%a0, <2 x double> %a1) { + ; CHECK: vcomisd + ; CHECK: setbe + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vcomisd + ; CHECK: sbbl %eax, %eax + ; CHECK: andl $1, %eax + %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vcomisd + ; CHECK: setne + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) { + ; CHECK: vcvtdq2pd + %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone + + +define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) { + ; CHECK: vcvtdq2ps + %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) { + ; CHECK: vcvtpd2dq + %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone + + +define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) { + ; CHECK: vcvtpd2ps + %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> 
%res +} +declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) { + ; CHECK: vcvtps2dq + %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone + + +define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) { + ; CHECK: vcvtps2pd + %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone + + +define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) { + ; CHECK: vcvtsd2si + %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone + + +define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) { + ; CHECK: vcvtsd2ss + %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0) { + ; CHECK: movl + ; CHECK: vcvtsi2sd + %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone + + +define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) { + ; CHECK: vcvtss2sd + %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) { + ; CHECK: vcvttpd2dq + %res = call <4 x i32> 
@llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) { + ; CHECK: vcvttps2dq + %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone + + +define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) { + ; CHECK: vcvttsd2si + %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vdivsd + %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_loadu_dq(i8* %a0) { + ; CHECK: movl + ; CHECK: vmovups + %res = call <16 x i8> @llvm.x86.sse2.loadu.dq(i8* %a0) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*) nounwind readonly + + +define <2 x double> @test_x86_sse2_loadu_pd(i8* %a0) { + ; CHECK: movl + ; CHECK: vmovups + %res = call <2 x double> @llvm.x86.sse2.loadu.pd(i8* %a0) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.loadu.pd(i8*) nounwind readonly + + +define void @test_x86_sse2_maskmov_dqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) { + ; CHECK: pushl + ; CHECK: movl + ; CHECK: vmaskmovdqu + ; CHECK: popl + call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) + ret void +} +declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind + + +define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vmaxpd + 
%res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vmaxsd + %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vminpd + %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vminsd + %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) { + ; CHECK: vmovmskpd + %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone + + +define void @test_x86_sse2_movnt_dq(i8* %a0, <2 x i64> %a1) { + ; CHECK: movl + ; CHECK: vmovntdq + call void @llvm.x86.sse2.movnt.dq(i8* %a0, <2 x i64> %a1) + ret void +} +declare void @llvm.x86.sse2.movnt.dq(i8*, <2 x i64>) nounwind + + +define void @test_x86_sse2_movnt_pd(i8* %a0, <2 x double> %a1) { + ; CHECK: movl + ; CHECK: vmovntpd + call void @llvm.x86.sse2.movnt.pd(i8* %a0, <2 x double> %a1) + ret void +} +declare void @llvm.x86.sse2.movnt.pd(i8*, <2 x double>) nounwind + + +define <2 x double> 
@test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vmulsd + %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpackssdw + %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpacksswb + %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpackuswb + %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpaddsb + %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpaddsw + %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpaddusb + %res = call <16 x i8> 
@llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpaddusw + %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpavgb + %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpavgw + %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_pcmpeq_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcmpeqb + %res = call <16 x i8> @llvm.x86.sse2.pcmpeq.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.pcmpeq.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_pcmpeq_d(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpcmpeqd + %res = call <4 x i32> @llvm.x86.sse2.pcmpeq.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.pcmpeq.d(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pcmpeq_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpcmpeqw + %res = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16>, <8 x i16>) 
nounwind readnone + + +define <16 x i8> @test_x86_sse2_pcmpgt_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcmpgtb + %res = call <16 x i8> @llvm.x86.sse2.pcmpgt.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.pcmpgt.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_pcmpgt_d(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpcmpgtd + %res = call <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pcmpgt_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpcmpgtw + %res = call <8 x i16> @llvm.x86.sse2.pcmpgt.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pcmpgt.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpmaddwd + %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpmaxsw + %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpmaxub + %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpminsw + %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x 
i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpminub + %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) { + ; CHECK: vpmovmskb + %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpmulhw + %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpmulhuw + %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpmuludq + %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpsadbw + %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> 
%a1) { + ; CHECK: vpslld + %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) { + ; CHECK: vpslldq + %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) { + ; CHECK: vpslldq + %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vpsllq + %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpsllw + %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) { + ; CHECK: vpslld + %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) { + ; CHECK: vpsllq + %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) { + ; 
CHECK: vpsllw + %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone + + +define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpsrad + %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpsraw + %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) { + ; CHECK: vpsrad + %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) { + ; CHECK: vpsraw + %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone + + +define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpsrld + %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) { + ; CHECK: vpsrldq + %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) { + ; CHECK: 
vpsrldq + %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vpsrlq + %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpsrlw + %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) { + ; CHECK: vpsrld + %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) { + ; CHECK: vpsrlq + %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) { + ; CHECK: vpsrlw + %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone + + +define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpsubsb + %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> 
%a1) { + ; CHECK: vpsubsw + %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpsubusb + %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpsubusw + %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) { + ; CHECK: vsqrtpd + %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) { + ; CHECK: vsqrtsd + %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone + + +define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) { + ; CHECK: movl + ; CHECK: vmovq + call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1) + ret void +} +declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind + + +define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) { + ; CHECK: movl + ; CHECK: vmovdqu + call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a1) + ret void +} +declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind + + +define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) { + ; CHECK: movl + ; CHECK: 
vmovupd + call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a1) + ret void +} +declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind + + +define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vsubsd + %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vucomisd + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vucomisd + ; CHECK: setae + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vucomisd + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vucomisd + ; CHECK: setbe + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vucomisd + ; CHECK: sbbl + %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> 
%a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vucomisd + ; CHECK: setne + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse3_addsub_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vaddsubpd + %res = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone + + +define <4 x float> @test_x86_sse3_addsub_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vaddsubps + %res = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone + + +define <2 x double> @test_x86_sse3_hadd_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vhaddpd + %res = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone + + +define <4 x float> @test_x86_sse3_hadd_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vhaddps + %res = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone + + +define <2 x double> @test_x86_sse3_hsub_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vhsubpd + %res = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x 
double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone + + +define <4 x float> @test_x86_sse3_hsub_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vhsubps + %res = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone + + +define <16 x i8> @test_x86_sse3_ldu_dq(i8* %a0) { + ; CHECK: movl + ; CHECK: vlddqu + %res = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly + + +define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vblendpd + %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32) nounwind readnone + + +define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vblendps + %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32) nounwind readnone + + +define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { + ; CHECK: vblendvpd + %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone + + +define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { + ; CHECK: vblendvps + %res = call <4 x float> 
@llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone + + +define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vdppd + %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i32) nounwind readnone + + +define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vdpps + %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i32) nounwind readnone + + +define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vinsertps + %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse41_movntdqa(i8* %a0) { + ; CHECK: movl + ; CHECK: vmovntdqa + %res = call <2 x i64> @llvm.x86.sse41.movntdqa(i8* %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.movntdqa(i8*) nounwind readonly + + +define <16 x i8> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vmpsadbw + %res = call <16 x i8> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone + + +define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpackusdw + %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 
x i32> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone + + +define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) { + ; CHECK: vpblendvb + %res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpblendw + %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 7) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vpcmpeqq + %res = call <2 x i64> @llvm.x86.sse41.pcmpeqq(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pcmpeqq(<2 x i64>, <2 x i64>) nounwind readnone + + +define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) { + ; CHECK: vphminposuw + %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse41_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpmaxsb + %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpmaxsd + %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind 
readnone + + +define <4 x i32> @test_x86_sse41_pmaxud(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpmaxud + %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_sse41_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpmaxuw + %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse41_pminsb(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpminsb + %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pminsd(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpminsd + %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pminud(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpminud + %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpminuw + %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) { + ; CHECK: vpmovsxbd + %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1] 
+ ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) { + ; CHECK: vpmovsxbq + %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) { + ; CHECK: vpmovsxbw + %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) { + ; CHECK: vpmovsxdq + %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) { + ; CHECK: vpmovsxwd + %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) { + ; CHECK: vpmovsxwq + %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) { + ; CHECK: vpmovzxbd + %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) { + ; CHECK: vpmovzxbq + %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone + + +define 
<8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) { + ; CHECK: vpmovzxbw + %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) { + ; CHECK: vpmovzxdq + %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) { + ; CHECK: vpmovzxwd + %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) { + ; CHECK: vpmovzxwq + %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpmuldq + %res = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone + + +define i32 @test_x86_sse41_ptestc(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vptest + ; CHECK: sbbl + %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse41_ptestnzc(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vptest + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse41.ptestnzc(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone + + 
+define i32 @test_x86_sse41_ptestz(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vptest + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone + + +define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) { + ; CHECK: vroundpd + %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone + + +define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) { + ; CHECK: vroundps + %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone + + +define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vroundsd + %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone + + +define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vroundss + %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone + + +define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) { + ; CHECK: movl + ; CHECK: movl + ; CHECK: vpcmpestri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone + + 
+define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) { + ; CHECK: movl + ; CHECK: movl + ; CHECK: vpcmpestri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpestria128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) { + ; CHECK: movl + ; CHECK: movl + ; CHECK: vpcmpestri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpestric128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) { + ; CHECK: movl + ; CHECK: movl + ; CHECK: vpcmpestri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) { + ; CHECK: movl + ; CHECK: movl + ; CHECK: vpcmpestri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpestris128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpestris128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpestriz128(<16 x i8> %a0, <16 x i8> %a2) { + ; CHECK: movl + ; CHECK: movl + ; CHECK: vpcmpestri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone + + +define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) { + ; CHECK: movl + ; 
CHECK: movl + ; CHECK: vpcmpestrm + %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone + + +define <2 x i64> @test_x86_sse42_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vpcmpgtq + %res = call <2 x i64> @llvm.x86.sse42.pcmpgtq(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse42.pcmpgtq(<2 x i64>, <2 x i64>) nounwind readnone + + +define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcmpistri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcmpistri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpistric128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcmpistri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcmpistri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpistris128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcmpistri + ; CHECK: movl 
+ %res = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcmpistri + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind readnone + + +define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpcmpistrm + %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone + + +define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vaddss + %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vcmpordps + %res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone + + +define <4 x float> @test_x86_sse_cmp_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vcmpordss + %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone + + +define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vcomiss + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 
@llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vcomiss + ; CHECK: setae + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vcomiss + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vcomiss + ; CHECK: setbe + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vcomiss + ; CHECK: sbb + %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vcomiss + ; CHECK: setne + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_cvtsi2ss(<4 x float> %a0) { + ; CHECK: movl + ; CHECK: vcvtsi2ss + %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} 
+declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone + + +define i32 @test_x86_sse_cvtss2si(<4 x float> %a0) { + ; CHECK: vcvtss2si + %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone + + +define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) { + ; CHECK: vcvttss2si + %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vdivss + %res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone + + +define void @test_x86_sse_ldmxcsr(i8* %a0) { + ; CHECK: movl + ; CHECK: vldmxcsr + call void @llvm.x86.sse.ldmxcsr(i8* %a0) + ret void +} +declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind + + +define <4 x float> @test_x86_sse_loadu_ps(i8* %a0) { + ; CHECK: movl + ; CHECK: vmovups + %res = call <4 x float> @llvm.x86.sse.loadu.ps(i8* %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.loadu.ps(i8*) nounwind readonly + + +define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vmaxps + %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vmaxss + %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone + + +define <4 x 
float> @test_x86_sse_min_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vminps + %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vminss + %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) { + ; CHECK: vmovmskps + %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone + + +define void @test_x86_sse_movnt_ps(i8* %a0, <4 x float> %a1) { + ; CHECK: movl + ; CHECK: vmovntps + call void @llvm.x86.sse.movnt.ps(i8* %a0, <4 x float> %a1) + ret void +} +declare void @llvm.x86.sse.movnt.ps(i8*, <4 x float>) nounwind + + +define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vmulss + %res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) { + ; CHECK: vrcpps + %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_rcp_ss(<4 x float> %a0) { + ; CHECK: vrcpss + %res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone + + +define <4 x float> 
@test_x86_sse_rsqrt_ps(<4 x float> %a0) { + ; CHECK: vrsqrtps + %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) { + ; CHECK: vrsqrtss + %res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) { + ; CHECK: vsqrtps + %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) { + ; CHECK: vsqrtss + %res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone + + +define void @test_x86_sse_stmxcsr(i8* %a0) { + ; CHECK: movl + ; CHECK: vstmxcsr + call void @llvm.x86.sse.stmxcsr(i8* %a0) + ret void +} +declare void @llvm.x86.sse.stmxcsr(i8*) nounwind + + +define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) { + ; CHECK: movl + ; CHECK: vmovups + call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1) + ret void +} +declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind + + +define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vsubss + %res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vucomiss + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 
@llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vucomiss + ; CHECK: setae + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vucomiss + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vucomiss + ; CHECK: setbe + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vucomiss + ; CHECK: sbbl + %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vucomiss + ; CHECK: setne + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone + + +define <16 x i8> @test_x86_ssse3_pabs_b_128(<16 x i8> %a0) { + ; CHECK: vpabsb + %res = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} 
+declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone + + +define <4 x i32> @test_x86_ssse3_pabs_d_128(<4 x i32> %a0) { + ; CHECK: vpabsd + %res = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_ssse3_pabs_w_128(<8 x i16> %a0) { + ; CHECK: vpabsw + %res = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vphaddd + %res = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone + + +define <4 x i32> @test_x86_ssse3_phadd_sw_128(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vphaddsw + %res = call <4 x i32> @llvm.x86.ssse3.phadd.sw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.ssse3.phadd.sw.128(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_ssse3_phadd_w_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vphaddw + %res = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_ssse3_phsub_d_128(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vphsubd + %res = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_ssse3_phsub_sw_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vphsubsw 
+ %res = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <8 x i16> @test_x86_ssse3_phsub_w_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vphsubw + %res = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpmaddubsw + %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <8 x i16> @test_x86_ssse3_pmul_hr_sw_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpmulhrsw + %res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_ssse3_pshuf_b_128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpshufb + %res = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone + + +define <16 x i8> @test_x86_ssse3_psign_b_128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: vpsignb + %res = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone + + +define <4 x i32> @test_x86_ssse3_psign_d_128(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vpsignd + %res = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> 
%a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_ssse3_psign_w_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: vpsignw + %res = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <4 x double> @test_x86_avx_addsub_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vaddsubpd + %res = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define <8 x float> @test_x86_avx_addsub_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vaddsubps + %res = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vblendpd + %res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 7) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i32) nounwind readnone + + +define <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vblendps + %res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone + + +define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { + ; CHECK: 
vblendvpd + %res = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone + + +define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { + ; CHECK: vblendvps + %res = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone + + +define <4 x double> @test_x86_avx_cmp_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vcmpordpd + %res = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone + + +define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vcmpordps + %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone + + +define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) { + ; CHECK: vcvtpd2psy + %res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double>) nounwind readnone + + +define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) { + ; CHECK: vcvtpd2dqy + %res = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone + + +define <4 x double> 
@test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) { + ; CHECK: vcvtps2pd + %res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone + + +define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) { + ; CHECK: vcvtps2dq + %res = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone + + +define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) { + ; CHECK: vcvtdq2pd + %res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone + + +define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) { + ; CHECK: vcvtdq2ps + %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone + + +define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) { + ; CHECK: vcvttpd2dqy + %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone + + +define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) { + ; CHECK: vcvttps2dq + %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone + + +define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vdpps + %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> 
@llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone + + +define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vhaddpd + %res = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define <8 x float> @test_x86_avx_hadd_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vhaddps + %res = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vhsubpd + %res = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vhsubps + %res = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define <32 x i8> @test_x86_avx_ldu_dq_256(i8* %a0) { + ; CHECK: vlddqu + %res = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1] + ret <32 x i8> %res +} +declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly + + +define <32 x i8> @test_x86_avx_loadu_dq_256(i8* %a0) { + ; CHECK: vmovdqu + %res = call <32 x i8> @llvm.x86.avx.loadu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1] + ret <32 x i8> %res +} +declare <32 x i8> @llvm.x86.avx.loadu.dq.256(i8*) nounwind readonly + + +define <4 x double> @test_x86_avx_loadu_pd_256(i8* %a0) { 
+ ; CHECK: vmovupd + %res = call <4 x double> @llvm.x86.avx.loadu.pd.256(i8* %a0) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.loadu.pd.256(i8*) nounwind readonly + + +define <8 x float> @test_x86_avx_loadu_ps_256(i8* %a0) { + ; CHECK: vmovups + %res = call <8 x float> @llvm.x86.avx.loadu.ps.256(i8* %a0) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.loadu.ps.256(i8*) nounwind readonly + + +define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x double> %a1) { + ; CHECK: vmaskmovpd + %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x double>) nounwind readonly + + +define <4 x double> @test_x86_avx_maskload_pd_256(i8* %a0, <4 x double> %a1) { + ; CHECK: vmaskmovpd + %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x double>) nounwind readonly + + +define <4 x float> @test_x86_avx_maskload_ps(i8* %a0, <4 x float> %a1) { + ; CHECK: vmaskmovps + %res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x float>) nounwind readonly + + +define <8 x float> @test_x86_avx_maskload_ps_256(i8* %a0, <8 x float> %a1) { + ; CHECK: vmaskmovps + %res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x float>) nounwind readonly + + +define void @test_x86_avx_maskstore_pd(i8* %a0, <2 x double> %a1, <2 x double> %a2) { + ; CHECK: vmaskmovpd + call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x double> %a1, <2 x double> %a2) + ret void +} +declare void 
@llvm.x86.avx.maskstore.pd(i8*, <2 x double>, <2 x double>) nounwind + + +define void @test_x86_avx_maskstore_pd_256(i8* %a0, <4 x double> %a1, <4 x double> %a2) { + ; CHECK: vmaskmovpd + call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x double> %a1, <4 x double> %a2) + ret void +} +declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x double>, <4 x double>) nounwind + + +define void @test_x86_avx_maskstore_ps(i8* %a0, <4 x float> %a1, <4 x float> %a2) { + ; CHECK: vmaskmovps + call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x float> %a1, <4 x float> %a2) + ret void +} +declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x float>, <4 x float>) nounwind + + +define void @test_x86_avx_maskstore_ps_256(i8* %a0, <8 x float> %a1, <8 x float> %a2) { + ; CHECK: vmaskmovps + call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x float> %a1, <8 x float> %a2) + ret void +} +declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x float>, <8 x float>) nounwind + + +define <4 x double> @test_x86_avx_max_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vmaxpd + %res = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define <8 x float> @test_x86_avx_max_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vmaxps + %res = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define <4 x double> @test_x86_avx_min_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vminpd + %res = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define <8 x float> 
@test_x86_avx_min_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vminps + %res = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define i32 @test_x86_avx_movmsk_pd_256(<4 x double> %a0) { + ; CHECK: vmovmskpd + %res = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone + + +define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) { + ; CHECK: vmovmskps + %res = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone + + +define void @test_x86_avx_movnt_dq_256(i8* %a0, <4 x i64> %a1) { + ; CHECK: vmovntdq + call void @llvm.x86.avx.movnt.dq.256(i8* %a0, <4 x i64> %a1) + ret void +} +declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind + + +define void @test_x86_avx_movnt_pd_256(i8* %a0, <4 x double> %a1) { + ; CHECK: vmovntpd + call void @llvm.x86.avx.movnt.pd.256(i8* %a0, <4 x double> %a1) + ret void +} +declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind + + +define void @test_x86_avx_movnt_ps_256(i8* %a0, <8 x float> %a1) { + ; CHECK: vmovntps + call void @llvm.x86.avx.movnt.ps.256(i8* %a0, <8 x float> %a1) + ret void +} +declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind + + +define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) { + ; CHECK: vptest + ; CHECK: sbbl + %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone + + +define i32 @test_x86_avx_ptestnzc_256(<4 x i64> %a0, <4 x i64> %a1) { + ; CHECK: vptest + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 
@llvm.x86.avx.ptestnzc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>) nounwind readnone + + +define i32 @test_x86_avx_ptestz_256(<4 x i64> %a0, <4 x i64> %a1) { + ; CHECK: vptest + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>) nounwind readnone + + +define <8 x float> @test_x86_avx_rcp_ps_256(<8 x float> %a0) { + ; CHECK: vrcpps + %res = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone + + +define <4 x double> @test_x86_avx_round_pd_256(<4 x double> %a0) { + ; CHECK: vroundpd + %res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone + + +define <8 x float> @test_x86_avx_round_ps_256(<8 x float> %a0) { + ; CHECK: vroundps + %res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone + + +define <8 x float> @test_x86_avx_rsqrt_ps_256(<8 x float> %a0) { + ; CHECK: vrsqrtps + %res = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone + + +define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) { + ; CHECK: vsqrtpd + %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone + + +define <8 x float> 
@test_x86_avx_sqrt_ps_256(<8 x float> %a0) { + ; CHECK: vsqrtps + %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone + + +define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) { + ; CHECK: vmovdqu + call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a1) + ret void +} +declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind + + +define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) { + ; CHECK: vmovupd + call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a1) + ret void +} +declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind + + +define void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) { + ; CHECK: vmovups + call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1) + ret void +} +declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind + + +define <4 x double> @test_x86_avx_vbroadcast_sd_256(i8* %a0) { + ; CHECK: vbroadcastsd + %res = call <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8* %a0) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8*) nounwind readonly + + +define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) { + ; CHECK: vbroadcastf128 + %res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8*) nounwind readonly + + +define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) { + ; CHECK: vbroadcastf128 + %res = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %a0) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly + + +define <4 x float> @test_x86_avx_vbroadcastss(i8* %a0) { + ; CHECK: vbroadcastss + %res = call <4 x float> 
@llvm.x86.avx.vbroadcastss(i8* %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx.vbroadcastss(i8*) nounwind readonly + + +define <8 x float> @test_x86_avx_vbroadcastss_256(i8* %a0) { + ; CHECK: vbroadcastss + %res = call <8 x float> @llvm.x86.avx.vbroadcastss.256(i8* %a0) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.vbroadcastss.256(i8*) nounwind readonly + + +define <2 x double> @test_x86_avx_vextractf128_pd_256(<4 x double> %a0) { + ; CHECK: vextractf128 + %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone + + +define <4 x float> @test_x86_avx_vextractf128_ps_256(<8 x float> %a0) { + ; CHECK: vextractf128 + %res = call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a0, i8 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone + + +define <4 x i32> @test_x86_avx_vextractf128_si_256(<8 x i32> %a0) { + ; CHECK: vextractf128 + %res = call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %a0, i8 7) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32>, i8) nounwind readnone + + +define <4 x double> @test_x86_avx_vinsertf128_pd_256(<4 x double> %a0, <2 x double> %a1) { + ; CHECK: vinsertf128 + %res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %a0, <2 x double> %a1, i8 7) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone + + +define <8 x float> @test_x86_avx_vinsertf128_ps_256(<8 x float> %a0, <4 x float> %a1) { + ; CHECK: vinsertf128 + %res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %a0, <4 x float> 
%a1, i8 7) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone + + +define <8 x i32> @test_x86_avx_vinsertf128_si_256(<8 x i32> %a0, <4 x i32> %a1) { + ; CHECK: vinsertf128 + %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone + + +define <4 x double> @test_x86_avx_vperm2f128_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vperm2f128 + %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone + + +define <8 x float> @test_x86_avx_vperm2f128_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vperm2f128 + %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone + + +define <8 x i32> @test_x86_avx_vperm2f128_si_256(<8 x i32> %a0, <8 x i32> %a1) { + ; CHECK: vperm2f128 + %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone + + +define <2 x double> @test_x86_avx_vpermil_pd(<2 x double> %a0) { + ; CHECK: vpermilpd + %res = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double>, i8) nounwind readnone + + +define <4 x double> @test_x86_avx_vpermil_pd_256(<4 x double> %a0) { + ; CHECK: vpermilpd + 
%res = call <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind readnone + + +define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) { + ; CHECK: vpermilps + %res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float>, i8) nounwind readnone + + +define <8 x float> @test_x86_avx_vpermil_ps_256(<8 x float> %a0) { + ; CHECK: vpermilps + %res = call <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float> %a0, i8 7) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float>, i8) nounwind readnone + + +define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) { + ; CHECK: vpermilpd + %res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone + + +define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) { + ; CHECK: vpermilpd + %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone + + +define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) { + ; CHECK: vpermilps + %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone + + +define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) { + ; CHECK: vpermilps + %res = call <8 x float> 
@llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone + + +define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vtestpd + ; CHECK: sbbl + %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vtestpd + ; CHECK: sbbl + %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vtestps + ; CHECK: sbbl + %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vtestps + ; CHECK: sbbl + %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define i32 @test_x86_avx_vtestnzc_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vtestpd + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_avx_vtestnzc_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vtestpd + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %a0, 
<4 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define i32 @test_x86_avx_vtestnzc_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vtestps + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_avx_vtestnzc_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vtestps + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define i32 @test_x86_avx_vtestz_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vtestpd + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_avx_vtestz_pd_256(<4 x double> %a0, <4 x double> %a1) { + ; CHECK: vtestpd + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestz.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define i32 @test_x86_avx_vtestz_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vtestps + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_avx_vtestz_ps_256(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vtestps + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %a0, <8 
x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define void @test_x86_avx_vzeroall() { + ; CHECK: vzeroall + call void @llvm.x86.avx.vzeroall() + ret void +} +declare void @llvm.x86.avx.vzeroall() nounwind + + +define void @test_x86_avx_vzeroupper() { + ; CHECK: vzeroupper + call void @llvm.x86.avx.vzeroupper() + ret void +} +declare void @llvm.x86.avx.vzeroupper() nounwind + +
diff --git a/src/LLVM/test/CodeGen/X86/avx-intrinsics-x86_64.ll b/src/LLVM/test/CodeGen/X86/avx-intrinsics-x86_64.ll new file mode 100644 index 0000000..5a466fc --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/avx-intrinsics-x86_64.ll
@@ -0,0 +1,50 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86-64 -mcpu=corei7 -mattr=avx | FileCheck %s + +define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) { + ; CHECK: vcvtsd2si + %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1] + ret i64 %res +} +declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) { + ; CHECK: vcvtsi2sd + %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone + + +define i64 @test_x86_sse2_cvttsd2si64(<2 x double> %a0) { + ; CHECK: vcvttsd2si + %res = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) ; <i64> [#uses=1] + ret i64 %res +} +declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone + + +define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) { + ; CHECK: vcvtss2si + %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1] + ret i64 %res +} +declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) { + ; CHECK: vcvtsi2ss + %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone + + +define i64 @test_x86_sse_cvttss2si64(<4 x float> %a0) { + ; CHECK: vcvttss2si + %res = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0) ; <i64> [#uses=1] + ret i64 %res +} +declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone + +
diff --git a/src/LLVM/test/CodeGen/X86/avx-load-store.ll b/src/LLVM/test/CodeGen/X86/avx-load-store.ll new file mode 100644 index 0000000..07a63ef --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/avx-load-store.ll
@@ -0,0 +1,105 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s +; RUN: llc -O0 < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s -check-prefix=CHECK_O0 + +; CHECK: vmovaps +; CHECK: vmovaps +; CHECK: vmovaps +; CHECK: vmovaps +; CHECK: vmovaps +; CHECK: vmovaps +define void @test_256_load(double* nocapture %d, float* nocapture %f, <4 x i64>* nocapture %i) nounwind uwtable ssp { +entry: + %0 = bitcast double* %d to <4 x double>* + %tmp1.i = load <4 x double>* %0, align 32 + %1 = bitcast float* %f to <8 x float>* + %tmp1.i17 = load <8 x float>* %1, align 32 + %tmp1.i16 = load <4 x i64>* %i, align 32 + tail call void @dummy(<4 x double> %tmp1.i, <8 x float> %tmp1.i17, <4 x i64> %tmp1.i16) nounwind + store <4 x double> %tmp1.i, <4 x double>* %0, align 32 + store <8 x float> %tmp1.i17, <8 x float>* %1, align 32 + store <4 x i64> %tmp1.i16, <4 x i64>* %i, align 32 + ret void +} + +declare void @dummy(<4 x double>, <8 x float>, <4 x i64>) + +;; +;; The two tests below check that we must fold load + scalar_to_vector +;; + ins_subvec+ zext into only a single vmovss or vmovsd + +; CHECK: vmovss (% +define <8 x float> @mov00(<8 x float> %v, float * %ptr) nounwind { + %val = load float* %ptr + %i0 = insertelement <8 x float> zeroinitializer, float %val, i32 0 + ret <8 x float> %i0 +} + +; CHECK: vmovsd (% +define <4 x double> @mov01(<4 x double> %v, double * %ptr) nounwind { + %val = load double* %ptr + %i0 = insertelement <4 x double> zeroinitializer, double %val, i32 0 + ret <4 x double> %i0 +} + +; CHECK: vmovaps %ymm +define void @storev16i16(<16 x i16> %a) nounwind { + store <16 x i16> %a, <16 x i16>* undef, align 32 + unreachable +} + +; CHECK: vmovups %ymm +define void @storev16i16_01(<16 x i16> %a) nounwind { + store <16 x i16> %a, <16 x i16>* undef, align 4 + unreachable +} + +; CHECK: vmovaps %ymm +define void @storev32i8(<32 x i8> %a) nounwind { + store <32 x i8> %a, <32 x i8>* undef, align 32 
+ unreachable +} + +; CHECK: vmovups %ymm +define void @storev32i8_01(<32 x i8> %a) nounwind { + store <32 x i8> %a, <32 x i8>* undef, align 4 + unreachable +} + +; It is faster to make two saves, if the data is already in XMM registers. For +; example, after making an integer operation. +; CHECK: _double_save +; CHECK-NOT: vinsertf128 $1 +; CHECK-NOT: vinsertf128 $0 +; CHECK: vmovaps %xmm +; CHECK: vmovaps %xmm +define void @double_save(<4 x i32> %A, <4 x i32> %B, <8 x i32>* %P) nounwind ssp { +entry: + %Z = shufflevector <4 x i32>%A, <4 x i32>%B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + store <8 x i32> %Z, <8 x i32>* %P, align 16 + ret void +} + +declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x float>, <8 x float>) nounwind + +; CHECK_O0: _f_f +; CHECK-O0: vmovss LCPI +; CHECK-O0: vxorps %xmm +; CHECK-O0: vmovss %xmm +define void @f_f() nounwind { +allocas: + br i1 undef, label %cif_mask_all, label %cif_mask_mixed + +cif_mask_all: ; preds = %allocas + unreachable + +cif_mask_mixed: ; preds = %allocas + br i1 undef, label %cif_mixed_test_all, label %cif_mixed_test_any_check + +cif_mixed_test_all: ; preds = %cif_mask_mixed + call void @llvm.x86.avx.maskstore.ps.256(i8* undef, <8 x float> <float 0xFFFFFFFFE0000000, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, <8 x float> undef) nounwind + unreachable + +cif_mixed_test_any_check: ; preds = %cif_mask_mixed + unreachable +} +
diff --git a/src/LLVM/test/CodeGen/X86/avx-logic.ll b/src/LLVM/test/CodeGen/X86/avx-logic.ll new file mode 100644 index 0000000..518c09c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/avx-logic.ll
@@ -0,0 +1,179 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +; CHECK: vandpd +define <4 x double> @andpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <4 x double> %x to <4 x i64> + %1 = bitcast <4 x double> %y to <4 x i64> + %and.i = and <4 x i64> %0, %1 + %2 = bitcast <4 x i64> %and.i to <4 x double> + ret <4 x double> %2 +} + +; CHECK: vandpd LCP{{.*}}(%rip) +define <4 x double> @andpd256fold(<4 x double> %y) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <4 x double> %y to <4 x i64> + %and.i = and <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507> + %1 = bitcast <4 x i64> %and.i to <4 x double> + ret <4 x double> %1 +} + +; CHECK: vandps +define <8 x float> @andps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <8 x float> %x to <8 x i32> + %1 = bitcast <8 x float> %y to <8 x i32> + %and.i = and <8 x i32> %0, %1 + %2 = bitcast <8 x i32> %and.i to <8 x float> + ret <8 x float> %2 +} + +; CHECK: vandps LCP{{.*}}(%rip) +define <8 x float> @andps256fold(<8 x float> %y) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <8 x float> %y to <8 x i32> + %and.i = and <8 x i32> %0, <i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938, i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938> + %1 = bitcast <8 x i32> %and.i to <8 x float> + ret <8 x float> %1 +} + +; CHECK: vxorpd +define <4 x double> @xorpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <4 x double> %x to <4 x i64> + %1 = bitcast <4 x double> %y to <4 x i64> + %xor.i = xor <4 x i64> %0, %1 + %2 = bitcast <4 x i64> %xor.i to <4 x double> + ret <4 x double> %2 +} + +; CHECK: vxorpd LCP{{.*}}(%rip) +define <4 x double> @xorpd256fold(<4 x double> %y) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <4 x double> %y to <4 x i64> + 
%xor.i = xor <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507> + %1 = bitcast <4 x i64> %xor.i to <4 x double> + ret <4 x double> %1 +} + +; CHECK: vxorps +define <8 x float> @xorps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <8 x float> %x to <8 x i32> + %1 = bitcast <8 x float> %y to <8 x i32> + %xor.i = xor <8 x i32> %0, %1 + %2 = bitcast <8 x i32> %xor.i to <8 x float> + ret <8 x float> %2 +} + +; CHECK: vxorps LCP{{.*}}(%rip) +define <8 x float> @xorps256fold(<8 x float> %y) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <8 x float> %y to <8 x i32> + %xor.i = xor <8 x i32> %0, <i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938, i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938> + %1 = bitcast <8 x i32> %xor.i to <8 x float> + ret <8 x float> %1 +} + +; CHECK: vorpd +define <4 x double> @orpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <4 x double> %x to <4 x i64> + %1 = bitcast <4 x double> %y to <4 x i64> + %or.i = or <4 x i64> %0, %1 + %2 = bitcast <4 x i64> %or.i to <4 x double> + ret <4 x double> %2 +} + +; CHECK: vorpd LCP{{.*}}(%rip) +define <4 x double> @orpd256fold(<4 x double> %y) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <4 x double> %y to <4 x i64> + %or.i = or <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507> + %1 = bitcast <4 x i64> %or.i to <4 x double> + ret <4 x double> %1 +} + +; CHECK: vorps +define <8 x float> @orps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <8 x float> %x to <8 x i32> + %1 = bitcast <8 x float> %y to <8 x i32> + %or.i = or <8 x i32> %0, %1 + %2 = bitcast <8 x i32> %or.i to <8 x float> + ret <8 x float> %2 +} + +; CHECK: vorps LCP{{.*}}(%rip) +define <8 x float> @orps256fold(<8 x float> %y) nounwind uwtable 
readnone ssp { +entry: + %0 = bitcast <8 x float> %y to <8 x i32> + %or.i = or <8 x i32> %0, <i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938, i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938> + %1 = bitcast <8 x i32> %or.i to <8 x float> + ret <8 x float> %1 +} + +; CHECK: vandnpd +define <4 x double> @andnotpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <4 x double> %x to <4 x i64> + %neg.i = xor <4 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1> + %1 = bitcast <4 x double> %y to <4 x i64> + %and.i = and <4 x i64> %1, %neg.i + %2 = bitcast <4 x i64> %and.i to <4 x double> + ret <4 x double> %2 +} + +; CHECK: vandnpd (% +define <4 x double> @andnotpd256fold(<4 x double> %y, <4 x double>* nocapture %x) nounwind uwtable readonly ssp { +entry: + %tmp2 = load <4 x double>* %x, align 32 + %0 = bitcast <4 x double> %y to <4 x i64> + %neg.i = xor <4 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1> + %1 = bitcast <4 x double> %tmp2 to <4 x i64> + %and.i = and <4 x i64> %1, %neg.i + %2 = bitcast <4 x i64> %and.i to <4 x double> + ret <4 x double> %2 +} + +; CHECK: vandnps +define <8 x float> @andnotps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <8 x float> %x to <8 x i32> + %neg.i = xor <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> + %1 = bitcast <8 x float> %y to <8 x i32> + %and.i = and <8 x i32> %1, %neg.i + %2 = bitcast <8 x i32> %and.i to <8 x float> + ret <8 x float> %2 +} + +; CHECK: vandnps (% +define <8 x float> @andnotps256fold(<8 x float> %y, <8 x float>* nocapture %x) nounwind uwtable readonly ssp { +entry: + %tmp2 = load <8 x float>* %x, align 32 + %0 = bitcast <8 x float> %y to <8 x i32> + %neg.i = xor <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> + %1 = bitcast <8 x float> %tmp2 to <8 x i32> + %and.i = and <8 x i32> %1, %neg.i + %2 = bitcast <8 x i32> %and.i to <8 x float> + ret <8 
x float> %2 +} + +;;; Test that basic 2 x i64 logic use the integer version on AVX + +; CHECK: vpandn %xmm +define <2 x i64> @vpandn(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp { +entry: + %y = xor <2 x i64> %a, <i64 -1, i64 -1> + %x = and <2 x i64> %a, %y + ret <2 x i64> %x +} + +; CHECK: vpand %xmm +define <2 x i64> @vpand(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp { +entry: + %x = and <2 x i64> %a, %b + ret <2 x i64> %x +} +
diff --git a/src/LLVM/test/CodeGen/X86/avx-minmax.ll b/src/LLVM/test/CodeGen/X86/avx-minmax.ll new file mode 100644 index 0000000..f36ba7b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/avx-minmax.ll
@@ -0,0 +1,65 @@ +; RUN: llc < %s -march=x86-64 -mattr=+avx -asm-verbose=false -join-physregs -enable-unsafe-fp-math -enable-no-nans-fp-math -promote-elements | FileCheck -check-prefix=UNSAFE %s + +; UNSAFE: maxpd: +; UNSAFE: vmaxpd {{.+}}, %xmm +define <2 x double> @maxpd(<2 x double> %x, <2 x double> %y) { + %max_is_x = fcmp oge <2 x double> %x, %y + %max = select <2 x i1> %max_is_x, <2 x double> %x, <2 x double> %y + ret <2 x double> %max +} + +; UNSAFE: minpd: +; UNSAFE: vminpd {{.+}}, %xmm +define <2 x double> @minpd(<2 x double> %x, <2 x double> %y) { + %min_is_x = fcmp ole <2 x double> %x, %y + %min = select <2 x i1> %min_is_x, <2 x double> %x, <2 x double> %y + ret <2 x double> %min +} + +; UNSAFE: maxps: +; UNSAFE: vmaxps {{.+}}, %xmm +define <4 x float> @maxps(<4 x float> %x, <4 x float> %y) { + %max_is_x = fcmp oge <4 x float> %x, %y + %max = select <4 x i1> %max_is_x, <4 x float> %x, <4 x float> %y + ret <4 x float> %max +} + +; UNSAFE: minps: +; UNSAFE: vminps {{.+}}, %xmm +define <4 x float> @minps(<4 x float> %x, <4 x float> %y) { + %min_is_x = fcmp ole <4 x float> %x, %y + %min = select <4 x i1> %min_is_x, <4 x float> %x, <4 x float> %y + ret <4 x float> %min +} + +; UNSAFE: vmaxpd: +; UNSAFE: vmaxpd %ymm +define <4 x double> @vmaxpd(<4 x double> %x, <4 x double> %y) { + %max_is_x = fcmp oge <4 x double> %x, %y + %max = select <4 x i1> %max_is_x, <4 x double> %x, <4 x double> %y + ret <4 x double> %max +} + +; UNSAFE: vminpd: +; UNSAFE: vminpd %ymm +define <4 x double> @vminpd(<4 x double> %x, <4 x double> %y) { + %min_is_x = fcmp ole <4 x double> %x, %y + %min = select <4 x i1> %min_is_x, <4 x double> %x, <4 x double> %y + ret <4 x double> %min +} + +; UNSAFE: vmaxps: +; UNSAFE: vmaxps %ymm +define <8 x float> @vmaxps(<8 x float> %x, <8 x float> %y) { + %max_is_x = fcmp oge <8 x float> %x, %y + %max = select <8 x i1> %max_is_x, <8 x float> %x, <8 x float> %y + ret <8 x float> %max +} + +; UNSAFE: vminps: +; UNSAFE: vminps %ymm +define <8 x float> 
@vminps(<8 x float> %x, <8 x float> %y) { + %min_is_x = fcmp ole <8 x float> %x, %y + %min = select <8 x i1> %min_is_x, <8 x float> %x, <8 x float> %y + ret <8 x float> %min +}
diff --git a/src/LLVM/test/CodeGen/X86/avx-movdup.ll b/src/LLVM/test/CodeGen/X86/avx-movdup.ll new file mode 100644 index 0000000..42d84de --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/avx-movdup.ll
@@ -0,0 +1,34 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +; CHECK: vmovsldup +define <8 x float> @movdupA(<8 x float> %src) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <8 x float> %src, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> + ret <8 x float> %shuffle.i +} + +; CHECK: vmovshdup +define <8 x float> @movdupB(<8 x float> %src) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <8 x float> %src, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7> + ret <8 x float> %shuffle.i +} + +; CHECK: vmovsldup +define <4 x i64> @movdupC(<4 x i64> %src) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <4 x i64> %src to <8 x float> + %shuffle.i = shufflevector <8 x float> %0, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> + %1 = bitcast <8 x float> %shuffle.i to <4 x i64> + ret <4 x i64> %1 +} + +; CHECK: vmovshdup +define <4 x i64> @movdupD(<4 x i64> %src) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <4 x i64> %src to <8 x float> + %shuffle.i = shufflevector <8 x float> %0, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7> + %1 = bitcast <8 x float> %shuffle.i to <4 x i64> + ret <4 x i64> %1 +} +
diff --git a/src/LLVM/test/CodeGen/X86/avx-select.ll b/src/LLVM/test/CodeGen/X86/avx-select.ll new file mode 100644 index 0000000..58a75ef --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/avx-select.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +; CHECK: _select00 +; CHECK: vmovaps +; CHECK-NEXT: LBB +define <8 x i32> @select00(i32 %a, <8 x i32> %b) nounwind { + %cmpres = icmp eq i32 %a, 255 + %selres = select i1 %cmpres, <8 x i32> zeroinitializer, <8 x i32> %b + %res = xor <8 x i32> %b, %selres + ret <8 x i32> %res +} + +; CHECK: _select01 +; CHECK: vmovaps +; CHECK-NEXT: LBB +define <4 x i64> @select01(i32 %a, <4 x i64> %b) nounwind { + %cmpres = icmp eq i32 %a, 255 + %selres = select i1 %cmpres, <4 x i64> zeroinitializer, <4 x i64> %b + %res = xor <4 x i64> %b, %selres + ret <4 x i64> %res +} +
diff --git a/src/LLVM/test/CodeGen/X86/avx-shift.ll b/src/LLVM/test/CodeGen/X86/avx-shift.ll new file mode 100644 index 0000000..3ea39a2 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/avx-shift.ll
@@ -0,0 +1,75 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +;;; Shift left +; CHECK: vpslld +; CHECK: vpslld +define <8 x i32> @vshift00(<8 x i32> %a) nounwind readnone { + %s = shl <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 +2> + ret <8 x i32> %s +} + +; CHECK: vpsllw +; CHECK: vpsllw +define <16 x i16> @vshift01(<16 x i16> %a) nounwind readnone { + %s = shl <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2> + ret <16 x i16> %s +} + +; CHECK: vpsllq +; CHECK: vpsllq +define <4 x i64> @vshift02(<4 x i64> %a) nounwind readnone { + %s = shl <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2> + ret <4 x i64> %s +} + +;;; Logical Shift right +; CHECK: vpsrld +; CHECK: vpsrld +define <8 x i32> @vshift03(<8 x i32> %a) nounwind readnone { + %s = lshr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 +2> + ret <8 x i32> %s +} + +; CHECK: vpsrlw +; CHECK: vpsrlw +define <16 x i16> @vshift04(<16 x i16> %a) nounwind readnone { + %s = lshr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2> + ret <16 x i16> %s +} + +; CHECK: vpsrlq +; CHECK: vpsrlq +define <4 x i64> @vshift05(<4 x i64> %a) nounwind readnone { + %s = lshr <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2> + ret <4 x i64> %s +} + +;;; Arithmetic Shift right +; CHECK: vpsrad +; CHECK: vpsrad +define <8 x i32> @vshift06(<8 x i32> %a) nounwind readnone { + %s = ashr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 +2> + ret <8 x i32> %s +} + +; CHECK: vpsraw +; CHECK: vpsraw +define <16 x i16> @vshift07(<16 x i16> %a) nounwind readnone { + %s = ashr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2> + ret <16 x i16> %s +} + +;;; Support variable shifts +; CHECK: _vshift08 +; CHECK: vextractf128 
$1 +; CHECK: vpslld $23 +; CHECK: vextractf128 $1 +; CHECK: vpslld $23 +define <8 x i32> @vshift08(<8 x i32> %a) nounwind { + %bitop = shl <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %a + ret <8 x i32> %bitop +} +
diff --git a/src/LLVM/test/CodeGen/X86/avx-shuffle.ll b/src/LLVM/test/CodeGen/X86/avx-shuffle.ll new file mode 100644 index 0000000..0db334d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/avx-shuffle.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +; PR11102 +define <4 x float> @test1(<4 x float> %a) nounwind { + %b = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 2, i32 5, i32 undef, i32 undef> + ret <4 x float> %b +; CHECK: test1: +; CHECK: vshufps +; CHECK: vpshufd +}
diff --git a/src/LLVM/test/CodeGen/X86/avx-splat.ll b/src/LLVM/test/CodeGen/X86/avx-splat.ll new file mode 100644 index 0000000..af20b90 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/avx-splat.ll
@@ -0,0 +1,103 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s + + +; CHECK: vpunpcklbw %xmm +; CHECK-NEXT: vpunpckhbw %xmm +; CHECK-NEXT: vinsertf128 $1 +; CHECK-NEXT: vpermilps $85 +define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> + ret <32 x i8> %shuffle +} + +; CHECK: vpunpckhwd %xmm +; CHECK-NEXT: vinsertf128 $1 +; CHECK-NEXT: vpermilps $85 +define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> + ret <16 x i16> %shuffle +} + +; CHECK: vmovd +; CHECK-NEXT: vmovlhps %xmm +; CHECK-NEXT: vinsertf128 $1 +define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp { +entry: + %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0 + %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1 + %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2 + %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3 + ret <4 x i64> %vecinit6.i +} + +; CHECK: vshufpd $0 +; CHECK-NEXT: vinsertf128 $1 +define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp { +entry: + %vecinit.i = insertelement <4 x double> undef, double %q, i32 0 + %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1 + %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2 + %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3 + ret <4 x double> %vecinit6.i +} + +; Test this simple opt: +; shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0> +; 
To: +; shuffle (vload ptr)), undef, <1, 1, 1, 1> +; CHECK: vmovdqa +; CHECK-NEXT: vinsertf128 $1 +; CHECK-NEXT: vpermilps $-1 +define <8 x float> @funcE() nounwind { +allocas: + %udx495 = alloca [18 x [18 x float]], align 32 + br label %for_test505.preheader + +for_test505.preheader: ; preds = %for_test505.preheader, %allocas + br i1 undef, label %for_exit499, label %for_test505.preheader + +for_exit499: ; preds = %for_test505.preheader + br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247 + +load.i1247: ; preds = %for_exit499 + %ptr1227 = getelementptr [18 x [18 x float]]* %udx495, i64 0, i64 1, i64 1 + %ptr.i1237 = bitcast float* %ptr1227 to i32* + %val.i1238 = load i32* %ptr.i1237, align 4 + %ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6 + %ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7 + %phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float> + br label %__load_and_broadcast_32.exit1249 + +__load_and_broadcast_32.exit1249: ; preds = %load.i1247, %for_exit499 + %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ] + ret <8 x float> %load_broadcast12281250 +} + +; CHECK: vinsertf128 $1 +; CHECK-NEXT: vpermilps $0 +define <8 x float> @funcF(i32 %val) nounwind { + %ret6 = insertelement <8 x i32> undef, i32 %val, i32 6 + %ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7 + %tmp = bitcast <8 x i32> %ret7 to <8 x float> + ret <8 x float> %tmp +} + +; CHECK: vinsertf128 $1 +; CHECK-NEXT: vpermilps $0 +define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> + ret <8 x float> %shuffle +} + +; CHECK: vextractf128 $1 +; CHECK-NEXT: vinsertf128 $1 +; CHECK-NEXT: vpermilps $85 +define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <8 x float> %a, <8 x float> undef, 
<8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> + ret <8 x float> %shuffle +} +
diff --git a/src/LLVM/test/CodeGen/X86/avx-unpack.ll b/src/LLVM/test/CodeGen/X86/avx-unpack.ll new file mode 100644 index 0000000..d420101 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/avx-unpack.ll
@@ -0,0 +1,89 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +; CHECK: vunpckhps +define <8 x float> @unpackhips(<8 x float> %src1, <8 x float> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <8 x float> %src1, <8 x float> %src2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> + ret <8 x float> %shuffle.i +} + +; CHECK: vunpckhpd +define <4 x double> @unpackhipd(<4 x double> %src1, <4 x double> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <4 x double> %src1, <4 x double> %src2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> + ret <4 x double> %shuffle.i +} + +; CHECK: vunpcklps +define <8 x float> @unpacklops(<8 x float> %src1, <8 x float> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <8 x float> %src1, <8 x float> %src2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> + ret <8 x float> %shuffle.i +} + +; CHECK: vunpcklpd +define <4 x double> @unpacklopd(<4 x double> %src1, <4 x double> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <4 x double> %src1, <4 x double> %src2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> + ret <4 x double> %shuffle.i +} + +; CHECK-NOT: vunpcklps %ymm +define <8 x float> @unpacklops-not(<8 x float> %src1, <8 x float> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <8 x float> %src1, <8 x float> %src2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> + ret <8 x float> %shuffle.i +} + +; CHECK-NOT: vunpcklpd %ymm +define <4 x double> @unpacklopd-not(<4 x double> %src1, <4 x double> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <4 x double> %src1, <4 x double> %src2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> + ret <4 x double> %shuffle.i +} + +; CHECK-NOT: vunpckhps %ymm +define <8 x float> @unpackhips-not(<8 x float> %src1, <8 x float> %src2) nounwind uwtable readnone ssp { 
+entry: + %shuffle.i = shufflevector <8 x float> %src1, <8 x float> %src2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13> + ret <8 x float> %shuffle.i +} + +; CHECK-NOT: vunpckhpd %ymm +define <4 x double> @unpackhipd-not(<4 x double> %src1, <4 x double> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <4 x double> %src1, <4 x double> %src2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> + ret <4 x double> %shuffle.i +} + +;;;; +;;;; Unpack versions using the fp unit for int unpacking +;;;; + +; CHECK: vunpckhps +define <8 x i32> @unpackhips1(<8 x i32> %src1, <8 x i32> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <8 x i32> %src1, <8 x i32> %src2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> + ret <8 x i32> %shuffle.i +} + +; CHECK: vunpckhpd +define <4 x i64> @unpackhipd1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <4 x i64> %src1, <4 x i64> %src2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> + ret <4 x i64> %shuffle.i +} + +; CHECK: vunpcklps +define <8 x i32> @unpacklops1(<8 x i32> %src1, <8 x i32> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <8 x i32> %src1, <8 x i32> %src2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> + ret <8 x i32> %shuffle.i +} + +; CHECK: vunpcklpd +define <4 x i64> @unpacklopd1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <4 x i64> %src1, <4 x i64> %src2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> + ret <4 x i64> %shuffle.i +}
diff --git a/src/LLVM/test/CodeGen/X86/avx-vbroadcast.ll b/src/LLVM/test/CodeGen/X86/avx-vbroadcast.ll new file mode 100644 index 0000000..89b4188 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/avx-vbroadcast.ll
@@ -0,0 +1,94 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s +; XFAIL: * + +; xfail this file for now because of PR8156, when it gets solved merge this with avx-splat.ll + +; CHECK: vbroadcastsd (% +define <4 x i64> @A(i64* %ptr) nounwind uwtable readnone ssp { +entry: + %q = load i64* %ptr, align 8 + %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0 + %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1 + %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2 + %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3 + ret <4 x i64> %vecinit6.i +} + +; CHECK: vbroadcastss (% +define <8 x i32> @B(i32* %ptr) nounwind uwtable readnone ssp { +entry: + %q = load i32* %ptr, align 4 + %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 + %vecinit2.i = insertelement <8 x i32> %vecinit.i, i32 %q, i32 1 + %vecinit4.i = insertelement <8 x i32> %vecinit2.i, i32 %q, i32 2 + %vecinit6.i = insertelement <8 x i32> %vecinit4.i, i32 %q, i32 3 + ret <8 x i32> %vecinit6.i +} + +; CHECK: vbroadcastsd (% +define <4 x double> @C(double* %ptr) nounwind uwtable readnone ssp { +entry: + %q = load double* %ptr, align 8 + %vecinit.i = insertelement <4 x double> undef, double %q, i32 0 + %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1 + %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2 + %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3 + ret <4 x double> %vecinit6.i +} + +; CHECK: vbroadcastss (% +define <8 x float> @D(float* %ptr) nounwind uwtable readnone ssp { +entry: + %q = load float* %ptr, align 4 + %vecinit.i = insertelement <8 x float> undef, float %q, i32 0 + %vecinit2.i = insertelement <8 x float> %vecinit.i, float %q, i32 1 + %vecinit4.i = insertelement <8 x float> %vecinit2.i, float %q, i32 2 + %vecinit6.i = insertelement <8 x float> %vecinit4.i, float %q, i32 3 + ret <8 x float> %vecinit6.i +} + +;;;; 128-bit versions + +; 
CHECK: vbroadcastss (% +define <4 x float> @E(float* %ptr) nounwind uwtable readnone ssp { +entry: + %q = load float* %ptr, align 4 + %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 + %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1 + %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2 + %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3 + ret <4 x float> %vecinit6.i +} + +; CHECK: vbroadcastss (% +define <4 x i32> @F(i32* %ptr) nounwind uwtable readnone ssp { +entry: + %q = load i32* %ptr, align 4 + %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 + %vecinit2.i = insertelement <4 x i32> %vecinit.i, i32 %q, i32 1 + %vecinit4.i = insertelement <4 x i32> %vecinit2.i, i32 %q, i32 2 + %vecinit6.i = insertelement <4 x i32> %vecinit4.i, i32 %q, i32 3 + ret <4 x i32> %vecinit6.i +} + +; Unsupported vbroadcasts + +; CHECK: _G +; CHECK-NOT: vbroadcastsd (% +; CHECK: ret +define <2 x i64> @G(i64* %ptr) nounwind uwtable readnone ssp { +entry: + %q = load i64* %ptr, align 8 + %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0 + %vecinit2.i = insertelement <2 x i64> %vecinit.i, i64 %q, i32 1 + ret <2 x i64> %vecinit2.i +} + +; CHECK: _H +; CHECK-NOT: vbroadcastss +; CHECK: ret +define <4 x i32> @H(<4 x i32> %a) { + %x = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> + ret <4 x i32> %x +} +
diff --git a/src/LLVM/test/CodeGen/X86/avx-vextractf128.ll b/src/LLVM/test/CodeGen/X86/avx-vextractf128.ll new file mode 100644 index 0000000..dccf901 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/avx-vextractf128.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +; CHECK-NOT: vunpck +; CHECK: vextractf128 $1 +define <8 x float> @A(<8 x float> %a) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 8, i32 8, i32 8> + ret <8 x float> %shuffle +} + +; CHECK-NOT: vunpck +; CHECK: vextractf128 $1 +define <4 x double> @B(<4 x double> %a) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 4> + ret <4 x double> %shuffle +} +
diff --git a/src/LLVM/test/CodeGen/X86/avx-vinsertf128.ll b/src/LLVM/test/CodeGen/X86/avx-vinsertf128.ll new file mode 100644 index 0000000..cda1331 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/avx-vinsertf128.ll
@@ -0,0 +1,58 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=CHECK-SSE %s + +; CHECK-NOT: vunpck +; CHECK: vinsertf128 $1 +define <8 x float> @A(<8 x float> %a) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 0, i32 1, i32 2, i32 3> + ret <8 x float> %shuffle +} + +; CHECK-NOT: vunpck +; CHECK: vinsertf128 $1 +define <4 x double> @B(<4 x double> %a) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> <i32 4, i32 4, i32 0, i32 1> + ret <4 x double> %shuffle +} + +declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone + +declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone + +; Just check that no crash happens +; CHECK-SSE: _insert_crash +define void @insert_crash() nounwind { +allocas: + %v1.i.i451 = shufflevector <4 x double> zeroinitializer, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> + %ret_0a.i.i.i452 = shufflevector <4 x double> %v1.i.i451, <4 x double> undef, <2 x i32> <i32 0, i32 1> + %vret_0.i.i.i454 = tail call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %ret_0a.i.i.i452, <2 x double> undef) nounwind + %ret_val.i.i.i463 = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %vret_0.i.i.i454, <2 x double> undef) nounwind + %ret.i1.i.i464 = extractelement <2 x double> %ret_val.i.i.i463, i32 0 + %double2float = fptrunc double %ret.i1.i.i464 to float + %smearinsert50 = insertelement <4 x float> undef, float %double2float, i32 3 + %blendAsInt.i503 = bitcast <4 x float> %smearinsert50 to <4 x i32> + store <4 x i32> %blendAsInt.i503, <4 x i32>* undef, align 4 + ret void +} + +;; DAG Combine must remove useless vinsertf128 instructions + +; CHECK: DAGCombineA +; 
CHECK-NOT: vinsertf128 $1 +define <4 x i32> @DAGCombineA(<4 x i32> %v1) nounwind readonly { + %1 = shufflevector <4 x i32> %v1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + ret <4 x i32> %2 +} + +; CHECK: DAGCombineB +; CHECK: vpaddd %xmm +; CHECK-NOT: vinsertf128 $1 +; CHECK: vpaddd %xmm +define <8 x i32> @DAGCombineB(<8 x i32> %v1, <8 x i32> %v2) nounwind readonly { + %1 = add <8 x i32> %v1, %v2 + %2 = add <8 x i32> %1, %v1 + ret <8 x i32> %2 +}
diff --git a/src/LLVM/test/CodeGen/X86/avx-vmovddup.ll b/src/LLVM/test/CodeGen/X86/avx-vmovddup.ll new file mode 100644 index 0000000..1c56fe2 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/avx-vmovddup.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +; CHECK: vmovddup %ymm +define <4 x i64> @A(<4 x i64> %a) { + %c = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> + ret <4 x i64> %c +} + +; CHECK: vmovddup (% +define <4 x i64> @B(<4 x i64>* %ptr) { + %a = load <4 x i64>* %ptr + %c = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> + ret <4 x i64> %c +}
diff --git a/src/LLVM/test/CodeGen/X86/avx-vperm2f128.ll b/src/LLVM/test/CodeGen/X86/avx-vperm2f128.ll new file mode 100644 index 0000000..3550a90 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/avx-vperm2f128.ll
@@ -0,0 +1,62 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +; CHECK: vperm2f128 $1 +define <8 x float> @A(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3> + ret <8 x float> %shuffle +} + +; CHECK: vperm2f128 $48 +define <8 x float> @B(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15> + ret <8 x float> %shuffle +} + +; CHECK: vperm2f128 $0 +define <8 x float> @C(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> + ret <8 x float> %shuffle +} + +; CHECK: vperm2f128 $17 +define <8 x float> @D(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> + ret <8 x float> %shuffle +} + +; CHECK: vperm2f128 $17 +define <32 x i8> @E(<32 x i8> %a, <32 x i8> %b) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> + ret <32 x i8> %shuffle +} + +; CHECK: vperm2f128 $33 +define <4 x i64> @E2(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1> + ret <4 x i64> %shuffle +} + +;;;; Cases with undef indicies mixed in the mask + +; CHECK: 
vperm2f128 $33 +define <8 x float> @F(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 9, i32 undef, i32 11> + ret <8 x float> %shuffle +} + +;;;; Cases we must not select vperm2f128 + +; CHECK: _G +; CHECK-NOT: vperm2f128 +define <8 x float> @G(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 12, i32 undef, i32 15> + ret <8 x float> %shuffle +}
diff --git a/src/LLVM/test/CodeGen/X86/avx-vpermil.ll b/src/LLVM/test/CodeGen/X86/avx-vpermil.ll new file mode 100644 index 0000000..49b2f54 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/avx-vpermil.ll
@@ -0,0 +1,45 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +; CHECK: vpermilps +define <8 x float> @funcA(<8 x float> %a) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 1, i32 5, i32 6, i32 7, i32 5> + ret <8 x float> %shuffle +} + +; CHECK: vpermilpd +define <4 x double> @funcB(<4 x double> %a) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 3> + ret <4 x double> %shuffle +} + +; CHECK: vpermilps +define <8 x i32> @funcC(<8 x i32> %a) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 1, i32 5, i32 6, i32 7, i32 5> + ret <8 x i32> %shuffle +} + +; CHECK: vpermilpd +define <4 x i64> @funcD(<4 x i64> %a) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 3> + ret <4 x i64> %shuffle +} + +; vpermil should match masks like this: <u,3,1,2,4,u,5,6>. Check that the +; target specific mask was correctly generated. +; CHECK: vpermilps $-100 +define <8 x float> @funcE(<8 x float> %a) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 8, i32 3, i32 1, i32 2, i32 4, i32 8, i32 5, i32 6> + ret <8 x float> %shuffle +} + +; CHECK-NOT: vpermilps +define <8 x float> @funcF(<8 x float> %a) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9> + ret <8 x float> %shuffle +}
diff --git a/src/LLVM/test/CodeGen/X86/avx-vshufp.ll b/src/LLVM/test/CodeGen/X86/avx-vshufp.ll new file mode 100644 index 0000000..f06548d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/avx-vshufp.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +; CHECK: vshufps $-53, %ymm +define <8 x float> @A(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 8, i32 11, i32 7, i32 6, i32 12, i32 15> + ret <8 x float> %shuffle +} + +; CHECK: vshufpd $10, %ymm +define <4 x double> @B(<4 x double> %a, <4 x double> %b) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7> + ret <4 x double> %shuffle +} + +; CHECK: vshufps $-53, %ymm +define <8 x float> @C(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 undef, i32 undef, i32 11, i32 undef, i32 6, i32 12, i32 undef> + ret <8 x float> %shuffle +} + +; CHECK: vshufpd $2, %ymm +define <4 x double> @D(<4 x double> %a, <4 x double> %b) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 undef> + ret <4 x double> %shuffle +}
diff --git a/src/LLVM/test/CodeGen/X86/avx-vzeroupper.ll b/src/LLVM/test/CodeGen/X86/avx-vzeroupper.ll new file mode 100644 index 0000000..eaf236c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/avx-vzeroupper.ll
@@ -0,0 +1,26 @@ +; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +define <4 x float> @do_sse_local(<4 x float> %a) nounwind uwtable readnone ssp { +entry: + %add.i = fadd <4 x float> %a, %a + ret <4 x float> %add.i +} + +; CHECK: _test00 +define <4 x float> @test00(<4 x float> %a, <4 x float> %b) nounwind uwtable ssp { +entry: + %add.i = fadd <4 x float> %a, %b + ; CHECK: vzeroupper + ; CHECK-NEXT: callq _do_sse + %call3 = tail call <4 x float> @do_sse(<4 x float> %add.i) nounwind + %sub.i = fsub <4 x float> %call3, %add.i + ; CHECK-NOT: vzeroupper + ; CHECK: callq _do_sse_local + %call8 = tail call <4 x float> @do_sse_local(<4 x float> %sub.i) + ; CHECK: vzeroupper + ; CHECK-NEXT: jmp _do_sse + %call10 = tail call <4 x float> @do_sse(<4 x float> %call8) nounwind + ret <4 x float> %call10 +} + +declare <4 x float> @do_sse(<4 x float>)
diff --git a/src/LLVM/test/CodeGen/X86/barrier-sse.ll b/src/LLVM/test/CodeGen/X86/barrier-sse.ll new file mode 100644 index 0000000..bbfeea6 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/barrier-sse.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep sfence +; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep lfence +; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep mfence +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep MEMBARRIER + +define void @test() { + fence acquire + fence release + fence acq_rel + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/barrier.ll b/src/LLVM/test/CodeGen/X86/barrier.ll new file mode 100644 index 0000000..4769b39 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/barrier.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=x86 -mattr=-sse2 | grep lock + +define void @test() { + fence seq_cst + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/basic-promote-integers.ll b/src/LLVM/test/CodeGen/X86/basic-promote-integers.ll new file mode 100644 index 0000000..c80f2b0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/basic-promote-integers.ll
@@ -0,0 +1,98 @@ +; Test that vectors are scalarized/lowered correctly +; (with both legalization methods). +; RUN: llc -march=x86 -promote-elements < %s +; RUN: llc -march=x86 < %s + +; A simple test to check copyToParts and copyFromParts. + +define <4 x i64> @test_param_0(<4 x i64> %A, <2 x i32> %B, <4 x i8> %C) { + ret <4 x i64> %A +} + +define <2 x i32> @test_param_1(<4 x i64> %A, <2 x i32> %B, <4 x i8> %C) { + ret <2 x i32> %B +} + +define <4 x i8> @test_param_2(<4 x i64> %A, <2 x i32> %B, <4 x i8> %C) { + ret <4 x i8> %C +} + +; Simple tests to check arithmetic and vector operations on types which need to +; be legalized (no loads/stores to/from memory here). + +define <4 x i64> @test_arith_0(<4 x i64> %A, <2 x i32> %B, <4 x i8> %C) { + %K = add <4 x i64> %A, <i64 0, i64 1, i64 3, i64 9> + ret <4 x i64> %K +} + +define <2 x i32> @test_arith_1(<4 x i64> %A, <2 x i32> %B, <4 x i8> %C) { + %K = add <2 x i32> %B, <i32 0, i32 1> + ret <2 x i32> %K +} + +define <4 x i8> @test_arith_2(<4 x i64> %A, <2 x i32> %B, <4 x i8> %C) { + %K = add <4 x i8> %C, <i8 0, i8 1, i8 3, i8 9> + ret <4 x i8> %K +} + +define i8 @test_arith_3(<4 x i64> %A, <2 x i32> %B, <4 x i8> %C) { + %K = add <4 x i8> %C, <i8 0, i8 1, i8 3, i8 9> + %Y = extractelement <4 x i8> %K, i32 1 + ret i8 %Y +} + +define <4 x i8> @test_arith_4(<4 x i64> %A, <2 x i32> %B, <4 x i8> %C) { + %Y = insertelement <4 x i8> %C, i8 1, i32 0 + ret <4 x i8> %Y +} + +define <4 x i32> @test_arith_5(<4 x i64> %A, <2 x i32> %B, <4 x i32> %C) { + %Y = insertelement <4 x i32> %C, i32 1, i32 0 + ret <4 x i32> %Y +} + +define <4 x i32> @test_arith_6(<4 x i64> %A, <2 x i32> %B, <4 x i32> %C) { + %F = extractelement <2 x i32> %B, i32 1 + %Y = insertelement <4 x i32> %C, i32 %F, i32 0 + ret <4 x i32> %Y +} + +define <4 x i64> @test_arith_7(<4 x i64> %A, <2 x i32> %B, <4 x i32> %C) { + %F = extractelement <2 x i32> %B, i32 1 + %W = zext i32 %F to i64 + %Y = insertelement <4 x i64> %A, i64 %W, i32 0 + ret <4 x i64> %Y +} + +define i64 
@test_arith_8(<4 x i64> %A, <2 x i32> %B, <4 x i32> %C) { + %F = extractelement <2 x i32> %B, i32 1 + %W = zext i32 %F to i64 + %T = add i64 %W , 11 + ret i64 %T +} + +define <4 x i64> @test_arith_9(<4 x i64> %A, <2 x i32> %B, <4 x i16> %C) { + %T = add <4 x i16> %C, %C + %F0 = extractelement <4 x i16> %T, i32 0 + %F1 = extractelement <4 x i16> %T, i32 1 + %W0 = zext i16 %F0 to i64 + %W1 = zext i16 %F1 to i64 + %Y0 = insertelement <4 x i64> %A, i64 %W0, i32 0 + %Y1 = insertelement <4 x i64> %Y0, i64 %W1, i32 2 + ret <4 x i64> %Y1 +} + +define <4 x i16> @test_arith_10(<4 x i64> %A, <2 x i32> %B, <4 x i32> %C) { + %F = bitcast <2 x i32> %B to <4 x i16> + %T = add <4 x i16> %F , <i16 0, i16 1, i16 2, i16 3> + ret <4 x i16> %T +} + + +; Simple tests to check saving/loading from memory +define <4 x i16> @test_mem_0(<4 x i64> %A, <2 x i32> %B, <4 x i32> %C) { + %F = bitcast <2 x i32> %B to <4 x i16> + %T = add <4 x i16> %F , <i16 0, i16 1, i16 2, i16 3> + ret <4 x i16> %T +} +
diff --git a/src/LLVM/test/CodeGen/X86/bc-extract.ll b/src/LLVM/test/CodeGen/X86/bc-extract.ll new file mode 100644 index 0000000..ac972a8 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/bc-extract.ll
@@ -0,0 +1,27 @@ +; RUN: llc < %s -march=x86-64 -mattr=+sse42 | FileCheck %s + + +define float @extractFloat1() nounwind { +entry: + ; CHECK: 1065353216 + %tmp0 = bitcast <1 x double> <double 0x000000003F800000> to <2 x float> + %tmp1 = extractelement <2 x float> %tmp0, i32 0 + ret float %tmp1 +} + +define float @extractFloat2() nounwind { +entry: + ; CHECK: pxor %xmm0, %xmm0 + %tmp4 = bitcast <1 x double> <double 0x000000003F800000> to <2 x float> + %tmp5 = extractelement <2 x float> %tmp4, i32 1 + ret float %tmp5 +} + +define i32 @extractInt2() nounwind { +entry: + ; CHECK: xorl %eax, %eax + %tmp4 = bitcast <1 x i64> <i64 256> to <2 x i32> + %tmp5 = extractelement <2 x i32> %tmp4, i32 1 + ret i32 %tmp5 +} +
diff --git a/src/LLVM/test/CodeGen/X86/bigstructret.ll b/src/LLVM/test/CodeGen/X86/bigstructret.ll new file mode 100644 index 0000000..633995d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/bigstructret.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=x86 -o %t +; RUN: grep "movl .24601, 12(%ecx)" %t +; RUN: grep "movl .48, 8(%ecx)" %t +; RUN: grep "movl .24, 4(%ecx)" %t +; RUN: grep "movl .12, (%ecx)" %t + +%0 = type { i32, i32, i32, i32 } + +define internal fastcc %0 @ReturnBigStruct() nounwind readnone { +entry: + %0 = insertvalue %0 zeroinitializer, i32 12, 0 + %1 = insertvalue %0 %0, i32 24, 1 + %2 = insertvalue %0 %1, i32 48, 2 + %3 = insertvalue %0 %2, i32 24601, 3 + ret %0 %3 +} +
diff --git a/src/LLVM/test/CodeGen/X86/bigstructret2.ll b/src/LLVM/test/CodeGen/X86/bigstructret2.ll new file mode 100644 index 0000000..46e0fd2 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/bigstructret2.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86 -o %t + +%0 = type { i64, i64 } + +declare fastcc %0 @ReturnBigStruct() nounwind readnone + +define void @test(%0* %p) { + %1 = call fastcc %0 @ReturnBigStruct() + store %0 %1, %0* %p + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/bit-test-shift.ll b/src/LLVM/test/CodeGen/X86/bit-test-shift.ll new file mode 100644 index 0000000..7497613 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/bit-test-shift.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=x86 | FileCheck %s +; <rdar://problem/8285015> + +define i32 @x(i32 %t) nounwind readnone ssp { +entry: +; CHECK: shll $23, %eax +; CHECK: sarl $31, %eax +; CHECK: andl $-26, %eax + %and = and i32 %t, 256 + %tobool = icmp eq i32 %and, 0 + %retval.0 = select i1 %tobool, i32 0, i32 -26 + ret i32 %retval.0 +}
diff --git a/src/LLVM/test/CodeGen/X86/bitcast-int-to-vector.ll b/src/LLVM/test/CodeGen/X86/bitcast-int-to-vector.ll new file mode 100644 index 0000000..4c25979 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/bitcast-int-to-vector.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=x86 + +define i1 @foo(i64 %a) +{ + %t = bitcast i64 %a to <2 x float> + %r = extractelement <2 x float> %t, i32 0 + %s = extractelement <2 x float> %t, i32 1 + %b = fcmp uno float %r, %s + ret i1 %b +}
diff --git a/src/LLVM/test/CodeGen/X86/bitcast.ll b/src/LLVM/test/CodeGen/X86/bitcast.ll new file mode 100644 index 0000000..0b7a6c3 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/bitcast.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -march=x86 +; RUN: llc < %s -march=x86-64 +; PR1033 + +define i64 @test1(double %t) { + %u = bitcast double %t to i64 ; <i64> [#uses=1] + ret i64 %u +} + +define double @test2(i64 %t) { + %u = bitcast i64 %t to double ; <double> [#uses=1] + ret double %u +} + +define i32 @test3(float %t) { + %u = bitcast float %t to i32 ; <i32> [#uses=1] + ret i32 %u +} + +define float @test4(i32 %t) { + %u = bitcast i32 %t to float ; <float> [#uses=1] + ret float %u +} +
diff --git a/src/LLVM/test/CodeGen/X86/bitcast2.ll b/src/LLVM/test/CodeGen/X86/bitcast2.ll new file mode 100644 index 0000000..6133a81 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/bitcast2.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=x86-64 | grep movd | count 2 +; RUN: llc < %s -march=x86-64 | not grep rsp + +define i64 @test1(double %A) { + %B = bitcast double %A to i64 + ret i64 %B +} + +define double @test2(i64 %A) { + %B = bitcast i64 %A to double + ret double %B +} +
diff --git a/src/LLVM/test/CodeGen/X86/bmi.ll b/src/LLVM/test/CodeGen/X86/bmi.ll new file mode 100644 index 0000000..88c09e3 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/bmi.ll
@@ -0,0 +1,53 @@ +; RUN: llc < %s -march=x86-64 -mattr=+bmi | FileCheck %s + +define i32 @t1(i32 %x) nounwind { + %tmp = tail call i32 @llvm.cttz.i32( i32 %x ) + ret i32 %tmp +; CHECK: t1: +; CHECK: tzcntl +} + +declare i32 @llvm.cttz.i32(i32) nounwind readnone + +define i16 @t2(i16 %x) nounwind { + %tmp = tail call i16 @llvm.cttz.i16( i16 %x ) + ret i16 %tmp +; CHECK: t2: +; CHECK: tzcntw +} + +declare i16 @llvm.cttz.i16(i16) nounwind readnone + +define i64 @t3(i64 %x) nounwind { + %tmp = tail call i64 @llvm.cttz.i64( i64 %x ) + ret i64 %tmp +; CHECK: t3: +; CHECK: tzcntq +} + +declare i64 @llvm.cttz.i64(i64) nounwind readnone + +define i8 @t4(i8 %x) nounwind { + %tmp = tail call i8 @llvm.cttz.i8( i8 %x ) + ret i8 %tmp +; CHECK: t4: +; CHECK: tzcntw +} + +declare i8 @llvm.cttz.i8(i8) nounwind readnone + +define i32 @andn32(i32 %x, i32 %y) nounwind readnone { + %tmp1 = xor i32 %x, -1 + %tmp2 = and i32 %y, %tmp1 + ret i32 %tmp2 +; CHECK: andn32: +; CHECK: andnl +} + +define i64 @andn64(i64 %x, i64 %y) nounwind readnone { + %tmp1 = xor i64 %x, -1 + %tmp2 = and i64 %tmp1, %y + ret i64 %tmp2 +; CHECK: andn64: +; CHECK: andnq +}
diff --git a/src/LLVM/test/CodeGen/X86/bool-zext.ll b/src/LLVM/test/CodeGen/X86/bool-zext.ll new file mode 100644 index 0000000..3558376 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/bool-zext.ll
@@ -0,0 +1,47 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=WIN64 + +; X64: @bar1 +; X64: movzbl +; X64: jmp +; WIN64: @bar1 +; WIN64: movzbl +; WIN64: callq +define void @bar1(i1 zeroext %v1) nounwind ssp { +entry: + %conv = zext i1 %v1 to i32 + %call = tail call i32 (...)* @foo1(i32 %conv) nounwind + ret void +} + +; X64: @bar2 +; X64-NOT: movzbl +; X64: jmp +; WIN64: @bar2 +; WIN64-NOT: movzbl +; WIN64: callq +define void @bar2(i8 zeroext %v1) nounwind ssp { +entry: + %conv = zext i8 %v1 to i32 + %call = tail call i32 (...)* @foo1(i32 %conv) nounwind + ret void +} + +; X64: @bar3 +; X64: callq +; X64-NOT: movzbl +; X64-NOT: and +; X64: ret +; WIN64: @bar3 +; WIN64: callq +; WIN64-NOT: movzbl +; WIN64-NOT: and +; WIN64: ret +define zeroext i1 @bar3() nounwind ssp { +entry: + %call = call i1 @foo2() nounwind + ret i1 %call +} + +declare i32 @foo1(...) +declare zeroext i1 @foo2()
diff --git a/src/LLVM/test/CodeGen/X86/br-fold.ll b/src/LLVM/test/CodeGen/X86/br-fold.ll new file mode 100644 index 0000000..8af3bd1 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/br-fold.ll
@@ -0,0 +1,20 @@ +; RUN: llc -march=x86-64 < %s | FileCheck %s + +; CHECK: orq +; CHECK-NEXT: jne + +@_ZN11xercesc_2_513SchemaSymbols21fgURI_SCHEMAFORSCHEMAE = external constant [33 x i16], align 32 ; <[33 x i16]*> [#uses=1] +@_ZN11xercesc_2_56XMLUni16fgNotationStringE = external constant [9 x i16], align 16 ; <[9 x i16]*> [#uses=1] + +define fastcc void @foo() { +entry: + br i1 icmp eq (i64 or (i64 ptrtoint ([33 x i16]* @_ZN11xercesc_2_513SchemaSymbols21fgURI_SCHEMAFORSCHEMAE to i64), + i64 ptrtoint ([9 x i16]* @_ZN11xercesc_2_56XMLUni16fgNotationStringE to i64)), i64 0), + label %bb8.i329, label %bb4.i.i318.preheader + +bb4.i.i318.preheader: ; preds = %bb6 + unreachable + +bb8.i329: ; preds = %bb6 + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/brcond.ll b/src/LLVM/test/CodeGen/X86/brcond.ll new file mode 100644 index 0000000..5cdc100 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/brcond.ll
@@ -0,0 +1,108 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=core2 | FileCheck %s +; rdar://7475489 + +define i32 @test1(i32 %a, i32 %b) nounwind ssp { +entry: +; CHECK: test1: +; CHECK: xorb +; CHECK-NOT: andb +; CHECK-NOT: shrb +; CHECK: testb $64 + %0 = and i32 %a, 16384 + %1 = icmp ne i32 %0, 0 + %2 = and i32 %b, 16384 + %3 = icmp ne i32 %2, 0 + %4 = xor i1 %1, %3 + br i1 %4, label %bb1, label %bb + +bb: ; preds = %entry + %5 = tail call i32 (...)* @foo() nounwind ; <i32> [#uses=1] + ret i32 %5 + +bb1: ; preds = %entry + %6 = tail call i32 (...)* @bar() nounwind ; <i32> [#uses=1] + ret i32 %6 +} + +declare i32 @foo(...) + +declare i32 @bar(...) + + + +; PR3351 - (P == 0) & (Q == 0) -> (P|Q) == 0 +define i32 @test2(i32* %P, i32* %Q) nounwind ssp { +entry: + %a = icmp eq i32* %P, null ; <i1> [#uses=1] + %b = icmp eq i32* %Q, null ; <i1> [#uses=1] + %c = and i1 %a, %b + br i1 %c, label %bb1, label %return + +bb1: ; preds = %entry + ret i32 4 + +return: ; preds = %entry + ret i32 192 +; CHECK: test2: +; CHECK: movl 4(%esp), %eax +; CHECK-NEXT: orl 8(%esp), %eax +; CHECK-NEXT: jne LBB1_2 +} + +; PR3351 - (P != 0) | (Q != 0) -> (P|Q) != 0 +define i32 @test3(i32* %P, i32* %Q) nounwind ssp { +entry: + %a = icmp ne i32* %P, null ; <i1> [#uses=1] + %b = icmp ne i32* %Q, null ; <i1> [#uses=1] + %c = or i1 %a, %b + br i1 %c, label %bb1, label %return + +bb1: ; preds = %entry + ret i32 4 + +return: ; preds = %entry + ret i32 192 +; CHECK: test3: +; CHECK: movl 4(%esp), %eax +; CHECK-NEXT: orl 8(%esp), %eax +; CHECK-NEXT: je LBB2_2 +} + +; <rdar://problem/7598384>: +; +; jCC L1 +; jmp L2 +; L1: +; ... +; L2: +; ... +; +; to: +; +; jnCC L2 +; L1: +; ... +; L2: +; ... 
+define float @test4(float %x, float %y) nounwind readnone optsize ssp { +entry: + %0 = fpext float %x to double ; <double> [#uses=1] + %1 = fpext float %y to double ; <double> [#uses=1] + %2 = fmul double %0, %1 ; <double> [#uses=3] + %3 = fcmp oeq double %2, 0.000000e+00 ; <i1> [#uses=1] + br i1 %3, label %bb2, label %bb1 + +; CHECK: jne +; CHECK-NEXT: jnp +; CHECK-NOT: jmp +; CHECK: LBB + +bb1: ; preds = %entry + %4 = fadd double %2, -1.000000e+00 ; <double> [#uses=1] + br label %bb2 + +bb2: ; preds = %entry, %bb1 + %.0.in = phi double [ %4, %bb1 ], [ %2, %entry ] ; <double> [#uses=1] + %.0 = fptrunc double %.0.in to float ; <float> [#uses=1] + ret float %.0 +}
diff --git a/src/LLVM/test/CodeGen/X86/break-anti-dependencies.ll b/src/LLVM/test/CodeGen/X86/break-anti-dependencies.ll new file mode 100644 index 0000000..93b2043 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/break-anti-dependencies.ll
@@ -0,0 +1,34 @@ +; Without list-burr scheduling we may not see the difference in codegen here. +; RUN: llc < %s -march=x86-64 -post-RA-scheduler -pre-RA-sched=list-burr -break-anti-dependencies=none > %t +; RUN: grep {%xmm0} %t | count 14 +; RUN: not grep {%xmm1} %t +; RUN: llc < %s -march=x86-64 -post-RA-scheduler -break-anti-dependencies=critical > %t +; RUN: grep {%xmm0} %t | count 7 +; RUN: grep {%xmm1} %t | count 7 + +define void @goo(double* %r, double* %p, double* %q) nounwind { +entry: + %0 = load double* %p, align 8 + %1 = fadd double %0, 1.100000e+00 + %2 = fmul double %1, 1.200000e+00 + %3 = fadd double %2, 1.300000e+00 + %4 = fmul double %3, 1.400000e+00 + %5 = fadd double %4, 1.500000e+00 + %6 = fptosi double %5 to i32 + %7 = load double* %r, align 8 + %8 = fadd double %7, 7.100000e+00 + %9 = fmul double %8, 7.200000e+00 + %10 = fadd double %9, 7.300000e+00 + %11 = fmul double %10, 7.400000e+00 + %12 = fadd double %11, 7.500000e+00 + %13 = fptosi double %12 to i32 + %14 = icmp slt i32 %6, %13 + br i1 %14, label %bb, label %return + +bb: + store double 9.300000e+00, double* %q, align 8 + ret void + +return: + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/break-sse-dep.ll b/src/LLVM/test/CodeGen/X86/break-sse-dep.ll new file mode 100644 index 0000000..2dee575 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/break-sse-dep.ll
@@ -0,0 +1,63 @@ +; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse2 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse2 | FileCheck %s + +define double @t1(float* nocapture %x) nounwind readonly ssp { +entry: +; CHECK: t1: +; CHECK: movss ([[A0:%rdi|%rcx]]), %xmm0 +; CHECK: cvtss2sd %xmm0, %xmm0 + + %0 = load float* %x, align 4 + %1 = fpext float %0 to double + ret double %1 +} + +define float @t2(double* nocapture %x) nounwind readonly ssp optsize { +entry: +; CHECK: t2: +; CHECK: cvtsd2ss ([[A0]]), %xmm0 + %0 = load double* %x, align 8 + %1 = fptrunc double %0 to float + ret float %1 +} + +define float @squirtf(float* %x) nounwind { +entry: +; CHECK: squirtf: +; CHECK: movss ([[A0]]), %xmm0 +; CHECK: sqrtss %xmm0, %xmm0 + %z = load float* %x + %t = call float @llvm.sqrt.f32(float %z) + ret float %t +} + +define double @squirt(double* %x) nounwind { +entry: +; CHECK: squirt: +; CHECK: movsd ([[A0]]), %xmm0 +; CHECK: sqrtsd %xmm0, %xmm0 + %z = load double* %x + %t = call double @llvm.sqrt.f64(double %z) + ret double %t +} + +define float @squirtf_size(float* %x) nounwind optsize { +entry: +; CHECK: squirtf_size: +; CHECK: sqrtss ([[A0]]), %xmm0 + %z = load float* %x + %t = call float @llvm.sqrt.f32(float %z) + ret float %t +} + +define double @squirt_size(double* %x) nounwind optsize { +entry: +; CHECK: squirt_size: +; CHECK: sqrtsd ([[A0]]), %xmm0 + %z = load double* %x + %t = call double @llvm.sqrt.f64(double %z) + ret double %t +} + +declare float @llvm.sqrt.f32(float) +declare double @llvm.sqrt.f64(double)
diff --git a/src/LLVM/test/CodeGen/X86/bss_pagealigned.ll b/src/LLVM/test/CodeGen/X86/bss_pagealigned.ll new file mode 100644 index 0000000..da95aca --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/bss_pagealigned.ll
@@ -0,0 +1,21 @@ +; RUN: llc --code-model=kernel -march=x86-64 <%s -asm-verbose=0 | FileCheck %s +; PR4933 +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" +%struct.kmem_cache_order_objects = type { i64 } +declare i8* @memset(i8*, i32, i64) +define void @unxlate_dev_mem_ptr(i64 %phis, i8* %addr) nounwind { + %pte.addr.i = alloca %struct.kmem_cache_order_objects* + %call8 = call i8* @memset(i8* bitcast ([512 x %struct.kmem_cache_order_objects]* @bm_pte to i8*), i32 0, i64 4096) +; CHECK: movq $bm_pte, %rdi +; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: movl $4096, %edx +; CHECK-NEXT: callq memset + ret void +} +@bm_pte = internal global [512 x %struct.kmem_cache_order_objects] zeroinitializer, section ".bss.page_aligned", align 4096 +; CHECK: .section .bss.page_aligned,"aw",@nobits +; CHECK-NEXT: .align 4096 +; CHECK-NEXT: bm_pte: +; CHECK-NEXT: .zero 4096 +; CHECK-NEXT: .size bm_pte, 4096
diff --git a/src/LLVM/test/CodeGen/X86/bswap-inline-asm.ll b/src/LLVM/test/CodeGen/X86/bswap-inline-asm.ll new file mode 100644 index 0000000..3bb9124 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/bswap-inline-asm.ll
@@ -0,0 +1,87 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin > %t +; RUN: not grep InlineAsm %t +; RUN: FileCheck %s < %t + +; CHECK: foo: +; CHECK: bswapq +define i64 @foo(i64 %x) nounwind { + %asmtmp = tail call i64 asm "bswap $0", "=r,0,~{dirflag},~{fpsr},~{flags}"(i64 %x) nounwind + ret i64 %asmtmp +} + +; CHECK: bar: +; CHECK: bswapq +define i64 @bar(i64 %x) nounwind { + %asmtmp = tail call i64 asm "bswapq ${0:q}", "=r,0,~{dirflag},~{fpsr},~{flags}"(i64 %x) nounwind + ret i64 %asmtmp +} + +; CHECK: pen: +; CHECK: bswapl +define i32 @pen(i32 %x) nounwind { + %asmtmp = tail call i32 asm "bswapl ${0:q}", "=r,0,~{dirflag},~{fpsr},~{flags}"(i32 %x) nounwind + ret i32 %asmtmp +} + +; CHECK: s16: +; CHECK: rolw $8, +define zeroext i16 @s16(i16 zeroext %x) nounwind { + %asmtmp = tail call i16 asm "rorw $$8, ${0:w}", "=r,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i16 %x) nounwind + ret i16 %asmtmp +} + +; CHECK: t16: +; CHECK: rolw $8, +define zeroext i16 @t16(i16 zeroext %x) nounwind { + %asmtmp = tail call i16 asm "rorw $$8, ${0:w}", "=r,0,~{cc},~{dirflag},~{fpsr},~{flags}"(i16 %x) nounwind + ret i16 %asmtmp +} + +; CHECK: u16: +; CHECK: rolw $8, +define zeroext i16 @u16(i16 zeroext %x) nounwind { + %asmtmp = tail call i16 asm "rolw $$8, ${0:w}", "=r,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i16 %x) nounwind + ret i16 %asmtmp +} + +; CHECK: v16: +; CHECK: rolw $8, +define zeroext i16 @v16(i16 zeroext %x) nounwind { + %asmtmp = tail call i16 asm "rolw $$8, ${0:w}", "=r,0,~{cc},~{dirflag},~{fpsr},~{flags}"(i16 %x) nounwind + ret i16 %asmtmp +} + +; CHECK: s32: +; CHECK: bswapl +define i32 @s32(i32 %x) nounwind { + %asmtmp = tail call i32 asm "bswap $0", "=r,0,~{dirflag},~{fpsr},~{flags}"(i32 %x) nounwind + ret i32 %asmtmp +} + +; CHECK: t32: +; CHECK: bswapl +define i32 @t32(i32 %x) nounwind { + %asmtmp = tail call i32 asm "bswap $0", "=r,0,~{dirflag},~{flags},~{fpsr}"(i32 %x) nounwind + ret i32 %asmtmp +} + +; CHECK: u32: +; CHECK: bswapl +define i32 @u32(i32 %x) nounwind { + 
%asmtmp = tail call i32 asm "rorw $$8, ${0:w};rorl $$16, $0;rorw $$8, ${0:w}", "=r,0,~{cc},~{dirflag},~{flags},~{fpsr}"(i32 %x) nounwind + ret i32 %asmtmp +} + +; CHECK: s64: +; CHECK: bswapq +define i64 @s64(i64 %x) nounwind { + %asmtmp = tail call i64 asm "bswap ${0:q}", "=r,0,~{dirflag},~{fpsr},~{flags}"(i64 %x) nounwind + ret i64 %asmtmp +} + +; CHECK: t64: +; CHECK: bswapq +define i64 @t64(i64 %x) nounwind { + %asmtmp = tail call i64 asm "bswap ${0:q}", "=r,0,~{fpsr},~{dirflag},~{flags}"(i64 %x) nounwind + ret i64 %asmtmp +}
diff --git a/src/LLVM/test/CodeGen/X86/bswap.ll b/src/LLVM/test/CodeGen/X86/bswap.ll new file mode 100644 index 0000000..c08be25 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/bswap.ll
@@ -0,0 +1,59 @@ +; bswap should be constant folded when it is passed a constant argument + +; RUN: llc < %s -march=x86 -mcpu=i686 | FileCheck %s + +declare i16 @llvm.bswap.i16(i16) + +declare i32 @llvm.bswap.i32(i32) + +declare i64 @llvm.bswap.i64(i64) + +define i16 @W(i16 %A) { +; CHECK: W: +; CHECK: rolw $8, %ax + %Z = call i16 @llvm.bswap.i16( i16 %A ) ; <i16> [#uses=1] + ret i16 %Z +} + +define i32 @X(i32 %A) { +; CHECK: X: +; CHECK: bswapl %eax + %Z = call i32 @llvm.bswap.i32( i32 %A ) ; <i32> [#uses=1] + ret i32 %Z +} + +define i64 @Y(i64 %A) { +; CHECK: Y: +; CHECK: bswapl %eax +; CHECK: bswapl %edx + %Z = call i64 @llvm.bswap.i64( i64 %A ) ; <i64> [#uses=1] + ret i64 %Z +} + +; rdar://9164521 +define i32 @test1(i32 %a) nounwind readnone { +entry: +; CHECK: test1 +; CHECK: bswapl %eax +; CHECK: shrl $16, %eax + %and = lshr i32 %a, 8 + %shr3 = and i32 %and, 255 + %and2 = shl i32 %a, 8 + %shl = and i32 %and2, 65280 + %or = or i32 %shr3, %shl + ret i32 %or +} + +define i32 @test2(i32 %a) nounwind readnone { +entry: +; CHECK: test2 +; CHECK: bswapl %eax +; CHECK: sarl $16, %eax + %and = lshr i32 %a, 8 + %shr4 = and i32 %and, 255 + %and2 = shl i32 %a, 8 + %or = or i32 %shr4, %and2 + %sext = shl i32 %or, 16 + %conv3 = ashr exact i32 %sext, 16 + ret i32 %conv3 +}
diff --git a/src/LLVM/test/CodeGen/X86/bt.ll b/src/LLVM/test/CodeGen/X86/bt.ll new file mode 100644 index 0000000..ec447e5 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/bt.ll
@@ -0,0 +1,442 @@ +; RUN: llc < %s -march=x86 | grep btl | count 28 +; RUN: llc < %s -march=x86 -mcpu=pentium4 | grep btl | not grep esp +; RUN: llc < %s -march=x86 -mcpu=penryn | grep btl | not grep esp +; PR3253 + +; The register+memory form of the BT instruction should be usable on +; pentium4, however it is currently disabled due to the register+memory +; form having different semantics than the register+register form. + +; Test these patterns: +; (X & (1 << N)) != 0 --> BT(X, N). +; ((X >>u N) & 1) != 0 --> BT(X, N). +; as well as several variations: +; - The second form can use an arithmetic shift. +; - Either form can use == instead of !=. +; - Either form can compare with an operand of the & +; instead of with 0. +; - The comparison can be commuted (only cases where neither +; operand is constant are included). +; - The and can be commuted. + +define void @test2(i32 %x, i32 %n) nounwind { +entry: + %tmp29 = lshr i32 %x, %n ; <i32> [#uses=1] + %tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1] + %tmp4 = icmp eq i32 %tmp3, 0 ; <i1> [#uses=1] + br i1 %tmp4, label %bb, label %UnifiedReturnBlock + +bb: ; preds = %entry + call void @foo() + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +define void @test2b(i32 %x, i32 %n) nounwind { +entry: + %tmp29 = lshr i32 %x, %n ; <i32> [#uses=1] + %tmp3 = and i32 1, %tmp29 + %tmp4 = icmp eq i32 %tmp3, 0 ; <i1> [#uses=1] + br i1 %tmp4, label %bb, label %UnifiedReturnBlock + +bb: ; preds = %entry + call void @foo() + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +define void @atest2(i32 %x, i32 %n) nounwind { +entry: + %tmp29 = ashr i32 %x, %n ; <i32> [#uses=1] + %tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1] + %tmp4 = icmp eq i32 %tmp3, 0 ; <i1> [#uses=1] + br i1 %tmp4, label %bb, label %UnifiedReturnBlock + +bb: ; preds = %entry + call void @foo() + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +define void @atest2b(i32 %x, i32 %n) nounwind { +entry: + %tmp29 = ashr i32 %x, %n 
; <i32> [#uses=1] + %tmp3 = and i32 1, %tmp29 + %tmp4 = icmp eq i32 %tmp3, 0 ; <i1> [#uses=1] + br i1 %tmp4, label %bb, label %UnifiedReturnBlock + +bb: ; preds = %entry + call void @foo() + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +define void @test3(i32 %x, i32 %n) nounwind { +entry: + %tmp29 = shl i32 1, %n ; <i32> [#uses=1] + %tmp3 = and i32 %tmp29, %x ; <i32> [#uses=1] + %tmp4 = icmp eq i32 %tmp3, 0 ; <i1> [#uses=1] + br i1 %tmp4, label %bb, label %UnifiedReturnBlock + +bb: ; preds = %entry + call void @foo() + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +define void @test3b(i32 %x, i32 %n) nounwind { +entry: + %tmp29 = shl i32 1, %n ; <i32> [#uses=1] + %tmp3 = and i32 %x, %tmp29 + %tmp4 = icmp eq i32 %tmp3, 0 ; <i1> [#uses=1] + br i1 %tmp4, label %bb, label %UnifiedReturnBlock + +bb: ; preds = %entry + call void @foo() + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +define void @testne2(i32 %x, i32 %n) nounwind { +entry: + %tmp29 = lshr i32 %x, %n ; <i32> [#uses=1] + %tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1] + %tmp4 = icmp ne i32 %tmp3, 0 ; <i1> [#uses=1] + br i1 %tmp4, label %bb, label %UnifiedReturnBlock + +bb: ; preds = %entry + call void @foo() + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +define void @testne2b(i32 %x, i32 %n) nounwind { +entry: + %tmp29 = lshr i32 %x, %n ; <i32> [#uses=1] + %tmp3 = and i32 1, %tmp29 + %tmp4 = icmp ne i32 %tmp3, 0 ; <i1> [#uses=1] + br i1 %tmp4, label %bb, label %UnifiedReturnBlock + +bb: ; preds = %entry + call void @foo() + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +define void @atestne2(i32 %x, i32 %n) nounwind { +entry: + %tmp29 = ashr i32 %x, %n ; <i32> [#uses=1] + %tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1] + %tmp4 = icmp ne i32 %tmp3, 0 ; <i1> [#uses=1] + br i1 %tmp4, label %bb, label %UnifiedReturnBlock + +bb: ; preds = %entry + call void @foo() + ret void + +UnifiedReturnBlock: ; preds = %entry + ret 
void +} + +define void @atestne2b(i32 %x, i32 %n) nounwind { +entry: + %tmp29 = ashr i32 %x, %n ; <i32> [#uses=1] + %tmp3 = and i32 1, %tmp29 + %tmp4 = icmp ne i32 %tmp3, 0 ; <i1> [#uses=1] + br i1 %tmp4, label %bb, label %UnifiedReturnBlock + +bb: ; preds = %entry + call void @foo() + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +define void @testne3(i32 %x, i32 %n) nounwind { +entry: + %tmp29 = shl i32 1, %n ; <i32> [#uses=1] + %tmp3 = and i32 %tmp29, %x ; <i32> [#uses=1] + %tmp4 = icmp ne i32 %tmp3, 0 ; <i1> [#uses=1] + br i1 %tmp4, label %bb, label %UnifiedReturnBlock + +bb: ; preds = %entry + call void @foo() + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +define void @testne3b(i32 %x, i32 %n) nounwind { +entry: + %tmp29 = shl i32 1, %n ; <i32> [#uses=1] + %tmp3 = and i32 %x, %tmp29 + %tmp4 = icmp ne i32 %tmp3, 0 ; <i1> [#uses=1] + br i1 %tmp4, label %bb, label %UnifiedReturnBlock + +bb: ; preds = %entry + call void @foo() + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +define void @query2(i32 %x, i32 %n) nounwind { +entry: + %tmp29 = lshr i32 %x, %n ; <i32> [#uses=1] + %tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1] + %tmp4 = icmp eq i32 %tmp3, 1 ; <i1> [#uses=1] + br i1 %tmp4, label %bb, label %UnifiedReturnBlock + +bb: ; preds = %entry + call void @foo() + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +define void @query2b(i32 %x, i32 %n) nounwind { +entry: + %tmp29 = lshr i32 %x, %n ; <i32> [#uses=1] + %tmp3 = and i32 1, %tmp29 + %tmp4 = icmp eq i32 %tmp3, 1 ; <i1> [#uses=1] + br i1 %tmp4, label %bb, label %UnifiedReturnBlock + +bb: ; preds = %entry + call void @foo() + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +define void @aquery2(i32 %x, i32 %n) nounwind { +entry: + %tmp29 = ashr i32 %x, %n ; <i32> [#uses=1] + %tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1] + %tmp4 = icmp eq i32 %tmp3, 1 ; <i1> [#uses=1] + br i1 %tmp4, label %bb, label %UnifiedReturnBlock + 
+bb: ; preds = %entry + call void @foo() + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +define void @aquery2b(i32 %x, i32 %n) nounwind { +entry: + %tmp29 = ashr i32 %x, %n ; <i32> [#uses=1] + %tmp3 = and i32 1, %tmp29 + %tmp4 = icmp eq i32 %tmp3, 1 ; <i1> [#uses=1] + br i1 %tmp4, label %bb, label %UnifiedReturnBlock + +bb: ; preds = %entry + call void @foo() + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +define void @query3(i32 %x, i32 %n) nounwind { +entry: + %tmp29 = shl i32 1, %n ; <i32> [#uses=1] + %tmp3 = and i32 %tmp29, %x ; <i32> [#uses=1] + %tmp4 = icmp eq i32 %tmp3, %tmp29 ; <i1> [#uses=1] + br i1 %tmp4, label %bb, label %UnifiedReturnBlock + +bb: ; preds = %entry + call void @foo() + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +define void @query3b(i32 %x, i32 %n) nounwind { +entry: + %tmp29 = shl i32 1, %n ; <i32> [#uses=1] + %tmp3 = and i32 %x, %tmp29 + %tmp4 = icmp eq i32 %tmp3, %tmp29 ; <i1> [#uses=1] + br i1 %tmp4, label %bb, label %UnifiedReturnBlock + +bb: ; preds = %entry + call void @foo() + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +define void @query3x(i32 %x, i32 %n) nounwind { +entry: + %tmp29 = shl i32 1, %n ; <i32> [#uses=1] + %tmp3 = and i32 %tmp29, %x ; <i32> [#uses=1] + %tmp4 = icmp eq i32 %tmp29, %tmp3 ; <i1> [#uses=1] + br i1 %tmp4, label %bb, label %UnifiedReturnBlock + +bb: ; preds = %entry + call void @foo() + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +define void @query3bx(i32 %x, i32 %n) nounwind { +entry: + %tmp29 = shl i32 1, %n ; <i32> [#uses=1] + %tmp3 = and i32 %x, %tmp29 + %tmp4 = icmp eq i32 %tmp29, %tmp3 ; <i1> [#uses=1] + br i1 %tmp4, label %bb, label %UnifiedReturnBlock + +bb: ; preds = %entry + call void @foo() + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +define void @queryne2(i32 %x, i32 %n) nounwind { +entry: + %tmp29 = lshr i32 %x, %n ; <i32> [#uses=1] + %tmp3 = and i32 %tmp29, 1 ; <i32> 
[#uses=1] + %tmp4 = icmp ne i32 %tmp3, 1 ; <i1> [#uses=1] + br i1 %tmp4, label %bb, label %UnifiedReturnBlock + +bb: ; preds = %entry + call void @foo() + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +define void @queryne2b(i32 %x, i32 %n) nounwind { +entry: + %tmp29 = lshr i32 %x, %n ; <i32> [#uses=1] + %tmp3 = and i32 1, %tmp29 + %tmp4 = icmp ne i32 %tmp3, 1 ; <i1> [#uses=1] + br i1 %tmp4, label %bb, label %UnifiedReturnBlock + +bb: ; preds = %entry + call void @foo() + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +define void @aqueryne2(i32 %x, i32 %n) nounwind { +entry: + %tmp29 = ashr i32 %x, %n ; <i32> [#uses=1] + %tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1] + %tmp4 = icmp ne i32 %tmp3, 1 ; <i1> [#uses=1] + br i1 %tmp4, label %bb, label %UnifiedReturnBlock + +bb: ; preds = %entry + call void @foo() + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +define void @aqueryne2b(i32 %x, i32 %n) nounwind { +entry: + %tmp29 = ashr i32 %x, %n ; <i32> [#uses=1] + %tmp3 = and i32 1, %tmp29 + %tmp4 = icmp ne i32 %tmp3, 1 ; <i1> [#uses=1] + br i1 %tmp4, label %bb, label %UnifiedReturnBlock + +bb: ; preds = %entry + call void @foo() + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +define void @queryne3(i32 %x, i32 %n) nounwind { +entry: + %tmp29 = shl i32 1, %n ; <i32> [#uses=1] + %tmp3 = and i32 %tmp29, %x ; <i32> [#uses=1] + %tmp4 = icmp ne i32 %tmp3, %tmp29 ; <i1> [#uses=1] + br i1 %tmp4, label %bb, label %UnifiedReturnBlock + +bb: ; preds = %entry + call void @foo() + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +define void @queryne3b(i32 %x, i32 %n) nounwind { +entry: + %tmp29 = shl i32 1, %n ; <i32> [#uses=1] + %tmp3 = and i32 %x, %tmp29 + %tmp4 = icmp ne i32 %tmp3, %tmp29 ; <i1> [#uses=1] + br i1 %tmp4, label %bb, label %UnifiedReturnBlock + +bb: ; preds = %entry + call void @foo() + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +define void 
@queryne3x(i32 %x, i32 %n) nounwind { +entry: + %tmp29 = shl i32 1, %n ; <i32> [#uses=1] + %tmp3 = and i32 %tmp29, %x ; <i32> [#uses=1] + %tmp4 = icmp ne i32 %tmp29, %tmp3 ; <i1> [#uses=1] + br i1 %tmp4, label %bb, label %UnifiedReturnBlock + +bb: ; preds = %entry + call void @foo() + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +define void @queryne3bx(i32 %x, i32 %n) nounwind { +entry: + %tmp29 = shl i32 1, %n ; <i32> [#uses=1] + %tmp3 = and i32 %x, %tmp29 + %tmp4 = icmp ne i32 %tmp29, %tmp3 ; <i1> [#uses=1] + br i1 %tmp4, label %bb, label %UnifiedReturnBlock + +bb: ; preds = %entry + call void @foo() + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +declare void @foo()
diff --git a/src/LLVM/test/CodeGen/X86/byval-align.ll b/src/LLVM/test/CodeGen/X86/byval-align.ll new file mode 100644 index 0000000..c62a181 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/byval-align.ll
@@ -0,0 +1,59 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +%struct.S = type { i32} + +@.str = private constant [10 x i8] c"ptr = %p\0A\00", align 1 ; <[10 x i8]*> [#uses=1] +@.str1 = private constant [8 x i8] c"Failed \00", align 1 ; <[8 x i8]*> [#uses=1] +@.str2 = private constant [2 x i8] c"0\00", align 1 ; <[2 x i8]*> [#uses=1] +@.str3 = private constant [7 x i8] c"test.c\00", align 1 ; <[7 x i8]*> [#uses=1] +@__PRETTY_FUNCTION__.2067 = internal constant [13 x i8] c"aligned_func\00" ; <[13 x i8]*> [#uses=1] + +define void @aligned_func(%struct.S* byval align 64 %obj) nounwind { +entry: + %ptr = alloca i8* ; <i8**> [#uses=3] + %p = alloca i64 ; <i64*> [#uses=3] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + %obj1 = bitcast %struct.S* %obj to i8* ; <i8*> [#uses=1] + store i8* %obj1, i8** %ptr, align 8 + %0 = load i8** %ptr, align 8 ; <i8*> [#uses=1] + %1 = ptrtoint i8* %0 to i64 ; <i64> [#uses=1] + store i64 %1, i64* %p, align 8 + %2 = load i8** %ptr, align 8 ; <i8*> [#uses=1] + %3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8]* @.str, i64 0, i64 0), i8* %2) nounwind ; <i32> [#uses=0] + %4 = load i64* %p, align 8 ; <i64> [#uses=1] + %5 = and i64 %4, 140737488355264 ; <i64> [#uses=1] + %6 = load i64* %p, align 8 ; <i64> [#uses=1] + %7 = icmp ne i64 %5, %6 ; <i1> [#uses=1] + br i1 %7, label %bb, label %bb2 + +bb: ; preds = %entry + %8 = call i32 @puts(i8* getelementptr inbounds ([8 x i8]* @.str1, i64 0, i64 0)) nounwind ; <i32> [#uses=0] + call void @__assert_fail(i8* getelementptr inbounds ([2 x i8]* @.str2, i64 0, i64 0), i8* getelementptr inbounds ([7 x i8]* @.str3, i64 0, i64 0), i32 18, i8* getelementptr inbounds ([13 x i8]* @__PRETTY_FUNCTION__.2067, i64 0, i64 0)) noreturn nounwind + unreachable + +bb2: ; preds = %entry + br label %return + +return: ; preds = %bb2 + ret void +} + +declare i32 @printf(i8*, ...) 
nounwind + +declare i32 @puts(i8*) + +declare void @__assert_fail(i8*, i8*, i32, i8*) noreturn nounwind + +define void @main() nounwind { +entry: +; CHECK: main +; CHECK: andq $-64, %rsp + %s1 = alloca %struct.S ; <%struct.S*> [#uses=4] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + %0 = getelementptr inbounds %struct.S* %s1, i32 0, i32 0 ; <i32*> [#uses=1] + store i32 1, i32* %0, align 4 + call void @aligned_func(%struct.S* byval align 64 %s1) nounwind + br label %return + +return: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/byval.ll b/src/LLVM/test/CodeGen/X86/byval.ll new file mode 100644 index 0000000..185eda1 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/byval.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck -check-prefix=X86-64 %s +; Win64 has not supported byval yet. +; RUN: llc < %s -march=x86 | FileCheck -check-prefix=X86 %s + +; X86: movl 4(%esp), %eax +; X86: movl 8(%esp), %edx + +; X86-64: movq 8(%rsp), %rax + +%struct.s = type { i64, i64, i64 } + +define i64 @f(%struct.s* byval %a) { +entry: + %tmp2 = getelementptr %struct.s* %a, i32 0, i32 0 + %tmp3 = load i64* %tmp2, align 8 + ret i64 %tmp3 +}
diff --git a/src/LLVM/test/CodeGen/X86/byval2.ll b/src/LLVM/test/CodeGen/X86/byval2.ll new file mode 100644 index 0000000..196efe5 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/byval2.ll
@@ -0,0 +1,45 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64 +; X64-NOT: movsq +; X64: rep +; X64-NOT: rep +; X64: movsq +; X64-NOT: movsq +; X64: rep +; X64-NOT: rep +; X64: movsq +; X64-NOT: rep +; X64-NOT: movsq + +; Win64 has not supported byval yet. + +; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32 +; X32-NOT: movsl +; X32: rep +; X32-NOT: rep +; X32: movsl +; X32-NOT: movsl +; X32: rep +; X32-NOT: rep +; X32: movsl +; X32-NOT: rep +; X32-NOT: movsl + +%struct.s = type { i64, i64, i64, i64, i64, i64, i64, i64, + i64, i64, i64, i64, i64, i64, i64, i64, + i64 } + +define void @g(i64 %a, i64 %b, i64 %c) { +entry: + %d = alloca %struct.s, align 16 + %tmp = getelementptr %struct.s* %d, i32 0, i32 0 + store i64 %a, i64* %tmp, align 16 + %tmp2 = getelementptr %struct.s* %d, i32 0, i32 1 + store i64 %b, i64* %tmp2, align 16 + %tmp4 = getelementptr %struct.s* %d, i32 0, i32 2 + store i64 %c, i64* %tmp4, align 16 + call void @f( %struct.s*byval %d ) + call void @f( %struct.s*byval %d ) + ret void +} + +declare void @f(%struct.s* byval)
diff --git a/src/LLVM/test/CodeGen/X86/byval3.ll b/src/LLVM/test/CodeGen/X86/byval3.ll new file mode 100644 index 0000000..f3b125c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/byval3.ll
@@ -0,0 +1,53 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64 +; X64-NOT: movsq +; X64: rep +; X64-NOT: rep +; X64: movsq +; X64-NOT: movsq +; X64: rep +; X64-NOT: rep +; X64: movsq +; X64-NOT: rep +; X64-NOT: movsq + +; Win64 has not supported byval yet. + +; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32 +; X32-NOT: movsl +; X32: rep +; X32-NOT: rep +; X32: movsl +; X32-NOT: movsl +; X32: rep +; X32-NOT: rep +; X32: movsl +; X32-NOT: rep +; X32-NOT: movsl + +%struct.s = type { i32, i32, i32, i32, i32, i32, i32, i32, + i32, i32, i32, i32, i32, i32, i32, i32, + i32, i32, i32, i32, i32, i32, i32, i32, + i32, i32, i32, i32, i32, i32, i32, i32, + i32 } + +define void @g(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6) nounwind { +entry: + %d = alloca %struct.s, align 16 + %tmp = getelementptr %struct.s* %d, i32 0, i32 0 + store i32 %a1, i32* %tmp, align 16 + %tmp2 = getelementptr %struct.s* %d, i32 0, i32 1 + store i32 %a2, i32* %tmp2, align 16 + %tmp4 = getelementptr %struct.s* %d, i32 0, i32 2 + store i32 %a3, i32* %tmp4, align 16 + %tmp6 = getelementptr %struct.s* %d, i32 0, i32 3 + store i32 %a4, i32* %tmp6, align 16 + %tmp8 = getelementptr %struct.s* %d, i32 0, i32 4 + store i32 %a5, i32* %tmp8, align 16 + %tmp10 = getelementptr %struct.s* %d, i32 0, i32 5 + store i32 %a6, i32* %tmp10, align 16 + call void @f( %struct.s* byval %d) + call void @f( %struct.s* byval %d) + ret void +} + +declare void @f(%struct.s* byval)
diff --git a/src/LLVM/test/CodeGen/X86/byval4.ll b/src/LLVM/test/CodeGen/X86/byval4.ll new file mode 100644 index 0000000..b7a4aa3 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/byval4.ll
@@ -0,0 +1,59 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64 +; X64-NOT: movsq +; X64: rep +; X64-NOT: rep +; X64: movsq +; X64-NOT: movsq +; X64: rep +; X64-NOT: rep +; X64: movsq +; X64-NOT: rep +; X64-NOT: movsq + +; Win64 has not supported byval yet. + +; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32 +; X32-NOT: movsl +; X32: rep +; X32-NOT: rep +; X32: movsl +; X32-NOT: movsl +; X32: rep +; X32-NOT: rep +; X32: movsl +; X32-NOT: rep +; X32-NOT: movsl + +%struct.s = type { i16, i16, i16, i16, i16, i16, i16, i16, + i16, i16, i16, i16, i16, i16, i16, i16, + i16, i16, i16, i16, i16, i16, i16, i16, + i16, i16, i16, i16, i16, i16, i16, i16, + i16, i16, i16, i16, i16, i16, i16, i16, + i16, i16, i16, i16, i16, i16, i16, i16, + i16, i16, i16, i16, i16, i16, i16, i16, + i16, i16, i16, i16, i16, i16, i16, i16, + i16 } + + +define void @g(i16 signext %a1, i16 signext %a2, i16 signext %a3, + i16 signext %a4, i16 signext %a5, i16 signext %a6) nounwind { +entry: + %a = alloca %struct.s, align 16 + %tmp = getelementptr %struct.s* %a, i32 0, i32 0 + store i16 %a1, i16* %tmp, align 16 + %tmp2 = getelementptr %struct.s* %a, i32 0, i32 1 + store i16 %a2, i16* %tmp2, align 16 + %tmp4 = getelementptr %struct.s* %a, i32 0, i32 2 + store i16 %a3, i16* %tmp4, align 16 + %tmp6 = getelementptr %struct.s* %a, i32 0, i32 3 + store i16 %a4, i16* %tmp6, align 16 + %tmp8 = getelementptr %struct.s* %a, i32 0, i32 4 + store i16 %a5, i16* %tmp8, align 16 + %tmp10 = getelementptr %struct.s* %a, i32 0, i32 5 + store i16 %a6, i16* %tmp10, align 16 + call void @f( %struct.s* byval %a ) + call void @f( %struct.s* byval %a ) + ret void +} + +declare void @f(%struct.s* byval)
diff --git a/src/LLVM/test/CodeGen/X86/byval5.ll b/src/LLVM/test/CodeGen/X86/byval5.ll new file mode 100644 index 0000000..dca0936 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/byval5.ll
@@ -0,0 +1,67 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64 +; X64-NOT: movsq +; X64: rep +; X64-NOT: rep +; X64: movsq +; X64-NOT: movsq +; X64: rep +; X64-NOT: rep +; X64: movsq +; X64-NOT: rep +; X64-NOT: movsq + +; Win64 has not supported byval yet. + +; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32 +; X32-NOT: movsl +; X32: rep +; X32-NOT: rep +; X32: movsl +; X32-NOT: movsl +; X32: rep +; X32-NOT: rep +; X32: movsl +; X32-NOT: rep +; X32-NOT: movsl + +%struct.s = type { i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8, + i8 } + + +define void @g(i8 signext %a1, i8 signext %a2, i8 signext %a3, + i8 signext %a4, i8 signext %a5, i8 signext %a6) { +entry: + %a = alloca %struct.s + %tmp = getelementptr %struct.s* %a, i32 0, i32 0 + store i8 %a1, i8* %tmp, align 8 + %tmp2 = getelementptr %struct.s* %a, i32 0, i32 1 + store i8 %a2, i8* %tmp2, align 8 + %tmp4 = getelementptr %struct.s* %a, i32 0, i32 2 + store i8 %a3, i8* %tmp4, align 8 + %tmp6 = getelementptr %struct.s* %a, i32 0, i32 3 + store i8 %a4, i8* %tmp6, align 8 + %tmp8 = getelementptr %struct.s* %a, i32 0, i32 4 + store i8 %a5, i8* %tmp8, align 8 + %tmp10 = getelementptr %struct.s* %a, i32 0, i32 5 + store i8 %a6, i8* %tmp10, align 8 + call void @f( %struct.s* byval %a ) + call void @f( %struct.s* byval %a ) + ret void +} + +declare void @f(%struct.s* byval)
diff --git a/src/LLVM/test/CodeGen/X86/byval6.ll b/src/LLVM/test/CodeGen/X86/byval6.ll new file mode 100644 index 0000000..b060369 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/byval6.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=x86 | grep add | not grep 16 + + %struct.W = type { x86_fp80, x86_fp80 } +@B = global %struct.W { x86_fp80 0xK4001A000000000000000, x86_fp80 0xK4001C000000000000000 }, align 32 +@.cpx = internal constant %struct.W { x86_fp80 0xK4001E000000000000000, x86_fp80 0xK40028000000000000000 } + +define i32 @main() nounwind { +entry: + tail call void (i32, ...)* @bar( i32 3, %struct.W* byval @.cpx ) nounwind + tail call void (i32, ...)* @baz( i32 3, %struct.W* byval @B ) nounwind + ret i32 undef +} + +declare void @bar(i32, ...) + +declare void @baz(i32, ...)
diff --git a/src/LLVM/test/CodeGen/X86/byval7.ll b/src/LLVM/test/CodeGen/X86/byval7.ll new file mode 100644 index 0000000..98a26e4 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/byval7.ll
@@ -0,0 +1,21 @@ +; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s + + %struct.S = type { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, + <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, + <2 x i64> } + +define i32 @main() nounwind { +entry: +; CHECK: main: +; CHECK: movl $1, (%esp) +; CHECK: leal 16(%esp), %edi +; CHECK: leal 160(%esp), %esi +; CHECK: rep;movsl + %s = alloca %struct.S ; <%struct.S*> [#uses=2] + %tmp15 = getelementptr %struct.S* %s, i32 0, i32 0 ; <<2 x i64>*> [#uses=1] + store <2 x i64> < i64 8589934595, i64 1 >, <2 x i64>* %tmp15, align 16 + call void @t( i32 1, %struct.S* byval %s ) nounwind + ret i32 0 +} + +declare void @t(i32, %struct.S* byval )
diff --git a/src/LLVM/test/CodeGen/X86/call-imm.ll b/src/LLVM/test/CodeGen/X86/call-imm.ll new file mode 100644 index 0000000..3857fb1 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/call-imm.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=static | grep {call.*12345678} +; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic | not grep {call.*12345678} +; RUN: llc < %s -mtriple=i386-pc-linux -relocation-model=dynamic-no-pic | grep {call.*12345678} + +; Call to immediate is not safe on x86-64 unless we *know* that the +; call will be within 32-bits pcrel from the dest immediate. + +; RUN: llc < %s -march=x86-64 | grep {call.*\\*%rax} + +; PR3666 +; PR3773 +; rdar://6904453 + +define i32 @main() nounwind { +entry: + %0 = call i32 inttoptr (i32 12345678 to i32 (i32)*)(i32 0) nounwind ; <i32> [#uses=1] + ret i32 %0 +}
diff --git a/src/LLVM/test/CodeGen/X86/call-push.ll b/src/LLVM/test/CodeGen/X86/call-push.ll new file mode 100644 index 0000000..8cca10c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/call-push.ll
@@ -0,0 +1,45 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim | FileCheck %s + + %struct.decode_t = type { i8, i8, i8, i8, i16, i8, i8, %struct.range_t** } + %struct.range_t = type { float, float, i32, i32, i32, [0 x i8] } + +define i32 @decode_byte(%struct.decode_t* %decode) nounwind { +; CHECK: decode_byte: +; CHECK: pushl +; CHECK: popl +; CHECK: popl +; CHECK: jmp +entry: + %tmp2 = getelementptr %struct.decode_t* %decode, i32 0, i32 4 ; <i16*> [#uses=1] + %tmp23 = bitcast i16* %tmp2 to i32* ; <i32*> [#uses=1] + %tmp4 = load i32* %tmp23 ; <i32> [#uses=1] + %tmp514 = lshr i32 %tmp4, 24 ; <i32> [#uses=1] + %tmp56 = trunc i32 %tmp514 to i8 ; <i8> [#uses=1] + %tmp7 = icmp eq i8 %tmp56, 0 ; <i1> [#uses=1] + br i1 %tmp7, label %UnifiedReturnBlock, label %cond_true + +cond_true: ; preds = %entry + %tmp10 = tail call i32 @f( %struct.decode_t* %decode ) ; <i32> [#uses=1] + ret i32 %tmp10 + +UnifiedReturnBlock: ; preds = %entry + ret i32 0 +} + +declare i32 @f(%struct.decode_t*) + + +; There should be no store for the undef operand. + +; CHECK: _test2: +; CHECK-NOT: 8(%esp) +; CHECK: 4(%esp) +; CHECK-NOT: 8(%esp) +; CHECK: calll +declare i32 @foo(i32, i32, i32) + +define void @test2() nounwind { +entry: + %call = call i32 @foo(i32 8, i32 6, i32 undef) + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/change-compare-stride-0.ll b/src/LLVM/test/CodeGen/X86/change-compare-stride-0.ll new file mode 100644 index 0000000..439f7b0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/change-compare-stride-0.ll
@@ -0,0 +1,83 @@ +; RUN: llc < %s -march=x86 -enable-lsr-nested | FileCheck %s +; +; Nested LSR is required to optimize this case. +; We do not expect to see this form of IR without -enable-iv-rewrite. + +define void @borf(i8* nocapture %in, i8* nocapture %out) nounwind { +; CHECK: borf: +; CHECK-NOT: inc +; CHECK-NOT: leal 1( +; CHECK-NOT: leal -1( +; CHECK: decl +; CHECK-NEXT: cmpl $-478 +; CHECK: ret + +bb4.thread: + br label %bb2.outer + +bb2.outer: ; preds = %bb4, %bb4.thread + %indvar18 = phi i32 [ 0, %bb4.thread ], [ %indvar.next28, %bb4 ] ; <i32> [#uses=3] + %tmp34 = mul i32 %indvar18, 65535 ; <i32> [#uses=1] + %i.0.reg2mem.0.ph = add i32 %tmp34, 639 ; <i32> [#uses=1] + %0 = and i32 %i.0.reg2mem.0.ph, 65535 ; <i32> [#uses=1] + %1 = mul i32 %0, 480 ; <i32> [#uses=1] + %tmp20 = mul i32 %indvar18, -478 ; <i32> [#uses=1] + br label %bb2 + +bb2: ; preds = %bb2, %bb2.outer + %indvar = phi i32 [ 0, %bb2.outer ], [ %indvar.next, %bb2 ] ; <i32> [#uses=3] + %ctg2 = getelementptr i8* %out, i32 %tmp20 ; <i8*> [#uses=1] + %tmp21 = ptrtoint i8* %ctg2 to i32 ; <i32> [#uses=1] + %tmp23 = sub i32 %tmp21, %indvar ; <i32> [#uses=1] + %out_addr.0.reg2mem.0 = inttoptr i32 %tmp23 to i8* ; <i8*> [#uses=1] + %tmp25 = mul i32 %indvar, 65535 ; <i32> [#uses=1] + %j.0.reg2mem.0 = add i32 %tmp25, 479 ; <i32> [#uses=1] + %2 = and i32 %j.0.reg2mem.0, 65535 ; <i32> [#uses=1] + %3 = add i32 %1, %2 ; <i32> [#uses=9] + %4 = add i32 %3, -481 ; <i32> [#uses=1] + %5 = getelementptr i8* %in, i32 %4 ; <i8*> [#uses=1] + %6 = load i8* %5, align 1 ; <i8> [#uses=1] + %7 = add i32 %3, -480 ; <i32> [#uses=1] + %8 = getelementptr i8* %in, i32 %7 ; <i8*> [#uses=1] + %9 = load i8* %8, align 1 ; <i8> [#uses=1] + %10 = add i32 %3, -479 ; <i32> [#uses=1] + %11 = getelementptr i8* %in, i32 %10 ; <i8*> [#uses=1] + %12 = load i8* %11, align 1 ; <i8> [#uses=1] + %13 = add i32 %3, -1 ; <i32> [#uses=1] + %14 = getelementptr i8* %in, i32 %13 ; <i8*> [#uses=1] + %15 = load i8* %14, align 1 ; <i8> [#uses=1] + %16 = 
getelementptr i8* %in, i32 %3 ; <i8*> [#uses=1] + %17 = load i8* %16, align 1 ; <i8> [#uses=1] + %18 = add i32 %3, 1 ; <i32> [#uses=1] + %19 = getelementptr i8* %in, i32 %18 ; <i8*> [#uses=1] + %20 = load i8* %19, align 1 ; <i8> [#uses=1] + %21 = add i32 %3, 481 ; <i32> [#uses=1] + %22 = getelementptr i8* %in, i32 %21 ; <i8*> [#uses=1] + %23 = load i8* %22, align 1 ; <i8> [#uses=1] + %24 = add i32 %3, 480 ; <i32> [#uses=1] + %25 = getelementptr i8* %in, i32 %24 ; <i8*> [#uses=1] + %26 = load i8* %25, align 1 ; <i8> [#uses=1] + %27 = add i32 %3, 479 ; <i32> [#uses=1] + %28 = getelementptr i8* %in, i32 %27 ; <i8*> [#uses=1] + %29 = load i8* %28, align 1 ; <i8> [#uses=1] + %30 = add i8 %9, %6 ; <i8> [#uses=1] + %31 = add i8 %30, %12 ; <i8> [#uses=1] + %32 = add i8 %31, %15 ; <i8> [#uses=1] + %33 = add i8 %32, %17 ; <i8> [#uses=1] + %34 = add i8 %33, %20 ; <i8> [#uses=1] + %35 = add i8 %34, %23 ; <i8> [#uses=1] + %36 = add i8 %35, %26 ; <i8> [#uses=1] + %37 = add i8 %36, %29 ; <i8> [#uses=1] + store i8 %37, i8* %out_addr.0.reg2mem.0, align 1 + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %indvar.next, 478 ; <i1> [#uses=1] + br i1 %exitcond, label %bb4, label %bb2 + +bb4: ; preds = %bb2 + %indvar.next28 = add i32 %indvar18, 1 ; <i32> [#uses=2] + %exitcond29 = icmp eq i32 %indvar.next28, 638 ; <i1> [#uses=1] + br i1 %exitcond29, label %return, label %bb2.outer + +return: ; preds = %bb4 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/change-compare-stride-1.ll b/src/LLVM/test/CodeGen/X86/change-compare-stride-1.ll new file mode 100644 index 0000000..8b53ae2 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/change-compare-stride-1.ll
@@ -0,0 +1,92 @@ +; RUN: llc < %s -march=x86-64 -enable-lsr-nested | FileCheck %s +; +; Nested LSR is required to optimize this case. +; We do not expect to see this form of IR without -enable-iv-rewrite. + +define void @borf(i8* nocapture %in, i8* nocapture %out) nounwind { +; CHECK: borf: +; CHECK-NOT: inc +; CHECK-NOT: leal 1( +; CHECK-NOT: leal -1( +; CHECK: decq +; CHECK-NEXT: cmpq $-478 +; CHECK: ret + +bb4.thread: + br label %bb2.outer + +bb2.outer: ; preds = %bb4, %bb4.thread + %indvar19 = phi i64 [ 0, %bb4.thread ], [ %indvar.next29, %bb4 ] ; <i64> [#uses=3] + %indvar31 = trunc i64 %indvar19 to i16 ; <i16> [#uses=1] + %i.0.reg2mem.0.ph = sub i16 639, %indvar31 ; <i16> [#uses=1] + %0 = zext i16 %i.0.reg2mem.0.ph to i32 ; <i32> [#uses=1] + %1 = mul i32 %0, 480 ; <i32> [#uses=1] + %tmp21 = mul i64 %indvar19, -478 ; <i64> [#uses=1] + br label %bb2 + +bb2: ; preds = %bb2, %bb2.outer + %indvar = phi i64 [ 0, %bb2.outer ], [ %indvar.next, %bb2 ] ; <i64> [#uses=3] + %indvar16 = trunc i64 %indvar to i16 ; <i16> [#uses=1] + %ctg2 = getelementptr i8* %out, i64 %tmp21 ; <i8*> [#uses=1] + %tmp22 = ptrtoint i8* %ctg2 to i64 ; <i64> [#uses=1] + %tmp24 = sub i64 %tmp22, %indvar ; <i64> [#uses=1] + %out_addr.0.reg2mem.0 = inttoptr i64 %tmp24 to i8* ; <i8*> [#uses=1] + %j.0.reg2mem.0 = sub i16 479, %indvar16 ; <i16> [#uses=1] + %2 = zext i16 %j.0.reg2mem.0 to i32 ; <i32> [#uses=1] + %3 = add i32 %1, %2 ; <i32> [#uses=9] + %4 = add i32 %3, -481 ; <i32> [#uses=1] + %5 = zext i32 %4 to i64 ; <i64> [#uses=1] + %6 = getelementptr i8* %in, i64 %5 ; <i8*> [#uses=1] + %7 = load i8* %6, align 1 ; <i8> [#uses=1] + %8 = add i32 %3, -480 ; <i32> [#uses=1] + %9 = zext i32 %8 to i64 ; <i64> [#uses=1] + %10 = getelementptr i8* %in, i64 %9 ; <i8*> [#uses=1] + %11 = load i8* %10, align 1 ; <i8> [#uses=1] + %12 = add i32 %3, -479 ; <i32> [#uses=1] + %13 = zext i32 %12 to i64 ; <i64> [#uses=1] + %14 = getelementptr i8* %in, i64 %13 ; <i8*> [#uses=1] + %15 = load i8* %14, align 1 ; <i8> 
[#uses=1] + %16 = add i32 %3, -1 ; <i32> [#uses=1] + %17 = zext i32 %16 to i64 ; <i64> [#uses=1] + %18 = getelementptr i8* %in, i64 %17 ; <i8*> [#uses=1] + %19 = load i8* %18, align 1 ; <i8> [#uses=1] + %20 = zext i32 %3 to i64 ; <i64> [#uses=1] + %21 = getelementptr i8* %in, i64 %20 ; <i8*> [#uses=1] + %22 = load i8* %21, align 1 ; <i8> [#uses=1] + %23 = add i32 %3, 1 ; <i32> [#uses=1] + %24 = zext i32 %23 to i64 ; <i64> [#uses=1] + %25 = getelementptr i8* %in, i64 %24 ; <i8*> [#uses=1] + %26 = load i8* %25, align 1 ; <i8> [#uses=1] + %27 = add i32 %3, 481 ; <i32> [#uses=1] + %28 = zext i32 %27 to i64 ; <i64> [#uses=1] + %29 = getelementptr i8* %in, i64 %28 ; <i8*> [#uses=1] + %30 = load i8* %29, align 1 ; <i8> [#uses=1] + %31 = add i32 %3, 480 ; <i32> [#uses=1] + %32 = zext i32 %31 to i64 ; <i64> [#uses=1] + %33 = getelementptr i8* %in, i64 %32 ; <i8*> [#uses=1] + %34 = load i8* %33, align 1 ; <i8> [#uses=1] + %35 = add i32 %3, 479 ; <i32> [#uses=1] + %36 = zext i32 %35 to i64 ; <i64> [#uses=1] + %37 = getelementptr i8* %in, i64 %36 ; <i8*> [#uses=1] + %38 = load i8* %37, align 1 ; <i8> [#uses=1] + %39 = add i8 %11, %7 ; <i8> [#uses=1] + %40 = add i8 %39, %15 ; <i8> [#uses=1] + %41 = add i8 %40, %19 ; <i8> [#uses=1] + %42 = add i8 %41, %22 ; <i8> [#uses=1] + %43 = add i8 %42, %26 ; <i8> [#uses=1] + %44 = add i8 %43, %30 ; <i8> [#uses=1] + %45 = add i8 %44, %34 ; <i8> [#uses=1] + %46 = add i8 %45, %38 ; <i8> [#uses=1] + store i8 %46, i8* %out_addr.0.reg2mem.0, align 1 + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %indvar.next, 478 ; <i1> [#uses=1] + br i1 %exitcond, label %bb4, label %bb2 + +bb4: ; preds = %bb2 + %indvar.next29 = add i64 %indvar19, 1 ; <i64> [#uses=2] + %exitcond30 = icmp eq i64 %indvar.next29, 638 ; <i1> [#uses=1] + br i1 %exitcond30, label %return, label %bb2.outer + +return: ; preds = %bb4 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/change-compare-stride-trickiness-0.ll b/src/LLVM/test/CodeGen/X86/change-compare-stride-trickiness-0.ll new file mode 100644 index 0000000..1f7f6ec --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/change-compare-stride-trickiness-0.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s -o - | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-apple-darwin9" + +; The comparison happens before the relevant use, but it can still be rewritten +; to compare with zero. + +; CHECK: foo: +; CHECK: align +; CHECK: incl %eax +; CHECK-NEXT: decl %ecx +; CHECK-NEXT: jne + +define void @foo() nounwind { +entry: + br label %loop + +loop: + %indvar = phi i32 [ 0, %entry ], [ %i.2.0.us1534, %loop ] ; <i32> [#uses=1] + %i.2.0.us1534 = add i32 %indvar, 1 ; <i32> [#uses=3] + %tmp611.us1535 = icmp eq i32 %i.2.0.us1534, 4 ; <i1> [#uses=2] + %tmp623.us1538 = select i1 %tmp611.us1535, i32 6, i32 0 ; <i32> [#uses=0] + %tmp628.us1540 = shl i32 %i.2.0.us1534, 1 ; <i32> [#uses=1] + %tmp645646647.us1547 = sext i32 %tmp628.us1540 to i64 ; <i64> [#uses=0] + br i1 %tmp611.us1535, label %exit, label %loop + +exit: + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/change-compare-stride-trickiness-1.ll b/src/LLVM/test/CodeGen/X86/change-compare-stride-trickiness-1.ll new file mode 100644 index 0000000..a3933e2 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/change-compare-stride-trickiness-1.ll
@@ -0,0 +1,31 @@ +; RUN: llc -march=x86 < %s | FileCheck %s + +; The comparison happens after the relevant use, so the stride can easily +; be changed. The comparison can be done in a narrower mode than the +; induction variable. +; TODO: By making the first store post-increment as well, the loop setup +; could be made simpler. + +define void @foo() nounwind { +; CHECK: foo: +; CHECK-NOT: ret +; CHECK: cmpl $10 +; CHECK: ret + +entry: + br label %loop + +loop: + %indvar = phi i32 [ 0, %entry ], [ %i.2.0.us1534, %loop ] ; <i32> [#uses=1] + %i.2.0.us1534 = add i32 %indvar, 1 ; <i32> [#uses=3] + %tmp628.us1540 = shl i32 %i.2.0.us1534, 1 ; <i32> [#uses=1] + %tmp645646647.us1547 = sext i32 %tmp628.us1540 to i64 ; <i64> [#uses=1] + store i64 %tmp645646647.us1547, i64* null + %tmp611.us1535 = icmp eq i32 %i.2.0.us1534, 4 ; <i1> [#uses=2] + %tmp623.us1538 = select i1 %tmp611.us1535, i32 6, i32 0 ; <i32> [#uses=1] + store i32 %tmp623.us1538, i32* null + br i1 %tmp611.us1535, label %exit, label %loop + +exit: + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/change-compare-stride-trickiness-2.ll b/src/LLVM/test/CodeGen/X86/change-compare-stride-trickiness-2.ll new file mode 100644 index 0000000..ae27383 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/change-compare-stride-trickiness-2.ll
@@ -0,0 +1,58 @@ +; RUN: llc < %s +; PR4222 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "x86_64-pc-linux-gnu" +module asm ".ident\09\22$FreeBSD: head/sys/amd64/amd64/minidump_machdep.c 184499 2008-10-31 10:11:35Z kib $\22" + %struct.dumperinfo = type <{ i32 (i8*, i8*, i64, i64, i64)*, i8*, i32, i32, i64, i64 }> + +define void @minidumpsys(%struct.dumperinfo* %di) nounwind { +entry: + br label %if.end + +if.end: ; preds = %if.end52, %entry + br label %for.cond.i.preheader + +for.cond.i.preheader: ; preds = %if.end52, %if.end + %indvar688 = phi i64 [ 0, %if.end ], [ %indvar.next689, %if.end52 ] ; <i64> [#uses=3] + %tmp690 = shl i64 %indvar688, 12 ; <i64> [#uses=1] + %pa.0642 = add i64 %tmp690, 0 ; <i64> [#uses=1] + %indvar688703 = trunc i64 %indvar688 to i32 ; <i32> [#uses=1] + %tmp692693 = add i32 %indvar688703, 1 ; <i32> [#uses=1] + %phitmp = sext i32 %tmp692693 to i64 ; <i64> [#uses=1] + br i1 false, label %if.end52, label %land.lhs.true.i + +land.lhs.true.i: ; preds = %for.cond.i.preheader + %shr2.i = lshr i64 %pa.0642, 18 ; <i64> [#uses=0] + unreachable + +if.end52: ; preds = %for.cond.i.preheader + %phitmp654 = icmp ult i64 %phitmp, 512 ; <i1> [#uses=1] + %indvar.next689 = add i64 %indvar688, 1 ; <i64> [#uses=1] + br i1 %phitmp654, label %for.cond.i.preheader, label %if.end +} + +define void @promote(%struct.dumperinfo* %di) nounwind { +entry: + br label %if.end + +if.end: ; preds = %if.end52, %entry + br label %for.cond.i.preheader + +for.cond.i.preheader: ; preds = %if.end52, %if.end + %indvar688 = phi i32 [ 0, %if.end ], [ %indvar.next689, %if.end52 ] ; <i64> [#uses=3] + %tmp690 = shl i32 %indvar688, 12 ; <i64> [#uses=1] + %pa.0642 = add i32 %tmp690, 0 ; <i64> [#uses=1] + %tmp692693 = add i32 %indvar688, 1 ; <i32> [#uses=1] + %phitmp = sext i32 %tmp692693 to i64 ; <i64> [#uses=1] + br i1 false, label %if.end52, label %land.lhs.true.i + 
+land.lhs.true.i: ; preds = %for.cond.i.preheader + %shr2.i = lshr i32 %pa.0642, 18 ; <i64> [#uses=0] + unreachable + +if.end52: ; preds = %for.cond.i.preheader + %phitmp654 = icmp ult i64 %phitmp, 512 ; <i1> [#uses=1] + %indvar.next689 = add i32 %indvar688, 1 ; <i64> [#uses=1] + br i1 %phitmp654, label %for.cond.i.preheader, label %if.end +}
diff --git a/src/LLVM/test/CodeGen/X86/clz.ll b/src/LLVM/test/CodeGen/X86/clz.ll new file mode 100644 index 0000000..d76fab4 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/clz.ll
@@ -0,0 +1,48 @@ +; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s + +define i32 @t1(i32 %x) nounwind { + %tmp = tail call i32 @llvm.ctlz.i32( i32 %x ) + ret i32 %tmp +; CHECK: t1: +; CHECK: bsrl +; CHECK: cmov +} + +declare i32 @llvm.ctlz.i32(i32) nounwind readnone + +define i32 @t2(i32 %x) nounwind { + %tmp = tail call i32 @llvm.cttz.i32( i32 %x ) + ret i32 %tmp +; CHECK: t2: +; CHECK: bsfl +; CHECK: cmov +} + +declare i32 @llvm.cttz.i32(i32) nounwind readnone + +define i16 @t3(i16 %x, i16 %y) nounwind { +entry: + %tmp1 = add i16 %x, %y + %tmp2 = tail call i16 @llvm.ctlz.i16( i16 %tmp1 ) ; <i16> [#uses=1] + ret i16 %tmp2 +; CHECK: t3: +; CHECK: bsrw +; CHECK: cmov +} + +declare i16 @llvm.ctlz.i16(i16) nounwind readnone + +; Don't generate the cmovne when the source is known non-zero (and bsr would +; not set ZF). +; rdar://9490949 + +define i32 @t4(i32 %n) nounwind { +entry: +; CHECK: t4: +; CHECK: bsrl +; CHECK-NOT: cmov +; CHECK: ret + %or = or i32 %n, 1 + %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %or) + ret i32 %tmp1 +}
diff --git a/src/LLVM/test/CodeGen/X86/cmov.ll b/src/LLVM/test/CodeGen/X86/cmov.ll new file mode 100644 index 0000000..7a8d6e6 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/cmov.ll
@@ -0,0 +1,157 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" + +define i32 @test1(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone { +entry: +; CHECK: test1: +; CHECK: btl +; CHECK-NEXT: movl $12, %eax +; CHECK-NEXT: cmovael (%rcx), %eax +; CHECK-NEXT: ret + + %0 = lshr i32 %x, %n ; <i32> [#uses=1] + %1 = and i32 %0, 1 ; <i32> [#uses=1] + %toBool = icmp eq i32 %1, 0 ; <i1> [#uses=1] + %v = load i32* %vp + %.0 = select i1 %toBool, i32 %v, i32 12 ; <i32> [#uses=1] + ret i32 %.0 +} +define i32 @test2(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone { +entry: +; CHECK: test2: +; CHECK: btl +; CHECK-NEXT: movl $12, %eax +; CHECK-NEXT: cmovbl (%rcx), %eax +; CHECK-NEXT: ret + + %0 = lshr i32 %x, %n ; <i32> [#uses=1] + %1 = and i32 %0, 1 ; <i32> [#uses=1] + %toBool = icmp eq i32 %1, 0 ; <i1> [#uses=1] + %v = load i32* %vp + %.0 = select i1 %toBool, i32 12, i32 %v ; <i32> [#uses=1] + ret i32 %.0 +} + + +; x86's 32-bit cmov doesn't clobber the high 32 bits of the destination +; if the condition is false. An explicit zero-extend (movl) is needed +; after the cmov. + +declare void @bar(i64) nounwind + +define void @test3(i64 %a, i64 %b, i1 %p) nounwind { +; CHECK: test3: +; CHECK: cmovnel %edi, %esi +; CHECK-NEXT: movl %esi, %edi + + %c = trunc i64 %a to i32 + %d = trunc i64 %b to i32 + %e = select i1 %p, i32 %c, i32 %d + %f = zext i32 %e to i64 + call void @bar(i64 %f) + ret void +} + + + +; CodeGen shouldn't try to do a setne after an expanded 8-bit conditional +; move without recomputing EFLAGS, because the expansion of the conditional +; move with control flow may clobber EFLAGS (e.g., with xor, to set the +; register to zero). + +; The test is a little awkward; the important part is that there's a test before the +; setne. 
+; PR4814 + + +@g_3 = external global i8 ; <i8*> [#uses=1] +@g_96 = external global i8 ; <i8*> [#uses=2] +@g_100 = external global i8 ; <i8*> [#uses=2] +@_2E_str = external constant [15 x i8], align 1 ; <[15 x i8]*> [#uses=1] + +define i32 @test4() nounwind { +entry: + %0 = load i8* @g_3, align 1 ; <i8> [#uses=2] + %1 = sext i8 %0 to i32 ; <i32> [#uses=1] + %.lobit.i = lshr i8 %0, 7 ; <i8> [#uses=1] + %tmp.i = zext i8 %.lobit.i to i32 ; <i32> [#uses=1] + %tmp.not.i = xor i32 %tmp.i, 1 ; <i32> [#uses=1] + %iftmp.17.0.i.i = ashr i32 %1, %tmp.not.i ; <i32> [#uses=1] + %retval56.i.i = trunc i32 %iftmp.17.0.i.i to i8 ; <i8> [#uses=1] + %2 = icmp eq i8 %retval56.i.i, 0 ; <i1> [#uses=2] + %g_96.promoted.i = load i8* @g_96 ; <i8> [#uses=3] + %3 = icmp eq i8 %g_96.promoted.i, 0 ; <i1> [#uses=2] + br i1 %3, label %func_4.exit.i, label %bb.i.i.i + +bb.i.i.i: ; preds = %entry + %4 = volatile load i8* @g_100, align 1 ; <i8> [#uses=0] + br label %func_4.exit.i + +; CHECK: test4: +; CHECK: g_100 +; CHECK: testb +; CHECK-NOT: xor +; CHECK: setne +; CHECK-NEXT: testb + +func_4.exit.i: ; preds = %bb.i.i.i, %entry + %.not.i = xor i1 %2, true ; <i1> [#uses=1] + %brmerge.i = or i1 %3, %.not.i ; <i1> [#uses=1] + %.mux.i = select i1 %2, i8 %g_96.promoted.i, i8 0 ; <i8> [#uses=1] + br i1 %brmerge.i, label %func_1.exit, label %bb.i.i + +bb.i.i: ; preds = %func_4.exit.i + %5 = volatile load i8* @g_100, align 1 ; <i8> [#uses=0] + br label %func_1.exit + +func_1.exit: ; preds = %bb.i.i, %func_4.exit.i + %g_96.tmp.0.i = phi i8 [ %g_96.promoted.i, %bb.i.i ], [ %.mux.i, %func_4.exit.i ] ; <i8> [#uses=2] + store i8 %g_96.tmp.0.i, i8* @g_96 + %6 = zext i8 %g_96.tmp.0.i to i32 ; <i32> [#uses=1] + %7 = tail call i32 (i8*, ...)* @printf(i8* noalias getelementptr ([15 x i8]* @_2E_str, i64 0, i64 0), i32 %6) nounwind ; <i32> [#uses=0] + ret i32 0 +} + +declare i32 @printf(i8* nocapture, ...) nounwind + + +; Should compile to setcc | -2. 
+; rdar://6668608 +define i32 @test5(i32* nocapture %P) nounwind readonly { +entry: +; CHECK: test5: +; CHECK: setg %al +; CHECK: movzbl %al, %eax +; CHECK: orl $-2, %eax +; CHECK: ret + + %0 = load i32* %P, align 4 ; <i32> [#uses=1] + %1 = icmp sgt i32 %0, 41 ; <i1> [#uses=1] + %iftmp.0.0 = select i1 %1, i32 -1, i32 -2 ; <i32> [#uses=1] + ret i32 %iftmp.0.0 +} + +define i32 @test6(i32* nocapture %P) nounwind readonly { +entry: +; CHECK: test6: +; CHECK: setl %al +; CHECK: movzbl %al, %eax +; CHECK: leal 4(%rax,%rax,8), %eax +; CHECK: ret + %0 = load i32* %P, align 4 ; <i32> [#uses=1] + %1 = icmp sgt i32 %0, 41 ; <i1> [#uses=1] + %iftmp.0.0 = select i1 %1, i32 4, i32 13 ; <i32> [#uses=1] + ret i32 %iftmp.0.0 +} + + +; Don't try to use a 16-bit conditional move to do an 8-bit select, +; because it isn't worth it. Just use a branch instead. +define i8 @test7(i1 inreg %c, i8 inreg %a, i8 inreg %b) nounwind { +; CHECK: test7: +; CHECK: testb $1, %dil +; CHECK-NEXT: jne LBB + + %d = select i1 %c, i8 %a, i8 %b + ret i8 %d +}
diff --git a/src/LLVM/test/CodeGen/X86/cmp.ll b/src/LLVM/test/CodeGen/X86/cmp.ll new file mode 100644 index 0000000..4374aac --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/cmp.ll
@@ -0,0 +1,92 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -show-mc-encoding | FileCheck %s + +define i32 @test1(i32 %X, i32* %y) nounwind { + %tmp = load i32* %y ; <i32> [#uses=1] + %tmp.upgrd.1 = icmp eq i32 %tmp, 0 ; <i1> [#uses=1] + br i1 %tmp.upgrd.1, label %ReturnBlock, label %cond_true + +cond_true: ; preds = %0 + ret i32 1 + +ReturnBlock: ; preds = %0 + ret i32 0 +; CHECK: test1: +; CHECK: cmpl $0, (%rsi) +} + +define i32 @test2(i32 %X, i32* %y) nounwind { + %tmp = load i32* %y ; <i32> [#uses=1] + %tmp1 = shl i32 %tmp, 3 ; <i32> [#uses=1] + %tmp1.upgrd.2 = icmp eq i32 %tmp1, 0 ; <i1> [#uses=1] + br i1 %tmp1.upgrd.2, label %ReturnBlock, label %cond_true + +cond_true: ; preds = %0 + ret i32 1 + +ReturnBlock: ; preds = %0 + ret i32 0 +; CHECK: test2: +; CHECK: movl (%rsi), %eax +; CHECK: shll $3, %eax +; CHECK: testl %eax, %eax +} + +define i64 @test3(i64 %x) nounwind { + %t = icmp eq i64 %x, 0 + %r = zext i1 %t to i64 + ret i64 %r +; CHECK: test3: +; CHECK: testq %rdi, %rdi +; CHECK: sete %al +; CHECK: movzbl %al, %eax +; CHECK: ret +} + +define i64 @test4(i64 %x) nounwind { + %t = icmp slt i64 %x, 1 + %r = zext i1 %t to i64 + ret i64 %r +; CHECK: test4: +; CHECK: testq %rdi, %rdi +; CHECK: setle %al +; CHECK: movzbl %al, %eax +; CHECK: ret +} + + +define i32 @test5(double %A) nounwind { + entry: + %tmp2 = fcmp ogt double %A, 1.500000e+02; <i1> [#uses=1] + %tmp5 = fcmp ult double %A, 7.500000e+01; <i1> [#uses=1] + %bothcond = or i1 %tmp2, %tmp5; <i1> [#uses=1] + br i1 %bothcond, label %bb8, label %bb12 + + bb8:; preds = %entry + %tmp9 = tail call i32 (...)* @foo( ) nounwind ; <i32> [#uses=1] + ret i32 %tmp9 + + bb12:; preds = %entry + ret i32 32 +; CHECK: test5: +; CHECK: ucomisd LCPI4_0(%rip), %xmm0 +; CHECK: ucomisd LCPI4_1(%rip), %xmm0 +} + +declare i32 @foo(...) 
+ +define i32 @test6() nounwind align 2 { + %A = alloca {i64, i64}, align 8 + %B = getelementptr inbounds {i64, i64}* %A, i64 0, i32 1 + %C = load i64* %B + %D = icmp eq i64 %C, 0 + br i1 %D, label %T, label %F +T: + ret i32 1 + +F: + ret i32 0 +; CHECK: test6: +; CHECK: cmpq $0, -8(%rsp) +; CHECK: encoding: [0x48,0x83,0x7c,0x24,0xf8,0x00] +} +
diff --git a/src/LLVM/test/CodeGen/X86/cmpxchg16b.ll b/src/LLVM/test/CodeGen/X86/cmpxchg16b.ll new file mode 100644 index 0000000..ba1c4ef --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/cmpxchg16b.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=x86-64 -mcpu=core2 | FileCheck %s + +; Basic 128-bit cmpxchg +define void @t1(i128* nocapture %p) nounwind ssp { +entry: +; CHECK movl $1, %ebx +; CHECK: lock +; CHECK-NEXT: cmpxchg16b + %r = cmpxchg i128* %p, i128 0, i128 1 seq_cst + ret void +} + +; FIXME: Handle 128-bit atomicrmw/load atomic/store atomic
diff --git a/src/LLVM/test/CodeGen/X86/coalesce-esp.ll b/src/LLVM/test/CodeGen/X86/coalesce-esp.ll new file mode 100644 index 0000000..a584876 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/coalesce-esp.ll
@@ -0,0 +1,36 @@ +; RUN: llc < %s | grep {movl %esp, %ebp} +; PR4572 + +; Don't coalesce with %esp if it would end up putting %esp in +; the index position of an address, because that can't be +; encoded on x86. It would actually be slightly better to +; swap the address operands though, since there's no scale. + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" +target triple = "i386-pc-mingw32" + %"struct.std::valarray<unsigned int>" = type { i32, i32* } + +define void @_ZSt17__gslice_to_indexjRKSt8valarrayIjES2_RS0_(i32 %__o, %"struct.std::valarray<unsigned int>"* nocapture %__l, %"struct.std::valarray<unsigned int>"* nocapture %__s, %"struct.std::valarray<unsigned int>"* nocapture %__i) nounwind { +entry: + %0 = alloca i32, i32 undef, align 4 ; <i32*> [#uses=1] + br i1 undef, label %return, label %bb4 + +bb4: ; preds = %bb7.backedge, %entry + %indvar = phi i32 [ %indvar.next, %bb7.backedge ], [ 0, %entry ] ; <i32> [#uses=2] + %scevgep24.sum = sub i32 undef, %indvar ; <i32> [#uses=2] + %scevgep25 = getelementptr i32* %0, i32 %scevgep24.sum ; <i32*> [#uses=1] + %scevgep27 = getelementptr i32* undef, i32 %scevgep24.sum ; <i32*> [#uses=1] + %1 = load i32* %scevgep27, align 4 ; <i32> [#uses=0] + br i1 undef, label %bb7.backedge, label %bb5 + +bb5: ; preds = %bb4 + store i32 0, i32* %scevgep25, align 4 + br label %bb7.backedge + +bb7.backedge: ; preds = %bb5, %bb4 + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1] + br label %bb4 + +return: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/coalescer-commute1.ll b/src/LLVM/test/CodeGen/X86/coalescer-commute1.ll new file mode 100644 index 0000000..8aa0bfd --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/coalescer-commute1.ll
@@ -0,0 +1,26 @@ +; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | not grep movaps +; PR1877 + +@NNTOT = weak global i32 0 ; <i32*> [#uses=1] +@G = weak global float 0.000000e+00 ; <float*> [#uses=1] + +define void @runcont(i32* %source) nounwind { +entry: + %tmp10 = load i32* @NNTOT, align 4 ; <i32> [#uses=1] + br label %bb + +bb: ; preds = %bb, %entry + %neuron.0 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2] + %thesum.0 = phi float [ 0.000000e+00, %entry ], [ %tmp6, %bb ] ; <float> [#uses=1] + %tmp2 = getelementptr i32* %source, i32 %neuron.0 ; <i32*> [#uses=1] + %tmp3 = load i32* %tmp2, align 4 ; <i32> [#uses=1] + %tmp34 = sitofp i32 %tmp3 to float ; <float> [#uses=1] + %tmp6 = fadd float %tmp34, %thesum.0 ; <float> [#uses=2] + %indvar.next = add i32 %neuron.0, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %indvar.next, %tmp10 ; <i1> [#uses=1] + br i1 %exitcond, label %bb13, label %bb + +bb13: ; preds = %bb + volatile store float %tmp6, float* @G, align 4 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/coalescer-commute2.ll b/src/LLVM/test/CodeGen/X86/coalescer-commute2.ll new file mode 100644 index 0000000..6e5c1cf --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/coalescer-commute2.ll
@@ -0,0 +1,39 @@ +; RUN: llc < %s -mtriple=x86_64-linux -join-physregs | FileCheck %s +; CHECK-NOT: mov +; CHECK: paddw +; CHECK-NOT: mov +; CHECK: paddw +; CHECK-NOT: paddw +; CHECK-NOT: mov + +; The 2-addr pass should ensure that identical code is produced for these functions +; no extra copy should be generated. + +define <2 x i64> @test1(<2 x i64> %x, <2 x i64> %y) nounwind { +entry: + %tmp6 = bitcast <2 x i64> %y to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp8 = bitcast <2 x i64> %x to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp9 = add <8 x i16> %tmp8, %tmp6 ; <<8 x i16>> [#uses=1] + %tmp10 = bitcast <8 x i16> %tmp9 to <2 x i64> ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp10 +} + +define <2 x i64> @test2(<2 x i64> %x, <2 x i64> %y) nounwind { +entry: + %tmp6 = bitcast <2 x i64> %x to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp8 = bitcast <2 x i64> %y to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp9 = add <8 x i16> %tmp8, %tmp6 ; <<8 x i16>> [#uses=1] + %tmp10 = bitcast <8 x i16> %tmp9 to <2 x i64> ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp10 +} + + +; The coalescer should commute the add to avoid a copy. +define <4 x float> @test3(<4 x float> %V) { +entry: + %tmp8 = shufflevector <4 x float> %V, <4 x float> undef, + <4 x i32> < i32 3, i32 2, i32 1, i32 0 > + %add = fadd <4 x float> %tmp8, %V + ret <4 x float> %add +} +
diff --git a/src/LLVM/test/CodeGen/X86/coalescer-commute3.ll b/src/LLVM/test/CodeGen/X86/coalescer-commute3.ll new file mode 100644 index 0000000..e5bd448 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/coalescer-commute3.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | grep mov | count 6 + + %struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* } + +define i32 @perimeter(%struct.quad_struct* %tree, i32 %size) nounwind { +entry: + switch i32 %size, label %UnifiedReturnBlock [ + i32 2, label %bb + i32 0, label %bb50 + ] + +bb: ; preds = %entry + %tmp31 = tail call i32 @perimeter( %struct.quad_struct* null, i32 0 ) nounwind ; <i32> [#uses=1] + %tmp40 = tail call i32 @perimeter( %struct.quad_struct* null, i32 0 ) nounwind ; <i32> [#uses=1] + %tmp33 = add i32 0, %tmp31 ; <i32> [#uses=1] + %tmp42 = add i32 %tmp33, %tmp40 ; <i32> [#uses=1] + ret i32 %tmp42 + +bb50: ; preds = %entry + ret i32 0 + +UnifiedReturnBlock: ; preds = %entry + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/X86/coalescer-commute4.ll b/src/LLVM/test/CodeGen/X86/coalescer-commute4.ll new file mode 100644 index 0000000..02a9781 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/coalescer-commute4.ll
@@ -0,0 +1,30 @@ +; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | not grep movaps +; PR1501 + +define float @foo(i32* %x, float* %y, i32 %c) nounwind { +entry: + %tmp2132 = icmp eq i32 %c, 0 ; <i1> [#uses=2] + br i1 %tmp2132, label %bb23, label %bb.preheader + +bb.preheader: ; preds = %entry + %umax = select i1 %tmp2132, i32 1, i32 %c ; <i32> [#uses=1] + br label %bb + +bb: ; preds = %bb, %bb.preheader + %i.0.reg2mem.0 = phi i32 [ 0, %bb.preheader ], [ %indvar.next, %bb ] ; <i32> [#uses=3] + %res.0.reg2mem.0 = phi float [ 0.000000e+00, %bb.preheader ], [ %tmp14, %bb ] ; <float> [#uses=1] + %tmp3 = getelementptr i32* %x, i32 %i.0.reg2mem.0 ; <i32*> [#uses=1] + %tmp4 = load i32* %tmp3, align 4 ; <i32> [#uses=1] + %tmp45 = sitofp i32 %tmp4 to float ; <float> [#uses=1] + %tmp8 = getelementptr float* %y, i32 %i.0.reg2mem.0 ; <float*> [#uses=1] + %tmp9 = load float* %tmp8, align 4 ; <float> [#uses=1] + %tmp11 = fmul float %tmp9, %tmp45 ; <float> [#uses=1] + %tmp14 = fadd float %tmp11, %res.0.reg2mem.0 ; <float> [#uses=2] + %indvar.next = add i32 %i.0.reg2mem.0, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %indvar.next, %umax ; <i1> [#uses=1] + br i1 %exitcond, label %bb23, label %bb + +bb23: ; preds = %bb, %entry + %res.0.reg2mem.1 = phi float [ 0.000000e+00, %entry ], [ %tmp14, %bb ] ; <float> [#uses=1] + ret float %res.0.reg2mem.1 +}
diff --git a/src/LLVM/test/CodeGen/X86/coalescer-commute5.ll b/src/LLVM/test/CodeGen/X86/coalescer-commute5.ll new file mode 100644 index 0000000..510d115 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/coalescer-commute5.ll
@@ -0,0 +1,21 @@ +; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | not grep movaps + +define i32 @t() { +entry: + br i1 true, label %bb1664, label %bb1656 +bb1656: ; preds = %entry + ret i32 0 +bb1664: ; preds = %entry + %tmp4297 = bitcast <16 x i8> zeroinitializer to <2 x i64> ; <<2 x i64>> [#uses=2] + %tmp4351 = call <16 x i8> @llvm.x86.sse2.pcmpeq.b( <16 x i8> zeroinitializer, <16 x i8> zeroinitializer ) nounwind readnone ; <<16 x i8>> [#uses=0] + br i1 false, label %bb5310, label %bb4743 +bb4743: ; preds = %bb1664 + %tmp4360.not28 = or <2 x i64> zeroinitializer, %tmp4297 ; <<2 x i64>> [#uses=1] + br label %bb5310 +bb5310: ; preds = %bb4743, %bb1664 + %tmp4360.not28.pn = phi <2 x i64> [ %tmp4360.not28, %bb4743 ], [ %tmp4297, %bb1664 ] ; <<2 x i64>> [#uses=1] + %tmp4415.not.pn = or <2 x i64> zeroinitializer, %tmp4360.not28.pn ; <<2 x i64>> [#uses=0] + ret i32 0 +} + +declare <16 x i8> @llvm.x86.sse2.pcmpeq.b(<16 x i8>, <16 x i8>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/X86/coalescer-cross.ll b/src/LLVM/test/CodeGen/X86/coalescer-cross.ll new file mode 100644 index 0000000..3f1fec1 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/coalescer-cross.ll
@@ -0,0 +1,45 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin10 | FileCheck %s +; RUN: llc < %s -mtriple=i386-apple-darwin10 -regalloc=basic | FileCheck %s +; rdar://6509240 + +; CHECK: os_clock +; CHECK-NOT: movaps + + %0 = type { %struct.TValue } ; type %0 + %1 = type { %struct.L_Umaxalign, i32, %struct.Node* } ; type %1 + %struct.CallInfo = type { %struct.TValue*, %struct.TValue*, %struct.TValue*, i32*, i32, i32 } + %struct.GCObject = type { %struct.lua_State } + %struct.L_Umaxalign = type { double } + %struct.Mbuffer = type { i8*, i32, i32 } + %struct.Node = type { %struct.TValue, %struct.TKey } + %struct.TKey = type { %1 } + %struct.TString = type { %struct.anon } + %struct.TValue = type { %struct.L_Umaxalign, i32 } + %struct.Table = type { %struct.GCObject*, i8, i8, i8, i8, %struct.Table*, %struct.TValue*, %struct.Node*, %struct.Node*, %struct.GCObject*, i32 } + %struct.UpVal = type { %struct.GCObject*, i8, i8, %struct.TValue*, %0 } + %struct.anon = type { %struct.GCObject*, i8, i8, i8, i32, i32 } + %struct.global_State = type { %struct.stringtable, i8* (i8*, i8*, i32, i32)*, i8*, i8, i8, i32, %struct.GCObject*, %struct.GCObject**, %struct.GCObject*, %struct.GCObject*, %struct.GCObject*, %struct.GCObject*, %struct.Mbuffer, i32, i32, i32, i32, i32, i32, i32 (%struct.lua_State*)*, %struct.TValue, %struct.lua_State*, %struct.UpVal, [9 x %struct.Table*], [17 x %struct.TString*] } + %struct.lua_Debug = type { i32, i8*, i8*, i8*, i8*, i32, i32, i32, i32, [60 x i8], i32 } + %struct.lua_State = type { %struct.GCObject*, i8, i8, i8, %struct.TValue*, %struct.TValue*, %struct.global_State*, %struct.CallInfo*, i32*, %struct.TValue*, %struct.TValue*, %struct.CallInfo*, %struct.CallInfo*, i32, i32, i16, i16, i8, i8, i32, i32, void (%struct.lua_State*, %struct.lua_Debug*)*, %struct.TValue, %struct.TValue, %struct.GCObject*, %struct.GCObject*, %struct.lua_longjmp*, i32 } + %struct.lua_longjmp = type { %struct.lua_longjmp*, [18 x i32], i32 } + %struct.stringtable = type { 
%struct.GCObject**, i32, i32 } +@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (%struct.lua_State*)* @os_clock to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +define i32 @os_clock(%struct.lua_State* nocapture %L) nounwind ssp { +entry: + %0 = tail call i32 @"\01_clock$UNIX2003"() nounwind ; <i32> [#uses=1] + %1 = uitofp i32 %0 to double ; <double> [#uses=1] + %2 = fdiv double %1, 1.000000e+06 ; <double> [#uses=1] + %3 = getelementptr %struct.lua_State* %L, i32 0, i32 4 ; <%struct.TValue**> [#uses=3] + %4 = load %struct.TValue** %3, align 4 ; <%struct.TValue*> [#uses=2] + %5 = getelementptr %struct.TValue* %4, i32 0, i32 0, i32 0 ; <double*> [#uses=1] + store double %2, double* %5, align 4 + %6 = getelementptr %struct.TValue* %4, i32 0, i32 1 ; <i32*> [#uses=1] + store i32 3, i32* %6, align 4 + %7 = load %struct.TValue** %3, align 4 ; <%struct.TValue*> [#uses=1] + %8 = getelementptr %struct.TValue* %7, i32 1 ; <%struct.TValue*> [#uses=1] + store %struct.TValue* %8, %struct.TValue** %3, align 4 + ret i32 1 +} + +declare i32 @"\01_clock$UNIX2003"()
diff --git a/src/LLVM/test/CodeGen/X86/coalescer-dce.ll b/src/LLVM/test/CodeGen/X86/coalescer-dce.ll new file mode 100644 index 0000000..7f72e3d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/coalescer-dce.ll
@@ -0,0 +1,80 @@ +; RUN: llc < %s -disable-fp-elim -disable-machine-dce -verify-coalescing +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-macosx10.7.0" + +; This test case has a sub-register join followed by a remat: +; +; 256L %vreg2<def> = COPY %vreg7:sub_32bit<kill>; GR32:%vreg2 GR64:%vreg7 +; Considering merging %vreg2 with %vreg7:sub_32bit +; Cross-class to GR64. +; RHS = %vreg2 = [256d,272d:0) 0@256d +; LHS = %vreg7 = [208d,256d:0)[304L,480L:0) 0@208d +; updated: 272L %vreg0<def> = COPY %vreg7:sub_32bit<kill>; GR32:%vreg0 GR64:%vreg7 +; Joined. Result = %vreg7 = [208d,272d:0)[304L,480L:0) 0@208d +; +; 272L %vreg10:sub_32bit<def> = COPY %vreg7:sub_32bit<kill>, %vreg10<imp-def>; GR64:%vreg10,%vreg7 +; Considering merging %vreg7 with %vreg10 +; RHS = %vreg7 = [208d,272d:0)[304L,480L:0) 0@208d +; LHS = %vreg10 = [16d,64L:2)[64L,160L:1)[192L,240L:1)[272d,304L:3)[304L,352d:1)[352d,400d:0)[400d,400S:4) 0@352d 1@64L-phidef 2@16d-phikill 3@272d-phikill 4@400d +; Remat: %vreg10<def> = MOV64r0 %vreg10<imp-def>, %EFLAGS<imp-def,dead>, %vreg10<imp-def>; GR64:%vreg10 +; Shrink: %vreg7 = [208d,272d:0)[304L,480L:0) 0@208d +; live-in at 240L +; live-in at 416L +; live-in at 320L +; live-in at 304L +; Shrunk: %vreg7 = [208d,256d:0)[304L,480L:0) 0@208d +; +; The COPY at 256L is rewritten as a partial def, and that would artificially +; extend the live range of %vreg7 to end at 256d. When the joined copy is +; removed, -verify-coalescing complains about the dangling kill. 
+; +; <rdar://problem/9967101> + +define void @f1() nounwind uwtable ssp { +bb: + br label %bb1 + +bb1: + %tmp = phi i32 [ 0, %bb ], [ %tmp21, %bb20 ] + br label %bb2 + +bb2: + br i1 undef, label %bb5, label %bb8 + +bb4: + br i1 undef, label %bb2, label %bb20 + +bb5: + br i1 undef, label %bb4, label %bb20 + +bb8: + %tmp9 = phi i32 [ %tmp24, %bb23 ], [ 0, %bb2 ] + br i1 false, label %bb41, label %bb10 + +bb10: + %tmp11 = sub nsw i32 %tmp9, %tmp + br i1 false, label %bb2, label %bb26 + +bb20: + %tmp21 = phi i32 [ undef, %bb4 ], [ undef, %bb5 ], [ %tmp9, %bb27 ], [ undef, %bb32 ] + %tmp22 = phi i32 [ undef, %bb4 ], [ undef, %bb5 ], [ %tmp11, %bb27 ], [ undef, %bb32 ] + br label %bb1 + +bb23: + %tmp24 = add nsw i32 %tmp9, 1 + br label %bb8 + +bb26: + br i1 undef, label %bb27, label %bb32 + +bb27: + %tmp28 = zext i32 %tmp11 to i64 + %tmp30 = icmp eq i64 undef, %tmp28 + br i1 %tmp30, label %bb20, label %bb27 + +bb32: + br i1 undef, label %bb20, label %bb23 + +bb41: + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/coalescer-remat.ll b/src/LLVM/test/CodeGen/X86/coalescer-remat.ll new file mode 100644 index 0000000..eb7b7a8 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/coalescer-remat.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep xor | count 3 + +@val = internal global i64 0 +@"\01LC" = internal constant [7 x i8] c"0x%lx\0A\00" + +define i32 @main() nounwind { +entry: + %0 = cmpxchg i64* @val, i64 0, i64 1 monotonic + %1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([7 x i8]* @"\01LC", i32 0, i64 0), i64 %0) nounwind + ret i32 0 +} + +declare i32 @printf(i8*, ...) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/code_placement.ll b/src/LLVM/test/CodeGen/X86/code_placement.ll new file mode 100644 index 0000000..9747183 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/code_placement.ll
@@ -0,0 +1,136 @@ +; RUN: llc -march=x86 < %s | FileCheck %s + +@Te0 = external global [256 x i32] ; <[256 x i32]*> [#uses=5] +@Te1 = external global [256 x i32] ; <[256 x i32]*> [#uses=4] +@Te3 = external global [256 x i32] ; <[256 x i32]*> [#uses=2] + +define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r) nounwind ssp { +entry: + %0 = load i32* %rk, align 4 ; <i32> [#uses=1] + %1 = getelementptr i32* %rk, i64 1 ; <i32*> [#uses=1] + %2 = load i32* %1, align 4 ; <i32> [#uses=1] + %tmp15 = add i32 %r, -1 ; <i32> [#uses=1] + %tmp.16 = zext i32 %tmp15 to i64 ; <i64> [#uses=2] + br label %bb +; CHECK: jmp +; CHECK-NEXT: align + +bb: ; preds = %bb1, %entry + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %bb1 ] ; <i64> [#uses=3] + %s1.0 = phi i32 [ %2, %entry ], [ %56, %bb1 ] ; <i32> [#uses=2] + %s0.0 = phi i32 [ %0, %entry ], [ %43, %bb1 ] ; <i32> [#uses=2] + %tmp18 = shl i64 %indvar, 4 ; <i64> [#uses=4] + %rk26 = bitcast i32* %rk to i8* ; <i8*> [#uses=6] + %3 = lshr i32 %s0.0, 24 ; <i32> [#uses=1] + %4 = zext i32 %3 to i64 ; <i64> [#uses=1] + %5 = getelementptr [256 x i32]* @Te0, i64 0, i64 %4 ; <i32*> [#uses=1] + %6 = load i32* %5, align 4 ; <i32> [#uses=1] + %7 = lshr i32 %s1.0, 16 ; <i32> [#uses=1] + %8 = and i32 %7, 255 ; <i32> [#uses=1] + %9 = zext i32 %8 to i64 ; <i64> [#uses=1] + %10 = getelementptr [256 x i32]* @Te1, i64 0, i64 %9 ; <i32*> [#uses=1] + %11 = load i32* %10, align 4 ; <i32> [#uses=1] + %ctg2.sum2728 = or i64 %tmp18, 8 ; <i64> [#uses=1] + %12 = getelementptr i8* %rk26, i64 %ctg2.sum2728 ; <i8*> [#uses=1] + %13 = bitcast i8* %12 to i32* ; <i32*> [#uses=1] + %14 = load i32* %13, align 4 ; <i32> [#uses=1] + %15 = xor i32 %11, %6 ; <i32> [#uses=1] + %16 = xor i32 %15, %14 ; <i32> [#uses=3] + %17 = lshr i32 %s1.0, 24 ; <i32> [#uses=1] + %18 = zext i32 %17 to i64 ; <i64> [#uses=1] + %19 = getelementptr [256 x i32]* @Te0, i64 0, i64 %18 ; <i32*> [#uses=1] + %20 = load i32* %19, align 4 ; <i32> [#uses=1] + %21 = and i32 
%s0.0, 255 ; <i32> [#uses=1] + %22 = zext i32 %21 to i64 ; <i64> [#uses=1] + %23 = getelementptr [256 x i32]* @Te3, i64 0, i64 %22 ; <i32*> [#uses=1] + %24 = load i32* %23, align 4 ; <i32> [#uses=1] + %ctg2.sum2930 = or i64 %tmp18, 12 ; <i64> [#uses=1] + %25 = getelementptr i8* %rk26, i64 %ctg2.sum2930 ; <i8*> [#uses=1] + %26 = bitcast i8* %25 to i32* ; <i32*> [#uses=1] + %27 = load i32* %26, align 4 ; <i32> [#uses=1] + %28 = xor i32 %24, %20 ; <i32> [#uses=1] + %29 = xor i32 %28, %27 ; <i32> [#uses=4] + %30 = lshr i32 %16, 24 ; <i32> [#uses=1] + %31 = zext i32 %30 to i64 ; <i64> [#uses=1] + %32 = getelementptr [256 x i32]* @Te0, i64 0, i64 %31 ; <i32*> [#uses=1] + %33 = load i32* %32, align 4 ; <i32> [#uses=2] + %exitcond = icmp eq i64 %indvar, %tmp.16 ; <i1> [#uses=1] + br i1 %exitcond, label %bb2, label %bb1 + +bb1: ; preds = %bb + %ctg2.sum31 = add i64 %tmp18, 16 ; <i64> [#uses=1] + %34 = getelementptr i8* %rk26, i64 %ctg2.sum31 ; <i8*> [#uses=1] + %35 = bitcast i8* %34 to i32* ; <i32*> [#uses=1] + %36 = lshr i32 %29, 16 ; <i32> [#uses=1] + %37 = and i32 %36, 255 ; <i32> [#uses=1] + %38 = zext i32 %37 to i64 ; <i64> [#uses=1] + %39 = getelementptr [256 x i32]* @Te1, i64 0, i64 %38 ; <i32*> [#uses=1] + %40 = load i32* %39, align 4 ; <i32> [#uses=1] + %41 = load i32* %35, align 4 ; <i32> [#uses=1] + %42 = xor i32 %40, %33 ; <i32> [#uses=1] + %43 = xor i32 %42, %41 ; <i32> [#uses=1] + %44 = lshr i32 %29, 24 ; <i32> [#uses=1] + %45 = zext i32 %44 to i64 ; <i64> [#uses=1] + %46 = getelementptr [256 x i32]* @Te0, i64 0, i64 %45 ; <i32*> [#uses=1] + %47 = load i32* %46, align 4 ; <i32> [#uses=1] + %48 = and i32 %16, 255 ; <i32> [#uses=1] + %49 = zext i32 %48 to i64 ; <i64> [#uses=1] + %50 = getelementptr [256 x i32]* @Te3, i64 0, i64 %49 ; <i32*> [#uses=1] + %51 = load i32* %50, align 4 ; <i32> [#uses=1] + %ctg2.sum32 = add i64 %tmp18, 20 ; <i64> [#uses=1] + %52 = getelementptr i8* %rk26, i64 %ctg2.sum32 ; <i8*> [#uses=1] + %53 = bitcast i8* %52 to i32* ; <i32*> 
[#uses=1] + %54 = load i32* %53, align 4 ; <i32> [#uses=1] + %55 = xor i32 %51, %47 ; <i32> [#uses=1] + %56 = xor i32 %55, %54 ; <i32> [#uses=1] + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1] + br label %bb + +bb2: ; preds = %bb + %tmp10 = shl i64 %tmp.16, 4 ; <i64> [#uses=2] + %ctg2.sum = add i64 %tmp10, 16 ; <i64> [#uses=1] + %tmp1213 = getelementptr i8* %rk26, i64 %ctg2.sum ; <i8*> [#uses=1] + %57 = bitcast i8* %tmp1213 to i32* ; <i32*> [#uses=1] + %58 = and i32 %33, -16777216 ; <i32> [#uses=1] + %59 = lshr i32 %29, 16 ; <i32> [#uses=1] + %60 = and i32 %59, 255 ; <i32> [#uses=1] + %61 = zext i32 %60 to i64 ; <i64> [#uses=1] + %62 = getelementptr [256 x i32]* @Te1, i64 0, i64 %61 ; <i32*> [#uses=1] + %63 = load i32* %62, align 4 ; <i32> [#uses=1] + %64 = and i32 %63, 16711680 ; <i32> [#uses=1] + %65 = or i32 %64, %58 ; <i32> [#uses=1] + %66 = load i32* %57, align 4 ; <i32> [#uses=1] + %67 = xor i32 %65, %66 ; <i32> [#uses=2] + %68 = lshr i32 %29, 8 ; <i32> [#uses=1] + %69 = zext i32 %68 to i64 ; <i64> [#uses=1] + %70 = getelementptr [256 x i32]* @Te0, i64 0, i64 %69 ; <i32*> [#uses=1] + %71 = load i32* %70, align 4 ; <i32> [#uses=1] + %72 = and i32 %71, -16777216 ; <i32> [#uses=1] + %73 = and i32 %16, 255 ; <i32> [#uses=1] + %74 = zext i32 %73 to i64 ; <i64> [#uses=1] + %75 = getelementptr [256 x i32]* @Te1, i64 0, i64 %74 ; <i32*> [#uses=1] + %76 = load i32* %75, align 4 ; <i32> [#uses=1] + %77 = and i32 %76, 16711680 ; <i32> [#uses=1] + %78 = or i32 %77, %72 ; <i32> [#uses=1] + %ctg2.sum25 = add i64 %tmp10, 20 ; <i64> [#uses=1] + %79 = getelementptr i8* %rk26, i64 %ctg2.sum25 ; <i8*> [#uses=1] + %80 = bitcast i8* %79 to i32* ; <i32*> [#uses=1] + %81 = load i32* %80, align 4 ; <i32> [#uses=1] + %82 = xor i32 %78, %81 ; <i32> [#uses=2] + %83 = lshr i32 %67, 24 ; <i32> [#uses=1] + %84 = trunc i32 %83 to i8 ; <i8> [#uses=1] + store i8 %84, i8* %out, align 1 + %85 = lshr i32 %67, 16 ; <i32> [#uses=1] + %86 = trunc i32 %85 to i8 ; <i8> [#uses=1] + %87 = 
getelementptr i8* %out, i64 1 ; <i8*> [#uses=1] + store i8 %86, i8* %87, align 1 + %88 = getelementptr i8* %out, i64 4 ; <i8*> [#uses=1] + %89 = lshr i32 %82, 24 ; <i32> [#uses=1] + %90 = trunc i32 %89 to i8 ; <i8> [#uses=1] + store i8 %90, i8* %88, align 1 + %91 = lshr i32 %82, 16 ; <i32> [#uses=1] + %92 = trunc i32 %91 to i8 ; <i8> [#uses=1] + %93 = getelementptr i8* %out, i64 5 ; <i8*> [#uses=1] + store i8 %92, i8* %93, align 1 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/code_placement_eh.ll b/src/LLVM/test/CodeGen/X86/code_placement_eh.ll new file mode 100644 index 0000000..2da3f9f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/code_placement_eh.ll
@@ -0,0 +1,45 @@ +; RUN: llc < %s + +; CodePlacementOpt shouldn't try to modify this loop because +; it involves EH edges. + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" +target triple = "i386-apple-darwin10.0" + +define void @foo() { +invcont5: + br label %bb15 + +.noexc3: ; preds = %bb15 + br i1 undef, label %bb18.i5.i, label %bb15 + +.noexc6.i.i: ; preds = %bb18.i5.i + %tmp2021 = invoke float @cosf(float 0.000000e+00) readonly + to label %bb18.i5.i unwind label %lpad.i.i ; <float> [#uses=0] + +bb18.i5.i: ; preds = %.noexc6.i.i, %bb51.i + %tmp2019 = invoke float @sinf(float 0.000000e+00) readonly + to label %.noexc6.i.i unwind label %lpad.i.i ; <float> [#uses=0] + +lpad.i.i: ; preds = %bb18.i5.i, %.noexc6.i.i + %lpadval.i.i = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0 + catch i8* null + unreachable + +lpad59.i: ; preds = %bb15 + %lpadval60.i.i = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0 + catch i8* null + unreachable + +bb15: ; preds = %.noexc3, %invcont5 + invoke fastcc void @_ZN28btHashedOverlappingPairCacheC2Ev() + to label %.noexc3 unwind label %lpad59.i +} + +declare i32 @__gxx_personality_v0(...) + +declare float @sinf(float) readonly + +declare float @cosf(float) readonly + +declare fastcc void @_ZN28btHashedOverlappingPairCacheC2Ev() align 2
diff --git a/src/LLVM/test/CodeGen/X86/codegen-prepare-cast.ll b/src/LLVM/test/CodeGen/X86/codegen-prepare-cast.ll new file mode 100644 index 0000000..2a8ead8 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/codegen-prepare-cast.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -march=x86-64 +; PR4297 + +target datalayout = +"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + %"byte[]" = type { i64, i8* } + %"char[][]" = type { i64, %"byte[]"* } +@.str = external constant [7 x i8] ; <[7 x i8]*> [#uses=1] + +define fastcc i32 @_Dmain(%"char[][]" %unnamed) { +entry: + %tmp = getelementptr [7 x i8]* @.str, i32 0, i32 0 ; <i8*> [#uses=1] + br i1 undef, label %foreachbody, label %foreachend + +foreachbody: ; preds = %entry + %tmp4 = getelementptr i8* %tmp, i32 undef ; <i8*> [#uses=1] + %tmp5 = load i8* %tmp4 ; <i8> [#uses=0] + unreachable + +foreachend: ; preds = %entry + ret i32 0 +} +
diff --git a/src/LLVM/test/CodeGen/X86/codegen-prepare-extload.ll b/src/LLVM/test/CodeGen/X86/codegen-prepare-extload.ll new file mode 100644 index 0000000..14df815 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/codegen-prepare-extload.ll
@@ -0,0 +1,21 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win64 | FileCheck %s +; rdar://7304838 + +; CodeGenPrepare should move the zext into the block with the load +; so that SelectionDAG can select it with the load. + +; CHECK: movzbl ({{%rdi|%rcx}}), %eax + +define void @foo(i8* %p, i32* %q) { +entry: + %t = load i8* %p + %a = icmp slt i8 %t, 20 + br i1 %a, label %true, label %false +true: + %s = zext i8 %t to i32 + store i32 %s, i32* %q + ret void +false: + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/codemodel.ll b/src/LLVM/test/CodeGen/X86/codemodel.ll new file mode 100644 index 0000000..b6ca1ce --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/codemodel.ll
@@ -0,0 +1,67 @@ +; RUN: llc < %s -code-model=small | FileCheck -check-prefix CHECK-SMALL %s +; RUN: llc < %s -code-model=kernel | FileCheck -check-prefix CHECK-KERNEL %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" +@data = external global [0 x i32] ; <[0 x i32]*> [#uses=5] + +define i32 @foo() nounwind readonly { +entry: +; CHECK-SMALL: foo: +; CHECK-SMALL: movl data(%rip), %eax +; CHECK-KERNEL: foo: +; CHECK-KERNEL: movl data, %eax + %0 = load i32* getelementptr ([0 x i32]* @data, i64 0, i64 0), align 4 ; <i32> [#uses=1] + ret i32 %0 +} + +define i32 @foo2() nounwind readonly { +entry: +; CHECK-SMALL: foo2: +; CHECK-SMALL: movl data+40(%rip), %eax +; CHECK-KERNEL: foo2: +; CHECK-KERNEL: movl data+40, %eax + %0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 10), align 4 ; <i32> [#uses=1] + ret i32 %0 +} + +define i32 @foo3() nounwind readonly { +entry: +; CHECK-SMALL: foo3: +; CHECK-SMALL: movl data-40(%rip), %eax +; CHECK-KERNEL: foo3: +; CHECK-KERNEL: movq $-40, %rax + %0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 -10), align 4 ; <i32> [#uses=1] + ret i32 %0 +} + +define i32 @foo4() nounwind readonly { +entry: +; FIXME: We really can use movabsl here! 
+; CHECK-SMALL: foo4: +; CHECK-SMALL: movl $16777216, %eax +; CHECK-SMALL: movl data(%rax), %eax +; CHECK-KERNEL: foo4: +; CHECK-KERNEL: movl data+16777216, %eax + %0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 4194304), align 4 ; <i32> [#uses=1] + ret i32 %0 +} + +define i32 @foo1() nounwind readonly { +entry: +; CHECK-SMALL: foo1: +; CHECK-SMALL: movl data+16777212(%rip), %eax +; CHECK-KERNEL: foo1: +; CHECK-KERNEL: movl data+16777212, %eax + %0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 4194303), align 4 ; <i32> [#uses=1] + ret i32 %0 +} +define i32 @foo5() nounwind readonly { +entry: +; CHECK-SMALL: foo5: +; CHECK-SMALL: movl data-16777216(%rip), %eax +; CHECK-KERNEL: foo5: +; CHECK-KERNEL: movq $-16777216, %rax + %0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 -4194304), align 4 ; <i32> [#uses=1] + ret i32 %0 +}
diff --git a/src/LLVM/test/CodeGen/X86/combine-lds.ll b/src/LLVM/test/CodeGen/X86/combine-lds.ll new file mode 100644 index 0000000..b49d081 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/combine-lds.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep fldl | count 1 + +define double @doload64(i64 %x) nounwind { + %tmp717 = bitcast i64 %x to double + ret double %tmp717 +}
diff --git a/src/LLVM/test/CodeGen/X86/combiner-aa-0.ll b/src/LLVM/test/CodeGen/X86/combiner-aa-0.ll new file mode 100644 index 0000000..a61ef7a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/combiner-aa-0.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -march=x86-64 -combiner-global-alias-analysis -combiner-alias-analysis + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" + %struct.Hash_Key = type { [4 x i32], i32 } +@g_flipV_hashkey = external global %struct.Hash_Key, align 16 ; <%struct.Hash_Key*> [#uses=1] + +define void @foo() nounwind { + %t0 = load i32* undef, align 16 ; <i32> [#uses=1] + %t1 = load i32* null, align 4 ; <i32> [#uses=1] + %t2 = srem i32 %t0, 32 ; <i32> [#uses=1] + %t3 = shl i32 1, %t2 ; <i32> [#uses=1] + %t4 = xor i32 %t3, %t1 ; <i32> [#uses=1] + store i32 %t4, i32* null, align 4 + %t5 = getelementptr %struct.Hash_Key* @g_flipV_hashkey, i64 0, i32 0, i64 0 ; <i32*> [#uses=2] + %t6 = load i32* %t5, align 4 ; <i32> [#uses=1] + %t7 = shl i32 1, undef ; <i32> [#uses=1] + %t8 = xor i32 %t7, %t6 ; <i32> [#uses=1] + store i32 %t8, i32* %t5, align 4 + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/combiner-aa-1.ll b/src/LLVM/test/CodeGen/X86/combiner-aa-1.ll new file mode 100644 index 0000000..58a7129 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/combiner-aa-1.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s --combiner-alias-analysis --combiner-global-alias-analysis +; PR4880 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" +target triple = "i386-pc-linux-gnu" + +%struct.alst_node = type { %struct.node } +%struct.arg_node = type { %struct.node, i8*, %struct.alst_node* } +%struct.arglst_node = type { %struct.alst_node, %struct.arg_node*, %struct.arglst_node* } +%struct.lam_node = type { %struct.alst_node, %struct.arg_node*, %struct.alst_node* } +%struct.node = type { i32 (...)**, %struct.node* } + +define i32 @._ZN8lam_node18resolve_name_clashEP8arg_nodeP9alst_node._ZNK8lam_nodeeqERK8exp_node._ZN11arglst_nodeD0Ev(%struct.lam_node* %this.this, %struct.arg_node* %outer_arg, %struct.alst_node* %env.cmp, %struct.arglst_node* %this, i32 %functionID) { +comb_entry: + %.SV59 = alloca %struct.node* ; <%struct.node**> [#uses=1] + %0 = load i32 (...)*** null, align 4 ; <i32 (...)**> [#uses=1] + %1 = getelementptr inbounds i32 (...)** %0, i32 3 ; <i32 (...)**> [#uses=1] + %2 = load i32 (...)** %1, align 4 ; <i32 (...)*> [#uses=1] + store %struct.node* undef, %struct.node** %.SV59 + %3 = bitcast i32 (...)* %2 to i32 (%struct.node*)* ; <i32 (%struct.node*)*> [#uses=1] + %4 = tail call i32 %3(%struct.node* undef) ; <i32> [#uses=0] + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/commute-intrinsic.ll b/src/LLVM/test/CodeGen/X86/commute-intrinsic.ll new file mode 100644 index 0000000..d810cb1 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/commute-intrinsic.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -relocation-model=static | not grep movaps + +@a = external global <2 x i64> ; <<2 x i64>*> [#uses=1] + +define <2 x i64> @madd(<2 x i64> %b) nounwind { +entry: + %tmp2 = load <2 x i64>* @a, align 16 ; <<2 x i64>> [#uses=1] + %tmp6 = bitcast <2 x i64> %b to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp9 = bitcast <2 x i64> %tmp2 to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp11 = tail call <4 x i32> @llvm.x86.sse2.pmadd.wd( <8 x i16> %tmp9, <8 x i16> %tmp6 ) nounwind readnone ; <<4 x i32>> [#uses=1] + %tmp14 = bitcast <4 x i32> %tmp11 to <2 x i64> ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp14 +} + +declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/X86/commute-two-addr.ll b/src/LLVM/test/CodeGen/X86/commute-two-addr.ll new file mode 100644 index 0000000..e9791b1 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/commute-two-addr.ll
@@ -0,0 +1,62 @@ +; The register allocator can commute two-address instructions to avoid +; insertion of register-register copies. + +; Make sure there are only 3 mov's for each testcase +; RUN: llc < %s -mtriple=i686-pc-linux-gnu | FileCheck %s -check-prefix=LINUX +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s -check-prefix=DARWIN + + +@G = external global i32 ; <i32*> [#uses=2] + +declare void @ext(i32) + +define i32 @t1(i32 %X, i32 %Y) nounwind { +; LINUX: t1: +; LINUX: movl 4(%esp), %eax +; LINUX: movl 8(%esp), %ecx +; LINUX: addl %eax, %ecx +; LINUX: movl %ecx, G + %Z = add i32 %X, %Y ; <i32> [#uses=1] + store i32 %Z, i32* @G + ret i32 %X +} + +define i32 @t2(i32 %X, i32 %Y) nounwind { +; LINUX: t2: +; LINUX: movl 4(%esp), %eax +; LINUX: movl 8(%esp), %ecx +; LINUX: xorl %eax, %ecx +; LINUX: movl %ecx, G + %Z = xor i32 %X, %Y ; <i32> [#uses=1] + store i32 %Z, i32* @G + ret i32 %X +} + +; rdar://8762995 +%0 = type { i64, i32 } + +define %0 @t3(i32 %lb, i8 zeroext %has_lb, i8 zeroext %lb_inclusive, i32 %ub, i8 zeroext %has_ub, i8 zeroext %ub_inclusive) nounwind { +entry: +; DARWIN: t3: +; DARWIN: shll $16 +; DARWIN: shlq $32, %rcx +; DARWIN-NOT: leaq +; DARWIN: orq %rcx, %rax + %tmp21 = zext i32 %lb to i64 + %tmp23 = zext i32 %ub to i64 + %tmp24 = shl i64 %tmp23, 32 + %ins26 = or i64 %tmp24, %tmp21 + %tmp28 = zext i8 %has_lb to i32 + %tmp33 = zext i8 %has_ub to i32 + %tmp34 = shl i32 %tmp33, 8 + %tmp38 = zext i8 %lb_inclusive to i32 + %tmp39 = shl i32 %tmp38, 16 + %tmp43 = zext i8 %ub_inclusive to i32 + %tmp44 = shl i32 %tmp43, 24 + %ins31 = or i32 %tmp39, %tmp28 + %ins36 = or i32 %ins31, %tmp34 + %ins46 = or i32 %ins36, %tmp44 + %tmp16 = insertvalue %0 undef, i64 %ins26, 0 + %tmp19 = insertvalue %0 %tmp16, i32 %ins46, 1 + ret %0 %tmp19 +}
diff --git a/src/LLVM/test/CodeGen/X86/compare-add.ll b/src/LLVM/test/CodeGen/X86/compare-add.ll new file mode 100644 index 0000000..e0b69bd --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/compare-add.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=x86 | not grep add + +define i1 @X(i32 %X) { + %Y = add i32 %X, 14 ; <i32> [#uses=1] + %Z = icmp ne i32 %Y, 12345 ; <i1> [#uses=1] + ret i1 %Z +} +
diff --git a/src/LLVM/test/CodeGen/X86/compare-inf.ll b/src/LLVM/test/CodeGen/X86/compare-inf.ll new file mode 100644 index 0000000..9aa44a3 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/compare-inf.ll
@@ -0,0 +1,76 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s + +; Convert oeq and une to ole/oge/ule/uge when comparing with infinity +; and negative infinity, because those are more efficient on x86. + +; CHECK: oeq_inff: +; CHECK: ucomiss +; CHECK: jb +define float @oeq_inff(float %x, float %y) nounwind readonly { + %t0 = fcmp oeq float %x, 0x7FF0000000000000 + %t1 = select i1 %t0, float 1.0, float %y + ret float %t1 +} + +; CHECK: oeq_inf: +; CHECK: ucomisd +; CHECK: jb +define double @oeq_inf(double %x, double %y) nounwind readonly { + %t0 = fcmp oeq double %x, 0x7FF0000000000000 + %t1 = select i1 %t0, double 1.0, double %y + ret double %t1 +} + +; CHECK: une_inff: +; CHECK: ucomiss +; CHECK: jae +define float @une_inff(float %x, float %y) nounwind readonly { + %t0 = fcmp une float %x, 0x7FF0000000000000 + %t1 = select i1 %t0, float 1.0, float %y + ret float %t1 +} + +; CHECK: une_inf: +; CHECK: ucomisd +; CHECK: jae +define double @une_inf(double %x, double %y) nounwind readonly { + %t0 = fcmp une double %x, 0x7FF0000000000000 + %t1 = select i1 %t0, double 1.0, double %y + ret double %t1 +} + +; CHECK: oeq_neg_inff: +; CHECK: ucomiss +; CHECK: jb +define float @oeq_neg_inff(float %x, float %y) nounwind readonly { + %t0 = fcmp oeq float %x, 0xFFF0000000000000 + %t1 = select i1 %t0, float 1.0, float %y + ret float %t1 +} + +; CHECK: oeq_neg_inf: +; CHECK: ucomisd +; CHECK: jb +define double @oeq_neg_inf(double %x, double %y) nounwind readonly { + %t0 = fcmp oeq double %x, 0xFFF0000000000000 + %t1 = select i1 %t0, double 1.0, double %y + ret double %t1 +} + +; CHECK: une_neg_inff: +; CHECK: ucomiss +; CHECK: jae +define float @une_neg_inff(float %x, float %y) nounwind readonly { + %t0 = fcmp une float %x, 0xFFF0000000000000 + %t1 = select i1 %t0, float 1.0, float %y + ret float %t1 +} + +; CHECK: une_neg_inf: +; CHECK: ucomisd +; CHECK: jae +define double @une_neg_inf(double %x, double %y) nounwind readonly { + %t0 = fcmp une double %x, 0xFFF0000000000000 + %t1 
= select i1 %t0, double 1.0, double %y + ret double %t1 +}
diff --git a/src/LLVM/test/CodeGen/X86/compare_folding.ll b/src/LLVM/test/CodeGen/X86/compare_folding.ll new file mode 100644 index 0000000..7e4f97b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/compare_folding.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=x86 -mcpu=yonah | \ +; RUN: grep movsd | count 1 +; RUN: llc < %s -march=x86 -mcpu=yonah | \ +; RUN: grep ucomisd +declare i1 @llvm.isunordered.f64(double, double) + +define i1 @test1(double %X, double %Y) { + %COM = fcmp uno double %X, %Y ; <i1> [#uses=1] + ret i1 %COM +} +
diff --git a/src/LLVM/test/CodeGen/X86/compiler_used.ll b/src/LLVM/test/CodeGen/X86/compiler_used.ll new file mode 100644 index 0000000..be8de5e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/compiler_used.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin9 | grep no_dead_strip | count 1 +; We should have a .no_dead_strip directive for Z but not for X/Y. + +@X = internal global i8 4 +@Y = internal global i32 123 +@Z = internal global i8 4 + +@llvm.used = appending global [1 x i8*] [ i8* @Z ], section "llvm.metadata" +@llvm.compiler_used = appending global [2 x i8*] [ i8* @X, i8* bitcast (i32* @Y to i8*)], section "llvm.metadata"
diff --git a/src/LLVM/test/CodeGen/X86/complex-asm.ll b/src/LLVM/test/CodeGen/X86/complex-asm.ll new file mode 100644 index 0000000..49878b9 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/complex-asm.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin +; This formerly crashed. + +%0 = type { i64, i64 } + +define %0 @f() nounwind ssp { +entry: + %v = alloca %0, align 8 + call void asm sideeffect "", "=*r,r,r,0,~{dirflag},~{fpsr},~{flags}"(%0* %v, i32 0, i32 1, i128 undef) nounwind + %0 = getelementptr inbounds %0* %v, i64 0, i32 0 + %1 = load i64* %0, align 8 + %2 = getelementptr inbounds %0* %v, i64 0, i32 1 + %3 = load i64* %2, align 8 + %mrv4 = insertvalue %0 undef, i64 %1, 0 + %mrv5 = insertvalue %0 %mrv4, i64 %3, 1 + ret %0 %mrv5 +}
diff --git a/src/LLVM/test/CodeGen/X86/complex-fca.ll b/src/LLVM/test/CodeGen/X86/complex-fca.ll new file mode 100644 index 0000000..7e7acaa --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/complex-fca.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=x86 | grep mov | count 2 + +define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 } %z) nounwind { +entry: + %z8 = extractvalue { x86_fp80, x86_fp80 } %z, 0 + %z9 = extractvalue { x86_fp80, x86_fp80 } %z, 1 + %0 = fsub x86_fp80 0xK80000000000000000000, %z9 + %insert = insertvalue { x86_fp80, x86_fp80 } undef, x86_fp80 %0, 0 + %insert7 = insertvalue { x86_fp80, x86_fp80 } %insert, x86_fp80 %z8, 1 + call void @ccoshl({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 } %insert7) nounwind + ret void +} + +declare void @ccoshl({ x86_fp80, x86_fp80 }* noalias sret, { x86_fp80, x86_fp80 }) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/conditional-indecrement.ll b/src/LLVM/test/CodeGen/X86/conditional-indecrement.ll new file mode 100644 index 0000000..a3a0c39 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/conditional-indecrement.ll
@@ -0,0 +1,89 @@ +; RUN: llc -march=x86 < %s | FileCheck %s + +define i32 @test1(i32 %a, i32 %b) nounwind readnone { + %not.cmp = icmp ne i32 %a, 0 + %inc = zext i1 %not.cmp to i32 + %retval.0 = add i32 %inc, %b + ret i32 %retval.0 +; CHECK: test1: +; CHECK: cmpl $1 +; CHECK: sbbl $-1 +; CHECK: ret +} + +define i32 @test2(i32 %a, i32 %b) nounwind readnone { + %cmp = icmp eq i32 %a, 0 + %inc = zext i1 %cmp to i32 + %retval.0 = add i32 %inc, %b + ret i32 %retval.0 +; CHECK: test2: +; CHECK: cmpl $1 +; CHECK: adcl $0 +; CHECK: ret +} + +define i32 @test3(i32 %a, i32 %b) nounwind readnone { + %cmp = icmp eq i32 %a, 0 + %inc = zext i1 %cmp to i32 + %retval.0 = add i32 %inc, %b + ret i32 %retval.0 +; CHECK: test3: +; CHECK: cmpl $1 +; CHECK: adcl $0 +; CHECK: ret +} + +define i32 @test4(i32 %a, i32 %b) nounwind readnone { + %not.cmp = icmp ne i32 %a, 0 + %inc = zext i1 %not.cmp to i32 + %retval.0 = add i32 %inc, %b + ret i32 %retval.0 +; CHECK: test4: +; CHECK: cmpl $1 +; CHECK: sbbl $-1 +; CHECK: ret +} + +define i32 @test5(i32 %a, i32 %b) nounwind readnone { + %not.cmp = icmp ne i32 %a, 0 + %inc = zext i1 %not.cmp to i32 + %retval.0 = sub i32 %b, %inc + ret i32 %retval.0 +; CHECK: test5: +; CHECK: cmpl $1 +; CHECK: adcl $-1 +; CHECK: ret +} + +define i32 @test6(i32 %a, i32 %b) nounwind readnone { + %cmp = icmp eq i32 %a, 0 + %inc = zext i1 %cmp to i32 + %retval.0 = sub i32 %b, %inc + ret i32 %retval.0 +; CHECK: test6: +; CHECK: cmpl $1 +; CHECK: sbbl $0 +; CHECK: ret +} + +define i32 @test7(i32 %a, i32 %b) nounwind readnone { + %cmp = icmp eq i32 %a, 0 + %inc = zext i1 %cmp to i32 + %retval.0 = sub i32 %b, %inc + ret i32 %retval.0 +; CHECK: test7: +; CHECK: cmpl $1 +; CHECK: sbbl $0 +; CHECK: ret +} + +define i32 @test8(i32 %a, i32 %b) nounwind readnone { + %not.cmp = icmp ne i32 %a, 0 + %inc = zext i1 %not.cmp to i32 + %retval.0 = sub i32 %b, %inc + ret i32 %retval.0 +; CHECK: test8: +; CHECK: cmpl $1 +; CHECK: adcl $-1 +; CHECK: ret +}
diff --git a/src/LLVM/test/CodeGen/X86/constant-pool-remat-0.ll b/src/LLVM/test/CodeGen/X86/constant-pool-remat-0.ll new file mode 100644 index 0000000..4be14d2 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/constant-pool-remat-0.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux -regalloc=greedy | FileCheck %s +; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s +; CHECK: LCPI +; CHECK: LCPI +; CHECK: LCPI +; CHECK-NOT: LCPI + +; RUN: llc < %s -mtriple=x86_64-linux -o /dev/null -stats -info-output-file - | FileCheck %s -check-prefix=X64stat +; X64stat: 6 asm-printer + +; RUN: llc < %s -march=x86 -mattr=+sse2 -o /dev/null -stats -info-output-file - | FileCheck %s -check-prefix=X32stat +; X32stat: 12 asm-printer + +declare float @qux(float %y) + +define float @array(float %a) nounwind { + %n = fmul float %a, 9.0 + %m = call float @qux(float %n) + %o = fmul float %m, 9.0 + ret float %o +}
diff --git a/src/LLVM/test/CodeGen/X86/constant-pool-sharing.ll b/src/LLVM/test/CodeGen/X86/constant-pool-sharing.ll new file mode 100644 index 0000000..f979945 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/constant-pool-sharing.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s + +; llc should share constant pool entries between this integer vector +; and this floating-point vector since they have the same encoding. + +; CHECK: LCPI0_0(%rip), %xmm0 +; CHECK: movaps %xmm0, ({{%rdi|%rcx}}) +; CHECK: movaps %xmm0, ({{%rsi|%rdx}}) + +define void @foo(<4 x i32>* %p, <4 x float>* %q, i1 %t) nounwind { +entry: + br label %loop +loop: + store <4 x i32><i32 1073741824, i32 1073741824, i32 1073741824, i32 1073741824>, <4 x i32>* %p + store <4 x float><float 2.0, float 2.0, float 2.0, float 2.0>, <4 x float>* %q + br i1 %t, label %loop, label %ret +ret: + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/constpool.ll b/src/LLVM/test/CodeGen/X86/constpool.ll new file mode 100644 index 0000000..2aac486 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/constpool.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s +; RUN: llc < %s -fast-isel +; RUN: llc < %s -march=x86-64 +; RUN: llc < %s -fast-isel -march=x86-64 +; PR4466 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin9.7" + +define i32 @main() nounwind { +entry: + %0 = fcmp oeq float undef, 0x7FF0000000000000 ; <i1> [#uses=1] + %1 = zext i1 %0 to i32 ; <i32> [#uses=1] + store i32 %1, i32* undef, align 4 + ret i32 undef +}
diff --git a/src/LLVM/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll b/src/LLVM/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll new file mode 100644 index 0000000..b82348b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
@@ -0,0 +1,26 @@ +; RUN: llc < %s -mtriple=x86_64-linux -o /dev/null -stats |& FileCheck %s -check-prefix=STATS +; RUN: llc < %s -mtriple=x86_64-win32 -o /dev/null -stats |& FileCheck %s -check-prefix=STATS +; STATS: 9 asm-printer + +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s +; CHECK: leal 1({{%rsi|%rdx}}), + +define fastcc zeroext i8 @fullGtU(i32 %i1, i32 %i2, i8* %ptr) nounwind optsize { +entry: + %0 = add i32 %i2, 1 ; <i32> [#uses=1] + %1 = sext i32 %0 to i64 ; <i64> [#uses=1] + %2 = getelementptr i8* %ptr, i64 %1 ; <i8*> [#uses=1] + %3 = load i8* %2, align 1 ; <i8> [#uses=1] + %4 = icmp eq i8 0, %3 ; <i1> [#uses=1] + br i1 %4, label %bb3, label %bb34 + +bb3: ; preds = %entry + %5 = add i32 %i2, 4 ; <i32> [#uses=0] + %6 = trunc i32 %5 to i8 + ret i8 %6 + +bb34: ; preds = %entry + ret i8 0 +} +
diff --git a/src/LLVM/test/CodeGen/X86/copysign-zero.ll b/src/LLVM/test/CodeGen/X86/copysign-zero.ll new file mode 100644 index 0000000..47522d8 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/copysign-zero.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s | not grep orpd +; RUN: llc < %s | grep andpd | count 1 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-apple-darwin8" + +define double @test(double %X) nounwind { +entry: + %tmp2 = tail call double @copysign( double 0.000000e+00, double %X ) nounwind readnone ; <double> [#uses=1] + ret double %tmp2 +} + +declare double @copysign(double, double) nounwind readnone +
diff --git a/src/LLVM/test/CodeGen/X86/crash-O0.ll b/src/LLVM/test/CodeGen/X86/crash-O0.ll new file mode 100644 index 0000000..956d43b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/crash-O0.ll
@@ -0,0 +1,31 @@ +; RUN: llc -O0 -relocation-model=pic -disable-fp-elim < %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10" + +; This file contains functions that may crash llc -O0 + +; The DIV8 instruction produces results in AH and AL, but we don't want to use +; AH in 64-bit mode. The hack used must not generate copyFromReg nodes for +; aliased registers (AX and AL) - RegAllocFast does not like that. +; PR7312 +define i32 @div8() nounwind { +entry: + %0 = trunc i64 undef to i8 ; <i8> [#uses=3] + %1 = udiv i8 0, %0 ; <i8> [#uses=1] + %2 = urem i8 0, %0 ; <i8> [#uses=1] + %3 = icmp uge i8 %2, %0 ; <i1> [#uses=1] + br i1 %3, label %"40", label %"39" + +"39": ; preds = %"36" + %4 = zext i8 %1 to i32 ; <i32> [#uses=1] + %5 = mul nsw i32 %4, undef ; <i32> [#uses=1] + %6 = add nsw i32 %5, undef ; <i32> [#uses=1] + %7 = icmp ne i32 %6, undef ; <i1> [#uses=1] + br i1 %7, label %"40", label %"41" + +"40": ; preds = %"39", %"36" + unreachable + +"41": ; preds = %"39" + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/crash-nosse.ll b/src/LLVM/test/CodeGen/X86/crash-nosse.ll new file mode 100644 index 0000000..1cec25b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/crash-nosse.ll
@@ -0,0 +1,27 @@ +; RUN: llc < %s -mattr=-sse2,-sse41 -verify-machineinstrs +target triple = "x86_64-unknown-linux-gnu" + +; PR10503 +; This test case produces INSERT_SUBREG 0, <undef> instructions that +; ProcessImplicitDefs doesn't eliminate. +define void @autogen_136178_500() { +BB: + %Shuff6 = shufflevector <32 x i32> undef, <32 x i32> undef, <32 x i32> <i32 27, i32 29, i32 31, i32 undef, i32 undef, i32 37, i32 39, i32 41, i32 undef, i32 45, i32 47, i32 49, i32 51, i32 53, i32 55, i32 57, i32 undef, i32 61, i32 63, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 undef, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25> + %S17 = select i1 true, <8 x float>* null, <8 x float>* null + br label %CF + +CF: ; preds = %CF, %BB + %L19 = load <8 x float>* %S17 + %BC = bitcast <32 x i32> %Shuff6 to <32 x float> + %S28 = fcmp ord double 0x3ED1A1F787BB2185, 0x3EE59DE55A8DF890 + br i1 %S28, label %CF, label %CF39 + +CF39: ; preds = %CF39, %CF + store <8 x float> %L19, <8 x float>* %S17 + %I35 = insertelement <32 x float> %BC, float 0x3EC2489F60000000, i32 9 + %S38 = fcmp ule double 0x3EE59DE55A8DF890, 0x3EC4AB0CBB986A1A + br i1 %S38, label %CF39, label %CF40 + +CF40: ; preds = %CF39 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/crash.ll b/src/LLVM/test/CodeGen/X86/crash.ll new file mode 100644 index 0000000..1531457 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/crash.ll
@@ -0,0 +1,393 @@ +; RUN: llc -march=x86 %s -o - +; RUN: llc -march=x86-64 %s -o - + +; PR6497 + +; Chain and flag folding issues. +define i32 @test1() nounwind ssp { +entry: + %tmp5.i = volatile load i32* undef ; <i32> [#uses=1] + %conv.i = zext i32 %tmp5.i to i64 ; <i64> [#uses=1] + %tmp12.i = volatile load i32* undef ; <i32> [#uses=1] + %conv13.i = zext i32 %tmp12.i to i64 ; <i64> [#uses=1] + %shl.i = shl i64 %conv13.i, 32 ; <i64> [#uses=1] + %or.i = or i64 %shl.i, %conv.i ; <i64> [#uses=1] + %add16.i = add i64 %or.i, 256 ; <i64> [#uses=1] + %shr.i = lshr i64 %add16.i, 8 ; <i64> [#uses=1] + %conv19.i = trunc i64 %shr.i to i32 ; <i32> [#uses=1] + volatile store i32 %conv19.i, i32* undef + ret i32 undef +} + +; PR6533 +define void @test2(i1 %x, i32 %y) nounwind { + %land.ext = zext i1 %x to i32 ; <i32> [#uses=1] + %and = and i32 %y, 1 ; <i32> [#uses=1] + %xor = xor i32 %and, %land.ext ; <i32> [#uses=1] + %cmp = icmp eq i32 %xor, 1 ; <i1> [#uses=1] + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %land.end + ret void + +if.end: ; preds = %land.end + ret void +} + +; PR6577 +%pair = type { i64, double } + +define void @test3() { +dependentGraph243.exit: + %subject19 = load %pair* undef ; <%1> [#uses=1] + %0 = extractvalue %pair %subject19, 1 ; <double> [#uses=2] + %1 = select i1 undef, double %0, double undef ; <double> [#uses=1] + %2 = select i1 undef, double %1, double %0 ; <double> [#uses=1] + %3 = insertvalue %pair undef, double %2, 1 ; <%1> [#uses=1] + store %pair %3, %pair* undef + ret void +} + +; PR6605 +define i64 @test4(i8* %P) nounwind ssp { +entry: + %tmp1 = load i8* %P ; <i8> [#uses=3] + %tobool = icmp eq i8 %tmp1, 0 ; <i1> [#uses=1] + %tmp58 = sext i1 %tobool to i8 ; <i8> [#uses=1] + %mul.i = and i8 %tmp58, %tmp1 ; <i8> [#uses=1] + %conv6 = zext i8 %mul.i to i32 ; <i32> [#uses=1] + %cmp = icmp ne i8 %tmp1, 1 ; <i1> [#uses=1] + %conv11 = zext i1 %cmp to i32 ; <i32> [#uses=1] + %call12 = tail call i32 @safe(i32 %conv11) nounwind ; <i32> 
[#uses=1] + %and = and i32 %conv6, %call12 ; <i32> [#uses=1] + %tobool13 = icmp eq i32 %and, 0 ; <i1> [#uses=1] + br i1 %tobool13, label %if.else, label %return + +if.else: ; preds = %entry + br label %return + +return: ; preds = %if.else, %entry + ret i64 undef +} + +declare i32 @safe(i32) + +; PR6607 +define fastcc void @test5(i32 %FUNC) nounwind { +foo: + %0 = load i8* undef, align 1 ; <i8> [#uses=3] + %1 = sext i8 %0 to i32 ; <i32> [#uses=2] + %2 = zext i8 %0 to i32 ; <i32> [#uses=1] + %tmp1.i5037 = urem i32 %2, 10 ; <i32> [#uses=1] + %tmp.i5038 = icmp ugt i32 %tmp1.i5037, 15 ; <i1> [#uses=1] + %3 = zext i1 %tmp.i5038 to i8 ; <i8> [#uses=1] + %4 = icmp slt i8 %0, %3 ; <i1> [#uses=1] + %5 = add nsw i32 %1, 256 ; <i32> [#uses=1] + %storemerge.i.i57 = select i1 %4, i32 %5, i32 %1 ; <i32> [#uses=1] + %6 = shl i32 %storemerge.i.i57, 16 ; <i32> [#uses=1] + %7 = sdiv i32 %6, -256 ; <i32> [#uses=1] + %8 = trunc i32 %7 to i8 ; <i8> [#uses=1] + store i8 %8, i8* undef, align 1 + ret void +} + + +; Crash commoning identical asms. +; PR6803 +define void @test6(i1 %C) nounwind optsize ssp { +entry: + br i1 %C, label %do.body55, label %do.body92 + +do.body55: ; preds = %if.else36 + call void asm sideeffect "foo", "~{dirflag},~{fpsr},~{flags}"() nounwind, !srcloc !0 + ret void + +do.body92: ; preds = %if.then66 + call void asm sideeffect "foo", "~{dirflag},~{fpsr},~{flags}"() nounwind, !srcloc !1 + ret void +} + +!0 = metadata !{i32 633550} +!1 = metadata !{i32 634261} + + +; Crash during XOR optimization. +; <rdar://problem/7869290> + +define void @test7() nounwind ssp { +entry: + br i1 undef, label %bb14, label %bb67 + +bb14: + %tmp0 = trunc i16 undef to i1 + %tmp1 = load i8* undef, align 8 + %tmp2 = shl i8 %tmp1, 4 + %tmp3 = lshr i8 %tmp2, 7 + %tmp4 = trunc i8 %tmp3 to i1 + %tmp5 = icmp ne i1 %tmp0, %tmp4 + br i1 %tmp5, label %bb14, label %bb67 + +bb67: + ret void +} + +; Crash when trying to copy AH to AL. 
+; PR7540 +define void @copy8bitregs() nounwind { +entry: + %div.i = sdiv i32 115200, 0 + %shr8.i = lshr i32 %div.i, 8 + %conv4.i = trunc i32 %shr8.i to i8 + call void asm sideeffect "outb $0, ${1:w}", "{ax},N{dx},~{dirflag},~{fpsr},~{flags}"(i8 %conv4.i, i32 1017) nounwind + unreachable +} + +; Crash trying to form conditional increment with fp value. +; PR8981 +define i32 @test9(double %X) ssp align 2 { +entry: + %0 = fcmp one double %X, 0.000000e+00 + %cond = select i1 %0, i32 1, i32 2 + ret i32 %cond +} + + +; PR8514 - Crash in match address do to "heroics" turning and-of-shift into +; shift of and. +%struct.S0 = type { i8, [2 x i8], i8 } + +define void @func_59(i32 %p_63) noreturn nounwind { +entry: + br label %for.body + +for.body: ; preds = %for.inc44, %entry + %p_63.addr.1 = phi i32 [ %p_63, %entry ], [ 0, %for.inc44 ] + %l_74.0 = phi i32 [ 0, %entry ], [ %add46, %for.inc44 ] + br i1 undef, label %for.inc44, label %bb.nph81 + +bb.nph81: ; preds = %for.body + %tmp98 = add i32 %p_63.addr.1, 0 + br label %for.body22 + +for.body22: ; preds = %for.body22, %bb.nph81 + %l_75.077 = phi i64 [ %ins, %for.body22 ], [ undef, %bb.nph81 ] + %tmp110 = trunc i64 %l_75.077 to i32 + %tmp111 = and i32 %tmp110, 65535 + %arrayidx32.0 = getelementptr [9 x [5 x [2 x %struct.S0]]]* undef, i32 0, i32 %l_74.0, i32 %tmp98, i32 %tmp111, i32 0 + store i8 1, i8* %arrayidx32.0, align 4 + %tmp106 = shl i32 %tmp110, 2 + %tmp107 = and i32 %tmp106, 262140 + %scevgep99.sum114 = or i32 %tmp107, 1 + %arrayidx32.1.1 = getelementptr [9 x [5 x [2 x %struct.S0]]]* undef, i32 0, i32 %l_74.0, i32 %tmp98, i32 0, i32 1, i32 %scevgep99.sum114 + store i8 0, i8* %arrayidx32.1.1, align 1 + %ins = or i64 undef, undef + br label %for.body22 + +for.inc44: ; preds = %for.body + %add46 = add i32 %l_74.0, 1 + br label %for.body +} + +; PR9028 +define void @func_60(i64 %A) nounwind { +entry: + %0 = zext i64 %A to i160 + %1 = shl i160 %0, 64 + %2 = zext i160 %1 to i576 + %3 = zext i96 undef to i576 + %4 = or i576 
%3, %2 + store i576 %4, i576* undef, align 8 + ret void +} + +; <rdar://problem/9187792> +define fastcc void @func_61() nounwind sspreq { +entry: + %t1 = tail call i64 @llvm.objectsize.i64(i8* undef, i1 false) + %t2 = icmp eq i64 %t1, -1 + br i1 %t2, label %bb2, label %bb1 + +bb1: + ret void + +bb2: + ret void +} + +declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readnone + +; PR10277 +; This test has dead code elimination caused by remat during spilling. +; DCE causes a live interval to break into connected components. +; One of the components is spilled. + +%t2 = type { i8 } +%t9 = type { %t10 } +%t10 = type { %t11 } +%t11 = type { %t12 } +%t12 = type { %t13*, %t13*, %t13* } +%t13 = type { %t14*, %t15, %t15 } +%t14 = type opaque +%t15 = type { i8, i32, i32 } +%t16 = type { %t17, i8* } +%t17 = type { %t18 } +%t18 = type { %t19 } +%t19 = type { %t20*, %t20*, %t20* } +%t20 = type { i32, i32 } +%t21 = type { %t13* } + +define void @_ZNK4llvm17MipsFrameLowering12emitPrologueERNS_15MachineFunctionE() ssp align 2 { +bb: + %tmp = load %t9** undef, align 4, !tbaa !0 + %tmp2 = getelementptr inbounds %t9* %tmp, i32 0, i32 0 + %tmp3 = getelementptr inbounds %t9* %tmp, i32 0, i32 0, i32 0, i32 0, i32 1 + br label %bb4 + +bb4: ; preds = %bb37, %bb + %tmp5 = phi i96 [ undef, %bb ], [ %tmp38, %bb37 ] + %tmp6 = phi i96 [ undef, %bb ], [ %tmp39, %bb37 ] + br i1 undef, label %bb34, label %bb7 + +bb7: ; preds = %bb4 + %tmp8 = load i32* undef, align 4 + %tmp9 = and i96 %tmp6, 4294967040 + %tmp10 = zext i32 %tmp8 to i96 + %tmp11 = shl nuw nsw i96 %tmp10, 32 + %tmp12 = or i96 %tmp9, %tmp11 + %tmp13 = or i96 %tmp12, 1 + %tmp14 = load i32* undef, align 4 + %tmp15 = and i96 %tmp5, 4294967040 + %tmp16 = zext i32 %tmp14 to i96 + %tmp17 = shl nuw nsw i96 %tmp16, 32 + %tmp18 = or i96 %tmp15, %tmp17 + %tmp19 = or i96 %tmp18, 1 + %tmp20 = load i8* undef, align 1 + %tmp21 = and i8 %tmp20, 1 + %tmp22 = icmp ne i8 %tmp21, 0 + %tmp23 = select i1 %tmp22, i96 %tmp19, i96 %tmp13 + %tmp24 = select 
i1 %tmp22, i96 %tmp13, i96 %tmp19 + store i96 %tmp24, i96* undef, align 4 + %tmp25 = load %t13** %tmp3, align 4 + %tmp26 = icmp eq %t13* %tmp25, undef + br i1 %tmp26, label %bb28, label %bb27 + +bb27: ; preds = %bb7 + br label %bb29 + +bb28: ; preds = %bb7 + call void @_ZNSt6vectorIN4llvm11MachineMoveESaIS1_EE13_M_insert_auxEN9__gnu_cxx17__normal_iteratorIPS1_S3_EERKS1_(%t10* %tmp2, %t21* byval align 4 undef, %t13* undef) + br label %bb29 + +bb29: ; preds = %bb28, %bb27 + store i96 %tmp23, i96* undef, align 4 + %tmp30 = load %t13** %tmp3, align 4 + br i1 false, label %bb33, label %bb31 + +bb31: ; preds = %bb29 + %tmp32 = getelementptr inbounds %t13* %tmp30, i32 1 + store %t13* %tmp32, %t13** %tmp3, align 4 + br label %bb37 + +bb33: ; preds = %bb29 + unreachable + +bb34: ; preds = %bb4 + br i1 undef, label %bb36, label %bb35 + +bb35: ; preds = %bb34 + store %t13* null, %t13** %tmp3, align 4 + br label %bb37 + +bb36: ; preds = %bb34 + call void @_ZNSt6vectorIN4llvm11MachineMoveESaIS1_EE13_M_insert_auxEN9__gnu_cxx17__normal_iteratorIPS1_S3_EERKS1_(%t10* %tmp2, %t21* byval align 4 undef, %t13* undef) + br label %bb37 + +bb37: ; preds = %bb36, %bb35, %bb31 + %tmp38 = phi i96 [ %tmp23, %bb31 ], [ %tmp5, %bb35 ], [ %tmp5, %bb36 ] + %tmp39 = phi i96 [ %tmp24, %bb31 ], [ %tmp6, %bb35 ], [ %tmp6, %bb36 ] + %tmp40 = add i32 undef, 1 + br label %bb4 +} + +declare %t14* @_ZN4llvm9MCContext16CreateTempSymbolEv(%t2*) + +declare void @_ZNSt6vectorIN4llvm11MachineMoveESaIS1_EE13_M_insert_auxEN9__gnu_cxx17__normal_iteratorIPS1_S3_EERKS1_(%t10*, %t21* byval align 4, %t13*) + +declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind + +declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind + +; PR10463 +; Spilling a virtual register with <undef> uses. 
+define void @autogen_239_1000() { +BB: + %Shuff = shufflevector <8 x double> undef, <8 x double> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 undef, i32 undef> + br label %CF + +CF: + %B16 = frem <8 x double> zeroinitializer, %Shuff + %E19 = extractelement <8 x double> %Shuff, i32 5 + br i1 undef, label %CF, label %CF75 + +CF75: + br i1 undef, label %CF75, label %CF76 + +CF76: + store double %E19, double* undef + br i1 undef, label %CF76, label %CF77 + +CF77: + %B55 = fmul <8 x double> %B16, undef + br label %CF77 +} + +; PR10527 +define void @pr10527() nounwind uwtable { +entry: + br label %"4" + +"3": + %0 = load <2 x i32>* null, align 8 + %1 = xor <2 x i32> zeroinitializer, %0 + %2 = and <2 x i32> %1, %6 + %3 = or <2 x i32> undef, %2 + %4 = and <2 x i32> %3, undef + store <2 x i32> %4, <2 x i32>* undef + %5 = load <2 x i32>* undef, align 1 + br label %"4" + +"4": + %6 = phi <2 x i32> [ %5, %"3" ], [ zeroinitializer, %entry ] + %7 = icmp ult i32 undef, undef + br i1 %7, label %"3", label %"5" + +"5": + ret void +} + +; PR11078 +; +; A virtual register used by the "foo" inline asm memory operand gets +; constrained to GR32_ABCD during coalescing. This makes the inline asm +; impossible to allocate without splitting the live range and reinflating the +; register class around the inline asm. +; +; The constraint originally comes from the TEST8ri optimization of (icmp (and %t0, 1), 0). 
+ +@__force_order = external hidden global i32, align 4 +define void @pr11078(i32* %pgd) nounwind { +entry: + %t0 = load i32* %pgd, align 4 + %and2 = and i32 %t0, 1 + %tobool = icmp eq i32 %and2, 0 + br i1 %tobool, label %if.then, label %if.end + +if.then: + %t1 = tail call i32 asm sideeffect "bar", "=r,=*m,~{dirflag},~{fpsr},~{flags}"(i32* @__force_order) nounwind + br label %if.end + +if.end: + %t6 = inttoptr i32 %t0 to i64* + %t11 = tail call i64 asm sideeffect "foo", "=*m,=A,{bx},{cx},1,~{memory},~{dirflag},~{fpsr},~{flags}"(i64* %t6, i32 0, i32 0, i64 0) nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/critical-edge-split-2.ll b/src/LLVM/test/CodeGen/X86/critical-edge-split-2.ll new file mode 100644 index 0000000..70301cd --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/critical-edge-split-2.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +%0 = type <{ %1, %1 }> +%1 = type { i8, i8, i8, i8 } + +@g_2 = global %0 zeroinitializer +@g_4 = global %1 zeroinitializer, align 4 + + +; PR8642 +define i16 @test1(i1 zeroext %C, i8** nocapture %argv) nounwind ssp { +entry: + br i1 %C, label %cond.end.i, label %cond.false.i + +cond.false.i: ; preds = %entry + br label %cond.end.i + +cond.end.i: ; preds = %entry + %call1 = phi i16 [ trunc (i32 srem (i32 1, i32 zext (i1 icmp eq (%1* bitcast (i8* getelementptr inbounds (%0* @g_2, i64 0, i32 1, i32 0) to %1*), %1* @g_4) to i32)) to i16), %cond.false.i ], [ 1, %entry ] + ret i16 %call1 +} + +; CHECK: test1: +; CHECK: testb %dil, %dil +; CHECK: jne LBB0_2 +; CHECK: divl +; CHECK: LBB0_2:
diff --git a/src/LLVM/test/CodeGen/X86/cstring.ll b/src/LLVM/test/CodeGen/X86/cstring.ll new file mode 100644 index 0000000..5b5a766 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/cstring.ll
@@ -0,0 +1,4 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin | not grep comm +; rdar://6479858 + +@str1 = internal constant [1 x i8] zeroinitializer
diff --git a/src/LLVM/test/CodeGen/X86/ctpop-combine.ll b/src/LLVM/test/CodeGen/X86/ctpop-combine.ll new file mode 100644 index 0000000..6406cc73 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/ctpop-combine.ll
@@ -0,0 +1,40 @@ +; RUN: llc -march=x86-64 < %s | FileCheck %s + +declare i64 @llvm.ctpop.i64(i64) nounwind readnone + +define i32 @test1(i64 %x) nounwind readnone { + %count = tail call i64 @llvm.ctpop.i64(i64 %x) + %cast = trunc i64 %count to i32 + %cmp = icmp ugt i32 %cast, 1 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK: test1: +; CHECK: leaq -1([[A0:%rdi|%rcx]]) +; CHECK-NEXT: testq +; CHECK-NEXT: setne +; CHECK: ret +} + + +define i32 @test2(i64 %x) nounwind readnone { + %count = tail call i64 @llvm.ctpop.i64(i64 %x) + %cmp = icmp ult i64 %count, 2 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK: test2: +; CHECK: leaq -1([[A0]]) +; CHECK-NEXT: testq +; CHECK-NEXT: sete +; CHECK: ret +} + +define i32 @test3(i64 %x) nounwind readnone { + %count = tail call i64 @llvm.ctpop.i64(i64 %x) + %cast = trunc i64 %count to i6 ; Too small for 0-64 + %cmp = icmp ult i6 %cast, 2 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK: test3: +; CHECK: cmpb $2 +; CHECK: ret +}
diff --git a/src/LLVM/test/CodeGen/X86/dag-rauw-cse.ll b/src/LLVM/test/CodeGen/X86/dag-rauw-cse.ll new file mode 100644 index 0000000..eca8c86 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/dag-rauw-cse.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=x86 | FileCheck %s +; PR3018 + +define i32 @test(i32 %A) nounwind { +; CHECK: test: +; CHECK-NOT: ret +; CHECK: orl $1 +; CHECK: ret + %B = or i32 %A, 1 + %C = or i32 %B, 1 + %D = and i32 %C, 7057 + ret i32 %D +}
diff --git a/src/LLVM/test/CodeGen/X86/dagcombine-buildvector.ll b/src/LLVM/test/CodeGen/X86/dagcombine-buildvector.ll new file mode 100644 index 0000000..dae91d5 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/dagcombine-buildvector.ll
@@ -0,0 +1,27 @@ +; RUN: llc < %s -march=x86 -mcpu=penryn | FileCheck %s + +; Shows a dag combine bug that will generate an illegal build vector +; with v2i64 build_vector i32, i32. + +; CHECK: test: +; CHECK: unpcklpd +; CHECK: movapd +define void @test(<2 x double>* %dst, <4 x double> %src) nounwind { +entry: + %tmp7.i = shufflevector <4 x double> %src, <4 x double> undef, <2 x i32> < i32 0, i32 2 > + store <2 x double> %tmp7.i, <2 x double>* %dst + ret void +} + +; CHECK: test2: +; CHECK: movdqa +define void @test2(<4 x i16>* %src, <4 x i32>* %dest) nounwind { +entry: + %tmp1 = load <4 x i16>* %src + %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> + %0 = tail call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %tmp3) + store <4 x i32> %0, <4 x i32>* %dest + ret void +} + +declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/X86/dagcombine-cse.ll b/src/LLVM/test/CodeGen/X86/dagcombine-cse.ll new file mode 100644 index 0000000..c3c7990 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/dagcombine-cse.ll
@@ -0,0 +1,27 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -stats |& grep asm-printer | grep 14 + +define i32 @t(i8* %ref_frame_ptr, i32 %ref_frame_stride, i32 %idxX, i32 %idxY) nounwind { +entry: + %tmp7 = mul i32 %idxY, %ref_frame_stride ; <i32> [#uses=2] + %tmp9 = add i32 %tmp7, %idxX ; <i32> [#uses=1] + %tmp11 = getelementptr i8* %ref_frame_ptr, i32 %tmp9 ; <i8*> [#uses=1] + %tmp1112 = bitcast i8* %tmp11 to i32* ; <i32*> [#uses=1] + %tmp13 = load i32* %tmp1112, align 4 ; <i32> [#uses=1] + %tmp18 = add i32 %idxX, 4 ; <i32> [#uses=1] + %tmp20.sum = add i32 %tmp18, %tmp7 ; <i32> [#uses=1] + %tmp21 = getelementptr i8* %ref_frame_ptr, i32 %tmp20.sum ; <i8*> [#uses=1] + %tmp2122 = bitcast i8* %tmp21 to i16* ; <i16*> [#uses=1] + %tmp23 = load i16* %tmp2122, align 2 ; <i16> [#uses=1] + %tmp2425 = zext i16 %tmp23 to i64 ; <i64> [#uses=1] + %tmp26 = shl i64 %tmp2425, 32 ; <i64> [#uses=1] + %tmp2728 = zext i32 %tmp13 to i64 ; <i64> [#uses=1] + %tmp29 = or i64 %tmp26, %tmp2728 ; <i64> [#uses=1] + %tmp3454 = bitcast i64 %tmp29 to double ; <double> [#uses=1] + %tmp35 = insertelement <2 x double> undef, double %tmp3454, i32 0 ; <<2 x double>> [#uses=1] + %tmp36 = insertelement <2 x double> %tmp35, double 0.000000e+00, i32 1 ; <<2 x double>> [#uses=1] + %tmp42 = bitcast <2 x double> %tmp36 to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp43 = shufflevector <8 x i16> %tmp42, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 1, i32 2, i32 4, i32 5, i32 6, i32 7 > ; <<8 x i16>> [#uses=1] + %tmp47 = bitcast <8 x i16> %tmp43 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp48 = extractelement <4 x i32> %tmp47, i32 0 ; <i32> [#uses=1] + ret i32 %tmp48 +}
diff --git a/src/LLVM/test/CodeGen/X86/darwin-bzero.ll b/src/LLVM/test/CodeGen/X86/darwin-bzero.ll new file mode 100644 index 0000000..3099526 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/darwin-bzero.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin10 | grep __bzero + +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind + +define void @foo(i8* %p, i32 %len) { + call void @llvm.memset.p0i8.i32(i8* %p, i8 0, i32 %len, i32 1, i1 false) + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/darwin-no-dead-strip.ll b/src/LLVM/test/CodeGen/X86/darwin-no-dead-strip.ll new file mode 100644 index 0000000..17d379f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/darwin-no-dead-strip.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s | grep no_dead_strip + +target datalayout = "e-p:32:32" +target triple = "i686-apple-darwin8.7.2" +@x = weak global i32 0 ; <i32*> [#uses=1] +@llvm.used = appending global [1 x i8*] [ i8* bitcast (i32* @x to i8*) ] ; <[1 x i8*]*> [#uses=0] +
diff --git a/src/LLVM/test/CodeGen/X86/darwin-quote.ll b/src/LLVM/test/CodeGen/X86/darwin-quote.ll new file mode 100644 index 0000000..8fddc11 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/darwin-quote.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s + + +define internal i64 @baz() nounwind { + %tmp = load i64* @"+x" + ret i64 %tmp +; CHECK: _baz: +; CHECK: movl "L_+x$non_lazy_ptr", %ecx +} + + +@"+x" = external global i64 + +; CHECK: "L_+x$non_lazy_ptr": +; CHECK: .indirect_symbol "_+x"
diff --git a/src/LLVM/test/CodeGen/X86/darwin-stub.ll b/src/LLVM/test/CodeGen/X86/darwin-stub.ll new file mode 100644 index 0000000..b4d2e1a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/darwin-stub.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin | grep stub +; RUN: llc < %s -mtriple=i386-apple-darwin9 | not grep stub + +@"\01LC" = internal constant [13 x i8] c"Hello World!\00" ; <[13 x i8]*> [#uses=1] + +define i32 @main() nounwind { +entry: + %0 = tail call i32 @puts(i8* getelementptr ([13 x i8]* @"\01LC", i32 0, i32 0)) nounwind ; <i32> [#uses=0] + ret i32 0 +} + +declare i32 @puts(i8*)
diff --git a/src/LLVM/test/CodeGen/X86/dbg-at-specficiation.ll b/src/LLVM/test/CodeGen/X86/dbg-at-specficiation.ll new file mode 100644 index 0000000..aa5e6ef --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/dbg-at-specficiation.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s | FileCheck %s +; Radar 10147769 +; Do not unnecessarily use AT_specification DIE. +; CHECK-NOT: AT_specification + +@a = common global [10 x i32] zeroinitializer, align 16 + +!llvm.dbg.cu = !{!0} + +!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"x.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 140253)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ] +!1 = metadata !{metadata !2} +!2 = metadata !{i32 0} +!3 = metadata !{metadata !4} +!4 = metadata !{metadata !5} +!5 = metadata !{i32 720948, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, [10 x i32]* @a} ; [ DW_TAG_variable ] +!6 = metadata !{i32 720937, metadata !"x.c", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ] +!7 = metadata !{i32 720897, null, metadata !"", null, i32 0, i64 320, i64 32, i32 0, i32 0, metadata !8, metadata !9, i32 0, i32 0} ; [ DW_TAG_array_type ] +!8 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!9 = metadata !{metadata !10} +!10 = metadata !{i32 720929, i64 0, i64 9} ; [ DW_TAG_subrange_type ]
diff --git a/src/LLVM/test/CodeGen/X86/dbg-byval-parameter.ll b/src/LLVM/test/CodeGen/X86/dbg-byval-parameter.ll new file mode 100644 index 0000000..5e55776 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/dbg-byval-parameter.ll
@@ -0,0 +1,45 @@ +; RUN: llc -march=x86 -asm-verbose < %s | grep DW_TAG_formal_parameter + + +%struct.Pt = type { double, double } +%struct.Rect = type { %struct.Pt, %struct.Pt } + +define double @foo(%struct.Rect* byval %my_r0) nounwind ssp { +entry: + %retval = alloca double ; <double*> [#uses=2] + %0 = alloca double ; <double*> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + call void @llvm.dbg.declare(metadata !{%struct.Rect* %my_r0}, metadata !0), !dbg !15 + %1 = getelementptr inbounds %struct.Rect* %my_r0, i32 0, i32 0, !dbg !16 ; <%struct.Pt*> [#uses=1] + %2 = getelementptr inbounds %struct.Pt* %1, i32 0, i32 0, !dbg !16 ; <double*> [#uses=1] + %3 = load double* %2, align 8, !dbg !16 ; <double> [#uses=1] + store double %3, double* %0, align 8, !dbg !16 + %4 = load double* %0, align 8, !dbg !16 ; <double> [#uses=1] + store double %4, double* %retval, align 8, !dbg !16 + br label %return, !dbg !16 + +return: ; preds = %entry + %retval1 = load double* %retval, !dbg !16 ; <double> [#uses=1] + ret double %retval1, !dbg !16 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +!0 = metadata !{i32 524545, metadata !1, metadata !"my_r0", metadata !2, i32 11, metadata !7} ; [ DW_TAG_arg_variable ] +!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 11, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 524329, metadata !"b2.c", metadata !"/tmp/", metadata !3} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 524305, i32 0, i32 1, metadata !"b2.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] +!5 = metadata !{metadata !6, metadata !7} +!6 = metadata !{i32 524324, metadata !2, metadata !"double", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] +!7 = metadata !{i32 524307, metadata !2, metadata !"Rect", metadata !2, i32 6, i64 256, i64 64, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_structure_type ] +!8 = metadata !{metadata !9, metadata !14} +!9 = metadata !{i32 524301, metadata !7, metadata !"P1", metadata !2, i32 7, i64 128, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ] +!10 = metadata !{i32 524307, metadata !2, metadata !"Pt", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ] +!11 = metadata !{metadata !12, metadata !13} +!12 = metadata !{i32 524301, metadata !10, metadata !"x", metadata !2, i32 2, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ] +!13 = metadata !{i32 524301, metadata !10, metadata !"y", metadata !2, i32 3, i64 64, i64 64, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ] +!14 = metadata !{i32 524301, metadata !7, metadata !"P2", metadata !2, i32 8, i64 128, i64 64, i64 128, i32 0, metadata !10} ; [ DW_TAG_member ] +!15 = metadata !{i32 11, i32 0, metadata !1, null} +!16 = metadata !{i32 12, i32 0, metadata !17, null} +!17 = metadata !{i32 524299, metadata !1, i32 11, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/src/LLVM/test/CodeGen/X86/dbg-const-int.ll b/src/LLVM/test/CodeGen/X86/dbg-const-int.ll new file mode 100644 index 0000000..bfc96f1 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/dbg-const-int.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s - | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-macosx10.6.7" +; Radar 9511391 + +;CHECK: .byte 4 ## DW_AT_const_value +define i32 @foo() nounwind uwtable readnone optsize ssp { +entry: + tail call void @llvm.dbg.value(metadata !8, i64 0, metadata !6), !dbg !9 + ret i32 42, !dbg !10 +} + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.cu = !{!0} +!llvm.dbg.sp = !{!1} +!llvm.dbg.lv.foo = !{!6} + +!0 = metadata !{i32 589841, i32 0, i32 12, metadata !"a.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 132191)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!1 = metadata !{i32 589870, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, i32 ()* @foo, null, null} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 589865, metadata !"a.c", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 589845, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5} +!5 = metadata !{i32 589860, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 590080, metadata !7, metadata !"i", metadata !2, i32 2, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] +!7 = metadata !{i32 589835, metadata !1, i32 1, i32 11, metadata !2, i32 0} ; [ DW_TAG_lexical_block ] +!8 = metadata !{i32 42} +!9 = metadata !{i32 2, i32 12, metadata !7, null} +!10 = metadata !{i32 3, i32 2, metadata !7, null}
diff --git a/src/LLVM/test/CodeGen/X86/dbg-const.ll b/src/LLVM/test/CodeGen/X86/dbg-const.ll new file mode 100644 index 0000000..5a51eb8 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/dbg-const.ll
@@ -0,0 +1,34 @@ +; RUN: llc < %s - | FileCheck %s +target triple = "x86_64-apple-darwin10.0.0" + +;CHECK: ## DW_OP_constu +;CHECK-NEXT: .byte 42 +define i32 @foobar() nounwind readonly noinline ssp { +entry: + %call = tail call i32 @bar(), !dbg !11 + tail call void @llvm.dbg.value(metadata !8, i64 0, metadata !6), !dbg !9 + %call2 = tail call i32 @bar(), !dbg !11 + tail call void @llvm.dbg.value(metadata !{i32 %call}, i64 0, metadata !6), !dbg !11 + %add = add nsw i32 %call2, %call, !dbg !12 + ret i32 %add, !dbg !10 +} + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone +declare i32 @bar() nounwind readnone + +!llvm.dbg.sp = !{!0} +!llvm.dbg.lv.foobar = !{!6} + +!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foobar", metadata !"foobar", metadata !"foobar", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @foobar} +!1 = metadata !{i32 524329, metadata !"mu.c", metadata !"/private/tmp", metadata !2} +!2 = metadata !{i32 524305, i32 0, i32 12, metadata !"mu.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 114183)", i1 true, i1 true, metadata !"", i32 0} +!3 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} +!4 = metadata !{metadata !5} +!5 = metadata !{i32 524324, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} +!6 = metadata !{i32 524544, metadata !7, metadata !"j", metadata !1, i32 15, metadata !5} +!7 = metadata !{i32 524299, metadata !0, i32 12, i32 52, metadata !1, i32 0} +!8 = metadata !{i32 42} +!9 = metadata !{i32 15, i32 12, metadata !7, null} +!10 = metadata !{i32 23, i32 3, metadata !7, null} +!11 = metadata !{i32 17, i32 3, metadata !7, null} +!12 = metadata !{i32 18, i32 3, metadata !7, null}
diff --git a/src/LLVM/test/CodeGen/X86/dbg-declare-arg.ll b/src/LLVM/test/CodeGen/X86/dbg-declare-arg.ll new file mode 100644 index 0000000..367c1ef --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/dbg-declare-arg.ll
@@ -0,0 +1,123 @@ +; RUN: llc -O0 -fast-isel=false < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-macosx10.6.7" +;Radar 9321650 + +;CHECK: ##DEBUG_VALUE: my_a + +%class.A = type { i32, i32, i32, i32 } + +define void @_Z3fooi(%class.A* sret %agg.result, i32 %i) ssp { +entry: + %i.addr = alloca i32, align 4 + %j = alloca i32, align 4 + %nrvo = alloca i1 + %cleanup.dest.slot = alloca i32 + store i32 %i, i32* %i.addr, align 4 + call void @llvm.dbg.declare(metadata !{i32* %i.addr}, metadata !26), !dbg !27 + call void @llvm.dbg.declare(metadata !{i32* %j}, metadata !28), !dbg !30 + store i32 0, i32* %j, align 4, !dbg !31 + %tmp = load i32* %i.addr, align 4, !dbg !32 + %cmp = icmp eq i32 %tmp, 42, !dbg !32 + br i1 %cmp, label %if.then, label %if.end, !dbg !32 + +if.then: ; preds = %entry + %tmp1 = load i32* %i.addr, align 4, !dbg !33 + %add = add nsw i32 %tmp1, 1, !dbg !33 + store i32 %add, i32* %j, align 4, !dbg !33 + br label %if.end, !dbg !35 + +if.end: ; preds = %if.then, %entry + store i1 false, i1* %nrvo, !dbg !36 + call void @llvm.dbg.declare(metadata !{%class.A* %agg.result}, metadata !37), !dbg !39 + %tmp2 = load i32* %j, align 4, !dbg !40 + %x = getelementptr inbounds %class.A* %agg.result, i32 0, i32 0, !dbg !40 + store i32 %tmp2, i32* %x, align 4, !dbg !40 + store i1 true, i1* %nrvo, !dbg !41 + store i32 1, i32* %cleanup.dest.slot + %nrvo.val = load i1* %nrvo, !dbg !42 + br i1 %nrvo.val, label %nrvo.skipdtor, label %nrvo.unused, !dbg !42 + +nrvo.unused: ; preds = %if.end + call void @_ZN1AD1Ev(%class.A* %agg.result), !dbg !42 + br label %nrvo.skipdtor, !dbg !42 + +nrvo.skipdtor: ; preds = %nrvo.unused, %if.end + ret void, !dbg !42 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +define linkonce_odr void @_ZN1AD1Ev(%class.A* %this) unnamed_addr ssp align 2 { +entry: + 
%this.addr = alloca %class.A*, align 8 + store %class.A* %this, %class.A** %this.addr, align 8 + call void @llvm.dbg.declare(metadata !{%class.A** %this.addr}, metadata !43), !dbg !44 + %this1 = load %class.A** %this.addr + call void @_ZN1AD2Ev(%class.A* %this1) + ret void, !dbg !45 +} + +define linkonce_odr void @_ZN1AD2Ev(%class.A* %this) unnamed_addr nounwind ssp align 2 { +entry: + %this.addr = alloca %class.A*, align 8 + store %class.A* %this, %class.A** %this.addr, align 8 + call void @llvm.dbg.declare(metadata !{%class.A** %this.addr}, metadata !46), !dbg !47 + %this1 = load %class.A** %this.addr + %x = getelementptr inbounds %class.A* %this1, i32 0, i32 0, !dbg !48 + store i32 1, i32* %x, align 4, !dbg !48 + ret void, !dbg !48 +} + +!llvm.dbg.sp = !{!0, !10, !14, !19, !22, !25} + +!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"~A", metadata !"~A", metadata !"", metadata !3, i32 2, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 589826, metadata !2, metadata !"A", metadata !3, i32 2, i64 128, i64 32, i32 0, i32 0, null, metadata !4, i32 0, null, null} ; [ DW_TAG_class_type ] +!2 = metadata !{i32 589841, i32 0, i32 4, metadata !"a.cc", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 130127)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 589865, metadata !"a.cc", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] +!4 = metadata !{metadata !5, metadata !7, metadata !8, metadata !9, metadata !0, metadata !10, metadata !14} +!5 = metadata !{i32 589837, metadata !3, metadata !"x", metadata !3, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ] +!6 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!7 = metadata !{i32 589837, metadata !3, metadata !"y", metadata !3, i32 2, i64 32, i64 32, i64 32, i32 0, metadata 
!6} ; [ DW_TAG_member ] +!8 = metadata !{i32 589837, metadata !3, metadata !"z", metadata !3, i32 2, i64 32, i64 32, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ] +!9 = metadata !{i32 589837, metadata !3, metadata !"o", metadata !3, i32 2, i64 32, i64 32, i64 96, i32 0, metadata !6} ; [ DW_TAG_member ] +!10 = metadata !{i32 589870, i32 0, metadata !1, metadata !"A", metadata !"A", metadata !"", metadata !3, i32 2, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null} ; [ DW_TAG_subprogram ] +!11 = metadata !{i32 589845, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!12 = metadata !{null, metadata !13} +!13 = metadata !{i32 589839, metadata !2, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ] +!14 = metadata !{i32 589870, i32 0, metadata !1, metadata !"A", metadata !"A", metadata !"", metadata !3, i32 2, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null} ; [ DW_TAG_subprogram ] +!15 = metadata !{i32 589845, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!16 = metadata !{null, metadata !13, metadata !17} +!17 = metadata !{i32 589840, metadata !2, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !18} ; [ DW_TAG_reference_type ] +!18 = metadata !{i32 589862, metadata !2, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !1} ; [ DW_TAG_const_type ] +!19 = metadata !{i32 589870, i32 0, metadata !3, metadata !"foo", metadata !"foo", metadata !"_Z3fooi", metadata !3, i32 4, metadata !20, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%class.A*, i32)* @_Z3fooi, null, null} ; [ DW_TAG_subprogram ] +!20 = metadata !{i32 589845, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !21, i32 0, i32 0} ; [ 
DW_TAG_subroutine_type ] +!21 = metadata !{metadata !1} +!22 = metadata !{i32 589870, i32 0, metadata !3, metadata !"~A", metadata !"~A", metadata !"_ZN1AD1Ev", metadata !3, i32 2, metadata !23, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%class.A*)* @_ZN1AD1Ev, null, null} ; [ DW_TAG_subprogram ] +!23 = metadata !{i32 589845, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !24, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!24 = metadata !{null} +!25 = metadata !{i32 589870, i32 0, metadata !3, metadata !"~A", metadata !"~A", metadata !"_ZN1AD2Ev", metadata !3, i32 2, metadata !23, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%class.A*)* @_ZN1AD2Ev, null, null} ; [ DW_TAG_subprogram ] +!26 = metadata !{i32 590081, metadata !19, metadata !"i", metadata !3, i32 16777220, metadata !6, i32 0} ; [ DW_TAG_arg_variable ] +!27 = metadata !{i32 4, i32 11, metadata !19, null} +!28 = metadata !{i32 590080, metadata !29, metadata !"j", metadata !3, i32 5, metadata !6, i32 0} ; [ DW_TAG_auto_variable ] +!29 = metadata !{i32 589835, metadata !19, i32 4, i32 14, metadata !3, i32 0} ; [ DW_TAG_lexical_block ] +!30 = metadata !{i32 5, i32 7, metadata !29, null} +!31 = metadata !{i32 5, i32 12, metadata !29, null} +!32 = metadata !{i32 6, i32 3, metadata !29, null} +!33 = metadata !{i32 7, i32 5, metadata !34, null} +!34 = metadata !{i32 589835, metadata !29, i32 6, i32 16, metadata !3, i32 1} ; [ DW_TAG_lexical_block ] +!35 = metadata !{i32 8, i32 3, metadata !34, null} +!36 = metadata !{i32 9, i32 9, metadata !29, null} +!37 = metadata !{i32 590080, metadata !29, metadata !"my_a", metadata !3, i32 9, metadata !38, i32 0} ; [ DW_TAG_auto_variable ] +!38 = metadata !{i32 589840, metadata !2, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !1} ; [ DW_TAG_reference_type ] +!39 = metadata !{i32 9, i32 5, metadata !29, null} +!40 = metadata !{i32 10, i32 3, metadata !29, null} +!41 = metadata 
!{i32 11, i32 3, metadata !29, null} +!42 = metadata !{i32 12, i32 1, metadata !29, null} +!43 = metadata !{i32 590081, metadata !22, metadata !"this", metadata !3, i32 16777218, metadata !13, i32 64} ; [ DW_TAG_arg_variable ] +!44 = metadata !{i32 2, i32 47, metadata !22, null} +!45 = metadata !{i32 2, i32 61, metadata !22, null} +!46 = metadata !{i32 590081, metadata !25, metadata !"this", metadata !3, i32 16777218, metadata !13, i32 64} ; [ DW_TAG_arg_variable ] +!47 = metadata !{i32 2, i32 47, metadata !25, null} +!48 = metadata !{i32 2, i32 54, metadata !49, null} +!49 = metadata !{i32 589835, metadata !25, i32 2, i32 52, metadata !3, i32 2} ; [ DW_TAG_lexical_block ]
diff --git a/src/LLVM/test/CodeGen/X86/dbg-file-name.ll b/src/LLVM/test/CodeGen/X86/dbg-file-name.ll new file mode 100644 index 0000000..3a849aa --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/dbg-file-name.ll
@@ -0,0 +1,19 @@ +; RUN: llc -mtriple x86_64-apple-darwin10.0.0 < %s | FileCheck %s + +; Radar 8884898 +; CHECK: file 1 "/Users/manav/one/two{{/|\\\\}}simple.c" + +declare i32 @printf(i8*, ...) nounwind + +define i32 @main() nounwind { + ret i32 0 +} + +!llvm.dbg.sp = !{ !6} + +!1 = metadata !{i32 589865, metadata !"simple.c", metadata !"/Users/manav/one/two", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 589841, i32 0, i32 1, metadata !"simple.c", metadata !"/Users/manav/one/two", metadata !"LLVM build 00", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!5 = metadata !{i32 589860, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 9, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] +!7 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_subroutine_type ] +!8 = metadata !{metadata !5}
diff --git a/src/LLVM/test/CodeGen/X86/dbg-i128-const.ll b/src/LLVM/test/CodeGen/X86/dbg-i128-const.ll new file mode 100644 index 0000000..bd96d91 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/dbg-i128-const.ll
@@ -0,0 +1,26 @@ +; RUN: llc -mtriple=x86_64-linux < %s | FileCheck %s + +; CHECK: DW_AT_const_value +; CHECK-NEXT: 42 + +define i128 @__foo(i128 %a, i128 %b) nounwind { +entry: + tail call void @llvm.dbg.value(metadata !0, i64 0, metadata !1), !dbg !11 + %add = add i128 %a, %b, !dbg !11 + ret i128 %add, !dbg !11 +} + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!0 = metadata !{i128 42 } +!1 = metadata !{i32 524544, metadata !2, metadata !"MAX", metadata !4, i32 29, metadata !8} ; [ DW_TAG_auto_variable ] +!2 = metadata !{i32 524299, metadata !3, i32 26, i32 0} ; [ DW_TAG_lexical_block ] +!3 = metadata !{i32 524334, i32 0, metadata !4, metadata !"__foo", metadata !"__foo", metadata !"__foo", metadata !4, i32 26, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ] +!4 = metadata !{i32 524329, metadata !"foo.c", metadata !"/tmp", metadata !5} ; [ DW_TAG_file_type ] +!5 = metadata !{i32 524305, i32 0, i32 1, metadata !"foo.c", metadata !"/tmp", metadata !"clang", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!6 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{metadata !8, metadata !8, metadata !8} +!8 = metadata !{i32 524310, metadata !4, metadata !"ti_int", metadata !9, i32 78, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ] +!9 = metadata !{i32 524329, metadata !"myint.h", metadata !"/tmp", metadata !5} ; [ DW_TAG_file_type ] +!10 = metadata !{i32 524324, metadata !4, metadata !"", metadata !4, i32 0, i64 128, i64 128, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!11 = metadata !{i32 29, i32 0, metadata !2, null}
diff --git a/src/LLVM/test/CodeGen/X86/dbg-inline.ll b/src/LLVM/test/CodeGen/X86/dbg-inline.ll new file mode 100644 index 0000000..523c62e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/dbg-inline.ll
@@ -0,0 +1,140 @@ +; RUN: llc < %s | FileCheck %s +; Radar 7881628, 9747970 +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-macosx10.7.0" + +%class.APFloat = type { i32 } + +define i32 @_ZNK7APFloat9partCountEv(%class.APFloat* nocapture %this) nounwind uwtable readonly optsize ssp align 2 { +entry: + tail call void @llvm.dbg.value(metadata !{%class.APFloat* %this}, i64 0, metadata !28), !dbg !41 + %prec = getelementptr inbounds %class.APFloat* %this, i64 0, i32 0, !dbg !42 + %tmp = load i32* %prec, align 4, !dbg !42, !tbaa !44 + tail call void @llvm.dbg.value(metadata !{i32 %tmp}, i64 0, metadata !47), !dbg !48 + %add.i = add i32 %tmp, 42, !dbg !49 + ret i32 %add.i, !dbg !42 +} + +define zeroext i1 @_ZNK7APFloat14bitwiseIsEqualERKS_(%class.APFloat* %this, %class.APFloat* %rhs) uwtable optsize ssp align 2 { +entry: + tail call void @llvm.dbg.value(metadata !{%class.APFloat* %this}, i64 0, metadata !29), !dbg !51 + tail call void @llvm.dbg.value(metadata !{%class.APFloat* %rhs}, i64 0, metadata !30), !dbg !52 + tail call void @llvm.dbg.value(metadata !{%class.APFloat* %this}, i64 0, metadata !53), !dbg !55 + %prec.i = getelementptr inbounds %class.APFloat* %this, i64 0, i32 0, !dbg !56 +;CHECK: DW_TAG_inlined_subroutine +;CHECK: DW_AT_abstract_origin +;CHECK: DW_AT_ranges + %tmp.i = load i32* %prec.i, align 4, !dbg !56, !tbaa !44 + tail call void @llvm.dbg.value(metadata !{i32 %tmp.i}, i64 0, metadata !57), !dbg !58 + %add.i.i = add i32 %tmp.i, 42, !dbg !59 + tail call void @llvm.dbg.value(metadata !{i32 %add.i.i}, i64 0, metadata !31), !dbg !54 + %call2 = tail call i64* @_ZNK7APFloat16significandPartsEv(%class.APFloat* %this) optsize, !dbg !60 + tail call void @llvm.dbg.value(metadata !{i64* %call2}, i64 0, metadata !34), !dbg !60 + %call3 = tail call i64* @_ZNK7APFloat16significandPartsEv(%class.APFloat* %rhs) optsize, 
!dbg !61 + tail call void @llvm.dbg.value(metadata !{i64* %call3}, i64 0, metadata !37), !dbg !61 + %tmp = zext i32 %add.i.i to i64 + br label %for.cond, !dbg !62 + +for.cond: ; preds = %for.inc, %entry + %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ] + %tmp13 = sub i64 %tmp, %indvar, !dbg !62 + %i.0 = trunc i64 %tmp13 to i32, !dbg !62 + %cmp = icmp sgt i32 %i.0, 0, !dbg !62 + br i1 %cmp, label %for.body, label %return, !dbg !62 + +for.body: ; preds = %for.cond + %p.0 = getelementptr i64* %call2, i64 %indvar, !dbg !63 + %tmp6 = load i64* %p.0, align 8, !dbg !63, !tbaa !66 + %tmp8 = load i64* %call3, align 8, !dbg !63, !tbaa !66 + %cmp9 = icmp eq i64 %tmp6, %tmp8, !dbg !63 + br i1 %cmp9, label %for.inc, label %return, !dbg !63 + +for.inc: ; preds = %for.body + %indvar.next = add i64 %indvar, 1, !dbg !67 + br label %for.cond, !dbg !67 + +return: ; preds = %for.cond, %for.body + %retval.0 = phi i1 [ false, %for.body ], [ true, %for.cond ] + ret i1 %retval.0, !dbg !68 +} + +declare i64* @_ZNK7APFloat16significandPartsEv(%class.APFloat*) optsize + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.cu = !{!0} +!llvm.dbg.sp = !{!1, !7, !12, !23, !24, !25} +!llvm.dbg.lv._ZNK7APFloat9partCountEv = !{!28} +!llvm.dbg.lv._ZNK7APFloat14bitwiseIsEqualERKS_ = !{!29, !30, !31, !34, !37} +!llvm.dbg.lv._ZL16partCountForBitsj = !{!38} +!llvm.dbg.gv = !{!39} + +!0 = metadata !{i32 655377, i32 0, i32 4, metadata !"/Volumes/Athwagate/R9747970/apf.cc", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 136149)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!1 = metadata !{i32 655406, i32 0, metadata !2, metadata !"bitwiseIsEqual", metadata !"bitwiseIsEqual", metadata !"_ZNK7APFloat14bitwiseIsEqualERKS_", metadata !3, i32 8, metadata !19, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 655362, metadata !0, metadata !"APFloat", metadata !3, i32 
6, i64 32, i64 32, i32 0, i32 0, null, metadata !4, i32 0, null, null} ; [ DW_TAG_class_type ] +!3 = metadata !{i32 655401, metadata !"/Volumes/Athwagate/R9747970/apf.cc", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ] +!4 = metadata !{metadata !5, metadata !1, metadata !7, metadata !12} +!5 = metadata !{i32 655373, metadata !2, metadata !"prec", metadata !3, i32 13, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ] +!6 = metadata !{i32 655396, metadata !0, metadata !"unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!7 = metadata !{i32 655406, i32 0, metadata !2, metadata !"partCount", metadata !"partCount", metadata !"_ZNK7APFloat9partCountEv", metadata !3, i32 9, metadata !8, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null} ; [ DW_TAG_subprogram ] +!8 = metadata !{i32 655381, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !9, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!9 = metadata !{metadata !6, metadata !10} +!10 = metadata !{i32 655375, metadata !0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !11} ; [ DW_TAG_pointer_type ] +!11 = metadata !{i32 655398, metadata !0, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ] +!12 = metadata !{i32 655406, i32 0, metadata !2, metadata !"significandParts", metadata !"significandParts", metadata !"_ZNK7APFloat16significandPartsEv", metadata !3, i32 11, metadata !13, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null} ; [ DW_TAG_subprogram ] +!13 = metadata !{i32 655381, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !14, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!14 = metadata !{metadata !15, metadata !10} +!15 = metadata !{i32 655375, metadata !0, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ] +!16 = metadata !{i32 
655382, metadata !0, metadata !"integerPart", metadata !3, i32 2, i64 0, i64 0, i64 0, i32 0, metadata !17} ; [ DW_TAG_typedef ] +!17 = metadata !{i32 655382, metadata !0, metadata !"uint64_t", metadata !3, i32 1, i64 0, i64 0, i64 0, i32 0, metadata !18} ; [ DW_TAG_typedef ] +!18 = metadata !{i32 655396, metadata !0, metadata !"long long unsigned int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!19 = metadata !{i32 655381, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !20, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!20 = metadata !{metadata !21, metadata !10, metadata !22} +!21 = metadata !{i32 655396, metadata !0, metadata !"bool", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ] +!22 = metadata !{i32 655376, metadata !0, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_reference_type ] +!23 = metadata !{i32 655406, i32 0, metadata !0, metadata !"partCount", metadata !"partCount", metadata !"_ZNK7APFloat9partCountEv", metadata !3, i32 23, metadata !8, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (%class.APFloat*)* @_ZNK7APFloat9partCountEv, null, metadata !7} ; [ DW_TAG_subprogram ] +!24 = metadata !{i32 655406, i32 0, metadata !0, metadata !"bitwiseIsEqual", metadata !"bitwiseIsEqual", metadata !"_ZNK7APFloat14bitwiseIsEqualERKS_", metadata !3, i32 28, metadata !19, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i1 (%class.APFloat*, %class.APFloat*)* @_ZNK7APFloat14bitwiseIsEqualERKS_, null, metadata !1} ; [ DW_TAG_subprogram ] +!25 = metadata !{i32 655406, i32 0, metadata !3, metadata !"partCountForBits", metadata !"partCountForBits", metadata !"", metadata !3, i32 17, metadata !26, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null, null} ; [ DW_TAG_subprogram ] +!26 = metadata !{i32 655381, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !27, i32 0, i32 
0} ; [ DW_TAG_subroutine_type ] +!27 = metadata !{metadata !6} +!28 = metadata !{i32 655617, metadata !23, metadata !"this", metadata !3, i32 16777238, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ] +!29 = metadata !{i32 655617, metadata !24, metadata !"this", metadata !3, i32 16777244, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ] +!30 = metadata !{i32 655617, metadata !24, metadata !"rhs", metadata !3, i32 33554460, metadata !22, i32 0, i32 0} ; [ DW_TAG_arg_variable ] +!31 = metadata !{i32 655616, metadata !32, metadata !"i", metadata !3, i32 29, metadata !33, i32 0, i32 0} ; [ DW_TAG_auto_variable ] +!32 = metadata !{i32 655371, metadata !24, i32 28, i32 56, metadata !3, i32 1} ; [ DW_TAG_lexical_block ] +!33 = metadata !{i32 655396, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!34 = metadata !{i32 655616, metadata !32, metadata !"p", metadata !3, i32 30, metadata !35, i32 0, i32 0} ; [ DW_TAG_auto_variable ] +!35 = metadata !{i32 655375, metadata !0, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !36} ; [ DW_TAG_pointer_type ] +!36 = metadata !{i32 655398, metadata !0, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !16} ; [ DW_TAG_const_type ] +!37 = metadata !{i32 655616, metadata !32, metadata !"q", metadata !3, i32 31, metadata !35, i32 0, i32 0} ; [ DW_TAG_auto_variable ] +!38 = metadata !{i32 655617, metadata !25, metadata !"bits", metadata !3, i32 16777232, metadata !6, i32 0, i32 0} ; [ DW_TAG_arg_variable ] +!39 = metadata !{i32 655412, i32 0, metadata !3, metadata !"integerPartWidth", metadata !"integerPartWidth", metadata !"integerPartWidth", metadata !3, i32 3, metadata !40, i32 1, i32 1, i32 42} ; [ DW_TAG_variable ] +!40 = metadata !{i32 655398, metadata !0, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_const_type ] +!41 = metadata !{i32 22, i32 23, metadata !23, null} +!42 = metadata !{i32 24, i32 10, 
metadata !43, null} +!43 = metadata !{i32 655371, metadata !23, i32 23, i32 1, metadata !3, i32 0} ; [ DW_TAG_lexical_block ] +!44 = metadata !{metadata !"int", metadata !45} +!45 = metadata !{metadata !"omnipotent char", metadata !46} +!46 = metadata !{metadata !"Simple C/C++ TBAA", null} +!47 = metadata !{i32 655617, metadata !25, metadata !"bits", metadata !3, i32 16777232, metadata !6, i32 0, metadata !42} ; [ DW_TAG_arg_variable ] +!48 = metadata !{i32 16, i32 58, metadata !25, metadata !42} +!49 = metadata !{i32 18, i32 3, metadata !50, metadata !42} +!50 = metadata !{i32 655371, metadata !25, i32 17, i32 1, metadata !3, i32 4} ; [ DW_TAG_lexical_block ] +!51 = metadata !{i32 28, i32 15, metadata !24, null} +!52 = metadata !{i32 28, i32 45, metadata !24, null} +!53 = metadata !{i32 655617, metadata !23, metadata !"this", metadata !3, i32 16777238, metadata !10, i32 64, metadata !54} ; [ DW_TAG_arg_variable ] +!54 = metadata !{i32 29, i32 10, metadata !32, null} +!55 = metadata !{i32 22, i32 23, metadata !23, metadata !54} +!56 = metadata !{i32 24, i32 10, metadata !43, metadata !54} +!57 = metadata !{i32 655617, metadata !25, metadata !"bits", metadata !3, i32 16777232, metadata !6, i32 0, metadata !56} ; [ DW_TAG_arg_variable ] +!58 = metadata !{i32 16, i32 58, metadata !25, metadata !56} +!59 = metadata !{i32 18, i32 3, metadata !50, metadata !56} +!60 = metadata !{i32 30, i32 24, metadata !32, null} +!61 = metadata !{i32 31, i32 24, metadata !32, null} +!62 = metadata !{i32 32, i32 3, metadata !32, null} +!63 = metadata !{i32 33, i32 5, metadata !64, null} +!64 = metadata !{i32 655371, metadata !65, i32 32, i32 25, metadata !3, i32 3} ; [ DW_TAG_lexical_block ] +!65 = metadata !{i32 655371, metadata !32, i32 32, i32 3, metadata !3, i32 2} ; [ DW_TAG_lexical_block ] +!66 = metadata !{metadata !"long long", metadata !45} +!67 = metadata !{i32 32, i32 15, metadata !65, null} +!68 = metadata !{i32 37, i32 1, metadata !32, null}
diff --git a/src/LLVM/test/CodeGen/X86/dbg-large-unsigned-const.ll b/src/LLVM/test/CodeGen/X86/dbg-large-unsigned-const.ll new file mode 100644 index 0000000..fc295c6 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/dbg-large-unsigned-const.ll
@@ -0,0 +1,61 @@ +; RUN: llc -filetype=obj %s -o /dev/null +; Hanle large unsigned constant values. +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" +target triple = "i386-apple-macosx10.7.0" + +define zeroext i1 @_Z3iseRKxS0_(i64* nocapture %LHS, i64* nocapture %RHS) nounwind readonly optsize ssp { +entry: + tail call void @llvm.dbg.value(metadata !{i64* %LHS}, i64 0, metadata !7), !dbg !13 + tail call void @llvm.dbg.value(metadata !{i64* %RHS}, i64 0, metadata !11), !dbg !14 + %tmp1 = load i64* %LHS, align 4, !dbg !15, !tbaa !17 + %tmp3 = load i64* %RHS, align 4, !dbg !15, !tbaa !17 + %cmp = icmp eq i64 %tmp1, %tmp3, !dbg !15 + ret i1 %cmp, !dbg !15 +} + +define zeroext i1 @_Z2fnx(i64 %a) nounwind readnone optsize ssp { +entry: + tail call void @llvm.dbg.value(metadata !{i64 %a}, i64 0, metadata !12), !dbg !20 + tail call void @llvm.dbg.value(metadata !{i64 %a}, i64 0, metadata !12), !dbg !20 + tail call void @llvm.dbg.value(metadata !{i64 %a}, i64 0, metadata !21), !dbg !24 + tail call void @llvm.dbg.value(metadata !25, i64 0, metadata !26), !dbg !27 + %cmp.i = icmp eq i64 %a, 9223372036854775807, !dbg !28 + ret i1 %cmp.i, !dbg !22 +} + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.cu = !{!0} +!llvm.dbg.sp = !{!1, !6} +!llvm.dbg.lv._Z3iseRKxS0_ = !{!7, !11} +!llvm.dbg.lv._Z2fnx = !{!12} + +!0 = metadata !{i32 655377, i32 0, i32 4, metadata !"lli.cc", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 135593)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!1 = metadata !{i32 655406, i32 0, metadata !2, metadata !"ise", metadata !"ise", metadata !"_Z3iseRKxS0_", metadata !2, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i1 (i64*, i64*)* @_Z3iseRKxS0_, null, null} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 655401, metadata !"lli.cc", metadata !"/private/tmp", 
metadata !0} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 655381, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5} +!5 = metadata !{i32 655396, metadata !0, metadata !"bool", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 655406, i32 0, metadata !2, metadata !"fn", metadata !"fn", metadata !"_Z2fnx", metadata !2, i32 6, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i1 (i64)* @_Z2fnx, null, null} ; [ DW_TAG_subprogram ] +!7 = metadata !{i32 655617, metadata !1, metadata !"LHS", metadata !2, i32 16777218, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] +!8 = metadata !{i32 655376, metadata !0, null, null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_reference_type ] +!9 = metadata !{i32 655398, metadata !0, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_const_type ] +!10 = metadata !{i32 655396, metadata !0, metadata !"long long int", null, i32 0, i64 64, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!11 = metadata !{i32 655617, metadata !1, metadata !"RHS", metadata !2, i32 33554434, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] +!12 = metadata !{i32 655617, metadata !6, metadata !"a", metadata !2, i32 16777222, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ] +!13 = metadata !{i32 2, i32 27, metadata !1, null} +!14 = metadata !{i32 2, i32 49, metadata !1, null} +!15 = metadata !{i32 3, i32 3, metadata !16, null} +!16 = metadata !{i32 655371, metadata !1, i32 2, i32 54, metadata !2, i32 0} ; [ DW_TAG_lexical_block ] +!17 = metadata !{metadata !"long long", metadata !18} +!18 = metadata !{metadata !"omnipotent char", metadata !19} +!19 = metadata !{metadata !"Simple C/C++ TBAA", null} +!20 = metadata !{i32 6, i32 19, metadata !6, null} +!21 = metadata !{i32 655617, metadata !1, metadata !"LHS", metadata !2, i32 
16777218, metadata !8, i32 0, metadata !22} ; [ DW_TAG_arg_variable ] +!22 = metadata !{i32 7, i32 10, metadata !23, null} +!23 = metadata !{i32 655371, metadata !6, i32 6, i32 22, metadata !2, i32 1} ; [ DW_TAG_lexical_block ] +!24 = metadata !{i32 2, i32 27, metadata !1, metadata !22} +!25 = metadata !{i64 9223372036854775807} +!26 = metadata !{i32 655617, metadata !1, metadata !"RHS", metadata !2, i32 33554434, metadata !8, i32 0, metadata !22} ; [ DW_TAG_arg_variable ] +!27 = metadata !{i32 2, i32 49, metadata !1, metadata !22} +!28 = metadata !{i32 3, i32 3, metadata !16, metadata !22}
diff --git a/src/LLVM/test/CodeGen/X86/dbg-merge-loc-entry.ll b/src/LLVM/test/CodeGen/X86/dbg-merge-loc-entry.ll new file mode 100644 index 0000000..afe1729 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/dbg-merge-loc-entry.ll
@@ -0,0 +1,75 @@ +; RUN: llc < %s | FileCheck %s +; RUN: llc < %s -regalloc=basic | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin8" + +;CHECK: Ldebug_loc0: +;CHECK-NEXT: .quad Lfunc_begin0 +;CHECK-NEXT: .quad L +;CHECK-NEXT: Lset{{.*}} = Ltmp{{.*}}-Ltmp{{.*}} ## Loc expr size +;CHECK-NEXT: .short Lset +;CHECK-NEXT: Ltmp +;CHECK-NEXT: .byte 85 ## DW_OP_reg5 +;CHECK-NEXT: Ltmp7 +;CHECK-NEXT: .quad 0 +;CHECK-NEXT: .quad 0 + +%0 = type { i64, i1 } + +@__clz_tab = external constant [256 x i8] + +define hidden i128 @__divti3(i128 %u, i128 %v) nounwind readnone { +entry: + tail call void @llvm.dbg.value(metadata !{i128 %u}, i64 0, metadata !14), !dbg !15 + tail call void @llvm.dbg.value(metadata !16, i64 0, metadata !17), !dbg !21 + br i1 undef, label %bb2, label %bb4, !dbg !22 + +bb2: ; preds = %entry + br label %bb4, !dbg !23 + +bb4: ; preds = %bb2, %entry + br i1 undef, label %__udivmodti4.exit, label %bb82.i, !dbg !24 + +bb82.i: ; preds = %bb4 + unreachable + +__udivmodti4.exit: ; preds = %bb4 + ret i128 undef, !dbg !27 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +declare %0 @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone + +!llvm.dbg.sp = !{!0, !9} + +!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"__udivmodti4", metadata !"__udivmodti4", metadata !"", metadata !1, i32 879, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 589865, metadata !"foobar.c", metadata !"/tmp", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 589841, i32 0, i32 1, metadata !"foobar.c", metadata !"/tmp", metadata !"4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5, metadata !5, metadata !5, metadata !8} +!5 = metadata !{i32 589846, metadata !6, metadata !"UTItype", metadata !6, i32 166, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ] +!6 = metadata !{i32 589865, metadata !"foobar.h", metadata !"/tmp", metadata !2} ; [ DW_TAG_file_type ] +!7 = metadata !{i32 589860, metadata !1, metadata !"", metadata !1, i32 0, i64 128, i64 128, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!8 = metadata !{i32 589839, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_pointer_type ] +!9 = metadata !{i32 589870, i32 0, metadata !1, metadata !"__divti3", metadata !"__divti3", metadata !"__divti3", metadata !1, i32 1094, metadata !10, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i128 (i128, i128)* @__divti3} ; [ DW_TAG_subprogram ] +!10 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ] +!11 = metadata !{metadata !12, metadata !12, metadata !12} +!12 = metadata !{i32 589846, metadata !6, metadata !"TItype", metadata !6, i32 160, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_typedef ] +!13 = metadata !{i32 589860, metadata !1, metadata !"", metadata !1, i32 0, i64 128, i64 128, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!14 = metadata !{i32 590081, metadata !9, metadata !"u", metadata !1, i32 1093, metadata !12, i32 0} ; [ DW_TAG_arg_variable ] +!15 = metadata !{i32 1093, i32 0, metadata !9, null} +!16 = metadata !{i64 0} +!17 = metadata !{i32 590080, metadata !18, metadata !"c", metadata !1, i32 1095, metadata !19, i32 0} ; [ DW_TAG_auto_variable ] +!18 = metadata !{i32 
589835, metadata !9, i32 1094, i32 0, metadata !1, i32 13} ; [ DW_TAG_lexical_block ] +!19 = metadata !{i32 589846, metadata !6, metadata !"word_type", metadata !6, i32 424, i64 0, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_typedef ] +!20 = metadata !{i32 589860, metadata !1, metadata !"long int", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!21 = metadata !{i32 1095, i32 0, metadata !18, null} +!22 = metadata !{i32 1103, i32 0, metadata !18, null} +!23 = metadata !{i32 1104, i32 0, metadata !18, null} +!24 = metadata !{i32 1003, i32 0, metadata !25, metadata !26} +!25 = metadata !{i32 589835, metadata !0, i32 879, i32 0, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!26 = metadata !{i32 1107, i32 0, metadata !18, null} +!27 = metadata !{i32 1111, i32 0, metadata !18, null}
diff --git a/src/LLVM/test/CodeGen/X86/dbg-prolog-end.ll b/src/LLVM/test/CodeGen/X86/dbg-prolog-end.ll new file mode 100644 index 0000000..81303bb --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/dbg-prolog-end.ll
@@ -0,0 +1,55 @@ +; RUN: llc -O0 < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-macosx10.6.7" + +;CHECK: .loc 1 2 11 prologue_end +define i32 @foo(i32 %i) nounwind ssp { +entry: + %i.addr = alloca i32, align 4 + %j = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 + call void @llvm.dbg.declare(metadata !{i32* %i.addr}, metadata !7), !dbg !8 + call void @llvm.dbg.declare(metadata !{i32* %j}, metadata !9), !dbg !11 + store i32 2, i32* %j, align 4, !dbg !12 + %tmp = load i32* %j, align 4, !dbg !13 + %inc = add nsw i32 %tmp, 1, !dbg !13 + store i32 %inc, i32* %j, align 4, !dbg !13 + %tmp1 = load i32* %j, align 4, !dbg !14 + %tmp2 = load i32* %i.addr, align 4, !dbg !14 + %add = add nsw i32 %tmp1, %tmp2, !dbg !14 + store i32 %add, i32* %j, align 4, !dbg !14 + %tmp3 = load i32* %j, align 4, !dbg !15 + ret i32 %tmp3, !dbg !15 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +define i32 @main() nounwind ssp { +entry: + %retval = alloca i32, align 4 + store i32 0, i32* %retval + %call = call i32 @foo(i32 21), !dbg !16 + ret i32 %call, !dbg !16 +} + +!llvm.dbg.cu = !{!0} +!llvm.dbg.sp = !{!1, !6} + +!0 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/a.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 131100)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!1 = metadata !{i32 589870, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32)* @foo, null, null} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 589865, metadata !"/tmp/a.c", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 589845, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 
0, i32 0} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5} +!5 = metadata !{i32 589860, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 589870, i32 0, metadata !2, metadata !"main", metadata !"main", metadata !"", metadata !2, i32 7, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @main, null, null} ; [ DW_TAG_subprogram ] +!7 = metadata !{i32 590081, metadata !1, metadata !"i", metadata !2, i32 16777217, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] +!8 = metadata !{i32 1, i32 13, metadata !1, null} +!9 = metadata !{i32 590080, metadata !10, metadata !"j", metadata !2, i32 2, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] +!10 = metadata !{i32 589835, metadata !1, i32 1, i32 16, metadata !2, i32 0} ; [ DW_TAG_lexical_block ] +!11 = metadata !{i32 2, i32 6, metadata !10, null} +!12 = metadata !{i32 2, i32 11, metadata !10, null} +!13 = metadata !{i32 3, i32 2, metadata !10, null} +!14 = metadata !{i32 4, i32 2, metadata !10, null} +!15 = metadata !{i32 5, i32 2, metadata !10, null} +!16 = metadata !{i32 8, i32 2, metadata !17, null} +!17 = metadata !{i32 589835, metadata !6, i32 7, i32 12, metadata !2, i32 1} ; [ DW_TAG_lexical_block ]
diff --git a/src/LLVM/test/CodeGen/X86/dbg-value-dag-combine.ll b/src/LLVM/test/CodeGen/X86/dbg-value-dag-combine.ll new file mode 100644 index 0000000..b115bf4 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/dbg-value-dag-combine.ll
@@ -0,0 +1,48 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" +; PR 9817 + + +declare <4 x i32> @__amdil_get_global_id_int() +declare void @llvm.dbg.value(metadata , i64 , metadata ) +define void @__OpenCL_test_kernel(i32 addrspace(1)* %ip) nounwind { +entry: + call void @llvm.dbg.value(metadata !{i32 addrspace(1)* %ip}, i64 0, metadata +!7), !dbg !8 + %0 = call <4 x i32> @__amdil_get_global_id_int() nounwind + %1 = extractelement <4 x i32> %0, i32 0 + call void @llvm.dbg.value(metadata !{i32 %1}, i64 0, metadata !9), !dbg !11 + call void @llvm.dbg.value(metadata !12, i64 0, metadata !13), !dbg !14 + %tmp2 = load i32 addrspace(1)* %ip, align 4, !dbg !15 + %tmp3 = add i32 0, %tmp2, !dbg !15 +; CHECK: ##DEBUG_VALUE: idx <- EAX+0 + call void @llvm.dbg.value(metadata !{i32 %tmp3}, i64 0, metadata !13), !dbg +!15 + %arrayidx = getelementptr i32 addrspace(1)* %ip, i32 %1, !dbg !16 + store i32 %tmp3, i32 addrspace(1)* %arrayidx, align 4, !dbg !16 + ret void, !dbg !17 +} +!llvm.dbg.sp = !{!0} + +!0 = metadata !{i32 589870, i32 0, metadata !1, metadata +!"__OpenCL_test_kernel", metadata !"__OpenCL_test_kernel", metadata +!"__OpenCL_test_kernel", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, null} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 589865, metadata !"OCL6368.tmp.cl", metadata !"E:\5CUsers\5Cmvillmow.AMD\5CAppData\5CLocal\5CTemp", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 589841, i32 0, i32 1, metadata !"OCL6368.tmp.cl", metadata !"E:\5CUsers\5Cmvillmow.AMD\5CAppData\5CLocal\5CTemp", metadata !"clc", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ 
DW_TAG_subroutine_type ] +!4 = metadata !{null, metadata !5} +!5 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_pointer_type ] +!6 = metadata !{i32 589860, metadata !2, metadata !"unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!7 = metadata !{i32 590081, metadata !0, metadata !"ip", metadata !1, i32 1, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] +!8 = metadata !{i32 1, i32 42, metadata !0, null} +!9 = metadata !{i32 590080, metadata !10, metadata !"gid", metadata !1, i32 3, metadata !6, i32 0} ; [ DW_TAG_auto_variable ] +!10 = metadata !{i32 589835, metadata !0, i32 2, i32 1, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!11 = metadata !{i32 3, i32 41, metadata !10, null} +!12 = metadata !{i32 0} +!13 = metadata !{i32 590080, metadata !10, metadata !"idx", metadata !1, i32 4, metadata !6, i32 0} ; [ DW_TAG_auto_variable ] +!14 = metadata !{i32 4, i32 20, metadata !10, null} +!15 = metadata !{i32 5, i32 15, metadata !10, null} +!16 = metadata !{i32 6, i32 18, metadata !10, null} +!17 = metadata !{i32 7, i32 1, metadata !0, null} +
diff --git a/src/LLVM/test/CodeGen/X86/dbg-value-inlined-parameter.ll b/src/LLVM/test/CodeGen/X86/dbg-value-inlined-parameter.ll new file mode 100644 index 0000000..481c4ba --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/dbg-value-inlined-parameter.ll
@@ -0,0 +1,87 @@ +; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-apple-darwin -regalloc=basic < %s | FileCheck %s + +;CHECK: DW_TAG_inlined_subroutine +;CHECK-NEXT: DW_AT_abstract_origin +;CHECK-NEXT: DW_AT_low_pc +;CHECK-NEXT: DW_AT_high_pc +;CHECK-NEXT: DW_AT_call_file +;CHECK-NEXT: DW_AT_call_line +;CHECK-NEXT: DW_TAG_formal_parameter +;CHECK-NEXT: .ascii "sp" ## DW_AT_name + +%struct.S1 = type { float*, i32 } + +@p = common global %struct.S1 zeroinitializer, align 8 + +define i32 @foo(%struct.S1* nocapture %sp, i32 %nums) nounwind optsize ssp { +entry: + tail call void @llvm.dbg.value(metadata !{%struct.S1* %sp}, i64 0, metadata !9), !dbg !20 + tail call void @llvm.dbg.value(metadata !{i32 %nums}, i64 0, metadata !18), !dbg !21 + %tmp2 = getelementptr inbounds %struct.S1* %sp, i64 0, i32 1, !dbg !22 + store i32 %nums, i32* %tmp2, align 4, !dbg !22, !tbaa !24 + %call = tail call float* @bar(i32 %nums) nounwind optsize, !dbg !27 + %tmp5 = getelementptr inbounds %struct.S1* %sp, i64 0, i32 0, !dbg !27 + store float* %call, float** %tmp5, align 8, !dbg !27, !tbaa !28 + %cmp = icmp ne float* %call, null, !dbg !29 + %cond = zext i1 %cmp to i32, !dbg !29 + ret i32 %cond, !dbg !29 +} + +declare float* @bar(i32) optsize + +define void @foobar() nounwind optsize ssp { +entry: + tail call void @llvm.dbg.value(metadata !30, i64 0, metadata !9) nounwind, !dbg !31 + tail call void @llvm.dbg.value(metadata !34, i64 0, metadata !18) nounwind, !dbg !35 + store i32 1, i32* getelementptr inbounds (%struct.S1* @p, i64 0, i32 1), align 8, !dbg !36, !tbaa !24 + %call.i = tail call float* @bar(i32 1) nounwind optsize, !dbg !37 + store float* %call.i, float** getelementptr inbounds (%struct.S1* @p, i64 0, i32 0), align 8, !dbg !37, !tbaa !28 + ret void, !dbg !38 +} + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.sp = !{!0, !6} +!llvm.dbg.lv.foo = !{!9, !18} +!llvm.dbg.gv = !{!19} + +!0 = metadata !{i32 
589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 8, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (%struct.S1*, i32)* @foo} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 589865, metadata !"nm2.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"nm2.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 125693)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5} +!5 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foobar", metadata !"foobar", metadata !"", metadata !1, i32 15, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, void ()* @foobar} ; [ DW_TAG_subprogram ] +!7 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!8 = metadata !{null} +!9 = metadata !{i32 590081, metadata !0, metadata !"sp", metadata !1, i32 7, metadata !10, i32 0} ; [ DW_TAG_arg_variable ] +!10 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] +!11 = metadata !{i32 589846, metadata !2, metadata !"S1", metadata !1, i32 4, i64 0, i64 0, i64 0, i32 0, metadata !12} ; [ DW_TAG_typedef ] +!12 = metadata !{i32 589843, metadata !2, metadata !"S1", metadata !1, i32 1, i64 128, i64 64, i32 0, i32 0, i32 0, metadata !13, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!13 = metadata !{metadata !14, metadata !17} +!14 = metadata !{i32 589837, metadata !1, metadata !"m", metadata !1, i32 2, i64 64, 
i64 64, i64 0, i32 0, metadata !15} ; [ DW_TAG_member ] +!15 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ] +!16 = metadata !{i32 589860, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] +!17 = metadata !{i32 589837, metadata !1, metadata !"nums", metadata !1, i32 3, i64 32, i64 32, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ] +!18 = metadata !{i32 590081, metadata !0, metadata !"nums", metadata !1, i32 7, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] +!19 = metadata !{i32 589876, i32 0, metadata !2, metadata !"p", metadata !"p", metadata !"", metadata !1, i32 14, metadata !11, i32 0, i32 1, %struct.S1* @p} ; [ DW_TAG_variable ] +!20 = metadata !{i32 7, i32 13, metadata !0, null} +!21 = metadata !{i32 7, i32 21, metadata !0, null} +!22 = metadata !{i32 9, i32 3, metadata !23, null} +!23 = metadata !{i32 589835, metadata !0, i32 8, i32 1, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!24 = metadata !{metadata !"int", metadata !25} +!25 = metadata !{metadata !"omnipotent char", metadata !26} +!26 = metadata !{metadata !"Simple C/C++ TBAA", null} +!27 = metadata !{i32 10, i32 3, metadata !23, null} +!28 = metadata !{metadata !"any pointer", metadata !25} +!29 = metadata !{i32 11, i32 3, metadata !23, null} +!30 = metadata !{%struct.S1* @p} +!31 = metadata !{i32 7, i32 13, metadata !0, metadata !32} +!32 = metadata !{i32 16, i32 3, metadata !33, null} +!33 = metadata !{i32 589835, metadata !6, i32 15, i32 15, metadata !1, i32 1} ; [ DW_TAG_lexical_block ] +!34 = metadata !{i32 1} +!35 = metadata !{i32 7, i32 21, metadata !0, metadata !32} +!36 = metadata !{i32 9, i32 3, metadata !23, metadata !32} +!37 = metadata !{i32 10, i32 3, metadata !23, metadata !32} +!38 = metadata !{i32 17, i32 1, metadata !33, null}
diff --git a/src/LLVM/test/CodeGen/X86/dbg-value-isel.ll b/src/LLVM/test/CodeGen/X86/dbg-value-isel.ll new file mode 100644 index 0000000..f1101e6 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/dbg-value-isel.ll
@@ -0,0 +1,102 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" +; PR 9879 + +; CHECK: ##DEBUG_VALUE: tid <- +%0 = type { i8*, i8*, i8*, i8*, i32 } + +@sgv = internal addrspace(2) constant [1 x i8] zeroinitializer +@fgv = internal addrspace(2) constant [1 x i8] zeroinitializer +@lvgv = internal constant [0 x i8*] zeroinitializer +@llvm.global.annotations = appending global [1 x %0] [%0 { i8* bitcast (void (i32 addrspace(1)*)* @__OpenCL_nbt02_kernel to i8*), i8* bitcast ([1 x i8] addrspace(2)* @sgv to i8*), i8* bitcast ([1 x i8] addrspace(2)* @fgv to i8*), i8* bitcast ([0 x i8*]* @lvgv to i8*), i32 0 }], section "llvm.metadata" + +define void @__OpenCL_nbt02_kernel(i32 addrspace(1)* %ip) nounwind { +entry: + call void @llvm.dbg.value(metadata !{i32 addrspace(1)* %ip}, i64 0, metadata !8), !dbg !9 + %0 = call <4 x i32> @__amdil_get_local_id_int() nounwind + %1 = extractelement <4 x i32> %0, i32 0 + br label %2 + +; <label>:2 ; preds = %entry + %3 = phi i32 [ %1, %entry ] + br label %4 + +; <label>:4 ; preds = %2 + %5 = phi i32 [ %3, %2 ] + br label %get_local_id.exit + +get_local_id.exit: ; preds = %4 + %6 = phi i32 [ %5, %4 ] + call void @llvm.dbg.value(metadata !{i32 %6}, i64 0, metadata !10), !dbg !12 + %7 = call <4 x i32> @__amdil_get_global_id_int() nounwind, !dbg !12 + %8 = extractelement <4 x i32> %7, i32 0, !dbg !12 + br label %9 + +; <label>:9 ; preds = %get_local_id.exit + %10 = phi i32 [ %8, %get_local_id.exit ] + br label %11 + +; <label>:11 ; preds = %9 + %12 = phi i32 [ %10, %9 ] + br label %get_global_id.exit + +get_global_id.exit: ; preds = %11 + %13 = phi i32 [ %12, %11 ] + call void @llvm.dbg.value(metadata !{i32 %13}, i64 0, metadata !13), !dbg !14 + %14 = call <4 x i32> @__amdil_get_local_size_int() nounwind + %15 = extractelement <4 x i32> %14, i32 0 + br 
label %16 + +; <label>:16 ; preds = %get_global_id.exit + %17 = phi i32 [ %15, %get_global_id.exit ] + br label %18 + +; <label>:18 ; preds = %16 + %19 = phi i32 [ %17, %16 ] + br label %get_local_size.exit + +get_local_size.exit: ; preds = %18 + %20 = phi i32 [ %19, %18 ] + call void @llvm.dbg.value(metadata !{i32 %20}, i64 0, metadata !15), !dbg !16 + %tmp5 = add i32 %6, %13, !dbg !17 + %tmp7 = add i32 %tmp5, %20, !dbg !17 + store i32 %tmp7, i32 addrspace(1)* %ip, align 4, !dbg !17 + br label %return, !dbg !17 + +return: ; preds = %get_local_size.exit + ret void, !dbg !18 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +declare <4 x i32> @__amdil_get_local_size_int() nounwind + +declare <4 x i32> @__amdil_get_local_id_int() nounwind + +declare <4 x i32> @__amdil_get_global_id_int() nounwind + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.sp = !{!0} + +!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"__OpenCL_nbt02_kernel", metadata !"__OpenCL_nbt02_kernel", metadata !"__OpenCL_nbt02_kernel", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, null} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 589865, metadata !"OCLlLwTXZ.cl", metadata !"/tmp", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 589841, i32 0, i32 1, metadata !"OCLlLwTXZ.cl", metadata !"/tmp", metadata !"clc", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{null, metadata !5} +!5 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_pointer_type ] +!6 = metadata !{i32 589846, metadata !2, metadata !"uint", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ] +!7 = metadata !{i32 589860, 
metadata !2, metadata !"unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!8 = metadata !{i32 590081, metadata !0, metadata !"ip", metadata !1, i32 1, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] +!9 = metadata !{i32 1, i32 32, metadata !0, null} +!10 = metadata !{i32 590080, metadata !11, metadata !"tid", metadata !1, i32 3, metadata !6, i32 0} ; [ DW_TAG_auto_variable ] +!11 = metadata !{i32 589835, metadata !0, i32 2, i32 1, metadata !1, i32 1} ; [ DW_TAG_lexical_block ] +!12 = metadata !{i32 5, i32 24, metadata !11, null} +!13 = metadata !{i32 590080, metadata !11, metadata !"gid", metadata !1, i32 3, metadata !6, i32 0} ; [ DW_TAG_auto_variable ] +!14 = metadata !{i32 6, i32 25, metadata !11, null} +!15 = metadata !{i32 590080, metadata !11, metadata !"lsz", metadata !1, i32 3, metadata !6, i32 0} ; [ DW_TAG_auto_variable ] +!16 = metadata !{i32 7, i32 26, metadata !11, null} +!17 = metadata !{i32 9, i32 24, metadata !11, null} +!18 = metadata !{i32 10, i32 1, metadata !0, null} +
diff --git a/src/LLVM/test/CodeGen/X86/dbg-value-location.ll b/src/LLVM/test/CodeGen/X86/dbg-value-location.ll new file mode 100644 index 0000000..a0e4d16 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/dbg-value-location.ll
@@ -0,0 +1,71 @@ +; RUN: llc < %s | FileCheck %s +; RUN: llc < %s -regalloc=basic | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" +;Radar 8950491 + +;CHECK: .ascii "var" ## DW_AT_name +;CHECK-NEXT: .byte 0 +;CHECK-NEXT: ## DW_AT_decl_file +;CHECK-NEXT: ## DW_AT_decl_line +;CHECK-NEXT: ## DW_AT_type +;CHECK-NEXT: ## DW_AT_location + +@dfm = external global i32, align 4 + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +define i32 @foo(i32 %dev, i64 %cmd, i8* %data, i32 %data2) nounwind optsize ssp { +entry: + call void @llvm.dbg.value(metadata !{i32 %dev}, i64 0, metadata !12), !dbg !13 + %tmp.i = load i32* @dfm, align 4, !dbg !14 + %cmp.i = icmp eq i32 %tmp.i, 0, !dbg !14 + br i1 %cmp.i, label %if.else, label %if.end.i, !dbg !14 + +if.end.i: ; preds = %entry + switch i64 %cmd, label %if.then [ + i64 2147772420, label %bb.i + i64 536897538, label %bb116.i + ], !dbg !22 + +bb.i: ; preds = %if.end.i + unreachable + +bb116.i: ; preds = %if.end.i + unreachable + +if.then: ; preds = %if.end.i + ret i32 undef, !dbg !23 + +if.else: ; preds = %entry + ret i32 0 +} + +declare hidden fastcc i32 @bar(i32, i32* nocapture) nounwind optsize ssp +declare hidden fastcc i32 @bar2(i32) nounwind optsize ssp +declare hidden fastcc i32 @bar3(i32) nounwind optsize ssp +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.sp = !{!0, !6, !7, !8} + +!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 19510, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i64, i8*, i32)* @foo} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 589865, metadata !"/tmp/f.c", metadata !"/tmp", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"f.i", metadata 
!"/tmp", metadata !"clang version 2.9 (trunk 124753)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5} +!5 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 589870, i32 0, metadata !1, metadata !"bar3", metadata !"bar3", metadata !"", metadata !1, i32 14827, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @bar3} ; [ DW_TAG_subprogram ] +!7 = metadata !{i32 589870, i32 0, metadata !1, metadata !"bar2", metadata !"bar2", metadata !"", metadata !1, i32 15397, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @bar2} ; [ DW_TAG_subprogram ] +!8 = metadata !{i32 589870, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"", metadata !1, i32 12382, metadata !9, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i32*)* @bar} ; [ DW_TAG_subprogram ] +!9 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !10, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!10 = metadata !{metadata !11} +!11 = metadata !{i32 589860, metadata !2, metadata !"unsigned char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ] +!12 = metadata !{i32 590081, metadata !0, metadata !"var", metadata !1, i32 19509, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] +!13 = metadata !{i32 19509, i32 20, metadata !0, null} +!14 = metadata !{i32 18091, i32 2, metadata !15, metadata !17} +!15 = metadata !{i32 589835, metadata !16, i32 18086, i32 1, metadata !1, i32 748} ; [ DW_TAG_lexical_block ] +!16 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo_bar", metadata !"foo_bar", metadata !"", metadata !1, i32 18086, metadata 
!3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null} ; [ DW_TAG_subprogram ] +!17 = metadata !{i32 19514, i32 2, metadata !18, null} +!18 = metadata !{i32 589835, metadata !0, i32 19510, i32 1, metadata !1, i32 99} ; [ DW_TAG_lexical_block ] +!22 = metadata !{i32 18094, i32 2, metadata !15, metadata !17} +!23 = metadata !{i32 19524, i32 1, metadata !18, null}
diff --git a/src/LLVM/test/CodeGen/X86/dbg-value-range.ll b/src/LLVM/test/CodeGen/X86/dbg-value-range.ll new file mode 100644 index 0000000..28d873b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/dbg-value-range.ll
@@ -0,0 +1,62 @@ +; RUN: llc -mtriple=x86_64-apple-darwin10 < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-apple-darwin10 -regalloc=basic -join-physregs < %s | FileCheck %s + +%struct.a = type { i32 } + +define i32 @bar(%struct.a* nocapture %b) nounwind ssp { +entry: + tail call void @llvm.dbg.value(metadata !{%struct.a* %b}, i64 0, metadata !6), !dbg !13 + %tmp1 = getelementptr inbounds %struct.a* %b, i64 0, i32 0, !dbg !14 + %tmp2 = load i32* %tmp1, align 4, !dbg !14, !tbaa !15 + tail call void @llvm.dbg.value(metadata !{i32 %tmp2}, i64 0, metadata !11), !dbg !14 + %call = tail call i32 (...)* @foo(i32 %tmp2) nounwind , !dbg !18 + %add = add nsw i32 %tmp2, 1, !dbg !19 + ret i32 %add, !dbg !19 +} + +declare i32 @foo(...) + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.sp = !{!0} +!llvm.dbg.lv.bar = !{!6, !11} + +!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (%struct.a*)* @bar} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 589865, metadata !"bar.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"bar.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 122997)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5} +!5 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 590081, metadata !0, metadata !"b", metadata !1, i32 5, metadata !7, i32 0} ; [ DW_TAG_arg_variable ] +!7 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type 
] +!8 = metadata !{i32 589843, metadata !2, metadata !"a", metadata !1, i32 1, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !9, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!9 = metadata !{metadata !10} +!10 = metadata !{i32 589837, metadata !1, metadata !"c", metadata !1, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ] +!11 = metadata !{i32 590080, metadata !12, metadata !"x", metadata !1, i32 6, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] +!12 = metadata !{i32 589835, metadata !0, i32 5, i32 22, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!13 = metadata !{i32 5, i32 19, metadata !0, null} +!14 = metadata !{i32 6, i32 14, metadata !12, null} +!15 = metadata !{metadata !"int", metadata !16} +!16 = metadata !{metadata !"omnipotent char", metadata !17} +!17 = metadata !{metadata !"Simple C/C++ TBAA", null} +!18 = metadata !{i32 7, i32 2, metadata !12, null} +!19 = metadata !{i32 8, i32 2, metadata !12, null} + +; Check that variable bar:b value range is appropriately truncated in debug info. +; The variable is in %rdi which is clobbered by 'movl %ebx, %edi' +; Here Ltmp7 is the end of the location range. + +;CHECK: .loc 1 7 2 +;CHECK: movl +;CHECK-NEXT: [[CLOBBER:Ltmp[0-9]*]] + +;CHECK:Ldebug_loc0: +;CHECK-NEXT: .quad +;CHECK-NEXT: .quad [[CLOBBER]] +;CHECK-NEXT: Lset{{.*}} = Ltmp{{.*}}-Ltmp{{.*}} +;CHECK-NEXT: .short Lset +;CHECK-NEXT: Ltmp +;CHECK-NEXT: .byte 85 +;CHECK-NEXT: Ltmp +;CHECK-NEXT: .quad 0 +;CHECK-NEXT: .quad 0
diff --git a/src/LLVM/test/CodeGen/X86/dg.exp b/src/LLVM/test/CodeGen/X86/dg.exp new file mode 100644 index 0000000..0b301a8 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/dg.exp
@@ -0,0 +1,5 @@ +load_lib llvm.exp + +if { [llvm_supports_target X86] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] +}
diff --git a/src/LLVM/test/CodeGen/X86/discontiguous-loops.ll b/src/LLVM/test/CodeGen/X86/discontiguous-loops.ll new file mode 100644 index 0000000..479c450 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/discontiguous-loops.ll
@@ -0,0 +1,72 @@ +; RUN: llc -verify-loop-info -verify-dom-info -march=x86-64 < %s +; PR5243 + +@.str96 = external constant [37 x i8], align 8 ; <[37 x i8]*> [#uses=1] + +define void @foo() nounwind { +bb: + br label %ybb1 + +ybb1: ; preds = %yybb13, %xbb6, %bb + switch i32 undef, label %bb18 [ + i32 150, label %ybb2 + i32 151, label %bb17 + i32 152, label %bb19 + i32 157, label %ybb8 + ] + +ybb2: ; preds = %ybb1 + %tmp = icmp eq i8** undef, null ; <i1> [#uses=1] + br i1 %tmp, label %bb3, label %xbb6 + +bb3: ; preds = %ybb2 + unreachable + +xbb4: ; preds = %xbb6 + store i32 0, i32* undef, align 8 + br i1 undef, label %xbb6, label %bb5 + +bb5: ; preds = %xbb4 + call fastcc void @decl_mode_check_failed() nounwind + unreachable + +xbb6: ; preds = %xbb4, %ybb2 + %tmp7 = icmp slt i32 undef, 0 ; <i1> [#uses=1] + br i1 %tmp7, label %xbb4, label %ybb1 + +ybb8: ; preds = %ybb1 + %tmp9 = icmp eq i8** undef, null ; <i1> [#uses=1] + br i1 %tmp9, label %bb10, label %ybb12 + +bb10: ; preds = %ybb8 + %tmp11 = load i8** undef, align 8 ; <i8*> [#uses=1] + call void (i8*, ...)* @fatal(i8* getelementptr inbounds ([37 x i8]* @.str96, i64 0, i64 0), i8* %tmp11) nounwind + unreachable + +ybb12: ; preds = %ybb8 + br i1 undef, label %bb15, label %ybb13 + +ybb13: ; preds = %ybb12 + %tmp14 = icmp sgt i32 undef, 0 ; <i1> [#uses=1] + br i1 %tmp14, label %bb16, label %ybb1 + +bb15: ; preds = %ybb12 + call void (i8*, ...)* @fatal(i8* getelementptr inbounds ([37 x i8]* @.str96, i64 0, i64 0), i8* undef) nounwind + unreachable + +bb16: ; preds = %ybb13 + unreachable + +bb17: ; preds = %ybb1 + unreachable + +bb18: ; preds = %ybb1 + unreachable + +bb19: ; preds = %ybb1 + unreachable +} + +declare void @fatal(i8*, ...) + +declare fastcc void @decl_mode_check_failed() nounwind
diff --git a/src/LLVM/test/CodeGen/X86/div8.ll b/src/LLVM/test/CodeGen/X86/div8.ll new file mode 100644 index 0000000..0825f79 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/div8.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s | FileCheck %s +; ModuleID = '8div.c' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-macosx10.6.6" + +define signext i8 @test_div(i8 %dividend, i8 %divisor) nounwind ssp { +entry: + %dividend.addr = alloca i8, align 2 + %divisor.addr = alloca i8, align 1 + %quotient = alloca i8, align 1 + store i8 %dividend, i8* %dividend.addr, align 2 + store i8 %divisor, i8* %divisor.addr, align 1 + %tmp = load i8* %dividend.addr, align 2 + %tmp1 = load i8* %divisor.addr, align 1 +; Insist on i8->i32 zero extension, even though divb demands only i16: +; CHECK: movzbl {{.*}}%eax +; CHECK: divb + %div = udiv i8 %tmp, %tmp1 + store i8 %div, i8* %quotient, align 1 + %tmp4 = load i8* %quotient, align 1 + ret i8 %tmp4 +}
diff --git a/src/LLVM/test/CodeGen/X86/divide-by-constant.ll b/src/LLVM/test/CodeGen/X86/divide-by-constant.ll new file mode 100644 index 0000000..87c1be5 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/divide-by-constant.ll
@@ -0,0 +1,73 @@ +; RUN: llc < %s -mtriple=i686-pc-linux-gnu -asm-verbose=0 | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" +target triple = "i686-pc-linux-gnu" + +define zeroext i16 @test1(i16 zeroext %x) nounwind { +entry: + %div = udiv i16 %x, 33 + ret i16 %div +; CHECK: test1: +; CHECK: imull $63551, %eax, %eax +; CHECK-NEXT: shrl $21, %eax +; CHECK-NEXT: ret +} + +define zeroext i16 @test2(i8 signext %x, i16 zeroext %c) nounwind readnone ssp noredzone { +entry: + %div = udiv i16 %c, 3 + ret i16 %div + +; CHECK: test2: +; CHECK: imull $43691, %eax, %eax +; CHECK-NEXT: shrl $17, %eax +; CHECK-NEXT: ret +} + +define zeroext i8 @test3(i8 zeroext %x, i8 zeroext %c) nounwind readnone ssp noredzone { +entry: + %div = udiv i8 %c, 3 + ret i8 %div + +; CHECK: test3: +; CHECK: movzbl 8(%esp), %eax +; CHECK-NEXT: imull $171, %eax, %eax +; CHECK-NEXT: shrl $9, %eax +; CHECK-NEXT: ret +} + +define signext i16 @test4(i16 signext %x) nounwind { +entry: + %div = sdiv i16 %x, 33 ; <i32> [#uses=1] + ret i16 %div +; CHECK: test4: +; CHECK: imull $1986, %eax, % +} + +define i32 @test5(i32 %A) nounwind { + %tmp1 = udiv i32 %A, 1577682821 ; <i32> [#uses=1] + ret i32 %tmp1 +; CHECK: test5: +; CHECK: movl $365384439, %eax +; CHECK: mull 4(%esp) +} + +define signext i16 @test6(i16 signext %x) nounwind { +entry: + %div = sdiv i16 %x, 10 + ret i16 %div +; CHECK: test6: +; CHECK: imull $26215, %eax, %eax +; CHECK: shrl $31, %ecx +; CHECK: sarl $18, %eax +} + +define i32 @test7(i32 %x) nounwind { + %div = udiv i32 %x, 28 + ret i32 %div +; CHECK: test7: +; CHECK: shrl $2 +; CHECK: movl $613566757 +; CHECK: mull +; CHECK-NOT: shrl +; CHECK: ret +}
diff --git a/src/LLVM/test/CodeGen/X86/divrem.ll b/src/LLVM/test/CodeGen/X86/divrem.ll new file mode 100644 index 0000000..e86b52f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/divrem.ll
@@ -0,0 +1,58 @@ +; RUN: llc < %s -march=x86-64 | grep div | count 8 + +define void @si64(i64 %x, i64 %y, i64* %p, i64* %q) { + %r = sdiv i64 %x, %y + %t = srem i64 %x, %y + store i64 %r, i64* %p + store i64 %t, i64* %q + ret void +} +define void @si32(i32 %x, i32 %y, i32* %p, i32* %q) { + %r = sdiv i32 %x, %y + %t = srem i32 %x, %y + store i32 %r, i32* %p + store i32 %t, i32* %q + ret void +} +define void @si16(i16 %x, i16 %y, i16* %p, i16* %q) { + %r = sdiv i16 %x, %y + %t = srem i16 %x, %y + store i16 %r, i16* %p + store i16 %t, i16* %q + ret void +} +define void @si8(i8 %x, i8 %y, i8* %p, i8* %q) { + %r = sdiv i8 %x, %y + %t = srem i8 %x, %y + store i8 %r, i8* %p + store i8 %t, i8* %q + ret void +} +define void @ui64(i64 %x, i64 %y, i64* %p, i64* %q) { + %r = udiv i64 %x, %y + %t = urem i64 %x, %y + store i64 %r, i64* %p + store i64 %t, i64* %q + ret void +} +define void @ui32(i32 %x, i32 %y, i32* %p, i32* %q) { + %r = udiv i32 %x, %y + %t = urem i32 %x, %y + store i32 %r, i32* %p + store i32 %t, i32* %q + ret void +} +define void @ui16(i16 %x, i16 %y, i16* %p, i16* %q) { + %r = udiv i16 %x, %y + %t = urem i16 %x, %y + store i16 %r, i16* %p + store i16 %t, i16* %q + ret void +} +define void @ui8(i8 %x, i8 %y, i8* %p, i8* %q) { + %r = udiv i8 %x, %y + %t = urem i8 %x, %y + store i8 %r, i8* %p + store i8 %t, i8* %q + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/dll-linkage.ll b/src/LLVM/test/CodeGen/X86/dll-linkage.ll new file mode 100644 index 0000000..a0c2a54 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/dll-linkage.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -mtriple=i386-pc-mingw32 | FileCheck %s + +; RUN: llc < %s -mtriple=i386-pc-mingw32 -O0 | FileCheck %s -check-prefix=FAST +; PR6275 + +declare dllimport void @foo() + +define void @bar() nounwind { +; CHECK: calll *__imp__foo +; FAST: movl __imp__foo, [[R:%[a-z]{3}]] +; FAST: calll *[[R]] + call void @foo() + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/dllexport.ll b/src/LLVM/test/CodeGen/X86/dllexport.ll new file mode 100644 index 0000000..bf57e78 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/dllexport.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s | FileCheck %s +; PR2936 + +target triple = "i386-pc-mingw32" + +define dllexport x86_fastcallcc i32 @foo() nounwind { +entry: + ret i32 0 +} + +; CHECK: .section .drectve +; CHECK: -export:@foo@0
diff --git a/src/LLVM/test/CodeGen/X86/dollar-name.ll b/src/LLVM/test/CodeGen/X86/dollar-name.ll new file mode 100644 index 0000000..fc7af9d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/dollar-name.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=x86 -mtriple=i386-linux | FileCheck %s +; PR1339 + +@"$bar" = global i32 zeroinitializer +@"$qux" = external global i32 + +define i32 @"$foo"() nounwind { +; CHECK: movl ($bar), +; CHECK: addl ($qux), +; CHECK: calll ($hen) + %m = load i32* @"$bar" + %n = load i32* @"$qux" + %t = add i32 %m, %n + %u = call i32 @"$hen"(i32 %t) + ret i32 %u +} + +declare i32 @"$hen"(i32 %a)
diff --git a/src/LLVM/test/CodeGen/X86/dyn-stackalloc.ll b/src/LLVM/test/CodeGen/X86/dyn-stackalloc.ll new file mode 100644 index 0000000..7b0fe18 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/dyn-stackalloc.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -mtriple=i686-linux | FileCheck %s -check-prefix=X32 +; X32-NOT: {{$429496728|-7}} +; X32: {{$4294967280|-16}} +; X32-NOT: {{$429496728|-7}} +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64 +; X64: -16 + +define void @t() nounwind { +A: + br label %entry + +entry: + %m1 = alloca i32, align 4 + %m2 = alloca [7 x i8], align 16 + call void @s( i32* %m1, [7 x i8]* %m2 ) + ret void +} + +declare void @s(i32*, [7 x i8]*)
diff --git a/src/LLVM/test/CodeGen/X86/eh_frame.ll b/src/LLVM/test/CodeGen/X86/eh_frame.ll new file mode 100644 index 0000000..3b792b2 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/eh_frame.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -mtriple x86_64-unknown-linux-gnu | FileCheck -check-prefix=STATIC %s +; RUN: llc < %s -mtriple x86_64-unknown-linux-gnu -relocation-model=pic | FileCheck -check-prefix=PIC %s + +@__FRAME_END__ = constant [1 x i32] zeroinitializer, section ".eh_frame" + +@foo = external global i32 +@bar1 = constant i8* bitcast (i32* @foo to i8*), section "my_bar1", align 8 + + +; STATIC: .section .eh_frame,"a",@progbits +; STATIC: .section my_bar1,"a",@progbits + +; PIC: .section .eh_frame,"a",@progbits +; PIC: .section my_bar1,"aw",@progbits
diff --git a/src/LLVM/test/CodeGen/X86/empty-functions.ll b/src/LLVM/test/CodeGen/X86/empty-functions.ll new file mode 100644 index 0000000..874c53a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/empty-functions.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck -check-prefix=CHECK-NO-FP %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s + +define void @func() { +entry: + unreachable +} +; CHECK-NO-FP: _func: +; CHECK-NO-FP-NEXT: : +; CHECK-NO-FP-NEXT: .cfi_startproc +; CHECK-NO-FP: nop +; CHECK-NO-FP-NEXT: : +; CHECK-NO-FP-NEXT: .cfi_endproc + +; CHECK-FP: _func: +; CHECK-FP-NEXT: : +; CHECK-FP-NEXT: .cfi_startproc +; CHECK-FP-NEXT: : +; CHECK-FP-NEXT: pushq %rbp +; CHECK-FP-NEXT: : +; CHECK-FP-NEXT: .cfi_def_cfa_offset 16 +; CHECK-FP-NEXT: : +; CHECK-FP-NEXT: .cfi_offset %rbp, -16 +; CHECK-FP-NEXT: movq %rsp, %rbp +; CHECK-FP-NEXT: : +; CHECK-FP-NEXT: .cfi_def_cfa_register %rbp +; CHECK-FP-NEXT: nop +; CHECK-FP-NEXT: : +; CHECK-FP-NEXT: .cfi_endproc
diff --git a/src/LLVM/test/CodeGen/X86/empty-struct-return-type.ll b/src/LLVM/test/CodeGen/X86/empty-struct-return-type.ll new file mode 100644 index 0000000..34cd5d9 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/empty-struct-return-type.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=x86-64 | grep call +; PR4688 + +; Return types can be empty structs, which can be awkward. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define void @_ZN15QtSharedPointer22internalSafetyCheckAddEPVKv(i8* %ptr) { +entry: + %0 = call { } @_ZNK5QHashIPv15QHashDummyValueE5valueERKS0_(i8** undef) ; <{ }> [#uses=0] + ret void +} + +declare hidden { } @_ZNK5QHashIPv15QHashDummyValueE5valueERKS0_(i8** nocapture) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/epilogue.ll b/src/LLVM/test/CodeGen/X86/epilogue.ll new file mode 100644 index 0000000..52dcb61 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/epilogue.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=x86 | not grep lea +; RUN: llc < %s -march=x86 | grep {movl %ebp} + +declare void @bar(<2 x i64>* %n) + +define void @foo(i64 %h) { + %k = trunc i64 %h to i32 + %p = alloca <2 x i64>, i32 %k + call void @bar(<2 x i64>* %p) + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/extend.ll b/src/LLVM/test/CodeGen/X86/extend.ll new file mode 100644 index 0000000..cf30ada --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/extend.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | grep movzx | count 1 +; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | grep movsx | count 1 + +@G1 = internal global i8 0 ; <i8*> [#uses=1] +@G2 = internal global i8 0 ; <i8*> [#uses=1] + +define i16 @test1() { + %tmp.0 = load i8* @G1 ; <i8> [#uses=1] + %tmp.3 = zext i8 %tmp.0 to i16 ; <i16> [#uses=1] + ret i16 %tmp.3 +} + +define i16 @test2() { + %tmp.0 = load i8* @G2 ; <i8> [#uses=1] + %tmp.3 = sext i8 %tmp.0 to i16 ; <i16> [#uses=1] + ret i16 %tmp.3 +} +
diff --git a/src/LLVM/test/CodeGen/X86/extern_weak.ll b/src/LLVM/test/CodeGen/X86/extern_weak.ll new file mode 100644 index 0000000..ec667cd --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/extern_weak.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -mtriple=i686-apple-darwin | grep weak_reference | count 2 + +@Y = global i32 (i8*)* @X ; <i32 (i8*)**> [#uses=0] + +declare extern_weak i32 @X(i8*) + +define void @bar() { + tail call void (...)* @foo( ) + ret void +} + +declare extern_weak void @foo(...) +
diff --git a/src/LLVM/test/CodeGen/X86/extmul128.ll b/src/LLVM/test/CodeGen/X86/extmul128.ll new file mode 100644 index 0000000..9b59829 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/extmul128.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=x86-64 | grep mul | count 2 + +define i128 @i64_sext_i128(i64 %a, i64 %b) { + %aa = sext i64 %a to i128 + %bb = sext i64 %b to i128 + %cc = mul i128 %aa, %bb + ret i128 %cc +} +define i128 @i64_zext_i128(i64 %a, i64 %b) { + %aa = zext i64 %a to i128 + %bb = zext i64 %b to i128 + %cc = mul i128 %aa, %bb + ret i128 %cc +}
diff --git a/src/LLVM/test/CodeGen/X86/extmul64.ll b/src/LLVM/test/CodeGen/X86/extmul64.ll new file mode 100644 index 0000000..9e20ded --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/extmul64.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=x86 | grep mul | count 2 + +define i64 @i32_sext_i64(i32 %a, i32 %b) { + %aa = sext i32 %a to i64 + %bb = sext i32 %b to i64 + %cc = mul i64 %aa, %bb + ret i64 %cc +} +define i64 @i32_zext_i64(i32 %a, i32 %b) { + %aa = zext i32 %a to i64 + %bb = zext i32 %b to i64 + %cc = mul i64 %aa, %bb + ret i64 %cc +}
diff --git a/src/LLVM/test/CodeGen/X86/extract-combine.ll b/src/LLVM/test/CodeGen/X86/extract-combine.ll new file mode 100644 index 0000000..2040e87 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/extract-combine.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=x86-64 -mcpu=core2 -o %t +; RUN: not grep unpcklps %t + +define i32 @foo() nounwind { +entry: + %tmp74.i25762 = shufflevector <16 x float> zeroinitializer, <16 x float> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19> ; <<16 x float>> [#uses=1] + %tmp518 = shufflevector <16 x float> %tmp74.i25762, <16 x float> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15> ; <<4 x float>> [#uses=1] + %movss.i25611 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp518, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1] + %conv3.i25615 = shufflevector <4 x float> %movss.i25611, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1] + %sub.i25620 = fsub <4 x float> %conv3.i25615, zeroinitializer ; <<4 x float>> [#uses=1] + %mul.i25621 = fmul <4 x float> zeroinitializer, %sub.i25620 ; <<4 x float>> [#uses=1] + %add.i25622 = fadd <4 x float> zeroinitializer, %mul.i25621 ; <<4 x float>> [#uses=1] + store <4 x float> %add.i25622, <4 x float>* null + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/extract-extract.ll b/src/LLVM/test/CodeGen/X86/extract-extract.ll new file mode 100644 index 0000000..ad79ab9 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/extract-extract.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -march=x86 >/dev/null +; PR4699 + +; Handle this extractvalue-of-extractvalue case without getting in +; trouble with CSE in DAGCombine. + + %cc = type { %crd } + %cr = type { i32 } + %crd = type { i64, %cr* } + %pp = type { %cc } + +define fastcc void @foo(%pp* nocapture byval %p_arg) { +entry: + %tmp2 = getelementptr %pp* %p_arg, i64 0, i32 0 ; <%cc*> [#uses= + %tmp3 = load %cc* %tmp2 ; <%cc> [#uses=1] + %tmp34 = extractvalue %cc %tmp3, 0 ; <%crd> [#uses=1] + %tmp345 = extractvalue %crd %tmp34, 0 ; <i64> [#uses=1] + %.ptr.i = load %cr** undef ; <%cr*> [#uses=0] + %tmp15.i = shl i64 %tmp345, 3 ; <i64> [#uses=0] + store %cr* undef, %cr** undef + ret void +} + +
diff --git a/src/LLVM/test/CodeGen/X86/extractelement-from-arg.ll b/src/LLVM/test/CodeGen/X86/extractelement-from-arg.ll new file mode 100644 index 0000000..4ea37f0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/extractelement-from-arg.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=x86-64 -mattr=+sse2 + +define void @test(float* %R, <4 x float> %X) nounwind { + %tmp = extractelement <4 x float> %X, i32 3 + store float %tmp, float* %R + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/extractelement-load.ll b/src/LLVM/test/CodeGen/X86/extractelement-load.ll new file mode 100644 index 0000000..06d739c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/extractelement-load.ll
@@ -0,0 +1,25 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=yonah | FileCheck %s +; RUN: llc < %s -march=x86-64 -mattr=+sse2 -mcpu=core2 | FileCheck %s + +define i32 @t(<2 x i64>* %val) nounwind { +; CHECK: t: +; CHECK-NOT: movd +; CHECK: movl 8( +; CHECK-NEXT: ret + %tmp2 = load <2 x i64>* %val, align 16 ; <<2 x i64>> [#uses=1] + %tmp3 = bitcast <2 x i64> %tmp2 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp4 = extractelement <4 x i32> %tmp3, i32 2 ; <i32> [#uses=1] + ret i32 %tmp4 +} + +; Case where extractelement of load ends up as undef. +; (Making sure this doesn't crash.) +define i32 @t2(<8 x i32>* %xp) { +; CHECK: t2: +; CHECK: ret + %x = load <8 x i32>* %xp + %Shuff68 = shufflevector <8 x i32> %x, <8 x i32> undef, <8 x i32> <i32 +undef, i32 7, i32 9, i32 undef, i32 13, i32 15, i32 1, i32 3> + %y = extractelement <8 x i32> %Shuff68, i32 0 + ret i32 %y +}
diff --git a/src/LLVM/test/CodeGen/X86/extractelement-shuffle.ll b/src/LLVM/test/CodeGen/X86/extractelement-shuffle.ll new file mode 100644 index 0000000..d1ba9a8 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/extractelement-shuffle.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s + +; Examples that exhibits a bug in DAGCombine. The case is triggered by the +; following program. The bug is DAGCombine assumes that the bit convert +; preserves the number of elements so the optimization code tries to read +; through the 3rd mask element, which doesn't exist. +define i32 @update(<2 x i64> %val1, <2 x i64> %val2) nounwind readnone { +entry: + %shuf = shufflevector <2 x i64> %val1, <2 x i64> %val2, <2 x i32> <i32 0, i32 3> + %bit = bitcast <2 x i64> %shuf to <4 x i32> + %res = extractelement <4 x i32> %bit, i32 3 + ret i32 %res +}
diff --git a/src/LLVM/test/CodeGen/X86/extractps.ll b/src/LLVM/test/CodeGen/X86/extractps.ll new file mode 100644 index 0000000..14778f0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/extractps.ll
@@ -0,0 +1,27 @@ +; RUN: llc < %s -march=x86 -mcpu=penryn > %t +; RUN: not grep movd %t +; RUN: grep {movss %xmm} %t | count 1 +; RUN: grep {extractps \\\$1, %xmm0, } %t | count 1 +; PR2647 + +external global float, align 16 ; <float*>:0 [#uses=2] + +define internal void @""() nounwind { + load float* @0, align 16 ; <float>:1 [#uses=1] + insertelement <4 x float> undef, float %1, i32 0 ; <<4 x float>>:2 [#uses=1] + call <4 x float> @llvm.x86.sse.rsqrt.ss( <4 x float> %2 ) ; <<4 x float>>:3 [#uses=1] + extractelement <4 x float> %3, i32 0 ; <float>:4 [#uses=1] + store float %4, float* @0, align 16 + ret void +} +define internal void @""() nounwind { + load float* @0, align 16 ; <float>:1 [#uses=1] + insertelement <4 x float> undef, float %1, i32 1 ; <<4 x float>>:2 [#uses=1] + call <4 x float> @llvm.x86.sse.rsqrt.ss( <4 x float> %2 ) ; <<4 x float>>:3 [#uses=1] + extractelement <4 x float> %3, i32 1 ; <float>:4 [#uses=1] + store float %4, float* @0, align 16 + ret void +} + +declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone +
diff --git a/src/LLVM/test/CodeGen/X86/fabs.ll b/src/LLVM/test/CodeGen/X86/fabs.ll new file mode 100644 index 0000000..bb4daf6 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fabs.ll
@@ -0,0 +1,28 @@ +; Make sure this testcase codegens to the fabs instruction, not a call to fabsf +; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3,-sse | grep fabs\$ | \ +; RUN: count 2 +; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math -enable-no-nans-fp-math | \ +; RUN: grep fabs\$ | count 3 + +declare float @fabsf(float) + +declare x86_fp80 @fabsl(x86_fp80) + +define float @test1(float %X) { + %Y = call float @fabsf(float %X) + ret float %Y +} + +define double @test2(double %X) { + %Y = fcmp oge double %X, -0.0 + %Z = fsub double -0.0, %X + %Q = select i1 %Y, double %X, double %Z + ret double %Q +} + +define x86_fp80 @test3(x86_fp80 %X) { + %Y = call x86_fp80 @fabsl(x86_fp80 %X) + ret x86_fp80 %Y +} + +
diff --git a/src/LLVM/test/CodeGen/X86/fast-cc-callee-pops.ll b/src/LLVM/test/CodeGen/X86/fast-cc-callee-pops.ll new file mode 100644 index 0000000..20b9998 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fast-cc-callee-pops.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=x86 -x86-asm-syntax=intel -mcpu=yonah | FileCheck %s + +; Check that a fastcc function pops its stack variables before returning. + +define x86_fastcallcc void @func(i64 %X, i64 %Y, float %G, double %Z) nounwind { + ret void +; CHECK: ret{{.*}}20 +} + +define x86_thiscallcc void @func2(i32 %X, i64 %Y, float %G, double %Z) nounwind { + ret void +; CHECK: ret{{.*}}20 +}
diff --git a/src/LLVM/test/CodeGen/X86/fast-cc-merge-stack-adj.ll b/src/LLVM/test/CodeGen/X86/fast-cc-merge-stack-adj.ll new file mode 100644 index 0000000..b22e3f8 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \ +; RUN: grep {add ESP, 8} + +target triple = "i686-pc-linux-gnu" + +declare x86_fastcallcc void @func(i32*, i64) + +define x86_fastcallcc void @caller(i32, i64) { + %X = alloca i32 ; <i32*> [#uses=1] + call x86_fastcallcc void @func( i32* %X, i64 0 ) + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/fast-cc-pass-in-regs.ll b/src/LLVM/test/CodeGen/X86/fast-cc-pass-in-regs.ll new file mode 100644 index 0000000..533e49a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fast-cc-pass-in-regs.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | FileCheck %s +; check that fastcc is passing stuff in regs. + +declare x86_fastcallcc i64 @callee(i64) + +define i64 @caller() { + %X = call x86_fastcallcc i64 @callee( i64 4294967299 ) ; <i64> [#uses=1] +; CHECK: mov{{.*}}EDX, 1 + ret i64 %X +} + +define x86_fastcallcc i64 @caller2(i64 %X) { + ret i64 %X +; CHECK: mov{{.*}}EAX, ECX +} + +declare x86_thiscallcc i64 @callee2(i32) + +define i64 @caller3() { + %X = call x86_thiscallcc i64 @callee2( i32 3 ) +; CHECK: mov{{.*}}ECX, 3 + ret i64 %X +} + +define x86_thiscallcc i32 @caller4(i32 %X) { + ret i32 %X +; CHECK: mov{{.*}}EAX, ECX +} +
diff --git a/src/LLVM/test/CodeGen/X86/fast-isel-agg-constant.ll b/src/LLVM/test/CodeGen/X86/fast-isel-agg-constant.ll new file mode 100644 index 0000000..ce0dff7 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fast-isel-agg-constant.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=x86-64 -O0 | FileCheck %s +; Make sure fast-isel doesn't screw up aggregate constants. +; (Failing out is okay, as long as we don't miscompile.) + +%bar = type { i32 } + +define i32 @foo() { + %tmp = extractvalue %bar { i32 3 }, 0 + ret i32 %tmp +; CHECK: movl $3, %eax +}
diff --git a/src/LLVM/test/CodeGen/X86/fast-isel-atomic.ll b/src/LLVM/test/CodeGen/X86/fast-isel-atomic.ll new file mode 100644 index 0000000..5f761dd --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fast-isel-atomic.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -O0 -march=x86-64 +; rdar://8204072 +; PR7652 + +@sc = external global i8 +@uc = external global i8 + +define void @test_fetch_and_op() nounwind { +entry: + %tmp40 = atomicrmw and i8* @sc, i8 11 monotonic + store i8 %tmp40, i8* @sc + %tmp41 = atomicrmw and i8* @uc, i8 11 monotonic + store i8 %tmp41, i8* @uc + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll b/src/LLVM/test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll new file mode 100644 index 0000000..9233d3f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll
@@ -0,0 +1,23 @@ +; RUN: llc -O0 -relocation-model=pic < %s | not grep call +; rdar://8396318 + +; Don't emit a PIC base register if no addresses are needed. + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" +target triple = "i386-apple-darwin11.0.0" + +define i32 @foo(i32 %x, i32 %y, i32 %z) nounwind ssp { +entry: + %x.addr = alloca i32, align 4 + %y.addr = alloca i32, align 4 + %z.addr = alloca i32, align 4 + store i32 %x, i32* %x.addr, align 4 + store i32 %y, i32* %y.addr, align 4 + store i32 %z, i32* %z.addr, align 4 + %tmp = load i32* %x.addr, align 4 + %tmp1 = load i32* %y.addr, align 4 + %add = add nsw i32 %tmp, %tmp1 + %tmp2 = load i32* %z.addr, align 4 + %add3 = add nsw i32 %add, %tmp2 + ret i32 %add3 +}
diff --git a/src/LLVM/test/CodeGen/X86/fast-isel-bail.ll b/src/LLVM/test/CodeGen/X86/fast-isel-bail.ll new file mode 100644 index 0000000..a485827 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fast-isel-bail.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=x86 -O0 + +; This file is for regression tests for cases where FastISel needs +; to gracefully bail out and let SelectionDAGISel take over. + + %0 = type { i64, i8* } ; type %0 + +declare void @bar(%0) + +define fastcc void @foo() nounwind { +entry: + call void @bar(%0 zeroinitializer) + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/fast-isel-bc.ll b/src/LLVM/test/CodeGen/X86/fast-isel-bc.ll new file mode 100644 index 0000000..4abc3b5 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fast-isel-bc.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -O0 -regalloc=linearscan -march=x86-64 -mattr=+mmx,+sse2 | FileCheck %s +; PR4684 + +target datalayout = +"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-apple-darwin9.8" + +declare void @func2(x86_mmx) + +define void @func1() nounwind { + +; This isn't spectacular, but it's MMX code at -O0... +; CHECK: movq2dq %mm0, %xmm0 +; For now, handling of x86_mmx parameters in fast Isel is unimplemented, +; so we get pretty poor code. The below is preferable. +; CHEK: movl $2, %eax +; CHEK: movd %rax, %mm0 +; CHEK: movd %mm0, %rdi + + %tmp0 = bitcast <2 x i32><i32 0, i32 2> to x86_mmx + call void @func2(x86_mmx %tmp0) + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/fast-isel-call.ll b/src/LLVM/test/CodeGen/X86/fast-isel-call.ll new file mode 100644 index 0000000..3159741 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fast-isel-call.ll
@@ -0,0 +1,55 @@ +; RUN: llc < %s -O0 -fast-isel-abort -march=x86 | FileCheck %s + +%struct.s = type {i32, i32, i32} + +define i32 @test1() nounwind { +tak: + %tmp = call i1 @foo() + br i1 %tmp, label %BB1, label %BB2 +BB1: + ret i32 1 +BB2: + ret i32 0 +; CHECK: test1: +; CHECK: calll +; CHECK-NEXT: testb $1 +} +declare zeroext i1 @foo() nounwind + +declare void @foo2(%struct.s* byval) + +define void @test2(%struct.s* %d) nounwind { + call void @foo2(%struct.s* byval %d ) + ret void +; CHECK: test2: +; CHECK: movl (%eax) +; CHECK: movl {{.*}}, (%esp) +; CHECK: movl 4(%eax) +; CHECK: movl {{.*}}, 4(%esp) +; CHECK: movl 8(%eax) +; CHECK: movl {{.*}}, 8(%esp) +} + +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind + +define void @test3(i8* %a) { + call void @llvm.memset.p0i8.i32(i8* %a, i8 0, i32 100, i32 1, i1 false) + ret void +; CHECK: test3: +; CHECK: movl {{.*}}, (%esp) +; CHECK: movl $0, 4(%esp) +; CHECK: movl $100, 8(%esp) +; CHECK: calll {{.*}}memset +} + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind + +define void @test4(i8* %a, i8* %b) { + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %b, i32 100, i32 1, i1 false) + ret void +; CHECK: test4: +; CHECK: movl {{.*}}, (%esp) +; CHECK: movl {{.*}}, 4(%esp) +; CHECK: movl $100, 8(%esp) +; CHECK: calll {{.*}}memcpy +}
diff --git a/src/LLVM/test/CodeGen/X86/fast-isel-cmp-branch.ll b/src/LLVM/test/CodeGen/X86/fast-isel-cmp-branch.ll new file mode 100644 index 0000000..6e408f8 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fast-isel-cmp-branch.ll
@@ -0,0 +1,34 @@ +; RUN: llc -O0 -mtriple=x86_64-linux -asm-verbose=false < %s | FileCheck %s +; RUN: llc -O0 -mtriple=x86_64-win32 -asm-verbose=false < %s | FileCheck %s +; rdar://8337108 + +; Fast-isel shouldn't try to look through the compare because it's in a +; different basic block, so its operands aren't necessarily exported +; for cross-block usage. + +; CHECK: movb %al, [[OFS:[0-9]*]](%rsp) +; CHECK: callq {{_?}}bar +; CHECK: movb [[OFS]](%rsp), %al + +declare void @bar() + +define void @foo(i32 %a, i32 %b) nounwind { +entry: + %q = add i32 %a, 7 + %r = add i32 %b, 9 + %t = icmp ult i32 %q, %r + invoke void @bar() to label %next unwind label %unw +next: + br i1 %t, label %true, label %return +true: + call void @bar() + br label %return +return: + ret void +unw: + %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + unreachable +} + +declare i32 @__gxx_personality_v0(...)
diff --git a/src/LLVM/test/CodeGen/X86/fast-isel-constpool.ll b/src/LLVM/test/CodeGen/X86/fast-isel-constpool.ll new file mode 100644 index 0000000..323c853 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fast-isel-constpool.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -fast-isel | grep {LCPI0_0(%rip)} +; Make sure fast isel uses rip-relative addressing when required. +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-apple-darwin9.0" + +define i32 @f0(double %x) nounwind { +entry: + %retval = alloca i32 ; <i32*> [#uses=2] + %x.addr = alloca double ; <double*> [#uses=2] + store double %x, double* %x.addr + %tmp = load double* %x.addr ; <double> [#uses=1] + %cmp = fcmp olt double %tmp, 8.500000e-01 ; <i1> [#uses=1] + %conv = zext i1 %cmp to i32 ; <i32> [#uses=1] + store i32 %conv, i32* %retval + %0 = load i32* %retval ; <i32> [#uses=1] + ret i32 %0 +}
diff --git a/src/LLVM/test/CodeGen/X86/fast-isel-extract.ll b/src/LLVM/test/CodeGen/X86/fast-isel-extract.ll new file mode 100644 index 0000000..f63396e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fast-isel-extract.ll
@@ -0,0 +1,48 @@ +; RUN: llc < %s -mtriple x86_64-apple-darwin11 -O0 -fast-isel-abort | FileCheck %s + +%struct.x = type { i64, i64 } +%addovf = type { i32, i1 } +declare %struct.x @f() + +define void @test1(i64*) nounwind ssp { + %2 = tail call %struct.x @f() nounwind + %3 = extractvalue %struct.x %2, 0 + %4 = add i64 %3, 10 + store i64 %4, i64* %0 + ret void +; CHECK: test1: +; CHECK: callq _f +; CHECK-NEXT: addq $10, %rax +} + +define void @test2(i64*) nounwind ssp { + %2 = tail call %struct.x @f() nounwind + %3 = extractvalue %struct.x %2, 1 + %4 = add i64 %3, 10 + store i64 %4, i64* %0 + ret void +; CHECK: test2: +; CHECK: callq _f +; CHECK-NEXT: addq $10, %rdx +} + +declare %addovf @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone + +define void @test3(i32 %x, i32 %y, i32* %z) { + %r = call %addovf @llvm.sadd.with.overflow.i32(i32 %x, i32 %y) + %sum = extractvalue %addovf %r, 0 + %sum3 = mul i32 %sum, 3 + %bit = extractvalue %addovf %r, 1 + br i1 %bit, label %then, label %end + +then: + store i32 %sum3, i32* %z + br label %end + +end: + ret void +; CHECK: test3 +; CHECK: addl +; CHECK: seto %al +; CHECK: testb $1, %al +}
diff --git a/src/LLVM/test/CodeGen/X86/fast-isel-fneg.ll b/src/LLVM/test/CodeGen/X86/fast-isel-fneg.ll new file mode 100644 index 0000000..f42a4a2 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fast-isel-fneg.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -fast-isel -fast-isel-abort -mtriple=x86_64-apple-darwin10 | FileCheck %s +; RUN: llc < %s -fast-isel -march=x86 -mattr=+sse2 | grep xor | count 2 + +; CHECK: doo: +; CHECK: xor +define double @doo(double %x) nounwind { + %y = fsub double -0.0, %x + ret double %y +} + +; CHECK: foo: +; CHECK: xor +define float @foo(float %x) nounwind { + %y = fsub float -0.0, %x + ret float %y +}
diff --git a/src/LLVM/test/CodeGen/X86/fast-isel-gep.ll b/src/LLVM/test/CodeGen/X86/fast-isel-gep.ll new file mode 100644 index 0000000..91d1f5d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fast-isel-gep.ll
@@ -0,0 +1,139 @@ +; RUN: llc < %s -mtriple=x86_64-linux -O0 | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-win32 -O0 | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -march=x86 -O0 | FileCheck %s --check-prefix=X32 + +; GEP indices are interpreted as signed integers, so they +; should be sign-extended to 64 bits on 64-bit targets. +; PR3181 +define i32 @test1(i32 %t3, i32* %t1) nounwind { + %t9 = getelementptr i32* %t1, i32 %t3 ; <i32*> [#uses=1] + %t15 = load i32* %t9 ; <i32> [#uses=1] + ret i32 %t15 +; X32: test1: +; X32: movl (%eax,%ecx,4), %eax +; X32: ret + +; X64: test1: +; X64: movslq %e[[A0:di|cx]], %rax +; X64: movl (%r[[A1:si|dx]],%rax,4), %eax +; X64: ret + +} +define i32 @test2(i64 %t3, i32* %t1) nounwind { + %t9 = getelementptr i32* %t1, i64 %t3 ; <i32*> [#uses=1] + %t15 = load i32* %t9 ; <i32> [#uses=1] + ret i32 %t15 +; X32: test2: +; X32: movl (%edx,%ecx,4), %e +; X32: ret + +; X64: test2: +; X64: movl (%r[[A1]],%r[[A0]],4), %eax +; X64: ret +} + + + +; PR4984 +define i8 @test3(i8* %start) nounwind { +entry: + %A = getelementptr i8* %start, i64 -2 ; <i8*> [#uses=1] + %B = load i8* %A, align 1 ; <i8> [#uses=1] + ret i8 %B + + +; X32: test3: +; X32: movl 4(%esp), %eax +; X32: movb -2(%eax), %al +; X32: ret + +; X64: test3: +; X64: movb -2(%r[[A0]]), %al +; X64: ret + +} + +define double @test4(i64 %x, double* %p) nounwind { +entry: + %x.addr = alloca i64, align 8 ; <i64*> [#uses=2] + %p.addr = alloca double*, align 8 ; <double**> [#uses=2] + store i64 %x, i64* %x.addr + store double* %p, double** %p.addr + %tmp = load i64* %x.addr ; <i64> [#uses=1] + %add = add nsw i64 %tmp, 16 ; <i64> [#uses=1] + %tmp1 = load double** %p.addr ; <double*> [#uses=1] + %arrayidx = getelementptr inbounds double* %tmp1, i64 %add ; <double*> [#uses=1] + %tmp2 = load double* %arrayidx ; <double> [#uses=1] + ret double %tmp2 + +; X32: test4: +; X32: 128(%e{{.*}},%e{{.*}},8) +; X64: test4: +; X64: 128(%r{{.*}},%r{{.*}},8) +} + +; PR8961 - Make sure the 
sext for the GEP addressing comes before the load that +; is folded. +define i64 @test5(i8* %A, i32 %I, i64 %B) nounwind { + %v8 = getelementptr i8* %A, i32 %I + %v9 = bitcast i8* %v8 to i64* + %v10 = load i64* %v9 + %v11 = add i64 %B, %v10 + ret i64 %v11 +; X64: test5: +; X64: movslq %e[[A1]], %rax +; X64-NEXT: movq (%r[[A0]],%rax), %rax +; X64-NEXT: addq %{{rdx|r8}}, %rax +; X64-NEXT: ret +} + +; PR9500, rdar://9156159 - Don't do non-local address mode folding, +; because it may require values which wouldn't otherwise be live out +; of their blocks. +define void @test6() { +if.end: ; preds = %if.then, %invoke.cont + %tmp15 = load i64* undef + %dec = add i64 %tmp15, 13 + store i64 %dec, i64* undef + %call17 = invoke i8* @_ZNK18G__FastAllocString4dataEv() + to label %invoke.cont16 unwind label %lpad + +invoke.cont16: ; preds = %if.then14 + %arrayidx18 = getelementptr inbounds i8* %call17, i64 %dec + store i8 0, i8* %arrayidx18 + unreachable + +lpad: ; preds = %if.end19, %if.then14, %if.end, %entry + %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + unreachable +} +declare i8* @_ZNK18G__FastAllocString4dataEv() nounwind + + +; PR10605 / rdar://9930964 - Don't fold loads incorrectly. The load should +; happen before the store. +define i32 @test7({i32,i32,i32}* %tmp1, i32 %tmp71, i32 %tmp63) nounwind { +; X64: test7: +; X64: movl 8({{%rdi|%rcx}}), %eax +; X64: movl $4, 8({{%rdi|%rcx}}) + + + %tmp29 = getelementptr inbounds {i32,i32,i32}* %tmp1, i32 0, i32 2 + %tmp30 = load i32* %tmp29, align 4 + + %p2 = getelementptr inbounds {i32,i32,i32}* %tmp1, i32 0, i32 2 + store i32 4, i32* %p2 + + %tmp72 = or i32 %tmp71, %tmp30 + %tmp73 = icmp ne i32 %tmp63, 32 + br i1 %tmp73, label %T, label %F + +T: + ret i32 %tmp72 + +F: + ret i32 4 +} + +declare i32 @__gxx_personality_v0(...)
diff --git a/src/LLVM/test/CodeGen/X86/fast-isel-gv.ll b/src/LLVM/test/CodeGen/X86/fast-isel-gv.ll new file mode 100644 index 0000000..34f8b38 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fast-isel-gv.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -fast-isel | grep {_kill@GOTPCREL(%rip)} +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-apple-darwin10.0" +@f = global i8 (...)* @kill ; <i8 (...)**> [#uses=1] + +declare signext i8 @kill(...) + +define i32 @main() nounwind ssp { +entry: + %retval = alloca i32 ; <i32*> [#uses=2] + %0 = alloca i32 ; <i32*> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + %1 = load i8 (...)** @f, align 8 ; <i8 (...)*> [#uses=1] + %2 = icmp ne i8 (...)* %1, @kill ; <i1> [#uses=1] + %3 = zext i1 %2 to i32 ; <i32> [#uses=1] + store i32 %3, i32* %0, align 4 + %4 = load i32* %0, align 4 ; <i32> [#uses=1] + store i32 %4, i32* %retval, align 4 + br label %return + +return: ; preds = %entry + %retval1 = load i32* %retval ; <i32> [#uses=1] + ret i32 %retval1 +}
diff --git a/src/LLVM/test/CodeGen/X86/fast-isel-i1.ll b/src/LLVM/test/CodeGen/X86/fast-isel-i1.ll new file mode 100644 index 0000000..bea18a1 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fast-isel-i1.ll
@@ -0,0 +1,41 @@ +; RUN: llc < %s -mtriple=i686-apple-darwin10 -fast-isel -fast-isel-abort | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -fast-isel -fast-isel-abort | FileCheck %s + +declare i32 @test1a(i32) + +define i32 @test1(i32 %x) nounwind { +; CHECK: test1: +; CHECK: andb $1, % + %y = add i32 %x, -3 + %t = call i32 @test1a(i32 %y) + %s = mul i32 %t, 77 + %z = trunc i32 %s to i1 + br label %next + +next: ; preds = %0 + %u = zext i1 %z to i32 + %v = add i32 %u, 1999 + br label %exit + +exit: ; preds = %next + ret i32 %v +} + +define void @test2(i8* %a) nounwind { +entry: +; CHECK: test2: +; CHECK: movb {{.*}} %al +; CHECK-NEXT: xorb $1, %al +; CHECK-NEXT: testb $1 + %tmp = load i8* %a, align 1 + %tobool = trunc i8 %tmp to i1 + %tobool2 = xor i1 %tobool, true + br i1 %tobool2, label %if.then, label %if.end + +if.then: + call void @test2(i8* null) + br label %if.end + +if.end: + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/fast-isel-mem.ll b/src/LLVM/test/CodeGen/X86/fast-isel-mem.ll new file mode 100644 index 0000000..8db1936 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fast-isel-mem.ll
@@ -0,0 +1,34 @@ +; RUN: llc < %s -fast-isel -mtriple=i386-apple-darwin | FileCheck %s + +@src = external global i32 + +; rdar://6653118 +define i32 @loadgv() nounwind { +entry: + %0 = load i32* @src, align 4 + %1 = load i32* @src, align 4 + %2 = add i32 %0, %1 + store i32 %2, i32* @src + ret i32 %2 +; This should fold one of the loads into the add. +; CHECK: loadgv: +; CHECK: movl L_src$non_lazy_ptr, %ecx +; CHECK: movl (%ecx), %eax +; CHECK: addl (%ecx), %eax +; CHECK: movl %eax, (%ecx) +; CHECK: ret + +} + +%stuff = type { i32 (...)** } +@LotsStuff = external constant [4 x i32 (...)*] + +define void @t(%stuff* %this) nounwind { +entry: + store i32 (...)** getelementptr ([4 x i32 (...)*]* @LotsStuff, i32 0, i32 2), i32 (...)*** null, align 4 + ret void +; CHECK: _t: +; CHECK: movl $0, %eax +; CHECK: movl L_LotsStuff$non_lazy_ptr, %ecx + +}
diff --git a/src/LLVM/test/CodeGen/X86/fast-isel-ret-ext.ll b/src/LLVM/test/CodeGen/X86/fast-isel-ret-ext.ll new file mode 100644 index 0000000..fd768cb --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fast-isel-ret-ext.ll
@@ -0,0 +1,38 @@ +; RUN: llc < %s -O0 -fast-isel-abort -mtriple i686-apple-darwin10 | FileCheck %s +; RUN: llc < %s -O0 -fast-isel-abort -mtriple x86_64-apple-darwin10 | FileCheck %s + +define zeroext i8 @test1(i32 %y) nounwind { + %conv = trunc i32 %y to i8 + ret i8 %conv + ; CHECK: test1: + ; CHECK: movzbl {{.*}}, %eax +} + +define signext i8 @test2(i32 %y) nounwind { + %conv = trunc i32 %y to i8 + ret i8 %conv + ; CHECK: test2: + ; CHECK: movsbl {{.*}}, %eax +} + +define zeroext i16 @test3(i32 %y) nounwind { + %conv = trunc i32 %y to i16 + ret i16 %conv + ; CHECK: test3: + ; CHECK: movzwl {{.*}}, %eax +} + +define signext i16 @test4(i32 %y) nounwind { + %conv = trunc i32 %y to i16 + ret i16 %conv + ; CHECK: test4: + ; CHECK: movswl {{.*}}, %eax +} + +define zeroext i1 @test5(i32 %y) nounwind { + %conv = trunc i32 %y to i1 + ret i1 %conv + ; CHECK: test5: + ; CHECK: andb $1 + ; CHECK: movzbl {{.*}}, %eax +}
diff --git a/src/LLVM/test/CodeGen/X86/fast-isel-tailcall.ll b/src/LLVM/test/CodeGen/X86/fast-isel-tailcall.ll new file mode 100644 index 0000000..c3e527c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fast-isel-tailcall.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -fast-isel -tailcallopt -march=x86 | not grep add +; PR4154 + +; On x86, -tailcallopt changes the ABI so the caller shouldn't readjust +; the stack pointer after the call in this code. + +define i32 @stub(i8* %t0) nounwind { +entry: + %t1 = load i32* inttoptr (i32 139708680 to i32*) ; <i32> [#uses=1] + %t2 = bitcast i8* %t0 to i32 (i32)* ; <i32 (i32)*> [#uses=1] + %t3 = call fastcc i32 %t2(i32 %t1) ; <i32> [#uses=1] + ret i32 %t3 +}
diff --git a/src/LLVM/test/CodeGen/X86/fast-isel-tls.ll b/src/LLVM/test/CodeGen/X86/fast-isel-tls.ll new file mode 100644 index 0000000..0963c52 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fast-isel-tls.ll
@@ -0,0 +1,26 @@ +; RUN: llc < %s -march=x86 -relocation-model=pic -mtriple=i686-unknown-linux-gnu -fast-isel | FileCheck %s +; PR3654 + +@v = thread_local global i32 0 +define i32 @f() nounwind { +entry: + %t = load i32* @v + %s = add i32 %t, 1 + ret i32 %s +} + +; CHECK: f: +; CHECK: leal v@TLSGD +; CHECK: __tls_get_addr + +@alias = alias internal i32* @v +define i32 @f_alias() nounwind { +entry: + %t = load i32* @v + %s = add i32 %t, 1 + ret i32 %s +} + +; CHECK: f_alias: +; CHECK: leal v@TLSGD +; CHECK: __tls_get_addr
diff --git a/src/LLVM/test/CodeGen/X86/fast-isel-x86-64.ll b/src/LLVM/test/CodeGen/X86/fast-isel-x86-64.ll new file mode 100644 index 0000000..6a5a102 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fast-isel-x86-64.ll
@@ -0,0 +1,285 @@ +; RUN: llc < %s -fast-isel -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +; Make sure that fast-isel folds the immediate into the binop even though it +; is non-canonical. +define i32 @test1(i32 %i) nounwind ssp { + %and = and i32 8, %i + ret i32 %and +} + +; CHECK: test1: +; CHECK: andl $8, + + +; rdar://9289512 - The load should fold into the compare. +define void @test2(i64 %x) nounwind ssp { +entry: + %x.addr = alloca i64, align 8 + store i64 %x, i64* %x.addr, align 8 + %tmp = load i64* %x.addr, align 8 + %cmp = icmp sgt i64 %tmp, 42 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +; CHECK: test2: +; CHECK: movq %rdi, -8(%rsp) +; CHECK: cmpq $42, -8(%rsp) +} + + + + +@G = external global i32 +define i64 @test3() nounwind { + %A = ptrtoint i32* @G to i64 + ret i64 %A +; CHECK: test3: +; CHECK: movq _G@GOTPCREL(%rip), %rax +; CHECK-NEXT: ret +} + + + +; rdar://9289558 +@rtx_length = external global [153 x i8] + +define i32 @test4(i64 %idxprom9) nounwind { + %arrayidx10 = getelementptr inbounds [153 x i8]* @rtx_length, i32 0, i64 %idxprom9 + %tmp11 = load i8* %arrayidx10, align 1 + %conv = zext i8 %tmp11 to i32 + ret i32 %conv + +; CHECK: test4: +; CHECK: movq _rtx_length@GOTPCREL(%rip), %rax +; CHECK-NEXT: movzbl (%rax,%rdi), %eax +; CHECK-NEXT: ret +} + + +; PR3242 - Out of range shifts should not be folded by fastisel. +define void @test5(i32 %x, i32* %p) nounwind { + %y = ashr i32 %x, 50000 + store i32 %y, i32* %p + ret void + +; CHECK: test5: +; CHECK: movl $50000, %ecx +; CHECK: sarl %cl, %edi +; CHECK: ret +} + +; rdar://9289501 - fast isel should fold trivial multiplies to shifts. 
+define i64 @test6(i64 %x) nounwind ssp { +entry: + %mul = mul nsw i64 %x, 8 + ret i64 %mul + +; CHECK: test6: +; CHECK: leaq (,%rdi,8), %rax +} + +define i32 @test7(i32 %x) nounwind ssp { +entry: + %mul = mul nsw i32 %x, 8 + ret i32 %mul +; CHECK: test7: +; CHECK: leal (,%rdi,8), %eax +} + + +; rdar://9289507 - folding of immediates into 64-bit operations. +define i64 @test8(i64 %x) nounwind ssp { +entry: + %add = add nsw i64 %x, 7 + ret i64 %add + +; CHECK: test8: +; CHECK: addq $7, %rdi +} + +define i64 @test9(i64 %x) nounwind ssp { +entry: + %add = mul nsw i64 %x, 7 + ret i64 %add +; CHECK: test9: +; CHECK: imulq $7, %rdi, %rax +} + +; rdar://9297011 - Don't reject udiv by a power of 2. +define i32 @test10(i32 %X) nounwind { + %Y = udiv i32 %X, 8 + ret i32 %Y +; CHECK: test10: +; CHECK: shrl $3, +} + +define i32 @test11(i32 %X) nounwind { + %Y = sdiv exact i32 %X, 8 + ret i32 %Y +; CHECK: test11: +; CHECK: sarl $3, +} + + +; rdar://9297006 - Trunc to bool. +define void @test12(i8 %tmp) nounwind ssp noredzone { +entry: + %tobool = trunc i8 %tmp to i1 + br i1 %tobool, label %if.then, label %if.end + +if.then: ; preds = %entry + call void @test12(i8 0) noredzone + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +; CHECK: test12: +; CHECK: testb $1, +; CHECK-NEXT: je L +; CHECK-NEXT: movl $0, %edi +; CHECK-NEXT: callq +} + +declare void @test13f(i1 %X) + +define void @test13() nounwind { + call void @test13f(i1 0) + ret void +; CHECK: test13: +; CHECK: movl $0, %edi +; CHECK-NEXT: callq +} + + + +; rdar://9297003 - fast isel bails out on all functions taking bools +define void @test14(i8 %tmp) nounwind ssp noredzone { +entry: + %tobool = trunc i8 %tmp to i1 + call void @test13f(i1 zeroext %tobool) noredzone + ret void +; CHECK: test14: +; CHECK: andb $1, +; CHECK: callq +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1) + +; rdar://9289488 - fast-isel shouldn't bail out on llvm.memcpy +define void @test15(i8* %a, i8* %b) 
nounwind { + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 4, i32 4, i1 false) + ret void +; CHECK: test15: +; CHECK-NEXT: movl (%rsi), %eax +; CHECK-NEXT: movl %eax, (%rdi) +; CHECK-NEXT: ret +} + +; Handling for varargs calls +declare void @test16callee(...) nounwind +define void @test16() nounwind { +; CHECK: test16: +; CHECK: movl $1, %edi +; CHECK: movb $0, %al +; CHECK: callq _test16callee + call void (...)* @test16callee(i32 1) + br label %block2 + +block2: +; CHECK: movabsq $1 +; CHECK: cvtsi2sdq {{.*}} %xmm0 +; CHECK: movb $1, %al +; CHECK: callq _test16callee + call void (...)* @test16callee(double 1.000000e+00) + ret void +} + + +declare void @foo() unnamed_addr ssp align 2 + +; Verify that we don't fold the load into the compare here. That would move it +; w.r.t. the call. +define i32 @test17(i32 *%P) ssp nounwind { +entry: + %tmp = load i32* %P + %cmp = icmp ne i32 %tmp, 5 + call void @foo() + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + ret i32 1 + +if.else: ; preds = %entry + ret i32 2 +; CHECK: test17: +; CHECK: movl (%rdi), %eax +; CHECK: callq _foo +; CHECK: cmpl $5, %eax +; CHECK-NEXT: je +} + +; Check that 0.0 is materialized using pxor +define void @test18(float* %p1) { + store float 0.0, float* %p1 + ret void +; CHECK: test18: +; CHECK: pxor +} +define void @test19(double* %p1) { + store double 0.0, double* %p1 + ret void +; CHECK: test19: +; CHECK: pxor +} + +; Check that we fast-isel sret +%struct.a = type { i64, i64, i64 } +define void @test20() nounwind ssp { +entry: + %tmp = alloca %struct.a, align 8 + call void @test20sret(%struct.a* sret %tmp) + ret void +; CHECK: test20: +; CHECK: leaq (%rsp), %rdi +; CHECK: callq _test20sret +} +declare void @test20sret(%struct.a* sret) + +; Check that -0.0 is not materialized using pxor +define void @test21(double* %p1) { + store double -0.0, double* %p1 + ret void +; CHECK: test21: +; CHECK-NOT: pxor +; CHECK: movsd LCPI +} + +; Check that immediate arguments 
to a function +; do not cause massive spilling and are used +; as immediates just before the call. +define void @test22() nounwind { +entry: + call void @foo22(i32 0) + call void @foo22(i32 1) + call void @foo22(i32 2) + call void @foo22(i32 3) + ret void +; CHECK: test22: +; CHECK: movl $0, %edi +; CHECK: callq _foo22 +; CHECK: movl $1, %edi +; CHECK: callq _foo22 +; CHECK: movl $2, %edi +; CHECK: callq _foo22 +; CHECK: movl $3, %edi +; CHECK: callq _foo22 +} + +declare void @foo22(i32)
diff --git a/src/LLVM/test/CodeGen/X86/fast-isel-x86.ll b/src/LLVM/test/CodeGen/X86/fast-isel-x86.ll new file mode 100644 index 0000000..19972f7 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fast-isel-x86.ll
@@ -0,0 +1,48 @@ +; RUN: llc -fast-isel -O0 -mtriple=i386-apple-darwin10 -relocation-model=pic < %s | FileCheck %s + +; This should use flds to set the return value. +; CHECK: test0: +; CHECK: flds +; CHECK: ret +@G = external global float +define float @test0() nounwind { + %t = load float* @G + ret float %t +} + +; This should pop 4 bytes on return. +; CHECK: test1: +; CHECK: ret $4 +define void @test1({i32, i32, i32, i32}* sret %p) nounwind { + store {i32, i32, i32, i32} zeroinitializer, {i32, i32, i32, i32}* %p + ret void +} + +; Properly initialize the pic base. +; CHECK: test2: +; CHECK-NOT: HHH +; CHECK: call{{.*}}L2$pb +; CHECK-NEXT: L2$pb: +; CHECK-NEXT: pop +; CHECK: HHH +; CHECK: ret +@HHH = external global i32 +define i32 @test2() nounwind { + %t = load i32* @HHH + ret i32 %t +} + +; Check that we fast-isel sret, and handle the callee-pops behavior correctly. +%struct.a = type { i64, i64, i64 } +define void @test3() nounwind ssp { +entry: + %tmp = alloca %struct.a, align 8 + call void @test3sret(%struct.a* sret %tmp) + ret void +; CHECK: test3: +; CHECK: subl $44 +; CHECK: leal 16(%esp) +; CHECK: calll _test3sret +; CHECK: addl $40 +} +declare void @test3sret(%struct.a* sret)
diff --git a/src/LLVM/test/CodeGen/X86/fast-isel.ll b/src/LLVM/test/CodeGen/X86/fast-isel.ll new file mode 100644 index 0000000..8391860 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fast-isel.ll
@@ -0,0 +1,110 @@ +; RUN: llc < %s -fast-isel -fast-isel-abort -march=x86 -mattr=sse2 +; RUN: llc < %s -fast-isel -fast-isel-abort -mtriple=x86_64-apple-darwin10 + +; This tests very minimal fast-isel functionality. + +define i32* @foo(i32* %p, i32* %q, i32** %z) nounwind { +entry: + %r = load i32* %p + %s = load i32* %q + %y = load i32** %z + br label %fast + +fast: + %t0 = add i32 %r, %s + %t1 = mul i32 %t0, %s + %t2 = sub i32 %t1, %s + %t3 = and i32 %t2, %s + %t4 = xor i32 %t3, 3 + %t5 = xor i32 %t4, %s + %t6 = add i32 %t5, 2 + %t7 = getelementptr i32* %y, i32 1 + %t8 = getelementptr i32* %t7, i32 %t6 + call void asm sideeffect "hello world", ""() + br label %exit + +exit: + ret i32* %t8 +} + +define void @bar(double* %p, double* %q) nounwind { +entry: + %r = load double* %p + %s = load double* %q + br label %fast + +fast: + %t0 = fadd double %r, %s + %t1 = fmul double %t0, %s + %t2 = fsub double %t1, %s + %t3 = fadd double %t2, 707.0 + br label %exit + +exit: + store double %t3, double* %q + ret void +} + +define i32 @cast() nounwind { +entry: + %tmp2 = bitcast i32 0 to i32 + ret i32 %tmp2 +} + +define void @ptrtoint_i1(i8* %p, i1* %q) nounwind { + %t = ptrtoint i8* %p to i1 + store i1 %t, i1* %q + ret void +} +define i8* @inttoptr_i1(i1 %p) nounwind { + %t = inttoptr i1 %p to i8* + ret i8* %t +} +define i32 @ptrtoint_i32(i8* %p) nounwind { + %t = ptrtoint i8* %p to i32 + ret i32 %t +} +define i8* @inttoptr_i32(i32 %p) nounwind { + %t = inttoptr i32 %p to i8* + ret i8* %t +} + +define void @trunc_i32_i8(i32 %x, i8* %p) nounwind { + %tmp1 = trunc i32 %x to i8 + store i8 %tmp1, i8* %p + ret void +} + +define void @trunc_i16_i8(i16 signext %x, i8* %p) nounwind { + %tmp1 = trunc i16 %x to i8 + store i8 %tmp1, i8* %p + ret void +} + +define void @shl_i8(i8 %a, i8 %c, i8* %p) nounwind { + %tmp = shl i8 %a, %c + store i8 %tmp, i8* %p + ret void +} + +define void @mul_i8(i8 %a, i8* %p) nounwind { + %tmp = mul i8 %a, 17 + store i8 %tmp, i8* %p + ret void +} + +define 
void @load_store_i1(i1* %p, i1* %q) nounwind { + %t = load i1* %p + store i1 %t, i1* %q + ret void +} + + +@crash_test1x = external global <2 x i32>, align 8 + +define void @crash_test1() nounwind ssp { + %tmp = load <2 x i32>* @crash_test1x, align 8 + %neg = xor <2 x i32> %tmp, <i32 -1, i32 -1> + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/fastcall-correct-mangling.ll b/src/LLVM/test/CodeGen/X86/fastcall-correct-mangling.ll new file mode 100644 index 0000000..36b82e2 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fastcall-correct-mangling.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -mtriple=i386-unknown-mingw32 | FileCheck %s + +; Check that a fastcall function gets correct mangling + +define x86_fastcallcc void @func(i64 %X, i8 %Y, i8 %G, i16 %Z) { +; CHECK: @func@20: + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/fastcc-2.ll b/src/LLVM/test/CodeGen/X86/fastcc-2.ll new file mode 100644 index 0000000..d044a2a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fastcc-2.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | grep movsd +; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | grep mov | count 1 + +define i32 @foo() nounwind { +entry: + tail call fastcc void @bar( double 1.000000e+00 ) nounwind + ret i32 0 +} + +declare fastcc void @bar(double)
diff --git a/src/LLVM/test/CodeGen/X86/fastcc-byval.ll b/src/LLVM/test/CodeGen/X86/fastcc-byval.ll new file mode 100644 index 0000000..52b3e57 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fastcc-byval.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -tailcallopt=false | grep {movl\[\[:space:\]\]*8(%esp), %eax} | count 2 +; PR3122 +; rdar://6400815 + +; byval requires a copy, even with fastcc. + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin9.5" + %struct.MVT = type { i32 } + +define fastcc i32 @bar() nounwind { + %V = alloca %struct.MVT + %a = getelementptr %struct.MVT* %V, i32 0, i32 0 + store i32 1, i32* %a + call fastcc void @foo(%struct.MVT* byval %V) nounwind + %t = load i32* %a + ret i32 %t +} + +declare fastcc void @foo(%struct.MVT* byval)
diff --git a/src/LLVM/test/CodeGen/X86/fastcc-sret.ll b/src/LLVM/test/CodeGen/X86/fastcc-sret.ll new file mode 100644 index 0000000..d457418 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fastcc-sret.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -march=x86 -tailcallopt=false | grep ret | not grep 4 + + %struct.foo = type { [4 x i32] } + +define fastcc void @bar(%struct.foo* noalias sret %agg.result) nounwind { +entry: + %tmp1 = getelementptr %struct.foo* %agg.result, i32 0, i32 0 + %tmp3 = getelementptr [4 x i32]* %tmp1, i32 0, i32 0 + store i32 1, i32* %tmp3, align 8 + ret void +} + +@dst = external global i32 + +define void @foo() nounwind { + %memtmp = alloca %struct.foo, align 4 + call fastcc void @bar( %struct.foo* sret %memtmp ) nounwind + %tmp4 = getelementptr %struct.foo* %memtmp, i32 0, i32 0 + %tmp5 = getelementptr [4 x i32]* %tmp4, i32 0, i32 0 + %tmp6 = load i32* %tmp5 + store i32 %tmp6, i32* @dst + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/fastcc.ll b/src/LLVM/test/CodeGen/X86/fastcc.ll new file mode 100644 index 0000000..705ab7b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fastcc.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 -post-RA-scheduler=false | FileCheck %s +; CHECK: movsd %xmm0, 8(%esp) +; CHECK: xorl %ecx, %ecx + +@d = external global double ; <double*> [#uses=1] +@c = external global double ; <double*> [#uses=1] +@b = external global double ; <double*> [#uses=1] +@a = external global double ; <double*> [#uses=1] + +define i32 @foo() nounwind { +entry: + %0 = load double* @d, align 8 ; <double> [#uses=1] + %1 = load double* @c, align 8 ; <double> [#uses=1] + %2 = load double* @b, align 8 ; <double> [#uses=1] + %3 = load double* @a, align 8 ; <double> [#uses=1] + tail call fastcc void @bar( i32 0, i32 1, i32 2, double 1.000000e+00, double %3, double %2, double %1, double %0 ) nounwind + ret i32 0 +} + +declare fastcc void @bar(i32, i32, i32, double, double, double, double, double)
diff --git a/src/LLVM/test/CodeGen/X86/fastcc3struct.ll b/src/LLVM/test/CodeGen/X86/fastcc3struct.ll new file mode 100644 index 0000000..84f8ef6 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fastcc3struct.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=x86 -o %t +; RUN: grep "movl .48, %ecx" %t +; RUN: grep "movl .24, %edx" %t +; RUN: grep "movl .12, %eax" %t + +%0 = type { i32, i32, i32 } + +define internal fastcc %0 @ReturnBigStruct() nounwind readnone { +entry: + %0 = insertvalue %0 zeroinitializer, i32 12, 0 + %1 = insertvalue %0 %0, i32 24, 1 + %2 = insertvalue %0 %1, i32 48, 2 + ret %0 %2 +} +
diff --git a/src/LLVM/test/CodeGen/X86/field-extract-use-trunc.ll b/src/LLVM/test/CodeGen/X86/field-extract-use-trunc.ll new file mode 100644 index 0000000..735e134 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/field-extract-use-trunc.ll
@@ -0,0 +1,39 @@ +; RUN: llc < %s -march=x86 | grep sar | count 1 +; RUN: llc < %s -march=x86-64 | not grep sar + +define i32 @test(i32 %f12) nounwind { + %tmp7.25 = lshr i32 %f12, 16 + %tmp7.26 = trunc i32 %tmp7.25 to i8 + %tmp78.2 = sext i8 %tmp7.26 to i32 + ret i32 %tmp78.2 +} + +define i32 @test2(i32 %f12) nounwind { + %f11 = shl i32 %f12, 8 + %tmp7.25 = ashr i32 %f11, 24 + ret i32 %tmp7.25 +} + +define i32 @test3(i32 %f12) nounwind { + %f11 = shl i32 %f12, 13 + %tmp7.25 = ashr i32 %f11, 24 + ret i32 %tmp7.25 +} + +define i64 @test4(i64 %f12) nounwind { + %f11 = shl i64 %f12, 32 + %tmp7.25 = ashr i64 %f11, 32 + ret i64 %tmp7.25 +} + +define i16 @test5(i16 %f12) nounwind { + %f11 = shl i16 %f12, 2 + %tmp7.25 = ashr i16 %f11, 8 + ret i16 %tmp7.25 +} + +define i16 @test6(i16 %f12) nounwind { + %f11 = shl i16 %f12, 8 + %tmp7.25 = ashr i16 %f11, 8 + ret i16 %tmp7.25 +}
diff --git a/src/LLVM/test/CodeGen/X86/fildll.ll b/src/LLVM/test/CodeGen/X86/fildll.ll new file mode 100644 index 0000000..c32ccd0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fildll.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86 -x86-asm-syntax=att -mattr=-sse2 | grep fildll | count 2 + +define fastcc double @sint64_to_fp(i64 %X) { + %R = sitofp i64 %X to double ; <double> [#uses=1] + ret double %R +} + +define fastcc double @uint64_to_fp(i64 %X) { + %R = uitofp i64 %X to double ; <double> [#uses=1] + ret double %R +} +
diff --git a/src/LLVM/test/CodeGen/X86/fltused.ll b/src/LLVM/test/CodeGen/X86/fltused.ll new file mode 100644 index 0000000..2ffcb96 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fltused.ll
@@ -0,0 +1,19 @@ +; The purpose of this test to to verify that the fltused symbol is emitted when +; any function is called with floating point arguments on Windows. And that it +; is not emitted otherwise. + +; RUN: llc < %s -mtriple i686-pc-win32 | FileCheck %s --check-prefix WIN32 +; RUN: llc < %s -mtriple x86_64-pc-win32 | FileCheck %s --check-prefix WIN64 + +@.str = private constant [4 x i8] c"%f\0A\00" + +define i32 @main() nounwind { +entry: + %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), double 1.000000e+000) nounwind + ret i32 0 +} + +declare i32 @printf(i8* nocapture, ...) nounwind + +; WIN32: .globl __fltused +; WIN64: .globl _fltused
diff --git a/src/LLVM/test/CodeGen/X86/fma.ll b/src/LLVM/test/CodeGen/X86/fma.ll new file mode 100644 index 0000000..5deedb9 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fma.ll
@@ -0,0 +1,33 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin10 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s + +; CHECK: test_f32 +; CHECK: _fmaf + +define float @test_f32(float %a, float %b, float %c) nounwind readnone ssp { +entry: + %call = tail call float @llvm.fma.f32(float %a, float %b, float %c) nounwind readnone + ret float %call +} + +; CHECK: test_f64 +; CHECK: _fma + +define double @test_f64(double %a, double %b, double %c) nounwind readnone ssp { +entry: + %call = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone + ret double %call +} + +; CHECK: test_f80 +; CHECK: _fmal + +define x86_fp80 @test_f80(x86_fp80 %a, x86_fp80 %b, x86_fp80 %c) nounwind readnone ssp { +entry: + %call = tail call x86_fp80 @llvm.fma.f80(x86_fp80 %a, x86_fp80 %b, x86_fp80 %c) nounwind readnone + ret x86_fp80 %call +} + +declare float @llvm.fma.f32(float, float, float) nounwind readnone +declare double @llvm.fma.f64(double, double, double) nounwind readnone +declare x86_fp80 @llvm.fma.f80(x86_fp80, x86_fp80, x86_fp80) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/X86/fmul-zero.ll b/src/LLVM/test/CodeGen/X86/fmul-zero.ll new file mode 100644 index 0000000..03bad65 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fmul-zero.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=x86-64 -enable-unsafe-fp-math | not grep mulps +; RUN: llc < %s -march=x86-64 | grep mulps + +define void @test14(<4 x float>*) nounwind { + load <4 x float>* %0, align 1 + fmul <4 x float> %2, zeroinitializer + store <4 x float> %3, <4 x float>* %0, align 1 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/fold-add.ll b/src/LLVM/test/CodeGen/X86/fold-add.ll new file mode 100644 index 0000000..63e7d36 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fold-add.ll
@@ -0,0 +1,30 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-apple-darwin9.6" +@prev_length = internal global i32 0 ; <i32*> [#uses=1] +@window = internal global [65536 x i8] zeroinitializer, align 32 ; <[65536 x i8]*> [#uses=1] +@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (i32)* @longest_match to i8*)] ; <[1 x i8*]*> [#uses=0] + +define fastcc i32 @longest_match(i32 %cur_match) nounwind { +; CHECK: longest_match: +; CHECK-NOT: ret +; CHECK: cmpb $0, (%r{{.*}},%r{{.*}}) +; CHECK: ret + +entry: + %0 = load i32* @prev_length, align 4 ; <i32> [#uses=3] + %1 = zext i32 %cur_match to i64 ; <i64> [#uses=1] + %2 = sext i32 %0 to i64 ; <i64> [#uses=1] + %.sum3 = add i64 %1, %2 ; <i64> [#uses=1] + %3 = getelementptr [65536 x i8]* @window, i64 0, i64 %.sum3 ; <i8*> [#uses=1] + %4 = load i8* %3, align 1 ; <i8> [#uses=1] + %5 = icmp eq i8 %4, 0 ; <i1> [#uses=1] + br i1 %5, label %bb5, label %bb23 + +bb5: ; preds = %entry + ret i32 %0 + +bb23: ; preds = %entry + ret i32 %0 +}
diff --git a/src/LLVM/test/CodeGen/X86/fold-and-shift.ll b/src/LLVM/test/CodeGen/X86/fold-and-shift.ll new file mode 100644 index 0000000..9f79f77 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fold-and-shift.ll
@@ -0,0 +1,21 @@ +; RUN: llc < %s -march=x86 | not grep and + +define i32 @t1(i8* %X, i32 %i) { +entry: + %tmp2 = shl i32 %i, 2 ; <i32> [#uses=1] + %tmp4 = and i32 %tmp2, 1020 ; <i32> [#uses=1] + %tmp7 = getelementptr i8* %X, i32 %tmp4 ; <i8*> [#uses=1] + %tmp78 = bitcast i8* %tmp7 to i32* ; <i32*> [#uses=1] + %tmp9 = load i32* %tmp78, align 4 ; <i32> [#uses=1] + ret i32 %tmp9 +} + +define i32 @t2(i16* %X, i32 %i) { +entry: + %tmp2 = shl i32 %i, 1 ; <i32> [#uses=1] + %tmp4 = and i32 %tmp2, 131070 ; <i32> [#uses=1] + %tmp7 = getelementptr i16* %X, i32 %tmp4 ; <i16*> [#uses=1] + %tmp78 = bitcast i16* %tmp7 to i32* ; <i32*> [#uses=1] + %tmp9 = load i32* %tmp78, align 4 ; <i32> [#uses=1] + ret i32 %tmp9 +}
diff --git a/src/LLVM/test/CodeGen/X86/fold-call-2.ll b/src/LLVM/test/CodeGen/X86/fold-call-2.ll new file mode 100644 index 0000000..7a2b038 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fold-call-2.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin | grep mov | count 1 + +@f = external global void ()* ; <void ()**> [#uses=1] + +define i32 @main() nounwind { +entry: + load void ()** @f, align 8 ; <void ()*>:0 [#uses=1] + tail call void %0( ) nounwind + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/X86/fold-call-3.ll b/src/LLVM/test/CodeGen/X86/fold-call-3.ll new file mode 100644 index 0000000..337a7ed --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fold-call-3.ll
@@ -0,0 +1,45 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep call | grep 560 +; rdar://6522427 + + %"struct.clang::Action" = type { %"struct.clang::ActionBase" } + %"struct.clang::ActionBase" = type { i32 (...)** } + %"struct.clang::ActionBase::ActionResult<0u>" = type { i8*, i8 } +@NumTrials = internal global i32 10000000 ; <i32*> [#uses=2] +@llvm.used = appending global [1 x i8*] [ i8* bitcast (void (i8*, %"struct.clang::Action"*)* @_Z25RawPointerPerformanceTestPvRN5clang6ActionE to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +define void @_Z25RawPointerPerformanceTestPvRN5clang6ActionE(i8* %Val, %"struct.clang::Action"* %Actions) nounwind { +entry: + %0 = alloca %"struct.clang::ActionBase::ActionResult<0u>", align 8 ; <%"struct.clang::ActionBase::ActionResult<0u>"*> [#uses=3] + %1 = load i32* @NumTrials, align 4 ; <i32> [#uses=1] + %2 = icmp eq i32 %1, 0 ; <i1> [#uses=1] + br i1 %2, label %return, label %bb.nph + +bb.nph: ; preds = %entry + %3 = getelementptr %"struct.clang::Action"* %Actions, i64 0, i32 0, i32 0 ; <i32 (...)***> [#uses=1] + %mrv_gep = bitcast %"struct.clang::ActionBase::ActionResult<0u>"* %0 to i64* ; <i64*> [#uses=1] + %mrv_gep1 = getelementptr %"struct.clang::ActionBase::ActionResult<0u>"* %0, i64 0, i32 1 ; <i8*> [#uses=1] + %4 = bitcast i8* %mrv_gep1 to i64* ; <i64*> [#uses=1] + %5 = getelementptr %"struct.clang::ActionBase::ActionResult<0u>"* %0, i64 0, i32 0 ; <i8**> [#uses=1] + br label %bb + +bb: ; preds = %bb, %bb.nph + %Trial.01 = phi i32 [ 0, %bb.nph ], [ %12, %bb ] ; <i32> [#uses=1] + %Val_addr.02 = phi i8* [ %Val, %bb.nph ], [ %11, %bb ] ; <i8*> [#uses=1] + %6 = load i32 (...)*** %3, align 8 ; <i32 (...)**> [#uses=1] + %7 = getelementptr i32 (...)** %6, i64 70 ; <i32 (...)**> [#uses=1] + %8 = load i32 (...)** %7, align 8 ; <i32 (...)*> [#uses=1] + %9 = bitcast i32 (...)* %8 to { i64, i64 } (%"struct.clang::Action"*, i8*)* ; <{ i64, i64 } (%"struct.clang::Action"*, i8*)*> [#uses=1] + %10 = call { i64, i64 } 
%9(%"struct.clang::Action"* %Actions, i8* %Val_addr.02) nounwind ; <{ i64, i64 }> [#uses=2] + %mrv_gr = extractvalue { i64, i64 } %10, 0 ; <i64> [#uses=1] + store i64 %mrv_gr, i64* %mrv_gep + %mrv_gr2 = extractvalue { i64, i64 } %10, 1 ; <i64> [#uses=1] + store i64 %mrv_gr2, i64* %4 + %11 = load i8** %5, align 8 ; <i8*> [#uses=1] + %12 = add i32 %Trial.01, 1 ; <i32> [#uses=2] + %13 = load i32* @NumTrials, align 4 ; <i32> [#uses=1] + %14 = icmp ult i32 %12, %13 ; <i1> [#uses=1] + br i1 %14, label %bb, label %return + +return: ; preds = %bb, %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/fold-call.ll b/src/LLVM/test/CodeGen/X86/fold-call.ll new file mode 100644 index 0000000..603e9ad --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fold-call.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=x86 | not grep mov +; RUN: llc < %s -march=x86-64 | not grep mov + +declare void @bar() + +define void @foo(i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, void()* %arg) nounwind { + call void @bar() + call void %arg() + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/fold-imm.ll b/src/LLVM/test/CodeGen/X86/fold-imm.ll new file mode 100644 index 0000000..f1fcbcf --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fold-imm.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=x86 | grep inc +; RUN: llc < %s -march=x86 | grep add | grep 4 + +define i32 @test(i32 %X) nounwind { +entry: + %0 = add i32 %X, 1 + ret i32 %0 +} + +define i32 @test2(i32 %X) nounwind { +entry: + %0 = add i32 %X, 4 + ret i32 %0 +}
diff --git a/src/LLVM/test/CodeGen/X86/fold-load.ll b/src/LLVM/test/CodeGen/X86/fold-load.ll new file mode 100644 index 0000000..ad9a40a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fold-load.ll
@@ -0,0 +1,47 @@ +; RUN: llc < %s -march=x86 | FileCheck %s + %struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] } + %struct.obstack = type { i32, %struct._obstack_chunk*, i8*, i8*, i8*, i32, i32, %struct._obstack_chunk* (...)*, void (...)*, i8*, i8 } +@stmt_obstack = external global %struct.obstack ; <%struct.obstack*> [#uses=1] + +; This should just not crash. +define void @test1() nounwind { +entry: + br i1 true, label %cond_true, label %cond_next + +cond_true: ; preds = %entry + %new_size.0.i = select i1 false, i32 0, i32 0 ; <i32> [#uses=1] + %tmp.i = load i32* bitcast (i8* getelementptr (%struct.obstack* @stmt_obstack, i32 0, i32 10) to i32*) ; <i32> [#uses=1] + %tmp.i.upgrd.1 = trunc i32 %tmp.i to i8 ; <i8> [#uses=1] + %tmp21.i = and i8 %tmp.i.upgrd.1, 1 ; <i8> [#uses=1] + %tmp22.i = icmp eq i8 %tmp21.i, 0 ; <i1> [#uses=1] + br i1 %tmp22.i, label %cond_false30.i, label %cond_true23.i + +cond_true23.i: ; preds = %cond_true + ret void + +cond_false30.i: ; preds = %cond_true + %tmp35.i = tail call %struct._obstack_chunk* null( i32 %new_size.0.i ) ; <%struct._obstack_chunk*> [#uses=0] + ret void + +cond_next: ; preds = %entry + ret void +} + + + +define i32 @test2(i16* %P, i16* %Q) nounwind { + %A = load i16* %P, align 4 ; <i16> [#uses=11] + %C = zext i16 %A to i32 ; <i32> [#uses=1] + %D = and i32 %C, 255 ; <i32> [#uses=1] + br label %L +L: + + store i16 %A, i16* %Q + ret i32 %D + +; CHECK: test2: +; CHECK: movl 4(%esp), %eax +; CHECK-NEXT: movzwl (%eax), %ecx + +} +
diff --git a/src/LLVM/test/CodeGen/X86/fold-mul-lohi.ll b/src/LLVM/test/CodeGen/X86/fold-mul-lohi.ll new file mode 100644 index 0000000..5614c80 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fold-mul-lohi.ll
@@ -0,0 +1,31 @@ +; RUN: llc < %s -march=x86 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; CHECK-NOT: lea + +@B = external global [1000 x i8], align 32 +@A = external global [1000 x i8], align 32 +@P = external global [1000 x i8], align 32 + +define void @foo(i32 %m) nounwind { +entry: + %tmp1 = icmp sgt i32 %m, 0 + br i1 %tmp1, label %bb, label %return + +bb: + %i.019.0 = phi i32 [ %indvar.next, %bb ], [ 0, %entry ] + %tmp2 = getelementptr [1000 x i8]* @B, i32 0, i32 %i.019.0 + %tmp3 = load i8* %tmp2, align 4 + %tmp4 = mul i8 %tmp3, 2 + %tmp5 = getelementptr [1000 x i8]* @A, i32 0, i32 %i.019.0 + store i8 %tmp4, i8* %tmp5, align 4 + %tmp8 = mul i32 %i.019.0, 9 + %tmp10 = getelementptr [1000 x i8]* @P, i32 0, i32 %tmp8 + store i8 17, i8* %tmp10, align 4 + %indvar.next = add i32 %i.019.0, 1 + %exitcond = icmp eq i32 %indvar.next, %m + br i1 %exitcond, label %return, label %bb + +return: + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/fold-pcmpeqd-0.ll b/src/LLVM/test/CodeGen/X86/fold-pcmpeqd-0.ll new file mode 100644 index 0000000..647bbdb --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fold-pcmpeqd-0.ll
@@ -0,0 +1,115 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah -regalloc=linearscan | FileCheck --check-prefix=I386 %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck --check-prefix=X86-64 %s + +; This testcase shouldn't need to spill the -1 value, +; so it should just use pcmpeqd to materialize an all-ones vector. +; For i386, cp load of -1 are folded. + +; With -regalloc=greedy, the live range is split before spilling, so the first +; pcmpeq doesn't get folded as a constant pool load. + +; I386-NOT: pcmpeqd +; I386: orps LCPI0_2, %xmm +; I386-NOT: pcmpeqd +; I386: orps LCPI0_2, %xmm + +; X86-64: pcmpeqd +; X86-64-NOT: pcmpeqd + + %struct.__ImageExecInfo = type <{ <4 x i32>, <4 x float>, <2 x i64>, i8*, i8*, i8*, i32, i32, i32, i32, i32 }> + %struct._cl_image_format_t = type <{ i32, i32, i32 }> + %struct._image2d_t = type <{ i8*, %struct._cl_image_format_t, i32, i32, i32, i32, i32, i32 }> + +define void @program_1(%struct._image2d_t* %dest, %struct._image2d_t* %t0, <4 x float> %p0, <4 x float> %p1, <4 x float> %p4, <4 x float> %p5, <4 x float> %p6) nounwind { +entry: + %tmp3.i = load i32* null ; <i32> [#uses=1] + %cmp = icmp sgt i32 %tmp3.i, 200 ; <i1> [#uses=1] + br i1 %cmp, label %forcond, label %ifthen + +ifthen: ; preds = %entry + ret void + +forcond: ; preds = %entry + %tmp3.i536 = load i32* null ; <i32> [#uses=1] + %cmp12 = icmp slt i32 0, %tmp3.i536 ; <i1> [#uses=1] + br i1 %cmp12, label %forbody, label %afterfor + +forbody: ; preds = %forcond + %bitcast204.i313 = bitcast <4 x i32> zeroinitializer to <4 x float> ; <<4 x float>> [#uses=1] + %mul233 = fmul <4 x float> %bitcast204.i313, zeroinitializer ; <<4 x float>> [#uses=1] + %mul257 = fmul <4 x float> %mul233, zeroinitializer ; <<4 x float>> [#uses=1] + %mul275 = fmul <4 x float> %mul257, zeroinitializer ; <<4 x float>> [#uses=1] + %tmp51 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %mul275, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1] + %bitcast198.i182 = 
bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>> [#uses=0] + %bitcast204.i185 = bitcast <4 x i32> zeroinitializer to <4 x float> ; <<4 x float>> [#uses=1] + %tmp69 = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> zeroinitializer) nounwind ; <<4 x i32>> [#uses=1] + %tmp70 = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %tmp69) nounwind ; <<4 x float>> [#uses=1] + %sub140.i78 = fsub <4 x float> zeroinitializer, %tmp70 ; <<4 x float>> [#uses=2] + %mul166.i86 = fmul <4 x float> zeroinitializer, %sub140.i78 ; <<4 x float>> [#uses=1] + %add167.i87 = fadd <4 x float> %mul166.i86, < float 0x3FE62ACB60000000, float 0x3FE62ACB60000000, float 0x3FE62ACB60000000, float 0x3FE62ACB60000000 > ; <<4 x float>> [#uses=1] + %mul171.i88 = fmul <4 x float> %add167.i87, %sub140.i78 ; <<4 x float>> [#uses=1] + %add172.i89 = fadd <4 x float> %mul171.i88, < float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000 > ; <<4 x float>> [#uses=1] + %bitcast176.i90 = bitcast <4 x float> %add172.i89 to <4 x i32> ; <<4 x i32>> [#uses=1] + %andnps178.i92 = and <4 x i32> %bitcast176.i90, zeroinitializer ; <<4 x i32>> [#uses=1] + %bitcast179.i93 = bitcast <4 x i32> %andnps178.i92 to <4 x float> ; <<4 x float>> [#uses=1] + %mul186.i96 = fmul <4 x float> %bitcast179.i93, zeroinitializer ; <<4 x float>> [#uses=1] + %bitcast190.i98 = bitcast <4 x float> %mul186.i96 to <4 x i32> ; <<4 x i32>> [#uses=1] + %andnps192.i100 = and <4 x i32> %bitcast190.i98, zeroinitializer ; <<4 x i32>> [#uses=1] + %xorps.i102 = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] + %orps203.i103 = or <4 x i32> %andnps192.i100, %xorps.i102 ; <<4 x i32>> [#uses=1] + %bitcast204.i104 = bitcast <4 x i32> %orps203.i103 to <4 x float> ; <<4 x float>> [#uses=1] + %cmple.i = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> zeroinitializer, <4 x float> %tmp51, i8 2) nounwind ; <<4 x float>> [#uses=1] + %tmp80 = call <4 x float> 
@llvm.x86.sse2.cvtdq2ps(<4 x i32> zeroinitializer) nounwind ; <<4 x float>> [#uses=1] + %sub140.i = fsub <4 x float> zeroinitializer, %tmp80 ; <<4 x float>> [#uses=1] + %bitcast148.i = bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>> [#uses=1] + %andnps150.i = and <4 x i32> %bitcast148.i, < i32 -2139095041, i32 -2139095041, i32 -2139095041, i32 -2139095041 > ; <<4 x i32>> [#uses=0] + %mul171.i = fmul <4 x float> zeroinitializer, %sub140.i ; <<4 x float>> [#uses=1] + %add172.i = fadd <4 x float> %mul171.i, < float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000 > ; <<4 x float>> [#uses=1] + %bitcast176.i = bitcast <4 x float> %add172.i to <4 x i32> ; <<4 x i32>> [#uses=1] + %andnps178.i = and <4 x i32> %bitcast176.i, zeroinitializer ; <<4 x i32>> [#uses=1] + %bitcast179.i = bitcast <4 x i32> %andnps178.i to <4 x float> ; <<4 x float>> [#uses=1] + %mul186.i = fmul <4 x float> %bitcast179.i, zeroinitializer ; <<4 x float>> [#uses=1] + %bitcast189.i = bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>> [#uses=0] + %bitcast190.i = bitcast <4 x float> %mul186.i to <4 x i32> ; <<4 x i32>> [#uses=1] + %andnps192.i = and <4 x i32> %bitcast190.i, zeroinitializer ; <<4 x i32>> [#uses=1] + %bitcast198.i = bitcast <4 x float> %cmple.i to <4 x i32> ; <<4 x i32>> [#uses=1] + %xorps.i = xor <4 x i32> %bitcast198.i, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] + %orps203.i = or <4 x i32> %andnps192.i, %xorps.i ; <<4 x i32>> [#uses=1] + %bitcast204.i = bitcast <4 x i32> %orps203.i to <4 x float> ; <<4 x float>> [#uses=1] + %mul307 = fmul <4 x float> %bitcast204.i185, zeroinitializer ; <<4 x float>> [#uses=1] + %mul310 = fmul <4 x float> %bitcast204.i104, zeroinitializer ; <<4 x float>> [#uses=2] + %mul313 = fmul <4 x float> %bitcast204.i, zeroinitializer ; <<4 x float>> [#uses=1] + %tmp82 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul307, <4 x float> zeroinitializer) nounwind ; <<4 x 
float>> [#uses=1] + %bitcast11.i15 = bitcast <4 x float> %tmp82 to <4 x i32> ; <<4 x i32>> [#uses=1] + %andnps.i17 = and <4 x i32> %bitcast11.i15, zeroinitializer ; <<4 x i32>> [#uses=1] + %orps.i18 = or <4 x i32> %andnps.i17, zeroinitializer ; <<4 x i32>> [#uses=1] + %bitcast17.i19 = bitcast <4 x i32> %orps.i18 to <4 x float> ; <<4 x float>> [#uses=1] + %tmp83 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul310, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1] + %bitcast.i3 = bitcast <4 x float> %mul310 to <4 x i32> ; <<4 x i32>> [#uses=1] + %bitcast6.i4 = bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>> [#uses=2] + %andps.i5 = and <4 x i32> %bitcast.i3, %bitcast6.i4 ; <<4 x i32>> [#uses=1] + %bitcast11.i6 = bitcast <4 x float> %tmp83 to <4 x i32> ; <<4 x i32>> [#uses=1] + %not.i7 = xor <4 x i32> %bitcast6.i4, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] + %andnps.i8 = and <4 x i32> %bitcast11.i6, %not.i7 ; <<4 x i32>> [#uses=1] + %orps.i9 = or <4 x i32> %andnps.i8, %andps.i5 ; <<4 x i32>> [#uses=1] + %bitcast17.i10 = bitcast <4 x i32> %orps.i9 to <4 x float> ; <<4 x float>> [#uses=1] + %bitcast.i = bitcast <4 x float> %mul313 to <4 x i32> ; <<4 x i32>> [#uses=1] + %andps.i = and <4 x i32> %bitcast.i, zeroinitializer ; <<4 x i32>> [#uses=1] + %orps.i = or <4 x i32> zeroinitializer, %andps.i ; <<4 x i32>> [#uses=1] + %bitcast17.i = bitcast <4 x i32> %orps.i to <4 x float> ; <<4 x float>> [#uses=1] + call void null(<4 x float> %bitcast17.i19, <4 x float> %bitcast17.i10, <4 x float> %bitcast17.i, <4 x float> zeroinitializer, %struct.__ImageExecInfo* null, <4 x i32> zeroinitializer) nounwind + unreachable + +afterfor: ; preds = %forcond + ret void +} + +declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone + +declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone + +declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone + +declare <4 x float> 
@llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone + +declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/X86/fold-pcmpeqd-1.ll b/src/LLVM/test/CodeGen/X86/fold-pcmpeqd-1.ll new file mode 100644 index 0000000..cc4198d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fold-pcmpeqd-1.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 > %t +; RUN: grep pcmpeqd %t | count 1 +; RUN: grep xor %t | count 1 +; RUN: not grep LCP %t + +define <2 x double> @foo() nounwind { + ret <2 x double> bitcast (<2 x i64><i64 -1, i64 -1> to <2 x double>) +} +define <2 x double> @bar() nounwind { + ret <2 x double> bitcast (<2 x i64><i64 0, i64 0> to <2 x double>) +}
diff --git a/src/LLVM/test/CodeGen/X86/fold-pcmpeqd-2.ll b/src/LLVM/test/CodeGen/X86/fold-pcmpeqd-2.ll new file mode 100644 index 0000000..9f8d990 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fold-pcmpeqd-2.ll
@@ -0,0 +1,94 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah -regalloc=linearscan | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -regalloc=linearscan | FileCheck %s + +; This testcase should need to spill the -1 value on both x86-32 and x86-64, +; so it shouldn't use pcmpeqd to materialize an all-ones vector; it +; should use a constant-pool load instead. + +; Constant pool all-ones vector: +; CHECK: .long 4294967295 +; CHECK-NEXT: .long 4294967295 +; CHECK-NEXT: .long 4294967295 +; CHECK-NEXT: .long 4294967295 + +; No pcmpeqd instructions, everybody uses the constant pool. +; CHECK: program_1: +; CHECK-NOT: pcmpeqd + + %struct.__ImageExecInfo = type <{ <4 x i32>, <4 x float>, <2 x i64>, i8*, i8*, i8*, i32, i32, i32, i32, i32 }> + %struct._cl_image_format_t = type <{ i32, i32, i32 }> + %struct._image2d_t = type <{ i8*, %struct._cl_image_format_t, i32, i32, i32, i32, i32, i32 }> + +define void @program_1(%struct._image2d_t* %dest, %struct._image2d_t* %t0, <4 x float> %p0, <4 x float> %p1, <4 x float> %p4, <4 x float> %p5, <4 x float> %p6) nounwind { +entry: + %tmp3.i = load i32* null ; <i32> [#uses=1] + %cmp = icmp slt i32 0, %tmp3.i ; <i1> [#uses=1] + br i1 %cmp, label %forcond, label %ifthen + +ifthen: ; preds = %entry + ret void + +forcond: ; preds = %entry + %tmp3.i536 = load i32* null ; <i32> [#uses=1] + %cmp12 = icmp slt i32 0, %tmp3.i536 ; <i1> [#uses=1] + br i1 %cmp12, label %forbody, label %afterfor + +forbody: ; preds = %forcond + %bitcast204.i104 = bitcast <4 x i32> zeroinitializer to <4 x float> ; <<4 x float>> [#uses=1] + %tmp78 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> < float 1.280000e+02, float 1.280000e+02, float 1.280000e+02, float 1.280000e+02 >, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=2] + %tmp79 = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %tmp78) nounwind ; <<4 x i32>> [#uses=1] + %tmp80 = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %tmp79) nounwind ; <<4 x float>> 
[#uses=1] + %sub140.i = fsub <4 x float> %tmp78, %tmp80 ; <<4 x float>> [#uses=2] + %mul166.i = fmul <4 x float> zeroinitializer, %sub140.i ; <<4 x float>> [#uses=1] + %add167.i = fadd <4 x float> %mul166.i, < float 0x3FE62ACB60000000, float 0x3FE62ACB60000000, float 0x3FE62ACB60000000, float 0x3FE62ACB60000000 > ; <<4 x float>> [#uses=1] + %mul171.i = fmul <4 x float> %add167.i, %sub140.i ; <<4 x float>> [#uses=1] + %add172.i = fadd <4 x float> %mul171.i, < float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000 > ; <<4 x float>> [#uses=1] + %bitcast176.i = bitcast <4 x float> %add172.i to <4 x i32> ; <<4 x i32>> [#uses=1] + %andnps178.i = and <4 x i32> %bitcast176.i, zeroinitializer ; <<4 x i32>> [#uses=1] + %bitcast179.i = bitcast <4 x i32> %andnps178.i to <4 x float> ; <<4 x float>> [#uses=1] + %mul186.i = fmul <4 x float> %bitcast179.i, zeroinitializer ; <<4 x float>> [#uses=1] + %bitcast190.i = bitcast <4 x float> %mul186.i to <4 x i32> ; <<4 x i32>> [#uses=1] + %andnps192.i = and <4 x i32> %bitcast190.i, zeroinitializer ; <<4 x i32>> [#uses=1] + %xorps.i = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] + %orps203.i = or <4 x i32> %andnps192.i, %xorps.i ; <<4 x i32>> [#uses=1] + %bitcast204.i = bitcast <4 x i32> %orps203.i to <4 x float> ; <<4 x float>> [#uses=1] + %mul310 = fmul <4 x float> %bitcast204.i104, zeroinitializer ; <<4 x float>> [#uses=2] + %mul313 = fmul <4 x float> %bitcast204.i, zeroinitializer ; <<4 x float>> [#uses=1] + %cmpunord.i11 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> zeroinitializer, <4 x float> zeroinitializer, i8 3) nounwind ; <<4 x float>> [#uses=1] + %bitcast6.i13 = bitcast <4 x float> %cmpunord.i11 to <4 x i32> ; <<4 x i32>> [#uses=2] + %andps.i14 = and <4 x i32> zeroinitializer, %bitcast6.i13 ; <<4 x i32>> [#uses=1] + %not.i16 = xor <4 x i32> %bitcast6.i13, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] + %andnps.i17 = 
and <4 x i32> zeroinitializer, %not.i16 ; <<4 x i32>> [#uses=1] + %orps.i18 = or <4 x i32> %andnps.i17, %andps.i14 ; <<4 x i32>> [#uses=1] + %bitcast17.i19 = bitcast <4 x i32> %orps.i18 to <4 x float> ; <<4 x float>> [#uses=1] + %tmp83 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul310, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1] + %bitcast.i3 = bitcast <4 x float> %mul310 to <4 x i32> ; <<4 x i32>> [#uses=1] + %andps.i5 = and <4 x i32> %bitcast.i3, zeroinitializer ; <<4 x i32>> [#uses=1] + %bitcast11.i6 = bitcast <4 x float> %tmp83 to <4 x i32> ; <<4 x i32>> [#uses=1] + %not.i7 = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] + %andnps.i8 = and <4 x i32> %bitcast11.i6, %not.i7 ; <<4 x i32>> [#uses=1] + call void null(<4 x float> %mul313, <4 x float> %cmpunord.i11, <4 x float> %tmp83, <4 x float> zeroinitializer, %struct.__ImageExecInfo* null, <4 x i32> zeroinitializer) nounwind + %orps.i9 = or <4 x i32> %andnps.i8, %andps.i5 ; <<4 x i32>> [#uses=1] + %bitcast17.i10 = bitcast <4 x i32> %orps.i9 to <4 x float> ; <<4 x float>> [#uses=1] + %tmp84 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul313, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1] + %bitcast6.i = bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>> [#uses=2] + %andps.i = and <4 x i32> zeroinitializer, %bitcast6.i ; <<4 x i32>> [#uses=1] + %bitcast11.i = bitcast <4 x float> %tmp84 to <4 x i32> ; <<4 x i32>> [#uses=1] + %not.i = xor <4 x i32> %bitcast6.i, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] + %andnps.i = and <4 x i32> %bitcast11.i, %not.i ; <<4 x i32>> [#uses=1] + %orps.i = or <4 x i32> %andnps.i, %andps.i ; <<4 x i32>> [#uses=1] + %bitcast17.i = bitcast <4 x i32> %orps.i to <4 x float> ; <<4 x float>> [#uses=1] + call void null(<4 x float> %bitcast17.i19, <4 x float> %bitcast17.i10, <4 x float> %bitcast17.i, <4 x float> zeroinitializer, %struct.__ImageExecInfo* null, <4 x i32> 
zeroinitializer) nounwind + unreachable + +afterfor: ; preds = %forcond + ret void +} + +declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone + +declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone + +declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone + +declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/X86/fold-sext-trunc.ll b/src/LLVM/test/CodeGen/X86/fold-sext-trunc.ll new file mode 100644 index 0000000..b453310 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fold-sext-trunc.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -march=x86-64 | grep movslq | count 1 +; PR4050 + + %0 = type { i64 } ; type %0 + %struct.S1 = type { i16, i32 } +@g_10 = external global %struct.S1 ; <%struct.S1*> [#uses=2] + +declare void @func_28(i64, i64) + +define void @int322(i32 %foo) nounwind { +entry: + %val = load i64* getelementptr (%0* bitcast (%struct.S1* @g_10 to %0*), i32 0, i32 0) ; <i64> [#uses=1] + %0 = load i32* getelementptr (%struct.S1* @g_10, i32 0, i32 1), align 4 ; <i32> [#uses=1] + %1 = sext i32 %0 to i64 ; <i64> [#uses=1] + %tmp4.i = lshr i64 %val, 32 ; <i64> [#uses=1] + %tmp5.i = trunc i64 %tmp4.i to i32 ; <i32> [#uses=1] + %2 = sext i32 %tmp5.i to i64 ; <i64> [#uses=1] + tail call void @func_28(i64 %2, i64 %1) nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/fold-xmm-zero.ll b/src/LLVM/test/CodeGen/X86/fold-xmm-zero.ll new file mode 100644 index 0000000..b4eeb40 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fold-xmm-zero.ll
@@ -0,0 +1,34 @@ +; RUN: llc < %s -mtriple=i386-apple-macosx10.6.7 -mattr=+sse2 | FileCheck %s + +; Simple test to make sure folding for special constants (like float zero) +; isn't completely broken. + +; CHECK: divss LCPI0 + +%0 = type { float, float, float, float, float, float, float, float } + +define void @f() nounwind ssp { +entry: + %0 = tail call %0 asm sideeffect "foo", "={xmm0},={xmm1},={xmm2},={xmm3},={xmm4},={xmm5},={xmm6},={xmm7},0,1,2,3,4,5,6,7,~{dirflag},~{fpsr},~{flags}"(float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00) nounwind + %asmresult = extractvalue %0 %0, 0 + %asmresult8 = extractvalue %0 %0, 1 + %asmresult9 = extractvalue %0 %0, 2 + %asmresult10 = extractvalue %0 %0, 3 + %asmresult11 = extractvalue %0 %0, 4 + %asmresult12 = extractvalue %0 %0, 5 + %asmresult13 = extractvalue %0 %0, 6 + %asmresult14 = extractvalue %0 %0, 7 + %div = fdiv float %asmresult, 0.000000e+00 + %1 = tail call %0 asm sideeffect "bar", "={xmm0},={xmm1},={xmm2},={xmm3},={xmm4},={xmm5},={xmm6},={xmm7},0,1,2,3,4,5,6,7,~{dirflag},~{fpsr},~{flags}"(float %div, float %asmresult8, float %asmresult9, float %asmresult10, float %asmresult11, float %asmresult12, float %asmresult13, float %asmresult14) nounwind + %asmresult24 = extractvalue %0 %1, 0 + %asmresult25 = extractvalue %0 %1, 1 + %asmresult26 = extractvalue %0 %1, 2 + %asmresult27 = extractvalue %0 %1, 3 + %asmresult28 = extractvalue %0 %1, 4 + %asmresult29 = extractvalue %0 %1, 5 + %asmresult30 = extractvalue %0 %1, 6 + %asmresult31 = extractvalue %0 %1, 7 + %div33 = fdiv float %asmresult24, 0.000000e+00 + %2 = tail call %0 asm sideeffect "baz", "={xmm0},={xmm1},={xmm2},={xmm3},={xmm4},={xmm5},={xmm6},={xmm7},0,1,2,3,4,5,6,7,~{dirflag},~{fpsr},~{flags}"(float %div33, float %asmresult25, float %asmresult26, float %asmresult27, float %asmresult28, float %asmresult29, float %asmresult30, float %asmresult31) 
nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/fold-zext-trunc.ll b/src/LLVM/test/CodeGen/X86/fold-zext-trunc.ll new file mode 100644 index 0000000..f901ad2 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fold-zext-trunc.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s | FileCheck %s +; PR9055 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +target triple = "i686-pc-linux-gnu" + +%struct.S0 = type { i32, [2 x i8], [2 x i8], [4 x i8] } + +@g_98 = common global %struct.S0 zeroinitializer, align 4 + +define void @foo() nounwind { +; CHECK: movzbl +; CHECK-NOT: movzbl +; CHECK: calll +entry: + %tmp17 = load i8* getelementptr inbounds (%struct.S0* @g_98, i32 0, i32 1, i32 0), align 4 + %tmp54 = zext i8 %tmp17 to i32 + %foo = load i32* bitcast (i8* getelementptr inbounds (%struct.S0* @g_98, i32 0, i32 1, i32 0) to i32*), align 4 + %conv.i = trunc i32 %foo to i8 + tail call void @func_12(i32 %tmp54, i8 zeroext %conv.i) nounwind + ret void +} + +declare void @func_12(i32, i8 zeroext)
diff --git a/src/LLVM/test/CodeGen/X86/force-align-stack.ll b/src/LLVM/test/CodeGen/X86/force-align-stack.ll new file mode 100644 index 0000000..ffcbf8a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/force-align-stack.ll
@@ -0,0 +1,21 @@ +; RUN: llc < %s -relocation-model=static -force-align-stack | FileCheck %s +; Tests to make sure that we always align the stack out to the minimum needed - +; in this case 16-bytes. +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" +target triple = "i386-apple-darwin10.3" + +define void @a() nounwind ssp { +entry: +; CHECK: _a: +; CHECK: andl $-16, %esp + %z = alloca <16 x i8> ; <<16 x i8>*> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store <16 x i8> zeroinitializer, <16 x i8>* %z, align 16 + call void @b(<16 x i8>* %z) nounwind + br label %return + +return: ; preds = %entry + ret void +} + +declare void @b(<16 x i8>*)
diff --git a/src/LLVM/test/CodeGen/X86/fp-elim.ll b/src/LLVM/test/CodeGen/X86/fp-elim.ll new file mode 100644 index 0000000..60892a2 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fp-elim.ll
@@ -0,0 +1,44 @@ +; RUN: llc < %s -march=x86 -asm-verbose=false | FileCheck %s -check-prefix=FP-ELIM +; RUN: llc < %s -march=x86 -asm-verbose=false -disable-fp-elim | FileCheck %s -check-prefix=NO-ELIM +; RUN: llc < %s -march=x86 -asm-verbose=false -disable-non-leaf-fp-elim | FileCheck %s -check-prefix=NON-LEAF + +; Implement -momit-leaf-frame-pointer +; rdar://7886181 + +define i32 @t1() nounwind readnone { +entry: +; FP-ELIM: t1: +; FP-ELIM-NEXT: movl +; FP-ELIM-NEXT: ret + +; NO-ELIM: t1: +; NO-ELIM-NEXT: pushl %ebp +; NO-ELIM: popl %ebp +; NO-ELIM-NEXT: ret + +; NON-LEAF: t1: +; NON-LEAF-NEXT: movl +; NON-LEAF-NEXT: ret + ret i32 10 +} + +define void @t2() nounwind { +entry: +; FP-ELIM: t2: +; FP-ELIM-NOT: pushl %ebp +; FP-ELIM: ret + +; NO-ELIM: t2: +; NO-ELIM-NEXT: pushl %ebp +; NO-ELIM: popl %ebp +; NO-ELIM-NEXT: ret + +; NON-LEAF: t2: +; NON-LEAF-NEXT: pushl %ebp +; NON-LEAF: popl %ebp +; NON-LEAF-NEXT: ret + tail call void @foo(i32 0) nounwind + ret void +} + +declare void @foo(i32)
diff --git a/src/LLVM/test/CodeGen/X86/fp-immediate-shorten.ll b/src/LLVM/test/CodeGen/X86/fp-immediate-shorten.ll new file mode 100644 index 0000000..40adabd --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fp-immediate-shorten.ll
@@ -0,0 +1,9 @@ +;; Test that this FP immediate is stored in the constant pool as a float. + +; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3 | \ +; RUN: grep {.long.1123418112} + +define double @D() { + ret double 1.230000e+02 +} +
diff --git a/src/LLVM/test/CodeGen/X86/fp-in-intregs.ll b/src/LLVM/test/CodeGen/X86/fp-in-intregs.ll new file mode 100644 index 0000000..6966cf0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fp-in-intregs.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s +; CHECK-NOT: {{((xor|and)ps|movd)}} + +; These operations should be done in integer registers, eliminating constant +; pool loads, movd's etc. + +define i32 @test1(float %x) nounwind { +entry: + %tmp2 = fsub float -0.000000e+00, %x ; <float> [#uses=1] + %tmp210 = bitcast float %tmp2 to i32 ; <i32> [#uses=1] + ret i32 %tmp210 +} + +define i32 @test2(float %x) nounwind { +entry: + %tmp2 = tail call float @copysignf( float 1.000000e+00, float %x ) nounwind readnone ; <float> [#uses=1] + %tmp210 = bitcast float %tmp2 to i32 ; <i32> [#uses=1] + ret i32 %tmp210 +} + +declare float @copysignf(float, float) nounwind readnone +
diff --git a/src/LLVM/test/CodeGen/X86/fp-stack-2results.ll b/src/LLVM/test/CodeGen/X86/fp-stack-2results.ll new file mode 100644 index 0000000..c8da9ea --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fp-stack-2results.ll
@@ -0,0 +1,66 @@ +; RUN: llc < %s -march=x86 | grep fldz +; RUN: llc < %s -march=x86-64 | grep fld1 + +%0 = type { x86_fp80, x86_fp80 } + +; This is basically this code on x86-64: +; _Complex long double test() { return 1.0; } +define %0 @test() { + %A = fpext double 1.0 to x86_fp80 + %B = fpext double 0.0 to x86_fp80 + %mrv = insertvalue %0 undef, x86_fp80 %A, 0 + %mrv1 = insertvalue %0 %mrv, x86_fp80 %B, 1 + ret %0 %mrv1 +} + + +;_test2: +; fld1 +; fld %st(0) +; ret +define %0 @test2() { + %A = fpext double 1.0 to x86_fp80 + %mrv = insertvalue %0 undef, x86_fp80 %A, 0 + %mrv1 = insertvalue %0 %mrv, x86_fp80 %A, 1 + ret %0 %mrv1 +} + +; Uses both values. +define void @call1(x86_fp80 *%P1, x86_fp80 *%P2) { + %a = call %0 @test() + %b = extractvalue %0 %a, 0 + store x86_fp80 %b, x86_fp80* %P1 + + %c = extractvalue %0 %a, 1 + store x86_fp80 %c, x86_fp80* %P2 + ret void +} + +; Uses both values, requires fxch +define void @call2(x86_fp80 *%P1, x86_fp80 *%P2) { + %a = call %0 @test() + %b = extractvalue %0 %a, 1 + store x86_fp80 %b, x86_fp80* %P1 + + %c = extractvalue %0 %a, 0 + store x86_fp80 %c, x86_fp80* %P2 + ret void +} + +; Uses ST(0), ST(1) is dead but must be popped. +define void @call3(x86_fp80 *%P1, x86_fp80 *%P2) { + %a = call %0 @test() + %b = extractvalue %0 %a, 0 + store x86_fp80 %b, x86_fp80* %P1 + ret void +} + +; Uses ST(1), ST(0) is dead and must be popped. +define void @call4(x86_fp80 *%P1, x86_fp80 *%P2) { + %a = call %0 @test() + + %c = extractvalue %0 %a, 1 + store x86_fp80 %c, x86_fp80* %P2 + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/fp-stack-O0-crash.ll b/src/LLVM/test/CodeGen/X86/fp-stack-O0-crash.ll new file mode 100644 index 0000000..ae83a02 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fp-stack-O0-crash.ll
@@ -0,0 +1,49 @@ +; RUN: llc %s -O0 -fast-isel -regalloc=fast -mcpu=i386 -o - +; PR4767 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin10" + +define void @fn(x86_fp80 %x) nounwind ssp { +entry: + %x.addr = alloca x86_fp80 ; <x86_fp80*> [#uses=5] + store x86_fp80 %x, x86_fp80* %x.addr + br i1 false, label %cond.true, label %cond.false + +cond.true: ; preds = %entry + %tmp = load x86_fp80* %x.addr ; <x86_fp80> [#uses=1] + %tmp1 = load x86_fp80* %x.addr ; <x86_fp80> [#uses=1] + %cmp = fcmp oeq x86_fp80 %tmp, %tmp1 ; <i1> [#uses=1] + br i1 %cmp, label %if.then, label %if.end + +cond.false: ; preds = %entry + %tmp2 = load x86_fp80* %x.addr ; <x86_fp80> [#uses=1] + %tmp3 = load x86_fp80* %x.addr ; <x86_fp80> [#uses=1] + %cmp4 = fcmp une x86_fp80 %tmp2, %tmp3 ; <i1> [#uses=1] + br i1 %cmp4, label %if.then, label %if.end + +if.then: ; preds = %cond.false, %cond.true + br label %if.end + +if.end: ; preds = %if.then, %cond.false, %cond.true + ret void +} + +; PR10575 +; This produces a FP0 = IMPLICIT_DEF instruction. +define void @__m_rankmerge_MOD_dindexmerge_() nounwind { +entry: + br label %"20" + +"20": ; preds = %"23", %entry + %0 = phi double [ undef, %entry ], [ %0, %"23" ] + %1 = phi double [ 0.000000e+00, %entry ], [ %2, %"23" ] + br i1 undef, label %"21", label %"23" + +"21": ; preds = %"20" + ret void + +"23": ; preds = %"20" + %2 = select i1 undef, double %0, double %1 + br label %"20" +}
diff --git a/src/LLVM/test/CodeGen/X86/fp-stack-O0.ll b/src/LLVM/test/CodeGen/X86/fp-stack-O0.ll new file mode 100644 index 0000000..b9cb5d7 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fp-stack-O0.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -O0 | FileCheck %s +target triple = "x86_64-apple-macosx" + +declare x86_fp80 @x1(i32) nounwind +declare i32 @x2(x86_fp80, x86_fp80) nounwind + +; Keep track of the return value. +; CHECK: test1 +; CHECK: x1 +; Pass arguments on the stack. +; CHECK-NEXT: movq %rsp, [[RCX:%r..]] +; Copy constant-pool value. +; CHECK-NEXT: fldt LCPI +; CHECK-NEXT: fstpt 16([[RCX]]) +; Copy x1 return value. +; CHECK-NEXT: fstpt ([[RCX]]) +; CHECK-NEXT: x2 +define i32 @test1() nounwind uwtable ssp { +entry: + %call = call x86_fp80 (...)* bitcast (x86_fp80 (i32)* @x1 to x86_fp80 (...)*)(i32 -1) + %call1 = call i32 @x2(x86_fp80 %call, x86_fp80 0xK401EFFFFFFFF00000000) + ret i32 %call1 +} +
diff --git a/src/LLVM/test/CodeGen/X86/fp-stack-compare.ll b/src/LLVM/test/CodeGen/X86/fp-stack-compare.ll new file mode 100644 index 0000000..c7fe4b3 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fp-stack-compare.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=x86 -mcpu=i386 | FileCheck %s +; PR1012 + +define float @foo(float* %col.2.0) { +; CHECK: fucompi + %tmp = load float* %col.2.0 + %tmp16 = fcmp olt float %tmp, 0.000000e+00 + %tmp20 = fsub float -0.000000e+00, %tmp + %iftmp.2.0 = select i1 %tmp16, float %tmp20, float %tmp + ret float %iftmp.2.0 +}
diff --git a/src/LLVM/test/CodeGen/X86/fp-stack-direct-ret.ll b/src/LLVM/test/CodeGen/X86/fp-stack-direct-ret.ll new file mode 100644 index 0000000..5a28bb5 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fp-stack-direct-ret.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=x86 | not grep fstp +; RUN: llc < %s -march=x86 -mcpu=yonah | not grep movsd + +declare double @foo() + +define double @bar() { +entry: + %tmp5 = tail call double @foo() + ret double %tmp5 +} +
diff --git a/src/LLVM/test/CodeGen/X86/fp-stack-ret-conv.ll b/src/LLVM/test/CodeGen/X86/fp-stack-ret-conv.ll new file mode 100644 index 0000000..f220b24 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fp-stack-ret-conv.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -mcpu=yonah | grep cvtss2sd +; RUN: llc < %s -mcpu=yonah | grep fstps +; RUN: llc < %s -mcpu=yonah | not grep cvtsd2ss + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "i686-apple-darwin8" + +define void @test(double *%b) { +entry: + %tmp13 = tail call double @foo() + %tmp1314 = fptrunc double %tmp13 to float ; <float> [#uses=1] + %tmp3940 = fpext float %tmp1314 to double ; <double> [#uses=1] + volatile store double %tmp3940, double* %b + ret void +} + +declare double @foo()
diff --git a/src/LLVM/test/CodeGen/X86/fp-stack-ret-store.ll b/src/LLVM/test/CodeGen/X86/fp-stack-ret-store.ll new file mode 100644 index 0000000..05dfc54 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fp-stack-ret-store.ll
@@ -0,0 +1,26 @@ +; RUN: llc < %s -mcpu=yonah | not grep movss +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i686-apple-darwin8" + +; This should store directly into P from the FP stack. It should not +; go through a stack slot to get there. + +define void @bar(double* %P) { +entry: + %tmp = tail call double (...)* @foo( ) ; <double> [#uses=1] + store double %tmp, double* %P, align 8 + ret void +} + +declare double @foo(...) + +define void @bar2(float* %P) { +entry: + %tmp = tail call double (...)* @foo2( ) ; <double> [#uses=1] + %tmp1 = fptrunc double %tmp to float ; <float> [#uses=1] + store float %tmp1, float* %P, align 4 + ret void +} + +declare double @foo2(...) +
diff --git a/src/LLVM/test/CodeGen/X86/fp-stack-ret.ll b/src/LLVM/test/CodeGen/X86/fp-stack-ret.ll new file mode 100644 index 0000000..bf17c52 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fp-stack-ret.ll
@@ -0,0 +1,40 @@ +; RUN: llc < %s -mtriple=i686-apple-darwin8 -mcpu=yonah -march=x86 | FileCheck %s + +; These testcases shouldn't require loading into an XMM register then storing +; to memory, then reloading into an FPStack reg. + +; CHECK: test1 +; CHECK: fldl +; CHECK-NEXT: ret +define double @test1(double *%P) { + %A = load double* %P + ret double %A +} + +; fastcc should return a value +; CHECK: test2 +; CHECK-NOT: xmm +; CHECK: ret +define fastcc double @test2(<2 x double> %A) { + %B = extractelement <2 x double> %A, i32 0 + ret double %B +} + +; CHECK: test3 +; CHECK: sub{{.*}}%esp +; CHECLK-NOT: xmm +define fastcc double @test3(<4 x float> %A) { + %B = bitcast <4 x float> %A to <2 x double> + %C = call fastcc double @test2(<2 x double> %B) + ret double %C +} + +; Clear the stack when not using a return value. +; CHECK: test4 +; CHECK: call +; CHECK: fstp +; CHECK: ret +define void @test4(double *%P) { + %A = call double @test1(double *%P) + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/fp-stack-retcopy.ll b/src/LLVM/test/CodeGen/X86/fp-stack-retcopy.ll new file mode 100644 index 0000000..67dcb18 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fp-stack-retcopy.ll
@@ -0,0 +1,12 @@ +; This should not copy the result of foo into an xmm register. +; RUN: llc < %s -march=x86 -mcpu=yonah -mtriple=i686-apple-darwin9 | not grep xmm +; rdar://5689903 + +declare double @foo() + +define double @carg({ double, double }* byval %z) nounwind { +entry: + %tmp5 = tail call double @foo() nounwind ; <double> [#uses=1] + ret double %tmp5 +} +
diff --git a/src/LLVM/test/CodeGen/X86/fp-stack-set-st1.ll b/src/LLVM/test/CodeGen/X86/fp-stack-set-st1.ll new file mode 100644 index 0000000..894897a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fp-stack-set-st1.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=x86 | grep fxch | count 2 + +define i32 @main() nounwind { +entry: + %asmtmp = tail call { double, double } asm sideeffect "fmul\09%st(1),%st\0A\09fst\09%st(1)\0A\09frndint\0A\09fxch %st(1)\0A\09fsub\09%st(1),%st\0A\09f2xm1\0A\09", "={st},={st(1)},0,1,~{dirflag},~{fpsr},~{flags}"(double 0x4030FEFBD582097D, double 4.620000e+01) nounwind ; <{ double, double }> [#uses=0] + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/fp-stack.ll b/src/LLVM/test/CodeGen/X86/fp-stack.ll new file mode 100644 index 0000000..dca644d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fp-stack.ll
@@ -0,0 +1,25 @@ +; RUN: llc %s -o - -mcpu=pentium +; PR6828 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +target triple = "i386-pc-linux-gnu" + +define void @foo() nounwind { +entry: + %tmp6 = load x86_fp80* undef ; <x86_fp80> [#uses=2] + %tmp15 = load x86_fp80* undef ; <x86_fp80> [#uses=2] + %tmp24 = load x86_fp80* undef ; <x86_fp80> [#uses=1] + br i1 undef, label %return, label %bb.nph + +bb.nph: ; preds = %entry + %cmp139 = fcmp ogt x86_fp80 %tmp15, %tmp6 ; <i1> [#uses=1] + %maxdiag.0 = select i1 %cmp139, x86_fp80 %tmp15, x86_fp80 %tmp6 ; <x86_fp80> [#uses=1] + %cmp139.1 = fcmp ogt x86_fp80 %tmp24, %maxdiag.0 ; <i1> [#uses=1] + br i1 %cmp139.1, label %sw.bb372, label %return + +sw.bb372: ; preds = %for.end + ret void + +return: ; preds = %for.end + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/fp-trunc.ll b/src/LLVM/test/CodeGen/X86/fp-trunc.ll new file mode 100644 index 0000000..170637a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fp-trunc.ll
@@ -0,0 +1,35 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2,-avx | FileCheck %s + +define <1 x float> @test1(<1 x double> %x) nounwind { +; CHECK: cvtsd2ss +; CHECK: ret + %y = fptrunc <1 x double> %x to <1 x float> + ret <1 x float> %y +} + + +define <2 x float> @test2(<2 x double> %x) nounwind { +; FIXME: It would be nice if this compiled down to a cvtpd2ps +; CHECK: cvtsd2ss +; CHECK: cvtsd2ss +; CHECK: ret + %y = fptrunc <2 x double> %x to <2 x float> + ret <2 x float> %y +} + +define <8 x float> @test3(<8 x double> %x) nounwind { +; FIXME: It would be nice if this compiled down to a series of cvtpd2ps +; CHECK: cvtsd2ss +; CHECK: cvtsd2ss +; CHECK: cvtsd2ss +; CHECK: cvtsd2ss +; CHECK: cvtsd2ss +; CHECK: cvtsd2ss +; CHECK: cvtsd2ss +; CHECK: cvtsd2ss +; CHECK: ret + %y = fptrunc <8 x double> %x to <8 x float> + ret <8 x float> %y +} + +
diff --git a/src/LLVM/test/CodeGen/X86/fp2sint.ll b/src/LLVM/test/CodeGen/X86/fp2sint.ll new file mode 100644 index 0000000..1675444 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fp2sint.ll
@@ -0,0 +1,18 @@ +;; LowerFP_TO_SINT should not create a stack object if it's not needed. + +; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep add + +define i32 @main(i32 %argc, i8** %argv) { +cond_false.i.i.i: ; preds = %bb.i5 + %tmp35.i = load double* null, align 8 ; <double> [#uses=1] + %tmp3536.i = fptosi double %tmp35.i to i32 ; <i32> [#uses=1] + %tmp3536140.i = zext i32 %tmp3536.i to i64 ; <i64> [#uses=1] + %tmp39.i = load double* null, align 4 ; <double> [#uses=1] + %tmp3940.i = fptosi double %tmp39.i to i32 ; <i32> [#uses=1] + %tmp3940137.i = zext i32 %tmp3940.i to i64 ; <i64> [#uses=1] + %tmp3940137138.i = shl i64 %tmp3940137.i, 32 ; <i64> [#uses=1] + %tmp3940137138.ins.i = or i64 %tmp3940137138.i, %tmp3536140.i ; <i64> [#uses=1] + %tmp95.i.i = trunc i64 %tmp3940137138.ins.i to i32 ; <i32> [#uses=1] + store i32 %tmp95.i.i, i32* null, align 4 + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/X86/fp_constant_op.ll b/src/LLVM/test/CodeGen/X86/fp_constant_op.ll new file mode 100644 index 0000000..b66b6ad --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fp_constant_op.ll
@@ -0,0 +1,46 @@ +; RUN: llc < %s -march=x86 -x86-asm-syntax=intel -mcpu=i486 | FileCheck %s +; Test that the load of the constant is folded into the operation. + + +define double @foo_add(double %P) { + %tmp.1 = fadd double %P, 1.230000e+02 ; <double> [#uses=1] + ret double %tmp.1 +} +; CHECK: foo_add: +; CHECK: fadd DWORD PTR + +define double @foo_mul(double %P) { + %tmp.1 = fmul double %P, 1.230000e+02 ; <double> [#uses=1] + ret double %tmp.1 +} +; CHECK: foo_mul: +; CHECK: fmul DWORD PTR + +define double @foo_sub(double %P) { + %tmp.1 = fsub double %P, 1.230000e+02 ; <double> [#uses=1] + ret double %tmp.1 +} +; CHECK: foo_sub: +; CHECK: fadd DWORD PTR + +define double @foo_subr(double %P) { + %tmp.1 = fsub double 1.230000e+02, %P ; <double> [#uses=1] + ret double %tmp.1 +} +; CHECK: foo_subr: +; CHECK: fsub QWORD PTR + +define double @foo_div(double %P) { + %tmp.1 = fdiv double %P, 1.230000e+02 ; <double> [#uses=1] + ret double %tmp.1 +} +; CHECK: foo_div: +; CHECK: fdiv DWORD PTR + +define double @foo_divr(double %P) { + %tmp.1 = fdiv double 1.230000e+02, %P ; <double> [#uses=1] + ret double %tmp.1 +} +; CHECK: foo_divr: +; CHECK: fdiv QWORD PTR +
diff --git a/src/LLVM/test/CodeGen/X86/fp_load_cast_fold.ll b/src/LLVM/test/CodeGen/X86/fp_load_cast_fold.ll new file mode 100644 index 0000000..66e2599 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fp_load_cast_fold.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -march=x86 | grep fild | not grep ESP + +define double @short(i16* %P) { + %V = load i16* %P ; <i16> [#uses=1] + %V2 = sitofp i16 %V to double ; <double> [#uses=1] + ret double %V2 +} + +define double @int(i32* %P) { + %V = load i32* %P ; <i32> [#uses=1] + %V2 = sitofp i32 %V to double ; <double> [#uses=1] + ret double %V2 +} + +define double @long(i64* %P) { + %V = load i64* %P ; <i64> [#uses=1] + %V2 = sitofp i64 %V to double ; <double> [#uses=1] + ret double %V2 +} +
diff --git a/src/LLVM/test/CodeGen/X86/fp_load_fold.ll b/src/LLVM/test/CodeGen/X86/fp_load_fold.ll new file mode 100644 index 0000000..fa9a7a1 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fp_load_fold.ll
@@ -0,0 +1,40 @@ +; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \ +; RUN: grep -i ST | not grep {fadd\\|fsub\\|fdiv\\|fmul} + +; Test that the load of the memory location is folded into the operation. + +define double @test_add(double %X, double* %P) { + %Y = load double* %P ; <double> [#uses=1] + %R = fadd double %X, %Y ; <double> [#uses=1] + ret double %R +} + +define double @test_mul(double %X, double* %P) { + %Y = load double* %P ; <double> [#uses=1] + %R = fmul double %X, %Y ; <double> [#uses=1] + ret double %R +} + +define double @test_sub(double %X, double* %P) { + %Y = load double* %P ; <double> [#uses=1] + %R = fsub double %X, %Y ; <double> [#uses=1] + ret double %R +} + +define double @test_subr(double %X, double* %P) { + %Y = load double* %P ; <double> [#uses=1] + %R = fsub double %Y, %X ; <double> [#uses=1] + ret double %R +} + +define double @test_div(double %X, double* %P) { + %Y = load double* %P ; <double> [#uses=1] + %R = fdiv double %X, %Y ; <double> [#uses=1] + ret double %R +} + +define double @test_divr(double %X, double* %P) { + %Y = load double* %P ; <double> [#uses=1] + %R = fdiv double %Y, %X ; <double> [#uses=1] + ret double %R +}
diff --git a/src/LLVM/test/CodeGen/X86/fsxor-alignment.ll b/src/LLVM/test/CodeGen/X86/fsxor-alignment.ll new file mode 100644 index 0000000..6a8dbcf --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/fsxor-alignment.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 -enable-unsafe-fp-math | \ +; RUN: grep -v sp | grep xorps | count 2 + +; Don't fold the incoming stack arguments into the xorps instructions used +; to do floating-point negations, because the arguments aren't vectors +; and aren't vector-aligned. + +define void @foo(float* %p, float* %q, float %s, float %y) { + %ss = fsub float -0.0, %s + %yy = fsub float -0.0, %y + store float %ss, float* %p + store float %yy, float* %q + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/full-lsr.ll b/src/LLVM/test/CodeGen/X86/full-lsr.ll new file mode 100644 index 0000000..ff9b1b0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/full-lsr.ll
@@ -0,0 +1,34 @@ +; RUN: llc < %s -march=x86 >%t + +; RUN: grep {addl \\\$4,} %t | count 3 +; RUN: not grep {,%} %t + +define void @foo(float* nocapture %A, float* nocapture %B, float* nocapture %C, i32 %N) nounwind { +entry: + %0 = icmp sgt i32 %N, 0 ; <i1> [#uses=1] + br i1 %0, label %bb, label %return + +bb: ; preds = %bb, %entry + %i.03 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=5] + %1 = getelementptr float* %A, i32 %i.03 ; <float*> [#uses=1] + %2 = load float* %1, align 4 ; <float> [#uses=1] + %3 = getelementptr float* %B, i32 %i.03 ; <float*> [#uses=1] + %4 = load float* %3, align 4 ; <float> [#uses=1] + %5 = fadd float %2, %4 ; <float> [#uses=1] + %6 = getelementptr float* %C, i32 %i.03 ; <float*> [#uses=1] + store float %5, float* %6, align 4 + %7 = add i32 %i.03, 10 ; <i32> [#uses=3] + %8 = getelementptr float* %A, i32 %7 ; <float*> [#uses=1] + %9 = load float* %8, align 4 ; <float> [#uses=1] + %10 = getelementptr float* %B, i32 %7 ; <float*> [#uses=1] + %11 = load float* %10, align 4 ; <float> [#uses=1] + %12 = fadd float %9, %11 ; <float> [#uses=1] + %13 = getelementptr float* %C, i32 %7 ; <float*> [#uses=1] + store float %12, float* %13, align 4 + %indvar.next = add i32 %i.03, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %indvar.next, %N ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/ga-offset.ll b/src/LLVM/test/CodeGen/X86/ga-offset.ll new file mode 100644 index 0000000..9f6d3f7 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/ga-offset.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=x86 > %t +; RUN: not grep lea %t +; RUN: not grep add %t +; RUN: grep mov %t | count 1 +; RUN: llc < %s -mtriple=x86_64-linux -relocation-model=static > %t +; RUN: not grep lea %t +; RUN: not grep add %t +; RUN: grep mov %t | count 1 + +; This store should fold to a single mov instruction. + +@ptr = global i32* null +@dst = global [131072 x i32] zeroinitializer + +define void @foo() nounwind { + store i32* getelementptr ([131072 x i32]* @dst, i32 0, i32 16), i32** @ptr + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/gather-addresses.ll b/src/LLVM/test/CodeGen/X86/gather-addresses.ll new file mode 100644 index 0000000..4a6927f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/gather-addresses.ll
@@ -0,0 +1,41 @@ +; RUN: llc -mtriple=x86_64-linux < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-win32 < %s | FileCheck %s +; rdar://7398554 + +; When doing vector gather-scatter index calculation with 32-bit indices, +; bounce the vector off of cache rather than shuffling each individual +; element out of the index vector. + +; CHECK: andps ([[H:%rdx|%r8]]), %xmm0 +; CHECK: movaps %xmm0, {{(-24)?}}(%rsp) +; CHECK: movslq {{(-24)?}}(%rsp), %rax +; CHECK: movsd ([[P:%rdi|%rcx]],%rax,8), %xmm0 +; CHECK: movslq {{-20|4}}(%rsp), %rax +; CHECK: movhpd ([[P]],%rax,8), %xmm0 +; CHECK: movslq {{-16|8}}(%rsp), %rax +; CHECK: movsd ([[P]],%rax,8), %xmm1 +; CHECK: movslq {{-12|12}}(%rsp), %rax +; CHECK: movhpd ([[P]],%rax,8), %xmm1 + +define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind { + %a = load <4 x i32>* %i + %b = load <4 x i32>* %h + %j = and <4 x i32> %a, %b + %d0 = extractelement <4 x i32> %j, i32 0 + %d1 = extractelement <4 x i32> %j, i32 1 + %d2 = extractelement <4 x i32> %j, i32 2 + %d3 = extractelement <4 x i32> %j, i32 3 + %q0 = getelementptr double* %p, i32 %d0 + %q1 = getelementptr double* %p, i32 %d1 + %q2 = getelementptr double* %p, i32 %d2 + %q3 = getelementptr double* %p, i32 %d3 + %r0 = load double* %q0 + %r1 = load double* %q1 + %r2 = load double* %q2 + %r3 = load double* %q3 + %v0 = insertelement <4 x double> undef, double %r0, i32 0 + %v1 = insertelement <4 x double> %v0, double %r1, i32 1 + %v2 = insertelement <4 x double> %v1, double %r2, i32 2 + %v3 = insertelement <4 x double> %v2, double %r3, i32 3 + ret <4 x double> %v3 +}
diff --git a/src/LLVM/test/CodeGen/X86/ghc-cc.ll b/src/LLVM/test/CodeGen/X86/ghc-cc.ll new file mode 100644 index 0000000..0e65cfd --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/ghc-cc.ll
@@ -0,0 +1,45 @@ +; RUN: llc < %s -tailcallopt -mtriple=i686-linux-gnu | FileCheck %s + +; Test the GHC call convention works (x86-32) + +@base = external global i32 ; assigned to register: EBX +@sp = external global i32 ; assigned to register: EBP +@hp = external global i32 ; assigned to register: EDI +@r1 = external global i32 ; assigned to register: ESI + +define void @zap(i32 %a, i32 %b) nounwind { +entry: + ; CHECK: movl {{[0-9]*}}(%esp), %ebx + ; CHECK-NEXT: movl {{[0-9]*}}(%esp), %ebp + ; CHECK-NEXT: calll addtwo + %0 = call cc 10 i32 @addtwo(i32 %a, i32 %b) + ; CHECK: calll foo + call void @foo() nounwind + ret void +} + +define cc 10 i32 @addtwo(i32 %x, i32 %y) nounwind { +entry: + ; CHECK: leal (%ebx,%ebp), %eax + %0 = add i32 %x, %y + ; CHECK-NEXT: ret + ret i32 %0 +} + +define cc 10 void @foo() nounwind { +entry: + ; CHECK: movl base, %ebx + ; CHECK-NEXT: movl sp, %ebp + ; CHECK-NEXT: movl hp, %edi + ; CHECK-NEXT: movl r1, %esi + %0 = load i32* @r1 + %1 = load i32* @hp + %2 = load i32* @sp + %3 = load i32* @base + ; CHECK: jmp bar + tail call cc 10 void @bar( i32 %3, i32 %2, i32 %1, i32 %0 ) nounwind + ret void +} + +declare cc 10 void @bar(i32, i32, i32, i32) +
diff --git a/src/LLVM/test/CodeGen/X86/ghc-cc64.ll b/src/LLVM/test/CodeGen/X86/ghc-cc64.ll new file mode 100644 index 0000000..fcf7e17 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/ghc-cc64.ll
@@ -0,0 +1,86 @@ +; RUN: llc < %s -tailcallopt -mtriple=x86_64-linux-gnu | FileCheck %s + +; Check the GHC call convention works (x86-64) + +@base = external global i64 ; assigned to register: R13 +@sp = external global i64 ; assigned to register: RBP +@hp = external global i64 ; assigned to register: R12 +@r1 = external global i64 ; assigned to register: RBX +@r2 = external global i64 ; assigned to register: R14 +@r3 = external global i64 ; assigned to register: RSI +@r4 = external global i64 ; assigned to register: RDI +@r5 = external global i64 ; assigned to register: R8 +@r6 = external global i64 ; assigned to register: R9 +@splim = external global i64 ; assigned to register: R15 + +@f1 = external global float ; assigned to register: XMM1 +@f2 = external global float ; assigned to register: XMM2 +@f3 = external global float ; assigned to register: XMM3 +@f4 = external global float ; assigned to register: XMM4 +@d1 = external global double ; assigned to register: XMM5 +@d2 = external global double ; assigned to register: XMM6 + +define void @zap(i64 %a, i64 %b) nounwind { +entry: + ; CHECK: movq %rdi, %r13 + ; CHECK-NEXT: movq %rsi, %rbp + ; CHECK-NEXT: callq addtwo + %0 = call cc 10 i64 @addtwo(i64 %a, i64 %b) + ; CHECK: callq foo + call void @foo() nounwind + ret void +} + +define cc 10 i64 @addtwo(i64 %x, i64 %y) nounwind { +entry: + ; CHECK: leaq (%r13,%rbp), %rax + %0 = add i64 %x, %y + ; CHECK-NEXT: ret + ret i64 %0 +} + +define cc 10 void @foo() nounwind { +entry: + ; CHECK: movq base(%rip), %r13 + ; CHECK-NEXT: movq sp(%rip), %rbp + ; CHECK-NEXT: movq hp(%rip), %r12 + ; CHECK-NEXT: movq r1(%rip), %rbx + ; CHECK-NEXT: movq r2(%rip), %r14 + ; CHECK-NEXT: movq r3(%rip), %rsi + ; CHECK-NEXT: movq r4(%rip), %rdi + ; CHECK-NEXT: movq r5(%rip), %r8 + ; CHECK-NEXT: movq r6(%rip), %r9 + ; CHECK-NEXT: movq splim(%rip), %r15 + ; CHECK-NEXT: movss f1(%rip), %xmm1 + ; CHECK-NEXT: movss f2(%rip), %xmm2 + ; CHECK-NEXT: movss f3(%rip), %xmm3 + ; CHECK-NEXT: movss 
f4(%rip), %xmm4 + ; CHECK-NEXT: movsd d1(%rip), %xmm5 + ; CHECK-NEXT: movsd d2(%rip), %xmm6 + %0 = load double* @d2 + %1 = load double* @d1 + %2 = load float* @f4 + %3 = load float* @f3 + %4 = load float* @f2 + %5 = load float* @f1 + %6 = load i64* @splim + %7 = load i64* @r6 + %8 = load i64* @r5 + %9 = load i64* @r4 + %10 = load i64* @r3 + %11 = load i64* @r2 + %12 = load i64* @r1 + %13 = load i64* @hp + %14 = load i64* @sp + %15 = load i64* @base + ; CHECK: jmp bar + tail call cc 10 void @bar( i64 %15, i64 %14, i64 %13, i64 %12, i64 %11, + i64 %10, i64 %9, i64 %8, i64 %7, i64 %6, + float %5, float %4, float %3, float %2, double %1, + double %0 ) nounwind + ret void +} + +declare cc 10 void @bar(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, + float, float, float, float, double, double) +
diff --git a/src/LLVM/test/CodeGen/X86/global-sections-tls.ll b/src/LLVM/test/CodeGen/X86/global-sections-tls.ll new file mode 100644 index 0000000..d5409a5 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/global-sections-tls.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | FileCheck %s -check-prefix=LINUX + +; PR4639 +@G1 = internal thread_local global i32 0 ; <i32*> [#uses=1] +; LINUX: .section .tbss,"awT",@nobits +; LINUX: G1: + + +define i32* @foo() nounwind readnone { +entry: + ret i32* @G1 +} + +
diff --git a/src/LLVM/test/CodeGen/X86/global-sections.ll b/src/LLVM/test/CodeGen/X86/global-sections.ll new file mode 100644 index 0000000..194f597 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/global-sections.ll
@@ -0,0 +1,160 @@ +; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | FileCheck %s -check-prefix=LINUX +; RUN: llc < %s -mtriple=i386-apple-darwin9.7 | FileCheck %s -check-prefix=DARWIN +; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -fdata-sections | FileCheck %s -check-prefix=LINUX-SECTIONS + + +; int G1; +@G1 = common global i32 0 + +; LINUX: .type G1,@object +; LINUX: .comm G1,4,4 + +; DARWIN: .comm _G1,4,2 + + + + +; const int G2 __attribute__((weak)) = 42; +@G2 = weak_odr unnamed_addr constant i32 42 + + +; TODO: linux drops this into .rodata, we drop it into ".gnu.linkonce.r.G2" + +; DARWIN: .section __TEXT,__const_coal,coalesced +; DARWIN: _G2: +; DARWIN: .long 42 + + +; int * const G3 = &G1; +@G3 = unnamed_addr constant i32* @G1 + +; DARWIN: .section __DATA,__const +; DARWIN: .globl _G3 +; DARWIN: _G3: +; DARWIN: .long _G1 + +; LINUX: .section .rodata,"a",@progbits +; LINUX: .globl G3 + +; LINUX-SECTIONS: .section .rodata.G3,"a",@progbits +; LINUX-SECTIONS: .globl G3 + + +; _Complex long long const G4 = 34; +@G4 = unnamed_addr constant {i64,i64} { i64 34, i64 0 } + +; DARWIN: .section __TEXT,__literal16,16byte_literals +; DARWIN: _G4: +; DARWIN: .long 34 + + +; int G5 = 47; +@G5 = global i32 47 + +; LINUX: .data +; LINUX: .globl G5 +; LINUX: G5: +; LINUX: .long 47 + +; DARWIN: .section __DATA,__data +; DARWIN: .globl _G5 +; DARWIN: _G5: +; DARWIN: .long 47 + + +; PR4584 +@"foo bar" = linkonce global i32 42 + +; LINUX: .type foo_20_bar,@object +; LINUX: .section .data.foo_20_bar,"aGw",@progbits,foo_20_bar,comdat +; LINUX: .weak foo_20_bar +; LINUX: foo_20_bar: + +; DARWIN: .section __DATA,__datacoal_nt,coalesced +; DARWIN: .globl "_foo bar" +; DARWIN: .weak_definition "_foo bar" +; DARWIN: "_foo bar": + +; PR4650 +@G6 = weak_odr unnamed_addr constant [1 x i8] c"\01" + +; LINUX: .type G6,@object +; LINUX: .section .rodata.G6,"aG",@progbits,G6,comdat +; LINUX: .weak G6 +; LINUX: G6: +; LINUX: .byte 1 +; LINUX: .size G6, 1 + +; DARWIN: .section 
__TEXT,__const_coal,coalesced +; DARWIN: .globl _G6 +; DARWIN: .weak_definition _G6 +; DARWIN:_G6: +; DARWIN: .byte 1 + + +@G7 = unnamed_addr constant [10 x i8] c"abcdefghi\00" + +; DARWIN: __TEXT,__cstring,cstring_literals +; DARWIN: .globl _G7 +; DARWIN: _G7: +; DARWIN: .asciz "abcdefghi" + +; LINUX: .section .rodata.str1.1,"aMS",@progbits,1 +; LINUX: .globl G7 +; LINUX: G7: +; LINUX: .asciz "abcdefghi" + +; LINUX-SECTIONS: .section .rodata.G7,"aMS",@progbits,1 +; LINUX-SECTIONS: .globl G7 + + +@G8 = unnamed_addr constant [4 x i16] [ i16 1, i16 2, i16 3, i16 0 ] + +; DARWIN: .section __TEXT,__const +; DARWIN: .globl _G8 +; DARWIN: _G8: + +; LINUX: .section .rodata.str2.2,"aMS",@progbits,2 +; LINUX: .globl G8 +; LINUX:G8: + +@G9 = unnamed_addr constant [4 x i32] [ i32 1, i32 2, i32 3, i32 0 ] + +; DARWIN: .globl _G9 +; DARWIN: _G9: + +; LINUX: .section .rodata.str4.4,"aMS",@progbits,4 +; LINUX: .globl G9 +; LINUX:G9 + + +@G10 = weak global [100 x i32] zeroinitializer, align 32 ; <[100 x i32]*> [#uses=0] + + +; DARWIN: .section __DATA,__datacoal_nt,coalesced +; DARWIN: .globl _G10 +; DARWIN: .weak_definition _G10 +; DARWIN: .align 5 +; DARWIN: _G10: +; DARWIN: .space 400 + +; LINUX: .bss +; LINUX: .weak G10 +; LINUX: .align 32 +; LINUX: G10: +; LINUX: .zero 400 + + + +;; Zero sized objects should round up to 1 byte in zerofill directives. +; rdar://7886017 +@G11 = global [0 x i32] zeroinitializer +@G12 = global {} zeroinitializer +@G13 = global { [0 x {}] } zeroinitializer + +; DARWIN: .globl _G11 +; DARWIN: .zerofill __DATA,__common,_G11,1,2 +; DARWIN: .globl _G12 +; DARWIN: .zerofill __DATA,__common,_G12,1,3 +; DARWIN: .globl _G13 +; DARWIN: .zerofill __DATA,__common,_G13,1,3
diff --git a/src/LLVM/test/CodeGen/X86/h-register-addressing-32.ll b/src/LLVM/test/CodeGen/X86/h-register-addressing-32.ll new file mode 100644 index 0000000..76ffd66 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/h-register-addressing-32.ll
@@ -0,0 +1,53 @@ +; RUN: llc < %s -march=x86 | grep {movzbl %\[abcd\]h,} | count 7 + +; Use h-register extract and zero-extend. + +define double @foo8(double* nocapture inreg %p, i32 inreg %x) nounwind readonly { + %t0 = lshr i32 %x, 8 + %t1 = and i32 %t0, 255 + %t2 = getelementptr double* %p, i32 %t1 + %t3 = load double* %t2, align 8 + ret double %t3 +} +define float @foo4(float* nocapture inreg %p, i32 inreg %x) nounwind readonly { + %t0 = lshr i32 %x, 8 + %t1 = and i32 %t0, 255 + %t2 = getelementptr float* %p, i32 %t1 + %t3 = load float* %t2, align 8 + ret float %t3 +} +define i16 @foo2(i16* nocapture inreg %p, i32 inreg %x) nounwind readonly { + %t0 = lshr i32 %x, 8 + %t1 = and i32 %t0, 255 + %t2 = getelementptr i16* %p, i32 %t1 + %t3 = load i16* %t2, align 8 + ret i16 %t3 +} +define i8 @foo1(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly { + %t0 = lshr i32 %x, 8 + %t1 = and i32 %t0, 255 + %t2 = getelementptr i8* %p, i32 %t1 + %t3 = load i8* %t2, align 8 + ret i8 %t3 +} +define i8 @bar8(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly { + %t0 = lshr i32 %x, 5 + %t1 = and i32 %t0, 2040 + %t2 = getelementptr i8* %p, i32 %t1 + %t3 = load i8* %t2, align 8 + ret i8 %t3 +} +define i8 @bar4(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly { + %t0 = lshr i32 %x, 6 + %t1 = and i32 %t0, 1020 + %t2 = getelementptr i8* %p, i32 %t1 + %t3 = load i8* %t2, align 8 + ret i8 %t3 +} +define i8 @bar2(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly { + %t0 = lshr i32 %x, 7 + %t1 = and i32 %t0, 510 + %t2 = getelementptr i8* %p, i32 %t1 + %t3 = load i8* %t2, align 8 + ret i8 %t3 +}
diff --git a/src/LLVM/test/CodeGen/X86/h-register-addressing-64.ll b/src/LLVM/test/CodeGen/X86/h-register-addressing-64.ll new file mode 100644 index 0000000..98817f3 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/h-register-addressing-64.ll
@@ -0,0 +1,53 @@ +; RUN: llc < %s -march=x86-64 | grep {movzbl %\[abcd\]h,} | count 7 + +; Use h-register extract and zero-extend. + +define double @foo8(double* nocapture inreg %p, i64 inreg %x) nounwind readonly { + %t0 = lshr i64 %x, 8 + %t1 = and i64 %t0, 255 + %t2 = getelementptr double* %p, i64 %t1 + %t3 = load double* %t2, align 8 + ret double %t3 +} +define float @foo4(float* nocapture inreg %p, i64 inreg %x) nounwind readonly { + %t0 = lshr i64 %x, 8 + %t1 = and i64 %t0, 255 + %t2 = getelementptr float* %p, i64 %t1 + %t3 = load float* %t2, align 8 + ret float %t3 +} +define i16 @foo2(i16* nocapture inreg %p, i64 inreg %x) nounwind readonly { + %t0 = lshr i64 %x, 8 + %t1 = and i64 %t0, 255 + %t2 = getelementptr i16* %p, i64 %t1 + %t3 = load i16* %t2, align 8 + ret i16 %t3 +} +define i8 @foo1(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly { + %t0 = lshr i64 %x, 8 + %t1 = and i64 %t0, 255 + %t2 = getelementptr i8* %p, i64 %t1 + %t3 = load i8* %t2, align 8 + ret i8 %t3 +} +define i8 @bar8(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly { + %t0 = lshr i64 %x, 5 + %t1 = and i64 %t0, 2040 + %t2 = getelementptr i8* %p, i64 %t1 + %t3 = load i8* %t2, align 8 + ret i8 %t3 +} +define i8 @bar4(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly { + %t0 = lshr i64 %x, 6 + %t1 = and i64 %t0, 1020 + %t2 = getelementptr i8* %p, i64 %t1 + %t3 = load i8* %t2, align 8 + ret i8 %t3 +} +define i8 @bar2(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly { + %t0 = lshr i64 %x, 7 + %t1 = and i64 %t0, 510 + %t2 = getelementptr i8* %p, i64 %t1 + %t3 = load i8* %t2, align 8 + ret i8 %t3 +}
diff --git a/src/LLVM/test/CodeGen/X86/h-register-store.ll b/src/LLVM/test/CodeGen/X86/h-register-store.ll new file mode 100644 index 0000000..0adb2b1 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/h-register-store.ll
@@ -0,0 +1,47 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64 +; X64: mov +; X64-NEXT: movb %ah, (%rsi) +; X64: mov +; X64-NEXT: movb %ah, (%rsi) +; X64: mov +; X64-NEXT: movb %ah, (%rsi) +; X64-NOT: mov + +; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=W64 +; W64-NOT: mov +; W64: movb %ch, (%rdx) +; W64-NOT: mov +; W64: movb %ch, (%rdx) +; W64-NOT: mov +; W64: movb %ch, (%rdx) +; W64-NOT: mov + +; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32 +; X32-NOT: mov +; X32: movb %ah, (%e +; X32-NOT: mov +; X32: movb %ah, (%e +; X32-NOT: mov +; X32: movb %ah, (%e +; X32-NOT: mov + +; Use h-register extract and store. + +define void @foo16(i16 inreg %p, i8* inreg %z) nounwind { + %q = lshr i16 %p, 8 + %t = trunc i16 %q to i8 + store i8 %t, i8* %z + ret void +} +define void @foo32(i32 inreg %p, i8* inreg %z) nounwind { + %q = lshr i32 %p, 8 + %t = trunc i32 %q to i8 + store i8 %t, i8* %z + ret void +} +define void @foo64(i64 inreg %p, i8* inreg %z) nounwind { + %q = lshr i64 %p, 8 + %t = trunc i64 %q to i8 + store i8 %t, i8* %z + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/h-registers-0.ll b/src/LLVM/test/CodeGen/X86/h-registers-0.ll new file mode 100644 index 0000000..cdc75af --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/h-registers-0.ll
@@ -0,0 +1,106 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X86-64 +; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64 +; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X86-32 + +; Use h registers. On x86-64, codegen doesn't support general allocation +; of h registers yet, due to x86 encoding complications. + +define void @bar64(i64 inreg %x, i8* inreg %p) nounwind { +; X86-64: bar64: +; X86-64: shrq $8, %rdi +; X86-64: incb %dil + +; See FIXME: on regclass GR8. +; It could be optimally transformed like; incb %ch; movb %ch, (%rdx) +; WIN64: bar64: +; WIN64: shrq $8, %rcx +; WIN64: incb %cl + +; X86-32: bar64: +; X86-32: incb %ah + %t0 = lshr i64 %x, 8 + %t1 = trunc i64 %t0 to i8 + %t2 = add i8 %t1, 1 + store i8 %t2, i8* %p + ret void +} + +define void @bar32(i32 inreg %x, i8* inreg %p) nounwind { +; X86-64: bar32: +; X86-64: shrl $8, %edi +; X86-64: incb %dil + +; WIN64: bar32: +; WIN64: shrl $8, %ecx +; WIN64: incb %cl + +; X86-32: bar32: +; X86-32: incb %ah + %t0 = lshr i32 %x, 8 + %t1 = trunc i32 %t0 to i8 + %t2 = add i8 %t1, 1 + store i8 %t2, i8* %p + ret void +} + +define void @bar16(i16 inreg %x, i8* inreg %p) nounwind { +; X86-64: bar16: +; X86-64: shrl $8, %edi +; X86-64: incb %dil + +; WIN64: bar16: +; WIN64: shrl $8, %ecx +; WIN64: incb %cl + +; X86-32: bar16: +; X86-32: incb %ah + %t0 = lshr i16 %x, 8 + %t1 = trunc i16 %t0 to i8 + %t2 = add i8 %t1, 1 + store i8 %t2, i8* %p + ret void +} + +define i64 @qux64(i64 inreg %x) nounwind { +; X86-64: qux64: +; X86-64: movq %rdi, %rax +; X86-64: movzbl %ah, %eax + +; WIN64: qux64: +; WIN64: movzbl %ch, %eax + +; X86-32: qux64: +; X86-32: movzbl %ah, %eax + %t0 = lshr i64 %x, 8 + %t1 = and i64 %t0, 255 + ret i64 %t1 +} + +define i32 @qux32(i32 inreg %x) nounwind { +; X86-64: qux32: +; X86-64: movl %edi, %eax +; X86-64: movzbl %ah, %eax + +; WIN64: qux32: +; WIN64: movzbl %ch, %eax + +; X86-32: qux32: +; X86-32: movzbl %ah, %eax + %t0 = lshr i32 %x, 8 + %t1 = 
and i32 %t0, 255 + ret i32 %t1 +} + +define i16 @qux16(i16 inreg %x) nounwind { +; X86-64: qux16: +; X86-64: movl %edi, %eax +; X86-64: movzbl %ah, %eax + +; WIN64: qux16: +; WIN64: movzbl %ch, %eax + +; X86-32: qux16: +; X86-32: movzbl %ah, %eax + %t0 = lshr i16 %x, 8 + ret i16 %t0 +}
diff --git a/src/LLVM/test/CodeGen/X86/h-registers-1.ll b/src/LLVM/test/CodeGen/X86/h-registers-1.ll new file mode 100644 index 0000000..402cdfe4 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/h-registers-1.ll
@@ -0,0 +1,39 @@ +; RUN: llc < %s -mtriple=x86_64-linux > %t +; RUN: grep {movzbl %\[abcd\]h,} %t | count 8 +; RUN: grep {%\[abcd\]h} %t | not grep {%r\[\[:digit:\]\]*d} + +; LLVM creates virtual registers for values live across blocks +; based on the type of the value. Make sure that the extracts +; here use the GR64_NOREX register class for their result, +; instead of plain GR64. + +define i64 @foo(i64 %a, i64 %b, i64 %c, i64 %d, + i64 %e, i64 %f, i64 %g, i64 %h) { + %sa = lshr i64 %a, 8 + %A = and i64 %sa, 255 + %sb = lshr i64 %b, 8 + %B = and i64 %sb, 255 + %sc = lshr i64 %c, 8 + %C = and i64 %sc, 255 + %sd = lshr i64 %d, 8 + %D = and i64 %sd, 255 + %se = lshr i64 %e, 8 + %E = and i64 %se, 255 + %sf = lshr i64 %f, 8 + %F = and i64 %sf, 255 + %sg = lshr i64 %g, 8 + %G = and i64 %sg, 255 + %sh = lshr i64 %h, 8 + %H = and i64 %sh, 255 + br label %next + +next: + %u = add i64 %A, %B + %v = add i64 %C, %D + %w = add i64 %E, %F + %x = add i64 %G, %H + %y = add i64 %u, %v + %z = add i64 %w, %x + %t = add i64 %y, %z + ret i64 %t +}
diff --git a/src/LLVM/test/CodeGen/X86/h-registers-2.ll b/src/LLVM/test/CodeGen/X86/h-registers-2.ll new file mode 100644 index 0000000..488444c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/h-registers-2.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -march=x86 | FileCheck %s + +; Use an h register, but don't omit the explicit shift for +; non-address use(s). + +define i32 @foo(i8* %x, i32 %y) nounwind { +; CHECK: foo: +; CHECK-NOT: ret +; CHECK: movzbl %{{[abcd]h}}, +; CHECK-NOT: ret +; CHECK: shll $3, +; CHECK: ret + + %t0 = lshr i32 %y, 8 ; <i32> [#uses=1] + %t1 = and i32 %t0, 255 ; <i32> [#uses=2] + %t2 = shl i32 %t1, 3 + %t3 = getelementptr i8* %x, i32 %t2 ; <i8*> [#uses=1] + store i8 77, i8* %t3, align 4 + ret i32 %t2 +}
diff --git a/src/LLVM/test/CodeGen/X86/h-registers-3.ll b/src/LLVM/test/CodeGen/X86/h-registers-3.ll new file mode 100644 index 0000000..8a0b07b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/h-registers-3.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86 | grep mov | count 1 +; RUN: llc < %s -march=x86-64 | grep mov | count 1 + +define zeroext i8 @foo() nounwind ssp { +entry: + %0 = tail call zeroext i16 (...)* @bar() nounwind + %1 = lshr i16 %0, 8 + %2 = trunc i16 %1 to i8 + ret i8 %2 +} + +declare zeroext i16 @bar(...)
diff --git a/src/LLVM/test/CodeGen/X86/haddsub.ll b/src/LLVM/test/CodeGen/X86/haddsub.ll new file mode 100644 index 0000000..91758ea --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/haddsub.ll
@@ -0,0 +1,194 @@ +; RUN: llc < %s -march=x86-64 -mattr=+sse3,-avx | FileCheck %s -check-prefix=SSE3 +; RUN: llc < %s -march=x86-64 -mattr=-sse3,+avx | FileCheck %s -check-prefix=AVX + +; SSE3: haddpd1: +; SSE3-NOT: vhaddpd +; SSE3: haddpd +; AVX: haddpd1: +; AVX: vhaddpd +define <2 x double> @haddpd1(<2 x double> %x, <2 x double> %y) { + %a = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 0, i32 2> + %b = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 1, i32 3> + %r = fadd <2 x double> %a, %b + ret <2 x double> %r +} + +; SSE3: haddpd2: +; SSE3-NOT: vhaddpd +; SSE3: haddpd +; AVX: haddpd2: +; AVX: vhaddpd +define <2 x double> @haddpd2(<2 x double> %x, <2 x double> %y) { + %a = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 1, i32 2> + %b = shufflevector <2 x double> %y, <2 x double> %x, <2 x i32> <i32 2, i32 1> + %r = fadd <2 x double> %a, %b + ret <2 x double> %r +} + +; SSE3: haddpd3: +; SSE3-NOT: vhaddpd +; SSE3: haddpd +; AVX: haddpd3: +; AVX: vhaddpd +define <2 x double> @haddpd3(<2 x double> %x) { + %a = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 0, i32 undef> + %b = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 undef> + %r = fadd <2 x double> %a, %b + ret <2 x double> %r +} + +; SSE3: haddps1: +; SSE3-NOT: vhaddps +; SSE3: haddps +; AVX: haddps1: +; AVX: vhaddps +define <4 x float> @haddps1(<4 x float> %x, <4 x float> %y) { + %a = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %b = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %r = fadd <4 x float> %a, %b + ret <4 x float> %r +} + +; SSE3: haddps2: +; SSE3-NOT: vhaddps +; SSE3: haddps +; AVX: haddps2: +; AVX: vhaddps +define <4 x float> @haddps2(<4 x float> %x, <4 x float> %y) { + %a = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 1, i32 2, i32 5, i32 6> + %b = shufflevector <4 x float> %y, <4 x float> %x, <4 x i32> 
<i32 4, i32 7, i32 0, i32 3> + %r = fadd <4 x float> %a, %b + ret <4 x float> %r +} + +; SSE3: haddps3: +; SSE3-NOT: vhaddps +; SSE3: haddps +; AVX: haddps3: +; AVX: vhaddps +define <4 x float> @haddps3(<4 x float> %x) { + %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 4, i32 6> + %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 3, i32 5, i32 7> + %r = fadd <4 x float> %a, %b + ret <4 x float> %r +} + +; SSE3: haddps4: +; SSE3-NOT: vhaddps +; SSE3: haddps +; AVX: haddps4: +; AVX: vhaddps +define <4 x float> @haddps4(<4 x float> %x) { + %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef> + %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> + %r = fadd <4 x float> %a, %b + ret <4 x float> %r +} + +; SSE3: haddps5: +; SSE3-NOT: vhaddps +; SSE3: haddps +; AVX: haddps5: +; AVX: vhaddps +define <4 x float> @haddps5(<4 x float> %x) { + %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 3, i32 undef, i32 undef> + %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 undef, i32 undef> + %r = fadd <4 x float> %a, %b + ret <4 x float> %r +} + +; SSE3: haddps6: +; SSE3-NOT: vhaddps +; SSE3: haddps +; AVX: haddps6: +; AVX: vhaddps +define <4 x float> @haddps6(<4 x float> %x) { + %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> + %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> + %r = fadd <4 x float> %a, %b + ret <4 x float> %r +} + +; SSE3: haddps7: +; SSE3-NOT: vhaddps +; SSE3: haddps +; AVX: haddps7: +; AVX: vhaddps +define <4 x float> @haddps7(<4 x float> %x) { + %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 undef> + %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, 
i32 2, i32 undef, i32 undef> + %r = fadd <4 x float> %a, %b + ret <4 x float> %r +} + +; SSE3: hsubpd1: +; SSE3-NOT: vhsubpd +; SSE3: hsubpd +; AVX: hsubpd1: +; AVX: vhsubpd +define <2 x double> @hsubpd1(<2 x double> %x, <2 x double> %y) { + %a = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 0, i32 2> + %b = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 1, i32 3> + %r = fsub <2 x double> %a, %b + ret <2 x double> %r +} + +; SSE3: hsubpd2: +; SSE3-NOT: vhsubpd +; SSE3: hsubpd +; AVX: hsubpd2: +; AVX: vhsubpd +define <2 x double> @hsubpd2(<2 x double> %x) { + %a = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 0, i32 undef> + %b = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 undef> + %r = fsub <2 x double> %a, %b + ret <2 x double> %r +} + +; SSE3: hsubps1: +; SSE3-NOT: vhsubps +; SSE3: hsubps +; AVX: hsubps1: +; AVX: vhsubps +define <4 x float> @hsubps1(<4 x float> %x, <4 x float> %y) { + %a = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %b = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %r = fsub <4 x float> %a, %b + ret <4 x float> %r +} + +; SSE3: hsubps2: +; SSE3-NOT: vhsubps +; SSE3: hsubps +; AVX: hsubps2: +; AVX: vhsubps +define <4 x float> @hsubps2(<4 x float> %x) { + %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 4, i32 6> + %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 3, i32 5, i32 7> + %r = fsub <4 x float> %a, %b + ret <4 x float> %r +} + +; SSE3: hsubps3: +; SSE3-NOT: vhsubps +; SSE3: hsubps +; AVX: hsubps3: +; AVX: vhsubps +define <4 x float> @hsubps3(<4 x float> %x) { + %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef> + %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> + %r = fsub <4 x float> %a, %b + ret <4 x 
float> %r +} + +; SSE3: hsubps4: +; SSE3-NOT: vhsubps +; SSE3: hsubps +; AVX: hsubps4: +; AVX: vhsubps +define <4 x float> @hsubps4(<4 x float> %x) { + %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> + %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> + %r = fsub <4 x float> %a, %b + ret <4 x float> %r +}
diff --git a/src/LLVM/test/CodeGen/X86/hidden-vis-2.ll b/src/LLVM/test/CodeGen/X86/hidden-vis-2.ll new file mode 100644 index 0000000..74554d1 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/hidden-vis-2.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin9 | grep mov | count 1 +; RUN: llc < %s -mtriple=x86_64-apple-darwin9 | not grep GOT + +@x = weak hidden global i32 0 ; <i32*> [#uses=1] + +define i32 @t() nounwind readonly { +entry: + %0 = load i32* @x, align 4 ; <i32> [#uses=1] + ret i32 %0 +}
diff --git a/src/LLVM/test/CodeGen/X86/hidden-vis-3.ll b/src/LLVM/test/CodeGen/X86/hidden-vis-3.ll new file mode 100644 index 0000000..4be881e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/hidden-vis-3.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin9 | FileCheck %s -check-prefix=X32 +; RUN: llc < %s -mtriple=x86_64-apple-darwin9 | FileCheck %s -check-prefix=X64 + +@x = external hidden global i32 ; <i32*> [#uses=1] +@y = extern_weak hidden global i32 ; <i32*> [#uses=1] + +define i32 @t() nounwind readonly { +entry: +; X32: _t: +; X32: movl _y, %eax + +; X64: _t: +; X64: movl _y(%rip), %eax + + %0 = load i32* @x, align 4 ; <i32> [#uses=1] + %1 = load i32* @y, align 4 ; <i32> [#uses=1] + %2 = add i32 %1, %0 ; <i32> [#uses=1] + ret i32 %2 +}
diff --git a/src/LLVM/test/CodeGen/X86/hidden-vis-4.ll b/src/LLVM/test/CodeGen/X86/hidden-vis-4.ll new file mode 100644 index 0000000..a8aede5 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/hidden-vis-4.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin9 | FileCheck %s + +@x = common hidden global i32 0 ; <i32*> [#uses=1] + +define i32 @t() nounwind readonly { +entry: +; CHECK: t: +; CHECK: movl _x, %eax +; CHECK: .comm _x,4 + %0 = load i32* @x, align 4 ; <i32> [#uses=1] + ret i32 %0 +}
diff --git a/src/LLVM/test/CodeGen/X86/hidden-vis-pic.ll b/src/LLVM/test/CodeGen/X86/hidden-vis-pic.ll new file mode 100644 index 0000000..67be3d0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/hidden-vis-pic.ll
@@ -0,0 +1,55 @@ +; RUN: llc < %s -disable-cfi -mtriple=i386-apple-darwin9 -relocation-model=pic -disable-fp-elim | FileCheck %s + + + +; PR7353 PR7334 rdar://8072315 rdar://8018308 + +define available_externally hidden +void @_ZNSbIcED1Ev() nounwind readnone ssp align 2 { +entry: + ret void +} + +define void()* @test1() nounwind { +entry: + ret void()* @_ZNSbIcED1Ev +} + +; This must use movl of the stub, not an lea, since the function isn't being +; emitted here. +; CHECK: movl L__ZNSbIcED1Ev$non_lazy_ptr-L1$pb( + + + + +; <rdar://problem/7383328> + +@.str = private constant [12 x i8] c"hello world\00", align 1 ; <[12 x i8]*> [#uses=1] + +define hidden void @func() nounwind ssp uwtable { +entry: + %0 = call i32 @puts(i8* getelementptr inbounds ([12 x i8]* @.str, i64 0, i64 0)) nounwind ; <i32> [#uses=0] + br label %return + +return: ; preds = %entry + ret void +} + +declare i32 @puts(i8*) + +define hidden i32 @main() nounwind ssp uwtable { +entry: + %retval = alloca i32 ; <i32*> [#uses=1] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + call void @func() nounwind + br label %return + +return: ; preds = %entry + %retval1 = load i32* %retval ; <i32> [#uses=1] + ret i32 %retval1 +} + +; CHECK: .private_extern _func.eh +; CHECK: .private_extern _main.eh + +
diff --git a/src/LLVM/test/CodeGen/X86/hidden-vis.ll b/src/LLVM/test/CodeGen/X86/hidden-vis.ll new file mode 100644 index 0000000..ac6155e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/hidden-vis.ll
@@ -0,0 +1,31 @@ +; RUN: llc < %s -mtriple=i686-pc-linux-gnu | FileCheck %s -check-prefix=LINUX +; RUN: llc < %s -mtriple=i686-apple-darwin8 | FileCheck %s -check-prefix=DARWIN +; RUN: llc < %s -mtriple=x86_64-w64-mingw32 | FileCheck %s -check-prefix=WINDOWS + + +@a = hidden global i32 0 +@b = external hidden global i32 +@c = global i32* @b + +define weak hidden void @t1() nounwind { +; LINUX: .hidden t1 +; LINUX: t1: + +; DARWIN: .private_extern _t1 +; DARWIN: t1: + +; WINDOWS: t1: +; WINDOWS-NOT: hidden + ret void +} + +define weak void @t2() nounwind { +; DARWIN: .weak_definition _t2 + ret void +} + +; LINUX: .hidden a +; LINUX: .hidden b + +; DARWIN: .private_extern _a +; DARWIN-NOT: private_extern
diff --git a/src/LLVM/test/CodeGen/X86/hoist-common.ll b/src/LLVM/test/CodeGen/X86/hoist-common.ll new file mode 100644 index 0000000..72e17c0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/hoist-common.ll
@@ -0,0 +1,28 @@ +; RUN: llc < %s -mtriple=x86_64-apple-macosx | FileCheck %s + +; Common "xorb al, al" instruction in the two successor blocks should be +; moved to the entry block above the test + je. + +; rdar://9145558 + +define zeroext i1 @t(i32 %c) nounwind ssp { +entry: +; CHECK: t: +; CHECK: xorb %al, %al +; CHECK: test +; CHECK: je + %tobool = icmp eq i32 %c, 0 + br i1 %tobool, label %return, label %if.then + +if.then: +; CHECK: callq + %call = tail call zeroext i1 (...)* @foo() nounwind + br label %return + +return: +; CHECK: ret + %retval.0 = phi i1 [ %call, %if.then ], [ false, %entry ] + ret i1 %retval.0 +} + +declare zeroext i1 @foo(...)
diff --git a/src/LLVM/test/CodeGen/X86/i128-and-beyond.ll b/src/LLVM/test/CodeGen/X86/i128-and-beyond.ll new file mode 100644 index 0000000..b741681 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/i128-and-beyond.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | grep -- -1 | count 14 + +; These static initializers are too big to hand off to assemblers +; as monolithic blobs. + +@x = global i128 -1 +@y = global i256 -1 +@z = global i512 -1
diff --git a/src/LLVM/test/CodeGen/X86/i128-immediate.ll b/src/LLVM/test/CodeGen/X86/i128-immediate.ll new file mode 100644 index 0000000..c47569e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/i128-immediate.ll
@@ -0,0 +1,5 @@ +; RUN: llc < %s -march=x86-64 | grep movq | count 2 + +define i128 @__addvti3() { + ret i128 -1 +}
diff --git a/src/LLVM/test/CodeGen/X86/i128-mul.ll b/src/LLVM/test/CodeGen/X86/i128-mul.ll new file mode 100644 index 0000000..259dbc5 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/i128-mul.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86-64 +; PR1198 + +define i64 @foo(i64 %x, i64 %y) { + %tmp0 = zext i64 %x to i128 + %tmp1 = zext i64 %y to i128 + %tmp2 = mul i128 %tmp0, %tmp1 + %tmp7 = zext i32 64 to i128 + %tmp3 = lshr i128 %tmp2, %tmp7 + %tmp4 = trunc i128 %tmp3 to i64 + ret i64 %tmp4 +}
diff --git a/src/LLVM/test/CodeGen/X86/i128-ret.ll b/src/LLVM/test/CodeGen/X86/i128-ret.ll new file mode 100644 index 0000000..c019794 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/i128-ret.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s +; CHECK: movq ([[A0:%rdi|%rcx]]), %rax +; CHECK: movq 8([[A0]]), %rdx + +define i128 @test(i128 *%P) { + %A = load i128* %P + ret i128 %A +} +
diff --git a/src/LLVM/test/CodeGen/X86/i256-add.ll b/src/LLVM/test/CodeGen/X86/i256-add.ll new file mode 100644 index 0000000..5a7a7a7 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/i256-add.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=x86 > %t +; RUN: grep adcl %t | count 7 +; RUN: grep sbbl %t | count 7 + +define void @add(i256* %p, i256* %q) nounwind { + %a = load i256* %p + %b = load i256* %q + %c = add i256 %a, %b + store i256 %c, i256* %p + ret void +} +define void @sub(i256* %p, i256* %q) nounwind { + %a = load i256* %p + %b = load i256* %q + %c = sub i256 %a, %b + store i256 %c, i256* %p + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/i2k.ll b/src/LLVM/test/CodeGen/X86/i2k.ll new file mode 100644 index 0000000..6116c2e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/i2k.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=x86 + +define void @foo(i2011* %x, i2011* %y, i2011* %p) nounwind { + %a = load i2011* %x + %b = load i2011* %y + %c = add i2011 %a, %b + store i2011 %c, i2011* %p + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/i64-mem-copy.ll b/src/LLVM/test/CodeGen/X86/i64-mem-copy.ll new file mode 100644 index 0000000..dce12ae --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/i64-mem-copy.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=X64 +; X64: movq ({{%rsi|%rdx}}), %r + +; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s -check-prefix=X32 +; X32: movsd (%eax), %xmm + +; Uses movsd to load / store i64 values if sse2 is available. + +; rdar://6659858 + +define void @foo(i64* %x, i64* %y) nounwind { +entry: + %tmp1 = load i64* %y, align 8 ; <i64> [#uses=1] + store i64 %tmp1, i64* %x, align 8 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/iabs.ll b/src/LLVM/test/CodeGen/X86/iabs.ll new file mode 100644 index 0000000..2e7ec0c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/iabs.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=x86-64 -stats |& \ +; RUN: grep {5 .*Number of machine instrs printed} + +;; Integer absolute value, should produce something at least as good as: +;; movl %edi, %ecx +;; sarl $31, %ecx +;; leal (%rdi,%rcx), %eax +;; xorl %ecx, %eax +;; ret +define i32 @test(i32 %a) nounwind { + %tmp1neg = sub i32 0, %a + %b = icmp sgt i32 %a, -1 + %abs = select i1 %b, i32 %a, i32 %tmp1neg + ret i32 %abs +} +
diff --git a/src/LLVM/test/CodeGen/X86/illegal-insert.ll b/src/LLVM/test/CodeGen/X86/illegal-insert.ll new file mode 100644 index 0000000..dbf1b14 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/illegal-insert.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=x86-64 + +define <4 x double> @foo0(<4 x double> %t) { + %r = insertelement <4 x double> %t, double 2.3, i32 0 + ret <4 x double> %r +} +define <4 x double> @foo1(<4 x double> %t) { + %r = insertelement <4 x double> %t, double 2.3, i32 1 + ret <4 x double> %r +} +define <4 x double> @foo2(<4 x double> %t) { + %r = insertelement <4 x double> %t, double 2.3, i32 2 + ret <4 x double> %r +} +define <4 x double> @foo3(<4 x double> %t) { + %r = insertelement <4 x double> %t, double 2.3, i32 3 + ret <4 x double> %r +}
diff --git a/src/LLVM/test/CodeGen/X86/illegal-vector-args-return.ll b/src/LLVM/test/CodeGen/X86/illegal-vector-args-return.ll new file mode 100644 index 0000000..cecf77a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/illegal-vector-args-return.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {mulpd %xmm3, %xmm1} +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {mulpd %xmm2, %xmm0} +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {addps %xmm3, %xmm1} +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {addps %xmm2, %xmm0} + +define <4 x double> @foo(<4 x double> %x, <4 x double> %z) { + %y = fmul <4 x double> %x, %z + ret <4 x double> %y +} + +define <8 x float> @bar(<8 x float> %x, <8 x float> %z) { + %y = fadd <8 x float> %x, %z + ret <8 x float> %y +}
diff --git a/src/LLVM/test/CodeGen/X86/imul-lea-2.ll b/src/LLVM/test/CodeGen/X86/imul-lea-2.ll new file mode 100644 index 0000000..1cb54b3 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/imul-lea-2.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=x86-64 | grep lea | count 3 +; RUN: llc < %s -march=x86-64 | grep shl | count 1 +; RUN: llc < %s -march=x86-64 | not grep imul + +define i64 @t1(i64 %a) nounwind readnone { +entry: + %0 = mul i64 %a, 81 ; <i64> [#uses=1] + ret i64 %0 +} + +define i64 @t2(i64 %a) nounwind readnone { +entry: + %0 = mul i64 %a, 40 ; <i64> [#uses=1] + ret i64 %0 +}
diff --git a/src/LLVM/test/CodeGen/X86/imul-lea.ll b/src/LLVM/test/CodeGen/X86/imul-lea.ll new file mode 100644 index 0000000..bde61ff --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/imul-lea.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=x86 | grep lea + +declare i32 @foo() + +define i32 @test() { + %tmp.0 = tail call i32 @foo( ) ; <i32> [#uses=1] + %tmp.1 = mul i32 %tmp.0, 9 ; <i32> [#uses=1] + ret i32 %tmp.1 +} +
diff --git a/src/LLVM/test/CodeGen/X86/inline-asm-2addr.ll b/src/LLVM/test/CodeGen/X86/inline-asm-2addr.ll new file mode 100644 index 0000000..4a2c7fc --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/inline-asm-2addr.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=x86-64 | not grep movq + +define i64 @t(i64 %a, i64 %b) nounwind ssp { +entry: + %asmtmp = tail call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i64 %a) nounwind ; <i64> [#uses=1] + %asmtmp1 = tail call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i64 %b) nounwind ; <i64> [#uses=1] + %0 = add i64 %asmtmp1, %asmtmp ; <i64> [#uses=1] + ret i64 %0 +}
diff --git a/src/LLVM/test/CodeGen/X86/inline-asm-R-constraint.ll b/src/LLVM/test/CodeGen/X86/inline-asm-R-constraint.ll new file mode 100644 index 0000000..66c27ac --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/inline-asm-R-constraint.ll
@@ -0,0 +1,18 @@ +; RUN: llc -march=x86-64 < %s | FileCheck %s +; 7282062 +; ModuleID = '<stdin>' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-apple-darwin10.0" + +define void @udiv8(i8* %quotient, i16 zeroext %a, i8 zeroext %b, i8 zeroext %c, i8* %remainder) nounwind ssp { +entry: +; CHECK: udiv8: +; CHECK-NOT: movb %ah, (%r8) + %a_addr = alloca i16, align 2 ; <i16*> [#uses=2] + %b_addr = alloca i8, align 1 ; <i8*> [#uses=2] + store i16 %a, i16* %a_addr + store i8 %b, i8* %b_addr + call void asm "\09\09movw\09$2, %ax\09\09\0A\09\09divb\09$3\09\09\09\0A\09\09movb\09%al, $0\09\0A\09\09movb %ah, ($4)", "=*m,=*m,*m,*m,R,~{dirflag},~{fpsr},~{flags},~{ax}"(i8* %quotient, i8* %remainder, i16* %a_addr, i8* %b_addr, i8* %remainder) nounwind + ret void +; CHECK: ret +}
diff --git a/src/LLVM/test/CodeGen/X86/inline-asm-error.ll b/src/LLVM/test/CodeGen/X86/inline-asm-error.ll new file mode 100644 index 0000000..134d6e9 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/inline-asm-error.ll
@@ -0,0 +1,15 @@ +; RUN: not llc -march x86 -regalloc=fast < %s 2> %t1 +; RUN: not llc -march x86 -regalloc=basic < %s 2> %t2 +; RUN: not llc -march x86 -regalloc=greedy < %s 2> %t3 +; RUN: FileCheck %s < %t1 +; RUN: FileCheck %s < %t2 +; RUN: FileCheck %s < %t3 + +; The register allocator must fail on this function. +; CHECK: error: ran out of registers during register allocation + +define void @f(i32 %x0, i32 %x1, i32 %x2, i32 %x3, i32 %x4, i32 %x5, i32 %x6, i32 %x7, i32 %x8, i32 %x9) nounwind ssp { +entry: + tail call void asm sideeffect "hello world", "r,r,r,r,r,r,r,r,r,r,~{dirflag},~{fpsr},~{flags}"(i32 %x0, i32 %x1, i32 %x2, i32 %x3, i32 %x4, i32 %x5, i32 %x6, i32 %x7, i32 %x8, i32 %x9) nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/inline-asm-flag-clobber.ll b/src/LLVM/test/CodeGen/X86/inline-asm-flag-clobber.ll new file mode 100644 index 0000000..51ea843 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/inline-asm-flag-clobber.ll
@@ -0,0 +1,19 @@ +; RUN: llc -march=x86-64 < %s | FileCheck %s +; PR3701 + +define i64 @t(i64* %arg) nounwind { + br i1 true, label %1, label %5 + +; <label>:1 ; preds = %0 + %2 = icmp eq i64* null, %arg ; <i1> [#uses=1] + %3 = tail call i64* asm sideeffect "movl %fs:0,$0", "=r,~{dirflag},~{fpsr},~{flags}"() nounwind ; <%struct.thread*> [#uses=0] +; CHECK: test +; CHECK-NEXT: j + br i1 %2, label %4, label %5 + +; <label>:4 ; preds = %1 + ret i64 1 + +; <label>:5 ; preds = %1 + ret i64 0 +}
diff --git a/src/LLVM/test/CodeGen/X86/inline-asm-fpstack.ll b/src/LLVM/test/CodeGen/X86/inline-asm-fpstack.ll new file mode 100644 index 0000000..c9a1c1c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/inline-asm-fpstack.ll
@@ -0,0 +1,342 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s + +; There should be no stack manipulations between the inline asm and ret. +; CHECK: test1 +; CHECK: InlineAsm End +; CHECK-NEXT: ret +define x86_fp80 @test1() { + %tmp85 = call x86_fp80 asm sideeffect "fld0", "={st(0)}"() + ret x86_fp80 %tmp85 +} + +; CHECK: test2 +; CHECK: InlineAsm End +; CHECK-NEXT: ret +define double @test2() { + %tmp85 = call double asm sideeffect "fld0", "={st(0)}"() + ret double %tmp85 +} + +; Setting up argument in st(0) should be a single fld. +; CHECK: test3 +; CHECK: fld +; CHECK-NEXT: InlineAsm Start +; Asm consumes stack, nothing should be popped. +; CHECK: InlineAsm End +; CHECK-NOT: fstp +; CHECK: ret +define void @test3(x86_fp80 %X) { + call void asm sideeffect "frob ", "{st(0)},~{st},~{dirflag},~{fpsr},~{flags}"( x86_fp80 %X) + ret void +} + +; CHECK: test4 +; CHECK: fld +; CHECK-NEXT: InlineAsm Start +; CHECK: InlineAsm End +; CHECK-NOT: fstp +; CHECK: ret +define void @test4(double %X) { + call void asm sideeffect "frob ", "{st(0)},~{st},~{dirflag},~{fpsr},~{flags}"( double %X) + ret void +} + +; Same as test3/4, but using value from fadd. +; The fadd can be done in xmm or x87 regs - we don't test that. +; CHECK: test5 +; CHECK: InlineAsm End +; CHECK-NOT: fstp +; CHECK: ret +define void @test5(double %X) { + %Y = fadd double %X, 123.0 + call void asm sideeffect "frob ", "{st(0)},~{st},~{dirflag},~{fpsr},~{flags}"( double %Y) + ret void +} + +; CHECK: test6 +define void @test6(double %A, double %B, double %C, + double %D, double %E) nounwind { +entry: +; Uses the same value twice, should have one fstp after the asm. +; CHECK: foo +; CHECK: InlineAsm End +; CHECK-NEXT: fstp +; CHECK-NOT: fstp + tail call void asm sideeffect "foo $0 $1", "f,f,~{dirflag},~{fpsr},~{flags}"( double %A, double %A ) nounwind +; Uses two different values, should be in st(0)/st(1) and both be popped. 
+; CHECK: bar +; CHECK: InlineAsm End +; CHECK-NEXT: fstp +; CHECK-NEXT: fstp + tail call void asm sideeffect "bar $0 $1", "f,f,~{dirflag},~{fpsr},~{flags}"( double %B, double %C ) nounwind +; Uses two different values, one of which isn't killed in this asm, it +; should not be popped after the asm. +; CHECK: baz +; CHECK: InlineAsm End +; CHECK-NEXT: fstp +; CHECK-NOT: fstp + tail call void asm sideeffect "baz $0 $1", "f,f,~{dirflag},~{fpsr},~{flags}"( double %D, double %E ) nounwind +; This is the last use of %D, so it should be popped after. +; CHECK: baz +; CHECK: InlineAsm End +; CHECK-NEXT: fstp +; CHECK-NOT: fstp +; CHECK: ret + tail call void asm sideeffect "baz $0", "f,~{dirflag},~{fpsr},~{flags}"( double %D ) nounwind + ret void +} + +; PR4185 +; Passing a non-killed value to asm in {st}. +; Make sure it is duped before. +; asm kills st(0), so we shouldn't pop anything +; CHECK: testPR4185 +; CHECK: fld %st(0) +; CHECK: fistpl +; CHECK-NOT: fstp +; CHECK: fistpl +; CHECK-NOT: fstp +; CHECK: ret +; A valid alternative would be to remat the constant pool load before each +; inline asm. +define void @testPR4185() { +return: + call void asm sideeffect "fistpl $0", "{st},~{st}"(double 1.000000e+06) + call void asm sideeffect "fistpl $0", "{st},~{st}"(double 1.000000e+06) + ret void +} + +; Passing a non-killed value through asm in {st}. +; Make sure it is not duped before. +; Second asm kills st(0), so we shouldn't pop anything +; CHECK: testPR4185b +; CHECK-NOT: fld %st(0) +; CHECK: fistl +; CHECK-NOT: fstp +; CHECK: fistpl +; CHECK-NOT: fstp +; CHECK: ret +; A valid alternative would be to remat the constant pool load before each +; inline asm. +define void @testPR4185b() { +return: + call void asm sideeffect "fistl $0", "{st}"(double 1.000000e+06) + call void asm sideeffect "fistpl $0", "{st},~{st}"(double 1.000000e+06) + ret void +} + +; PR4459 +; The return value from ceil must be duped before being consumed by asm. 
+; CHECK: testPR4459 +; CHECK: ceil +; CHECK: fld %st(0) +; CHECK-NOT: fxch +; CHECK: fistpl +; CHECK-NOT: fxch +; CHECK: fstpt +; CHECK: test +define void @testPR4459(x86_fp80 %a) { +entry: + %0 = call x86_fp80 @ceil(x86_fp80 %a) + call void asm sideeffect "fistpl $0", "{st},~{st}"( x86_fp80 %0) + call void @test3(x86_fp80 %0 ) + ret void +} +declare x86_fp80 @ceil(x86_fp80) + +; PR4484 +; test1 leaves a value on the stack that is needed after the asm. +; CHECK: testPR4484 +; CHECK: test1 +; CHECK-NOT: fstp +; Load %a from stack after ceil +; CHECK: fldt +; CHECK-NOT: fxch +; CHECK: fistpl +; CHECK-NOT: fstp +; Set up call to test. +; CHECK: fstpt +; CHECK: test +define void @testPR4484(x86_fp80 %a) { +entry: + %0 = call x86_fp80 @test1() + call void asm sideeffect "fistpl $0", "{st},~{st}"(x86_fp80 %a) + call void @test3(x86_fp80 %0) + ret void +} + +; PR4485 +; CHECK: testPR4485 +define void @testPR4485(x86_fp80* %a) { +entry: + %0 = load x86_fp80* %a, align 16 + %1 = fmul x86_fp80 %0, 0xK4006B400000000000000 + %2 = fmul x86_fp80 %1, 0xK4012F424000000000000 + tail call void asm sideeffect "fistpl $0", "{st},~{st}"(x86_fp80 %2) + %3 = load x86_fp80* %a, align 16 + %4 = fmul x86_fp80 %3, 0xK4006B400000000000000 + %5 = fmul x86_fp80 %4, 0xK4012F424000000000000 + tail call void asm sideeffect "fistpl $0", "{st},~{st}"(x86_fp80 %5) + ret void +} + +; An input argument in a fixed position is implicitly popped by the asm only if +; the input argument is tied to an output register, or it is in the clobber list. +; The clobber list case is tested above. 
+; +; This doesn't implicitly pop the stack: +; +; void fist1(long double x, int *p) { +; asm volatile ("fistl %1" : : "t"(x), "m"(*p)); +; } +; +; CHECK: fist1 +; CHECK: fldt +; CHECK: fistl (%e +; CHECK: fstp +; CHECK: ret +define void @fist1(x86_fp80 %x, i32* %p) nounwind ssp { +entry: + tail call void asm sideeffect "fistl $1", "{st},*m,~{memory},~{dirflag},~{fpsr},~{flags}"(x86_fp80 %x, i32* %p) nounwind + ret void +} + +; Here, the input operand is tied to an output which means that is is +; implicitly popped (and then the output is implicitly pushed). +; +; long double fist2(long double x, int *p) { +; long double y; +; asm ("fistl %1" : "=&t"(y) : "0"(x), "m"(*p) : "memory"); +; return y; +; } +; +; CHECK: fist2 +; CHECK: fldt +; CHECK: fistl (%e +; CHECK-NOT: fstp +; CHECK: ret +define x86_fp80 @fist2(x86_fp80 %x, i32* %p) nounwind ssp { +entry: + %0 = tail call x86_fp80 asm "fistl $2", "=&{st},0,*m,~{memory},~{dirflag},~{fpsr},~{flags}"(x86_fp80 %x, i32* %p) nounwind + ret x86_fp80 %0 +} + +; An 'f' constraint is never implicitly popped: +; +; void fucomp1(long double x, long double y) { +; asm volatile ("fucomp %1" : : "t"(x), "f"(y) : "st"); +; } +; CHECK: fucomp1 +; CHECK: fldt +; CHECK: fldt +; CHECK: fucomp %st +; CHECK: fstp +; CHECK-NOT: fstp +; CHECK: ret +define void @fucomp1(x86_fp80 %x, x86_fp80 %y) nounwind ssp { +entry: + tail call void asm sideeffect "fucomp $1", "{st},f,~{st},~{dirflag},~{fpsr},~{flags}"(x86_fp80 %x, x86_fp80 %y) nounwind + ret void +} + +; The 'u' constraint is only popped implicitly when clobbered: +; +; void fucomp2(long double x, long double y) { +; asm volatile ("fucomp %1" : : "t"(x), "u"(y) : "st"); +; } +; +; void fucomp3(long double x, long double y) { +; asm volatile ("fucompp %1" : : "t"(x), "u"(y) : "st", "st(1)"); +; } +; +; CHECK: fucomp2 +; CHECK: fldt +; CHECK: fldt +; CHECK: fucomp %st(1) +; CHECK: fstp +; CHECK-NOT: fstp +; CHECK: ret +; +; CHECK: fucomp3 +; CHECK: fldt +; CHECK: fldt +; CHECK: fucompp 
%st(1) +; CHECK-NOT: fstp +; CHECK: ret +define void @fucomp2(x86_fp80 %x, x86_fp80 %y) nounwind ssp { +entry: + tail call void asm sideeffect "fucomp $1", "{st},{st(1)},~{st},~{dirflag},~{fpsr},~{flags}"(x86_fp80 %x, x86_fp80 %y) nounwind + ret void +} +define void @fucomp3(x86_fp80 %x, x86_fp80 %y) nounwind ssp { +entry: + tail call void asm sideeffect "fucompp $1", "{st},{st(1)},~{st},~{st(1)},~{dirflag},~{fpsr},~{flags}"(x86_fp80 %x, x86_fp80 %y) nounwind + ret void +} + +; One input, two outputs, one dead output. +%complex = type { float, float } +; CHECK: sincos1 +; CHECK: flds +; CHECK-NOT: fxch +; CHECK: sincos +; CHECK-NOT: fstp +; CHECK: fstp %st(1) +; CHECK-NOT: fstp +; CHECK: ret +define float @sincos1(float %x) nounwind ssp { +entry: + %0 = tail call %complex asm "sincos", "={st},={st(1)},0,~{dirflag},~{fpsr},~{flags}"(float %x) nounwind + %asmresult = extractvalue %complex %0, 0 + ret float %asmresult +} + +; Same thing, swapped output operands. +; CHECK: sincos2 +; CHECK: flds +; CHECK-NOT: fxch +; CHECK: sincos +; CHECK-NOT: fstp +; CHECK: fstp %st(1) +; CHECK-NOT: fstp +; CHECK: ret +define float @sincos2(float %x) nounwind ssp { +entry: + %0 = tail call %complex asm "sincos", "={st(1)},={st},1,~{dirflag},~{fpsr},~{flags}"(float %x) nounwind + %asmresult = extractvalue %complex %0, 1 + ret float %asmresult +} + +; Clobber st(0) after it was live-out/dead from the previous asm. +; CHECK: sincos3 +; Load x, make a copy for the second asm. +; CHECK: flds +; CHECK: fld %st(0) +; CHECK: sincos +; Discard dead result in st(0), bring x to the top. +; CHECK: fstp %st(0) +; CHECK: fxch +; x is now in st(0) for the second asm +; CHECK: sincos +; Discard both results. 
+; CHECK: fstp +; CHECK: fstp +; CHECK: ret +define float @sincos3(float %x) nounwind ssp { +entry: + %0 = tail call %complex asm sideeffect "sincos", "={st(1)},={st},1,~{dirflag},~{fpsr},~{flags}"(float %x) nounwind + %1 = tail call %complex asm sideeffect "sincos", "={st(1)},={st},1,~{dirflag},~{fpsr},~{flags}"(float %x) nounwind + %asmresult = extractvalue %complex %0, 0 + ret float %asmresult +} + +; Pass the same value in two fixed stack slots. +; CHECK: PR10602 +; CHECK: flds LCPI +; CHECK: fld %st(0) +; CHECK: fcomi %st(1), %st(0) +define i32 @PR10602() nounwind ssp { +entry: + %0 = tail call i32 asm "fcomi $2, $1; pushf; pop $0", "=r,{st},{st(1)},~{dirflag},~{fpsr},~{flags}"(double 2.000000e+00, double 2.000000e+00) nounwind + ret i32 %0 +}
diff --git a/src/LLVM/test/CodeGen/X86/inline-asm-h.ll b/src/LLVM/test/CodeGen/X86/inline-asm-h.ll new file mode 100644 index 0000000..53cf419 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/inline-asm-h.ll
@@ -0,0 +1,12 @@ +; RUN: llc -mtriple=x86_64-pc-linux-gnu < %s | FileCheck %s + +@foobar = common global i32 0, align 4 + +define void @zed() nounwind { +entry: + call void asm "movq %mm2,${0:H}", "=*m,~{dirflag},~{fpsr},~{flags}"(i32* @foobar) nounwind + ret void +} + +; CHECK: zed +; CHECK: movq %mm2,foobar+8(%rip)
diff --git a/src/LLVM/test/CodeGen/X86/inline-asm-modifier-n.ll b/src/LLVM/test/CodeGen/X86/inline-asm-modifier-n.ll new file mode 100644 index 0000000..5e76b6c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/inline-asm-modifier-n.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=x86 | grep { 37} +; rdar://7008959 + +define void @bork() nounwind { +entry: + tail call void asm sideeffect "BORK ${0:n}", "i,~{dirflag},~{fpsr},~{flags}"(i32 -37) nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/inline-asm-mrv.ll b/src/LLVM/test/CodeGen/X86/inline-asm-mrv.ll new file mode 100644 index 0000000..733205d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/inline-asm-mrv.ll
@@ -0,0 +1,35 @@ +; PR2094 +; RUN: llc < %s -march=x86-64 | grep movslq +; RUN: llc < %s -march=x86-64 | grep addps +; RUN: llc < %s -march=x86-64 | grep paddd +; RUN: llc < %s -march=x86-64 | not grep movq + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-apple-darwin8" + +define i32 @test1(i8* %v, i8* %blk2, i8* %blk1, i32 %stride, i32 %h) nounwind { + %tmp12 = sext i32 %stride to i64 ; <i64> [#uses=1] + %mrv = call {i32, i8*, i8*} asm sideeffect "$0 $1 $2 $3 $4 $5 $6", + "=r,=r,=r,r,r,r,r"( i64 %tmp12, i32 %h, i8* %blk1, i8* %blk2 ) nounwind + %tmp6 = extractvalue {i32, i8*, i8*} %mrv, 0 + %tmp7 = call i32 asm sideeffect "set $0", + "=r,~{dirflag},~{fpsr},~{flags}"( ) nounwind + ret i32 %tmp7 +} + +define <4 x float> @test2() nounwind { + %mrv = call {<4 x float>, <4 x float>} asm "set $0, $1", "=x,=x"() + %a = extractvalue {<4 x float>, <4 x float>} %mrv, 0 + %b = extractvalue {<4 x float>, <4 x float>} %mrv, 1 + %c = fadd <4 x float> %a, %b + ret <4 x float> %c +} + +define <4 x i32> @test3() nounwind { + %mrv = call {<4 x i32>, <4 x i32>} asm "set $0, $1", "=x,=x"() + %a = extractvalue {<4 x i32>, <4 x i32>} %mrv, 0 + %b = extractvalue {<4 x i32>, <4 x i32>} %mrv, 1 + %c = add <4 x i32> %a, %b + ret <4 x i32> %c +} +
diff --git a/src/LLVM/test/CodeGen/X86/inline-asm-out-regs.ll b/src/LLVM/test/CodeGen/X86/inline-asm-out-regs.ll new file mode 100644 index 0000000..46966f5 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/inline-asm-out-regs.ll
@@ -0,0 +1,40 @@ +; RUN: llc < %s -mtriple=i386-unknown-linux-gnu +; PR3391 + +@pci_indirect = external global { } ; <{ }*> [#uses=1] +@pcibios_last_bus = external global i32 ; <i32*> [#uses=2] + +define void @pci_pcbios_init() nounwind section ".init.text" { +entry: + br label %bb1.i + +bb1.i: ; preds = %bb6.i.i, %bb1.i, %entry + %0 = load i32* null, align 8 ; <i32> [#uses=1] + %1 = icmp ugt i32 %0, 1048575 ; <i1> [#uses=1] + br i1 %1, label %bb2.i, label %bb1.i + +bb2.i: ; preds = %bb1.i + %asmtmp.i.i = tail call { i32, i32, i32, i32 } asm "lcall *(%edi); cld\0A\09jc 1f\0A\09xor %ah, %ah\0A1:", "={dx},={ax},={bx},={cx},1,{di},~{dirflag},~{fpsr},~{flags},~{memory}"(i32 45313, { }* @pci_indirect) nounwind ; <{ i32, i32, i32, i32 }> [#uses=2] + %asmresult2.i.i = extractvalue { i32, i32, i32, i32 } %asmtmp.i.i, 1 + ; <i32> [#uses=1] + %2 = lshr i32 %asmresult2.i.i, 8 ; <i32> [#uses=1] + %3 = trunc i32 %2 to i8 ; <i8> [#uses=1] + %4 = load i32* @pcibios_last_bus, align 4 ; <i32> [#uses=1] + %5 = icmp slt i32 %4, 0 ; <i1> [#uses=1] + br i1 %5, label %bb5.i.i, label %bb6.i.i + +bb5.i.i: ; preds = %bb2.i + %asmresult4.i.i = extractvalue { i32, i32, i32, i32 } %asmtmp.i.i, 3 + ; <i32> [#uses=1] + %6 = and i32 %asmresult4.i.i, 255 ; <i32> [#uses=1] + store i32 %6, i32* @pcibios_last_bus, align 4 + br label %bb6.i.i + +bb6.i.i: ; preds = %bb5.i.i, %bb2.i + %7 = icmp eq i8 %3, 0 ; <i1> [#uses=1] + %or.cond.i.i = and i1 %7, false ; <i1> [#uses=1] + br i1 %or.cond.i.i, label %bb1.i, label %bb8.i.i + +bb8.i.i: ; preds = %bb6.i.i + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/inline-asm-pic.ll b/src/LLVM/test/CodeGen/X86/inline-asm-pic.ll new file mode 100644 index 0000000..0b5ff08 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/inline-asm-pic.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic | grep lea +; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic | grep call + +@main_q = internal global i8* null ; <i8**> [#uses=1] + +define void @func2() nounwind { +entry: + tail call void asm "mov $1,%gs:$0", "=*m,ri,~{dirflag},~{fpsr},~{flags}"(i8** inttoptr (i32 152 to i8**), i8* bitcast (i8** @main_q to i8*)) nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/inline-asm-ptr-cast.ll b/src/LLVM/test/CodeGen/X86/inline-asm-ptr-cast.ll new file mode 100644 index 0000000..50e3021 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/inline-asm-ptr-cast.ll
@@ -0,0 +1,27 @@ +; RUN: llc -mtriple=x86_64-unknown-linux-gnu <%s +; ModuleID = 'bug.c' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +@func.flagmask = internal constant i64 1, align 8 + +define void @func() nounwind { +entry: + %src = alloca i32, align 4 + %dst = alloca i32, align 4 + %flags = alloca i64, align 8 + %newflags = alloca i64, align 8 + store i32 0, i32* %src, align 4 + store i32 0, i32* %dst, align 4 + store i64 1, i64* %flags, align 8 + store i64 -1, i64* %newflags, align 8 + %0 = bitcast i32* %dst to i8* + %tmp = load i64* %flags, align 8 + %and = and i64 %tmp, 1 + %1 = bitcast i32* %src to i8* + %tmp1 = load i8* %1 + %2 = bitcast i32* %dst to i8* + %tmp2 = load i8* %2 + call void asm "pushfq \0Aandq $2, (%rsp) \0Aorq $3, (%rsp) \0Apopfq \0Aaddb $4, $1 \0Apushfq \0Apopq $0 \0A", "=*&rm,=*&rm,i,r,r,1,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %newflags, i8* %0, i64 -2, i64 %and, i8 %tmp1, i8 %tmp2) nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/inline-asm-q-regs.ll b/src/LLVM/test/CodeGen/X86/inline-asm-q-regs.ll new file mode 100644 index 0000000..1c8e2f9 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/inline-asm-q-regs.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -march=x86-64 +; rdar://7066579 + + %0 = type { i64, i64, i64, i64, i64 } ; type %0 + +define void @test1() nounwind { +entry: + %asmtmp = call %0 asm sideeffect "mov %cr0, $0 \0Amov %cr2, $1 \0Amov %cr3, $2 \0Amov %cr4, $3 \0Amov %cr8, $0 \0A", "=q,=q,=q,=q,=q,~{dirflag},~{fpsr},~{flags}"() nounwind ; <%0> [#uses=0] + ret void +} + +; PR9602 +define void @test2(float %tmp) nounwind { + call void asm sideeffect "$0", "q"(float %tmp) nounwind + call void asm sideeffect "$0", "Q"(float %tmp) nounwind + ret void +} + +define void @test3(double %tmp) nounwind { + call void asm sideeffect "$0", "q"(double %tmp) nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/inline-asm-tied.ll b/src/LLVM/test/CodeGen/X86/inline-asm-tied.ll new file mode 100644 index 0000000..79b6885 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/inline-asm-tied.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin9 -O0 -regalloc=linearscan | grep {movl %edx, 4(%esp)} | count 2 +; rdar://6992609 + +target triple = "i386-apple-darwin9.0" +@llvm.used = appending global [1 x i8*] [i8* bitcast (i64 (i64)* @_OSSwapInt64 to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +define i64 @_OSSwapInt64(i64 %_data) nounwind { +entry: + %retval = alloca i64 ; <i64*> [#uses=2] + %_data.addr = alloca i64 ; <i64*> [#uses=4] + store i64 %_data, i64* %_data.addr + %tmp = load i64* %_data.addr ; <i64> [#uses=1] + %0 = call i64 asm "bswap %eax\0A\09bswap %edx\0A\09xchgl %eax, %edx", "=A,0,~{dirflag},~{fpsr},~{flags}"(i64 %tmp) nounwind ; <i64> [#uses=1] + store i64 %0, i64* %_data.addr + %tmp1 = load i64* %_data.addr ; <i64> [#uses=1] + store i64 %tmp1, i64* %retval + %1 = load i64* %retval ; <i64> [#uses=1] + ret i64 %1 +}
diff --git a/src/LLVM/test/CodeGen/X86/inline-asm-x-scalar.ll b/src/LLVM/test/CodeGen/X86/inline-asm-x-scalar.ll new file mode 100644 index 0000000..58a6e12 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/inline-asm-x-scalar.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -march=x86 -mcpu=yonah + +define void @test1() { + tail call void asm sideeffect "ucomiss $0", "x"( float 0x41E0000000000000) + ret void +} + +define void @test2() { + %tmp53 = tail call i32 asm "ucomiss $1, $3\0Acmovae $2, $0 ", "=r,mx,mr,x,0,~{dirflag},~{fpsr},~{flags},~{cc}"( float 0x41E0000000000000, i32 2147483647, float 0.000000e+00, i32 0 ) ; <i32> [#uses + unreachable +} + +define void @test3() { + tail call void asm sideeffect "ucomiss $0, $1", "mx,x,~{dirflag},~{fpsr},~{flags},~{cc}"( float 0x41E0000000000000, i32 65536 ) + ret void +} + +define void @test4() { + %tmp1 = tail call float asm "", "=x,0,~{dirflag},~{fpsr},~{flags}"( float 0x47EFFFFFE0000000 ); <float> [#uses=1] + %tmp4 = fsub float %tmp1, 0x3810000000000000 ; <float> [#uses=1] + tail call void asm sideeffect "", "x,~{dirflag},~{fpsr},~{flags}"( float %tmp4 ) + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/inline-asm.ll b/src/LLVM/test/CodeGen/X86/inline-asm.ll new file mode 100644 index 0000000..04d75e2 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/inline-asm.ll
@@ -0,0 +1,45 @@ +; RUN: llc < %s -march=x86 + +define i32 @test1() nounwind { + ; Dest is AX, dest type = i32. + %tmp4 = call i32 asm sideeffect "FROB $0", "={ax}"() + ret i32 %tmp4 +} + +define void @test2(i32 %V) nounwind { + ; input is AX, in type = i32. + call void asm sideeffect "FROB $0", "{ax}"(i32 %V) + ret void +} + +define void @test3() nounwind { + ; FP constant as a memory operand. + tail call void asm sideeffect "frob $0", "m"( float 0x41E0000000000000) + ret void +} + +define void @test4() nounwind { + ; J means a constant in range 0 to 63. + tail call void asm sideeffect "bork $0", "J"(i32 37) nounwind + ret void +} + +; rdar://9738585 +define i32 @test5() nounwind { +entry: + %0 = tail call i32 asm "test", "=l,~{dirflag},~{fpsr},~{flags}"() nounwind + ret i32 0 +} + +; rdar://9777108 PR10352 +define void @test6(i1 zeroext %desired) nounwind { +entry: + tail call void asm sideeffect "foo $0", "q,~{dirflag},~{fpsr},~{flags}"(i1 %desired) nounwind + ret void +} + +define void @test7(i1 zeroext %desired, i32* %p) nounwind { +entry: + %0 = tail call i8 asm sideeffect "xchg $0, $1", "=r,*m,0,~{memory},~{dirflag},~{fpsr},~{flags}"(i32* %p, i1 %desired) nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/ins_subreg_coalesce-1.ll b/src/LLVM/test/CodeGen/X86/ins_subreg_coalesce-1.ll new file mode 100644 index 0000000..8367436 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/ins_subreg_coalesce-1.ll
@@ -0,0 +1,30 @@ +; RUN: llc < %s -march=x86 | FileCheck %s + +define fastcc i32 @t() nounwind { +entry: +; CHECK: t: +; CHECK: movzwl 0, %eax +; CHECK: orl $2, %eax +; CHECK: movw %ax, 0 +; CHECK: shrl $3, %eax +; CHECK: andl $1, %eax + br i1 false, label %UnifiedReturnBlock, label %bb4 +bb4: ; preds = %entry + br i1 false, label %bb17, label %bb22 +bb17: ; preds = %bb4 + ret i32 1 +bb22: ; preds = %bb4 + br i1 true, label %walkExprTree.exit, label %bb4.i +bb4.i: ; preds = %bb22 + ret i32 0 +walkExprTree.exit: ; preds = %bb22 + %tmp83 = load i16* null, align 4 ; <i16> [#uses=1] + %tmp84 = or i16 %tmp83, 2 ; <i16> [#uses=2] + store i16 %tmp84, i16* null, align 4 + %tmp98993 = zext i16 %tmp84 to i32 ; <i32> [#uses=1] + %tmp1004 = lshr i32 %tmp98993, 3 ; <i32> [#uses=1] + %tmp100.lobit5 = and i32 %tmp1004, 1 ; <i32> [#uses=1] + ret i32 %tmp100.lobit5 +UnifiedReturnBlock: ; preds = %entry + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/X86/ins_subreg_coalesce-2.ll b/src/LLVM/test/CodeGen/X86/ins_subreg_coalesce-2.ll new file mode 100644 index 0000000..f2c9cc7 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/ins_subreg_coalesce-2.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=x86-64 | not grep movw + +define i16 @test5(i16 %f12) nounwind { + %f11 = shl i16 %f12, 2 ; <i16> [#uses=1] + %tmp7.25 = ashr i16 %f11, 8 ; <i16> [#uses=1] + ret i16 %tmp7.25 +}
diff --git a/src/LLVM/test/CodeGen/X86/ins_subreg_coalesce-3.ll b/src/LLVM/test/CodeGen/X86/ins_subreg_coalesce-3.ll new file mode 100644 index 0000000..63881e0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/ins_subreg_coalesce-3.ll
@@ -0,0 +1,92 @@ +; RUN: llc < %s -march=x86-64 | grep mov | count 3 + + %struct.COMPOSITE = type { i8, i16, i16 } + %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } + %struct.FILE_POS = type { i8, i8, i16, i32 } + %struct.FIRST_UNION = type { %struct.FILE_POS } + %struct.FONT_INFO = type { %struct.metrics*, i8*, i16*, %struct.COMPOSITE*, i32, %struct.rec*, %struct.rec*, i16, i16, i16*, i8*, i8*, i16* } + %struct.FOURTH_UNION = type { %struct.STYLE } + %struct.GAP = type { i8, i8, i16 } + %struct.LIST = type { %struct.rec*, %struct.rec* } + %struct.SECOND_UNION = type { { i16, i8, i8 } } + %struct.STYLE = type { { %struct.GAP }, { %struct.GAP }, i16, i16, i32 } + %struct.THIRD_UNION = type { %struct.FILE*, [8 x i8] } + %struct.__sFILEX = type opaque + %struct.__sbuf = type { i8*, i32 } + %struct.head_type = type { [2 x %struct.LIST], %struct.FIRST_UNION, %struct.SECOND_UNION, %struct.THIRD_UNION, %struct.FOURTH_UNION, %struct.rec*, { %struct.rec* }, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, i32 } + %struct.metrics = type { i16, i16, i16, i16, i16 } + %struct.rec = type { %struct.head_type } + +define void @FontChange(i1 %foo) nounwind { +entry: + br i1 %foo, label %bb298, label %bb49 +bb49: ; preds = %entry + ret void +bb298: ; preds = %entry + br i1 %foo, label %bb304, label %bb366 +bb304: ; preds = %bb298 + br i1 %foo, label %bb330, label %bb428 +bb330: ; preds = %bb366, %bb304 + br label %bb366 +bb366: ; preds = %bb330, %bb298 + br i1 %foo, label %bb330, label %bb428 +bb428: ; preds = %bb366, %bb304 + br i1 %foo, label %bb650, label %bb433 +bb433: ; preds = %bb428 + ret void +bb650: ; preds = %bb650, %bb428 + %tmp658 = load i8* null, align 8 ; <i8> [#uses=1] + %tmp659 = icmp eq i8 %tmp658, 0 ; <i1> [#uses=1] 
+ br i1 %tmp659, label %bb650, label %bb662 +bb662: ; preds = %bb650 + br label %bb761 +bb688: ; preds = %bb662 + ret void +bb761: ; preds = %bb662 + %tmp487248736542 = load i32* null, align 4 ; <i32> [#uses=2] + %tmp487648776541 = and i32 %tmp487248736542, 57344 ; <i32> [#uses=1] + %tmp4881 = icmp eq i32 %tmp487648776541, 8192 ; <i1> [#uses=1] + br i1 %tmp4881, label %bb4884, label %bb4897 +bb4884: ; preds = %bb761 + %tmp488948906540 = and i32 %tmp487248736542, 7168 ; <i32> [#uses=1] + %tmp4894 = icmp eq i32 %tmp488948906540, 1024 ; <i1> [#uses=1] + br i1 %tmp4894, label %bb4932, label %bb4897 +bb4897: ; preds = %bb4884, %bb761 + ret void +bb4932: ; preds = %bb4884 + %tmp4933 = load i32* null, align 4 ; <i32> [#uses=1] + br i1 %foo, label %bb5054, label %bb4940 +bb4940: ; preds = %bb4932 + %tmp4943 = load i32* null, align 4 ; <i32> [#uses=2] + switch i32 %tmp4933, label %bb5054 [ + i32 159, label %bb4970 + i32 160, label %bb5002 + ] +bb4970: ; preds = %bb4940 + %tmp49746536 = trunc i32 %tmp4943 to i16 ; <i16> [#uses=1] + %tmp49764977 = and i16 %tmp49746536, 4095 ; <i16> [#uses=1] + %mask498049814982 = zext i16 %tmp49764977 to i64 ; <i64> [#uses=1] + %tmp4984 = getelementptr %struct.FONT_INFO* null, i64 %mask498049814982, i32 5 ; <%struct.rec**> [#uses=1] + %tmp4985 = load %struct.rec** %tmp4984, align 8 ; <%struct.rec*> [#uses=1] + %tmp4988 = getelementptr %struct.rec* %tmp4985, i64 0, i32 0, i32 3 ; <%struct.THIRD_UNION*> [#uses=1] + %tmp4991 = bitcast %struct.THIRD_UNION* %tmp4988 to i32* ; <i32*> [#uses=1] + %tmp4992 = load i32* %tmp4991, align 8 ; <i32> [#uses=1] + %tmp49924993 = trunc i32 %tmp4992 to i16 ; <i16> [#uses=1] + %tmp4996 = add i16 %tmp49924993, 0 ; <i16> [#uses=1] + br label %bb5054 +bb5002: ; preds = %bb4940 + %tmp50066537 = trunc i32 %tmp4943 to i16 ; <i16> [#uses=1] + %tmp50085009 = and i16 %tmp50066537, 4095 ; <i16> [#uses=1] + %mask501250135014 = zext i16 %tmp50085009 to i64 ; <i64> [#uses=1] + %tmp5016 = getelementptr %struct.FONT_INFO* 
null, i64 %mask501250135014, i32 5 ; <%struct.rec**> [#uses=1] + %tmp5017 = load %struct.rec** %tmp5016, align 8 ; <%struct.rec*> [#uses=1] + %tmp5020 = getelementptr %struct.rec* %tmp5017, i64 0, i32 0, i32 3 ; <%struct.THIRD_UNION*> [#uses=1] + %tmp5023 = bitcast %struct.THIRD_UNION* %tmp5020 to i32* ; <i32*> [#uses=1] + %tmp5024 = load i32* %tmp5023, align 8 ; <i32> [#uses=1] + %tmp50245025 = trunc i32 %tmp5024 to i16 ; <i16> [#uses=1] + %tmp5028 = sub i16 %tmp50245025, 0 ; <i16> [#uses=1] + br label %bb5054 +bb5054: ; preds = %bb5002, %bb4970, %bb4940, %bb4932 + %flen.0.reg2mem.0 = phi i16 [ %tmp4996, %bb4970 ], [ %tmp5028, %bb5002 ], [ 0, %bb4932 ], [ undef, %bb4940 ] ; <i16> [#uses=0] + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/insert-positions.ll b/src/LLVM/test/CodeGen/X86/insert-positions.ll new file mode 100644 index 0000000..1a695f3 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/insert-positions.ll
@@ -0,0 +1,69 @@ +; RUN: llc < %s -march=x86-64 >/dev/null + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +define void @test0() nounwind { +if.end90.i.i: + br label %while.body.i.i221.i + +while.body.i.i221.i: ; preds = %while.cond.backedge.i.i.i, %if.end90.i.i + br i1 undef, label %if.then.i.i224.i, label %while.cond.backedge.i.i.i + +while.cond.backedge.i.i.i: ; preds = %for.end.i.i.i, %while.body.i.i221.i + br label %while.body.i.i221.i + +if.then.i.i224.i: ; preds = %while.body.i.i221.i + switch i32 undef, label %for.cond.i.i226.i [ + i32 92, label %sw.bb.i.i225.i + i32 34, label %sw.bb.i.i225.i + i32 110, label %sw.bb21.i.i.i + ] + +sw.bb.i.i225.i: ; preds = %if.then.i.i224.i, %if.then.i.i224.i + unreachable + +sw.bb21.i.i.i: ; preds = %if.then.i.i224.i + unreachable + +for.cond.i.i226.i: ; preds = %for.body.i.i.i, %if.then.i.i224.i + %0 = phi i64 [ %tmp154.i.i.i, %for.body.i.i.i ], [ 0, %if.then.i.i224.i ] ; <i64> [#uses=2] + %tmp154.i.i.i = add i64 %0, 1 ; <i64> [#uses=2] + %i.0.i.i.i = trunc i64 %0 to i32 ; <i32> [#uses=1] + br i1 undef, label %land.rhs.i.i.i, label %for.end.i.i.i + +land.rhs.i.i.i: ; preds = %for.cond.i.i226.i + br i1 undef, label %for.body.i.i.i, label %for.end.i.i.i + +for.body.i.i.i: ; preds = %land.rhs.i.i.i + br label %for.cond.i.i226.i + +for.end.i.i.i: ; preds = %land.rhs.i.i.i, %for.cond.i.i226.i + %idx.ext.i.i.i = sext i32 %i.0.i.i.i to i64 ; <i64> [#uses=1] + %sub.ptr72.sum.i.i.i = xor i64 %idx.ext.i.i.i, -1 ; <i64> [#uses=1] + %pos.addr.1.sum155.i.i.i = add i64 %tmp154.i.i.i, %sub.ptr72.sum.i.i.i ; <i64> [#uses=1] + %arrayidx76.i.i.i = getelementptr inbounds i8* undef, i64 %pos.addr.1.sum155.i.i.i ; <i8*> [#uses=0] + br label %while.cond.backedge.i.i.i +} + +define void @test1() nounwind { +entry: + %t = shl i32 undef, undef ; <i32> [#uses=1] + %t9 = sub nsw i32 0, %t ; <i32> [#uses=1] + br label %outer + 
+outer: ; preds = %bb18, %bb + %i12 = phi i32 [ %t21, %bb18 ], [ 0, %entry ] ; <i32> [#uses=2] + %i13 = phi i32 [ %t20, %bb18 ], [ 0, %entry ] ; <i32> [#uses=2] + br label %inner + +inner: ; preds = %bb16, %bb11 + %t17 = phi i32 [ %i13, %outer ], [ undef, %inner ] ; <i32> [#uses=1] + store i32 %t17, i32* undef + br i1 undef, label %bb18, label %inner + +bb18: ; preds = %bb16 + %t19 = add i32 %i13, %t9 ; <i32> [#uses=1] + %t20 = add i32 %t19, %i12 ; <i32> [#uses=1] + %t21 = add i32 %i12, 1 ; <i32> [#uses=1] + br label %outer +}
diff --git a/src/LLVM/test/CodeGen/X86/insertelement-copytoregs.ll b/src/LLVM/test/CodeGen/X86/insertelement-copytoregs.ll new file mode 100644 index 0000000..34a29ca --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/insertelement-copytoregs.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=x86-64 | grep -v IMPLICIT_DEF + +define void @foo(<2 x float>* %p) { + %t = insertelement <2 x float> undef, float 0.0, i32 0 + %v = insertelement <2 x float> %t, float 0.0, i32 1 + br label %bb8 + +bb8: + store <2 x float> %v, <2 x float>* %p + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/insertelement-legalize.ll b/src/LLVM/test/CodeGen/X86/insertelement-legalize.ll new file mode 100644 index 0000000..3805cbb --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/insertelement-legalize.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=x86 + +; Test to check that we properly legalize an insert vector element +define void @test(<2 x i64> %val, <2 x i64>* %dst, i64 %x) nounwind { +entry: + %tmp4 = insertelement <2 x i64> %val, i64 %x, i32 0 ; <<2 x i64>> [#uses=1] + %add = add <2 x i64> %tmp4, %val ; <<2 x i64>> [#uses=1] + store <2 x i64> %add, <2 x i64>* %dst + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/int-intrinsic.ll b/src/LLVM/test/CodeGen/X86/int-intrinsic.ll new file mode 100644 index 0000000..45a9b0f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/int-intrinsic.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -march=x86 | FileCheck %s +; RUN: llc < %s -march=x86-64 | FileCheck %s + +declare void @llvm.x86.int(i8) nounwind + +; CHECK: int3 +; CHECK: ret +define void @primitive_int3 () { +bb.entry: + call void @llvm.x86.int(i8 3) nounwind + ret void +} + +; CHECK: int $-128 +; CHECK: ret +define void @primitive_int128 () { +bb.entry: + call void @llvm.x86.int(i8 128) nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/invalid-shift-immediate.ll b/src/LLVM/test/CodeGen/X86/invalid-shift-immediate.ll new file mode 100644 index 0000000..77a9f7e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/invalid-shift-immediate.ll
@@ -0,0 +1,30 @@ +; RUN: llc < %s -march=x86 +; PR2098 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin8" + +define void @foo(i32 %x) { +entry: + %x_addr = alloca i32 ; <i32*> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store i32 %x, i32* %x_addr + %tmp = load i32* %x_addr, align 4 ; <i32> [#uses=1] + %tmp1 = ashr i32 %tmp, -2 ; <i32> [#uses=1] + %tmp2 = and i32 %tmp1, 1 ; <i32> [#uses=1] + %tmp23 = trunc i32 %tmp2 to i8 ; <i8> [#uses=1] + %toBool = icmp ne i8 %tmp23, 0 ; <i1> [#uses=1] + br i1 %toBool, label %bb, label %bb5 + +bb: ; preds = %entry + %tmp4 = call i32 (...)* @bar( ) nounwind ; <i32> [#uses=0] + br label %bb5 + +bb5: ; preds = %bb, %entry + br label %return + +return: ; preds = %bb5 + ret void +} + +declare i32 @bar(...)
diff --git a/src/LLVM/test/CodeGen/X86/isel-sink.ll b/src/LLVM/test/CodeGen/X86/isel-sink.ll new file mode 100644 index 0000000..b46c332 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/isel-sink.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -march=x86 | FileCheck %s + +define i32 @test(i32* %X, i32 %B) { +; CHECK: test: +; CHECK-NOT: ret +; CHECK-NOT: lea +; CHECK: mov{{.}} $4, ({{.*}},{{.*}},4) +; CHECK: ret +; CHECK: mov{{.}} ({{.*}},{{.*}},4), +; CHECK: ret + + ; This gep should be sunk out of this block into the load/store users. + %P = getelementptr i32* %X, i32 %B + %G = icmp ult i32 %B, 1234 + br i1 %G, label %T, label %F +T: + store i32 4, i32* %P + ret i32 141 +F: + %V = load i32* %P + ret i32 %V +}
diff --git a/src/LLVM/test/CodeGen/X86/isel-sink2.ll b/src/LLVM/test/CodeGen/X86/isel-sink2.ll new file mode 100644 index 0000000..5ed0e00 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/isel-sink2.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=x86 > %t +; RUN: grep {movb.7(%...)} %t +; RUN: not grep leal %t + +define i8 @test(i32 *%P) nounwind { + %Q = getelementptr i32* %P, i32 1 + %R = bitcast i32* %Q to i8* + %S = load i8* %R + %T = icmp eq i8 %S, 0 + br i1 %T, label %TB, label %F +TB: + ret i8 4 +F: + %U = getelementptr i8* %R, i32 3 + %V = load i8* %U + ret i8 %V +}
diff --git a/src/LLVM/test/CodeGen/X86/isel-sink3.ll b/src/LLVM/test/CodeGen/X86/isel-sink3.ll new file mode 100644 index 0000000..7012cce --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/isel-sink3.ll
@@ -0,0 +1,27 @@ +; RUN: llc < %s | FileCheck %s +; this should not sink %1 into bb1, that would increase reg pressure. + +; rdar://6399178 + +; CHECK: addl $4, +; CHECK-NOT: leal + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin7" + +define i32 @bar(i32** %P) nounwind { +entry: + %0 = load i32** %P, align 4 ; <i32*> [#uses=2] + %1 = getelementptr i32* %0, i32 1 ; <i32*> [#uses=1] + %2 = icmp ugt i32* %1, inttoptr (i64 1233 to i32*) ; <i1> [#uses=1] + br i1 %2, label %bb1, label %bb + +bb: ; preds = %entry + store i32* inttoptr (i64 123 to i32*), i32** %P, align 4 + br label %bb1 + +bb1: ; preds = %entry, %bb + %3 = getelementptr i32* %1, i32 1 ; <i32*> [#uses=1] + %4 = load i32* %3, align 4 ; <i32> [#uses=1] + ret i32 %4 +}
diff --git a/src/LLVM/test/CodeGen/X86/isint.ll b/src/LLVM/test/CodeGen/X86/isint.ll new file mode 100644 index 0000000..4a98e63 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/isint.ll
@@ -0,0 +1,34 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s + +define i32 @isint_return(double %d) nounwind { +; CHECK-NOT: xor +; CHECK: cvt + %i = fptosi double %d to i32 +; CHECK-NEXT: cvt + %e = sitofp i32 %i to double +; CHECK: cmpeqsd + %c = fcmp oeq double %d, %e +; CHECK-NEXT: movd +; CHECK-NEXT: andl + %z = zext i1 %c to i32 + ret i32 %z +} + +declare void @foo() + +define void @isint_branch(double %d) nounwind { +; CHECK: cvt + %i = fptosi double %d to i32 +; CHECK-NEXT: cvt + %e = sitofp i32 %i to double +; CHECK: ucomisd + %c = fcmp oeq double %d, %e +; CHECK-NEXT: jne +; CHECK-NEXT: jp + br i1 %c, label %true, label %false +true: + call void @foo() + ret void +false: + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/isnan.ll b/src/LLVM/test/CodeGen/X86/isnan.ll new file mode 100644 index 0000000..79e3d8d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/isnan.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=x86 | not grep call + +declare i1 @llvm.isunordered.f64(double) + +define i1 @test_isnan(double %X) { + %R = fcmp uno double %X, %X ; <i1> [#uses=1] + ret i1 %R +} +
diff --git a/src/LLVM/test/CodeGen/X86/isnan2.ll b/src/LLVM/test/CodeGen/X86/isnan2.ll new file mode 100644 index 0000000..7753346 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/isnan2.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=x86 -mcpu=yonah | not grep pxor + +; This should not need to materialize 0.0 to evaluate the condition. + +define i32 @test(double %X) nounwind { +entry: + %tmp6 = fcmp uno double %X, 0.000000e+00 ; <i1> [#uses=1] + %tmp67 = zext i1 %tmp6 to i32 ; <i32> [#uses=1] + ret i32 %tmp67 +} +
diff --git a/src/LLVM/test/CodeGen/X86/ispositive.ll b/src/LLVM/test/CodeGen/X86/ispositive.ll new file mode 100644 index 0000000..0d4b14e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/ispositive.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=x86 | grep {shrl.*31} + +define i32 @test1(i32 %X) { +entry: + icmp slt i32 %X, 0 ; <i1>:0 [#uses=1] + zext i1 %0 to i32 ; <i32>:1 [#uses=1] + ret i32 %1 +} +
diff --git a/src/LLVM/test/CodeGen/X86/iv-users-in-other-loops.ll b/src/LLVM/test/CodeGen/X86/iv-users-in-other-loops.ll new file mode 100644 index 0000000..8f79fb8 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/iv-users-in-other-loops.ll
@@ -0,0 +1,300 @@ +; RUN: llc < %s -march=x86-64 -enable-lsr-nested -o %t +; RUN: not grep inc %t +; RUN: grep dec %t | count 2 +; RUN: grep addq %t | count 12 +; RUN: not grep addb %t +; RUN: not grep leaq %t +; RUN: not grep leal %t +; RUN: not grep movq %t + +; IV users in each of the loops from other loops shouldn't cause LSR +; to insert new induction variables. Previously it would create a +; flood of new induction variables. +; Also, the loop reversal should kick in once. +; +; In this example, performing LSR on the entire loop nest, +; as opposed to only the inner loop can further reduce induction variables, +; and their related instructions and registers. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define void @foo(float* %A, i32 %IA, float* %B, i32 %IB, float* nocapture %C, i32 %N) nounwind { +entry: + %0 = xor i32 %IA, 1 ; <i32> [#uses=1] + %1 = xor i32 %IB, 1 ; <i32> [#uses=1] + %2 = or i32 %1, %0 ; <i32> [#uses=1] + %3 = icmp eq i32 %2, 0 ; <i1> [#uses=1] + br i1 %3, label %bb2, label %bb13 + +bb: ; preds = %bb3 + %4 = load float* %A_addr.0, align 4 ; <float> [#uses=1] + %5 = load float* %B_addr.0, align 4 ; <float> [#uses=1] + %6 = fmul float %4, %5 ; <float> [#uses=1] + %7 = fadd float %6, %Sum0.0 ; <float> [#uses=1] + %indvar.next154 = add i64 %B_addr.0.rec, 1 ; <i64> [#uses=1] + br label %bb2 + +bb2: ; preds = %entry, %bb + %B_addr.0.rec = phi i64 [ %indvar.next154, %bb ], [ 0, %entry ] ; <i64> [#uses=14] + %Sum0.0 = phi float [ %7, %bb ], [ 0.000000e+00, %entry ] ; <float> [#uses=5] + %indvar146 = trunc i64 %B_addr.0.rec to i32 ; <i32> [#uses=1] + %N_addr.0 = sub i32 %N, %indvar146 ; <i32> [#uses=6] + %A_addr.0 = getelementptr float* %A, i64 %B_addr.0.rec ; <float*> [#uses=4] + %B_addr.0 = getelementptr float* %B, i64 %B_addr.0.rec ; <float*> [#uses=4] + %8 = icmp sgt i32 %N_addr.0, 0 ; <i1> 
[#uses=1] + br i1 %8, label %bb3, label %bb4 + +bb3: ; preds = %bb2 + %9 = ptrtoint float* %A_addr.0 to i64 ; <i64> [#uses=1] + %10 = and i64 %9, 15 ; <i64> [#uses=1] + %11 = icmp eq i64 %10, 0 ; <i1> [#uses=1] + br i1 %11, label %bb4, label %bb + +bb4: ; preds = %bb3, %bb2 + %12 = ptrtoint float* %B_addr.0 to i64 ; <i64> [#uses=1] + %13 = and i64 %12, 15 ; <i64> [#uses=1] + %14 = icmp eq i64 %13, 0 ; <i1> [#uses=1] + %15 = icmp sgt i32 %N_addr.0, 15 ; <i1> [#uses=2] + br i1 %14, label %bb6.preheader, label %bb10.preheader + +bb10.preheader: ; preds = %bb4 + br i1 %15, label %bb9, label %bb12.loopexit + +bb6.preheader: ; preds = %bb4 + br i1 %15, label %bb5, label %bb8.loopexit + +bb5: ; preds = %bb5, %bb6.preheader + %indvar143 = phi i64 [ 0, %bb6.preheader ], [ %indvar.next144, %bb5 ] ; <i64> [#uses=3] + %vSum0.072 = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %21, %bb5 ] ; <<4 x float>> [#uses=1] + %vSum1.070 = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %29, %bb5 ] ; <<4 x float>> [#uses=1] + %vSum2.069 = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %37, %bb5 ] ; <<4 x float>> [#uses=1] + %vSum3.067 = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %45, %bb5 ] ; <<4 x float>> [#uses=1] + %indvar145 = trunc i64 %indvar143 to i32 ; <i32> [#uses=1] + %tmp150 = mul i32 %indvar145, -16 ; <i32> [#uses=1] + %N_addr.268 = add i32 %tmp150, %N_addr.0 ; <i32> [#uses=1] + %A_addr.273.rec = shl i64 %indvar143, 4 ; <i64> [#uses=5] + %B_addr.0.sum180 = add i64 %B_addr.0.rec, %A_addr.273.rec ; <i64> [#uses=2] + %B_addr.271 = getelementptr float* %B, i64 %B_addr.0.sum180 ; <float*> [#uses=1] + %A_addr.273 = getelementptr float* %A, i64 %B_addr.0.sum180 ; <float*> [#uses=1] + tail call void asm sideeffect ";# foo", "~{dirflag},~{fpsr},~{flags}"() nounwind + %16 = bitcast float* %A_addr.273 to <4 x float>* ; <<4 x float>*> [#uses=1] + %17 = load <4 x float>* %16, align 16 ; <<4 x float>> [#uses=1] + %18 = bitcast float* %B_addr.271 to <4 x 
float>* ; <<4 x float>*> [#uses=1] + %19 = load <4 x float>* %18, align 16 ; <<4 x float>> [#uses=1] + %20 = fmul <4 x float> %17, %19 ; <<4 x float>> [#uses=1] + %21 = fadd <4 x float> %20, %vSum0.072 ; <<4 x float>> [#uses=2] + %A_addr.273.sum163 = or i64 %A_addr.273.rec, 4 ; <i64> [#uses=1] + %A_addr.0.sum175 = add i64 %B_addr.0.rec, %A_addr.273.sum163 ; <i64> [#uses=2] + %22 = getelementptr float* %A, i64 %A_addr.0.sum175 ; <float*> [#uses=1] + %23 = bitcast float* %22 to <4 x float>* ; <<4 x float>*> [#uses=1] + %24 = load <4 x float>* %23, align 16 ; <<4 x float>> [#uses=1] + %25 = getelementptr float* %B, i64 %A_addr.0.sum175 ; <float*> [#uses=1] + %26 = bitcast float* %25 to <4 x float>* ; <<4 x float>*> [#uses=1] + %27 = load <4 x float>* %26, align 16 ; <<4 x float>> [#uses=1] + %28 = fmul <4 x float> %24, %27 ; <<4 x float>> [#uses=1] + %29 = fadd <4 x float> %28, %vSum1.070 ; <<4 x float>> [#uses=2] + %A_addr.273.sum161 = or i64 %A_addr.273.rec, 8 ; <i64> [#uses=1] + %A_addr.0.sum174 = add i64 %B_addr.0.rec, %A_addr.273.sum161 ; <i64> [#uses=2] + %30 = getelementptr float* %A, i64 %A_addr.0.sum174 ; <float*> [#uses=1] + %31 = bitcast float* %30 to <4 x float>* ; <<4 x float>*> [#uses=1] + %32 = load <4 x float>* %31, align 16 ; <<4 x float>> [#uses=1] + %33 = getelementptr float* %B, i64 %A_addr.0.sum174 ; <float*> [#uses=1] + %34 = bitcast float* %33 to <4 x float>* ; <<4 x float>*> [#uses=1] + %35 = load <4 x float>* %34, align 16 ; <<4 x float>> [#uses=1] + %36 = fmul <4 x float> %32, %35 ; <<4 x float>> [#uses=1] + %37 = fadd <4 x float> %36, %vSum2.069 ; <<4 x float>> [#uses=2] + %A_addr.273.sum159 = or i64 %A_addr.273.rec, 12 ; <i64> [#uses=1] + %A_addr.0.sum173 = add i64 %B_addr.0.rec, %A_addr.273.sum159 ; <i64> [#uses=2] + %38 = getelementptr float* %A, i64 %A_addr.0.sum173 ; <float*> [#uses=1] + %39 = bitcast float* %38 to <4 x float>* ; <<4 x float>*> [#uses=1] + %40 = load <4 x float>* %39, align 16 ; <<4 x float>> [#uses=1] + %41 = 
getelementptr float* %B, i64 %A_addr.0.sum173 ; <float*> [#uses=1] + %42 = bitcast float* %41 to <4 x float>* ; <<4 x float>*> [#uses=1] + %43 = load <4 x float>* %42, align 16 ; <<4 x float>> [#uses=1] + %44 = fmul <4 x float> %40, %43 ; <<4 x float>> [#uses=1] + %45 = fadd <4 x float> %44, %vSum3.067 ; <<4 x float>> [#uses=2] + %.rec83 = add i64 %A_addr.273.rec, 16 ; <i64> [#uses=1] + %A_addr.0.sum172 = add i64 %B_addr.0.rec, %.rec83 ; <i64> [#uses=2] + %46 = getelementptr float* %A, i64 %A_addr.0.sum172 ; <float*> [#uses=1] + %47 = getelementptr float* %B, i64 %A_addr.0.sum172 ; <float*> [#uses=1] + %48 = add i32 %N_addr.268, -16 ; <i32> [#uses=2] + %49 = icmp sgt i32 %48, 15 ; <i1> [#uses=1] + %indvar.next144 = add i64 %indvar143, 1 ; <i64> [#uses=1] + br i1 %49, label %bb5, label %bb8.loopexit + +bb7: ; preds = %bb7, %bb8.loopexit + %indvar130 = phi i64 [ 0, %bb8.loopexit ], [ %indvar.next131, %bb7 ] ; <i64> [#uses=3] + %vSum0.260 = phi <4 x float> [ %vSum0.0.lcssa, %bb8.loopexit ], [ %55, %bb7 ] ; <<4 x float>> [#uses=1] + %indvar132 = trunc i64 %indvar130 to i32 ; <i32> [#uses=1] + %tmp133 = mul i32 %indvar132, -4 ; <i32> [#uses=1] + %N_addr.358 = add i32 %tmp133, %N_addr.2.lcssa ; <i32> [#uses=1] + %A_addr.361.rec = shl i64 %indvar130, 2 ; <i64> [#uses=3] + %B_addr.359 = getelementptr float* %B_addr.2.lcssa, i64 %A_addr.361.rec ; <float*> [#uses=1] + %A_addr.361 = getelementptr float* %A_addr.2.lcssa, i64 %A_addr.361.rec ; <float*> [#uses=1] + %50 = bitcast float* %A_addr.361 to <4 x float>* ; <<4 x float>*> [#uses=1] + %51 = load <4 x float>* %50, align 16 ; <<4 x float>> [#uses=1] + %52 = bitcast float* %B_addr.359 to <4 x float>* ; <<4 x float>*> [#uses=1] + %53 = load <4 x float>* %52, align 16 ; <<4 x float>> [#uses=1] + %54 = fmul <4 x float> %51, %53 ; <<4 x float>> [#uses=1] + %55 = fadd <4 x float> %54, %vSum0.260 ; <<4 x float>> [#uses=2] + %.rec85 = add i64 %A_addr.361.rec, 4 ; <i64> [#uses=2] + %56 = getelementptr float* %A_addr.2.lcssa, i64 
%.rec85 ; <float*> [#uses=1] + %57 = getelementptr float* %B_addr.2.lcssa, i64 %.rec85 ; <float*> [#uses=1] + %58 = add i32 %N_addr.358, -4 ; <i32> [#uses=2] + %59 = icmp sgt i32 %58, 3 ; <i1> [#uses=1] + %indvar.next131 = add i64 %indvar130, 1 ; <i64> [#uses=1] + br i1 %59, label %bb7, label %bb13 + +bb8.loopexit: ; preds = %bb5, %bb6.preheader + %A_addr.2.lcssa = phi float* [ %A_addr.0, %bb6.preheader ], [ %46, %bb5 ] ; <float*> [#uses=3] + %vSum0.0.lcssa = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %21, %bb5 ] ; <<4 x float>> [#uses=2] + %B_addr.2.lcssa = phi float* [ %B_addr.0, %bb6.preheader ], [ %47, %bb5 ] ; <float*> [#uses=3] + %vSum1.0.lcssa = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %29, %bb5 ] ; <<4 x float>> [#uses=2] + %vSum2.0.lcssa = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %37, %bb5 ] ; <<4 x float>> [#uses=2] + %N_addr.2.lcssa = phi i32 [ %N_addr.0, %bb6.preheader ], [ %48, %bb5 ] ; <i32> [#uses=3] + %vSum3.0.lcssa = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %45, %bb5 ] ; <<4 x float>> [#uses=2] + %60 = icmp sgt i32 %N_addr.2.lcssa, 3 ; <i1> [#uses=1] + br i1 %60, label %bb7, label %bb13 + +bb9: ; preds = %bb9, %bb10.preheader + %indvar106 = phi i64 [ 0, %bb10.preheader ], [ %indvar.next107, %bb9 ] ; <i64> [#uses=3] + %vSum0.339 = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %75, %bb9 ] ; <<4 x float>> [#uses=1] + %vSum1.237 = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %80, %bb9 ] ; <<4 x float>> [#uses=1] + %vSum2.236 = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %85, %bb9 ] ; <<4 x float>> [#uses=1] + %vSum3.234 = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %90, %bb9 ] ; <<4 x float>> [#uses=1] + %indvar108 = trunc i64 %indvar106 to i32 ; <i32> [#uses=1] + %tmp113 = mul i32 %indvar108, -16 ; <i32> [#uses=1] + %N_addr.435 = add i32 %tmp113, %N_addr.0 ; <i32> [#uses=1] + %A_addr.440.rec = shl i64 %indvar106, 4 ; <i64> [#uses=5] + %B_addr.0.sum = add 
i64 %B_addr.0.rec, %A_addr.440.rec ; <i64> [#uses=2] + %B_addr.438 = getelementptr float* %B, i64 %B_addr.0.sum ; <float*> [#uses=1] + %A_addr.440 = getelementptr float* %A, i64 %B_addr.0.sum ; <float*> [#uses=1] + %61 = bitcast float* %B_addr.438 to <4 x float>* ; <i8*> [#uses=1] + %62 = load <4 x float>* %61, align 1 + %B_addr.438.sum169 = or i64 %A_addr.440.rec, 4 ; <i64> [#uses=1] + %B_addr.0.sum187 = add i64 %B_addr.0.rec, %B_addr.438.sum169 ; <i64> [#uses=2] + %63 = getelementptr float* %B, i64 %B_addr.0.sum187 ; <float*> [#uses=1] + %64 = bitcast float* %63 to <4 x float>* ; <i8*> [#uses=1] + %65 = load <4 x float>* %64, align 1 + %B_addr.438.sum168 = or i64 %A_addr.440.rec, 8 ; <i64> [#uses=1] + %B_addr.0.sum186 = add i64 %B_addr.0.rec, %B_addr.438.sum168 ; <i64> [#uses=2] + %66 = getelementptr float* %B, i64 %B_addr.0.sum186 ; <float*> [#uses=1] + %67 = bitcast float* %66 to <4 x float>* ; <i8*> [#uses=1] + %68 = load <4 x float>* %67, align 1 + %B_addr.438.sum167 = or i64 %A_addr.440.rec, 12 ; <i64> [#uses=1] + %B_addr.0.sum185 = add i64 %B_addr.0.rec, %B_addr.438.sum167 ; <i64> [#uses=2] + %69 = getelementptr float* %B, i64 %B_addr.0.sum185 ; <float*> [#uses=1] + %70 = bitcast float* %69 to <4 x float>* ; <i8*> [#uses=1] + %71 = load <4 x float>* %70, align 1 + %72 = bitcast float* %A_addr.440 to <4 x float>* ; <<4 x float>*> [#uses=1] + %73 = load <4 x float>* %72, align 16 ; <<4 x float>> [#uses=1] + %74 = fmul <4 x float> %73, %62 ; <<4 x float>> [#uses=1] + %75 = fadd <4 x float> %74, %vSum0.339 ; <<4 x float>> [#uses=2] + %76 = getelementptr float* %A, i64 %B_addr.0.sum187 ; <float*> [#uses=1] + %77 = bitcast float* %76 to <4 x float>* ; <<4 x float>*> [#uses=1] + %78 = load <4 x float>* %77, align 16 ; <<4 x float>> [#uses=1] + %79 = fmul <4 x float> %78, %65 ; <<4 x float>> [#uses=1] + %80 = fadd <4 x float> %79, %vSum1.237 ; <<4 x float>> [#uses=2] + %81 = getelementptr float* %A, i64 %B_addr.0.sum186 ; <float*> [#uses=1] + %82 = bitcast float* 
%81 to <4 x float>* ; <<4 x float>*> [#uses=1] + %83 = load <4 x float>* %82, align 16 ; <<4 x float>> [#uses=1] + %84 = fmul <4 x float> %83, %68 ; <<4 x float>> [#uses=1] + %85 = fadd <4 x float> %84, %vSum2.236 ; <<4 x float>> [#uses=2] + %86 = getelementptr float* %A, i64 %B_addr.0.sum185 ; <float*> [#uses=1] + %87 = bitcast float* %86 to <4 x float>* ; <<4 x float>*> [#uses=1] + %88 = load <4 x float>* %87, align 16 ; <<4 x float>> [#uses=1] + %89 = fmul <4 x float> %88, %71 ; <<4 x float>> [#uses=1] + %90 = fadd <4 x float> %89, %vSum3.234 ; <<4 x float>> [#uses=2] + %.rec89 = add i64 %A_addr.440.rec, 16 ; <i64> [#uses=1] + %A_addr.0.sum170 = add i64 %B_addr.0.rec, %.rec89 ; <i64> [#uses=2] + %91 = getelementptr float* %A, i64 %A_addr.0.sum170 ; <float*> [#uses=1] + %92 = getelementptr float* %B, i64 %A_addr.0.sum170 ; <float*> [#uses=1] + %93 = add i32 %N_addr.435, -16 ; <i32> [#uses=2] + %94 = icmp sgt i32 %93, 15 ; <i1> [#uses=1] + %indvar.next107 = add i64 %indvar106, 1 ; <i64> [#uses=1] + br i1 %94, label %bb9, label %bb12.loopexit + +bb11: ; preds = %bb11, %bb12.loopexit + %indvar = phi i64 [ 0, %bb12.loopexit ], [ %indvar.next, %bb11 ] ; <i64> [#uses=3] + %vSum0.428 = phi <4 x float> [ %vSum0.3.lcssa, %bb12.loopexit ], [ %100, %bb11 ] ; <<4 x float>> [#uses=1] + %indvar96 = trunc i64 %indvar to i32 ; <i32> [#uses=1] + %tmp = mul i32 %indvar96, -4 ; <i32> [#uses=1] + %N_addr.526 = add i32 %tmp, %N_addr.4.lcssa ; <i32> [#uses=1] + %A_addr.529.rec = shl i64 %indvar, 2 ; <i64> [#uses=3] + %B_addr.527 = getelementptr float* %B_addr.4.lcssa, i64 %A_addr.529.rec ; <float*> [#uses=1] + %A_addr.529 = getelementptr float* %A_addr.4.lcssa, i64 %A_addr.529.rec ; <float*> [#uses=1] + %95 = bitcast float* %B_addr.527 to <4 x float>* ; <i8*> [#uses=1] + %96 = load <4 x float>* %95, align 1 + %97 = bitcast float* %A_addr.529 to <4 x float>* ; <<4 x float>*> [#uses=1] + %98 = load <4 x float>* %97, align 16 ; <<4 x float>> [#uses=1] + %99 = fmul <4 x float> %98, %96 ; 
<<4 x float>> [#uses=1] + %100 = fadd <4 x float> %99, %vSum0.428 ; <<4 x float>> [#uses=2] + %.rec91 = add i64 %A_addr.529.rec, 4 ; <i64> [#uses=2] + %101 = getelementptr float* %A_addr.4.lcssa, i64 %.rec91 ; <float*> [#uses=1] + %102 = getelementptr float* %B_addr.4.lcssa, i64 %.rec91 ; <float*> [#uses=1] + %103 = add i32 %N_addr.526, -4 ; <i32> [#uses=2] + %104 = icmp sgt i32 %103, 3 ; <i1> [#uses=1] + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1] + br i1 %104, label %bb11, label %bb13 + +bb12.loopexit: ; preds = %bb9, %bb10.preheader + %A_addr.4.lcssa = phi float* [ %A_addr.0, %bb10.preheader ], [ %91, %bb9 ] ; <float*> [#uses=3] + %vSum0.3.lcssa = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %75, %bb9 ] ; <<4 x float>> [#uses=2] + %B_addr.4.lcssa = phi float* [ %B_addr.0, %bb10.preheader ], [ %92, %bb9 ] ; <float*> [#uses=3] + %vSum1.2.lcssa = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %80, %bb9 ] ; <<4 x float>> [#uses=2] + %vSum2.2.lcssa = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %85, %bb9 ] ; <<4 x float>> [#uses=2] + %N_addr.4.lcssa = phi i32 [ %N_addr.0, %bb10.preheader ], [ %93, %bb9 ] ; <i32> [#uses=3] + %vSum3.2.lcssa = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %90, %bb9 ] ; <<4 x float>> [#uses=2] + %105 = icmp sgt i32 %N_addr.4.lcssa, 3 ; <i1> [#uses=1] + br i1 %105, label %bb11, label %bb13 + +bb13: ; preds = %bb12.loopexit, %bb11, %bb8.loopexit, %bb7, %entry + %Sum0.1 = phi float [ 0.000000e+00, %entry ], [ %Sum0.0, %bb7 ], [ %Sum0.0, %bb8.loopexit ], [ %Sum0.0, %bb11 ], [ %Sum0.0, %bb12.loopexit ] ; <float> [#uses=1] + %vSum3.1 = phi <4 x float> [ zeroinitializer, %entry ], [ %vSum3.0.lcssa, %bb7 ], [ %vSum3.0.lcssa, %bb8.loopexit ], [ %vSum3.2.lcssa, %bb11 ], [ %vSum3.2.lcssa, %bb12.loopexit ] ; <<4 x float>> [#uses=1] + %N_addr.1 = phi i32 [ %N, %entry ], [ %N_addr.2.lcssa, %bb8.loopexit ], [ %58, %bb7 ], [ %N_addr.4.lcssa, %bb12.loopexit ], [ %103, %bb11 ] ; <i32> [#uses=2] + %vSum2.1 = 
phi <4 x float> [ zeroinitializer, %entry ], [ %vSum2.0.lcssa, %bb7 ], [ %vSum2.0.lcssa, %bb8.loopexit ], [ %vSum2.2.lcssa, %bb11 ], [ %vSum2.2.lcssa, %bb12.loopexit ] ; <<4 x float>> [#uses=1] + %vSum1.1 = phi <4 x float> [ zeroinitializer, %entry ], [ %vSum1.0.lcssa, %bb7 ], [ %vSum1.0.lcssa, %bb8.loopexit ], [ %vSum1.2.lcssa, %bb11 ], [ %vSum1.2.lcssa, %bb12.loopexit ] ; <<4 x float>> [#uses=1] + %B_addr.1 = phi float* [ %B, %entry ], [ %B_addr.2.lcssa, %bb8.loopexit ], [ %57, %bb7 ], [ %B_addr.4.lcssa, %bb12.loopexit ], [ %102, %bb11 ] ; <float*> [#uses=1] + %vSum0.1 = phi <4 x float> [ zeroinitializer, %entry ], [ %vSum0.0.lcssa, %bb8.loopexit ], [ %55, %bb7 ], [ %vSum0.3.lcssa, %bb12.loopexit ], [ %100, %bb11 ] ; <<4 x float>> [#uses=1] + %A_addr.1 = phi float* [ %A, %entry ], [ %A_addr.2.lcssa, %bb8.loopexit ], [ %56, %bb7 ], [ %A_addr.4.lcssa, %bb12.loopexit ], [ %101, %bb11 ] ; <float*> [#uses=1] + %106 = fadd <4 x float> %vSum0.1, %vSum2.1 ; <<4 x float>> [#uses=1] + %107 = fadd <4 x float> %vSum1.1, %vSum3.1 ; <<4 x float>> [#uses=1] + %108 = fadd <4 x float> %106, %107 ; <<4 x float>> [#uses=4] + %tmp23 = extractelement <4 x float> %108, i32 0 ; <float> [#uses=1] + %tmp21 = extractelement <4 x float> %108, i32 1 ; <float> [#uses=1] + %109 = fadd float %tmp23, %tmp21 ; <float> [#uses=1] + %tmp19 = extractelement <4 x float> %108, i32 2 ; <float> [#uses=1] + %tmp17 = extractelement <4 x float> %108, i32 3 ; <float> [#uses=1] + %110 = fadd float %tmp19, %tmp17 ; <float> [#uses=1] + %111 = fadd float %109, %110 ; <float> [#uses=1] + %Sum0.254 = fadd float %111, %Sum0.1 ; <float> [#uses=2] + %112 = icmp sgt i32 %N_addr.1, 0 ; <i1> [#uses=1] + br i1 %112, label %bb.nph56, label %bb16 + +bb.nph56: ; preds = %bb13 + %tmp. 
= zext i32 %N_addr.1 to i64 ; <i64> [#uses=1] + br label %bb14 + +bb14: ; preds = %bb14, %bb.nph56 + %indvar117 = phi i64 [ 0, %bb.nph56 ], [ %indvar.next118, %bb14 ] ; <i64> [#uses=3] + %Sum0.255 = phi float [ %Sum0.254, %bb.nph56 ], [ %Sum0.2, %bb14 ] ; <float> [#uses=1] + %tmp.122 = sext i32 %IB to i64 ; <i64> [#uses=1] + %B_addr.652.rec = mul i64 %indvar117, %tmp.122 ; <i64> [#uses=1] + %tmp.124 = sext i32 %IA to i64 ; <i64> [#uses=1] + %A_addr.653.rec = mul i64 %indvar117, %tmp.124 ; <i64> [#uses=1] + %B_addr.652 = getelementptr float* %B_addr.1, i64 %B_addr.652.rec ; <float*> [#uses=1] + %A_addr.653 = getelementptr float* %A_addr.1, i64 %A_addr.653.rec ; <float*> [#uses=1] + %113 = load float* %A_addr.653, align 4 ; <float> [#uses=1] + %114 = load float* %B_addr.652, align 4 ; <float> [#uses=1] + %115 = fmul float %113, %114 ; <float> [#uses=1] + %Sum0.2 = fadd float %115, %Sum0.255 ; <float> [#uses=2] + %indvar.next118 = add i64 %indvar117, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %indvar.next118, %tmp. ; <i1> [#uses=1] + br i1 %exitcond, label %bb16, label %bb14 + +bb16: ; preds = %bb14, %bb13 + %Sum0.2.lcssa = phi float [ %Sum0.254, %bb13 ], [ %Sum0.2, %bb14 ] ; <float> [#uses=1] + store float %Sum0.2.lcssa, float* %C, align 4 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/jump_sign.ll b/src/LLVM/test/CodeGen/X86/jump_sign.ll new file mode 100644 index 0000000..c7bdd7d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/jump_sign.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -march=x86 | grep jns + +define i32 @f(i32 %X) { +entry: + %tmp1 = add i32 %X, 1 ; <i32> [#uses=1] + %tmp = icmp slt i32 %tmp1, 0 ; <i1> [#uses=1] + br i1 %tmp, label %cond_true, label %cond_next + +cond_true: ; preds = %entry + %tmp2 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0] + br label %cond_next + +cond_next: ; preds = %cond_true, %entry + %tmp3 = tail call i32 (...)* @baz( ) ; <i32> [#uses=0] + ret i32 undef +} + +declare i32 @bar(...) + +declare i32 @baz(...)
diff --git a/src/LLVM/test/CodeGen/X86/label-redefinition.ll b/src/LLVM/test/CodeGen/X86/label-redefinition.ll new file mode 100644 index 0000000..9ad33e0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/label-redefinition.ll
@@ -0,0 +1,15 @@ +; PR7054 +; RUN: not llc %s -o - |& grep {'_foo' label emitted multiple times to assembly} +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" +target triple = "i386-apple-darwin10.0.0" + +define i32 @"\01_foo"() { + unreachable +} + +define i32 @foo() { +entry: + unreachable +} + +declare i32 @xstat64(i32, i8*, i8*)
diff --git a/src/LLVM/test/CodeGen/X86/large-gep-scale.ll b/src/LLVM/test/CodeGen/X86/large-gep-scale.ll new file mode 100644 index 0000000..143294e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/large-gep-scale.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86 | FileCheck %s +; PR5281 + +; After scaling, this type doesn't fit in memory. Codegen should generate +; correct addressing still. + +; CHECK: shll $2, %edx + +define fastcc i32* @_ada_smkr([2147483647 x i32]* %u, i32 %t) nounwind { + %x = getelementptr [2147483647 x i32]* %u, i32 %t, i32 0 + ret i32* %x +}
diff --git a/src/LLVM/test/CodeGen/X86/ldzero.ll b/src/LLVM/test/CodeGen/X86/ldzero.ll new file mode 100644 index 0000000..dab04bc --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/ldzero.ll
@@ -0,0 +1,43 @@ +; RUN: llc < %s +; verify PR 1700 is still fixed +; ModuleID = 'hh.c' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i686-apple-darwin8" + +define x86_fp80 @x() { +entry: + %retval = alloca x86_fp80, align 16 ; <x86_fp80*> [#uses=2] + %tmp = alloca x86_fp80, align 16 ; <x86_fp80*> [#uses=2] + %d = alloca double, align 8 ; <double*> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store double 0.000000e+00, double* %d, align 8 + %tmp1 = load double* %d, align 8 ; <double> [#uses=1] + %tmp12 = fpext double %tmp1 to x86_fp80 ; <x86_fp80> [#uses=1] + store x86_fp80 %tmp12, x86_fp80* %tmp, align 16 + %tmp3 = load x86_fp80* %tmp, align 16 ; <x86_fp80> [#uses=1] + store x86_fp80 %tmp3, x86_fp80* %retval, align 16 + br label %return + +return: ; preds = %entry + %retval4 = load x86_fp80* %retval ; <x86_fp80> [#uses=1] + ret x86_fp80 %retval4 +} + +define double @y() { +entry: + %retval = alloca double, align 8 ; <double*> [#uses=2] + %tmp = alloca double, align 8 ; <double*> [#uses=2] + %ld = alloca x86_fp80, align 16 ; <x86_fp80*> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store x86_fp80 0xK00000000000000000000, x86_fp80* %ld, align 16 + %tmp1 = load x86_fp80* %ld, align 16 ; <x86_fp80> [#uses=1] + %tmp12 = fptrunc x86_fp80 %tmp1 to double ; <double> [#uses=1] + store double %tmp12, double* %tmp, align 8 + %tmp3 = load double* %tmp, align 8 ; <double> [#uses=1] + store double %tmp3, double* %retval, align 8 + br label %return + +return: ; preds = %entry + %retval4 = load double* %retval ; <double> [#uses=1] + ret double %retval4 +}
diff --git a/src/LLVM/test/CodeGen/X86/lea-2.ll b/src/LLVM/test/CodeGen/X86/lea-2.ll new file mode 100644 index 0000000..878dd4c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/lea-2.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \ +; RUN: grep {lea EAX, DWORD PTR \\\[... + 4\\*... - 5\\\]} +; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \ +; RUN: not grep add + +define i32 @test1(i32 %A, i32 %B) { + %tmp1 = shl i32 %A, 2 ; <i32> [#uses=1] + %tmp3 = add i32 %B, -5 ; <i32> [#uses=1] + %tmp4 = add i32 %tmp3, %tmp1 ; <i32> [#uses=1] + ret i32 %tmp4 +} + +
diff --git a/src/LLVM/test/CodeGen/X86/lea-3.ll b/src/LLVM/test/CodeGen/X86/lea-3.ll new file mode 100644 index 0000000..f86dfdb --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/lea-3.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s + +; CHECK: leaq (,[[A0:%rdi|%rcx]],4), %rax +define i64 @test2(i64 %a) { + %tmp2 = shl i64 %a, 2 + %tmp3 = or i64 %tmp2, %a + ret i64 %tmp3 +} + +; CHECK: leal ([[A0]],[[A0]],2), %eax +define i32 @test(i32 %a) { + %tmp2 = mul i32 %a, 3 ; <i32> [#uses=1] + ret i32 %tmp2 +} + +; CHECK: leaq (,[[A0]],8), %rax +define i64 @test3(i64 %a) { + %tmp2 = shl i64 %a, 3 + ret i64 %tmp2 +} +
diff --git a/src/LLVM/test/CodeGen/X86/lea-4.ll b/src/LLVM/test/CodeGen/X86/lea-4.ll new file mode 100644 index 0000000..2171204 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/lea-4.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -march=x86-64 | grep lea | count 2 + +define zeroext i16 @t1(i32 %on_off) nounwind { +entry: + %0 = sub i32 %on_off, 1 + %1 = mul i32 %0, 2 + %2 = trunc i32 %1 to i16 + %3 = zext i16 %2 to i32 + %4 = trunc i32 %3 to i16 + ret i16 %4 +} + +define i32 @t2(i32 %on_off) nounwind { +entry: + %0 = sub i32 %on_off, 1 + %1 = mul i32 %0, 2 + %2 = and i32 %1, 65535 + ret i32 %2 +}
diff --git a/src/LLVM/test/CodeGen/X86/lea-recursion.ll b/src/LLVM/test/CodeGen/X86/lea-recursion.ll new file mode 100644 index 0000000..3f32fd2 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/lea-recursion.ll
@@ -0,0 +1,47 @@ +; RUN: llc < %s -march=x86-64 | grep lea | count 12 + +; This testcase was written to demonstrate an instruction-selection problem, +; however it also happens to expose a limitation in the DAGCombiner's +; expression reassociation which causes it to miss opportunities for +; constant folding due to the intermediate adds having multiple uses. +; The Reassociate pass has similar limitations. If these limitations are +; fixed, the test commands above will need to be updated to expect fewer +; lea instructions. + +@g0 = weak global [1000 x i32] zeroinitializer, align 32 ; <[1000 x i32]*> [#uses=8] +@g1 = weak global [1000 x i32] zeroinitializer, align 32 ; <[1000 x i32]*> [#uses=7] + +define void @foo() { +entry: + %tmp4 = load i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 0) ; <i32> [#uses=1] + %tmp8 = load i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 0) ; <i32> [#uses=1] + %tmp9 = add i32 %tmp4, 1 ; <i32> [#uses=1] + %tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=2] + store i32 %tmp10, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 1) + %tmp8.1 = load i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 1) ; <i32> [#uses=1] + %tmp9.1 = add i32 %tmp10, 1 ; <i32> [#uses=1] + %tmp10.1 = add i32 %tmp9.1, %tmp8.1 ; <i32> [#uses=2] + store i32 %tmp10.1, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 2) + %tmp8.2 = load i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 2) ; <i32> [#uses=1] + %tmp9.2 = add i32 %tmp10.1, 1 ; <i32> [#uses=1] + %tmp10.2 = add i32 %tmp9.2, %tmp8.2 ; <i32> [#uses=2] + store i32 %tmp10.2, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 3) + %tmp8.3 = load i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 3) ; <i32> [#uses=1] + %tmp9.3 = add i32 %tmp10.2, 1 ; <i32> [#uses=1] + %tmp10.3 = add i32 %tmp9.3, %tmp8.3 ; <i32> [#uses=2] + store i32 %tmp10.3, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 4) + %tmp8.4 = load i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 4) ; <i32> [#uses=1] + %tmp9.4 = add i32 %tmp10.3, 
1 ; <i32> [#uses=1] + %tmp10.4 = add i32 %tmp9.4, %tmp8.4 ; <i32> [#uses=2] + store i32 %tmp10.4, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 5) + %tmp8.5 = load i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 5) ; <i32> [#uses=1] + %tmp9.5 = add i32 %tmp10.4, 1 ; <i32> [#uses=1] + %tmp10.5 = add i32 %tmp9.5, %tmp8.5 ; <i32> [#uses=2] + store i32 %tmp10.5, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 6) + %tmp8.6 = load i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 6) ; <i32> [#uses=1] + %tmp9.6 = add i32 %tmp10.5, 1 ; <i32> [#uses=1] + %tmp10.6 = add i32 %tmp9.6, %tmp8.6 ; <i32> [#uses=1] + store i32 %tmp10.6, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 7) + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/lea.ll b/src/LLVM/test/CodeGen/X86/lea.ll new file mode 100644 index 0000000..6e5fbea --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/lea.ll
@@ -0,0 +1,35 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s + +define i32 @test1(i32 %x) nounwind { + %tmp1 = shl i32 %x, 3 + %tmp2 = add i32 %tmp1, 7 + ret i32 %tmp2 +; CHECK: test1: +; CHECK: leal 7(,[[A0:%rdi|%rcx]],8), %eax +} + + +; ISel the add of -4 with a neg and use an lea for the rest of the +; arithemtic. +define i32 @test2(i32 %x_offs) nounwind readnone { +entry: + %t0 = icmp sgt i32 %x_offs, 4 + br i1 %t0, label %bb.nph, label %bb2 + +bb.nph: + %tmp = add i32 %x_offs, -5 + %tmp6 = lshr i32 %tmp, 2 + %tmp7 = mul i32 %tmp6, -4 + %tmp8 = add i32 %tmp7, %x_offs + %tmp9 = add i32 %tmp8, -4 + ret i32 %tmp9 + +bb2: + ret i32 %x_offs +; CHECK: test2: +; CHECK: leal -5([[A0]]), %eax +; CHECK: andl $-4, %eax +; CHECK: negl %eax +; CHECK: leal -4([[A0]],%rax), %eax +}
diff --git a/src/LLVM/test/CodeGen/X86/leaf-fp-elim.ll b/src/LLVM/test/CodeGen/X86/leaf-fp-elim.ll new file mode 100644 index 0000000..607dc72 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/leaf-fp-elim.ll
@@ -0,0 +1,30 @@ +; RUN: llc < %s -disable-non-leaf-fp-elim -relocation-model=pic -mtriple=x86_64-apple-darwin | FileCheck %s +; <rdar://problem/8170192> +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin11.0" + +@msg = internal global i8* null ; <i8**> [#uses=1] +@.str = private constant [2 x i8] c"x\00", align 1 ; <[2 x i8]*> [#uses=1] + +define void @test(i8* %p) nounwind optsize ssp { + +; No stack frame, please. +; CHECK: _test +; CHECK-NOT: pushq %rbp +; CHECK-NOT: movq %rsp, %rbp +; CHECK: InlineAsm Start + +entry: + %0 = icmp eq i8* %p, null ; <i1> [#uses=1] + br i1 %0, label %return, label %bb + +bb: ; preds = %entry + tail call void asm "mov $1, $0", "=*m,{cx},~{dirflag},~{fpsr},~{flags}"(i8** @msg, i8* getelementptr inbounds ([2 x i8]* @.str, i64 0, i64 0)) nounwind + tail call void @llvm.trap() + unreachable + +return: ; preds = %entry + ret void +} + +declare void @llvm.trap() nounwind
diff --git a/src/LLVM/test/CodeGen/X86/legalize-fmp-oeq-vector-select.ll b/src/LLVM/test/CodeGen/X86/legalize-fmp-oeq-vector-select.ll new file mode 100644 index 0000000..6a8c154 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/legalize-fmp-oeq-vector-select.ll
@@ -0,0 +1,11 @@ +; RUN: llc -march=x86-64 -enable-legalize-types-checking < %s +; PR5092 + +define <4 x float> @bug(float %a) nounwind { +entry: + %cmp = fcmp oeq float %a, 0.000000e+00 ; <i1> [#uses=1] + %temp = select i1 %cmp, <4 x float> <float 1.000000e+00, float 0.000000e+00, +float 0.000000e+00, float 0.000000e+00>, <4 x float> zeroinitializer + ret <4 x float> %temp +} +
diff --git a/src/LLVM/test/CodeGen/X86/legalize-sub-zero-2.ll b/src/LLVM/test/CodeGen/X86/legalize-sub-zero-2.ll new file mode 100644 index 0000000..f02ca71 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/legalize-sub-zero-2.ll
@@ -0,0 +1,41 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin + +define fastcc void @foo(i32 %type) nounwind optsize { +entry: + switch i32 %type, label %bb26 [ + i32 33634, label %bb11 + i32 5121, label %bb27 + ] + +bb11: ; preds = %entry + br label %bb27 + +bb26: ; preds = %entry + unreachable + +bb27: ; preds = %bb11, %entry + %srcpb.0 = phi i32 [ 1, %bb11 ], [ 0, %entry ] + br i1 undef, label %bb348, label %bb30.lr.ph + +bb30.lr.ph: ; preds = %bb27 + %.sum743 = shl i32 %srcpb.0, 1 + %0 = mul i32 %srcpb.0, -2 + %.sum745 = add i32 %.sum743, %0 + br i1 undef, label %bb70, label %bb71 + +bb70: ; preds = %bb30.lr.ph + unreachable + +bb71: ; preds = %bb30.lr.ph + br i1 undef, label %bb92, label %bb80 + +bb80: ; preds = %bb71 + unreachable + +bb92: ; preds = %bb71 + %1 = getelementptr inbounds i8* undef, i32 %.sum745 + unreachable + +bb348: ; preds = %bb27 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/legalize-sub-zero.ll b/src/LLVM/test/CodeGen/X86/legalize-sub-zero.ll new file mode 100644 index 0000000..ee76d46 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/legalize-sub-zero.ll
@@ -0,0 +1,35 @@ +; RUN: llc < %s -mtriple=i686-pc-win32 + +;target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +;target triple = "i686-pc-win32" + +define void @test() nounwind { + %1 = fdiv <3 x double> zeroinitializer, undef + %2 = fdiv <2 x double> zeroinitializer, undef + %3 = shufflevector <2 x double> %2, <2 x double> undef, <3 x i32> <i32 0, i32 +1, i32 undef> + %4 = insertelement <3 x double> %3, double undef, i32 2 + %5 = bitcast <3 x double> %1 to <3 x i64> + %6 = bitcast <3 x double> %4 to <3 x i64> + %7 = sub <3 x i64> %5, %6 + %8 = shufflevector <3 x i64> %7, <3 x i64> undef, <2 x i32> <i32 0, i32 1> + %9 = xor <2 x i64> %8, zeroinitializer + %10 = add nsw <2 x i64> %9, zeroinitializer + %11 = shufflevector <2 x i64> %10, <2 x i64> undef, <3 x i32> <i32 0, i32 1, +i32 undef> + %12 = insertelement <3 x i64> %11, i64 0, i32 2 + %13 = shufflevector <3 x i64> %12, <3 x i64> undef, <4 x i32> <i32 0, i32 1, +i32 2, i32 3> + %14 = shufflevector <4 x i64> %13, <4 x i64> undef, <2 x i32> <i32 0, i32 1> + %15 = bitcast <2 x i64> %14 to <4 x i32> + %16 = shufflevector <4 x i32> %15, <4 x i32> undef, <4 x i32> <i32 0, i32 2, +i32 0, i32 2> + %17 = bitcast <4 x i32> %16 to <2 x i64> + %18 = shufflevector <2 x i64> %17, <2 x i64> undef, <2 x i32> <i32 0, i32 2> + %19 = bitcast <2 x i64> %18 to <4 x i32> + %20 = shufflevector <4 x i32> %19, <4 x i32> undef, <3 x i32> <i32 0, i32 1, +i32 2> + %21 = or <3 x i32> %20, zeroinitializer + store <3 x i32> %21, <3 x i32> addrspace(1)* undef, align 16 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/legalizedag_vec.ll b/src/LLVM/test/CodeGen/X86/legalizedag_vec.ll new file mode 100644 index 0000000..dff6931 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/legalizedag_vec.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=x86 -mattr=sse2 | FileCheck %s + + +; Test case for r63760 where we generate a legalization assert that an illegal +; type has been inserted by LegalizeDAG after LegalizeType has run. With sse2, +; v2i64 is a legal type but with mmx disabled, i64 is an illegal type. When +; legalizing the divide in LegalizeDAG, we scalarize the vector divide and make +; two 64 bit divide library calls which introduces i64 nodes that needs to be +; promoted. + +define <2 x i64> @test_long_div(<2 x i64> %num, <2 x i64> %div) { + %div.r = sdiv <2 x i64> %num, %div + ret <2 x i64> %div.r +} + +; CHECK: call{{.*(divdi3|alldiv)}} +; CHECK: call{{.*(divdi3|alldiv)}}
diff --git a/src/LLVM/test/CodeGen/X86/lfence.ll b/src/LLVM/test/CodeGen/X86/lfence.ll new file mode 100644 index 0000000..1903a1e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/lfence.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep lfence + +declare void @llvm.x86.sse2.lfence() nounwind + +define void @test() { + call void @llvm.x86.sse2.lfence() + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/licm-dominance.ll b/src/LLVM/test/CodeGen/X86/licm-dominance.ll new file mode 100644 index 0000000..8a0958d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/licm-dominance.ll
@@ -0,0 +1,36 @@ +; RUN: llc -asm-verbose=false < %s | FileCheck %s + +; MachineLICM should check dominance before hoisting instructions. +; CHECK: jne LBB0_3 +; CHECK-NEXT: xorb %al, %al +; CHECK-NEXT: testb %al, %al + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-macosx10.7.2" + +define void @CMSColorWorldCreateParametricData() nounwind uwtable optsize ssp { +entry: + br label %for.body.i + +for.body.i: + br i1 undef, label %for.inc.i, label %if.then26.i + +if.then26.i: + br i1 undef, label %if.else.i.i, label %lor.lhs.false.i.i + +if.else.i.i: + br i1 undef, label %lor.lhs.false.i.i, label %if.then116.i.i + +lor.lhs.false.i.i: + br i1 undef, label %for.inc.i, label %if.then116.i.i + +if.then116.i.i: + unreachable + +for.inc.i: + %cmp17.i = icmp ult i64 undef, undef + br i1 %cmp17.i, label %for.body.i, label %if.end28.i + +if.end28.i: + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/licm-nested.ll b/src/LLVM/test/CodeGen/X86/licm-nested.ll new file mode 100644 index 0000000..c3f991d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/licm-nested.ll
@@ -0,0 +1,89 @@ +; RUN: llc -mtriple=x86_64-apple-darwin -march=x86-64 < %s -o /dev/null -stats -info-output-file - | grep "hoisted out of loops" | grep 3 + +; MachineLICM should be able to hoist the symbolic addresses out of +; the inner loops. + +@main.flags = internal global [8193 x i8] zeroinitializer, align 16 ; <[8193 x i8]*> [#uses=3] +@.str = private constant [11 x i8] c"Count: %d\0A\00" ; <[11 x i8]*> [#uses=1] + +define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp { +entry: + %cmp = icmp eq i32 %argc, 2 ; <i1> [#uses=1] + br i1 %cmp, label %while.cond.preheader, label %bb.nph53 + +while.cond.preheader: ; preds = %entry + %arrayidx = getelementptr inbounds i8** %argv, i64 1 ; <i8**> [#uses=1] + %tmp2 = load i8** %arrayidx ; <i8*> [#uses=1] + %call = tail call i32 @atoi(i8* %tmp2) nounwind ; <i32> [#uses=2] + %tobool51 = icmp eq i32 %call, 0 ; <i1> [#uses=1] + br i1 %tobool51, label %while.end, label %bb.nph53 + +while.cond.loopexit: ; preds = %for.inc35 + %indvar.next77 = add i32 %indvar76, 1 ; <i32> [#uses=2] + %exitcond78 = icmp eq i32 %indvar.next77, %NUM.0.ph80 ; <i1> [#uses=1] + br i1 %exitcond78, label %while.end, label %bb.nph + +bb.nph53: ; preds = %entry, %while.cond.preheader + %NUM.0.ph80 = phi i32 [ %call, %while.cond.preheader ], [ 17000, %entry ] ; <i32> [#uses=1] + br label %bb.nph + +bb.nph: ; preds = %while.cond.loopexit, %bb.nph53 + %indvar76 = phi i32 [ 0, %bb.nph53 ], [ %indvar.next77, %while.cond.loopexit ] ; <i32> [#uses=1] + br label %for.body + +for.body: ; preds = %for.body, %bb.nph + %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] ; <i64> [#uses=2] + %tmp = add i64 %indvar, 2 ; <i64> [#uses=1] + %arrayidx10 = getelementptr [8193 x i8]* @main.flags, i64 0, i64 %tmp ; <i8*> [#uses=1] + store i8 1, i8* %arrayidx10 + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %indvar.next, 8191 ; <i1> [#uses=1] + br i1 %exitcond, label %for.body15, label %for.body + +for.body15: ; preds = 
%for.body, %for.inc35 + %indvar57 = phi i64 [ %indvar.next58, %for.inc35 ], [ 0, %for.body ] ; <i64> [#uses=4] + %count.248 = phi i32 [ %count.1, %for.inc35 ], [ 0, %for.body ] ; <i32> [#uses=2] + %tmp68 = add i64 %indvar57, 2 ; <i64> [#uses=2] + %tmp70 = mul i64 %indvar57, 3 ; <i64> [#uses=1] + %tmp71 = add i64 %tmp70, 6 ; <i64> [#uses=1] + %tmp73 = shl i64 %indvar57, 1 ; <i64> [#uses=1] + %add = add i64 %tmp73, 4 ; <i64> [#uses=2] + %arrayidx17 = getelementptr [8193 x i8]* @main.flags, i64 0, i64 %tmp68 ; <i8*> [#uses=1] + %tmp18 = load i8* %arrayidx17 ; <i8> [#uses=1] + %tobool19 = icmp eq i8 %tmp18, 0 ; <i1> [#uses=1] + br i1 %tobool19, label %for.inc35, label %if.then + +if.then: ; preds = %for.body15 + %cmp2443 = icmp slt i64 %add, 8193 ; <i1> [#uses=1] + br i1 %cmp2443, label %for.body25, label %for.end32 + +for.body25: ; preds = %if.then, %for.body25 + %indvar55 = phi i64 [ %indvar.next56, %for.body25 ], [ 0, %if.then ] ; <i64> [#uses=2] + %tmp60 = mul i64 %tmp68, %indvar55 ; <i64> [#uses=2] + %tmp75 = add i64 %add, %tmp60 ; <i64> [#uses=1] + %arrayidx27 = getelementptr [8193 x i8]* @main.flags, i64 0, i64 %tmp75 ; <i8*> [#uses=1] + store i8 0, i8* %arrayidx27 + %add31 = add i64 %tmp71, %tmp60 ; <i64> [#uses=1] + %cmp24 = icmp slt i64 %add31, 8193 ; <i1> [#uses=1] + %indvar.next56 = add i64 %indvar55, 1 ; <i64> [#uses=1] + br i1 %cmp24, label %for.body25, label %for.end32 + +for.end32: ; preds = %for.body25, %if.then + %inc34 = add nsw i32 %count.248, 1 ; <i32> [#uses=1] + br label %for.inc35 + +for.inc35: ; preds = %for.body15, %for.end32 + %count.1 = phi i32 [ %inc34, %for.end32 ], [ %count.248, %for.body15 ] ; <i32> [#uses=2] + %indvar.next58 = add i64 %indvar57, 1 ; <i64> [#uses=2] + %exitcond67 = icmp eq i64 %indvar.next58, 8191 ; <i1> [#uses=1] + br i1 %exitcond67, label %while.cond.loopexit, label %for.body15 + +while.end: ; preds = %while.cond.loopexit, %while.cond.preheader + %count.0.lcssa = phi i32 [ 0, %while.cond.preheader ], [ %count.1, 
%while.cond.loopexit ] ; <i32> [#uses=1] + %call40 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i64 0, i64 0), i32 %count.0.lcssa) nounwind ; <i32> [#uses=0] + ret i32 0 +} + +declare i32 @atoi(i8* nocapture) nounwind readonly + +declare i32 @printf(i8* nocapture, ...) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/licm-symbol.ll b/src/LLVM/test/CodeGen/X86/licm-symbol.ll new file mode 100644 index 0000000..c3d1938 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/licm-symbol.ll
@@ -0,0 +1,39 @@ +; RUN: llc < %s | FileCheck %s + +; MachineLICM should be able to hoist the sF reference out of the loop. + +; CHECK: pushl %esi +; CHECK: pushl +; CHECK: movl $176, %esi +; CHECK: addl L___sF$non_lazy_ptr, %esi +; CHECK: .align 4, 0x90 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" +target triple = "i386-apple-darwin8" + +%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } +%struct.__sFILEX = type opaque +%struct.__sbuf = type { i8*, i32 } +%struct.gcov_ctr_summary = type { i32, i32, i64, i64, i64 } +%struct.gcov_summary = type { i32, [1 x %struct.gcov_ctr_summary] } + +@__sF = external global [0 x %struct.FILE] ; <[0 x %struct.FILE]*> [#uses=1] + +declare i32 @fprintf(%struct.FILE* nocapture) nounwind + +define void @gcov_exit() nounwind { +entry: + br label %bb151 + +bb151: ; preds = %bb59, %bb56, %bb14 + br i1 undef, label %bb56, label %bb59 + +bb56: ; preds = %bb151 + %t0 = call i32 (%struct.FILE*)* @fprintf(%struct.FILE* getelementptr inbounds ([0 x %struct.FILE]* @__sF, i32 0, i32 2)) nounwind + br label %bb151 + +bb59: ; preds = %bb151 + %t1 = call i32 (%struct.FILE*)* @fprintf(%struct.FILE* getelementptr inbounds ([0 x %struct.FILE]* @__sF, i32 0, i32 2)) nounwind + br label %bb151 +} +
diff --git a/src/LLVM/test/CodeGen/X86/limited-prec.ll b/src/LLVM/test/CodeGen/X86/limited-prec.ll new file mode 100644 index 0000000..7bf4ac2 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/limited-prec.ll
@@ -0,0 +1,60 @@ +; RUN: llc < %s -limit-float-precision=6 -march=x86 | \ +; RUN: not grep exp | not grep log | not grep pow +; RUN: llc < %s -limit-float-precision=12 -march=x86 | \ +; RUN: not grep exp | not grep log | not grep pow +; RUN: llc < %s -limit-float-precision=18 -march=x86 | \ +; RUN: not grep exp | not grep log | not grep pow + +define float @f1(float %x) nounwind noinline { +entry: + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + %0 = call float @llvm.exp.f32(float %x) ; <float> [#uses=1] + ret float %0 +} + +declare float @llvm.exp.f32(float) nounwind readonly + +define float @f2(float %x) nounwind noinline { +entry: + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + %0 = call float @llvm.exp2.f32(float %x) ; <float> [#uses=1] + ret float %0 +} + +declare float @llvm.exp2.f32(float) nounwind readonly + +define float @f3(float %x) nounwind noinline { +entry: + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + %0 = call float @llvm.pow.f32(float 1.000000e+01, float %x) ; <float> [#uses=1] + ret float %0 +} + +declare float @llvm.pow.f32(float, float) nounwind readonly + +define float @f4(float %x) nounwind noinline { +entry: + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + %0 = call float @llvm.log.f32(float %x) ; <float> [#uses=1] + ret float %0 +} + +declare float @llvm.log.f32(float) nounwind readonly + +define float @f5(float %x) nounwind noinline { +entry: + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + %0 = call float @llvm.log2.f32(float %x) ; <float> [#uses=1] + ret float %0 +} + +declare float @llvm.log2.f32(float) nounwind readonly + +define float @f6(float %x) nounwind noinline { +entry: + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + %0 = call float @llvm.log10.f32(float %x) ; <float> [#uses=1] + ret float %0 +} + +declare float @llvm.log10.f32(float) nounwind readonly
diff --git a/src/LLVM/test/CodeGen/X86/live-out-reg-info.ll b/src/LLVM/test/CodeGen/X86/live-out-reg-info.ll new file mode 100644 index 0000000..8cd9774 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/live-out-reg-info.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -march=x86-64 | grep testb + +; Make sure dagcombine doesn't eliminate the comparison due +; to an off-by-one bug with ComputeMaskedBits information. + +declare void @qux() + +define void @foo(i32 %a) { + %t0 = lshr i32 %a, 23 + br label %next +next: + %t1 = and i32 %t0, 256 + %t2 = icmp eq i32 %t1, 0 + br i1 %t2, label %true, label %false +true: + call void @qux() + ret void +false: + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/liveness-local-regalloc.ll b/src/LLVM/test/CodeGen/X86/liveness-local-regalloc.ll new file mode 100644 index 0000000..b469d08 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/liveness-local-regalloc.ll
@@ -0,0 +1,60 @@ +; RUN: llc < %s -O3 -regalloc=fast -mtriple=x86_64-apple-darwin10 +; <rdar://problem/7755473> + +%0 = type { i32, i8*, i8*, %1*, i8*, i64, i64, i32, i32, i32, i32, [1024 x i8] } +%1 = type { i8*, i32, i32, i16, i16, %2, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %2, %3*, i32, [3 x i8], [1 x i8], %2, i32, i64 } +%2 = type { i8*, i32 } +%3 = type opaque + +declare fastcc i32 @func(%0*, i32, i32) nounwind ssp + +define fastcc void @func2(%0* %arg, i32 %arg1) nounwind ssp { +bb: + br label %.exit3 + +.exit3: ; preds = %.exit3, %bb + switch i32 undef, label %.exit3 [ + i32 -1, label %.loopexit + i32 37, label %bb2 + ] + +bb2: ; preds = %bb5, %bb3, %.exit3 + br i1 undef, label %bb3, label %bb5 + +bb3: ; preds = %bb2 + switch i32 undef, label %infloop [ + i32 125, label %.loopexit + i32 -1, label %bb4 + i32 37, label %bb2 + ] + +bb4: ; preds = %bb3 + %tmp = add nsw i32 undef, 1 ; <i32> [#uses=1] + br label %.loopexit + +bb5: ; preds = %bb2 + switch i32 undef, label %infloop1 [ + i32 -1, label %.loopexit + i32 37, label %bb2 + ] + +.loopexit: ; preds = %bb5, %bb4, %bb3, %.exit3 + %.04 = phi i32 [ %tmp, %bb4 ], [ undef, %bb3 ], [ undef, %.exit3 ], [ undef, %bb5 ] ; <i32> [#uses=2] + br i1 undef, label %bb8, label %bb6 + +bb6: ; preds = %.loopexit + %tmp7 = tail call fastcc i32 @func(%0* %arg, i32 %.04, i32 undef) nounwind ssp ; <i32> [#uses=0] + ret void + +bb8: ; preds = %.loopexit + %tmp9 = sext i32 %.04 to i64 ; <i64> [#uses=1] + %tmp10 = getelementptr inbounds %0* %arg, i64 0, i32 11, i64 %tmp9 ; <i8*> [#uses=1] + store i8 0, i8* %tmp10, align 1 + ret void + +infloop: ; preds = %infloop, %bb3 + br label %infloop + +infloop1: ; preds = %infloop1, %bb5 + br label %infloop1 +}
diff --git a/src/LLVM/test/CodeGen/X86/lock-inst-encoding.ll b/src/LLVM/test/CodeGen/X86/lock-inst-encoding.ll new file mode 100644 index 0000000..9765fae --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/lock-inst-encoding.ll
@@ -0,0 +1,44 @@ +; RUN: llc -O0 --show-mc-encoding < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +; CHECK: f1: +; CHECK: addq %{{.*}}, ({{.*}}){{.*}}encoding: [0xf0,0x48,0x01,0x37] +; CHECK: ret +define void @f1(i64* %a, i64 %b) nounwind { + %1 = atomicrmw add i64* %a, i64 %b monotonic + ret void +} + +; CHECK: f2: +; CHECK: subq %{{.*}}, ({{.*}}){{.*}}encoding: [0xf0,0x48,0x29,0x37] +; CHECK: ret +define void @f2(i64* %a, i64 %b) nounwind { + %1 = atomicrmw sub i64* %a, i64 %b monotonic + ret void +} + +; CHECK: f3: +; CHECK: andq %{{.*}}, ({{.*}}){{.*}}encoding: [0xf0,0x48,0x21,0x37] +; CHECK: ret +define void @f3(i64* %a, i64 %b) nounwind { + %1 = atomicrmw and i64* %a, i64 %b monotonic + ret void +} + +; CHECK: f4: +; CHECK: orq %{{.*}}, ({{.*}}){{.*}}encoding: [0xf0,0x48,0x09,0x37] +; CHECK: ret +define void @f4(i64* %a, i64 %b) nounwind { + %1 = atomicrmw or i64* %a, i64 %b monotonic + ret void +} + +; CHECK: f5: +; CHECK: xorq %{{.*}}, ({{.*}}){{.*}}encoding: [0xf0,0x48,0x31,0x37] +; CHECK: ret +define void @f5(i64* %a, i64 %b) nounwind { + %1 = atomicrmw xor i64* %a, i64 %b monotonic + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/long-setcc.ll b/src/LLVM/test/CodeGen/X86/long-setcc.ll new file mode 100644 index 0000000..69c1ca6 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/long-setcc.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=x86 | grep cmp | count 1 +; RUN: llc < %s -march=x86 | grep shr | count 1 +; RUN: llc < %s -march=x86 | grep xor | count 1 + +define i1 @t1(i64 %x) nounwind { + %B = icmp slt i64 %x, 0 + ret i1 %B +} + +define i1 @t2(i64 %x) nounwind { + %tmp = icmp ult i64 %x, 4294967296 + ret i1 %tmp +} + +define i1 @t3(i32 %x) nounwind { + %tmp = icmp ugt i32 %x, -1 + ret i1 %tmp +}
diff --git a/src/LLVM/test/CodeGen/X86/longlong-deadload.ll b/src/LLVM/test/CodeGen/X86/longlong-deadload.ll new file mode 100644 index 0000000..db91961 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/longlong-deadload.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=x86 | FileCheck %s +; This should not load or store the top part of *P. + +define void @test(i64* %P) nounwind { +; CHECK: test: +; CHECK: movl 4(%esp), %[[REGISTER:.*]] +; CHECK-NOT: 4(%[[REGISTER]]) +; CHECK: ret + %tmp1 = load i64* %P, align 8 ; <i64> [#uses=1] + %tmp2 = xor i64 %tmp1, 1 ; <i64> [#uses=1] + store i64 %tmp2, i64* %P, align 8 + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/loop-blocks.ll b/src/LLVM/test/CodeGen/X86/loop-blocks.ll new file mode 100644 index 0000000..faba630 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/loop-blocks.ll
@@ -0,0 +1,208 @@ +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -asm-verbose=false | FileCheck %s + +; These tests check for loop branching structure, and that the loop align +; directive is placed in the expected place. + +; CodeGen should insert a branch into the middle of the loop in +; order to avoid a branch within the loop. + +; CHECK: simple: +; CHECK: jmp .LBB0_1 +; CHECK-NEXT: align +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: callq loop_latch +; CHECK-NEXT: .LBB0_1: +; CHECK-NEXT: callq loop_header + +define void @simple() nounwind { +entry: + br label %loop + +loop: + call void @loop_header() + %t0 = tail call i32 @get() + %t1 = icmp slt i32 %t0, 0 + br i1 %t1, label %done, label %bb + +bb: + call void @loop_latch() + br label %loop + +done: + call void @exit() + ret void +} + +; CodeGen should move block_a to the top of the loop so that it +; falls through into the loop, avoiding a branch within the loop. + +; CHECK: slightly_more_involved: +; CHECK: jmp .LBB1_1 +; CHECK-NEXT: align +; CHECK-NEXT: .LBB1_4: +; CHECK-NEXT: callq bar99 +; CHECK-NEXT: .LBB1_1: +; CHECK-NEXT: callq body + +define void @slightly_more_involved() nounwind { +entry: + br label %loop + +loop: + call void @body() + %t0 = call i32 @get() + %t1 = icmp slt i32 %t0, 2 + br i1 %t1, label %block_a, label %bb + +bb: + %t2 = call i32 @get() + %t3 = icmp slt i32 %t2, 99 + br i1 %t3, label %exit, label %loop + +block_a: + call void @bar99() + br label %loop + +exit: + call void @exit() + ret void +} + +; Same as slightly_more_involved, but block_a is now a CFG diamond with +; fallthrough edges which should be preserved. +; "callq block_a_merge_func" is tail duped. 
+ +; CHECK: yet_more_involved: +; CHECK: jmp .LBB2_1 +; CHECK-NEXT: align +; CHECK-NEXT: .LBB2_4: +; CHECK-NEXT: callq bar99 +; CHECK-NEXT: callq get +; CHECK-NEXT: cmpl $2999, %eax +; CHECK-NEXT: jle .LBB2_5 +; CHECK-NEXT: callq block_a_false_func +; CHECK-NEXT: callq block_a_merge_func +; CHECK-NEXT: jmp .LBB2_1 +; CHECK-NEXT: .LBB2_5: +; CHECK-NEXT: callq block_a_true_func +; CHECK-NEXT: callq block_a_merge_func +; CHECK-NEXT: .LBB2_1: +; CHECK-NEXT: callq body + +define void @yet_more_involved() nounwind { +entry: + br label %loop + +loop: + call void @body() + %t0 = call i32 @get() + %t1 = icmp slt i32 %t0, 2 + br i1 %t1, label %block_a, label %bb + +bb: + %t2 = call i32 @get() + %t3 = icmp slt i32 %t2, 99 + br i1 %t3, label %exit, label %loop + +block_a: + call void @bar99() + %z0 = call i32 @get() + %z1 = icmp slt i32 %z0, 3000 + br i1 %z1, label %block_a_true, label %block_a_false + +block_a_true: + call void @block_a_true_func() + br label %block_a_merge + +block_a_false: + call void @block_a_false_func() + br label %block_a_merge + +block_a_merge: + call void @block_a_merge_func() + br label %loop + +exit: + call void @exit() + ret void +} + +; CodeGen should move the CFG islands that are part of the loop but don't +; conveniently fit anywhere so that they are at least contiguous with the +; loop. 
+ +; CHECK: cfg_islands: +; CHECK: jmp .LBB3_1 +; CHECK-NEXT: align +; CHECK-NEXT: .LBB3_7: +; CHECK-NEXT: callq bar100 +; CHECK-NEXT: jmp .LBB3_1 +; CHECK-NEXT: .LBB3_8: +; CHECK-NEXT: callq bar101 +; CHECK-NEXT: jmp .LBB3_1 +; CHECK-NEXT: .LBB3_9: +; CHECK-NEXT: callq bar102 +; CHECK-NEXT: jmp .LBB3_1 +; CHECK-NEXT: .LBB3_5: +; CHECK-NEXT: callq loop_latch +; CHECK-NEXT: .LBB3_1: +; CHECK-NEXT: callq loop_header + +define void @cfg_islands() nounwind { +entry: + br label %loop + +loop: + call void @loop_header() + %t0 = call i32 @get() + %t1 = icmp slt i32 %t0, 100 + br i1 %t1, label %block100, label %bb + +bb: + %t2 = call i32 @get() + %t3 = icmp slt i32 %t2, 101 + br i1 %t3, label %block101, label %bb1 + +bb1: + %t4 = call i32 @get() + %t5 = icmp slt i32 %t4, 102 + br i1 %t5, label %block102, label %bb2 + +bb2: + %t6 = call i32 @get() + %t7 = icmp slt i32 %t6, 103 + br i1 %t7, label %exit, label %bb3 + +bb3: + call void @loop_latch() + br label %loop + +exit: + call void @exit() + ret void + +block100: + call void @bar100() + br label %loop + +block101: + call void @bar101() + br label %loop + +block102: + call void @bar102() + br label %loop +} + +declare void @bar99() nounwind +declare void @bar100() nounwind +declare void @bar101() nounwind +declare void @bar102() nounwind +declare void @body() nounwind +declare void @exit() nounwind +declare void @loop_header() nounwind +declare void @loop_latch() nounwind +declare i32 @get() nounwind +declare void @block_a_true_func() nounwind +declare void @block_a_false_func() nounwind +declare void @block_a_merge_func() nounwind
diff --git a/src/LLVM/test/CodeGen/X86/loop-hoist.ll b/src/LLVM/test/CodeGen/X86/loop-hoist.ll new file mode 100644 index 0000000..60b2951 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/loop-hoist.ll
@@ -0,0 +1,27 @@ +; LSR should hoist the load from the "Arr" stub out of the loop. + +; RUN: llc < %s -relocation-model=dynamic-no-pic -mtriple=i686-apple-darwin8.7.2 | FileCheck %s + +; CHECK: _foo: +; CHECK: L_Arr$non_lazy_ptr +; CHECK: LBB0_1: + +@Arr = external global [0 x i32] ; <[0 x i32]*> [#uses=1] + +define void @foo(i32 %N.in, i32 %x) nounwind { +entry: + %N = bitcast i32 %N.in to i32 ; <i32> [#uses=1] + br label %cond_true + +cond_true: ; preds = %cond_true, %entry + %indvar = phi i32 [ %x, %entry ], [ %indvar.next, %cond_true ] ; <i32> [#uses=2] + %i.0.0 = bitcast i32 %indvar to i32 ; <i32> [#uses=2] + %tmp = getelementptr [0 x i32]* @Arr, i32 0, i32 %i.0.0 ; <i32*> [#uses=1] + store i32 %i.0.0, i32* %tmp + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %indvar.next, %N ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %cond_true + +return: ; preds = %cond_true + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/loop-strength-reduce-2.ll b/src/LLVM/test/CodeGen/X86/loop-strength-reduce-2.ll new file mode 100644 index 0000000..b546462 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/loop-strength-reduce-2.ll
@@ -0,0 +1,45 @@ +; RUN: llc < %s -march=x86 -relocation-model=pic | FileCheck %s -check-prefix=PIC +; RUN: llc < %s -march=x86 -relocation-model=static | FileCheck %s -check-prefix=STATIC +; +; Make sure the common loop invariant A is hoisted up to preheader, +; since too many registers are needed to subsume it into the addressing modes. +; It's safe to sink A in when it's not pic. + +; PIC: align +; PIC: movl $4, -4([[REG:%e[a-z]+]]) +; PIC: movl $5, ([[REG]]) +; PIC: addl $4, [[REG]] +; PIC: decl {{%e[[a-z]+}} +; PIC: jne + +; STATIC: align +; STATIC: movl $4, -4(%ecx) +; STATIC: movl $5, (%ecx) +; STATIC: addl $4, %ecx +; STATIC: decl %eax +; STATIC: jne + +@A = global [16 x [16 x i32]] zeroinitializer, align 32 ; <[16 x [16 x i32]]*> [#uses=2] + +define void @test(i32 %row, i32 %N.in) nounwind { +entry: + %N = bitcast i32 %N.in to i32 ; <i32> [#uses=1] + %tmp5 = icmp sgt i32 %N.in, 0 ; <i1> [#uses=1] + br i1 %tmp5, label %cond_true, label %return + +cond_true: ; preds = %cond_true, %entry + %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %cond_true ] ; <i32> [#uses=2] + %i.0.0 = bitcast i32 %indvar to i32 ; <i32> [#uses=2] + %tmp2 = add i32 %i.0.0, 1 ; <i32> [#uses=1] + %tmp = getelementptr [16 x [16 x i32]]* @A, i32 0, i32 %row, i32 %tmp2 ; <i32*> [#uses=1] + store i32 4, i32* %tmp + %tmp5.upgrd.1 = add i32 %i.0.0, 2 ; <i32> [#uses=1] + %tmp7 = getelementptr [16 x [16 x i32]]* @A, i32 0, i32 %row, i32 %tmp5.upgrd.1 ; <i32*> [#uses=1] + store i32 5, i32* %tmp7 + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %indvar.next, %N ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %cond_true + +return: ; preds = %cond_true, %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/loop-strength-reduce-3.ll b/src/LLVM/test/CodeGen/X86/loop-strength-reduce-3.ll new file mode 100644 index 0000000..b1c9fb9 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/loop-strength-reduce-3.ll
@@ -0,0 +1,33 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=dynamic-no-pic | FileCheck %s + +; CHECK: align +; CHECK: movl $4, -4(%ecx) +; CHECK: movl $5, (%ecx) +; CHECK: addl $4, %ecx +; CHECK: decl %eax +; CHECK: jne + +@A = global [16 x [16 x i32]] zeroinitializer, align 32 ; <[16 x [16 x i32]]*> [#uses=2] + +define void @test(i32 %row, i32 %N.in) nounwind { +entry: + %N = bitcast i32 %N.in to i32 ; <i32> [#uses=1] + %tmp5 = icmp sgt i32 %N.in, 0 ; <i1> [#uses=1] + br i1 %tmp5, label %cond_true, label %return + +cond_true: ; preds = %cond_true, %entry + %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %cond_true ] ; <i32> [#uses=2] + %i.0.0 = bitcast i32 %indvar to i32 ; <i32> [#uses=2] + %tmp2 = add i32 %i.0.0, 1 ; <i32> [#uses=1] + %tmp = getelementptr [16 x [16 x i32]]* @A, i32 0, i32 %row, i32 %tmp2 ; <i32*> [#uses=1] + store i32 4, i32* %tmp + %tmp5.upgrd.1 = add i32 %i.0.0, 2 ; <i32> [#uses=1] + %tmp7 = getelementptr [16 x [16 x i32]]* @A, i32 0, i32 %row, i32 %tmp5.upgrd.1 ; <i32*> [#uses=1] + store i32 5, i32* %tmp7 + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %indvar.next, %N ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %cond_true + +return: ; preds = %cond_true, %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/loop-strength-reduce.ll b/src/LLVM/test/CodeGen/X86/loop-strength-reduce.ll new file mode 100644 index 0000000..4a5b7a8 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/loop-strength-reduce.ll
@@ -0,0 +1,33 @@ +; RUN: llc < %s -march=x86 -relocation-model=static | FileCheck %s + +; CHECK: align +; CHECK: movl $4, -4(%ecx) +; CHECK: movl $5, (%ecx) +; CHECK: addl $4, %ecx +; CHECK: decl %eax +; CHECK: jne + +@A = internal global [16 x [16 x i32]] zeroinitializer, align 32 ; <[16 x [16 x i32]]*> [#uses=2] + +define void @test(i32 %row, i32 %N.in) nounwind { +entry: + %N = bitcast i32 %N.in to i32 ; <i32> [#uses=1] + %tmp5 = icmp sgt i32 %N.in, 0 ; <i1> [#uses=1] + br i1 %tmp5, label %cond_true, label %return + +cond_true: ; preds = %cond_true, %entry + %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %cond_true ] ; <i32> [#uses=2] + %i.0.0 = bitcast i32 %indvar to i32 ; <i32> [#uses=2] + %tmp2 = add i32 %i.0.0, 1 ; <i32> [#uses=1] + %tmp = getelementptr [16 x [16 x i32]]* @A, i32 0, i32 %row, i32 %tmp2 ; <i32*> [#uses=1] + store i32 4, i32* %tmp + %tmp5.upgrd.1 = add i32 %i.0.0, 2 ; <i32> [#uses=1] + %tmp7 = getelementptr [16 x [16 x i32]]* @A, i32 0, i32 %row, i32 %tmp5.upgrd.1 ; <i32*> [#uses=1] + store i32 5, i32* %tmp7 + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %indvar.next, %N ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %cond_true + +return: ; preds = %cond_true, %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/loop-strength-reduce2.ll b/src/LLVM/test/CodeGen/X86/loop-strength-reduce2.ll new file mode 100644 index 0000000..b29616f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/loop-strength-reduce2.ll
@@ -0,0 +1,30 @@ +; RUN: llc < %s -mtriple=i686-apple-darwin -relocation-model=pic | FileCheck %s +; +; Make sure the PIC label flags2-"L1$pb" is not moved up to the preheader. +; CHECK: mov{{.}} {{.*}}$pb + +@flags2 = internal global [8193 x i8] zeroinitializer, align 32 ; <[8193 x i8]*> [#uses=1] + +define void @test(i32 %k, i32 %i) nounwind { +entry: + %k_addr.012 = shl i32 %i, 1 ; <i32> [#uses=1] + %tmp14 = icmp sgt i32 %k_addr.012, 8192 ; <i1> [#uses=1] + br i1 %tmp14, label %return, label %bb + +bb: ; preds = %bb, %entry + %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2] + %tmp. = shl i32 %i, 1 ; <i32> [#uses=1] + %tmp.15 = mul i32 %indvar, %i ; <i32> [#uses=1] + %tmp.16 = add i32 %tmp.15, %tmp. ; <i32> [#uses=2] + %k_addr.0.0 = bitcast i32 %tmp.16 to i32 ; <i32> [#uses=1] + %gep.upgrd.1 = zext i32 %tmp.16 to i64 ; <i64> [#uses=1] + %tmp = getelementptr [8193 x i8]* @flags2, i32 0, i64 %gep.upgrd.1 ; <i8*> [#uses=1] + store i8 0, i8* %tmp + %k_addr.0 = add i32 %k_addr.0.0, %i ; <i32> [#uses=1] + %tmp.upgrd.2 = icmp sgt i32 %k_addr.0, 8192 ; <i1> [#uses=1] + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1] + br i1 %tmp.upgrd.2, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/loop-strength-reduce3.ll b/src/LLVM/test/CodeGen/X86/loop-strength-reduce3.ll new file mode 100644 index 0000000..d6c265f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/loop-strength-reduce3.ll
@@ -0,0 +1,37 @@ +; RUN: llc < %s -march=x86 -enable-lsr-nested | grep cmp | grep 240 +; RUN: llc < %s -march=x86 -enable-lsr-nested | grep inc | count 1 + +define i32 @foo(i32 %A, i32 %B, i32 %C, i32 %D) nounwind { +entry: + %tmp2955 = icmp sgt i32 %C, 0 ; <i1> [#uses=1] + br i1 %tmp2955, label %bb26.outer.us, label %bb40.split + +bb26.outer.us: ; preds = %bb26.bb32_crit_edge.us, %entry + %i.044.0.ph.us = phi i32 [ 0, %entry ], [ %indvar.next57, %bb26.bb32_crit_edge.us ] ; <i32> [#uses=2] + %k.1.ph.us = phi i32 [ 0, %entry ], [ %k.0.us, %bb26.bb32_crit_edge.us ] ; <i32> [#uses=1] + %tmp3.us = mul i32 %i.044.0.ph.us, 6 ; <i32> [#uses=1] + br label %bb1.us + +bb1.us: ; preds = %bb1.us, %bb26.outer.us + %j.053.us = phi i32 [ 0, %bb26.outer.us ], [ %tmp25.us, %bb1.us ] ; <i32> [#uses=2] + %k.154.us = phi i32 [ %k.1.ph.us, %bb26.outer.us ], [ %k.0.us, %bb1.us ] ; <i32> [#uses=1] + %tmp5.us = add i32 %tmp3.us, %j.053.us ; <i32> [#uses=1] + %tmp7.us = shl i32 %D, %tmp5.us ; <i32> [#uses=2] + %tmp9.us = icmp eq i32 %tmp7.us, %B ; <i1> [#uses=1] + %tmp910.us = zext i1 %tmp9.us to i32 ; <i32> [#uses=1] + %tmp12.us = and i32 %tmp7.us, %A ; <i32> [#uses=1] + %tmp19.us = and i32 %tmp12.us, %tmp910.us ; <i32> [#uses=1] + %k.0.us = add i32 %tmp19.us, %k.154.us ; <i32> [#uses=3] + %tmp25.us = add i32 %j.053.us, 1 ; <i32> [#uses=2] + %tmp29.us = icmp slt i32 %tmp25.us, %C ; <i1> [#uses=1] + br i1 %tmp29.us, label %bb1.us, label %bb26.bb32_crit_edge.us + +bb26.bb32_crit_edge.us: ; preds = %bb1.us + %indvar.next57 = add i32 %i.044.0.ph.us, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %indvar.next57, 40 ; <i1> [#uses=1] + br i1 %exitcond, label %bb40.split, label %bb26.outer.us + +bb40.split: ; preds = %bb26.bb32_crit_edge.us, %entry + %k.1.lcssa.lcssa.us-lcssa = phi i32 [ %k.0.us, %bb26.bb32_crit_edge.us ], [ 0, %entry ] ; <i32> [#uses=1] + ret i32 %k.1.lcssa.lcssa.us-lcssa +}
diff --git a/src/LLVM/test/CodeGen/X86/loop-strength-reduce4.ll b/src/LLVM/test/CodeGen/X86/loop-strength-reduce4.ll new file mode 100644 index 0000000..32e7879 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/loop-strength-reduce4.ll
@@ -0,0 +1,63 @@ +; RUN: llc < %s -mtriple=i686-apple-darwin -relocation-model=static | FileCheck %s -check-prefix=STATIC +; RUN: llc < %s -mtriple=i686-apple-darwin -relocation-model=pic | FileCheck %s -check-prefix=PIC + +; By starting the IV at -64 instead of 0, a cmp is eliminated, +; as the flags from the add can be used directly. + +; STATIC: movl $-64, [[ECX:%e..]] + +; STATIC: movl [[EAX:%e..]], _state+76([[ECX]]) +; STATIC: addl $16, [[ECX]] +; STATIC: jne + +; In PIC mode the symbol can't be folded, so the change-compare-stride +; trick applies. + +; PIC: cmpl $64 + +@state = external global [0 x i32] ; <[0 x i32]*> [#uses=4] +@S = external global [0 x i32] ; <[0 x i32]*> [#uses=4] + +define i32 @foo() nounwind { +entry: + br label %bb + +bb: ; preds = %bb, %entry + %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2] + %t.063.0 = phi i32 [ 0, %entry ], [ %tmp47, %bb ] ; <i32> [#uses=1] + %j.065.0 = shl i32 %indvar, 2 ; <i32> [#uses=4] + %tmp3 = getelementptr [0 x i32]* @state, i32 0, i32 %j.065.0 ; <i32*> [#uses=2] + %tmp4 = load i32* %tmp3, align 4 ; <i32> [#uses=1] + %tmp6 = getelementptr [0 x i32]* @S, i32 0, i32 %t.063.0 ; <i32*> [#uses=1] + %tmp7 = load i32* %tmp6, align 4 ; <i32> [#uses=1] + %tmp8 = xor i32 %tmp7, %tmp4 ; <i32> [#uses=2] + store i32 %tmp8, i32* %tmp3, align 4 + %tmp1378 = or i32 %j.065.0, 1 ; <i32> [#uses=1] + %tmp16 = getelementptr [0 x i32]* @state, i32 0, i32 %tmp1378 ; <i32*> [#uses=2] + %tmp17 = load i32* %tmp16, align 4 ; <i32> [#uses=1] + %tmp19 = getelementptr [0 x i32]* @S, i32 0, i32 %tmp8 ; <i32*> [#uses=1] + %tmp20 = load i32* %tmp19, align 4 ; <i32> [#uses=1] + %tmp21 = xor i32 %tmp20, %tmp17 ; <i32> [#uses=2] + store i32 %tmp21, i32* %tmp16, align 4 + %tmp2680 = or i32 %j.065.0, 2 ; <i32> [#uses=1] + %tmp29 = getelementptr [0 x i32]* @state, i32 0, i32 %tmp2680 ; <i32*> [#uses=2] + %tmp30 = load i32* %tmp29, align 4 ; <i32> [#uses=1] + %tmp32 = getelementptr [0 x i32]* @S, i32 0, i32 %tmp21 ; 
<i32*> [#uses=1] + %tmp33 = load i32* %tmp32, align 4 ; <i32> [#uses=1] + %tmp34 = xor i32 %tmp33, %tmp30 ; <i32> [#uses=2] + store i32 %tmp34, i32* %tmp29, align 4 + %tmp3982 = or i32 %j.065.0, 3 ; <i32> [#uses=1] + %tmp42 = getelementptr [0 x i32]* @state, i32 0, i32 %tmp3982 ; <i32*> [#uses=2] + %tmp43 = load i32* %tmp42, align 4 ; <i32> [#uses=1] + %tmp45 = getelementptr [0 x i32]* @S, i32 0, i32 %tmp34 ; <i32*> [#uses=1] + %tmp46 = load i32* %tmp45, align 4 ; <i32> [#uses=1] + %tmp47 = xor i32 %tmp46, %tmp43 ; <i32> [#uses=3] + store i32 %tmp47, i32* %tmp42, align 4 + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %indvar.next, 4 ; <i1> [#uses=1] + br i1 %exitcond, label %bb57, label %bb + +bb57: ; preds = %bb + %tmp59 = and i32 %tmp47, 255 ; <i32> [#uses=1] + ret i32 %tmp59 +}
diff --git a/src/LLVM/test/CodeGen/X86/loop-strength-reduce5.ll b/src/LLVM/test/CodeGen/X86/loop-strength-reduce5.ll new file mode 100644 index 0000000..b07eeb6 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/loop-strength-reduce5.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -march=x86 | grep inc | count 1 + +@X = weak global i16 0 ; <i16*> [#uses=1] +@Y = weak global i16 0 ; <i16*> [#uses=1] + +define void @foo(i32 %N) nounwind { +entry: + %tmp1019 = icmp sgt i32 %N, 0 ; <i1> [#uses=1] + br i1 %tmp1019, label %bb, label %return + +bb: ; preds = %bb, %entry + %i.014.0 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2] + %tmp1 = trunc i32 %i.014.0 to i16 ; <i16> [#uses=2] + volatile store i16 %tmp1, i16* @X, align 2 + %tmp34 = shl i16 %tmp1, 2 ; <i16> [#uses=1] + volatile store i16 %tmp34, i16* @Y, align 2 + %indvar.next = add i32 %i.014.0, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %indvar.next, %N ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/loop-strength-reduce6.ll b/src/LLVM/test/CodeGen/X86/loop-strength-reduce6.ll new file mode 100644 index 0000000..919f836 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/loop-strength-reduce6.ll
@@ -0,0 +1,66 @@ +; RUN: llc < %s -march=x86-64 | not grep inc + +define fastcc i32 @decodeMP3(i32 %isize, i32* %done) nounwind { +entry: + br label %cond_true189 + +cond_true189: ; preds = %entry + ret i32 0 + +cond_next191: ; preds = %entry + br label %cond_false.i9 + +cond_false.i9: ; preds = %cond_next191 + ret i32 0 + +cond_next37.i: ; preds = %cond_next191 + br label %cond_true44.i + +cond_true44.i: ; preds = %cond_next37.i + br label %bb414.preheader.i + +cond_true11.i.i: ; preds = %cond_true44.i + ret i32 0 + +cond_false50.i: ; preds = %cond_next37.i + ret i32 0 + +bb414.preheader.i: ; preds = %cond_true44.i + br label %do_layer3.exit + +bb.i18: ; preds = %bb414.preheader.i + br label %cond_true79.i + +cond_true79.i: ; preds = %bb.i18 + ret i32 0 + +bb331.i: ; preds = %bb358.i, %cond_true.i149.i + br label %cond_false.i151.i + +cond_true.i149.i: ; preds = %bb331.i + br label %bb331.i + +cond_false.i151.i: ; preds = %bb331.i + ret i32 0 + +bb163.i.i: ; preds = %bb178.preheader.i.i, %bb163.i.i + %rawout2.451.rec.i.i = phi i64 [ 0, %bb178.preheader.i.i ], [ %indvar.next260.i, %bb163.i.i ] ; <i64> [#uses=2] + %i.052.i.i = trunc i64 %rawout2.451.rec.i.i to i32 ; <i32> [#uses=1] + %tmp165.i144.i = shl i32 %i.052.i.i, 5 ; <i32> [#uses=1] + %tmp165169.i.i = sext i32 %tmp165.i144.i to i64 ; <i64> [#uses=0] + %indvar.next260.i = add i64 %rawout2.451.rec.i.i, 1 ; <i64> [#uses=2] + %exitcond261.i = icmp eq i64 %indvar.next260.i, 18 ; <i1> [#uses=1] + br i1 %exitcond261.i, label %bb178.preheader.i.i, label %bb163.i.i + +bb178.preheader.i.i: ; preds = %bb163.i.i, %cond_true.i149.i + br label %bb163.i.i + +bb358.i: ; preds = %bb.i18 + br label %bb406.i + +bb406.i: ; preds = %bb358.i + ret i32 0 + +do_layer3.exit: ; preds = %bb414.preheader.i + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/X86/loop-strength-reduce7.ll b/src/LLVM/test/CodeGen/X86/loop-strength-reduce7.ll new file mode 100644 index 0000000..4b565a6 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/loop-strength-reduce7.ll
@@ -0,0 +1,44 @@ +; RUN: llc < %s -march=x86 | not grep imul + +target triple = "i386-apple-darwin9.6" + %struct.III_psy_xmin = type { [22 x double], [13 x [3 x double]] } + %struct.III_scalefac_t = type { [22 x i32], [13 x [3 x i32]] } + %struct.gr_info = type { i32, i32, i32, i32, i32, i32, i32, i32, [3 x i32], [3 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32*, [4 x i32] } + %struct.lame_global_flags = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, float, float, float, float, i32, i32, i32, i32, i32, i32, i32, i32 } + +define fastcc void @outer_loop(%struct.lame_global_flags* nocapture %gfp, double* nocapture %xr, i32 %targ_bits, double* nocapture %best_noise, %struct.III_psy_xmin* nocapture %l3_xmin, i32* nocapture %l3_enc, %struct.III_scalefac_t* nocapture %scalefac, %struct.gr_info* nocapture %cod_info, i32 %ch) nounwind { +entry: + br label %bb4 + +bb4: ; preds = %bb4, %entry + br i1 true, label %bb5, label %bb4 + +bb5: ; preds = %bb4 + br i1 true, label %bb28.i37, label %bb.i4 + +bb.i4: ; preds = %bb.i4, %bb5 + br label %bb.i4 + +bb28.i37: ; preds = %bb33.i47, %bb5 + %i.1.reg2mem.0.i = phi i32 [ %0, %bb33.i47 ], [ 0, %bb5 ] ; <i32> [#uses=2] + %0 = add i32 %i.1.reg2mem.0.i, 1 ; <i32> [#uses=2] + br label %bb29.i38 + +bb29.i38: ; preds = %bb33.i47, %bb28.i37 + %indvar32.i = phi i32 [ %indvar.next33.i, %bb33.i47 ], [ 0, %bb28.i37 ] ; <i32> [#uses=2] + %sfb.314.i = add i32 %indvar32.i, 0 ; <i32> [#uses=3] + %1 = getelementptr [4 x [21 x double]]* null, i32 0, i32 %0, i32 %sfb.314.i ; <double*> [#uses=1] + %2 = load double* %1, align 8 ; <double> [#uses=0] + br i1 false, label %bb30.i41, label %bb33.i47 + +bb30.i41: ; preds = %bb29.i38 + %3 = getelementptr %struct.III_scalefac_t* null, i32 0, i32 1, i32 %sfb.314.i, i32 
%i.1.reg2mem.0.i ; <i32*> [#uses=1] + store i32 0, i32* %3, align 4 + br label %bb33.i47 + +bb33.i47: ; preds = %bb30.i41, %bb29.i38 + %4 = add i32 %sfb.314.i, 1 ; <i32> [#uses=1] + %phitmp.i46 = icmp ugt i32 %4, 11 ; <i1> [#uses=1] + %indvar.next33.i = add i32 %indvar32.i, 1 ; <i32> [#uses=1] + br i1 %phitmp.i46, label %bb28.i37, label %bb29.i38 +}
diff --git a/src/LLVM/test/CodeGen/X86/loop-strength-reduce8.ll b/src/LLVM/test/CodeGen/X86/loop-strength-reduce8.ll new file mode 100644 index 0000000..1d04276 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/loop-strength-reduce8.ll
@@ -0,0 +1,84 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s + +; CHECK: leal 16(%eax), %edx +; CHECK: align +; CHECK: addl $4, %edx +; CHECK: decl %ecx +; CHECK: jne LBB0_2 + + %struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32 } + %struct.bitmap_element = type { %struct.bitmap_element*, %struct.bitmap_element*, i32, [2 x i64] } + %struct.bitmap_head_def = type { %struct.bitmap_element*, %struct.bitmap_element*, i32 } + %struct.branch_path = type { %struct.rtx_def*, i32 } + %struct.c_lang_decl = type <{ i8, [3 x i8] }> + %struct.constant_descriptor = type { %struct.constant_descriptor*, i8*, %struct.rtx_def*, { x86_fp80 } } + %struct.eh_region = type { %struct.eh_region*, %struct.eh_region*, %struct.eh_region*, i32, %struct.bitmap_head_def*, i32, { { %struct.eh_region*, %struct.eh_region*, %struct.eh_region*, %struct.rtx_def* } }, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* } + %struct.eh_status = type { %struct.eh_region*, %struct.eh_region**, %struct.eh_region*, %struct.eh_region*, %struct.tree_node*, %struct.rtx_def*, %struct.rtx_def*, i32, i32, %struct.varray_head_tag*, %struct.varray_head_tag*, %struct.varray_head_tag*, %struct.branch_path*, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* } + %struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.tree_node*, %struct.sequence_stack*, i32, i32, i8*, i32, i8*, %struct.tree_node**, %struct.rtx_def** } + %struct.equiv_table = type { %struct.rtx_def*, %struct.rtx_def* } + %struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* } + %struct.function = type { %struct.eh_status*, %struct.stmt_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, i8*, %struct.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.CUMULATIVE_ARGS, %struct.rtx_def*, %struct.rtx_def*, i8*, 
%struct.initial_value_struct*, i32, %struct.tree_node*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.tree_node*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i64, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.rtx_def*, i32, %struct.rtx_def**, %struct.temp_slot*, i32, i32, i32, %struct.var_refs_queue*, i32, i32, i8*, %struct.tree_node*, %struct.rtx_def*, i32, i32, %struct.machine_function*, i32, i32, %struct.language_function*, %struct.rtx_def*, i8, i8, i8 } + %struct.goto_fixup = type { %struct.goto_fixup*, %struct.rtx_def*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, i32, %struct.rtx_def*, %struct.tree_node* } + %struct.initial_value_struct = type { i32, i32, %struct.equiv_table* } + %struct.label_chain = type { %struct.label_chain*, %struct.tree_node* } + %struct.lang_decl = type { %struct.c_lang_decl, %struct.tree_node* } + %struct.language_function = type { %struct.stmt_tree_s, %struct.tree_node* } + %struct.machine_function = type { [59 x [3 x %struct.rtx_def*]], i32, i32 } + %struct.nesting = type { %struct.nesting*, %struct.nesting*, i32, %struct.rtx_def*, { { i32, %struct.rtx_def*, %struct.rtx_def*, %struct.nesting*, %struct.tree_node*, %struct.tree_node*, %struct.label_chain*, i32, i32, i32, i32, %struct.rtx_def*, %struct.tree_node** } } } + %struct.pool_constant = type { %struct.constant_descriptor*, %struct.pool_constant*, %struct.pool_constant*, %struct.rtx_def*, i32, i32, i32, i64, i32 } + %struct.rtunion = type { i64 } + %struct.rtx_def = type { i16, i8, i8, [1 x %struct.rtunion] } + %struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %struct.tree_node*, %struct.sequence_stack* } + %struct.stmt_status = type { %struct.nesting*, %struct.nesting*, %struct.nesting*, %struct.nesting*, %struct.nesting*, %struct.nesting*, i32, i32, %struct.tree_node*, %struct.rtx_def*, i32, i8*, i32, 
%struct.goto_fixup* } + %struct.stmt_tree_s = type { %struct.tree_node*, %struct.tree_node*, i8*, i32 } + %struct.temp_slot = type { %struct.temp_slot*, %struct.rtx_def*, %struct.rtx_def*, i32, i64, %struct.tree_node*, %struct.tree_node*, i8, i8, i32, i32, i64, i64 } + %struct.tree_common = type { %struct.tree_node*, %struct.tree_node*, i8, i8, i8, i8 } + %struct.tree_decl = type { %struct.tree_common, i8*, i32, i32, %struct.tree_node*, i8, i8, i8, i8, i8, i8, %struct.rtunion, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.rtx_def*, { %struct.function* }, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_decl* } + %struct.tree_exp = type { %struct.tree_common, i32, [1 x %struct.tree_node*] } + %struct.tree_node = type { %struct.tree_decl } + %struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* } + %struct.varasm_status = type { %struct.constant_descriptor**, %struct.pool_constant**, %struct.pool_constant*, %struct.pool_constant*, i64, %struct.rtx_def* } + %struct.varray_data = type { [1 x i64] } + %struct.varray_head_tag = type { i32, i32, i32, i8*, %struct.varray_data } +@lineno = internal global i32 0 ; <i32*> [#uses=1] +@tree_code_length = internal global [256 x i32] zeroinitializer +@llvm.used = appending global [1 x i8*] [ i8* bitcast (%struct.tree_node* (i32, ...)* @build_stmt to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +define %struct.tree_node* @build_stmt(i32 %code, ...) 
nounwind { +entry: + %p = alloca i8* ; <i8**> [#uses=3] + %p1 = bitcast i8** %p to i8* ; <i8*> [#uses=2] + call void @llvm.va_start(i8* %p1) + %0 = call fastcc %struct.tree_node* @make_node(i32 %code) nounwind ; <%struct.tree_node*> [#uses=2] + %1 = getelementptr [256 x i32]* @tree_code_length, i32 0, i32 %code ; <i32*> [#uses=1] + %2 = load i32* %1, align 4 ; <i32> [#uses=2] + %3 = load i32* @lineno, align 4 ; <i32> [#uses=1] + %4 = bitcast %struct.tree_node* %0 to %struct.tree_exp* ; <%struct.tree_exp*> [#uses=2] + %5 = getelementptr %struct.tree_exp* %4, i32 0, i32 1 ; <i32*> [#uses=1] + store i32 %3, i32* %5, align 4 + %6 = icmp sgt i32 %2, 0 ; <i1> [#uses=1] + br i1 %6, label %bb, label %bb3 + +bb: ; preds = %bb, %entry + %i.01 = phi i32 [ %indvar.next, %bb ], [ 0, %entry ] ; <i32> [#uses=2] + %7 = load i8** %p, align 4 ; <i8*> [#uses=2] + %8 = getelementptr i8* %7, i32 4 ; <i8*> [#uses=1] + store i8* %8, i8** %p, align 4 + %9 = bitcast i8* %7 to %struct.tree_node** ; <%struct.tree_node**> [#uses=1] + %10 = load %struct.tree_node** %9, align 4 ; <%struct.tree_node*> [#uses=1] + %11 = getelementptr %struct.tree_exp* %4, i32 0, i32 2, i32 %i.01 ; <%struct.tree_node**> [#uses=1] + store %struct.tree_node* %10, %struct.tree_node** %11, align 4 + %indvar.next = add i32 %i.01, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %indvar.next, %2 ; <i1> [#uses=1] + br i1 %exitcond, label %bb3, label %bb + +bb3: ; preds = %bb, %entry + call void @llvm.va_end(i8* %p1) + ret %struct.tree_node* %0 +} + +declare void @llvm.va_start(i8*) nounwind + +declare void @llvm.va_end(i8*) nounwind + +declare fastcc %struct.tree_node* @make_node(i32) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/lsr-delayed-fold.ll b/src/LLVM/test/CodeGen/X86/lsr-delayed-fold.ll new file mode 100644 index 0000000..8ed97e4 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/lsr-delayed-fold.ll
@@ -0,0 +1,178 @@ +; RUN: llc -march=x86-64 < %s > /dev/null + +; ScalarEvolution misses an opportunity to fold ((trunc x) + (trunc -x) + y), +; but LSR should tolerate this. +; rdar://7886751 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin11.0" + +define fastcc void @formatValue(i64 %arg5) nounwind { +bb12: ; preds = %bb11 + %t = trunc i64 %arg5 to i32 ; <i32> [#uses=1] + %t13 = sub i64 0, %arg5 ; <i64> [#uses=1] + %t14 = and i64 %t13, 4294967295 ; <i64> [#uses=1] + br label %bb15 + +bb15: ; preds = %bb21, %bb12 + %t16 = phi i64 [ 0, %bb12 ], [ %t23, %bb15 ] ; <i64> [#uses=2] + %t17 = mul i64 %t14, %t16 ; <i64> [#uses=1] + %t18 = add i64 undef, %t17 ; <i64> [#uses=1] + %t19 = trunc i64 %t18 to i32 ; <i32> [#uses=1] + %t22 = icmp eq i32 %t19, %t ; <i1> [#uses=1] + %t23 = add i64 %t16, 1 ; <i64> [#uses=1] + br i1 %t22, label %bb24, label %bb15 + +bb24: ; preds = %bb21, %bb11 + unreachable +} + +; ScalarEvolution should be able to correctly expand the crazy addrec here. 
+; PR6914 + +define void @int323() nounwind { +entry: + br label %for.cond + +for.cond: ; preds = %lbl_264, %for.inc, %entry + %g_263.tmp.1 = phi i8 [ undef, %entry ], [ %g_263.tmp.1, %for.cond ] + %p_95.addr.0 = phi i8 [ 0, %entry ], [ %add, %for.cond ] + %add = add i8 %p_95.addr.0, 1 ; <i8> [#uses=1] + br i1 undef, label %for.cond, label %lbl_264 + +lbl_264: ; preds = %if.end, %lbl_264.preheader + %g_263.tmp.0 = phi i8 [ %g_263.tmp.1, %for.cond ] ; <i8> [#uses=1] + %tmp7 = load i16* undef ; <i16> [#uses=1] + %conv8 = trunc i16 %tmp7 to i8 ; <i8> [#uses=1] + %mul.i = mul i8 %p_95.addr.0, %p_95.addr.0 ; <i8> [#uses=1] + %mul.i18 = mul i8 %mul.i, %conv8 ; <i8> [#uses=1] + %tobool12 = icmp eq i8 %mul.i18, 0 ; <i1> [#uses=1] + unreachable +} + +; LSR ends up going into conservative pruning mode; don't prune the solution +; so far that it becomes unsolvable though. +; PR7077 + +%struct.Bu = type { i32, i32, i32 } + +define void @_Z3fooP2Bui(%struct.Bu* nocapture %bu) { +entry: + br label %for.body + +for.body: ; preds = %for.inc131, %entry + %indvar = phi i64 [ %indvar.next, %for.inc131 ], [ 0, %entry ] ; <i64> [#uses=3] + br i1 undef, label %for.inc131, label %lor.lhs.false + +lor.lhs.false: ; preds = %for.body + %tmp15 = add i64 %indvar, 1 ; <i64> [#uses=1] + %tmp17 = add i64 %indvar, 2 ; <i64> [#uses=1] + %tmp19 = add i64 %indvar, 3 ; <i64> [#uses=1] + %tmp21 = add i64 %indvar, 4 ; <i64> [#uses=1] + %tmp23 = add i64 %indvar, 5 ; <i64> [#uses=1] + %tmp25 = add i64 %indvar, 6 ; <i64> [#uses=1] + %tmp27 = add i64 %indvar, 7 ; <i64> [#uses=1] + %tmp29 = add i64 %indvar, 8 ; <i64> [#uses=1] + %tmp31 = add i64 %indvar, 9 ; <i64> [#uses=1] + %tmp35 = add i64 %indvar, 11 ; <i64> [#uses=1] + %tmp37 = add i64 %indvar, 12 ; <i64> [#uses=1] + %tmp39 = add i64 %indvar, 13 ; <i64> [#uses=1] + %tmp41 = add i64 %indvar, 14 ; <i64> [#uses=1] + %tmp43 = add i64 %indvar, 15 ; <i64> [#uses=1] + %tmp45 = add i64 %indvar, 16 ; <i64> [#uses=1] + %tmp47 = add i64 %indvar, 17 ; <i64> 
[#uses=1] + %mul = trunc i64 %indvar to i32 ; <i32> [#uses=1] + %add22 = trunc i64 %tmp15 to i32 ; <i32> [#uses=1] + %add28 = trunc i64 %tmp17 to i32 ; <i32> [#uses=1] + %add34 = trunc i64 %tmp19 to i32 ; <i32> [#uses=1] + %add40 = trunc i64 %tmp21 to i32 ; <i32> [#uses=1] + %add46 = trunc i64 %tmp23 to i32 ; <i32> [#uses=1] + %add52 = trunc i64 %tmp25 to i32 ; <i32> [#uses=1] + %add58 = trunc i64 %tmp27 to i32 ; <i32> [#uses=1] + %add64 = trunc i64 %tmp29 to i32 ; <i32> [#uses=1] + %add70 = trunc i64 %tmp31 to i32 ; <i32> [#uses=1] + %add82 = trunc i64 %tmp35 to i32 ; <i32> [#uses=1] + %add88 = trunc i64 %tmp37 to i32 ; <i32> [#uses=1] + %add94 = trunc i64 %tmp39 to i32 ; <i32> [#uses=1] + %add100 = trunc i64 %tmp41 to i32 ; <i32> [#uses=1] + %add106 = trunc i64 %tmp43 to i32 ; <i32> [#uses=1] + %add112 = trunc i64 %tmp45 to i32 ; <i32> [#uses=1] + %add118 = trunc i64 %tmp47 to i32 ; <i32> [#uses=1] + %tmp10 = getelementptr %struct.Bu* %bu, i64 %indvar, i32 2 ; <i32*> [#uses=1] + %tmp11 = load i32* %tmp10 ; <i32> [#uses=0] + tail call void undef(i32 %add22) + tail call void undef(i32 %add28) + tail call void undef(i32 %add34) + tail call void undef(i32 %add40) + tail call void undef(i32 %add46) + tail call void undef(i32 %add52) + tail call void undef(i32 %add58) + tail call void undef(i32 %add64) + tail call void undef(i32 %add70) + tail call void undef(i32 %add82) + tail call void undef(i32 %add88) + tail call void undef(i32 %add94) + tail call void undef(i32 %add100) + tail call void undef(i32 %add106) + tail call void undef(i32 %add112) + tail call void undef(i32 %add118) + br label %for.body123 + +for.body123: ; preds = %for.body123, %lor.lhs.false + %j.03 = phi i32 [ 0, %lor.lhs.false ], [ %inc, %for.body123 ] ; <i32> [#uses=2] + %add129 = add i32 %mul, %j.03 ; <i32> [#uses=1] + tail call void undef(i32 %add129) + %inc = add nsw i32 %j.03, 1 ; <i32> [#uses=1] + br i1 undef, label %for.inc131, label %for.body123 + +for.inc131: ; preds = %for.body123, 
%for.body + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1] + br i1 undef, label %for.end134, label %for.body + +for.end134: ; preds = %for.inc131 + ret void +} + +; LSR needs to remember inserted instructions even in postinc mode, because +; there could be multiple subexpressions within a single expansion which +; require insert point adjustment. +; PR7306 + +define fastcc i32 @GetOptimum() nounwind { +bb: + br label %bb1 + +bb1: ; preds = %bb1, %bb + %t = phi i32 [ 0, %bb ], [ %t2, %bb1 ] ; <i32> [#uses=1] + %t2 = add i32 %t, undef ; <i32> [#uses=3] + br i1 undef, label %bb1, label %bb3 + +bb3: ; preds = %bb1 + %t4 = add i32 undef, -1 ; <i32> [#uses=1] + br label %bb5 + +bb5: ; preds = %bb16, %bb3 + %t6 = phi i32 [ %t17, %bb16 ], [ 0, %bb3 ] ; <i32> [#uses=3] + %t7 = add i32 undef, %t6 ; <i32> [#uses=2] + %t8 = add i32 %t4, %t6 ; <i32> [#uses=1] + br i1 undef, label %bb9, label %bb10 + +bb9: ; preds = %bb5 + br label %bb10 + +bb10: ; preds = %bb9, %bb5 + br i1 undef, label %bb11, label %bb16 + +bb11: ; preds = %bb10 + %t12 = icmp ugt i32 %t7, %t2 ; <i1> [#uses=1] + %t13 = select i1 %t12, i32 %t2, i32 %t7 ; <i32> [#uses=1] + br label %bb14 + +bb14: ; preds = %bb11 + store i32 %t13, i32* null + ret i32 %t8 + +bb16: ; preds = %bb10 + %t17 = add i32 %t6, 1 ; <i32> [#uses=1] + br label %bb5 +}
diff --git a/src/LLVM/test/CodeGen/X86/lsr-i386.ll b/src/LLVM/test/CodeGen/X86/lsr-i386.ll new file mode 100644 index 0000000..02baf20 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/lsr-i386.ll
@@ -0,0 +1,44 @@ +; RUN: llc -march=x86 < %s | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +target triple = "i386-pc-linux-gnu" +; PR7651 + +; CHECK: align +; CHECK: align +; CHECK: align +; CHECK: movl $0, (%e +; CHECK-NEXT: addl $4, %e +; CHECK-NEXT: decl %e +; CHECK-NEXT: jne + +%struct.anon = type { [72 x i32], i32 } + +@mp2grad_ = external global %struct.anon + +define void @chomp2g_setup_(i32 %n, i32 %m) nounwind { +entry: + br label %bb1 + +bb1: ; preds = %bb6, %bb + %indvar11 = phi i32 [ %indvar.next12, %bb6 ], [ 0, %entry ] ; <i32> [#uses=2] + %tmp21 = add i32 %indvar11, 1 ; <i32> [#uses=1] + %t = load i32* getelementptr inbounds (%struct.anon* @mp2grad_, i32 0, i32 1) + %tmp15 = mul i32 %n, %t ; <i32> [#uses=1] + %tmp16 = add i32 %tmp21, %tmp15 ; <i32> [#uses=1] + %tmp17 = shl i32 %tmp16, 3 ; <i32> [#uses=1] + %tmp18 = add i32 %tmp17, -8 ; <i32> [#uses=1] + br label %bb2 + +bb2: ; preds = %bb2, %bb2.preheader + %indvar = phi i32 [ 0, %bb1 ], [ %indvar.next, %bb2 ] ; <i32> [#uses=2] + %tmp19 = add i32 %tmp18, %indvar ; <i32> [#uses=1] + %scevgep = getelementptr %struct.anon* @mp2grad_, i32 0, i32 0, i32 %tmp19 ; <i32*> [#uses=1] + store i32 0, i32* %scevgep + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1] + %c = icmp ne i32 %indvar.next, %m + br i1 %c, label %bb2, label %bb6 + +bb6: ; preds = %bb2, %bb1 + %indvar.next12 = add i32 %indvar11, 1 ; <i32> [#uses=1] + br label %bb1 +}
diff --git a/src/LLVM/test/CodeGen/X86/lsr-interesting-step.ll b/src/LLVM/test/CodeGen/X86/lsr-interesting-step.ll new file mode 100644 index 0000000..d1de051 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/lsr-interesting-step.ll
@@ -0,0 +1,51 @@ +; RUN: llc < %s -march=x86-64 -relocation-model=static -mtriple=x86_64-unknown-linux-gnu -asm-verbose=0 | FileCheck %s + +; The inner loop should require only one add (and no leas either). +; rdar://8100380 + +; CHECK: BB0_3: +; CHECK-NEXT: movb $0, flags(%rdx) +; CHECK-NEXT: addq %rcx, %rdx +; CHECK-NEXT: cmpq $8192, %rdx +; CHECK-NEXT: jl + +@flags = external global [8192 x i8], align 16 ; <[8192 x i8]*> [#uses=1] + +define void @foo() nounwind { +entry: + %tmp = icmp slt i64 2, 8192 ; <i1> [#uses=1] + br i1 %tmp, label %bb, label %bb21 + +bb: ; preds = %entry + br label %bb7 + +bb7: ; preds = %bb, %bb17 + %tmp8 = phi i64 [ %tmp18, %bb17 ], [ 2, %bb ] ; <i64> [#uses=2] + %tmp9 = icmp slt i64 2, 8192 ; <i1> [#uses=1] + br i1 %tmp9, label %bb10, label %bb17 + +bb10: ; preds = %bb7 + br label %bb11 + +bb11: ; preds = %bb10, %bb11 + %tmp12 = phi i64 [ %tmp14, %bb11 ], [ 2, %bb10 ] ; <i64> [#uses=2] + %tmp13 = getelementptr inbounds [8192 x i8]* @flags, i64 0, i64 %tmp12 ; <i8*> [#uses=1] + store i8 0, i8* %tmp13, align 1 + %tmp14 = add nsw i64 %tmp12, %tmp8 ; <i64> [#uses=2] + %tmp15 = icmp slt i64 %tmp14, 8192 ; <i1> [#uses=1] + br i1 %tmp15, label %bb11, label %bb16 + +bb16: ; preds = %bb11 + br label %bb17 + +bb17: ; preds = %bb16, %bb7 + %tmp18 = add nsw i64 %tmp8, 1 ; <i64> [#uses=2] + %tmp19 = icmp slt i64 %tmp18, 8192 ; <i1> [#uses=1] + br i1 %tmp19, label %bb7, label %bb20 + +bb20: ; preds = %bb17 + br label %bb21 + +bb21: ; preds = %bb20, %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/lsr-loop-exit-cond.ll b/src/LLVM/test/CodeGen/X86/lsr-loop-exit-cond.ll new file mode 100644 index 0000000..938023f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/lsr-loop-exit-cond.ll
@@ -0,0 +1,137 @@ +; RUN: llc -march=x86-64 < %s | FileCheck %s + +; CHECK: decq +; CHECK-NEXT: jne + +@Te0 = external global [256 x i32] ; <[256 x i32]*> [#uses=5] +@Te1 = external global [256 x i32] ; <[256 x i32]*> [#uses=4] +@Te3 = external global [256 x i32] ; <[256 x i32]*> [#uses=2] + +define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r) nounwind { +entry: + %0 = load i32* %rk, align 4 ; <i32> [#uses=1] + %1 = getelementptr i32* %rk, i64 1 ; <i32*> [#uses=1] + %2 = load i32* %1, align 4 ; <i32> [#uses=1] + %tmp15 = add i32 %r, -1 ; <i32> [#uses=1] + %tmp.16 = zext i32 %tmp15 to i64 ; <i64> [#uses=2] + br label %bb + +bb: ; preds = %bb1, %entry + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %bb1 ] ; <i64> [#uses=3] + %s1.0 = phi i32 [ %2, %entry ], [ %56, %bb1 ] ; <i32> [#uses=2] + %s0.0 = phi i32 [ %0, %entry ], [ %43, %bb1 ] ; <i32> [#uses=2] + %tmp18 = shl i64 %indvar, 4 ; <i64> [#uses=4] + %rk26 = bitcast i32* %rk to i8* ; <i8*> [#uses=6] + %3 = lshr i32 %s0.0, 24 ; <i32> [#uses=1] + %4 = zext i32 %3 to i64 ; <i64> [#uses=1] + %5 = getelementptr [256 x i32]* @Te0, i64 0, i64 %4 ; <i32*> [#uses=1] + %6 = load i32* %5, align 4 ; <i32> [#uses=1] + %7 = lshr i32 %s1.0, 16 ; <i32> [#uses=1] + %8 = and i32 %7, 255 ; <i32> [#uses=1] + %9 = zext i32 %8 to i64 ; <i64> [#uses=1] + %10 = getelementptr [256 x i32]* @Te1, i64 0, i64 %9 ; <i32*> [#uses=1] + %11 = load i32* %10, align 4 ; <i32> [#uses=1] + %ctg2.sum2728 = or i64 %tmp18, 8 ; <i64> [#uses=1] + %12 = getelementptr i8* %rk26, i64 %ctg2.sum2728 ; <i8*> [#uses=1] + %13 = bitcast i8* %12 to i32* ; <i32*> [#uses=1] + %14 = load i32* %13, align 4 ; <i32> [#uses=1] + %15 = xor i32 %11, %6 ; <i32> [#uses=1] + %16 = xor i32 %15, %14 ; <i32> [#uses=3] + %17 = lshr i32 %s1.0, 24 ; <i32> [#uses=1] + %18 = zext i32 %17 to i64 ; <i64> [#uses=1] + %19 = getelementptr [256 x i32]* @Te0, i64 0, i64 %18 ; <i32*> [#uses=1] + %20 = load i32* %19, align 4 ; <i32> [#uses=1] + %21 = and i32 
%s0.0, 255 ; <i32> [#uses=1] + %22 = zext i32 %21 to i64 ; <i64> [#uses=1] + %23 = getelementptr [256 x i32]* @Te3, i64 0, i64 %22 ; <i32*> [#uses=1] + %24 = load i32* %23, align 4 ; <i32> [#uses=1] + %ctg2.sum2930 = or i64 %tmp18, 12 ; <i64> [#uses=1] + %25 = getelementptr i8* %rk26, i64 %ctg2.sum2930 ; <i8*> [#uses=1] + %26 = bitcast i8* %25 to i32* ; <i32*> [#uses=1] + %27 = load i32* %26, align 4 ; <i32> [#uses=1] + %28 = xor i32 %24, %20 ; <i32> [#uses=1] + %29 = xor i32 %28, %27 ; <i32> [#uses=4] + %30 = lshr i32 %16, 24 ; <i32> [#uses=1] + %31 = zext i32 %30 to i64 ; <i64> [#uses=1] + %32 = getelementptr [256 x i32]* @Te0, i64 0, i64 %31 ; <i32*> [#uses=1] + %33 = load i32* %32, align 4 ; <i32> [#uses=2] + %exitcond = icmp eq i64 %indvar, %tmp.16 ; <i1> [#uses=1] + br i1 %exitcond, label %bb2, label %bb1 + +bb1: ; preds = %bb + %ctg2.sum31 = add i64 %tmp18, 16 ; <i64> [#uses=1] + %34 = getelementptr i8* %rk26, i64 %ctg2.sum31 ; <i8*> [#uses=1] + %35 = bitcast i8* %34 to i32* ; <i32*> [#uses=1] + %36 = lshr i32 %29, 16 ; <i32> [#uses=1] + %37 = and i32 %36, 255 ; <i32> [#uses=1] + %38 = zext i32 %37 to i64 ; <i64> [#uses=1] + %39 = getelementptr [256 x i32]* @Te1, i64 0, i64 %38 ; <i32*> [#uses=1] + %40 = load i32* %39, align 4 ; <i32> [#uses=1] + %41 = load i32* %35, align 4 ; <i32> [#uses=1] + %42 = xor i32 %40, %33 ; <i32> [#uses=1] + %43 = xor i32 %42, %41 ; <i32> [#uses=1] + %44 = lshr i32 %29, 24 ; <i32> [#uses=1] + %45 = zext i32 %44 to i64 ; <i64> [#uses=1] + %46 = getelementptr [256 x i32]* @Te0, i64 0, i64 %45 ; <i32*> [#uses=1] + %47 = load i32* %46, align 4 ; <i32> [#uses=1] + %48 = and i32 %16, 255 ; <i32> [#uses=1] + %49 = zext i32 %48 to i64 ; <i64> [#uses=1] + %50 = getelementptr [256 x i32]* @Te3, i64 0, i64 %49 ; <i32*> [#uses=1] + %51 = load i32* %50, align 4 ; <i32> [#uses=1] + %ctg2.sum32 = add i64 %tmp18, 20 ; <i64> [#uses=1] + %52 = getelementptr i8* %rk26, i64 %ctg2.sum32 ; <i8*> [#uses=1] + %53 = bitcast i8* %52 to i32* ; <i32*> 
[#uses=1] + %54 = load i32* %53, align 4 ; <i32> [#uses=1] + %55 = xor i32 %51, %47 ; <i32> [#uses=1] + %56 = xor i32 %55, %54 ; <i32> [#uses=1] + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1] + br label %bb + +bb2: ; preds = %bb + %tmp10 = shl i64 %tmp.16, 4 ; <i64> [#uses=2] + %ctg2.sum = add i64 %tmp10, 16 ; <i64> [#uses=1] + %tmp1213 = getelementptr i8* %rk26, i64 %ctg2.sum ; <i8*> [#uses=1] + %57 = bitcast i8* %tmp1213 to i32* ; <i32*> [#uses=1] + %58 = and i32 %33, -16777216 ; <i32> [#uses=1] + %59 = lshr i32 %29, 16 ; <i32> [#uses=1] + %60 = and i32 %59, 255 ; <i32> [#uses=1] + %61 = zext i32 %60 to i64 ; <i64> [#uses=1] + %62 = getelementptr [256 x i32]* @Te1, i64 0, i64 %61 ; <i32*> [#uses=1] + %63 = load i32* %62, align 4 ; <i32> [#uses=1] + %64 = and i32 %63, 16711680 ; <i32> [#uses=1] + %65 = or i32 %64, %58 ; <i32> [#uses=1] + %66 = load i32* %57, align 4 ; <i32> [#uses=1] + %67 = xor i32 %65, %66 ; <i32> [#uses=2] + %68 = lshr i32 %29, 8 ; <i32> [#uses=1] + %69 = zext i32 %68 to i64 ; <i64> [#uses=1] + %70 = getelementptr [256 x i32]* @Te0, i64 0, i64 %69 ; <i32*> [#uses=1] + %71 = load i32* %70, align 4 ; <i32> [#uses=1] + %72 = and i32 %71, -16777216 ; <i32> [#uses=1] + %73 = and i32 %16, 255 ; <i32> [#uses=1] + %74 = zext i32 %73 to i64 ; <i64> [#uses=1] + %75 = getelementptr [256 x i32]* @Te1, i64 0, i64 %74 ; <i32*> [#uses=1] + %76 = load i32* %75, align 4 ; <i32> [#uses=1] + %77 = and i32 %76, 16711680 ; <i32> [#uses=1] + %78 = or i32 %77, %72 ; <i32> [#uses=1] + %ctg2.sum25 = add i64 %tmp10, 20 ; <i64> [#uses=1] + %79 = getelementptr i8* %rk26, i64 %ctg2.sum25 ; <i8*> [#uses=1] + %80 = bitcast i8* %79 to i32* ; <i32*> [#uses=1] + %81 = load i32* %80, align 4 ; <i32> [#uses=1] + %82 = xor i32 %78, %81 ; <i32> [#uses=2] + %83 = lshr i32 %67, 24 ; <i32> [#uses=1] + %84 = trunc i32 %83 to i8 ; <i8> [#uses=1] + store i8 %84, i8* %out, align 1 + %85 = lshr i32 %67, 16 ; <i32> [#uses=1] + %86 = trunc i32 %85 to i8 ; <i8> [#uses=1] + %87 = 
getelementptr i8* %out, i64 1 ; <i8*> [#uses=1] + store i8 %86, i8* %87, align 1 + %88 = getelementptr i8* %out, i64 4 ; <i8*> [#uses=1] + %89 = lshr i32 %82, 24 ; <i32> [#uses=1] + %90 = trunc i32 %89 to i8 ; <i8> [#uses=1] + store i8 %90, i8* %88, align 1 + %91 = lshr i32 %82, 16 ; <i32> [#uses=1] + %92 = trunc i32 %91 to i8 ; <i8> [#uses=1] + %93 = getelementptr i8* %out, i64 5 ; <i8*> [#uses=1] + store i8 %92, i8* %93, align 1 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/lsr-negative-stride.ll b/src/LLVM/test/CodeGen/X86/lsr-negative-stride.ll new file mode 100644 index 0000000..8f86e60 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/lsr-negative-stride.ll
@@ -0,0 +1,51 @@ +; RUN: llc < %s -march=x86 > %t +; RUN: not grep neg %t +; RUN: not grep sub.*esp %t +; RUN: not grep esi %t +; RUN: not grep push %t + +; This corresponds to: +;int t(int a, int b) { +; while (a != b) { +; if (a > b) +; a -= b; +; else +; b -= a; +; } +; return a; +;} + + +define i32 @t(i32 %a, i32 %b) nounwind { +entry: + %tmp1434 = icmp eq i32 %a, %b ; <i1> [#uses=1] + br i1 %tmp1434, label %bb17, label %bb.outer + +bb.outer: ; preds = %cond_false, %entry + %b_addr.021.0.ph = phi i32 [ %b, %entry ], [ %tmp10, %cond_false ] ; <i32> [#uses=5] + %a_addr.026.0.ph = phi i32 [ %a, %entry ], [ %a_addr.026.0, %cond_false ] ; <i32> [#uses=1] + br label %bb + +bb: ; preds = %cond_true, %bb.outer + %indvar = phi i32 [ 0, %bb.outer ], [ %indvar.next, %cond_true ] ; <i32> [#uses=2] + %tmp. = sub i32 0, %b_addr.021.0.ph ; <i32> [#uses=1] + %tmp.40 = mul i32 %indvar, %tmp. ; <i32> [#uses=1] + %a_addr.026.0 = add i32 %tmp.40, %a_addr.026.0.ph ; <i32> [#uses=6] + %tmp3 = icmp sgt i32 %a_addr.026.0, %b_addr.021.0.ph ; <i1> [#uses=1] + br i1 %tmp3, label %cond_true, label %cond_false + +cond_true: ; preds = %bb + %tmp7 = sub i32 %a_addr.026.0, %b_addr.021.0.ph ; <i32> [#uses=2] + %tmp1437 = icmp eq i32 %tmp7, %b_addr.021.0.ph ; <i1> [#uses=1] + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1] + br i1 %tmp1437, label %bb17, label %bb + +cond_false: ; preds = %bb + %tmp10 = sub i32 %b_addr.021.0.ph, %a_addr.026.0 ; <i32> [#uses=2] + %tmp14 = icmp eq i32 %a_addr.026.0, %tmp10 ; <i1> [#uses=1] + br i1 %tmp14, label %bb17, label %bb.outer + +bb17: ; preds = %cond_false, %cond_true, %entry + %a_addr.026.1 = phi i32 [ %a, %entry ], [ %tmp7, %cond_true ], [ %a_addr.026.0, %cond_false ] ; <i32> [#uses=1] + ret i32 %a_addr.026.1 +}
diff --git a/src/LLVM/test/CodeGen/X86/lsr-nonaffine.ll b/src/LLVM/test/CodeGen/X86/lsr-nonaffine.ll new file mode 100644 index 0000000..d0d2bbd --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/lsr-nonaffine.ll
@@ -0,0 +1,30 @@ +; RUN: llc -asm-verbose=false -march=x86-64 -mtriple=x86_64-apple-darwin -o - < %s | FileCheck %s + +; LSR should leave non-affine expressions alone because it currently +; doesn't know how to do anything with them, and when it tries, it +; gets SCEVExpander's current expansion for them, which is suboptimal. + +; CHECK: xorl %eax, %eax +; CHECK-NEXT: align +; CHECK-NEXT: BB0_1: +; CHECK-NEXT: movq %rax, (%rdx) +; CHECK-NEXT: addq %rsi, %rax +; CHECK-NEXT: cmpq %rdi, %rax +; CHECK-NEXT: jl +; CHECK-NEXT: imulq %rax, %rax +; CHECK-NEXT: ret +define i64 @foo(i64 %n, i64 %s, i64* %p) nounwind { +entry: + br label %loop + +loop: + %i = phi i64 [ 0, %entry ], [ %i.next, %loop ] + volatile store i64 %i, i64* %p + %i.next = add i64 %i, %s + %c = icmp slt i64 %i.next, %n + br i1 %c, label %loop, label %exit + +exit: + %mul = mul i64 %i.next, %i.next + ret i64 %mul +}
diff --git a/src/LLVM/test/CodeGen/X86/lsr-normalization.ll b/src/LLVM/test/CodeGen/X86/lsr-normalization.ll new file mode 100644 index 0000000..932141d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/lsr-normalization.ll
@@ -0,0 +1,99 @@ +; RUN: llc < %s -march=x86-64 | grep div | count 1 +; rdar://8168938 + +; This testcase involves SCEV normalization with the exit value from +; one loop involved with the increment value for an addrec on another +; loop. The expression should be properly normalized and simplified, +; and require only a single division. + +%0 = type { %0*, %0* } + +@0 = private constant [13 x i8] c"Result: %lu\0A\00" ; <[13 x i8]*> [#uses=1] +@1 = internal constant [5 x i8] c"Huh?\00" ; <[5 x i8]*> [#uses=1] + +define i32 @main(i32 %arg, i8** nocapture %arg1) nounwind { +bb: + %tmp = alloca %0, align 8 ; <%0*> [#uses=11] + %tmp2 = bitcast %0* %tmp to i8* ; <i8*> [#uses=1] + call void @llvm.memset.p0i8.i64(i8* %tmp2, i8 0, i64 16, i32 8, i1 false) nounwind + %tmp3 = getelementptr inbounds %0* %tmp, i64 0, i32 0 ; <%0**> [#uses=3] + store %0* %tmp, %0** %tmp3 + %tmp4 = getelementptr inbounds %0* %tmp, i64 0, i32 1 ; <%0**> [#uses=1] + store %0* %tmp, %0** %tmp4 + %tmp5 = call noalias i8* @_Znwm(i64 24) nounwind ; <i8*> [#uses=2] + %tmp6 = getelementptr inbounds i8* %tmp5, i64 16 ; <i8*> [#uses=2] + %tmp7 = icmp eq i8* %tmp6, null ; <i1> [#uses=1] + br i1 %tmp7, label %bb10, label %bb8 + +bb8: ; preds = %bb + %tmp9 = bitcast i8* %tmp6 to i32* ; <i32*> [#uses=1] + store i32 1, i32* %tmp9 + br label %bb10 + +bb10: ; preds = %bb8, %bb + %tmp11 = bitcast i8* %tmp5 to %0* ; <%0*> [#uses=1] + call void @_ZNSt15_List_node_base4hookEPS_(%0* %tmp11, %0* %tmp) nounwind + %tmp12 = load %0** %tmp3 ; <%0*> [#uses=3] + %tmp13 = icmp eq %0* %tmp12, %tmp ; <i1> [#uses=1] + br i1 %tmp13, label %bb14, label %bb16 + +bb14: ; preds = %bb10 + %tmp15 = call i32 @puts(i8* getelementptr inbounds ([5 x i8]* @1, i64 0, i64 0)) + br label %bb35 + +bb16: ; preds = %bb16, %bb10 + %tmp17 = phi i64 [ %tmp22, %bb16 ], [ 0, %bb10 ] ; <i64> [#uses=1] + %tmp18 = phi %0* [ %tmp20, %bb16 ], [ %tmp12, %bb10 ] ; <%0*> [#uses=1] + %tmp19 = getelementptr inbounds %0* %tmp18, i64 0, i32 0 ; <%0**> [#uses=1] + 
%tmp20 = load %0** %tmp19 ; <%0*> [#uses=2] + %tmp21 = icmp eq %0* %tmp20, %tmp ; <i1> [#uses=1] + %tmp22 = add i64 %tmp17, 1 ; <i64> [#uses=2] + br i1 %tmp21, label %bb23, label %bb16 + +bb23: ; preds = %bb16 + %tmp24 = udiv i64 100, %tmp22 ; <i64> [#uses=1] + br label %bb25 + +bb25: ; preds = %bb25, %bb23 + %tmp26 = phi i64 [ %tmp31, %bb25 ], [ 0, %bb23 ] ; <i64> [#uses=1] + %tmp27 = phi %0* [ %tmp29, %bb25 ], [ %tmp12, %bb23 ] ; <%0*> [#uses=1] + %tmp28 = getelementptr inbounds %0* %tmp27, i64 0, i32 0 ; <%0**> [#uses=1] + %tmp29 = load %0** %tmp28 ; <%0*> [#uses=2] + %tmp30 = icmp eq %0* %tmp29, %tmp ; <i1> [#uses=1] + %tmp31 = add i64 %tmp26, 1 ; <i64> [#uses=2] + br i1 %tmp30, label %bb32, label %bb25 + +bb32: ; preds = %bb25 + %tmp33 = mul i64 %tmp31, %tmp24 ; <i64> [#uses=1] + %tmp34 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @0, i64 0, i64 0), i64 %tmp33) nounwind + br label %bb35 + +bb35: ; preds = %bb32, %bb14 + %tmp36 = load %0** %tmp3 ; <%0*> [#uses=2] + %tmp37 = icmp eq %0* %tmp36, %tmp ; <i1> [#uses=1] + br i1 %tmp37, label %bb44, label %bb38 + +bb38: ; preds = %bb38, %bb35 + %tmp39 = phi %0* [ %tmp41, %bb38 ], [ %tmp36, %bb35 ] ; <%0*> [#uses=2] + %tmp40 = getelementptr inbounds %0* %tmp39, i64 0, i32 0 ; <%0**> [#uses=1] + %tmp41 = load %0** %tmp40 ; <%0*> [#uses=2] + %tmp42 = bitcast %0* %tmp39 to i8* ; <i8*> [#uses=1] + call void @_ZdlPv(i8* %tmp42) nounwind + %tmp43 = icmp eq %0* %tmp41, %tmp ; <i1> [#uses=1] + br i1 %tmp43, label %bb44, label %bb38 + +bb44: ; preds = %bb38, %bb35 + ret i32 0 +} + +declare i32 @printf(i8* nocapture, ...) nounwind + +declare void @_ZNSt15_List_node_base4hookEPS_(%0*, %0*) + +declare noalias i8* @_Znwm(i64) + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind + +declare void @_ZdlPv(i8*) nounwind + +declare i32 @puts(i8* nocapture) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/lsr-overflow.ll b/src/LLVM/test/CodeGen/X86/lsr-overflow.ll new file mode 100644 index 0000000..09c1c07 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/lsr-overflow.ll
@@ -0,0 +1,45 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s + +; The comparison uses the pre-inc value, which could lead LSR to +; try to compute -INT64_MIN. + +; CHECK: movabsq $-9223372036854775808, %rax +; CHECK: cmpq %rax, +; CHECK: sete %al + +declare i64 @bar() + +define i1 @foo() nounwind { +entry: + br label %for.cond.i + +for.cond.i: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.cond.i ] + %t = call i64 @bar() + %indvar.next = add i64 %indvar, 1 + %s = icmp ne i64 %indvar.next, %t + br i1 %s, label %for.cond.i, label %__ABContainsLabel.exit + +__ABContainsLabel.exit: + %cmp = icmp eq i64 %indvar, 9223372036854775807 + ret i1 %cmp +} + +define void @func_37() noreturn nounwind readonly { +entry: + br label %for.body + +for.body: ; preds = %for.inc8, %entry + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.inc8 ] + %sub.i = add i64 undef, %indvar + %cmp.i = icmp eq i64 %sub.i, -9223372036854775808 + br i1 undef, label %for.inc8, label %for.cond4 + +for.cond4: ; preds = %for.cond4, %for.body + br label %for.cond4 + +for.inc8: ; preds = %for.body + %indvar.next = add i64 %indvar, 1 + br label %for.body +}
diff --git a/src/LLVM/test/CodeGen/X86/lsr-quadratic-expand.ll b/src/LLVM/test/CodeGen/X86/lsr-quadratic-expand.ll new file mode 100644 index 0000000..2bbb470 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/lsr-quadratic-expand.ll
@@ -0,0 +1,22 @@ +; RUN: llc -march=x86-64 < %s + +define void @dw2102_i2c_transfer() nounwind { +entry: + br label %bb + +bb: ; preds = %bb, %entry + %z = phi i64 [ 0, %entry ], [ %z3, %bb ] + %z1 = phi i16 [ undef, %entry ], [ %z6, %bb ] + %z2 = phi i32 [ 0, %entry ], [ %z8, %bb ] + %z3 = add i64 %z, 1 + %z4 = zext i16 %z1 to i32 + %z5 = add nsw i32 %z4, %z2 + %z6 = trunc i32 %z5 to i16 + call fastcc void @dw210x_op_rw(i16 zeroext %z6) + %z7 = getelementptr i8* null, i64 %z + store i8 undef, i8* %z7, align 1 + %z8 = add nsw i32 %z2, 1 + br label %bb +} + +declare fastcc void @dw210x_op_rw(i16 zeroext) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/lsr-redundant-addressing.ll b/src/LLVM/test/CodeGen/X86/lsr-redundant-addressing.ll new file mode 100644 index 0000000..cb0ac8b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/lsr-redundant-addressing.ll
@@ -0,0 +1,51 @@ +; RUN: llc -march=x86-64 < %s | FileCheck %s +; rdar://9081094 + +; LSR shouldn't create lots of redundant address computations. + +%0 = type { i32, [3 x i32] } +%1 = type { i32 (i32, i32, i32)*, i32, i32, [3 x i32], i8*, i8*, i8* } + +@pgm = external hidden unnamed_addr global [5 x %0], align 32 +@isa = external hidden unnamed_addr constant [13 x %1], align 32 + +define void @main_bb.i() nounwind { +; CHECK: main_bb.i: +; CHECK-NOT: ret +; CHECK: addq $-16, +; CHECK-NOT: ret +; CHECK: ret + +bb: + br label %bb38 + +bb38: ; preds = %bb200, %bb + %tmp39 = phi i64 [ %tmp201, %bb200 ], [ 0, %bb ] + %tmp40 = sub i64 0, %tmp39 + %tmp47 = getelementptr [5 x %0]* @pgm, i64 0, i64 %tmp40, i32 0 + %tmp34 = load i32* %tmp47, align 16 + %tmp203 = icmp slt i32 %tmp34, 12 + br i1 %tmp203, label %bb215, label %bb200 + +bb200: ; preds = %bb38 + %tmp201 = add i64 %tmp39, 1 + br label %bb38 + +bb215: ; preds = %bb38 + %tmp50 = getelementptr [5 x %0]* @pgm, i64 0, i64 %tmp40, i32 1, i64 2 + %tmp49 = getelementptr [5 x %0]* @pgm, i64 0, i64 %tmp40, i32 1, i64 1 + %tmp48 = getelementptr [5 x %0]* @pgm, i64 0, i64 %tmp40, i32 1, i64 0 + %tmp216 = add nsw i32 %tmp34, 1 + store i32 %tmp216, i32* %tmp47, align 16 + %tmp217 = sext i32 %tmp216 to i64 + %tmp218 = getelementptr inbounds [13 x %1]* @isa, i64 0, i64 %tmp217, i32 3, i64 0 + %tmp219 = load i32* %tmp218, align 8 + store i32 %tmp219, i32* %tmp48, align 4 + %tmp220 = getelementptr inbounds [13 x %1]* @isa, i64 0, i64 %tmp217, i32 3, i64 1 + %tmp221 = load i32* %tmp220, align 4 + store i32 %tmp221, i32* %tmp49, align 4 + %tmp222 = getelementptr inbounds [13 x %1]* @isa, i64 0, i64 %tmp217, i32 3, i64 2 + %tmp223 = load i32* %tmp222, align 8 + store i32 %tmp223, i32* %tmp50, align 4 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/lsr-reuse-trunc.ll b/src/LLVM/test/CodeGen/X86/lsr-reuse-trunc.ll new file mode 100644 index 0000000..1f87089 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/lsr-reuse-trunc.ll
@@ -0,0 +1,62 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s + +; Full strength reduction wouldn't reduce register pressure, so LSR should +; stick with indexing here. + +; CHECK: movaps (%{{rsi|rdx}},%rax,4), [[X3:%xmm[0-9]+]] +; CHECK: cvtdq2ps +; CHECK: orps {{%xmm[0-9]+}}, [[X4:%xmm[0-9]+]] +; CHECK: movaps [[X4]], (%{{rdi|rcx}},%rax,4) +; CHECK: addq $4, %rax +; CHECK: cmpl %eax, (%{{rdx|r8}}) +; CHECK-NEXT: jg + +define void @vvfloorf(float* nocapture %y, float* nocapture %x, i32* nocapture %n) nounwind { +entry: + %0 = load i32* %n, align 4 + %1 = icmp sgt i32 %0, 0 + br i1 %1, label %bb, label %return + +bb: + %indvar = phi i64 [ %indvar.next, %bb ], [ 0, %entry ] + %tmp = shl i64 %indvar, 2 + %scevgep = getelementptr float* %y, i64 %tmp + %scevgep9 = bitcast float* %scevgep to <4 x float>* + %scevgep10 = getelementptr float* %x, i64 %tmp + %scevgep1011 = bitcast float* %scevgep10 to <4 x float>* + %2 = load <4 x float>* %scevgep1011, align 16 + %3 = bitcast <4 x float> %2 to <4 x i32> + %4 = and <4 x i32> %3, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647> + %5 = bitcast <4 x i32> %4 to <4 x float> + %6 = and <4 x i32> %3, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648> + %7 = tail call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %5, <4 x float> <float 8.388608e+06, float 8.388608e+06, float 8.388608e+06, float 8.388608e+06>, i8 5) nounwind + %tmp.i4 = bitcast <4 x float> %7 to <4 x i32> + %8 = xor <4 x i32> %tmp.i4, <i32 -1, i32 -1, i32 -1, i32 -1> + %9 = and <4 x i32> %8, <i32 1258291200, i32 1258291200, i32 1258291200, i32 1258291200> + %10 = or <4 x i32> %9, %6 + %11 = bitcast <4 x i32> %10 to <4 x float> + %12 = fadd <4 x float> %2, %11 + %13 = fsub <4 x float> %12, %11 + %14 = tail call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %2, <4 x float> %13, i8 1) nounwind + %15 = bitcast <4 x float> %14 to <4 x i32> + %16 = tail call <4 x float> 
@llvm.x86.sse2.cvtdq2ps(<4 x i32> %15) nounwind readnone + %17 = fadd <4 x float> %13, %16 + %tmp.i = bitcast <4 x float> %17 to <4 x i32> + %18 = or <4 x i32> %tmp.i, %6 + %19 = bitcast <4 x i32> %18 to <4 x float> + store <4 x float> %19, <4 x float>* %scevgep9, align 16 + %tmp12 = add i64 %tmp, 4 + %tmp13 = trunc i64 %tmp12 to i32 + %20 = load i32* %n, align 4 + %21 = icmp sgt i32 %20, %tmp13 + %indvar.next = add i64 %indvar, 1 + br i1 %21, label %bb, label %return + +return: + ret void +} + +declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone + +declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/X86/lsr-reuse.ll b/src/LLVM/test/CodeGen/X86/lsr-reuse.ll new file mode 100644 index 0000000..527a5a6 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/lsr-reuse.ll
@@ -0,0 +1,751 @@ +; XFAIL: * +; RUN: llc < %s -march=x86-64 -O3 -asm-verbose=false | FileCheck %s +target datalayout = "e-p:64:64:64" +target triple = "x86_64-unknown-unknown" + +; Full strength reduction reduces register pressure from 5 to 4 here. +; Instruction selection should use the FLAGS value from the dec for +; the branch. Scheduling should push the adds upwards. + +; CHECK: full_me_0: +; CHECK: movsd (%rsi), %xmm0 +; CHECK: mulsd (%rdx), %xmm0 +; CHECK: movsd %xmm0, (%rdi) +; CHECK: addq $8, %rsi +; CHECK: addq $8, %rdx +; CHECK: addq $8, %rdi +; CHECK: decq %rcx +; CHECK: jne + +define void @full_me_0(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind { +entry: + %t0 = icmp sgt i64 %n, 0 + br i1 %t0, label %loop, label %return + +loop: + %i = phi i64 [ %i.next, %loop ], [ 0, %entry ] + %Ai = getelementptr inbounds double* %A, i64 %i + %Bi = getelementptr inbounds double* %B, i64 %i + %Ci = getelementptr inbounds double* %C, i64 %i + %t1 = load double* %Bi + %t2 = load double* %Ci + %m = fmul double %t1, %t2 + store double %m, double* %Ai + %i.next = add nsw i64 %i, 1 + %exitcond = icmp eq i64 %i.next, %n + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +; Mostly-full strength reduction means we do full strength reduction on all +; except for the offsets. +; +; Given a choice between constant offsets -2048 and 2048, choose the negative +; value, because at boundary conditions it has a smaller encoding. +; TODO: That's an over-general heuristic. It would be better for the target +; to indicate what the encoding cost would be. Then using a 2048 offset +; would be better on x86-64, since the start value would be 0 instead of +; 2048. 
+ +; CHECK: mostly_full_me_0: +; CHECK: movsd -2048(%rsi), %xmm0 +; CHECK: mulsd -2048(%rdx), %xmm0 +; CHECK: movsd %xmm0, -2048(%rdi) +; CHECK: movsd (%rsi), %xmm0 +; CHECK: divsd (%rdx), %xmm0 +; CHECK: movsd %xmm0, (%rdi) +; CHECK: addq $8, %rsi +; CHECK: addq $8, %rdx +; CHECK: addq $8, %rdi +; CHECK: decq %rcx +; CHECK: jne + +define void @mostly_full_me_0(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind { +entry: + %t0 = icmp sgt i64 %n, 0 + br i1 %t0, label %loop, label %return + +loop: + %i = phi i64 [ %i.next, %loop ], [ 0, %entry ] + %Ai = getelementptr inbounds double* %A, i64 %i + %Bi = getelementptr inbounds double* %B, i64 %i + %Ci = getelementptr inbounds double* %C, i64 %i + %t1 = load double* %Bi + %t2 = load double* %Ci + %m = fmul double %t1, %t2 + store double %m, double* %Ai + %j = add i64 %i, 256 + %Aj = getelementptr inbounds double* %A, i64 %j + %Bj = getelementptr inbounds double* %B, i64 %j + %Cj = getelementptr inbounds double* %C, i64 %j + %t3 = load double* %Bj + %t4 = load double* %Cj + %o = fdiv double %t3, %t4 + store double %o, double* %Aj + %i.next = add nsw i64 %i, 1 + %exitcond = icmp eq i64 %i.next, %n + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +; A minor variation on mostly_full_me_0. +; Prefer to start the indvar at 0. 
+ +; CHECK: mostly_full_me_1: +; CHECK: movsd (%rsi), %xmm0 +; CHECK: mulsd (%rdx), %xmm0 +; CHECK: movsd %xmm0, (%rdi) +; CHECK: movsd -2048(%rsi), %xmm0 +; CHECK: divsd -2048(%rdx), %xmm0 +; CHECK: movsd %xmm0, -2048(%rdi) +; CHECK: addq $8, %rsi +; CHECK: addq $8, %rdx +; CHECK: addq $8, %rdi +; CHECK: decq %rcx +; CHECK: jne + +define void @mostly_full_me_1(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind { +entry: + %t0 = icmp sgt i64 %n, 0 + br i1 %t0, label %loop, label %return + +loop: + %i = phi i64 [ %i.next, %loop ], [ 0, %entry ] + %Ai = getelementptr inbounds double* %A, i64 %i + %Bi = getelementptr inbounds double* %B, i64 %i + %Ci = getelementptr inbounds double* %C, i64 %i + %t1 = load double* %Bi + %t2 = load double* %Ci + %m = fmul double %t1, %t2 + store double %m, double* %Ai + %j = sub i64 %i, 256 + %Aj = getelementptr inbounds double* %A, i64 %j + %Bj = getelementptr inbounds double* %B, i64 %j + %Cj = getelementptr inbounds double* %C, i64 %j + %t3 = load double* %Bj + %t4 = load double* %Cj + %o = fdiv double %t3, %t4 + store double %o, double* %Aj + %i.next = add nsw i64 %i, 1 + %exitcond = icmp eq i64 %i.next, %n + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +; A slightly less minor variation on mostly_full_me_0. 
+ +; CHECK: mostly_full_me_2: +; CHECK: movsd (%rsi), %xmm0 +; CHECK: mulsd (%rdx), %xmm0 +; CHECK: movsd %xmm0, (%rdi) +; CHECK: movsd -4096(%rsi), %xmm0 +; CHECK: divsd -4096(%rdx), %xmm0 +; CHECK: movsd %xmm0, -4096(%rdi) +; CHECK: addq $8, %rsi +; CHECK: addq $8, %rdx +; CHECK: addq $8, %rdi +; CHECK: decq %rcx +; CHECK: jne + +define void @mostly_full_me_2(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind { +entry: + %t0 = icmp sgt i64 %n, 0 + br i1 %t0, label %loop, label %return + +loop: + %i = phi i64 [ %i.next, %loop ], [ 0, %entry ] + %k = add i64 %i, 256 + %Ak = getelementptr inbounds double* %A, i64 %k + %Bk = getelementptr inbounds double* %B, i64 %k + %Ck = getelementptr inbounds double* %C, i64 %k + %t1 = load double* %Bk + %t2 = load double* %Ck + %m = fmul double %t1, %t2 + store double %m, double* %Ak + %j = sub i64 %i, 256 + %Aj = getelementptr inbounds double* %A, i64 %j + %Bj = getelementptr inbounds double* %B, i64 %j + %Cj = getelementptr inbounds double* %C, i64 %j + %t3 = load double* %Bj + %t4 = load double* %Cj + %o = fdiv double %t3, %t4 + store double %o, double* %Aj + %i.next = add nsw i64 %i, 1 + %exitcond = icmp eq i64 %i.next, %n + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +; In this test, the counting IV exit value is used, so full strength reduction +; would not reduce register pressure. IndVarSimplify ought to simplify such +; cases away, but it's useful here to verify that LSR's register pressure +; heuristics are working as expected. 
+ +; CHECK: count_me_0: +; CHECK: movsd (%rsi,%rax,8), %xmm0 +; CHECK: mulsd (%rdx,%rax,8), %xmm0 +; CHECK: movsd %xmm0, (%rdi,%rax,8) +; CHECK: incq %rax +; CHECK: cmpq %rax, %rcx +; CHECK: jne + +define i64 @count_me_0(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind { +entry: + %t0 = icmp sgt i64 %n, 0 + br i1 %t0, label %loop, label %return + +loop: + %i = phi i64 [ %i.next, %loop ], [ 0, %entry ] + %Ai = getelementptr inbounds double* %A, i64 %i + %Bi = getelementptr inbounds double* %B, i64 %i + %Ci = getelementptr inbounds double* %C, i64 %i + %t1 = load double* %Bi + %t2 = load double* %Ci + %m = fmul double %t1, %t2 + store double %m, double* %Ai + %i.next = add nsw i64 %i, 1 + %exitcond = icmp eq i64 %i.next, %n + br i1 %exitcond, label %return, label %loop + +return: + %q = phi i64 [ 0, %entry ], [ %i.next, %loop ] + ret i64 %q +} + +; In this test, the trip count value is used, so full strength reduction +; would not reduce register pressure. +; (though it would reduce register pressure inside the loop...) 
+ +; CHECK: count_me_1: +; CHECK: movsd (%rsi,%rax,8), %xmm0 +; CHECK: mulsd (%rdx,%rax,8), %xmm0 +; CHECK: movsd %xmm0, (%rdi,%rax,8) +; CHECK: incq %rax +; CHECK: cmpq %rax, %rcx +; CHECK: jne + +define i64 @count_me_1(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind { +entry: + %t0 = icmp sgt i64 %n, 0 + br i1 %t0, label %loop, label %return + +loop: + %i = phi i64 [ %i.next, %loop ], [ 0, %entry ] + %Ai = getelementptr inbounds double* %A, i64 %i + %Bi = getelementptr inbounds double* %B, i64 %i + %Ci = getelementptr inbounds double* %C, i64 %i + %t1 = load double* %Bi + %t2 = load double* %Ci + %m = fmul double %t1, %t2 + store double %m, double* %Ai + %i.next = add nsw i64 %i, 1 + %exitcond = icmp eq i64 %i.next, %n + br i1 %exitcond, label %return, label %loop + +return: + %q = phi i64 [ 0, %entry ], [ %n, %loop ] + ret i64 %q +} + +; Full strength reduction doesn't save any registers here because the +; loop tripcount is a constant. + +; CHECK: count_me_2: +; CHECK: movl $10, %eax +; CHECK: align +; CHECK: BB6_1: +; CHECK: movsd -40(%rdi,%rax,8), %xmm0 +; CHECK: addsd -40(%rsi,%rax,8), %xmm0 +; CHECK: movsd %xmm0, -40(%rdx,%rax,8) +; CHECK: movsd (%rdi,%rax,8), %xmm0 +; CHECK: subsd (%rsi,%rax,8), %xmm0 +; CHECK: movsd %xmm0, (%rdx,%rax,8) +; CHECK: incq %rax +; CHECK: cmpq $5010, %rax +; CHECK: jne + +define void @count_me_2(double* nocapture %A, double* nocapture %B, double* nocapture %C) nounwind { +entry: + br label %loop + +loop: + %i = phi i64 [ 0, %entry ], [ %i.next, %loop ] + %i5 = add i64 %i, 5 + %Ai = getelementptr double* %A, i64 %i5 + %t2 = load double* %Ai + %Bi = getelementptr double* %B, i64 %i5 + %t4 = load double* %Bi + %t5 = fadd double %t2, %t4 + %Ci = getelementptr double* %C, i64 %i5 + store double %t5, double* %Ci + %i10 = add i64 %i, 10 + %Ai10 = getelementptr double* %A, i64 %i10 + %t9 = load double* %Ai10 + %Bi10 = getelementptr double* %B, i64 %i10 + %t11 = load double* %Bi10 + %t12 = fsub double 
%t9, %t11 + %Ci10 = getelementptr double* %C, i64 %i10 + store double %t12, double* %Ci10 + %i.next = add i64 %i, 1 + %exitcond = icmp eq i64 %i.next, 5000 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +; This should be fully strength-reduced to reduce register pressure. + +; CHECK: full_me_1: +; CHECK: align +; CHECK: BB7_1: +; CHECK: movsd (%rdi), %xmm0 +; CHECK: addsd (%rsi), %xmm0 +; CHECK: movsd %xmm0, (%rdx) +; CHECK: movsd 40(%rdi), %xmm0 +; CHECK: subsd 40(%rsi), %xmm0 +; CHECK: movsd %xmm0, 40(%rdx) +; CHECK: addq $8, %rdi +; CHECK: addq $8, %rsi +; CHECK: addq $8, %rdx +; CHECK: decq %rcx +; CHECK: jne + +define void @full_me_1(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind { +entry: + br label %loop + +loop: + %i = phi i64 [ 0, %entry ], [ %i.next, %loop ] + %i5 = add i64 %i, 5 + %Ai = getelementptr double* %A, i64 %i5 + %t2 = load double* %Ai + %Bi = getelementptr double* %B, i64 %i5 + %t4 = load double* %Bi + %t5 = fadd double %t2, %t4 + %Ci = getelementptr double* %C, i64 %i5 + store double %t5, double* %Ci + %i10 = add i64 %i, 10 + %Ai10 = getelementptr double* %A, i64 %i10 + %t9 = load double* %Ai10 + %Bi10 = getelementptr double* %B, i64 %i10 + %t11 = load double* %Bi10 + %t12 = fsub double %t9, %t11 + %Ci10 = getelementptr double* %C, i64 %i10 + store double %t12, double* %Ci10 + %i.next = add i64 %i, 1 + %exitcond = icmp eq i64 %i.next, %n + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +; This is a variation on full_me_0 in which the 0,+,1 induction variable +; has a non-address use, pinning that value in a register. 
+ +; CHECK: count_me_3: +; CHECK: call +; CHECK: movsd (%r{{[^,]*}},%r{{[^,]*}},8), %xmm0 +; CHECK: mulsd (%r{{[^,]*}},%r{{[^,]*}},8), %xmm0 +; CHECK: movsd %xmm0, (%r{{[^,]*}},%r{{[^,]*}},8) +; CHECK: incq %r{{.*}} +; CHECK: cmpq %r{{.*}}, %r{{.*}} +; CHECK: jne + +declare void @use(i64) + +define void @count_me_3(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind { +entry: + %t0 = icmp sgt i64 %n, 0 + br i1 %t0, label %loop, label %return + +loop: + %i = phi i64 [ %i.next, %loop ], [ 0, %entry ] + call void @use(i64 %i) + %Ai = getelementptr inbounds double* %A, i64 %i + %Bi = getelementptr inbounds double* %B, i64 %i + %Ci = getelementptr inbounds double* %C, i64 %i + %t1 = load double* %Bi + %t2 = load double* %Ci + %m = fmul double %t1, %t2 + store double %m, double* %Ai + %i.next = add nsw i64 %i, 1 + %exitcond = icmp eq i64 %i.next, %n + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +; LSR should use only one indvar for the inner loop. 
+; rdar://7657764 + +; CHECK: asd: +; CHECK: BB9_4: +; CHECK-NEXT: addl (%r{{[^,]*}},%rdi,4), %e +; CHECK-NEXT: incq %rdi +; CHECK-NEXT: cmpq %rdi, %r{{[^,]*}} +; CHECK-NEXT: jg + +%struct.anon = type { i32, [4200 x i32] } + +@bars = common global [123123 x %struct.anon] zeroinitializer, align 32 ; <[123123 x %struct.anon]*> [#uses=2] + +define i32 @asd(i32 %n) nounwind readonly { +entry: + %0 = icmp sgt i32 %n, 0 ; <i1> [#uses=1] + br i1 %0, label %bb.nph14, label %bb5 + +bb.nph14: ; preds = %entry + %tmp18 = zext i32 %n to i64 ; <i64> [#uses=1] + br label %bb + +bb: ; preds = %bb3, %bb.nph14 + %indvar16 = phi i64 [ 0, %bb.nph14 ], [ %indvar.next17, %bb3 ] ; <i64> [#uses=3] + %s.113 = phi i32 [ 0, %bb.nph14 ], [ %s.0.lcssa, %bb3 ] ; <i32> [#uses=2] + %scevgep2526 = getelementptr [123123 x %struct.anon]* @bars, i64 0, i64 %indvar16, i32 0 ; <i32*> [#uses=1] + %1 = load i32* %scevgep2526, align 4 ; <i32> [#uses=2] + %2 = icmp sgt i32 %1, 0 ; <i1> [#uses=1] + br i1 %2, label %bb.nph, label %bb3 + +bb.nph: ; preds = %bb + %tmp23 = sext i32 %1 to i64 ; <i64> [#uses=1] + br label %bb1 + +bb1: ; preds = %bb.nph, %bb1 + %indvar = phi i64 [ 0, %bb.nph ], [ %tmp19, %bb1 ] ; <i64> [#uses=2] + %s.07 = phi i32 [ %s.113, %bb.nph ], [ %4, %bb1 ] ; <i32> [#uses=1] + %c.08 = getelementptr [123123 x %struct.anon]* @bars, i64 0, i64 %indvar16, i32 1, i64 %indvar ; <i32*> [#uses=1] + %3 = load i32* %c.08, align 4 ; <i32> [#uses=1] + %4 = add nsw i32 %3, %s.07 ; <i32> [#uses=2] + %tmp19 = add i64 %indvar, 1 ; <i64> [#uses=2] + %5 = icmp sgt i64 %tmp23, %tmp19 ; <i1> [#uses=1] + br i1 %5, label %bb1, label %bb3 + +bb3: ; preds = %bb1, %bb + %s.0.lcssa = phi i32 [ %s.113, %bb ], [ %4, %bb1 ] ; <i32> [#uses=2] + %indvar.next17 = add i64 %indvar16, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %indvar.next17, %tmp18 ; <i1> [#uses=1] + br i1 %exitcond, label %bb5, label %bb + +bb5: ; preds = %bb3, %entry + %s.1.lcssa = phi i32 [ 0, %entry ], [ %s.0.lcssa, %bb3 ] ; <i32> [#uses=1] + ret i32 
%s.1.lcssa +} + +; Two loops here are of particular interest; the one at %bb21, where +; we don't want to leave extra induction variables around, or use an +; lea to compute an exit condition inside the loop: + +; CHECK: test: + +; CHECK: BB10_4: +; CHECK-NEXT: movaps %xmm{{.*}}, %xmm{{.*}} +; CHECK-NEXT: addss %xmm{{.*}}, %xmm{{.*}} +; CHECK-NEXT: mulss (%r{{[^,]*}}), %xmm{{.*}} +; CHECK-NEXT: movss %xmm{{.*}}, (%r{{[^,]*}}) +; CHECK-NEXT: addq $4, %r{{.*}} +; CHECK-NEXT: decq %r{{.*}} +; CHECK-NEXT: addq $4, %r{{.*}} +; CHECK-NEXT: movaps %xmm{{.*}}, %xmm{{.*}} +; CHECK-NEXT: BB10_2: +; CHECK-NEXT: testq %r{{.*}}, %r{{.*}} +; CHECK-NEXT: jle +; CHECK-NEXT: testb $15, %r{{.*}} +; CHECK-NEXT: jne + +; And the one at %bb68, where we want to be sure to use superhero mode: + +; CHECK: BB10_7: +; CHECK-NEXT: movaps 48(%r{{[^,]*}}), %xmm{{.*}} +; CHECK-NEXT: mulps %xmm{{.*}}, %xmm{{.*}} +; CHECK-NEXT: movaps 32(%r{{[^,]*}}), %xmm{{.*}} +; CHECK-NEXT: mulps %xmm{{.*}}, %xmm{{.*}} +; CHECK-NEXT: movaps 16(%r{{[^,]*}}), %xmm{{.*}} +; CHECK-NEXT: mulps %xmm{{.*}}, %xmm{{.*}} +; CHECK-NEXT: movaps (%r{{[^,]*}}), %xmm{{.*}} +; CHECK-NEXT: mulps %xmm{{.*}}, %xmm{{.*}} +; CHECK-NEXT: movaps %xmm{{.*}}, (%r{{[^,]*}}) +; CHECK-NEXT: movaps %xmm{{.*}}, 16(%r{{[^,]*}}) +; CHECK-NEXT: movaps %xmm{{.*}}, 32(%r{{[^,]*}}) +; CHECK-NEXT: movaps %xmm{{.*}}, 48(%r{{[^,]*}}) +; CHECK-NEXT: addps %xmm{{.*}}, %xmm{{.*}} +; CHECK-NEXT: addps %xmm{{.*}}, %xmm{{.*}} +; CHECK-NEXT: addps %xmm{{.*}}, %xmm{{.*}} +; CHECK-NEXT: addps %xmm{{.*}}, %xmm{{.*}} +; CHECK-NEXT: addq $64, %r{{.*}} +; CHECK-NEXT: addq $64, %r{{.*}} +; CHECK-NEXT: addq $-16, %r{{.*}} +; CHECK-NEXT: cmpq $15, %r{{.*}} +; CHECK-NEXT: jg + +define void @test(float* %arg, i64 %arg1, float* nocapture %arg2, float* nocapture %arg3, float* %arg4, i64 %arg5, i64 %arg6) nounwind { +bb: + %t = alloca float, align 4 ; <float*> [#uses=3] + %t7 = alloca float, align 4 ; <float*> [#uses=2] + %t8 = load float* %arg3 ; <float> [#uses=8] + 
%t9 = ptrtoint float* %arg to i64 ; <i64> [#uses=1] + %t10 = ptrtoint float* %arg4 to i64 ; <i64> [#uses=1] + %t11 = xor i64 %t10, %t9 ; <i64> [#uses=1] + %t12 = and i64 %t11, 15 ; <i64> [#uses=1] + %t13 = icmp eq i64 %t12, 0 ; <i1> [#uses=1] + %t14 = xor i64 %arg1, 1 ; <i64> [#uses=1] + %t15 = xor i64 %arg5, 1 ; <i64> [#uses=1] + %t16 = or i64 %t15, %t14 ; <i64> [#uses=1] + %t17 = trunc i64 %t16 to i32 ; <i32> [#uses=1] + %t18 = icmp eq i32 %t17, 0 ; <i1> [#uses=1] + br i1 %t18, label %bb19, label %bb213 + +bb19: ; preds = %bb + %t20 = load float* %arg2 ; <float> [#uses=1] + br label %bb21 + +bb21: ; preds = %bb32, %bb19 + %t22 = phi i64 [ %t36, %bb32 ], [ 0, %bb19 ] ; <i64> [#uses=21] + %t23 = phi float [ %t35, %bb32 ], [ %t20, %bb19 ] ; <float> [#uses=6] + %t24 = sub i64 %arg6, %t22 ; <i64> [#uses=4] + %t25 = getelementptr float* %arg4, i64 %t22 ; <float*> [#uses=4] + %t26 = getelementptr float* %arg, i64 %t22 ; <float*> [#uses=3] + %t27 = icmp sgt i64 %t24, 0 ; <i1> [#uses=1] + br i1 %t27, label %bb28, label %bb37 + +bb28: ; preds = %bb21 + %t29 = ptrtoint float* %t25 to i64 ; <i64> [#uses=1] + %t30 = and i64 %t29, 15 ; <i64> [#uses=1] + %t31 = icmp eq i64 %t30, 0 ; <i1> [#uses=1] + br i1 %t31, label %bb37, label %bb32 + +bb32: ; preds = %bb28 + %t33 = load float* %t26 ; <float> [#uses=1] + %t34 = fmul float %t23, %t33 ; <float> [#uses=1] + store float %t34, float* %t25 + %t35 = fadd float %t23, %t8 ; <float> [#uses=1] + %t36 = add i64 %t22, 1 ; <i64> [#uses=1] + br label %bb21 + +bb37: ; preds = %bb28, %bb21 + %t38 = fmul float %t8, 4.000000e+00 ; <float> [#uses=1] + store float %t38, float* %t + %t39 = fmul float %t8, 1.600000e+01 ; <float> [#uses=1] + store float %t39, float* %t7 + %t40 = fmul float %t8, 0.000000e+00 ; <float> [#uses=1] + %t41 = fadd float %t23, %t40 ; <float> [#uses=1] + %t42 = insertelement <4 x float> undef, float %t41, i32 0 ; <<4 x float>> [#uses=1] + %t43 = fadd float %t23, %t8 ; <float> [#uses=1] + %t44 = insertelement <4 x float> 
%t42, float %t43, i32 1 ; <<4 x float>> [#uses=1] + %t45 = fmul float %t8, 2.000000e+00 ; <float> [#uses=1] + %t46 = fadd float %t23, %t45 ; <float> [#uses=1] + %t47 = insertelement <4 x float> %t44, float %t46, i32 2 ; <<4 x float>> [#uses=1] + %t48 = fmul float %t8, 3.000000e+00 ; <float> [#uses=1] + %t49 = fadd float %t23, %t48 ; <float> [#uses=1] + %t50 = insertelement <4 x float> %t47, float %t49, i32 3 ; <<4 x float>> [#uses=5] + %t51 = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %t) nounwind ; <<4 x float>> [#uses=3] + %t52 = fadd <4 x float> %t50, %t51 ; <<4 x float>> [#uses=3] + %t53 = fadd <4 x float> %t52, %t51 ; <<4 x float>> [#uses=3] + %t54 = fadd <4 x float> %t53, %t51 ; <<4 x float>> [#uses=2] + %t55 = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %t7) nounwind ; <<4 x float>> [#uses=8] + %t56 = icmp sgt i64 %t24, 15 ; <i1> [#uses=2] + br i1 %t13, label %bb57, label %bb118 + +bb57: ; preds = %bb37 + br i1 %t56, label %bb61, label %bb112 + +bb58: ; preds = %bb68 + %t59 = getelementptr float* %arg, i64 %t78 ; <float*> [#uses=1] + %t60 = getelementptr float* %arg4, i64 %t78 ; <float*> [#uses=1] + br label %bb112 + +bb61: ; preds = %bb57 + %t62 = add i64 %t22, 16 ; <i64> [#uses=1] + %t63 = add i64 %t22, 4 ; <i64> [#uses=1] + %t64 = add i64 %t22, 8 ; <i64> [#uses=1] + %t65 = add i64 %t22, 12 ; <i64> [#uses=1] + %t66 = add i64 %arg6, -16 ; <i64> [#uses=1] + %t67 = sub i64 %t66, %t22 ; <i64> [#uses=1] + br label %bb68 + +bb68: ; preds = %bb68, %bb61 + %t69 = phi i64 [ 0, %bb61 ], [ %t111, %bb68 ] ; <i64> [#uses=3] + %t70 = phi <4 x float> [ %t54, %bb61 ], [ %t107, %bb68 ] ; <<4 x float>> [#uses=2] + %t71 = phi <4 x float> [ %t50, %bb61 ], [ %t103, %bb68 ] ; <<4 x float>> [#uses=2] + %t72 = phi <4 x float> [ %t53, %bb61 ], [ %t108, %bb68 ] ; <<4 x float>> [#uses=2] + %t73 = phi <4 x float> [ %t52, %bb61 ], [ %t109, %bb68 ] ; <<4 x float>> 
[#uses=2] + %t74 = shl i64 %t69, 4 ; <i64> [#uses=5] + %t75 = add i64 %t22, %t74 ; <i64> [#uses=2] + %t76 = getelementptr float* %arg, i64 %t75 ; <float*> [#uses=1] + %t77 = bitcast float* %t76 to <4 x float>* ; <<4 x float>*> [#uses=1] + %t78 = add i64 %t62, %t74 ; <i64> [#uses=2] + %t79 = add i64 %t63, %t74 ; <i64> [#uses=2] + %t80 = getelementptr float* %arg, i64 %t79 ; <float*> [#uses=1] + %t81 = bitcast float* %t80 to <4 x float>* ; <<4 x float>*> [#uses=1] + %t82 = add i64 %t64, %t74 ; <i64> [#uses=2] + %t83 = getelementptr float* %arg, i64 %t82 ; <float*> [#uses=1] + %t84 = bitcast float* %t83 to <4 x float>* ; <<4 x float>*> [#uses=1] + %t85 = add i64 %t65, %t74 ; <i64> [#uses=2] + %t86 = getelementptr float* %arg, i64 %t85 ; <float*> [#uses=1] + %t87 = bitcast float* %t86 to <4 x float>* ; <<4 x float>*> [#uses=1] + %t88 = getelementptr float* %arg4, i64 %t75 ; <float*> [#uses=1] + %t89 = bitcast float* %t88 to <4 x float>* ; <<4 x float>*> [#uses=1] + %t90 = getelementptr float* %arg4, i64 %t79 ; <float*> [#uses=1] + %t91 = bitcast float* %t90 to <4 x float>* ; <<4 x float>*> [#uses=1] + %t92 = getelementptr float* %arg4, i64 %t82 ; <float*> [#uses=1] + %t93 = bitcast float* %t92 to <4 x float>* ; <<4 x float>*> [#uses=1] + %t94 = getelementptr float* %arg4, i64 %t85 ; <float*> [#uses=1] + %t95 = bitcast float* %t94 to <4 x float>* ; <<4 x float>*> [#uses=1] + %t96 = mul i64 %t69, -16 ; <i64> [#uses=1] + %t97 = add i64 %t67, %t96 ; <i64> [#uses=2] + %t98 = load <4 x float>* %t77 ; <<4 x float>> [#uses=1] + %t99 = load <4 x float>* %t81 ; <<4 x float>> [#uses=1] + %t100 = load <4 x float>* %t84 ; <<4 x float>> [#uses=1] + %t101 = load <4 x float>* %t87 ; <<4 x float>> [#uses=1] + %t102 = fmul <4 x float> %t98, %t71 ; <<4 x float>> [#uses=1] + %t103 = fadd <4 x float> %t71, %t55 ; <<4 x float>> [#uses=2] + %t104 = fmul <4 x float> %t99, %t73 ; <<4 x float>> [#uses=1] + %t105 = fmul <4 x float> %t100, %t72 ; <<4 x float>> [#uses=1] + %t106 = fmul <4 x float> 
%t101, %t70 ; <<4 x float>> [#uses=1] + store <4 x float> %t102, <4 x float>* %t89 + store <4 x float> %t104, <4 x float>* %t91 + store <4 x float> %t105, <4 x float>* %t93 + store <4 x float> %t106, <4 x float>* %t95 + %t107 = fadd <4 x float> %t70, %t55 ; <<4 x float>> [#uses=1] + %t108 = fadd <4 x float> %t72, %t55 ; <<4 x float>> [#uses=1] + %t109 = fadd <4 x float> %t73, %t55 ; <<4 x float>> [#uses=1] + %t110 = icmp sgt i64 %t97, 15 ; <i1> [#uses=1] + %t111 = add i64 %t69, 1 ; <i64> [#uses=1] + br i1 %t110, label %bb68, label %bb58 + +bb112: ; preds = %bb58, %bb57 + %t113 = phi float* [ %t59, %bb58 ], [ %t26, %bb57 ] ; <float*> [#uses=1] + %t114 = phi float* [ %t60, %bb58 ], [ %t25, %bb57 ] ; <float*> [#uses=1] + %t115 = phi <4 x float> [ %t103, %bb58 ], [ %t50, %bb57 ] ; <<4 x float>> [#uses=1] + %t116 = phi i64 [ %t97, %bb58 ], [ %t24, %bb57 ] ; <i64> [#uses=1] + %t117 = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %t) nounwind ; <<4 x float>> [#uses=0] + br label %bb194 + +bb118: ; preds = %bb37 + br i1 %t56, label %bb122, label %bb194 + +bb119: ; preds = %bb137 + %t120 = getelementptr float* %arg, i64 %t145 ; <float*> [#uses=1] + %t121 = getelementptr float* %arg4, i64 %t145 ; <float*> [#uses=1] + br label %bb194 + +bb122: ; preds = %bb118 + %t123 = add i64 %t22, -1 ; <i64> [#uses=1] + %t124 = getelementptr inbounds float* %arg, i64 %t123 ; <float*> [#uses=1] + %t125 = bitcast float* %t124 to <4 x float>* ; <<4 x float>*> [#uses=1] + %t126 = load <4 x float>* %t125 ; <<4 x float>> [#uses=1] + %t127 = add i64 %t22, 16 ; <i64> [#uses=1] + %t128 = add i64 %t22, 3 ; <i64> [#uses=1] + %t129 = add i64 %t22, 7 ; <i64> [#uses=1] + %t130 = add i64 %t22, 11 ; <i64> [#uses=1] + %t131 = add i64 %t22, 15 ; <i64> [#uses=1] + %t132 = add i64 %t22, 4 ; <i64> [#uses=1] + %t133 = add i64 %t22, 8 ; <i64> [#uses=1] + %t134 = add i64 %t22, 12 ; <i64> [#uses=1] + %t135 = add i64 %arg6, -16 ; <i64> [#uses=1] + %t136 = 
sub i64 %t135, %t22 ; <i64> [#uses=1] + br label %bb137 + +bb137: ; preds = %bb137, %bb122 + %t138 = phi i64 [ 0, %bb122 ], [ %t193, %bb137 ] ; <i64> [#uses=3] + %t139 = phi <4 x float> [ %t54, %bb122 ], [ %t189, %bb137 ] ; <<4 x float>> [#uses=2] + %t140 = phi <4 x float> [ %t50, %bb122 ], [ %t185, %bb137 ] ; <<4 x float>> [#uses=2] + %t141 = phi <4 x float> [ %t53, %bb122 ], [ %t190, %bb137 ] ; <<4 x float>> [#uses=2] + %t142 = phi <4 x float> [ %t52, %bb122 ], [ %t191, %bb137 ] ; <<4 x float>> [#uses=2] + %t143 = phi <4 x float> [ %t126, %bb122 ], [ %t175, %bb137 ] ; <<4 x float>> [#uses=1] + %t144 = shl i64 %t138, 4 ; <i64> [#uses=9] + %t145 = add i64 %t127, %t144 ; <i64> [#uses=2] + %t146 = add i64 %t128, %t144 ; <i64> [#uses=1] + %t147 = getelementptr float* %arg, i64 %t146 ; <float*> [#uses=1] + %t148 = bitcast float* %t147 to <4 x float>* ; <<4 x float>*> [#uses=1] + %t149 = add i64 %t129, %t144 ; <i64> [#uses=1] + %t150 = getelementptr float* %arg, i64 %t149 ; <float*> [#uses=1] + %t151 = bitcast float* %t150 to <4 x float>* ; <<4 x float>*> [#uses=1] + %t152 = add i64 %t130, %t144 ; <i64> [#uses=1] + %t153 = getelementptr float* %arg, i64 %t152 ; <float*> [#uses=1] + %t154 = bitcast float* %t153 to <4 x float>* ; <<4 x float>*> [#uses=1] + %t155 = add i64 %t131, %t144 ; <i64> [#uses=1] + %t156 = getelementptr float* %arg, i64 %t155 ; <float*> [#uses=1] + %t157 = bitcast float* %t156 to <4 x float>* ; <<4 x float>*> [#uses=1] + %t158 = add i64 %t22, %t144 ; <i64> [#uses=1] + %t159 = getelementptr float* %arg4, i64 %t158 ; <float*> [#uses=1] + %t160 = bitcast float* %t159 to <4 x float>* ; <<4 x float>*> [#uses=1] + %t161 = add i64 %t132, %t144 ; <i64> [#uses=1] + %t162 = getelementptr float* %arg4, i64 %t161 ; <float*> [#uses=1] + %t163 = bitcast float* %t162 to <4 x float>* ; <<4 x float>*> [#uses=1] + %t164 = add i64 %t133, %t144 ; <i64> [#uses=1] + %t165 = getelementptr float* %arg4, i64 %t164 ; <float*> [#uses=1] + %t166 = bitcast float* %t165 to <4 x 
float>* ; <<4 x float>*> [#uses=1] + %t167 = add i64 %t134, %t144 ; <i64> [#uses=1] + %t168 = getelementptr float* %arg4, i64 %t167 ; <float*> [#uses=1] + %t169 = bitcast float* %t168 to <4 x float>* ; <<4 x float>*> [#uses=1] + %t170 = mul i64 %t138, -16 ; <i64> [#uses=1] + %t171 = add i64 %t136, %t170 ; <i64> [#uses=2] + %t172 = load <4 x float>* %t148 ; <<4 x float>> [#uses=2] + %t173 = load <4 x float>* %t151 ; <<4 x float>> [#uses=2] + %t174 = load <4 x float>* %t154 ; <<4 x float>> [#uses=2] + %t175 = load <4 x float>* %t157 ; <<4 x float>> [#uses=2] + %t176 = shufflevector <4 x float> %t143, <4 x float> %t172, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1] + %t177 = shufflevector <4 x float> %t176, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1] + %t178 = shufflevector <4 x float> %t172, <4 x float> %t173, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1] + %t179 = shufflevector <4 x float> %t178, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1] + %t180 = shufflevector <4 x float> %t173, <4 x float> %t174, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1] + %t181 = shufflevector <4 x float> %t180, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1] + %t182 = shufflevector <4 x float> %t174, <4 x float> %t175, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1] + %t183 = shufflevector <4 x float> %t182, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1] + %t184 = fmul <4 x float> %t177, %t140 ; <<4 x float>> [#uses=1] + %t185 = fadd <4 x float> %t140, %t55 ; <<4 x float>> [#uses=2] + %t186 = fmul <4 x float> %t179, %t142 ; <<4 x float>> [#uses=1] + %t187 = fmul <4 x float> %t181, %t141 ; <<4 x float>> [#uses=1] + %t188 = fmul <4 x float> %t183, %t139 ; <<4 x float>> [#uses=1] + store <4 x float> %t184, <4 x float>* %t160 + store <4 x float> %t186, <4 x float>* %t163 
+ store <4 x float> %t187, <4 x float>* %t166 + store <4 x float> %t188, <4 x float>* %t169 + %t189 = fadd <4 x float> %t139, %t55 ; <<4 x float>> [#uses=1] + %t190 = fadd <4 x float> %t141, %t55 ; <<4 x float>> [#uses=1] + %t191 = fadd <4 x float> %t142, %t55 ; <<4 x float>> [#uses=1] + %t192 = icmp sgt i64 %t171, 15 ; <i1> [#uses=1] + %t193 = add i64 %t138, 1 ; <i64> [#uses=1] + br i1 %t192, label %bb137, label %bb119 + +bb194: ; preds = %bb119, %bb118, %bb112 + %t195 = phi i64 [ %t116, %bb112 ], [ %t171, %bb119 ], [ %t24, %bb118 ] ; <i64> [#uses=2] + %t196 = phi <4 x float> [ %t115, %bb112 ], [ %t185, %bb119 ], [ %t50, %bb118 ] ; <<4 x float>> [#uses=1] + %t197 = phi float* [ %t114, %bb112 ], [ %t121, %bb119 ], [ %t25, %bb118 ] ; <float*> [#uses=1] + %t198 = phi float* [ %t113, %bb112 ], [ %t120, %bb119 ], [ %t26, %bb118 ] ; <float*> [#uses=1] + %t199 = extractelement <4 x float> %t196, i32 0 ; <float> [#uses=2] + %t200 = icmp sgt i64 %t195, 0 ; <i1> [#uses=1] + br i1 %t200, label %bb201, label %bb211 + +bb201: ; preds = %bb201, %bb194 + %t202 = phi i64 [ %t209, %bb201 ], [ 0, %bb194 ] ; <i64> [#uses=3] + %t203 = phi float [ %t208, %bb201 ], [ %t199, %bb194 ] ; <float> [#uses=2] + %t204 = getelementptr float* %t198, i64 %t202 ; <float*> [#uses=1] + %t205 = getelementptr float* %t197, i64 %t202 ; <float*> [#uses=1] + %t206 = load float* %t204 ; <float> [#uses=1] + %t207 = fmul float %t203, %t206 ; <float> [#uses=1] + store float %t207, float* %t205 + %t208 = fadd float %t203, %t8 ; <float> [#uses=2] + %t209 = add i64 %t202, 1 ; <i64> [#uses=2] + %t210 = icmp eq i64 %t209, %t195 ; <i1> [#uses=1] + br i1 %t210, label %bb211, label %bb201 + +bb211: ; preds = %bb201, %bb194 + %t212 = phi float [ %t199, %bb194 ], [ %t208, %bb201 ] ; <float> [#uses=1] + store float %t212, float* %arg2 + ret void + +bb213: ; preds = %bb + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/lsr-sort.ll b/src/LLVM/test/CodeGen/X86/lsr-sort.ll new file mode 100644 index 0000000..1f3b59a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/lsr-sort.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -march=x86-64 > %t +; RUN: grep inc %t | count 1 +; RUN: not grep incw %t + +@X = common global i16 0 ; <i16*> [#uses=1] + +define i32 @foo(i32 %N) nounwind { +entry: + %0 = icmp sgt i32 %N, 0 ; <i1> [#uses=1] + br i1 %0, label %bb, label %return + +bb: ; preds = %bb, %entry + %i.03 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2] + %1 = trunc i32 %i.03 to i16 ; <i16> [#uses=1] + volatile store i16 %1, i16* @X, align 2 + %indvar.next = add i32 %i.03, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %indvar.next, %N ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb, %entry + %h = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] + ret i32 %h +}
diff --git a/src/LLVM/test/CodeGen/X86/lsr-static-addr.ll b/src/LLVM/test/CodeGen/X86/lsr-static-addr.ll new file mode 100644 index 0000000..c9ed3e5 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/lsr-static-addr.ll
@@ -0,0 +1,31 @@ +; RUN: llc -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -relocation-model=static -asm-verbose=false < %s | FileCheck %s + +; CHECK: xorl %eax, %eax +; CHECK: movsd .LCPI0_0(%rip), %xmm0 +; CHECK: align +; CHECK-NEXT: BB0_2: +; CHECK-NEXT: movsd A(,%rax,8) +; CHECK-NEXT: mulsd +; CHECK-NEXT: movsd +; CHECK-NEXT: incq %rax + +@A = external global [0 x double] + +define void @foo(i64 %n) nounwind { +entry: + %cmp5 = icmp sgt i64 %n, 0 + br i1 %cmp5, label %for.body, label %for.end + +for.body: + %i.06 = phi i64 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr [0 x double]* @A, i64 0, i64 %i.06 + %tmp3 = load double* %arrayidx, align 8 + %mul = fmul double %tmp3, 2.300000e+00 + store double %mul, double* %arrayidx, align 8 + %inc = add nsw i64 %i.06, 1 + %exitcond = icmp eq i64 %inc, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/lsr-wrap.ll b/src/LLVM/test/CodeGen/X86/lsr-wrap.ll new file mode 100644 index 0000000..d605e4f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/lsr-wrap.ll
@@ -0,0 +1,37 @@ +; RUN: llc -march=x86-64 < %s | FileCheck %s + +; LSR would like to use a single IV for both of these, however it's +; not safe due to wraparound. + +; CHECK: addb $-4, % +; CHECK: decw % + +@g_19 = common global i32 0 ; <i32*> [#uses=2] + +declare i32 @func_8(i8 zeroext) nounwind + +declare i32 @func_3(i8 signext) nounwind + +define void @func_1() nounwind { +entry: + br label %bb + +bb: ; preds = %bb, %entry + %indvar = phi i16 [ 0, %entry ], [ %indvar.next, %bb ] ; <i16> [#uses=2] + %tmp = sub i16 0, %indvar ; <i16> [#uses=1] + %tmp27 = trunc i16 %tmp to i8 ; <i8> [#uses=1] + %tmp1 = load i32* @g_19, align 4 ; <i32> [#uses=2] + %tmp2 = add i32 %tmp1, 1 ; <i32> [#uses=1] + store i32 %tmp2, i32* @g_19, align 4 + %tmp3 = trunc i32 %tmp1 to i8 ; <i8> [#uses=1] + %tmp4 = tail call i32 @func_8(i8 zeroext %tmp3) nounwind ; <i32> [#uses=0] + %tmp5 = shl i8 %tmp27, 2 ; <i8> [#uses=1] + %tmp6 = add i8 %tmp5, -112 ; <i8> [#uses=1] + %tmp7 = tail call i32 @func_3(i8 signext %tmp6) nounwind ; <i32> [#uses=0] + %indvar.next = add i16 %indvar, 1 ; <i16> [#uses=2] + %exitcond = icmp eq i16 %indvar.next, -28 ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/lzcnt.ll b/src/LLVM/test/CodeGen/X86/lzcnt.ll new file mode 100644 index 0000000..e5a55ab --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/lzcnt.ll
@@ -0,0 +1,38 @@ +; RUN: llc < %s -march=x86-64 -mattr=+lzcnt | FileCheck %s + +define i32 @t1(i32 %x) nounwind { + %tmp = tail call i32 @llvm.ctlz.i32( i32 %x ) + ret i32 %tmp +; CHECK: t1: +; CHECK: lzcntl +} + +declare i32 @llvm.ctlz.i32(i32) nounwind readnone + +define i16 @t2(i16 %x) nounwind { + %tmp = tail call i16 @llvm.ctlz.i16( i16 %x ) + ret i16 %tmp +; CHECK: t2: +; CHECK: lzcntw +} + +declare i16 @llvm.ctlz.i16(i16) nounwind readnone + +define i64 @t3(i64 %x) nounwind { + %tmp = tail call i64 @llvm.ctlz.i64( i64 %x ) + ret i64 %tmp +; CHECK: t3: +; CHECK: lzcntq +} + +declare i64 @llvm.ctlz.i64(i64) nounwind readnone + +define i8 @t4(i8 %x) nounwind { + %tmp = tail call i8 @llvm.ctlz.i8( i8 %x ) + ret i8 %tmp +; CHECK: t4: +; CHECK: lzcntw +} + +declare i8 @llvm.ctlz.i8(i8) nounwind readnone +
diff --git a/src/LLVM/test/CodeGen/X86/machine-cse.ll b/src/LLVM/test/CodeGen/X86/machine-cse.ll new file mode 100644 index 0000000..d819fc8 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/machine-cse.ll
@@ -0,0 +1,79 @@ +; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s +; rdar://7610418 + +%ptr = type { i8* } +%struct.s1 = type { %ptr, %ptr } +%struct.s2 = type { i32, i8*, i8*, [256 x %struct.s1*], [8 x i32], i64, i8*, i32, i64, i64, i32, %struct.s3*, %struct.s3*, [49 x i64] } +%struct.s3 = type { %struct.s3*, %struct.s3*, i32, i32, i32 } + +define fastcc i8* @t(i32 %base) nounwind { +entry: +; CHECK: t: +; CHECK: leaq (%rax,%rax,4) + %0 = zext i32 %base to i64 + %1 = getelementptr inbounds %struct.s2* null, i64 %0 + br i1 undef, label %bb1, label %bb2 + +bb1: +; CHECK: %bb1 +; CHECK-NOT: shlq $9 +; CHECK-NOT: leaq +; CHECK: call + %2 = getelementptr inbounds %struct.s2* null, i64 %0, i32 0 + call void @bar(i32* %2) nounwind + unreachable + +bb2: +; CHECK: %bb2 +; CHECK-NOT: leaq +; CHECK: callq + %3 = call fastcc i8* @foo(%struct.s2* %1) nounwind + unreachable + +bb3: + ret i8* undef +} + +declare void @bar(i32*) + +declare fastcc i8* @foo(%struct.s2*) nounwind + +; rdar://8773371 + +declare void @printf(...) 
nounwind + +define void @commute(i32 %test_case, i32 %scale) nounwind ssp { +; CHECK: commute: +entry: + switch i32 %test_case, label %sw.bb307 [ + i32 1, label %sw.bb + i32 2, label %sw.bb + i32 3, label %sw.bb + ] + +sw.bb: ; preds = %entry, %entry, %entry + %mul = mul nsw i32 %test_case, 3 + %mul20 = mul nsw i32 %mul, %scale + br i1 undef, label %if.end34, label %sw.bb307 + +if.end34: ; preds = %sw.bb +; CHECK: %if.end34 +; CHECK: imull +; CHECK: leal +; CHECK-NOT: imull + tail call void (...)* @printf(i32 %test_case, i32 %mul20) nounwind + %tmp = mul i32 %scale, %test_case + %tmp752 = mul i32 %tmp, 3 + %tmp753 = zext i32 %tmp752 to i64 + br label %bb.nph743.us + +for.body53.us: ; preds = %bb.nph743.us, %for.body53.us + %exitcond = icmp eq i64 undef, %tmp753 + br i1 %exitcond, label %bb.nph743.us, label %for.body53.us + +bb.nph743.us: ; preds = %for.body53.us, %if.end34 + br label %for.body53.us + +sw.bb307: ; preds = %sw.bb, %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/masked-iv-safe.ll b/src/LLVM/test/CodeGen/X86/masked-iv-safe.ll new file mode 100644 index 0000000..0b4d73a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/masked-iv-safe.ll
@@ -0,0 +1,244 @@ +; RUN: llc < %s -march=x86-64 > %t +; RUN: not grep and %t +; RUN: not grep movz %t +; RUN: not grep sar %t +; RUN: not grep shl %t +; RUN: grep add %t | count 2 +; RUN: grep inc %t | count 4 +; RUN: grep dec %t | count 2 +; RUN: grep lea %t | count 2 + +; Optimize away zext-inreg and sext-inreg on the loop induction +; variable using trip-count information. + +define void @count_up(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = fmul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = fmul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = fmul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 10 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @count_down(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = fmul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = fmul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = fmul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 0 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @count_up_signed(double* %d, i64 %n) 
nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = fmul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = fmul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = fmul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 10 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @count_down_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = fmul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = fmul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = fmul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 0 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @another_count_up(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 18446744073709551615, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = fmul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + 
%t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = fmul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = fmul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 0 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @another_count_down(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = fmul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = fdiv double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = fmul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 18446744073709551615 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @another_count_up_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 18446744073709551615, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = fmul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = fdiv double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = fmul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 
%indvar.next, 0 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @another_count_down_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = fmul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = fdiv double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = fmul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 18446744073709551615 + br i1 %exitcond, label %return, label %loop + +return: + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/masked-iv-unsafe.ll b/src/LLVM/test/CodeGen/X86/masked-iv-unsafe.ll new file mode 100644 index 0000000..f23c020 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/masked-iv-unsafe.ll
@@ -0,0 +1,386 @@ +; RUN: llc < %s -march=x86-64 > %t +; RUN: grep and %t | count 6 +; RUN: grep movzb %t | count 6 +; RUN: grep sar %t | count 12 + +; Don't optimize away zext-inreg and sext-inreg on the loop induction +; variable, because it isn't safe to do so in these cases. + +define void @count_up(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = fmul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = fmul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = fmul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 0 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @count_down(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = fmul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = fmul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = fmul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 20 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @count_up_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ] 
+ %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = fmul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = fmul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = fmul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 0 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @count_down_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = fmul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = fmul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = fmul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 20 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @another_count_up(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = fmul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = fmul double %t4, 2.3 + store double %t5, 
double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = fmul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, %n + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @another_count_down(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ %n, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = fmul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = fmul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = fmul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 10 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @another_count_up_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = fmul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = fmul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = fmul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, %n + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @another_count_down_signed(double* %d, i64 %n) nounwind { 
+entry: + br label %loop + +loop: + %indvar = phi i64 [ %n, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = fmul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = fmul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = fmul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 10 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @yet_another_count_down(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = fmul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = fmul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = fmul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 18446744073709551615 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @yet_another_count_up(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = fmul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 
%indvar.i24 + %t4 = load double* %t3 + %t5 = fmul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = fmul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 3 + %exitcond = icmp eq i64 %indvar.next, 10 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @still_another_count_down(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = fmul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = fmul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = fmul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 3 + %exitcond = icmp eq i64 %indvar.next, 0 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @yet_another_count_up_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = fmul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = fmul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = fmul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 3 + %exitcond = icmp eq i64 %indvar.next, 10 + br i1 %exitcond, label %return, label %loop + 
+return: + ret void +} + +define void @yet_another_count_down_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = fmul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = fmul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = fmul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 3 + %exitcond = icmp eq i64 %indvar.next, 0 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + + +
diff --git a/src/LLVM/test/CodeGen/X86/maskmovdqu.ll b/src/LLVM/test/CodeGen/X86/maskmovdqu.ll new file mode 100644 index 0000000..7796f0e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/maskmovdqu.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep -i EDI +; RUN: llc < %s -march=x86-64 -mattr=+sse2 | grep -i RDI +; rdar://6573467 + +define void @test(<16 x i8> %a, <16 x i8> %b, i32 %dummy, i8* %c) nounwind { +entry: + tail call void @llvm.x86.sse2.maskmov.dqu( <16 x i8> %a, <16 x i8> %b, i8* %c ) + ret void +} + +declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/mcinst-lowering.ll b/src/LLVM/test/CodeGen/X86/mcinst-lowering.ll new file mode 100644 index 0000000..1ef5a97 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/mcinst-lowering.ll
@@ -0,0 +1,26 @@ +; RUN: llc --show-mc-encoding < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +define i32 @f0(i32* nocapture %x) nounwind readonly ssp { +entry: + %tmp1 = load i32* %x ; <i32> [#uses=2] + %tobool = icmp eq i32 %tmp1, 0 ; <i1> [#uses=1] + br i1 %tobool, label %if.end, label %return + +if.end: ; preds = %entry + +; Check that we lower to the short form of cmpl, which has a fixed %eax +; register. +; +; CHECK: cmpl $16777216, %eax +; CHECK: # encoding: [0x3d,0x00,0x00,0x00,0x01] + %cmp = icmp eq i32 %tmp1, 16777216 ; <i1> [#uses=1] + + %conv = zext i1 %cmp to i32 ; <i32> [#uses=1] + ret i32 %conv + +return: ; preds = %entry + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/X86/mem-promote-integers.ll b/src/LLVM/test/CodeGen/X86/mem-promote-integers.ll new file mode 100644 index 0000000..80103d1 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/mem-promote-integers.ll
@@ -0,0 +1,391 @@ +; Test the basic functionality of integer element promotions of different types. +; This tests checks passing of arguments, loading and storing to memory and +; basic arithmetic. +; RUN: llc -march=x86 -promote-elements < %s +; RUN: llc -march=x86-64 -promote-elements < %s + +define <1 x i8> @test_1xi8(<1 x i8> %x, <1 x i8>* %b) { + %bb = load <1 x i8>* %b + %tt = xor <1 x i8> %x, %bb + store <1 x i8> %tt, <1 x i8>* %b + br label %next + +next: + ret <1 x i8> %tt +} + + +define <1 x i16> @test_1xi16(<1 x i16> %x, <1 x i16>* %b) { + %bb = load <1 x i16>* %b + %tt = xor <1 x i16> %x, %bb + store <1 x i16> %tt, <1 x i16>* %b + br label %next + +next: + ret <1 x i16> %tt +} + + +define <1 x i32> @test_1xi32(<1 x i32> %x, <1 x i32>* %b) { + %bb = load <1 x i32>* %b + %tt = xor <1 x i32> %x, %bb + store <1 x i32> %tt, <1 x i32>* %b + br label %next + +next: + ret <1 x i32> %tt +} + + +define <1 x i64> @test_1xi64(<1 x i64> %x, <1 x i64>* %b) { + %bb = load <1 x i64>* %b + %tt = xor <1 x i64> %x, %bb + store <1 x i64> %tt, <1 x i64>* %b + br label %next + +next: + ret <1 x i64> %tt +} + + +define <1 x i128> @test_1xi128(<1 x i128> %x, <1 x i128>* %b) { + %bb = load <1 x i128>* %b + %tt = xor <1 x i128> %x, %bb + store <1 x i128> %tt, <1 x i128>* %b + br label %next + +next: + ret <1 x i128> %tt +} + + +define <1 x i256> @test_1xi256(<1 x i256> %x, <1 x i256>* %b) { + %bb = load <1 x i256>* %b + %tt = xor <1 x i256> %x, %bb + store <1 x i256> %tt, <1 x i256>* %b + br label %next + +next: + ret <1 x i256> %tt +} + + +define <1 x i512> @test_1xi512(<1 x i512> %x, <1 x i512>* %b) { + %bb = load <1 x i512>* %b + %tt = xor <1 x i512> %x, %bb + store <1 x i512> %tt, <1 x i512>* %b + br label %next + +next: + ret <1 x i512> %tt +} + + +define <2 x i8> @test_2xi8(<2 x i8> %x, <2 x i8>* %b) { + %bb = load <2 x i8>* %b + %tt = xor <2 x i8> %x, %bb + store <2 x i8> %tt, <2 x i8>* %b + br label %next + +next: + ret <2 x i8> %tt +} + + +define <2 x i16> 
@test_2xi16(<2 x i16> %x, <2 x i16>* %b) { + %bb = load <2 x i16>* %b + %tt = xor <2 x i16> %x, %bb + store <2 x i16> %tt, <2 x i16>* %b + br label %next + +next: + ret <2 x i16> %tt +} + + +define <2 x i32> @test_2xi32(<2 x i32> %x, <2 x i32>* %b) { + %bb = load <2 x i32>* %b + %tt = xor <2 x i32> %x, %bb + store <2 x i32> %tt, <2 x i32>* %b + br label %next + +next: + ret <2 x i32> %tt +} + + +define <2 x i64> @test_2xi64(<2 x i64> %x, <2 x i64>* %b) { + %bb = load <2 x i64>* %b + %tt = xor <2 x i64> %x, %bb + store <2 x i64> %tt, <2 x i64>* %b + br label %next + +next: + ret <2 x i64> %tt +} + + +define <2 x i128> @test_2xi128(<2 x i128> %x, <2 x i128>* %b) { + %bb = load <2 x i128>* %b + %tt = xor <2 x i128> %x, %bb + store <2 x i128> %tt, <2 x i128>* %b + br label %next + +next: + ret <2 x i128> %tt +} + + +define <2 x i256> @test_2xi256(<2 x i256> %x, <2 x i256>* %b) { + %bb = load <2 x i256>* %b + %tt = xor <2 x i256> %x, %bb + store <2 x i256> %tt, <2 x i256>* %b + br label %next + +next: + ret <2 x i256> %tt +} + + +define <2 x i512> @test_2xi512(<2 x i512> %x, <2 x i512>* %b) { + %bb = load <2 x i512>* %b + %tt = xor <2 x i512> %x, %bb + store <2 x i512> %tt, <2 x i512>* %b + br label %next + +next: + ret <2 x i512> %tt +} + + +define <3 x i8> @test_3xi8(<3 x i8> %x, <3 x i8>* %b) { + %bb = load <3 x i8>* %b + %tt = xor <3 x i8> %x, %bb + store <3 x i8> %tt, <3 x i8>* %b + br label %next + +next: + ret <3 x i8> %tt +} + + +define <3 x i16> @test_3xi16(<3 x i16> %x, <3 x i16>* %b) { + %bb = load <3 x i16>* %b + %tt = xor <3 x i16> %x, %bb + store <3 x i16> %tt, <3 x i16>* %b + br label %next + +next: + ret <3 x i16> %tt +} + + +define <3 x i32> @test_3xi32(<3 x i32> %x, <3 x i32>* %b) { + %bb = load <3 x i32>* %b + %tt = xor <3 x i32> %x, %bb + store <3 x i32> %tt, <3 x i32>* %b + br label %next + +next: + ret <3 x i32> %tt +} + + +define <3 x i64> @test_3xi64(<3 x i64> %x, <3 x i64>* %b) { + %bb = load <3 x i64>* %b + %tt = xor <3 x i64> %x, %bb + store 
<3 x i64> %tt, <3 x i64>* %b + br label %next + +next: + ret <3 x i64> %tt +} + + +define <3 x i128> @test_3xi128(<3 x i128> %x, <3 x i128>* %b) { + %bb = load <3 x i128>* %b + %tt = xor <3 x i128> %x, %bb + store <3 x i128> %tt, <3 x i128>* %b + br label %next + +next: + ret <3 x i128> %tt +} + + +define <3 x i256> @test_3xi256(<3 x i256> %x, <3 x i256>* %b) { + %bb = load <3 x i256>* %b + %tt = xor <3 x i256> %x, %bb + store <3 x i256> %tt, <3 x i256>* %b + br label %next + +next: + ret <3 x i256> %tt +} + + +define <3 x i512> @test_3xi512(<3 x i512> %x, <3 x i512>* %b) { + %bb = load <3 x i512>* %b + %tt = xor <3 x i512> %x, %bb + store <3 x i512> %tt, <3 x i512>* %b + br label %next + +next: + ret <3 x i512> %tt +} + + +define <4 x i8> @test_4xi8(<4 x i8> %x, <4 x i8>* %b) { + %bb = load <4 x i8>* %b + %tt = xor <4 x i8> %x, %bb + store <4 x i8> %tt, <4 x i8>* %b + br label %next + +next: + ret <4 x i8> %tt +} + + +define <4 x i16> @test_4xi16(<4 x i16> %x, <4 x i16>* %b) { + %bb = load <4 x i16>* %b + %tt = xor <4 x i16> %x, %bb + store <4 x i16> %tt, <4 x i16>* %b + br label %next + +next: + ret <4 x i16> %tt +} + + +define <4 x i32> @test_4xi32(<4 x i32> %x, <4 x i32>* %b) { + %bb = load <4 x i32>* %b + %tt = xor <4 x i32> %x, %bb + store <4 x i32> %tt, <4 x i32>* %b + br label %next + +next: + ret <4 x i32> %tt +} + + +define <4 x i64> @test_4xi64(<4 x i64> %x, <4 x i64>* %b) { + %bb = load <4 x i64>* %b + %tt = xor <4 x i64> %x, %bb + store <4 x i64> %tt, <4 x i64>* %b + br label %next + +next: + ret <4 x i64> %tt +} + + +define <4 x i128> @test_4xi128(<4 x i128> %x, <4 x i128>* %b) { + %bb = load <4 x i128>* %b + %tt = xor <4 x i128> %x, %bb + store <4 x i128> %tt, <4 x i128>* %b + br label %next + +next: + ret <4 x i128> %tt +} + + +define <4 x i256> @test_4xi256(<4 x i256> %x, <4 x i256>* %b) { + %bb = load <4 x i256>* %b + %tt = xor <4 x i256> %x, %bb + store <4 x i256> %tt, <4 x i256>* %b + br label %next + +next: + ret <4 x i256> %tt +} + + +define 
<4 x i512> @test_4xi512(<4 x i512> %x, <4 x i512>* %b) { + %bb = load <4 x i512>* %b + %tt = xor <4 x i512> %x, %bb + store <4 x i512> %tt, <4 x i512>* %b + br label %next + +next: + ret <4 x i512> %tt +} + + +define <5 x i8> @test_5xi8(<5 x i8> %x, <5 x i8>* %b) { + %bb = load <5 x i8>* %b + %tt = xor <5 x i8> %x, %bb + store <5 x i8> %tt, <5 x i8>* %b + br label %next + +next: + ret <5 x i8> %tt +} + + +define <5 x i16> @test_5xi16(<5 x i16> %x, <5 x i16>* %b) { + %bb = load <5 x i16>* %b + %tt = xor <5 x i16> %x, %bb + store <5 x i16> %tt, <5 x i16>* %b + br label %next + +next: + ret <5 x i16> %tt +} + + +define <5 x i32> @test_5xi32(<5 x i32> %x, <5 x i32>* %b) { + %bb = load <5 x i32>* %b + %tt = xor <5 x i32> %x, %bb + store <5 x i32> %tt, <5 x i32>* %b + br label %next + +next: + ret <5 x i32> %tt +} + + +define <5 x i64> @test_5xi64(<5 x i64> %x, <5 x i64>* %b) { + %bb = load <5 x i64>* %b + %tt = xor <5 x i64> %x, %bb + store <5 x i64> %tt, <5 x i64>* %b + br label %next + +next: + ret <5 x i64> %tt +} + + +define <5 x i128> @test_5xi128(<5 x i128> %x, <5 x i128>* %b) { + %bb = load <5 x i128>* %b + %tt = xor <5 x i128> %x, %bb + store <5 x i128> %tt, <5 x i128>* %b + br label %next + +next: + ret <5 x i128> %tt +} + + +define <5 x i256> @test_5xi256(<5 x i256> %x, <5 x i256>* %b) { + %bb = load <5 x i256>* %b + %tt = xor <5 x i256> %x, %bb + store <5 x i256> %tt, <5 x i256>* %b + br label %next + +next: + ret <5 x i256> %tt +} + + +define <5 x i512> @test_5xi512(<5 x i512> %x, <5 x i512>* %b) { + %bb = load <5 x i512>* %b + %tt = xor <5 x i512> %x, %bb + store <5 x i512> %tt, <5 x i512>* %b + br label %next + +next: + ret <5 x i512> %tt +} + +
diff --git a/src/LLVM/test/CodeGen/X86/membarrier.ll b/src/LLVM/test/CodeGen/X86/membarrier.ll new file mode 100644 index 0000000..5e569aa --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/membarrier.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86-64 -mattr=-sse -O0 +; PR9675 + +define i32 @t() { +entry: + %i = alloca i32, align 4 + store i32 1, i32* %i, align 4 + fence seq_cst + %0 = atomicrmw sub i32* %i, i32 1 monotonic + fence seq_cst + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/X86/memcmp.ll b/src/LLVM/test/CodeGen/X86/memcmp.ll new file mode 100644 index 0000000..f4bc1bb --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/memcmp.ll
@@ -0,0 +1,111 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s + +; This tests codegen time inlining/optimization of memcmp +; rdar://6480398 + +@.str = private constant [23 x i8] c"fooooooooooooooooooooo\00", align 1 ; <[23 x i8]*> [#uses=1] + +declare i32 @memcmp(...) + +define void @memcmp2(i8* %X, i8* %Y, i32* nocapture %P) nounwind { +entry: + %0 = tail call i32 (...)* @memcmp(i8* %X, i8* %Y, i32 2) nounwind ; <i32> [#uses=1] + %1 = icmp eq i32 %0, 0 ; <i1> [#uses=1] + br i1 %1, label %return, label %bb + +bb: ; preds = %entry + store i32 4, i32* %P, align 4 + ret void + +return: ; preds = %entry + ret void +; CHECK: memcmp2: +; CHECK: movw ([[A0:%rdi|%rcx]]), %ax +; CHECK: cmpw ([[A1:%rsi|%rdx]]), %ax +} + +define void @memcmp2a(i8* %X, i32* nocapture %P) nounwind { +entry: + %0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([23 x i8]* @.str, i32 0, i32 1), i32 2) nounwind ; <i32> [#uses=1] + %1 = icmp eq i32 %0, 0 ; <i1> [#uses=1] + br i1 %1, label %return, label %bb + +bb: ; preds = %entry + store i32 4, i32* %P, align 4 + ret void + +return: ; preds = %entry + ret void +; CHECK: memcmp2a: +; CHECK: cmpw $28527, ([[A0]]) +} + + +define void @memcmp4(i8* %X, i8* %Y, i32* nocapture %P) nounwind { +entry: + %0 = tail call i32 (...)* @memcmp(i8* %X, i8* %Y, i32 4) nounwind ; <i32> [#uses=1] + %1 = icmp eq i32 %0, 0 ; <i1> [#uses=1] + br i1 %1, label %return, label %bb + +bb: ; preds = %entry + store i32 4, i32* %P, align 4 + ret void + +return: ; preds = %entry + ret void +; CHECK: memcmp4: +; CHECK: movl ([[A0]]), %eax +; CHECK: cmpl ([[A1]]), %eax +} + +define void @memcmp4a(i8* %X, i32* nocapture %P) nounwind { +entry: + %0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([23 x i8]* @.str, i32 0, i32 1), i32 4) nounwind ; <i32> [#uses=1] + %1 = icmp eq i32 %0, 0 ; <i1> [#uses=1] + br i1 %1, label %return, label %bb + +bb: ; preds = %entry + store i32 4, i32* 
%P, align 4 + ret void + +return: ; preds = %entry + ret void +; CHECK: memcmp4a: +; CHECK: cmpl $1869573999, ([[A0]]) +} + +define void @memcmp8(i8* %X, i8* %Y, i32* nocapture %P) nounwind { +entry: + %0 = tail call i32 (...)* @memcmp(i8* %X, i8* %Y, i32 8) nounwind ; <i32> [#uses=1] + %1 = icmp eq i32 %0, 0 ; <i1> [#uses=1] + br i1 %1, label %return, label %bb + +bb: ; preds = %entry + store i32 4, i32* %P, align 4 + ret void + +return: ; preds = %entry + ret void +; CHECK: memcmp8: +; CHECK: movq ([[A0]]), %rax +; CHECK: cmpq ([[A1]]), %rax +} + +define void @memcmp8a(i8* %X, i32* nocapture %P) nounwind { +entry: + %0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([23 x i8]* @.str, i32 0, i32 0), i32 8) nounwind ; <i32> [#uses=1] + %1 = icmp eq i32 %0, 0 ; <i1> [#uses=1] + br i1 %1, label %return, label %bb + +bb: ; preds = %entry + store i32 4, i32* %P, align 4 + ret void + +return: ; preds = %entry + ret void +; CHECK: memcmp8a: +; CHECK: movabsq $8029759185026510694, %rax +; CHECK: cmpq %rax, ([[A0]]) +} +
diff --git a/src/LLVM/test/CodeGen/X86/memcpy-2.ll b/src/LLVM/test/CodeGen/X86/memcpy-2.ll new file mode 100644 index 0000000..eae2e70 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/memcpy-2.ll
@@ -0,0 +1,167 @@ +; RUN: llc < %s -mattr=+sse2 -mtriple=i686-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=SSE2 +; RUN: llc < %s -mattr=+sse,-sse2 -mtriple=i686-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=SSE1 +; RUN: llc < %s -mattr=-sse -mtriple=i686-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=NOSSE +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=X86-64 + +@.str = internal constant [25 x i8] c"image\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00" +@.str2 = internal constant [30 x i8] c"xxxxxxxxxxxxxxxxxxxxxxxxxxxxx\00", align 4 + +define void @t1(i32 %argc, i8** %argv) nounwind { +entry: +; SSE2: t1: +; SSE2: movaps _.str, %xmm0 +; SSE2: movaps %xmm0 +; SSE2: movb $0 +; SSE2: movl $0 +; SSE2: movl $0 + +; SSE1: t1: +; SSE1: movaps _.str, %xmm0 +; SSE1: movaps %xmm0 +; SSE1: movb $0 +; SSE1: movl $0 +; SSE1: movl $0 + +; NOSSE: t1: +; NOSSE: movb $0 +; NOSSE: movl $0 +; NOSSE: movl $0 +; NOSSE: movl $0 +; NOSSE: movl $0 +; NOSSE: movl $101 +; NOSSE: movl $1734438249 + +; X86-64: t1: +; X86-64: movaps _.str(%rip), %xmm0 +; X86-64: movaps %xmm0 +; X86-64: movb $0 +; X86-64: movq $0 + %tmp1 = alloca [25 x i8] + %tmp2 = bitcast [25 x i8]* %tmp1 to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* getelementptr inbounds ([25 x i8]* @.str, i32 0, i32 0), i32 25, i32 1, i1 false) + unreachable +} + +;rdar://7774704 +%struct.s0 = type { [2 x double] } + +define void @t2(%struct.s0* nocapture %a, %struct.s0* nocapture %b) nounwind ssp { +entry: +; SSE2: t2: +; SSE2: movaps (%eax), %xmm0 +; SSE2: movaps %xmm0, (%eax) + +; SSE1: t2: +; SSE1: movaps (%eax), %xmm0 +; SSE1: movaps %xmm0, (%eax) + +; NOSSE: t2: +; NOSSE: movl +; NOSSE: movl +; NOSSE: movl +; NOSSE: movl +; NOSSE: movl +; NOSSE: movl +; NOSSE: movl +; NOSSE: movl +; NOSSE: movl +; NOSSE: movl + +; X86-64: t2: +; X86-64: movaps (%rsi), %xmm0 +; X86-64: movaps %xmm0, (%rdi) + %tmp2 = bitcast %struct.s0* %a to i8* ; <i8*> 
[#uses=1] + %tmp3 = bitcast %struct.s0* %b to i8* ; <i8*> [#uses=1] + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp3, i32 16, i32 16, i1 false) + ret void +} + +define void @t3(%struct.s0* nocapture %a, %struct.s0* nocapture %b) nounwind ssp { +entry: +; SSE2: t3: +; SSE2: movsd (%eax), %xmm0 +; SSE2: movsd 8(%eax), %xmm1 +; SSE2: movsd %xmm1, 8(%eax) +; SSE2: movsd %xmm0, (%eax) + +; SSE1: t3: +; SSE1: movl +; SSE1: movl +; SSE1: movl +; SSE1: movl +; SSE1: movl +; SSE1: movl +; SSE1: movl +; SSE1: movl +; SSE1: movl +; SSE1: movl + +; NOSSE: t3: +; NOSSE: movl +; NOSSE: movl +; NOSSE: movl +; NOSSE: movl +; NOSSE: movl +; NOSSE: movl +; NOSSE: movl +; NOSSE: movl +; NOSSE: movl +; NOSSE: movl + +; X86-64: t3: +; X86-64: movq (%rsi), %rax +; X86-64: movq 8(%rsi), %rcx +; X86-64: movq %rcx, 8(%rdi) +; X86-64: movq %rax, (%rdi) + %tmp2 = bitcast %struct.s0* %a to i8* ; <i8*> [#uses=1] + %tmp3 = bitcast %struct.s0* %b to i8* ; <i8*> [#uses=1] + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp3, i32 16, i32 8, i1 false) + ret void +} + +define void @t4() nounwind { +entry: +; SSE2: t4: +; SSE2: movw $120 +; SSE2: movl $2021161080 +; SSE2: movl $2021161080 +; SSE2: movl $2021161080 +; SSE2: movl $2021161080 +; SSE2: movl $2021161080 +; SSE2: movl $2021161080 +; SSE2: movl $2021161080 + +; SSE1: t4: +; SSE1: movw $120 +; SSE1: movl $2021161080 +; SSE1: movl $2021161080 +; SSE1: movl $2021161080 +; SSE1: movl $2021161080 +; SSE1: movl $2021161080 +; SSE1: movl $2021161080 +; SSE1: movl $2021161080 + +; NOSSE: t4: +; NOSSE: movw $120 +; NOSSE: movl $2021161080 +; NOSSE: movl $2021161080 +; NOSSE: movl $2021161080 +; NOSSE: movl $2021161080 +; NOSSE: movl $2021161080 +; NOSSE: movl $2021161080 +; NOSSE: movl $2021161080 + +; X86-64: t4: +; X86-64: movabsq $8680820740569200760, %rax +; X86-64: movq %rax +; X86-64: movq %rax +; X86-64: movq %rax +; X86-64: movw $120 +; X86-64: movl $2021161080 + %tmp1 = alloca [30 x i8] + %tmp2 = bitcast [30 x 
i8]* %tmp1 to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* getelementptr inbounds ([30 x i8]* @.str2, i32 0, i32 0), i32 30, i32 1, i1 false) + unreachable +} + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/memcpy.ll b/src/LLVM/test/CodeGen/X86/memcpy.ll new file mode 100644 index 0000000..f43b0bf --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/memcpy.ll
@@ -0,0 +1,81 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core2 | FileCheck %s -check-prefix=LINUX +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=DARWIN + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind + + +; Variable memcpy's should lower to calls. +define i8* @test1(i8* %a, i8* %b, i64 %n) nounwind { +entry: + tail call void @llvm.memcpy.p0i8.p0i8.i64( i8* %a, i8* %b, i64 %n, i32 1, i1 0 ) + ret i8* %a + +; LINUX: test1: +; LINUX: memcpy +} + +; Variable memcpy's should lower to calls. +define i8* @test2(i64* %a, i64* %b, i64 %n) nounwind { +entry: + %tmp14 = bitcast i64* %a to i8* + %tmp25 = bitcast i64* %b to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp14, i8* %tmp25, i64 %n, i32 8, i1 0 ) + ret i8* %tmp14 + +; LINUX: test2: +; LINUX: memcpy +} + +; Large constant memcpy's should lower to a call when optimizing for size. +; PR6623 + +; On the other hand, Darwin's definition of -Os is optimizing for size without +; hurting performance so it should just ignore optsize when expanding memcpy. +; rdar://8821501 +define void @test3(i8* nocapture %A, i8* nocapture %B) nounwind optsize noredzone { +entry: + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i32 1, i1 false) + ret void +; LINUX: test3: +; LINUX: memcpy + +; DARWIN: test3: +; DARWIN-NOT: memcpy +; DARWIN: movq +; DARWIN: movq +; DARWIN: movq +; DARWIN: movq +; DARWIN: movq +; DARWIN: movq +; DARWIN: movq +; DARWIN: movq +; DARWIN: movq +; DARWIN: movq +; DARWIN: movq +; DARWIN: movq +; DARWIN: movq +; DARWIN: movq +; DARWIN: movq +; DARWIN: movq +} + +; Large constant memcpy's should be inlined when not optimizing for size. 
+define void @test4(i8* nocapture %A, i8* nocapture %B) nounwind noredzone { +entry: + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i32 1, i1 false) + ret void +; LINUX: test4: +; LINUX movq +; LINUX movq +; LINUX movq +; LINUX movq +; LINUX movq +; LINUX movq +; LINUX movq +; LINUX movq +; LINUX movq +; LINUX movq +; LINUX movq +; LINUX movq +} +
diff --git a/src/LLVM/test/CodeGen/X86/memset-2.ll b/src/LLVM/test/CodeGen/X86/memset-2.ll new file mode 100644 index 0000000..b2bd72b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/memset-2.ll
@@ -0,0 +1,39 @@ +; RUN: llc -mtriple=i386-apple-darwin -mcpu=yonah < %s | FileCheck %s + +declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind + +define fastcc void @t1() nounwind { +entry: +; CHECK: t1: +; CHECK: calll _memset + call void @llvm.memset.p0i8.i32(i8* null, i8 0, i32 188, i32 1, i1 false) + unreachable +} + +define fastcc void @t2(i8 signext %c) nounwind { +entry: +; CHECK: t2: +; CHECK: calll _memset + call void @llvm.memset.p0i8.i32(i8* undef, i8 %c, i32 76, i32 1, i1 false) + unreachable +} + +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind + +define void @t3(i8* nocapture %s, i8 %a) nounwind { +entry: + tail call void @llvm.memset.p0i8.i32(i8* %s, i8 %a, i32 8, i32 1, i1 false) + ret void +; CHECK: t3: +; CHECK: imull $16843009 +} + +define void @t4(i8* nocapture %s, i8 %a) nounwind { +entry: + tail call void @llvm.memset.p0i8.i32(i8* %s, i8 %a, i32 15, i32 1, i1 false) + ret void +; CHECK: t4: +; CHECK: imull $16843009 +; CHECK-NOT: imul +; CHECK: ret +}
diff --git a/src/LLVM/test/CodeGen/X86/memset-3.ll b/src/LLVM/test/CodeGen/X86/memset-3.ll new file mode 100644 index 0000000..29febfa --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/memset-3.ll
@@ -0,0 +1,12 @@ +; RUN: llc -mtriple=i386-apple-darwin < %s | not grep memset +; PR6767 + +define void @t() nounwind ssp { +entry: + %buf = alloca [512 x i8], align 1 + %ptr = getelementptr inbounds [512 x i8]* %buf, i32 0, i32 0 + call void @llvm.memset.p0i8.i32(i8* %ptr, i8 undef, i32 512, i32 1, i1 false) + unreachable +} + +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/memset.ll b/src/LLVM/test/CodeGen/X86/memset.ll new file mode 100644 index 0000000..72b3e0f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/memset.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=x86 -mattr=-sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 9 +; RUN: llc < %s -march=x86 -mattr=+sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 3 + + %struct.x = type { i16, i16 } + +define void @t() nounwind { +entry: + %up_mvd = alloca [8 x %struct.x] ; <[8 x %struct.x]*> [#uses=2] + %up_mvd116 = getelementptr [8 x %struct.x]* %up_mvd, i32 0, i32 0 ; <%struct.x*> [#uses=1] + %tmp110117 = bitcast [8 x %struct.x]* %up_mvd to i8* ; <i8*> [#uses=1] + call void @llvm.memset.p0i8.i64(i8* %tmp110117, i8 0, i64 32, i32 8, i1 false) + call void @foo( %struct.x* %up_mvd116 ) nounwind + ret void +} + +declare void @foo(%struct.x*) + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/memset64-on-x86-32.ll b/src/LLVM/test/CodeGen/X86/memset64-on-x86-32.ll new file mode 100644 index 0000000..e20fce17 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/memset64-on-x86-32.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=nehalem | grep movups | count 5 +; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=core2 | grep movl | count 20 +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | grep movq | count 10 + +define void @bork() nounwind { +entry: + call void @llvm.memset.p0i8.i64(i8* null, i8 0, i64 80, i32 4, i1 false) + ret void +} + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/mfence.ll b/src/LLVM/test/CodeGen/X86/mfence.ll new file mode 100644 index 0000000..6056add --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/mfence.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep sfence +; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep lfence +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep mfence + +define void @test() { + fence seq_cst + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/mingw-alloca.ll b/src/LLVM/test/CodeGen/X86/mingw-alloca.ll new file mode 100644 index 0000000..ad1d04f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/mingw-alloca.ll
@@ -0,0 +1,30 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "i386-pc-mingw32" + +define void @foo1(i32 %N) nounwind { +entry: +; CHECK: _foo1: +; CHECK: calll __alloca + %tmp14 = alloca i32, i32 %N ; <i32*> [#uses=1] + call void @bar1( i32* %tmp14 ) + ret void +} + +declare void @bar1(i32*) + +define void @foo2(i32 inreg %N) nounwind { +entry: +; CHECK: _foo2: +; CHECK: andl $-16, %esp +; CHECK: pushl %eax +; CHECK: calll __alloca +; CHECK: movl 8028(%esp), %eax + %A2 = alloca [2000 x i32], align 16 ; <[2000 x i32]*> [#uses=1] + %A2.sub = getelementptr [2000 x i32]* %A2, i32 0, i32 0 ; <i32*> [#uses=1] + call void @bar2( i32* %A2.sub, i32 %N ) + ret void +} + +declare void @bar2(i32*, i32)
diff --git a/src/LLVM/test/CodeGen/X86/misaligned-memset.ll b/src/LLVM/test/CodeGen/X86/misaligned-memset.ll new file mode 100644 index 0000000..21f8bf2 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/misaligned-memset.ll
@@ -0,0 +1,15 @@ +; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=nehalem < %s | FileCheck %s + +@a = common global [3 x i64] zeroinitializer, align 16 + +define i32 @main() nounwind ssp { +; CHECK: movups +entry: + %retval = alloca i32, align 4 + store i32 0, i32* %retval + call void @llvm.memset.p0i8.i64(i8* bitcast (i64* getelementptr inbounds ([3 x i64]* @a, i32 0, i64 1) to i8*), i8 0, i64 16, i32 1, i1 false) + %0 = load i32* %retval + ret i32 %0 +} + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/mmx-arg-passing.ll b/src/LLVM/test/CodeGen/X86/mmx-arg-passing.ll new file mode 100644 index 0000000..b348512 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/mmx-arg-passing.ll
@@ -0,0 +1,27 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+mmx | grep mm0 | count 1 +; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+mmx | grep esp | count 2 +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep xmm0 +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep rdi +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | not grep movups +; +; On Darwin x86-32, v8i8, v4i16, v2i32 values are passed in MM[0-2]. +; On Darwin x86-32, v1i64 values are passed in memory. In this example, they +; are never moved into an MM register at all. +; On Darwin x86-64, v8i8, v4i16, v2i32 values are passed in XMM[0-7]. +; On Darwin x86-64, v1i64 values are passed in 64-bit GPRs. + +@u1 = external global x86_mmx + +define void @t1(x86_mmx %v1) nounwind { + store x86_mmx %v1, x86_mmx* @u1, align 8 + ret void +} + +@u2 = external global x86_mmx + +define void @t2(<1 x i64> %v1) nounwind { + %tmp = bitcast <1 x i64> %v1 to x86_mmx + store x86_mmx %tmp, x86_mmx* @u2, align 8 + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/mmx-arg-passing2.ll b/src/LLVM/test/CodeGen/X86/mmx-arg-passing2.ll new file mode 100644 index 0000000..c132d31 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/mmx-arg-passing2.ll
@@ -0,0 +1,28 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep movdq2q | count 2 +; Since the add is not an MMX add, we don't have a movq2dq any more. + +@g_v8qi = external global <8 x i8> + +define void @t1() nounwind { + %tmp3 = load <8 x i8>* @g_v8qi, align 8 + %tmp3a = bitcast <8 x i8> %tmp3 to x86_mmx + %tmp4 = tail call i32 (...)* @pass_v8qi( x86_mmx %tmp3a ) nounwind + ret void +} + +define void @t2(x86_mmx %v1, x86_mmx %v2) nounwind { + %v1a = bitcast x86_mmx %v1 to <8 x i8> + %v2b = bitcast x86_mmx %v2 to <8 x i8> + %tmp3 = add <8 x i8> %v1a, %v2b + %tmp3a = bitcast <8 x i8> %tmp3 to x86_mmx + %tmp4 = tail call i32 (...)* @pass_v8qi( x86_mmx %tmp3a ) nounwind + ret void +} + +define void @t3() nounwind { + call void @pass_v1di( <1 x i64> zeroinitializer ) + ret void +} + +declare i32 @pass_v8qi(...) +declare void @pass_v1di(<1 x i64>)
diff --git a/src/LLVM/test/CodeGen/X86/mmx-arith.ll b/src/LLVM/test/CodeGen/X86/mmx-arith.ll new file mode 100644 index 0000000..5f38e28 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/mmx-arith.ll
@@ -0,0 +1,309 @@ +; RUN: llc < %s -march=x86 -mattr=+mmx + +;; A basic sanity check to make sure that MMX arithmetic actually compiles. +;; First is a straight translation of the original with bitcasts as needed. + +define void @foo(x86_mmx* %A, x86_mmx* %B) { +entry: + %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1] + %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp1a = bitcast x86_mmx %tmp1 to <8 x i8> + %tmp3a = bitcast x86_mmx %tmp3 to <8 x i8> + %tmp4 = add <8 x i8> %tmp1a, %tmp3a ; <<8 x i8>> [#uses=2] + %tmp4a = bitcast <8 x i8> %tmp4 to x86_mmx + store x86_mmx %tmp4a, x86_mmx* %A + %tmp7 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.b( x86_mmx %tmp4a, x86_mmx %tmp7 ) ; <x86_mmx> [#uses=2] + store x86_mmx %tmp12, x86_mmx* %A + %tmp16 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.b( x86_mmx %tmp12, x86_mmx %tmp16 ) ; <x86_mmx> [#uses=2] + store x86_mmx %tmp21, x86_mmx* %A + %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp21a = bitcast x86_mmx %tmp21 to <8 x i8> + %tmp27a = bitcast x86_mmx %tmp27 to <8 x i8> + %tmp28 = sub <8 x i8> %tmp21a, %tmp27a ; <<8 x i8>> [#uses=2] + %tmp28a = bitcast <8 x i8> %tmp28 to x86_mmx + store x86_mmx %tmp28a, x86_mmx* %A + %tmp31 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.b( x86_mmx %tmp28a, x86_mmx %tmp31 ) ; <x86_mmx> [#uses=2] + store x86_mmx %tmp36, x86_mmx* %A + %tmp40 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.b( x86_mmx %tmp36, x86_mmx %tmp40 ) ; <x86_mmx> [#uses=2] + store x86_mmx %tmp45, x86_mmx* %A + %tmp51 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp45a = bitcast x86_mmx %tmp45 to <8 x i8> + %tmp51a = bitcast x86_mmx %tmp51 to <8 x i8> + %tmp52 = mul <8 x i8> %tmp45a, %tmp51a ; <<8 x i8>> [#uses=2] + %tmp52a = bitcast <8 x i8> %tmp52 to x86_mmx + store x86_mmx %tmp52a, x86_mmx* %A + %tmp57 = load x86_mmx* %B ; <x86_mmx> 
[#uses=1] + %tmp57a = bitcast x86_mmx %tmp57 to <8 x i8> + %tmp58 = and <8 x i8> %tmp52, %tmp57a ; <<8 x i8>> [#uses=2] + %tmp58a = bitcast <8 x i8> %tmp58 to x86_mmx + store x86_mmx %tmp58a, x86_mmx* %A + %tmp63 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp63a = bitcast x86_mmx %tmp63 to <8 x i8> + %tmp64 = or <8 x i8> %tmp58, %tmp63a ; <<8 x i8>> [#uses=2] + %tmp64a = bitcast <8 x i8> %tmp64 to x86_mmx + store x86_mmx %tmp64a, x86_mmx* %A + %tmp69 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp69a = bitcast x86_mmx %tmp69 to <8 x i8> + %tmp64b = bitcast x86_mmx %tmp64a to <8 x i8> + %tmp70 = xor <8 x i8> %tmp64b, %tmp69a ; <<8 x i8>> [#uses=1] + %tmp70a = bitcast <8 x i8> %tmp70 to x86_mmx + store x86_mmx %tmp70a, x86_mmx* %A + tail call void @llvm.x86.mmx.emms( ) + ret void +} + +define void @baz(x86_mmx* %A, x86_mmx* %B) { +entry: + %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1] + %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp1a = bitcast x86_mmx %tmp1 to <2 x i32> + %tmp3a = bitcast x86_mmx %tmp3 to <2 x i32> + %tmp4 = add <2 x i32> %tmp1a, %tmp3a ; <<2 x i32>> [#uses=2] + %tmp4a = bitcast <2 x i32> %tmp4 to x86_mmx + store x86_mmx %tmp4a, x86_mmx* %A + %tmp9 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp9a = bitcast x86_mmx %tmp9 to <2 x i32> + %tmp10 = sub <2 x i32> %tmp4, %tmp9a ; <<2 x i32>> [#uses=2] + %tmp10a = bitcast <2 x i32> %tmp4 to x86_mmx + store x86_mmx %tmp10a, x86_mmx* %A + %tmp15 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp10b = bitcast x86_mmx %tmp10a to <2 x i32> + %tmp15a = bitcast x86_mmx %tmp15 to <2 x i32> + %tmp16 = mul <2 x i32> %tmp10b, %tmp15a ; <<2 x i32>> [#uses=2] + %tmp16a = bitcast <2 x i32> %tmp16 to x86_mmx + store x86_mmx %tmp16a, x86_mmx* %A + %tmp21 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp16b = bitcast x86_mmx %tmp16a to <2 x i32> + %tmp21a = bitcast x86_mmx %tmp21 to <2 x i32> + %tmp22 = and <2 x i32> %tmp16b, %tmp21a ; <<2 x i32>> [#uses=2] + %tmp22a = bitcast <2 x i32> %tmp22 to x86_mmx + store 
x86_mmx %tmp22a, x86_mmx* %A + %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp22b = bitcast x86_mmx %tmp22a to <2 x i32> + %tmp27a = bitcast x86_mmx %tmp27 to <2 x i32> + %tmp28 = or <2 x i32> %tmp22b, %tmp27a ; <<2 x i32>> [#uses=2] + %tmp28a = bitcast <2 x i32> %tmp28 to x86_mmx + store x86_mmx %tmp28a, x86_mmx* %A + %tmp33 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp28b = bitcast x86_mmx %tmp28a to <2 x i32> + %tmp33a = bitcast x86_mmx %tmp33 to <2 x i32> + %tmp34 = xor <2 x i32> %tmp28b, %tmp33a ; <<2 x i32>> [#uses=1] + %tmp34a = bitcast <2 x i32> %tmp34 to x86_mmx + store x86_mmx %tmp34a, x86_mmx* %A + tail call void @llvm.x86.mmx.emms( ) + ret void +} + +define void @bar(x86_mmx* %A, x86_mmx* %B) { +entry: + %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1] + %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp1a = bitcast x86_mmx %tmp1 to <4 x i16> + %tmp3a = bitcast x86_mmx %tmp3 to <4 x i16> + %tmp4 = add <4 x i16> %tmp1a, %tmp3a ; <<4 x i16>> [#uses=2] + %tmp4a = bitcast <4 x i16> %tmp4 to x86_mmx + store x86_mmx %tmp4a, x86_mmx* %A + %tmp7 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.w( x86_mmx %tmp4a, x86_mmx %tmp7 ) ; <x86_mmx> [#uses=2] + store x86_mmx %tmp12, x86_mmx* %A + %tmp16 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.w( x86_mmx %tmp12, x86_mmx %tmp16 ) ; <x86_mmx> [#uses=2] + store x86_mmx %tmp21, x86_mmx* %A + %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp21a = bitcast x86_mmx %tmp21 to <4 x i16> + %tmp27a = bitcast x86_mmx %tmp27 to <4 x i16> + %tmp28 = sub <4 x i16> %tmp21a, %tmp27a ; <<4 x i16>> [#uses=2] + %tmp28a = bitcast <4 x i16> %tmp28 to x86_mmx + store x86_mmx %tmp28a, x86_mmx* %A + %tmp31 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.w( x86_mmx %tmp28a, x86_mmx %tmp31 ) ; <x86_mmx> [#uses=2] + store x86_mmx %tmp36, x86_mmx* %A + %tmp40 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + 
%tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.w( x86_mmx %tmp36, x86_mmx %tmp40 ) ; <x86_mmx> [#uses=2] + store x86_mmx %tmp45, x86_mmx* %A + %tmp51 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp45a = bitcast x86_mmx %tmp45 to <4 x i16> + %tmp51a = bitcast x86_mmx %tmp51 to <4 x i16> + %tmp52 = mul <4 x i16> %tmp45a, %tmp51a ; <<4 x i16>> [#uses=2] + %tmp52a = bitcast <4 x i16> %tmp52 to x86_mmx + store x86_mmx %tmp52a, x86_mmx* %A + %tmp55 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp60 = tail call x86_mmx @llvm.x86.mmx.pmulh.w( x86_mmx %tmp52a, x86_mmx %tmp55 ) ; <x86_mmx> [#uses=2] + store x86_mmx %tmp60, x86_mmx* %A + %tmp64 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp69 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd( x86_mmx %tmp60, x86_mmx %tmp64 ) ; <x86_mmx> [#uses=1] + %tmp70 = bitcast x86_mmx %tmp69 to x86_mmx ; <x86_mmx> [#uses=2] + store x86_mmx %tmp70, x86_mmx* %A + %tmp75 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp70a = bitcast x86_mmx %tmp70 to <4 x i16> + %tmp75a = bitcast x86_mmx %tmp75 to <4 x i16> + %tmp76 = and <4 x i16> %tmp70a, %tmp75a ; <<4 x i16>> [#uses=2] + %tmp76a = bitcast <4 x i16> %tmp76 to x86_mmx + store x86_mmx %tmp76a, x86_mmx* %A + %tmp81 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp76b = bitcast x86_mmx %tmp76a to <4 x i16> + %tmp81a = bitcast x86_mmx %tmp81 to <4 x i16> + %tmp82 = or <4 x i16> %tmp76b, %tmp81a ; <<4 x i16>> [#uses=2] + %tmp82a = bitcast <4 x i16> %tmp82 to x86_mmx + store x86_mmx %tmp82a, x86_mmx* %A + %tmp87 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp82b = bitcast x86_mmx %tmp82a to <4 x i16> + %tmp87a = bitcast x86_mmx %tmp87 to <4 x i16> + %tmp88 = xor <4 x i16> %tmp82b, %tmp87a ; <<4 x i16>> [#uses=1] + %tmp88a = bitcast <4 x i16> %tmp88 to x86_mmx + store x86_mmx %tmp88a, x86_mmx* %A + tail call void @llvm.x86.mmx.emms( ) + ret void +} + +;; The following is modified to use MMX intrinsics everywhere they work. 
+ +define void @fooa(x86_mmx* %A, x86_mmx* %B) { +entry: + %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1] + %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp4 = tail call x86_mmx @llvm.x86.mmx.padd.b( x86_mmx %tmp1, x86_mmx %tmp3 ) ; <x86_mmx> [#uses=2] + store x86_mmx %tmp4, x86_mmx* %A + %tmp7 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.b( x86_mmx %tmp4, x86_mmx %tmp7 ) ; <x86_mmx> [#uses=2] + store x86_mmx %tmp12, x86_mmx* %A + %tmp16 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.b( x86_mmx %tmp12, x86_mmx %tmp16 ) ; <x86_mmx> [#uses=2] + store x86_mmx %tmp21, x86_mmx* %A + %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp28 = tail call x86_mmx @llvm.x86.mmx.psub.b( x86_mmx %tmp21, x86_mmx %tmp27 ) ; <x86_mmx> [#uses=2] + store x86_mmx %tmp28, x86_mmx* %A + %tmp31 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.b( x86_mmx %tmp28, x86_mmx %tmp31 ) ; <x86_mmx> [#uses=2] + store x86_mmx %tmp36, x86_mmx* %A + %tmp40 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.b( x86_mmx %tmp36, x86_mmx %tmp40 ) ; <x86_mmx> [#uses=2] + store x86_mmx %tmp45, x86_mmx* %A + %tmp51 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp51a = bitcast x86_mmx %tmp51 to i64 + %tmp51aa = bitcast i64 %tmp51a to <8 x i8> + %tmp51b = bitcast x86_mmx %tmp45 to <8 x i8> + %tmp52 = mul <8 x i8> %tmp51b, %tmp51aa ; <x86_mmx> [#uses=2] + %tmp52a = bitcast <8 x i8> %tmp52 to i64 + %tmp52aa = bitcast i64 %tmp52a to x86_mmx + store x86_mmx %tmp52aa, x86_mmx* %A + %tmp57 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp58 = tail call x86_mmx @llvm.x86.mmx.pand( x86_mmx %tmp51, x86_mmx %tmp57 ) ; <x86_mmx> [#uses=2] + store x86_mmx %tmp58, x86_mmx* %A + %tmp63 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp64 = tail call x86_mmx @llvm.x86.mmx.por( x86_mmx %tmp58, x86_mmx %tmp63 ) ; <x86_mmx> [#uses=2] + store x86_mmx %tmp64, 
x86_mmx* %A + %tmp69 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp70 = tail call x86_mmx @llvm.x86.mmx.pxor( x86_mmx %tmp64, x86_mmx %tmp69 ) ; <x86_mmx> [#uses=2] + store x86_mmx %tmp70, x86_mmx* %A + tail call void @llvm.x86.mmx.emms( ) + ret void +} + +define void @baza(x86_mmx* %A, x86_mmx* %B) { +entry: + %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1] + %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp4 = tail call x86_mmx @llvm.x86.mmx.padd.d( x86_mmx %tmp1, x86_mmx %tmp3 ) ; <x86_mmx> [#uses=2] + store x86_mmx %tmp4, x86_mmx* %A + %tmp9 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp10 = tail call x86_mmx @llvm.x86.mmx.psub.d( x86_mmx %tmp4, x86_mmx %tmp9 ) ; <x86_mmx> [#uses=2] + store x86_mmx %tmp10, x86_mmx* %A + %tmp15 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp10a = bitcast x86_mmx %tmp10 to <2 x i32> + %tmp15a = bitcast x86_mmx %tmp15 to <2 x i32> + %tmp16 = mul <2 x i32> %tmp10a, %tmp15a ; <x86_mmx> [#uses=2] + %tmp16a = bitcast <2 x i32> %tmp16 to x86_mmx + store x86_mmx %tmp16a, x86_mmx* %A + %tmp21 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp22 = tail call x86_mmx @llvm.x86.mmx.pand( x86_mmx %tmp16a, x86_mmx %tmp21 ) ; <x86_mmx> [#uses=2] + store x86_mmx %tmp22, x86_mmx* %A + %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp28 = tail call x86_mmx @llvm.x86.mmx.por( x86_mmx %tmp22, x86_mmx %tmp27 ) ; <x86_mmx> [#uses=2] + store x86_mmx %tmp28, x86_mmx* %A + %tmp33 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp34 = tail call x86_mmx @llvm.x86.mmx.pxor( x86_mmx %tmp28, x86_mmx %tmp33 ) ; <x86_mmx> [#uses=2] + store x86_mmx %tmp34, x86_mmx* %A + tail call void @llvm.x86.mmx.emms( ) + ret void +} + +define void @bara(x86_mmx* %A, x86_mmx* %B) { +entry: + %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1] + %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp4 = tail call x86_mmx @llvm.x86.mmx.padd.w( x86_mmx %tmp1, x86_mmx %tmp3 ) ; <x86_mmx> [#uses=2] + store x86_mmx %tmp4, x86_mmx* %A + %tmp7 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + 
%tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.w( x86_mmx %tmp4, x86_mmx %tmp7 ) ; <x86_mmx> [#uses=2] + store x86_mmx %tmp12, x86_mmx* %A + %tmp16 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.w( x86_mmx %tmp12, x86_mmx %tmp16 ) ; <x86_mmx> [#uses=2] + store x86_mmx %tmp21, x86_mmx* %A + %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp28 = tail call x86_mmx @llvm.x86.mmx.psub.w( x86_mmx %tmp21, x86_mmx %tmp27 ) ; <x86_mmx> [#uses=2] + store x86_mmx %tmp28, x86_mmx* %A + %tmp31 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.w( x86_mmx %tmp28, x86_mmx %tmp31 ) ; <x86_mmx> [#uses=2] + store x86_mmx %tmp36, x86_mmx* %A + %tmp40 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.w( x86_mmx %tmp36, x86_mmx %tmp40 ) ; <x86_mmx> [#uses=2] + store x86_mmx %tmp45, x86_mmx* %A + %tmp51 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp52 = tail call x86_mmx @llvm.x86.mmx.pmull.w( x86_mmx %tmp45, x86_mmx %tmp51 ) ; <x86_mmx> [#uses=2] + store x86_mmx %tmp52, x86_mmx* %A + %tmp55 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp60 = tail call x86_mmx @llvm.x86.mmx.pmulh.w( x86_mmx %tmp52, x86_mmx %tmp55 ) ; <x86_mmx> [#uses=2] + store x86_mmx %tmp60, x86_mmx* %A + %tmp64 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp69 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd( x86_mmx %tmp60, x86_mmx %tmp64 ) ; <x86_mmx> [#uses=1] + %tmp70 = bitcast x86_mmx %tmp69 to x86_mmx ; <x86_mmx> [#uses=2] + store x86_mmx %tmp70, x86_mmx* %A + %tmp75 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp76 = tail call x86_mmx @llvm.x86.mmx.pand( x86_mmx %tmp70, x86_mmx %tmp75 ) ; <x86_mmx> [#uses=2] + store x86_mmx %tmp76, x86_mmx* %A + %tmp81 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp82 = tail call x86_mmx @llvm.x86.mmx.por( x86_mmx %tmp76, x86_mmx %tmp81 ) ; <x86_mmx> [#uses=2] + store x86_mmx %tmp82, x86_mmx* %A + %tmp87 = load x86_mmx* %B ; <x86_mmx> [#uses=1] + %tmp88 = 
tail call x86_mmx @llvm.x86.mmx.pxor( x86_mmx %tmp82, x86_mmx %tmp87 ) ; <x86_mmx> [#uses=2] + store x86_mmx %tmp88, x86_mmx* %A + tail call void @llvm.x86.mmx.emms( ) + ret void +} + +declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) + +declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) + +declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) + +declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) + +declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) + +declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) + +declare void @llvm.x86.mmx.emms() + +declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) +declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) +declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) +declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) +declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) +declare x86_mmx @llvm.x86.mmx.padds.d(x86_mmx, x86_mmx) +declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) +declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) +declare x86_mmx @llvm.x86.mmx.psubs.d(x86_mmx, x86_mmx) +declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) +declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) +declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) +declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) +declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) +declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) +declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) +
diff --git a/src/LLVM/test/CodeGen/X86/mmx-bitcast-to-i64.ll b/src/LLVM/test/CodeGen/X86/mmx-bitcast-to-i64.ll new file mode 100644 index 0000000..8b1840a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/mmx-bitcast-to-i64.ll
@@ -0,0 +1,31 @@ +; RUN: llc < %s -march=x86-64 | grep movd | count 4 + +define i64 @foo(x86_mmx* %p) { + %t = load x86_mmx* %p + %u = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %t, x86_mmx %t) + %s = bitcast x86_mmx %u to i64 + ret i64 %s +} +define i64 @goo(x86_mmx* %p) { + %t = load x86_mmx* %p + %u = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %t, x86_mmx %t) + %s = bitcast x86_mmx %u to i64 + ret i64 %s +} +define i64 @hoo(x86_mmx* %p) { + %t = load x86_mmx* %p + %u = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %t, x86_mmx %t) + %s = bitcast x86_mmx %u to i64 + ret i64 %s +} +define i64 @ioo(x86_mmx* %p) { + %t = load x86_mmx* %p + %u = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %t, x86_mmx %t) + %s = bitcast x86_mmx %u to i64 + ret i64 %s +} + +declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) +declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) +declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) +declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx)
diff --git a/src/LLVM/test/CodeGen/X86/mmx-builtins.ll b/src/LLVM/test/CodeGen/X86/mmx-builtins.ll new file mode 100644 index 0000000..3ac0e4e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/mmx-builtins.ll
@@ -0,0 +1,1324 @@ +; RUN: llc < %s -march=x86 -mattr=+mmx,+ssse3 | FileCheck %s + +declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test1(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: phaddw +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %2 = bitcast <4 x i16> %1 to x86_mmx + %3 = bitcast <4 x i16> %0 to x86_mmx + %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %2, x86_mmx %3) nounwind readnone + %5 = bitcast x86_mmx %4 to <4 x i16> + %6 = bitcast <4 x i16> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: pcmpgtd +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx + %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test87(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: pcmpgtw +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test86(<1 x i64> %a, <1 x i64> %b) nounwind readnone 
optsize ssp { +; CHECK: pcmpgtb +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test85(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: pcmpeqd +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx + %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test84(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: pcmpeqw +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test83(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: pcmpeqb +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to 
x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test82(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: punpckldq +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx + %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test81(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: punpcklwd +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test80(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: punpcklbw +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = 
extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test79(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: punpckhdq +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx + %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test78(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: punpckhwd +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test77(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: punpckhbw +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test76(<1 x i64> %a, <1 x i64> %b) nounwind readnone 
optsize ssp { +; CHECK: packuswb +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test75(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: packssdw +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx + %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test74(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: packsswb +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) nounwind readnone + +define i64 @test73(<1 x i64> %a) nounwind readnone optsize ssp { +; CHECK: psrad +entry: + %0 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx + %1 = tail call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %mmx_var.i, i32 3) nounwind + %2 = 
bitcast x86_mmx %1 to <2 x i32> + %3 = bitcast <2 x i32> %2 to <1 x i64> + %4 = extractelement <1 x i64> %3, i32 0 + ret i64 %4 +} + +declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) nounwind readnone + +define i64 @test72(<1 x i64> %a) nounwind readnone optsize ssp { +; CHECK: psraw +entry: + %0 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx + %1 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %mmx_var.i, i32 3) nounwind + %2 = bitcast x86_mmx %1 to <4 x i16> + %3 = bitcast <4 x i16> %2 to <1 x i64> + %4 = extractelement <1 x i64> %3, i32 0 + ret i64 %4 +} + +declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone + +define i64 @test71(<1 x i64> %a) nounwind readnone optsize ssp { +; CHECK: psrlq +entry: + %0 = extractelement <1 x i64> %a, i32 0 + %mmx_var.i = bitcast i64 %0 to x86_mmx + %1 = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %mmx_var.i, i32 3) nounwind + %2 = bitcast x86_mmx %1 to i64 + ret i64 %2 +} + +declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32) nounwind readnone + +define i64 @test70(<1 x i64> %a) nounwind readnone optsize ssp { +; CHECK: psrld +entry: + %0 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx + %1 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %mmx_var.i, i32 3) nounwind + %2 = bitcast x86_mmx %1 to <2 x i32> + %3 = bitcast <2 x i32> %2 to <1 x i64> + %4 = extractelement <1 x i64> %3, i32 0 + ret i64 %4 +} + +declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone + +define i64 @test69(<1 x i64> %a) nounwind readnone optsize ssp { +; CHECK: psrlw +entry: + %0 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx + %1 = tail call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %mmx_var.i, i32 3) nounwind + %2 = bitcast x86_mmx %1 to <4 x i16> + %3 = bitcast <4 x i16> %2 to <1 x i64> + %4 = extractelement <1 x i64> %3, i32 0 + ret i64 %4 +} + +declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind 
readnone + +define i64 @test68(<1 x i64> %a) nounwind readnone optsize ssp { +; CHECK: psllq +entry: + %0 = extractelement <1 x i64> %a, i32 0 + %mmx_var.i = bitcast i64 %0 to x86_mmx + %1 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %mmx_var.i, i32 3) nounwind + %2 = bitcast x86_mmx %1 to i64 + ret i64 %2 +} + +declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) nounwind readnone + +define i64 @test67(<1 x i64> %a) nounwind readnone optsize ssp { +; CHECK: pslld +entry: + %0 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx + %1 = tail call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %mmx_var.i, i32 3) nounwind + %2 = bitcast x86_mmx %1 to <2 x i32> + %3 = bitcast <2 x i32> %2 to <1 x i64> + %4 = extractelement <1 x i64> %3, i32 0 + ret i64 %4 +} + +declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) nounwind readnone + +define i64 @test66(<1 x i64> %a) nounwind readnone optsize ssp { +; CHECK: psllw +entry: + %0 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx + %1 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %mmx_var.i, i32 3) nounwind + %2 = bitcast x86_mmx %1 to <4 x i16> + %3 = bitcast <4 x i16> %2 to <1 x i64> + %4 = extractelement <1 x i64> %3, i32 0 + ret i64 %4 +} + +declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test65(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: psrad +entry: + %0 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx + %1 = extractelement <1 x i64> %b, i32 0 + %mmx_var1.i = bitcast i64 %1 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test64(<1 x i64> %a, <1 x i64> %b) nounwind readnone 
optsize ssp { +; CHECK: psraw +entry: + %0 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx + %1 = extractelement <1 x i64> %b, i32 0 + %mmx_var1.i = bitcast i64 %1 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test63(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: psrlq +entry: + %0 = extractelement <1 x i64> %a, i32 0 + %mmx_var.i = bitcast i64 %0 to x86_mmx + %1 = extractelement <1 x i64> %b, i32 0 + %mmx_var1.i = bitcast i64 %1 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to i64 + ret i64 %3 +} + +declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test62(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: psrld +entry: + %0 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx + %1 = extractelement <1 x i64> %b, i32 0 + %mmx_var1.i = bitcast i64 %1 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test61(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: psrlw +entry: + %0 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx + %1 = extractelement <1 x i64> %b, i32 0 + %mmx_var1.i = bitcast i64 %1 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx 
%2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test60(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: psllq +entry: + %0 = extractelement <1 x i64> %a, i32 0 + %mmx_var.i = bitcast i64 %0 to x86_mmx + %1 = extractelement <1 x i64> %b, i32 0 + %mmx_var1.i = bitcast i64 %1 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to i64 + ret i64 %3 +} + +declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test59(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: pslld +entry: + %0 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx + %1 = extractelement <1 x i64> %b, i32 0 + %mmx_var1.i = bitcast i64 %1 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test58(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: psllw +entry: + %0 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx + %1 = extractelement <1 x i64> %b, i32 0 + %mmx_var1.i = bitcast i64 %1 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test56(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: pxor +entry: + %0 = bitcast <1 x i64> %b to <2 x 
i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx + %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test55(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: por +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx + %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.por(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test54(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: pandn +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx + %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test53(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: pand +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx + %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pand(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind 
+ %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test52(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: pmullw +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +define i64 @test51(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: pmullw +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test50(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: pmulhw +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test49(<1 x i64> %a, <1 x i64> %b) nounwind readnone 
optsize ssp { +; CHECK: pmaddwd +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test48(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: psubusw +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test47(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: psubusb +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test46(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: psubsw +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to 
x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test45(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: psubsb +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +define i64 @test44(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: psubq +entry: + %0 = extractelement <1 x i64> %a, i32 0 + %mmx_var = bitcast i64 %0 to x86_mmx + %1 = extractelement <1 x i64> %b, i32 0 + %mmx_var1 = bitcast i64 %1 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %mmx_var, x86_mmx %mmx_var1) + %3 = bitcast x86_mmx %2 to i64 + ret i64 %3 +} + +declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone + +declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test43(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: psubd +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx + %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 
@test42(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: psubw +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test41(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: psubb +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test40(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: paddusw +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test39(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: paddusb +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to 
x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test38(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: paddsw +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test37(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: paddsb +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test36(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: paddq +entry: + %0 = extractelement <1 x i64> %a, i32 0 + %mmx_var = bitcast i64 %0 to x86_mmx + %1 = extractelement <1 x i64> %b, i32 0 + %mmx_var1 = bitcast i64 %1 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %mmx_var, x86_mmx %mmx_var1) + %3 = bitcast x86_mmx %2 to i64 + ret i64 %3 +} + +declare x86_mmx 
@llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test35(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: paddd +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx + %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test34(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: paddw +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test33(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: paddb +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test32(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: psadbw +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x 
i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to i64 + ret i64 %3 +} + +declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test31(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: pminsw +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test30(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: pminub +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test29(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: pmaxsw +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = 
extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test28(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: pmaxub +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test27(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: pavgw +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test26(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: pavgb +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare void @llvm.x86.mmx.movnt.dq(x86_mmx*, x86_mmx) nounwind + +define void @test25(<1 x i64>* %p, <1 x i64> %a) nounwind optsize ssp { +; CHECK: movntq +entry: + 
%mmx_ptr_var.i = bitcast <1 x i64>* %p to x86_mmx* + %0 = extractelement <1 x i64> %a, i32 0 + %mmx_var.i = bitcast i64 %0 to x86_mmx + tail call void @llvm.x86.mmx.movnt.dq(x86_mmx* %mmx_ptr_var.i, x86_mmx %mmx_var.i) nounwind + ret void +} + +declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone + +define i32 @test24(<1 x i64> %a) nounwind readnone optsize ssp { +; CHECK: pmovmskb +entry: + %0 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %0 to x86_mmx + %1 = tail call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %mmx_var.i) nounwind + ret i32 %1 +} + +declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*) nounwind + +define void @test23(<1 x i64> %d, <1 x i64> %n, i8* %p) nounwind optsize ssp { +; CHECK: maskmovq +entry: + %0 = bitcast <1 x i64> %n to <8 x i8> + %1 = bitcast <1 x i64> %d to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + tail call void @llvm.x86.mmx.maskmovq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i, i8* %p) nounwind + ret void +} + +declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test22(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: pmulhuw +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone + +define i64 @test21(<1 x i64> %a) nounwind readnone optsize ssp { +; CHECK: pshufw +entry: + %0 = bitcast <1 x i64> %a to <4 x i16> + %1 = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone + %3 = bitcast x86_mmx %2 to 
<4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test20(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: pmuludq +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx + %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to i64 + ret i64 %3 +} + +declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone + +define <2 x double> @test19(<1 x i64> %a) nounwind readnone optsize ssp { +; CHECK: cvtpi2pd +entry: + %0 = bitcast <1 x i64> %a to <2 x i32> + %1 = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %1) nounwind readnone + ret <2 x double> %2 +} + +declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone + +define i64 @test18(<2 x double> %a) nounwind readnone optsize ssp { +; CHECK: cvttpd2pi +entry: + %0 = tail call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a) nounwind readnone + %1 = bitcast x86_mmx %0 to <2 x i32> + %2 = bitcast <2 x i32> %1 to <1 x i64> + %3 = extractelement <1 x i64> %2, i32 0 + ret i64 %3 +} + +declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone + +define i64 @test17(<2 x double> %a) nounwind readnone optsize ssp { +; CHECK: cvtpd2pi +entry: + %0 = tail call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a) nounwind readnone + %1 = bitcast x86_mmx %0 to <2 x i32> + %2 = bitcast <2 x i32> %1 to <1 x i64> + %3 = extractelement <1 x i64> %2, i32 0 + ret i64 %3 +} + +declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone + +define i64 @test16(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: palignr +entry: + %0 = extractelement <1 x i64> 
%a, i32 0 + %mmx_var = bitcast i64 %0 to x86_mmx + %1 = extractelement <1 x i64> %b, i32 0 + %mmx_var1 = bitcast i64 %1 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %mmx_var, x86_mmx %mmx_var1, i8 16) + %3 = bitcast x86_mmx %2 to i64 + ret i64 %3 +} + +declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone + +define i64 @test15(<1 x i64> %a) nounwind readnone optsize ssp { +; CHECK: pabsd +entry: + %0 = bitcast <1 x i64> %a to <2 x i32> + %1 = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %1) nounwind readnone + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone + +define i64 @test14(<1 x i64> %a) nounwind readnone optsize ssp { +; CHECK: pabsw +entry: + %0 = bitcast <1 x i64> %a to <4 x i16> + %1 = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %1) nounwind readnone + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone + +define i64 @test13(<1 x i64> %a) nounwind readnone optsize ssp { +; CHECK: pabsb +entry: + %0 = bitcast <1 x i64> %a to <8 x i8> + %1 = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %1) nounwind readnone + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test12(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: psignd +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %2 = bitcast <2 x i32> %1 to x86_mmx + %3 = bitcast <2 x i32> %0 to x86_mmx + %4 = tail 
call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %2, x86_mmx %3) nounwind readnone + %5 = bitcast x86_mmx %4 to <2 x i32> + %6 = bitcast <2 x i32> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test11(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: psignw +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %2 = bitcast <4 x i16> %1 to x86_mmx + %3 = bitcast <4 x i16> %0 to x86_mmx + %4 = tail call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %2, x86_mmx %3) nounwind readnone + %5 = bitcast x86_mmx %4 to <4 x i16> + %6 = bitcast <4 x i16> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test10(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: psignb +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %2 = bitcast <8 x i8> %1 to x86_mmx + %3 = bitcast <8 x i8> %0 to x86_mmx + %4 = tail call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %2, x86_mmx %3) nounwind readnone + %5 = bitcast x86_mmx %4 to <8 x i8> + %6 = bitcast <8 x i8> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test9(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: pshufb +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %2 = bitcast <8 x i8> %1 to x86_mmx + %3 = bitcast <8 x i8> %0 to x86_mmx + %4 = tail call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %2, x86_mmx %3) nounwind readnone + %5 = bitcast x86_mmx %4 to <8 x i8> + %6 = bitcast <8 x i8> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) 
nounwind readnone + +define i64 @test8(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: pmulhrsw +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %2 = bitcast <4 x i16> %1 to x86_mmx + %3 = bitcast <4 x i16> %0 to x86_mmx + %4 = tail call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %2, x86_mmx %3) nounwind readnone + %5 = bitcast x86_mmx %4 to <4 x i16> + %6 = bitcast <4 x i16> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test7(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: pmaddubsw +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %2 = bitcast <8 x i8> %1 to x86_mmx + %3 = bitcast <8 x i8> %0 to x86_mmx + %4 = tail call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone + %5 = bitcast x86_mmx %4 to <8 x i8> + %6 = bitcast <8 x i8> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test6(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: phsubsw +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %2 = bitcast <4 x i16> %1 to x86_mmx + %3 = bitcast <4 x i16> %0 to x86_mmx + %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone + %5 = bitcast x86_mmx %4 to <4 x i16> + %6 = bitcast <4 x i16> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test5(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: phsubd +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %2 = bitcast <2 x i32> %1 to x86_mmx + %3 = bitcast <2 x 
i32> %0 to x86_mmx + %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %2, x86_mmx %3) nounwind readnone + %5 = bitcast x86_mmx %4 to <2 x i32> + %6 = bitcast <2 x i32> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test4(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: phsubw +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %2 = bitcast <4 x i16> %1 to x86_mmx + %3 = bitcast <4 x i16> %0 to x86_mmx + %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %2, x86_mmx %3) nounwind readnone + %5 = bitcast x86_mmx %4 to <4 x i16> + %6 = bitcast <4 x i16> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test3(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: phaddsw +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %2 = bitcast <4 x i16> %1 to x86_mmx + %3 = bitcast <4 x i16> %0 to x86_mmx + %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %2, x86_mmx %3) nounwind readnone + %5 = bitcast x86_mmx %4 to <4 x i16> + %6 = bitcast <4 x i16> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test2(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { +; CHECK: phaddd +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %2 = bitcast <2 x i32> %1 to x86_mmx + %3 = bitcast <2 x i32> %0 to x86_mmx + %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %2, x86_mmx %3) nounwind readnone + %5 = bitcast x86_mmx %4 to <2 x i32> + %6 = bitcast <2 x i32> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +}
diff --git a/src/LLVM/test/CodeGen/X86/mmx-copy-gprs.ll b/src/LLVM/test/CodeGen/X86/mmx-copy-gprs.ll new file mode 100644 index 0000000..3778755 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/mmx-copy-gprs.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s +; RUN: llc < %s -march=x86 -mattr=-sse2 | FileCheck %s +; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s + +; This test should use GPRs to copy the mmx value, not MMX regs. Using mmx regs, +; increases the places that need to use emms. +; CHECK-NOT: %mm +; CHECK-NOT: emms +; rdar://5741668 + +define void @foo(<1 x i64>* %x, <1 x i64>* %y) nounwind { +entry: + %tmp1 = load <1 x i64>* %y, align 8 ; <<1 x i64>> [#uses=1] + store <1 x i64> %tmp1, <1 x i64>* %x, align 8 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/mmx-emms.ll b/src/LLVM/test/CodeGen/X86/mmx-emms.ll new file mode 100644 index 0000000..d6bdc45 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/mmx-emms.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=x86 -mattr=+mmx | grep emms +define void @foo() { +entry: + call void @llvm.x86.mmx.emms( ) + br label %return + +return: ; preds = %entry + ret void +} + +declare void @llvm.x86.mmx.emms()
diff --git a/src/LLVM/test/CodeGen/X86/mmx-insert-element.ll b/src/LLVM/test/CodeGen/X86/mmx-insert-element.ll new file mode 100644 index 0000000..5fc4abb --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/mmx-insert-element.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 | grep movq +; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 | grep pshufd +; This is not an MMX operation; promoted to XMM. + +define x86_mmx @qux(i32 %A) nounwind { + %tmp3 = insertelement <2 x i32> < i32 0, i32 undef >, i32 %A, i32 1 ; <<2 x i32>> [#uses=1] + %tmp4 = bitcast <2 x i32> %tmp3 to x86_mmx + ret x86_mmx %tmp4 +}
diff --git a/src/LLVM/test/CodeGen/X86/mmx-pinsrw.ll b/src/LLVM/test/CodeGen/X86/mmx-pinsrw.ll new file mode 100644 index 0000000..6062b50 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/mmx-pinsrw.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 | grep pinsrw | count 1 +; PR2562 + +external global i16 ; <i16*>:0 [#uses=1] +external global <4 x i16> ; <<4 x i16>*>:1 [#uses=2] + +declare void @abort() + +define void @""() { + load i16* @0 ; <i16>:1 [#uses=1] + load <4 x i16>* @1 ; <<4 x i16>>:2 [#uses=1] + insertelement <4 x i16> %2, i16 %1, i32 0 ; <<4 x i16>>:3 [#uses=1] + store <4 x i16> %3, <4 x i16>* @1 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/mmx-punpckhdq.ll b/src/LLVM/test/CodeGen/X86/mmx-punpckhdq.ll new file mode 100644 index 0000000..7645e17 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/mmx-punpckhdq.ll
@@ -0,0 +1,31 @@ +; RUN: llc < %s -march=x86 -mattr=+mmx,+sse42 -mtriple=x86_64-apple-darwin10 | FileCheck %s +; There are no MMX operations in bork; promoted to XMM. + +define void @bork(<1 x i64>* %x) { +; CHECK: bork +; CHECK: pextrd +entry: + %tmp2 = load <1 x i64>* %x ; <<1 x i64>> [#uses=1] + %tmp6 = bitcast <1 x i64> %tmp2 to <2 x i32> ; <<2 x i32>> [#uses=1] + %tmp9 = shufflevector <2 x i32> %tmp6, <2 x i32> undef, <2 x i32> < i32 1, i32 1 > ; <<2 x i32>> [#uses=1] + %tmp10 = bitcast <2 x i32> %tmp9 to <1 x i64> ; <<1 x i64>> [#uses=1] + store <1 x i64> %tmp10, <1 x i64>* %x + tail call void @llvm.x86.mmx.emms( ) + ret void +} + +; pork uses MMX. + +define void @pork(x86_mmx* %x) { +; CHECK: pork +; CHECK: punpckhdq +entry: + %tmp2 = load x86_mmx* %x ; <x86_mmx> [#uses=1] + %tmp9 = tail call x86_mmx @llvm.x86.mmx.punpckhdq (x86_mmx %tmp2, x86_mmx %tmp2) + store x86_mmx %tmp9, x86_mmx* %x + tail call void @llvm.x86.mmx.emms( ) + ret void +} + +declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) +declare void @llvm.x86.mmx.emms()
diff --git a/src/LLVM/test/CodeGen/X86/mmx-s2v.ll b/src/LLVM/test/CodeGen/X86/mmx-s2v.ll new file mode 100644 index 0000000..c98023c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/mmx-s2v.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=x86 -mattr=+mmx +; PR2574 + +define void @entry(i32 %m_task_id, i32 %start_x, i32 %end_x) {; <label>:0 + br i1 true, label %bb.nph, label %._crit_edge + +bb.nph: ; preds = %bb.nph, %0 + %t2206f2.0 = phi <2 x float> [ %2, %bb.nph ], [ undef, %0 ] ; <<2 x float>> [#uses=1] + insertelement <2 x float> %t2206f2.0, float 0.000000e+00, i32 0 ; <<2 x float>>:1 [#uses=1] + insertelement <2 x float> %1, float 0.000000e+00, i32 1 ; <<2 x float>>:2 [#uses=1] + br label %bb.nph + +._crit_edge: ; preds = %0 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/mmx-shift.ll b/src/LLVM/test/CodeGen/X86/mmx-shift.ll new file mode 100644 index 0000000..bafc754 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/mmx-shift.ll
@@ -0,0 +1,32 @@ +; RUN: llc < %s -march=x86 -mattr=+mmx | grep psllq | grep 32 +; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep psllq | grep 32 +; RUN: llc < %s -march=x86 -mattr=+mmx | grep psrad +; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep psrlw + +define i64 @t1(<1 x i64> %mm1) nounwind { +entry: + %tmp = bitcast <1 x i64> %mm1 to x86_mmx + %tmp6 = tail call x86_mmx @llvm.x86.mmx.pslli.q( x86_mmx %tmp, i32 32 ) ; <x86_mmx> [#uses=1] + %retval1112 = bitcast x86_mmx %tmp6 to i64 + ret i64 %retval1112 +} + +declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone + +define i64 @t2(x86_mmx %mm1, x86_mmx %mm2) nounwind { +entry: + %tmp7 = tail call x86_mmx @llvm.x86.mmx.psra.d( x86_mmx %mm1, x86_mmx %mm2 ) nounwind readnone ; <x86_mmx> [#uses=1] + %retval1112 = bitcast x86_mmx %tmp7 to i64 + ret i64 %retval1112 +} + +declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @t3(x86_mmx %mm1, i32 %bits) nounwind { +entry: + %tmp8 = tail call x86_mmx @llvm.x86.mmx.psrli.w( x86_mmx %mm1, i32 %bits ) nounwind readnone ; <x86_mmx> [#uses=1] + %retval1314 = bitcast x86_mmx %tmp8 to i64 + ret i64 %retval1314 +} + +declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/X86/mmx-shuffle.ll b/src/LLVM/test/CodeGen/X86/mmx-shuffle.ll new file mode 100644 index 0000000..30fb5c4 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/mmx-shuffle.ll
@@ -0,0 +1,31 @@ +; RUN: llc < %s -mcpu=yonah +; PR1427 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "i686-pc-linux-gnu" + %struct.DrawHelper = type { void (i32, %struct.QT_FT_Span*, i8*)*, void (i32, %struct.QT_FT_Span*, i8*)*, void (%struct.QRasterBuffer*, i32, i32, i32, i8*, i32, i32, i32)*, void (%struct.QRasterBuffer*, i32, i32, i32, i8*, i32, i32, i32)*, void (%struct.QRasterBuffer*, i32, i32, i32, i32, i32)* } + %struct.QBasicAtomic = type { i32 } + %struct.QClipData = type { i32, %"struct.QClipData::ClipLine"*, i32, i32, %struct.QT_FT_Span*, i32, i32, i32, i32 } + %"struct.QClipData::ClipLine" = type { i32, %struct.QT_FT_Span* } + %struct.QRasterBuffer = type { %struct.QRect, %struct.QRegion, %struct.QClipData*, %struct.QClipData*, i8, i32, i32, %struct.DrawHelper*, i32, i32, i32, i8* } + %struct.QRect = type { i32, i32, i32, i32 } + %struct.QRegion = type { %"struct.QRegion::QRegionData"* } + %"struct.QRegion::QRegionData" = type { %struct.QBasicAtomic, %struct._XRegion*, i8*, %struct.QRegionPrivate* } + %struct.QRegionPrivate = type opaque + %struct.QT_FT_Span = type { i16, i16, i16, i8 } + %struct._XRegion = type opaque + +define void @_Z19qt_bitmapblit16_sseP13QRasterBufferiijPKhiii(%struct.QRasterBuffer* %rasterBuffer, i32 %x, i32 %y, i32 %color, i8* %src, i32 %width, i32 %height, i32 %stride) { +entry: + %tmp528 = bitcast <8 x i8> zeroinitializer to <2 x i32> ; <<2 x i32>> [#uses=1] + %tmp529 = and <2 x i32> %tmp528, bitcast (<4 x i16> < i16 -32640, i16 16448, i16 8224, i16 4112 > to <2 x i32>) ; <<2 x i32>> [#uses=1] + %tmp542 = bitcast <2 x i32> %tmp529 to <4 x i16> ; <<4 x i16>> [#uses=1] + %tmp543 = add <4 x i16> %tmp542, < i16 0, i16 16448, i16 24672, i16 28784 > ; <<4 x i16>> [#uses=1] + %tmp555 = bitcast <4 x i16> %tmp543 to <8 x i8> ; <<8 x i8>> [#uses=1] + %tmp556 = bitcast <8 x i8> %tmp555 to x86_mmx + %tmp557 = bitcast <8 x i8> 
zeroinitializer to x86_mmx + tail call void @llvm.x86.mmx.maskmovq( x86_mmx %tmp557, x86_mmx %tmp556, i8* null ) + ret void +} + +declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*)
diff --git a/src/LLVM/test/CodeGen/X86/mmx-vzmovl-2.ll b/src/LLVM/test/CodeGen/X86/mmx-vzmovl-2.ll new file mode 100644 index 0000000..a7ce7d9 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/mmx-vzmovl-2.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep pxor +; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep punpckldq + + %struct.vS1024 = type { [8 x <4 x i32>] } + %struct.vS512 = type { [4 x <4 x i32>] } + +declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone + +define void @t() nounwind { +entry: + br label %bb554 + +bb554: ; preds = %bb554, %entry + %sum.0.reg2mem.0 = phi <1 x i64> [ %tmp562, %bb554 ], [ zeroinitializer, %entry ] ; <<1 x i64>> [#uses=1] + %0 = load x86_mmx* null, align 8 ; <<1 x i64>> [#uses=2] + %1 = bitcast x86_mmx %0 to <2 x i32> ; <<2 x i32>> [#uses=1] + %tmp555 = and <2 x i32> %1, < i32 -1, i32 0 > ; <<2 x i32>> [#uses=1] + %2 = bitcast <2 x i32> %tmp555 to x86_mmx ; <<1 x i64>> [#uses=1] + %3 = call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %0, i32 32) nounwind readnone ; <<1 x i64>> [#uses=1] + store <1 x i64> %sum.0.reg2mem.0, <1 x i64>* null + %tmp3 = bitcast x86_mmx %2 to <1 x i64> + %tmp558 = add <1 x i64> %sum.0.reg2mem.0, %tmp3 ; <<1 x i64>> [#uses=1] + %tmp5 = bitcast <1 x i64> %tmp558 to x86_mmx + %4 = call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %tmp5, i32 32) nounwind readnone ; <<1 x i64>> [#uses=1] + %tmp6 = bitcast x86_mmx %4 to <1 x i64> + %tmp7 = bitcast x86_mmx %3 to <1 x i64> + %tmp562 = add <1 x i64> %tmp6, %tmp7 ; <<1 x i64>> [#uses=1] + br label %bb554 +}
diff --git a/src/LLVM/test/CodeGen/X86/mmx-vzmovl.ll b/src/LLVM/test/CodeGen/X86/mmx-vzmovl.ll new file mode 100644 index 0000000..191e261 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/mmx-vzmovl.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep movq | count 2 +; There are no MMX operations here; this is promoted to XMM. + +define void @foo(<1 x i64>* %a, <1 x i64>* %b) nounwind { +entry: + %0 = load <1 x i64>* %a, align 8 ; <<1 x i64>> [#uses=1] + %1 = bitcast <1 x i64> %0 to <2 x i32> ; <<2 x i32>> [#uses=1] + %2 = and <2 x i32> %1, < i32 -1, i32 0 > ; <<2 x i32>> [#uses=1] + %3 = bitcast <2 x i32> %2 to <1 x i64> ; <<1 x i64>> [#uses=1] + store <1 x i64> %3, <1 x i64>* %b, align 8 + br label %bb2 + +bb2: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/movbe.ll b/src/LLVM/test/CodeGen/X86/movbe.ll new file mode 100644 index 0000000..3d3d8cf --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/movbe.ll
@@ -0,0 +1,36 @@ +; RUN: llc -mtriple=x86_64-linux -mcpu=atom < %s | FileCheck %s + +declare i32 @llvm.bswap.i32(i32) nounwind readnone +declare i64 @llvm.bswap.i64(i64) nounwind readnone + +define void @test1(i32* nocapture %x, i32 %y) nounwind { + %bswap = call i32 @llvm.bswap.i32(i32 %y) + store i32 %bswap, i32* %x, align 4 + ret void +; CHECK: test1: +; CHECK: movbel %esi, (%rdi) +} + +define i32 @test2(i32* %x) nounwind { + %load = load i32* %x, align 4 + %bswap = call i32 @llvm.bswap.i32(i32 %load) + ret i32 %bswap +; CHECK: test2: +; CHECK: movbel (%rdi), %eax +} + +define void @test3(i64* %x, i64 %y) nounwind { + %bswap = call i64 @llvm.bswap.i64(i64 %y) + store i64 %bswap, i64* %x, align 8 + ret void +; CHECK: test3: +; CHECK: movbeq %rsi, (%rdi) +} + +define i64 @test4(i64* %x) nounwind { + %load = load i64* %x, align 8 + %bswap = call i64 @llvm.bswap.i64(i64 %load) + ret i64 %bswap +; CHECK: test4: +; CHECK: movbeq (%rdi), %rax +}
diff --git a/src/LLVM/test/CodeGen/X86/movfs.ll b/src/LLVM/test/CodeGen/X86/movfs.ll new file mode 100644 index 0000000..823e986 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/movfs.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=x86 | grep fs + +define i32 @foo() nounwind readonly { +entry: + %tmp = load i32* addrspace(257)* getelementptr (i32* addrspace(257)* inttoptr (i32 72 to i32* addrspace(257)*), i32 31) ; <i32*> [#uses=1] + %tmp1 = load i32* %tmp ; <i32> [#uses=1] + ret i32 %tmp1 +}
diff --git a/src/LLVM/test/CodeGen/X86/movgs.ll b/src/LLVM/test/CodeGen/X86/movgs.ll new file mode 100644 index 0000000..aeb540f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/movgs.ll
@@ -0,0 +1,58 @@ +; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mattr=sse41 | FileCheck %s --check-prefix=X32 +; RUN: llc < %s -mtriple=x86_64-linux -mattr=sse41 | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-win32 -mattr=sse41 | FileCheck %s --check-prefix=X64 + +define i32 @test1() nounwind readonly { +entry: + %tmp = load i32* addrspace(256)* getelementptr (i32* addrspace(256)* inttoptr (i32 72 to i32* addrspace(256)*), i32 31) ; <i32*> [#uses=1] + %tmp1 = load i32* %tmp ; <i32> [#uses=1] + ret i32 %tmp1 +} +; X32: test1: +; X32: movl %gs:196, %eax +; X32: movl (%eax), %eax +; X32: ret + +; X64: test1: +; X64: movq %gs:320, %rax +; X64: movl (%rax), %eax +; X64: ret + +define i64 @test2(void (i8*)* addrspace(256)* %tmp8) nounwind { +entry: + %tmp9 = load void (i8*)* addrspace(256)* %tmp8, align 8 + tail call void %tmp9(i8* undef) nounwind optsize + ret i64 0 +} + +; rdar://8453210 +; X32: test2: +; X32: movl {{.*}}(%esp), %eax +; X32: calll *%gs:(%eax) + +; X64: test2: +; X64: callq *%gs:([[A0:%rdi|%rcx]]) + + + + +define <2 x i64> @pmovsxwd_1(i64 addrspace(256)* %p) nounwind readonly { +entry: + %0 = load i64 addrspace(256)* %p + %tmp2 = insertelement <2 x i64> zeroinitializer, i64 %0, i32 0 + %1 = bitcast <2 x i64> %tmp2 to <8 x i16> + %2 = tail call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %1) nounwind readnone + %3 = bitcast <4 x i32> %2 to <2 x i64> + ret <2 x i64> %3 + +; X32: pmovsxwd_1: +; X32: movl 4(%esp), %eax +; X32: pmovsxwd %gs:(%eax), %xmm0 +; X32: ret + +; X64: pmovsxwd_1: +; X64: pmovsxwd %gs:([[A0]]), %xmm0 +; X64: ret +} + +declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/X86/movmsk.ll b/src/LLVM/test/CodeGen/X86/movmsk.ll new file mode 100644 index 0000000..2368548 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/movmsk.ll
@@ -0,0 +1,110 @@ +; RUN: llc -mcpu=core2 < %s | FileCheck %s +; ModuleID = '<stdin>' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-macosx10.6.6" + +%0 = type { double } +%union.anon = type { float } + +define i32 @double_signbit(double %d1) nounwind uwtable readnone ssp { +entry: + %__x.addr.i = alloca double, align 8 + %__u.i = alloca %0, align 8 + %0 = bitcast double* %__x.addr.i to i8* + %1 = bitcast %0* %__u.i to i8* + store double %d1, double* %__x.addr.i, align 8 + %__f.i = getelementptr inbounds %0* %__u.i, i64 0, i32 0 + store double %d1, double* %__f.i, align 8 + %tmp = bitcast double %d1 to i64 +; CHECK-NOT: shr +; CHECK: movmskpd +; CHECK-NEXT: and + %tmp1 = lshr i64 %tmp, 63 + %shr.i = trunc i64 %tmp1 to i32 + ret i32 %shr.i +} + +define i32 @double_add_signbit(double %d1, double %d2) nounwind uwtable readnone ssp { +entry: + %__x.addr.i = alloca double, align 8 + %__u.i = alloca %0, align 8 + %add = fadd double %d1, %d2 + %0 = bitcast double* %__x.addr.i to i8* + %1 = bitcast %0* %__u.i to i8* + store double %add, double* %__x.addr.i, align 8 + %__f.i = getelementptr inbounds %0* %__u.i, i64 0, i32 0 + store double %add, double* %__f.i, align 8 + %tmp = bitcast double %add to i64 +; CHECK-NOT: shr +; CHECK: movmskpd +; CHECK-NEXT: and + %tmp1 = lshr i64 %tmp, 63 + %shr.i = trunc i64 %tmp1 to i32 + ret i32 %shr.i +} + +define i32 @float_signbit(float %f1) nounwind uwtable readnone ssp { +entry: + %__x.addr.i = alloca float, align 4 + %__u.i = alloca %union.anon, align 4 + %0 = bitcast float* %__x.addr.i to i8* + %1 = bitcast %union.anon* %__u.i to i8* + store float %f1, float* %__x.addr.i, align 4 + %__f.i = getelementptr inbounds %union.anon* %__u.i, i64 0, i32 0 + store float %f1, float* %__f.i, align 4 + %2 = bitcast float %f1 to i32 +; CHECK-NOT: shr +; CHECK: movmskps +; CHECK-NEXT: and + %shr.i = 
lshr i32 %2, 31 + ret i32 %shr.i +} + +define i32 @float_add_signbit(float %f1, float %f2) nounwind uwtable readnone ssp { +entry: + %__x.addr.i = alloca float, align 4 + %__u.i = alloca %union.anon, align 4 + %add = fadd float %f1, %f2 + %0 = bitcast float* %__x.addr.i to i8* + %1 = bitcast %union.anon* %__u.i to i8* + store float %add, float* %__x.addr.i, align 4 + %__f.i = getelementptr inbounds %union.anon* %__u.i, i64 0, i32 0 + store float %add, float* %__f.i, align 4 + %2 = bitcast float %add to i32 +; CHECK-NOT: shr +; CHECK: movmskps +; CHECK-NEXT: and + %shr.i = lshr i32 %2, 31 + ret i32 %shr.i +} + +; rdar://10247336 +; movmskp{s|d} only set low 4/2 bits, high bits are known zero + +define i32 @t1(<4 x float> %x, i32* nocapture %indexTable) nounwind uwtable readonly ssp { +entry: +; CHECK: t1: +; CHECK: movmskps +; CHECK-NOT: movslq + %0 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %x) nounwind + %idxprom = sext i32 %0 to i64 + %arrayidx = getelementptr inbounds i32* %indexTable, i64 %idxprom + %1 = load i32* %arrayidx, align 4 + ret i32 %1 +} + +define i32 @t2(<4 x float> %x, i32* nocapture %indexTable) nounwind uwtable readonly ssp { +entry: +; CHECK: t2: +; CHECK: movmskpd +; CHECK-NOT: movslq + %0 = bitcast <4 x float> %x to <2 x double> + %1 = tail call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %0) nounwind + %idxprom = sext i32 %1 to i64 + %arrayidx = getelementptr inbounds i32* %indexTable, i64 %idxprom + %2 = load i32* %arrayidx, align 4 + ret i32 %2 +} + +declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone +declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/X86/movntdq-no-avx.ll b/src/LLVM/test/CodeGen/X86/movntdq-no-avx.ll new file mode 100644 index 0000000..8b7e6ef --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/movntdq-no-avx.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s + +; Test that we produce a movntdq, not a vmovntdq +; CHECK-NOT: vmovntdq + +define void @test(<2 x i64>* nocapture %a, <2 x i64> %b) nounwind optsize { +entry: + store <2 x i64> %b, <2 x i64>* %a, align 16, !nontemporal !0 + ret void +} + +!0 = metadata !{i32 1}
diff --git a/src/LLVM/test/CodeGen/X86/mul-legalize.ll b/src/LLVM/test/CodeGen/X86/mul-legalize.ll new file mode 100644 index 0000000..069737d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/mul-legalize.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -march=x86 | grep 24576 +; PR2135 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" +target triple = "i386-pc-linux-gnu" +@.str = constant [13 x i8] c"c45531m.adb\00\00" + +define void @main() nounwind { +entry: + %tmp1 = call i1 @report__equal( i32 3, i32 3 ) + %b.0 = select i1 %tmp1, i64 35184372088832, i64 0 + %tmp7 = mul i64 3, %b.0 + %tmp32 = icmp eq i64 %tmp7, 105553116266496 + br i1 %tmp32, label %return, label %bb35 +bb35: + call void @abort( ) + unreachable +return: + ret void +} + +declare i1 @report__equal(i32 %x, i32 %y) nounwind + +declare void @abort()
diff --git a/src/LLVM/test/CodeGen/X86/mul-remat.ll b/src/LLVM/test/CodeGen/X86/mul-remat.ll new file mode 100644 index 0000000..3fa0050 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/mul-remat.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=x86 | grep mov | count 1 +; PR1874 + +define i32 @test(i32 %a, i32 %b) { +entry: + %tmp3 = mul i32 %b, %a + ret i32 %tmp3 +}
diff --git a/src/LLVM/test/CodeGen/X86/mul-shift-reassoc.ll b/src/LLVM/test/CodeGen/X86/mul-shift-reassoc.ll new file mode 100644 index 0000000..c7aced3 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/mul-shift-reassoc.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86 | grep lea +; RUN: llc < %s -march=x86 | not grep add + +define i32 @test(i32 %X, i32 %Y) { + ; Push the shl through the mul to allow an LEA to be formed, instead + ; of using a shift and add separately. + %tmp.2 = shl i32 %X, 1 ; <i32> [#uses=1] + %tmp.3 = mul i32 %tmp.2, %Y ; <i32> [#uses=1] + %tmp.5 = add i32 %tmp.3, %Y ; <i32> [#uses=1] + ret i32 %tmp.5 +} +
diff --git a/src/LLVM/test/CodeGen/X86/mul128.ll b/src/LLVM/test/CodeGen/X86/mul128.ll new file mode 100644 index 0000000..6825b99 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/mul128.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=x86-64 | grep mul | count 3 + +define i128 @foo(i128 %t, i128 %u) { + %k = mul i128 %t, %u + ret i128 %k +}
diff --git a/src/LLVM/test/CodeGen/X86/mul64.ll b/src/LLVM/test/CodeGen/X86/mul64.ll new file mode 100644 index 0000000..5a25c5d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/mul64.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=x86 | grep mul | count 3 + +define i64 @foo(i64 %t, i64 %u) { + %k = mul i64 %t, %u + ret i64 %k +}
diff --git a/src/LLVM/test/CodeGen/X86/muloti.ll b/src/LLVM/test/CodeGen/X86/muloti.ll new file mode 100644 index 0000000..2f0986e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/muloti.ll
@@ -0,0 +1,81 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s +%0 = type { i64, i64 } +%1 = type { i128, i1 } + +define %0 @x(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0, i64 %b.coerce1) nounwind uwtable ssp { +; CHECK: x +entry: + %tmp16 = zext i64 %a.coerce0 to i128 + %tmp11 = zext i64 %a.coerce1 to i128 + %tmp12 = shl nuw i128 %tmp11, 64 + %ins14 = or i128 %tmp12, %tmp16 + %tmp6 = zext i64 %b.coerce0 to i128 + %tmp3 = zext i64 %b.coerce1 to i128 + %tmp4 = shl nuw i128 %tmp3, 64 + %ins = or i128 %tmp4, %tmp6 + %0 = tail call %1 @llvm.smul.with.overflow.i128(i128 %ins14, i128 %ins) +; CHECK: callq ___muloti4 + %1 = extractvalue %1 %0, 0 + %2 = extractvalue %1 %0, 1 + br i1 %2, label %overflow, label %nooverflow + +overflow: ; preds = %entry + tail call void @llvm.trap() + unreachable + +nooverflow: ; preds = %entry + %tmp20 = trunc i128 %1 to i64 + %tmp21 = insertvalue %0 undef, i64 %tmp20, 0 + %tmp22 = lshr i128 %1, 64 + %tmp23 = trunc i128 %tmp22 to i64 + %tmp24 = insertvalue %0 %tmp21, i64 %tmp23, 1 + ret %0 %tmp24 +} + +define %0 @foo(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0, i64 %b.coerce1) nounwind uwtable ssp { +entry: +; CHECK: foo + %retval = alloca i128, align 16 + %coerce = alloca i128, align 16 + %a.addr = alloca i128, align 16 + %coerce1 = alloca i128, align 16 + %b.addr = alloca i128, align 16 + %0 = bitcast i128* %coerce to %0* + %1 = getelementptr %0* %0, i32 0, i32 0 + store i64 %a.coerce0, i64* %1 + %2 = getelementptr %0* %0, i32 0, i32 1 + store i64 %a.coerce1, i64* %2 + %a = load i128* %coerce, align 16 + store i128 %a, i128* %a.addr, align 16 + %3 = bitcast i128* %coerce1 to %0* + %4 = getelementptr %0* %3, i32 0, i32 0 + store i64 %b.coerce0, i64* %4 + %5 = getelementptr %0* %3, i32 0, i32 1 + store i64 %b.coerce1, i64* %5 + %b = load i128* %coerce1, align 16 + store i128 %b, i128* %b.addr, align 16 + %tmp = load i128* %a.addr, align 16 + %tmp2 = load i128* %b.addr, align 16 + %6 = call %1 
@llvm.umul.with.overflow.i128(i128 %tmp, i128 %tmp2) +; CHECK: cmov +; CHECK: divti3 + %7 = extractvalue %1 %6, 0 + %8 = extractvalue %1 %6, 1 + br i1 %8, label %overflow, label %nooverflow + +overflow: ; preds = %entry + call void @llvm.trap() + unreachable + +nooverflow: ; preds = %entry + store i128 %7, i128* %retval + %9 = bitcast i128* %retval to %0* + %10 = load %0* %9, align 1 + ret %0 %10 +} + +declare %1 @llvm.umul.with.overflow.i128(i128, i128) nounwind readnone + +declare %1 @llvm.smul.with.overflow.i128(i128, i128) nounwind readnone + +declare void @llvm.trap() nounwind
diff --git a/src/LLVM/test/CodeGen/X86/mult-alt-generic-i686.ll b/src/LLVM/test/CodeGen/X86/mult-alt-generic-i686.ll new file mode 100644 index 0000000..7c3499f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/mult-alt-generic-i686.ll
@@ -0,0 +1,321 @@ +; RUN: llc < %s -march=x86 +; ModuleID = 'mult-alt-generic.c' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +target triple = "i686" + +@mout0 = common global i32 0, align 4 +@min1 = common global i32 0, align 4 +@marray = common global [2 x i32] zeroinitializer, align 4 + +define void @single_m() nounwind { +entry: + call void asm "foo $1,$0", "=*m,*m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32* @min1) nounwind + ret void +} + +define void @single_o() nounwind { +entry: + %out0 = alloca i32, align 4 + %index = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %index, align 4 + ret void +} + +define void @single_V() nounwind { +entry: + ret void +} + +define void @single_lt() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r,<r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* %in1, align 4 + %1 = call i32 asm "foo $1,$0", "=r,r<,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + ret void +} + +define void @single_gt() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r,>r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* %in1, align 4 + %1 = call i32 asm "foo $1,$0", "=r,r>,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + ret void +} + +define void @single_r() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = 
load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r,r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define void @single_i() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + %0 = call i32 asm "foo $1,$0", "=r,i,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define void @single_n() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + %0 = call i32 asm "foo $1,$0", "=r,n,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define void @single_E() nounwind { +entry: + %out0 = alloca double, align 8 + store double 0.000000e+000, double* %out0, align 8 +; No lowering support. +; %0 = call double asm "foo $1,$0", "=r,E,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind +; store double %0, double* %out0, align 8 + ret void +} + +define void @single_F() nounwind { +entry: + %out0 = alloca double, align 8 + store double 0.000000e+000, double* %out0, align 8 +; No lowering support. 
+; %0 = call double asm "foo $1,$0", "=r,F,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind +; store double %0, double* %out0, align 8 + ret void +} + +define void @single_s() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + ret void +} + +define void @single_g() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* @min1, align 4 + %1 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + %2 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind + store i32 %2, i32* %out0, align 4 + ret void +} + +define void @single_X() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* @min1, align 4 + %1 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + %2 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind + store i32 %2, i32* %out0, align 4 + %3 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind + store i32 %3, i32* %out0, align 4 + %4 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind + store i32 %4, i32* %out0, align 4 + %5 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind + store i32 %5, i32* %out0, align 4 + ret void +} + +define void 
@single_p() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + %0 = call i32 asm "foo $1,$0", "=r,im,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define void @multi_m() nounwind { +entry: + %tmp = load i32* @min1, align 4 + call void asm "foo $1,$0", "=*m|r,m|r,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind + ret void +} + +define void @multi_o() nounwind { +entry: + %out0 = alloca i32, align 4 + %index = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %index, align 4 + ret void +} + +define void @multi_V() nounwind { +entry: + ret void +} + +define void @multi_lt() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|<r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* %in1, align 4 + %1 = call i32 asm "foo $1,$0", "=r|r,r|r<,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + ret void +} + +define void @multi_gt() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|>r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* %in1, align 4 + %1 = call i32 asm "foo $1,$0", "=r|r,r|r>,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + ret void +} + +define void @multi_r() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", 
"=r|r,r|m,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define void @multi_i() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|i,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define void @multi_n() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|n,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define void @multi_E() nounwind { +entry: + %out0 = alloca double, align 8 + store double 0.000000e+000, double* %out0, align 8 +; No lowering support. +; %0 = call double asm "foo $1,$0", "=r|r,r|E,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind +; store double %0, double* %out0, align 8 + ret void +} + +define void @multi_F() nounwind { +entry: + %out0 = alloca double, align 8 + store double 0.000000e+000, double* %out0, align 8 +; No lowering support. 
+; %0 = call double asm "foo $1,$0", "=r|r,r|F,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind +; store double %0, double* %out0, align 8 + ret void +} + +define void @multi_s() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + ret void +} + +define void @multi_g() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* @min1, align 4 + %1 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + %2 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind + store i32 %2, i32* %out0, align 4 + ret void +} + +define void @multi_X() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* @min1, align 4 + %1 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + %2 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind + store i32 %2, i32* %out0, align 4 + %3 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind + store i32 %3, i32* %out0, align 4 + %4 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind + store i32 %4, i32* %out0, align 4 + %5 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind + store i32 %5, i32* %out0, 
align 4 + ret void +} + +define void @multi_p() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|im,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind + store i32 %0, i32* %out0, align 4 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/mult-alt-generic-x86_64.ll b/src/LLVM/test/CodeGen/X86/mult-alt-generic-x86_64.ll new file mode 100644 index 0000000..f35bb5e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/mult-alt-generic-x86_64.ll
@@ -0,0 +1,321 @@ +; RUN: llc < %s -march=x86-64 +; ModuleID = 'mult-alt-generic.c' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64" + +@mout0 = common global i32 0, align 4 +@min1 = common global i32 0, align 4 +@marray = common global [2 x i32] zeroinitializer, align 4 + +define void @single_m() nounwind { +entry: + call void asm "foo $1,$0", "=*m,*m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32* @min1) nounwind + ret void +} + +define void @single_o() nounwind { +entry: + %out0 = alloca i32, align 4 + %index = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %index, align 4 + ret void +} + +define void @single_V() nounwind { +entry: + ret void +} + +define void @single_lt() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r,<r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* %in1, align 4 + %1 = call i32 asm "foo $1,$0", "=r,r<,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + ret void +} + +define void @single_gt() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r,>r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* %in1, align 4 + %1 = call i32 asm "foo $1,$0", "=r,r>,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + ret void +} + +define void @single_r() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* 
%in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r,r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define void @single_i() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + %0 = call i32 asm "foo $1,$0", "=r,i,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define void @single_n() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + %0 = call i32 asm "foo $1,$0", "=r,n,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define void @single_E() nounwind { +entry: + %out0 = alloca double, align 8 + store double 0.000000e+000, double* %out0, align 8 +; No lowering support. +; %0 = call double asm "foo $1,$0", "=r,E,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind +; store double %0, double* %out0, align 8 + ret void +} + +define void @single_F() nounwind { +entry: + %out0 = alloca double, align 8 + store double 0.000000e+000, double* %out0, align 8 +; No lowering support. 
+; %0 = call double asm "foo $1,$0", "=r,F,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind +; store double %0, double* %out0, align 8 + ret void +} + +define void @single_s() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + ret void +} + +define void @single_g() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* @min1, align 4 + %1 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + %2 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind + store i32 %2, i32* %out0, align 4 + ret void +} + +define void @single_X() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* @min1, align 4 + %1 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + %2 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind + store i32 %2, i32* %out0, align 4 + %3 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind + store i32 %3, i32* %out0, align 4 + %4 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind + store i32 %4, i32* %out0, align 4 + %5 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind + store i32 %5, i32* %out0, align 4 + ret void +} + +define void 
@single_p() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + %0 = call i32 asm "foo $1,$0", "=r,im,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define void @multi_m() nounwind { +entry: + %tmp = load i32* @min1, align 4 + call void asm "foo $1,$0", "=*m|r,m|r,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind + ret void +} + +define void @multi_o() nounwind { +entry: + %out0 = alloca i32, align 4 + %index = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %index, align 4 + ret void +} + +define void @multi_V() nounwind { +entry: + ret void +} + +define void @multi_lt() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|<r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* %in1, align 4 + %1 = call i32 asm "foo $1,$0", "=r|r,r|r<,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + ret void +} + +define void @multi_gt() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|>r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* %in1, align 4 + %1 = call i32 asm "foo $1,$0", "=r|r,r|r>,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + ret void +} + +define void @multi_r() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", 
"=r|r,r|m,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define void @multi_i() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|i,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define void @multi_n() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|n,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind + store i32 %0, i32* %out0, align 4 + ret void +} + +define void @multi_E() nounwind { +entry: + %out0 = alloca double, align 8 + store double 0.000000e+000, double* %out0, align 8 +; No lowering support. +; %0 = call double asm "foo $1,$0", "=r|r,r|E,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind +; store double %0, double* %out0, align 8 + ret void +} + +define void @multi_F() nounwind { +entry: + %out0 = alloca double, align 8 + store double 0.000000e+000, double* %out0, align 8 +; No lowering support. 
+; %0 = call double asm "foo $1,$0", "=r|r,r|F,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind +; store double %0, double* %out0, align 8 + ret void +} + +define void @multi_s() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + ret void +} + +define void @multi_g() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* @min1, align 4 + %1 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + %2 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind + store i32 %2, i32* %out0, align 4 + ret void +} + +define void @multi_X() nounwind { +entry: + %out0 = alloca i32, align 4 + %in1 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + store i32 1, i32* %in1, align 4 + %tmp = load i32* %in1, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind + store i32 %0, i32* %out0, align 4 + %tmp1 = load i32* @min1, align 4 + %1 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind + store i32 %1, i32* %out0, align 4 + %2 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind + store i32 %2, i32* %out0, align 4 + %3 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind + store i32 %3, i32* %out0, align 4 + %4 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind + store i32 %4, i32* %out0, align 4 + %5 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind + store i32 %5, i32* %out0, 
align 4 + ret void +} + +define void @multi_p() nounwind { +entry: + %out0 = alloca i32, align 4 + store i32 0, i32* %out0, align 4 + %0 = call i32 asm "foo $1,$0", "=r|r,r|im,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind + store i32 %0, i32* %out0, align 4 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/mult-alt-x86.ll b/src/LLVM/test/CodeGen/X86/mult-alt-x86.ll new file mode 100644 index 0000000..06175da --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/mult-alt-x86.ll
@@ -0,0 +1,358 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 +; ModuleID = 'mult-alt-x86.c' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +target triple = "i686-pc-win32" + +@mout0 = common global i32 0, align 4 +@min1 = common global i32 0, align 4 +@dout0 = common global double 0.000000e+000, align 8 +@din1 = common global double 0.000000e+000, align 8 +@marray = common global [2 x i32] zeroinitializer, align 4 + +define void @single_R() nounwind { +entry: + %tmp = load i32* @min1, align 4 + %0 = call i32 asm "foo $1,$0", "=R,R,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind + store i32 %0, i32* @mout0, align 4 + ret void +} + +define void @single_q() nounwind { +entry: + %tmp = load i32* @min1, align 4 + %0 = call i32 asm "foo $1,$0", "=q,q,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind + store i32 %0, i32* @mout0, align 4 + ret void +} + +define void @single_Q() nounwind { +entry: + %tmp = load i32* @min1, align 4 + %0 = call i32 asm "foo $1,$0", "=Q,Q,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind + store i32 %0, i32* @mout0, align 4 + ret void +} + +define void @single_a() nounwind { +entry: + %tmp = load i32* @min1, align 4 + %0 = call i32 asm "foo $1,$0", "={ax},{ax},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind + store i32 %0, i32* @mout0, align 4 + ret void +} + +define void @single_b() nounwind { +entry: + %tmp = load i32* @min1, align 4 + %0 = call i32 asm "foo $1,$0", "={bx},{bx},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind + store i32 %0, i32* @mout0, align 4 + ret void +} + +define void @single_c() nounwind { +entry: + %tmp = load i32* @min1, align 4 + %0 = call i32 asm "foo $1,$0", "={cx},{cx},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind + store i32 %0, i32* @mout0, align 4 + ret void +} + +define void @single_d() nounwind { +entry: + %tmp = load i32* @min1, align 4 + %0 = call i32 asm "foo $1,$0", 
"={dx},{dx},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind + store i32 %0, i32* @mout0, align 4 + ret void +} + +define void @single_S() nounwind { +entry: + %tmp = load i32* @min1, align 4 + %0 = call i32 asm "foo $1,$0", "={si},{si},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind + store i32 %0, i32* @mout0, align 4 + ret void +} + +define void @single_D() nounwind { +entry: + %tmp = load i32* @min1, align 4 + %0 = call i32 asm "foo $1,$0", "={di},{di},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind + store i32 %0, i32* @mout0, align 4 + ret void +} + +define void @single_A() nounwind { +entry: + %tmp = load i32* @min1, align 4 + %0 = call i32 asm "foo $1,$0", "=A,A,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind + store i32 %0, i32* @mout0, align 4 + ret void +} + +define void @single_f() nounwind { +entry: + ret void +} + +define void @single_t() nounwind { +entry: + ret void +} + +define void @single_u() nounwind { +entry: + ret void +} + +define void @single_y() nounwind { +entry: + %tmp = load double* @din1, align 8 + %0 = call double asm "foo $1,$0", "=y,y,~{dirflag},~{fpsr},~{flags}"(double %tmp) nounwind + store double %0, double* @dout0, align 8 + ret void +} + +define void @single_x() nounwind { +entry: + %tmp = load double* @din1, align 8 + %0 = call double asm "foo $1,$0", "=x,x,~{dirflag},~{fpsr},~{flags}"(double %tmp) nounwind + store double %0, double* @dout0, align 8 + ret void +} + +define void @single_Y0() nounwind { +entry: + ret void +} + +define void @single_I() nounwind { +entry: + call void asm "foo $1,$0", "=*m,I,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind + ret void +} + +define void @single_J() nounwind { +entry: + call void asm "foo $1,$0", "=*m,J,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind + ret void +} + +define void @single_K() nounwind { +entry: + call void asm "foo $1,$0", "=*m,K,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind + ret void +} + +define void @single_L() nounwind { +entry: +; 
Missing lowering support for 'L'. +; call void asm "foo $1,$0", "=*m,L,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind + ret void +} + +define void @single_M() nounwind { +entry: +; Missing lowering support for 'M'. +; call void asm "foo $1,$0", "=*m,M,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind + ret void +} + +define void @single_N() nounwind { +entry: + call void asm "foo $1,$0", "=*m,N,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind + ret void +} + +define void @single_G() nounwind { +entry: +; Missing lowering support for 'G'. +; call void asm "foo $1,$0", "=*m,G,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, double 1.000000e+000) nounwind + ret void +} + +define void @single_C() nounwind { +entry: +; Missing lowering support for 'C'. +; call void asm "foo $1,$0", "=*m,C,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, double 1.000000e+000) nounwind + ret void +} + +define void @single_e() nounwind { +entry: + call void asm "foo $1,$0", "=*m,e,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind + ret void +} + +define void @single_Z() nounwind { +entry: + call void asm "foo $1,$0", "=*m,Z,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind + ret void +} + +define void @multi_R() nounwind { +entry: + %tmp = load i32* @min1, align 4 + call void asm "foo $1,$0", "=*r|R|m,r|R|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind + ret void +} + +define void @multi_q() nounwind { +entry: + %tmp = load i32* @min1, align 4 + call void asm "foo $1,$0", "=*r|q|m,r|q|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind + ret void +} + +define void @multi_Q() nounwind { +entry: + %tmp = load i32* @min1, align 4 + call void asm "foo $1,$0", "=*r|Q|m,r|Q|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind + ret void +} + +define void @multi_a() nounwind { +entry: + %tmp = load i32* @min1, align 4 + call void asm "foo $1,$0", "=*r|{ax}|m,r|{ax}|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind + 
ret void +} + +define void @multi_b() nounwind { +entry: + %tmp = load i32* @min1, align 4 + call void asm "foo $1,$0", "=*r|{bx}|m,r|{bx}|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind + ret void +} + +define void @multi_c() nounwind { +entry: + %tmp = load i32* @min1, align 4 + call void asm "foo $1,$0", "=*r|{cx}|m,r|{cx}|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind + ret void +} + +define void @multi_d() nounwind { +entry: + %tmp = load i32* @min1, align 4 + call void asm "foo $1,$0", "=*r|{dx}|m,r|{dx},~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind + ret void +} + +define void @multi_S() nounwind { +entry: + %tmp = load i32* @min1, align 4 + call void asm "foo $1,$0", "=*r|{si}|m,r|{si}|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind + ret void +} + +define void @multi_D() nounwind { +entry: + %tmp = load i32* @min1, align 4 + call void asm "foo $1,$0", "=*r|{di}|m,r|{di}|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind + ret void +} + +define void @multi_A() nounwind { +entry: + %tmp = load i32* @min1, align 4 + call void asm "foo $1,$0", "=*r|A|m,r|A|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind + ret void +} + +define void @multi_f() nounwind { +entry: + ret void +} + +define void @multi_t() nounwind { +entry: + ret void +} + +define void @multi_u() nounwind { +entry: + ret void +} + +define void @multi_y() nounwind { +entry: + %tmp = load double* @din1, align 8 + call void asm "foo $1,$0", "=*r|y|m,r|y|m,~{dirflag},~{fpsr},~{flags}"(double* @dout0, double %tmp) nounwind + ret void +} + +define void @multi_x() nounwind { +entry: + %tmp = load double* @din1, align 8 + call void asm "foo $1,$0", "=*r|x|m,r|x|m,~{dirflag},~{fpsr},~{flags}"(double* @dout0, double %tmp) nounwind + ret void +} + +define void @multi_Y0() nounwind { +entry: + ret void +} + +define void @multi_I() nounwind { +entry: + call void asm "foo $1,$0", 
"=*r|m|m,r|I|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind + ret void +} + +define void @multi_J() nounwind { +entry: + call void asm "foo $1,$0", "=*r|m|m,r|J|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind + ret void +} + +define void @multi_K() nounwind { +entry: + call void asm "foo $1,$0", "=*r|m|m,r|K|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind + ret void +} + +define void @multi_L() nounwind { +entry: +; Missing lowering support for 'L'. +; call void asm "foo $1,$0", "=*r|m|m,r|L|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind + ret void +} + +define void @multi_M() nounwind { +entry: +; Missing lowering support for 'M'. +; call void asm "foo $1,$0", "=*r|m|m,r|M|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind + ret void +} + +define void @multi_N() nounwind { +entry: + call void asm "foo $1,$0", "=*r|m|m,r|N|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind + ret void +} + +define void @multi_G() nounwind { +entry: +; Missing lowering support for 'G'. +; call void asm "foo $1,$0", "=*r|m|m,r|G|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, double 1.000000e+000) nounwind + ret void +} + +define void @multi_C() nounwind { +entry: +; Missing lowering support for 'C'. +; call void asm "foo $1,$0", "=*r|m|m,r|C|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, double 1.000000e+000) nounwind + ret void +} + +define void @multi_e() nounwind { +entry: + call void asm "foo $1,$0", "=*r|m|m,r|e|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind + ret void +} + +define void @multi_Z() nounwind { +entry: + call void asm "foo $1,$0", "=*r|m|m,r|Z|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/multiple-loop-post-inc.ll b/src/LLVM/test/CodeGen/X86/multiple-loop-post-inc.ll new file mode 100644 index 0000000..51a0611 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/multiple-loop-post-inc.ll
@@ -0,0 +1,304 @@ +; RUN: llc -asm-verbose=false -disable-branch-fold -disable-code-place -disable-tail-duplicate -march=x86-64 < %s | FileCheck %s +; rdar://7236213 + +; CodeGen shouldn't require any lea instructions inside the marked loop. +; It should properly set up post-increment uses and do coalescing for +; the induction variables. + +; CHECK: # Start +; CHECK-NOT: lea +; CHECK: # Stop + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +define void @foo(float* %I, i64 %IS, float* nocapture %Start, float* nocapture %Step, float* %O, i64 %OS, i64 %N) nounwind { +entry: + %times4 = alloca float, align 4 ; <float*> [#uses=3] + %timesN = alloca float, align 4 ; <float*> [#uses=2] + %0 = load float* %Step, align 4 ; <float> [#uses=8] + %1 = ptrtoint float* %I to i64 ; <i64> [#uses=1] + %2 = ptrtoint float* %O to i64 ; <i64> [#uses=1] + %tmp = xor i64 %2, %1 ; <i64> [#uses=1] + %tmp16 = and i64 %tmp, 15 ; <i64> [#uses=1] + %3 = icmp eq i64 %tmp16, 0 ; <i1> [#uses=1] + %4 = trunc i64 %IS to i32 ; <i32> [#uses=1] + %5 = xor i32 %4, 1 ; <i32> [#uses=1] + %6 = trunc i64 %OS to i32 ; <i32> [#uses=1] + %7 = xor i32 %6, 1 ; <i32> [#uses=1] + %8 = or i32 %7, %5 ; <i32> [#uses=1] + %9 = icmp eq i32 %8, 0 ; <i1> [#uses=1] + br i1 %9, label %bb, label %return + +bb: ; preds = %entry + %10 = load float* %Start, align 4 ; <float> [#uses=1] + br label %bb2 + +bb1: ; preds = %bb3 + %11 = load float* %I_addr.0, align 4 ; <float> [#uses=1] + %12 = fmul float %11, %x.0 ; <float> [#uses=1] + store float %12, float* %O_addr.0, align 4 + %13 = fadd float %x.0, %0 ; <float> [#uses=1] + %indvar.next53 = add i64 %14, 1 ; <i64> [#uses=1] + br label %bb2 + +bb2: ; preds = %bb1, %bb + %14 = phi i64 [ %indvar.next53, %bb1 ], [ 0, %bb ] ; <i64> [#uses=21] + %x.0 = phi float [ %13, %bb1 ], [ %10, %bb ] ; <float> [#uses=6] + %N_addr.0 = sub i64 %N, %14 ; <i64> [#uses=4] + 
%O_addr.0 = getelementptr float* %O, i64 %14 ; <float*> [#uses=4] + %I_addr.0 = getelementptr float* %I, i64 %14 ; <float*> [#uses=3] + %15 = icmp slt i64 %N_addr.0, 1 ; <i1> [#uses=1] + br i1 %15, label %bb4, label %bb3 + +bb3: ; preds = %bb2 + %16 = ptrtoint float* %O_addr.0 to i64 ; <i64> [#uses=1] + %17 = and i64 %16, 15 ; <i64> [#uses=1] + %18 = icmp eq i64 %17, 0 ; <i1> [#uses=1] + br i1 %18, label %bb4, label %bb1 + +bb4: ; preds = %bb3, %bb2 + %19 = fmul float %0, 4.000000e+00 ; <float> [#uses=1] + store float %19, float* %times4, align 4 + %20 = fmul float %0, 1.600000e+01 ; <float> [#uses=1] + store float %20, float* %timesN, align 4 + %21 = fmul float %0, 0.000000e+00 ; <float> [#uses=1] + %22 = fadd float %21, %x.0 ; <float> [#uses=1] + %23 = fadd float %x.0, %0 ; <float> [#uses=1] + %24 = fmul float %0, 2.000000e+00 ; <float> [#uses=1] + %25 = fadd float %24, %x.0 ; <float> [#uses=1] + %26 = fmul float %0, 3.000000e+00 ; <float> [#uses=1] + %27 = fadd float %26, %x.0 ; <float> [#uses=1] + %28 = insertelement <4 x float> undef, float %22, i32 0 ; <<4 x float>> [#uses=1] + %29 = insertelement <4 x float> %28, float %23, i32 1 ; <<4 x float>> [#uses=1] + %30 = insertelement <4 x float> %29, float %25, i32 2 ; <<4 x float>> [#uses=1] + %31 = insertelement <4 x float> %30, float %27, i32 3 ; <<4 x float>> [#uses=5] + %asmtmp.i = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %times4) nounwind ; <<4 x float>> [#uses=3] + %32 = fadd <4 x float> %31, %asmtmp.i ; <<4 x float>> [#uses=3] + %33 = fadd <4 x float> %32, %asmtmp.i ; <<4 x float>> [#uses=3] + %34 = fadd <4 x float> %33, %asmtmp.i ; <<4 x float>> [#uses=2] + %asmtmp.i18 = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %timesN) nounwind ; <<4 x float>> [#uses=8] + %35 = icmp sgt i64 %N_addr.0, 15 ; <i1> [#uses=2] + br i1 %3, label %bb6.preheader, label %bb8 + +bb6.preheader: ; preds = %bb4 
+ br i1 %35, label %bb.nph43, label %bb7 + +bb.nph43: ; preds = %bb6.preheader + %tmp108 = add i64 %14, 16 ; <i64> [#uses=1] + %tmp111 = add i64 %14, 4 ; <i64> [#uses=1] + %tmp115 = add i64 %14, 8 ; <i64> [#uses=1] + %tmp119 = add i64 %14, 12 ; <i64> [#uses=1] + %tmp134 = add i64 %N, -16 ; <i64> [#uses=1] + %tmp135 = sub i64 %tmp134, %14 ; <i64> [#uses=1] + call void asm sideeffect "# Start.", "~{dirflag},~{fpsr},~{flags}"() nounwind + br label %bb5 + +bb5: ; preds = %bb.nph43, %bb5 + %indvar102 = phi i64 [ 0, %bb.nph43 ], [ %indvar.next103, %bb5 ] ; <i64> [#uses=3] + %vX3.041 = phi <4 x float> [ %34, %bb.nph43 ], [ %45, %bb5 ] ; <<4 x float>> [#uses=2] + %vX0.039 = phi <4 x float> [ %31, %bb.nph43 ], [ %41, %bb5 ] ; <<4 x float>> [#uses=2] + %vX2.037 = phi <4 x float> [ %33, %bb.nph43 ], [ %46, %bb5 ] ; <<4 x float>> [#uses=2] + %vX1.036 = phi <4 x float> [ %32, %bb.nph43 ], [ %47, %bb5 ] ; <<4 x float>> [#uses=2] + %tmp104 = shl i64 %indvar102, 4 ; <i64> [#uses=5] + %tmp105 = add i64 %14, %tmp104 ; <i64> [#uses=2] + %scevgep106 = getelementptr float* %I, i64 %tmp105 ; <float*> [#uses=1] + %scevgep106107 = bitcast float* %scevgep106 to <4 x float>* ; <<4 x float>*> [#uses=1] + %tmp109 = add i64 %tmp108, %tmp104 ; <i64> [#uses=2] + %tmp112 = add i64 %tmp111, %tmp104 ; <i64> [#uses=2] + %scevgep113 = getelementptr float* %I, i64 %tmp112 ; <float*> [#uses=1] + %scevgep113114 = bitcast float* %scevgep113 to <4 x float>* ; <<4 x float>*> [#uses=1] + %tmp116 = add i64 %tmp115, %tmp104 ; <i64> [#uses=2] + %scevgep117 = getelementptr float* %I, i64 %tmp116 ; <float*> [#uses=1] + %scevgep117118 = bitcast float* %scevgep117 to <4 x float>* ; <<4 x float>*> [#uses=1] + %tmp120 = add i64 %tmp119, %tmp104 ; <i64> [#uses=2] + %scevgep121 = getelementptr float* %I, i64 %tmp120 ; <float*> [#uses=1] + %scevgep121122 = bitcast float* %scevgep121 to <4 x float>* ; <<4 x float>*> [#uses=1] + %scevgep123 = getelementptr float* %O, i64 %tmp105 ; <float*> [#uses=1] + %scevgep123124 = 
bitcast float* %scevgep123 to <4 x float>* ; <<4 x float>*> [#uses=1] + %scevgep126 = getelementptr float* %O, i64 %tmp112 ; <float*> [#uses=1] + %scevgep126127 = bitcast float* %scevgep126 to <4 x float>* ; <<4 x float>*> [#uses=1] + %scevgep128 = getelementptr float* %O, i64 %tmp116 ; <float*> [#uses=1] + %scevgep128129 = bitcast float* %scevgep128 to <4 x float>* ; <<4 x float>*> [#uses=1] + %scevgep130 = getelementptr float* %O, i64 %tmp120 ; <float*> [#uses=1] + %scevgep130131 = bitcast float* %scevgep130 to <4 x float>* ; <<4 x float>*> [#uses=1] + %tmp132 = mul i64 %indvar102, -16 ; <i64> [#uses=1] + %tmp136 = add i64 %tmp135, %tmp132 ; <i64> [#uses=2] + %36 = load <4 x float>* %scevgep106107, align 16 ; <<4 x float>> [#uses=1] + %37 = load <4 x float>* %scevgep113114, align 16 ; <<4 x float>> [#uses=1] + %38 = load <4 x float>* %scevgep117118, align 16 ; <<4 x float>> [#uses=1] + %39 = load <4 x float>* %scevgep121122, align 16 ; <<4 x float>> [#uses=1] + %40 = fmul <4 x float> %36, %vX0.039 ; <<4 x float>> [#uses=1] + %41 = fadd <4 x float> %vX0.039, %asmtmp.i18 ; <<4 x float>> [#uses=2] + %42 = fmul <4 x float> %37, %vX1.036 ; <<4 x float>> [#uses=1] + %43 = fmul <4 x float> %38, %vX2.037 ; <<4 x float>> [#uses=1] + %44 = fmul <4 x float> %39, %vX3.041 ; <<4 x float>> [#uses=1] + store <4 x float> %40, <4 x float>* %scevgep123124, align 16 + store <4 x float> %42, <4 x float>* %scevgep126127, align 16 + store <4 x float> %43, <4 x float>* %scevgep128129, align 16 + store <4 x float> %44, <4 x float>* %scevgep130131, align 16 + %45 = fadd <4 x float> %vX3.041, %asmtmp.i18 ; <<4 x float>> [#uses=1] + %46 = fadd <4 x float> %vX2.037, %asmtmp.i18 ; <<4 x float>> [#uses=1] + %47 = fadd <4 x float> %vX1.036, %asmtmp.i18 ; <<4 x float>> [#uses=1] + %48 = icmp sgt i64 %tmp136, 15 ; <i1> [#uses=1] + %indvar.next103 = add i64 %indvar102, 1 ; <i64> [#uses=1] + br i1 %48, label %bb5, label %bb6.bb7_crit_edge + +bb6.bb7_crit_edge: ; preds = %bb5 + call void asm 
sideeffect "# Stop.", "~{dirflag},~{fpsr},~{flags}"() nounwind + %scevgep110 = getelementptr float* %I, i64 %tmp109 ; <float*> [#uses=1] + %scevgep125 = getelementptr float* %O, i64 %tmp109 ; <float*> [#uses=1] + br label %bb7 + +bb7: ; preds = %bb6.bb7_crit_edge, %bb6.preheader + %I_addr.1.lcssa = phi float* [ %scevgep110, %bb6.bb7_crit_edge ], [ %I_addr.0, %bb6.preheader ] ; <float*> [#uses=1] + %O_addr.1.lcssa = phi float* [ %scevgep125, %bb6.bb7_crit_edge ], [ %O_addr.0, %bb6.preheader ] ; <float*> [#uses=1] + %vX0.0.lcssa = phi <4 x float> [ %41, %bb6.bb7_crit_edge ], [ %31, %bb6.preheader ] ; <<4 x float>> [#uses=1] + %N_addr.1.lcssa = phi i64 [ %tmp136, %bb6.bb7_crit_edge ], [ %N_addr.0, %bb6.preheader ] ; <i64> [#uses=1] + %asmtmp.i17 = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %times4) nounwind ; <<4 x float>> [#uses=0] + br label %bb11 + +bb8: ; preds = %bb4 + br i1 %35, label %bb.nph, label %bb11 + +bb.nph: ; preds = %bb8 + %I_addr.0.sum = add i64 %14, -1 ; <i64> [#uses=1] + %49 = getelementptr inbounds float* %I, i64 %I_addr.0.sum ; <float*> [#uses=1] + %50 = bitcast float* %49 to <4 x float>* ; <<4 x float>*> [#uses=1] + %51 = load <4 x float>* %50, align 16 ; <<4 x float>> [#uses=1] + %tmp54 = add i64 %14, 16 ; <i64> [#uses=1] + %tmp56 = add i64 %14, 3 ; <i64> [#uses=1] + %tmp60 = add i64 %14, 7 ; <i64> [#uses=1] + %tmp64 = add i64 %14, 11 ; <i64> [#uses=1] + %tmp68 = add i64 %14, 15 ; <i64> [#uses=1] + %tmp76 = add i64 %14, 4 ; <i64> [#uses=1] + %tmp80 = add i64 %14, 8 ; <i64> [#uses=1] + %tmp84 = add i64 %14, 12 ; <i64> [#uses=1] + %tmp90 = add i64 %N, -16 ; <i64> [#uses=1] + %tmp91 = sub i64 %tmp90, %14 ; <i64> [#uses=1] + br label %bb9 + +bb9: ; preds = %bb.nph, %bb9 + %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %bb9 ] ; <i64> [#uses=3] + %vX3.125 = phi <4 x float> [ %34, %bb.nph ], [ %69, %bb9 ] ; <<4 x float>> [#uses=2] + %vX0.223 = phi <4 x float> [ %31, %bb.nph ], [ %65, %bb9 
] ; <<4 x float>> [#uses=2] + %vX2.121 = phi <4 x float> [ %33, %bb.nph ], [ %70, %bb9 ] ; <<4 x float>> [#uses=2] + %vX1.120 = phi <4 x float> [ %32, %bb.nph ], [ %71, %bb9 ] ; <<4 x float>> [#uses=2] + %vI0.019 = phi <4 x float> [ %51, %bb.nph ], [ %55, %bb9 ] ; <<4 x float>> [#uses=1] + %tmp51 = shl i64 %indvar, 4 ; <i64> [#uses=9] + %tmp55 = add i64 %tmp54, %tmp51 ; <i64> [#uses=2] + %tmp57 = add i64 %tmp56, %tmp51 ; <i64> [#uses=1] + %scevgep58 = getelementptr float* %I, i64 %tmp57 ; <float*> [#uses=1] + %scevgep5859 = bitcast float* %scevgep58 to <4 x float>* ; <<4 x float>*> [#uses=1] + %tmp61 = add i64 %tmp60, %tmp51 ; <i64> [#uses=1] + %scevgep62 = getelementptr float* %I, i64 %tmp61 ; <float*> [#uses=1] + %scevgep6263 = bitcast float* %scevgep62 to <4 x float>* ; <<4 x float>*> [#uses=1] + %tmp65 = add i64 %tmp64, %tmp51 ; <i64> [#uses=1] + %scevgep66 = getelementptr float* %I, i64 %tmp65 ; <float*> [#uses=1] + %scevgep6667 = bitcast float* %scevgep66 to <4 x float>* ; <<4 x float>*> [#uses=1] + %tmp69 = add i64 %tmp68, %tmp51 ; <i64> [#uses=1] + %scevgep70 = getelementptr float* %I, i64 %tmp69 ; <float*> [#uses=1] + %scevgep7071 = bitcast float* %scevgep70 to <4 x float>* ; <<4 x float>*> [#uses=1] + %tmp72 = add i64 %14, %tmp51 ; <i64> [#uses=1] + %scevgep73 = getelementptr float* %O, i64 %tmp72 ; <float*> [#uses=1] + %scevgep7374 = bitcast float* %scevgep73 to <4 x float>* ; <<4 x float>*> [#uses=1] + %tmp77 = add i64 %tmp76, %tmp51 ; <i64> [#uses=1] + %scevgep78 = getelementptr float* %O, i64 %tmp77 ; <float*> [#uses=1] + %scevgep7879 = bitcast float* %scevgep78 to <4 x float>* ; <<4 x float>*> [#uses=1] + %tmp81 = add i64 %tmp80, %tmp51 ; <i64> [#uses=1] + %scevgep82 = getelementptr float* %O, i64 %tmp81 ; <float*> [#uses=1] + %scevgep8283 = bitcast float* %scevgep82 to <4 x float>* ; <<4 x float>*> [#uses=1] + %tmp85 = add i64 %tmp84, %tmp51 ; <i64> [#uses=1] + %scevgep86 = getelementptr float* %O, i64 %tmp85 ; <float*> [#uses=1] + %scevgep8687 = 
bitcast float* %scevgep86 to <4 x float>* ; <<4 x float>*> [#uses=1] + %tmp88 = mul i64 %indvar, -16 ; <i64> [#uses=1] + %tmp92 = add i64 %tmp91, %tmp88 ; <i64> [#uses=2] + %52 = load <4 x float>* %scevgep5859, align 16 ; <<4 x float>> [#uses=2] + %53 = load <4 x float>* %scevgep6263, align 16 ; <<4 x float>> [#uses=2] + %54 = load <4 x float>* %scevgep6667, align 16 ; <<4 x float>> [#uses=2] + %55 = load <4 x float>* %scevgep7071, align 16 ; <<4 x float>> [#uses=2] + %56 = shufflevector <4 x float> %vI0.019, <4 x float> %52, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1] + %57 = shufflevector <4 x float> %56, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1] + %58 = shufflevector <4 x float> %52, <4 x float> %53, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1] + %59 = shufflevector <4 x float> %58, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1] + %60 = shufflevector <4 x float> %53, <4 x float> %54, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1] + %61 = shufflevector <4 x float> %60, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1] + %62 = shufflevector <4 x float> %54, <4 x float> %55, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1] + %63 = shufflevector <4 x float> %62, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1] + %64 = fmul <4 x float> %57, %vX0.223 ; <<4 x float>> [#uses=1] + %65 = fadd <4 x float> %vX0.223, %asmtmp.i18 ; <<4 x float>> [#uses=2] + %66 = fmul <4 x float> %59, %vX1.120 ; <<4 x float>> [#uses=1] + %67 = fmul <4 x float> %61, %vX2.121 ; <<4 x float>> [#uses=1] + %68 = fmul <4 x float> %63, %vX3.125 ; <<4 x float>> [#uses=1] + store <4 x float> %64, <4 x float>* %scevgep7374, align 16 + store <4 x float> %66, <4 x float>* %scevgep7879, align 16 + store <4 x float> %67, <4 x float>* %scevgep8283, align 16 + store <4 x float> %68, <4 x 
float>* %scevgep8687, align 16 + %69 = fadd <4 x float> %vX3.125, %asmtmp.i18 ; <<4 x float>> [#uses=1] + %70 = fadd <4 x float> %vX2.121, %asmtmp.i18 ; <<4 x float>> [#uses=1] + %71 = fadd <4 x float> %vX1.120, %asmtmp.i18 ; <<4 x float>> [#uses=1] + %72 = icmp sgt i64 %tmp92, 15 ; <i1> [#uses=1] + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1] + br i1 %72, label %bb9, label %bb10.bb11.loopexit_crit_edge + +bb10.bb11.loopexit_crit_edge: ; preds = %bb9 + %scevgep = getelementptr float* %I, i64 %tmp55 ; <float*> [#uses=1] + %scevgep75 = getelementptr float* %O, i64 %tmp55 ; <float*> [#uses=1] + br label %bb11 + +bb11: ; preds = %bb8, %bb10.bb11.loopexit_crit_edge, %bb7 + %N_addr.2 = phi i64 [ %N_addr.1.lcssa, %bb7 ], [ %tmp92, %bb10.bb11.loopexit_crit_edge ], [ %N_addr.0, %bb8 ] ; <i64> [#uses=2] + %vX0.1 = phi <4 x float> [ %vX0.0.lcssa, %bb7 ], [ %65, %bb10.bb11.loopexit_crit_edge ], [ %31, %bb8 ] ; <<4 x float>> [#uses=1] + %O_addr.2 = phi float* [ %O_addr.1.lcssa, %bb7 ], [ %scevgep75, %bb10.bb11.loopexit_crit_edge ], [ %O_addr.0, %bb8 ] ; <float*> [#uses=1] + %I_addr.2 = phi float* [ %I_addr.1.lcssa, %bb7 ], [ %scevgep, %bb10.bb11.loopexit_crit_edge ], [ %I_addr.0, %bb8 ] ; <float*> [#uses=1] + %73 = extractelement <4 x float> %vX0.1, i32 0 ; <float> [#uses=2] + %74 = icmp sgt i64 %N_addr.2, 0 ; <i1> [#uses=1] + br i1 %74, label %bb12, label %bb14 + +bb12: ; preds = %bb11, %bb12 + %indvar94 = phi i64 [ %indvar.next95, %bb12 ], [ 0, %bb11 ] ; <i64> [#uses=3] + %x.130 = phi float [ %77, %bb12 ], [ %73, %bb11 ] ; <float> [#uses=2] + %I_addr.433 = getelementptr float* %I_addr.2, i64 %indvar94 ; <float*> [#uses=1] + %O_addr.432 = getelementptr float* %O_addr.2, i64 %indvar94 ; <float*> [#uses=1] + %75 = load float* %I_addr.433, align 4 ; <float> [#uses=1] + %76 = fmul float %75, %x.130 ; <float> [#uses=1] + store float %76, float* %O_addr.432, align 4 + %77 = fadd float %x.130, %0 ; <float> [#uses=2] + %indvar.next95 = add i64 %indvar94, 1 ; <i64> [#uses=2] + 
%exitcond = icmp eq i64 %indvar.next95, %N_addr.2 ; <i1> [#uses=1] + br i1 %exitcond, label %bb14, label %bb12 + +bb14: ; preds = %bb12, %bb11 + %x.1.lcssa = phi float [ %73, %bb11 ], [ %77, %bb12 ] ; <float> [#uses=1] + store float %x.1.lcssa, float* %Start, align 4 + ret void + +return: ; preds = %entry + ret void +} + +; Codegen shouldn't crash on this testcase. + +define void @bar(i32 %a, i32 %b) nounwind { +entry: ; preds = %bb1, %entry, %for.end204 + br label %outer + +outer: ; preds = %bb1, %entry + %i6 = phi i32 [ %storemerge171, %bb1 ], [ %a, %entry ] ; <i32> [#uses=2] + %storemerge171 = add i32 %i6, 1 ; <i32> [#uses=1] + br label %inner + +inner: ; preds = %bb0, %if.end275 + %i8 = phi i32 [ %a, %outer ], [ %indvar.next159, %bb0 ] ; <i32> [#uses=2] + %t338 = load i32* undef ; <i32> [#uses=1] + %t191 = mul i32 %i8, %t338 ; <i32> [#uses=1] + %t179 = add i32 %i6, %t191 ; <i32> [#uses=1] + br label %bb0 + +bb0: ; preds = %for.body332 + %indvar.next159 = add i32 %i8, 1 ; <i32> [#uses=1] + br i1 undef, label %bb1, label %inner + +bb1: ; preds = %bb0, %outer + %midx.4 = phi i32 [ %t179, %bb0 ] ; <i32> [#uses=0] + br label %outer +}
diff --git a/src/LLVM/test/CodeGen/X86/multiple-return-values-cross-block.ll b/src/LLVM/test/CodeGen/X86/multiple-return-values-cross-block.ll new file mode 100644 index 0000000..b0cb061 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/multiple-return-values-cross-block.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=x86 + +declare {x86_fp80, x86_fp80} @test() + +define void @call2(x86_fp80 *%P1, x86_fp80 *%P2) { + %a = call {x86_fp80,x86_fp80} @test() + %b = extractvalue {x86_fp80,x86_fp80} %a, 1 + store x86_fp80 %b, x86_fp80* %P1 +br label %L + +L: + %c = extractvalue {x86_fp80,x86_fp80} %a, 0 + store x86_fp80 %c, x86_fp80* %P2 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/nancvt.ll b/src/LLVM/test/CodeGen/X86/nancvt.ll new file mode 100644 index 0000000..82b7331 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/nancvt.ll
@@ -0,0 +1,183 @@ +; RUN: opt < %s -std-compile-opts | llc > %t +; RUN: grep 2147027116 %t | count 3 +; RUN: grep 2147228864 %t | count 3 +; RUN: grep 2146502828 %t | count 3 +; RUN: grep 2143034560 %t | count 3 +; Compile time conversions of NaNs. +; ModuleID = 'nan2.c' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i686-apple-darwin8" + %struct..0anon = type { float } + %struct..1anon = type { double } +@fnan = constant [3 x i32] [ i32 2143831397, i32 2143831396, i32 2143831398 ] ; <[3 x i32]*> [#uses=1] +@dnan = constant [3 x i64] [ i64 9223235251041752696, i64 9223235251041752697, i64 9223235250773317239 ], align 8 ; <[3 x i64]*> [#uses=1] +@fsnan = constant [3 x i32] [ i32 2139637093, i32 2139637092, i32 2139637094 ] ; <[3 x i32]*> [#uses=1] +@dsnan = constant [3 x i64] [ i64 9220983451228067448, i64 9220983451228067449, i64 9220983450959631991 ], align 8 ; <[3 x i64]*> [#uses=1] +@.str = internal constant [10 x i8] c"%08x%08x\0A\00" ; <[10 x i8]*> [#uses=2] +@.str1 = internal constant [6 x i8] c"%08x\0A\00" ; <[6 x i8]*> [#uses=2] + +@var = external global i32 + +define i32 @main() { +entry: + %retval = alloca i32, align 4 ; <i32*> [#uses=1] + %i = alloca i32, align 4 ; <i32*> [#uses=20] + %uf = alloca %struct..0anon, align 4 ; <%struct..0anon*> [#uses=8] + %ud = alloca %struct..1anon, align 8 ; <%struct..1anon*> [#uses=10] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store i32 0, i32* %i, align 4 + br label %bb23 + +bb: ; preds = %bb23 + %tmp = load i32* %i, align 4 ; <i32> [#uses=1] + %tmp1 = getelementptr [3 x i32]* @fnan, i32 0, i32 %tmp ; <i32*> [#uses=1] + %tmp2 = load i32* %tmp1, align 4 ; <i32> [#uses=1] + %tmp3 = getelementptr %struct..0anon* %uf, i32 0, i32 0 ; <float*> [#uses=1] + %tmp34 = bitcast float* %tmp3 to i32* ; <i32*> [#uses=1] + store i32 %tmp2, i32* %tmp34, align 4 + %tmp5 = getelementptr %struct..0anon* %uf, 
i32 0, i32 0 ; <float*> [#uses=1] + %tmp6 = load float* %tmp5, align 4 ; <float> [#uses=1] + %tmp67 = fpext float %tmp6 to double ; <double> [#uses=1] + %tmp8 = getelementptr %struct..1anon* %ud, i32 0, i32 0 ; <double*> [#uses=1] + store double %tmp67, double* %tmp8, align 8 + %tmp9 = getelementptr %struct..1anon* %ud, i32 0, i32 0 ; <double*> [#uses=1] + %tmp910 = bitcast double* %tmp9 to i64* ; <i64*> [#uses=1] + %tmp11 = load i64* %tmp910, align 8 ; <i64> [#uses=1] + %tmp1112 = trunc i64 %tmp11 to i32 ; <i32> [#uses=1] + %tmp13 = and i32 %tmp1112, -1 ; <i32> [#uses=1] + %tmp14 = getelementptr %struct..1anon* %ud, i32 0, i32 0 ; <double*> [#uses=1] + %tmp1415 = bitcast double* %tmp14 to i64* ; <i64*> [#uses=1] + %tmp16 = load i64* %tmp1415, align 8 ; <i64> [#uses=1] + %.cast = zext i32 32 to i64 ; <i64> [#uses=1] + %tmp17 = ashr i64 %tmp16, %.cast ; <i64> [#uses=1] + %tmp1718 = trunc i64 %tmp17 to i32 ; <i32> [#uses=1] + %tmp19 = getelementptr [10 x i8]* @.str, i32 0, i32 0 ; <i8*> [#uses=1] + volatile store i32 %tmp1718, i32* @var + volatile store i32 %tmp13, i32* @var + %tmp21 = load i32* %i, align 4 ; <i32> [#uses=1] + %tmp22 = add i32 %tmp21, 1 ; <i32> [#uses=1] + store i32 %tmp22, i32* %i, align 4 + br label %bb23 + +bb23: ; preds = %bb, %entry + %tmp24 = load i32* %i, align 4 ; <i32> [#uses=1] + %tmp25 = icmp sle i32 %tmp24, 2 ; <i1> [#uses=1] + %tmp2526 = zext i1 %tmp25 to i8 ; <i8> [#uses=1] + %toBool = icmp ne i8 %tmp2526, 0 ; <i1> [#uses=1] + br i1 %toBool, label %bb, label %bb27 + +bb27: ; preds = %bb23 + store i32 0, i32* %i, align 4 + br label %bb46 + +bb28: ; preds = %bb46 + %tmp29 = load i32* %i, align 4 ; <i32> [#uses=1] + %tmp30 = getelementptr [3 x i64]* @dnan, i32 0, i32 %tmp29 ; <i64*> [#uses=1] + %tmp31 = load i64* %tmp30, align 8 ; <i64> [#uses=1] + %tmp32 = getelementptr %struct..1anon* %ud, i32 0, i32 0 ; <double*> [#uses=1] + %tmp3233 = bitcast double* %tmp32 to i64* ; <i64*> [#uses=1] + store i64 %tmp31, i64* %tmp3233, align 8 + %tmp35 
= getelementptr %struct..1anon* %ud, i32 0, i32 0 ; <double*> [#uses=1] + %tmp36 = load double* %tmp35, align 8 ; <double> [#uses=1] + %tmp3637 = fptrunc double %tmp36 to float ; <float> [#uses=1] + %tmp38 = getelementptr %struct..0anon* %uf, i32 0, i32 0 ; <float*> [#uses=1] + store float %tmp3637, float* %tmp38, align 4 + %tmp39 = getelementptr %struct..0anon* %uf, i32 0, i32 0 ; <float*> [#uses=1] + %tmp3940 = bitcast float* %tmp39 to i32* ; <i32*> [#uses=1] + %tmp41 = load i32* %tmp3940, align 4 ; <i32> [#uses=1] + %tmp42 = getelementptr [6 x i8]* @.str1, i32 0, i32 0 ; <i8*> [#uses=1] + volatile store i32 %tmp41, i32* @var + %tmp44 = load i32* %i, align 4 ; <i32> [#uses=1] + %tmp45 = add i32 %tmp44, 1 ; <i32> [#uses=1] + store i32 %tmp45, i32* %i, align 4 + br label %bb46 + +bb46: ; preds = %bb28, %bb27 + %tmp47 = load i32* %i, align 4 ; <i32> [#uses=1] + %tmp48 = icmp sle i32 %tmp47, 2 ; <i1> [#uses=1] + %tmp4849 = zext i1 %tmp48 to i8 ; <i8> [#uses=1] + %toBool50 = icmp ne i8 %tmp4849, 0 ; <i1> [#uses=1] + br i1 %toBool50, label %bb28, label %bb51 + +bb51: ; preds = %bb46 + store i32 0, i32* %i, align 4 + br label %bb78 + +bb52: ; preds = %bb78 + %tmp53 = load i32* %i, align 4 ; <i32> [#uses=1] + %tmp54 = getelementptr [3 x i32]* @fsnan, i32 0, i32 %tmp53 ; <i32*> [#uses=1] + %tmp55 = load i32* %tmp54, align 4 ; <i32> [#uses=1] + %tmp56 = getelementptr %struct..0anon* %uf, i32 0, i32 0 ; <float*> [#uses=1] + %tmp5657 = bitcast float* %tmp56 to i32* ; <i32*> [#uses=1] + store i32 %tmp55, i32* %tmp5657, align 4 + %tmp58 = getelementptr %struct..0anon* %uf, i32 0, i32 0 ; <float*> [#uses=1] + %tmp59 = load float* %tmp58, align 4 ; <float> [#uses=1] + %tmp5960 = fpext float %tmp59 to double ; <double> [#uses=1] + %tmp61 = getelementptr %struct..1anon* %ud, i32 0, i32 0 ; <double*> [#uses=1] + store double %tmp5960, double* %tmp61, align 8 + %tmp62 = getelementptr %struct..1anon* %ud, i32 0, i32 0 ; <double*> [#uses=1] + %tmp6263 = bitcast double* %tmp62 to i64* 
; <i64*> [#uses=1] + %tmp64 = load i64* %tmp6263, align 8 ; <i64> [#uses=1] + %tmp6465 = trunc i64 %tmp64 to i32 ; <i32> [#uses=1] + %tmp66 = and i32 %tmp6465, -1 ; <i32> [#uses=1] + %tmp68 = getelementptr %struct..1anon* %ud, i32 0, i32 0 ; <double*> [#uses=1] + %tmp6869 = bitcast double* %tmp68 to i64* ; <i64*> [#uses=1] + %tmp70 = load i64* %tmp6869, align 8 ; <i64> [#uses=1] + %.cast71 = zext i32 32 to i64 ; <i64> [#uses=1] + %tmp72 = ashr i64 %tmp70, %.cast71 ; <i64> [#uses=1] + %tmp7273 = trunc i64 %tmp72 to i32 ; <i32> [#uses=1] + %tmp74 = getelementptr [10 x i8]* @.str, i32 0, i32 0 ; <i8*> [#uses=1] + volatile store i32 %tmp7273, i32* @var + volatile store i32 %tmp66, i32* @var + %tmp76 = load i32* %i, align 4 ; <i32> [#uses=1] + %tmp77 = add i32 %tmp76, 1 ; <i32> [#uses=1] + store i32 %tmp77, i32* %i, align 4 + br label %bb78 + +bb78: ; preds = %bb52, %bb51 + %tmp79 = load i32* %i, align 4 ; <i32> [#uses=1] + %tmp80 = icmp sle i32 %tmp79, 2 ; <i1> [#uses=1] + %tmp8081 = zext i1 %tmp80 to i8 ; <i8> [#uses=1] + %toBool82 = icmp ne i8 %tmp8081, 0 ; <i1> [#uses=1] + br i1 %toBool82, label %bb52, label %bb83 + +bb83: ; preds = %bb78 + store i32 0, i32* %i, align 4 + br label %bb101 + +bb84: ; preds = %bb101 + %tmp85 = load i32* %i, align 4 ; <i32> [#uses=1] + %tmp86 = getelementptr [3 x i64]* @dsnan, i32 0, i32 %tmp85 ; <i64*> [#uses=1] + %tmp87 = load i64* %tmp86, align 8 ; <i64> [#uses=1] + %tmp88 = getelementptr %struct..1anon* %ud, i32 0, i32 0 ; <double*> [#uses=1] + %tmp8889 = bitcast double* %tmp88 to i64* ; <i64*> [#uses=1] + store i64 %tmp87, i64* %tmp8889, align 8 + %tmp90 = getelementptr %struct..1anon* %ud, i32 0, i32 0 ; <double*> [#uses=1] + %tmp91 = load double* %tmp90, align 8 ; <double> [#uses=1] + %tmp9192 = fptrunc double %tmp91 to float ; <float> [#uses=1] + %tmp93 = getelementptr %struct..0anon* %uf, i32 0, i32 0 ; <float*> [#uses=1] + store float %tmp9192, float* %tmp93, align 4 + %tmp94 = getelementptr %struct..0anon* %uf, i32 0, i32 0 ; 
<float*> [#uses=1] + %tmp9495 = bitcast float* %tmp94 to i32* ; <i32*> [#uses=1] + %tmp96 = load i32* %tmp9495, align 4 ; <i32> [#uses=1] + %tmp97 = getelementptr [6 x i8]* @.str1, i32 0, i32 0 ; <i8*> [#uses=1] + volatile store i32 %tmp96, i32* @var + %tmp99 = load i32* %i, align 4 ; <i32> [#uses=1] + %tmp100 = add i32 %tmp99, 1 ; <i32> [#uses=1] + store i32 %tmp100, i32* %i, align 4 + br label %bb101 + +bb101: ; preds = %bb84, %bb83 + %tmp102 = load i32* %i, align 4 ; <i32> [#uses=1] + %tmp103 = icmp sle i32 %tmp102, 2 ; <i1> [#uses=1] + %tmp103104 = zext i1 %tmp103 to i8 ; <i8> [#uses=1] + %toBool105 = icmp ne i8 %tmp103104, 0 ; <i1> [#uses=1] + br i1 %toBool105, label %bb84, label %bb106 + +bb106: ; preds = %bb101 + br label %return + +return: ; preds = %bb106 + %retval107 = load i32* %retval ; <i32> [#uses=1] + ret i32 %retval107 +}
diff --git a/src/LLVM/test/CodeGen/X86/narrow-shl-cst.ll b/src/LLVM/test/CodeGen/X86/narrow-shl-cst.ll new file mode 100644 index 0000000..a404f34 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/narrow-shl-cst.ll
@@ -0,0 +1,101 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s +; PR5039 + +define i32 @test1(i32 %x) nounwind { + %and = shl i32 %x, 10 + %shl = and i32 %and, 31744 + ret i32 %shl +; CHECK: test1: +; CHECK: andl $31 +; CHECK: shll $10 +} + +define i32 @test2(i32 %x) nounwind { + %or = shl i32 %x, 10 + %shl = or i32 %or, 31744 + ret i32 %shl +; CHECK: test2: +; CHECK: orl $31 +; CHECK: shll $10 +} + +define i32 @test3(i32 %x) nounwind { + %xor = shl i32 %x, 10 + %shl = xor i32 %xor, 31744 + ret i32 %shl +; CHECK: test3: +; CHECK: xorl $31 +; CHECK: shll $10 +} + +define i64 @test4(i64 %x) nounwind { + %and = shl i64 %x, 40 + %shl = and i64 %and, 264982302294016 + ret i64 %shl +; CHECK: test4: +; CHECK: andq $241 +; CHECK: shlq $40 +} + +define i64 @test5(i64 %x) nounwind { + %and = shl i64 %x, 40 + %shl = and i64 %and, 34084860461056 + ret i64 %shl +; CHECK: test5: +; CHECK: andq $31 +; CHECK: shlq $40 +} + +define i64 @test6(i64 %x) nounwind { + %and = shl i64 %x, 32 + %shl = and i64 %and, -281474976710656 + ret i64 %shl +; CHECK: test6: +; CHECK: andq $-65536 +; CHECK: shlq $32 +} + +define i64 @test7(i64 %x) nounwind { + %or = shl i64 %x, 40 + %shl = or i64 %or, 264982302294016 + ret i64 %shl +; CHECK: test7: +; CHECK: orq $241 +; CHECK: shlq $40 +} + +define i64 @test8(i64 %x) nounwind { + %or = shl i64 %x, 40 + %shl = or i64 %or, 34084860461056 + ret i64 %shl +; CHECK: test8: +; CHECK: orq $31 +; CHECK: shlq $40 +} + +define i64 @test9(i64 %x) nounwind { + %xor = shl i64 %x, 40 + %shl = xor i64 %xor, 264982302294016 + ret i64 %shl +; CHECK: test9: +; CHECK: orq $241 +; CHECK: shlq $40 +} + +define i64 @test10(i64 %x) nounwind { + %xor = shl i64 %x, 40 + %shl = xor i64 %xor, 34084860461056 + ret i64 %shl +; CHECK: test10: +; CHECK: xorq $31 +; CHECK: shlq $40 +} + +define i64 @test11(i64 %x) nounwind { + %xor = shl i64 %x, 33 + %shl = xor i64 %xor, -562949953421312 + ret i64 %shl +; CHECK: test11: +; CHECK: xorq $-65536 +; CHECK: shlq $33 +}
diff --git a/src/LLVM/test/CodeGen/X86/narrow-shl-load.ll b/src/LLVM/test/CodeGen/X86/narrow-shl-load.ll new file mode 100644 index 0000000..ef27cbc --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/narrow-shl-load.ll
@@ -0,0 +1,83 @@ +; RUN: llc -march=x86-64 < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-pc-linux-gnu" + +; DAGCombiner should fold this code in finite time. +; rdar://8606584 + +define void @test1() nounwind readnone { +bb.nph: + br label %while.cond + +while.cond: ; preds = %while.cond, %bb.nph + %tmp6 = load i32* undef, align 4 + %and = or i64 undef, undef + %conv11 = zext i32 undef to i64 + %conv14 = zext i32 %tmp6 to i64 + %shl15 = shl i64 %conv14, 1 + %shl15.masked = and i64 %shl15, 4294967294 + %and17 = or i64 %shl15.masked, %conv11 + %add = add i64 %and17, 1 + %xor = xor i64 %add, %and + %tmp20 = load i64* undef, align 8 + %add21 = add i64 %xor, %tmp20 + %conv22 = trunc i64 %add21 to i32 + store i32 %conv22, i32* undef, align 4 + br i1 false, label %while.end, label %while.cond + +while.end: ; preds = %while.cond + ret void +} + + +; DAGCombiner shouldn't fold the sdiv (ashr) away. +; rdar://8636812 +; CHECK: test2: +; CHECK: sarl + +define i32 @test2() nounwind { +entry: + %i = alloca i32, align 4 + %j = alloca i8, align 1 + store i32 127, i32* %i, align 4 + store i8 0, i8* %j, align 1 + %tmp3 = load i32* %i, align 4 + %mul = mul nsw i32 %tmp3, 2 + %conv4 = trunc i32 %mul to i8 + %conv5 = sext i8 %conv4 to i32 + %div6 = sdiv i32 %conv5, 2 + %conv7 = trunc i32 %div6 to i8 + %conv9 = sext i8 %conv7 to i32 + %cmp = icmp eq i32 %conv9, -1 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + ret i32 0 + +if.end: ; preds = %entry + call void @abort() noreturn + unreachable +} + +declare void @abort() noreturn + +declare void @exit(i32) noreturn + +; DAG Combiner can't fold this into a load of the 1'th byte. 
+; PR8757 +define i32 @test3(i32 *%P) nounwind ssp { + volatile store i32 128, i32* %P + %tmp4.pre = load i32* %P + %phitmp = trunc i32 %tmp4.pre to i16 + %phitmp13 = shl i16 %phitmp, 8 + %phitmp14 = ashr i16 %phitmp13, 8 + %phitmp15 = lshr i16 %phitmp14, 8 + %phitmp16 = zext i16 %phitmp15 to i32 + ret i32 %phitmp16 + +; CHECK: movl $128, (%rdi) +; CHECK-NEXT: movsbl (%rdi), %eax +; CHECK-NEXT: movzbl %ah, %eax +; CHECK-NEXT: ret +}
diff --git a/src/LLVM/test/CodeGen/X86/narrow_op-1.ll b/src/LLVM/test/CodeGen/X86/narrow_op-1.ll new file mode 100644 index 0000000..18f1108 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/narrow_op-1.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -march=x86-64 | grep orb | count 1 +; RUN: llc < %s -march=x86-64 | grep orb | grep 1 +; RUN: llc < %s -march=x86-64 | grep orl | count 1 +; RUN: llc < %s -march=x86-64 | grep orl | grep 16842752 + + %struct.bf = type { i64, i16, i16, i32 } +@bfi = common global %struct.bf zeroinitializer, align 16 + +define void @t1() nounwind optsize ssp { +entry: + %0 = load i32* bitcast (i16* getelementptr (%struct.bf* @bfi, i32 0, i32 1) to i32*), align 8 + %1 = or i32 %0, 65536 + store i32 %1, i32* bitcast (i16* getelementptr (%struct.bf* @bfi, i32 0, i32 1) to i32*), align 8 + ret void +} + +define void @t2() nounwind optsize ssp { +entry: + %0 = load i32* bitcast (i16* getelementptr (%struct.bf* @bfi, i32 0, i32 1) to i32*), align 8 + %1 = or i32 %0, 16842752 + store i32 %1, i32* bitcast (i16* getelementptr (%struct.bf* @bfi, i32 0, i32 1) to i32*), align 8 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/neg-shl-add.ll b/src/LLVM/test/CodeGen/X86/neg-shl-add.ll new file mode 100644 index 0000000..7aebc38 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/neg-shl-add.ll
@@ -0,0 +1,17 @@ +; RUN: llc -march=x86-64 < %s | not grep negq + +; These sequences don't need neg instructions; they can be done with +; a single shift and sub each. + +define i64 @foo(i64 %x, i64 %y, i64 %n) nounwind { + %a = sub i64 0, %y + %b = shl i64 %a, %n + %c = add i64 %b, %x + ret i64 %c +} +define i64 @boo(i64 %x, i64 %y, i64 %n) nounwind { + %a = sub i64 0, %y + %b = shl i64 %a, %n + %c = add i64 %x, %b + ret i64 %c +}
diff --git a/src/LLVM/test/CodeGen/X86/neg_fp.ll b/src/LLVM/test/CodeGen/X86/neg_fp.ll new file mode 100644 index 0000000..57164f2 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/neg_fp.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86 -mattr=+sse41 -o %t +; RUN: grep xorps %t | count 1 + +; Test that when we don't -enable-unsafe-fp-math, we don't do the optimization +; -0 - (A - B) to (B - A) because A==B, -0 != 0 + +define float @negfp(float %a, float %b) { +entry: + %sub = fsub float %a, %b ; <float> [#uses=1] + %neg = fsub float -0.000000e+00, %sub ; <float> [#uses=1] + ret float %neg +}
diff --git a/src/LLVM/test/CodeGen/X86/negate-add-zero.ll b/src/LLVM/test/CodeGen/X86/negate-add-zero.ll new file mode 100644 index 0000000..c3f412e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/negate-add-zero.ll
@@ -0,0 +1,1145 @@ +; RUN: llc < %s -enable-unsafe-fp-math -march=x86 | not grep xor +; PR3374 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin7" + %struct.AtomList = type { %"struct.CDSListRep<IVMAtom*>"* } + %struct.AtomTree = type { %struct.IVM*, %"struct.CDSList<CDSList<HingeNode*> >" } + %"struct.CDS::DefaultAlloc" = type <{ i8 }> + %"struct.CDS::SingularError" = type { %"struct.CDS::exception" } + %"struct.CDS::auto_ptr<IVMAtom>" = type { %struct.IVMAtom* } + %"struct.CDS::exception" = type { [300 x i8] } + %"struct.CDSList<CDSList<HingeNode*> >" = type { %"struct.CDSListRep<CDSList<HingeNode*> >"* } + %"struct.CDSList<CDSList<int> >" = type { %"struct.CDSListRep<CDSList<int> >"* } + %"struct.CDSList<HingeNode*>" = type { %"struct.CDSListRep<HingeNode*>"* } + %"struct.CDSList<InternalDynamics::HingeSpec>" = type { %"struct.CDSListRep<InternalDynamics::HingeSpec>"* } + %"struct.CDSList<Loop>" = type { %"struct.CDSListRep<Loop>"* } + %"struct.CDSList<Pair<int, int> >" = type { %"struct.CDSListRep<Pair<int, int> >"* } + %"struct.CDSList<int>" = type { %"struct.CDSListRep<int>"* } + %"struct.CDSListRep<CDSList<HingeNode*> >" = type opaque + %"struct.CDSListRep<CDSList<int> >" = type opaque + %"struct.CDSListRep<HingeNode*>" = type { i32, i32, %struct.HingeNode**, i32 } + %"struct.CDSListRep<IVMAtom*>" = type { i32, i32, %struct.IVMAtom**, i32 } + %"struct.CDSListRep<InternalDynamics::HingeSpec>" = type opaque + %"struct.CDSListRep<Loop>" = type opaque + %"struct.CDSListRep<Pair<int, int> >" = type opaque + %"struct.CDSListRep<int>" = type { i32, i32, i32*, i32 } + %"struct.CDSMatrixBase<double>" = type { %"struct.CDSMatrixRep<double>"* } + %"struct.CDSMatrixRep<double>" = type opaque + %"struct.CDSStringRep<char>" = type { i8*, i32, i32, i32, i32 } + %"struct.CDSVector<Vec3,0,CDS::DefaultAlloc>" = type { 
%"struct.CDSVectorBase<Vec3,CDS::DefaultAlloc>" } + %"struct.CDSVector<double,0,CDS::DefaultAlloc>" = type { %"struct.CDSVectorBase<double,CDS::DefaultAlloc>" } + %"struct.CDSVectorBase<Vec3,CDS::DefaultAlloc>" = type { %"struct.CDSVectorRep<Vec3,CDS::DefaultAlloc>"* } + %"struct.CDSVectorBase<double,CDS::DefaultAlloc>" = type { %"struct.CDSVectorRep<double,CDS::DefaultAlloc>"* } + %"struct.CDSVectorRep<Vec3,CDS::DefaultAlloc>" = type { i32, %"struct.CDS::DefaultAlloc", %struct.Vec3*, i32 } + %"struct.CDSVectorRep<double,CDS::DefaultAlloc>" = type { i32, %"struct.CDS::DefaultAlloc", double*, i32 } + %"struct.FixedMatrix<double,1,1,0,0>" = type { %"struct.FixedMatrixBase<double,1,1>" } + %"struct.FixedMatrix<double,1,3,0,0>" = type { %"struct.FixedMatrixBase<double,1,3>" } + %"struct.FixedMatrix<double,1,6,0,0>" = type { %"struct.FixedMatrixBase<double,1,6>" } + %"struct.FixedMatrix<double,2,2,0,0>" = type { %"struct.FixedMatrixBase<double,2,2>" } + %"struct.FixedMatrix<double,2,6,0,0>" = type { %"struct.FixedMatrixBase<double,2,6>" } + %"struct.FixedMatrix<double,3,3,0,0>" = type { %"struct.FixedMatrixBase<double,3,3>" } + %"struct.FixedMatrix<double,3,6,0,0>" = type { %"struct.FixedMatrixBase<double,3,6>" } + %"struct.FixedMatrix<double,5,5,0,0>" = type { %"struct.FixedMatrixBase<double,5,5>" } + %"struct.FixedMatrix<double,5,6,0,0>" = type { %"struct.FixedMatrixBase<double,5,6>" } + %"struct.FixedMatrixBase<double,1,1>" = type { [1 x double] } + %"struct.FixedMatrixBase<double,1,3>" = type { [3 x double] } + %"struct.FixedMatrixBase<double,1,6>" = type { [6 x double] } + %"struct.FixedMatrixBase<double,2,2>" = type { [4 x double] } + %"struct.FixedMatrixBase<double,2,6>" = type { [12 x double] } + %"struct.FixedMatrixBase<double,3,3>" = type { [9 x double] } + %"struct.FixedMatrixBase<double,3,6>" = type { [18 x double] } + %"struct.FixedMatrixBase<double,5,5>" = type { [25 x double] } + %"struct.FixedMatrixBase<double,5,6>" = type { [30 x double] } + 
%"struct.FixedMatrixBase<double,6,6>" = type { [36 x double] } + %"struct.FixedVector<double,2,0>" = type { %"struct.FixedVectorBase<double,2>" } + %"struct.FixedVector<double,5,0>" = type { %"struct.FixedVectorBase<double,5>" } + %"struct.FixedVectorBase<double,2>" = type { [2 x double] } + %"struct.FixedVectorBase<double,5>" = type { [5 x double] } + %struct.HNodeOrigin = type { %struct.HingeNode } + %struct.HNodeRotate2 = type { %"struct.HingeNodeSpec<2>", %struct.Vec3, %struct.Vec3, %struct.Vec3, %struct.Vec3, %struct.Vec3, %struct.Mat3, %struct.Mat3, %struct.Vec3, %"struct.CDS::auto_ptr<IVMAtom>", %"struct.CDSVector<Vec3,0,CDS::DefaultAlloc>" } + %struct.HNodeRotate3 = type { %"struct.HingeNodeSpec<3>", %struct.Vec4, %struct.Vec4, %struct.Vec4, %struct.Vec3, %"struct.CDS::auto_ptr<IVMAtom>", %"struct.CDSVector<Vec3,0,CDS::DefaultAlloc>", double, double, double, double, double, double, i8 } + %struct.HNodeTorsion = type { %"struct.HingeNodeSpec<1>", %struct.Vec3, %"struct.CDSVector<Vec3,0,CDS::DefaultAlloc>", %struct.Vec3, %struct.Mat3 } + %struct.HNodeTranslate = type { %"struct.HingeNodeSpec<3>", %struct.IVMAtom*, %struct.Vec3, %"struct.CDSVector<Vec3,0,CDS::DefaultAlloc>" } + %struct.HNodeTranslateRotate2 = type { %"struct.HingeNodeSpec<5>", %struct.Vec3, %struct.Vec3, %struct.Vec3, %struct.Vec3, %struct.Vec3, %struct.Mat3, %struct.Mat3, %struct.Vec3, %"struct.CDS::auto_ptr<IVMAtom>", %"struct.CDSVector<Vec3,0,CDS::DefaultAlloc>" } + %struct.HNodeTranslateRotate3 = type { %"struct.HingeNodeSpec<6>", %struct.Vec4, %struct.Vec4, %struct.Vec4, %struct.Vec3, %"struct.CDS::auto_ptr<IVMAtom>", %"struct.CDSVector<Vec3,0,CDS::DefaultAlloc>", double, double, double, double, double, double, i8 } + %struct.HingeNode = type { i32 (...)**, %struct.HingeNode*, %"struct.CDSList<HingeNode*>", i32, %struct.AtomList, %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedMatrix<double,1,6,0,0>", %struct.PhiMatrix, %struct.Mat6, %struct.Mat6, 
%"struct.FixedMatrix<double,1,6,0,0>", %struct.Mat6, %"struct.FixedMatrix<double,1,6,0,0>", %struct.Mat3, %struct.Mat6, %struct.IVM*, %struct.IVMAtom* } + %"struct.HingeNodeSpec<1>" = type { %struct.HingeNode, i32, double, %struct.InertiaTensor, %struct.Mat6, %struct.Vec3, %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedMatrix<double,1,1,0,0>", %"struct.FixedMatrix<double,1,1,0,0>", %"struct.FixedMatrix<double,1,1,0,0>", %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedMatrix<double,1,1,0,0>", %"struct.FixedMatrix<double,1,1,0,0>", %"struct.FixedMatrix<double,1,1,0,0>", %"struct.FixedMatrix<double,1,1,0,0>", %"struct.FixedMatrix<double,1,6,0,0>" } + %"struct.HingeNodeSpec<2>" = type { %struct.HingeNode, i32, double, %struct.InertiaTensor, %struct.Mat6, %struct.Vec3, %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedVector<double,2,0>", %"struct.FixedVector<double,2,0>", %"struct.FixedVector<double,2,0>", %"struct.FixedMatrix<double,2,6,0,0>", %"struct.FixedVector<double,2,0>", %"struct.FixedVector<double,2,0>", %"struct.FixedVector<double,2,0>", %"struct.FixedMatrix<double,2,2,0,0>", %"struct.FixedMatrix<double,2,6,0,0>" } + %"struct.HingeNodeSpec<3>" = type { %struct.HingeNode, i32, double, %struct.InertiaTensor, %struct.Mat6, %struct.Vec3, %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedMatrix<double,1,3,0,0>", %"struct.FixedMatrix<double,1,3,0,0>", %"struct.FixedMatrix<double,1,3,0,0>", %"struct.FixedMatrix<double,3,6,0,0>", %"struct.FixedMatrix<double,1,3,0,0>", %"struct.FixedMatrix<double,1,3,0,0>", %"struct.FixedMatrix<double,1,3,0,0>", %"struct.FixedMatrix<double,3,3,0,0>", %"struct.FixedMatrix<double,3,6,0,0>" } + %"struct.HingeNodeSpec<5>" = type { %struct.HingeNode, i32, double, %struct.InertiaTensor, 
%struct.Mat6, %struct.Vec3, %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedVector<double,5,0>", %"struct.FixedVector<double,5,0>", %"struct.FixedVector<double,5,0>", %"struct.FixedMatrix<double,5,6,0,0>", %"struct.FixedVector<double,5,0>", %"struct.FixedVector<double,5,0>", %"struct.FixedVector<double,5,0>", %"struct.FixedMatrix<double,5,5,0,0>", %"struct.FixedMatrix<double,5,6,0,0>" } + %"struct.HingeNodeSpec<6>" = type { %struct.HingeNode, i32, double, %struct.InertiaTensor, %struct.Mat6, %struct.Vec3, %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedMatrix<double,1,6,0,0>", %struct.Mat6, %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedMatrix<double,1,6,0,0>", %struct.Mat6, %struct.Mat6 } + %struct.IVM = type { i32 (...)**, %struct.AtomTree*, %struct.Integrator*, %struct.LengthConstraints*, i32, i32, i32, i8, i8, i8, i8, double, double, double, double, double, double, double, double, double, i32, double, double, double, double, double, double, %"struct.CDSList<Loop>", %"struct.CDSList<Pair<int, int> >", %struct.AtomList, %"struct.CDSList<CDSList<int> >", %"struct.CDSList<InternalDynamics::HingeSpec>", %struct.String, %"struct.CDSList<int>", i32 (%"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)*, double (%"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)*, i32 (%"struct.CDSVector<Vec3,0,CDS::DefaultAlloc>"*)*, double (%"struct.CDSVector<Vec3,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<Vec3,0,CDS::DefaultAlloc>"*)* } + %struct.IVMAtom = type { i32, %struct.HingeNode*, %struct.AtomList, %struct.Vec3, %struct.Vec3, %struct.Vec3, double, double } + %struct.InertiaTensor = type { %struct.Mat3 } + %struct.Integrator = type { i32 (...)**, 
%"struct.CDSVector<double,0,CDS::DefaultAlloc>", %"struct.CDSVector<double,0,CDS::DefaultAlloc>", %struct.IVM* } + %"struct.InternalDynamics::HingeSpec" = type { %struct.String, i32, i32, %"struct.CDSList<int>" } + %struct.LengthConstraints = type { double, i32, i32, %struct.IVM*, %struct.LengthConstraintsPrivates* } + %struct.LengthConstraintsPrivates = type opaque + %struct.Mat3 = type { %"struct.FixedMatrix<double,3,3,0,0>" } + %struct.Mat6 = type { %"struct.FixedMatrixBase<double,6,6>" } + %"struct.MatrixTools::InverseResults<FullMatrix<double> >" = type { %"struct.CDSVector<double,0,CDS::DefaultAlloc>", i32 } + %struct.PhiMatrix = type { %struct.Vec3 } + %struct.PhiMatrixTranspose = type { %struct.PhiMatrix* } + %struct.RMat = type { %"struct.CDSMatrixBase<double>" } + %struct.String = type { %"struct.CDSStringRep<char>"* } + %"struct.SubMatrix<FixedMatrix<double, 6, 6, 0, 0> >" = type { %struct.Mat6*, i32, i32, i32, i32 } + %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >" = type { %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, i32, i32 } + %"struct.SubVector<FixedVector<double, 6, 0> >" = type { %"struct.FixedMatrix<double,1,6,0,0>"*, i32, i32 } + %struct.Vec3 = type { %"struct.FixedMatrix<double,1,3,0,0>" } + %struct.Vec4 = type { %"struct.FixedMatrix<double,2,2,0,0>" } + %struct.__class_type_info_pseudo = type { %struct.__type_info_pseudo } + %struct.__si_class_type_info_pseudo = type { %struct.__type_info_pseudo, %"struct.std::type_info"* } + %struct.__type_info_pseudo = type { i8*, i8* } + %"struct.std::basic_ios<char,std::char_traits<char> >" = type { %"struct.std::ios_base", %"struct.std::basic_ostream<char,std::char_traits<char> >"*, i8, i8, %"struct.std::basic_streambuf<char,std::char_traits<char> >"*, %"struct.std::ctype<char>"*, %"struct.std::num_get<char,std::istreambuf_iterator<char, std::char_traits<char> > >"*, %"struct.std::num_get<char,std::istreambuf_iterator<char, std::char_traits<char> > >"* } + 
%"struct.std::basic_ostream<char,std::char_traits<char> >" = type { i32 (...)**, %"struct.std::basic_ios<char,std::char_traits<char> >" } + %"struct.std::basic_streambuf<char,std::char_traits<char> >" = type { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, %"struct.std::locale" } + %"struct.std::ctype<char>" = type { %"struct.std::locale::facet", i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 } + %"struct.std::ios_base" = type { i32 (...)**, i32, i32, i32, i32, i32, %"struct.std::ios_base::_Callback_list"*, %"struct.std::ios_base::_Words", [8 x %"struct.std::ios_base::_Words"], i32, %"struct.std::ios_base::_Words"*, %"struct.std::locale" } + %"struct.std::ios_base::_Callback_list" = type { %"struct.std::ios_base::_Callback_list"*, void (i32, %"struct.std::ios_base"*, i32)*, i32, i32 } + %"struct.std::ios_base::_Words" = type { i8*, i32 } + %"struct.std::locale" = type { %"struct.std::locale::_Impl"* } + %"struct.std::locale::_Impl" = type { i32, %"struct.std::locale::facet"**, i32, %"struct.std::locale::facet"**, i8** } + %"struct.std::locale::facet" = type { i32 (...)**, i32 } + %"struct.std::num_get<char,std::istreambuf_iterator<char, std::char_traits<char> > >" = type { %"struct.std::locale::facet" } + %"struct.std::type_info" = type { i32 (...)**, i8* } +@_ZN9HingeNode7DEG2RADE = external constant double, align 8 ; <double*> [#uses=0] +@"\01LC" = external constant [8 x i8] ; <[8 x i8]*> [#uses=0] +@"\01LC1" = external constant [7 x i8] ; <[7 x i8]*> [#uses=0] +@"\01LC2" = external constant [10 x i8] ; <[10 x i8]*> [#uses=0] +@"\01LC3" = external constant [5 x i8] ; <[5 x i8]*> [#uses=0] +@"\01LC4" = external constant [8 x i8] ; <[8 x i8]*> [#uses=0] +@"\01LC5" = external constant [8 x i8] ; <[8 x i8]*> [#uses=0] +@"\01LC6" = external constant [7 x i8] ; <[7 x i8]*> [#uses=0] +@"\01LC7" = external constant [8 x i8] ; <[8 x i8]*> [#uses=0] +@"\01LC8" = external constant [3 x i8] ; <[3 x i8]*> [#uses=0] +@"\01LC9" = external constant [3 x i8] ; <[3 x i8]*> 
[#uses=0] +@"\01LC10" = external constant [3 x i8] ; <[3 x i8]*> [#uses=0] +@_ZStL8__ioinit = external global %"struct.CDS::DefaultAlloc" ; <%"struct.CDS::DefaultAlloc"*> [#uses=0] +@__dso_handle = external global i8* ; <i8**> [#uses=0] +@_ZTIN9HingeNode17VirtualBaseMethodE = external constant %struct.__class_type_info_pseudo ; <%struct.__class_type_info_pseudo*> [#uses=0] +@_ZTVN10__cxxabiv117__class_type_infoE = external constant [0 x i32 (...)*] ; <[0 x i32 (...)*]*> [#uses=0] +@_ZTSN9HingeNode17VirtualBaseMethodE = external constant [32 x i8], align 4 ; <[32 x i8]*> [#uses=0] +@_ZTV9HingeNode = external constant [31 x i32 (...)*], align 32 ; <[31 x i32 (...)*]*> [#uses=0] +@_ZTI9HingeNode = external constant %struct.__class_type_info_pseudo ; <%struct.__class_type_info_pseudo*> [#uses=0] +@_ZTS9HingeNode = external constant [11 x i8] ; <[11 x i8]*> [#uses=0] +@_ZTV11HNodeOrigin = external constant [31 x i32 (...)*], align 32 ; <[31 x i32 (...)*]*> [#uses=0] +@_ZTI11HNodeOrigin = external constant %struct.__si_class_type_info_pseudo ; <%struct.__si_class_type_info_pseudo*> [#uses=0] +@_ZTVN10__cxxabiv120__si_class_type_infoE = external constant [0 x i32 (...)*] ; <[0 x i32 (...)*]*> [#uses=0] +@_ZTS11HNodeOrigin = external constant [14 x i8] ; <[14 x i8]*> [#uses=0] +@_ZTV13HingeNodeSpecILi1EE = external constant [33 x i32 (...)*], align 32 ; <[33 x i32 (...)*]*> [#uses=0] +@_ZTI13HingeNodeSpecILi1EE = external constant %struct.__si_class_type_info_pseudo ; <%struct.__si_class_type_info_pseudo*> [#uses=0] +@_ZTS13HingeNodeSpecILi1EE = external constant [22 x i8] ; <[22 x i8]*> [#uses=0] +@_ZTV13HingeNodeSpecILi3EE = external constant [33 x i32 (...)*], align 32 ; <[33 x i32 (...)*]*> [#uses=0] +@_ZTI13HingeNodeSpecILi3EE = external constant %struct.__si_class_type_info_pseudo ; <%struct.__si_class_type_info_pseudo*> [#uses=0] +@_ZTS13HingeNodeSpecILi3EE = external constant [22 x i8] ; <[22 x i8]*> [#uses=0] +@_ZTV13HingeNodeSpecILi2EE = external constant [33 x 
i32 (...)*], align 32 ; <[33 x i32 (...)*]*> [#uses=0] +@_ZTI13HingeNodeSpecILi2EE = external constant %struct.__si_class_type_info_pseudo ; <%struct.__si_class_type_info_pseudo*> [#uses=0] +@_ZTS13HingeNodeSpecILi2EE = external constant [22 x i8] ; <[22 x i8]*> [#uses=0] +@_ZTV13HingeNodeSpecILi6EE = external constant [33 x i32 (...)*], align 32 ; <[33 x i32 (...)*]*> [#uses=0] +@_ZTI13HingeNodeSpecILi6EE = external constant %struct.__si_class_type_info_pseudo ; <%struct.__si_class_type_info_pseudo*> [#uses=0] +@_ZTS13HingeNodeSpecILi6EE = external constant [22 x i8] ; <[22 x i8]*> [#uses=0] +@_ZTV13HingeNodeSpecILi5EE = external constant [33 x i32 (...)*], align 32 ; <[33 x i32 (...)*]*> [#uses=0] +@_ZTI13HingeNodeSpecILi5EE = external constant %struct.__si_class_type_info_pseudo ; <%struct.__si_class_type_info_pseudo*> [#uses=0] +@_ZTS13HingeNodeSpecILi5EE = external constant [22 x i8] ; <[22 x i8]*> [#uses=0] +@_ZSt4cout = external global %"struct.std::basic_ostream<char,std::char_traits<char> >" ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=0] +@"\01LC11" = external constant [10 x i8] ; <[10 x i8]*> [#uses=0] +@"\01LC12" = external constant [8 x i8] ; <[8 x i8]*> [#uses=0] +@"\01LC13" = external constant [10 x i8] ; <[10 x i8]*> [#uses=0] +@_ZSt4cerr = external global %"struct.std::basic_ostream<char,std::char_traits<char> >" ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=0] +@"\01LC14" = external constant [29 x i8] ; <[29 x i8]*> [#uses=0] +@"\01LC15" = external constant [11 x i8] ; <[11 x i8]*> [#uses=0] +@"\01LC16" = external constant [13 x i8] ; <[13 x i8]*> [#uses=0] +@"\01LC17" = external constant [21 x i8] ; <[21 x i8]*> [#uses=0] +@"\01LC18" = external constant [8 x i8] ; <[8 x i8]*> [#uses=0] +@"\01LC19" = external constant [4 x i8] ; <[4 x i8]*> [#uses=0] +@"\01LC20" = external constant [42 x i8] ; <[42 x i8]*> [#uses=0] +@_ZTIN16InternalDynamics9ExceptionE = external constant 
%struct.__class_type_info_pseudo ; <%struct.__class_type_info_pseudo*> [#uses=0] +@_ZTSN16InternalDynamics9ExceptionE = external constant [31 x i8], align 4 ; <[31 x i8]*> [#uses=0] +@_ZTIN3CDS13SingularErrorE = external constant %struct.__si_class_type_info_pseudo ; <%struct.__si_class_type_info_pseudo*> [#uses=0] +@_ZTSN3CDS13SingularErrorE = external constant [22 x i8] ; <[22 x i8]*> [#uses=0] +@_ZTIN3CDS9exceptionE = external constant %struct.__class_type_info_pseudo ; <%struct.__class_type_info_pseudo*> [#uses=0] +@_ZTSN3CDS9exceptionE = external constant [17 x i8] ; <[17 x i8]*> [#uses=0] +@_ZTV12HNodeTorsion = external constant [33 x i32 (...)*], align 32 ; <[33 x i32 (...)*]*> [#uses=0] +@_ZTI12HNodeTorsion = external constant %struct.__si_class_type_info_pseudo ; <%struct.__si_class_type_info_pseudo*> [#uses=0] +@_ZTS12HNodeTorsion = external constant [15 x i8] ; <[15 x i8]*> [#uses=0] +@_ZTV12HNodeRotate3 = external constant [33 x i32 (...)*], align 32 ; <[33 x i32 (...)*]*> [#uses=0] +@_ZTI12HNodeRotate3 = external constant %struct.__si_class_type_info_pseudo ; <%struct.__si_class_type_info_pseudo*> [#uses=0] +@_ZTS12HNodeRotate3 = external constant [15 x i8] ; <[15 x i8]*> [#uses=0] +@_ZTV12HNodeRotate2 = external constant [33 x i32 (...)*], align 32 ; <[33 x i32 (...)*]*> [#uses=0] +@_ZTI12HNodeRotate2 = external constant %struct.__si_class_type_info_pseudo ; <%struct.__si_class_type_info_pseudo*> [#uses=0] +@_ZTS12HNodeRotate2 = external constant [15 x i8] ; <[15 x i8]*> [#uses=0] +@_ZTV21HNodeTranslateRotate3 = external constant [33 x i32 (...)*], align 32 ; <[33 x i32 (...)*]*> [#uses=0] +@_ZTI21HNodeTranslateRotate3 = external constant %struct.__si_class_type_info_pseudo ; <%struct.__si_class_type_info_pseudo*> [#uses=0] +@_ZTS21HNodeTranslateRotate3 = external constant [24 x i8] ; <[24 x i8]*> [#uses=0] +@_ZTV21HNodeTranslateRotate2 = external constant [33 x i32 (...)*], align 32 ; <[33 x i32 (...)*]*> [#uses=0] +@_ZTI21HNodeTranslateRotate2 = 
external constant %struct.__si_class_type_info_pseudo ; <%struct.__si_class_type_info_pseudo*> [#uses=0] +@_ZTS21HNodeTranslateRotate2 = external constant [24 x i8] ; <[24 x i8]*> [#uses=0] +@_ZTV14HNodeTranslate = external constant [33 x i32 (...)*], align 32 ; <[33 x i32 (...)*]*> [#uses=0] +@_ZTI14HNodeTranslate = external constant %struct.__si_class_type_info_pseudo ; <%struct.__si_class_type_info_pseudo*> [#uses=0] +@_ZTS14HNodeTranslate = external constant [17 x i8] ; <[17 x i8]*> [#uses=0] +@"\01LC21" = external constant [31 x i8] ; <[31 x i8]*> [#uses=0] +@"\01LC22" = external constant [6 x i8] ; <[6 x i8]*> [#uses=0] +@"\01LC23" = external constant [12 x i8] ; <[12 x i8]*> [#uses=0] +@"\01LC24" = external constant [5 x i8] ; <[5 x i8]*> [#uses=0] +@"\01LC25" = external constant [7 x i8] ; <[7 x i8]*> [#uses=0] +@"\01LC26" = external constant [7 x i8] ; <[7 x i8]*> [#uses=0] +@"\01LC27" = external constant [43 x i8] ; <[43 x i8]*> [#uses=0] +@"\01LC28" = external constant [15 x i8] ; <[15 x i8]*> [#uses=0] +@"\01LC29" = external constant [20 x i8] ; <[20 x i8]*> [#uses=0] +@"\01LC30" = external constant [41 x i8] ; <[41 x i8]*> [#uses=0] +@llvm.global_ctors = external global [1 x { i32, void ()* }] ; <[1 x { i32, void ()* }]*> [#uses=0] + +declare void @_GLOBAL__I__ZN9HingeNode7DEG2RADE() section "__TEXT,__StaticInit,regular,pure_instructions" + +declare void @_ZN9HingeNode16velFromCartesianEv(%struct.HingeNode*) nounwind + +declare i32 @_ZNK9HingeNode6offsetEv(%struct.HingeNode*) nounwind + +declare i32 @_ZNK9HingeNode6getDOFEv(%struct.HingeNode*) nounwind + +declare i32 @_ZNK9HingeNode6getDimEv(%struct.HingeNode*) nounwind + +declare double @_ZN9HingeNode8kineticEEv(%struct.HingeNode*) nounwind + +declare double @_ZN9HingeNode8approxKEEv(%struct.HingeNode*) nounwind + +declare i8* @_ZN9HingeNode4typeEv(%struct.HingeNode*) nounwind + +declare i8* @_ZN11HNodeOrigin4typeEv(%struct.HNodeOrigin*) nounwind + +declare void 
@_ZN11HNodeOrigin5calcPEv(%struct.HNodeOrigin*) nounwind + +declare void @_ZN11HNodeOrigin5calcZEv(%struct.HNodeOrigin*) nounwind + +declare void @_ZN11HNodeOrigin9calcPandZEv(%struct.HNodeOrigin*) nounwind + +declare void @_ZN11HNodeOrigin9calcAccelEv(%struct.HNodeOrigin*) nounwind + +declare void @_ZN11HNodeOrigin17calcInternalForceEv(%struct.HNodeOrigin*) nounwind + +declare void @_ZN11HNodeOrigin18prepareVelInternalEv(%struct.HNodeOrigin*) nounwind + +declare void @_ZN11HNodeOrigin13propagateSVelERK11FixedVectorIdLi6ELi0EE(%struct.HNodeOrigin*, %"struct.FixedMatrix<double,1,6,0,0>"*) nounwind + +declare void @_ZN11HNodeOrigin9setPosVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEES5_(%struct.HNodeOrigin*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) nounwind + +declare void @_ZN11HNodeOrigin6setVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeOrigin*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) nounwind + +declare void @_ZN11HNodeOrigin14setVelFromSVelERK11FixedVectorIdLi6ELi0EE(%struct.HNodeOrigin*, %"struct.FixedMatrix<double,1,6,0,0>"*) nounwind + +declare void @_ZN11HNodeOrigin18enforceConstraintsER9CDSVectorIdLi1EN3CDS12DefaultAllocEES4_(%struct.HNodeOrigin*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) nounwind + +declare void @_ZN11HNodeOrigin5printEi(%struct.HNodeOrigin*, i32) nounwind + +declare void @_ZN11HNodeOrigin6getPosER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeOrigin*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) nounwind + +declare void @_ZN11HNodeOrigin6getVelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeOrigin*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) nounwind + +declare void @_ZN11HNodeOrigin8getAccelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeOrigin*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) nounwind + +declare void 
@_ZN11HNodeOrigin16getInternalForceER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeOrigin*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) nounwind + +declare void @_ZN11HNodeOrigin5calcYEv(%struct.HNodeOrigin*) nounwind + +declare i8* @_ZN14HNodeTranslate4typeEv(%struct.HNodeTranslate*) nounwind + +declare i8* @_ZN21HNodeTranslateRotate34typeEv(%struct.HNodeTranslateRotate3*) nounwind + +declare i32 @_ZNK21HNodeTranslateRotate36getDimEv(%struct.HNodeTranslateRotate3*) nounwind + +declare i8* @_ZN12HNodeRotate34typeEv(%struct.HNodeRotate3*) nounwind + +declare i32 @_ZNK12HNodeRotate36getDimEv(%struct.HNodeRotate3*) nounwind + +declare i8* @_ZN12HNodeRotate24typeEv(%struct.HNodeRotate2*) nounwind + +declare i32 @_ZNK12HNodeRotate26getDimEv(%struct.HNodeRotate2*) nounwind + +declare i8* @_ZN21HNodeTranslateRotate24typeEv(%struct.HNodeTranslateRotate2*) nounwind + +declare i32 @_ZNK21HNodeTranslateRotate26getDimEv(%struct.HNodeTranslateRotate2*) nounwind + +declare i8* @_ZN12HNodeTorsion4typeEv(%struct.HNodeTorsion*) nounwind + +declare fastcc double @_ZL12sumMassToTipPK9HingeNode(%struct.HingeNode*) + +declare void @_ZN13InertiaTensor4calcERK4Vec3RK7CDSListIP7IVMAtomE(%struct.InertiaTensor*, %struct.Vec3*, %struct.AtomList*) nounwind + +declare fastcc double @_ZL15sumInertiaToTipPK9HingeNodeRK4Vec3S4_(%struct.HingeNode*, %struct.Vec3*, %struct.Vec3*) + +declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsI11FixedVectorIdLi6ELi0EEERSoS2_RK9SubVectorIT_E(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %"struct.SubVector<FixedVector<double, 6, 0> >"*) + +declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZStlsIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_St5_Setw(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, i32) + +declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* 
@_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, i8*) + +declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZNSolsEd(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, double) + +declare void @_Z14orthoTransformIdLi3ELi3EE11FixedMatrixIT_XT1_EXT1_ELi0ELi0EERKS0_IS1_XT0_EXT0_ELi0ELi0EERKS0_IS1_XT1_EXT0_ELi0ELi0EE(%"struct.FixedMatrix<double,3,3,0,0>"* noalias sret, %"struct.FixedMatrix<double,3,3,0,0>"*, %"struct.FixedMatrix<double,3,3,0,0>"*) + +declare void @_ZN12HNodeRotate27calcRotEv(%struct.HNodeRotate2*) + +declare void @_ZN21HNodeTranslateRotate27calcRotEv(%struct.HNodeTranslateRotate2*) + +declare void @_ZmlIdLi6ELi6EE11FixedVectorIT_XT0_ELi0EERK11FixedMatrixIS1_XT0_EXT1_ELi0ELi0EERKS0_IS1_XT1_ELi0EE(%"struct.FixedMatrix<double,1,6,0,0>"* noalias sret, %struct.Mat6*, %"struct.FixedMatrix<double,1,6,0,0>"*) + +declare void @_ZmlIdLi6ELi6ELi6EE11FixedMatrixIT_XT0_EXT2_ELi0ELi0EERKS0_IS1_XT0_EXT1_ELi0ELi0EERKS0_IS1_XT1_EXT2_ELi0ELi0EE(%struct.Mat6* noalias sret, %struct.Mat6*, %struct.Mat6*) + +declare void @_ZmlIdLi6ELi6ELi3EE11FixedMatrixIT_XT0_EXT2_ELi0ELi0EERKS0_IS1_XT0_EXT1_ELi0ELi0EERKS0_IS1_XT1_EXT2_ELi0ELi0EE(%"struct.FixedMatrix<double,3,6,0,0>"* noalias sret, %struct.Mat6*, %"struct.FixedMatrix<double,3,6,0,0>"*) + +declare void @_ZmlIdLi6ELi6ELi2EE11FixedMatrixIT_XT0_EXT2_ELi0ELi0EERKS0_IS1_XT0_EXT1_ELi0ELi0EERKS0_IS1_XT1_EXT2_ELi0ELi0EE(%"struct.FixedMatrix<double,2,6,0,0>"* noalias sret, %struct.Mat6*, %"struct.FixedMatrix<double,2,6,0,0>"*) + +declare void @_ZmlIdLi5ELi6EE11FixedVectorIT_XT0_ELi0EERK11FixedMatrixIS1_XT0_EXT1_ELi0ELi0EERKS0_IS1_XT1_ELi0EE(%"struct.FixedVector<double,5,0>"* noalias sret, %"struct.FixedMatrix<double,5,6,0,0>"*, %"struct.FixedMatrix<double,1,6,0,0>"*) + +declare void @_ZmlIdLi6ELi6ELi5EE11FixedMatrixIT_XT0_EXT2_ELi0ELi0EERKS0_IS1_XT0_EXT1_ELi0ELi0EERKS0_IS1_XT1_EXT2_ELi0ELi0EE(%"struct.FixedMatrix<double,5,6,0,0>"* 
noalias sret, %struct.Mat6*, %"struct.FixedMatrix<double,5,6,0,0>"*) + +declare void @_ZN12HNodeRotate39setPosVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEES5_(%struct.HNodeRotate3*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN12HNodeRotate29setPosVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEES5_(%struct.HNodeRotate2*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN21HNodeTranslateRotate39setPosVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEES5_(%struct.HNodeTranslateRotate3*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN21HNodeTranslateRotate29setPosVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEES5_(%struct.HNodeTranslateRotate2*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare i32 @_ZNK13HingeNodeSpecILi1EE6offsetEv(%"struct.HingeNodeSpec<1>"*) nounwind + +declare %struct.Vec3* @_ZNK13HingeNodeSpecILi1EE5posCMEv(%"struct.HingeNodeSpec<1>"*) nounwind + +declare double* @_ZNK13HingeNodeSpecILi1EE4massEv(%"struct.HingeNodeSpec<1>"*) nounwind + +declare void @_ZN13HingeNodeSpecILi1EE9calcPandZEv(%"struct.HingeNodeSpec<1>"*) + +declare i32 @_ZNK13HingeNodeSpecILi1EE6getDOFEv(%"struct.HingeNodeSpec<1>"*) nounwind + +declare i32 @_ZNK13HingeNodeSpecILi1EE6getDimEv(%"struct.HingeNodeSpec<1>"*) nounwind + +declare void @_ZN13HingeNodeSpecILi1EE18enforceConstraintsER9CDSVectorIdLi1EN3CDS12DefaultAllocEES5_(%"struct.HingeNodeSpec<1>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) nounwind + +declare i32 @_ZNK13HingeNodeSpecILi5EE6offsetEv(%"struct.HingeNodeSpec<5>"*) nounwind + +declare %struct.Vec3* @_ZNK13HingeNodeSpecILi5EE5posCMEv(%"struct.HingeNodeSpec<5>"*) nounwind + +declare double* @_ZNK13HingeNodeSpecILi5EE4massEv(%"struct.HingeNodeSpec<5>"*) 
nounwind + +declare void @_ZN13HingeNodeSpecILi5EE9calcPandZEv(%"struct.HingeNodeSpec<5>"*) + +declare i32 @_ZNK13HingeNodeSpecILi5EE6getDOFEv(%"struct.HingeNodeSpec<5>"*) nounwind + +declare i32 @_ZNK13HingeNodeSpecILi5EE6getDimEv(%"struct.HingeNodeSpec<5>"*) nounwind + +declare void @_ZN13HingeNodeSpecILi5EE18enforceConstraintsER9CDSVectorIdLi1EN3CDS12DefaultAllocEES5_(%"struct.HingeNodeSpec<5>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) nounwind + +declare i32 @_ZNK13HingeNodeSpecILi2EE6offsetEv(%"struct.HingeNodeSpec<2>"*) nounwind + +declare %struct.Vec3* @_ZNK13HingeNodeSpecILi2EE5posCMEv(%"struct.HingeNodeSpec<2>"*) nounwind + +declare double* @_ZNK13HingeNodeSpecILi2EE4massEv(%"struct.HingeNodeSpec<2>"*) nounwind + +declare void @_ZN13HingeNodeSpecILi2EE9calcPandZEv(%"struct.HingeNodeSpec<2>"*) + +declare i32 @_ZNK13HingeNodeSpecILi2EE6getDOFEv(%"struct.HingeNodeSpec<2>"*) nounwind + +declare i32 @_ZNK13HingeNodeSpecILi2EE6getDimEv(%"struct.HingeNodeSpec<2>"*) nounwind + +declare void @_ZN13HingeNodeSpecILi2EE18enforceConstraintsER9CDSVectorIdLi1EN3CDS12DefaultAllocEES5_(%"struct.HingeNodeSpec<2>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) nounwind + +declare i32 @_ZNK13HingeNodeSpecILi3EE6offsetEv(%"struct.HingeNodeSpec<3>"*) nounwind + +declare %struct.Vec3* @_ZNK13HingeNodeSpecILi3EE5posCMEv(%"struct.HingeNodeSpec<3>"*) nounwind + +declare double* @_ZNK13HingeNodeSpecILi3EE4massEv(%"struct.HingeNodeSpec<3>"*) nounwind + +declare void @_ZN13HingeNodeSpecILi3EE9calcPandZEv(%"struct.HingeNodeSpec<3>"*) + +declare i32 @_ZNK13HingeNodeSpecILi3EE6getDOFEv(%"struct.HingeNodeSpec<3>"*) nounwind + +declare i32 @_ZNK13HingeNodeSpecILi6EE6offsetEv(%"struct.HingeNodeSpec<6>"*) nounwind + +declare %struct.Vec3* @_ZNK13HingeNodeSpecILi6EE5posCMEv(%"struct.HingeNodeSpec<6>"*) nounwind + +declare double* 
@_ZNK13HingeNodeSpecILi6EE4massEv(%"struct.HingeNodeSpec<6>"*) nounwind + +declare void @_ZN13HingeNodeSpecILi6EE9calcPandZEv(%"struct.HingeNodeSpec<6>"*) + +declare i32 @_ZNK13HingeNodeSpecILi6EE6getDOFEv(%"struct.HingeNodeSpec<6>"*) nounwind + +declare i32 @_ZNK13HingeNodeSpecILi6EE6getDimEv(%"struct.HingeNodeSpec<6>"*) nounwind + +declare void @_ZN13HingeNodeSpecILi6EE9setPosVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEES6_(%"struct.HingeNodeSpec<6>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN13HingeNodeSpecILi6EE18enforceConstraintsER9CDSVectorIdLi1EN3CDS12DefaultAllocEES5_(%"struct.HingeNodeSpec<6>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) nounwind + +declare i32 @_ZNK13HingeNodeSpecILi3EE6getDimEv(%"struct.HingeNodeSpec<3>"*) nounwind + +declare void @_ZN13HingeNodeSpecILi3EE9setPosVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEES6_(%"struct.HingeNodeSpec<3>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN13HingeNodeSpecILi3EE18enforceConstraintsER9CDSVectorIdLi1EN3CDS12DefaultAllocEES5_(%"struct.HingeNodeSpec<3>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) nounwind + +declare void @_Z14orthoTransformIdLi6ELi6EE11FixedMatrixIT_XT1_EXT1_ELi0ELi0EERKS0_IS1_XT0_EXT0_ELi0ELi0EERKS0_IS1_XT1_EXT0_ELi0ELi0EE(%struct.Mat6* noalias sret, %struct.Mat6*, %struct.Mat6*) + +declare double @_ZN13HingeNodeSpecILi1EE8kineticEEv(%"struct.HingeNodeSpec<1>"*) + +declare double @_ZN13HingeNodeSpecILi3EE8kineticEEv(%"struct.HingeNodeSpec<3>"*) + +declare double @_ZN13HingeNodeSpecILi2EE8kineticEEv(%"struct.HingeNodeSpec<2>"*) + +declare double @_ZN13HingeNodeSpecILi6EE8kineticEEv(%"struct.HingeNodeSpec<6>"*) + +declare double @_ZN13HingeNodeSpecILi5EE8kineticEEv(%"struct.HingeNodeSpec<5>"*) + +declare void 
@_ZmlIdLi6ELi5ELi6EE11FixedMatrixIT_XT0_EXT2_ELi0ELi0EERKS0_IS1_XT0_EXT1_ELi0ELi0EERKS0_IS1_XT1_EXT2_ELi0ELi0EE(%struct.Mat6* noalias sret, %"struct.FixedMatrix<double,5,6,0,0>"*, %"struct.FixedMatrix<double,5,6,0,0>"*) + +declare void @_ZN13HingeNodeSpecILi1EE9setPosVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEES6_(%"struct.HingeNodeSpec<1>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN13HingeNodeSpecILi5EE9setPosVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEES6_(%"struct.HingeNodeSpec<5>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN13HingeNodeSpecILi2EE9setPosVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEES6_(%"struct.HingeNodeSpec<2>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_Z14orthoTransformIdLi3ELi6EE11FixedMatrixIT_XT1_EXT1_ELi0ELi0EERKS0_IS1_XT0_EXT0_ELi0ELi0EERKS0_IS1_XT1_EXT0_ELi0ELi0EE(%struct.Mat6* noalias sret, %"struct.FixedMatrix<double,3,3,0,0>"*, %"struct.FixedMatrix<double,3,6,0,0>"*) + +declare void @_ZmlIdLi6ELi1ELi6EE11FixedMatrixIT_XT0_EXT2_ELi0ELi0EERKS0_IS1_XT0_EXT1_ELi0ELi0EERKS0_IS1_XT1_EXT2_ELi0ELi0EE(%struct.Mat6* noalias sret, %"struct.FixedMatrix<double,1,6,0,0>"*, %"struct.FixedMatrix<double,1,6,0,0>"*) + +declare void @_ZmlIdLi6ELi5ELi5EE11FixedMatrixIT_XT0_EXT2_ELi0ELi0EERKS0_IS1_XT0_EXT1_ELi0ELi0EERKS0_IS1_XT1_EXT2_ELi0ELi0EE(%"struct.FixedMatrix<double,5,6,0,0>"* noalias sret, %"struct.FixedMatrix<double,5,6,0,0>"*, %"struct.FixedMatrix<double,5,5,0,0>"*) + +declare void @_Z14orthoTransformIdLi5ELi6EE11FixedMatrixIT_XT1_EXT1_ELi0ELi0EERKS0_IS1_XT0_EXT0_ELi0ELi0EERKS0_IS1_XT1_EXT0_ELi0ELi0EE(%struct.Mat6* noalias sret, %"struct.FixedMatrix<double,5,5,0,0>"*, %"struct.FixedMatrix<double,5,6,0,0>"*) + +declare void 
@_Z14orthoTransformIdLi2ELi6EE11FixedMatrixIT_XT1_EXT1_ELi0ELi0EERKS0_IS1_XT0_EXT0_ELi0ELi0EERKS0_IS1_XT1_EXT0_ELi0ELi0EE(%struct.Mat6* noalias sret, %"struct.FixedMatrix<double,2,2,0,0>"*, %"struct.FixedMatrix<double,2,6,0,0>"*) + +declare void @_ZmlIdLi1ELi6ELi6EE11FixedMatrixIT_XT0_EXT2_ELi0ELi0EERKS0_IS1_XT0_EXT1_ELi0ELi0EERKS0_IS1_XT1_EXT2_ELi0ELi0EE(%"struct.FixedMatrix<double,1,6,0,0>"* noalias sret, %"struct.FixedMatrix<double,1,6,0,0>"*, %struct.Mat6*) + +declare void @_ZmlIdLi5ELi6ELi6EE11FixedMatrixIT_XT0_EXT2_ELi0ELi0EERKS0_IS1_XT0_EXT1_ELi0ELi0EERKS0_IS1_XT1_EXT2_ELi0ELi0EE(%"struct.FixedMatrix<double,5,6,0,0>"* noalias sret, %"struct.FixedMatrix<double,5,6,0,0>"*, %struct.Mat6*) + +declare void @_Z14orthoTransformIdLi6ELi5EE11FixedMatrixIT_XT1_EXT1_ELi0ELi0EERKS0_IS1_XT0_EXT0_ELi0ELi0EERKS0_IS1_XT1_EXT0_ELi0ELi0EE(%"struct.FixedMatrix<double,5,5,0,0>"* noalias sret, %struct.Mat6*, %"struct.FixedMatrix<double,5,6,0,0>"*) + +declare void @_ZmlIdLi2ELi6ELi6EE11FixedMatrixIT_XT0_EXT2_ELi0ELi0EERKS0_IS1_XT0_EXT1_ELi0ELi0EERKS0_IS1_XT1_EXT2_ELi0ELi0EE(%"struct.FixedMatrix<double,2,6,0,0>"* noalias sret, %"struct.FixedMatrix<double,2,6,0,0>"*, %struct.Mat6*) + +declare void @_Z14orthoTransformIdLi6ELi2EE11FixedMatrixIT_XT1_EXT1_ELi0ELi0EERKS0_IS1_XT0_EXT0_ELi0ELi0EERKS0_IS1_XT1_EXT0_ELi0ELi0EE(%"struct.FixedMatrix<double,2,2,0,0>"* noalias sret, %struct.Mat6*, %"struct.FixedMatrix<double,2,6,0,0>"*) + +declare void @_ZmlIdLi3ELi6ELi6EE11FixedMatrixIT_XT0_EXT2_ELi0ELi0EERKS0_IS1_XT0_EXT1_ELi0ELi0EERKS0_IS1_XT1_EXT2_ELi0ELi0EE(%"struct.FixedMatrix<double,3,6,0,0>"* noalias sret, %"struct.FixedMatrix<double,3,6,0,0>"*, %struct.Mat6*) + +declare void @_Z14orthoTransformIdLi6ELi3EE11FixedMatrixIT_XT1_EXT1_ELi0ELi0EERKS0_IS1_XT0_EXT0_ELi0ELi0EERKS0_IS1_XT1_EXT0_ELi0ELi0EE(%"struct.FixedMatrix<double,3,3,0,0>"* noalias sret, %struct.Mat6*, %"struct.FixedMatrix<double,3,6,0,0>"*) + +declare void @_ZNSt8ios_base4InitC1Ev(%"struct.CDS::DefaultAlloc"*) + +declare i32 
@__cxa_atexit(void (i8*)*, i8*, i8*) nounwind + +declare void @__tcf_0(i8* nocapture) + +declare void @_ZNSt8ios_base4InitD1Ev(%"struct.CDS::DefaultAlloc"*) + +declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsRSoRK9HingeNode(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %struct.HingeNode*) + +declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsRSoPK7IVMAtom(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %struct.IVMAtom*) + +declare void @_ZN9HingeNode8addChildEPS_(%struct.HingeNode*, %struct.HingeNode*) + +declare void @_ZN7CDSListIP9HingeNodeE6appendES1_(%"struct.CDSList<HingeNode*>"*, %struct.HingeNode*) + +declare void @_ZN9HingeNode4getHEv(%struct.RMat* noalias sret, %struct.HingeNode*) + +declare i8* @__cxa_allocate_exception(i32) nounwind + +declare void @__cxa_throw(i8*, i8*, void (i8*)*) noreturn + +declare void @_ZN9HingeNode16getInternalForceER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HingeNode*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN9HingeNode9calcAccelEv(%struct.HingeNode*) + +declare void @_ZN9HingeNode8getAccelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HingeNode*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN9HingeNode6getVelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HingeNode*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN9HingeNode6getPosER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HingeNode*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN9HingeNode5printEi(%struct.HingeNode*, i32) + +declare void @_ZN9HingeNode18enforceConstraintsER9CDSVectorIdLi1EN3CDS12DefaultAllocEES4_(%struct.HingeNode*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN9HingeNode14setVelFromSVelERK11FixedVectorIdLi6ELi0EE(%struct.HingeNode*, %"struct.FixedMatrix<double,1,6,0,0>"*) + +declare void 
@_ZN9HingeNode6setVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HingeNode*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN9HingeNode9setPosVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEES5_(%struct.HingeNode*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN9HingeNode13propagateSVelERK11FixedVectorIdLi6ELi0EE(%struct.HingeNode*, %"struct.FixedMatrix<double,1,6,0,0>"*) + +declare void @_ZN9HingeNode18prepareVelInternalEv(%struct.HingeNode*) + +declare void @_ZN9HingeNode17calcInternalForceEv(%struct.HingeNode*) + +declare void @_ZN9HingeNode5calcYEv(%struct.HingeNode*) + +declare void @_ZN9HingeNode9calcPandZEv(%struct.HingeNode*) + +declare void @_ZN9HingeNode5calcZEv(%struct.HingeNode*) + +declare void @_ZN9HingeNode5calcPEv(%struct.HingeNode*) + +declare double* @_ZNK9HingeNode4massEv(%struct.HingeNode*) + +declare %struct.Vec3* @_ZNK9HingeNode5posCMEv(%struct.HingeNode*) + +declare i8* @_Znam(i32) + +declare void @_ZN7CDSListIP9HingeNodeEC1Eii(%"struct.CDSList<HingeNode*>"*, i32, i32) + +declare i8* @_Znwm(i32) + +declare i8* @llvm.eh.exception() nounwind + +declare i32 @llvm.eh.selector.i32(i8*, i8*, ...) nounwind + +declare i32 @llvm.eh.typeid.for.i32(i8*) nounwind + +declare void @_ZdlPv(i8*) nounwind + +declare i32 @__gxx_personality_v0(...) 
+ +declare void @_Unwind_Resume_or_Rethrow(i8*) + +declare void @_ZN7CDSListIP7IVMAtomEC1Eii(%struct.AtomList*, i32, i32) + +declare void @_ZN13CDSVectorBaseIdN3CDS12DefaultAllocEE8splitRepEv(%"struct.CDSVectorBase<double,CDS::DefaultAlloc>"*) + +declare void @_ZN12HNodeTorsion16getInternalForceER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeTorsion*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN13HingeNodeSpecILi1EE8getAccelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<1>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN13HingeNodeSpecILi1EE6getVelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<1>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN13HingeNodeSpecILi1EE6getPosER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<1>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN13HingeNodeSpecILi1EE16getInternalForceER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<1>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN12HNodeRotate316getInternalForceER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeRotate3*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN13HingeNodeSpecILi3EE16getInternalForceER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<3>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN13HingeNodeSpecILi3EE8getAccelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<3>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN13HingeNodeSpecILi3EE6getVelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<3>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN13HingeNodeSpecILi3EE6getPosER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<3>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void 
@_ZN12HNodeRotate216getInternalForceER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeRotate2*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN12HNodeRotate28getAccelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeRotate2*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN12HNodeRotate26getVelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeRotate2*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN12HNodeRotate26getPosER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeRotate2*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN12HNodeRotate38getAccelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeRotate3*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN12HNodeRotate36getVelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeRotate3*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN12HNodeRotate36getPosER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeRotate3*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN13HingeNodeSpecILi2EE16getInternalForceER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<2>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN13HingeNodeSpecILi2EE8getAccelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<2>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN13HingeNodeSpecILi2EE6getVelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<2>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN13HingeNodeSpecILi2EE6getPosER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<2>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN21HNodeTranslateRotate316getInternalForceER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeTranslateRotate3*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void 
@_ZN21HNodeTranslateRotate38getAccelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeTranslateRotate3*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN21HNodeTranslateRotate36getVelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeTranslateRotate3*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN21HNodeTranslateRotate36getPosER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeTranslateRotate3*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN13HingeNodeSpecILi6EE16getInternalForceER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<6>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN13HingeNodeSpecILi6EE8getAccelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<6>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN13HingeNodeSpecILi6EE6getVelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<6>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN13HingeNodeSpecILi6EE6getPosER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<6>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN21HNodeTranslateRotate216getInternalForceER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeTranslateRotate2*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN21HNodeTranslateRotate28getAccelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeTranslateRotate2*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN21HNodeTranslateRotate26getVelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeTranslateRotate2*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN21HNodeTranslateRotate26getPosER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeTranslateRotate2*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void 
@_ZN13HingeNodeSpecILi5EE16getInternalForceER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<5>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN13HingeNodeSpecILi5EE8getAccelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<5>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN13HingeNodeSpecILi5EE6getVelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<5>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN13HingeNodeSpecILi5EE6getPosER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<5>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN13CDSVectorBaseI4Vec3N3CDS12DefaultAllocEE8splitRepEv(%"struct.CDSVectorBase<Vec3,CDS::DefaultAlloc>"*) + +declare void @_ZN7CDSListIP7IVMAtomE8splitRepEv(%struct.AtomList*) + +declare void @_ZN7CDSListIP9HingeNodeE8splitRepEv(%"struct.CDSList<HingeNode*>"*) + +declare void @_ZdaPv(i8*) nounwind + +declare void @_ZSt9terminatev() noreturn nounwind + +declare void @_ZN9HingeNodeC2EPK3IVMP7IVMAtomPKS3_PS_(%struct.HingeNode*, %struct.IVM*, %struct.IVMAtom*, %struct.IVMAtom*, %struct.HingeNode*) + +declare void @_ZN9HingeNodeD1Ev(%struct.HingeNode*) + +declare void @_ZN9HingeNodeD0Ev(%struct.HingeNode*) + +declare void @_ZN7CDSListIP7IVMAtomE6appendES1_(%struct.AtomList*, %struct.IVMAtom*) + +declare void @_ZN9HingeNodeC1EPK3IVMP7IVMAtomPKS3_PS_(%struct.HingeNode*, %struct.IVM*, %struct.IVMAtom*, %struct.IVMAtom*, %struct.HingeNode*) + +declare void @_ZN9HingeNodeD2Ev(%struct.HingeNode*) + +declare void @_ZN11HNodeOriginD0Ev(%struct.HNodeOrigin*) + +declare void @_ZN11HNodeOriginD1Ev(%struct.HNodeOrigin*) + +declare void @_ZN13HingeNodeSpecILi1EED0Ev(%"struct.HingeNodeSpec<1>"*) + +declare void @_ZN13HingeNodeSpecILi1EED1Ev(%"struct.HingeNodeSpec<1>"*) + +declare void @_ZN13HingeNodeSpecILi1EE5calcPEv(%"struct.HingeNodeSpec<1>"*) + +declare void 
@_ZN13HingeNodeSpecILi1EE5calcZEv(%"struct.HingeNodeSpec<1>"*) + +declare void @_ZN13HingeNodeSpecILi1EE5calcYEv(%"struct.HingeNodeSpec<1>"*) + +declare void @_ZN13HingeNodeSpecILi1EE17calcInternalForceEv(%"struct.HingeNodeSpec<1>"*) + +declare void @_ZN13HingeNodeSpecILi1EE18prepareVelInternalEv(%"struct.HingeNodeSpec<1>"*) + +declare void @_ZN13HingeNodeSpecILi1EE13propagateSVelERK11FixedVectorIdLi6ELi0EE(%"struct.HingeNodeSpec<1>"*, %"struct.FixedMatrix<double,1,6,0,0>"*) + +declare double @_ZN13HingeNodeSpecILi1EE8approxKEEv(%"struct.HingeNodeSpec<1>"*) + +declare void @_ZN13HingeNodeSpecILi1EE6setVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<1>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN13HingeNodeSpecILi1EE14setVelFromSVelERK11FixedVectorIdLi6ELi0EE(%"struct.HingeNodeSpec<1>"*, %"struct.FixedMatrix<double,1,6,0,0>"*) + +declare void @_ZN13HingeNodeSpecILi1EE5printEi(%"struct.HingeNodeSpec<1>"*, i32) + +declare void @_ZN13HingeNodeSpecILi1EE9calcAccelEv(%"struct.HingeNodeSpec<1>"*) + +declare void @_ZN13HingeNodeSpecILi1EE4getHEv(%struct.RMat* noalias sret, %"struct.HingeNodeSpec<1>"*) + +declare void @__cxa_pure_virtual() nounwind + +declare void @_ZN13HingeNodeSpecILi3EED0Ev(%"struct.HingeNodeSpec<3>"*) + +declare void @_ZN13HingeNodeSpecILi3EED1Ev(%"struct.HingeNodeSpec<3>"*) + +declare void @_ZN13HingeNodeSpecILi3EE5calcPEv(%"struct.HingeNodeSpec<3>"*) + +declare void @_ZN13HingeNodeSpecILi3EE5calcZEv(%"struct.HingeNodeSpec<3>"*) + +declare void @_ZN13HingeNodeSpecILi3EE5calcYEv(%"struct.HingeNodeSpec<3>"*) + +declare void @_ZN13HingeNodeSpecILi3EE17calcInternalForceEv(%"struct.HingeNodeSpec<3>"*) + +declare void @_ZN13HingeNodeSpecILi3EE18prepareVelInternalEv(%"struct.HingeNodeSpec<3>"*) + +declare void @_ZN13HingeNodeSpecILi3EE13propagateSVelERK11FixedVectorIdLi6ELi0EE(%"struct.HingeNodeSpec<3>"*, %"struct.FixedMatrix<double,1,6,0,0>"*) + +declare double 
@_ZN13HingeNodeSpecILi3EE8approxKEEv(%"struct.HingeNodeSpec<3>"*) + +declare void @_ZN13HingeNodeSpecILi3EE6setVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<3>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN13HingeNodeSpecILi3EE14setVelFromSVelERK11FixedVectorIdLi6ELi0EE(%"struct.HingeNodeSpec<3>"*, %"struct.FixedMatrix<double,1,6,0,0>"*) + +declare void @_ZN13HingeNodeSpecILi3EE5printEi(%"struct.HingeNodeSpec<3>"*, i32) + +declare void @_ZN13HingeNodeSpecILi3EE9calcAccelEv(%"struct.HingeNodeSpec<3>"*) + +declare void @_ZN13HingeNodeSpecILi3EE4getHEv(%struct.RMat* noalias sret, %"struct.HingeNodeSpec<3>"*) + +declare void @_ZN13HingeNodeSpecILi2EED0Ev(%"struct.HingeNodeSpec<2>"*) + +declare void @_ZN13HingeNodeSpecILi2EED1Ev(%"struct.HingeNodeSpec<2>"*) + +declare void @_ZN13HingeNodeSpecILi2EE5calcPEv(%"struct.HingeNodeSpec<2>"*) + +declare void @_ZN13HingeNodeSpecILi2EE5calcZEv(%"struct.HingeNodeSpec<2>"*) + +declare void @_ZN13HingeNodeSpecILi2EE5calcYEv(%"struct.HingeNodeSpec<2>"*) + +declare void @_ZN13HingeNodeSpecILi2EE17calcInternalForceEv(%"struct.HingeNodeSpec<2>"*) + +declare void @_ZN13HingeNodeSpecILi2EE18prepareVelInternalEv(%"struct.HingeNodeSpec<2>"*) + +declare void @_ZN13HingeNodeSpecILi2EE13propagateSVelERK11FixedVectorIdLi6ELi0EE(%"struct.HingeNodeSpec<2>"*, %"struct.FixedMatrix<double,1,6,0,0>"*) + +declare double @_ZN13HingeNodeSpecILi2EE8approxKEEv(%"struct.HingeNodeSpec<2>"*) + +declare void @_ZN13HingeNodeSpecILi2EE6setVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<2>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN13HingeNodeSpecILi2EE14setVelFromSVelERK11FixedVectorIdLi6ELi0EE(%"struct.HingeNodeSpec<2>"*, %"struct.FixedMatrix<double,1,6,0,0>"*) + +declare void @_ZN13HingeNodeSpecILi2EE5printEi(%"struct.HingeNodeSpec<2>"*, i32) + +declare void @_ZN13HingeNodeSpecILi2EE9calcAccelEv(%"struct.HingeNodeSpec<2>"*) + +declare void 
@_ZN13HingeNodeSpecILi2EE4getHEv(%struct.RMat* noalias sret, %"struct.HingeNodeSpec<2>"*) + +declare void @_ZN13HingeNodeSpecILi6EED0Ev(%"struct.HingeNodeSpec<6>"*) + +declare void @_ZN13HingeNodeSpecILi6EED1Ev(%"struct.HingeNodeSpec<6>"*) + +declare void @_ZN13HingeNodeSpecILi6EE5calcPEv(%"struct.HingeNodeSpec<6>"*) + +declare void @_ZN13HingeNodeSpecILi6EE5calcZEv(%"struct.HingeNodeSpec<6>"*) + +declare void @_ZN13HingeNodeSpecILi6EE5calcYEv(%"struct.HingeNodeSpec<6>"*) + +declare void @_ZN13HingeNodeSpecILi6EE17calcInternalForceEv(%"struct.HingeNodeSpec<6>"*) + +declare void @_ZN13HingeNodeSpecILi6EE18prepareVelInternalEv(%"struct.HingeNodeSpec<6>"*) + +declare void @_ZN13HingeNodeSpecILi6EE13propagateSVelERK11FixedVectorIdLi6ELi0EE(%"struct.HingeNodeSpec<6>"*, %"struct.FixedMatrix<double,1,6,0,0>"*) + +declare double @_ZN13HingeNodeSpecILi6EE8approxKEEv(%"struct.HingeNodeSpec<6>"*) + +declare void @_ZN13HingeNodeSpecILi6EE6setVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<6>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN13HingeNodeSpecILi6EE14setVelFromSVelERK11FixedVectorIdLi6ELi0EE(%"struct.HingeNodeSpec<6>"*, %"struct.FixedMatrix<double,1,6,0,0>"*) + +declare void @_ZN13HingeNodeSpecILi6EE5printEi(%"struct.HingeNodeSpec<6>"*, i32) + +declare void @_ZN13HingeNodeSpecILi6EE9calcAccelEv(%"struct.HingeNodeSpec<6>"*) + +declare void @_ZN13HingeNodeSpecILi6EE4getHEv(%struct.RMat* noalias sret, %"struct.HingeNodeSpec<6>"*) + +declare void @_ZN13HingeNodeSpecILi5EED0Ev(%"struct.HingeNodeSpec<5>"*) + +declare void @_ZN13HingeNodeSpecILi5EED1Ev(%"struct.HingeNodeSpec<5>"*) + +declare void @_ZN13HingeNodeSpecILi5EE5calcPEv(%"struct.HingeNodeSpec<5>"*) + +declare void @_ZN13HingeNodeSpecILi5EE5calcZEv(%"struct.HingeNodeSpec<5>"*) + +declare void @_ZN13HingeNodeSpecILi5EE5calcYEv(%"struct.HingeNodeSpec<5>"*) + +declare void @_ZN13HingeNodeSpecILi5EE17calcInternalForceEv(%"struct.HingeNodeSpec<5>"*) + +declare void 
@_ZN13HingeNodeSpecILi5EE18prepareVelInternalEv(%"struct.HingeNodeSpec<5>"*) + +declare void @_ZN13HingeNodeSpecILi5EE13propagateSVelERK11FixedVectorIdLi6ELi0EE(%"struct.HingeNodeSpec<5>"*, %"struct.FixedMatrix<double,1,6,0,0>"*) + +declare double @_ZN13HingeNodeSpecILi5EE8approxKEEv(%"struct.HingeNodeSpec<5>"*) + +declare void @_ZN13HingeNodeSpecILi5EE6setVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<5>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN13HingeNodeSpecILi5EE14setVelFromSVelERK11FixedVectorIdLi6ELi0EE(%"struct.HingeNodeSpec<5>"*, %"struct.FixedMatrix<double,1,6,0,0>"*) + +declare void @_ZN13HingeNodeSpecILi5EE5printEi(%"struct.HingeNodeSpec<5>"*, i32) + +declare void @_ZN13HingeNodeSpecILi5EE9calcAccelEv(%"struct.HingeNodeSpec<5>"*) + +declare void @_ZN13HingeNodeSpecILi5EE4getHEv(%struct.RMat* noalias sret, %"struct.HingeNodeSpec<5>"*) + +declare void @_ZN12HNodeTorsion7calcRotEv(%struct.HNodeTorsion*) + +declare double @sin(double) nounwind readnone + +declare double @cos(double) nounwind readnone + +declare void @_ZN12HNodeRotate37calcRotEv(%struct.HNodeRotate3*) + +declare void @_ZN21HNodeTranslateRotate37calcRotEv(%struct.HNodeTranslateRotate3*) + +declare void @_ZN9HingeNodeC2ERKS_(%struct.HingeNode*, %struct.HingeNode*) + +declare void @_ZN7CDSListIP9HingeNodeEC1ERKS2_(%"struct.CDSList<HingeNode*>"*, %"struct.CDSList<HingeNode*>"*) + +declare void @_ZN7CDSListIP7IVMAtomEC1ERKS2_(%struct.AtomList*, %struct.AtomList*) + +declare void @_ZN11HNodeOriginC2EPK9HingeNode(%struct.HNodeOrigin*, %struct.HingeNode*) + +declare void @_ZN13HingeNodeSpecILi1EEC2EPK9HingeNodeRi(%"struct.HingeNodeSpec<1>"*, %struct.HingeNode*, i32*) + +declare void @_ZN13HingeNodeSpecILi3EEC2EPK9HingeNodeRi(%"struct.HingeNodeSpec<3>"*, %struct.HingeNode*, i32*) + +declare void @_ZN13HingeNodeSpecILi2EEC2EPK9HingeNodeRi(%"struct.HingeNodeSpec<2>"*, %struct.HingeNode*, i32*) + +declare void 
@_ZN13HingeNodeSpecILi6EEC2EPK9HingeNodeRi(%"struct.HingeNodeSpec<6>"*, %struct.HingeNode*, i32*) + +declare void @_ZN13HingeNodeSpecILi5EEC2EPK9HingeNodeRi(%"struct.HingeNodeSpec<5>"*, %struct.HingeNode*, i32*) + +declare void @_ZplI4Vec3K11FixedVectorIdLi6ELi0EEET_RK9SubVectorIT0_ERKS4_(%struct.Vec3* noalias sret, %"struct.SubVector<FixedVector<double, 6, 0> >"*, %struct.Vec3*) + +declare void @_ZN11MatrixTools9transposeI11FixedMatrixIdLi1ELi6ELi0ELi0EEEENT_13TransposeTypeERKS3_(%"struct.FixedMatrix<double,1,6,0,0>"* noalias sret, %"struct.FixedMatrix<double,1,6,0,0>"*) + +declare void @_ZN12HNodeRotate314setVelFromSVelERK11FixedVectorIdLi6ELi0EE(%struct.HNodeRotate3*, %"struct.FixedMatrix<double,1,6,0,0>"*) + +declare void @_ZN12HNodeRotate214setVelFromSVelERK11FixedVectorIdLi6ELi0EE(%struct.HNodeRotate2*, %"struct.FixedMatrix<double,1,6,0,0>"*) + +declare void @_ZN21HNodeTranslateRotate314setVelFromSVelERK11FixedVectorIdLi6ELi0EE(%struct.HNodeTranslateRotate3*, %"struct.FixedMatrix<double,1,6,0,0>"*) + +declare void @_ZN21HNodeTranslateRotate214setVelFromSVelERK11FixedVectorIdLi6ELi0EE(%struct.HNodeTranslateRotate2*, %"struct.FixedMatrix<double,1,6,0,0>"*) + +declare void @_ZN13HingeNodeSpecILi1EE9calcPropsEv(%"struct.HingeNodeSpec<1>"*) + +declare zeroext i8 @_ZNK3IVM12minimizationEv(%struct.IVM*) + +declare void @_Z8blockVecIdLi3ELi3EE11FixedVectorIT_XplT0_T1_ELi0EERKS0_IS1_XT0_ELi0EERKS0_IS1_XT1_ELi0EE(%"struct.FixedMatrix<double,1,6,0,0>"* noalias sret, %"struct.FixedMatrix<double,1,3,0,0>"*, %"struct.FixedMatrix<double,1,3,0,0>"*) + +declare void @_ZN12HNodeTorsion11toCartesianEv(%struct.HNodeTorsion*) + +declare void @_ZN13HingeNodeSpecILi1EE18calcCartesianForceEv(%"struct.HingeNodeSpec<1>"*) + +declare void @_ZN13HingeNodeSpecILi3EE18calcCartesianForceEv(%"struct.HingeNodeSpec<3>"*) + +declare void @_ZN13HingeNodeSpecILi2EE18calcCartesianForceEv(%"struct.HingeNodeSpec<2>"*) + +declare void 
@_ZN13HingeNodeSpecILi6EE18calcCartesianForceEv(%"struct.HingeNodeSpec<6>"*) + +declare void @_ZN13HingeNodeSpecILi5EE18calcCartesianForceEv(%"struct.HingeNodeSpec<5>"*) + +declare void @_ZN12HNodeTorsion5calcHEv(%struct.HNodeTorsion*) + +declare void @_Z10blockMat12IdLi1ELi3ELi3EE11FixedMatrixIT_XT0_EXplT1_T2_ELi0ELi0EERKS0_IS1_XT0_EXT1_ELi0ELi0EERKS0_IS1_XT0_EXT2_ELi0ELi0EE(%"struct.FixedMatrix<double,1,6,0,0>"* noalias sret, %"struct.FixedMatrix<double,1,3,0,0>"*, %"struct.FixedMatrix<double,1,3,0,0>"*) + +declare void @_ZN13CDSMatrixBaseIdEC2I11FixedMatrixIdLi1ELi6ELi0ELi0EEEERKT_(%"struct.CDSMatrixBase<double>"*, %"struct.FixedMatrix<double,1,6,0,0>"*) + +declare void @_ZN11MatrixTools9transposeI11FixedMatrixIdLi6ELi1ELi0ELi0EEEENT_13TransposeTypeERKS3_(%"struct.FixedMatrix<double,1,6,0,0>"* noalias sret, %"struct.FixedMatrix<double,1,6,0,0>"*) + +declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZStlsIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_St13_Setprecision(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, i32) + +declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsIdLi6EERSoS0_RK15FixedVectorBaseIT_XT0_EE(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %"struct.FixedMatrixBase<double,1,6>"*) + +declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_c(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, i8 signext) + +declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsIdLi3EERSoS0_RK15FixedVectorBaseIT_XT0_EE(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %"struct.FixedMatrixBase<double,1,3>"*) + +declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsIdLi1EERSoS0_RK15FixedVectorBaseIT_XT0_EE(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %"struct.FixedMatrixBase<double,1,1>"*) + +declare void 
@_ZN11FixedVectorIdLi3ELi0EE6subColILi6ELi1ELi0ELi0EEES0_RK11FixedMatrixIdXT_EXT0_EXT1_EXT2_EEiii(%"struct.FixedMatrix<double,1,3,0,0>"* noalias sret, %"struct.FixedMatrix<double,1,6,0,0>"*, i32, i32, i32) + +declare %"struct.FixedMatrixBase<double,6,6>"* @_ZN15FixedMatrixBaseIdLi6ELi6EEpLERKS0_(%"struct.FixedMatrixBase<double,6,6>"*, %"struct.FixedMatrixBase<double,6,6>"*) + +declare void @_ZN13HingeNodeSpecILi6EE9calcPropsEv(%"struct.HingeNodeSpec<6>"*) + +declare void @_ZN11MatrixTools9transposeI11FixedMatrixIdLi6ELi6ELi0ELi0EEEENT_13TransposeTypeERKS3_(%struct.Mat6* noalias sret, %struct.Mat6*) + +declare void @_ZN21HNodeTranslateRotate311toCartesianEv(%struct.HNodeTranslateRotate3*) + +define linkonce void @_ZN21HNodeTranslateRotate36setVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeTranslateRotate3* %this, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"* %velv) { +entry: + %0 = add i32 0, -1 ; <i32> [#uses=1] + %1 = getelementptr double* null, i32 %0 ; <double*> [#uses=1] + %2 = load double* %1, align 8 ; <double> [#uses=1] + %3 = load double* null, align 8 ; <double> [#uses=2] + %4 = load double* null, align 8 ; <double> [#uses=2] + %5 = load double* null, align 8 ; <double> [#uses=3] + %6 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 2, i32 0, i32 0, i32 0, i32 0 ; <double*> [#uses=0] + %7 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 2, i32 0, i32 0, i32 0, i32 1 ; <double*> [#uses=0] + %8 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 2, i32 0, i32 0, i32 0, i32 2 ; <double*> [#uses=0] + %9 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 2, i32 0, i32 0, i32 0, i32 3 ; <double*> [#uses=0] + %10 = load double* null, align 8 ; <double> [#uses=2] + %11 = fsub double -0.000000e+00, %10 ; <double> [#uses=1] + %12 = load double* null, align 8 ; <double> [#uses=2] + %13 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 1, i32 0, i32 0, i32 0, i32 3 ; <double*> 
[#uses=1] + %14 = load double* %13, align 8 ; <double> [#uses=2] + %15 = fsub double -0.000000e+00, %14 ; <double> [#uses=1] + %16 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 1, i32 0, i32 0, i32 0, i32 2 ; <double*> [#uses=1] + %17 = load double* %16, align 8 ; <double> [#uses=2] + %18 = fsub double -0.000000e+00, %17 ; <double> [#uses=1] + %19 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 0 ; <double*> [#uses=0] + %20 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 3 ; <double*> [#uses=0] + %21 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 6 ; <double*> [#uses=0] + %22 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 9 ; <double*> [#uses=0] + %23 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 1 ; <double*> [#uses=0] + %24 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 4 ; <double*> [#uses=0] + %25 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 7 ; <double*> [#uses=0] + %26 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 10 ; <double*> [#uses=0] + %27 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 2 ; <double*> [#uses=0] + %28 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 5 ; <double*> [#uses=0] + %29 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 8 ; <double*> [#uses=0] + %30 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 11 ; <double*> [#uses=0] + %31 = getelementptr %"struct.FixedMatrix<double,1,3,0,0>"* null, i32 0, i32 0, i32 0, i32 0 ; <double*> [#uses=0] + %32 = getelementptr %"struct.FixedMatrix<double,1,3,0,0>"* null, i32 0, i32 0, i32 0, i32 1 ; <double*> [#uses=1] + %33 = 
getelementptr %"struct.FixedMatrix<double,1,3,0,0>"* null, i32 0, i32 0, i32 0, i32 2 ; <double*> [#uses=1] + %34 = fmul double %17, %5 ; <double> [#uses=1] + %35 = fadd double 0.000000e+00, %34 ; <double> [#uses=1] + %36 = fadd double 0.000000e+00, 0.000000e+00 ; <double> [#uses=1] + %37 = fmul double %14, %3 ; <double> [#uses=1] + %38 = fadd double %36, %37 ; <double> [#uses=1] + %39 = fmul double %12, %4 ; <double> [#uses=1] + %40 = fadd double %38, %39 ; <double> [#uses=1] + %41 = fmul double %5, %11 ; <double> [#uses=1] + %42 = fadd double %40, %41 ; <double> [#uses=2] + store double %42, double* %32, align 8 + %43 = fmul double %2, %15 ; <double> [#uses=1] + %44 = fadd double %43, 0.000000e+00 ; <double> [#uses=1] + %45 = fmul double %3, %18 ; <double> [#uses=1] + %46 = fadd double %44, %45 ; <double> [#uses=1] + %47 = fmul double %10, %4 ; <double> [#uses=1] + %48 = fadd double %46, %47 ; <double> [#uses=1] + %49 = fmul double %12, %5 ; <double> [#uses=1] + %50 = fadd double %48, %49 ; <double> [#uses=2] + store double %50, double* %33, align 8 + %51 = fmul double %35, 2.000000e+00 ; <double> [#uses=1] + %52 = fmul double %42, 2.000000e+00 ; <double> [#uses=1] + %53 = fmul double %50, 2.000000e+00 ; <double> [#uses=1] + %54 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 0, i32 10, i32 0, i32 0, i32 0 ; <double*> [#uses=1] + store double %51, double* %54, align 8 + %55 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 0, i32 10, i32 0, i32 0, i32 1 ; <double*> [#uses=1] + store double %52, double* %55, align 8 + %56 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 0, i32 10, i32 0, i32 0, i32 2 ; <double*> [#uses=1] + store double %53, double* %56, align 8 + %57 = add i32 0, 4 ; <i32> [#uses=1] + %58 = getelementptr %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >"* null, i32 0, i32 0 ; <%"struct.CDSVector<double,0,CDS::DefaultAlloc>"**> [#uses=1] + store 
%"struct.CDSVector<double,0,CDS::DefaultAlloc>"* %velv, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"** %58, align 8 + %59 = getelementptr %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >"* null, i32 0, i32 1 ; <i32*> [#uses=1] + store i32 %57, i32* %59, align 4 + %60 = getelementptr %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >"* null, i32 0, i32 2 ; <i32*> [#uses=1] + store i32 3, i32* %60, align 8 + unreachable +} + +declare void @_ZmlRK11FixedMatrixIdLi6ELi6ELi0ELi0EERK18PhiMatrixTranspose(%struct.Mat6* noalias sret, %struct.Mat6*, %struct.PhiMatrixTranspose*) + +declare void @_ZmlI4Mat3K11FixedMatrixIdLi6ELi6ELi0ELi0EEET_RK9SubMatrixIT0_ERKS4_(%struct.Mat3* noalias sret, %"struct.SubMatrix<FixedMatrix<double, 6, 6, 0, 0> >"*, %struct.Mat3*) + +declare void @_ZmiI4Mat3K11FixedMatrixIdLi6ELi6ELi0ELi0EEET_RK9SubMatrixIT0_ERKS4_(%struct.Mat3* noalias sret, %"struct.SubMatrix<FixedMatrix<double, 6, 6, 0, 0> >"*, %struct.Mat3*) + +declare %"struct.FixedMatrixBase<double,3,3>"* @_ZN15FixedMatrixBaseIdLi3ELi3EEmIERKS0_(%"struct.FixedMatrixBase<double,3,3>"*, %"struct.FixedMatrixBase<double,3,3>"*) + +declare void @_ZplI4Mat311FixedMatrixIdLi6ELi6ELi0ELi0EEET_RKS3_RK9SubMatrixIT0_E(%struct.Mat3* noalias sret, %struct.Mat3*, %"struct.SubMatrix<FixedMatrix<double, 6, 6, 0, 0> >"*) + +declare void @_ZN13CDSVectorBaseIdN3CDS12DefaultAllocEED2Ev(%"struct.CDSVectorBase<double,CDS::DefaultAlloc>"*) + +declare void @_ZN13HingeNodeSpecILi1EE7calcD_GERK11FixedMatrixIdLi6ELi6ELi0ELi0EE(%"struct.HingeNodeSpec<1>"*, %struct.Mat6*) + +declare void @_ZN11MatrixTools7inverseI11FixedMatrixIdLi1ELi1ELi0ELi0EEEET_RKS3_NS_14InverseResultsINS3_10MatrixTypeEEE(%"struct.FixedMatrix<double,1,1,0,0>"* noalias sret, %"struct.FixedMatrix<double,1,1,0,0>"*, %"struct.MatrixTools::InverseResults<FullMatrix<double> >"*) + +declare i8* @__cxa_get_exception_ptr(i8*) nounwind + +declare i8* @__cxa_begin_catch(i8*) nounwind + +declare 
%"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsIdLi1ELi1EERSoS0_RK15FixedMatrixBaseIT_XT0_EXT1_EE(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %"struct.FixedMatrixBase<double,1,1>"*) + +declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsIdLi1ELi6EERSoS0_RK15FixedMatrixBaseIT_XT0_EXT1_EE(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %"struct.FixedMatrixBase<double,1,6>"*) + +declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZNSolsEi(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, i32) + +declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsIcERSoS0_RK9CDSStringIT_E(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %struct.String*) + +declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZNSolsEPFRSoS_E(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %"struct.std::basic_ostream<char,std::char_traits<char> >"* (%"struct.std::basic_ostream<char,std::char_traits<char> >"*)*) + +declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_(%"struct.std::basic_ostream<char,std::char_traits<char> >"*) + +declare void @__cxa_end_catch() + +declare void @_ZmlI4Mat311FixedMatrixIdLi6ELi6ELi0ELi0EEET_RKS3_RK9SubMatrixIT0_E(%struct.Mat3* noalias sret, %struct.Mat3*, %"struct.SubMatrix<FixedMatrix<double, 6, 6, 0, 0> >"*) + +declare void @_ZmlI4Mat311FixedMatrixIdLi6ELi6ELi0ELi0EEET_RK9SubMatrixIT0_ERKS3_(%struct.Mat3* noalias sret, %"struct.SubMatrix<FixedMatrix<double, 6, 6, 0, 0> >"*, %struct.Mat3*) + +declare void @_ZmiI4Mat311FixedMatrixIdLi6ELi6ELi0ELi0EEET_RK9SubMatrixIT0_ERKS3_(%struct.Mat3* noalias sret, %"struct.SubMatrix<FixedMatrix<double, 6, 6, 0, 0> >"*, %struct.Mat3*) + +declare %"struct.FixedMatrixBase<double,6,6>"* @_ZN15FixedMatrixBaseIdLi6ELi6EEmIERKS0_(%"struct.FixedMatrixBase<double,6,6>"*, %"struct.FixedMatrixBase<double,6,6>"*) + 
+declare void @_ZN13CDSVectorBaseI4Vec3N3CDS12DefaultAllocEEC2EiS2_(%"struct.CDSVectorBase<Vec3,CDS::DefaultAlloc>"*, i32, %"struct.CDS::DefaultAlloc"* byval align 4) + +declare void @_ZN13CDSVectorBaseI4Vec3N3CDS12DefaultAllocEED2Ev(%"struct.CDSVectorBase<Vec3,CDS::DefaultAlloc>"*) + +declare void @_ZN12HNodeTorsionD0Ev(%struct.HNodeTorsion*) + +declare void @_ZN12HNodeTorsionD1Ev(%struct.HNodeTorsion*) + +declare void @_ZN12HNodeRotate3D0Ev(%struct.HNodeRotate3*) + +declare void @_ZN12HNodeRotate3D1Ev(%struct.HNodeRotate3*) + +declare void @_ZN12HNodeRotate36setVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeRotate3*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN12HNodeRotate318enforceConstraintsER9CDSVectorIdLi1EN3CDS12DefaultAllocEES4_(%struct.HNodeRotate3*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN12HNodeRotate35printEi(%struct.HNodeRotate3*, i32) + +declare void @_ZN12HNodeRotate35calcHEv(%struct.HNodeRotate3*) + +declare void @_ZN12HNodeRotate311toCartesianEv(%struct.HNodeRotate3*) + +declare void @_ZN12HNodeRotate2D0Ev(%struct.HNodeRotate2*) + +declare void @_ZN12HNodeRotate2D1Ev(%struct.HNodeRotate2*) + +declare void @_ZN12HNodeRotate26setVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeRotate2*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN12HNodeRotate218enforceConstraintsER9CDSVectorIdLi1EN3CDS12DefaultAllocEES4_(%struct.HNodeRotate2*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN12HNodeRotate25printEi(%struct.HNodeRotate2*, i32) + +declare void @_ZN12HNodeRotate25calcHEv(%struct.HNodeRotate2*) + +declare void @_ZN12HNodeRotate211toCartesianEv(%struct.HNodeRotate2*) + +declare void @_ZN21HNodeTranslateRotate3D0Ev(%struct.HNodeTranslateRotate3*) + +declare void @_ZN21HNodeTranslateRotate3D1Ev(%struct.HNodeTranslateRotate3*) + +declare 
void @_ZN21HNodeTranslateRotate318enforceConstraintsER9CDSVectorIdLi1EN3CDS12DefaultAllocEES4_(%struct.HNodeTranslateRotate3*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN21HNodeTranslateRotate35printEi(%struct.HNodeTranslateRotate3*, i32) + +declare void @_ZN21HNodeTranslateRotate35calcHEv(%struct.HNodeTranslateRotate3*) + +declare void @_ZN21HNodeTranslateRotate2D0Ev(%struct.HNodeTranslateRotate2*) + +declare void @_ZN21HNodeTranslateRotate2D1Ev(%struct.HNodeTranslateRotate2*) + +declare void @_ZN21HNodeTranslateRotate26setVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeTranslateRotate2*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN21HNodeTranslateRotate218enforceConstraintsER9CDSVectorIdLi1EN3CDS12DefaultAllocEES4_(%struct.HNodeTranslateRotate2*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) + +declare void @_ZN21HNodeTranslateRotate25printEi(%struct.HNodeTranslateRotate2*, i32) + +declare void @_ZN21HNodeTranslateRotate25calcHEv(%struct.HNodeTranslateRotate2*) + +declare void @_ZN21HNodeTranslateRotate211toCartesianEv(%struct.HNodeTranslateRotate2*) + +declare void @_ZN14HNodeTranslateC2EPK9HingeNodeP7IVMAtomRi(%struct.HNodeTranslate*, %struct.HingeNode*, %struct.IVMAtom*, i32*) + +declare void @_ZN14HNodeTranslateD1Ev(%struct.HNodeTranslate*) + +declare void @_ZN14HNodeTranslateD0Ev(%struct.HNodeTranslate*) + +declare void @_ZN14HNodeTranslate5calcHEv(%struct.HNodeTranslate*) + +declare void @_ZN14HNodeTranslate11toCartesianEv(%struct.HNodeTranslate*) + +declare void @_ZN12HNodeRotate3C2EPK9HingeNodeP7IVMAtomRib(%struct.HNodeRotate3*, %struct.HingeNode*, %struct.IVMAtom*, i32*, i8 zeroext) + +declare void @_ZN8AtomTree6findCMEPK9HingeNode(%struct.Vec3* noalias sret, %struct.HingeNode*) + +declare %struct.IVMAtom** @_ZN7CDSListIP7IVMAtomE7prependERKS1_(%struct.AtomList*, %struct.IVMAtom**) + 
+declare %"struct.CDSVectorBase<Vec3,CDS::DefaultAlloc>"* @_ZN13CDSVectorBaseI4Vec3N3CDS12DefaultAllocEE6resizeEi(%"struct.CDSVectorBase<Vec3,CDS::DefaultAlloc>"*, i32) + +declare void @_ZN12HNodeRotate2C2EPK9HingeNodeRK4Vec3Ri(%struct.HNodeRotate2*, %struct.HingeNode*, %struct.Vec3*, i32*) + +declare void @_ZN21HNodeTranslateRotate3C2EPK9HingeNodeP7IVMAtomRib(%struct.HNodeTranslateRotate3*, %struct.HingeNode*, %struct.IVMAtom*, i32*, i8 zeroext) + +declare void @_ZN13HingeNodeSpecILi3EE9calcPropsEv(%"struct.HingeNodeSpec<3>"*) + +declare void @_ZN11MatrixTools9transposeI11FixedMatrixIdLi3ELi6ELi0ELi0EEEENT_13TransposeTypeERKS3_(%"struct.FixedMatrix<double,3,6,0,0>"* noalias sret, %"struct.FixedMatrix<double,3,6,0,0>"*) + +declare void @_ZN11MatrixTools9transposeI4Mat3EENT_13TransposeTypeERKS2_(%struct.Mat3* noalias sret, %struct.Mat3*) + +declare void @_Z10blockMat12IdLi3ELi3ELi3EE11FixedMatrixIT_XT0_EXplT1_T2_ELi0ELi0EERKS0_IS1_XT0_EXT1_ELi0ELi0EERKS0_IS1_XT0_EXT2_ELi0ELi0EE(%"struct.FixedMatrix<double,3,6,0,0>"* noalias sret, %"struct.FixedMatrix<double,3,3,0,0>"*, %"struct.FixedMatrix<double,3,3,0,0>"*) + +declare void @_ZN13CDSMatrixBaseIdEC2I11FixedMatrixIdLi3ELi6ELi0ELi0EEEERKT_(%"struct.CDSMatrixBase<double>"*, %"struct.FixedMatrix<double,3,6,0,0>"*) + +declare void @_ZN11MatrixTools9transposeI11FixedMatrixIdLi6ELi3ELi0ELi0EEEENT_13TransposeTypeERKS3_(%"struct.FixedMatrix<double,3,6,0,0>"* noalias sret, %"struct.FixedMatrix<double,3,6,0,0>"*) + +declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsIdLi4EERSoS0_RK15FixedVectorBaseIT_XT0_EE(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %"struct.FixedMatrixBase<double,2,2>"*) + +declare double @_Z4normIdLi4EET_RK11FixedVectorIS0_XT0_ELi0EE(%"struct.FixedMatrix<double,2,2,0,0>"*) + +declare %"struct.FixedMatrixBase<double,2,2>"* @_ZN15FixedVectorBaseIdLi4EEdVERKd(%"struct.FixedMatrixBase<double,2,2>"*, double*) + +declare %"struct.FixedMatrixBase<double,2,2>"* 
@_ZN15FixedVectorBaseIdLi4EEmIERKS0_(%"struct.FixedMatrixBase<double,2,2>"*, %"struct.FixedMatrixBase<double,2,2>"*) + +declare void @_ZN11FixedVectorIdLi3ELi0EE6subColILi6ELi3ELi0ELi0EEES0_RK11FixedMatrixIdXT_EXT0_EXT1_EXT2_EEiii(%"struct.FixedMatrix<double,1,3,0,0>"* noalias sret, %"struct.FixedMatrix<double,3,6,0,0>"*, i32, i32, i32) + +declare void @_ZN13HingeNodeSpecILi3EE7calcD_GERK11FixedMatrixIdLi6ELi6ELi0ELi0EE(%"struct.HingeNodeSpec<3>"*, %struct.Mat6*) + +declare void @_ZN11MatrixTools7inverseI11FixedMatrixIdLi3ELi3ELi0ELi0EEEET_RKS3_NS_14InverseResultsINS3_10MatrixTypeEEE(%"struct.FixedMatrix<double,3,3,0,0>"* noalias sret, %"struct.FixedMatrix<double,3,3,0,0>"*, %"struct.MatrixTools::InverseResults<FullMatrix<double> >"*) + +declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsIdLi3ELi3EERSoS0_RK15FixedMatrixBaseIT_XT0_EXT1_EE(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %"struct.FixedMatrixBase<double,3,3>"*) + +declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsIdLi3ELi6EERSoS0_RK15FixedMatrixBaseIT_XT0_EXT1_EE(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %"struct.FixedMatrixBase<double,3,6>"*) + +declare void @_Z7unitVecRK4Vec3(%struct.Vec3* noalias sret, %struct.Vec3*) + +declare double @_Z4normIdLi3EET_RK11FixedVectorIS0_XT0_ELi0EE(%"struct.FixedMatrix<double,1,3,0,0>"*) + +declare void @_ZN12HNodeTorsionC2EPK9HingeNodeRK4Vec3Ri(%struct.HNodeTorsion*, %struct.HingeNode*, %struct.Vec3*, i32*) + +declare double @acos(double) nounwind readnone + +declare double @atan2(double, double) nounwind readnone + +declare void @_ZN21HNodeTranslateRotate2C2EPK9HingeNodeRi(%struct.HNodeTranslateRotate2*, %struct.HingeNode*, i32*) + +declare void @_ZN13HingeNodeSpecILi2EE9calcPropsEv(%"struct.HingeNodeSpec<2>"*) + +declare void @_ZN11MatrixTools9transposeI11FixedMatrixIdLi2ELi6ELi0ELi0EEEENT_13TransposeTypeERKS3_(%"struct.FixedMatrix<double,2,6,0,0>"* noalias sret, 
%"struct.FixedMatrix<double,2,6,0,0>"*) + +declare void @_Z10blockMat21IdLi1ELi3ELi1EE11FixedMatrixIT_XplT0_T2_EXT1_ELi0ELi0EERKS0_IS1_XT0_EXT1_ELi0ELi0EERKS0_IS1_XT2_EXT1_ELi0ELi0EE(%"struct.FixedMatrix<double,1,6,0,0>"* noalias sret, %"struct.FixedMatrix<double,1,3,0,0>"*, %"struct.FixedMatrix<double,1,3,0,0>"*) + +declare void @_Z10blockMat12IdLi2ELi3ELi3EE11FixedMatrixIT_XT0_EXplT1_T2_ELi0ELi0EERKS0_IS1_XT0_EXT1_ELi0ELi0EERKS0_IS1_XT0_EXT2_ELi0ELi0EE(%"struct.FixedMatrix<double,2,6,0,0>"* noalias sret, %"struct.FixedMatrix<double,1,6,0,0>"*, %"struct.FixedMatrix<double,1,6,0,0>"*) + +declare void @_ZN13CDSMatrixBaseIdEC2I11FixedMatrixIdLi2ELi6ELi0ELi0EEEERKT_(%"struct.CDSMatrixBase<double>"*, %"struct.FixedMatrix<double,2,6,0,0>"*) + +declare void @_ZN11MatrixTools9transposeI11FixedMatrixIdLi6ELi2ELi0ELi0EEEENT_13TransposeTypeERKS3_(%"struct.FixedMatrix<double,2,6,0,0>"* noalias sret, %"struct.FixedMatrix<double,2,6,0,0>"*) + +declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsIdLi2EERSoS0_RK15FixedVectorBaseIT_XT0_EE(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %"struct.FixedVectorBase<double,2>"*) + +declare %"struct.FixedMatrixBase<double,1,3>"* @_ZN15FixedVectorBaseIdLi3EEdVERKd(%"struct.FixedMatrixBase<double,1,3>"*, double*) + +declare %"struct.FixedMatrixBase<double,1,3>"* @_ZN15FixedVectorBaseIdLi3EEmIERKS0_(%"struct.FixedMatrixBase<double,1,3>"*, %"struct.FixedMatrixBase<double,1,3>"*) + +declare void @_ZN11FixedVectorIdLi3ELi0EE6subColILi6ELi2ELi0ELi0EEES0_RK11FixedMatrixIdXT_EXT0_EXT1_EXT2_EEiii(%"struct.FixedMatrix<double,1,3,0,0>"* noalias sret, %"struct.FixedMatrix<double,2,6,0,0>"*, i32, i32, i32) + +declare void @_ZN13HingeNodeSpecILi2EE7calcD_GERK11FixedMatrixIdLi6ELi6ELi0ELi0EE(%"struct.HingeNodeSpec<2>"*, %struct.Mat6*) + +declare void @_ZN11MatrixTools7inverseI11FixedMatrixIdLi2ELi2ELi0ELi0EEEET_RKS3_NS_14InverseResultsINS3_10MatrixTypeEEE(%"struct.FixedMatrix<double,2,2,0,0>"* noalias sret, 
%"struct.FixedMatrix<double,2,2,0,0>"*, %"struct.MatrixTools::InverseResults<FullMatrix<double> >"*) + +declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsIdLi2ELi2EERSoS0_RK15FixedMatrixBaseIT_XT0_EXT1_EE(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %"struct.FixedMatrixBase<double,2,2>"*) + +declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsIdLi2ELi6EERSoS0_RK15FixedMatrixBaseIT_XT0_EXT1_EE(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %"struct.FixedMatrixBase<double,2,6>"*) + +declare zeroext i8 @_ZNK9CDSStringIcE7matchesEPKcb(%struct.String*, i8*, i8 zeroext) + +declare %struct.HingeNode* @_Z9constructP9HingeNodeRKN16InternalDynamics9HingeSpecERi(%struct.HingeNode*, %"struct.InternalDynamics::HingeSpec"*, i32*) + +declare void @_ZN9CDSStringIcEC1ERKS0_(%struct.String*, %struct.String*) + +declare void @_ZN9CDSStringIcE8downcaseEv(%struct.String*) + +declare %struct.String* @_ZN9CDSStringIcEaSEPKc(%struct.String*, i8*) + +declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsIP7IVMAtomERSoS2_RK7CDSListIT_E(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %struct.AtomList*) + +declare i32 @_ZNK7CDSListIP9HingeNodeE8getIndexERKS1_(%"struct.CDSList<HingeNode*>"*, %struct.HingeNode**) + +declare void @_ZN13CDSMatrixBaseIdEC2I11FixedMatrixIdLi6ELi6ELi0ELi0EEEERKT_(%"struct.CDSMatrixBase<double>"*, %struct.Mat6*) + +declare void @_ZN11FixedVectorIdLi3ELi0EE6subColILi6ELi6ELi0ELi0EEES0_RK11FixedMatrixIdXT_EXT0_EXT1_EXT2_EEiii(%"struct.FixedMatrix<double,1,3,0,0>"* noalias sret, %struct.Mat6*, i32, i32, i32) + +declare void @_ZN13HingeNodeSpecILi6EE7calcD_GERK11FixedMatrixIdLi6ELi6ELi0ELi0EE(%"struct.HingeNodeSpec<6>"*, %struct.Mat6*) + +declare void @_ZN11MatrixTools7inverseI11FixedMatrixIdLi6ELi6ELi0ELi0EEEET_RKS3_NS_14InverseResultsINS3_10MatrixTypeEEE(%struct.Mat6* noalias sret, %struct.Mat6*, %"struct.MatrixTools::InverseResults<FullMatrix<double> >"*) + 
+declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsIdLi6ELi6EERSoS0_RK15FixedMatrixBaseIT_XT0_EXT1_EE(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %"struct.FixedMatrixBase<double,6,6>"*) + +declare void @_ZN13HingeNodeSpecILi5EE9calcPropsEv(%"struct.HingeNodeSpec<5>"*) + +declare void @_ZN11MatrixTools9transposeI11FixedMatrixIdLi5ELi6ELi0ELi0EEEENT_13TransposeTypeERKS3_(%"struct.FixedMatrix<double,5,6,0,0>"* noalias sret, %"struct.FixedMatrix<double,5,6,0,0>"*) + +declare void @_ZN13CDSMatrixBaseIdEC2I11FixedMatrixIdLi5ELi6ELi0ELi0EEEERKT_(%"struct.CDSMatrixBase<double>"*, %"struct.FixedMatrix<double,5,6,0,0>"*) + +declare void @_ZN11MatrixTools9transposeI11FixedMatrixIdLi6ELi5ELi0ELi0EEEENT_13TransposeTypeERKS3_(%"struct.FixedMatrix<double,5,6,0,0>"* noalias sret, %"struct.FixedMatrix<double,5,6,0,0>"*) + +declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsIdLi5EERSoS0_RK15FixedVectorBaseIT_XT0_EE(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %"struct.FixedVectorBase<double,5>"*) + +declare void @_ZN11FixedVectorIdLi3ELi0EE6subColILi6ELi5ELi0ELi0EEES0_RK11FixedMatrixIdXT_EXT0_EXT1_EXT2_EEiii(%"struct.FixedMatrix<double,1,3,0,0>"* noalias sret, %"struct.FixedMatrix<double,5,6,0,0>"*, i32, i32, i32) + +declare void @_ZN13HingeNodeSpecILi5EE7calcD_GERK11FixedMatrixIdLi6ELi6ELi0ELi0EE(%"struct.HingeNodeSpec<5>"*, %struct.Mat6*) + +declare void @_ZN11MatrixTools7inverseI11FixedMatrixIdLi5ELi5ELi0ELi0EEEET_RKS3_NS_14InverseResultsINS3_10MatrixTypeEEE(%"struct.FixedMatrix<double,5,5,0,0>"* noalias sret, %"struct.FixedMatrix<double,5,5,0,0>"*, %"struct.MatrixTools::InverseResults<FullMatrix<double> >"*) + +declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsIdLi5ELi5EERSoS0_RK15FixedMatrixBaseIT_XT0_EXT1_EE(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %"struct.FixedMatrixBase<double,5,5>"*) + +declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* 
@_ZlsIdLi5ELi6EERSoS0_RK15FixedMatrixBaseIT_XT0_EXT1_EE(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %"struct.FixedMatrixBase<double,5,6>"*) + +declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/negative-sin.ll b/src/LLVM/test/CodeGen/X86/negative-sin.ll new file mode 100644 index 0000000..76e557b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/negative-sin.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -enable-unsafe-fp-math -march=x86-64 | FileCheck %s +; CHECK-NOT: {{addsd|subsd|xor}} + +declare double @sin(double %f) + +define double @foo(double %e) +{ + %f = fsub double 0.0, %e + %g = call double @sin(double %f) readonly + %h = fsub double 0.0, %g + ret double %h +}
diff --git a/src/LLVM/test/CodeGen/X86/negative-stride-fptosi-user.ll b/src/LLVM/test/CodeGen/X86/negative-stride-fptosi-user.ll new file mode 100644 index 0000000..332e0b9 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/negative-stride-fptosi-user.ll
@@ -0,0 +1,25 @@ +; RUN: llc < %s -march=x86-64 | grep cvtsi2sd + +; LSR previously eliminated the sitofp by introducing an induction +; variable which stepped by a bogus ((double)UINT32_C(-1)). It's theoretically +; possible to eliminate the sitofp using a proper -1.0 step though; this +; test should be changed if that is done. + +define void @foo(i32 %N) nounwind { +entry: + %0 = icmp slt i32 %N, 0 ; <i1> [#uses=1] + br i1 %0, label %bb, label %return + +bb: ; preds = %bb, %entry + %i.03 = phi i32 [ 0, %entry ], [ %2, %bb ] ; <i32> [#uses=2] + %1 = sitofp i32 %i.03 to double ; <double> [#uses=1] + tail call void @bar(double %1) nounwind + %2 = add nsw i32 %i.03, -1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %2, %N ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +} + +declare void @bar(double)
diff --git a/src/LLVM/test/CodeGen/X86/negative-subscript.ll b/src/LLVM/test/CodeGen/X86/negative-subscript.ll new file mode 100644 index 0000000..28f7d6b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/negative-subscript.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=x86 +; rdar://6559995 + +@a = external global [255 x i8*], align 32 + +define i32 @main() nounwind { +entry: + store i8* bitcast (i8** getelementptr ([255 x i8*]* @a, i32 0, i32 -2147483624) to i8*), i8** getelementptr ([255 x i8*]* @a, i32 0, i32 16), align 32 + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/X86/negative_zero.ll b/src/LLVM/test/CodeGen/X86/negative_zero.ll new file mode 100644 index 0000000..50c5a9a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/negative_zero.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3 | grep fchs + + +define double @T() { + ret double -1.0 ;; codegen as fld1/fchs, not as a load from cst pool +}
diff --git a/src/LLVM/test/CodeGen/X86/no-cfi.ll b/src/LLVM/test/CodeGen/X86/no-cfi.ll new file mode 100644 index 0000000..f9985d4 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/no-cfi.ll
@@ -0,0 +1,38 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -disable-cfi | FileCheck --check-prefix=STATIC %s +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -disable-cfi -relocation-model=pic | FileCheck --check-prefix=PIC %s + +; STATIC: .ascii "zPLR" +; STATIC: .byte 3 +; STATIC-NEXT: .long __gxx_personality_v0 +; STATIC-NEXT: .byte 3 +; STATIC-NEXT: .byte 3 + +; PIC: .ascii "zPLR" +; PIC: .byte 155 +; PIC-NEXT: .L +; PIC-NEXT: .long DW.ref.__gxx_personality_v0-.L +; PIC-NEXT: .byte 27 +; PIC-NEXT: .byte 27 + + +define void @bar() { +entry: + %call = invoke i32 @foo() + to label %invoke.cont unwind label %lpad + +invoke.cont: + ret void + +lpad: + %exn = call i8* @llvm.eh.exception() nounwind + %eh.selector = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i8* null) nounwind + ret void +} + +declare i32 @foo() + +declare i8* @llvm.eh.exception() nounwind readonly + +declare i32 @__gxx_personality_v0(...) + +declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/nobt.ll b/src/LLVM/test/CodeGen/X86/nobt.ll new file mode 100644 index 0000000..35090e3 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/nobt.ll
@@ -0,0 +1,70 @@ +; RUN: llc < %s -march=x86 | not grep btl + +; This tests some cases where BT must not be generated. See also bt.ll. +; Fixes 20040709-[12].c in gcc testsuite. + +define void @test2(i32 %x, i32 %n) nounwind { +entry: + %tmp1 = and i32 %x, 1 + %tmp2 = urem i32 %tmp1, 15 + %tmp3 = and i32 %tmp2, 1 ; <i32> [#uses=1] + %tmp4 = icmp eq i32 %tmp3, %tmp2 ; <i1> [#uses=1] + br i1 %tmp4, label %bb, label %UnifiedReturnBlock + +bb: ; preds = %entry + call void @foo() + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +define void @test3(i32 %x, i32 %n) nounwind { +entry: + %tmp1 = and i32 %x, 1 + %tmp2 = urem i32 %tmp1, 15 + %tmp3 = and i32 %tmp2, 1 ; <i32> [#uses=1] + %tmp4 = icmp eq i32 %tmp2, %tmp3 ; <i1> [#uses=1] + br i1 %tmp4, label %bb, label %UnifiedReturnBlock + +bb: ; preds = %entry + call void @foo() + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +define void @test4(i32 %x, i32 %n) nounwind { +entry: + %tmp1 = and i32 %x, 1 + %tmp2 = urem i32 %tmp1, 15 + %tmp3 = and i32 %tmp2, 1 ; <i32> [#uses=1] + %tmp4 = icmp ne i32 %tmp2, %tmp3 ; <i1> [#uses=1] + br i1 %tmp4, label %bb, label %UnifiedReturnBlock + +bb: ; preds = %entry + call void @foo() + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +define void @test5(i32 %x, i32 %n) nounwind { +entry: + %tmp1 = and i32 %x, 1 + %tmp2 = urem i32 %tmp1, 15 + %tmp3 = and i32 %tmp2, 1 ; <i32> [#uses=1] + %tmp4 = icmp ne i32 %tmp2, %tmp3 ; <i1> [#uses=1] + br i1 %tmp4, label %bb, label %UnifiedReturnBlock + +bb: ; preds = %entry + call void @foo() + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +declare void @foo()
diff --git a/src/LLVM/test/CodeGen/X86/non-lazy-bind.ll b/src/LLVM/test/CodeGen/X86/non-lazy-bind.ll new file mode 100644 index 0000000..f729658 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/non-lazy-bind.ll
@@ -0,0 +1,27 @@ +; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s + +declare void @lazy() nonlazybind +declare void @not() + +; CHECK: foo: +; CHECK: callq _not +; CHECK: callq *_lazy@GOTPCREL(%rip) +define void @foo() nounwind { + call void @not() + call void @lazy() + ret void +} + +; CHECK: tail_call_regular: +; CHECK: jmp _not +define void @tail_call_regular() nounwind { + tail call void @not() + ret void +} + +; CHECK: tail_call_eager: +; CHECK: jmpq *_lazy@GOTPCREL(%rip) +define void @tail_call_eager() nounwind { + tail call void @lazy() + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/nontemporal.ll b/src/LLVM/test/CodeGen/X86/nontemporal.ll new file mode 100644 index 0000000..1d09535 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/nontemporal.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s + +define void @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D, <2 x i64> %E) { +; CHECK: movntps + %cast = bitcast i8* %B to <4 x float>* + store <4 x float> %A, <4 x float>* %cast, align 16, !nontemporal !0 +; CHECK: movntdq + %cast1 = bitcast i8* %B to <2 x i64>* + store <2 x i64> %E, <2 x i64>* %cast1, align 16, !nontemporal !0 +; CHECK: movntpd + %cast2 = bitcast i8* %B to <2 x double>* + store <2 x double> %C, <2 x double>* %cast2, align 16, !nontemporal !0 +; CHECK: movnti + %cast3 = bitcast i8* %B to i32* + store i32 %D, i32* %cast3, align 16, !nontemporal !0 + ret void +} + +!0 = metadata !{i32 1}
diff --git a/src/LLVM/test/CodeGen/X86/norex-subreg.ll b/src/LLVM/test/CodeGen/X86/norex-subreg.ll new file mode 100644 index 0000000..2c529fd --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/norex-subreg.ll
@@ -0,0 +1,80 @@ +; RUN: llc -O0 < %s +; RUN: llc < %s +target triple = "x86_64-apple-macosx10.7" + +; This test case extracts a sub_8bit_hi sub-register: +; +; %R8B<def> = COPY %BH, %EBX<imp-use,kill> +; %ESI<def> = MOVZX32_NOREXrr8 %R8B<kill> +; +; The register allocation above is invalid, %BH can only be encoded without an +; REX prefix, so the destination register must be GR8_NOREX. The code above +; triggers an assertion in copyPhysReg. +; +; <rdar://problem/10248099> + +define void @f() nounwind uwtable ssp { +entry: + %0 = load i32* undef, align 4 + %add = add i32 0, %0 + %conv1 = trunc i32 %add to i16 + %bf.value = and i16 %conv1, 255 + %1 = and i16 %bf.value, 255 + %2 = shl i16 %1, 8 + %3 = load i16* undef, align 1 + %4 = and i16 %3, 255 + %5 = or i16 %4, %2 + store i16 %5, i16* undef, align 1 + %6 = load i16* undef, align 1 + %7 = lshr i16 %6, 8 + %bf.clear2 = and i16 %7, 255 + %conv3 = zext i16 %bf.clear2 to i32 + %rem = srem i32 %conv3, 15 + %conv4 = trunc i32 %rem to i16 + %bf.value5 = and i16 %conv4, 255 + %8 = and i16 %bf.value5, 255 + %9 = shl i16 %8, 8 + %10 = or i16 undef, %9 + store i16 %10, i16* undef, align 1 + ret void +} + +; This test case extracts a sub_8bit_hi sub-register: +; +; %vreg2<def> = COPY %vreg1:sub_8bit_hi; GR8:%vreg2 GR64_ABCD:%vreg1 +; TEST8ri %vreg2, 1, %EFLAGS<imp-def>; GR8:%vreg2 +; +; %vreg2 must be constrained to GR8_NOREX, or the COPY could become impossible. 
+; +; PR11088 + +define fastcc i32 @g(i64 %FB) nounwind uwtable readnone align 2 { +entry: + %and32 = and i64 %FB, 256 + %cmp33 = icmp eq i64 %and32, 0 + %Features.6.or35 = select i1 %cmp33, i32 0, i32 undef + %cmp38 = icmp eq i64 undef, 0 + %or40 = or i32 %Features.6.or35, 4 + %Features.8 = select i1 %cmp38, i32 %Features.6.or35, i32 %or40 + %and42 = and i64 %FB, 32 + %or45 = or i32 %Features.8, 2 + %cmp43 = icmp eq i64 %and42, 0 + %Features.8.or45 = select i1 %cmp43, i32 %Features.8, i32 %or45 + %and47 = and i64 %FB, 8192 + %cmp48 = icmp eq i64 %and47, 0 + %or50 = or i32 %Features.8.or45, 32 + %Features.10 = select i1 %cmp48, i32 %Features.8.or45, i32 %or50 + %or55 = or i32 %Features.10, 64 + %Features.10.or55 = select i1 undef, i32 %Features.10, i32 %or55 + %and57 = lshr i64 %FB, 2 + %and57.tr = trunc i64 %and57 to i32 + %or60 = and i32 %and57.tr, 1 + %Features.12 = or i32 %Features.10.or55, %or60 + %and62 = and i64 %FB, 128 + %or65 = or i32 %Features.12, 8 + %cmp63 = icmp eq i64 %and62, 0 + %Features.12.or65 = select i1 %cmp63, i32 %Features.12, i32 %or65 + %Features.14 = select i1 undef, i32 undef, i32 %Features.12.or65 + %Features.16 = select i1 undef, i32 undef, i32 %Features.14 + ret i32 %Features.16 +}
diff --git a/src/LLVM/test/CodeGen/X86/nosse-error1.ll b/src/LLVM/test/CodeGen/X86/nosse-error1.ll new file mode 100644 index 0000000..16cbb73 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/nosse-error1.ll
@@ -0,0 +1,33 @@ +; RUN: llvm-as < %s > %t1 +; RUN: not llc -march=x86-64 -mattr=-sse < %t1 2> %t2 +; RUN: grep "SSE register return with SSE disabled" %t2 +; RUN: llc -march=x86-64 < %t1 | grep xmm +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" +@f = external global float ; <float*> [#uses=4] +@d = external global double ; <double*> [#uses=4] + +define void @test() nounwind { +entry: + %0 = load float* @f, align 4 ; <float> [#uses=1] + %1 = tail call float @foo1(float %0) nounwind ; <float> [#uses=1] + store float %1, float* @f, align 4 + %2 = load double* @d, align 8 ; <double> [#uses=1] + %3 = tail call double @foo2(double %2) nounwind ; <double> [#uses=1] + store double %3, double* @d, align 8 + %4 = load float* @f, align 4 ; <float> [#uses=1] + %5 = tail call float @foo3(float %4) nounwind ; <float> [#uses=1] + store float %5, float* @f, align 4 + %6 = load double* @d, align 8 ; <double> [#uses=1] + %7 = tail call double @foo4(double %6) nounwind ; <double> [#uses=1] + store double %7, double* @d, align 8 + ret void +} + +declare float @foo1(float) + +declare double @foo2(double) + +declare float @foo3(float) + +declare double @foo4(double)
diff --git a/src/LLVM/test/CodeGen/X86/nosse-error2.ll b/src/LLVM/test/CodeGen/X86/nosse-error2.ll new file mode 100644 index 0000000..45a5eaf --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/nosse-error2.ll
@@ -0,0 +1,33 @@ +; RUN: llvm-as < %s > %t1 +; RUN: not llc -march=x86 -mcpu=i686 -mattr=-sse < %t1 2> %t2 +; RUN: grep "SSE register return with SSE disabled" %t2 +; RUN: llc -march=x86 -mcpu=i686 -mattr=+sse < %t1 | grep xmm +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" +target triple = "i386-unknown-linux-gnu" +@f = external global float ; <float*> [#uses=4] +@d = external global double ; <double*> [#uses=4] + +define void @test() nounwind { +entry: + %0 = load float* @f, align 4 ; <float> [#uses=1] + %1 = tail call inreg float @foo1(float inreg %0) nounwind ; <float> [#uses=1] + store float %1, float* @f, align 4 + %2 = load double* @d, align 8 ; <double> [#uses=1] + %3 = tail call inreg double @foo2(double inreg %2) nounwind ; <double> [#uses=1] + store double %3, double* @d, align 8 + %4 = load float* @f, align 4 ; <float> [#uses=1] + %5 = tail call inreg float @foo3(float inreg %4) nounwind ; <float> [#uses=1] + store float %5, float* @f, align 4 + %6 = load double* @d, align 8 ; <double> [#uses=1] + %7 = tail call inreg double @foo4(double inreg %6) nounwind ; <double> [#uses=1] + store double %7, double* @d, align 8 + ret void +} + +declare inreg float @foo1(float inreg) + +declare inreg double @foo2(double inreg) + +declare inreg float @foo3(float inreg) + +declare inreg double @foo4(double inreg)
diff --git a/src/LLVM/test/CodeGen/X86/nosse-varargs.ll b/src/LLVM/test/CodeGen/X86/nosse-varargs.ll new file mode 100644 index 0000000..e6da0ab5 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/nosse-varargs.ll
@@ -0,0 +1,46 @@ +; RUN: llvm-as < %s > %t +; RUN: llc -march=x86-64 -mattr=-sse < %t | not grep xmm +; RUN: llc -march=x86-64 < %t | grep xmm +; PR3403 +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + %struct.__va_list_tag = type { i32, i32, i8*, i8* } + +define i32 @foo(float %a, i8* nocapture %fmt, ...) nounwind { +entry: + %ap = alloca [1 x %struct.__va_list_tag], align 8 ; <[1 x %struct.__va_list_tag]*> [#uses=4] + %ap12 = bitcast [1 x %struct.__va_list_tag]* %ap to i8* ; <i8*> [#uses=2] + call void @llvm.va_start(i8* %ap12) + %0 = getelementptr [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0, i32 0 ; <i32*> [#uses=2] + %1 = load i32* %0, align 8 ; <i32> [#uses=3] + %2 = icmp ult i32 %1, 48 ; <i1> [#uses=1] + br i1 %2, label %bb, label %bb3 + +bb: ; preds = %entry + %3 = getelementptr [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0, i32 3 ; <i8**> [#uses=1] + %4 = load i8** %3, align 8 ; <i8*> [#uses=1] + %5 = inttoptr i32 %1 to i8* ; <i8*> [#uses=1] + %6 = ptrtoint i8* %5 to i64 ; <i64> [#uses=1] + %ctg2 = getelementptr i8* %4, i64 %6 ; <i8*> [#uses=1] + %7 = add i32 %1, 8 ; <i32> [#uses=1] + store i32 %7, i32* %0, align 8 + br label %bb4 + +bb3: ; preds = %entry + %8 = getelementptr [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0, i32 2 ; <i8**> [#uses=2] + %9 = load i8** %8, align 8 ; <i8*> [#uses=2] + %10 = getelementptr i8* %9, i64 8 ; <i8*> [#uses=1] + store i8* %10, i8** %8, align 8 + br label %bb4 + +bb4: ; preds = %bb3, %bb + %addr.0.0 = phi i8* [ %ctg2, %bb ], [ %9, %bb3 ] ; <i8*> [#uses=1] + %11 = bitcast i8* %addr.0.0 to i32* ; <i32*> [#uses=1] + %12 = load i32* %11, align 4 ; <i32> [#uses=1] + call void @llvm.va_end(i8* %ap12) + ret i32 %12 +} + +declare void @llvm.va_start(i8*) nounwind + +declare void @llvm.va_end(i8*) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/object-size.ll b/src/LLVM/test/CodeGen/X86/object-size.ll new file mode 100644 index 0000000..0493edc --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/object-size.ll
@@ -0,0 +1,55 @@ +; RUN: llc -O0 -regalloc=linearscan < %s -march=x86-64 | FileCheck %s -check-prefix=X64 + +; ModuleID = 'ts.c' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0" + +@p = common global i8* null, align 8 ; <i8**> [#uses=4] +@.str = private constant [3 x i8] c"Hi\00" ; <[3 x i8]*> [#uses=1] + +define void @bar() nounwind ssp { +entry: + %tmp = load i8** @p ; <i8*> [#uses=1] + %0 = call i64 @llvm.objectsize.i64(i8* %tmp, i1 0) ; <i64> [#uses=1] + %cmp = icmp ne i64 %0, -1 ; <i1> [#uses=1] +; X64: movabsq $-1, %rax +; X64: cmpq $-1, %rax + br i1 %cmp, label %cond.true, label %cond.false + +cond.true: ; preds = %entry + %tmp1 = load i8** @p ; <i8*> [#uses=1] + %tmp2 = load i8** @p ; <i8*> [#uses=1] + %1 = call i64 @llvm.objectsize.i64(i8* %tmp2, i1 1) ; <i64> [#uses=1] + %call = call i8* @__strcpy_chk(i8* %tmp1, i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i64 %1) ssp ; <i8*> [#uses=1] + br label %cond.end + +cond.false: ; preds = %entry + %tmp3 = load i8** @p ; <i8*> [#uses=1] + %call4 = call i8* @__inline_strcpy_chk(i8* %tmp3, i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0)) ssp ; <i8*> [#uses=1] + br label %cond.end + +cond.end: ; preds = %cond.false, %cond.true + %cond = phi i8* [ %call, %cond.true ], [ %call4, %cond.false ] ; <i8*> [#uses=0] + ret void +} + +declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readonly + +declare i8* @__strcpy_chk(i8*, i8*, i64) ssp + +define internal i8* @__inline_strcpy_chk(i8* %__dest, i8* %__src) nounwind ssp { +entry: + %retval = alloca i8* ; <i8**> [#uses=2] + %__dest.addr = alloca i8* ; <i8**> [#uses=3] + %__src.addr = alloca i8* ; <i8**> [#uses=2] + store i8* %__dest, i8** %__dest.addr + store i8* %__src, i8** %__src.addr + %tmp = load i8** %__dest.addr ; <i8*> [#uses=1] + %tmp1 = load i8** %__src.addr ; <i8*> [#uses=1] + %tmp2 = 
load i8** %__dest.addr ; <i8*> [#uses=1] + %0 = call i64 @llvm.objectsize.i64(i8* %tmp2, i1 1) ; <i64> [#uses=1] + %call = call i8* @__strcpy_chk(i8* %tmp, i8* %tmp1, i64 %0) ssp ; <i8*> [#uses=1] + store i8* %call, i8** %retval + %1 = load i8** %retval ; <i8*> [#uses=1] + ret i8* %1 +}
diff --git a/src/LLVM/test/CodeGen/X86/opt-ext-uses.ll b/src/LLVM/test/CodeGen/X86/opt-ext-uses.ll new file mode 100644 index 0000000..72fb38b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/opt-ext-uses.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -march=x86 | grep movw | count 1 + +define signext i16 @t() { +entry: + %tmp180 = load i16* null, align 2 ; <i16> [#uses=3] + %tmp180181 = sext i16 %tmp180 to i32 ; <i32> [#uses=1] + %tmp182 = add i16 %tmp180, 10 + %tmp185 = icmp slt i16 %tmp182, 0 ; <i1> [#uses=1] + br i1 %tmp185, label %cond_true188, label %cond_next245 + +cond_true188: ; preds = %entry + %tmp195196 = trunc i16 %tmp180 to i8 ; <i8> [#uses=0] + ret i16 %tmp180 + +cond_next245: ; preds = %entry + %tmp256 = and i32 %tmp180181, 15 ; <i32> [#uses=0] + %tmp3 = trunc i32 %tmp256 to i16 + ret i16 %tmp3 +}
diff --git a/src/LLVM/test/CodeGen/X86/opt-shuff-tstore.ll b/src/LLVM/test/CodeGen/X86/opt-shuff-tstore.ll new file mode 100644 index 0000000..fc24913 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/opt-shuff-tstore.ll
@@ -0,0 +1,39 @@ +; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s -promote-elements -mattr=+sse2,+sse41 | FileCheck %s + +; CHECK: func_4_8 +; A single memory write +; CHECK: movd +; CHECK-NEXT: ret +define void @func_4_8(<4 x i8> %param, <4 x i8>* %p) { + %r = add <4 x i8> %param, <i8 1, i8 2, i8 3, i8 4> + store <4 x i8> %r, <4 x i8>* %p + ret void +} + +; CHECK: func_4_16 +; CHECK: movq +; CHECK-NEXT: ret +define void @func_4_16(<4 x i16> %param, <4 x i16>* %p) { + %r = add <4 x i16> %param, <i16 1, i16 2, i16 3, i16 4> + store <4 x i16> %r, <4 x i16>* %p + ret void +} + +; CHECK: func_8_8 +; CHECK: movq +; CHECK-NEXT: ret +define void @func_8_8(<8 x i8> %param, <8 x i8>* %p) { + %r = add <8 x i8> %param, <i8 1, i8 2, i8 3, i8 4, i8 1, i8 2, i8 3, i8 4> + store <8 x i8> %r, <8 x i8>* %p + ret void +} + +; CHECK: func_2_32 +; CHECK: movq +; CHECK-NEXT: ret +define void @func_2_32(<2 x i32> %param, <2 x i32>* %p) { + %r = add <2 x i32> %param, <i32 1, i32 2> + store <2 x i32> %r, <2 x i32>* %p + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/optimize-max-0.ll b/src/LLVM/test/CodeGen/X86/optimize-max-0.ll new file mode 100644 index 0000000..981a16a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/optimize-max-0.ll
@@ -0,0 +1,461 @@ +; RUN: llc < %s -march=x86 | not grep cmov + +; LSR should be able to eliminate the max computations by +; making the loops use slt/ult comparisons instead of ne comparisons. + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin9" + +define void @foo(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind { +entry: + %0 = mul i32 %x, %w + %1 = mul i32 %x, %w + %2 = sdiv i32 %1, 4 + %.sum2 = add i32 %2, %0 + %cond = icmp eq i32 %d, 1 + br i1 %cond, label %bb29, label %bb10.preheader + +bb10.preheader: ; preds = %entry + %3 = icmp sgt i32 %x, 0 + br i1 %3, label %bb.nph9, label %bb18.loopexit + +bb.nph7: ; preds = %bb7.preheader + %4 = mul i32 %y.08, %w + %5 = mul i32 %y.08, %s + %6 = add i32 %5, 1 + %tmp8 = icmp sgt i32 1, %w + %smax9 = select i1 %tmp8, i32 1, i32 %w + br label %bb6 + +bb6: ; preds = %bb7, %bb.nph7 + %x.06 = phi i32 [ 0, %bb.nph7 ], [ %indvar.next7, %bb7 ] + %7 = add i32 %x.06, %4 + %8 = shl i32 %x.06, 1 + %9 = add i32 %6, %8 + %10 = getelementptr i8* %r, i32 %9 + %11 = load i8* %10, align 1 + %12 = getelementptr i8* %j, i32 %7 + store i8 %11, i8* %12, align 1 + br label %bb7 + +bb7: ; preds = %bb6 + %indvar.next7 = add i32 %x.06, 1 + %exitcond10 = icmp ne i32 %indvar.next7, %smax9 + br i1 %exitcond10, label %bb6, label %bb7.bb9_crit_edge + +bb7.bb9_crit_edge: ; preds = %bb7 + br label %bb9 + +bb9: ; preds = %bb7.preheader, %bb7.bb9_crit_edge + br label %bb10 + +bb10: ; preds = %bb9 + %indvar.next11 = add i32 %y.08, 1 + %exitcond12 = icmp ne i32 %indvar.next11, %x + br i1 %exitcond12, label %bb7.preheader, label %bb10.bb18.loopexit_crit_edge + +bb10.bb18.loopexit_crit_edge: ; preds = %bb10 + br label %bb10.bb18.loopexit_crit_edge.split + +bb10.bb18.loopexit_crit_edge.split: ; preds = %bb.nph9, %bb10.bb18.loopexit_crit_edge + br label %bb18.loopexit + +bb.nph9: ; preds = %bb10.preheader + %13 = icmp sgt 
i32 %w, 0 + br i1 %13, label %bb.nph9.split, label %bb10.bb18.loopexit_crit_edge.split + +bb.nph9.split: ; preds = %bb.nph9 + br label %bb7.preheader + +bb7.preheader: ; preds = %bb.nph9.split, %bb10 + %y.08 = phi i32 [ 0, %bb.nph9.split ], [ %indvar.next11, %bb10 ] + br i1 true, label %bb.nph7, label %bb9 + +bb.nph5: ; preds = %bb18.loopexit + %14 = sdiv i32 %w, 2 + %15 = icmp slt i32 %w, 2 + %16 = sdiv i32 %x, 2 + br i1 %15, label %bb18.bb20_crit_edge.split, label %bb.nph5.split + +bb.nph5.split: ; preds = %bb.nph5 + %tmp2 = icmp sgt i32 1, %16 + %smax3 = select i1 %tmp2, i32 1, i32 %16 + br label %bb13 + +bb13: ; preds = %bb18, %bb.nph5.split + %y.14 = phi i32 [ 0, %bb.nph5.split ], [ %indvar.next1, %bb18 ] + %17 = mul i32 %14, %y.14 + %18 = shl i32 %y.14, 1 + %19 = srem i32 %y.14, 2 + %20 = add i32 %19, %18 + %21 = mul i32 %20, %s + br i1 true, label %bb.nph3, label %bb17 + +bb.nph3: ; preds = %bb13 + %22 = add i32 %17, %0 + %23 = add i32 %17, %.sum2 + %24 = sdiv i32 %w, 2 + %tmp = icmp sgt i32 1, %24 + %smax = select i1 %tmp, i32 1, i32 %24 + br label %bb14 + +bb14: ; preds = %bb15, %bb.nph3 + %x.12 = phi i32 [ 0, %bb.nph3 ], [ %indvar.next, %bb15 ] + %25 = shl i32 %x.12, 2 + %26 = add i32 %25, %21 + %27 = getelementptr i8* %r, i32 %26 + %28 = load i8* %27, align 1 + %.sum = add i32 %22, %x.12 + %29 = getelementptr i8* %j, i32 %.sum + store i8 %28, i8* %29, align 1 + %30 = shl i32 %x.12, 2 + %31 = or i32 %30, 2 + %32 = add i32 %31, %21 + %33 = getelementptr i8* %r, i32 %32 + %34 = load i8* %33, align 1 + %.sum6 = add i32 %23, %x.12 + %35 = getelementptr i8* %j, i32 %.sum6 + store i8 %34, i8* %35, align 1 + br label %bb15 + +bb15: ; preds = %bb14 + %indvar.next = add i32 %x.12, 1 + %exitcond = icmp ne i32 %indvar.next, %smax + br i1 %exitcond, label %bb14, label %bb15.bb17_crit_edge + +bb15.bb17_crit_edge: ; preds = %bb15 + br label %bb17 + +bb17: ; preds = %bb15.bb17_crit_edge, %bb13 + br label %bb18 + +bb18.loopexit: ; preds = 
%bb10.bb18.loopexit_crit_edge.split, %bb10.preheader + %36 = icmp slt i32 %x, 2 + br i1 %36, label %bb20, label %bb.nph5 + +bb18: ; preds = %bb17 + %indvar.next1 = add i32 %y.14, 1 + %exitcond4 = icmp ne i32 %indvar.next1, %smax3 + br i1 %exitcond4, label %bb13, label %bb18.bb20_crit_edge + +bb18.bb20_crit_edge: ; preds = %bb18 + br label %bb18.bb20_crit_edge.split + +bb18.bb20_crit_edge.split: ; preds = %bb18.bb20_crit_edge, %bb.nph5 + br label %bb20 + +bb20: ; preds = %bb18.bb20_crit_edge.split, %bb18.loopexit + switch i32 %d, label %return [ + i32 3, label %bb22 + i32 1, label %bb29 + ] + +bb22: ; preds = %bb20 + %37 = mul i32 %x, %w + %38 = sdiv i32 %37, 4 + %.sum3 = add i32 %38, %.sum2 + %39 = add i32 %x, 15 + %40 = and i32 %39, -16 + %41 = add i32 %w, 15 + %42 = and i32 %41, -16 + %43 = mul i32 %40, %s + %44 = icmp sgt i32 %x, 0 + br i1 %44, label %bb.nph, label %bb26 + +bb.nph: ; preds = %bb22 + br label %bb23 + +bb23: ; preds = %bb24, %bb.nph + %y.21 = phi i32 [ 0, %bb.nph ], [ %indvar.next5, %bb24 ] + %45 = mul i32 %y.21, %42 + %.sum1 = add i32 %45, %43 + %46 = getelementptr i8* %r, i32 %.sum1 + %47 = mul i32 %y.21, %w + %.sum5 = add i32 %47, %.sum3 + %48 = getelementptr i8* %j, i32 %.sum5 + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %48, i8* %46, i32 %w, i32 1, i1 false) + br label %bb24 + +bb24: ; preds = %bb23 + %indvar.next5 = add i32 %y.21, 1 + %exitcond6 = icmp ne i32 %indvar.next5, %x + br i1 %exitcond6, label %bb23, label %bb24.bb26_crit_edge + +bb24.bb26_crit_edge: ; preds = %bb24 + br label %bb26 + +bb26: ; preds = %bb24.bb26_crit_edge, %bb22 + %49 = mul i32 %x, %w + %.sum4 = add i32 %.sum3, %49 + %50 = getelementptr i8* %j, i32 %.sum4 + %51 = mul i32 %x, %w + %52 = sdiv i32 %51, 2 + tail call void @llvm.memset.p0i8.i32(i8* %50, i8 -128, i32 %52, i32 1, i1 false) + ret void + +bb29: ; preds = %bb20, %entry + %53 = add i32 %w, 15 + %54 = and i32 %53, -16 + %55 = icmp sgt i32 %x, 0 + br i1 %55, label %bb.nph11, label %bb33 + +bb.nph11: ; preds = 
%bb29 + br label %bb30 + +bb30: ; preds = %bb31, %bb.nph11 + %y.310 = phi i32 [ 0, %bb.nph11 ], [ %indvar.next13, %bb31 ] + %56 = mul i32 %y.310, %54 + %57 = getelementptr i8* %r, i32 %56 + %58 = mul i32 %y.310, %w + %59 = getelementptr i8* %j, i32 %58 + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %59, i8* %57, i32 %w, i32 1, i1 false) + br label %bb31 + +bb31: ; preds = %bb30 + %indvar.next13 = add i32 %y.310, 1 + %exitcond14 = icmp ne i32 %indvar.next13, %x + br i1 %exitcond14, label %bb30, label %bb31.bb33_crit_edge + +bb31.bb33_crit_edge: ; preds = %bb31 + br label %bb33 + +bb33: ; preds = %bb31.bb33_crit_edge, %bb29 + %60 = mul i32 %x, %w + %61 = getelementptr i8* %j, i32 %60 + %62 = mul i32 %x, %w + %63 = sdiv i32 %62, 2 + tail call void @llvm.memset.p0i8.i32(i8* %61, i8 -128, i32 %63, i32 1, i1 false) + ret void + +return: ; preds = %bb20 + ret void +} + +define void @bar(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind { +entry: + %0 = mul i32 %x, %w + %1 = mul i32 %x, %w + %2 = udiv i32 %1, 4 + %.sum2 = add i32 %2, %0 + %cond = icmp eq i32 %d, 1 + br i1 %cond, label %bb29, label %bb10.preheader + +bb10.preheader: ; preds = %entry + %3 = icmp ne i32 %x, 0 + br i1 %3, label %bb.nph9, label %bb18.loopexit + +bb.nph7: ; preds = %bb7.preheader + %4 = mul i32 %y.08, %w + %5 = mul i32 %y.08, %s + %6 = add i32 %5, 1 + %tmp8 = icmp ugt i32 1, %w + %smax9 = select i1 %tmp8, i32 1, i32 %w + br label %bb6 + +bb6: ; preds = %bb7, %bb.nph7 + %x.06 = phi i32 [ 0, %bb.nph7 ], [ %indvar.next7, %bb7 ] + %7 = add i32 %x.06, %4 + %8 = shl i32 %x.06, 1 + %9 = add i32 %6, %8 + %10 = getelementptr i8* %r, i32 %9 + %11 = load i8* %10, align 1 + %12 = getelementptr i8* %j, i32 %7 + store i8 %11, i8* %12, align 1 + br label %bb7 + +bb7: ; preds = %bb6 + %indvar.next7 = add i32 %x.06, 1 + %exitcond10 = icmp ne i32 %indvar.next7, %smax9 + br i1 %exitcond10, label %bb6, label %bb7.bb9_crit_edge + +bb7.bb9_crit_edge: ; preds = %bb7 + br label %bb9 + +bb9: ; preds = 
%bb7.preheader, %bb7.bb9_crit_edge + br label %bb10 + +bb10: ; preds = %bb9 + %indvar.next11 = add i32 %y.08, 1 + %exitcond12 = icmp ne i32 %indvar.next11, %x + br i1 %exitcond12, label %bb7.preheader, label %bb10.bb18.loopexit_crit_edge + +bb10.bb18.loopexit_crit_edge: ; preds = %bb10 + br label %bb10.bb18.loopexit_crit_edge.split + +bb10.bb18.loopexit_crit_edge.split: ; preds = %bb.nph9, %bb10.bb18.loopexit_crit_edge + br label %bb18.loopexit + +bb.nph9: ; preds = %bb10.preheader + %13 = icmp ugt i32 %w, 0 + br i1 %13, label %bb.nph9.split, label %bb10.bb18.loopexit_crit_edge.split + +bb.nph9.split: ; preds = %bb.nph9 + br label %bb7.preheader + +bb7.preheader: ; preds = %bb.nph9.split, %bb10 + %y.08 = phi i32 [ 0, %bb.nph9.split ], [ %indvar.next11, %bb10 ] + br i1 true, label %bb.nph7, label %bb9 + +bb.nph5: ; preds = %bb18.loopexit + %14 = udiv i32 %w, 2 + %15 = icmp ult i32 %w, 2 + %16 = udiv i32 %x, 2 + br i1 %15, label %bb18.bb20_crit_edge.split, label %bb.nph5.split + +bb.nph5.split: ; preds = %bb.nph5 + %tmp2 = icmp ugt i32 1, %16 + %smax3 = select i1 %tmp2, i32 1, i32 %16 + br label %bb13 + +bb13: ; preds = %bb18, %bb.nph5.split + %y.14 = phi i32 [ 0, %bb.nph5.split ], [ %indvar.next1, %bb18 ] + %17 = mul i32 %14, %y.14 + %18 = shl i32 %y.14, 1 + %19 = urem i32 %y.14, 2 + %20 = add i32 %19, %18 + %21 = mul i32 %20, %s + br i1 true, label %bb.nph3, label %bb17 + +bb.nph3: ; preds = %bb13 + %22 = add i32 %17, %0 + %23 = add i32 %17, %.sum2 + %24 = udiv i32 %w, 2 + %tmp = icmp ugt i32 1, %24 + %smax = select i1 %tmp, i32 1, i32 %24 + br label %bb14 + +bb14: ; preds = %bb15, %bb.nph3 + %x.12 = phi i32 [ 0, %bb.nph3 ], [ %indvar.next, %bb15 ] + %25 = shl i32 %x.12, 2 + %26 = add i32 %25, %21 + %27 = getelementptr i8* %r, i32 %26 + %28 = load i8* %27, align 1 + %.sum = add i32 %22, %x.12 + %29 = getelementptr i8* %j, i32 %.sum + store i8 %28, i8* %29, align 1 + %30 = shl i32 %x.12, 2 + %31 = or i32 %30, 2 + %32 = add i32 %31, %21 + %33 = getelementptr i8* %r, 
i32 %32 + %34 = load i8* %33, align 1 + %.sum6 = add i32 %23, %x.12 + %35 = getelementptr i8* %j, i32 %.sum6 + store i8 %34, i8* %35, align 1 + br label %bb15 + +bb15: ; preds = %bb14 + %indvar.next = add i32 %x.12, 1 + %exitcond = icmp ne i32 %indvar.next, %smax + br i1 %exitcond, label %bb14, label %bb15.bb17_crit_edge + +bb15.bb17_crit_edge: ; preds = %bb15 + br label %bb17 + +bb17: ; preds = %bb15.bb17_crit_edge, %bb13 + br label %bb18 + +bb18.loopexit: ; preds = %bb10.bb18.loopexit_crit_edge.split, %bb10.preheader + %36 = icmp ult i32 %x, 2 + br i1 %36, label %bb20, label %bb.nph5 + +bb18: ; preds = %bb17 + %indvar.next1 = add i32 %y.14, 1 + %exitcond4 = icmp ne i32 %indvar.next1, %smax3 + br i1 %exitcond4, label %bb13, label %bb18.bb20_crit_edge + +bb18.bb20_crit_edge: ; preds = %bb18 + br label %bb18.bb20_crit_edge.split + +bb18.bb20_crit_edge.split: ; preds = %bb18.bb20_crit_edge, %bb.nph5 + br label %bb20 + +bb20: ; preds = %bb18.bb20_crit_edge.split, %bb18.loopexit + switch i32 %d, label %return [ + i32 3, label %bb22 + i32 1, label %bb29 + ] + +bb22: ; preds = %bb20 + %37 = mul i32 %x, %w + %38 = udiv i32 %37, 4 + %.sum3 = add i32 %38, %.sum2 + %39 = add i32 %x, 15 + %40 = and i32 %39, -16 + %41 = add i32 %w, 15 + %42 = and i32 %41, -16 + %43 = mul i32 %40, %s + %44 = icmp ugt i32 %x, 0 + br i1 %44, label %bb.nph, label %bb26 + +bb.nph: ; preds = %bb22 + br label %bb23 + +bb23: ; preds = %bb24, %bb.nph + %y.21 = phi i32 [ 0, %bb.nph ], [ %indvar.next5, %bb24 ] + %45 = mul i32 %y.21, %42 + %.sum1 = add i32 %45, %43 + %46 = getelementptr i8* %r, i32 %.sum1 + %47 = mul i32 %y.21, %w + %.sum5 = add i32 %47, %.sum3 + %48 = getelementptr i8* %j, i32 %.sum5 + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %48, i8* %46, i32 %w, i32 1, i1 false) + br label %bb24 + +bb24: ; preds = %bb23 + %indvar.next5 = add i32 %y.21, 1 + %exitcond6 = icmp ne i32 %indvar.next5, %x + br i1 %exitcond6, label %bb23, label %bb24.bb26_crit_edge + +bb24.bb26_crit_edge: ; preds = %bb24 
+ br label %bb26 + +bb26: ; preds = %bb24.bb26_crit_edge, %bb22 + %49 = mul i32 %x, %w + %.sum4 = add i32 %.sum3, %49 + %50 = getelementptr i8* %j, i32 %.sum4 + %51 = mul i32 %x, %w + %52 = udiv i32 %51, 2 + tail call void @llvm.memset.p0i8.i32(i8* %50, i8 -128, i32 %52, i32 1, i1 false) + ret void + +bb29: ; preds = %bb20, %entry + %53 = add i32 %w, 15 + %54 = and i32 %53, -16 + %55 = icmp ugt i32 %x, 0 + br i1 %55, label %bb.nph11, label %bb33 + +bb.nph11: ; preds = %bb29 + br label %bb30 + +bb30: ; preds = %bb31, %bb.nph11 + %y.310 = phi i32 [ 0, %bb.nph11 ], [ %indvar.next13, %bb31 ] + %56 = mul i32 %y.310, %54 + %57 = getelementptr i8* %r, i32 %56 + %58 = mul i32 %y.310, %w + %59 = getelementptr i8* %j, i32 %58 + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %59, i8* %57, i32 %w, i32 1, i1 false) + br label %bb31 + +bb31: ; preds = %bb30 + %indvar.next13 = add i32 %y.310, 1 + %exitcond14 = icmp ne i32 %indvar.next13, %x + br i1 %exitcond14, label %bb30, label %bb31.bb33_crit_edge + +bb31.bb33_crit_edge: ; preds = %bb31 + br label %bb33 + +bb33: ; preds = %bb31.bb33_crit_edge, %bb29 + %60 = mul i32 %x, %w + %61 = getelementptr i8* %j, i32 %60 + %62 = mul i32 %x, %w + %63 = udiv i32 %62, 2 + tail call void @llvm.memset.p0i8.i32(i8* %61, i8 -128, i32 %63, i32 1, i1 false) + ret void + +return: ; preds = %bb20 + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind + +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/optimize-max-1.ll b/src/LLVM/test/CodeGen/X86/optimize-max-1.ll new file mode 100644 index 0000000..ad6c24d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/optimize-max-1.ll
@@ -0,0 +1,78 @@ +; RUN: llc < %s -march=x86-64 | not grep cmov + +; LSR should be able to eliminate both smax and umax expressions +; in loop trip counts. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" + +define void @fs(double* nocapture %p, i64 %n) nounwind { +entry: + %tmp = icmp slt i64 %n, 1 ; <i1> [#uses=1] + %smax = select i1 %tmp, i64 1, i64 %n ; <i64> [#uses=1] + br label %bb + +bb: ; preds = %bb, %entry + %i.0 = phi i64 [ 0, %entry ], [ %0, %bb ] ; <i64> [#uses=2] + %scevgep = getelementptr double* %p, i64 %i.0 ; <double*> [#uses=1] + store double 0.000000e+00, double* %scevgep, align 8 + %0 = add i64 %i.0, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %0, %smax ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb + ret void +} + +define void @bs(double* nocapture %p, i64 %n) nounwind { +entry: + %tmp = icmp sge i64 %n, 1 ; <i1> [#uses=1] + %smax = select i1 %tmp, i64 %n, i64 1 ; <i64> [#uses=1] + br label %bb + +bb: ; preds = %bb, %entry + %i.0 = phi i64 [ 0, %entry ], [ %0, %bb ] ; <i64> [#uses=2] + %scevgep = getelementptr double* %p, i64 %i.0 ; <double*> [#uses=1] + store double 0.000000e+00, double* %scevgep, align 8 + %0 = add i64 %i.0, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %0, %smax ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb + ret void +} + +define void @fu(double* nocapture %p, i64 %n) nounwind { +entry: + %tmp = icmp eq i64 %n, 0 ; <i1> [#uses=1] + %umax = select i1 %tmp, i64 1, i64 %n ; <i64> [#uses=1] + br label %bb + +bb: ; preds = %bb, %entry + %i.0 = phi i64 [ 0, %entry ], [ %0, %bb ] ; <i64> [#uses=2] + %scevgep = getelementptr double* %p, i64 %i.0 ; <double*> [#uses=1] + store double 0.000000e+00, double* %scevgep, align 8 + %0 = add i64 %i.0, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %0, %umax ; <i1> [#uses=1] + br i1 %exitcond, label %return, 
label %bb + +return: ; preds = %bb + ret void +} + +define void @bu(double* nocapture %p, i64 %n) nounwind { +entry: + %tmp = icmp ne i64 %n, 0 ; <i1> [#uses=1] + %umax = select i1 %tmp, i64 %n, i64 1 ; <i64> [#uses=1] + br label %bb + +bb: ; preds = %bb, %entry + %i.0 = phi i64 [ 0, %entry ], [ %0, %bb ] ; <i64> [#uses=2] + %scevgep = getelementptr double* %p, i64 %i.0 ; <double*> [#uses=1] + store double 0.000000e+00, double* %scevgep, align 8 + %0 = add i64 %i.0, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %0, %umax ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/optimize-max-2.ll b/src/LLVM/test/CodeGen/X86/optimize-max-2.ll new file mode 100644 index 0000000..8851c5b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/optimize-max-2.ll
@@ -0,0 +1,30 @@ +; RUN: llc < %s -march=x86-64 > %t +; RUN: grep cmov %t | count 2 +; RUN: grep jne %t | count 1 + +; LSR's OptimizeMax function shouldn't try to eliminate this max, because +; it has three operands. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" + +define void @foo(double* nocapture %p, i64 %x, i64 %y) nounwind { +entry: + %tmp = icmp eq i64 %y, 0 ; <i1> [#uses=1] + %umax = select i1 %tmp, i64 1, i64 %y ; <i64> [#uses=2] + %tmp8 = icmp ugt i64 %umax, %x ; <i1> [#uses=1] + %umax9 = select i1 %tmp8, i64 %umax, i64 %x ; <i64> [#uses=1] + br label %bb4 + +bb4: ; preds = %bb4, %entry + %i.07 = phi i64 [ 0, %entry ], [ %2, %bb4 ] ; <i64> [#uses=2] + %scevgep = getelementptr double* %p, i64 %i.07 ; <double*> [#uses=2] + %0 = load double* %scevgep, align 8 ; <double> [#uses=1] + %1 = fmul double %0, 2.000000e+00 ; <double> [#uses=1] + store double %1, double* %scevgep, align 8 + %2 = add i64 %i.07, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %2, %umax9 ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb4 + +return: ; preds = %bb4 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/optimize-max-3.ll b/src/LLVM/test/CodeGen/X86/optimize-max-3.ll new file mode 100644 index 0000000..e42aa9d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/optimize-max-3.ll
@@ -0,0 +1,76 @@ +; RUN: llc < %s -mtriple=x86_64-linux -asm-verbose=false | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win32 -asm-verbose=false | FileCheck %s + +; LSR's OptimizeMax should eliminate the select (max). + +; CHECK: foo: +; CHECK-NOT: cmov +; CHECK: jle + +define void @foo(i64 %n, double* nocapture %p) nounwind { +entry: + %cmp6 = icmp slt i64 %n, 0 ; <i1> [#uses=1] + br i1 %cmp6, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + %tmp = icmp sgt i64 %n, 0 ; <i1> [#uses=1] + %n.op = add i64 %n, 1 ; <i64> [#uses=1] + %tmp1 = select i1 %tmp, i64 %n.op, i64 1 ; <i64> [#uses=1] + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %i = phi i64 [ %i.next, %for.body ], [ 0, %for.body.preheader ] ; <i64> [#uses=2] + %arrayidx = getelementptr double* %p, i64 %i ; <double*> [#uses=2] + %t4 = load double* %arrayidx ; <double> [#uses=1] + %mul = fmul double %t4, 2.200000e+00 ; <double> [#uses=1] + store double %mul, double* %arrayidx + %i.next = add nsw i64 %i, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %i.next, %tmp1 ; <i1> [#uses=1] + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +; In this case, one of the max operands is another max, which folds, +; leaving a two-operand max which doesn't fit the usual pattern. +; OptimizeMax should handle this case. 
+; PR7454 + +; CHECK: _Z18GenerateStatusPagei: + +; CHECK: jle +; CHECK-NOT: cmov +; CHECK: xorl {{%edi, %edi|%ecx, %ecx|%eax, %eax}} +; CHECK-NEXT: align +; CHECK-NEXT: BB1_2: +; CHECK: callq +; CHECK-NEXT: incl [[BX:%[a-z0-9]+]] +; CHECK-NEXT: cmpl [[R14:%[a-z0-9]+]], [[BX]] +; CHECK: jl + +define void @_Z18GenerateStatusPagei(i32 %jobs_to_display) nounwind { +entry: + %cmp.i = icmp sgt i32 %jobs_to_display, 0 ; <i1> [#uses=1] + %tmp = select i1 %cmp.i, i32 %jobs_to_display, i32 0 ; <i32> [#uses=3] + %cmp8 = icmp sgt i32 %tmp, 0 ; <i1> [#uses=1] + br i1 %cmp8, label %bb.nph, label %for.end + +bb.nph: ; preds = %entry + %tmp11 = icmp sgt i32 %tmp, 1 ; <i1> [#uses=1] + %smax = select i1 %tmp11, i32 %tmp, i32 1 ; <i32> [#uses=1] + br label %for.body + +for.body: ; preds = %for.body, %bb.nph + %i.010 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body ] ; <i32> [#uses=1] + %it.0.09 = phi float* [ null, %bb.nph ], [ %call.i, %for.body ] ; <float*> [#uses=1] + %call.i = call float* @_ZSt18_Rb_tree_decrementPKSt18_Rb_tree_node_base(float* %it.0.09) ; <float*> [#uses=1] + %inc = add nsw i32 %i.010, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %inc, %smax ; <i1> [#uses=1] + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare float* @_ZSt18_Rb_tree_decrementPKSt18_Rb_tree_node_base(float*)
diff --git a/src/LLVM/test/CodeGen/X86/or-address.ll b/src/LLVM/test/CodeGen/X86/or-address.ll new file mode 100644 index 0000000..f866e41 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/or-address.ll
@@ -0,0 +1,90 @@ +; PR1135 +; RUN: llc %s -o - | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.3" + + +; CHECK: movl %{{.*}}, (%rdi,[[R0:.+]],4) +; CHECK: movl %{{.*}}, 8(%rdi,[[R0]],4) +; CHECK: movl %{{.*}}, 4(%rdi,[[R0]],4) +; CHECK: movl %{{.*}}, 12(%rdi,[[R0]],4) + +define void @test(i32* nocapture %array, i32 %r0) nounwind ssp noredzone { +bb.nph: + br label %bb + +bb: ; preds = %bb, %bb.nph + %j.010 = phi i8 [ 0, %bb.nph ], [ %14, %bb ] ; <i8> [#uses=1] + %k.19 = phi i8 [ 0, %bb.nph ], [ %.k.1, %bb ] ; <i8> [#uses=1] + %i0.08 = phi i8 [ 0, %bb.nph ], [ %15, %bb ] ; <i8> [#uses=3] + %0 = icmp slt i8 %i0.08, 4 ; <i1> [#uses=1] + %iftmp.0.0 = select i1 %0, i8 %i0.08, i8 0 ; <i8> [#uses=2] + %1 = icmp eq i8 %i0.08, 4 ; <i1> [#uses=1] + %2 = zext i1 %1 to i8 ; <i8> [#uses=1] + %.k.1 = add i8 %2, %k.19 ; <i8> [#uses=2] + %3 = shl i8 %.k.1, 2 ; <i8> [#uses=1] + %4 = add i8 %3, %iftmp.0.0 ; <i8> [#uses=1] + %5 = shl i8 %4, 2 ; <i8> [#uses=1] + %6 = zext i8 %5 to i64 ; <i64> [#uses=4] + %7 = getelementptr inbounds i32* %array, i64 %6 ; <i32*> [#uses=1] + store i32 %r0, i32* %7, align 4 + %8 = or i64 %6, 2 ; <i64> [#uses=1] + %9 = getelementptr inbounds i32* %array, i64 %8 ; <i32*> [#uses=1] + store i32 %r0, i32* %9, align 4 + %10 = or i64 %6, 1 ; <i64> [#uses=1] + %11 = getelementptr inbounds i32* %array, i64 %10 ; <i32*> [#uses=1] + store i32 %r0, i32* %11, align 4 + %12 = or i64 %6, 3 ; <i64> [#uses=1] + %13 = getelementptr inbounds i32* %array, i64 %12 ; <i32*> [#uses=1] + store i32 %r0, i32* %13, align 4 + %14 = add nsw i8 %j.010, 1 ; <i8> [#uses=2] + %15 = add i8 %iftmp.0.0, 1 ; <i8> [#uses=1] + %exitcond = icmp eq i8 %14, 32 ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb + ret void +} + +; CHECK: test1: +; CHECK: movl %{{.*}}, 
(%[[RDI:...]],%[[RCX:...]],4) +; CHECK: movl %{{.*}}, 8(%[[RDI]],%[[RCX]],4) +; CHECK: movl %{{.*}}, 4(%[[RDI]],%[[RCX]],4) +; CHECK: movl %{{.*}}, 12(%[[RDI]],%[[RCX]],4) + +define void @test1(i32* nocapture %array, i32 %r0, i8 signext %k, i8 signext %i0) nounwind { +bb.nph: + br label %for.body + +for.body: ; preds = %for.body, %bb.nph + %j.065 = phi i8 [ 0, %bb.nph ], [ %inc52, %for.body ] ; <i8> [#uses=1] + %i0.addr.064 = phi i8 [ %i0, %bb.nph ], [ %add, %for.body ] ; <i8> [#uses=3] + %k.addr.163 = phi i8 [ %k, %bb.nph ], [ %inc.k.addr.1, %for.body ] ; <i8> [#uses=1] + %cmp5 = icmp slt i8 %i0.addr.064, 4 ; <i1> [#uses=1] + %cond = select i1 %cmp5, i8 %i0.addr.064, i8 0 ; <i8> [#uses=2] + %cmp12 = icmp eq i8 %i0.addr.064, 4 ; <i1> [#uses=1] + %inc = zext i1 %cmp12 to i8 ; <i8> [#uses=1] + %inc.k.addr.1 = add i8 %inc, %k.addr.163 ; <i8> [#uses=2] + %mul = shl i8 %cond, 2 ; <i8> [#uses=1] + %mul22 = shl i8 %inc.k.addr.1, 4 ; <i8> [#uses=1] + %add23 = add i8 %mul22, %mul ; <i8> [#uses=1] + %idxprom = zext i8 %add23 to i64 ; <i64> [#uses=4] + %arrayidx = getelementptr inbounds i32* %array, i64 %idxprom ; <i32*> [#uses=1] + store i32 %r0, i32* %arrayidx + %add3356 = or i64 %idxprom, 2 ; <i64> [#uses=1] + %arrayidx36 = getelementptr inbounds i32* %array, i64 %add3356 ; <i32*> [#uses=1] + store i32 %r0, i32* %arrayidx36 + %add4058 = or i64 %idxprom, 1 ; <i64> [#uses=1] + %arrayidx43 = getelementptr inbounds i32* %array, i64 %add4058 ; <i32*> [#uses=1] + store i32 %r0, i32* %arrayidx43 + %add4760 = or i64 %idxprom, 3 ; <i64> [#uses=1] + %arrayidx50 = getelementptr inbounds i32* %array, i64 %add4760 ; <i32*> [#uses=1] + store i32 %r0, i32* %arrayidx50 + %inc52 = add nsw i8 %j.065, 1 ; <i8> [#uses=2] + %add = add i8 %cond, 1 ; <i8> [#uses=1] + %exitcond = icmp eq i8 %inc52, 32 ; <i1> [#uses=1] + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/or-branch.ll b/src/LLVM/test/CodeGen/X86/or-branch.ll new file mode 100644 index 0000000..d3f71bc --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/or-branch.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -march=x86 | not grep set + +define void @foo(i32 %X, i32 %Y, i32 %Z) nounwind { +entry: + %tmp = tail call i32 (...)* @bar( ) ; <i32> [#uses=0] + %tmp.upgrd.1 = icmp eq i32 %X, 0 ; <i1> [#uses=1] + %tmp3 = icmp slt i32 %Y, 5 ; <i1> [#uses=1] + %tmp4 = or i1 %tmp3, %tmp.upgrd.1 ; <i1> [#uses=1] + br i1 %tmp4, label %cond_true, label %UnifiedReturnBlock + +cond_true: ; preds = %entry + %tmp5 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0] + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +declare i32 @bar(...)
diff --git a/src/LLVM/test/CodeGen/X86/overlap-shift.ll b/src/LLVM/test/CodeGen/X86/overlap-shift.ll new file mode 100644 index 0000000..b65c110 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/overlap-shift.ll
@@ -0,0 +1,19 @@ +;; X's live range extends beyond the shift, so the register allocator +;; cannot coalesce it with Y. Because of this, a copy needs to be +;; emitted before the shift to save the register value before it is +;; clobbered. However, this copy is not needed if the register +;; allocator turns the shift into an LEA. This also occurs for ADD. + +; Check that the shift gets turned into an LEA. + +; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \ +; RUN: not grep {mov E.X, E.X} + +@G = external global i32 ; <i32*> [#uses=1] + +define i32 @test1(i32 %X) { + %Z = shl i32 %X, 2 ; <i32> [#uses=1] + volatile store i32 %Z, i32* @G + ret i32 %X +} +
diff --git a/src/LLVM/test/CodeGen/X86/packed_struct.ll b/src/LLVM/test/CodeGen/X86/packed_struct.ll new file mode 100644 index 0000000..cb2bec4 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/packed_struct.ll
@@ -0,0 +1,34 @@ +; RUN: llc < %s -march=x86 > %t +; RUN: grep foos+5 %t +; RUN: grep foos+1 %t +; RUN: grep foos+9 %t +; RUN: grep bara+19 %t +; RUN: grep bara+4 %t + +; make sure we compute the correct offset for a packed structure + +;Note: codegen for this could change rendering the above checks wrong + +target datalayout = "e-p:32:32" +target triple = "i686-pc-linux-gnu" + %struct.anon = type <{ i8, i32, i32, i32 }> +@foos = external global %struct.anon ; <%struct.anon*> [#uses=3] +@bara = weak global [4 x <{ i32, i8 }>] zeroinitializer ; <[4 x <{ i32, i8 }>]*> [#uses=2] + +define i32 @foo() nounwind { +entry: + %tmp = load i32* getelementptr (%struct.anon* @foos, i32 0, i32 1) ; <i32> [#uses=1] + %tmp3 = load i32* getelementptr (%struct.anon* @foos, i32 0, i32 2) ; <i32> [#uses=1] + %tmp6 = load i32* getelementptr (%struct.anon* @foos, i32 0, i32 3) ; <i32> [#uses=1] + %tmp4 = add i32 %tmp3, %tmp ; <i32> [#uses=1] + %tmp7 = add i32 %tmp4, %tmp6 ; <i32> [#uses=1] + ret i32 %tmp7 +} + +define i8 @bar() nounwind { +entry: + %tmp = load i8* getelementptr ([4 x <{ i32, i8 }>]* @bara, i32 0, i32 0, i32 1) ; <i8> [#uses=1] + %tmp4 = load i8* getelementptr ([4 x <{ i32, i8 }>]* @bara, i32 0, i32 3, i32 1) ; <i8> [#uses=1] + %tmp5 = add i8 %tmp4, %tmp ; <i8> [#uses=1] + ret i8 %tmp5 +}
diff --git a/src/LLVM/test/CodeGen/X86/palignr-2.ll b/src/LLVM/test/CodeGen/X86/palignr-2.ll new file mode 100644 index 0000000..116d4c7 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/palignr-2.ll
@@ -0,0 +1,28 @@ +; RUN: llc < %s -march=x86 -mattr=+ssse3 | FileCheck %s +; rdar://7341330 + +@a = global [4 x i32] [i32 4, i32 5, i32 6, i32 7], align 16 ; <[4 x i32]*> [#uses=1] +@c = common global [4 x i32] zeroinitializer, align 16 ; <[4 x i32]*> [#uses=1] +@b = global [4 x i32] [i32 0, i32 1, i32 2, i32 3], align 16 ; <[4 x i32]*> [#uses=1] + +define void @t1(<2 x i64> %a, <2 x i64> %b) nounwind ssp { +entry: +; CHECK: t1: +; palignr $3, %xmm1, %xmm0 + %0 = tail call <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64> %a, <2 x i64> %b, i8 24) nounwind readnone + store <2 x i64> %0, <2 x i64>* bitcast ([4 x i32]* @c to <2 x i64>*), align 16 + ret void +} + +declare <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64>, <2 x i64>, i8) nounwind readnone + +define void @t2() nounwind ssp { +entry: +; CHECK: t2: +; palignr $4, _b, %xmm0 + %0 = load <2 x i64>* bitcast ([4 x i32]* @b to <2 x i64>*), align 16 ; <<2 x i64>> [#uses=1] + %1 = load <2 x i64>* bitcast ([4 x i32]* @a to <2 x i64>*), align 16 ; <<2 x i64>> [#uses=1] + %2 = tail call <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64> %1, <2 x i64> %0, i8 32) nounwind readnone + store <2 x i64> %2, <2 x i64>* bitcast ([4 x i32]* @c to <2 x i64>*), align 16 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/palignr.ll b/src/LLVM/test/CodeGen/X86/palignr.ll new file mode 100644 index 0000000..6875fb3 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/palignr.ll
@@ -0,0 +1,73 @@ +; RUN: llc < %s -march=x86 -mcpu=core2 -mattr=+ssse3 | FileCheck %s +; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck --check-prefix=YONAH %s + +define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind { +; CHECK: test1: +; CHECK: pshufd +; CHECK-YONAH: pshufd + %C = shufflevector <4 x i32> %A, <4 x i32> undef, <4 x i32> < i32 1, i32 2, i32 3, i32 0 > + ret <4 x i32> %C +} + +define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) nounwind { +; CHECK: test2: +; CHECK: palignr +; CHECK-YONAH: shufps + %C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 1, i32 2, i32 3, i32 4 > + ret <4 x i32> %C +} + +define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) nounwind { +; CHECK: test3: +; CHECK: palignr + %C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 1, i32 2, i32 undef, i32 4 > + ret <4 x i32> %C +} + +define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind { +; CHECK: test4: +; CHECK: palignr + %C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 6, i32 7, i32 undef, i32 1 > + ret <4 x i32> %C +} + +define <4 x float> @test5(<4 x float> %A, <4 x float> %B) nounwind { +; CHECK: test5: +; CHECK: palignr + %C = shufflevector <4 x float> %A, <4 x float> %B, <4 x i32> < i32 6, i32 7, i32 undef, i32 1 > + ret <4 x float> %C +} + +define <8 x i16> @test6(<8 x i16> %A, <8 x i16> %B) nounwind { +; CHECK: test6: +; CHECK: palignr + %C = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 3, i32 4, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10 > + ret <8 x i16> %C +} + +define <8 x i16> @test7(<8 x i16> %A, <8 x i16> %B) nounwind { +; CHECK: test7: +; CHECK: palignr + %C = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 undef, i32 6, i32 undef, i32 8, i32 9, i32 10, i32 11, i32 12 > + ret <8 x i16> %C +} + +define <16 x i8> @test8(<16 x i8> %A, <16 x i8> %B) nounwind { +; CHECK: test8: +; CHECK: palignr + %C = shufflevector <16 x i8> %A, <16 x i8> %B, <16 x i32> < i32 5, i32 6, i32 7, i32 undef, i32 9, 
i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20 > + ret <16 x i8> %C +} + +; Check that we don't do unary (circular on single operand) palignr incorrectly. +; (It is possible, but before this testcase was committed, it was being done +; incorrectly. In particular, one of the operands of the palignr node +; was an UNDEF.) +define <8 x i16> @test9(<8 x i16> %A, <8 x i16> %B) nounwind { +; CHECK: test9: +; CHECK-NOT: palignr +; CHECK: pshufb + %C = shufflevector <8 x i16> %B, <8 x i16> %A, <8 x i32> < i32 undef, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0 > + ret <8 x i16> %C +} +
diff --git a/src/LLVM/test/CodeGen/X86/peep-setb.ll b/src/LLVM/test/CodeGen/X86/peep-setb.ll new file mode 100644 index 0000000..0bab789 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/peep-setb.ll
@@ -0,0 +1,82 @@ +; RUN: llc -march=x86-64 < %s | FileCheck %s + +define i8 @test1(i8 %a, i8 %b) nounwind { + %cmp = icmp ult i8 %a, %b + %cond = zext i1 %cmp to i8 + %add = add i8 %cond, %b + ret i8 %add +; CHECK: test1: +; CHECK: adcb $0 +} + +define i32 @test2(i32 %a, i32 %b) nounwind { + %cmp = icmp ult i32 %a, %b + %cond = zext i1 %cmp to i32 + %add = add i32 %cond, %b + ret i32 %add +; CHECK: test2: +; CHECK: adcl $0 +} + +define i64 @test3(i64 %a, i64 %b) nounwind { + %cmp = icmp ult i64 %a, %b + %conv = zext i1 %cmp to i64 + %add = add i64 %conv, %b + ret i64 %add +; CHECK: test3: +; CHECK: adcq $0 +} + +define i8 @test4(i8 %a, i8 %b) nounwind { + %cmp = icmp ult i8 %a, %b + %cond = zext i1 %cmp to i8 + %sub = sub i8 %b, %cond + ret i8 %sub +; CHECK: test4: +; CHECK: sbbb $0 +} + +define i32 @test5(i32 %a, i32 %b) nounwind { + %cmp = icmp ult i32 %a, %b + %cond = zext i1 %cmp to i32 + %sub = sub i32 %b, %cond + ret i32 %sub +; CHECK: test5: +; CHECK: sbbl $0 +} + +define i64 @test6(i64 %a, i64 %b) nounwind { + %cmp = icmp ult i64 %a, %b + %conv = zext i1 %cmp to i64 + %sub = sub i64 %b, %conv + ret i64 %sub +; CHECK: test6: +; CHECK: sbbq $0 +} + +define i8 @test7(i8 %a, i8 %b) nounwind { + %cmp = icmp ult i8 %a, %b + %cond = sext i1 %cmp to i8 + %sub = sub i8 %b, %cond + ret i8 %sub +; CHECK: test7: +; CHECK: adcb $0 +} + +define i32 @test8(i32 %a, i32 %b) nounwind { + %cmp = icmp ult i32 %a, %b + %cond = sext i1 %cmp to i32 + %sub = sub i32 %b, %cond + ret i32 %sub +; CHECK: test8: +; CHECK: adcl $0 +} + +define i64 @test9(i64 %a, i64 %b) nounwind { + %cmp = icmp ult i64 %a, %b + %conv = sext i1 %cmp to i64 + %sub = sub i64 %b, %conv + ret i64 %sub +; CHECK: test9: +; CHECK: adcq $0 +}
diff --git a/src/LLVM/test/CodeGen/X86/peep-test-0.ll b/src/LLVM/test/CodeGen/X86/peep-test-0.ll new file mode 100644 index 0000000..e521d8e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/peep-test-0.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -march=x86-64 > %t +; RUN: not grep cmp %t +; RUN: not grep test %t + +define void @loop(i64 %n, double* nocapture %d) nounwind { +entry: + br label %bb + +bb: + %indvar = phi i64 [ %n, %entry ], [ %indvar.next, %bb ] + %i.03 = add i64 %indvar, %n + %0 = getelementptr double* %d, i64 %i.03 + %1 = load double* %0, align 8 + %2 = fmul double %1, 3.000000e+00 + store double %2, double* %0, align 8 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 0 + br i1 %exitcond, label %return, label %bb + +return: + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/peep-test-1.ll b/src/LLVM/test/CodeGen/X86/peep-test-1.ll new file mode 100644 index 0000000..f83f0f6 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/peep-test-1.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -march=x86 > %t +; RUN: grep dec %t | count 1 +; RUN: not grep test %t +; RUN: not grep cmp %t + +define void @foo(i32 %n, double* nocapture %p) nounwind { + br label %bb + +bb: + %indvar = phi i32 [ 0, %0 ], [ %indvar.next, %bb ] + %i.03 = sub i32 %n, %indvar + %1 = getelementptr double* %p, i32 %i.03 + %2 = load double* %1, align 4 + %3 = fmul double %2, 2.930000e+00 + store double %3, double* %1, align 4 + %4 = add i32 %i.03, -1 + %phitmp = icmp slt i32 %4, 0 + %indvar.next = add i32 %indvar, 1 + br i1 %phitmp, label %bb, label %return + +return: + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/peep-test-2.ll b/src/LLVM/test/CodeGen/X86/peep-test-2.ll new file mode 100644 index 0000000..2745172 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/peep-test-2.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=x86 | grep testl + +; It's tempting to eliminate the testl instruction here and just use the +; EFLAGS value from the incl, however it can't be known whether the add +; will overflow, and if it does the incl would set OF, and the +; subsequent setg would return true. + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin9.6" + +define i32 @f(i32 %j) nounwind readnone { +entry: + %0 = add i32 %j, 1 ; <i32> [#uses=1] + %1 = icmp sgt i32 %0, 0 ; <i1> [#uses=1] + %2 = zext i1 %1 to i32 ; <i32> [#uses=1] + ret i32 %2 +}
diff --git a/src/LLVM/test/CodeGen/X86/peep-test-3.ll b/src/LLVM/test/CodeGen/X86/peep-test-3.ll new file mode 100644 index 0000000..528c4bc --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/peep-test-3.ll
@@ -0,0 +1,89 @@ +; RUN: llc < %s -march=x86 -post-RA-scheduler=false | FileCheck %s +; rdar://7226797 + +; LLVM should omit the testl and use the flags result from the orl. + +; CHECK: or: +define void @or(float* %A, i32 %IA, i32 %N) nounwind { +entry: + %0 = ptrtoint float* %A to i32 ; <i32> [#uses=1] + %1 = and i32 %0, 3 ; <i32> [#uses=1] + %2 = xor i32 %IA, 1 ; <i32> [#uses=1] +; CHECK: orl %e +; CHECK-NEXT: je + %3 = or i32 %2, %1 ; <i32> [#uses=1] + %4 = icmp eq i32 %3, 0 ; <i1> [#uses=1] + br i1 %4, label %return, label %bb + +bb: ; preds = %entry + store float 0.000000e+00, float* %A, align 4 + ret void + +return: ; preds = %entry + ret void +} +; CHECK: xor: +define void @xor(float* %A, i32 %IA, i32 %N) nounwind { +entry: + %0 = ptrtoint float* %A to i32 ; <i32> [#uses=1] + %1 = and i32 %0, 3 ; <i32> [#uses=1] +; CHECK: xorl $1, %e +; CHECK-NEXT: je + %2 = xor i32 %IA, 1 ; <i32> [#uses=1] + %3 = xor i32 %2, %1 ; <i32> [#uses=1] + %4 = icmp eq i32 %3, 0 ; <i1> [#uses=1] + br i1 %4, label %return, label %bb + +bb: ; preds = %entry + store float 0.000000e+00, float* %A, align 4 + ret void + +return: ; preds = %entry + ret void +} +; CHECK: and: +define void @and(float* %A, i32 %IA, i32 %N, i8* %p) nounwind { +entry: + store i8 0, i8* %p + %0 = ptrtoint float* %A to i32 ; <i32> [#uses=1] + %1 = and i32 %0, 3 ; <i32> [#uses=1] + %2 = xor i32 %IA, 1 ; <i32> [#uses=1] +; CHECK: andl $3, % +; CHECK-NEXT: movb % +; CHECK-NEXT: je + %3 = and i32 %2, %1 ; <i32> [#uses=1] + %t = trunc i32 %3 to i8 + store i8 %t, i8* %p + %4 = icmp eq i32 %3, 0 ; <i1> [#uses=1] + br i1 %4, label %return, label %bb + +bb: ; preds = %entry + store float 0.000000e+00, float* null, align 4 + ret void + +return: ; preds = %entry + ret void +} + +; Just like @and, but without the trunc+store. This should use a testb +; instead of an andl. 
+; CHECK: test: +define void @test(float* %A, i32 %IA, i32 %N, i8* %p) nounwind { +entry: + store i8 0, i8* %p + %0 = ptrtoint float* %A to i32 ; <i32> [#uses=1] + %1 = and i32 %0, 3 ; <i32> [#uses=1] + %2 = xor i32 %IA, 1 ; <i32> [#uses=1] +; CHECK: testb $3, % +; CHECK-NEXT: je + %3 = and i32 %2, %1 ; <i32> [#uses=1] + %4 = icmp eq i32 %3, 0 ; <i1> [#uses=1] + br i1 %4, label %return, label %bb + +bb: ; preds = %entry + store float 0.000000e+00, float* null, align 4 + ret void + +return: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/peep-vector-extract-concat.ll b/src/LLVM/test/CodeGen/X86/peep-vector-extract-concat.ll new file mode 100644 index 0000000..606a9be --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/peep-vector-extract-concat.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse2,-sse41 | FileCheck %s +; CHECK: pshufd $3, %xmm0, %xmm0 + +; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse2,-sse41 | FileCheck %s -check-prefix=WIN64 +; %a is passed indirectly on Win64. +; WIN64: movss 12(%rcx), %xmm0 + +define float @foo(<8 x float> %a) nounwind { + %c = extractelement <8 x float> %a, i32 3 + ret float %c +}
diff --git a/src/LLVM/test/CodeGen/X86/peep-vector-extract-insert.ll b/src/LLVM/test/CodeGen/X86/peep-vector-extract-insert.ll new file mode 100644 index 0000000..5e18044 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/peep-vector-extract-insert.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86-64 | grep {pxor %xmm0, %xmm0} | count 2 + +define float @foo(<4 x float> %a) { + %b = insertelement <4 x float> %a, float 0.0, i32 3 + %c = extractelement <4 x float> %b, i32 3 + ret float %c +} +define float @bar(float %a) { + %b = insertelement <4 x float> <float 0x400B333340000000, float 4.5, float 0.0, float 0x4022666660000000>, float %a, i32 3 + %c = extractelement <4 x float> %b, i32 2 + ret float %c +}
diff --git a/src/LLVM/test/CodeGen/X86/personality.ll b/src/LLVM/test/CodeGen/X86/personality.ll new file mode 100644 index 0000000..51be7bc --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/personality.ll
@@ -0,0 +1,55 @@ +; RUN: llc < %s -disable-cfi -mtriple=x86_64-apple-darwin9 -disable-cgp-branch-opts | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -disable-cfi -mtriple=i386-apple-darwin9 -disable-cgp-branch-opts | FileCheck %s -check-prefix=X32 +; PR1632 + +define void @_Z1fv() { +entry: + invoke void @_Z1gv() + to label %return unwind label %unwind + +unwind: ; preds = %entry + %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + br i1 false, label %eh_then, label %cleanup20 + +eh_then: ; preds = %unwind + invoke void @__cxa_end_catch() + to label %return unwind label %unwind10 + +unwind10: ; preds = %eh_then + %exn10 = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + %upgraded.eh_select13 = extractvalue { i8*, i32 } %exn10, 1 + %upgraded.eh_select131 = sext i32 %upgraded.eh_select13 to i64 + %tmp18 = icmp slt i64 %upgraded.eh_select131, 0 + br i1 %tmp18, label %filter, label %cleanup20 + +filter: ; preds = %unwind10 + unreachable + +cleanup20: ; preds = %unwind10, %unwind + %eh_selector.0 = phi i64 [ 0, %unwind ], [ %upgraded.eh_select131, %unwind10 ] + ret void + +return: ; preds = %eh_then, %entry + ret void +} + +declare void @_Z1gv() + +declare void @__cxa_end_catch() + +declare i32 @__gxx_personality_v0(...) + +; X64: zPLR +; X64: .byte 155 +; X64-NEXT: .long ___gxx_personality_v0@GOTPCREL+4 + +; X32: .section __IMPORT,__pointers,non_lazy_symbol_pointers +; X32-NEXT: L___gxx_personality_v0$non_lazy_ptr: +; X32-NEXT: .indirect_symbol ___gxx_personality_v0 + +; X32: zPLR +; X32: .byte 155 +; X32-NEXT: : +; X32-NEXT: .long L___gxx_personality_v0$non_lazy_ptr-
diff --git a/src/LLVM/test/CodeGen/X86/phi-bit-propagation.ll b/src/LLVM/test/CodeGen/X86/phi-bit-propagation.ll new file mode 100644 index 0000000..94c9722 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/phi-bit-propagation.ll
@@ -0,0 +1,55 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s + +%"class.std::bitset" = type { [8 x i8] } + +define zeroext i1 @_Z3fooPjmS_mRSt6bitsetILm32EE(i32* nocapture %a, i64 %asize, i32* nocapture %b, i64 %bsize, %"class.std::bitset"* %bits) nounwind readonly ssp noredzone { +entry: + %tmp.i.i.i.i = bitcast %"class.std::bitset"* %bits to i64* + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %conv = zext i32 %0 to i64 + %cmp = icmp eq i64 %conv, %bsize + br i1 %cmp, label %return, label %for.body + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i32* %b, i64 %conv + %tmp5 = load i32* %arrayidx, align 4 + %conv6 = zext i32 %tmp5 to i64 + %rem.i.i.i.i = and i64 %conv6, 63 + %tmp3.i = load i64* %tmp.i.i.i.i, align 8 + %shl.i.i = shl i64 1, %rem.i.i.i.i + %and.i = and i64 %shl.i.i, %tmp3.i + %cmp.i = icmp eq i64 %and.i, 0 + br i1 %cmp.i, label %for.inc, label %return + +for.inc: ; preds = %for.body + %inc = add i32 %0, 1 + br label %for.cond + +return: ; preds = %for.body, %for.cond +; CHECK-NOT: and + %retval.0 = phi i1 [ true, %for.body ], [ false, %for.cond ] + ret i1 %retval.0 +} + +; This test case caused an assertion failure; see PR9324. +define void @func_37() noreturn nounwind ssp { +entry: + br i1 undef, label %lbl_919, label %entry.for.inc_crit_edge + +entry.for.inc_crit_edge: ; preds = %entry + br label %for.inc + +lbl_919: ; preds = %for.cond7.preheader, %entry + br label %for.cond7.preheader + +for.cond7.preheader: ; preds = %for.inc, %lbl_919 + %storemerge.ph = phi i8 [ 0, %lbl_919 ], [ %add, %for.inc ] + br i1 undef, label %for.inc, label %lbl_919 + +for.inc: ; preds = %for.cond7.preheader, %entry.for.inc_crit_edge + %add = add i8 undef, 1 + br label %for.cond7.preheader +}
diff --git a/src/LLVM/test/CodeGen/X86/phi-immediate-factoring.ll b/src/LLVM/test/CodeGen/X86/phi-immediate-factoring.ll new file mode 100644 index 0000000..cb6c420 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/phi-immediate-factoring.ll
@@ -0,0 +1,54 @@ +; RUN: llc < %s -march=x86 -stats |& grep {Number of blocks eliminated} | grep 6 +; PR1296 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "i686-apple-darwin8" + +define i32 @foo(i32 %A, i32 %B, i32 %C) nounwind { +entry: + switch i32 %A, label %out [ + i32 1, label %bb + i32 0, label %bb13 + i32 2, label %bb35 + ] + +bb: ; preds = %cond_next, %entry + %i.144.1 = phi i32 [ 0, %entry ], [ %tmp7, %cond_next ] ; <i32> [#uses=2] + %tmp4 = and i32 %i.144.1, %B ; <i32> [#uses=1] + icmp eq i32 %tmp4, 0 ; <i1>:0 [#uses=1] + br i1 %0, label %cond_next, label %out + +cond_next: ; preds = %bb + %tmp7 = add i32 %i.144.1, 1 ; <i32> [#uses=2] + icmp slt i32 %tmp7, 1000 ; <i1>:1 [#uses=1] + br i1 %1, label %bb, label %out + +bb13: ; preds = %cond_next18, %entry + %i.248.1 = phi i32 [ 0, %entry ], [ %tmp20, %cond_next18 ] ; <i32> [#uses=2] + %tmp16 = and i32 %i.248.1, %C ; <i32> [#uses=1] + icmp eq i32 %tmp16, 0 ; <i1>:2 [#uses=1] + br i1 %2, label %cond_next18, label %out + +cond_next18: ; preds = %bb13 + %tmp20 = add i32 %i.248.1, 1 ; <i32> [#uses=2] + icmp slt i32 %tmp20, 1000 ; <i1>:3 [#uses=1] + br i1 %3, label %bb13, label %out + +bb27: ; preds = %bb35 + %tmp30 = and i32 %i.3, %C ; <i32> [#uses=1] + icmp eq i32 %tmp30, 0 ; <i1>:4 [#uses=1] + br i1 %4, label %cond_next32, label %out + +cond_next32: ; preds = %bb27 + %indvar.next = add i32 %i.3, 1 ; <i32> [#uses=1] + br label %bb35 + +bb35: ; preds = %entry, %cond_next32 + %i.3 = phi i32 [ %indvar.next, %cond_next32 ], [ 0, %entry ] ; <i32> [#uses=3] + icmp slt i32 %i.3, 1000 ; <i1>:5 [#uses=1] + br i1 %5, label %bb27, label %out + +out: ; preds = %bb27, %bb35, %bb13, %cond_next18, %bb, %cond_next, %entry + %result.0 = phi i32 [ 0, %entry ], [ 1, %bb ], [ 0, %cond_next ], [ 1, %bb13 ], [ 0, %cond_next18 ], [ 1, %bb27 ], [ 0, %bb35 ] ; <i32> [#uses=1] + ret i32 %result.0 +}
diff --git a/src/LLVM/test/CodeGen/X86/phys-reg-local-regalloc.ll b/src/LLVM/test/CodeGen/X86/phys-reg-local-regalloc.ll new file mode 100644 index 0000000..8b9ea17 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/phys-reg-local-regalloc.ll
@@ -0,0 +1,51 @@ +; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin9 -regalloc=fast | FileCheck %s +; RUN: llc -O0 < %s -march=x86 -mtriple=i386-apple-darwin9 -regalloc=fast | FileCheck %s +; CHECKed instructions should be the same with or without -O0. + +@.str = private constant [12 x i8] c"x + y = %i\0A\00", align 1 ; <[12 x i8]*> [#uses=1] + +define i32 @main() nounwind { +entry: +; CHECK: movl 24(%esp), %eax +; CHECK-NOT: movl +; CHECK: movl %eax, 36(%esp) +; CHECK-NOT: movl +; CHECK: movl 28(%esp), %ebx +; CHECK-NOT: movl +; CHECK: movl %ebx, 40(%esp) +; CHECK-NOT: movl +; CHECK: addl %ebx, %eax + %retval = alloca i32 ; <i32*> [#uses=2] + %"%ebx" = alloca i32 ; <i32*> [#uses=1] + %"%eax" = alloca i32 ; <i32*> [#uses=2] + %result = alloca i32 ; <i32*> [#uses=2] + %y = alloca i32 ; <i32*> [#uses=2] + %x = alloca i32 ; <i32*> [#uses=2] + %0 = alloca i32 ; <i32*> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store i32 1, i32* %x, align 4 + store i32 2, i32* %y, align 4 + call void asm sideeffect alignstack "# top of block", "~{dirflag},~{fpsr},~{flags},~{edi},~{esi},~{edx},~{ecx},~{eax}"() nounwind + %asmtmp = call i32 asm sideeffect alignstack "movl $1, $0", "=={eax},*m,~{dirflag},~{fpsr},~{flags},~{memory}"(i32* %x) nounwind ; <i32> [#uses=1] + store i32 %asmtmp, i32* %"%eax" + %asmtmp1 = call i32 asm sideeffect alignstack "movl $1, $0", "=={ebx},*m,~{dirflag},~{fpsr},~{flags},~{memory}"(i32* %y) nounwind ; <i32> [#uses=1] + store i32 %asmtmp1, i32* %"%ebx" + %1 = call i32 asm "", "={bx}"() nounwind ; <i32> [#uses=1] + %2 = call i32 asm "", "={ax}"() nounwind ; <i32> [#uses=1] + %asmtmp2 = call i32 asm sideeffect alignstack "addl $1, $0", "=={eax},{ebx},{eax},~{dirflag},~{fpsr},~{flags},~{memory}"(i32 %1, i32 %2) nounwind ; <i32> [#uses=1] + store i32 %asmtmp2, i32* %"%eax" + %3 = call i32 asm "", "={ax}"() nounwind ; <i32> [#uses=1] + call void asm sideeffect alignstack "movl $0, $1", 
"{eax},*m,~{dirflag},~{fpsr},~{flags},~{memory}"(i32 %3, i32* %result) nounwind + %4 = load i32* %result, align 4 ; <i32> [#uses=1] + %5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), i32 %4) nounwind ; <i32> [#uses=0] + store i32 0, i32* %0, align 4 + %6 = load i32* %0, align 4 ; <i32> [#uses=1] + store i32 %6, i32* %retval, align 4 + br label %return + +return: ; preds = %entry + %retval3 = load i32* %retval ; <i32> [#uses=1] + ret i32 %retval3 +} + +declare i32 @printf(i8*, ...) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/phys_subreg_coalesce-2.ll b/src/LLVM/test/CodeGen/X86/phys_subreg_coalesce-2.ll new file mode 100644 index 0000000..02c519f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/phys_subreg_coalesce-2.ll
@@ -0,0 +1,31 @@ +; RUN: llc < %s -march=x86 | FileCheck %s +; PR2659 + +define i32 @binomial(i32 %n, i32 %k) nounwind { +entry: + %cmp = icmp ugt i32 %k, %n ; <i1> [#uses=1] + br i1 %cmp, label %ifthen, label %forcond.preheader + +forcond.preheader: ; preds = %entry + %cmp44 = icmp eq i32 %k, 0 ; <i1> [#uses=1] + br i1 %cmp44, label %afterfor, label %forbody + +ifthen: ; preds = %entry + ret i32 0 +; CHECK: forbody +; CHECK-NOT: mov +forbody: ; preds = %forbody, %forcond.preheader + %indvar = phi i32 [ 0, %forcond.preheader ], [ %divisor.02, %forbody ] ; <i32> [#uses=3] + %accumulator.01 = phi i32 [ 1, %forcond.preheader ], [ %div, %forbody ] ; <i32> [#uses=1] + %divisor.02 = add i32 %indvar, 1 ; <i32> [#uses=2] + %n.addr.03 = sub i32 %n, %indvar ; <i32> [#uses=1] + %mul = mul i32 %n.addr.03, %accumulator.01 ; <i32> [#uses=1] + %div = udiv i32 %mul, %divisor.02 ; <i32> [#uses=2] + %inc = add i32 %indvar, 2 ; <i32> [#uses=1] + %cmp4 = icmp ugt i32 %inc, %k ; <i1> [#uses=1] + br i1 %cmp4, label %afterfor, label %forbody + +afterfor: ; preds = %forbody, %forcond.preheader + %accumulator.0.lcssa = phi i32 [ 1, %forcond.preheader ], [ %div, %forbody ] ; <i32> [#uses=1] + ret i32 %accumulator.0.lcssa +}
diff --git a/src/LLVM/test/CodeGen/X86/phys_subreg_coalesce-3.ll b/src/LLVM/test/CodeGen/X86/phys_subreg_coalesce-3.ll new file mode 100644 index 0000000..4162015 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/phys_subreg_coalesce-3.ll
@@ -0,0 +1,40 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin -join-physregs | FileCheck %s +; rdar://5571034 + +; This requires physreg joining, %vreg13 is live everywhere: +; 304L %CL<def> = COPY %vreg13:sub_8bit; GR32_ABCD:%vreg13 +; 320L %vreg15<def> = COPY %vreg19; GR32:%vreg15 GR32_NOSP:%vreg19 +; 336L %vreg15<def> = SAR32rCL %vreg15, %EFLAGS<imp-def,dead>, %CL<imp-use,kill>; GR32:%vreg15 + +define void @foo(i32* nocapture %quadrant, i32* nocapture %ptr, i32 %bbSize, i32 %bbStart, i32 %shifts) nounwind ssp { +; CHECK: foo: +entry: + %j.03 = add i32 %bbSize, -1 ; <i32> [#uses=2] + %0 = icmp sgt i32 %j.03, -1 ; <i1> [#uses=1] + br i1 %0, label %bb.nph, label %return + +bb.nph: ; preds = %entry + %tmp9 = add i32 %bbStart, %bbSize ; <i32> [#uses=1] + %tmp10 = add i32 %tmp9, -1 ; <i32> [#uses=1] + br label %bb + +bb: ; preds = %bb, %bb.nph +; CHECK: %bb +; CHECK-NOT: movb {{.*}}l, %cl +; CHECK: sarl %cl + %indvar = phi i32 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; <i32> [#uses=3] + %j.06 = sub i32 %j.03, %indvar ; <i32> [#uses=1] + %tmp11 = sub i32 %tmp10, %indvar ; <i32> [#uses=1] + %scevgep = getelementptr i32* %ptr, i32 %tmp11 ; <i32*> [#uses=1] + %1 = load i32* %scevgep, align 4 ; <i32> [#uses=1] + %2 = ashr i32 %j.06, %shifts ; <i32> [#uses=1] + %3 = and i32 %2, 65535 ; <i32> [#uses=1] + %4 = getelementptr inbounds i32* %quadrant, i32 %1 ; <i32*> [#uses=1] + store i32 %3, i32* %4, align 4 + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %indvar.next, %bbSize ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/phys_subreg_coalesce.ll b/src/LLVM/test/CodeGen/X86/phys_subreg_coalesce.ll new file mode 100644 index 0000000..2c855ce --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/phys_subreg_coalesce.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=+sse2 | not grep movl + + %struct.dpoint = type { double, double } + +define %struct.dpoint @midpoint(i64 %p1.0, i64 %p2.0) nounwind readnone { +entry: + %0 = trunc i64 %p1.0 to i32 ; <i32> [#uses=1] + %1 = sitofp i32 %0 to double ; <double> [#uses=1] + %2 = trunc i64 %p2.0 to i32 ; <i32> [#uses=1] + %3 = sitofp i32 %2 to double ; <double> [#uses=1] + %4 = fadd double %1, %3 ; <double> [#uses=1] + %5 = fmul double %4, 5.000000e-01 ; <double> [#uses=1] + %6 = lshr i64 %p1.0, 32 ; <i64> [#uses=1] + %7 = trunc i64 %6 to i32 ; <i32> [#uses=1] + %8 = sitofp i32 %7 to double ; <double> [#uses=1] + %9 = lshr i64 %p2.0, 32 ; <i64> [#uses=1] + %10 = trunc i64 %9 to i32 ; <i32> [#uses=1] + %11 = sitofp i32 %10 to double ; <double> [#uses=1] + %12 = fadd double %8, %11 ; <double> [#uses=1] + %13 = fmul double %12, 5.000000e-01 ; <double> [#uses=1] + %mrv3 = insertvalue %struct.dpoint undef, double %5, 0 ; <%struct.dpoint> [#uses=1] + %mrv4 = insertvalue %struct.dpoint %mrv3, double %13, 1 ; <%struct.dpoint> [#uses=1] + ret %struct.dpoint %mrv4 +}
diff --git a/src/LLVM/test/CodeGen/X86/pic-load-remat.ll b/src/LLVM/test/CodeGen/X86/pic-load-remat.ll new file mode 100644 index 0000000..7729752 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/pic-load-remat.ll
@@ -0,0 +1,47 @@ +; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 -relocation-model=pic | grep psllw | grep pb + +define void @f() nounwind { +entry: + br label %bb + +bb: ; preds = %bb, %entry + %tmp4403 = tail call <8 x i16> @llvm.x86.sse2.psubs.w( <8 x i16> zeroinitializer, <8 x i16> zeroinitializer ) nounwind readnone ; <<8 x i16>> [#uses=2] + %tmp4443 = tail call <8 x i16> @llvm.x86.sse2.padds.w( <8 x i16> zeroinitializer, <8 x i16> zeroinitializer ) nounwind readnone ; <<8 x i16>> [#uses=1] + %tmp4609 = tail call <8 x i16> @llvm.x86.sse2.psll.w( <8 x i16> zeroinitializer, <8 x i16> bitcast (<4 x i32> < i32 3, i32 5, i32 6, i32 9 > to <8 x i16>) ) ; <<8 x i16>> [#uses=1] + %tmp4651 = add <8 x i16> %tmp4609, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1 > ; <<8 x i16>> [#uses=1] + %tmp4658 = tail call <8 x i16> @llvm.x86.sse2.psll.w( <8 x i16> %tmp4651, <8 x i16> bitcast (<4 x i32> < i32 4, i32 1, i32 2, i32 3 > to <8 x i16>) ) ; <<8 x i16>> [#uses=1] + %tmp4669 = tail call <8 x i16> @llvm.x86.sse2.pavg.w( <8 x i16> < i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170 >, <8 x i16> %tmp4443 ) nounwind readnone ; <<8 x i16>> [#uses=2] + %tmp4679 = tail call <8 x i16> @llvm.x86.sse2.padds.w( <8 x i16> %tmp4669, <8 x i16> %tmp4669 ) nounwind readnone ; <<8 x i16>> [#uses=1] + %tmp4689 = add <8 x i16> %tmp4679, %tmp4658 ; <<8 x i16>> [#uses=1] + %tmp4700 = tail call <8 x i16> @llvm.x86.sse2.padds.w( <8 x i16> %tmp4689, <8 x i16> zeroinitializer ) nounwind readnone ; <<8 x i16>> [#uses=1] + %tmp4708 = bitcast <8 x i16> %tmp4700 to <2 x i64> ; <<2 x i64>> [#uses=1] + %tmp4772 = add <8 x i16> zeroinitializer, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1 > ; <<8 x i16>> [#uses=1] + %tmp4779 = tail call <8 x i16> @llvm.x86.sse2.psll.w( <8 x i16> %tmp4772, <8 x i16> bitcast (<4 x i32> < i32 3, i32 5, i32 undef, i32 7 > to <8 x i16>) ) ; <<8 x i16>> [#uses=1] + %tmp4810 = add <8 x i16> 
zeroinitializer, %tmp4779 ; <<8 x i16>> [#uses=1] + %tmp4821 = tail call <8 x i16> @llvm.x86.sse2.padds.w( <8 x i16> %tmp4810, <8 x i16> zeroinitializer ) nounwind readnone ; <<8 x i16>> [#uses=1] + %tmp4829 = bitcast <8 x i16> %tmp4821 to <2 x i64> ; <<2 x i64>> [#uses=1] + %tmp4900 = tail call <8 x i16> @llvm.x86.sse2.psll.w( <8 x i16> zeroinitializer, <8 x i16> bitcast (<4 x i32> < i32 1, i32 1, i32 2, i32 2 > to <8 x i16>) ) ; <<8 x i16>> [#uses=1] + %tmp4911 = tail call <8 x i16> @llvm.x86.sse2.pavg.w( <8 x i16> < i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170 >, <8 x i16> zeroinitializer ) nounwind readnone ; <<8 x i16>> [#uses=2] + %tmp4921 = tail call <8 x i16> @llvm.x86.sse2.padds.w( <8 x i16> %tmp4911, <8 x i16> %tmp4911 ) nounwind readnone ; <<8 x i16>> [#uses=1] + %tmp4931 = add <8 x i16> %tmp4921, %tmp4900 ; <<8 x i16>> [#uses=1] + %tmp4942 = tail call <8 x i16> @llvm.x86.sse2.padds.w( <8 x i16> %tmp4931, <8 x i16> zeroinitializer ) nounwind readnone ; <<8 x i16>> [#uses=1] + %tmp4950 = bitcast <8 x i16> %tmp4942 to <2 x i64> ; <<2 x i64>> [#uses=1] + %tmp4957 = tail call <8 x i16> @llvm.x86.sse2.padds.w( <8 x i16> %tmp4403, <8 x i16> zeroinitializer ) nounwind readnone ; <<8 x i16>> [#uses=1] + %tmp4958 = bitcast <8 x i16> %tmp4957 to <2 x i64> ; <<2 x i64>> [#uses=1] + %tmp4967 = tail call <8 x i16> @llvm.x86.sse2.psubs.w( <8 x i16> %tmp4403, <8 x i16> zeroinitializer ) nounwind readnone ; <<8 x i16>> [#uses=1] + %tmp4968 = bitcast <8 x i16> %tmp4967 to <2 x i64> ; <<2 x i64>> [#uses=1] + store <2 x i64> %tmp4829, <2 x i64>* null, align 16 + store <2 x i64> %tmp4958, <2 x i64>* null, align 16 + store <2 x i64> %tmp4968, <2 x i64>* null, align 16 + store <2 x i64> %tmp4950, <2 x i64>* null, align 16 + store <2 x i64> %tmp4708, <2 x i64>* null, align 16 + br label %bb +} + +declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone + +declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, 
<8 x i16>) nounwind readnone + +declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone + +declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/X86/pic.ll b/src/LLVM/test/CodeGen/X86/pic.ll new file mode 100644 index 0000000..cef12ec --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/pic.ll
@@ -0,0 +1,208 @@ +; RUN: llc < %s -mtriple=i686-pc-linux-gnu -relocation-model=pic -asm-verbose=false -post-RA-scheduler=false | FileCheck %s -check-prefix=LINUX + +@ptr = external global i32* +@dst = external global i32 +@src = external global i32 + +define void @test0() nounwind { +entry: + store i32* @dst, i32** @ptr + %tmp.s = load i32* @src + store i32 %tmp.s, i32* @dst + ret void + +; LINUX: test0: +; LINUX: calll .L0$pb +; LINUX-NEXT: .L0$pb: +; LINUX-NEXT: popl +; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L0$pb), +; LINUX: movl dst@GOT(%eax), +; LINUX: movl ptr@GOT(%eax), +; LINUX: movl src@GOT(%eax), +; LINUX: ret +} + +@ptr2 = global i32* null +@dst2 = global i32 0 +@src2 = global i32 0 + +define void @test1() nounwind { +entry: + store i32* @dst2, i32** @ptr2 + %tmp.s = load i32* @src2 + store i32 %tmp.s, i32* @dst2 + ret void + +; LINUX: test1: +; LINUX: calll .L1$pb +; LINUX-NEXT: .L1$pb: +; LINUX-NEXT: popl +; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L1$pb), %eax +; LINUX: movl dst2@GOT(%eax), +; LINUX: movl ptr2@GOT(%eax), +; LINUX: movl src2@GOT(%eax), +; LINUX: ret + +} + +declare i8* @malloc(i32) + +define void @test2() nounwind { +entry: + %ptr = call i8* @malloc(i32 40) + ret void +; LINUX: test2: +; LINUX: pushl %ebx +; LINUX-NEXT: subl $8, %esp +; LINUX-NEXT: calll .L2$pb +; LINUX-NEXT: .L2$pb: +; LINUX-NEXT: popl %ebx +; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L2$pb), %ebx +; LINUX: movl $40, (%esp) +; LINUX: calll malloc@PLT +; LINUX: addl $8, %esp +; LINUX: popl %ebx +; LINUX: ret +} + +@pfoo = external global void(...)* + +define void @test3() nounwind { +entry: + %tmp = call void(...)*(...)* @afoo() + store void(...)* %tmp, void(...)** @pfoo + %tmp1 = load void(...)** @pfoo + call void(...)* %tmp1() + ret void +; LINUX: test3: +; LINUX: calll .L3$pb +; LINUX-NEXT: .L3$pb: +; LINUX: popl +; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L3$pb), %[[REG3:e..]] +; LINUX: calll afoo@PLT +; LINUX: movl pfoo@GOT(%[[REG3]]), +; 
LINUX: calll * +} + +declare void(...)* @afoo(...) + +define void @test4() nounwind { +entry: + call void(...)* @foo() + ret void +; LINUX: test4: +; LINUX: calll .L4$pb +; LINUX: popl %ebx +; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L4$pb), %ebx +; LINUX: calll foo@PLT +} + +declare void @foo(...) + + +@ptr6 = internal global i32* null +@dst6 = internal global i32 0 +@src6 = internal global i32 0 + +define void @test5() nounwind { +entry: + store i32* @dst6, i32** @ptr6 + %tmp.s = load i32* @src6 + store i32 %tmp.s, i32* @dst6 + ret void + +; LINUX: test5: +; LINUX: calll .L5$pb +; LINUX-NEXT: .L5$pb: +; LINUX-NEXT: popl %eax +; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L5$pb), %eax +; LINUX: leal dst6@GOTOFF(%eax), %ecx +; LINUX: movl %ecx, ptr6@GOTOFF(%eax) +; LINUX: movl src6@GOTOFF(%eax), %ecx +; LINUX: movl %ecx, dst6@GOTOFF(%eax) +; LINUX: ret +} + + +;; Test constant pool references. +define double @test6(i32 %a.u) nounwind { +entry: + %tmp = icmp eq i32 %a.u,0 + %retval = select i1 %tmp, double 4.561230e+02, double 1.234560e+02 + ret double %retval + +; LINUX: .LCPI6_0: + +; LINUX: test6: +; LINUX: calll .L6$pb +; LINUX: .L6$pb: +; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L6$pb), +; LINUX: fldl .LCPI6_0@GOTOFF( +} + + +;; Test jump table references. 
+define void @test7(i32 %n.u) nounwind { +entry: + switch i32 %n.u, label %bb12 [i32 1, label %bb i32 2, label %bb6 i32 4, label %bb7 i32 5, label %bb8 i32 6, label %bb10 i32 7, label %bb1 i32 8, label %bb3 i32 9, label %bb4 i32 10, label %bb9 i32 11, label %bb2 i32 12, label %bb5 i32 13, label %bb11 ] +bb: + tail call void(...)* @foo1() + ret void +bb1: + tail call void(...)* @foo2() + ret void +bb2: + tail call void(...)* @foo6() + ret void +bb3: + tail call void(...)* @foo3() + ret void +bb4: + tail call void(...)* @foo4() + ret void +bb5: + tail call void(...)* @foo5() + ret void +bb6: + tail call void(...)* @foo1() + ret void +bb7: + tail call void(...)* @foo2() + ret void +bb8: + tail call void(...)* @foo6() + ret void +bb9: + tail call void(...)* @foo3() + ret void +bb10: + tail call void(...)* @foo4() + ret void +bb11: + tail call void(...)* @foo5() + ret void +bb12: + tail call void(...)* @foo6() + ret void + +; LINUX: test7: +; LINUX: calll .L7$pb +; LINUX: .L7$pb: +; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L7$pb), +; LINUX: .LJTI7_0@GOTOFF( +; LINUX: jmpl * + +; LINUX: .LJTI7_0: +; LINUX: .long .LBB7_2@GOTOFF +; LINUX: .long .LBB7_8@GOTOFF +; LINUX: .long .LBB7_14@GOTOFF +; LINUX: .long .LBB7_9@GOTOFF +; LINUX: .long .LBB7_10@GOTOFF +} + +declare void @foo1(...) +declare void @foo2(...) +declare void @foo6(...) +declare void @foo3(...) +declare void @foo4(...) +declare void @foo5(...)
diff --git a/src/LLVM/test/CodeGen/X86/pic_jumptable.ll b/src/LLVM/test/CodeGen/X86/pic_jumptable.ll new file mode 100644 index 0000000..439375d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/pic_jumptable.ll
@@ -0,0 +1,86 @@ +; RUN: llc < %s -relocation-model=pic -mtriple=i386-linux-gnu -asm-verbose=false \ +; RUN: | FileCheck %s --check-prefix=CHECK-LINUX +; RUN: llc < %s -relocation-model=pic -mtriple=i686-apple-darwin -asm-verbose=false \ +; RUN: | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin | not grep 'lJTI' +; rdar://6971437 +; rdar://7738756 + +declare void @_Z3bari(i32) + +; CHECK-LINUX: .text._Z3fooILi1EEvi,"axG",@progbits,_Z3fooILi1EEvi,comdat +define linkonce void @_Z3fooILi1EEvi(i32 %Y) nounwind { +entry: +; CHECK: L0$pb +; CHECK-NOT: leal +; CHECK: Ltmp0 = LJTI0_0-L0$pb +; CHECK-NEXT: addl Ltmp0(%eax,%ecx,4) +; CHECK-NEXT: jmpl *%eax + %Y_addr = alloca i32 ; <i32*> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store i32 %Y, i32* %Y_addr + %tmp = load i32* %Y_addr ; <i32> [#uses=1] + switch i32 %tmp, label %bb10 [ + i32 0, label %bb3 + i32 1, label %bb + i32 2, label %bb + i32 3, label %bb + i32 4, label %bb + i32 5, label %bb + i32 6, label %bb + i32 7, label %bb + i32 8, label %bb + i32 9, label %bb + i32 10, label %bb + i32 12, label %bb1 + i32 13, label %bb5 + i32 14, label %bb6 + i32 16, label %bb2 + i32 17, label %bb4 + i32 23, label %bb8 + i32 27, label %bb7 + i32 34, label %bb9 + ] + +bb: ; preds = %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry + br label %bb1 + +bb1: ; preds = %bb, %entry + br label %bb2 + +bb2: ; preds = %bb1, %entry + call void @_Z3bari( i32 1 ) + br label %bb11 + +bb3: ; preds = %entry + br label %bb4 + +bb4: ; preds = %bb3, %entry + br label %bb5 + +bb5: ; preds = %bb4, %entry + br label %bb6 + +bb6: ; preds = %bb5, %entry + call void @_Z3bari( i32 2 ) + br label %bb11 + +bb7: ; preds = %entry + br label %bb8 + +bb8: ; preds = %bb7, %entry + br label %bb9 + +bb9: ; preds = %bb8, %entry + call void @_Z3bari( i32 3 ) + br label %bb11 + +bb10: ; preds = %entry + br label %bb11 + +bb11: ; preds = %bb10, %bb9, %bb6, %bb2 + br label %return + +return: ; preds = 
%bb11 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/pmul.ll b/src/LLVM/test/CodeGen/X86/pmul.ll new file mode 100644 index 0000000..d8ed4c0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/pmul.ll
@@ -0,0 +1,34 @@ +; RUN: llc < %s -march=x86 -mattr=sse41 -stack-alignment=16 -join-physregs > %t +; RUN: grep pmul %t | count 12 +; RUN: grep mov %t | count 11 + +; The f() arguments in %xmm0 and %xmm1 cause an extra movdqa without -join-physregs. + +define <4 x i32> @a(<4 x i32> %i) nounwind { + %A = mul <4 x i32> %i, < i32 117, i32 117, i32 117, i32 117 > + ret <4 x i32> %A +} +define <2 x i64> @b(<2 x i64> %i) nounwind { + %A = mul <2 x i64> %i, < i64 117, i64 117 > + ret <2 x i64> %A +} +define <4 x i32> @c(<4 x i32> %i, <4 x i32> %j) nounwind { + %A = mul <4 x i32> %i, %j + ret <4 x i32> %A +} +define <2 x i64> @d(<2 x i64> %i, <2 x i64> %j) nounwind { + %A = mul <2 x i64> %i, %j + ret <2 x i64> %A +} +; Use a call to force spills. +declare void @foo() +define <4 x i32> @e(<4 x i32> %i, <4 x i32> %j) nounwind { + call void @foo() + %A = mul <4 x i32> %i, %j + ret <4 x i32> %A +} +define <2 x i64> @f(<2 x i64> %i, <2 x i64> %j) nounwind { + call void @foo() + %A = mul <2 x i64> %i, %j + ret <2 x i64> %A +}
diff --git a/src/LLVM/test/CodeGen/X86/pmulld.ll b/src/LLVM/test/CodeGen/X86/pmulld.ll new file mode 100644 index 0000000..be527ae --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/pmulld.ll
@@ -0,0 +1,26 @@ +; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse41 -asm-verbose=0 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse41 -asm-verbose=0 | FileCheck %s -check-prefix=WIN64 + +define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind { +; CHECK: test1: +; CHECK-NEXT: pmulld + +; WIN64: test1: +; WIN64-NEXT: movdqa (%rcx), %xmm0 +; WIN64-NEXT: pmulld (%rdx), %xmm0 + %C = mul <4 x i32> %A, %B + ret <4 x i32> %C +} + +define <4 x i32> @test1a(<4 x i32> %A, <4 x i32> *%Bp) nounwind { +; CHECK: test1a: +; CHECK-NEXT: pmulld + +; WIN64: test1a: +; WIN64-NEXT: movdqa (%rcx), %xmm0 +; WIN64-NEXT: pmulld (%rdx), %xmm0 + + %B = load <4 x i32>* %Bp + %C = mul <4 x i32> %A, %B + ret <4 x i32> %C +}
diff --git a/src/LLVM/test/CodeGen/X86/popcnt.ll b/src/LLVM/test/CodeGen/X86/popcnt.ll new file mode 100644 index 0000000..430214c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/popcnt.ll
@@ -0,0 +1,38 @@ +; RUN: llc -march=x86-64 -mattr=+popcnt < %s | FileCheck %s + +define i8 @cnt8(i8 %x) nounwind readnone { + %cnt = tail call i8 @llvm.ctpop.i8(i8 %x) + ret i8 %cnt +; CHECK: cnt8: +; CHECK: popcntw +; CHECK: ret +} + +define i16 @cnt16(i16 %x) nounwind readnone { + %cnt = tail call i16 @llvm.ctpop.i16(i16 %x) + ret i16 %cnt +; CHECK: cnt16: +; CHECK: popcntw +; CHECK: ret +} + +define i32 @cnt32(i32 %x) nounwind readnone { + %cnt = tail call i32 @llvm.ctpop.i32(i32 %x) + ret i32 %cnt +; CHECK: cnt32: +; CHECK: popcntl +; CHECK: ret +} + +define i64 @cnt64(i64 %x) nounwind readnone { + %cnt = tail call i64 @llvm.ctpop.i64(i64 %x) + ret i64 %cnt +; CHECK: cnt64: +; CHECK: popcntq +; CHECK: ret +} + +declare i8 @llvm.ctpop.i8(i8) nounwind readnone +declare i16 @llvm.ctpop.i16(i16) nounwind readnone +declare i32 @llvm.ctpop.i32(i32) nounwind readnone +declare i64 @llvm.ctpop.i64(i64) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/X86/postalloc-coalescing.ll b/src/LLVM/test/CodeGen/X86/postalloc-coalescing.ll new file mode 100644 index 0000000..fe6f521 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/postalloc-coalescing.ll
@@ -0,0 +1,35 @@ +; RUN: llc < %s -march=x86 | grep mov | count 3 + +define fastcc i32 @_Z18yy_get_next_bufferv() nounwind { +entry: + br label %bb131 + +bb116: ; preds = %bb131 + %tmp125126 = trunc i32 %c.1 to i8 ; <i8> [#uses=1] + store i8 %tmp125126, i8* null, align 1 + br label %bb131 + +bb131: ; preds = %bb116, %entry + %c.2 = phi i32 [ %c.1, %bb116 ], [ 42, %entry ] ; <i32> [#uses=1] + %c.1 = select i1 false, i32 0, i32 %c.2 ; <i32> [#uses=4] + %tmp181 = icmp eq i32 %c.1, -1 ; <i1> [#uses=1] + br i1 %tmp181, label %bb158, label %bb116 + +bb158: ; preds = %bb131 + br i1 true, label %cond_true163, label %cond_next178 + +cond_true163: ; preds = %bb158 + %tmp172173 = trunc i32 %c.1 to i8 ; <i8> [#uses=1] + store i8 %tmp172173, i8* null, align 1 + br label %cond_next178 + +cond_next178: ; preds = %cond_true163, %bb158 + %tmp180 = icmp eq i32 %c.1, -1 ; <i1> [#uses=1] + br i1 %tmp180, label %cond_next184, label %cond_next199 + +cond_next184: ; preds = %cond_next178 + ret i32 0 + +cond_next199: ; preds = %cond_next178 + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/X86/postra-licm.ll b/src/LLVM/test/CodeGen/X86/postra-licm.ll new file mode 100644 index 0000000..48c48ae --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/postra-licm.ll
@@ -0,0 +1,188 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic -disable-fp-elim | FileCheck %s -check-prefix=X86-32 +; RUN: llc < %s -mtriple=x86_64-apple-darwin -relocation-model=pic -disable-fp-elim | FileCheck %s -check-prefix=X86-64 + +; MachineLICM should be able to hoist loop invariant reload out of the loop. +; Only linear scan needs this, -regalloc=greedy sinks the spill instead. +; rdar://7233099 + +%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } +%struct.__sFILEX = type opaque +%struct.__sbuf = type { i8*, i32 } +%struct.epoch_t = type { %struct.trans_t*, %struct.trans_t*, i32, i32, i32, i32, i32 } +%struct.trans_t = type { i32, i32, i32, i8* } + +@.str12 = external constant [2 x i8], align 1 ; <[2 x i8]*> [#uses=1] +@.str19 = external constant [7 x i8], align 1 ; <[7 x i8]*> [#uses=1] +@.str24 = external constant [4 x i8], align 1 ; <[4 x i8]*> [#uses=1] + +define i32 @t1(i32 %c, i8** nocapture %v) nounwind ssp { +; X86-32: t1: +entry: + br i1 undef, label %bb, label %bb3 + +bb: ; preds = %entry + unreachable + +bb3: ; preds = %entry + br i1 undef, label %bb.i, label %bb.nph41 + +bb.i: ; preds = %bb3 + unreachable + +bb.nph41: ; preds = %bb3 + %0 = call %struct.FILE* @"\01_fopen$UNIX2003"(i8* undef, i8* getelementptr inbounds ([2 x i8]* @.str12, i32 0, i32 0)) nounwind ; <%struct.FILE*> [#uses=3] + br i1 undef, label %bb4, label %bb5.preheader + +bb5.preheader: ; preds = %bb.nph41 + br label %bb5 + +bb4: ; preds = %bb.nph41 + unreachable + +bb5: ; preds = %bb5, %bb5.preheader + br i1 undef, label %bb7, label %bb5 + +bb7: ; preds = %bb5 + br i1 undef, label %bb9, label %bb12 + +bb9: ; preds = %bb7 + unreachable + +bb12: ; preds = %bb7 + br i1 undef, label %bb16, label %bb22 + +bb16: ; preds = %bb12 + unreachable + +bb22: ; preds = %bb12 + br label 
%bb.i1 + +bb.i1: ; preds = %bb.i1, %bb22 + %1 = icmp eq i8 undef, 69 ; <i1> [#uses=1] + br i1 %1, label %imix_test.exit, label %bb.i1 + +imix_test.exit: ; preds = %bb.i1 + br i1 undef, label %bb23, label %bb26.preheader + +bb26.preheader: ; preds = %imix_test.exit + br i1 undef, label %bb28, label %bb30 + +bb23: ; preds = %imix_test.exit + unreachable +; Verify that there are no loads inside the loop. +; X86-32: %bb26.preheader +; X86-32: .align 4 +; X86-32-NOT: (%esp), +; X86-32-NOT: (%ebp), +; X86-32: jmp + +bb28: ; preds = %bb28, %bb26.preheader + %counter.035 = phi i32 [ %3, %bb28 ], [ 0, %bb26.preheader ] ; <i32> [#uses=2] + %tmp56 = shl i32 %counter.035, 2 ; <i32> [#uses=0] + %2 = call i8* @fgets(i8* undef, i32 50, %struct.FILE* %0) nounwind ; <i8*> [#uses=0] + %3 = add nsw i32 %counter.035, 1 ; <i32> [#uses=1] + %4 = call i32 @feof(%struct.FILE* %0) nounwind ; <i32> [#uses=0] + br label %bb28 + +bb30: ; preds = %bb26.preheader + %5 = call i32 @strcmp(i8* undef, i8* getelementptr inbounds ([7 x i8]* @.str19, i32 0, i32 0)) nounwind readonly ; <i32> [#uses=0] + br i1 undef, label %bb34, label %bb70 + +bb32.loopexit: ; preds = %bb45 + %6 = icmp eq i32 undef, 0 ; <i1> [#uses=1] + %indvar.next55 = add i32 %indvar54, 1 ; <i32> [#uses=1] + br i1 %6, label %bb34, label %bb70 + +bb34: ; preds = %bb32.loopexit, %bb30 + %indvar54 = phi i32 [ %indvar.next55, %bb32.loopexit ], [ 0, %bb30 ] ; <i32> [#uses=3] + br i1 false, label %bb35, label %bb39.preheader + +bb35: ; preds = %bb34 + unreachable + +bb39.preheader: ; preds = %bb34 + %7 = getelementptr inbounds %struct.epoch_t* undef, i32 %indvar54, i32 3 ; <i32*> [#uses=1] + %8 = getelementptr inbounds %struct.epoch_t* undef, i32 %indvar54, i32 2 ; <i32*> [#uses=0] + br i1 false, label %bb42, label %bb45 + +bb42: ; preds = %bb39.preheader + unreachable + +bb45: ; preds = %bb39.preheader + %9 = call i32 @strcmp(i8* undef, i8* getelementptr inbounds ([4 x i8]* @.str24, i32 0, i32 0)) nounwind readonly ; <i32> [#uses=0] + br 
i1 false, label %bb47, label %bb32.loopexit + +bb47: ; preds = %bb45 + %10 = load i32* %7, align 4 ; <i32> [#uses=0] + unreachable + +bb70: ; preds = %bb32.loopexit, %bb30 + br i1 undef, label %bb78, label %bb76 + +bb76: ; preds = %bb70 + unreachable + +bb78: ; preds = %bb70 + br i1 undef, label %bb83, label %bb79 + +bb79: ; preds = %bb78 + unreachable + +bb83: ; preds = %bb78 + call void @rewind(%struct.FILE* %0) nounwind + unreachable +} + +declare %struct.FILE* @"\01_fopen$UNIX2003"(i8*, i8*) + +declare i8* @fgets(i8*, i32, %struct.FILE* nocapture) nounwind + +declare void @rewind(%struct.FILE* nocapture) nounwind + +declare i32 @feof(%struct.FILE* nocapture) nounwind + +declare i32 @strcmp(i8* nocapture, i8* nocapture) nounwind readonly + +@map_4_to_16 = external constant [16 x i16], align 32 ; <[16 x i16]*> [#uses=2] + +define void @t2(i8* nocapture %bufp, i8* nocapture %data, i32 %dsize) nounwind ssp { +; X86-64: t2: +entry: + br i1 undef, label %return, label %bb.nph + +bb.nph: ; preds = %entry +; X86-64: movq _map_4_to_16@GOTPCREL(%rip) +; X86-64: .align 4 + %tmp5 = zext i32 undef to i64 ; <i64> [#uses=1] + %tmp6 = add i64 %tmp5, 1 ; <i64> [#uses=1] + %tmp11 = shl i64 undef, 1 ; <i64> [#uses=1] + %tmp14 = mul i64 undef, 3 ; <i64> [#uses=1] + br label %bb + +bb: ; preds = %bb, %bb.nph + %tmp9 = mul i64 undef, undef ; <i64> [#uses=2] + %tmp12 = add i64 %tmp11, %tmp9 ; <i64> [#uses=1] + %scevgep13 = getelementptr i8* %bufp, i64 %tmp12 ; <i8*> [#uses=1] + %tmp15 = add i64 %tmp14, %tmp9 ; <i64> [#uses=1] + %scevgep16 = getelementptr i8* %bufp, i64 %tmp15 ; <i8*> [#uses=1] + %0 = load i8* undef, align 1 ; <i8> [#uses=1] + %1 = zext i8 %0 to i32 ; <i32> [#uses=1] + %2 = getelementptr inbounds [16 x i16]* @map_4_to_16, i64 0, i64 0 ; <i16*> [#uses=1] + %3 = load i16* %2, align 2 ; <i16> [#uses=1] + %4 = trunc i16 %3 to i8 ; <i8> [#uses=1] + store i8 %4, i8* undef, align 1 + %5 = and i32 %1, 15 ; <i32> [#uses=1] + %6 = zext i32 %5 to i64 ; <i64> [#uses=1] + %7 = 
getelementptr inbounds [16 x i16]* @map_4_to_16, i64 0, i64 %6 ; <i16*> [#uses=1] + %8 = load i16* %7, align 2 ; <i16> [#uses=2] + %9 = lshr i16 %8, 8 ; <i16> [#uses=1] + %10 = trunc i16 %9 to i8 ; <i8> [#uses=1] + store i8 %10, i8* %scevgep13, align 1 + %11 = trunc i16 %8 to i8 ; <i8> [#uses=1] + store i8 %11, i8* %scevgep16, align 1 + %exitcond = icmp eq i64 undef, %tmp6 ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/powi.ll b/src/LLVM/test/CodeGen/X86/powi.ll new file mode 100644 index 0000000..c3d6831 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/powi.ll
@@ -0,0 +1,11 @@ +; RUN: llc %s -march=x86 -mcpu=yonah -o - | grep mulsd | count 6 +; Ideally this would compile to 5 multiplies. + +define double @_Z3f10d(double %a) nounwind readonly ssp noredzone { +entry: + %0 = tail call double @llvm.powi.f64(double %a, i32 15) nounwind ; <double> [#uses=1] + ret double %0 +} + +declare double @llvm.powi.f64(double, i32) nounwind readonly +
diff --git a/src/LLVM/test/CodeGen/X86/pr10068.ll b/src/LLVM/test/CodeGen/X86/pr10068.ll new file mode 100644 index 0000000..8829c5d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/pr10068.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -march=x86 + +define void @foobar() { +entry: + %sub.i = trunc i64 undef to i32 + %shr80.i = ashr i32 %sub.i, 16 + %add82.i = add nsw i32 %shr80.i, 1 + %notlhs.i = icmp slt i32 %shr80.i, undef + %notrhs.i = icmp sgt i32 %add82.i, -1 + %or.cond.not.i = and i1 %notrhs.i, %notlhs.i + %cmp154.i = icmp slt i32 0, undef + %or.cond406.i = and i1 %or.cond.not.i, %cmp154.i + %or.cond406.not.i = xor i1 %or.cond406.i, true + %or.cond407.i = or i1 undef, %or.cond406.not.i + br i1 %or.cond407.i, label %if.then158.i, label %if.end163.i + +if.then158.i: + ret void + +if.end163.i: ; preds = %if.end67.i + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/pr10420.ll b/src/LLVM/test/CodeGen/X86/pr10420.ll new file mode 100644 index 0000000..3993f24 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/pr10420.ll
@@ -0,0 +1,21 @@ +; RUN: llc < %s -mtriple=x86_64-apple-macosx -disable-cfi | FileCheck %s + +define private void @foo() { + ret void +} + +define void @bar() { + call void @foo() + ret void; +} + +; CHECK: _bar: ## @bar +; CHECK-NEXT: Ltmp2: + +; CHECK: Ltmp12: +; CHECK-NEXT: Ltmp13 = L_foo-Ltmp12 ## FDE initial location +; CHECK-NEXT: .quad Ltmp13 + +; CHECK: Ltmp19: +; CHECK-NEXT: Ltmp20 = Ltmp2-Ltmp19 ## FDE initial location +; CHECK-NEXT: .quad Ltmp20
diff --git a/src/LLVM/test/CodeGen/X86/pr1462.ll b/src/LLVM/test/CodeGen/X86/pr1462.ll new file mode 100644 index 0000000..62549a5 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/pr1462.ll
@@ -0,0 +1,25 @@ +; RUN: llc < %s +; PR1462 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64- +v64:64:64-v128:128:128-a0:0:64" +target triple = "x86_64-unknown-linux-gnu" + +define hidden i128 @__addvti3(i128 %a1, i128 %b2) { +entry: + %tmp8 = add i128 %b2, %a1 ; <i128> [#uses=3] + %tmp10 = icmp sgt i128 %b2, -1 ; <i1> [#uses=1] + %tmp18 = icmp sgt i128 %tmp8, %a1 ; <i1> [#uses=1] + %tmp14 = icmp slt i128 %tmp8, %a1 ; <i1> [#uses=1] + %iftmp.0.0.in = select i1 %tmp10, i1 %tmp14, i1 %tmp18 ; <i1> [#uses=1] + br i1 %iftmp.0.0.in, label %cond_true22, label %cond_next23 + +cond_true22: ; preds = %entry + tail call void @abort( ) + unreachable + +cond_next23: ; preds = %entry + ret i128 %tmp8 +} + +declare void @abort()
diff --git a/src/LLVM/test/CodeGen/X86/pr1489.ll b/src/LLVM/test/CodeGen/X86/pr1489.ll new file mode 100644 index 0000000..c9e24bf --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/pr1489.ll
@@ -0,0 +1,55 @@ +; RUN: llc < %s -disable-fp-elim -O0 -mcpu=i486 | grep 1082126238 | count 3 +; RUN: llc < %s -disable-fp-elim -O0 -mcpu=i486 | grep -- -1236950581 | count 1 +;; magic constants are 3.999f and half of 3.999 +; ModuleID = '1489.c' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "i686-apple-darwin8" +@.str = internal constant [13 x i8] c"%d %d %d %d\0A\00" ; <[13 x i8]*> [#uses=1] + +define i32 @quux() nounwind { +entry: + %tmp1 = tail call i32 @lrintf( float 0x400FFDF3C0000000 ) ; <i32> [#uses=1] + %tmp2 = icmp slt i32 %tmp1, 1 ; <i1> [#uses=1] + %tmp23 = zext i1 %tmp2 to i32 ; <i32> [#uses=1] + ret i32 %tmp23 +} + +declare i32 @lrintf(float) + +define i32 @foo() nounwind { +entry: + %tmp1 = tail call i32 @lrint( double 3.999000e+00 ) ; <i32> [#uses=1] + %tmp2 = icmp slt i32 %tmp1, 1 ; <i1> [#uses=1] + %tmp23 = zext i1 %tmp2 to i32 ; <i32> [#uses=1] + ret i32 %tmp23 +} + +declare i32 @lrint(double) + +define i32 @bar() nounwind { +entry: + %tmp1 = tail call i32 @lrintf( float 0x400FFDF3C0000000 ) ; <i32> [#uses=1] + %tmp2 = icmp slt i32 %tmp1, 1 ; <i1> [#uses=1] + %tmp23 = zext i1 %tmp2 to i32 ; <i32> [#uses=1] + ret i32 %tmp23 +} + +define i32 @baz() nounwind { +entry: + %tmp1 = tail call i32 @lrintf( float 0x400FFDF3C0000000 ) ; <i32> [#uses=1] + %tmp2 = icmp slt i32 %tmp1, 1 ; <i1> [#uses=1] + %tmp23 = zext i1 %tmp2 to i32 ; <i32> [#uses=1] + ret i32 %tmp23 +} + +define i32 @main() nounwind { +entry: + %tmp = tail call i32 @baz( ) ; <i32> [#uses=1] + %tmp1 = tail call i32 @bar( ) ; <i32> [#uses=1] + %tmp2 = tail call i32 @foo( ) ; <i32> [#uses=1] + %tmp3 = tail call i32 @quux( ) ; <i32> [#uses=1] + %tmp5 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([13 x i8]* @.str, i32 0, i32 0), i32 %tmp3, i32 %tmp2, i32 %tmp1, i32 %tmp ) ; <i32> [#uses=0] + ret i32 undef +} + +declare i32 @printf(i8*, ...)
diff --git a/src/LLVM/test/CodeGen/X86/pr1505.ll b/src/LLVM/test/CodeGen/X86/pr1505.ll new file mode 100644 index 0000000..883a806 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/pr1505.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -mcpu=i486 | not grep fldl +; PR1505 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "i686-apple-darwin8" +@G = weak global float 0.000000e+00 ; <float*> [#uses=1] + +define void @t1(float %F) { +entry: + store float %F, float* @G + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/pr1505b.ll b/src/LLVM/test/CodeGen/X86/pr1505b.ll new file mode 100644 index 0000000..945ec4c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/pr1505b.ll
@@ -0,0 +1,80 @@ +; RUN: llc < %s -mcpu=i486 | FileCheck %s +; PR1505 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "i686-apple-darwin8" + %"struct.std::basic_ios<char,std::char_traits<char> >" = type { %"struct.std::ios_base", %"struct.std::basic_ostream<char,std::char_traits<char> >"*, i8, i8, %"struct.std::basic_streambuf<char,std::char_traits<char> >"*, %"struct.std::ctype<char>"*, %"struct.std::num_get<char,std::istreambuf_iterator<char, std::char_traits<char> > >"*, %"struct.std::num_get<char,std::istreambuf_iterator<char, std::char_traits<char> > >"* } + %"struct.std::basic_ostream<char,std::char_traits<char> >" = type { i32 (...)**, %"struct.std::basic_ios<char,std::char_traits<char> >" } + %"struct.std::basic_streambuf<char,std::char_traits<char> >" = type { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, %"struct.std::locale" } + %"struct.std::ctype<char>" = type { %"struct.std::locale::facet", i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 } + %"struct.std::ctype_base" = type <{ i8 }> + %"struct.std::ios_base" = type { i32 (...)**, i32, i32, i32, i32, i32, %"struct.std::ios_base::_Callback_list"*, %"struct.std::ios_base::_Words", [8 x %"struct.std::ios_base::_Words"], i32, %"struct.std::ios_base::_Words"*, %"struct.std::locale" } + %"struct.std::ios_base::_Callback_list" = type { %"struct.std::ios_base::_Callback_list"*, void (i32, %"struct.std::ios_base"*, i32)*, i32, i32 } + %"struct.std::ios_base::_Words" = type { i8*, i32 } + %"struct.std::locale" = type { %"struct.std::locale::_Impl"* } + %"struct.std::locale::_Impl" = type { i32, %"struct.std::locale::facet"**, i32, %"struct.std::locale::facet"**, i8** } + %"struct.std::locale::facet" = type { i32 (...)**, i32 } + %"struct.std::num_get<char,std::istreambuf_iterator<char, std::char_traits<char> > >" = type { %"struct.std::locale::facet" } +@a = global float 0x3FD3333340000000 ; <float*> 
[#uses=1] +@b = global double 6.000000e-01, align 8 ; <double*> [#uses=1] +@_ZSt8__ioinit = internal global %"struct.std::ctype_base" zeroinitializer ; <%"struct.std::ctype_base"*> [#uses=2] +@__dso_handle = external global i8* ; <i8**> [#uses=1] +@_ZSt4cout = external global %"struct.std::basic_ostream<char,std::char_traits<char> >" ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=2] +@.str = internal constant [12 x i8] c"tan float: \00" ; <[12 x i8]*> [#uses=1] +@.str1 = internal constant [13 x i8] c"tan double: \00" ; <[13 x i8]*> [#uses=1] + +declare void @_ZNSt8ios_base4InitD1Ev(%"struct.std::ctype_base"*) + +declare void @_ZNSt8ios_base4InitC1Ev(%"struct.std::ctype_base"*) + +declare i32 @__cxa_atexit(void (i8*)*, i8*, i8*) + +; CHECK: main +define i32 @main() { +entry: +; CHECK: flds + %tmp6 = volatile load float* @a ; <float> [#uses=1] +; CHECK: fstps (%esp) +; CHECK: tanf + %tmp9 = tail call float @tanf( float %tmp6 ) ; <float> [#uses=1] +; Spill returned value: +; CHECK: fstp + +; CHECK: fldl + %tmp12 = volatile load double* @b ; <double> [#uses=1] +; CHECK: fstpl (%esp) +; CHECK: tan + %tmp13 = tail call double @tan( double %tmp12 ) ; <double> [#uses=1] +; Spill returned value: +; CHECK: fstp + %tmp1314 = fptrunc double %tmp13 to float ; <float> [#uses=1] + %tmp16 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc( %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4cout, i8* getelementptr ([12 x i8]* @.str, i32 0, i32 0) ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1] + %tmp1920 = fpext float %tmp9 to double ; <double> [#uses=1] +; reload: +; CHECK: fld +; CHECK: fstpl +; CHECK: ZNSolsEd + %tmp22 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZNSolsEd( %"struct.std::basic_ostream<char,std::char_traits<char> >"* %tmp16, double %tmp1920 ) ; 
<%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1] + %tmp30 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_( %"struct.std::basic_ostream<char,std::char_traits<char> >"* %tmp22 ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=0] +; reload: +; CHECK: fld +; CHECK: fstps +; CHECK: ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc + %tmp34 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc( %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4cout, i8* getelementptr ([13 x i8]* @.str1, i32 0, i32 0) ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1] + %tmp3940 = fpext float %tmp1314 to double ; <double> [#uses=1] +; CHECK: fstpl +; CHECK: ZNSolsEd + %tmp42 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZNSolsEd( %"struct.std::basic_ostream<char,std::char_traits<char> >"* %tmp34, double %tmp3940 ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1] + %tmp51 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_( %"struct.std::basic_ostream<char,std::char_traits<char> >"* %tmp42 ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=0] + ret i32 0 +} + +declare float @tanf(float) + +declare double @tan(double) + +declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, i8*) + +declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZNSolsEd(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, double) + +declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* 
@_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_(%"struct.std::basic_ostream<char,std::char_traits<char> >"*)
diff --git a/src/LLVM/test/CodeGen/X86/pr2177.ll b/src/LLVM/test/CodeGen/X86/pr2177.ll new file mode 100644 index 0000000..e941bf7 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/pr2177.ll
@@ -0,0 +1,35 @@ +; RUN: llc < %s +; PR2177 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-apple-darwin9.1.0" + %struct.S2259 = type { <4 x i16>, i8, i64 } + +define void @check2259va(i32 %z, ...) { +entry: + br i1 false, label %bb5, label %return +bb5: ; preds = %entry + switch i32 0, label %bb155 [ + i32 16, label %bb10 + i32 17, label %bb118 + i32 18, label %bb54 + i32 32, label %bb118 + i32 33, label %bb118 + i32 36, label %bb118 + ] +bb10: ; preds = %bb5 + ret void +bb54: ; preds = %bb5 + ret void +bb118: ; preds = %bb5, %bb5, %bb5, %bb5 + %tmp125 = load i8** null, align 8 ; <i8*> [#uses=1] + %tmp125126 = bitcast i8* %tmp125 to %struct.S2259* ; <%struct.S2259*> [#uses=1] + %tmp128 = getelementptr %struct.S2259* %tmp125126, i32 0, i32 0 ; <<4 x i16>*> [#uses=1] + %tmp129 = load <4 x i16>* %tmp128, align 8 ; <<4 x i16>> [#uses=1] + store <4 x i16> %tmp129, <4 x i16>* null, align 8 + ret void +bb155: ; preds = %bb5 + ret void +return: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/pr2182.ll b/src/LLVM/test/CodeGen/X86/pr2182.ll new file mode 100644 index 0000000..2a8bb35 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/pr2182.ll
@@ -0,0 +1,31 @@ +; RUN: llc < %s | FileCheck %s +; PR2182 + +target datalayout = +"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin8" +@x = weak global i32 0 ; <i32*> [#uses=8] + +define void @loop_2() nounwind { +; CHECK: loop_2: +; CHECK-NOT: ret +; CHECK: addl $3, (%{{.*}}) +; CHECK-NEXT: addl $3, (%{{.*}}) +; CHECK-NEXT: addl $3, (%{{.*}}) +; CHECK-NEXT: addl $3, (%{{.*}}) +; CHECK-NEXT: ret + + %tmp = volatile load i32* @x, align 4 ; <i32> [#uses=1] + %tmp1 = add i32 %tmp, 3 ; <i32> [#uses=1] + volatile store i32 %tmp1, i32* @x, align 4 + %tmp.1 = volatile load i32* @x, align 4 ; <i32> [#uses=1] + %tmp1.1 = add i32 %tmp.1, 3 ; <i32> [#uses=1] + volatile store i32 %tmp1.1, i32* @x, align 4 + %tmp.2 = volatile load i32* @x, align 4 ; <i32> [#uses=1] + %tmp1.2 = add i32 %tmp.2, 3 ; <i32> [#uses=1] + volatile store i32 %tmp1.2, i32* @x, align 4 + %tmp.3 = volatile load i32* @x, align 4 ; <i32> [#uses=1] + %tmp1.3 = add i32 %tmp.3, 3 ; <i32> [#uses=1] + volatile store i32 %tmp1.3, i32* @x, align 4 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/pr2326.ll b/src/LLVM/test/CodeGen/X86/pr2326.ll new file mode 100644 index 0000000..f82dcb5 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/pr2326.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -march=x86 | grep sete +; PR2326 + +define i32 @func_59(i32 %p_60) nounwind { +entry: + %l_108 = alloca i32 ; <i32*> [#uses=2] + %tmp15 = load i32* null, align 4 ; <i32> [#uses=1] + %tmp16 = load i32* %l_108, align 4 ; <i32> [#uses=1] + %tmp17 = icmp eq i32 %tmp15, %tmp16 ; <i1> [#uses=1] + %tmp1718 = zext i1 %tmp17 to i8 ; <i8> [#uses=1] + %tmp19 = load i32* null, align 4 ; <i32> [#uses=1] + %tmp20 = load i32* %l_108, align 4 ; <i32> [#uses=1] + %tmp21 = icmp ule i32 %tmp19, %tmp20 ; <i1> [#uses=1] + %tmp2122 = zext i1 %tmp21 to i8 ; <i8> [#uses=1] + %toBool23 = icmp ne i8 %tmp1718, 0 ; <i1> [#uses=1] + %toBool24 = icmp ne i8 %tmp2122, 0 ; <i1> [#uses=1] + %tmp25 = and i1 %toBool23, %toBool24 ; <i1> [#uses=1] + %tmp2526 = zext i1 %tmp25 to i8 ; <i8> [#uses=1] + %tmp252627 = zext i8 %tmp2526 to i32 ; <i32> [#uses=1] + %tmp29 = call i32 (...)* @func_15( i32 %tmp252627, i32 0 ) nounwind ; <i32> [#uses=0] + unreachable +} + +declare i32 @func_15(...)
diff --git a/src/LLVM/test/CodeGen/X86/pr2656.ll b/src/LLVM/test/CodeGen/X86/pr2656.ll new file mode 100644 index 0000000..afd7114 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/pr2656.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {xorps.\*sp} | count 1 +; PR2656 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i686-apple-darwin9.4.0" + %struct.anon = type <{ float, float }> +@.str = internal constant [17 x i8] c"pt: %.0f, %.0f\0A\00\00" ; <[17 x i8]*> [#uses=1] + +define void @foo(%struct.anon* byval %p) nounwind { +entry: + %tmp = getelementptr %struct.anon* %p, i32 0, i32 0 ; <float*> [#uses=1] + %tmp1 = load float* %tmp ; <float> [#uses=1] + %tmp2 = getelementptr %struct.anon* %p, i32 0, i32 1 ; <float*> [#uses=1] + %tmp3 = load float* %tmp2 ; <float> [#uses=1] + %neg = fsub float -0.000000e+00, %tmp1 ; <float> [#uses=1] + %conv = fpext float %neg to double ; <double> [#uses=1] + %neg4 = fsub float -0.000000e+00, %tmp3 ; <float> [#uses=1] + %conv5 = fpext float %neg4 to double ; <double> [#uses=1] + %call = call i32 (...)* @printf( i8* getelementptr ([17 x i8]* @.str, i32 0, i32 0), double %conv, double %conv5 ) ; <i32> [#uses=0] + ret void +} + +declare i32 @printf(...)
diff --git a/src/LLVM/test/CodeGen/X86/pr2659.ll b/src/LLVM/test/CodeGen/X86/pr2659.ll new file mode 100644 index 0000000..5dab5c9 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/pr2659.ll
@@ -0,0 +1,44 @@ +; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin9.4.0 -disable-branch-fold | FileCheck %s +; PR2659 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i686-apple-darwin9.4.0" + +define i32 @binomial(i32 %n, i32 %k) nounwind { +entry: + %cmp = icmp ugt i32 %k, %n ; <i1> [#uses=1] + br i1 %cmp, label %ifthen, label %forcond.preheader + +forcond.preheader: ; preds = %entry + %cmp44 = icmp eq i32 %k, 0 ; <i1> [#uses=1] + br i1 %cmp44, label %afterfor, label %forbody + +; CHECK: %forcond.preheader +; CHECK: movl $1 +; CHECK-NOT: xorl +; CHECK-NOT: movl +; CHECK-NOT: LBB +; CHECK: jne + +; There should be no moves required in the for loop body. +; CHECK: %forbody +; CHECK-NOT: mov + +ifthen: ; preds = %entry + ret i32 0 + +forbody: ; preds = %forbody, %forcond.preheader + %indvar = phi i32 [ 0, %forcond.preheader ], [ %divisor.02, %forbody ] ; <i32> [#uses=3] + %accumulator.01 = phi i32 [ 1, %forcond.preheader ], [ %div, %forbody ] ; <i32> [#uses=1] + %divisor.02 = add i32 %indvar, 1 ; <i32> [#uses=2] + %n.addr.03 = sub i32 %n, %indvar ; <i32> [#uses=1] + %mul = mul i32 %n.addr.03, %accumulator.01 ; <i32> [#uses=1] + %div = udiv i32 %mul, %divisor.02 ; <i32> [#uses=2] + %inc = add i32 %indvar, 2 ; <i32> [#uses=1] + %cmp4 = icmp ugt i32 %inc, %k ; <i1> [#uses=1] + br i1 %cmp4, label %afterfor, label %forbody + +afterfor: ; preds = %forbody, %forcond.preheader + %accumulator.0.lcssa = phi i32 [ 1, %forcond.preheader ], [ %div, %forbody ] ; <i32> [#uses=1] + ret i32 %accumulator.0.lcssa +}
diff --git a/src/LLVM/test/CodeGen/X86/pr2849.ll b/src/LLVM/test/CodeGen/X86/pr2849.ll new file mode 100644 index 0000000..0fec481 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/pr2849.ll
@@ -0,0 +1,38 @@ +; RUN: llc < %s +; PR2849 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + %struct.BaseBoundPtrs = type { i8*, i8* } + %struct.HashEntry = type { %struct.BaseBoundPtrs } + %struct.NODE = type { i8, i8, %struct.anon } + %struct.anon = type { %struct.xlist } + %struct.xlist = type { %struct.NODE*, %struct.NODE* } + %struct.xvect = type { %struct.NODE** } +@hash_table_begin = external global %struct.HashEntry* + +define void @obshow() { +entry: + %tmp = load %struct.HashEntry** @hash_table_begin, align 8 + br i1 false, label %xlygetvalue.exit, label %xlygetvalue.exit + +xlygetvalue.exit: + %storemerge.in.i = phi %struct.NODE** [ null, %entry ], [ null, %entry ] + %storemerge.i = load %struct.NODE** %storemerge.in.i + %tmp1 = ptrtoint %struct.NODE** %storemerge.in.i to i64 + %tmp2 = lshr i64 %tmp1, 3 + %tmp3 = and i64 %tmp2, 2147483647 + %tmp4 = getelementptr %struct.HashEntry* %tmp, i64 %tmp3, i32 0, i32 1 + %tmp7 = load i8** %tmp4, align 8 + %tmp8 = getelementptr %struct.NODE* %storemerge.i, i64 0, i32 2 + %tmp9 = bitcast %struct.anon* %tmp8 to %struct.NODE*** + %tmp11 = load %struct.NODE*** %tmp9, align 8 + %tmp12 = ptrtoint %struct.NODE** %tmp11 to i64 + %tmp13 = lshr i64 %tmp12, 3 + %tmp14 = and i64 %tmp13, 2147483647 + %tmp15 = getelementptr %struct.HashEntry* %tmp, i64 %tmp14, i32 0, i32 1 + call fastcc void @xlprint(i8** %tmp4, i8* %tmp7, i8** %tmp15) + ret void +} + +declare fastcc void @xlprint(i8**, i8*, i8**)
diff --git a/src/LLVM/test/CodeGen/X86/pr2924.ll b/src/LLVM/test/CodeGen/X86/pr2924.ll new file mode 100644 index 0000000..b9e8dc1 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/pr2924.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s +; PR2924 + +target datalayout = +"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" +target triple = "i686-pc-linux-gnu" + +define x86_stdcallcc { i32, i8* } @_D3std6string7toupperFAaZAa({ i32, i8* } %s) { +entry_std.string.toupper: + %tmp58 = load i32* null + %tmp59 = icmp eq i32 %tmp58, 0 + %r.val = load { i32, i8* }* null, align 8 + %condtmp.0 = select i1 %tmp59, { i32, i8* } undef, { i32, i8* } %r.val + + ret { i32, i8* } %condtmp.0 +} +define { } @empty({ } %s) { +entry_std.string.toupper: + %tmp58 = load i32* null + %tmp59 = icmp eq i32 %tmp58, 0 + %r.val = load { }* null, align 8 + %condtmp.0 = select i1 %tmp59, { } undef, { } %r.val + ret { } %condtmp.0 +}
diff --git a/src/LLVM/test/CodeGen/X86/pr2982.ll b/src/LLVM/test/CodeGen/X86/pr2982.ll new file mode 100644 index 0000000..3f9a595 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/pr2982.ll
@@ -0,0 +1,26 @@ +; RUN: llc < %s -march=x86 +; PR2982 + +target datalayout = +"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin9.5" +@g_279 = external global i32 ; <i32*> [#uses=1] +@g_265 = external global i32 ; <i32*> [#uses=1] +@g_3 = external global i8 ; <i8*> [#uses=1] + +declare i32 @rshift_u_u(...) + +define void @bar() nounwind { +entry: + %0 = load i32* @g_279, align 4 ; <i32> [#uses=1] + %1 = shl i32 %0, 1 ; <i32> [#uses=1] + %2 = and i32 %1, 2 ; <i32> [#uses=1] + %3 = load i32* @g_265, align 4 ; <i32> [#uses=1] + %4 = load i8* @g_3, align 1 ; <i8> [#uses=1] + %5 = sext i8 %4 to i32 ; <i32> [#uses=1] + %6 = add i32 %2, %3 ; <i32> [#uses=1] + %7 = add i32 %6, %5 ; <i32> [#uses=1] + %8 = tail call i32 (...)* @rshift_u_u(i32 %7, i32 0) nounwind +; <i32> [#uses=0] + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/pr3154.ll b/src/LLVM/test/CodeGen/X86/pr3154.ll new file mode 100644 index 0000000..18df97c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/pr3154.ll
@@ -0,0 +1,104 @@ +; RUN: llc < %s -mtriple=i386-pc-linux-gnu -mattr=+sse2 +; RUN: llc < %s -mtriple=i386-pc-linux-gnu -mattr=+sse2 -relocation-model=pic -disable-fp-elim +; PR3154 + +define void @ff_flac_compute_autocorr_sse2(i32* %data, i32 %len, i32 %lag, double* %autoc) nounwind { +entry: + %c = alloca double, align 8 ; <double*> [#uses=2] + %0 = add i32 %len, 2 ; <i32> [#uses=1] + %1 = add i32 %0, %lag ; <i32> [#uses=1] + %2 = alloca double, i32 %1 ; <double*> [#uses=2] + %3 = getelementptr double* %2, i32 %lag ; <double*> [#uses=2] + %4 = ptrtoint double* %3 to i32 ; <i32> [#uses=1] + %5 = and i32 %4, 8 ; <i32> [#uses=1] + %6 = icmp eq i32 %5, 0 ; <i1> [#uses=1] + br i1 %6, label %bb19, label %bb + +bb: ; preds = %entry + %.sum = add i32 %lag, 1 ; <i32> [#uses=1] + %7 = getelementptr double* %2, i32 %.sum ; <double*> [#uses=1] + br label %bb19 + +bb19: ; preds = %bb, %entry + %data15.0 = phi double* [ %7, %bb ], [ %3, %entry ] ; <double*> [#uses=5] + %8 = sitofp i32 %len to double ; <double> [#uses=1] + %9 = fsub double %8, 1.000000e+00 ; <double> [#uses=1] + %10 = fdiv double 2.000000e+00, %9 ; <double> [#uses=1] + store double %10, double* %c, align 8 + %11 = ashr i32 %len, 1 ; <i32> [#uses=3] + %12 = mul i32 %11, -4 ; <i32> [#uses=2] + %13 = shl i32 %len, 1 ; <i32> [#uses=1] + %14 = and i32 %13, -4 ; <i32> [#uses=2] + call void asm sideeffect "movsd $0, %xmm7 \0A\09movapd ff_pd_1, %xmm6 \0A\09movapd ff_pd_2, %xmm5 \0A\09movlhps %xmm7, %xmm7 \0A\09subpd %xmm5, %xmm7 \0A\09addsd %xmm6, %xmm7 \0A\09", "*m,~{dirflag},~{fpsr},~{flags}"(double* %c) nounwind + %15 = and i32 %len, 1 ; <i32> [#uses=1] + %toBool = icmp eq i32 %15, 0 ; <i1> [#uses=1] + %16 = getelementptr double* %data15.0, i32 %11 ; <double*> [#uses=2] + %17 = getelementptr i32* %data, i32 %11 ; <i32*> [#uses=2] + br i1 %toBool, label %bb22, label %bb20 + +bb20: ; preds = %bb19 + %asmtmp = call { i32, i32 } asm sideeffect "1: \0A\09movapd %xmm7, %xmm1 \0A\09mulpd %xmm1, %xmm1 \0A\09movapd %xmm6, 
%xmm0 \0A\09subpd %xmm1, %xmm0 \0A\09pshufd $$0x4e, %xmm0, %xmm1 \0A\09cvtpi2pd ($3,$0), %xmm2 \0A\09cvtpi2pd -1*4($3,$1), %xmm3 \0A\09mulpd %xmm0, %xmm2 \0A\09mulpd %xmm1, %xmm3 \0A\09movapd %xmm2, ($2,$0,2) \0A\09movupd %xmm3, -1*8($2,$1,2) \0A\09subpd %xmm5, %xmm7 \0A\09sub $$8, $1 \0A\09add $$8, $0 \0A\09jl 1b \0A\09", "=&r,=&r,r,r,0,1,~{dirflag},~{fpsr},~{flags}"(double* %16, i32* %17, i32 %12, i32 %14) nounwind ; <{ i32, i32 }> [#uses=0] + br label %bb28.preheader + +bb22: ; preds = %bb19 + %asmtmp23 = call { i32, i32 } asm sideeffect "1: \0A\09movapd %xmm7, %xmm1 \0A\09mulpd %xmm1, %xmm1 \0A\09movapd %xmm6, %xmm0 \0A\09subpd %xmm1, %xmm0 \0A\09pshufd $$0x4e, %xmm0, %xmm1 \0A\09cvtpi2pd ($3,$0), %xmm2 \0A\09cvtpi2pd -2*4($3,$1), %xmm3 \0A\09mulpd %xmm0, %xmm2 \0A\09mulpd %xmm1, %xmm3 \0A\09movapd %xmm2, ($2,$0,2) \0A\09movapd %xmm3, -2*8($2,$1,2) \0A\09subpd %xmm5, %xmm7 \0A\09sub $$8, $1 \0A\09add $$8, $0 \0A\09jl 1b \0A\09", "=&r,=&r,r,r,0,1,~{dirflag},~{fpsr},~{flags}"(double* %16, i32* %17, i32 %12, i32 %14) nounwind ; <{ i32, i32 }> [#uses=0] + br label %bb28.preheader + +bb28.preheader: ; preds = %bb22, %bb20 + %18 = icmp sgt i32 %lag, 0 ; <i1> [#uses=2] + br i1 %18, label %bb27, label %bb29 + +bb27: ; preds = %bb27, %bb28.preheader + %j4.042 = phi i32 [ 0, %bb28.preheader ], [ %indvar.next45, %bb27 ] ; <i32> [#uses=2] + %19 = sub i32 %j4.042, %lag ; <i32> [#uses=1] + %20 = getelementptr double* %data15.0, i32 %19 ; <double*> [#uses=1] + store double 0.000000e+00, double* %20, align 8 + %indvar.next45 = add i32 %j4.042, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %indvar.next45, %lag ; <i1> [#uses=1] + br i1 %exitcond, label %bb29, label %bb27 + +bb29: ; preds = %bb27, %bb28.preheader + %21 = getelementptr double* %data15.0, i32 %len ; <double*> [#uses=3] + store double 0.000000e+00, double* %21, align 8 + br i1 %18, label %bb.nph, label %bb37 + +bb.nph: ; preds = %bb29 + %22 = mul i32 %len, -8 ; <i32> [#uses=2] + %23 = add i32 %lag, -2 ; <i32> 
[#uses=1] + br label %bb30 + +bb30: ; preds = %bb35, %bb.nph + %indvar = phi i32 [ 0, %bb.nph ], [ %indvar.next, %bb35 ] ; <i32> [#uses=2] + %j4.141 = shl i32 %indvar, 1 ; <i32> [#uses=8] + %24 = icmp eq i32 %23, %j4.141 ; <i1> [#uses=1] + %25 = or i32 %j4.141, 1 ; <i32> [#uses=2] + br i1 %24, label %bb31, label %bb33 + +bb31: ; preds = %bb30 + %26 = add i32 %j4.141, 2 ; <i32> [#uses=2] + %.sum38 = sub i32 %len, %j4.141 ; <i32> [#uses=1] + %27 = getelementptr double* %data15.0, i32 %.sum38 ; <double*> [#uses=1] + %28 = getelementptr double* %autoc, i32 %j4.141 ; <double*> [#uses=1] + %29 = getelementptr double* %autoc, i32 %25 ; <double*> [#uses=1] + %30 = getelementptr double* %autoc, i32 %26 ; <double*> [#uses=1] + %asmtmp32 = call i32 asm sideeffect "movsd ff_pd_1, %xmm0 \0A\09movsd ff_pd_1, %xmm1 \0A\09movsd ff_pd_1, %xmm2 \0A\091: \0A\09movapd ($4,$0), %xmm3 \0A\09movupd -8($5,$0), %xmm4 \0A\09movapd ($5,$0), %xmm5 \0A\09mulpd %xmm3, %xmm4 \0A\09mulpd %xmm3, %xmm5 \0A\09mulpd -16($5,$0), %xmm3 \0A\09addpd %xmm4, %xmm1 \0A\09addpd %xmm5, %xmm0 \0A\09addpd %xmm3, %xmm2 \0A\09add $$16, $0 \0A\09jl 1b \0A\09movhlps %xmm0, %xmm3 \0A\09movhlps %xmm1, %xmm4 \0A\09movhlps %xmm2, %xmm5 \0A\09addsd %xmm3, %xmm0 \0A\09addsd %xmm4, %xmm1 \0A\09addsd %xmm5, %xmm2 \0A\09movsd %xmm0, $1 \0A\09movsd %xmm1, $2 \0A\09movsd %xmm2, $3 \0A\09", "=&r,=*m,=*m,=*m,r,r,0,~{dirflag},~{fpsr},~{flags}"(double* %28, double* %29, double* %30, double* %21, double* %27, i32 %22) nounwind ; <i32> [#uses=0] + br label %bb35 + +bb33: ; preds = %bb30 + %.sum39 = sub i32 %len, %j4.141 ; <i32> [#uses=1] + %31 = getelementptr double* %data15.0, i32 %.sum39 ; <double*> [#uses=1] + %32 = getelementptr double* %autoc, i32 %j4.141 ; <double*> [#uses=1] + %33 = getelementptr double* %autoc, i32 %25 ; <double*> [#uses=1] + %asmtmp34 = call i32 asm sideeffect "movsd ff_pd_1, %xmm0 \0A\09movsd ff_pd_1, %xmm1 \0A\091: \0A\09movapd ($3,$0), %xmm3 \0A\09movupd -8($4,$0), %xmm4 \0A\09mulpd %xmm3, %xmm4 
\0A\09mulpd ($4,$0), %xmm3 \0A\09addpd %xmm4, %xmm1 \0A\09addpd %xmm3, %xmm0 \0A\09add $$16, $0 \0A\09jl 1b \0A\09movhlps %xmm0, %xmm3 \0A\09movhlps %xmm1, %xmm4 \0A\09addsd %xmm3, %xmm0 \0A\09addsd %xmm4, %xmm1 \0A\09movsd %xmm0, $1 \0A\09movsd %xmm1, $2 \0A\09", "=&r,=*m,=*m,r,r,0,~{dirflag},~{fpsr},~{flags}"(double* %32, double* %33, double* %21, double* %31, i32 %22) nounwind ; <i32> [#uses=0] + %.pre = add i32 %j4.141, 2 ; <i32> [#uses=1] + br label %bb35 + +bb35: ; preds = %bb33, %bb31 + %.pre-phi = phi i32 [ %.pre, %bb33 ], [ %26, %bb31 ] ; <i32> [#uses=1] + %34 = icmp slt i32 %.pre-phi, %lag ; <i1> [#uses=1] + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1] + br i1 %34, label %bb30, label %bb37 + +bb37: ; preds = %bb35, %bb29 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/pr3216.ll b/src/LLVM/test/CodeGen/X86/pr3216.ll new file mode 100644 index 0000000..63676d9 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/pr3216.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=x86 | FileCheck %s + +@foo = global i8 127 + +define i32 @main() nounwind { +; CHECK: main: +; CHECK-NOT: ret +; CHECK: sar{{.}} $5 +; CHECK: ret + + %tmp = load i8* @foo + %bf.lo = lshr i8 %tmp, 5 + %bf.lo.cleared = and i8 %bf.lo, 7 + %1 = shl i8 %bf.lo.cleared, 5 + %bf.val.sext = ashr i8 %1, 5 + %conv = sext i8 %bf.val.sext to i32 + ret i32 %conv +}
diff --git a/src/LLVM/test/CodeGen/X86/pr3241.ll b/src/LLVM/test/CodeGen/X86/pr3241.ll new file mode 100644 index 0000000..2f7917b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/pr3241.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s -march=x86 +; PR3241 + +@g_620 = external global i32 + +define void @func_18(i32 %p_21) nounwind { +entry: + %t0 = call i32 @func_31(i32 %p_21) nounwind + %t1 = call i32 @safe_add_macro_uint32_t_u_u() nounwind + %t2 = icmp sgt i32 %t1, 0 + %t3 = zext i1 %t2 to i32 + %t4 = load i32* @g_620, align 4 + %t5 = icmp eq i32 %t3, %t4 + %t6 = xor i32 %p_21, 1 + %t7 = call i32 @func_55(i32 %t6) nounwind + br i1 %t5, label %return, label %bb + +bb: + unreachable + +return: + unreachable +} + +declare i32 @func_31(i32) + +declare i32 @safe_add_macro_uint32_t_u_u() + +declare i32 @func_55(i32)
diff --git a/src/LLVM/test/CodeGen/X86/pr3243.ll b/src/LLVM/test/CodeGen/X86/pr3243.ll new file mode 100644 index 0000000..483b5bf --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/pr3243.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=x86 +; PR3243 + +declare signext i16 @safe_mul_func_int16_t_s_s(i16 signext, i32) nounwind readnone optsize + +define i32 @func_120(i32 %p_121) nounwind optsize { +entry: + %0 = trunc i32 %p_121 to i16 ; <i16> [#uses=1] + %1 = urem i16 %0, -15461 ; <i16> [#uses=1] + %phitmp1 = trunc i16 %1 to i8 ; <i8> [#uses=1] + %phitmp2 = urem i8 %phitmp1, -1 ; <i8> [#uses=1] + %phitmp3 = zext i8 %phitmp2 to i16 ; <i16> [#uses=1] + %2 = tail call signext i16 @safe_mul_func_int16_t_s_s(i16 signext %phitmp3, i32 1) nounwind ; <i16> [#uses=0] + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/pr3244.ll b/src/LLVM/test/CodeGen/X86/pr3244.ll new file mode 100644 index 0000000..2598c2f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/pr3244.ll
@@ -0,0 +1,26 @@ +; RUN: llc < %s -march=x86 +; PR3244 + +@g_62 = external global i16 ; <i16*> [#uses=1] +@g_487 = external global i32 ; <i32*> [#uses=1] + +define i32 @func_42(i32 %p_43, i32 %p_44, i32 %p_45, i32 %p_46) nounwind { +entry: + %0 = load i16* @g_62, align 2 ; <i16> [#uses=1] + %1 = load i32* @g_487, align 4 ; <i32> [#uses=1] + %2 = trunc i16 %0 to i8 ; <i8> [#uses=1] + %3 = trunc i32 %1 to i8 ; <i8> [#uses=1] + %4 = tail call i32 (...)* @func_7(i64 -4455561449541442965, i32 1) +nounwind ; <i32> [#uses=1] + %5 = trunc i32 %4 to i8 ; <i8> [#uses=1] + %6 = mul i8 %3, %2 ; <i8> [#uses=1] + %7 = mul i8 %6, %5 ; <i8> [#uses=1] + %8 = sext i8 %7 to i16 ; <i16> [#uses=1] + %9 = tail call i32 @func_85(i16 signext %8, i32 1, i32 1) nounwind + ; <i32> [#uses=0] + ret i32 undef +} + +declare i32 @func_7(...) + +declare i32 @func_85(i16 signext, i32, i32)
diff --git a/src/LLVM/test/CodeGen/X86/pr3250.ll b/src/LLVM/test/CodeGen/X86/pr3250.ll new file mode 100644 index 0000000..cccbf54 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/pr3250.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=x86 +; PR3250 + +declare i32 @safe_sub_func_short_u_u(i16 signext, i16 signext) nounwind + +define i32 @func_106(i32 %p_107) nounwind { +entry: + %0 = tail call i32 (...)* @safe_div_(i32 %p_107, i32 1) nounwind + ; <i32> [#uses=1] + %1 = lshr i32 %0, -9 ; <i32> [#uses=1] + %2 = trunc i32 %1 to i16 ; <i16> [#uses=1] + %3 = tail call i32 @safe_sub_func_short_u_u(i16 signext 1, i16 signext +%2) nounwind ; <i32> [#uses=0] + ret i32 undef +} + +declare i32 @safe_div_(...)
diff --git a/src/LLVM/test/CodeGen/X86/pr3317.ll b/src/LLVM/test/CodeGen/X86/pr3317.ll new file mode 100644 index 0000000..d83daf0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/pr3317.ll
@@ -0,0 +1,46 @@ +; RUN: llc < %s -march=x86 +; PR3317 + +%VT = type [0 x i32 (...)*] + %ArraySInt16 = type { %JavaObject, i8*, [0 x i16] } + %ArraySInt8 = type { %JavaObject, i8*, [0 x i8] } + %Attribut = type { %ArraySInt16*, i32, i32 } + %CacheNode = type { i8*, %JavaCommonClass*, %CacheNode*, %Enveloppe* } + %Enveloppe = type { %CacheNode*, %ArraySInt16*, %ArraySInt16*, i8, %JavaClass*, %CacheNode } + %JavaArray = type { %JavaObject, i8* } + %JavaClass = type { %JavaCommonClass, i32, %VT*, [1 x %TaskClassMirror], i8*, %JavaField*, i16, %JavaField*, i16, %JavaMethod*, i16, %JavaMethod*, i16, i8*, %ArraySInt8*, i8*, %Attribut*, i16, %JavaClass**, i16, %JavaClass*, i16, i8, i32, i32, i8*, void (i8*)* } + %JavaCommonClass = type { %JavaCommonClass**, i32, [1 x %JavaObject*], i16, %JavaClass**, i16, %ArraySInt16*, %JavaClass*, i8* } + %JavaField = type { i8*, i16, %ArraySInt16*, %ArraySInt16*, %Attribut*, i16, %JavaClass*, i32, i16, i8* } + %JavaMethod = type { i8*, i16, %Attribut*, i16, %Enveloppe*, i16, %JavaClass*, %ArraySInt16*, %ArraySInt16*, i8, i8*, i32, i8* } + %JavaObject = type { %VT*, %JavaCommonClass*, i8* } + %TaskClassMirror = type { i32, i8* } + %UTF8 = type { %JavaObject, i8*, [0 x i16] } + +declare void @jnjvmNullPointerException() + +define i32 @JnJVM_java_rmi_activation_ActivationGroupID_hashCode__(%JavaObject* nocapture) nounwind { +start: + %1 = getelementptr %JavaObject* %0, i64 1, i32 1 ; <%JavaCommonClass**> [#uses=1] + %2 = load %JavaCommonClass** %1 ; <%JavaCommonClass*> [#uses=4] + %3 = icmp eq %JavaCommonClass* %2, null ; <i1> [#uses=1] + br i1 %3, label %verifyNullExit1, label %verifyNullCont2 + +verifyNullExit1: ; preds = %start + tail call void @jnjvmNullPointerException() + unreachable + +verifyNullCont2: ; preds = %start + %4 = bitcast %JavaCommonClass* %2 to { %JavaObject, i16, i32, i64 }* ; <{ %JavaObject, i16, i32, i64 }*> [#uses=1] + %5 = getelementptr { %JavaObject, i16, i32, i64 }* %4, i64 0, i32 2 ; <i32*> [#uses=1] + %6 = 
load i32* %5 ; <i32> [#uses=1] + %7 = getelementptr %JavaCommonClass* %2, i64 0, i32 4 ; <%JavaClass***> [#uses=1] + %8 = bitcast %JavaClass*** %7 to i64* ; <i64*> [#uses=1] + %9 = load i64* %8 ; <i64> [#uses=1] + %10 = trunc i64 %9 to i32 ; <i32> [#uses=1] + %11 = getelementptr %JavaCommonClass* %2, i64 0, i32 3 ; <i16*> [#uses=1] + %12 = load i16* %11 ; <i16> [#uses=1] + %13 = sext i16 %12 to i32 ; <i32> [#uses=1] + %14 = xor i32 %10, %6 ; <i32> [#uses=1] + %15 = xor i32 %14, %13 ; <i32> [#uses=1] + ret i32 %15 +}
diff --git a/src/LLVM/test/CodeGen/X86/pr3366.ll b/src/LLVM/test/CodeGen/X86/pr3366.ll new file mode 100644 index 0000000..1127b60 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/pr3366.ll
@@ -0,0 +1,21 @@ +; RUN: llc < %s -march=x86 -disable-cgp-branch-opts | grep movzbl +; PR3366 + +define void @_ada_c34002a() nounwind { +entry: + %0 = load i8* null, align 1 + %1 = sdiv i8 90, %0 + %2 = icmp ne i8 %1, 3 + %3 = zext i1 %2 to i8 + %toBool449 = icmp ne i8 %3, 0 + %4 = or i1 false, %toBool449 + %5 = zext i1 %4 to i8 + %toBool450 = icmp ne i8 %5, 0 + br i1 %toBool450, label %bb451, label %bb457 + +bb451: + br label %bb457 + +bb457: + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/pr3457.ll b/src/LLVM/test/CodeGen/X86/pr3457.ll new file mode 100644 index 0000000..f7af927 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/pr3457.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin | not grep fstpt +; PR3457 +; rdar://6548010 + +define void @foo(double* nocapture %P) nounwind { +entry: + %0 = tail call double (...)* @test() nounwind ; <double> [#uses=2] + %1 = tail call double (...)* @test() nounwind ; <double> [#uses=2] + %2 = fmul double %0, %0 ; <double> [#uses=1] + %3 = fmul double %1, %1 ; <double> [#uses=1] + %4 = fadd double %2, %3 ; <double> [#uses=1] + store double %4, double* %P, align 8 + ret void +} + +declare double @test(...)
diff --git a/src/LLVM/test/CodeGen/X86/pr3495-2.ll b/src/LLVM/test/CodeGen/X86/pr3495-2.ll new file mode 100644 index 0000000..a4204e5 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/pr3495-2.ll
@@ -0,0 +1,54 @@ +; RUN: llc < %s -march=x86 -relocation-model=pic -disable-fp-elim -stats -regalloc=linearscan |& grep {Number of loads added} | grep 1 +; PR3495 +; +; This test may not be testing what it was supposed to test. +; It used to have two spills and four reloads, but not it only has one spill and one reload. + +target datalayout = "e-p:32:32:32" +target triple = "i386-apple-darwin9.6" + %struct.constraintVCGType = type { i32, i32, i32, i32 } + %struct.nodeVCGType = type { %struct.constraintVCGType*, i32, i32, i32, %struct.constraintVCGType*, i32, i32, i32 } + +define fastcc void @SCC_DFSBelowVCG(%struct.nodeVCGType* %VCG, i32 %net, i32 %label) nounwind { +entry: + %0 = getelementptr %struct.nodeVCGType* %VCG, i32 %net, i32 5 ; <i32*> [#uses=2] + %1 = load i32* %0, align 4 ; <i32> [#uses=1] + %2 = icmp eq i32 %1, 0 ; <i1> [#uses=1] + br i1 %2, label %bb5, label %bb.nph3 + +bb.nph3: ; preds = %entry + %3 = getelementptr %struct.nodeVCGType* %VCG, i32 %net, i32 4 ; <%struct.constraintVCGType**> [#uses=1] + br label %bb + +bb: ; preds = %bb3, %bb.nph3 + %s.02 = phi i32 [ 0, %bb.nph3 ], [ %12, %bb3 ] ; <i32> [#uses=2] + %4 = load %struct.constraintVCGType** %3, align 4 ; <%struct.constraintVCGType*> [#uses=1] + %5 = icmp eq i32 0, 0 ; <i1> [#uses=1] + br i1 %5, label %bb1, label %bb3 + +bb1: ; preds = %bb + %6 = getelementptr %struct.constraintVCGType* %4, i32 %s.02, i32 0 ; <i32*> [#uses=1] + %7 = load i32* %6, align 4 ; <i32> [#uses=2] + %8 = getelementptr %struct.nodeVCGType* %VCG, i32 %7, i32 7 ; <i32*> [#uses=1] + %9 = load i32* %8, align 4 ; <i32> [#uses=1] + %10 = icmp eq i32 %9, 0 ; <i1> [#uses=1] + br i1 %10, label %bb2, label %bb3 + +bb2: ; preds = %bb1 + %11 = getelementptr %struct.nodeVCGType* %VCG, i32 %7, i32 4 ; <%struct.constraintVCGType**> [#uses=0] + br label %bb.i + +bb.i: ; preds = %bb.i, %bb2 + br label %bb.i + +bb3: ; preds = %bb1, %bb + %12 = add i32 %s.02, 1 ; <i32> [#uses=2] + %13 = load i32* %0, align 4 ; <i32> [#uses=1] + %14 = 
icmp ugt i32 %13, %12 ; <i1> [#uses=1] + br i1 %14, label %bb, label %bb5 + +bb5: ; preds = %bb3, %entry + %15 = getelementptr %struct.nodeVCGType* %VCG, i32 %net, i32 6 ; <i32*> [#uses=1] + store i32 %label, i32* %15, align 4 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/pr3495.ll b/src/LLVM/test/CodeGen/X86/pr3495.ll new file mode 100644 index 0000000..7efd35b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/pr3495.ll
@@ -0,0 +1,81 @@ +; RUN: llc < %s -march=x86 -stats -regalloc=linearscan -enable-lsr-nested |& grep {Number of loads added} | grep 2 +; RUN: llc < %s -march=x86 -stats -regalloc=linearscan -enable-lsr-nested |& grep {Number of spill slots allocated} | grep 1 +; RUN: llc < %s -march=x86 -stats -regalloc=linearscan -enable-lsr-nested |& grep {Number of machine instrs printed} | grep 34 +; PR3495 +; +; Note: this should not spill at all with either good LSR or good regalloc. + +target triple = "i386-pc-linux-gnu" +@x = external global [8 x i32], align 32 ; <[8 x i32]*> [#uses=1] +@rows = external global [8 x i32], align 32 ; <[8 x i32]*> [#uses=2] +@up = external global [15 x i32], align 32 ; <[15 x i32]*> [#uses=2] +@down = external global [15 x i32], align 32 ; <[15 x i32]*> [#uses=1] + +define i32 @queens(i32 %c) nounwind { +entry: + %tmp91 = add i32 %c, 1 ; <i32> [#uses=3] + %tmp135 = getelementptr [8 x i32]* @x, i32 0, i32 %tmp91 ; <i32*> [#uses=1] + br label %bb + +bb: ; preds = %bb569, %entry + %r25.0.reg2mem.0 = phi i32 [ 0, %entry ], [ %indvar.next715, %bb569 ] ; <i32> [#uses=4] + %tmp27 = getelementptr [8 x i32]* @rows, i32 0, i32 %r25.0.reg2mem.0 ; <i32*> [#uses=1] + %tmp28 = load i32* %tmp27, align 4 ; <i32> [#uses=1] + %tmp29 = icmp eq i32 %tmp28, 0 ; <i1> [#uses=1] + br i1 %tmp29, label %bb569, label %bb31 + +bb31: ; preds = %bb + %tmp35 = sub i32 %r25.0.reg2mem.0, 0 ; <i32> [#uses=1] + %tmp36 = getelementptr [15 x i32]* @up, i32 0, i32 %tmp35 ; <i32*> [#uses=1] + %tmp37 = load i32* %tmp36, align 4 ; <i32> [#uses=1] + %tmp38 = icmp eq i32 %tmp37, 0 ; <i1> [#uses=1] + br i1 %tmp38, label %bb569, label %bb41 + +bb41: ; preds = %bb31 + %tmp54 = sub i32 %r25.0.reg2mem.0, %c ; <i32> [#uses=1] + %tmp55 = add i32 %tmp54, 7 ; <i32> [#uses=1] + %tmp62 = getelementptr [15 x i32]* @up, i32 0, i32 %tmp55 ; <i32*> [#uses=2] + store i32 0, i32* %tmp62, align 4 + br label %bb92 + +bb92: ; preds = %bb545, %bb41 + %r20.0.reg2mem.0 = phi i32 [ 0, %bb41 ], [ 
%indvar.next711, %bb545 ] ; <i32> [#uses=5] + %tmp94 = getelementptr [8 x i32]* @rows, i32 0, i32 %r20.0.reg2mem.0 ; <i32*> [#uses=1] + %tmp95 = load i32* %tmp94, align 4 ; <i32> [#uses=0] + %tmp112 = add i32 %r20.0.reg2mem.0, %tmp91 ; <i32> [#uses=1] + %tmp113 = getelementptr [15 x i32]* @down, i32 0, i32 %tmp112 ; <i32*> [#uses=2] + %tmp114 = load i32* %tmp113, align 4 ; <i32> [#uses=1] + %tmp115 = icmp eq i32 %tmp114, 0 ; <i1> [#uses=1] + br i1 %tmp115, label %bb545, label %bb118 + +bb118: ; preds = %bb92 + %tmp122 = sub i32 %r20.0.reg2mem.0, %tmp91 ; <i32> [#uses=0] + store i32 0, i32* %tmp113, align 4 + store i32 %r20.0.reg2mem.0, i32* %tmp135, align 4 + br label %bb142 + +bb142: ; preds = %bb142, %bb118 + %k18.0.reg2mem.0 = phi i32 [ 0, %bb118 ], [ %indvar.next709, %bb142 ] ; <i32> [#uses=1] + %indvar.next709 = add i32 %k18.0.reg2mem.0, 1 ; <i32> [#uses=2] + %exitcond710 = icmp eq i32 %indvar.next709, 8 ; <i1> [#uses=1] + br i1 %exitcond710, label %bb155, label %bb142 + +bb155: ; preds = %bb142 + %tmp156 = tail call i32 @putchar(i32 10) nounwind ; <i32> [#uses=0] + br label %bb545 + +bb545: ; preds = %bb155, %bb92 + %indvar.next711 = add i32 %r20.0.reg2mem.0, 1 ; <i32> [#uses=2] + %exitcond712 = icmp eq i32 %indvar.next711, 8 ; <i1> [#uses=1] + br i1 %exitcond712, label %bb553, label %bb92 + +bb553: ; preds = %bb545 + store i32 1, i32* %tmp62, align 4 + br label %bb569 + +bb569: ; preds = %bb553, %bb31, %bb + %indvar.next715 = add i32 %r25.0.reg2mem.0, 1 ; <i32> [#uses=1] + br label %bb +} + +declare i32 @putchar(i32)
diff --git a/src/LLVM/test/CodeGen/X86/pr3522.ll b/src/LLVM/test/CodeGen/X86/pr3522.ll new file mode 100644 index 0000000..1122530 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/pr3522.ll
@@ -0,0 +1,34 @@ +; RUN: llc < %s -march=x86 -stats |& not grep {instructions sunk} +; PR3522 + +target triple = "i386-pc-linux-gnu" +@.str = external constant [13 x i8] ; <[13 x i8]*> [#uses=1] + +define void @_ada_c34018a() { +entry: + %0 = tail call i32 @report__ident_int(i32 90) ; <i32> [#uses=1] + %1 = trunc i32 %0 to i8 ; <i8> [#uses=1] + invoke void @__gnat_rcheck_12(i8* getelementptr ([13 x i8]* @.str, i32 0, i32 0), i32 32) noreturn + to label %invcont unwind label %lpad + +invcont: ; preds = %entry + unreachable + +bb22: ; preds = %lpad + ret void + +return: ; preds = %lpad + ret void + +lpad: ; preds = %entry + %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + %2 = icmp eq i8 %1, 90 ; <i1> [#uses=1] + br i1 %2, label %return, label %bb22 +} + +declare void @__gnat_rcheck_12(i8*, i32) noreturn + +declare i32 @report__ident_int(i32) + +declare i32 @__gxx_personality_v0(...)
diff --git a/src/LLVM/test/CodeGen/X86/pr7882.ll b/src/LLVM/test/CodeGen/X86/pr7882.ll new file mode 100644 index 0000000..88404db --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/pr7882.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin -pre-RA-sched=fast \ +; RUN: | FileCheck %s +; make sure scheduler honors the flags clobber. PR 7882. + +define i32 @main(i32 %argc, i8** %argv) nounwind +{ +entry: +; CHECK: InlineAsm End +; CHECK: cmpl + %res = icmp slt i32 1, %argc + %tmp = call i32 asm sideeffect alignstack + "push $$0 + popf + mov $$13, $0", "=r,r,~{memory},~{flags}" (i1 %res) + %ret = select i1 %res, i32 %tmp, i32 42 + ret i32 %ret +}
diff --git a/src/LLVM/test/CodeGen/X86/pr9127.ll b/src/LLVM/test/CodeGen/X86/pr9127.ll new file mode 100644 index 0000000..ba92c77 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/pr9127.ll
@@ -0,0 +1,13 @@ +; RUN: llc -mtriple=x86_64-linux < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-win32 < %s | FileCheck %s + +define i8 @foobar(double %d, double* %x) { +entry: + %tmp2 = load double* %x, align 8 + %cmp = fcmp oeq double %tmp2, %d + %conv3 = zext i1 %cmp to i8 + ret i8 %conv3 +} + +; test that the load is folded. +; CHECK: cmpeqsd (%{{rdi|rdx}}), %xmm0
diff --git a/src/LLVM/test/CodeGen/X86/pr9743.ll b/src/LLVM/test/CodeGen/X86/pr9743.ll new file mode 100644 index 0000000..6597c23 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/pr9743.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -disable-fp-elim -asm-verbose=0 | FileCheck %s + +define void @f() { + ret void +} + +; CHECK: .cfi_startproc +; CHECK-NEXT: pushq +; CHECK-NEXT: : +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: : +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsp, %rbp +; CHECK-NEXT: : +; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: ret
diff --git a/src/LLVM/test/CodeGen/X86/prefetch.ll b/src/LLVM/test/CodeGen/X86/prefetch.ll new file mode 100644 index 0000000..ebe11a5 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/prefetch.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=x86 -mattr=+sse | FileCheck %s + +define void @t(i8* %ptr) nounwind { +entry: +; CHECK: prefetcht2 +; CHECK: prefetcht1 +; CHECK: prefetcht0 +; CHECK: prefetchnta + tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 1, i32 1 ) + tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 2, i32 1 ) + tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 1 ) + tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 0, i32 1 ) + ret void +} + +declare void @llvm.prefetch(i8*, i32, i32, i32) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/private-2.ll b/src/LLVM/test/CodeGen/X86/private-2.ll new file mode 100644 index 0000000..8aa744e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/private-2.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | grep L__ZZ20 +; Quote should be outside of private prefix. +; rdar://6855766x + + %struct.A = type { i32*, i32 } +@"_ZZ20-[Example1 whatever]E4C.91" = private constant %struct.A { i32* null, i32 1 } ; <%struct.A*> [#uses=1] + +define internal i32* @"\01-[Example1 whatever]"() nounwind optsize ssp { +entry: + %0 = getelementptr %struct.A* @"_ZZ20-[Example1 whatever]E4C.91", i64 0, i32 0 ; <i32**> [#uses=1] + %1 = load i32** %0, align 8 ; <i32*> [#uses=1] + ret i32* %1 +}
diff --git a/src/LLVM/test/CodeGen/X86/private.ll b/src/LLVM/test/CodeGen/X86/private.ll new file mode 100644 index 0000000..484afc9 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/private.ll
@@ -0,0 +1,18 @@ +; Test to make sure that the 'private' is used correctly. +; +; RUN: llc < %s -mtriple=x86_64-pc-linux | grep .Lfoo: +; RUN: llc < %s -mtriple=x86_64-pc-linux | grep call.*\.Lfoo +; RUN: llc < %s -mtriple=x86_64-pc-linux | grep .Lbaz: +; RUN: llc < %s -mtriple=x86_64-pc-linux | grep movl.*\.Lbaz + +define private void @foo() { + ret void +} + +@baz = private global i32 4 + +define i32 @bar() { + call void @foo() + %1 = load i32* @baz, align 4 + ret i32 %1 +}
diff --git a/src/LLVM/test/CodeGen/X86/promote-assert-zext.ll b/src/LLVM/test/CodeGen/X86/promote-assert-zext.ll new file mode 100644 index 0000000..b582806 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/promote-assert-zext.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s | FileCheck %s +; rdar://8051990 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin11" + +; ISel doesn't yet know how to eliminate this extra zero-extend. But until +; it knows how to do so safely, it shouldn;t eliminate it. +; CHECK: movzbl (%rdi), %eax +; CHECK: movzwl %ax, %eax + +define i64 @_ZL5matchPKtPKhiR9MatchData(i8* %tmp13) nounwind { +entry: + %tmp14 = load i8* %tmp13, align 1 + %tmp17 = zext i8 %tmp14 to i16 + br label %bb341 + +bb341: + %tmp18 = add i16 %tmp17, -1 + %tmp23 = sext i16 %tmp18 to i64 + ret i64 %tmp23 +}
diff --git a/src/LLVM/test/CodeGen/X86/promote-i16.ll b/src/LLVM/test/CodeGen/X86/promote-i16.ll new file mode 100644 index 0000000..3c91d74 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/promote-i16.ll
@@ -0,0 +1,21 @@ +; RUN: llc < %s -march=x86 | FileCheck %s + +define signext i16 @foo(i16 signext %x) nounwind { +entry: +; CHECK: foo: +; CHECK-NOT: movzwl +; CHECK: movswl 4(%esp), %eax +; CHECK: xorl $21998, %eax + %0 = xor i16 %x, 21998 + ret i16 %0 +} + +define signext i16 @bar(i16 signext %x) nounwind { +entry: +; CHECK: bar: +; CHECK-NOT: movzwl +; CHECK: movswl 4(%esp), %eax +; CHECK: xorl $-10770, %eax + %0 = xor i16 %x, 54766 + ret i16 %0 +}
diff --git a/src/LLVM/test/CodeGen/X86/promote-trunc.ll b/src/LLVM/test/CodeGen/X86/promote-trunc.ll new file mode 100644 index 0000000..4211d82 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/promote-trunc.ll
@@ -0,0 +1,11 @@ +; RUN: llc -promote-elements < %s -march=x86-64 + +define<4 x i8> @func_8_64() { + %F = load <4 x i64>* undef + %G = trunc <4 x i64> %F to <4 x i8> + %H = load <4 x i64>* undef + %Y = trunc <4 x i64> %H to <4 x i8> + %T = add <4 x i8> %Y, %G + ret <4 x i8> %T +} +
diff --git a/src/LLVM/test/CodeGen/X86/ptr-rotate.ll b/src/LLVM/test/CodeGen/X86/ptr-rotate.ll new file mode 100644 index 0000000..6debd16 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/ptr-rotate.ll
@@ -0,0 +1,11 @@ +; RUN: llc -mtriple=i386-apple-darwin -o - < %s | FileCheck %s + +define i32 @func(i8* %A) nounwind readnone { +entry: + %tmp = ptrtoint i8* %A to i32 + %shr = lshr i32 %tmp, 5 + %shl = shl i32 %tmp, 27 + %or = or i32 %shr, %shl +; CHECK: roll $27 + ret i32 %or +}
diff --git a/src/LLVM/test/CodeGen/X86/ptrtoint-constexpr.ll b/src/LLVM/test/CodeGen/X86/ptrtoint-constexpr.ll new file mode 100644 index 0000000..d1cb34b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/ptrtoint-constexpr.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -mtriple=i386-linux | FileCheck %s + %union.x = type { i64 } + +; CHECK: .globl r +; CHECK: r: +; CHECK: .quad r&4294967295 + +@r = global %union.x { i64 ptrtoint (%union.x* @r to i64) }, align 4 + +; CHECK: .globl x +; CHECK: x: +; CHECK: .quad ((0+1)&4294967295)*3 + +@x = global i64 mul (i64 3, i64 ptrtoint (i2* getelementptr (i2* null, i64 1) to i64))
diff --git a/src/LLVM/test/CodeGen/X86/rdtsc.ll b/src/LLVM/test/CodeGen/X86/rdtsc.ll new file mode 100644 index 0000000..c463f19 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/rdtsc.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=x86 | grep rdtsc +; RUN: llc < %s -march=x86-64 | grep rdtsc +declare i64 @llvm.readcyclecounter() + +define i64 @foo() { + %tmp.1 = call i64 @llvm.readcyclecounter( ) ; <i64> [#uses=1] + ret i64 %tmp.1 +}
diff --git a/src/LLVM/test/CodeGen/X86/red-zone.ll b/src/LLVM/test/CodeGen/X86/red-zone.ll new file mode 100644 index 0000000..d936971 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/red-zone.ll
@@ -0,0 +1,25 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s + +; First without noredzone. +; CHECK: f0: +; CHECK: -4(%rsp) +; CHECK: -4(%rsp) +; CHECK: ret +define x86_fp80 @f0(float %f) nounwind readnone { +entry: + %0 = fpext float %f to x86_fp80 ; <x86_fp80> [#uses=1] + ret x86_fp80 %0 +} + +; Then with noredzone. +; CHECK: f1: +; CHECK: subq $4, %rsp +; CHECK: (%rsp) +; CHECK: (%rsp) +; CHECK: addq $4, %rsp +; CHECK: ret +define x86_fp80 @f1(float %f) nounwind readnone noredzone { +entry: + %0 = fpext float %f to x86_fp80 ; <x86_fp80> [#uses=1] + ret x86_fp80 %0 +}
diff --git a/src/LLVM/test/CodeGen/X86/red-zone2.ll b/src/LLVM/test/CodeGen/X86/red-zone2.ll new file mode 100644 index 0000000..9557d17 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/red-zone2.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=x86-64 > %t +; RUN: grep subq %t | count 1 +; RUN: grep addq %t | count 1 + +define x86_fp80 @f0(float %f) nounwind readnone noredzone { +entry: + %0 = fpext float %f to x86_fp80 ; <x86_fp80> [#uses=1] + ret x86_fp80 %0 +}
diff --git a/src/LLVM/test/CodeGen/X86/reghinting.ll b/src/LLVM/test/CodeGen/X86/reghinting.ll new file mode 100644 index 0000000..87f65ed --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/reghinting.ll
@@ -0,0 +1,35 @@ +; RUN: llc < %s -mtriple=x86_64-apple-macosx | FileCheck %s +; PR10221 + +;; The registers %x and %y must both spill across the finit call. +;; Check that they are spilled early enough that not copies are needed for the +;; fadd and fpext. + +; CHECK: pr10221 +; CHECK-NOT: movaps +; CHECK: movss +; CHECK-NEXT: movss +; CHECK-NEXT: addss +; CHECK-NEXT: cvtss2sd +; CHECK-NEXT: finit + +define i32 @pr10221(float %x, float %y, i8** nocapture %_retval) nounwind uwtable ssp { +entry: + %add = fadd float %x, %y + %conv = fpext float %add to double + %call = tail call i32 @finit(double %conv) nounwind + %tobool = icmp eq i32 %call, 0 + br i1 %tobool, label %return, label %if.end + +if.end: ; preds = %entry + tail call void @foo(float %x, float %y) nounwind + br label %return + +return: ; preds = %entry, %if.end + %retval.0 = phi i32 [ 0, %if.end ], [ 5, %entry ] + ret i32 %retval.0 +} + +declare i32 @finit(double) + +declare void @foo(float, float)
diff --git a/src/LLVM/test/CodeGen/X86/regpressure.ll b/src/LLVM/test/CodeGen/X86/regpressure.ll new file mode 100644 index 0000000..edb17c6 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/regpressure.ll
@@ -0,0 +1,114 @@ +;; Both functions in this testcase should codegen to the same function, and +;; neither of them should require spilling anything to the stack. + +; RUN: llc < %s -march=x86 -stats |& \ +; RUN: not grep {Number of register spills} + +;; This can be compiled to use three registers if the loads are not +;; folded into the multiplies, 2 registers otherwise. + +define i32 @regpressure1(i32* %P) { + %A = load i32* %P ; <i32> [#uses=1] + %Bp = getelementptr i32* %P, i32 1 ; <i32*> [#uses=1] + %B = load i32* %Bp ; <i32> [#uses=1] + %s1 = mul i32 %A, %B ; <i32> [#uses=1] + %Cp = getelementptr i32* %P, i32 2 ; <i32*> [#uses=1] + %C = load i32* %Cp ; <i32> [#uses=1] + %s2 = mul i32 %s1, %C ; <i32> [#uses=1] + %Dp = getelementptr i32* %P, i32 3 ; <i32*> [#uses=1] + %D = load i32* %Dp ; <i32> [#uses=1] + %s3 = mul i32 %s2, %D ; <i32> [#uses=1] + %Ep = getelementptr i32* %P, i32 4 ; <i32*> [#uses=1] + %E = load i32* %Ep ; <i32> [#uses=1] + %s4 = mul i32 %s3, %E ; <i32> [#uses=1] + %Fp = getelementptr i32* %P, i32 5 ; <i32*> [#uses=1] + %F = load i32* %Fp ; <i32> [#uses=1] + %s5 = mul i32 %s4, %F ; <i32> [#uses=1] + %Gp = getelementptr i32* %P, i32 6 ; <i32*> [#uses=1] + %G = load i32* %Gp ; <i32> [#uses=1] + %s6 = mul i32 %s5, %G ; <i32> [#uses=1] + %Hp = getelementptr i32* %P, i32 7 ; <i32*> [#uses=1] + %H = load i32* %Hp ; <i32> [#uses=1] + %s7 = mul i32 %s6, %H ; <i32> [#uses=1] + %Ip = getelementptr i32* %P, i32 8 ; <i32*> [#uses=1] + %I = load i32* %Ip ; <i32> [#uses=1] + %s8 = mul i32 %s7, %I ; <i32> [#uses=1] + %Jp = getelementptr i32* %P, i32 9 ; <i32*> [#uses=1] + %J = load i32* %Jp ; <i32> [#uses=1] + %s9 = mul i32 %s8, %J ; <i32> [#uses=1] + ret i32 %s9 +} + +define i32 @regpressure2(i32* %P) { + %A = load i32* %P ; <i32> [#uses=1] + %Bp = getelementptr i32* %P, i32 1 ; <i32*> [#uses=1] + %B = load i32* %Bp ; <i32> [#uses=1] + %Cp = getelementptr i32* %P, i32 2 ; <i32*> [#uses=1] + %C = load i32* %Cp ; <i32> [#uses=1] + %Dp = getelementptr i32* %P, 
i32 3 ; <i32*> [#uses=1] + %D = load i32* %Dp ; <i32> [#uses=1] + %Ep = getelementptr i32* %P, i32 4 ; <i32*> [#uses=1] + %E = load i32* %Ep ; <i32> [#uses=1] + %Fp = getelementptr i32* %P, i32 5 ; <i32*> [#uses=1] + %F = load i32* %Fp ; <i32> [#uses=1] + %Gp = getelementptr i32* %P, i32 6 ; <i32*> [#uses=1] + %G = load i32* %Gp ; <i32> [#uses=1] + %Hp = getelementptr i32* %P, i32 7 ; <i32*> [#uses=1] + %H = load i32* %Hp ; <i32> [#uses=1] + %Ip = getelementptr i32* %P, i32 8 ; <i32*> [#uses=1] + %I = load i32* %Ip ; <i32> [#uses=1] + %Jp = getelementptr i32* %P, i32 9 ; <i32*> [#uses=1] + %J = load i32* %Jp ; <i32> [#uses=1] + %s1 = mul i32 %A, %B ; <i32> [#uses=1] + %s2 = mul i32 %s1, %C ; <i32> [#uses=1] + %s3 = mul i32 %s2, %D ; <i32> [#uses=1] + %s4 = mul i32 %s3, %E ; <i32> [#uses=1] + %s5 = mul i32 %s4, %F ; <i32> [#uses=1] + %s6 = mul i32 %s5, %G ; <i32> [#uses=1] + %s7 = mul i32 %s6, %H ; <i32> [#uses=1] + %s8 = mul i32 %s7, %I ; <i32> [#uses=1] + %s9 = mul i32 %s8, %J ; <i32> [#uses=1] + ret i32 %s9 +} + +define i32 @regpressure3(i16* %P, i1 %Cond, i32* %Other) { + %A = load i16* %P ; <i16> [#uses=1] + %Bp = getelementptr i16* %P, i32 1 ; <i16*> [#uses=1] + %B = load i16* %Bp ; <i16> [#uses=1] + %Cp = getelementptr i16* %P, i32 2 ; <i16*> [#uses=1] + %C = load i16* %Cp ; <i16> [#uses=1] + %Dp = getelementptr i16* %P, i32 3 ; <i16*> [#uses=1] + %D = load i16* %Dp ; <i16> [#uses=1] + %Ep = getelementptr i16* %P, i32 4 ; <i16*> [#uses=1] + %E = load i16* %Ep ; <i16> [#uses=1] + %Fp = getelementptr i16* %P, i32 5 ; <i16*> [#uses=1] + %F = load i16* %Fp ; <i16> [#uses=1] + %Gp = getelementptr i16* %P, i32 6 ; <i16*> [#uses=1] + %G = load i16* %Gp ; <i16> [#uses=1] + %Hp = getelementptr i16* %P, i32 7 ; <i16*> [#uses=1] + %H = load i16* %Hp ; <i16> [#uses=1] + %Ip = getelementptr i16* %P, i32 8 ; <i16*> [#uses=1] + %I = load i16* %Ip ; <i16> [#uses=1] + %Jp = getelementptr i16* %P, i32 9 ; <i16*> [#uses=1] + %J = load i16* %Jp ; <i16> [#uses=1] + %A.upgrd.1 = 
sext i16 %A to i32 ; <i32> [#uses=1] + %B.upgrd.2 = sext i16 %B to i32 ; <i32> [#uses=1] + %D.upgrd.3 = sext i16 %D to i32 ; <i32> [#uses=1] + %C.upgrd.4 = sext i16 %C to i32 ; <i32> [#uses=1] + %E.upgrd.5 = sext i16 %E to i32 ; <i32> [#uses=1] + %F.upgrd.6 = sext i16 %F to i32 ; <i32> [#uses=1] + %G.upgrd.7 = sext i16 %G to i32 ; <i32> [#uses=1] + %H.upgrd.8 = sext i16 %H to i32 ; <i32> [#uses=1] + %I.upgrd.9 = sext i16 %I to i32 ; <i32> [#uses=1] + %J.upgrd.10 = sext i16 %J to i32 ; <i32> [#uses=1] + %s1 = add i32 %A.upgrd.1, %B.upgrd.2 ; <i32> [#uses=1] + %s2 = add i32 %C.upgrd.4, %s1 ; <i32> [#uses=1] + %s3 = add i32 %D.upgrd.3, %s2 ; <i32> [#uses=1] + %s4 = add i32 %E.upgrd.5, %s3 ; <i32> [#uses=1] + %s5 = add i32 %F.upgrd.6, %s4 ; <i32> [#uses=1] + %s6 = add i32 %G.upgrd.7, %s5 ; <i32> [#uses=1] + %s7 = add i32 %H.upgrd.8, %s6 ; <i32> [#uses=1] + %s8 = add i32 %I.upgrd.9, %s7 ; <i32> [#uses=1] + %s9 = add i32 %J.upgrd.10, %s8 ; <i32> [#uses=1] + ret i32 %s9 +}
diff --git a/src/LLVM/test/CodeGen/X86/rem-2.ll b/src/LLVM/test/CodeGen/X86/rem-2.ll new file mode 100644 index 0000000..1b2af4b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/rem-2.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=x86 | not grep cltd + +define i32 @test(i32 %X) nounwind readnone { +entry: + %0 = srem i32 41, %X + ret i32 %0 +}
diff --git a/src/LLVM/test/CodeGen/X86/rem.ll b/src/LLVM/test/CodeGen/X86/rem.ll new file mode 100644 index 0000000..3db31d1 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/rem.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -march=x86 | not grep div + +define i32 @test1(i32 %X) { + %tmp1 = srem i32 %X, 255 ; <i32> [#uses=1] + ret i32 %tmp1 +} + +define i32 @test2(i32 %X) { + %tmp1 = srem i32 %X, 256 ; <i32> [#uses=1] + ret i32 %tmp1 +} + +define i32 @test3(i32 %X) { + %tmp1 = urem i32 %X, 255 ; <i32> [#uses=1] + ret i32 %tmp1 +} + +define i32 @test4(i32 %X) { + %tmp1 = urem i32 %X, 256 ; <i32> [#uses=1] + ret i32 %tmp1 +} +
diff --git a/src/LLVM/test/CodeGen/X86/remat-constant.ll b/src/LLVM/test/CodeGen/X86/remat-constant.ll new file mode 100644 index 0000000..3e81320 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/remat-constant.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=x86_64-linux -relocation-model=static | grep xmm | count 2 + +declare void @bar() nounwind + +@a = external constant float + +declare void @qux(float %f) nounwind + +define void @foo() nounwind { + %f = load float* @a + call void @bar() + call void @qux(float %f) + call void @qux(float %f) + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/remat-mov-0.ll b/src/LLVM/test/CodeGen/X86/remat-mov-0.ll new file mode 100644 index 0000000..f89cd33 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/remat-mov-0.ll
@@ -0,0 +1,34 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s + +; CodeGen should remat the zero instead of spilling it. + +declare void @foo(i64 %p) + +; CHECK: bar: +; CHECK: xorl %e[[A0:di|cx]], %e +; CHECK: xorl %e[[A0]], %e[[A0]] +define void @bar() nounwind { + call void @foo(i64 0) + call void @foo(i64 0) + ret void +} + +; CHECK: bat: +; CHECK: movq $-1, %r[[A0]] +; CHECK: movq $-1, %r[[A0]] +define void @bat() nounwind { + call void @foo(i64 -1) + call void @foo(i64 -1) + ret void +} + +; CHECK: bau: +; CHECK: movl $1, %e[[A0]] +; CHECK: movl $1, %e[[A0]] +define void @bau() nounwind { + call void @foo(i64 1) + call void @foo(i64 1) + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/remat-scalar-zero.ll b/src/LLVM/test/CodeGen/X86/remat-scalar-zero.ll new file mode 100644 index 0000000..f6f0ed1 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/remat-scalar-zero.ll
@@ -0,0 +1,96 @@ +; XFAIL: * +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu > %t +; RUN: not grep xor %t +; RUN: not grep movap %t +; RUN: grep {\\.quad.*0} %t + +; Remat should be able to fold the zero constant into the div instructions +; as a constant-pool load. + +define void @foo(double* nocapture %x, double* nocapture %y) nounwind { +entry: + %tmp1 = load double* %x ; <double> [#uses=1] + %arrayidx4 = getelementptr inbounds double* %x, i64 1 ; <double*> [#uses=1] + %tmp5 = load double* %arrayidx4 ; <double> [#uses=1] + %arrayidx8 = getelementptr inbounds double* %x, i64 2 ; <double*> [#uses=1] + %tmp9 = load double* %arrayidx8 ; <double> [#uses=1] + %arrayidx12 = getelementptr inbounds double* %x, i64 3 ; <double*> [#uses=1] + %tmp13 = load double* %arrayidx12 ; <double> [#uses=1] + %arrayidx16 = getelementptr inbounds double* %x, i64 4 ; <double*> [#uses=1] + %tmp17 = load double* %arrayidx16 ; <double> [#uses=1] + %arrayidx20 = getelementptr inbounds double* %x, i64 5 ; <double*> [#uses=1] + %tmp21 = load double* %arrayidx20 ; <double> [#uses=1] + %arrayidx24 = getelementptr inbounds double* %x, i64 6 ; <double*> [#uses=1] + %tmp25 = load double* %arrayidx24 ; <double> [#uses=1] + %arrayidx28 = getelementptr inbounds double* %x, i64 7 ; <double*> [#uses=1] + %tmp29 = load double* %arrayidx28 ; <double> [#uses=1] + %arrayidx32 = getelementptr inbounds double* %x, i64 8 ; <double*> [#uses=1] + %tmp33 = load double* %arrayidx32 ; <double> [#uses=1] + %arrayidx36 = getelementptr inbounds double* %x, i64 9 ; <double*> [#uses=1] + %tmp37 = load double* %arrayidx36 ; <double> [#uses=1] + %arrayidx40 = getelementptr inbounds double* %x, i64 10 ; <double*> [#uses=1] + %tmp41 = load double* %arrayidx40 ; <double> [#uses=1] + %arrayidx44 = getelementptr inbounds double* %x, i64 11 ; <double*> [#uses=1] + %tmp45 = load double* %arrayidx44 ; <double> [#uses=1] + %arrayidx48 = getelementptr inbounds double* %x, i64 12 ; <double*> [#uses=1] + %tmp49 = load double* 
%arrayidx48 ; <double> [#uses=1] + %arrayidx52 = getelementptr inbounds double* %x, i64 13 ; <double*> [#uses=1] + %tmp53 = load double* %arrayidx52 ; <double> [#uses=1] + %arrayidx56 = getelementptr inbounds double* %x, i64 14 ; <double*> [#uses=1] + %tmp57 = load double* %arrayidx56 ; <double> [#uses=1] + %arrayidx60 = getelementptr inbounds double* %x, i64 15 ; <double*> [#uses=1] + %tmp61 = load double* %arrayidx60 ; <double> [#uses=1] + %arrayidx64 = getelementptr inbounds double* %x, i64 16 ; <double*> [#uses=1] + %tmp65 = load double* %arrayidx64 ; <double> [#uses=1] + %div = fdiv double %tmp1, 0.000000e+00 ; <double> [#uses=1] + store double %div, double* %y + %div70 = fdiv double %tmp5, 2.000000e-01 ; <double> [#uses=1] + %arrayidx72 = getelementptr inbounds double* %y, i64 1 ; <double*> [#uses=1] + store double %div70, double* %arrayidx72 + %div74 = fdiv double %tmp9, 2.000000e-01 ; <double> [#uses=1] + %arrayidx76 = getelementptr inbounds double* %y, i64 2 ; <double*> [#uses=1] + store double %div74, double* %arrayidx76 + %div78 = fdiv double %tmp13, 2.000000e-01 ; <double> [#uses=1] + %arrayidx80 = getelementptr inbounds double* %y, i64 3 ; <double*> [#uses=1] + store double %div78, double* %arrayidx80 + %div82 = fdiv double %tmp17, 2.000000e-01 ; <double> [#uses=1] + %arrayidx84 = getelementptr inbounds double* %y, i64 4 ; <double*> [#uses=1] + store double %div82, double* %arrayidx84 + %div86 = fdiv double %tmp21, 2.000000e-01 ; <double> [#uses=1] + %arrayidx88 = getelementptr inbounds double* %y, i64 5 ; <double*> [#uses=1] + store double %div86, double* %arrayidx88 + %div90 = fdiv double %tmp25, 2.000000e-01 ; <double> [#uses=1] + %arrayidx92 = getelementptr inbounds double* %y, i64 6 ; <double*> [#uses=1] + store double %div90, double* %arrayidx92 + %div94 = fdiv double %tmp29, 2.000000e-01 ; <double> [#uses=1] + %arrayidx96 = getelementptr inbounds double* %y, i64 7 ; <double*> [#uses=1] + store double %div94, double* %arrayidx96 + %div98 = fdiv 
double %tmp33, 2.000000e-01 ; <double> [#uses=1] + %arrayidx100 = getelementptr inbounds double* %y, i64 8 ; <double*> [#uses=1] + store double %div98, double* %arrayidx100 + %div102 = fdiv double %tmp37, 2.000000e-01 ; <double> [#uses=1] + %arrayidx104 = getelementptr inbounds double* %y, i64 9 ; <double*> [#uses=1] + store double %div102, double* %arrayidx104 + %div106 = fdiv double %tmp41, 2.000000e-01 ; <double> [#uses=1] + %arrayidx108 = getelementptr inbounds double* %y, i64 10 ; <double*> [#uses=1] + store double %div106, double* %arrayidx108 + %div110 = fdiv double %tmp45, 2.000000e-01 ; <double> [#uses=1] + %arrayidx112 = getelementptr inbounds double* %y, i64 11 ; <double*> [#uses=1] + store double %div110, double* %arrayidx112 + %div114 = fdiv double %tmp49, 2.000000e-01 ; <double> [#uses=1] + %arrayidx116 = getelementptr inbounds double* %y, i64 12 ; <double*> [#uses=1] + store double %div114, double* %arrayidx116 + %div118 = fdiv double %tmp53, 2.000000e-01 ; <double> [#uses=1] + %arrayidx120 = getelementptr inbounds double* %y, i64 13 ; <double*> [#uses=1] + store double %div118, double* %arrayidx120 + %div122 = fdiv double %tmp57, 2.000000e-01 ; <double> [#uses=1] + %arrayidx124 = getelementptr inbounds double* %y, i64 14 ; <double*> [#uses=1] + store double %div122, double* %arrayidx124 + %div126 = fdiv double %tmp61, 2.000000e-01 ; <double> [#uses=1] + %arrayidx128 = getelementptr inbounds double* %y, i64 15 ; <double*> [#uses=1] + store double %div126, double* %arrayidx128 + %div130 = fdiv double %tmp65, 0.000000e+00 ; <double> [#uses=1] + %arrayidx132 = getelementptr inbounds double* %y, i64 16 ; <double*> [#uses=1] + store double %div130, double* %arrayidx132 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/ret-addr.ll b/src/LLVM/test/CodeGen/X86/ret-addr.ll new file mode 100644 index 0000000..b7b57ab --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/ret-addr.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -disable-fp-elim -march=x86 | not grep xor +; RUN: llc < %s -disable-fp-elim -march=x86-64 | not grep xor + +define i8* @h() nounwind readnone optsize { +entry: + %0 = tail call i8* @llvm.returnaddress(i32 2) ; <i8*> [#uses=1] + ret i8* %0 +} + +declare i8* @llvm.returnaddress(i32) nounwind readnone + +define i8* @g() nounwind readnone optsize { +entry: + %0 = tail call i8* @llvm.returnaddress(i32 1) ; <i8*> [#uses=1] + ret i8* %0 +} + +define i8* @f() nounwind readnone optsize { +entry: + %0 = tail call i8* @llvm.returnaddress(i32 0) ; <i8*> [#uses=1] + ret i8* %0 +}
diff --git a/src/LLVM/test/CodeGen/X86/ret-i64-0.ll b/src/LLVM/test/CodeGen/X86/ret-i64-0.ll new file mode 100644 index 0000000..bca0f05 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/ret-i64-0.ll
@@ -0,0 +1,5 @@ +; RUN: llc < %s -march=x86 | grep xor | count 2 + +define i64 @foo() nounwind { + ret i64 0 +}
diff --git a/src/LLVM/test/CodeGen/X86/ret-mmx.ll b/src/LLVM/test/CodeGen/X86/ret-mmx.ll new file mode 100644 index 0000000..865e147 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/ret-mmx.ll
@@ -0,0 +1,39 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin11 -mattr=+mmx,+sse2 | FileCheck %s +; rdar://6602459 + +@g_v1di = external global <1 x i64> + +define void @t1() nounwind { +entry: + %call = call <1 x i64> @return_v1di() ; <<1 x i64>> [#uses=0] + store <1 x i64> %call, <1 x i64>* @g_v1di + ret void +; CHECK: t1: +; CHECK: callq +; CHECK-NEXT: movq _g_v1di +; CHECK-NEXT: movq %rax, +} + +declare <1 x i64> @return_v1di() + +define <1 x i64> @t2() nounwind { + ret <1 x i64> <i64 1> +; CHECK: t2: +; CHECK: movl $1 +; CHECK-NEXT: ret +} + +define <2 x i32> @t3() nounwind { + ret <2 x i32> <i32 1, i32 0> +; CHECK: t3: +; CHECK: movl $1 +; CHECK: movd {{.*}}, %xmm0 +} + +define double @t4() nounwind { + ret double bitcast (<2 x i32> <i32 1, i32 0> to double) +; CHECK: t4: +; CHECK: movl $1 +; CHECK: movd {{.*}}, %xmm0 +} +
diff --git a/src/LLVM/test/CodeGen/X86/rip-rel-address.ll b/src/LLVM/test/CodeGen/X86/rip-rel-address.ll new file mode 100644 index 0000000..24ff07b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/rip-rel-address.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=x86-64 -relocation-model=pic -mtriple=x86_64-apple-darwin10 | FileCheck %s -check-prefix=PIC64 +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -relocation-model=static | FileCheck %s -check-prefix=STATIC64 + +; Use %rip-relative addressing even in static mode on x86-64, because +; it has a smaller encoding. + +@a = internal global double 3.4 +define double @foo() nounwind { + %a = load double* @a + ret double %a + +; PIC64: movsd _a(%rip), %xmm0 +; STATIC64: movsd a(%rip), %xmm0 +}
diff --git a/src/LLVM/test/CodeGen/X86/rodata-relocs.ll b/src/LLVM/test/CodeGen/X86/rodata-relocs.ll new file mode 100644 index 0000000..9291200 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/rodata-relocs.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -relocation-model=static | grep rodata | count 3 +; RUN: llc < %s -relocation-model=static | grep -F "rodata.cst" | count 2 +; RUN: llc < %s -relocation-model=pic | grep rodata | count 2 +; RUN: llc < %s -relocation-model=pic | grep -F ".data.rel.ro" | count 2 +; RUN: llc < %s -relocation-model=pic | grep -F ".data.rel.ro.local" | count 1 +; RUN: llc < %s -relocation-model=pic | grep -F ".data.rel" | count 4 +; RUN: llc < %s -relocation-model=pic | grep -F ".data.rel.local" | count 1 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" +@a = internal unnamed_addr constant [2 x i32] [i32 1, i32 2] +@a1 = unnamed_addr constant [2 x i32] [i32 1, i32 2] +@e = internal unnamed_addr constant [2 x [2 x i32]] [[2 x i32] [i32 1, i32 2], [2 x i32] [i32 3, i32 4]], align 16 +@e1 = unnamed_addr constant [2 x [2 x i32]] [[2 x i32] [i32 1, i32 2], [2 x i32] [i32 3, i32 4]], align 16 +@p = unnamed_addr constant i8* bitcast ([2 x i32]* @a to i8*) +@t = unnamed_addr constant i8* bitcast ([2 x [2 x i32]]* @e to i8*) +@p1 = unnamed_addr constant i8* bitcast ([2 x i32]* @a1 to i8*) +@t1 = unnamed_addr constant i8* bitcast ([2 x [2 x i32]]* @e1 to i8*) +@p2 = internal global i8* bitcast([2 x i32]* @a1 to i8*) +@t2 = internal global i8* bitcast([2 x [2 x i32]]* @e1 to i8*) +@p3 = internal global i8* bitcast([2 x i32]* @a to i8*) +@t3 = internal global i8* bitcast([2 x [2 x i32]]* @e to i8*) +
diff --git a/src/LLVM/test/CodeGen/X86/rot16.ll b/src/LLVM/test/CodeGen/X86/rot16.ll new file mode 100644 index 0000000..de23dcb --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/rot16.ll
@@ -0,0 +1,85 @@ +; RUN: llc < %s -march=x86 | FileCheck %s + +define i16 @foo(i16 %x, i16 %y, i16 %z) nounwind readnone { +entry: +; CHECK: foo: +; CHECK: rolw %cl + %0 = shl i16 %x, %z + %1 = sub i16 16, %z + %2 = lshr i16 %x, %1 + %3 = or i16 %2, %0 + ret i16 %3 +} + +define i16 @bar(i16 %x, i16 %y, i16 %z) nounwind readnone { +entry: +; CHECK: bar: +; CHECK: shldw %cl + %0 = shl i16 %y, %z + %1 = sub i16 16, %z + %2 = lshr i16 %x, %1 + %3 = or i16 %2, %0 + ret i16 %3 +} + +define i16 @un(i16 %x, i16 %y, i16 %z) nounwind readnone { +entry: +; CHECK: un: +; CHECK: rorw %cl + %0 = lshr i16 %x, %z + %1 = sub i16 16, %z + %2 = shl i16 %x, %1 + %3 = or i16 %2, %0 + ret i16 %3 +} + +define i16 @bu(i16 %x, i16 %y, i16 %z) nounwind readnone { +entry: +; CHECK: bu: +; CHECK: shrdw + %0 = lshr i16 %y, %z + %1 = sub i16 16, %z + %2 = shl i16 %x, %1 + %3 = or i16 %2, %0 + ret i16 %3 +} + +define i16 @xfoo(i16 %x, i16 %y, i16 %z) nounwind readnone { +entry: +; CHECK: xfoo: +; CHECK: rolw $5 + %0 = lshr i16 %x, 11 + %1 = shl i16 %x, 5 + %2 = or i16 %0, %1 + ret i16 %2 +} + +define i16 @xbar(i16 %x, i16 %y, i16 %z) nounwind readnone { +entry: +; CHECK: xbar: +; CHECK: shldw $5 + %0 = shl i16 %y, 5 + %1 = lshr i16 %x, 11 + %2 = or i16 %0, %1 + ret i16 %2 +} + +define i16 @xun(i16 %x, i16 %y, i16 %z) nounwind readnone { +entry: +; CHECK: xun: +; CHECK: rolw $11 + %0 = lshr i16 %x, 5 + %1 = shl i16 %x, 11 + %2 = or i16 %0, %1 + ret i16 %2 +} + +define i16 @xbu(i16 %x, i16 %y, i16 %z) nounwind readnone { +entry: +; CHECK: xbu: +; CHECK: shldw $11 + %0 = lshr i16 %y, 5 + %1 = shl i16 %x, 11 + %2 = or i16 %0, %1 + ret i16 %2 +}
diff --git a/src/LLVM/test/CodeGen/X86/rot32.ll b/src/LLVM/test/CodeGen/X86/rot32.ll new file mode 100644 index 0000000..99602fd --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/rot32.ll
@@ -0,0 +1,85 @@ +; RUN: llc < %s -march=x86 | FileCheck %s + +define i32 @foo(i32 %x, i32 %y, i32 %z) nounwind readnone { +entry: +; CHECK: foo: +; CHECK: roll %cl + %0 = shl i32 %x, %z + %1 = sub i32 32, %z + %2 = lshr i32 %x, %1 + %3 = or i32 %2, %0 + ret i32 %3 +} + +define i32 @bar(i32 %x, i32 %y, i32 %z) nounwind readnone { +entry: +; CHECK: bar: +; CHECK: shldl %cl + %0 = shl i32 %y, %z + %1 = sub i32 32, %z + %2 = lshr i32 %x, %1 + %3 = or i32 %2, %0 + ret i32 %3 +} + +define i32 @un(i32 %x, i32 %y, i32 %z) nounwind readnone { +entry: +; CHECK: un: +; CHECK: rorl %cl + %0 = lshr i32 %x, %z + %1 = sub i32 32, %z + %2 = shl i32 %x, %1 + %3 = or i32 %2, %0 + ret i32 %3 +} + +define i32 @bu(i32 %x, i32 %y, i32 %z) nounwind readnone { +entry: +; CHECK: bu: +; CHECK: shrdl %cl + %0 = lshr i32 %y, %z + %1 = sub i32 32, %z + %2 = shl i32 %x, %1 + %3 = or i32 %2, %0 + ret i32 %3 +} + +define i32 @xfoo(i32 %x, i32 %y, i32 %z) nounwind readnone { +entry: +; CHECK: xfoo: +; CHECK: roll $7 + %0 = lshr i32 %x, 25 + %1 = shl i32 %x, 7 + %2 = or i32 %0, %1 + ret i32 %2 +} + +define i32 @xbar(i32 %x, i32 %y, i32 %z) nounwind readnone { +entry: +; CHECK: xbar: +; CHECK: shldl $7 + %0 = shl i32 %y, 7 + %1 = lshr i32 %x, 25 + %2 = or i32 %0, %1 + ret i32 %2 +} + +define i32 @xun(i32 %x, i32 %y, i32 %z) nounwind readnone { +entry: +; CHECK: xun: +; CHECK: roll $25 + %0 = lshr i32 %x, 7 + %1 = shl i32 %x, 25 + %2 = or i32 %0, %1 + ret i32 %2 +} + +define i32 @xbu(i32 %x, i32 %y, i32 %z) nounwind readnone { +entry: +; CHECK: xbu: +; CHECK: shldl + %0 = lshr i32 %y, 7 + %1 = shl i32 %x, 25 + %2 = or i32 %0, %1 + ret i32 %2 +}
diff --git a/src/LLVM/test/CodeGen/X86/rot64.ll b/src/LLVM/test/CodeGen/X86/rot64.ll new file mode 100644 index 0000000..4e082bb --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/rot64.ll
@@ -0,0 +1,73 @@ +; RUN: llc < %s -march=x86-64 > %t +; RUN: grep rol %t | count 3 +; RUN: grep ror %t | count 1 +; RUN: grep shld %t | count 2 +; RUN: grep shrd %t | count 2 + +define i64 @foo(i64 %x, i64 %y, i64 %z) nounwind readnone { +entry: + %0 = shl i64 %x, %z + %1 = sub i64 64, %z + %2 = lshr i64 %x, %1 + %3 = or i64 %2, %0 + ret i64 %3 +} + +define i64 @bar(i64 %x, i64 %y, i64 %z) nounwind readnone { +entry: + %0 = shl i64 %y, %z + %1 = sub i64 64, %z + %2 = lshr i64 %x, %1 + %3 = or i64 %2, %0 + ret i64 %3 +} + +define i64 @un(i64 %x, i64 %y, i64 %z) nounwind readnone { +entry: + %0 = lshr i64 %x, %z + %1 = sub i64 64, %z + %2 = shl i64 %x, %1 + %3 = or i64 %2, %0 + ret i64 %3 +} + +define i64 @bu(i64 %x, i64 %y, i64 %z) nounwind readnone { +entry: + %0 = lshr i64 %y, %z + %1 = sub i64 64, %z + %2 = shl i64 %x, %1 + %3 = or i64 %2, %0 + ret i64 %3 +} + +define i64 @xfoo(i64 %x, i64 %y, i64 %z) nounwind readnone { +entry: + %0 = lshr i64 %x, 57 + %1 = shl i64 %x, 7 + %2 = or i64 %0, %1 + ret i64 %2 +} + +define i64 @xbar(i64 %x, i64 %y, i64 %z) nounwind readnone { +entry: + %0 = shl i64 %y, 7 + %1 = lshr i64 %x, 57 + %2 = or i64 %0, %1 + ret i64 %2 +} + +define i64 @xun(i64 %x, i64 %y, i64 %z) nounwind readnone { +entry: + %0 = lshr i64 %x, 7 + %1 = shl i64 %x, 57 + %2 = or i64 %0, %1 + ret i64 %2 +} + +define i64 @xbu(i64 %x, i64 %y, i64 %z) nounwind readnone { +entry: + %0 = lshr i64 %y, 7 + %1 = shl i64 %x, 57 + %2 = or i64 %0, %1 + ret i64 %2 +}
diff --git a/src/LLVM/test/CodeGen/X86/rotate.ll b/src/LLVM/test/CodeGen/X86/rotate.ll new file mode 100644 index 0000000..0a4aec3 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/rotate.ll
@@ -0,0 +1,100 @@ +; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \ +; RUN: grep {ro\[rl\]} | count 12 + +define i32 @rotl32(i32 %A, i8 %Amt) { + %shift.upgrd.1 = zext i8 %Amt to i32 ; <i32> [#uses=1] + %B = shl i32 %A, %shift.upgrd.1 ; <i32> [#uses=1] + %Amt2 = sub i8 32, %Amt ; <i8> [#uses=1] + %shift.upgrd.2 = zext i8 %Amt2 to i32 ; <i32> [#uses=1] + %C = lshr i32 %A, %shift.upgrd.2 ; <i32> [#uses=1] + %D = or i32 %B, %C ; <i32> [#uses=1] + ret i32 %D +} + +define i32 @rotr32(i32 %A, i8 %Amt) { + %shift.upgrd.3 = zext i8 %Amt to i32 ; <i32> [#uses=1] + %B = lshr i32 %A, %shift.upgrd.3 ; <i32> [#uses=1] + %Amt2 = sub i8 32, %Amt ; <i8> [#uses=1] + %shift.upgrd.4 = zext i8 %Amt2 to i32 ; <i32> [#uses=1] + %C = shl i32 %A, %shift.upgrd.4 ; <i32> [#uses=1] + %D = or i32 %B, %C ; <i32> [#uses=1] + ret i32 %D +} + +define i32 @rotli32(i32 %A) { + %B = shl i32 %A, 5 ; <i32> [#uses=1] + %C = lshr i32 %A, 27 ; <i32> [#uses=1] + %D = or i32 %B, %C ; <i32> [#uses=1] + ret i32 %D +} + +define i32 @rotri32(i32 %A) { + %B = lshr i32 %A, 5 ; <i32> [#uses=1] + %C = shl i32 %A, 27 ; <i32> [#uses=1] + %D = or i32 %B, %C ; <i32> [#uses=1] + ret i32 %D +} + +define i16 @rotl16(i16 %A, i8 %Amt) { + %shift.upgrd.5 = zext i8 %Amt to i16 ; <i16> [#uses=1] + %B = shl i16 %A, %shift.upgrd.5 ; <i16> [#uses=1] + %Amt2 = sub i8 16, %Amt ; <i8> [#uses=1] + %shift.upgrd.6 = zext i8 %Amt2 to i16 ; <i16> [#uses=1] + %C = lshr i16 %A, %shift.upgrd.6 ; <i16> [#uses=1] + %D = or i16 %B, %C ; <i16> [#uses=1] + ret i16 %D +} + +define i16 @rotr16(i16 %A, i8 %Amt) { + %shift.upgrd.7 = zext i8 %Amt to i16 ; <i16> [#uses=1] + %B = lshr i16 %A, %shift.upgrd.7 ; <i16> [#uses=1] + %Amt2 = sub i8 16, %Amt ; <i8> [#uses=1] + %shift.upgrd.8 = zext i8 %Amt2 to i16 ; <i16> [#uses=1] + %C = shl i16 %A, %shift.upgrd.8 ; <i16> [#uses=1] + %D = or i16 %B, %C ; <i16> [#uses=1] + ret i16 %D +} + +define i16 @rotli16(i16 %A) { + %B = shl i16 %A, 5 ; <i16> [#uses=1] + %C = lshr i16 %A, 11 ; <i16> [#uses=1] + %D = 
or i16 %B, %C ; <i16> [#uses=1] + ret i16 %D +} + +define i16 @rotri16(i16 %A) { + %B = lshr i16 %A, 5 ; <i16> [#uses=1] + %C = shl i16 %A, 11 ; <i16> [#uses=1] + %D = or i16 %B, %C ; <i16> [#uses=1] + ret i16 %D +} + +define i8 @rotl8(i8 %A, i8 %Amt) { + %B = shl i8 %A, %Amt ; <i8> [#uses=1] + %Amt2 = sub i8 8, %Amt ; <i8> [#uses=1] + %C = lshr i8 %A, %Amt2 ; <i8> [#uses=1] + %D = or i8 %B, %C ; <i8> [#uses=1] + ret i8 %D +} + +define i8 @rotr8(i8 %A, i8 %Amt) { + %B = lshr i8 %A, %Amt ; <i8> [#uses=1] + %Amt2 = sub i8 8, %Amt ; <i8> [#uses=1] + %C = shl i8 %A, %Amt2 ; <i8> [#uses=1] + %D = or i8 %B, %C ; <i8> [#uses=1] + ret i8 %D +} + +define i8 @rotli8(i8 %A) { + %B = shl i8 %A, 5 ; <i8> [#uses=1] + %C = lshr i8 %A, 3 ; <i8> [#uses=1] + %D = or i8 %B, %C ; <i8> [#uses=1] + ret i8 %D +} + +define i8 @rotri8(i8 %A) { + %B = lshr i8 %A, 5 ; <i8> [#uses=1] + %C = shl i8 %A, 3 ; <i8> [#uses=1] + %D = or i8 %B, %C ; <i8> [#uses=1] + ret i8 %D +}
diff --git a/src/LLVM/test/CodeGen/X86/rotate2.ll b/src/LLVM/test/CodeGen/X86/rotate2.ll new file mode 100644 index 0000000..2eea399 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/rotate2.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -march=x86-64 | grep rol | count 2 + +define i64 @test1(i64 %x) nounwind { +entry: + %tmp2 = lshr i64 %x, 55 ; <i64> [#uses=1] + %tmp4 = shl i64 %x, 9 ; <i64> [#uses=1] + %tmp5 = or i64 %tmp2, %tmp4 ; <i64> [#uses=1] + ret i64 %tmp5 +} + +define i64 @test2(i32 %x) nounwind { +entry: + %tmp2 = lshr i32 %x, 22 ; <i32> [#uses=1] + %tmp4 = shl i32 %x, 10 ; <i32> [#uses=1] + %tmp5 = or i32 %tmp2, %tmp4 ; <i32> [#uses=1] + %tmp56 = zext i32 %tmp5 to i64 ; <i64> [#uses=1] + ret i64 %tmp56 +} +
diff --git a/src/LLVM/test/CodeGen/X86/scalar-extract.ll b/src/LLVM/test/CodeGen/X86/scalar-extract.ll new file mode 100644 index 0000000..2845838 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/scalar-extract.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=x86 -mattr=+mmx -o %t +; RUN: not grep movq %t + +; Check that widening doesn't introduce a mmx register in this case when +; a simple load/store would suffice. + +define void @foo(<2 x i16>* %A, <2 x i16>* %B) { +entry: + %tmp1 = load <2 x i16>* %A ; <<2 x i16>> [#uses=1] + store <2 x i16> %tmp1, <2 x i16>* %B + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/scalar-min-max-fill-operand.ll b/src/LLVM/test/CodeGen/X86/scalar-min-max-fill-operand.ll new file mode 100644 index 0000000..2f90932 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/scalar-min-max-fill-operand.ll
@@ -0,0 +1,27 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; CHECK-NOT: {{(min|max|mov)}} +; CHECK: mov +; CHECK-NOT: {{(min|max|mov)}} +; CHECK: min +; CHECK-NOT: {{(min|max|mov)}} +; CHECK: mov +; CHECK-NOT: {{(min|max|mov)}} +; CHECK: max +; CHECK-NOT: {{(min|max|mov)}} + +declare float @bar() + +define float @foo(float %a) nounwind +{ + %s = call float @bar() + %t = fcmp olt float %s, %a + %u = select i1 %t, float %s, float %a + ret float %u +} +define float @hem(float %a) nounwind +{ + %s = call float @bar() + %t = fcmp ogt float %s, %a + %u = select i1 %t, float %s, float %a + ret float %u +}
diff --git a/src/LLVM/test/CodeGen/X86/scalar_sse_minmax.ll b/src/LLVM/test/CodeGen/X86/scalar_sse_minmax.ll new file mode 100644 index 0000000..ce76ec0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/scalar_sse_minmax.ll
@@ -0,0 +1,44 @@ +; RUN: llc < %s -march=x86 -mattr=+sse,+sse2 | \ +; RUN: grep mins | count 3 +; RUN: llc < %s -march=x86 -mattr=+sse,+sse2 | \ +; RUN: grep maxs | count 2 + +declare i1 @llvm.isunordered.f64(double, double) + +declare i1 @llvm.isunordered.f32(float, float) + +define float @min1(float %x, float %y) { + %tmp = fcmp olt float %x, %y ; <i1> [#uses=1] + %retval = select i1 %tmp, float %x, float %y ; <float> [#uses=1] + ret float %retval +} + +define double @min2(double %x, double %y) { + %tmp = fcmp olt double %x, %y ; <i1> [#uses=1] + %retval = select i1 %tmp, double %x, double %y ; <double> [#uses=1] + ret double %retval +} + +define float @max1(float %x, float %y) { + %tmp = fcmp oge float %x, %y ; <i1> [#uses=1] + %tmp2 = fcmp uno float %x, %y ; <i1> [#uses=1] + %tmp3 = or i1 %tmp2, %tmp ; <i1> [#uses=1] + %retval = select i1 %tmp3, float %x, float %y ; <float> [#uses=1] + ret float %retval +} + +define double @max2(double %x, double %y) { + %tmp = fcmp oge double %x, %y ; <i1> [#uses=1] + %tmp2 = fcmp uno double %x, %y ; <i1> [#uses=1] + %tmp3 = or i1 %tmp2, %tmp ; <i1> [#uses=1] + %retval = select i1 %tmp3, double %x, double %y ; <double> [#uses=1] + ret double %retval +} + +define <4 x float> @min3(float %tmp37) { + %tmp375 = insertelement <4 x float> undef, float %tmp37, i32 0 ; <<4 x float>> [#uses=1] + %tmp48 = tail call <4 x float> @llvm.x86.sse.min.ss( <4 x float> %tmp375, <4 x float> < float 6.553500e+04, float undef, float undef, float undef > ) ; <<4 x float>> [#uses=1] + ret <4 x float> %tmp48 +} + +declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>)
diff --git a/src/LLVM/test/CodeGen/X86/scalar_widen_div.ll b/src/LLVM/test/CodeGen/X86/scalar_widen_div.ll new file mode 100644 index 0000000..adc58ac --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/scalar_widen_div.ll
@@ -0,0 +1,183 @@ +; RUN: llc < %s -march=x86-64 -mattr=+sse42 | FileCheck %s + +; Verify when widening a divide/remainder operation, we only generate a +; divide/rem per element since divide/remainder can trap. + +define void @vectorDiv (<2 x i32> addrspace(1)* %nsource, <2 x i32> addrspace(1)* %dsource, <2 x i32> addrspace(1)* %qdest) nounwind { +; CHECK: idivl +; CHECK: idivl +; CHECK-NOT: idivl +; CHECK: ret +entry: + %nsource.addr = alloca <2 x i32> addrspace(1)*, align 4 + %dsource.addr = alloca <2 x i32> addrspace(1)*, align 4 + %qdest.addr = alloca <2 x i32> addrspace(1)*, align 4 + %index = alloca i32, align 4 + store <2 x i32> addrspace(1)* %nsource, <2 x i32> addrspace(1)** %nsource.addr + store <2 x i32> addrspace(1)* %dsource, <2 x i32> addrspace(1)** %dsource.addr + store <2 x i32> addrspace(1)* %qdest, <2 x i32> addrspace(1)** %qdest.addr + %tmp = load <2 x i32> addrspace(1)** %qdest.addr + %tmp1 = load i32* %index + %arrayidx = getelementptr <2 x i32> addrspace(1)* %tmp, i32 %tmp1 + %tmp2 = load <2 x i32> addrspace(1)** %nsource.addr + %tmp3 = load i32* %index + %arrayidx4 = getelementptr <2 x i32> addrspace(1)* %tmp2, i32 %tmp3 + %tmp5 = load <2 x i32> addrspace(1)* %arrayidx4 + %tmp6 = load <2 x i32> addrspace(1)** %dsource.addr + %tmp7 = load i32* %index + %arrayidx8 = getelementptr <2 x i32> addrspace(1)* %tmp6, i32 %tmp7 + %tmp9 = load <2 x i32> addrspace(1)* %arrayidx8 + %tmp10 = sdiv <2 x i32> %tmp5, %tmp9 + store <2 x i32> %tmp10, <2 x i32> addrspace(1)* %arrayidx + ret void +} + +define <3 x i8> @test_char_div(<3 x i8> %num, <3 x i8> %div) { +; CHECK: idivb +; CHECK: idivb +; CHECK: idivb +; CHECK-NOT: idivb +; CHECK: ret + %div.r = sdiv <3 x i8> %num, %div + ret <3 x i8> %div.r +} + +define <3 x i8> @test_uchar_div(<3 x i8> %num, <3 x i8> %div) { +; CHECK: divb +; CHECK: divb +; CHECK: divb +; CHECK-NOT: divb +; CHECK: ret + %div.r = udiv <3 x i8> %num, %div + ret <3 x i8> %div.r +} + +define <5 x i16> @test_short_div(<5 x i16> %num, <5 x 
i16> %div) { +; CHECK: idivw +; CHECK: idivw +; CHECK: idivw +; CHECK: idivw +; CHECK: idivw +; CHECK-NOT: idivw +; CHECK: ret + %div.r = sdiv <5 x i16> %num, %div + ret <5 x i16> %div.r +} + +define <4 x i16> @test_ushort_div(<4 x i16> %num, <4 x i16> %div) { +; CHECK: divw +; CHECK: divw +; CHECK: divw +; CHECK: divw +; CHECK-NOT: divw +; CHECK: ret + %div.r = udiv <4 x i16> %num, %div + ret <4 x i16> %div.r +} + +define <3 x i32> @test_uint_div(<3 x i32> %num, <3 x i32> %div) { +; CHECK: divl +; CHECK: divl +; CHECK: divl +; CHECK-NOT: divl +; CHECK: ret + %div.r = udiv <3 x i32> %num, %div + ret <3 x i32> %div.r +} + +define <3 x i64> @test_long_div(<3 x i64> %num, <3 x i64> %div) { +; CHECK: idivq +; CHECK: idivq +; CHECK: idivq +; CHECK-NOT: idivq +; CHECK: ret + %div.r = sdiv <3 x i64> %num, %div + ret <3 x i64> %div.r +} + +define <3 x i64> @test_ulong_div(<3 x i64> %num, <3 x i64> %div) { +; CHECK: divq +; CHECK: divq +; CHECK: divq +; CHECK-NOT: divq +; CHECK: ret + %div.r = udiv <3 x i64> %num, %div + ret <3 x i64> %div.r +} + + +define <4 x i8> @test_char_rem(<4 x i8> %num, <4 x i8> %rem) { +; CHECK: idivb +; CHECK: idivb +; CHECK: idivb +; CHECK: idivb +; CHECK-NOT: idivb +; CHECK: ret + %rem.r = srem <4 x i8> %num, %rem + ret <4 x i8> %rem.r +} + +define <5 x i16> @test_short_rem(<5 x i16> %num, <5 x i16> %rem) { +; CHECK: idivw +; CHECK: idivw +; CHECK: idivw +; CHECK: idivw +; CHECK: idivw +; CHECK-NOT: idivw +; CHECK: ret + %rem.r = srem <5 x i16> %num, %rem + ret <5 x i16> %rem.r +} + +define <4 x i32> @test_uint_rem(<4 x i32> %num, <4 x i32> %rem) { +; CHECK: idivl +; CHECK: idivl +; CHECK: idivl +; CHECK: idivl +; CHECK-NOT: idivl +; CHECK: ret + %rem.r = srem <4 x i32> %num, %rem + ret <4 x i32> %rem.r +} + + +define <5 x i64> @test_ulong_rem(<5 x i64> %num, <5 x i64> %rem) { +; CHECK: divq +; CHECK: divq +; CHECK: divq +; CHECK: divq +; CHECK: divq +; CHECK-NOT: divq +; CHECK: ret + %rem.r = urem <5 x i64> %num, %rem + ret <5 x i64> %rem.r +} 
+ +define void @test_int_div(<3 x i32>* %dest, <3 x i32>* %old, i32 %n) { +; CHECK: idivl +; CHECK: idivl +; CHECK: idivl +; CHECK-NOT: idivl +; CHECK: ret +entry: + %cmp13 = icmp sgt i32 %n, 0 + br i1 %cmp13, label %bb.nph, label %for.end + +bb.nph: + br label %for.body + +for.body: + %i.014 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body ] + %arrayidx11 = getelementptr <3 x i32>* %dest, i32 %i.014 + %tmp4 = load <3 x i32>* %arrayidx11 ; <<3 x i32>> [#uses=1] + %arrayidx7 = getelementptr inbounds <3 x i32>* %old, i32 %i.014 + %tmp8 = load <3 x i32>* %arrayidx7 ; <<3 x i32>> [#uses=1] + %div = sdiv <3 x i32> %tmp4, %tmp8 + store <3 x i32> %div, <3 x i32>* %arrayidx11 + %inc = add nsw i32 %i.014, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/scalarize-bitcast.ll b/src/LLVM/test/CodeGen/X86/scalarize-bitcast.ll new file mode 100644 index 0000000..f6b29ec --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/scalarize-bitcast.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s -march=x86-64 +; PR3886 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "x86_64-pc-linux-gnu" + +define void @mmxCombineMaskU(i32* nocapture %src, i32* nocapture %mask) nounwind { +entry: + %tmp1 = load i32* %src ; <i32> [#uses=1] + %0 = insertelement <2 x i32> undef, i32 %tmp1, i32 0 ; <<2 x i32>> [#uses=1] + %1 = insertelement <2 x i32> %0, i32 0, i32 1 ; <<2 x i32>> [#uses=1] + %conv.i.i = bitcast <2 x i32> %1 to <1 x i64> ; <<1 x i64>> [#uses=1] + %tmp2.i.i = extractelement <1 x i64> %conv.i.i, i32 0 ; <i64> [#uses=1] + %tmp22.i = bitcast i64 %tmp2.i.i to <1 x i64> ; <<1 x i64>> [#uses=1] + %tmp15.i = extractelement <1 x i64> %tmp22.i, i32 0 ; <i64> [#uses=1] + %conv.i26.i = bitcast i64 %tmp15.i to <8 x i8> ; <<8 x i8>> [#uses=1] + %shuffle.i.i = shufflevector <8 x i8> %conv.i26.i, <8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef>, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> ; <<8 x i8>> [#uses=1] + %conv6.i.i = bitcast <8 x i8> %shuffle.i.i to <1 x i64> ; <<1 x i64>> [#uses=1] + %tmp12.i.i = extractelement <1 x i64> %conv6.i.i, i32 0 ; <i64> [#uses=1] + %tmp10.i = bitcast i64 %tmp12.i.i to <1 x i64> ; <<1 x i64>> [#uses=1] + %tmp24.i = extractelement <1 x i64> %tmp10.i, i32 0 ; <i64> [#uses=1] + %tmp10 = bitcast i64 %tmp24.i to <1 x i64> ; <<1 x i64>> [#uses=1] + %tmp7 = extractelement <1 x i64> %tmp10, i32 0 ; <i64> [#uses=1] + %call6 = tail call i32 (...)* @store8888(i64 %tmp7) ; <i32> [#uses=1] + store i32 %call6, i32* %src + ret void +} + +declare i32 @store8888(...)
diff --git a/src/LLVM/test/CodeGen/X86/scev-interchange.ll b/src/LLVM/test/CodeGen/X86/scev-interchange.ll new file mode 100644 index 0000000..71a4d21 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/scev-interchange.ll
@@ -0,0 +1,382 @@ +; RUN: llc < %s -march=x86-64 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" + %"struct.DataOutBase::GmvFlags" = type { i32 } + %"struct.FE_DGPNonparametric<3>" = type { [1156 x i8], i32, %"struct.PolynomialSpace<1>" } + %"struct.FiniteElementData<1>" = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } + %struct.Line = type { [2 x i32] } + %"struct.PolynomialSpace<1>" = type { %"struct.std::vector<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >", i32, %"struct.std::vector<int,std::allocator<int> >", %"struct.std::vector<int,std::allocator<int> >" } + %"struct.Polynomials::Polynomial<double>" = type { %struct.Subscriptor, %"struct.std::vector<double,std::allocator<double> >" } + %struct.Subscriptor = type { i32 (...)**, i32, %"struct.std::type_info"* } + %"struct.TableBase<2,double>" = type { %struct.Subscriptor, double*, i32, %"struct.TableIndices<2>" } + %"struct.TableIndices<2>" = type { %struct.Line } + %"struct.std::_Bit_const_iterator" = type { %"struct.std::_Bit_iterator_base" } + %"struct.std::_Bit_iterator_base" = type { i64*, i32 } + %"struct.std::_Bvector_base<std::allocator<bool> >" = type { %"struct.std::_Bvector_base<std::allocator<bool> >::_Bvector_impl" } + %"struct.std::_Bvector_base<std::allocator<bool> >::_Bvector_impl" = type { %"struct.std::_Bit_const_iterator", %"struct.std::_Bit_const_iterator", i64* } + %"struct.std::_Vector_base<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >" = type { %"struct.std::_Vector_base<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >::_Vector_impl" } + %"struct.std::_Vector_base<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >::_Vector_impl" = type { %"struct.Polynomials::Polynomial<double>"*, 
%"struct.Polynomials::Polynomial<double>"*, %"struct.Polynomials::Polynomial<double>"* } + %"struct.std::_Vector_base<double,std::allocator<double> >" = type { %"struct.std::_Vector_base<double,std::allocator<double> >::_Vector_impl" } + %"struct.std::_Vector_base<double,std::allocator<double> >::_Vector_impl" = type { double*, double*, double* } + %"struct.std::_Vector_base<int,std::allocator<int> >" = type { %"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl" } + %"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl" = type { i32*, i32*, i32* } + %"struct.std::_Vector_base<std::vector<bool, std::allocator<bool> >,std::allocator<std::vector<bool, std::allocator<bool> > > >" = type { %"struct.std::_Vector_base<std::vector<bool, std::allocator<bool> >,std::allocator<std::vector<bool, std::allocator<bool> > > >::_Vector_impl" } + %"struct.std::_Vector_base<std::vector<bool, std::allocator<bool> >,std::allocator<std::vector<bool, std::allocator<bool> > > >::_Vector_impl" = type { %"struct.std::vector<bool,std::allocator<bool> >"*, %"struct.std::vector<bool,std::allocator<bool> >"*, %"struct.std::vector<bool,std::allocator<bool> >"* } + %"struct.std::type_info" = type { i32 (...)**, i8* } + %"struct.std::vector<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >" = type { %"struct.std::_Vector_base<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >" } + %"struct.std::vector<bool,std::allocator<bool> >" = type { %"struct.std::_Bvector_base<std::allocator<bool> >" } + %"struct.std::vector<double,std::allocator<double> >" = type { %"struct.std::_Vector_base<double,std::allocator<double> >" } + %"struct.std::vector<int,std::allocator<int> >" = type { %"struct.std::_Vector_base<int,std::allocator<int> >" } + %"struct.std::vector<std::vector<bool, std::allocator<bool> >,std::allocator<std::vector<bool, std::allocator<bool> > > >" = type { %"struct.std::_Vector_base<std::vector<bool, 
std::allocator<bool> >,std::allocator<std::vector<bool, std::allocator<bool> > > >" } + +declare void @_Unwind_Resume(i8*) + +declare i8* @_Znwm(i64) + +declare fastcc void @_ZNSt6vectorIjSaIjEEaSERKS1_(%"struct.std::vector<int,std::allocator<int> >"*, %"struct.std::vector<int,std::allocator<int> >"*) + +declare fastcc void @_ZN9TableBaseILi2EdE6reinitERK12TableIndicesILi2EE(%"struct.TableBase<2,double>"* nocapture, i32, i32) + +declare fastcc void @_ZNSt6vectorIbSaIbEEC1EmRKbRKS0_(%"struct.std::vector<bool,std::allocator<bool> >"* nocapture, i64, i8* nocapture) + +declare fastcc void @_ZNSt6vectorIS_IbSaIbEESaIS1_EEC2EmRKS1_RKS2_(%"struct.std::vector<std::vector<bool, std::allocator<bool> >,std::allocator<std::vector<bool, std::allocator<bool> > > >"* nocapture, i64, %"struct.std::vector<bool,std::allocator<bool> >"* nocapture) + +declare fastcc void @_ZNSt6vectorIN11Polynomials10PolynomialIdEESaIS2_EED1Ev(%"struct.std::vector<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >"* nocapture) + +declare fastcc void @_ZN24TensorProductPolynomialsILi3EEC2IN11Polynomials10PolynomialIdEEEERKSt6vectorIT_SaIS6_EE(%"struct.PolynomialSpace<1>"* nocapture, %"struct.std::vector<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >"* nocapture) + +declare fastcc void @_ZN7FE_PolyI24TensorProductPolynomialsILi3EELi3EEC2EjRKS1_RK17FiniteElementDataILi3EERKSt6vectorIbSaIbEERKS9_ISB_SaISB_EE(%"struct.FE_DGPNonparametric<3>"*, i32, %"struct.PolynomialSpace<1>"* nocapture, %"struct.FiniteElementData<1>"* nocapture, %"struct.std::vector<bool,std::allocator<bool> >"* nocapture, %"struct.std::vector<std::vector<bool, std::allocator<bool> >,std::allocator<std::vector<bool, std::allocator<bool> > > >"* nocapture) + +declare fastcc void @_ZN11FE_Q_Helper12_GLOBAL__N_116invert_numberingERKSt6vectorIjSaIjEE(%"struct.std::vector<int,std::allocator<int> >"* noalias nocapture sret, %"struct.std::vector<int,std::allocator<int> >"* 
nocapture) + +declare fastcc void @_ZN4FE_QILi3EE14get_dpo_vectorEj(%"struct.std::vector<int,std::allocator<int> >"* noalias nocapture sret, i32) + +define fastcc void @_ZN4FE_QILi3EEC1Ej(i32 %degree) { +entry: + invoke fastcc void @_ZNSt6vectorIbSaIbEEC1EmRKbRKS0_(%"struct.std::vector<bool,std::allocator<bool> >"* undef, i64 1, i8* undef) + to label %invcont.i unwind label %lpad.i + +invcont.i: ; preds = %entry + invoke fastcc void @_ZN4FE_QILi3EE14get_dpo_vectorEj(%"struct.std::vector<int,std::allocator<int> >"* noalias sret undef, i32 %degree) + to label %invcont1.i unwind label %lpad120.i + +invcont1.i: ; preds = %invcont.i + invoke fastcc void @_ZNSt6vectorIS_IbSaIbEESaIS1_EEC2EmRKS1_RKS2_(%"struct.std::vector<std::vector<bool, std::allocator<bool> >,std::allocator<std::vector<bool, std::allocator<bool> > > >"* undef, i64 undef, %"struct.std::vector<bool,std::allocator<bool> >"* undef) + to label %invcont3.i unwind label %lpad124.i + +invcont3.i: ; preds = %invcont1.i + invoke fastcc void @_ZN4FE_QILi3EE14get_dpo_vectorEj(%"struct.std::vector<int,std::allocator<int> >"* noalias sret undef, i32 %degree) + to label %invcont4.i unwind label %lpad128.i + +invcont4.i: ; preds = %invcont3.i + invoke fastcc void @_ZNSt6vectorIbSaIbEEC1EmRKbRKS0_(%"struct.std::vector<bool,std::allocator<bool> >"* undef, i64 undef, i8* undef) + to label %invcont6.i unwind label %lpad132.i + +invcont6.i: ; preds = %invcont4.i + invoke fastcc void @_ZN4FE_QILi3EE14get_dpo_vectorEj(%"struct.std::vector<int,std::allocator<int> >"* noalias sret undef, i32 %degree) + to label %invcont7.i unwind label %lpad136.i + +invcont7.i: ; preds = %invcont6.i + invoke fastcc void @_ZN11Polynomials19LagrangeEquidistant23generate_complete_basisEj(%"struct.std::vector<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >"* noalias sret undef, i32 %degree) + to label %invcont9.i unwind label %lpad140.i + +invcont9.i: ; preds = %invcont7.i + invoke fastcc void 
@_ZN24TensorProductPolynomialsILi3EEC2IN11Polynomials10PolynomialIdEEEERKSt6vectorIT_SaIS6_EE(%"struct.PolynomialSpace<1>"* undef, %"struct.std::vector<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >"* undef) + to label %invcont10.i unwind label %lpad144.i + +invcont10.i: ; preds = %invcont9.i + invoke fastcc void @_ZN7FE_PolyI24TensorProductPolynomialsILi3EELi3EEC2EjRKS1_RK17FiniteElementDataILi3EERKSt6vectorIbSaIbEERKS9_ISB_SaISB_EE(%"struct.FE_DGPNonparametric<3>"* undef, i32 %degree, %"struct.PolynomialSpace<1>"* undef, %"struct.FiniteElementData<1>"* undef, %"struct.std::vector<bool,std::allocator<bool> >"* undef, %"struct.std::vector<std::vector<bool, std::allocator<bool> >,std::allocator<std::vector<bool, std::allocator<bool> > > >"* undef) + to label %bb14.i unwind label %lpad148.i + +bb14.i: ; preds = %invcont10.i + br i1 false, label %bb3.i164.i, label %bb.i.i.i.i160.i + +bb.i.i.i.i160.i: ; preds = %bb14.i + unreachable + +bb3.i164.i: ; preds = %bb14.i + br i1 undef, label %bb10.i168.i, label %bb.i.i.i20.i166.i + +bb.i.i.i20.i166.i: ; preds = %bb3.i164.i + unreachable + +bb10.i168.i: ; preds = %bb3.i164.i + invoke fastcc void @_ZNSt6vectorIN11Polynomials10PolynomialIdEESaIS2_EED1Ev(%"struct.std::vector<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >"* undef) + to label %bb21.i unwind label %lpad144.i + +bb21.i: ; preds = %bb10.i168.i + invoke fastcc void @_ZNSt6vectorIN11Polynomials10PolynomialIdEESaIS2_EED1Ev(%"struct.std::vector<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >"* undef) + to label %bb28.i unwind label %lpad140.i + +bb28.i: ; preds = %bb21.i + br i1 undef, label %bb35.i, label %bb.i.i.i175.i + +bb.i.i.i175.i: ; preds = %bb28.i + br label %bb35.i + +bb35.i: ; preds = %bb.i.i.i175.i, %bb28.i + br i1 undef, label %bb42.i, label %bb.i.i.i205.i + +bb.i.i.i205.i: ; preds = %bb35.i + unreachable + +bb42.i: ; preds = %bb35.i + br i1 undef, 
label %bb47.i, label %bb.i.i.i213.i + +bb.i.i.i213.i: ; preds = %bb42.i + unreachable + +bb47.i: ; preds = %bb42.i + br i1 undef, label %bb59.i, label %bb.i.i.i247.i + +bb.i.i.i247.i: ; preds = %bb47.i + unreachable + +bb59.i: ; preds = %bb47.i + br i1 undef, label %bb66.i, label %bb.i.i.i255.i + +bb.i.i.i255.i: ; preds = %bb59.i + unreachable + +bb66.i: ; preds = %bb59.i + br i1 undef, label %bb71.i, label %bb.i.i.i262.i + +bb.i.i.i262.i: ; preds = %bb66.i + br label %bb71.i + +bb71.i: ; preds = %bb.i.i.i262.i, %bb66.i + %tmp11.i.i29.i.i.i.i.i.i = invoke i8* @_Znwm(i64 12) + to label %_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i.i.i unwind label %lpad.i.i.i.i.i.i ; <i8*> [#uses=0] + +lpad.i.i.i.i.i.i: ; preds = %bb71.i + %exn.i.i.i.i.i.i = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + unreachable + +_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i.i.i: ; preds = %bb71.i + br i1 undef, label %_ZNSt6vectorIjSaIjEED1Ev.exit.i.i, label %bb.i.i.i.i94.i + +bb.i.i.i.i94.i: ; preds = %_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i.i.i + unreachable + +_ZNSt6vectorIjSaIjEED1Ev.exit.i.i: ; preds = %_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i.i.i + %tmp11.i.i29.i.i.i.i5.i.i = invoke i8* @_Znwm(i64 undef) + to label %_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i12.i.i unwind label %lpad.i.i.i.i8.i.i ; <i8*> [#uses=0] + +lpad.i.i.i.i8.i.i: ; preds = %_ZNSt6vectorIjSaIjEED1Ev.exit.i.i + %exn.i.i.i.i8.i.i = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + invoke void @_Unwind_Resume(i8* undef) + to label %.noexc.i9.i.i unwind label %lpad.i19.i.i + +.noexc.i9.i.i: ; preds = %lpad.i.i.i.i8.i.i + unreachable + +_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i12.i.i: ; preds = %_ZNSt6vectorIjSaIjEED1Ev.exit.i.i + br i1 undef, label %bb50.i.i.i, label %bb.i.i.i.i.i.i.i.i.i.i + +bb.i.i.i.i.i.i.i.i.i.i: ; preds = %bb.i.i.i.i.i.i.i.i.i.i, %_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i12.i.i + br i1 undef, label 
%bb50.i.i.i, label %bb.i.i.i.i.i.i.i.i.i.i + +bb50.i.i.i: ; preds = %bb.i.i.i.i.i.i.i.i.i.i, %_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i12.i.i + invoke fastcc void @_ZN11FE_Q_Helper12_GLOBAL__N_116invert_numberingERKSt6vectorIjSaIjEE(%"struct.std::vector<int,std::allocator<int> >"* noalias sret undef, %"struct.std::vector<int,std::allocator<int> >"* undef) + to label %bb83.i unwind label %lpad188.i + +lpad.i19.i.i: ; preds = %lpad.i.i.i.i8.i.i + %exn.i19.i.i = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + unreachable + +bb83.i: ; preds = %bb50.i.i.i + br i1 undef, label %invcont84.i, label %bb.i.i.i221.i + +bb.i.i.i221.i: ; preds = %bb83.i + unreachable + +invcont84.i: ; preds = %bb83.i + %tmp11.i.i29.i.i.i.i.i = invoke i8* @_Znwm(i64 undef) + to label %_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i.i unwind label %lpad.i.i.i.i315.i ; <i8*> [#uses=0] + +lpad.i.i.i.i315.i: ; preds = %invcont84.i + %exn.i.i.i.i315.i = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + invoke void @_Unwind_Resume(i8* undef) + to label %.noexc.i316.i unwind label %lpad.i352.i + +.noexc.i316.i: ; preds = %lpad.i.i.i.i315.i + unreachable + +_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i.i: ; preds = %invcont84.i + br i1 undef, label %bb50.i.i, label %bb.i.i.i.i.i.i.i.i320.i + +bb.i.i.i.i.i.i.i.i320.i: ; preds = %bb.i.i.i.i.i.i.i.i320.i, %_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i.i + br i1 undef, label %bb50.i.i, label %bb.i.i.i.i.i.i.i.i320.i + +bb50.i.i: ; preds = %bb.i.i.i.i.i.i.i.i320.i, %_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i.i + invoke fastcc void @_ZN11FE_Q_Helper12_GLOBAL__N_116invert_numberingERKSt6vectorIjSaIjEE(%"struct.std::vector<int,std::allocator<int> >"* noalias sret undef, %"struct.std::vector<int,std::allocator<int> >"* undef) + to label %invcont86.i unwind label %lpad200.i + +lpad.i352.i: ; preds = %lpad.i.i.i.i315.i + %exn.i352.i = landingpad {i8*, i32} personality i32 (...)* 
@__gxx_personality_v0 + cleanup + unreachable + +invcont86.i: ; preds = %bb50.i.i + invoke fastcc void @_ZNSt6vectorIjSaIjEEaSERKS1_(%"struct.std::vector<int,std::allocator<int> >"* undef, %"struct.std::vector<int,std::allocator<int> >"* undef) + to label %.noexc380.i unwind label %lpad204.i + +.noexc380.i: ; preds = %invcont86.i + br i1 undef, label %bb100.i, label %bb.i.i.i198.i + +bb.i.i.i198.i: ; preds = %.noexc380.i + unreachable + +bb100.i: ; preds = %.noexc380.i + br i1 undef, label %invcont101.i, label %bb.i.i.i190.i + +bb.i.i.i190.i: ; preds = %bb100.i + unreachable + +invcont101.i: ; preds = %bb100.i + invoke fastcc void @_ZN9TableBaseILi2EdE6reinitERK12TableIndicesILi2EE(%"struct.TableBase<2,double>"* undef, i32 undef, i32 undef) + to label %_ZN10FullMatrixIdEC1Ejj.exit.i.i unwind label %lpad.i.i.i.i.i + +lpad.i.i.i.i.i: ; preds = %invcont101.i + %exn.i.i.i.i.i = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + unreachable + +_ZN10FullMatrixIdEC1Ejj.exit.i.i: ; preds = %invcont101.i + invoke fastcc void @_ZN9TableBaseILi2EdE6reinitERK12TableIndicesILi2EE(%"struct.TableBase<2,double>"* undef, i32 undef, i32 undef) + to label %_ZN10FullMatrixIdEC1Ejj.exit28.i.i unwind label %lpad.i.i.i27.i.i + +lpad.i.i.i27.i.i: ; preds = %_ZN10FullMatrixIdEC1Ejj.exit.i.i + %exn.i.i.i27.i.i = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + invoke void @_Unwind_Resume(i8* undef) + to label %.noexc.i.i unwind label %lpad.i.i + +.noexc.i.i: ; preds = %lpad.i.i.i27.i.i + unreachable + +_ZN10FullMatrixIdEC1Ejj.exit28.i.i: ; preds = %_ZN10FullMatrixIdEC1Ejj.exit.i.i + br i1 undef, label %bb58.i.i, label %bb.i.i.i304.i.i + +bb.i.i.i304.i.i: ; preds = %_ZN10FullMatrixIdEC1Ejj.exit28.i.i + unreachable + +bb58.i.i: ; preds = %_ZN10FullMatrixIdEC1Ejj.exit28.i.i + br i1 false, label %bb.i191.i, label %bb.i.i.i297.i.i + +bb.i.i.i297.i.i: ; preds = %bb58.i.i + unreachable + +lpad.i.i: ; preds = %lpad.i.i.i27.i.i + %exn.i.i = 
landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + unreachable + +bb.i191.i: ; preds = %.noexc232.i, %bb58.i.i + invoke fastcc void @_ZN9TableBaseILi2EdE6reinitERK12TableIndicesILi2EE(%"struct.TableBase<2,double>"* undef, i32 undef, i32 undef) + to label %.noexc232.i unwind label %lpad196.i + +.noexc232.i: ; preds = %bb.i191.i + br i1 undef, label %bb29.loopexit.i.i, label %bb.i191.i + +bb7.i215.i: ; preds = %bb9.i216.i + br i1 undef, label %bb16.preheader.i.i, label %bb8.i.i + +bb8.i.i: ; preds = %bb7.i215.i + %tmp60.i.i = add i32 %0, 1 ; <i32> [#uses=1] + br label %bb9.i216.i + +bb9.i216.i: ; preds = %bb29.loopexit.i.i, %bb8.i.i + %0 = phi i32 [ 0, %bb29.loopexit.i.i ], [ %tmp60.i.i, %bb8.i.i ] ; <i32> [#uses=2] + br i1 undef, label %bb7.i215.i, label %bb16.preheader.i.i + +bb15.i.i: ; preds = %bb16.preheader.i.i, %bb15.i.i + %j1.0212.i.i = phi i32 [ %1, %bb15.i.i ], [ 0, %bb16.preheader.i.i ] ; <i32> [#uses=2] + %tmp6.i.i195.i.i = load i32* undef, align 4 ; <i32> [#uses=1] + %tmp231.i.i = mul i32 %0, %tmp6.i.i195.i.i ; <i32> [#uses=1] + %tmp13.i197.i.i = add i32 %j1.0212.i.i, %tmp231.i.i ; <i32> [#uses=0] + %1 = add i32 %j1.0212.i.i, 1 ; <i32> [#uses=1] + br i1 undef, label %bb15.i.i, label %bb17.i.i + +bb17.i.i: ; preds = %bb16.preheader.i.i, %bb15.i.i + br label %bb16.preheader.i.i + +bb16.preheader.i.i: ; preds = %bb17.i.i, %bb9.i216.i, %bb7.i215.i + br i1 undef, label %bb17.i.i, label %bb15.i.i + +bb29.loopexit.i.i: ; preds = %.noexc232.i + br label %bb9.i216.i + +lpad.i: ; preds = %entry + %exn.i = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + unreachable + +lpad120.i: ; preds = %invcont.i + %exn120.i = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + unreachable + +lpad124.i: ; preds = %invcont1.i + %exn124.i = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + unreachable + +lpad128.i: ; preds = %invcont3.i + %exn128.i = landingpad {i8*, 
i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + unreachable + +lpad132.i: ; preds = %invcont4.i + %exn132.i = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + unreachable + +lpad136.i: ; preds = %invcont6.i + %exn136.i = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + unreachable + +lpad140.i: ; preds = %bb21.i, %invcont7.i + %exn140.i = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + unreachable + +lpad144.i: ; preds = %bb10.i168.i, %invcont9.i + %exn144.i = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + unreachable + +lpad148.i: ; preds = %invcont10.i + %exn148.i = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + unreachable + +lpad188.i: ; preds = %bb50.i.i.i + %exn188.i = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + unreachable + +lpad196.i: ; preds = %bb.i191.i + %exn196 = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + unreachable + +lpad200.i: ; preds = %bb50.i.i + %exn200.i = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + unreachable + +lpad204.i: ; preds = %invcont86.i + %exn204.i = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + unreachable +} + +declare fastcc void @_ZN11Polynomials19LagrangeEquidistant23generate_complete_basisEj(%"struct.std::vector<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >"* noalias nocapture sret, i32) + +declare i32 @__gxx_personality_v0(...)
diff --git a/src/LLVM/test/CodeGen/X86/sdiv-exact.ll b/src/LLVM/test/CodeGen/X86/sdiv-exact.ll new file mode 100644 index 0000000..48bb883 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sdiv-exact.ll
@@ -0,0 +1,18 @@ +; RUN: llc -march=x86 < %s | FileCheck %s + +define i32 @test1(i32 %x) { + %div = sdiv exact i32 %x, 25 + ret i32 %div +; CHECK: test1: +; CHECK: imull $-1030792151, 4(%esp) +; CHECK-NEXT: ret +} + +define i32 @test2(i32 %x) { + %div = sdiv exact i32 %x, 24 + ret i32 %div +; CHECK: test2: +; CHECK: sarl $3 +; CHECK-NEXT: imull $-1431655765 +; CHECK-NEXT: ret +}
diff --git a/src/LLVM/test/CodeGen/X86/segmented-stacks.ll b/src/LLVM/test/CodeGen/X86/segmented-stacks.ll new file mode 100644 index 0000000..ecdb00d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/segmented-stacks.ll
@@ -0,0 +1,87 @@ +; RUN: llc < %s -mtriple=i686-linux -segmented-stacks | FileCheck %s -check-prefix=X32 +; RUN: llc < %s -mtriple=x86_64-linux -segmented-stacks | FileCheck %s -check-prefix=X64 + +; Just to prevent the alloca from being optimized away +declare void @dummy_use(i32*, i32) + +define i32 @test_basic(i32 %l) { + %mem = alloca i32, i32 %l + call void @dummy_use (i32* %mem, i32 %l) + %terminate = icmp eq i32 %l, 0 + br i1 %terminate, label %true, label %false + +true: + ret i32 0 + +false: + %newlen = sub i32 %l, 1 + %retvalue = call i32 @test_basic(i32 %newlen) + ret i32 %retvalue + +; X32: test_basic: + +; X32: leal -12(%esp), %ecx +; X32-NEXT: cmpl %gs:48, %ecx + +; X32: subl $8, %esp +; X32-NEXT: pushl $4 +; X32-NEXT: pushl $12 +; X32-NEXT: calll __morestack +; X32-NEXT: addl $8, %esp +; X32-NEXT: ret + +; X32: movl %eax, %esp + +; X32: subl $12, %esp +; X32-NEXT: pushl %ecx +; X32-NEXT: calll __morestack_allocate_stack_space +; X32-NEXT: addl $16, %esp + +; X64: test_basic: + +; X64: leaq -24(%rsp), %r11 +; X64-NEXT: cmpq %fs:112, %r11 + +; X64: movabsq $24, %r10 +; X64-NEXT: movabsq $0, %r11 +; X64-NEXT: callq __morestack +; X64-NEXT: ret + +; X64: movq %rsp, %rax +; X64-NEXT: subq %rcx, %rax +; X64-NEXT: cmpq %rax, %fs:112 + +; X64: movq %rax, %rsp + +; X64: movq %rcx, %rdi +; X64-NEXT: callq __morestack_allocate_stack_space + +} + +define i32 @test_nested(i32 * nest %closure, i32 %other) { + %addend = load i32 * %closure + %result = add i32 %other, %addend + ret i32 %result + +; X32: leal (%esp), %edx +; X32-NEXT: cmpl %gs:48, %edx + + +; X32: subl $8, %esp +; X32-NEXT: pushl $4 +; X32-NEXT: pushl $0 +; X32-NEXT: calll __morestack +; X32-NEXT: addl $8, %esp +; X32-NEXT: ret + +; X64: leaq (%rsp), %r11 +; X64-NEXT: cmpq %fs:112, %r11 + +; X64: movq %r10, %rax +; X64-NEXT: movabsq $0, %r10 +; X64-NEXT: movabsq $0, %r11 +; X64-NEXT: callq __morestack +; X64-NEXT: ret +; X64: movq %rax, %r10 + +}
diff --git a/src/LLVM/test/CodeGen/X86/select.ll b/src/LLVM/test/CodeGen/X86/select.ll new file mode 100644 index 0000000..ce04e07 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/select.ll
@@ -0,0 +1,220 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s +; PR5757 + +%0 = type { i64, i32 } + +define i32 @test1(%0* %p, %0* %q, i1 %r) nounwind { + %t0 = load %0* %p + %t1 = load %0* %q + %t4 = select i1 %r, %0 %t0, %0 %t1 + %t5 = extractvalue %0 %t4, 1 + ret i32 %t5 +; CHECK: test1: +; CHECK: cmovneq %rdi, %rsi +; CHECK: movl (%rsi), %eax +} + + +; PR2139 +define i32 @test2() nounwind { +entry: + %tmp73 = tail call i1 @return_false() ; <i8> [#uses=1] + %g.0 = select i1 %tmp73, i16 0, i16 -480 ; <i16> [#uses=2] + %tmp7778 = sext i16 %g.0 to i32 ; <i32> [#uses=1] + %tmp80 = shl i32 %tmp7778, 3 ; <i32> [#uses=2] + %tmp87 = icmp sgt i32 %tmp80, 32767 ; <i1> [#uses=1] + br i1 %tmp87, label %bb90, label %bb91 +bb90: ; preds = %bb84, %bb72 + unreachable +bb91: ; preds = %bb84 + ret i32 0 +; CHECK: test2: +; CHECK: movnew +; CHECK: movswl +} + +declare i1 @return_false() + + +;; Select between two floating point constants. +define float @test3(i32 %x) nounwind readnone { +entry: + %0 = icmp eq i32 %x, 0 ; <i1> [#uses=1] + %iftmp.0.0 = select i1 %0, float 4.200000e+01, float 2.300000e+01 ; <float> [#uses=1] + ret float %iftmp.0.0 +; CHECK: test3: +; CHECK: movss {{.*}},4), %xmm0 +} + +define signext i8 @test4(i8* nocapture %P, double %F) nounwind readonly { +entry: + %0 = fcmp olt double %F, 4.200000e+01 ; <i1> [#uses=1] + %iftmp.0.0 = select i1 %0, i32 4, i32 0 ; <i32> [#uses=1] + %1 = getelementptr i8* %P, i32 %iftmp.0.0 ; <i8*> [#uses=1] + %2 = load i8* %1, align 1 ; <i8> [#uses=1] + ret i8 %2 +; CHECK: test4: +; CHECK: movsbl ({{.*}},4), %eax +} + +define void @test5(i1 %c, <2 x i16> %a, <2 x i16> %b, <2 x i16>* %p) nounwind { + %x = select i1 %c, <2 x i16> %a, <2 x i16> %b + store <2 x i16> %x, <2 x i16>* %p + ret void +; CHECK: test5: +} + +define void @test6(i32 %C, <4 x float>* %A, <4 x float>* %B) nounwind { + %tmp = load <4 x float>* %A ; <<4 x float>> [#uses=1] + %tmp3 = load <4 x float>* %B ; <<4 x float>> [#uses=2] + %tmp9 = fmul <4 x 
float> %tmp3, %tmp3 ; <<4 x float>> [#uses=1] + %tmp.upgrd.1 = icmp eq i32 %C, 0 ; <i1> [#uses=1] + %iftmp.38.0 = select i1 %tmp.upgrd.1, <4 x float> %tmp9, <4 x float> %tmp ; <<4 x float>> [#uses=1] + store <4 x float> %iftmp.38.0, <4 x float>* %A + ret void +; Verify that the fmul gets sunk into the one part of the diamond where it is +; needed. +; CHECK: test6: +; CHECK: jne +; CHECK: mulps +; CHECK: ret +; CHECK: ret +} + +; Select with fp80's +define x86_fp80 @test7(i32 %tmp8) nounwind { + %tmp9 = icmp sgt i32 %tmp8, -1 ; <i1> [#uses=1] + %retval = select i1 %tmp9, x86_fp80 0xK4005B400000000000000, x86_fp80 0xK40078700000000000000 + ret x86_fp80 %retval +; CHECK: test7: +; CHECK: leaq +; CHECK: fldt (%r{{.}}x,%r{{.}}x) +} + +; widening select v6i32 and then a sub +define void @test8(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2) nounwind { + %x = select i1 %c, <6 x i32> %src1, <6 x i32> %src2 + %val = sub <6 x i32> %x, < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 > + store <6 x i32> %val, <6 x i32>* %dst.addr + ret void + +; CHECK: test8: +} + + +;; Test integer select between values and constants. + +define i64 @test9(i64 %x, i64 %y) nounwind readnone ssp noredzone { + %cmp = icmp ne i64 %x, 0 + %cond = select i1 %cmp, i64 %y, i64 -1 + ret i64 %cond +; CHECK: test9: +; CHECK: cmpq $1, %rdi +; CHECK: sbbq %rax, %rax +; CHECK: orq %rsi, %rax +; CHECK: ret +} + +;; Same as test9 +define i64 @test9a(i64 %x, i64 %y) nounwind readnone ssp noredzone { + %cmp = icmp eq i64 %x, 0 + %cond = select i1 %cmp, i64 -1, i64 %y + ret i64 %cond +; CHECK: test9a: +; CHECK: cmpq $1, %rdi +; CHECK: sbbq %rax, %rax +; CHECK: orq %rsi, %rax +; CHECK: ret +} + +define i64 @test9b(i64 %x, i64 %y) nounwind readnone ssp noredzone { + %cmp = icmp eq i64 %x, 0 + %A = sext i1 %cmp to i64 + %cond = or i64 %y, %A + ret i64 %cond +; CHECK: test9b: +; CHECK: cmpq $1, %rdi +; CHECK: sbbq %rax, %rax +; CHECK: orq %rsi, %rax +; CHECK: ret +} + +;; Select between -1 and 1. 
+define i64 @test10(i64 %x, i64 %y) nounwind readnone ssp noredzone { + %cmp = icmp eq i64 %x, 0 + %cond = select i1 %cmp, i64 -1, i64 1 + ret i64 %cond +; CHECK: test10: +; CHECK: cmpq $1, %rdi +; CHECK: sbbq %rax, %rax +; CHECK: orq $1, %rax +; CHECK: ret +} + + + +define i64 @test11(i64 %x, i64 %y) nounwind readnone ssp noredzone { + %cmp = icmp eq i64 %x, 0 + %cond = select i1 %cmp, i64 %y, i64 -1 + ret i64 %cond +; CHECK: test11: +; CHECK: cmpq $1, %rdi +; CHECK: sbbq %rax, %rax +; CHECK: notq %rax +; CHECK: orq %rsi, %rax +; CHECK: ret +} + +define i64 @test11a(i64 %x, i64 %y) nounwind readnone ssp noredzone { + %cmp = icmp ne i64 %x, 0 + %cond = select i1 %cmp, i64 -1, i64 %y + ret i64 %cond +; CHECK: test11a: +; CHECK: cmpq $1, %rdi +; CHECK: sbbq %rax, %rax +; CHECK: notq %rax +; CHECK: orq %rsi, %rax +; CHECK: ret +} + + +declare noalias i8* @_Znam(i64) noredzone + +define noalias i8* @test12(i64 %count) nounwind ssp noredzone { +entry: + %A = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %count, i64 4) + %B = extractvalue { i64, i1 } %A, 1 + %C = extractvalue { i64, i1 } %A, 0 + %D = select i1 %B, i64 -1, i64 %C + %call = tail call noalias i8* @_Znam(i64 %D) nounwind noredzone + ret i8* %call +; CHECK: test12: +; CHECK: mulq +; CHECK: movq $-1, %rdi +; CHECK: cmovnoq %rax, %rdi +; CHECK: jmp __Znam +} + +declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64) nounwind readnone + +define i32 @test13(i32 %a, i32 %b) nounwind { + %c = icmp ult i32 %a, %b + %d = sext i1 %c to i32 + ret i32 %d +; CHECK: test13: +; CHECK: cmpl +; CHECK-NEXT: sbbl +; CHECK-NEXT: ret +} + +define i32 @test14(i32 %a, i32 %b) nounwind { + %c = icmp uge i32 %a, %b + %d = sext i1 %c to i32 + ret i32 %d +; CHECK: test14: +; CHECK: cmpl +; CHECK-NEXT: sbbl +; CHECK-NEXT: notl +; CHECK-NEXT: ret +} +
diff --git a/src/LLVM/test/CodeGen/X86/setcc.ll b/src/LLVM/test/CodeGen/X86/setcc.ll new file mode 100644 index 0000000..c37e15d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/setcc.ll
@@ -0,0 +1,36 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s +; rdar://7329206 + +; Use sbb x, x to materialize carry bit in a GPR. The value is either +; all 1's or all 0's. + +define zeroext i16 @t1(i16 zeroext %x) nounwind readnone ssp { +entry: +; CHECK: t1: +; CHECK: seta %al +; CHECK: movzbl %al, %eax +; CHECK: shll $5, %eax + %0 = icmp ugt i16 %x, 26 ; <i1> [#uses=1] + %iftmp.1.0 = select i1 %0, i16 32, i16 0 ; <i16> [#uses=1] + ret i16 %iftmp.1.0 +} + +define zeroext i16 @t2(i16 zeroext %x) nounwind readnone ssp { +entry: +; CHECK: t2: +; CHECK: sbbl %eax, %eax +; CHECK: andl $32, %eax + %0 = icmp ult i16 %x, 26 ; <i1> [#uses=1] + %iftmp.0.0 = select i1 %0, i16 32, i16 0 ; <i16> [#uses=1] + ret i16 %iftmp.0.0 +} + +define i64 @t3(i64 %x) nounwind readnone ssp { +entry: +; CHECK: t3: +; CHECK: sbbq %rax, %rax +; CHECK: andq $64, %rax + %0 = icmp ult i64 %x, 18 ; <i1> [#uses=1] + %iftmp.2.0 = select i1 %0, i64 64, i64 0 ; <i64> [#uses=1] + ret i64 %iftmp.2.0 +}
diff --git a/src/LLVM/test/CodeGen/X86/setoeq.ll b/src/LLVM/test/CodeGen/X86/setoeq.ll new file mode 100644 index 0000000..aa2f0af --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/setoeq.ll
@@ -0,0 +1,21 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s + +define zeroext i8 @t(double %x) nounwind readnone { +entry: + %0 = fptosi double %x to i32 ; <i32> [#uses=1] + %1 = sitofp i32 %0 to double ; <double> [#uses=1] + %2 = fcmp oeq double %1, %x ; <i1> [#uses=1] + %retval12 = zext i1 %2 to i8 ; <i8> [#uses=1] +; CHECK: cmpeqsd + ret i8 %retval12 +} + +define zeroext i8 @u(double %x) nounwind readnone { +entry: + %0 = fptosi double %x to i32 ; <i32> [#uses=1] + %1 = sitofp i32 %0 to double ; <double> [#uses=1] + %2 = fcmp une double %1, %x ; <i1> [#uses=1] + %retval12 = zext i1 %2 to i8 ; <i8> [#uses=1] +; CHECK: cmpneqsd + ret i8 %retval12 +}
diff --git a/src/LLVM/test/CodeGen/X86/setuge.ll b/src/LLVM/test/CodeGen/X86/setuge.ll new file mode 100644 index 0000000..6caa123 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/setuge.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=x86 | not grep set + +declare i1 @llvm.isunordered.f32(float, float) + +define float @cmp(float %A, float %B, float %C, float %D) nounwind { +entry: + %tmp.1 = fcmp uno float %A, %B ; <i1> [#uses=1] + %tmp.2 = fcmp oge float %A, %B ; <i1> [#uses=1] + %tmp.3 = or i1 %tmp.1, %tmp.2 ; <i1> [#uses=1] + %tmp.4 = select i1 %tmp.3, float %C, float %D ; <float> [#uses=1] + ret float %tmp.4 +} +
diff --git a/src/LLVM/test/CodeGen/X86/sext-i1.ll b/src/LLVM/test/CodeGen/X86/sext-i1.ll new file mode 100644 index 0000000..574769b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sext-i1.ll
@@ -0,0 +1,63 @@ +; RUN: llc < %s -march=x86 -disable-cgp-branch-opts | FileCheck %s -check-prefix=32 +; RUN: llc < %s -march=x86-64 -disable-cgp-branch-opts | FileCheck %s -check-prefix=64 +; rdar://7573216 +; PR6146 + +define i32 @t1(i32 %x) nounwind readnone ssp { +entry: +; 32: t1: +; 32: cmpl $1 +; 32: sbbl + +; 64: t1: +; 64: cmpl $1 +; 64: sbbl + %0 = icmp eq i32 %x, 0 + %iftmp.0.0 = select i1 %0, i32 -1, i32 0 + ret i32 %iftmp.0.0 +} + +define i32 @t2(i32 %x) nounwind readnone ssp { +entry: +; 32: t2: +; 32: cmpl $1 +; 32: sbbl + +; 64: t2: +; 64: cmpl $1 +; 64: sbbl + %0 = icmp eq i32 %x, 0 + %iftmp.0.0 = sext i1 %0 to i32 + ret i32 %iftmp.0.0 +} + +%struct.zbookmark = type { i64, i64 } +%struct.zstream = type { } + +define i32 @t3() nounwind readonly { +entry: +; 32: t3: +; 32: cmpl $1 +; 32: sbbl +; 32: cmpl +; 32: xorl + +; 64: t3: +; 64: cmpl $1 +; 64: sbbq +; 64: cmpq +; 64: xorl + %not.tobool = icmp eq i32 undef, 0 ; <i1> [#uses=2] + %cond = sext i1 %not.tobool to i32 ; <i32> [#uses=1] + %conv = sext i1 %not.tobool to i64 ; <i64> [#uses=1] + %add13 = add i64 0, %conv ; <i64> [#uses=1] + %cmp = icmp ult i64 undef, %add13 ; <i1> [#uses=1] + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + br label %if.end + +if.end: ; preds = %if.then, %entry + %xor27 = xor i32 undef, %cond ; <i32> [#uses=0] + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/X86/sext-load.ll b/src/LLVM/test/CodeGen/X86/sext-load.ll new file mode 100644 index 0000000..c9b39d3 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sext-load.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=x86 | grep movsbl + +define i32 @foo(i32 %X) nounwind { +entry: + %tmp12 = trunc i32 %X to i8 ; <i8> [#uses=1] + %tmp123 = sext i8 %tmp12 to i32 ; <i32> [#uses=1] + ret i32 %tmp123 +} +
diff --git a/src/LLVM/test/CodeGen/X86/sext-ret-val.ll b/src/LLVM/test/CodeGen/X86/sext-ret-val.ll new file mode 100644 index 0000000..da1a187 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sext-ret-val.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=x86 | grep movzbl | count 1 +; rdar://6699246 + +define signext i8 @t1(i8* %A) nounwind readnone ssp { +entry: + %0 = icmp ne i8* %A, null + %1 = zext i1 %0 to i8 + ret i8 %1 +} + +define i8 @t2(i8* %A) nounwind readnone ssp { +entry: + %0 = icmp ne i8* %A, null + %1 = zext i1 %0 to i8 + ret i8 %1 +}
diff --git a/src/LLVM/test/CodeGen/X86/sext-subreg.ll b/src/LLVM/test/CodeGen/X86/sext-subreg.ll new file mode 100644 index 0000000..b2b9f81 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sext-subreg.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s +; rdar://7529457 + +define i64 @t(i64 %A, i64 %B, i32* %P, i64 *%P2) nounwind { +; CHECK: t: +; CHECK: movslq %e{{.*}}, %rax +; CHECK: movq %rax +; CHECK: movl %eax + %C = add i64 %A, %B + %D = trunc i64 %C to i32 + volatile store i32 %D, i32* %P + %E = shl i64 %C, 32 + %F = ashr i64 %E, 32 + volatile store i64 %F, i64 *%P2 + volatile store i32 %D, i32* %P + ret i64 undef +}
diff --git a/src/LLVM/test/CodeGen/X86/sext-trunc.ll b/src/LLVM/test/CodeGen/X86/sext-trunc.ll new file mode 100644 index 0000000..22b3791 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sext-trunc.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=x86 > %t +; RUN: grep movsbl %t +; RUN: not grep movz %t +; RUN: not grep and %t + +define signext i8 @foo(i16 signext %x) nounwind { + %retval56 = trunc i16 %x to i8 + ret i8 %retval56 +}
diff --git a/src/LLVM/test/CodeGen/X86/sfence.ll b/src/LLVM/test/CodeGen/X86/sfence.ll new file mode 100644 index 0000000..0c28407 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sfence.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep sfence + +declare void @llvm.x86.sse.sfence() nounwind + +define void @test() { + call void @llvm.x86.sse.sfence() + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/shift-and.ll b/src/LLVM/test/CodeGen/X86/shift-and.ll new file mode 100644 index 0000000..fd278c2 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/shift-and.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -march=x86 | grep and | count 1 +; RUN: llc < %s -march=x86-64 | not grep and + +define i32 @t1(i32 %t, i32 %val) nounwind { + %shamt = and i32 %t, 31 + %res = shl i32 %val, %shamt + ret i32 %res +} + +@X = internal global i16 0 + +define void @t2(i16 %t) nounwind { + %shamt = and i16 %t, 31 + %tmp = load i16* @X + %tmp1 = ashr i16 %tmp, %shamt + store i16 %tmp1, i16* @X + ret void +} + +define i64 @t3(i64 %t, i64 %val) nounwind { + %shamt = and i64 %t, 63 + %res = lshr i64 %val, %shamt + ret i64 %res +}
diff --git a/src/LLVM/test/CodeGen/X86/shift-coalesce.ll b/src/LLVM/test/CodeGen/X86/shift-coalesce.ll new file mode 100644 index 0000000..c7afeef --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/shift-coalesce.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \ +; RUN: grep {shld.*CL} +; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \ +; RUN: not grep {mov CL, BL} + +; PR687 + +define i64 @foo(i64 %x, i64* %X) { + %tmp.1 = load i64* %X ; <i64> [#uses=1] + %tmp.3 = trunc i64 %tmp.1 to i8 ; <i8> [#uses=1] + %shift.upgrd.1 = zext i8 %tmp.3 to i64 ; <i64> [#uses=1] + %tmp.4 = shl i64 %x, %shift.upgrd.1 ; <i64> [#uses=1] + ret i64 %tmp.4 +} +
diff --git a/src/LLVM/test/CodeGen/X86/shift-codegen.ll b/src/LLVM/test/CodeGen/X86/shift-codegen.ll new file mode 100644 index 0000000..274faf2 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/shift-codegen.ll
@@ -0,0 +1,38 @@ +; RUN: llc < %s -relocation-model=static -march=x86 | FileCheck %s + +; This should produce two shll instructions, not any lea's. + +target triple = "i686-apple-darwin8" +@Y = weak global i32 0 ; <i32*> [#uses=1] +@X = weak global i32 0 ; <i32*> [#uses=2] + + +define void @fn1() { +; CHECK: fn1: +; CHECK-NOT: ret +; CHECK-NOT: lea +; CHECK: shll $3 +; CHECK-NOT: lea +; CHECK: ret + + %tmp = load i32* @Y ; <i32> [#uses=1] + %tmp1 = shl i32 %tmp, 3 ; <i32> [#uses=1] + %tmp2 = load i32* @X ; <i32> [#uses=1] + %tmp3 = or i32 %tmp1, %tmp2 ; <i32> [#uses=1] + store i32 %tmp3, i32* @X + ret void +} + +define i32 @fn2(i32 %X, i32 %Y) { +; CHECK: fn2: +; CHECK-NOT: ret +; CHECK-NOT: lea +; CHECK: shll $3 +; CHECK-NOT: lea +; CHECK: ret + + %tmp2 = shl i32 %Y, 3 ; <i32> [#uses=1] + %tmp4 = or i32 %tmp2, %X ; <i32> [#uses=1] + ret i32 %tmp4 +} +
diff --git a/src/LLVM/test/CodeGen/X86/shift-combine.ll b/src/LLVM/test/CodeGen/X86/shift-combine.ll new file mode 100644 index 0000000..e443ac1 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/shift-combine.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s | not grep shrl + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i686-apple-darwin8" +@array = weak global [4 x i32] zeroinitializer ; <[4 x i32]*> [#uses=1] + +define i32 @foo(i32 %x) { +entry: + %tmp2 = lshr i32 %x, 2 ; <i32> [#uses=1] + %tmp3 = and i32 %tmp2, 3 ; <i32> [#uses=1] + %tmp4 = getelementptr [4 x i32]* @array, i32 0, i32 %tmp3 ; <i32*> [#uses=1] + %tmp5 = load i32* %tmp4, align 4 ; <i32> [#uses=1] + ret i32 %tmp5 +} +
diff --git a/src/LLVM/test/CodeGen/X86/shift-double.ll b/src/LLVM/test/CodeGen/X86/shift-double.ll new file mode 100644 index 0000000..fd66d15 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/shift-double.ll
@@ -0,0 +1,41 @@ +; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \ +; RUN: grep {sh\[lr\]d} | count 5 + +define i64 @test1(i64 %X, i8 %C) { + %shift.upgrd.1 = zext i8 %C to i64 ; <i64> [#uses=1] + %Y = shl i64 %X, %shift.upgrd.1 ; <i64> [#uses=1] + ret i64 %Y +} + +define i64 @test2(i64 %X, i8 %C) { + %shift.upgrd.2 = zext i8 %C to i64 ; <i64> [#uses=1] + %Y = ashr i64 %X, %shift.upgrd.2 ; <i64> [#uses=1] + ret i64 %Y +} + +define i64 @test3(i64 %X, i8 %C) { + %shift.upgrd.3 = zext i8 %C to i64 ; <i64> [#uses=1] + %Y = lshr i64 %X, %shift.upgrd.3 ; <i64> [#uses=1] + ret i64 %Y +} + +define i32 @test4(i32 %A, i32 %B, i8 %C) { + %shift.upgrd.4 = zext i8 %C to i32 ; <i32> [#uses=1] + %X = shl i32 %A, %shift.upgrd.4 ; <i32> [#uses=1] + %Cv = sub i8 32, %C ; <i8> [#uses=1] + %shift.upgrd.5 = zext i8 %Cv to i32 ; <i32> [#uses=1] + %Y = lshr i32 %B, %shift.upgrd.5 ; <i32> [#uses=1] + %Z = or i32 %Y, %X ; <i32> [#uses=1] + ret i32 %Z +} + +define i16 @test5(i16 %A, i16 %B, i8 %C) { + %shift.upgrd.6 = zext i8 %C to i16 ; <i16> [#uses=1] + %X = shl i16 %A, %shift.upgrd.6 ; <i16> [#uses=1] + %Cv = sub i8 16, %C ; <i8> [#uses=1] + %shift.upgrd.7 = zext i8 %Cv to i16 ; <i16> [#uses=1] + %Y = lshr i16 %B, %shift.upgrd.7 ; <i16> [#uses=1] + %Z = or i16 %Y, %X ; <i16> [#uses=1] + ret i16 %Z +} +
diff --git a/src/LLVM/test/CodeGen/X86/shift-folding.ll b/src/LLVM/test/CodeGen/X86/shift-folding.ll new file mode 100644 index 0000000..a7f06c6 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/shift-folding.ll
@@ -0,0 +1,28 @@ +; RUN: llc < %s -march=x86 | \ +; RUN: grep {s\[ah\]\[rl\]l} | count 1 + +define i32* @test1(i32* %P, i32 %X) nounwind { + %Y = lshr i32 %X, 2 ; <i32> [#uses=1] + %gep.upgrd.1 = zext i32 %Y to i64 ; <i64> [#uses=1] + %P2 = getelementptr i32* %P, i64 %gep.upgrd.1 ; <i32*> [#uses=1] + ret i32* %P2 +} + +define i32* @test2(i32* %P, i32 %X) nounwind { + %Y = shl i32 %X, 2 ; <i32> [#uses=1] + %gep.upgrd.2 = zext i32 %Y to i64 ; <i64> [#uses=1] + %P2 = getelementptr i32* %P, i64 %gep.upgrd.2 ; <i32*> [#uses=1] + ret i32* %P2 +} + +define i32* @test3(i32* %P, i32 %X) nounwind { + %Y = ashr i32 %X, 2 ; <i32> [#uses=1] + %P2 = getelementptr i32* %P, i32 %Y ; <i32*> [#uses=1] + ret i32* %P2 +} + +define fastcc i32 @test4(i32* %d) nounwind { + %tmp4 = load i32* %d + %tmp512 = lshr i32 %tmp4, 24 + ret i32 %tmp512 +}
diff --git a/src/LLVM/test/CodeGen/X86/shift-i128.ll b/src/LLVM/test/CodeGen/X86/shift-i128.ll new file mode 100644 index 0000000..c4d15ae --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/shift-i128.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=x86 +; RUN: llc < %s -march=x86-64 + +define void @t(i128 %x, i128 %a, i128* nocapture %r) nounwind { +entry: + %0 = lshr i128 %x, %a + store i128 %0, i128* %r, align 16 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/shift-i256.ll b/src/LLVM/test/CodeGen/X86/shift-i256.ll new file mode 100644 index 0000000..d5f65a6 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/shift-i256.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=x86 +; RUN: llc < %s -march=x86-64 + +define void @t(i256 %x, i256 %a, i256* nocapture %r) nounwind readnone { +entry: + %0 = ashr i256 %x, %a + store i256 %0, i256* %r + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/shift-one.ll b/src/LLVM/test/CodeGen/X86/shift-one.ll new file mode 100644 index 0000000..a0fe250 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/shift-one.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=x86 | not grep leal + +@x = external global i32 ; <i32*> [#uses=1] + +define i32 @test() { + %tmp.0 = load i32* @x ; <i32> [#uses=1] + %tmp.1 = shl i32 %tmp.0, 1 ; <i32> [#uses=1] + ret i32 %tmp.1 +} +
diff --git a/src/LLVM/test/CodeGen/X86/shift-pair.ll b/src/LLVM/test/CodeGen/X86/shift-pair.ll new file mode 100644 index 0000000..24ba1fc --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/shift-pair.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s + +define i64 @test(i64 %A) { +; CHECK: @test +; CHECK: shrq $54 +; CHECK: andq $1020 +; CHECK: ret + %B = lshr i64 %A, 56 + %C = shl i64 %B, 2 + ret i64 %C +}
diff --git a/src/LLVM/test/CodeGen/X86/shift-parts.ll b/src/LLVM/test/CodeGen/X86/shift-parts.ll new file mode 100644 index 0000000..ce4f538 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/shift-parts.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -march=x86-64 | grep shrdq +; PR4736 + +%0 = type { i32, i8, [35 x i8] } + +@g_144 = external global %0, align 8 ; <%0*> [#uses=1] + +define i32 @int87(i32 %uint64p_8) nounwind { +entry: + %srcval4 = load i320* bitcast (%0* @g_144 to i320*), align 8 ; <i320> [#uses=1] + br label %for.cond + +for.cond: ; preds = %for.cond, %entry + %call3.in.in.in.v = select i1 undef, i320 192, i320 128 ; <i320> [#uses=1] + %call3.in.in.in = lshr i320 %srcval4, %call3.in.in.in.v ; <i320> [#uses=1] + %call3.in = trunc i320 %call3.in.in.in to i32 ; <i32> [#uses=1] + %tobool = icmp eq i32 %call3.in, 0 ; <i1> [#uses=1] + br i1 %tobool, label %for.cond, label %if.then + +if.then: ; preds = %for.cond + ret i32 1 +}
diff --git a/src/LLVM/test/CodeGen/X86/shl-anyext.ll b/src/LLVM/test/CodeGen/X86/shl-anyext.ll new file mode 100644 index 0000000..10d489b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/shl-anyext.ll
@@ -0,0 +1,40 @@ +; RUN: llc -march=x86-64 < %s | FileCheck %s + +; Codegen should be able to use a 32-bit shift instead of a 64-bit shift. +; CHECK: shll $16 + +define fastcc void @test(i32 %level, i64 %a, i64 %b, i64 %c, i64 %d, i32* %p) nounwind { +if.end523: ; preds = %if.end453 + %conv7981749 = zext i32 %level to i64 ; <i64> [#uses=1] + %and799 = shl i64 %conv7981749, 16 ; <i64> [#uses=1] + %shl800 = and i64 %and799, 16711680 ; <i64> [#uses=1] + %or801 = or i64 %shl800, %a ; <i64> [#uses=1] + %or806 = or i64 %or801, %b ; <i64> [#uses=1] + %or811 = or i64 %or806, %c ; <i64> [#uses=1] + %or819 = or i64 %or811, %d ; <i64> [#uses=1] + %conv820 = trunc i64 %or819 to i32 ; <i32> [#uses=1] + store i32 %conv820, i32* %p + ret void +} + +; CHECK: foo: + +declare void @bar(i64) + +define fastcc void @foo(i32 %t) { +bb: + %tmp = add i32 %t, -1 ; <i32> [#uses=1] + br label %bb1 + +bb1: ; preds = %bb + %tmp2 = zext i32 %tmp to i64 ; <i64> [#uses=2] + %tmp3 = add i64 %tmp2, 1 ; <i64> [#uses=1] + %tmp4 = xor i64 %tmp2, 536870911 ; <i64> [#uses=1] + %tmp5 = and i64 %tmp3, %tmp4 ; <i64> [#uses=1] + %tmp6 = shl i64 %tmp5, 3 ; <i64> [#uses=1] + %tmp7 = sub i64 64, %tmp6 ; <i64> [#uses=1] + %tmp8 = and i64 %tmp7, 4294967288 ; <i64> [#uses=1] + %tmp9 = lshr i64 -1, %tmp8 ; <i64> [#uses=1] + call void @bar(i64 %tmp9) + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/shl_elim.ll b/src/LLVM/test/CodeGen/X86/shl_elim.ll new file mode 100644 index 0000000..b9171bb --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/shl_elim.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=x86 | grep {movl 8(.esp), %eax} +; RUN: llc < %s -march=x86 | grep {shrl .eax} +; RUN: llc < %s -march=x86 | grep {movswl .ax, .eax} + +define i32 @test1(i64 %a) nounwind { + %tmp29 = lshr i64 %a, 24 ; <i64> [#uses=1] + %tmp23 = trunc i64 %tmp29 to i32 ; <i32> [#uses=1] + %tmp410 = lshr i32 %tmp23, 9 ; <i32> [#uses=1] + %tmp45 = trunc i32 %tmp410 to i16 ; <i16> [#uses=1] + %tmp456 = sext i16 %tmp45 to i32 ; <i32> [#uses=1] + ret i32 %tmp456 +} +
diff --git a/src/LLVM/test/CodeGen/X86/shl_undef.ll b/src/LLVM/test/CodeGen/X86/shl_undef.ll new file mode 100644 index 0000000..54b74cc --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/shl_undef.ll
@@ -0,0 +1,53 @@ +; RUN: llc < %s -O1 -mtriple=i386-apple-darwin | FileCheck %s +; +; Interesting test case where %tmp1220 = xor i32 %tmp862, %tmp592 and +; %tmp1676 = xor i32 %tmp1634, %tmp1530 have zero demanded bits after +; DAGCombiner optimization pass. These are changed to undef and in turn +; the successor shl(s) become shl undef, 1. This pattern then matches +; shl x, 1 -> add x, x. add undef, undef doesn't guarantee the low +; order bit is zero and is incorrect. +; +; See rdar://9453156 and rdar://9487392. +; + +; CHECK-NOT: shl +define i32 @foo(i8* %a0, i32* %a2) nounwind {
+entry: + %tmp0 = alloca i8 + %tmp1 = alloca i32 + store i8 1, i8* %tmp0 + %tmp921.i7845 = load i8* %a0, align 1 + %tmp309 = xor i8 %tmp921.i7845, 104 + %tmp592 = zext i8 %tmp309 to i32 + %tmp862 = xor i32 1293461297, %tmp592 + %tmp1220 = xor i32 %tmp862, %tmp592 + %tmp1506 = shl i32 %tmp1220, 1 + %tmp1530 = sub i32 %tmp592, %tmp1506 + %tmp1557 = sub i32 %tmp1530, 542767629 + %tmp1607 = and i32 %tmp1557, 1 + store i32 %tmp1607, i32* %tmp1 + %tmp1634 = and i32 %tmp1607, 2080309246 + %tmp1676 = xor i32 %tmp1634, %tmp1530 + %tmp1618 = shl i32 %tmp1676, 1 + %tmp1645 = sub i32 %tmp862, %tmp1618 + %tmp1697 = and i32 %tmp1645, 1 + store i32 %tmp1697, i32* %a2 + ret i32 %tmp1607 +} + +; CHECK-NOT: shl +; shl undef, 0 -> undef +define i32 @foo2_undef() nounwind {
+entry: + %tmp2 = shl i32 undef, 0; + ret i32 %tmp2 +} + +; CHECK-NOT: shl +; shl undef, x -> 0 +define i32 @foo1_undef(i32* %a0) nounwind {
+entry: + %tmp1 = load i32* %a0, align 1 + %tmp2 = shl i32 undef, %tmp1; + ret i32 %tmp2 +}
diff --git a/src/LLVM/test/CodeGen/X86/shrink-compare.ll b/src/LLVM/test/CodeGen/X86/shrink-compare.ll new file mode 100644 index 0000000..8d4b07f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/shrink-compare.ll
@@ -0,0 +1,36 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s + +declare void @bar() + +define void @test1(i32* nocapture %X) nounwind { +entry: + %tmp1 = load i32* %X, align 4 + %and = and i32 %tmp1, 255 + %cmp = icmp eq i32 %and, 47 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @bar() nounwind + br label %if.end + +if.end: + ret void +; CHECK: test1: +; CHECK: cmpb $47, (%{{rdi|rcx}}) +} + +define void @test2(i32 %X) nounwind { +entry: + %and = and i32 %X, 255 + %cmp = icmp eq i32 %and, 47 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @bar() nounwind + br label %if.end + +if.end: + ret void +; CHECK: test2: +; CHECK: cmpb $47, %{{dil|cl}} +}
diff --git a/src/LLVM/test/CodeGen/X86/shrink-fp-const1.ll b/src/LLVM/test/CodeGen/X86/shrink-fp-const1.ll new file mode 100644 index 0000000..49b9fa3 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/shrink-fp-const1.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=x86-64 -mattr=+sse2 | not grep cvtss2sd +; PR1264 + +define double @foo(double %x) { + %y = fmul double %x, 5.000000e-01 + ret double %y +}
diff --git a/src/LLVM/test/CodeGen/X86/shrink-fp-const2.ll b/src/LLVM/test/CodeGen/X86/shrink-fp-const2.ll new file mode 100644 index 0000000..3d5203b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/shrink-fp-const2.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=x86 | grep flds +; This should be a flds, not fldt. +define x86_fp80 @test2() nounwind { +entry: + ret x86_fp80 0xK3FFFC000000000000000 +} +
diff --git a/src/LLVM/test/CodeGen/X86/sibcall-2.ll b/src/LLVM/test/CodeGen/X86/sibcall-2.ll new file mode 100644 index 0000000..f8a7465 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sibcall-2.ll
@@ -0,0 +1,52 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim | FileCheck %s -check-prefix=32 +; RUN: llc < %s -mtriple=x86_64-apple-darwin -disable-fp-elim | FileCheck %s -check-prefix=64 + +; Tail call should not use ebp / rbp after it's popped. Use esp / rsp. + +define void @t1(i8* nocapture %value) nounwind { +entry: +; 32: t1: +; 32: jmpl *4(%esp) + +; 64: t1: +; 64: jmpq *%rdi + %0 = bitcast i8* %value to void ()* + tail call void %0() nounwind + ret void +} + +define void @t2(i32 %a, i8* nocapture %value) nounwind { +entry: +; 32: t2: +; 32: jmpl *8(%esp) + +; 64: t2: +; 64: jmpq *%rsi + %0 = bitcast i8* %value to void ()* + tail call void %0() nounwind + ret void +} + +define void @t3(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i8* nocapture %value) nounwind { +entry: +; 32: t3: +; 32: jmpl *28(%esp) + +; 64: t3: +; 64: jmpq *8(%rsp) + %0 = bitcast i8* %value to void ()* + tail call void %0() nounwind + ret void +} + +define void @t4(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i8* nocapture %value) nounwind { +entry: +; 32: t4: +; 32: jmpl *32(%esp) + +; 64: t4: +; 64: jmpq *16(%rsp) + %0 = bitcast i8* %value to void ()* + tail call void %0() nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/sibcall-3.ll b/src/LLVM/test/CodeGen/X86/sibcall-3.ll new file mode 100644 index 0000000..f97abe0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sibcall-3.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -mtriple=i386-unknown-unknown | FileCheck %s +; PR7193 + +define void @t1(i8* inreg %dst, i8* inreg %src, i8* inreg %len) nounwind { +; CHECK: t1: +; CHECK: calll 0 + tail call void null(i8* inreg %dst, i8* inreg %src, i8* inreg %len) nounwind + ret void +} + +define void @t2(i8* inreg %dst, i8* inreg %src, i8* inreg %len) nounwind { +; CHECK: t2: +; CHECK: jmpl + tail call void null(i8* inreg %dst, i8* inreg %src) nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/sibcall-4.ll b/src/LLVM/test/CodeGen/X86/sibcall-4.ll new file mode 100644 index 0000000..1499e66 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sibcall-4.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -mtriple=i386-pc-linux-gnu | FileCheck %s +; pr7610 + +define cc10 void @t(i32* %Base_Arg, i32* %Sp_Arg, i32* %Hp_Arg, i32 %R1_Arg) nounwind { +cm1: +; CHECK: t: +; CHECK: jmpl *%eax + %nm3 = getelementptr i32* %Sp_Arg, i32 1 + %nm9 = load i32* %Sp_Arg + %nma = inttoptr i32 %nm9 to void (i32*, i32*, i32*, i32)* + tail call cc10 void %nma(i32* %Base_Arg, i32* %nm3, i32* %Hp_Arg, i32 %R1_Arg) nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/sibcall-5.ll b/src/LLVM/test/CodeGen/X86/sibcall-5.ll new file mode 100644 index 0000000..9d74121 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sibcall-5.ll
@@ -0,0 +1,31 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin8 -mattr=+sse2 | FileCheck %s --check-prefix=X32 +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X64 + +; Sibcall optimization of expanded libcalls. +; rdar://8707777 + +define double @foo(double %a) nounwind readonly ssp { +entry: +; X32: foo: +; X32: jmp _sin$stub + +; X64: foo: +; X64: jmp _sin + %0 = tail call double @sin(double %a) nounwind readonly + ret double %0 +} + +define float @bar(float %a) nounwind readonly ssp { +; X32: bar: +; X32: jmp _sinf$stub + +; X64: bar: +; X64: jmp _sinf +entry: + %0 = tail call float @sinf(float %a) nounwind readonly + ret float %0 +} + +declare float @sinf(float) nounwind readonly + +declare double @sin(double) nounwind readonly
diff --git a/src/LLVM/test/CodeGen/X86/sibcall-byval.ll b/src/LLVM/test/CodeGen/X86/sibcall-byval.ll new file mode 100644 index 0000000..c335f30 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sibcall-byval.ll
@@ -0,0 +1,31 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s -check-prefix=32 +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s -check-prefix=64 + +%struct.p = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } + +define i32 @f(%struct.p* byval align 4 %q) nounwind ssp { +entry: +; 32: _f: +; 32: jmp L_g$stub + +; 64: _f: +; 64: jmp _g + %call = tail call i32 @g(%struct.p* byval align 4 %q) nounwind + ret i32 %call +} + +declare i32 @g(%struct.p* byval align 4) + +define i32 @h(%struct.p* byval align 4 %q, i32 %r) nounwind ssp { +entry: +; 32: _h: +; 32: jmp L_i$stub + +; 64: _h: +; 64: jmp _i + + %call = tail call i32 @i(%struct.p* byval align 4 %q, i32 %r) nounwind + ret i32 %call +} + +declare i32 @i(%struct.p* byval align 4, i32)
diff --git a/src/LLVM/test/CodeGen/X86/sibcall.ll b/src/LLVM/test/CodeGen/X86/sibcall.ll new file mode 100644 index 0000000..a9a5420 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sibcall.ll
@@ -0,0 +1,331 @@ +; RUN: llc < %s -mtriple=i686-linux -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=32 +; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=64 + +define void @t1(i32 %x) nounwind ssp { +entry: +; 32: t1: +; 32: jmp {{_?}}foo + +; 64: t1: +; 64: jmp {{_?}}foo + tail call void @foo() nounwind + ret void +} + +declare void @foo() + +define void @t2() nounwind ssp { +entry: +; 32: t2: +; 32: jmp {{_?}}foo2 + +; 64: t2: +; 64: jmp {{_?}}foo2 + %0 = tail call i32 @foo2() nounwind + ret void +} + +declare i32 @foo2() + +define void @t3() nounwind ssp { +entry: +; 32: t3: +; 32: jmp {{_?}}foo3 + +; 64: t3: +; 64: jmp {{_?}}foo3 + %0 = tail call i32 @foo3() nounwind + ret void +} + +declare i32 @foo3() + +define void @t4(void (i32)* nocapture %x) nounwind ssp { +entry: +; 32: t4: +; 32: calll * +; FIXME: gcc can generate a tailcall for this. But it's tricky. + +; 64: t4: +; 64-NOT: call +; 64: jmpq * + tail call void %x(i32 0) nounwind + ret void +} + +define void @t5(void ()* nocapture %x) nounwind ssp { +entry: +; 32: t5: +; 32-NOT: call +; 32: jmpl *4(%esp) + +; 64: t5: +; 64-NOT: call +; 64: jmpq *%rdi + tail call void %x() nounwind + ret void +} + +define i32 @t6(i32 %x) nounwind ssp { +entry: +; 32: t6: +; 32: calll {{_?}}t6 +; 32: jmp {{_?}}bar + +; 64: t6: +; 64: jmp {{_?}}t6 +; 64: jmp {{_?}}bar + %0 = icmp slt i32 %x, 10 + br i1 %0, label %bb, label %bb1 + +bb: + %1 = add nsw i32 %x, -1 + %2 = tail call i32 @t6(i32 %1) nounwind ssp + ret i32 %2 + +bb1: + %3 = tail call i32 @bar(i32 %x) nounwind + ret i32 %3 +} + +declare i32 @bar(i32) + +define i32 @t7(i32 %a, i32 %b, i32 %c) nounwind ssp { +entry: +; 32: t7: +; 32: jmp {{_?}}bar2 + +; 64: t7: +; 64: jmp {{_?}}bar2 + %0 = tail call i32 @bar2(i32 %a, i32 %b, i32 %c) nounwind + ret i32 %0 +} + +declare i32 @bar2(i32, i32, i32) + +define signext i16 @t8() nounwind ssp { +entry: +; 32: t8: +; 32: calll {{_?}}bar3 + +; 64: t8: +; 64: callq 
{{_?}}bar3 + %0 = tail call signext i16 @bar3() nounwind ; <i16> [#uses=1] + ret i16 %0 +} + +declare signext i16 @bar3() + +define signext i16 @t9(i32 (i32)* nocapture %x) nounwind ssp { +entry: +; 32: t9: +; 32: calll * + +; 64: t9: +; 64: callq * + %0 = bitcast i32 (i32)* %x to i16 (i32)* + %1 = tail call signext i16 %0(i32 0) nounwind + ret i16 %1 +} + +define void @t10() nounwind ssp { +entry: +; 32: t10: +; 32: calll + +; 64: t10: +; 64: callq + %0 = tail call i32 @foo4() noreturn nounwind + unreachable +} + +declare i32 @foo4() + +define i32 @t11(i32 %x, i32 %y, i32 %z.0, i32 %z.1, i32 %z.2) nounwind ssp { +; In 32-bit mode, it's emitting a bunch of dead loads that are not being +; eliminated currently. + +; 32: t11: +; 32-NOT: subl ${{[0-9]+}}, %esp +; 32: jne +; 32-NOT: movl +; 32-NOT: addl ${{[0-9]+}}, %esp +; 32: jmp {{_?}}foo5 + +; 64: t11: +; 64-NOT: subq ${{[0-9]+}}, %esp +; 64-NOT: addq ${{[0-9]+}}, %esp +; 64: jmp {{_?}}foo5 +entry: + %0 = icmp eq i32 %x, 0 + br i1 %0, label %bb6, label %bb + +bb: + %1 = tail call i32 @foo5(i32 %x, i32 %y, i32 %z.0, i32 %z.1, i32 %z.2) nounwind + ret i32 %1 + +bb6: + ret i32 0 +} + +declare i32 @foo5(i32, i32, i32, i32, i32) + +%struct.t = type { i32, i32, i32, i32, i32 } + +define i32 @t12(i32 %x, i32 %y, %struct.t* byval align 4 %z) nounwind ssp { +; 32: t12: +; 32-NOT: subl ${{[0-9]+}}, %esp +; 32-NOT: addl ${{[0-9]+}}, %esp +; 32: jmp {{_?}}foo6 + +; 64: t12: +; 64-NOT: subq ${{[0-9]+}}, %esp +; 64-NOT: addq ${{[0-9]+}}, %esp +; 64: jmp {{_?}}foo6 +entry: + %0 = icmp eq i32 %x, 0 + br i1 %0, label %bb2, label %bb + +bb: + %1 = tail call i32 @foo6(i32 %x, i32 %y, %struct.t* byval align 4 %z) nounwind + ret i32 %1 + +bb2: + ret i32 0 +} + +declare i32 @foo6(i32, i32, %struct.t* byval align 4) + +; rdar://r7717598 +%struct.ns = type { i32, i32 } +%struct.cp = type { float, float, float, float, float } + +define %struct.ns* @t13(%struct.cp* %yy) nounwind ssp { +; 32: t13: +; 32-NOT: jmp +; 32: calll +; 32: ret + +; 
64: t13: +; 64-NOT: jmp +; 64: callq +; 64: ret +entry: + %0 = tail call fastcc %struct.ns* @foo7(%struct.cp* byval align 4 %yy, i8 signext 0) nounwind + ret %struct.ns* %0 +} + +; rdar://6195379 +; llvm can't do sibcall for this in 32-bit mode (yet). +declare fastcc %struct.ns* @foo7(%struct.cp* byval align 4, i8 signext) nounwind ssp + +%struct.__block_descriptor = type { i64, i64 } +%struct.__block_descriptor_withcopydispose = type { i64, i64, i8*, i8* } +%struct.__block_literal_1 = type { i8*, i32, i32, i8*, %struct.__block_descriptor* } +%struct.__block_literal_2 = type { i8*, i32, i32, i8*, %struct.__block_descriptor_withcopydispose*, void ()* } + +define void @t14(%struct.__block_literal_2* nocapture %.block_descriptor) nounwind ssp { +entry: +; 64: t14: +; 64: movq 32(%rdi) +; 64-NOT: movq 16(%rdi) +; 64: jmpq *16({{%rdi|%rax}}) + %0 = getelementptr inbounds %struct.__block_literal_2* %.block_descriptor, i64 0, i32 5 ; <void ()**> [#uses=1] + %1 = load void ()** %0, align 8 ; <void ()*> [#uses=2] + %2 = bitcast void ()* %1 to %struct.__block_literal_1* ; <%struct.__block_literal_1*> [#uses=1] + %3 = getelementptr inbounds %struct.__block_literal_1* %2, i64 0, i32 3 ; <i8**> [#uses=1] + %4 = load i8** %3, align 8 ; <i8*> [#uses=1] + %5 = bitcast i8* %4 to void (i8*)* ; <void (i8*)*> [#uses=1] + %6 = bitcast void ()* %1 to i8* ; <i8*> [#uses=1] + tail call void %5(i8* %6) nounwind + ret void +} + +; rdar://7726868 +%struct.foo = type { [4 x i32] } + +define void @t15(%struct.foo* noalias sret %agg.result) nounwind { +; 32: t15: +; 32: calll {{_?}}f +; 32: ret $4 + +; 64: t15: +; 64: callq {{_?}}f +; 64: ret + tail call fastcc void @f(%struct.foo* noalias sret %agg.result) nounwind + ret void +} + +declare void @f(%struct.foo* noalias sret) nounwind + +define void @t16() nounwind ssp { +entry: +; 32: t16: +; 32: calll {{_?}}bar4 +; 32: fstp + +; 64: t16: +; 64: jmp {{_?}}bar4 + %0 = tail call double @bar4() nounwind + ret void +} + +declare double @bar4() + +; 
rdar://6283267 +define void @t17() nounwind ssp { +entry: +; 32: t17: +; 32: jmp {{_?}}bar5 + +; 64: t17: +; 64: xorb %al, %al +; 64: jmp {{_?}}bar5 + tail call void (...)* @bar5() nounwind + ret void +} + +declare void @bar5(...) + +; rdar://7774847 +define void @t18() nounwind ssp { +entry: +; 32: t18: +; 32: calll {{_?}}bar6 +; 32: fstp %st(0) + +; 64: t18: +; 64: xorb %al, %al +; 64: jmp {{_?}}bar6 + %0 = tail call double (...)* @bar6() nounwind + ret void +} + +declare double @bar6(...) + +define void @t19() alignstack(32) nounwind { +entry: +; CHECK: t19: +; CHECK: andl $-32 +; CHECK: calll {{_?}}foo + tail call void @foo() nounwind + ret void +} + +; If caller / callee calling convention mismatch then check if the return +; values are returned in the same registers. +; rdar://7874780 + +define double @t20(double %x) nounwind { +entry: +; 32: t20: +; 32: calll {{_?}}foo20 +; 32: fldl (%esp) + +; 64: t20: +; 64: jmp {{_?}}foo20 + %0 = tail call fastcc double @foo20(double %x) nounwind + ret double %0 +} + +declare fastcc double @foo20(double) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/sincos.ll b/src/LLVM/test/CodeGen/X86/sincos.ll new file mode 100644 index 0000000..13f9329 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sincos.ll
@@ -0,0 +1,48 @@ +; Make sure this testcase codegens to the sin and cos instructions, not calls +; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math | \ +; RUN: grep sin\$ | count 3 +; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math | \ +; RUN: grep cos\$ | count 3 + +declare float @sinf(float) readonly + +declare double @sin(double) readonly + +declare x86_fp80 @sinl(x86_fp80) readonly + +define float @test1(float %X) { + %Y = call float @sinf(float %X) readonly + ret float %Y +} + +define double @test2(double %X) { + %Y = call double @sin(double %X) readonly + ret double %Y +} + +define x86_fp80 @test3(x86_fp80 %X) { + %Y = call x86_fp80 @sinl(x86_fp80 %X) readonly + ret x86_fp80 %Y +} + +declare float @cosf(float) readonly + +declare double @cos(double) readonly + +declare x86_fp80 @cosl(x86_fp80) readonly + +define float @test4(float %X) { + %Y = call float @cosf(float %X) readonly + ret float %Y +} + +define double @test5(double %X) { + %Y = call double @cos(double %X) readonly + ret double %Y +} + +define x86_fp80 @test6(x86_fp80 %X) { + %Y = call x86_fp80 @cosl(x86_fp80 %X) readonly + ret x86_fp80 %Y +} +
diff --git a/src/LLVM/test/CodeGen/X86/sink-hoist.ll b/src/LLVM/test/CodeGen/X86/sink-hoist.ll new file mode 100644 index 0000000..e13a817 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sink-hoist.ll
@@ -0,0 +1,174 @@ +; RUN: llc < %s -march=x86-64 -asm-verbose=false -mtriple=x86_64-unknown-linux-gnu -post-RA-scheduler=true | FileCheck %s + +; Currently, floating-point selects are lowered to CFG triangles. +; This means that one side of the select is always unconditionally +; evaluated, however with MachineSink we can sink the other side so +; that it's conditionally evaluated. + +; CHECK: foo: +; CHECK-NEXT: testb $1, %dil +; CHECK-NEXT: je +; CHECK-NEXT: divsd +; CHECK-NEXT: ret +; CHECK: divsd + +define double @foo(double %x, double %y, i1 %c) nounwind { + %a = fdiv double %x, 3.2 + %b = fdiv double %y, 3.3 + %z = select i1 %c, double %a, double %b + ret double %z +} + +; Make sure the critical edge is broken so the divsd is sunken below +; the conditional branch. +; rdar://8454886 + +; CHECK: split: +; CHECK-NEXT: testb $1, %dil +; CHECK-NEXT: je +; CHECK-NEXT: divsd +; CHECK-NEXT: ret +; CHECK: movaps +; CHECK-NEXT: ret +define double @split(double %x, double %y, i1 %c) nounwind { + %a = fdiv double %x, 3.2 + %z = select i1 %c, double %a, double %y + ret double %z +} + + +; Hoist floating-point constant-pool loads out of loops. + +; CHECK: bar: +; CHECK: movsd +; CHECK: align +define void @bar(double* nocapture %p, i64 %n) nounwind { +entry: + %0 = icmp sgt i64 %n, 0 + br i1 %0, label %bb, label %return + +bb: + %i.03 = phi i64 [ 0, %entry ], [ %3, %bb ] + %scevgep = getelementptr double* %p, i64 %i.03 + %1 = load double* %scevgep, align 8 + %2 = fdiv double 3.200000e+00, %1 + store double %2, double* %scevgep, align 8 + %3 = add nsw i64 %i.03, 1 + %exitcond = icmp eq i64 %3, %n + br i1 %exitcond, label %return, label %bb + +return: + ret void +} + +; Sink instructions with dead EFLAGS defs. + +; FIXME: Unfail the zzz test if we can correctly mark pregs with the kill flag. +; +; See <rdar://problem/8030636>. 
This test isn't valid after we made machine +; sinking more conservative about sinking instructions that define a preg into a +; block when we don't know if the preg is killed within the current block. + + +; FIXMEHECK: zzz: +; FIXMEHECK: je +; FIXMEHECK-NEXT: orb + +; define zeroext i8 @zzz(i8 zeroext %a, i8 zeroext %b) nounwind readnone { +; entry: +; %tmp = zext i8 %a to i32 ; <i32> [#uses=1] +; %tmp2 = icmp eq i8 %a, 0 ; <i1> [#uses=1] +; %tmp3 = or i8 %b, -128 ; <i8> [#uses=1] +; %tmp4 = and i8 %b, 127 ; <i8> [#uses=1] +; %b_addr.0 = select i1 %tmp2, i8 %tmp4, i8 %tmp3 ; <i8> [#uses=1] +; ret i8 %b_addr.0 +; } + +; Codegen should hoist and CSE these constants. + +; CHECK: vv: +; CHECK: LCPI3_0(%rip), %xmm0 +; CHECK: LCPI3_1(%rip), %xmm1 +; CHECK: LCPI3_2(%rip), %xmm2 +; CHECK: align +; CHECK-NOT: LCPI +; CHECK: ret + +@_minusZero.6007 = internal constant <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00> ; <<4 x float>*> [#uses=0] +@twoTo23.6008 = internal constant <4 x float> <float 8.388608e+06, float 8.388608e+06, float 8.388608e+06, float 8.388608e+06> ; <<4 x float>*> [#uses=0] + +define void @vv(float* %y, float* %x, i32* %n) nounwind ssp { +entry: + br label %bb60 + +bb: ; preds = %bb60 + %i.0 = phi i32 [ 0, %bb60 ] ; <i32> [#uses=2] + %0 = bitcast float* %x_addr.0 to <4 x float>* ; <<4 x float>*> [#uses=1] + %1 = load <4 x float>* %0, align 16 ; <<4 x float>> [#uses=4] + %tmp20 = bitcast <4 x float> %1 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp22 = and <4 x i32> %tmp20, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647> ; <<4 x i32>> [#uses=1] + %tmp23 = bitcast <4 x i32> %tmp22 to <4 x float> ; <<4 x float>> [#uses=1] + %tmp25 = bitcast <4 x float> %1 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp27 = and <4 x i32> %tmp25, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648> ; <<4 x i32>> [#uses=2] + %tmp30 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %tmp23, <4 x float> 
<float 8.388608e+06, float 8.388608e+06, float 8.388608e+06, float 8.388608e+06>, i8 5) ; <<4 x float>> [#uses=1] + %tmp34 = bitcast <4 x float> %tmp30 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp36 = xor <4 x i32> %tmp34, <i32 -1, i32 -1, i32 -1, i32 -1> ; <<4 x i32>> [#uses=1] + %tmp37 = and <4 x i32> %tmp36, <i32 1258291200, i32 1258291200, i32 1258291200, i32 1258291200> ; <<4 x i32>> [#uses=1] + %tmp42 = or <4 x i32> %tmp37, %tmp27 ; <<4 x i32>> [#uses=1] + %tmp43 = bitcast <4 x i32> %tmp42 to <4 x float> ; <<4 x float>> [#uses=2] + %tmp45 = fadd <4 x float> %1, %tmp43 ; <<4 x float>> [#uses=1] + %tmp47 = fsub <4 x float> %tmp45, %tmp43 ; <<4 x float>> [#uses=2] + %tmp49 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %1, <4 x float> %tmp47, i8 1) ; <<4 x float>> [#uses=1] + %2 = bitcast <4 x float> %tmp49 to <4 x i32> ; <<4 x i32>> [#uses=1] + %3 = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %2) nounwind readnone ; <<4 x float>> [#uses=1] + %tmp53 = fadd <4 x float> %tmp47, %3 ; <<4 x float>> [#uses=1] + %tmp55 = bitcast <4 x float> %tmp53 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp57 = or <4 x i32> %tmp55, %tmp27 ; <<4 x i32>> [#uses=1] + %tmp58 = bitcast <4 x i32> %tmp57 to <4 x float> ; <<4 x float>> [#uses=1] + %4 = bitcast float* %y_addr.0 to <4 x float>* ; <<4 x float>*> [#uses=1] + store <4 x float> %tmp58, <4 x float>* %4, align 16 + %5 = getelementptr float* %x_addr.0, i64 4 ; <float*> [#uses=1] + %6 = getelementptr float* %y_addr.0, i64 4 ; <float*> [#uses=1] + %7 = add i32 %i.0, 4 ; <i32> [#uses=1] + %8 = load i32* %n, align 4 ; <i32> [#uses=1] + %9 = icmp sgt i32 %8, %7 ; <i1> [#uses=1] + br i1 %9, label %bb60, label %return + +bb60: ; preds = %bb, %entry + %x_addr.0 = phi float* [ %x, %entry ], [ %5, %bb ] ; <float*> [#uses=2] + %y_addr.0 = phi float* [ %y, %entry ], [ %6, %bb ] ; <float*> [#uses=2] + br label %bb + +return: ; preds = %bb60 + ret void +} + +declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind 
readnone + +declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone + +; CodeGen should use the correct register class when extracting +; a load from a zero-extending load for hoisting. + +; CHECK: default_get_pch_validity: +; CHECK: movl cl_options_count(%rip), %ecx + +@cl_options_count = external constant i32 ; <i32*> [#uses=2] + +define void @default_get_pch_validity() nounwind { +entry: + %tmp4 = load i32* @cl_options_count, align 4 ; <i32> [#uses=1] + %tmp5 = icmp eq i32 %tmp4, 0 ; <i1> [#uses=1] + br i1 %tmp5, label %bb6, label %bb2 + +bb2: ; preds = %bb2, %entry + %i.019 = phi i64 [ 0, %entry ], [ %tmp25, %bb2 ] ; <i64> [#uses=1] + %tmp25 = add i64 %i.019, 1 ; <i64> [#uses=2] + %tmp11 = load i32* @cl_options_count, align 4 ; <i32> [#uses=1] + %tmp12 = zext i32 %tmp11 to i64 ; <i64> [#uses=1] + %tmp13 = icmp ugt i64 %tmp12, %tmp25 ; <i1> [#uses=1] + br i1 %tmp13, label %bb2, label %bb6 + +bb6: ; preds = %bb2, %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/small-byval-memcpy.ll b/src/LLVM/test/CodeGen/X86/small-byval-memcpy.ll new file mode 100644 index 0000000..1b596b5 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/small-byval-memcpy.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=core2 | grep movsd | count 8 +; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=nehalem | grep movups | count 2 + +define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 }* byval align 4 %z) nounwind { +entry: + %iz = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=3] + %tmp1 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 1 ; <x86_fp80*> [#uses=1] + %tmp2 = load x86_fp80* %tmp1, align 16 ; <x86_fp80> [#uses=1] + %tmp3 = fsub x86_fp80 0xK80000000000000000000, %tmp2 ; <x86_fp80> [#uses=1] + %tmp4 = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 1 ; <x86_fp80*> [#uses=1] + %real = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 0 ; <x86_fp80*> [#uses=1] + %tmp6 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 0 ; <x86_fp80*> [#uses=1] + %tmp7 = load x86_fp80* %tmp6, align 16 ; <x86_fp80> [#uses=1] + store x86_fp80 %tmp3, x86_fp80* %real, align 16 + store x86_fp80 %tmp7, x86_fp80* %tmp4, align 16 + call void @ccoshl( { x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 }* byval align 4 %iz ) nounwind + ret void +} + +declare void @ccoshl({ x86_fp80, x86_fp80 }* noalias sret , { x86_fp80, x86_fp80 }* byval align 4 ) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/smul-with-overflow.ll b/src/LLVM/test/CodeGen/X86/smul-with-overflow.ll new file mode 100644 index 0000000..7c2e247 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/smul-with-overflow.ll
@@ -0,0 +1,69 @@ +; RUN: llc < %s -march=x86 | FileCheck %s + +@ok = internal constant [4 x i8] c"%d\0A\00" +@no = internal constant [4 x i8] c"no\0A\00" + +define i1 @test1(i32 %v1, i32 %v2) nounwind { +entry: + %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2) + %sum = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + br i1 %obit, label %overflow, label %normal + +normal: + %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind + ret i1 true + +overflow: + %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind + ret i1 false +; CHECK: test1: +; CHECK: imull +; CHECK-NEXT: jo +} + +define i1 @test2(i32 %v1, i32 %v2) nounwind { +entry: + %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2) + %sum = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + br i1 %obit, label %overflow, label %normal + +overflow: + %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind + ret i1 false + +normal: + %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind + ret i1 true +; CHECK: test2: +; CHECK: imull +; CHECK-NEXT: jno +} + +declare i32 @printf(i8*, ...) nounwind +declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32) + +define i32 @test3(i32 %a, i32 %b) nounwind readnone { +entry: + %tmp0 = add i32 %b, %a + %tmp1 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %tmp0, i32 2) + %tmp2 = extractvalue { i32, i1 } %tmp1, 0 + ret i32 %tmp2 +; CHECK: test3: +; CHECK: addl +; CHECK-NEXT: addl +; CHECK-NEXT: ret +} + +define i32 @test4(i32 %a, i32 %b) nounwind readnone { +entry: + %tmp0 = add i32 %b, %a + %tmp1 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %tmp0, i32 4) + %tmp2 = extractvalue { i32, i1 } %tmp1, 0 + ret i32 %tmp2 +; CHECK: test4: +; CHECK: addl +; CHECK: mull +; CHECK-NEXT: ret +}
diff --git a/src/LLVM/test/CodeGen/X86/soft-fp.ll b/src/LLVM/test/CodeGen/X86/soft-fp.ll new file mode 100644 index 0000000..a52135d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/soft-fp.ll
@@ -0,0 +1,27 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 -soft-float | not grep xmm +; RUN: llc < %s -march=x86-64 -mattr=+sse2 -soft-float | not grep xmm + + %struct.__va_list_tag = type { i32, i32, i8*, i8* } + +define i32 @t1(i32 %a, ...) nounwind { +entry: + %va = alloca [1 x %struct.__va_list_tag], align 8 ; <[1 x %struct.__va_list_tag]*> [#uses=2] + %va12 = bitcast [1 x %struct.__va_list_tag]* %va to i8* ; <i8*> [#uses=2] + call void @llvm.va_start(i8* %va12) + %va3 = getelementptr [1 x %struct.__va_list_tag]* %va, i64 0, i64 0 ; <%struct.__va_list_tag*> [#uses=1] + call void @bar(%struct.__va_list_tag* %va3) nounwind + call void @llvm.va_end(i8* %va12) + ret i32 undef +} + +declare void @llvm.va_start(i8*) nounwind + +declare void @bar(%struct.__va_list_tag*) + +declare void @llvm.va_end(i8*) nounwind + +define float @t2(float %a, float %b) nounwind readnone { +entry: + %0 = fadd float %a, %b ; <float> [#uses=1] + ret float %0 +}
diff --git a/src/LLVM/test/CodeGen/X86/splat-scalar-load.ll b/src/LLVM/test/CodeGen/X86/splat-scalar-load.ll new file mode 100644 index 0000000..2b13029 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/splat-scalar-load.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 | FileCheck %s +; rdar://7434544 + +define <2 x i64> @t2() nounwind ssp { +entry: +; CHECK: t2: +; CHECK: pshufd $85, (%esp), %xmm0 + %array = alloca [8 x float], align 4 + %arrayidx = getelementptr inbounds [8 x float]* %array, i32 0, i32 1 + %tmp2 = load float* %arrayidx + %vecinit = insertelement <4 x float> undef, float %tmp2, i32 0 + %vecinit5 = insertelement <4 x float> %vecinit, float %tmp2, i32 1 + %vecinit7 = insertelement <4 x float> %vecinit5, float %tmp2, i32 2 + %vecinit9 = insertelement <4 x float> %vecinit7, float %tmp2, i32 3 + %0 = bitcast <4 x float> %vecinit9 to <2 x i64> + ret <2 x i64> %0 +}
diff --git a/src/LLVM/test/CodeGen/X86/split-eh-lpad-edges.ll b/src/LLVM/test/CodeGen/X86/split-eh-lpad-edges.ll new file mode 100644 index 0000000..756a3dd --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/split-eh-lpad-edges.ll
@@ -0,0 +1,38 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | not grep jmp +; rdar://6647639 + + %struct.FetchPlanHeader = type { i8*, i8*, i32, i8*, i8*, i8*, i8*, i8*, %struct.NSObject* (%struct.NSObject*, %struct.objc_selector*, ...)*, %struct.__attributeDescriptionFlags } + %struct.NSArray = type { %struct.NSObject } + %struct.NSAutoreleasePool = type { %struct.NSObject, i8*, i8*, i8*, i8* } + %struct.NSObject = type { %struct.NSObject* } + %struct.__attributeDescriptionFlags = type <{ i32 }> + %struct._message_ref_t = type { %struct.NSObject* (%struct.NSObject*, %struct._message_ref_t*, ...)*, %struct.objc_selector* } + %struct.objc_selector = type opaque +@"\01l_objc_msgSend_fixup_alloc" = external global %struct._message_ref_t, align 16 ; <%struct._message_ref_t*> [#uses=2] + +define %struct.NSArray* @newFetchedRowsForFetchPlan_MT(%struct.FetchPlanHeader* %fetchPlan, %struct.objc_selector* %selectionMethod, %struct.NSObject* %selectionParameter) ssp { +entry: + %0 = invoke %struct.NSObject* null(%struct.NSObject* null, %struct._message_ref_t* @"\01l_objc_msgSend_fixup_alloc") + to label %invcont unwind label %lpad ; <%struct.NSObject*> [#uses=1] + +invcont: ; preds = %entry + %1 = invoke %struct.NSObject* (%struct.NSObject*, %struct.objc_selector*, ...)* @objc_msgSend(%struct.NSObject* %0, %struct.objc_selector* null) + to label %invcont26 unwind label %lpad ; <%struct.NSObject*> [#uses=0] + +invcont26: ; preds = %invcont + %2 = invoke %struct.NSObject* null(%struct.NSObject* null, %struct._message_ref_t* @"\01l_objc_msgSend_fixup_alloc") + to label %invcont27 unwind label %lpad ; <%struct.NSObject*> [#uses=0] + +invcont27: ; preds = %invcont26 + unreachable + +lpad: ; preds = %invcont26, %invcont, %entry + %pool.1 = phi %struct.NSAutoreleasePool* [ null, %entry ], [ null, %invcont ], [ null, %invcont26 ] ; <%struct.NSAutoreleasePool*> [#uses=0] + %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + unreachable +} + 
+declare %struct.NSObject* @objc_msgSend(%struct.NSObject*, %struct.objc_selector*, ...) + +declare i32 @__gxx_personality_v0(...)
diff --git a/src/LLVM/test/CodeGen/X86/split-vector-bitcast.ll b/src/LLVM/test/CodeGen/X86/split-vector-bitcast.ll new file mode 100644 index 0000000..fae15cf --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/split-vector-bitcast.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86 -mattr=-sse2,+sse | grep addps + +; PR10497 + another isel issue with sse2 disabled +; (This is primarily checking that this construct doesn't crash.) +define void @a(<2 x float>* %a, <2 x i32>* %b) { + %cc = load <2 x float>* %a + %c = fadd <2 x float> %cc, %cc + %dd = bitcast <2 x float> %c to <2 x i32> + %d = add <2 x i32> %dd, %dd + store <2 x i32> %d, <2 x i32>* %b + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/split-vector-rem.ll b/src/LLVM/test/CodeGen/X86/split-vector-rem.ll new file mode 100644 index 0000000..681c6b0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/split-vector-rem.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=x86-64 | grep div | count 16 +; RUN: llc < %s -march=x86-64 | grep fmodf | count 8 + +define <8 x i32> @foo(<8 x i32> %t, <8 x i32> %u) { + %m = srem <8 x i32> %t, %u + ret <8 x i32> %m +} +define <8 x i32> @bar(<8 x i32> %t, <8 x i32> %u) { + %m = urem <8 x i32> %t, %u + ret <8 x i32> %m +} +define <8 x float> @qux(<8 x float> %t, <8 x float> %u) { + %m = frem <8 x float> %t, %u + ret <8 x float> %m +}
diff --git a/src/LLVM/test/CodeGen/X86/sret.ll b/src/LLVM/test/CodeGen/X86/sret.ll new file mode 100644 index 0000000..b945530 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sret.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -march=x86 | grep ret | grep 4 + + %struct.foo = type { [4 x i32] } + +define void @bar(%struct.foo* noalias sret %agg.result) nounwind { +entry: + %tmp1 = getelementptr %struct.foo* %agg.result, i32 0, i32 0 + %tmp3 = getelementptr [4 x i32]* %tmp1, i32 0, i32 0 + store i32 1, i32* %tmp3, align 8 + ret void +} + +@dst = external global i32 + +define void @foo() nounwind { + %memtmp = alloca %struct.foo, align 4 + call void @bar( %struct.foo* sret %memtmp ) nounwind + %tmp4 = getelementptr %struct.foo* %memtmp, i32 0, i32 0 + %tmp5 = getelementptr [4 x i32]* %tmp4, i32 0, i32 0 + %tmp6 = load i32* %tmp5 + store i32 %tmp6, i32* @dst + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/sse-align-0.ll b/src/LLVM/test/CodeGen/X86/sse-align-0.ll new file mode 100644 index 0000000..8ffd312 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sse-align-0.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; CHECK-NOT: mov + +define <4 x float> @foo(<4 x float>* %p, <4 x float> %x) nounwind { + %t = load <4 x float>* %p + %z = fmul <4 x float> %t, %x + ret <4 x float> %z +} +define <2 x double> @bar(<2 x double>* %p, <2 x double> %x) nounwind { + %t = load <2 x double>* %p + %z = fmul <2 x double> %t, %x + ret <2 x double> %z +}
diff --git a/src/LLVM/test/CodeGen/X86/sse-align-1.ll b/src/LLVM/test/CodeGen/X86/sse-align-1.ll new file mode 100644 index 0000000..c7a5cd5 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sse-align-1.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=x86-64 | grep movap | count 2 + +define <4 x float> @foo(<4 x float>* %p) nounwind { + %t = load <4 x float>* %p + ret <4 x float> %t +} +define <2 x double> @bar(<2 x double>* %p) nounwind { + %t = load <2 x double>* %p + ret <2 x double> %t +}
diff --git a/src/LLVM/test/CodeGen/X86/sse-align-10.ll b/src/LLVM/test/CodeGen/X86/sse-align-10.ll new file mode 100644 index 0000000..0f91697 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sse-align-10.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=x86-64 | grep movups | count 1 + +define <2 x i64> @bar(<2 x i64>* %p) nounwind { + %t = load <2 x i64>* %p, align 8 + ret <2 x i64> %t +}
diff --git a/src/LLVM/test/CodeGen/X86/sse-align-11.ll b/src/LLVM/test/CodeGen/X86/sse-align-11.ll new file mode 100644 index 0000000..9f5d4b4 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sse-align-11.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=x86 -mcpu=yonah -mtriple=i686-apple-darwin8 | grep movaps +; RUN: llc < %s -march=x86 -mcpu=yonah -mtriple=i686-linux-gnu | grep movaps +; PR8969 - make 32-bit linux have a 16-byte aligned stack + +define <4 x float> @foo(float %a, float %b, float %c, float %d) nounwind { +entry: + %tmp6 = insertelement <4 x float> undef, float %a, i32 0 + %tmp7 = insertelement <4 x float> %tmp6, float %b, i32 1 + %tmp8 = insertelement <4 x float> %tmp7, float %c, i32 2 + %tmp9 = insertelement <4 x float> %tmp8, float %d, i32 3 + ret <4 x float> %tmp9 +} +
diff --git a/src/LLVM/test/CodeGen/X86/sse-align-12.ll b/src/LLVM/test/CodeGen/X86/sse-align-12.ll new file mode 100644 index 0000000..118e393 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sse-align-12.ll
@@ -0,0 +1,57 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s + +; CHECK: a: +; CHECK: movdqu +; CHECK: pshufd +define <4 x float> @a(<4 x float>* %y) nounwind { + %x = load <4 x float>* %y, align 4 + %a = extractelement <4 x float> %x, i32 0 + %b = extractelement <4 x float> %x, i32 1 + %c = extractelement <4 x float> %x, i32 2 + %d = extractelement <4 x float> %x, i32 3 + %p = insertelement <4 x float> undef, float %d, i32 0 + %q = insertelement <4 x float> %p, float %c, i32 1 + %r = insertelement <4 x float> %q, float %b, i32 2 + %s = insertelement <4 x float> %r, float %a, i32 3 + ret <4 x float> %s +} + +; CHECK: b: +; CHECK: movups +; CHECK: unpckhps +define <4 x float> @b(<4 x float>* %y, <4 x float> %z) nounwind { + %x = load <4 x float>* %y, align 4 + %a = extractelement <4 x float> %x, i32 2 + %b = extractelement <4 x float> %x, i32 3 + %c = extractelement <4 x float> %z, i32 2 + %d = extractelement <4 x float> %z, i32 3 + %p = insertelement <4 x float> undef, float %c, i32 0 + %q = insertelement <4 x float> %p, float %a, i32 1 + %r = insertelement <4 x float> %q, float %d, i32 2 + %s = insertelement <4 x float> %r, float %b, i32 3 + ret <4 x float> %s +} + +; CHECK: c: +; CHECK: movupd +; CHECK: shufpd +define <2 x double> @c(<2 x double>* %y) nounwind { + %x = load <2 x double>* %y, align 8 + %a = extractelement <2 x double> %x, i32 0 + %c = extractelement <2 x double> %x, i32 1 + %p = insertelement <2 x double> undef, double %c, i32 0 + %r = insertelement <2 x double> %p, double %a, i32 1 + ret <2 x double> %r +} + +; CHECK: d: +; CHECK: movupd +; CHECK: unpckhpd +define <2 x double> @d(<2 x double>* %y, <2 x double> %z) nounwind { + %x = load <2 x double>* %y, align 8 + %a = extractelement <2 x double> %x, i32 1 + %c = extractelement <2 x double> %z, i32 1 + %p = insertelement <2 x double> undef, double %c, i32 0 + %r = insertelement <2 x double> %p, double %a, i32 1 + ret <2 x double> %r +}
diff --git a/src/LLVM/test/CodeGen/X86/sse-align-2.ll b/src/LLVM/test/CodeGen/X86/sse-align-2.ll new file mode 100644 index 0000000..102c3fb --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sse-align-2.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86-64 | grep movup | count 2 + +define <4 x float> @foo(<4 x float>* %p, <4 x float> %x) nounwind { + %t = load <4 x float>* %p, align 4 + %z = fmul <4 x float> %t, %x + ret <4 x float> %z +} +define <2 x double> @bar(<2 x double>* %p, <2 x double> %x) nounwind { + %t = load <2 x double>* %p, align 8 + %z = fmul <2 x double> %t, %x + ret <2 x double> %z +}
diff --git a/src/LLVM/test/CodeGen/X86/sse-align-3.ll b/src/LLVM/test/CodeGen/X86/sse-align-3.ll new file mode 100644 index 0000000..04f2161 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sse-align-3.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; CHECK-NOT: movapd +; CHECK: movaps +; CHECK-NOT: movaps +; CHECK: movapd +; CHECK-NOT: movap + +define void @foo(<4 x float>* %p, <4 x float> %x) nounwind { + store <4 x float> %x, <4 x float>* %p + ret void +} +define void @bar(<2 x double>* %p, <2 x double> %x) nounwind { + store <2 x double> %x, <2 x double>* %p + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/sse-align-4.ll b/src/LLVM/test/CodeGen/X86/sse-align-4.ll new file mode 100644 index 0000000..4c59934 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sse-align-4.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=x86-64 | grep movup | count 2 + +define void @foo(<4 x float>* %p, <4 x float> %x) nounwind { + store <4 x float> %x, <4 x float>* %p, align 4 + ret void +} +define void @bar(<2 x double>* %p, <2 x double> %x) nounwind { + store <2 x double> %x, <2 x double>* %p, align 8 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/sse-align-5.ll b/src/LLVM/test/CodeGen/X86/sse-align-5.ll new file mode 100644 index 0000000..21cd231 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sse-align-5.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=x86-64 | grep movaps | count 1 + +define <2 x i64> @bar(<2 x i64>* %p) nounwind { + %t = load <2 x i64>* %p + ret <2 x i64> %t +}
diff --git a/src/LLVM/test/CodeGen/X86/sse-align-6.ll b/src/LLVM/test/CodeGen/X86/sse-align-6.ll new file mode 100644 index 0000000..fcea1b1 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sse-align-6.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=x86-64 | grep movdqu | count 1 + +define <2 x i64> @bar(<2 x i64>* %p, <2 x i64> %x) nounwind { + %t = load <2 x i64>* %p, align 8 + %z = mul <2 x i64> %t, %x + ret <2 x i64> %z +}
diff --git a/src/LLVM/test/CodeGen/X86/sse-align-7.ll b/src/LLVM/test/CodeGen/X86/sse-align-7.ll new file mode 100644 index 0000000..e55d585 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sse-align-7.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; CHECK: movaps +; CHECK-NOT: movaps + +define void @bar(<2 x i64>* %p, <2 x i64> %x) nounwind { + store <2 x i64> %x, <2 x i64>* %p + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/sse-align-8.ll b/src/LLVM/test/CodeGen/X86/sse-align-8.ll new file mode 100644 index 0000000..cfeff81 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sse-align-8.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=x86-64 | grep movups | count 1 + +define void @bar(<2 x i64>* %p, <2 x i64> %x) nounwind { + store <2 x i64> %x, <2 x i64>* %p, align 8 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/sse-align-9.ll b/src/LLVM/test/CodeGen/X86/sse-align-9.ll new file mode 100644 index 0000000..cb26b95 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sse-align-9.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=x86-64 | grep movup | count 2 + +define <4 x float> @foo(<4 x float>* %p) nounwind { + %t = load <4 x float>* %p, align 4 + ret <4 x float> %t +} +define <2 x double> @bar(<2 x double>* %p) nounwind { + %t = load <2 x double>* %p, align 8 + ret <2 x double> %t +}
diff --git a/src/LLVM/test/CodeGen/X86/sse-commute.ll b/src/LLVM/test/CodeGen/X86/sse-commute.ll new file mode 100644 index 0000000..336bf06 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sse-commute.ll
@@ -0,0 +1,20 @@ +; RUN: llc -mtriple=x86_64-linux < %s | FileCheck %s + +; Commute the comparison to avoid a move. +; PR7500. + +; CHECK: a: +; CHECK-NOT: mov +; CHECK: pcmpeqd +define <2 x double> @a(<2 x double>, <2 x double>) nounwind readnone { +entry: + %tmp6 = bitcast <2 x double> %0 to <4 x i32> ; <<4 x i32>> [#uses=2] + %tmp4 = bitcast <2 x double> %1 to <4 x i32> ; <<4 x i32>> [#uses=1] + %cmp = icmp eq <4 x i32> %tmp6, %tmp4 ; <<4 x i1>> [#uses=1] + %sext = sext <4 x i1> %cmp to <4 x i32> ; <<4 x i32>> [#uses=1] + %and = and <4 x i32> %tmp6, %sext ; <<4 x i32>> [#uses=1] + %tmp8 = bitcast <4 x i32> %and to <2 x double> ; <<2 x double>> [#uses=1] + ret <2 x double> %tmp8 +} + +
diff --git a/src/LLVM/test/CodeGen/X86/sse-fcopysign.ll b/src/LLVM/test/CodeGen/X86/sse-fcopysign.ll new file mode 100644 index 0000000..a7fd786 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sse-fcopysign.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep test + +define float @tst1(float %a, float %b) { + %tmp = tail call float @copysignf( float %b, float %a ) + ret float %tmp +} + +define double @tst2(double %a, float %b, float %c) { + %tmp1 = fadd float %b, %c + %tmp2 = fpext float %tmp1 to double + %tmp = tail call double @copysign( double %a, double %tmp2 ) + ret double %tmp +} + +declare float @copysignf(float, float) +declare double @copysign(double, double)
diff --git a/src/LLVM/test/CodeGen/X86/sse-load-ret.ll b/src/LLVM/test/CodeGen/X86/sse-load-ret.ll new file mode 100644 index 0000000..913b9b3 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sse-load-ret.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -march=x86 -mcpu=yonah | not grep movss +; RUN: llc < %s -march=x86 -mcpu=yonah | not grep xmm + +define double @test1(double* %P) { + %X = load double* %P ; <double> [#uses=1] + ret double %X +} + +define double @test2() { + ret double 1.234560e+03 +} + + +; FIXME: Todo +;double %test3(bool %B) { +; %C = select bool %B, double 123.412, double 523.01123123 +; ret double %C +;} +
diff --git a/src/LLVM/test/CodeGen/X86/sse-minmax.ll b/src/LLVM/test/CodeGen/X86/sse-minmax.ll new file mode 100644 index 0000000..af1a73b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sse-minmax.ll
@@ -0,0 +1,967 @@ +; RUN: llc < %s -march=x86-64 -asm-verbose=false -join-physregs -promote-elements | FileCheck %s +; RUN: llc < %s -march=x86-64 -asm-verbose=false -join-physregs -enable-unsafe-fp-math -enable-no-nans-fp-math -promote-elements | FileCheck -check-prefix=UNSAFE %s +; RUN: llc < %s -march=x86-64 -asm-verbose=false -join-physregs -enable-no-nans-fp-math -promote-elements | FileCheck -check-prefix=FINITE %s + +; Some of these patterns can be matched as SSE min or max. Some of +; then can be matched provided that the operands are swapped. +; Some of them can't be matched at all and require a comparison +; and a conditional branch. + +; The naming convention is {,x_,y_}{o,u}{gt,lt,ge,le}{,_inverse} +; x_ : use 0.0 instead of %y +; y_ : use -0.0 instead of %y +; _inverse : swap the arms of the select. + +; Some of these tests depend on -join-physregs commuting instructions to +; eliminate copies. + +; CHECK: ogt: +; CHECK-NEXT: maxsd %xmm1, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: ogt: +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: ogt: +; FINITE-NEXT: maxsd %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @ogt(double %x, double %y) nounwind { + %c = fcmp ogt double %x, %y + %d = select i1 %c, double %x, double %y + ret double %d +} + +; CHECK: olt: +; CHECK-NEXT: minsd %xmm1, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: olt: +; UNSAFE-NEXT: minsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: olt: +; FINITE-NEXT: minsd %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @olt(double %x, double %y) nounwind { + %c = fcmp olt double %x, %y + %d = select i1 %c, double %x, double %y + ret double %d +} + +; CHECK: ogt_inverse: +; CHECK-NEXT: minsd %xmm0, %xmm1 +; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: ogt_inverse: +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: ogt_inverse: +; FINITE-NEXT: minsd %xmm0, %xmm1 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; FINITE-NEXT: ret 
+define double @ogt_inverse(double %x, double %y) nounwind { + %c = fcmp ogt double %x, %y + %d = select i1 %c, double %y, double %x + ret double %d +} + +; CHECK: olt_inverse: +; CHECK-NEXT: maxsd %xmm0, %xmm1 +; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: olt_inverse: +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: olt_inverse: +; FINITE-NEXT: maxsd %xmm0, %xmm1 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @olt_inverse(double %x, double %y) nounwind { + %c = fcmp olt double %x, %y + %d = select i1 %c, double %y, double %x + ret double %d +} + +; CHECK: oge: +; CHECK-NEXT: ucomisd %xmm1, %xmm0 +; UNSAFE: oge: +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: oge: +; FINITE-NEXT: maxsd %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @oge(double %x, double %y) nounwind { + %c = fcmp oge double %x, %y + %d = select i1 %c, double %x, double %y + ret double %d +} + +; CHECK: ole: +; CHECK-NEXT: ucomisd %xmm0, %xmm1 +; UNSAFE: ole: +; UNSAFE-NEXT: minsd %xmm1, %xmm0 +; FINITE: ole: +; FINITE-NEXT: minsd %xmm1, %xmm0 +define double @ole(double %x, double %y) nounwind { + %c = fcmp ole double %x, %y + %d = select i1 %c, double %x, double %y + ret double %d +} + +; CHECK: oge_inverse: +; CHECK-NEXT: ucomisd %xmm1, %xmm0 +; UNSAFE: oge_inverse: +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: oge_inverse: +; FINITE-NEXT: minsd %xmm0, %xmm1 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @oge_inverse(double %x, double %y) nounwind { + %c = fcmp oge double %x, %y + %d = select i1 %c, double %y, double %x + ret double %d +} + +; CHECK: ole_inverse: +; CHECK-NEXT: ucomisd %xmm0, %xmm1 +; UNSAFE: ole_inverse: +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: ole_inverse: +; FINITE-NEXT: maxsd %xmm0, 
%xmm1 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @ole_inverse(double %x, double %y) nounwind { + %c = fcmp ole double %x, %y + %d = select i1 %c, double %y, double %x + ret double %d +} + +; CHECK: x_ogt: +; CHECK-NEXT: pxor %xmm1, %xmm1 +; CHECK-NEXT: maxsd %xmm1, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: x_ogt: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_ogt: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: maxsd %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @x_ogt(double %x) nounwind { + %c = fcmp ogt double %x, 0.000000e+00 + %d = select i1 %c, double %x, double 0.000000e+00 + ret double %d +} + +; CHECK: x_olt: +; CHECK-NEXT: pxor %xmm1, %xmm1 +; CHECK-NEXT: minsd %xmm1, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: x_olt: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: minsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_olt: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: minsd %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @x_olt(double %x) nounwind { + %c = fcmp olt double %x, 0.000000e+00 + %d = select i1 %c, double %x, double 0.000000e+00 + ret double %d +} + +; CHECK: x_ogt_inverse: +; CHECK-NEXT: pxor %xmm1, %xmm1 +; CHECK-NEXT: minsd %xmm0, %xmm1 +; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: x_ogt_inverse: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_ogt_inverse: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: minsd %xmm0, %xmm1 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @x_ogt_inverse(double %x) nounwind { + %c = fcmp ogt double %x, 0.000000e+00 + %d = select i1 %c, double 0.000000e+00, double %x + ret double %d +} + +; CHECK: x_olt_inverse: +; CHECK-NEXT: pxor %xmm1, %xmm1 +; CHECK-NEXT: maxsd %xmm0, %xmm1 +; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: x_olt_inverse: +; UNSAFE-NEXT: 
pxor %xmm1, %xmm1 +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_olt_inverse: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: maxsd %xmm0, %xmm1 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @x_olt_inverse(double %x) nounwind { + %c = fcmp olt double %x, 0.000000e+00 + %d = select i1 %c, double 0.000000e+00, double %x + ret double %d +} + +; CHECK: x_oge: +; CHECK: ucomisd %xmm1, %xmm0 +; UNSAFE: x_oge: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_oge: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: maxsd %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @x_oge(double %x) nounwind { + %c = fcmp oge double %x, 0.000000e+00 + %d = select i1 %c, double %x, double 0.000000e+00 + ret double %d +} + +; CHECK: x_ole: +; CHECK: ucomisd %xmm0, %xmm1 +; UNSAFE: x_ole: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: minsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_ole: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: minsd %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @x_ole(double %x) nounwind { + %c = fcmp ole double %x, 0.000000e+00 + %d = select i1 %c, double %x, double 0.000000e+00 + ret double %d +} + +; CHECK: x_oge_inverse: +; CHECK: ucomisd %xmm1, %xmm0 +; UNSAFE: x_oge_inverse: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_oge_inverse: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: minsd %xmm0, %xmm1 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @x_oge_inverse(double %x) nounwind { + %c = fcmp oge double %x, 0.000000e+00 + %d = select i1 %c, double 0.000000e+00, double %x + ret double %d +} + +; CHECK: x_ole_inverse: +; CHECK: ucomisd %xmm0, %xmm1 +; UNSAFE: x_ole_inverse: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} 
%xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_ole_inverse: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: maxsd %xmm0, %xmm1 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @x_ole_inverse(double %x) nounwind { + %c = fcmp ole double %x, 0.000000e+00 + %d = select i1 %c, double 0.000000e+00, double %x + ret double %d +} + +; CHECK: ugt: +; CHECK: ucomisd %xmm0, %xmm1 +; UNSAFE: ugt: +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: ugt: +; FINITE-NEXT: maxsd %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @ugt(double %x, double %y) nounwind { + %c = fcmp ugt double %x, %y + %d = select i1 %c, double %x, double %y + ret double %d +} + +; CHECK: ult: +; CHECK: ucomisd %xmm1, %xmm0 +; UNSAFE: ult: +; UNSAFE-NEXT: minsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: ult: +; FINITE-NEXT: minsd %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @ult(double %x, double %y) nounwind { + %c = fcmp ult double %x, %y + %d = select i1 %c, double %x, double %y + ret double %d +} + +; CHECK: ugt_inverse: +; CHECK: ucomisd %xmm0, %xmm1 +; UNSAFE: ugt_inverse: +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: ugt_inverse: +; FINITE-NEXT: minsd %xmm0, %xmm1 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @ugt_inverse(double %x, double %y) nounwind { + %c = fcmp ugt double %x, %y + %d = select i1 %c, double %y, double %x + ret double %d +} + +; CHECK: ult_inverse: +; CHECK: ucomisd %xmm1, %xmm0 +; UNSAFE: ult_inverse: +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: ult_inverse: +; FINITE-NEXT: maxsd %xmm0, %xmm1 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @ult_inverse(double %x, double %y) nounwind { + %c = fcmp ult double %x, %y + %d = select i1 %c, double %y, double %x + ret double %d +} + +; CHECK: uge: +; CHECK-NEXT: maxsd %xmm0, %xmm1 +; CHECK-NEXT: 
movap{{[sd]}} %xmm1, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: uge: +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: uge: +; FINITE-NEXT: maxsd %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @uge(double %x, double %y) nounwind { + %c = fcmp uge double %x, %y + %d = select i1 %c, double %x, double %y + ret double %d +} + +; CHECK: ule: +; CHECK-NEXT: minsd %xmm0, %xmm1 +; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: ule: +; UNSAFE-NEXT: minsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: ule: +; FINITE-NEXT: minsd %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @ule(double %x, double %y) nounwind { + %c = fcmp ule double %x, %y + %d = select i1 %c, double %x, double %y + ret double %d +} + +; CHECK: uge_inverse: +; CHECK-NEXT: minsd %xmm1, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: uge_inverse: +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: uge_inverse: +; FINITE-NEXT: minsd %xmm0, %xmm1 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @uge_inverse(double %x, double %y) nounwind { + %c = fcmp uge double %x, %y + %d = select i1 %c, double %y, double %x + ret double %d +} + +; CHECK: ule_inverse: +; CHECK-NEXT: maxsd %xmm1, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: ule_inverse: +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: ule_inverse: +; FINITE-NEXT: maxsd %xmm0, %xmm1 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @ule_inverse(double %x, double %y) nounwind { + %c = fcmp ule double %x, %y + %d = select i1 %c, double %y, double %x + ret double %d +} + +; CHECK: x_ugt: +; CHECK: ucomisd %xmm0, %xmm1 +; UNSAFE: x_ugt: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_ugt: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: maxsd %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @x_ugt(double %x) nounwind { + %c = fcmp 
ugt double %x, 0.000000e+00 + %d = select i1 %c, double %x, double 0.000000e+00 + ret double %d +} + +; CHECK: x_ult: +; CHECK: ucomisd %xmm1, %xmm0 +; UNSAFE: x_ult: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: minsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_ult: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: minsd %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @x_ult(double %x) nounwind { + %c = fcmp ult double %x, 0.000000e+00 + %d = select i1 %c, double %x, double 0.000000e+00 + ret double %d +} + +; CHECK: x_ugt_inverse: +; CHECK: ucomisd %xmm0, %xmm1 +; UNSAFE: x_ugt_inverse: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_ugt_inverse: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: minsd %xmm0, %xmm1 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @x_ugt_inverse(double %x) nounwind { + %c = fcmp ugt double %x, 0.000000e+00 + %d = select i1 %c, double 0.000000e+00, double %x + ret double %d +} + +; CHECK: x_ult_inverse: +; CHECK: ucomisd %xmm1, %xmm0 +; UNSAFE: x_ult_inverse: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_ult_inverse: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: maxsd %xmm0, %xmm1 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @x_ult_inverse(double %x) nounwind { + %c = fcmp ult double %x, 0.000000e+00 + %d = select i1 %c, double 0.000000e+00, double %x + ret double %d +} + +; CHECK: x_uge: +; CHECK-NEXT: pxor %xmm1, %xmm1 +; CHECK-NEXT: maxsd %xmm0, %xmm1 +; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: x_uge: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_uge: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: maxsd %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @x_uge(double %x) nounwind { + 
%c = fcmp uge double %x, 0.000000e+00 + %d = select i1 %c, double %x, double 0.000000e+00 + ret double %d +} + +; CHECK: x_ule: +; CHECK-NEXT: pxor %xmm1, %xmm1 +; CHECK-NEXT: minsd %xmm0, %xmm1 +; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: x_ule: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: minsd %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_ule: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: minsd %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @x_ule(double %x) nounwind { + %c = fcmp ule double %x, 0.000000e+00 + %d = select i1 %c, double %x, double 0.000000e+00 + ret double %d +} + +; CHECK: x_uge_inverse: +; CHECK-NEXT: pxor %xmm1, %xmm1 +; CHECK-NEXT: minsd %xmm1, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: x_uge_inverse: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_uge_inverse: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: minsd %xmm0, %xmm1 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @x_uge_inverse(double %x) nounwind { + %c = fcmp uge double %x, 0.000000e+00 + %d = select i1 %c, double 0.000000e+00, double %x + ret double %d +} + +; CHECK: x_ule_inverse: +; CHECK-NEXT: pxor %xmm1, %xmm1 +; CHECK-NEXT: maxsd %xmm1, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: x_ule_inverse: +; UNSAFE-NEXT: pxor %xmm1, %xmm1 +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: x_ule_inverse: +; FINITE-NEXT: pxor %xmm1, %xmm1 +; FINITE-NEXT: maxsd %xmm0, %xmm1 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @x_ule_inverse(double %x) nounwind { + %c = fcmp ule double %x, 0.000000e+00 + %d = select i1 %c, double 0.000000e+00, double %x + ret double %d +} + +; CHECK: y_ogt: +; CHECK-NEXT: maxsd {{[^,]*}}, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: y_ogt: +; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_ogt: +; FINITE-NEXT: 
maxsd {{[^,]*}}, %xmm0 +; FINITE-NEXT: ret +define double @y_ogt(double %x) nounwind { + %c = fcmp ogt double %x, -0.000000e+00 + %d = select i1 %c, double %x, double -0.000000e+00 + ret double %d +} + +; CHECK: y_olt: +; CHECK-NEXT: minsd {{[^,]*}}, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: y_olt: +; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_olt: +; FINITE-NEXT: minsd {{[^,]*}}, %xmm0 +; FINITE-NEXT: ret +define double @y_olt(double %x) nounwind { + %c = fcmp olt double %x, -0.000000e+00 + %d = select i1 %c, double %x, double -0.000000e+00 + ret double %d +} + +; CHECK: y_ogt_inverse: +; CHECK-NEXT: movsd {{[^,]*}}, %xmm1 +; CHECK-NEXT: minsd %xmm0, %xmm1 +; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: y_ogt_inverse: +; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_ogt_inverse: +; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 +; FINITE-NEXT: minsd %xmm0, %xmm1 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @y_ogt_inverse(double %x) nounwind { + %c = fcmp ogt double %x, -0.000000e+00 + %d = select i1 %c, double -0.000000e+00, double %x + ret double %d +} + +; CHECK: y_olt_inverse: +; CHECK-NEXT: movsd {{[^,]*}}, %xmm1 +; CHECK-NEXT: maxsd %xmm0, %xmm1 +; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: y_olt_inverse: +; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_olt_inverse: +; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 +; FINITE-NEXT: maxsd %xmm0, %xmm1 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @y_olt_inverse(double %x) nounwind { + %c = fcmp olt double %x, -0.000000e+00 + %d = select i1 %c, double -0.000000e+00, double %x + ret double %d +} + +; CHECK: y_oge: +; CHECK: ucomisd %xmm1, %xmm0 +; UNSAFE: y_oge: +; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0 +; 
UNSAFE-NEXT: ret +; FINITE: y_oge: +; FINITE-NEXT: maxsd {{[^,]*}}, %xmm0 +; FINITE-NEXT: ret +define double @y_oge(double %x) nounwind { + %c = fcmp oge double %x, -0.000000e+00 + %d = select i1 %c, double %x, double -0.000000e+00 + ret double %d +} + +; CHECK: y_ole: +; CHECK: ucomisd %xmm0, %xmm1 +; UNSAFE: y_ole: +; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_ole: +; FINITE-NEXT: minsd {{[^,]*}}, %xmm0 +; FINITE-NEXT: ret +define double @y_ole(double %x) nounwind { + %c = fcmp ole double %x, -0.000000e+00 + %d = select i1 %c, double %x, double -0.000000e+00 + ret double %d +} + +; CHECK: y_oge_inverse: +; CHECK: ucomisd %xmm1, %xmm0 +; UNSAFE: y_oge_inverse: +; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_oge_inverse: +; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 +; FINITE-NEXT: minsd %xmm0, %xmm1 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @y_oge_inverse(double %x) nounwind { + %c = fcmp oge double %x, -0.000000e+00 + %d = select i1 %c, double -0.000000e+00, double %x + ret double %d +} + +; CHECK: y_ole_inverse: +; CHECK: ucomisd %xmm0, %xmm1 +; UNSAFE: y_ole_inverse: +; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_ole_inverse: +; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 +; FINITE-NEXT: maxsd %xmm0, %xmm1 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @y_ole_inverse(double %x) nounwind { + %c = fcmp ole double %x, -0.000000e+00 + %d = select i1 %c, double -0.000000e+00, double %x + ret double %d +} + +; CHECK: y_ugt: +; CHECK: ucomisd %xmm0, %xmm1 +; UNSAFE: y_ugt: +; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_ugt: +; FINITE-NEXT: maxsd {{[^,]*}}, %xmm0 +; FINITE-NEXT: ret +define double @y_ugt(double %x) nounwind { + %c = fcmp ugt double %x, -0.000000e+00 
+ %d = select i1 %c, double %x, double -0.000000e+00 + ret double %d +} + +; CHECK: y_ult: +; CHECK: ucomisd %xmm1, %xmm0 +; UNSAFE: y_ult: +; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_ult: +; FINITE-NEXT: minsd {{[^,]*}}, %xmm0 +; FINITE-NEXT: ret +define double @y_ult(double %x) nounwind { + %c = fcmp ult double %x, -0.000000e+00 + %d = select i1 %c, double %x, double -0.000000e+00 + ret double %d +} + +; CHECK: y_ugt_inverse: +; CHECK: ucomisd %xmm0, %xmm1 +; UNSAFE: y_ugt_inverse: +; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_ugt_inverse: +; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 +; FINITE-NEXT: minsd %xmm0, %xmm1 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @y_ugt_inverse(double %x) nounwind { + %c = fcmp ugt double %x, -0.000000e+00 + %d = select i1 %c, double -0.000000e+00, double %x + ret double %d +} + +; CHECK: y_ult_inverse: +; CHECK: ucomisd %xmm1, %xmm0 +; UNSAFE: y_ult_inverse: +; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_ult_inverse: +; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 +; FINITE-NEXT: maxsd %xmm0, %xmm1 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @y_ult_inverse(double %x) nounwind { + %c = fcmp ult double %x, -0.000000e+00 + %d = select i1 %c, double -0.000000e+00, double %x + ret double %d +} + +; CHECK: y_uge: +; CHECK-NEXT: movsd {{[^,]*}}, %xmm1 +; CHECK-NEXT: maxsd %xmm0, %xmm1 +; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: y_uge: +; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_uge: +; FINITE-NEXT: maxsd {{[^,]*}}, %xmm0 +; FINITE-NEXT: ret +define double @y_uge(double %x) nounwind { + %c = fcmp uge double %x, -0.000000e+00 + %d = select i1 %c, double %x, double -0.000000e+00 + ret double %d +} + +; 
CHECK: y_ule: +; CHECK-NEXT: movsd {{[^,]*}}, %xmm1 +; CHECK-NEXT: minsd %xmm0, %xmm1 +; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: y_ule: +; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_ule: +; FINITE-NEXT: minsd {{[^,]*}}, %xmm0 +; FINITE-NEXT: ret +define double @y_ule(double %x) nounwind { + %c = fcmp ule double %x, -0.000000e+00 + %d = select i1 %c, double %x, double -0.000000e+00 + ret double %d +} + +; CHECK: y_uge_inverse: +; CHECK-NEXT: minsd {{[^,]*}}, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: y_uge_inverse: +; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_uge_inverse: +; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 +; FINITE-NEXT: minsd %xmm0, %xmm1 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @y_uge_inverse(double %x) nounwind { + %c = fcmp uge double %x, -0.000000e+00 + %d = select i1 %c, double -0.000000e+00, double %x + ret double %d +} + +; CHECK: y_ule_inverse: +; CHECK-NEXT: maxsd {{[^,]*}}, %xmm0 +; CHECK-NEXT: ret +; UNSAFE: y_ule_inverse: +; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: ret +; FINITE: y_ule_inverse: +; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 +; FINITE-NEXT: maxsd %xmm0, %xmm1 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; FINITE-NEXT: ret +define double @y_ule_inverse(double %x) nounwind { + %c = fcmp ule double %x, -0.000000e+00 + %d = select i1 %c, double -0.000000e+00, double %x + ret double %d +} +; Test a few more misc. cases. 
+ +; CHECK: clampTo3k_a: +; CHECK: minsd +; UNSAFE: clampTo3k_a: +; UNSAFE: minsd +; FINITE: clampTo3k_a: +; FINITE: minsd +define double @clampTo3k_a(double %x) nounwind readnone { +entry: + %0 = fcmp ogt double %x, 3.000000e+03 ; <i1> [#uses=1] + %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1] + ret double %x_addr.0 +} + +; CHECK: clampTo3k_b: +; CHECK: minsd +; UNSAFE: clampTo3k_b: +; UNSAFE: minsd +; FINITE: clampTo3k_b: +; FINITE: minsd +define double @clampTo3k_b(double %x) nounwind readnone { +entry: + %0 = fcmp uge double %x, 3.000000e+03 ; <i1> [#uses=1] + %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1] + ret double %x_addr.0 +} + +; CHECK: clampTo3k_c: +; CHECK: maxsd +; UNSAFE: clampTo3k_c: +; UNSAFE: maxsd +; FINITE: clampTo3k_c: +; FINITE: maxsd +define double @clampTo3k_c(double %x) nounwind readnone { +entry: + %0 = fcmp olt double %x, 3.000000e+03 ; <i1> [#uses=1] + %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1] + ret double %x_addr.0 +} + +; CHECK: clampTo3k_d: +; CHECK: maxsd +; UNSAFE: clampTo3k_d: +; UNSAFE: maxsd +; FINITE: clampTo3k_d: +; FINITE: maxsd +define double @clampTo3k_d(double %x) nounwind readnone { +entry: + %0 = fcmp ule double %x, 3.000000e+03 ; <i1> [#uses=1] + %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1] + ret double %x_addr.0 +} + +; CHECK: clampTo3k_e: +; CHECK: maxsd +; UNSAFE: clampTo3k_e: +; UNSAFE: maxsd +; FINITE: clampTo3k_e: +; FINITE: maxsd +define double @clampTo3k_e(double %x) nounwind readnone { +entry: + %0 = fcmp olt double %x, 3.000000e+03 ; <i1> [#uses=1] + %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1] + ret double %x_addr.0 +} + +; CHECK: clampTo3k_f: +; CHECK: maxsd +; UNSAFE: clampTo3k_f: +; UNSAFE: maxsd +; FINITE: clampTo3k_f: +; FINITE: maxsd +define double @clampTo3k_f(double %x) nounwind readnone { +entry: + %0 = fcmp ule double %x, 3.000000e+03 ; 
<i1> [#uses=1] + %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1] + ret double %x_addr.0 +} + +; CHECK: clampTo3k_g: +; CHECK: minsd +; UNSAFE: clampTo3k_g: +; UNSAFE: minsd +; FINITE: clampTo3k_g: +; FINITE: minsd +define double @clampTo3k_g(double %x) nounwind readnone { +entry: + %0 = fcmp ogt double %x, 3.000000e+03 ; <i1> [#uses=1] + %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1] + ret double %x_addr.0 +} + +; CHECK: clampTo3k_h: +; CHECK: minsd +; UNSAFE: clampTo3k_h: +; UNSAFE: minsd +; FINITE: clampTo3k_h: +; FINITE: minsd +define double @clampTo3k_h(double %x) nounwind readnone { +entry: + %0 = fcmp uge double %x, 3.000000e+03 ; <i1> [#uses=1] + %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1] + ret double %x_addr.0 +} + +; UNSAFE: maxpd: +; UNSAFE: maxpd +define <2 x double> @maxpd(<2 x double> %x, <2 x double> %y) { + %max_is_x = fcmp oge <2 x double> %x, %y + %max = select <2 x i1> %max_is_x, <2 x double> %x, <2 x double> %y + ret <2 x double> %max +} + +; UNSAFE: minpd: +; UNSAFE: minpd +define <2 x double> @minpd(<2 x double> %x, <2 x double> %y) { + %min_is_x = fcmp ole <2 x double> %x, %y + %min = select <2 x i1> %min_is_x, <2 x double> %x, <2 x double> %y + ret <2 x double> %min +} + +; UNSAFE: maxps: +; UNSAFE: maxps +define <4 x float> @maxps(<4 x float> %x, <4 x float> %y) { + %max_is_x = fcmp oge <4 x float> %x, %y + %max = select <4 x i1> %max_is_x, <4 x float> %x, <4 x float> %y + ret <4 x float> %max +} + +; UNSAFE: minps: +; UNSAFE: minps +define <4 x float> @minps(<4 x float> %x, <4 x float> %y) { + %min_is_x = fcmp ole <4 x float> %x, %y + %min = select <4 x i1> %min_is_x, <4 x float> %x, <4 x float> %y + ret <4 x float> %min +}
diff --git a/src/LLVM/test/CodeGen/X86/sse-varargs.ll b/src/LLVM/test/CodeGen/X86/sse-varargs.ll new file mode 100644 index 0000000..da38f0e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sse-varargs.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep xmm | grep esp + +define i32 @t() nounwind { +entry: + tail call void (i32, ...)* @foo( i32 1, <4 x i32> < i32 10, i32 11, i32 12, i32 13 > ) nounwind + ret i32 0 +} + +declare void @foo(i32, ...)
diff --git a/src/LLVM/test/CodeGen/X86/sse1.ll b/src/LLVM/test/CodeGen/X86/sse1.ll new file mode 100644 index 0000000..9b2e05b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sse1.ll
@@ -0,0 +1,45 @@ +; Tests for SSE1 and below, without SSE2+. +; RUN: llc < %s -march=x86 -mcpu=pentium3 -O3 | FileCheck %s +; RUN: llc < %s -march=x86-64 -mattr=-sse2,+sse -O3 | FileCheck %s + +define <8 x i16> @test1(<8 x i32> %a) nounwind { +; CHECK: test1 + ret <8 x i16> zeroinitializer +} + +define <8 x i16> @test2(<8 x i32> %a) nounwind { +; CHECK: test2 + %c = trunc <8 x i32> %a to <8 x i16> ; <<8 x i16>> [#uses=1] + ret <8 x i16> %c +} + +; PR7993 +;define <4 x i32> @test3(<4 x i16> %a) nounwind { +; %c = sext <4 x i16> %a to <4 x i32> ; <<4 x i32>> [#uses=1] +; ret <4 x i32> %c +;} + +; This should not emit shuffles to populate the top 2 elements of the 4-element +; vector that this ends up returning. +; rdar://8368414 +define <2 x float> @test4(<2 x float> %A, <2 x float> %B) nounwind { +entry: + %tmp7 = extractelement <2 x float> %A, i32 0 + %tmp5 = extractelement <2 x float> %A, i32 1 + %tmp3 = extractelement <2 x float> %B, i32 0 + %tmp1 = extractelement <2 x float> %B, i32 1 + %add.r = fadd float %tmp7, %tmp3 + %add.i = fsub float %tmp5, %tmp1 + %tmp11 = insertelement <2 x float> undef, float %add.r, i32 0 + %tmp9 = insertelement <2 x float> %tmp11, float %add.i, i32 1 + ret <2 x float> %tmp9 +; CHECK: test4: +; CHECK-NOT: shufps $16 +; CHECK: shufps $1, +; CHECK-NOT: shufps $16 +; CHECK: shufps $1, +; CHECK-NOT: shufps $16 +; CHECK: unpcklps +; CHECK-NOT: shufps $16 +; CHECK: ret +}
diff --git a/src/LLVM/test/CodeGen/X86/sse2-blend.ll b/src/LLVM/test/CodeGen/X86/sse2-blend.ll new file mode 100644 index 0000000..56b099e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sse2-blend.ll
@@ -0,0 +1,55 @@ +; RUN: llc < %s -march=x86 -mcpu=yonah -promote-elements -mattr=+sse2,-sse41 | FileCheck %s + + +; currently (xor v4i32) is defined as illegal, so we scalarize the code. + +define void@vsel_float(<4 x float>* %v1, <4 x float>* %v2) { + %A = load <4 x float>* %v1 + %B = load <4 x float>* %v2 + %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %A, <4 x float> %B + store <4 x float > %vsel, <4 x float>* %v1 + ret void +} + +; currently (xor v4i32) is defined as illegal, so we scalarize the code. + +define void@vsel_i32(<4 x i32>* %v1, <4 x i32>* %v2) { + %A = load <4 x i32>* %v1 + %B = load <4 x i32>* %v2 + %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i32> %A, <4 x i32> %B + store <4 x i32 > %vsel, <4 x i32>* %v1 + ret void +} + +; CHECK: vsel_i64 +; CHECK: pxor +; CHECK: pand +; CHECK: andnps +; CHECK: orps +; CHECK: ret + +define void@vsel_i64(<4 x i64>* %v1, <4 x i64>* %v2) { + %A = load <4 x i64>* %v1 + %B = load <4 x i64>* %v2 + %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i64> %A, <4 x i64> %B + store <4 x i64 > %vsel, <4 x i64>* %v1 + ret void +} + +; CHECK: vsel_double +; CHECK: pxor +; CHECK: pand +; CHECK: andnps +; CHECK: orps +; CHECK: ret + + +define void@vsel_double(<4 x double>* %v1, <4 x double>* %v2) { + %A = load <4 x double>* %v1 + %B = load <4 x double>* %v2 + %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x double> %A, <4 x double> %B + store <4 x double > %vsel, <4 x double>* %v1 + ret void +} + +
diff --git a/src/LLVM/test/CodeGen/X86/sse2.ll b/src/LLVM/test/CodeGen/X86/sse2.ll new file mode 100644 index 0000000..70e0a8a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sse2.ll
@@ -0,0 +1,224 @@ +; Tests for SSE2 and below, without SSE3+. +; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=pentium4 -O3 | FileCheck %s + +define void @test1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind { + %tmp3 = load <2 x double>* %A, align 16 + %tmp7 = insertelement <2 x double> undef, double %B, i32 0 + %tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 2, i32 1 > + store <2 x double> %tmp9, <2 x double>* %r, align 16 + ret void + +; CHECK: test1: +; CHECK: movl 8(%esp), %eax +; CHECK-NEXT: movapd (%eax), %xmm0 +; CHECK-NEXT: movlpd 12(%esp), %xmm0 +; CHECK-NEXT: movl 4(%esp), %eax +; CHECK-NEXT: movapd %xmm0, (%eax) +; CHECK-NEXT: ret +} + +define void @test2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind { + %tmp3 = load <2 x double>* %A, align 16 + %tmp7 = insertelement <2 x double> undef, double %B, i32 0 + %tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 0, i32 2 > + store <2 x double> %tmp9, <2 x double>* %r, align 16 + ret void + +; CHECK: test2: +; CHECK: movl 8(%esp), %eax +; CHECK-NEXT: movapd (%eax), %xmm0 +; CHECK-NEXT: movhpd 12(%esp), %xmm0 +; CHECK-NEXT: movl 4(%esp), %eax +; CHECK-NEXT: movapd %xmm0, (%eax) +; CHECK-NEXT: ret +} + + +define void @test3(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B) nounwind { + %tmp = load <4 x float>* %B ; <<4 x float>> [#uses=2] + %tmp3 = load <4 x float>* %A ; <<4 x float>> [#uses=2] + %tmp.upgrd.1 = extractelement <4 x float> %tmp3, i32 0 ; <float> [#uses=1] + %tmp7 = extractelement <4 x float> %tmp, i32 0 ; <float> [#uses=1] + %tmp8 = extractelement <4 x float> %tmp3, i32 1 ; <float> [#uses=1] + %tmp9 = extractelement <4 x float> %tmp, i32 1 ; <float> [#uses=1] + %tmp10 = insertelement <4 x float> undef, float %tmp.upgrd.1, i32 0 ; <<4 x float>> [#uses=1] + %tmp11 = insertelement <4 x float> %tmp10, float %tmp7, i32 1 ; <<4 x float>> [#uses=1] + %tmp12 = insertelement <4 x float> %tmp11, float %tmp8, i32 2 ; <<4 x 
float>> [#uses=1] + %tmp13 = insertelement <4 x float> %tmp12, float %tmp9, i32 3 ; <<4 x float>> [#uses=1] + store <4 x float> %tmp13, <4 x float>* %res + ret void +; CHECK: @test3 +; CHECK: unpcklps +} + +define void @test4(<4 x float> %X, <4 x float>* %res) nounwind { + %tmp5 = shufflevector <4 x float> %X, <4 x float> undef, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=1] + store <4 x float> %tmp5, <4 x float>* %res + ret void +; CHECK: @test4 +; CHECK: pshufd $50, %xmm0, %xmm0 +} + +define <4 x i32> @test5(i8** %ptr) nounwind { +; CHECK: test5: +; CHECK: pxor +; CHECK: punpcklbw +; CHECK: punpcklwd + + %tmp = load i8** %ptr ; <i8*> [#uses=1] + %tmp.upgrd.1 = bitcast i8* %tmp to float* ; <float*> [#uses=1] + %tmp.upgrd.2 = load float* %tmp.upgrd.1 ; <float> [#uses=1] + %tmp.upgrd.3 = insertelement <4 x float> undef, float %tmp.upgrd.2, i32 0 ; <<4 x float>> [#uses=1] + %tmp9 = insertelement <4 x float> %tmp.upgrd.3, float 0.000000e+00, i32 1 ; <<4 x float>> [#uses=1] + %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 2 ; <<4 x float>> [#uses=1] + %tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=1] + %tmp21 = bitcast <4 x float> %tmp11 to <16 x i8> ; <<16 x i8>> [#uses=1] + %tmp22 = shufflevector <16 x i8> %tmp21, <16 x i8> zeroinitializer, <16 x i32> < i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23 > ; <<16 x i8>> [#uses=1] + %tmp31 = bitcast <16 x i8> %tmp22 to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp.upgrd.4 = shufflevector <8 x i16> zeroinitializer, <8 x i16> %tmp31, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 > ; <<8 x i16>> [#uses=1] + %tmp36 = bitcast <8 x i16> %tmp.upgrd.4 to <4 x i32> ; <<4 x i32>> [#uses=1] + ret <4 x i32> %tmp36 +} + +define void @test6(<4 x float>* %res, <4 x float>* %A) nounwind { + %tmp1 = load <4 x float>* %A ; <<4 x float>> [#uses=1] + %tmp2 = shufflevector 
<4 x float> %tmp1, <4 x float> undef, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>> [#uses=1] + store <4 x float> %tmp2, <4 x float>* %res + ret void + +; CHECK: test6: +; CHECK: movaps (%eax), %xmm0 +; CHECK: movaps %xmm0, (%eax) +} + +define void @test7() nounwind { + bitcast <4 x i32> zeroinitializer to <4 x float> ; <<4 x float>>:1 [#uses=1] + shufflevector <4 x float> %1, <4 x float> zeroinitializer, <4 x i32> zeroinitializer ; <<4 x float>>:2 [#uses=1] + store <4 x float> %2, <4 x float>* null + ret void + +; CHECK: test7: +; CHECK: pxor %xmm0, %xmm0 +; CHECK: movaps %xmm0, 0 +} + +@x = external global [4 x i32] + +define <2 x i64> @test8() nounwind { + %tmp = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 0) ; <i32> [#uses=1] + %tmp3 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 1) ; <i32> [#uses=1] + %tmp5 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 2) ; <i32> [#uses=1] + %tmp7 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 3) ; <i32> [#uses=1] + %tmp.upgrd.1 = insertelement <4 x i32> undef, i32 %tmp, i32 0 ; <<4 x i32>> [#uses=1] + %tmp13 = insertelement <4 x i32> %tmp.upgrd.1, i32 %tmp3, i32 1 ; <<4 x i32>> [#uses=1] + %tmp14 = insertelement <4 x i32> %tmp13, i32 %tmp5, i32 2 ; <<4 x i32>> [#uses=1] + %tmp15 = insertelement <4 x i32> %tmp14, i32 %tmp7, i32 3 ; <<4 x i32>> [#uses=1] + %tmp16 = bitcast <4 x i32> %tmp15 to <2 x i64> ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp16 +; CHECK: test8: +; CHECK: movups (%eax), %xmm0 +} + +define <4 x float> @test9(i32 %dummy, float %a, float %b, float %c, float %d) nounwind { + %tmp = insertelement <4 x float> undef, float %a, i32 0 ; <<4 x float>> [#uses=1] + %tmp11 = insertelement <4 x float> %tmp, float %b, i32 1 ; <<4 x float>> [#uses=1] + %tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2 ; <<4 x float>> [#uses=1] + %tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3 ; <<4 x float>> [#uses=1] + ret <4 x float> %tmp13 +; CHECK: test9: +; CHECK: movups 8(%esp), 
%xmm0 +} + +define <4 x float> @test10(float %a, float %b, float %c, float %d) nounwind { + %tmp = insertelement <4 x float> undef, float %a, i32 0 ; <<4 x float>> [#uses=1] + %tmp11 = insertelement <4 x float> %tmp, float %b, i32 1 ; <<4 x float>> [#uses=1] + %tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2 ; <<4 x float>> [#uses=1] + %tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3 ; <<4 x float>> [#uses=1] + ret <4 x float> %tmp13 +; CHECK: test10: +; CHECK: movaps 4(%esp), %xmm0 +} + +define <2 x double> @test11(double %a, double %b) nounwind { + %tmp = insertelement <2 x double> undef, double %a, i32 0 ; <<2 x double>> [#uses=1] + %tmp7 = insertelement <2 x double> %tmp, double %b, i32 1 ; <<2 x double>> [#uses=1] + ret <2 x double> %tmp7 +; CHECK: test11: +; CHECK: movapd 4(%esp), %xmm0 +} + +define void @test12() nounwind { + %tmp1 = load <4 x float>* null ; <<4 x float>> [#uses=2] + %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 0, i32 1, i32 6, i32 7 > ; <<4 x float>> [#uses=1] + %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 6, i32 7 > ; <<4 x float>> [#uses=1] + %tmp4 = fadd <4 x float> %tmp2, %tmp3 ; <<4 x float>> [#uses=1] + store <4 x float> %tmp4, <4 x float>* null + ret void +; CHECK: test12: +; CHECK: movhlps +; CHECK: shufps +} + +define void @test13(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind { + %tmp3 = load <4 x float>* %B ; <<4 x float>> [#uses=1] + %tmp5 = load <4 x float>* %C ; <<4 x float>> [#uses=1] + %tmp11 = shufflevector <4 x float> %tmp3, <4 x float> %tmp5, <4 x i32> < i32 1, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=1] + store <4 x float> %tmp11, <4 x float>* %res + ret void +; CHECK: test13 +; CHECK: shufps $69, (%eax), %xmm0 +; CHECK: pshufd $-40, %xmm0, %xmm0 +} + +define <4 x float> @test14(<4 x float>* %x, <4 x 
float>* %y) nounwind { + %tmp = load <4 x float>* %y ; <<4 x float>> [#uses=2] + %tmp5 = load <4 x float>* %x ; <<4 x float>> [#uses=2] + %tmp9 = fadd <4 x float> %tmp5, %tmp ; <<4 x float>> [#uses=1] + %tmp21 = fsub <4 x float> %tmp5, %tmp ; <<4 x float>> [#uses=1] + %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 > ; <<4 x float>> [#uses=1] + ret <4 x float> %tmp27 +; CHECK: test14: +; CHECK: addps [[X1:%xmm[0-9]+]], [[X0:%xmm[0-9]+]] +; CHECK: subps [[X1]], [[X2:%xmm[0-9]+]] +; CHECK: movlhps [[X2]], [[X0]] +} + +define <4 x float> @test15(<4 x float>* %x, <4 x float>* %y) nounwind { +entry: + %tmp = load <4 x float>* %y ; <<4 x float>> [#uses=1] + %tmp3 = load <4 x float>* %x ; <<4 x float>> [#uses=1] + %tmp4 = shufflevector <4 x float> %tmp3, <4 x float> %tmp, <4 x i32> < i32 2, i32 3, i32 6, i32 7 > ; <<4 x float>> [#uses=1] + ret <4 x float> %tmp4 +; CHECK: test15: +; CHECK: movhlps %xmm1, %xmm0 +} + +; PR8900 +; CHECK: test16: +; CHECK: unpcklpd +; CHECK: ret + +define <2 x double> @test16(<4 x double> * nocapture %srcA, <2 x double>* nocapture %dst) { + %i5 = getelementptr inbounds <4 x double>* %srcA, i32 3 + %i6 = load <4 x double>* %i5, align 32 + %i7 = shufflevector <4 x double> %i6, <4 x double> undef, <2 x i32> <i32 0, i32 2> + ret <2 x double> %i7 +} + +; PR9009 +define fastcc void @test17() nounwind { +entry: + %0 = insertelement <4 x i32> undef, i32 undef, i32 1 + %1 = shufflevector <4 x i32> <i32 undef, i32 undef, i32 32768, i32 32768>, <4 x i32> %0, <4 x i32> <i32 4, i32 5, i32 2, i32 3> + %2 = bitcast <4 x i32> %1 to <4 x float> + store <4 x float> %2, <4 x float> * undef + ret void +} + +; PR9210 +define <4 x float> @f(<4 x double>) nounwind { +entry: + %double2float.i = fptrunc <4 x double> %0 to <4 x float> + ret <4 x float> %double2float.i +} +
diff --git a/src/LLVM/test/CodeGen/X86/sse3.ll b/src/LLVM/test/CodeGen/X86/sse3.ll new file mode 100644 index 0000000..8b3a317 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sse3.ll
@@ -0,0 +1,276 @@ +; These are tests for SSE3 codegen. + +; RUN: llc < %s -march=x86-64 -mcpu=nocona -mtriple=i686-apple-darwin9 -O3 \ +; RUN: | FileCheck %s --check-prefix=X64 + +; Test for v8xi16 lowering where we extract the first element of the vector and +; placed it in the second element of the result. + +define void @t0(<8 x i16>* %dest, <8 x i16>* %old) nounwind { +entry: + %tmp3 = load <8 x i16>* %old + %tmp6 = shufflevector <8 x i16> %tmp3, + <8 x i16> < i16 0, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef >, + <8 x i32> < i32 8, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef > + store <8 x i16> %tmp6, <8 x i16>* %dest + ret void + +; X64: t0: +; X64: movddup (%rsi), %xmm0 +; X64: pshuflw $0, %xmm0, %xmm0 +; X64: xorl %eax, %eax +; X64: pinsrw $0, %eax, %xmm0 +; X64: movdqa %xmm0, (%rdi) +; X64: ret +} + +define <8 x i16> @t1(<8 x i16>* %A, <8 x i16>* %B) nounwind { + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 > + ret <8 x i16> %tmp3 + +; X64: t1: +; X64: movl (%rsi), %eax +; X64: movdqa (%rdi), %xmm0 +; X64: pinsrw $0, %eax, %xmm0 +; X64: ret +} + +define <8 x i16> @t2(<8 x i16> %A, <8 x i16> %B) nounwind { + %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 6, i32 7 > + ret <8 x i16> %tmp +; X64: t2: +; X64: pextrw $1, %xmm1, %eax +; X64: pinsrw $0, %eax, %xmm0 +; X64: pinsrw $3, %eax, %xmm0 +; X64: ret +} + +define <8 x i16> @t3(<8 x i16> %A, <8 x i16> %B) nounwind { + %tmp = shufflevector <8 x i16> %A, <8 x i16> %A, <8 x i32> < i32 8, i32 3, i32 2, i32 13, i32 7, i32 6, i32 5, i32 4 > + ret <8 x i16> %tmp +; X64: t3: +; X64: pextrw $5, %xmm0, %eax +; X64: pshuflw $44, %xmm0, %xmm0 +; X64: pshufhw $27, %xmm0, %xmm0 +; X64: pinsrw $3, %eax, %xmm0 +; X64: ret +} + +define <8 x i16> @t4(<8 x i16> %A, <8 x i16> 
%B) nounwind { + %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 0, i32 7, i32 2, i32 3, i32 1, i32 5, i32 6, i32 5 > + ret <8 x i16> %tmp +; X64: t4: +; X64: pextrw $7, [[XMM0:%xmm[0-9]+]], %eax +; X64: pshufhw $100, [[XMM0]], [[XMM1:%xmm[0-9]+]] +; X64: pinsrw $1, %eax, [[XMM1]] +; X64: pextrw $1, [[XMM0]], %eax +; X64: pinsrw $4, %eax, %xmm0 +; X64: ret +} + +define <8 x i16> @t5(<8 x i16> %A, <8 x i16> %B) nounwind { + %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 8, i32 9, i32 0, i32 1, i32 10, i32 11, i32 2, i32 3 > + ret <8 x i16> %tmp +; X64: t5: +; X64: movlhps %xmm1, %xmm0 +; X64: pshufd $114, %xmm0, %xmm0 +; X64: ret +} + +define <8 x i16> @t6(<8 x i16> %A, <8 x i16> %B) nounwind { + %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 > + ret <8 x i16> %tmp +; X64: t6: +; X64: movss %xmm1, %xmm0 +; X64: ret +} + +define <8 x i16> @t7(<8 x i16> %A, <8 x i16> %B) nounwind { + %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 0, i32 0, i32 3, i32 2, i32 4, i32 6, i32 4, i32 7 > + ret <8 x i16> %tmp +; X64: t7: +; X64: pshuflw $-80, %xmm0, %xmm0 +; X64: pshufhw $-56, %xmm0, %xmm0 +; X64: ret +} + +define void @t8(<2 x i64>* %res, <2 x i64>* %A) nounwind { + %tmp = load <2 x i64>* %A + %tmp.upgrd.1 = bitcast <2 x i64> %tmp to <8 x i16> + %tmp0 = extractelement <8 x i16> %tmp.upgrd.1, i32 0 + %tmp1 = extractelement <8 x i16> %tmp.upgrd.1, i32 1 + %tmp2 = extractelement <8 x i16> %tmp.upgrd.1, i32 2 + %tmp3 = extractelement <8 x i16> %tmp.upgrd.1, i32 3 + %tmp4 = extractelement <8 x i16> %tmp.upgrd.1, i32 4 + %tmp5 = extractelement <8 x i16> %tmp.upgrd.1, i32 5 + %tmp6 = extractelement <8 x i16> %tmp.upgrd.1, i32 6 + %tmp7 = extractelement <8 x i16> %tmp.upgrd.1, i32 7 + %tmp8 = insertelement <8 x i16> undef, i16 %tmp2, i32 0 + %tmp9 = insertelement <8 x i16> %tmp8, i16 %tmp1, i32 1 + %tmp10 = insertelement <8 x i16> %tmp9, i16 %tmp0, i32 2 + 
%tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 3 + %tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp6, i32 4 + %tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 5 + %tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp4, i32 6 + %tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 7 + %tmp15.upgrd.2 = bitcast <8 x i16> %tmp15 to <2 x i64> + store <2 x i64> %tmp15.upgrd.2, <2 x i64>* %res + ret void +; X64: t8: +; X64: pshuflw $-58, (%rsi), %xmm0 +; X64: pshufhw $-58, %xmm0, %xmm0 +; X64: movdqa %xmm0, (%rdi) +; X64: ret +} + +define void @t9(<4 x float>* %r, <2 x i32>* %A) nounwind { + %tmp = load <4 x float>* %r + %tmp.upgrd.3 = bitcast <2 x i32>* %A to double* + %tmp.upgrd.4 = load double* %tmp.upgrd.3 + %tmp.upgrd.5 = insertelement <2 x double> undef, double %tmp.upgrd.4, i32 0 + %tmp5 = insertelement <2 x double> %tmp.upgrd.5, double undef, i32 1 + %tmp6 = bitcast <2 x double> %tmp5 to <4 x float> + %tmp.upgrd.6 = extractelement <4 x float> %tmp, i32 0 + %tmp7 = extractelement <4 x float> %tmp, i32 1 + %tmp8 = extractelement <4 x float> %tmp6, i32 0 + %tmp9 = extractelement <4 x float> %tmp6, i32 1 + %tmp10 = insertelement <4 x float> undef, float %tmp.upgrd.6, i32 0 + %tmp11 = insertelement <4 x float> %tmp10, float %tmp7, i32 1 + %tmp12 = insertelement <4 x float> %tmp11, float %tmp8, i32 2 + %tmp13 = insertelement <4 x float> %tmp12, float %tmp9, i32 3 + store <4 x float> %tmp13, <4 x float>* %r + ret void +; X64: t9: +; X64: movaps (%rdi), %xmm0 +; X64: movhps (%rsi), %xmm0 +; X64: movaps %xmm0, (%rdi) +; X64: ret +} + + + +; FIXME: This testcase produces icky code. It can be made much better! 
+; PR2585 + +@g1 = external constant <4 x i32> +@g2 = external constant <4 x i16> + +define internal void @t10() nounwind { + load <4 x i32>* @g1, align 16 + bitcast <4 x i32> %1 to <8 x i16> + shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> < i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef > + bitcast <8 x i16> %3 to <2 x i64> + extractelement <2 x i64> %4, i32 0 + bitcast i64 %5 to <4 x i16> + store <4 x i16> %6, <4 x i16>* @g2, align 8 + ret void +; X64: t10: +; X64: pextrw $4, [[X0:%xmm[0-9]+]], %eax +; X64: unpcklpd [[X1:%xmm[0-9]+]] +; X64: pshuflw $8, [[X1]], [[X2:%xmm[0-9]+]] +; X64: pinsrw $2, %eax, [[X2]] +; X64: pextrw $6, [[X0]], %eax +; X64: pinsrw $3, %eax, [[X2]] +} + + +; Pack various elements via shuffles. +define <8 x i16> @t11(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone { +entry: + %tmp7 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 1, i32 8, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef > + ret <8 x i16> %tmp7 + +; X64: t11: +; X64: movd %xmm1, %eax +; X64: movlhps %xmm0, %xmm0 +; X64: pshuflw $1, %xmm0, %xmm0 +; X64: pinsrw $1, %eax, %xmm0 +; X64: ret +} + + +define <8 x i16> @t12(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone { +entry: + %tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 0, i32 1, i32 undef, i32 undef, i32 3, i32 11, i32 undef , i32 undef > + ret <8 x i16> %tmp9 + +; X64: t12: +; X64: pextrw $3, %xmm1, %eax +; X64: movlhps %xmm0, %xmm0 +; X64: pshufhw $3, %xmm0, %xmm0 +; X64: pinsrw $5, %eax, %xmm0 +; X64: ret +} + + +define <8 x i16> @t13(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone { +entry: + %tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 8, i32 9, i32 undef, i32 undef, i32 11, i32 3, i32 undef , i32 undef > + ret <8 x i16> %tmp9 +; X64: t13: +; X64: punpcklqdq %xmm0, %xmm1 +; X64: pextrw $3, %xmm1, %eax +; X64: pshufd $52, %xmm1, %xmm0 +; X64: pinsrw $4, %eax, %xmm0 +; X64: ret +} + + +define <8 x i16> @t14(<8 x 
i16> %T0, <8 x i16> %T1) nounwind readnone { +entry: + %tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 2, i32 undef , i32 undef > + ret <8 x i16> %tmp9 +; X64: t14: +; X64: punpcklqdq %xmm0, %xmm1 +; X64: pshufhw $8, %xmm1, %xmm0 +; X64: ret +} + + + +define <8 x i16> @t15(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone { +entry: + %tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef > + ret <8 x i16> %tmp8 +; X64: t15: +; X64: pextrw $7, %xmm0, %eax +; X64: punpcklqdq %xmm1, %xmm0 +; X64: pshuflw $-128, %xmm0, %xmm0 +; X64: pinsrw $2, %eax, %xmm0 +; X64: ret +} + + +; Test yonah where we convert a shuffle to pextrw and pinrsw +define <16 x i8> @t16(<16 x i8> %T0) nounwind readnone { +entry: + %tmp8 = shufflevector <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 1, i8 1, i8 1, i8 1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> %T0, <16 x i32> < i32 0, i32 1, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef > + %tmp9 = shufflevector <16 x i8> %tmp8, <16 x i8> %T0, <16 x i32> < i32 0, i32 1, i32 2, i32 17, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef > + ret <16 x i8> %tmp9 +; X64: t16: +; X64: pinsrw $0, %eax, [[X1:%xmm[0-9]+]] +; X64: pextrw $8, [[X0:%xmm[0-9]+]], %eax +; X64: pinsrw $1, %eax, [[X1]] +; X64: pextrw $1, [[X1]], %ecx +; X64: movd [[X1]], %edx +; X64: pinsrw $0, %edx, %xmm +; X64: pinsrw $1, %eax, %xmm +; X64: ret +} + +; rdar://8520311 +define <4 x i32> @t17() nounwind { +entry: +; X64: t17: +; X64: movddup (%rax), %xmm0 + %tmp1 = load <4 x float>* undef, align 16 + %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 4, i32 1, i32 2, i32 3> + %tmp3 = load <4 x float>* undef, align 16 + 
%tmp4 = shufflevector <4 x float> %tmp2, <4 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1> + %tmp5 = bitcast <4 x float> %tmp3 to <4 x i32> + %tmp6 = shufflevector <4 x i32> %tmp5, <4 x i32> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1> + %tmp7 = and <4 x i32> %tmp6, <i32 undef, i32 undef, i32 -1, i32 0> + ret <4 x i32> %tmp7 +}
diff --git a/src/LLVM/test/CodeGen/X86/sse41-blend.ll b/src/LLVM/test/CodeGen/X86/sse41-blend.ll new file mode 100644 index 0000000..78604a0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sse41-blend.ll
@@ -0,0 +1,82 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -promote-elements -mattr=+sse41 | FileCheck %s + +;CHECK: vsel_float +;CHECK: blendvps +;CHECK: ret +define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) { + %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %v1, <4 x float> %v2 + ret <4 x float> %vsel +} + + +;CHECK: vsel_4xi8 +;CHECK: blendvps +;CHECK: ret +define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) { + %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i8> %v1, <4 x i8> %v2 + ret <4 x i8> %vsel +} + +;CHECK: vsel_4xi16 +;CHECK: blendvps +;CHECK: ret +define <4 x i16> @vsel_4xi16(<4 x i16> %v1, <4 x i16> %v2) { + %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i16> %v1, <4 x i16> %v2 + ret <4 x i16> %vsel +} + + +;CHECK: vsel_i32 +;CHECK: blendvps +;CHECK: ret +define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) { + %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i32> %v1, <4 x i32> %v2 + ret <4 x i32> %vsel +} + + +;CHECK: vsel_double +;CHECK: blendvpd +;CHECK: ret +define <4 x double> @vsel_double(<4 x double> %v1, <4 x double> %v2) { + %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x double> %v1, <4 x double> %v2 + ret <4 x double> %vsel +} + + +;CHECK: vsel_i64 +;CHECK: blendvpd +;CHECK: ret +define <4 x i64> @vsel_i64(<4 x i64> %v1, <4 x i64> %v2) { + %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i64> %v1, <4 x i64> %v2 + ret <4 x i64> %vsel +} + + +;CHECK: vsel_i8 +;CHECK: pblendvb +;CHECK: ret +define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) { + %vsel = select <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <16 x i8> %v1, <16 x i8> %v2 + ret <16 x i8> %vsel +} + +;; TEST blend + compares +; CHECK: A +define <2 x double> @A(<2 x double> %x, <2 x 
double> %y) { + ; CHECK: cmplepd + ; CHECK: blendvpd + %max_is_x = fcmp oge <2 x double> %x, %y + %max = select <2 x i1> %max_is_x, <2 x double> %x, <2 x double> %y + ret <2 x double> %max +} + +; CHECK: B +define <2 x double> @B(<2 x double> %x, <2 x double> %y) { + ; CHECK: cmpnlepd + ; CHECK: blendvpd + %min_is_x = fcmp ult <2 x double> %x, %y + %min = select <2 x i1> %min_is_x, <2 x double> %x, <2 x double> %y + ret <2 x double> %min +} +
diff --git a/src/LLVM/test/CodeGen/X86/sse41.ll b/src/LLVM/test/CodeGen/X86/sse41.ll new file mode 100644 index 0000000..2ac4cb4 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sse41.ll
@@ -0,0 +1,251 @@ +; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse41 | FileCheck %s -check-prefix=X32 +; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse41 | FileCheck %s -check-prefix=X64 + +@g16 = external global i16 + +define <4 x i32> @pinsrd_1(i32 %s, <4 x i32> %tmp) nounwind { + %tmp1 = insertelement <4 x i32> %tmp, i32 %s, i32 1 + ret <4 x i32> %tmp1 +; X32: pinsrd_1: +; X32: pinsrd $1, 4(%esp), %xmm0 + +; X64: pinsrd_1: +; X64: pinsrd $1, %edi, %xmm0 +} + +define <16 x i8> @pinsrb_1(i8 %s, <16 x i8> %tmp) nounwind { + %tmp1 = insertelement <16 x i8> %tmp, i8 %s, i32 1 + ret <16 x i8> %tmp1 +; X32: pinsrb_1: +; X32: pinsrb $1, 4(%esp), %xmm0 + +; X64: pinsrb_1: +; X64: pinsrb $1, %edi, %xmm0 +} + + +define <2 x i64> @pmovsxbd_1(i32* %p) nounwind { +entry: + %0 = load i32* %p, align 4 + %1 = insertelement <4 x i32> undef, i32 %0, i32 0 + %2 = insertelement <4 x i32> %1, i32 0, i32 1 + %3 = insertelement <4 x i32> %2, i32 0, i32 2 + %4 = insertelement <4 x i32> %3, i32 0, i32 3 + %5 = bitcast <4 x i32> %4 to <16 x i8> + %6 = tail call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %5) nounwind readnone + %7 = bitcast <4 x i32> %6 to <2 x i64> + ret <2 x i64> %7 + +; X32: _pmovsxbd_1: +; X32: movl 4(%esp), %eax +; X32: pmovsxbd (%eax), %xmm0 + +; X64: _pmovsxbd_1: +; X64: pmovsxbd (%rdi), %xmm0 +} + +define <2 x i64> @pmovsxwd_1(i64* %p) nounwind readonly { +entry: + %0 = load i64* %p ; <i64> [#uses=1] + %tmp2 = insertelement <2 x i64> zeroinitializer, i64 %0, i32 0 ; <<2 x i64>> [#uses=1] + %1 = bitcast <2 x i64> %tmp2 to <8 x i16> ; <<8 x i16>> [#uses=1] + %2 = tail call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %1) nounwind readnone ; <<4 x i32>> [#uses=1] + %3 = bitcast <4 x i32> %2 to <2 x i64> ; <<2 x i64>> [#uses=1] + ret <2 x i64> %3 + +; X32: _pmovsxwd_1: +; X32: movl 4(%esp), %eax +; X32: pmovsxwd (%eax), %xmm0 + +; X64: _pmovsxwd_1: +; X64: pmovsxwd (%rdi), %xmm0 +} + + + + +define <2 x i64> @pmovzxbq_1() nounwind { +entry: + %0 = load 
i16* @g16, align 2 ; <i16> [#uses=1] + %1 = insertelement <8 x i16> undef, i16 %0, i32 0 ; <<8 x i16>> [#uses=1] + %2 = bitcast <8 x i16> %1 to <16 x i8> ; <<16 x i8>> [#uses=1] + %3 = tail call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %2) nounwind readnone ; <<2 x i64>> [#uses=1] + ret <2 x i64> %3 + +; X32: _pmovzxbq_1: +; X32: movl L_g16$non_lazy_ptr, %eax +; X32: pmovzxbq (%eax), %xmm0 + +; X64: _pmovzxbq_1: +; X64: movq _g16@GOTPCREL(%rip), %rax +; X64: pmovzxbq (%rax), %xmm0 +} + +declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone +declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone +declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone + + + + +define i32 @extractps_1(<4 x float> %v) nounwind { + %s = extractelement <4 x float> %v, i32 3 + %i = bitcast float %s to i32 + ret i32 %i + +; X32: _extractps_1: +; X32: extractps $3, %xmm0, %eax + +; X64: _extractps_1: +; X64: extractps $3, %xmm0, %eax +} +define i32 @extractps_2(<4 x float> %v) nounwind { + %t = bitcast <4 x float> %v to <4 x i32> + %s = extractelement <4 x i32> %t, i32 3 + ret i32 %s + +; X32: _extractps_2: +; X32: extractps $3, %xmm0, %eax + +; X64: _extractps_2: +; X64: extractps $3, %xmm0, %eax +} + + +; The non-store form of extractps puts its result into a GPR. +; This makes it suitable for an extract from a <4 x float> that +; is bitcasted to i32, but unsuitable for much of anything else. 
+ +define float @ext_1(<4 x float> %v) nounwind { + %s = extractelement <4 x float> %v, i32 3 + %t = fadd float %s, 1.0 + ret float %t + +; X32: _ext_1: +; X32: pshufd $3, %xmm0, %xmm0 +; X32: addss LCPI7_0, %xmm0 + +; X64: _ext_1: +; X64: pshufd $3, %xmm0, %xmm0 +; X64: addss LCPI7_0(%rip), %xmm0 +} +define float @ext_2(<4 x float> %v) nounwind { + %s = extractelement <4 x float> %v, i32 3 + ret float %s + +; X32: _ext_2: +; X32: pshufd $3, %xmm0, %xmm0 + +; X64: _ext_2: +; X64: pshufd $3, %xmm0, %xmm0 +} +define i32 @ext_3(<4 x i32> %v) nounwind { + %i = extractelement <4 x i32> %v, i32 3 + ret i32 %i + +; X32: _ext_3: +; X32: pextrd $3, %xmm0, %eax + +; X64: _ext_3: +; X64: pextrd $3, %xmm0, %eax +} + +define <4 x float> @insertps_1(<4 x float> %t1, <4 x float> %t2) nounwind { + %tmp1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %t1, <4 x float> %t2, i32 1) nounwind readnone + ret <4 x float> %tmp1 +; X32: _insertps_1: +; X32: insertps $1, %xmm1, %xmm0 + +; X64: _insertps_1: +; X64: insertps $1, %xmm1, %xmm0 +} + +declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone + +define <4 x float> @insertps_2(<4 x float> %t1, float %t2) nounwind { + %tmp1 = insertelement <4 x float> %t1, float %t2, i32 0 + ret <4 x float> %tmp1 +; X32: _insertps_2: +; X32: insertps $0, 4(%esp), %xmm0 + +; X64: _insertps_2: +; X64: insertps $0, %xmm1, %xmm0 +} + +define <4 x float> @insertps_3(<4 x float> %t1, <4 x float> %t2) nounwind { + %tmp2 = extractelement <4 x float> %t2, i32 0 + %tmp1 = insertelement <4 x float> %t1, float %tmp2, i32 0 + ret <4 x float> %tmp1 +; X32: _insertps_3: +; X32: insertps $0, %xmm1, %xmm0 + +; X64: _insertps_3: +; X64: insertps $0, %xmm1, %xmm0 +} + +define i32 @ptestz_1(<4 x float> %t1, <4 x float> %t2) nounwind { + %tmp1 = call i32 @llvm.x86.sse41.ptestz(<4 x float> %t1, <4 x float> %t2) nounwind readnone + ret i32 %tmp1 +; X32: _ptestz_1: +; X32: ptest %xmm1, %xmm0 +; X32: sete %al + +; X64: _ptestz_1: 
+; X64: ptest %xmm1, %xmm0 +; X64: sete %al +} + +define i32 @ptestz_2(<4 x float> %t1, <4 x float> %t2) nounwind { + %tmp1 = call i32 @llvm.x86.sse41.ptestc(<4 x float> %t1, <4 x float> %t2) nounwind readnone + ret i32 %tmp1 +; X32: _ptestz_2: +; X32: ptest %xmm1, %xmm0 +; X32: sbbl %eax + +; X64: _ptestz_2: +; X64: ptest %xmm1, %xmm0 +; X64: sbbl %eax +} + +define i32 @ptestz_3(<4 x float> %t1, <4 x float> %t2) nounwind { + %tmp1 = call i32 @llvm.x86.sse41.ptestnzc(<4 x float> %t1, <4 x float> %t2) nounwind readnone + ret i32 %tmp1 +; X32: _ptestz_3: +; X32: ptest %xmm1, %xmm0 +; X32: seta %al + +; X64: _ptestz_3: +; X64: ptest %xmm1, %xmm0 +; X64: seta %al +} + + +declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone +declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone +declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone + +; This used to compile to insertps $0 + insertps $16. insertps $0 is always +; pointless. +define <2 x float> @buildvector(<2 x float> %A, <2 x float> %B) nounwind { +entry: + %tmp7 = extractelement <2 x float> %A, i32 0 + %tmp5 = extractelement <2 x float> %A, i32 1 + %tmp3 = extractelement <2 x float> %B, i32 0 + %tmp1 = extractelement <2 x float> %B, i32 1 + %add.r = fadd float %tmp7, %tmp3 + %add.i = fadd float %tmp5, %tmp1 + %tmp11 = insertelement <2 x float> undef, float %add.r, i32 0 + %tmp9 = insertelement <2 x float> %tmp11, float %add.i, i32 1 + ret <2 x float> %tmp9 +; X32: buildvector: +; X32-NOT: insertps $0 +; X32: insertps $16 +; X32-NOT: insertps $0 +; X32: ret +; X64: buildvector: +; X64-NOT: insertps $0 +; X64: insertps $16 +; X64-NOT: insertps $0 +; X64: ret +} +
diff --git a/src/LLVM/test/CodeGen/X86/sse42.ll b/src/LLVM/test/CodeGen/X86/sse42.ll new file mode 100644 index 0000000..c787523 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sse42.ll
@@ -0,0 +1,39 @@ +; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse42 | FileCheck %s -check-prefix=X32 +; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse42 | FileCheck %s -check-prefix=X64 + +declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind +declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind +declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind + +define i32 @crc32_32_8(i32 %a, i8 %b) nounwind { + %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b) + ret i32 %tmp +; X32: _crc32_32_8: +; X32: crc32b 8(%esp), %eax + +; X64: _crc32_32_8: +; X64: crc32b %sil, +} + + +define i32 @crc32_32_16(i32 %a, i16 %b) nounwind { + %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b) + ret i32 %tmp +; X32: _crc32_32_16: +; X32: crc32w 8(%esp), %eax + +; X64: _crc32_32_16: +; X64: crc32w %si, +} + + +define i32 @crc32_32_32(i32 %a, i32 %b) nounwind { + %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b) + ret i32 %tmp +; X32: _crc32_32_32: +; X32: crc32l 8(%esp), %eax + +; X64: _crc32_32_32: +; X64: crc32l %esi, +} +
diff --git a/src/LLVM/test/CodeGen/X86/sse42_64.ll b/src/LLVM/test/CodeGen/X86/sse42_64.ll new file mode 100644 index 0000000..8b3a69b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sse42_64.ll
@@ -0,0 +1,21 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse42 | FileCheck %s -check-prefix=X64 + +declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind +declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind + +define i64 @crc32_64_8(i64 %a, i8 %b) nounwind { + %tmp = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a, i8 %b) + ret i64 %tmp + +; X64: _crc32_64_8: +; X64: crc32b %sil, +} + +define i64 @crc32_64_64(i64 %a, i64 %b) nounwind { + %tmp = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a, i64 %b) + ret i64 %tmp + +; X64: _crc32_64_64: +; X64: crc32q %rsi, +} +
diff --git a/src/LLVM/test/CodeGen/X86/sse_reload_fold.ll b/src/LLVM/test/CodeGen/X86/sse_reload_fold.ll new file mode 100644 index 0000000..a57fa58 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sse_reload_fold.ll
@@ -0,0 +1,134 @@ +; RUN: llc < %s -mtriple=x86_64-linux -mattr=+64bit,+sse3 -print-failed-fuse-candidates -regalloc=basic |& FileCheck %s +; CHECK: fail +; CHECK-NOT: fail + +declare float @test_f(float %f) +declare double @test_d(double %f) +declare <4 x float> @test_vf(<4 x float> %f) +declare <2 x double> @test_vd(<2 x double> %f) +declare float @llvm.sqrt.f32(float) +declare double @llvm.sqrt.f64(double) + +declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) +declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) +declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) +declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) +declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) +declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) +declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) +declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) +declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) +declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) +declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) +declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) +declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) +declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) +declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) +declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) + +define float @foo(float %f) { + %a = call float @test_f(float %f) + %t = call float @llvm.sqrt.f32(float %f) + ret float %t +} +define double @doo(double %f) { + %a = call double @test_d(double %f) + %t = call double @llvm.sqrt.f64(double %f) + ret double %t +} +define <4 x float> @a0(<4 x float> %f) { + %a = call <4 x float> @test_vf(<4 x float> %f) + %t = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %f) + ret <4 x float> %t +} +define <4 x float> @a1(<4 x float> %f) { + %a = call 
<4 x float> @test_vf(<4 x float> %f) + %t = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %f) + ret <4 x float> %t +} +define <4 x float> @a2(<4 x float> %f) { + %a = call <4 x float> @test_vf(<4 x float> %f) + %t = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %f) + ret <4 x float> %t +} +define <4 x float> @b3(<4 x float> %f) { + %y = call <4 x float> @test_vf(<4 x float> %f) + %t = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %y, <4 x float> %f) + ret <4 x float> %t +} +define <4 x float> @b4(<4 x float> %f) { + %y = call <4 x float> @test_vf(<4 x float> %f) + %t = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %y, <4 x float> %f) + ret <4 x float> %t +} +define <4 x float> @b5(<4 x float> %f) { + %y = call <4 x float> @test_vf(<4 x float> %f) + %t = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %y, <4 x float> %f, i8 7) + ret <4 x float> %t +} +define <4 x float> @b6(<4 x float> %f) { + %y = call <4 x float> @test_vf(<4 x float> %f) + %t = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %y, <4 x float> %f) + ret <4 x float> %t +} +define <4 x float> @b7(<4 x float> %f) { + %y = call <4 x float> @test_vf(<4 x float> %f) + %t = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %y, <4 x float> %f) + ret <4 x float> %t +} +define <4 x float> @b8(<4 x float> %f) { + %y = call <4 x float> @test_vf(<4 x float> %f) + %t = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %y, <4 x float> %f) + ret <4 x float> %t +} +define <2 x double> @c1(<2 x double> %f) { + %a = call <2 x double> @test_vd(<2 x double> %f) + %t = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %f) + ret <2 x double> %t +} +define <2 x double> @d3(<2 x double> %f) { + %y = call <2 x double> @test_vd(<2 x double> %f) + %t = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %y, <2 x double> %f) + ret <2 x double> %t +} +define <2 x double> @d4(<2 x double> %f) { + %y = call <2 x double> @test_vd(<2 x double> %f) + %t = call <2 x double> @llvm.x86.sse2.max.pd(<2 x 
double> %y, <2 x double> %f) + ret <2 x double> %t +} +define <2 x double> @d5(<2 x double> %f) { + %y = call <2 x double> @test_vd(<2 x double> %f) + %t = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %y, <2 x double> %f, i8 7) + ret <2 x double> %t +} +define <2 x double> @d6(<2 x double> %f) { + %y = call <2 x double> @test_vd(<2 x double> %f) + %t = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %y, <2 x double> %f) + ret <2 x double> %t +} +define <2 x double> @d7(<2 x double> %f) { + %y = call <2 x double> @test_vd(<2 x double> %f) + %t = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %y, <2 x double> %f) + ret <2 x double> %t +} +define <2 x double> @d8(<2 x double> %f) { + %y = call <2 x double> @test_vd(<2 x double> %f) + %t = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %y, <2 x double> %f) + ret <2 x double> %t +} + +; This one should fail to fuse, but -regalloc=greedy isn't even trying. Instead +; it produces: +; callq test_vd +; movapd (%rsp), %xmm1 # 16-byte Reload +; hsubpd %xmm0, %xmm1 +; movapd %xmm1, %xmm0 +; addq $24, %rsp +; ret +; RABasic still tries to fold this one. + +define <2 x double> @z0(<2 x double> %f) { + %y = call <2 x double> @test_vd(<2 x double> %f) + %t = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %f, <2 x double> %y) + ret <2 x double> %t +}
diff --git a/src/LLVM/test/CodeGen/X86/stack-align.ll b/src/LLVM/test/CodeGen/X86/stack-align.ll new file mode 100644 index 0000000..793c026 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/stack-align.ll
@@ -0,0 +1,51 @@ +; RUN: llc < %s -relocation-model=static -realign-stack=1 -mcpu=yonah | FileCheck %s + +; The double argument is at 4(esp) which is 16-byte aligned, allowing us to +; fold the load into the andpd. + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i686-apple-darwin8" +@G = external global double + +define void @test({ double, double }* byval %z, double* %P) nounwind { +entry: + %tmp3 = load double* @G, align 16 ; <double> [#uses=1] + %tmp4 = tail call double @fabs( double %tmp3 ) ; <double> [#uses=1] + volatile store double %tmp4, double* %P + %tmp = getelementptr { double, double }* %z, i32 0, i32 0 ; <double*> [#uses=1] + %tmp1 = volatile load double* %tmp, align 8 ; <double> [#uses=1] + %tmp2 = tail call double @fabs( double %tmp1 ) ; <double> [#uses=1] + ; CHECK: andpd{{.*}}4(%esp), %xmm + %tmp6 = fadd double %tmp4, %tmp2 ; <double> [#uses=1] + volatile store double %tmp6, double* %P, align 8 + ret void +} + +define void @test2() alignstack(16) nounwind { +entry: + ; CHECK: andl{{.*}}$-16, %esp + ret void +} + +; Use a call to force a spill. +define <2 x double> @test3(<2 x double> %x, <2 x double> %y) alignstack(32) nounwind { +entry: + ; CHECK: andl{{.*}}$-32, %esp + call void @test2() + %A = fmul <2 x double> %x, %y + ret <2 x double> %A +} + +declare double @fabs(double) + +; The pointer is already known aligned, so and x,-16 is eliminable. +define i32 @test4() nounwind { +entry: + %buffer = alloca [2048 x i8], align 16 + %0 = ptrtoint [2048 x i8]* %buffer to i32 + %and = and i32 %0, -16 + ret i32 %and +; CHECK: test4: +; CHECK-NOT: and +; CHECK: ret +}
diff --git a/src/LLVM/test/CodeGen/X86/stack-protector-linux.ll b/src/LLVM/test/CodeGen/X86/stack-protector-linux.ll new file mode 100644 index 0000000..fe2a9c5 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/stack-protector-linux.ll
@@ -0,0 +1,28 @@ +; RUN: llc -mtriple=i386-pc-linux-gnu < %s -o - | grep %gs: +; RUN: llc -mtriple=x86_64-pc-linux-gnu < %s -o - | grep %fs: +; RUN: llc -code-model=kernel -mtriple=x86_64-pc-linux-gnu < %s -o - | grep %gs: +; RUN: llc -mtriple=x86_64-apple-darwin < %s -o - | grep {__stack_chk_guard} +; RUN: llc -mtriple=x86_64-apple-darwin < %s -o - | grep {__stack_chk_fail} + +@"\01LC" = internal constant [11 x i8] c"buf == %s\0A\00" ; <[11 x i8]*> [#uses=1] + +define void @test(i8* %a) nounwind ssp { +entry: + %a_addr = alloca i8* ; <i8**> [#uses=2] + %buf = alloca [8 x i8] ; <[8 x i8]*> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store i8* %a, i8** %a_addr + %buf1 = bitcast [8 x i8]* %buf to i8* ; <i8*> [#uses=1] + %0 = load i8** %a_addr, align 4 ; <i8*> [#uses=1] + %1 = call i8* @strcpy(i8* %buf1, i8* %0) nounwind ; <i8*> [#uses=0] + %buf2 = bitcast [8 x i8]* %buf to i8* ; <i8*> [#uses=1] + %2 = call i32 (i8*, ...)* @printf(i8* getelementptr ([11 x i8]* @"\01LC", i32 0, i32 0), i8* %buf2) nounwind ; <i32> [#uses=0] + br label %return + +return: ; preds = %entry + ret void +} + +declare i8* @strcpy(i8*, i8*) nounwind + +declare i32 @printf(i8*, ...) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/stdarg.ll b/src/LLVM/test/CodeGen/X86/stdarg.ll new file mode 100644 index 0000000..5728daf --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/stdarg.ll
@@ -0,0 +1,21 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; CHECK: testb %al, %al + +%struct.__va_list_tag = type { i32, i32, i8*, i8* } + +define void @foo(i32 %x, ...) nounwind { +entry: + %ap = alloca [1 x %struct.__va_list_tag], align 8; <[1 x %struct.__va_list_tag]*> [#uses=2] + %ap12 = bitcast [1 x %struct.__va_list_tag]* %ap to i8*; <i8*> [#uses=2] + call void @llvm.va_start(i8* %ap12) + %ap3 = getelementptr inbounds [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0; <%struct.__va_list_tag*> [#uses=1] + call void @bar(%struct.__va_list_tag* %ap3) nounwind + call void @llvm.va_end(i8* %ap12) + ret void +} + +declare void @llvm.va_start(i8*) nounwind + +declare void @bar(%struct.__va_list_tag*) + +declare void @llvm.va_end(i8*) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/stdcall-notailcall.ll b/src/LLVM/test/CodeGen/X86/stdcall-notailcall.ll new file mode 100644 index 0000000..8e33c30 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/stdcall-notailcall.ll
@@ -0,0 +1,13 @@ +; RUN: llc -mtriple=i386-apple-darwin11 -O2 < %s | FileCheck %s + +%struct.I = type { i32 (...)** } +define x86_stdcallcc void @bar(%struct.I* nocapture %this) ssp align 2 { +; CHECK: bar: +; CHECK-NOT: jmp +; CHECK: ret $4 +entry: + tail call void @foo() + ret void +} + +declare void @foo()
diff --git a/src/LLVM/test/CodeGen/X86/stdcall.ll b/src/LLVM/test/CodeGen/X86/stdcall.ll new file mode 100644 index 0000000..a7c2517 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/stdcall.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s | FileCheck %s +; PR5851 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +target triple = "i386-pc-mingw32" + +%0 = type { void (...)* } + +@B = global %0 { void (...)* bitcast (void ()* @MyFunc to void (...)*) }, align 4 +; CHECK: _B: +; CHECK: .long _MyFunc@0 + +define internal x86_stdcallcc void @MyFunc() nounwind { +entry: + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/store-empty-member.ll b/src/LLVM/test/CodeGen/X86/store-empty-member.ll new file mode 100644 index 0000000..37f86c6 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/store-empty-member.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=x86 | FileCheck %s + +; Don't crash on an empty struct member. + +; CHECK: movl $2, 4(%esp) +; CHECK: movl $1, (%esp) + +%testType = type {i32, [0 x i32], i32} + +define void @foo() nounwind { + %1 = alloca %testType + volatile store %testType {i32 1, [0 x i32] zeroinitializer, i32 2}, %testType* %1 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/store-fp-constant.ll b/src/LLVM/test/CodeGen/X86/store-fp-constant.ll new file mode 100644 index 0000000..3db4e11 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/store-fp-constant.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -march=x86 | not grep rodata +; RUN: llc < %s -march=x86 | not grep literal +; +; Check that no FP constants in this testcase ends up in the +; constant pool. + +@G = external global float ; <float*> [#uses=1] + +declare void @extfloat(float) + +declare void @extdouble(double) + +define void @testfloatstore() { + call void @extfloat( float 0x40934999A0000000 ) + call void @extdouble( double 0x409349A631F8A090 ) + store float 0x402A064C20000000, float* @G + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/store-global-address.ll b/src/LLVM/test/CodeGen/X86/store-global-address.ll new file mode 100644 index 0000000..2bc76ff --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/store-global-address.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=x86 | grep movl | count 1 + +@dst = global i32 0 ; <i32*> [#uses=1] +@ptr = global i32* null ; <i32**> [#uses=1] + +define void @test() { + store i32* @dst, i32** @ptr + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/store-narrow.ll b/src/LLVM/test/CodeGen/X86/store-narrow.ll new file mode 100644 index 0000000..0dd228e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/store-narrow.ll
@@ -0,0 +1,168 @@ +; rdar://7860110 +; RUN: llc -asm-verbose=false < %s | FileCheck %s -check-prefix=X64 +; RUN: llc -march=x86 -asm-verbose=false < %s | FileCheck %s -check-prefix=X32 +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.2" + +define void @test1(i32* nocapture %a0, i8 zeroext %a1) nounwind ssp { +entry: + %A = load i32* %a0, align 4 + %B = and i32 %A, -256 ; 0xFFFFFF00 + %C = zext i8 %a1 to i32 + %D = or i32 %C, %B + store i32 %D, i32* %a0, align 4 + ret void + +; X64: test1: +; X64: movb %sil, (%rdi) + +; X32: test1: +; X32: movb 8(%esp), %al +; X32: movb %al, (%{{.*}}) +} + +define void @test2(i32* nocapture %a0, i8 zeroext %a1) nounwind ssp { +entry: + %A = load i32* %a0, align 4 + %B = and i32 %A, -65281 ; 0xFFFF00FF + %C = zext i8 %a1 to i32 + %CS = shl i32 %C, 8 + %D = or i32 %B, %CS + store i32 %D, i32* %a0, align 4 + ret void +; X64: test2: +; X64: movb %sil, 1(%rdi) + +; X32: test2: +; X32: movb 8(%esp), %al +; X32: movb %al, 1(%{{.*}}) +} + +define void @test3(i32* nocapture %a0, i16 zeroext %a1) nounwind ssp { +entry: + %A = load i32* %a0, align 4 + %B = and i32 %A, -65536 ; 0xFFFF0000 + %C = zext i16 %a1 to i32 + %D = or i32 %B, %C + store i32 %D, i32* %a0, align 4 + ret void +; X64: test3: +; X64: movw %si, (%rdi) + +; X32: test3: +; X32: movw 8(%esp), %ax +; X32: movw %ax, (%{{.*}}) +} + +define void @test4(i32* nocapture %a0, i16 zeroext %a1) nounwind ssp { +entry: + %A = load i32* %a0, align 4 + %B = and i32 %A, 65535 ; 0x0000FFFF + %C = zext i16 %a1 to i32 + %CS = shl i32 %C, 16 + %D = or i32 %B, %CS + store i32 %D, i32* %a0, align 4 + ret void +; X64: test4: +; X64: movw %si, 2(%rdi) + +; X32: test4: +; X32: movl 8(%esp), %eax +; X32: movw %ax, 2(%{{.*}}) +} + +define void @test5(i64* nocapture %a0, i16 zeroext %a1) nounwind ssp { +entry: + %A = load i64* %a0, align 4 + %B = and i64 
%A, -4294901761 ; 0xFFFFFFFF0000FFFF + %C = zext i16 %a1 to i64 + %CS = shl i64 %C, 16 + %D = or i64 %B, %CS + store i64 %D, i64* %a0, align 4 + ret void +; X64: test5: +; X64: movw %si, 2(%rdi) + +; X32: test5: +; X32: movzwl 8(%esp), %eax +; X32: movw %ax, 2(%{{.*}}) +} + +define void @test6(i64* nocapture %a0, i8 zeroext %a1) nounwind ssp { +entry: + %A = load i64* %a0, align 4 + %B = and i64 %A, -280375465082881 ; 0xFFFF00FFFFFFFFFF + %C = zext i8 %a1 to i64 + %CS = shl i64 %C, 40 + %D = or i64 %B, %CS + store i64 %D, i64* %a0, align 4 + ret void +; X64: test6: +; X64: movb %sil, 5(%rdi) + + +; X32: test6: +; X32: movb 8(%esp), %al +; X32: movb %al, 5(%{{.*}}) +} + +define i32 @test7(i64* nocapture %a0, i8 zeroext %a1, i32* %P2) nounwind { +entry: + %OtherLoad = load i32 *%P2 + %A = load i64* %a0, align 4 + %B = and i64 %A, -280375465082881 ; 0xFFFF00FFFFFFFFFF + %C = zext i8 %a1 to i64 + %CS = shl i64 %C, 40 + %D = or i64 %B, %CS + store i64 %D, i64* %a0, align 4 + ret i32 %OtherLoad +; X64: test7: +; X64: movb %sil, 5(%rdi) + + +; X32: test7: +; X32: movb 8(%esp), %cl +; X32: movb %cl, 5(%{{.*}}) +} + +; PR7833 + +@g_16 = internal global i32 -1 + +; X64: test8: +; X64-NEXT: movl _g_16(%rip), %eax +; X64-NEXT: movl $0, _g_16(%rip) +; X64-NEXT: orl $1, %eax +; X64-NEXT: movl %eax, _g_16(%rip) +; X64-NEXT: ret +define void @test8() nounwind { + %tmp = load i32* @g_16 + store i32 0, i32* @g_16 + %or = or i32 %tmp, 1 + store i32 %or, i32* @g_16 + ret void +} + +; X64: test9: +; X64-NEXT: orb $1, _g_16(%rip) +; X64-NEXT: ret +define void @test9() nounwind { + %tmp = load i32* @g_16 + %or = or i32 %tmp, 1 + store i32 %or, i32* @g_16 + ret void +} + +; rdar://8494845 + PR8244 +; X64: test10: +; X64-NEXT: movsbl (%rdi), %eax +; X64-NEXT: shrl $8, %eax +; X64-NEXT: ret +define i8 @test10(i8* %P) nounwind ssp { +entry: + %tmp = load i8* %P, align 1 + %conv = sext i8 %tmp to i32 + %shr3 = lshr i32 %conv, 8 + %conv2 = trunc i32 %shr3 to i8 + ret i8 %conv2 +}
diff --git a/src/LLVM/test/CodeGen/X86/store_op_load_fold.ll b/src/LLVM/test/CodeGen/X86/store_op_load_fold.ll new file mode 100644 index 0000000..c6632ef --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/store_op_load_fold.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=x86 | not grep mov +; +; Test the add and load are folded into the store instruction. + +@X = internal global i16 0 ; <i16*> [#uses=2] + +define void @foo() nounwind { + %tmp.0 = load i16* @X ; <i16> [#uses=1] + %tmp.3 = add i16 %tmp.0, 329 ; <i16> [#uses=1] + store i16 %tmp.3, i16* @X + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/store_op_load_fold2.ll b/src/LLVM/test/CodeGen/X86/store_op_load_fold2.ll new file mode 100644 index 0000000..b916975 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/store_op_load_fold2.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -mtriple=i686-linux -x86-asm-syntax=intel | FileCheck %s + +target datalayout = "e-p:32:32" + %struct.Macroblock = type { i32, i32, i32, i32, i32, [8 x i32], %struct.Macroblock*, %struct.Macroblock*, i32, [2 x [4 x [4 x [2 x i32]]]], [16 x i8], [16 x i8], i32, i64, [4 x i32], [4 x i32], i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, double, i32, i32, i32, i32, i32, i32, i32, i32, i32 } + +define internal fastcc i32 @dct_chroma(i32 %uv, i32 %cr_cbp) nounwind { +cond_true2732.preheader: ; preds = %entry + %tmp2666 = getelementptr %struct.Macroblock* null, i32 0, i32 13 ; <i64*> [#uses=2] + %tmp2674 = trunc i32 0 to i8 ; <i8> [#uses=1] + %tmp2667.us.us = load i64* %tmp2666 ; <i64> [#uses=1] + %tmp2670.us.us = load i64* null ; <i64> [#uses=1] + %shift.upgrd.1 = zext i8 %tmp2674 to i64 ; <i64> [#uses=1] + %tmp2675.us.us = shl i64 %tmp2670.us.us, %shift.upgrd.1 ; <i64> [#uses=1] + %tmp2675not.us.us = xor i64 %tmp2675.us.us, -1 ; <i64> [#uses=1] + %tmp2676.us.us = and i64 %tmp2667.us.us, %tmp2675not.us.us ; <i64> [#uses=1] + store i64 %tmp2676.us.us, i64* %tmp2666 + ret i32 0 + +; CHECK: and {{E..}}, DWORD PTR [360] +; CHECK: and DWORD PTR [356], {{E..}} +; CHECK: mov DWORD PTR [360], {{E..}} + +} +
diff --git a/src/LLVM/test/CodeGen/X86/storetrunc-fp.ll b/src/LLVM/test/CodeGen/X86/storetrunc-fp.ll new file mode 100644 index 0000000..03ad093 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/storetrunc-fp.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=x86 | not grep flds + +define void @foo(x86_fp80 %a, x86_fp80 %b, float* %fp) { + %c = fadd x86_fp80 %a, %b + %d = fptrunc x86_fp80 %c to float + store float %d, float* %fp + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/stride-nine-with-base-reg.ll b/src/LLVM/test/CodeGen/X86/stride-nine-with-base-reg.ll new file mode 100644 index 0000000..ddf059c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/stride-nine-with-base-reg.ll
@@ -0,0 +1,38 @@ +; RUN: llc < %s -march=x86 -relocation-model=static | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; CHECK-NOT: lea + +; P should be sunk into the loop and folded into the address mode. There +; shouldn't be any lea instructions inside the loop. + +@B = external global [1000 x i8], align 32 +@A = external global [1000 x i8], align 32 +@P = external global [1000 x i8], align 32 +@Q = external global [1000 x i8], align 32 + +define void @foo(i32 %m, i32 %p) nounwind { +entry: + %tmp1 = icmp sgt i32 %m, 0 + br i1 %tmp1, label %bb, label %return + +bb: + %i.019.0 = phi i32 [ %indvar.next, %bb ], [ 0, %entry ] + %tmp2 = getelementptr [1000 x i8]* @B, i32 0, i32 %i.019.0 + %tmp3 = load i8* %tmp2, align 4 + %tmp4 = mul i8 %tmp3, 2 + %tmp5 = getelementptr [1000 x i8]* @A, i32 0, i32 %i.019.0 + store i8 %tmp4, i8* %tmp5, align 4 + %tmp8 = mul i32 %i.019.0, 9 + %tmp0 = add i32 %tmp8, %p + %tmp10 = getelementptr [1000 x i8]* @P, i32 0, i32 %tmp0 + store i8 17, i8* %tmp10, align 4 + %tmp11 = getelementptr [1000 x i8]* @Q, i32 0, i32 %tmp0 + store i8 19, i8* %tmp11, align 4 + %indvar.next = add i32 %i.019.0, 1 + %exitcond = icmp eq i32 %indvar.next, %m + br i1 %exitcond, label %return, label %bb + +return: + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/stride-reuse.ll b/src/LLVM/test/CodeGen/X86/stride-reuse.ll new file mode 100644 index 0000000..1251a24 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/stride-reuse.ll
@@ -0,0 +1,31 @@ +; RUN: llc < %s -march=x86 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; CHECK-NOT: lea + +@B = external global [1000 x float], align 32 +@A = external global [1000 x float], align 32 +@P = external global [1000 x i32], align 32 + +define void @foo(i32 %m) nounwind { +entry: + %tmp1 = icmp sgt i32 %m, 0 + br i1 %tmp1, label %bb, label %return + +bb: + %i.019.0 = phi i32 [ %indvar.next, %bb ], [ 0, %entry ] + %tmp2 = getelementptr [1000 x float]* @B, i32 0, i32 %i.019.0 + %tmp3 = load float* %tmp2, align 4 + %tmp4 = fmul float %tmp3, 2.000000e+00 + %tmp5 = getelementptr [1000 x float]* @A, i32 0, i32 %i.019.0 + store float %tmp4, float* %tmp5, align 4 + %tmp8 = shl i32 %i.019.0, 1 + %tmp9 = add i32 %tmp8, 64 + %tmp10 = getelementptr [1000 x i32]* @P, i32 0, i32 %i.019.0 + store i32 %tmp9, i32* %tmp10, align 4 + %indvar.next = add i32 %i.019.0, 1 + %exitcond = icmp eq i32 %indvar.next, %m + br i1 %exitcond, label %return, label %bb + +return: + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/sub-with-overflow.ll b/src/LLVM/test/CodeGen/X86/sub-with-overflow.ll new file mode 100644 index 0000000..4522e91 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sub-with-overflow.ll
@@ -0,0 +1,59 @@ +; RUN: llc < %s -march=x86 | FileCheck %s + +@ok = internal constant [4 x i8] c"%d\0A\00" +@no = internal constant [4 x i8] c"no\0A\00" + +define i1 @func1(i32 %v1, i32 %v2) nounwind { +entry: + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2) + %sum = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + br i1 %obit, label %overflow, label %normal + +normal: + %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind + ret i1 true + +overflow: + %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind + ret i1 false + +; CHECK: func1: +; CHECK: subl 20(%esp) +; CHECK-NEXT: jo +} + +define i1 @func2(i32 %v1, i32 %v2) nounwind { +entry: + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2) + %sum = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + br i1 %obit, label %carry, label %normal + +normal: + %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind + ret i1 true + +carry: + %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind + ret i1 false + +; CHECK: func2: +; CHECK: subl 20(%esp) +; CHECK-NEXT: jb +} + +declare i32 @printf(i8*, ...) nounwind +declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32) +declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) + +define i1 @func3(i32 %x) nounwind { +entry: + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %x, i32 1) + %obit = extractvalue {i32, i1} %t, 1 + ret i1 %obit + +; CHECK: func3: +; CHECK: decl +; CHECK-NEXT: seto +}
diff --git a/src/LLVM/test/CodeGen/X86/sub.ll b/src/LLVM/test/CodeGen/X86/sub.ll new file mode 100644 index 0000000..ee5ea1d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/sub.ll
@@ -0,0 +1,11 @@ +; RUN: llc -march=x86 < %s | FileCheck %s + +define i32 @test1(i32 %x) { + %xor = xor i32 %x, 31 + %sub = sub i32 32, %xor + ret i32 %sub +; CHECK: test1: +; CHECK: xorl $-32 +; CHECK-NEXT: addl $33 +; CHECK-NEXT: ret +}
diff --git a/src/LLVM/test/CodeGen/X86/subreg-to-reg-0.ll b/src/LLVM/test/CodeGen/X86/subreg-to-reg-0.ll new file mode 100644 index 0000000..d718c85 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/subreg-to-reg-0.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=x86-64 | grep mov | count 1 + +; Do eliminate the zero-extension instruction and rely on +; x86-64's implicit zero-extension! + +define i64 @foo(i32* %p) nounwind { + %t = load i32* %p + %n = add i32 %t, 1 + %z = zext i32 %n to i64 + ret i64 %z +}
diff --git a/src/LLVM/test/CodeGen/X86/subreg-to-reg-1.ll b/src/LLVM/test/CodeGen/X86/subreg-to-reg-1.ll new file mode 100644 index 0000000..a297728 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/subreg-to-reg-1.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=x86-64 | grep {leal .*), %e.\*} | count 1 + +; Don't eliminate or coalesce away the explicit zero-extension! +; This is currently using an leal because of a 3-addressification detail, +; though this isn't necessary; The point of this test is to make sure +; a 32-bit add is used. + +define i64 @foo(i64 %a) nounwind { + %b = add i64 %a, 4294967295 + %c = and i64 %b, 4294967295 + %d = add i64 %c, 1 + ret i64 %d +}
diff --git a/src/LLVM/test/CodeGen/X86/subreg-to-reg-2.ll b/src/LLVM/test/CodeGen/X86/subreg-to-reg-2.ll new file mode 100644 index 0000000..49d2e88 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/subreg-to-reg-2.ll
@@ -0,0 +1,25 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movl +; rdar://6707985 + + %XXOO = type { %"struct.XXC::XXCC", i8*, %"struct.XXC::XXOO::$_71" } + %XXValue = type opaque + %"struct.XXC::ArrayStorage" = type { i32, i32, i32, i8*, i8*, [1 x %XXValue*] } + %"struct.XXC::XXArray" = type { %XXOO, i32, %"struct.XXC::ArrayStorage"* } + %"struct.XXC::XXCC" = type { i32 (...)**, i8* } + %"struct.XXC::XXOO::$_71" = type { [2 x %XXValue*] } + +define internal fastcc %XXValue* @t(i64* %out, %"struct.XXC::ArrayStorage"* %tmp9) nounwind { +prologue: + %array = load %XXValue** inttoptr (i64 11111111 to %XXValue**) ; <%XXValue*> [#uses=0] + %index = load %XXValue** inttoptr (i64 22222222 to %XXValue**) ; <%XXValue*> [#uses=1] + %tmp = ptrtoint %XXValue* %index to i64 ; <i64> [#uses=2] + store i64 %tmp, i64* %out + %tmp6 = trunc i64 %tmp to i32 ; <i32> [#uses=1] + br label %bb5 + +bb5: ; preds = %prologue + %tmp10 = zext i32 %tmp6 to i64 ; <i64> [#uses=1] + %tmp11 = getelementptr %"struct.XXC::ArrayStorage"* %tmp9, i64 0, i32 5, i64 %tmp10 ; <%XXValue**> [#uses=1] + %tmp12 = load %XXValue** %tmp11, align 8 ; <%XXValue*> [#uses=1] + ret %XXValue* %tmp12 +}
diff --git a/src/LLVM/test/CodeGen/X86/subreg-to-reg-3.ll b/src/LLVM/test/CodeGen/X86/subreg-to-reg-3.ll new file mode 100644 index 0000000..931ae75 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/subreg-to-reg-3.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=x86-64 | grep imull + +; Don't eliminate or coalesce away the explicit zero-extension! + +define i64 @foo(i64 %a) { + %b = mul i64 %a, 7823 + %c = and i64 %b, 4294967295 + %d = add i64 %c, 1 + ret i64 %d +}
diff --git a/src/LLVM/test/CodeGen/X86/subreg-to-reg-4.ll b/src/LLVM/test/CodeGen/X86/subreg-to-reg-4.ll new file mode 100644 index 0000000..0ea5541 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/subreg-to-reg-4.ll
@@ -0,0 +1,135 @@ +; RUN: llc < %s -march=x86-64 > %t +; RUN: not grep leaq %t +; RUN: not grep incq %t +; RUN: not grep decq %t +; RUN: not grep negq %t +; RUN: not grep addq %t +; RUN: not grep subq %t +; RUN: not grep {movl %} %t + +; Utilize implicit zero-extension on x86-64 to eliminate explicit +; zero-extensions. Shrink 64-bit adds to 32-bit when the high +; 32-bits will be zeroed. + +define void @bar(i64 %x, i64 %y, i64* %z) nounwind readnone { +entry: + %t0 = add i64 %x, %y + %t1 = and i64 %t0, 4294967295 + store i64 %t1, i64* %z + ret void +} +define void @easy(i32 %x, i32 %y, i64* %z) nounwind readnone { +entry: + %t0 = add i32 %x, %y + %tn = zext i32 %t0 to i64 + %t1 = and i64 %tn, 4294967295 + store i64 %t1, i64* %z + ret void +} +define void @cola(i64 *%x, i64 %y, i64* %z, i64 %u) nounwind readnone { +entry: + %p = load i64* %x + %t0 = add i64 %p, %y + %t1 = and i64 %t0, 4294967295 + %t2 = xor i64 %t1, %u + store i64 %t2, i64* %z + ret void +} +define void @yaks(i64 *%x, i64 %y, i64* %z, i64 %u) nounwind readnone { +entry: + %p = load i64* %x + %t0 = add i64 %p, %y + %t1 = xor i64 %t0, %u + %t2 = and i64 %t1, 4294967295 + store i64 %t2, i64* %z + ret void +} +define void @foo(i64 *%x, i64 *%y, i64* %z) nounwind readnone { +entry: + %a = load i64* %x + %b = load i64* %y + %t0 = add i64 %a, %b + %t1 = and i64 %t0, 4294967295 + store i64 %t1, i64* %z + ret void +} +define void @avo(i64 %x, i64* %z, i64 %u) nounwind readnone { +entry: + %t0 = add i64 %x, 734847 + %t1 = and i64 %t0, 4294967295 + %t2 = xor i64 %t1, %u + store i64 %t2, i64* %z + ret void +} +define void @phe(i64 %x, i64* %z, i64 %u) nounwind readnone { +entry: + %t0 = add i64 %x, 734847 + %t1 = xor i64 %t0, %u + %t2 = and i64 %t1, 4294967295 + store i64 %t2, i64* %z + ret void +} +define void @oze(i64 %y, i64* %z) nounwind readnone { +entry: + %t0 = add i64 %y, 1 + %t1 = and i64 %t0, 4294967295 + store i64 %t1, i64* %z + ret void +} + +define void @sbar(i64 %x, i64 %y, i64* %z) nounwind 
readnone { +entry: + %t0 = sub i64 %x, %y + %t1 = and i64 %t0, 4294967295 + store i64 %t1, i64* %z + ret void +} +define void @seasy(i32 %x, i32 %y, i64* %z) nounwind readnone { +entry: + %t0 = sub i32 %x, %y + %tn = zext i32 %t0 to i64 + %t1 = and i64 %tn, 4294967295 + store i64 %t1, i64* %z + ret void +} +define void @scola(i64 *%x, i64 %y, i64* %z, i64 %u) nounwind readnone { +entry: + %p = load i64* %x + %t0 = sub i64 %p, %y + %t1 = and i64 %t0, 4294967295 + %t2 = xor i64 %t1, %u + store i64 %t2, i64* %z + ret void +} +define void @syaks(i64 *%x, i64 %y, i64* %z, i64 %u) nounwind readnone { +entry: + %p = load i64* %x + %t0 = sub i64 %p, %y + %t1 = xor i64 %t0, %u + %t2 = and i64 %t1, 4294967295 + store i64 %t2, i64* %z + ret void +} +define void @sfoo(i64 *%x, i64 *%y, i64* %z) nounwind readnone { +entry: + %a = load i64* %x + %b = load i64* %y + %t0 = sub i64 %a, %b + %t1 = and i64 %t0, 4294967295 + store i64 %t1, i64* %z + ret void +} +define void @swya(i64 %y, i64* %z) nounwind readnone { +entry: + %t0 = sub i64 0, %y + %t1 = and i64 %t0, 4294967295 + store i64 %t1, i64* %z + ret void +} +define void @soze(i64 %y, i64* %z) nounwind readnone { +entry: + %t0 = sub i64 %y, 1 + %t1 = and i64 %t0, 4294967295 + store i64 %t1, i64* %z + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/subreg-to-reg-6.ll b/src/LLVM/test/CodeGen/X86/subreg-to-reg-6.ll new file mode 100644 index 0000000..76430cd --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/subreg-to-reg-6.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s -march=x86-64 + +define i64 @foo() nounwind { +entry: + %t0 = load i32* null, align 8 + switch i32 %t0, label %bb65 [ + i32 16, label %bb + i32 12, label %bb56 + ] + +bb: + br label %bb65 + +bb56: + unreachable + +bb65: + %a = phi i64 [ 0, %bb ], [ 0, %entry ] + tail call void asm "", "{cx}"(i64 %a) nounwind + %t15 = and i64 %a, 4294967295 + ret i64 %t15 +} + +define i64 @bar(i64 %t0) nounwind { + call void asm "", "{cx}"(i64 0) nounwind + %t1 = sub i64 0, %t0 + %t2 = and i64 %t1, 4294967295 + ret i64 %t2 +}
diff --git a/src/LLVM/test/CodeGen/X86/switch-bt.ll b/src/LLVM/test/CodeGen/X86/switch-bt.ll new file mode 100644 index 0000000..8e39342 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/switch-bt.ll
@@ -0,0 +1,101 @@ +; RUN: llc -march=x86-64 -asm-verbose=false < %s | FileCheck %s + +; This switch should use bit tests, and the third bit test case is just +; testing for one possible value, so it doesn't need a bt. + +; CHECK: movabsq $2305843009482129440, %r +; CHECK-NEXT: btq %rax, %r +; CHECK-NEXT: jb +; CHECK-NEXT: movl $671088640, %e +; CHECK-NEXT: btq %rax, %r +; CHECK-NEXT: jb +; CHECK-NEXT: testq %rax, %r +; CHECK-NEXT: j + +define void @test(i8* %l) nounwind { +entry: + %l.addr = alloca i8*, align 8 ; <i8**> [#uses=2] + store i8* %l, i8** %l.addr + %tmp = load i8** %l.addr ; <i8*> [#uses=1] + %tmp1 = load i8* %tmp ; <i8> [#uses=1] + %conv = sext i8 %tmp1 to i32 ; <i32> [#uses=1] + switch i32 %conv, label %sw.default [ + i32 62, label %sw.bb + i32 60, label %sw.bb + i32 38, label %sw.bb2 + i32 94, label %sw.bb2 + i32 61, label %sw.bb2 + i32 33, label %sw.bb4 + ] + +sw.bb: ; preds = %entry, %entry + call void @foo(i32 0) + br label %sw.epilog + +sw.bb2: ; preds = %entry, %entry, %entry + call void @foo(i32 1) + br label %sw.epilog + +sw.bb4: ; preds = %entry + call void @foo(i32 3) + br label %sw.epilog + +sw.default: ; preds = %entry + call void @foo(i32 97) + br label %sw.epilog + +sw.epilog: ; preds = %sw.default, %sw.bb4, %sw.bb2, %sw.bb + ret void +} + +declare void @foo(i32) + +; Don't zero extend the test operands to pointer type if it can be avoided. 
+; rdar://8781238 +define void @test2(i32 %x) nounwind ssp { +; CHECK: test2: +; CHECK: cmpl $6 +; CHECK: ja + +; CHECK-NEXT: movl $91 +; CHECK-NOT: movl +; CHECK-NEXT: btl +; CHECK-NEXT: jb +entry: + switch i32 %x, label %if.end [ + i32 6, label %if.then + i32 4, label %if.then + i32 3, label %if.then + i32 1, label %if.then + i32 0, label %if.then + ] + +if.then: ; preds = %entry, %entry, %entry, %entry, %entry + tail call void @bar() nounwind + ret void + +if.end: ; preds = %entry + ret void +} + +declare void @bar() + +define void @test3(i32 %x) nounwind { +; CHECK: test3: +; CHECK: cmpl $5 +; CHECK: ja +; CHECK: cmpl $4 +; CHECK: jne + switch i32 %x, label %if.end [ + i32 0, label %if.then + i32 1, label %if.then + i32 2, label %if.then + i32 3, label %if.then + i32 5, label %if.then + ] +if.then: + tail call void @bar() nounwind + ret void +if.end: + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/switch-crit-edge-constant.ll b/src/LLVM/test/CodeGen/X86/switch-crit-edge-constant.ll new file mode 100644 index 0000000..8019e15 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/switch-crit-edge-constant.ll
@@ -0,0 +1,52 @@ +; PR925 +; RUN: llc < %s -march=x86 | \ +; RUN: grep mov.*str1 | count 1 + +target datalayout = "e-p:32:32" +target triple = "i686-apple-darwin8.7.2" +@str1 = internal constant [5 x i8] c"bonk\00" ; <[5 x i8]*> [#uses=1] +@str2 = internal constant [5 x i8] c"bork\00" ; <[5 x i8]*> [#uses=1] +@str = internal constant [8 x i8] c"perfwap\00" ; <[8 x i8]*> [#uses=1] + +define void @foo(i32 %C) { +entry: + switch i32 %C, label %bb2 [ + i32 1, label %blahaha + i32 2, label %blahaha + i32 3, label %blahaha + i32 4, label %blahaha + i32 5, label %blahaha + i32 6, label %blahaha + i32 7, label %blahaha + i32 8, label %blahaha + i32 9, label %blahaha + i32 10, label %blahaha + ] + +bb2: ; preds = %entry + %tmp5 = and i32 %C, 123 ; <i32> [#uses=1] + %tmp = icmp eq i32 %tmp5, 0 ; <i1> [#uses=1] + br i1 %tmp, label %blahaha, label %cond_true + +cond_true: ; preds = %bb2 + br label %blahaha + +blahaha: ; preds = %cond_true, %bb2, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry + %s.0 = phi i8* [ getelementptr ([8 x i8]* @str, i32 0, i64 0), %cond_true ], [ getelementptr ([5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8]* @str2, i32 0, i64 0), %bb2 ] ; <i8*> [#uses=13] + %tmp8 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0] + %tmp10 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0] + %tmp12 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0] + 
%tmp14 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0] + %tmp16 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0] + %tmp18 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0] + %tmp20 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0] + %tmp22 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0] + %tmp24 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0] + %tmp26 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0] + %tmp28 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0] + %tmp30 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0] + %tmp32 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0] + ret void +} + +declare i32 @printf(i8*, ...)
diff --git a/src/LLVM/test/CodeGen/X86/switch-or.ll b/src/LLVM/test/CodeGen/X86/switch-or.ll new file mode 100644 index 0000000..75832c7 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/switch-or.ll
@@ -0,0 +1,22 @@ +; RUN: llc -march=x86 -asm-verbose=false < %s | FileCheck %s + +; Check that merging switch cases that differ in one bit works. +; CHECK: orl $2 +; CHECK-NEXT: cmpl $6 + +define void @foo(i32 %variable) nounwind { +entry: + switch i32 %variable, label %if.end [ + i32 4, label %if.then + i32 6, label %if.then + ] + +if.then: + %call = tail call i32 (...)* @bar() nounwind + ret void + +if.end: + ret void +} + +declare i32 @bar(...) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/switch-zextload.ll b/src/LLVM/test/CodeGen/X86/switch-zextload.ll new file mode 100644 index 0000000..55425bc --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/switch-zextload.ll
@@ -0,0 +1,34 @@ +; RUN: llc < %s -march=x86 | grep mov | count 1 + +; Do zextload, instead of a load and a separate zext. + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin9.6" + %struct.move_s = type { i32, i32, i32, i32, i32, i32 } + %struct.node_t = type { i8, i8, i8, i8, i32, i32, %struct.node_t**, %struct.node_t*, %struct.move_s } + +define fastcc void @set_proof_and_disproof_numbers(%struct.node_t* nocapture %node) nounwind { +entry: + %0 = load i8* null, align 1 ; <i8> [#uses=1] + switch i8 %0, label %return [ + i8 2, label %bb31 + i8 0, label %bb80 + i8 1, label %bb82 + i8 3, label %bb84 + ] + +bb31: ; preds = %entry + unreachable + +bb80: ; preds = %entry + ret void + +bb82: ; preds = %entry + ret void + +bb84: ; preds = %entry + ret void + +return: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/swizzle.ll b/src/LLVM/test/CodeGen/X86/swizzle.ll new file mode 100644 index 0000000..23e0c24 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/swizzle.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movlps +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movsd +; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep movups +; rdar://6523650 + + %struct.vector4_t = type { <4 x float> } + +define void @swizzle(i8* nocapture %a, %struct.vector4_t* nocapture %b, %struct.vector4_t* nocapture %c) nounwind { +entry: + %0 = getelementptr %struct.vector4_t* %b, i32 0, i32 0 ; <<4 x float>*> [#uses=2] + %1 = load <4 x float>* %0, align 4 ; <<4 x float>> [#uses=1] + %tmp.i = bitcast i8* %a to double* ; <double*> [#uses=1] + %tmp1.i = load double* %tmp.i ; <double> [#uses=1] + %2 = insertelement <2 x double> undef, double %tmp1.i, i32 0 ; <<2 x double>> [#uses=1] + %tmp2.i = bitcast <2 x double> %2 to <4 x float> ; <<4 x float>> [#uses=1] + %3 = shufflevector <4 x float> %1, <4 x float> %tmp2.i, <4 x i32> < i32 4, i32 5, i32 2, i32 3 > ; <<4 x float>> [#uses=1] + store <4 x float> %3, <4 x float>* %0, align 4 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/tail-call-got.ll b/src/LLVM/test/CodeGen/X86/tail-call-got.ll new file mode 100644 index 0000000..1d7eb2e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/tail-call-got.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -relocation-model=pic -mattr=+sse2 | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +target triple = "i386-unknown-freebsd9.0" + +define double @test1(double %x) nounwind readnone { +; CHECK: test1: +; CHECK: movl foo@GOT +; CHECK-NEXT: jmpl + %1 = tail call double @foo(double %x) nounwind readnone + ret double %1 +} + +declare double @foo(double) readnone + +define double @test2(double %x) nounwind readnone { +; CHECK: test2: +; CHECK: movl sin@GOT +; CHECK-NEXT: jmpl + %1 = tail call double @sin(double %x) nounwind readnone + ret double %1 +} + +declare double @sin(double) readnone
diff --git a/src/LLVM/test/CodeGen/X86/tail-dup-addr.ll b/src/LLVM/test/CodeGen/X86/tail-dup-addr.ll new file mode 100644 index 0000000..c5a105c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/tail-dup-addr.ll
@@ -0,0 +1,28 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s + +; Test that we don't drop a block that has its address taken. + +; CHECK: Ltmp1: ## Block address taken +; CHECK: Ltmp2: ## Block address taken + +@a = common global i32 0, align 4 +@p = common global i8* null, align 8 + +define void @foo() noreturn nounwind uwtable ssp { +entry: + %tmp = load i32* @a, align 4 + %foo = icmp eq i32 0, %tmp + br i1 %foo, label %sw.bb, label %sw.default + +sw.bb: ; preds = %entry + store i8* blockaddress(@foo, %sw.bb1), i8** @p, align 8 + br label %sw.bb1 + +sw.bb1: ; preds = %sw.default, %sw.bb, %entry + store i8* blockaddress(@foo, %sw.default), i8** @p, align 8 + br label %sw.default + +sw.default: ; preds = %sw.bb1, %entry + store i8* blockaddress(@foo, %sw.bb1), i8** @p, align 8 + br label %sw.bb1 +}
diff --git a/src/LLVM/test/CodeGen/X86/tail-opts.ll b/src/LLVM/test/CodeGen/X86/tail-opts.ll new file mode 100644 index 0000000..d6c16ca --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/tail-opts.ll
@@ -0,0 +1,432 @@ +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -asm-verbose=false -post-RA-scheduler=true | FileCheck %s + +declare void @bar(i32) +declare void @car(i32) +declare void @dar(i32) +declare void @ear(i32) +declare void @far(i32) +declare i1 @qux() + +@GHJK = global i32 0 +@HABC = global i32 0 + +; BranchFolding should tail-merge the stores since they all precede +; direct branches to the same place. + +; CHECK: tail_merge_me: +; CHECK-NOT: GHJK +; CHECK: movl $0, GHJK(%rip) +; CHECK-NEXT: movl $1, HABC(%rip) +; CHECK-NOT: GHJK + +define void @tail_merge_me() nounwind { +entry: + %a = call i1 @qux() + br i1 %a, label %A, label %next +next: + %b = call i1 @qux() + br i1 %b, label %B, label %C + +A: + call void @bar(i32 0) + store i32 0, i32* @GHJK + br label %M + +B: + call void @car(i32 1) + store i32 0, i32* @GHJK + br label %M + +C: + call void @dar(i32 2) + store i32 0, i32* @GHJK + br label %M + +M: + store i32 1, i32* @HABC + %c = call i1 @qux() + br i1 %c, label %return, label %altret + +return: + call void @ear(i32 1000) + ret void +altret: + call void @far(i32 1001) + ret void +} + +declare i8* @choose(i8*, i8*) + +; BranchFolding should tail-duplicate the indirect jump to avoid +; redundant branching. 
+ +; CHECK: tail_duplicate_me: +; CHECK: movl $0, GHJK(%rip) +; CHECK-NEXT: jmpq *%r +; CHECK: movl $0, GHJK(%rip) +; CHECK-NEXT: jmpq *%r +; CHECK: movl $0, GHJK(%rip) +; CHECK-NEXT: jmpq *%r + +define void @tail_duplicate_me() nounwind { +entry: + %a = call i1 @qux() + %c = call i8* @choose(i8* blockaddress(@tail_duplicate_me, %return), + i8* blockaddress(@tail_duplicate_me, %altret)) + br i1 %a, label %A, label %next +next: + %b = call i1 @qux() + br i1 %b, label %B, label %C + +A: + call void @bar(i32 0) + store i32 0, i32* @GHJK + br label %M + +B: + call void @car(i32 1) + store i32 0, i32* @GHJK + br label %M + +C: + call void @dar(i32 2) + store i32 0, i32* @GHJK + br label %M + +M: + indirectbr i8* %c, [label %return, label %altret] + +return: + call void @ear(i32 1000) + ret void +altret: + call void @far(i32 1001) + ret void +} + +; BranchFolding shouldn't try to merge the tails of two blocks +; with only a branch in common, regardless of the fallthrough situation. + +; CHECK: dont_merge_oddly: +; CHECK-NOT: ret +; CHECK: ucomiss %xmm{{[0-2]}}, %xmm{{[0-2]}} +; CHECK-NEXT: jbe .LBB2_3 +; CHECK-NEXT: ucomiss %xmm{{[0-2]}}, %xmm{{[0-2]}} +; CHECK-NEXT: ja .LBB2_4 +; CHECK-NEXT: .LBB2_2: +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB2_3: +; CHECK-NEXT: ucomiss %xmm{{[0-2]}}, %xmm{{[0-2]}} +; CHECK-NEXT: jbe .LBB2_2 +; CHECK-NEXT: .LBB2_4: +; CHECK-NEXT: xorb %al, %al +; CHECK-NEXT: ret + +define i1 @dont_merge_oddly(float* %result) nounwind { +entry: + %tmp4 = getelementptr float* %result, i32 2 + %tmp5 = load float* %tmp4, align 4 + %tmp7 = getelementptr float* %result, i32 4 + %tmp8 = load float* %tmp7, align 4 + %tmp10 = getelementptr float* %result, i32 6 + %tmp11 = load float* %tmp10, align 4 + %tmp12 = fcmp olt float %tmp8, %tmp11 + br i1 %tmp12, label %bb, label %bb21 + +bb: + %tmp23469 = fcmp olt float %tmp5, %tmp8 + br i1 %tmp23469, label %bb26, label %bb30 + +bb21: + %tmp23 = fcmp olt float %tmp5, %tmp11 + br i1 %tmp23, label 
%bb26, label %bb30 + +bb26: + ret i1 0 + +bb30: + ret i1 1 +} + +; Do any-size tail-merging when two candidate blocks will both require +; an unconditional jump to complete a two-way conditional branch. + +; CHECK: c_expand_expr_stmt: +; +; This test only works when register allocation happens to use %rax for both +; load addresses. +; +; CHE: jmp .LBB3_11 +; CHE-NEXT: .LBB3_9: +; CHE-NEXT: movq 8(%rax), %rax +; CHE-NEXT: xorb %dl, %dl +; CHE-NEXT: movb 16(%rax), %al +; CHE-NEXT: cmpb $16, %al +; CHE-NEXT: je .LBB3_11 +; CHE-NEXT: cmpb $23, %al +; CHE-NEXT: jne .LBB3_14 +; CHE-NEXT: .LBB3_11: + +%0 = type { %struct.rtx_def* } +%struct.lang_decl = type opaque +%struct.rtx_def = type { i16, i8, i8, [1 x %union.rtunion] } +%struct.tree_decl = type { [24 x i8], i8*, i32, %union.tree_node*, i32, i8, i8, i8, i8, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %struct.rtx_def*, %union..2anon, %0, %union.tree_node*, %struct.lang_decl* } +%union..2anon = type { i32 } +%union.rtunion = type { i8* } +%union.tree_node = type { %struct.tree_decl } + +define fastcc void @c_expand_expr_stmt(%union.tree_node* %expr) nounwind { +entry: + %tmp4 = load i8* null, align 8 ; <i8> [#uses=3] + switch i8 %tmp4, label %bb3 [ + i8 18, label %bb + ] + +bb: ; preds = %entry + switch i32 undef, label %bb1 [ + i32 0, label %bb2.i + i32 37, label %bb.i + ] + +bb.i: ; preds = %bb + switch i32 undef, label %bb1 [ + i32 0, label %lvalue_p.exit + ] + +bb2.i: ; preds = %bb + br label %bb3 + +lvalue_p.exit: ; preds = %bb.i + %tmp21 = load %union.tree_node** null, align 8 ; <%union.tree_node*> [#uses=3] + %tmp22 = getelementptr inbounds %union.tree_node* %tmp21, i64 0, i32 0, i32 0, i64 0 ; <i8*> [#uses=1] + %tmp23 = load i8* %tmp22, align 8 ; <i8> [#uses=1] + %tmp24 = zext i8 %tmp23 to i32 ; <i32> [#uses=1] + switch i32 %tmp24, label %lvalue_p.exit4 [ + i32 0, label %bb2.i3 + i32 
2, label %bb.i1 + ] + +bb.i1: ; preds = %lvalue_p.exit + %tmp25 = getelementptr inbounds %union.tree_node* %tmp21, i64 0, i32 0, i32 2 ; <i32*> [#uses=1] + %tmp26 = bitcast i32* %tmp25 to %union.tree_node** ; <%union.tree_node**> [#uses=1] + %tmp27 = load %union.tree_node** %tmp26, align 8 ; <%union.tree_node*> [#uses=2] + %tmp28 = getelementptr inbounds %union.tree_node* %tmp27, i64 0, i32 0, i32 0, i64 16 ; <i8*> [#uses=1] + %tmp29 = load i8* %tmp28, align 8 ; <i8> [#uses=1] + %tmp30 = zext i8 %tmp29 to i32 ; <i32> [#uses=1] + switch i32 %tmp30, label %lvalue_p.exit4 [ + i32 0, label %bb2.i.i2 + i32 2, label %bb.i.i + ] + +bb.i.i: ; preds = %bb.i1 + %tmp34 = tail call fastcc i32 @lvalue_p(%union.tree_node* null) nounwind ; <i32> [#uses=1] + %phitmp = icmp ne i32 %tmp34, 0 ; <i1> [#uses=1] + br label %lvalue_p.exit4 + +bb2.i.i2: ; preds = %bb.i1 + %tmp35 = getelementptr inbounds %union.tree_node* %tmp27, i64 0, i32 0, i32 0, i64 8 ; <i8*> [#uses=1] + %tmp36 = bitcast i8* %tmp35 to %union.tree_node** ; <%union.tree_node**> [#uses=1] + %tmp37 = load %union.tree_node** %tmp36, align 8 ; <%union.tree_node*> [#uses=1] + %tmp38 = getelementptr inbounds %union.tree_node* %tmp37, i64 0, i32 0, i32 0, i64 16 ; <i8*> [#uses=1] + %tmp39 = load i8* %tmp38, align 8 ; <i8> [#uses=1] + switch i8 %tmp39, label %bb2 [ + i8 16, label %lvalue_p.exit4 + i8 23, label %lvalue_p.exit4 + ] + +bb2.i3: ; preds = %lvalue_p.exit + %tmp40 = getelementptr inbounds %union.tree_node* %tmp21, i64 0, i32 0, i32 0, i64 8 ; <i8*> [#uses=1] + %tmp41 = bitcast i8* %tmp40 to %union.tree_node** ; <%union.tree_node**> [#uses=1] + %tmp42 = load %union.tree_node** %tmp41, align 8 ; <%union.tree_node*> [#uses=1] + %tmp43 = getelementptr inbounds %union.tree_node* %tmp42, i64 0, i32 0, i32 0, i64 16 ; <i8*> [#uses=1] + %tmp44 = load i8* %tmp43, align 8 ; <i8> [#uses=1] + switch i8 %tmp44, label %bb2 [ + i8 16, label %lvalue_p.exit4 + i8 23, label %lvalue_p.exit4 + ] + +lvalue_p.exit4: ; preds = %bb2.i3, 
%bb2.i3, %bb2.i.i2, %bb2.i.i2, %bb.i.i, %bb.i1, %lvalue_p.exit + %tmp45 = phi i1 [ %phitmp, %bb.i.i ], [ false, %bb2.i.i2 ], [ false, %bb2.i.i2 ], [ false, %bb.i1 ], [ false, %bb2.i3 ], [ false, %bb2.i3 ], [ false, %lvalue_p.exit ] ; <i1> [#uses=1] + %tmp46 = icmp eq i8 %tmp4, 0 ; <i1> [#uses=1] + %or.cond = or i1 %tmp45, %tmp46 ; <i1> [#uses=1] + br i1 %or.cond, label %bb2, label %bb3 + +bb1: ; preds = %bb2.i.i, %bb.i, %bb + %.old = icmp eq i8 %tmp4, 23 ; <i1> [#uses=1] + br i1 %.old, label %bb2, label %bb3 + +bb2: ; preds = %bb1, %lvalue_p.exit4, %bb2.i3, %bb2.i.i2 + br label %bb3 + +bb3: ; preds = %bb2, %bb1, %lvalue_p.exit4, %bb2.i, %entry + %expr_addr.0 = phi %union.tree_node* [ null, %bb2 ], [ %expr, %bb2.i ], [ %expr, %entry ], [ %expr, %bb1 ], [ %expr, %lvalue_p.exit4 ] ; <%union.tree_node*> [#uses=0] + unreachable +} + +declare fastcc i32 @lvalue_p(%union.tree_node* nocapture) nounwind readonly + +declare fastcc %union.tree_node* @default_conversion(%union.tree_node*) nounwind + + +; If one tail merging candidate falls through into the other, +; tail merging is likely profitable regardless of how few +; instructions are involved. This function should have only +; one ret instruction. + +; CHECK: foo: +; CHECK: callq func +; CHECK-NEXT: .LBB4_2: +; CHECK-NEXT: popq +; CHECK-NEXT: ret + +define void @foo(i1* %V) nounwind { +entry: + %t0 = icmp eq i1* %V, null + br i1 %t0, label %return, label %bb + +bb: + call void @func() + ret void + +return: + ret void +} + +declare void @func() + +; one - One instruction may be tail-duplicated even with optsize. 
+ +; CHECK: one: +; CHECK: movl $0, XYZ(%rip) +; CHECK: movl $0, XYZ(%rip) + +@XYZ = external global i32 + +define void @one() nounwind optsize { +entry: + %0 = icmp eq i32 undef, 0 + br i1 %0, label %bbx, label %bby + +bby: + switch i32 undef, label %bb7 [ + i32 16, label %return + ] + +bb7: + volatile store i32 0, i32* @XYZ + unreachable + +bbx: + switch i32 undef, label %bb12 [ + i32 128, label %return + ] + +bb12: + volatile store i32 0, i32* @XYZ + unreachable + +return: + ret void +} + +; two - Same as one, but with two instructions in the common +; tail instead of one. This is too much to be merged, given +; the optsize attribute. + +; CHECK: two: +; CHECK-NOT: XYZ +; CHECK: movl $0, XYZ(%rip) +; CHECK: movl $1, XYZ(%rip) +; CHECK-NOT: XYZ +; CHECK: ret + +define void @two() nounwind optsize { +entry: + %0 = icmp eq i32 undef, 0 + br i1 %0, label %bbx, label %bby + +bby: + switch i32 undef, label %bb7 [ + i32 16, label %return + ] + +bb7: + volatile store i32 0, i32* @XYZ + volatile store i32 1, i32* @XYZ + unreachable + +bbx: + switch i32 undef, label %bb12 [ + i32 128, label %return + ] + +bb12: + volatile store i32 0, i32* @XYZ + volatile store i32 1, i32* @XYZ + unreachable + +return: + ret void +} + +; two_nosize - Same as two, but without the optsize attribute. +; Now two instructions are enough to be tail-duplicated. 
+ +; CHECK: two_nosize: +; CHECK: movl $0, XYZ(%rip) +; CHECK: movl $1, XYZ(%rip) +; CHECK: movl $0, XYZ(%rip) +; CHECK: movl $1, XYZ(%rip) + +define void @two_nosize() nounwind { +entry: + %0 = icmp eq i32 undef, 0 + br i1 %0, label %bbx, label %bby + +bby: + switch i32 undef, label %bb7 [ + i32 16, label %return + ] + +bb7: + volatile store i32 0, i32* @XYZ + volatile store i32 1, i32* @XYZ + unreachable + +bbx: + switch i32 undef, label %bb12 [ + i32 128, label %return + ] + +bb12: + volatile store i32 0, i32* @XYZ + volatile store i32 1, i32* @XYZ + unreachable + +return: + ret void +} + +; Tail-merging should merge the two ret instructions since one side +; can fall-through into the ret and the other side has to branch anyway. + +; CHECK: TESTE: +; CHECK: ret +; CHECK-NOT: ret +; CHECK: size TESTE + +define i64 @TESTE(i64 %parami, i64 %paraml) nounwind readnone { +entry: + %cmp = icmp slt i64 %parami, 1 ; <i1> [#uses=1] + %varx.0 = select i1 %cmp, i64 1, i64 %parami ; <i64> [#uses=1] + %cmp410 = icmp slt i64 %paraml, 1 ; <i1> [#uses=1] + br i1 %cmp410, label %for.end, label %bb.nph + +bb.nph: ; preds = %entry + %tmp15 = mul i64 %paraml, %parami ; <i64> [#uses=1] + ret i64 %tmp15 + +for.end: ; preds = %entry + ret i64 %varx.0 +}
diff --git a/src/LLVM/test/CodeGen/X86/tail-threshold.ll b/src/LLVM/test/CodeGen/X86/tail-threshold.ll new file mode 100644 index 0000000..f2296a0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/tail-threshold.ll
@@ -0,0 +1,44 @@ +; RUN: llc -mtriple=x86_64-pc-linux-gnu -tail-merge-threshold 2 < %s | FileCheck %s + +; Test that we still do some merging if a block has more than +; tail-merge-threshold predecessors. + +; CHECK: callq bar +; CHECK: callq bar +; CHECK: callq bar +; CHECK-NOT: callq + +declare void @bar() + +define void @foo(i32 %xxx) { +entry: + switch i32 %xxx, label %bb4 [ + i32 0, label %bb0 + i32 1, label %bb1 + i32 2, label %bb2 + i32 3, label %bb3 + ] + +bb0: + call void @bar() + br label %bb5 + +bb1: + call void @bar() + br label %bb5 + +bb2: + call void @bar() + br label %bb5 + +bb3: + call void @bar() + br label %bb5 + +bb4: + call void @bar() + br label %bb5 + +bb5: + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/tailcall-fastisel.ll b/src/LLVM/test/CodeGen/X86/tailcall-fastisel.ll new file mode 100644 index 0000000..7f92af4 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/tailcall-fastisel.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -march=x86-64 -tailcallopt -fast-isel | not grep TAILCALL + +; Fast-isel shouldn't attempt to cope with tail calls. + +%0 = type { i64, i32, i8* } + +define fastcc i8* @"visit_array_aux<`Reference>"(%0 %arg, i32 %arg1) nounwind { +fail: ; preds = %entry + %tmp20 = tail call fastcc i8* @"visit_array_aux<`Reference>"(%0 %arg, i32 undef) ; <i8*> [#uses=1] + ret i8* %tmp20 +} + +define i32 @foo() nounwind { +entry: + %0 = tail call i32 (...)* @bar() nounwind ; <i32> [#uses=1] + ret i32 %0 +} + +declare i32 @bar(...) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/tailcall-i1.ll b/src/LLVM/test/CodeGen/X86/tailcall-i1.ll new file mode 100644 index 0000000..8ef1f11 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/tailcall-i1.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL +define fastcc i1 @i1test(i32, i32, i32, i32) { + entry: + %4 = tail call fastcc i1 @i1test( i32 %0, i32 %1, i32 %2, i32 %3) + ret i1 %4 +}
diff --git a/src/LLVM/test/CodeGen/X86/tailcall-largecode.ll b/src/LLVM/test/CodeGen/X86/tailcall-largecode.ll new file mode 100644 index 0000000..c3f4278 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/tailcall-largecode.ll
@@ -0,0 +1,71 @@ +; RUN: llc < %s -mtriple=x86_64-linux-gnu -tailcallopt -code-model=large | FileCheck %s + +declare fastcc i32 @callee(i32 %arg) +define fastcc i32 @directcall(i32 %arg) { +entry: +; This is the large code model, so &callee may not fit into the jmp +; instruction. Instead, stick it into a register. +; CHECK: movabsq $callee, [[REGISTER:%r[a-z0-9]+]] +; CHECK: jmpq *[[REGISTER]] # TAILCALL + %res = tail call fastcc i32 @callee(i32 %arg) + ret i32 %res +} + +; Check that the register used for an indirect tail call doesn't +; clobber any of the arguments. +define fastcc i32 @indirect_manyargs(i32(i32,i32,i32,i32,i32,i32,i32)* %target) { +; Adjust the stack to enter the function. (The amount of the +; adjustment may change in the future, in which case the location of +; the stack argument and the return adjustment will change too.) +; CHECK: pushq +; Put the call target into R11, which won't be clobbered while restoring +; callee-saved registers and won't be used for passing arguments. +; CHECK: movq %rdi, %rax +; Pass the stack argument. +; CHECK: movl $7, 16(%rsp) +; Pass the register arguments, in the right registers. +; CHECK: movl $1, %edi +; CHECK: movl $2, %esi +; CHECK: movl $3, %edx +; CHECK: movl $4, %ecx +; CHECK: movl $5, %r8d +; CHECK: movl $6, %r9d +; Adjust the stack to "return". +; CHECK: popq +; And tail-call to the target. +; CHECK: jmpq *%rax # TAILCALL + %res = tail call fastcc i32 %target(i32 1, i32 2, i32 3, i32 4, i32 5, + i32 6, i32 7) + ret i32 %res +} + +; Check that the register used for a direct tail call doesn't clobber +; any of the arguments. +declare fastcc i32 @manyargs_callee(i32,i32,i32,i32,i32,i32,i32) +define fastcc i32 @direct_manyargs() { +; Adjust the stack to enter the function. (The amount of the +; adjustment may change in the future, in which case the location of +; the stack argument and the return adjustment will change too.) +; CHECK: pushq +; Pass the stack argument. 
+; CHECK: movl $7, 16(%rsp) +; Pass the register arguments, in the right registers. +; CHECK: movl $1, %edi +; CHECK: movl $2, %esi +; CHECK: movl $3, %edx +; CHECK: movl $4, %ecx +; CHECK: movl $5, %r8d +; CHECK: movl $6, %r9d +; This is the large code model, so &manyargs_callee may not fit into +; the jmp instruction. Put it into R11, which won't be clobbered +; while restoring callee-saved registers and won't be used for passing +; arguments. +; CHECK: movabsq $manyargs_callee, %rax +; Adjust the stack to "return". +; CHECK: popq +; And tail-call to the target. +; CHECK: jmpq *%rax # TAILCALL + %res = tail call fastcc i32 @manyargs_callee(i32 1, i32 2, i32 3, i32 4, + i32 5, i32 6, i32 7) + ret i32 %res +}
diff --git a/src/LLVM/test/CodeGen/X86/tailcall-returndup-void.ll b/src/LLVM/test/CodeGen/X86/tailcall-returndup-void.ll new file mode 100644 index 0000000..c1d6312 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/tailcall-returndup-void.ll
@@ -0,0 +1,37 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s +; CHECK: rBM_info +; CHECK-NOT: ret + +@sES_closure = external global [0 x i64] +declare cc10 void @sEH_info(i64* noalias nocapture, i64* noalias nocapture, i64* noalias nocapture, i64, i64, i64) align 8 + +define cc10 void @rBM_info(i64* noalias nocapture %Base_Arg, i64* noalias nocapture %Sp_Arg, i64* noalias nocapture %Hp_Arg, i64 %R1_Arg, i64 %R2_Arg, i64 %R3_Arg) nounwind align 8 { +c263: + %ln265 = getelementptr inbounds i64* %Sp_Arg, i64 -2 + %ln266 = ptrtoint i64* %ln265 to i64 + %ln268 = icmp ult i64 %ln266, %R3_Arg + br i1 %ln268, label %c26a, label %n26p + +n26p: ; preds = %c263 + br i1 icmp ne (i64 and (i64 ptrtoint ([0 x i64]* @sES_closure to i64), i64 7), i64 0), label %c1ZP.i, label %n1ZQ.i + +n1ZQ.i: ; preds = %n26p + %ln1ZT.i = load i64* getelementptr inbounds ([0 x i64]* @sES_closure, i64 0, i64 0), align 8 + %ln1ZU.i = inttoptr i64 %ln1ZT.i to void (i64*, i64*, i64*, i64, i64, i64)* + tail call cc10 void %ln1ZU.i(i64* %Base_Arg, i64* %Sp_Arg, i64* %Hp_Arg, i64 ptrtoint ([0 x i64]* @sES_closure to i64), i64 ptrtoint ([0 x i64]* @sES_closure to i64), i64 %R3_Arg) nounwind + br label %rBL_info.exit + +c1ZP.i: ; preds = %n26p + tail call cc10 void @sEH_info(i64* %Base_Arg, i64* %Sp_Arg, i64* %Hp_Arg, i64 ptrtoint ([0 x i64]* @sES_closure to i64), i64 ptrtoint ([0 x i64]* @sES_closure to i64), i64 %R3_Arg) nounwind + br label %rBL_info.exit + +rBL_info.exit: ; preds = %c1ZP.i, %n1ZQ.i + ret void + +c26a: ; preds = %c263 + %ln27h = getelementptr inbounds i64* %Base_Arg, i64 -2 + %ln27j = load i64* %ln27h, align 8 + %ln27k = inttoptr i64 %ln27j to void (i64*, i64*, i64*, i64, i64, i64)* + tail call cc10 void %ln27k(i64* %Base_Arg, i64* %Sp_Arg, i64* %Hp_Arg, i64 %R1_Arg, i64 %R2_Arg, i64 %R3_Arg) nounwind + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/tailcall-ri64.ll b/src/LLVM/test/CodeGen/X86/tailcall-ri64.ll new file mode 100644 index 0000000..914d8f7 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/tailcall-ri64.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=AMD64 +; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64 +; PR8743 +; TAILJMPri64 should not receive "callee-saved" registers beyond epilogue. + +; AMD64: jmpq +; AMD64-NOT: %{{e[a-z]|rbx|rbp|r10|r12|r13|r14|r15}} + +; WIN64: jmpq +; WIN64-NOT: %{{e[a-z]|rbx|rsi|rdi|rbp|r12|r13|r14|r15}} + +%class = type { [8 x i8] } +%vt = type { i32 (...)** } + +define %vt* @_ZN4llvm9UnsetInit20convertInitializerToEPNS_5RecTyE(%class* +%this, %vt* %Ty) align 2 { +entry: + %0 = bitcast %vt* %Ty to %vt* (%vt*, %class*)*** + %vtable = load %vt* (%vt*, %class*)*** %0, align 8 + %vfn = getelementptr inbounds %vt* (%vt*, %class*)** %vtable, i64 4 + %1 = load %vt* (%vt*, %class*)** %vfn, align 8 + %call = tail call %vt* %1(%vt* %Ty, %class* %this) + ret %vt* %call +}
diff --git a/src/LLVM/test/CodeGen/X86/tailcall-stackalign.ll b/src/LLVM/test/CodeGen/X86/tailcall-stackalign.ll new file mode 100644 index 0000000..d3f811c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/tailcall-stackalign.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s -mtriple=i686-unknown-linux -tailcallopt | FileCheck %s +; Linux has 8 byte alignment so the params cause stack size 20 when tailcallopt +; is enabled, ensure that a normal fastcc call has matching stack size + + +define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { + ret i32 %a3 +} + +define fastcc i32 @tailcaller(i32 %in1, i32 %in2, i32 %in3, i32 %in4) { + %tmp11 = tail call fastcc i32 @tailcallee(i32 %in1, i32 %in2, + i32 %in1, i32 %in2) + ret i32 %tmp11 +} + +define i32 @main(i32 %argc, i8** %argv) { + %tmp1 = call fastcc i32 @tailcaller( i32 1, i32 2, i32 3, i32 4 ) + ; expect match subl [stacksize] here + ret i32 0 +} + +; CHECK: calll tailcaller +; CHECK-NEXT: subl $12
diff --git a/src/LLVM/test/CodeGen/X86/tailcall-structret.ll b/src/LLVM/test/CodeGen/X86/tailcall-structret.ll new file mode 100644 index 0000000..d8be4b2 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/tailcall-structret.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL +define fastcc { { i8*, i8* }*, i8*} @init({ { i8*, i8* }*, i8*}, i32) { +entry: + %2 = tail call fastcc { { i8*, i8* }*, i8* } @init({ { i8*, i8*}*, i8*} %0, i32 %1) + ret { { i8*, i8* }*, i8*} %2 +}
diff --git a/src/LLVM/test/CodeGen/X86/tailcall-void.ll b/src/LLVM/test/CodeGen/X86/tailcall-void.ll new file mode 100644 index 0000000..4e578d1 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/tailcall-void.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL +define fastcc void @i1test(i32, i32, i32, i32) { + entry: + tail call fastcc void @i1test( i32 %0, i32 %1, i32 %2, i32 %3) + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/tailcall1.ll b/src/LLVM/test/CodeGen/X86/tailcall1.ll new file mode 100644 index 0000000..f7ff5d5 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/tailcall1.ll
@@ -0,0 +1,40 @@ +; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL | count 5 + +; With -tailcallopt, CodeGen guarantees a tail call optimization +; for all of these. + +declare fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) + +define fastcc i32 @tailcaller(i32 %in1, i32 %in2) nounwind { +entry: + %tmp11 = tail call fastcc i32 @tailcallee(i32 %in1, i32 %in2, i32 %in1, i32 %in2) + ret i32 %tmp11 +} + +declare fastcc i8* @alias_callee() + +define fastcc noalias i8* @noalias_caller() nounwind { + %p = tail call fastcc i8* @alias_callee() + ret i8* %p +} + +declare fastcc noalias i8* @noalias_callee() + +define fastcc i8* @alias_caller() nounwind { + %p = tail call fastcc noalias i8* @noalias_callee() + ret i8* %p +} + +declare fastcc i32 @i32_callee() + +define fastcc i32 @ret_undef() nounwind { + %p = tail call fastcc i32 @i32_callee() + ret i32 undef +} + +declare fastcc void @does_not_return() + +define fastcc i32 @noret() nounwind { + tail call fastcc void @does_not_return() + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/tailcallbyval.ll b/src/LLVM/test/CodeGen/X86/tailcallbyval.ll new file mode 100644 index 0000000..03d6f94 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/tailcallbyval.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL +; RUN: llc < %s -march=x86 -tailcallopt | grep {movl\[\[:space:\]\]*4(%esp), %eax} | count 1 +%struct.s = type {i32, i32, i32, i32, i32, i32, i32, i32, + i32, i32, i32, i32, i32, i32, i32, i32, + i32, i32, i32, i32, i32, i32, i32, i32 } + +define fastcc i32 @tailcallee(%struct.s* byval %a) nounwind { +entry: + %tmp2 = getelementptr %struct.s* %a, i32 0, i32 0 + %tmp3 = load i32* %tmp2 + ret i32 %tmp3 +} + +define fastcc i32 @tailcaller(%struct.s* byval %a) nounwind { +entry: + %tmp4 = tail call fastcc i32 @tailcallee(%struct.s* byval %a ) + ret i32 %tmp4 +}
diff --git a/src/LLVM/test/CodeGen/X86/tailcallbyval64.ll b/src/LLVM/test/CodeGen/X86/tailcallbyval64.ll new file mode 100644 index 0000000..7ecf379 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/tailcallbyval64.ll
@@ -0,0 +1,42 @@ +; RUN: llc < %s -mtriple=x86_64-linux -tailcallopt | FileCheck %s + +; FIXME: Win64 does not support byval. + +; Expect the entry point. +; CHECK: tailcaller: + +; Expect 2 rep;movs because of tail call byval lowering. +; CHECK: rep; +; CHECK: rep; + +; A sequence of copyto/copyfrom virtual registers is used to deal with byval +; lowering appearing after moving arguments to registers. The following two +; checks verify that the register allocator changes those sequences to direct +; moves to argument register where it can (for registers that are not used in +; byval lowering - not rsi, not rdi, not rcx). +; Expect argument 4 to be moved directly to register edx. +; CHECK: movl $7, %edx + +; Expect argument 6 to be moved directly to register r8. +; CHECK: movl $17, %r8d + +; Expect not call but jmp to @tailcallee. +; CHECK: jmp tailcallee + +; Expect the trailer. +; CHECK: .size tailcaller + +%struct.s = type { i64, i64, i64, i64, i64, i64, i64, i64, + i64, i64, i64, i64, i64, i64, i64, i64, + i64, i64, i64, i64, i64, i64, i64, i64 } + +declare fastcc i64 @tailcallee(%struct.s* byval %a, i64 %val, i64 %val2, i64 %val3, i64 %val4, i64 %val5) + + +define fastcc i64 @tailcaller(i64 %b, %struct.s* byval %a) { +entry: + %tmp2 = getelementptr %struct.s* %a, i32 0, i32 1 + %tmp3 = load i64* %tmp2, align 8 + %tmp4 = tail call fastcc i64 @tailcallee(%struct.s* byval %a , i64 %tmp3, i64 %b, i64 7, i64 13, i64 17) + ret i64 %tmp4 +}
diff --git a/src/LLVM/test/CodeGen/X86/tailcallfp.ll b/src/LLVM/test/CodeGen/X86/tailcallfp.ll new file mode 100644 index 0000000..c0b609a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/tailcallfp.ll
@@ -0,0 +1,5 @@ +; RUN: llc < %s -march=x86 -x86-asm-syntax=intel -tailcallopt | not grep call +define fastcc i32 @bar(i32 %X, i32(double, i32) *%FP) { + %Y = tail call fastcc i32 %FP(double 0.0, i32 %X) + ret i32 %Y +}
diff --git a/src/LLVM/test/CodeGen/X86/tailcallfp2.ll b/src/LLVM/test/CodeGen/X86/tailcallfp2.ll new file mode 100644 index 0000000..04c4e95 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/tailcallfp2.ll
@@ -0,0 +1,27 @@ +; RUN: llc < %s -march=x86 -tailcallopt | FileCheck %s + +declare i32 @putchar(i32) + +define fastcc i32 @checktail(i32 %x, i32* %f, i32 %g) nounwind { +; CHECK: checktail: + %tmp1 = icmp sgt i32 %x, 0 + br i1 %tmp1, label %if-then, label %if-else + +if-then: + %fun_ptr = bitcast i32* %f to i32(i32, i32*, i32)* + %arg1 = add i32 %x, -1 + call i32 @putchar(i32 90) +; CHECK: jmpl *%e{{.*}} + %res = tail call fastcc i32 %fun_ptr( i32 %arg1, i32 * %f, i32 %g) + ret i32 %res + +if-else: + ret i32 %x +} + + +define i32 @main() nounwind { + %f = bitcast i32 (i32, i32*, i32)* @checktail to i32* + %res = tail call fastcc i32 @checktail( i32 10, i32* %f,i32 10) + ret i32 %res +}
diff --git a/src/LLVM/test/CodeGen/X86/tailcallpic1.ll b/src/LLVM/test/CodeGen/X86/tailcallpic1.ll new file mode 100644 index 0000000..60e3be5 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/tailcallpic1.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -tailcallopt -mtriple=i686-pc-linux-gnu -relocation-model=pic | grep TAILCALL + +define protected fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { +entry: + ret i32 %a3 +} + +define fastcc i32 @tailcaller(i32 %in1, i32 %in2) { +entry: + %tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; <i32> [#uses=1] + ret i32 %tmp11 +}
diff --git a/src/LLVM/test/CodeGen/X86/tailcallpic2.ll b/src/LLVM/test/CodeGen/X86/tailcallpic2.ll new file mode 100644 index 0000000..eaa7631 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/tailcallpic2.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -tailcallopt -mtriple=i686-pc-linux-gnu -relocation-model=pic | grep -v TAILCALL + +define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { +entry: + ret i32 %a3 +} + +define fastcc i32 @tailcaller(i32 %in1, i32 %in2) { +entry: + %tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; <i32> [#uses=1] + ret i32 %tmp11 +}
diff --git a/src/LLVM/test/CodeGen/X86/tailcallstack64.ll b/src/LLVM/test/CodeGen/X86/tailcallstack64.ll new file mode 100644 index 0000000..c18c7aa --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/tailcallstack64.ll
@@ -0,0 +1,28 @@ +; RUN: llc < %s -tailcallopt -mtriple=x86_64-linux -post-RA-scheduler=true | FileCheck %s +; RUN: llc < %s -tailcallopt -mtriple=x86_64-win32 -post-RA-scheduler=true | FileCheck %s + +; FIXME: Redundant unused stack allocation could be eliminated. +; CHECK: subq ${{24|72|80}}, %rsp + +; Check that lowered arguments on the stack do not overwrite each other. +; Add %in1 %p1 to a different temporary register (%eax). +; CHECK: movl [[A1:32|144]](%rsp), [[R1:%e..]] +; Move param %in1 to temp register (%r10d). +; CHECK: movl [[A2:40|152]](%rsp), [[R2:%[a-z0-9]+]] +; Add %in1 %p1 to a different temporary register (%eax). +; CHECK: addl {{%edi|%ecx}}, [[R1]] +; Move param %in2 to stack. +; CHECK: movl [[R2]], [[A1]](%rsp) +; Move result of addition to stack. +; CHECK: movl [[R1]], [[A2]](%rsp) +; Eventually, do a TAILCALL +; CHECK: TAILCALL + +declare fastcc i32 @tailcallee(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6, i32 %a, i32 %b) nounwind + +define fastcc i32 @tailcaller(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6, i32 %in1, i32 %in2) nounwind { +entry: + %tmp = add i32 %in1, %p1 + %retval = tail call fastcc i32 @tailcallee(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6, i32 %in2,i32 %tmp) + ret i32 %retval +}
diff --git a/src/LLVM/test/CodeGen/X86/test-nofold.ll b/src/LLVM/test/CodeGen/X86/test-nofold.ll new file mode 100644 index 0000000..97db1b3 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/test-nofold.ll
@@ -0,0 +1,42 @@ +; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s +; rdar://5752025 + +; We want: +; CHECK: movl $42, %ecx +; CHECK-NEXT: movl 4(%esp), %eax +; CHECK-NEXT: andl $15, %eax +; CHECK-NEXT: cmovnel %ecx, %eax +; CHECK-NEXT: ret +; +; We don't want: +; movl 4(%esp), %eax +; movl %eax, %ecx # bad: extra copy +; andl $15, %ecx +; testl $15, %eax # bad: peep obstructed +; movl $42, %eax +; cmovel %ecx, %eax +; ret +; +; We also don't want: +; movl $15, %ecx # bad: larger encoding +; andl 4(%esp), %ecx +; movl $42, %eax +; cmovel %ecx, %eax +; ret +; +; We also don't want: +; movl 4(%esp), %ecx +; andl $15, %ecx +; testl %ecx, %ecx # bad: unnecessary test +; movl $42, %eax +; cmovel %ecx, %eax +; ret + +define i32 @t1(i32 %X) nounwind { +entry: + %tmp2 = and i32 %X, 15 ; <i32> [#uses=2] + %tmp4 = icmp eq i32 %tmp2, 0 ; <i1> [#uses=1] + %retval = select i1 %tmp4, i32 %tmp2, i32 42 ; <i32> [#uses=1] + ret i32 %retval +} +
diff --git a/src/LLVM/test/CodeGen/X86/test-shrink-bug.ll b/src/LLVM/test/CodeGen/X86/test-shrink-bug.ll new file mode 100644 index 0000000..64631ea --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/test-shrink-bug.ll
@@ -0,0 +1,23 @@ +; RUN: llc < %s | FileCheck %s + +; Codegen shouldn't reduce the comparison down to testb $-1, %al +; because that changes the result of the signed test. +; PR5132 +; CHECK: testw $255, %ax + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin10.0" + +@g_14 = global i8 -6, align 1 ; <i8*> [#uses=1] + +declare i32 @func_16(i8 signext %p_19, i32 %p_20) nounwind + +define i32 @func_35(i64 %p_38) nounwind ssp { +entry: + %tmp = load i8* @g_14 ; <i8> [#uses=2] + %conv = zext i8 %tmp to i32 ; <i32> [#uses=1] + %cmp = icmp sle i32 1, %conv ; <i1> [#uses=1] + %conv2 = zext i1 %cmp to i32 ; <i32> [#uses=1] + %call = call i32 @func_16(i8 signext %tmp, i32 %conv2) ssp ; <i32> [#uses=1] + ret i32 1 +}
diff --git a/src/LLVM/test/CodeGen/X86/test-shrink.ll b/src/LLVM/test/CodeGen/X86/test-shrink.ll new file mode 100644 index 0000000..5bc28ec --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/test-shrink.ll
@@ -0,0 +1,159 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=CHECK-64 +; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s --check-prefix=CHECK-64 +; RUN: llc < %s -march=x86 | FileCheck %s --check-prefix=CHECK-32 + +; CHECK-64: g64xh: +; CHECK-64: testb $8, {{%ah|%ch}} +; CHECK-64: ret +; CHECK-32: g64xh: +; CHECK-32: testb $8, %ah +; CHECK-32: ret +define void @g64xh(i64 inreg %x) nounwind { + %t = and i64 %x, 2048 + %s = icmp eq i64 %t, 0 + br i1 %s, label %yes, label %no + +yes: + call void @bar() + ret void +no: + ret void +} +; CHECK-64: g64xl: +; CHECK-64: testb $8, [[A0L:%dil|%cl]] +; CHECK-64: ret +; CHECK-32: g64xl: +; CHECK-32: testb $8, %al +; CHECK-32: ret +define void @g64xl(i64 inreg %x) nounwind { + %t = and i64 %x, 8 + %s = icmp eq i64 %t, 0 + br i1 %s, label %yes, label %no + +yes: + call void @bar() + ret void +no: + ret void +} +; CHECK-64: g32xh: +; CHECK-64: testb $8, {{%ah|%ch}} +; CHECK-64: ret +; CHECK-32: g32xh: +; CHECK-32: testb $8, %ah +; CHECK-32: ret +define void @g32xh(i32 inreg %x) nounwind { + %t = and i32 %x, 2048 + %s = icmp eq i32 %t, 0 + br i1 %s, label %yes, label %no + +yes: + call void @bar() + ret void +no: + ret void +} +; CHECK-64: g32xl: +; CHECK-64: testb $8, [[A0L]] +; CHECK-64: ret +; CHECK-32: g32xl: +; CHECK-32: testb $8, %al +; CHECK-32: ret +define void @g32xl(i32 inreg %x) nounwind { + %t = and i32 %x, 8 + %s = icmp eq i32 %t, 0 + br i1 %s, label %yes, label %no + +yes: + call void @bar() + ret void +no: + ret void +} +; CHECK-64: g16xh: +; CHECK-64: testb $8, {{%ah|%ch}} +; CHECK-64: ret +; CHECK-32: g16xh: +; CHECK-32: testb $8, %ah +; CHECK-32: ret +define void @g16xh(i16 inreg %x) nounwind { + %t = and i16 %x, 2048 + %s = icmp eq i16 %t, 0 + br i1 %s, label %yes, label %no + +yes: + call void @bar() + ret void +no: + ret void +} +; CHECK-64: g16xl: +; CHECK-64: testb $8, [[A0L]] +; CHECK-64: ret +; CHECK-32: g16xl: +; CHECK-32: testb $8, %al +; CHECK-32: ret +define void @g16xl(i16 
inreg %x) nounwind { + %t = and i16 %x, 8 + %s = icmp eq i16 %t, 0 + br i1 %s, label %yes, label %no + +yes: + call void @bar() + ret void +no: + ret void +} +; CHECK-64: g64x16: +; CHECK-64: testw $-32640, %[[A0W:di|cx]] +; CHECK-64: ret +; CHECK-32: g64x16: +; CHECK-32: testw $-32640, %ax +; CHECK-32: ret +define void @g64x16(i64 inreg %x) nounwind { + %t = and i64 %x, 32896 + %s = icmp eq i64 %t, 0 + br i1 %s, label %yes, label %no + +yes: + call void @bar() + ret void +no: + ret void +} +; CHECK-64: g32x16: +; CHECK-64: testw $-32640, %[[A0W]] +; CHECK-64: ret +; CHECK-32: g32x16: +; CHECK-32: testw $-32640, %ax +; CHECK-32: ret +define void @g32x16(i32 inreg %x) nounwind { + %t = and i32 %x, 32896 + %s = icmp eq i32 %t, 0 + br i1 %s, label %yes, label %no + +yes: + call void @bar() + ret void +no: + ret void +} +; CHECK-64: g64x32: +; CHECK-64: testl $268468352, %e[[A0W]] +; CHECK-64: ret +; CHECK-32: g64x32: +; CHECK-32: testl $268468352, %eax +; CHECK-32: ret +define void @g64x32(i64 inreg %x) nounwind { + %t = and i64 %x, 268468352 + %s = icmp eq i64 %t, 0 + br i1 %s, label %yes, label %no + +yes: + call void @bar() + ret void +no: + ret void +} + +declare void @bar()
diff --git a/src/LLVM/test/CodeGen/X86/testl-commute.ll b/src/LLVM/test/CodeGen/X86/testl-commute.ll new file mode 100644 index 0000000..0e6f636 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/testl-commute.ll
@@ -0,0 +1,72 @@ +; RUN: llc < %s | FileCheck %s +; rdar://5671654 +; The loads should fold into the testl instructions, no matter how +; the inputs are commuted. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-apple-darwin7" + +define i32 @test(i32* %P, i32* %G) nounwind { +; CHECK: test: +; CHECK-NOT: ret +; CHECK: testl (%{{.*}}), %{{.*}} +; CHECK: ret + +entry: + %0 = load i32* %P, align 4 ; <i32> [#uses=3] + %1 = load i32* %G, align 4 ; <i32> [#uses=1] + %2 = and i32 %1, %0 ; <i32> [#uses=1] + %3 = icmp eq i32 %2, 0 ; <i1> [#uses=1] + br i1 %3, label %bb1, label %bb + +bb: ; preds = %entry + %4 = tail call i32 @bar() nounwind ; <i32> [#uses=0] + ret i32 %0 + +bb1: ; preds = %entry + ret i32 %0 +} + +define i32 @test2(i32* %P, i32* %G) nounwind { +; CHECK: test2: +; CHECK-NOT: ret +; CHECK: testl (%{{.*}}), %{{.*}} +; CHECK: ret + +entry: + %0 = load i32* %P, align 4 ; <i32> [#uses=3] + %1 = load i32* %G, align 4 ; <i32> [#uses=1] + %2 = and i32 %0, %1 ; <i32> [#uses=1] + %3 = icmp eq i32 %2, 0 ; <i1> [#uses=1] + br i1 %3, label %bb1, label %bb + +bb: ; preds = %entry + %4 = tail call i32 @bar() nounwind ; <i32> [#uses=0] + ret i32 %0 + +bb1: ; preds = %entry + ret i32 %0 +} + +define i32 @test3(i32* %P, i32* %G) nounwind { +; CHECK: test3: +; CHECK-NOT: ret +; CHECK: testl (%{{.*}}), %{{.*}} +; CHECK: ret + +entry: + %0 = load i32* %P, align 4 ; <i32> [#uses=3] + %1 = load i32* %G, align 4 ; <i32> [#uses=1] + %2 = and i32 %0, %1 ; <i32> [#uses=1] + %3 = icmp eq i32 %2, 0 ; <i1> [#uses=1] + br i1 %3, label %bb1, label %bb + +bb: ; preds = %entry + %4 = tail call i32 @bar() nounwind ; <i32> [#uses=0] + ret i32 %1 + +bb1: ; preds = %entry + ret i32 %1 +} + +declare i32 @bar()
diff --git a/src/LLVM/test/CodeGen/X86/tls-pic.ll b/src/LLVM/test/CodeGen/X86/tls-pic.ll new file mode 100644 index 0000000..b83416d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/tls-pic.ll
@@ -0,0 +1,67 @@ +; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -relocation-model=pic | FileCheck -check-prefix=X32 %s +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -relocation-model=pic | FileCheck -check-prefix=X64 %s + +@i = thread_local global i32 15 + +define i32 @f1() { +entry: + %tmp1 = load i32* @i + ret i32 %tmp1 +} + +; X32: f1: +; X32: leal i@TLSGD(,%ebx), %eax +; X32: calll ___tls_get_addr@PLT + +; X64: f1: +; X64: leaq i@TLSGD(%rip), %rdi +; X64: callq __tls_get_addr@PLT + + +@i2 = external thread_local global i32 + +define i32* @f2() { +entry: + ret i32* @i +} + +; X32: f2: +; X32: leal i@TLSGD(,%ebx), %eax +; X32: calll ___tls_get_addr@PLT + +; X64: f2: +; X64: leaq i@TLSGD(%rip), %rdi +; X64: callq __tls_get_addr@PLT + + + +define i32 @f3() { +entry: + %tmp1 = load i32* @i ; <i32> [#uses=1] + ret i32 %tmp1 +} + +; X32: f3: +; X32: leal i@TLSGD(,%ebx), %eax +; X32: calll ___tls_get_addr@PLT + +; X64: f3: +; X64: leaq i@TLSGD(%rip), %rdi +; X64: callq __tls_get_addr@PLT + + +define i32* @f4() nounwind { +entry: + ret i32* @i +} + +; X32: f4: +; X32: leal i@TLSGD(,%ebx), %eax +; X32: calll ___tls_get_addr@PLT + +; X64: f4: +; X64: leaq i@TLSGD(%rip), %rdi +; X64: callq __tls_get_addr@PLT + + +
diff --git a/src/LLVM/test/CodeGen/X86/tls1.ll b/src/LLVM/test/CodeGen/X86/tls1.ll new file mode 100644 index 0000000..99feb50 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/tls1.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t +; RUN: grep {movl %gs:i@NTPOFF, %eax} %t +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2 +; RUN: grep {movl %fs:i@TPOFF, %eax} %t2 + +@i = thread_local global i32 15 + +define i32 @f() nounwind { +entry: + %tmp1 = load i32* @i + ret i32 %tmp1 +}
diff --git a/src/LLVM/test/CodeGen/X86/tls10.ll b/src/LLVM/test/CodeGen/X86/tls10.ll new file mode 100644 index 0000000..fb61596 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/tls10.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t +; RUN: grep {movl %gs:0, %eax} %t +; RUN: grep {leal i@NTPOFF(%eax), %eax} %t +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2 +; RUN: grep {movq %fs:0, %rax} %t2 +; RUN: grep {leaq i@TPOFF(%rax), %rax} %t2 + +@i = external hidden thread_local global i32 + +define i32* @f() { +entry: + ret i32* @i +}
diff --git a/src/LLVM/test/CodeGen/X86/tls11.ll b/src/LLVM/test/CodeGen/X86/tls11.ll new file mode 100644 index 0000000..514a168 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/tls11.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t +; RUN: grep {movzwl %gs:i@NTPOFF, %eax} %t +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2 +; RUN: grep {movzwl %fs:i@TPOFF, %eax} %t2 + +@i = thread_local global i16 15 + +define i16 @f() { +entry: + %tmp1 = load i16* @i + ret i16 %tmp1 +}
diff --git a/src/LLVM/test/CodeGen/X86/tls12.ll b/src/LLVM/test/CodeGen/X86/tls12.ll new file mode 100644 index 0000000..c29f6ad --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/tls12.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t +; RUN: grep {movb %gs:i@NTPOFF, %al} %t +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2 +; RUN: grep {movb %fs:i@TPOFF, %al} %t2 + +@i = thread_local global i8 15 + +define i8 @f() { +entry: + %tmp1 = load i8* @i + ret i8 %tmp1 +}
diff --git a/src/LLVM/test/CodeGen/X86/tls13.ll b/src/LLVM/test/CodeGen/X86/tls13.ll new file mode 100644 index 0000000..08778ec --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/tls13.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t +; RUN: grep {movswl %gs:i@NTPOFF, %eax} %t +; RUN: grep {movzwl %gs:j@NTPOFF, %eax} %t +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2 +; RUN: grep {movswl %fs:i@TPOFF, %edi} %t2 +; RUN: grep {movzwl %fs:j@TPOFF, %edi} %t2 + +@i = thread_local global i16 0 +@j = thread_local global i16 0 + +define void @f() nounwind optsize { +entry: + %0 = load i16* @i, align 2 + %1 = sext i16 %0 to i32 + tail call void @g(i32 %1) nounwind + %2 = load i16* @j, align 2 + %3 = zext i16 %2 to i32 + tail call void @h(i32 %3) nounwind + ret void +} + +declare void @g(i32) + +declare void @h(i32)
diff --git a/src/LLVM/test/CodeGen/X86/tls14.ll b/src/LLVM/test/CodeGen/X86/tls14.ll new file mode 100644 index 0000000..88426dd --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/tls14.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t +; RUN: grep {movsbl %gs:i@NTPOFF, %eax} %t +; RUN: grep {movzbl %gs:j@NTPOFF, %eax} %t +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2 +; RUN: grep {movsbl %fs:i@TPOFF, %edi} %t2 +; RUN: grep {movzbl %fs:j@TPOFF, %edi} %t2 + +@i = thread_local global i8 0 +@j = thread_local global i8 0 + +define void @f() nounwind optsize { +entry: + %0 = load i8* @i, align 2 + %1 = sext i8 %0 to i32 + tail call void @g(i32 %1) nounwind + %2 = load i8* @j, align 2 + %3 = zext i8 %2 to i32 + tail call void @h(i32 %3) nounwind + ret void +} + +declare void @g(i32) + +declare void @h(i32)
diff --git a/src/LLVM/test/CodeGen/X86/tls15.ll b/src/LLVM/test/CodeGen/X86/tls15.ll new file mode 100644 index 0000000..7abf070 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/tls15.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t +; RUN: grep {movl %gs:0, %eax} %t | count 1 +; RUN: grep {leal i@NTPOFF(%eax), %ecx} %t +; RUN: grep {leal j@NTPOFF(%eax), %eax} %t +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2 +; RUN: grep {movq %fs:0, %rax} %t2 | count 1 +; RUN: grep {leaq i@TPOFF(%rax), %rcx} %t2 +; RUN: grep {leaq j@TPOFF(%rax), %rax} %t2 + +@i = thread_local global i32 0 +@j = thread_local global i32 0 + +define void @f(i32** %a, i32** %b) { +entry: + store i32* @i, i32** %a, align 8 + store i32* @j, i32** %b, align 8 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/tls2.ll b/src/LLVM/test/CodeGen/X86/tls2.ll new file mode 100644 index 0000000..22e181a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/tls2.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t +; RUN: grep {movl %gs:0, %eax} %t +; RUN: grep {leal i@NTPOFF(%eax), %eax} %t +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2 +; RUN: grep {movq %fs:0, %rax} %t2 +; RUN: grep {leaq i@TPOFF(%rax), %rax} %t2 + +@i = thread_local global i32 15 + +define i32* @f() { +entry: + ret i32* @i +}
diff --git a/src/LLVM/test/CodeGen/X86/tls3.ll b/src/LLVM/test/CodeGen/X86/tls3.ll new file mode 100644 index 0000000..7327cc4 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/tls3.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t +; RUN: grep {movl i@INDNTPOFF, %eax} %t +; RUN: grep {movl %gs:(%eax), %eax} %t +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2 +; RUN: grep {movq i@GOTTPOFF(%rip), %rax} %t2 +; RUN: grep {movl %fs:(%rax), %eax} %t2 + +@i = external thread_local global i32 ; <i32*> [#uses=2] + +define i32 @f() nounwind { +entry: + %tmp1 = load i32* @i ; <i32> [#uses=1] + ret i32 %tmp1 +}
diff --git a/src/LLVM/test/CodeGen/X86/tls4.ll b/src/LLVM/test/CodeGen/X86/tls4.ll new file mode 100644 index 0000000..d2e40e3 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/tls4.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t +; RUN: grep {movl %gs:0, %eax} %t +; RUN: grep {addl i@INDNTPOFF, %eax} %t +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2 +; RUN: grep {movq %fs:0, %rax} %t2 +; RUN: grep {addq i@GOTTPOFF(%rip), %rax} %t2 + +@i = external thread_local global i32 ; <i32*> [#uses=2] + +define i32* @f() { +entry: + ret i32* @i +}
diff --git a/src/LLVM/test/CodeGen/X86/tls5.ll b/src/LLVM/test/CodeGen/X86/tls5.ll new file mode 100644 index 0000000..4d2cc02 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/tls5.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t +; RUN: grep {movl %gs:i@NTPOFF, %eax} %t +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2 +; RUN: grep {movl %fs:i@TPOFF, %eax} %t2 + +@i = internal thread_local global i32 15 + +define i32 @f() { +entry: + %tmp1 = load i32* @i + ret i32 %tmp1 +}
diff --git a/src/LLVM/test/CodeGen/X86/tls6.ll b/src/LLVM/test/CodeGen/X86/tls6.ll new file mode 100644 index 0000000..505106e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/tls6.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t +; RUN: grep {movl %gs:0, %eax} %t +; RUN: grep {leal i@NTPOFF(%eax), %eax} %t +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2 +; RUN: grep {movq %fs:0, %rax} %t2 +; RUN: grep {leaq i@TPOFF(%rax), %rax} %t2 + +@i = internal thread_local global i32 15 + +define i32* @f() { +entry: + ret i32* @i +}
diff --git a/src/LLVM/test/CodeGen/X86/tls7.ll b/src/LLVM/test/CodeGen/X86/tls7.ll new file mode 100644 index 0000000..e9116e7 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/tls7.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t +; RUN: grep {movl %gs:i@NTPOFF, %eax} %t +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2 +; RUN: grep {movl %fs:i@TPOFF, %eax} %t2 + +@i = hidden thread_local global i32 15 + +define i32 @f() { +entry: + %tmp1 = load i32* @i + ret i32 %tmp1 +}
diff --git a/src/LLVM/test/CodeGen/X86/tls8.ll b/src/LLVM/test/CodeGen/X86/tls8.ll new file mode 100644 index 0000000..375af94 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/tls8.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t +; RUN: grep {movl %gs:0, %eax} %t +; RUN: grep {leal i@NTPOFF(%eax), %eax} %t +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2 +; RUN: grep {movq %fs:0, %rax} %t2 +; RUN: grep {leaq i@TPOFF(%rax), %rax} %t2 + +@i = hidden thread_local global i32 15 + +define i32* @f() { +entry: + ret i32* @i +}
diff --git a/src/LLVM/test/CodeGen/X86/tls9.ll b/src/LLVM/test/CodeGen/X86/tls9.ll new file mode 100644 index 0000000..7d08df8 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/tls9.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t +; RUN: grep {movl %gs:i@NTPOFF, %eax} %t +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2 +; RUN: grep {movl %fs:i@TPOFF, %eax} %t2 + +@i = external hidden thread_local global i32 + +define i32 @f() nounwind { +entry: + %tmp1 = load i32* @i + ret i32 %tmp1 +}
diff --git a/src/LLVM/test/CodeGen/X86/tlv-1.ll b/src/LLVM/test/CodeGen/X86/tlv-1.ll new file mode 100644 index 0000000..92dac30 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/tlv-1.ll
@@ -0,0 +1,52 @@ +; RUN: llc < %s -mtriple x86_64-apple-darwin -mcpu=core2 | FileCheck %s + +%struct.A = type { [48 x i8], i32, i32, i32 } + +@c = external thread_local global %struct.A, align 4 + +define void @main() nounwind ssp { +; CHECK: main: +entry: + call void @llvm.memset.p0i8.i64(i8* getelementptr inbounds (%struct.A* @c, i32 0, i32 0, i32 0), i8 0, i64 60, i32 1, i1 false) + unreachable + ; CHECK: movq _c@TLVP(%rip), %rdi + ; CHECK-NEXT: callq *(%rdi) + ; CHECK-NEXT: movl $0, 56(%rax) + ; CHECK-NEXT: movq $0, 48(%rax) +} + +; rdar://10291355 +define i32 @test() nounwind readonly ssp { +entry: +; CHECK: test: +; CHECK: movq _a@TLVP(%rip), +; CHECK: callq * +; CHECK: movl (%rax), [[REGISTER:%[a-z]+]] +; CHECK: movq _b@TLVP(%rip), +; CHECK: callq * +; CHECK: subl (%rax), [[REGISTER]] + %0 = load i32* @a, align 4 + %1 = load i32* @b, align 4 + %sub = sub nsw i32 %0, %1 + ret i32 %sub +} + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind + +@a = thread_local global i32 0 ; <i32*> [#uses=0] +@b = thread_local global i32 0 ; <i32*> [#uses=0] + +; CHECK: .tbss _a$tlv$init, 4, 2 +; CHECK: .section __DATA,__thread_vars,thread_local_variables +; CHECK: .globl _a +; CHECK: _a: +; CHECK: .quad __tlv_bootstrap +; CHECK: .quad 0 +; CHECK: .quad _a$tlv$init + +; CHECK: .tbss _b$tlv$init, 4, 2 +; CHECK: .globl _b +; CHECK: _b: +; CHECK: .quad __tlv_bootstrap +; CHECK: .quad 0 +; CHECK: .quad _b$tlv$init
diff --git a/src/LLVM/test/CodeGen/X86/tlv-2.ll b/src/LLVM/test/CodeGen/X86/tlv-2.ll new file mode 100644 index 0000000..5f29a60 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/tlv-2.ll
@@ -0,0 +1,32 @@ +; RUN: llc < %s -mtriple x86_64-apple-darwin -O0 | FileCheck %s + +@b = thread_local global i32 5, align 4 +@a = thread_local global i32 0, align 4 +@c = internal thread_local global i32 0, align 4 +@d = internal thread_local global i32 5, align 4 + +define void @foo() nounwind ssp { +entry: + store i32 1, i32* @a, align 4 + ; CHECK: movq _a@TLVP(%rip), %rdi + ; CHECK: callq *(%rdi) + ; CHECK: movl $1, (%rax) + + store i32 2, i32* @b, align 4 + ; CHECK: movq _b@TLVP(%rip), %rdi + ; CHECK: callq *(%rdi) + ; CHECK: movl $2, (%rax) + + store i32 3, i32* @c, align 4 + ; CHECK: movq _c@TLVP(%rip), %rdi + ; CHECK: callq *(%rdi) + ; CHECK: movl $3, (%rax) + + store i32 4, i32* @d, align 4 + ; CHECK: movq _d@TLVP(%rip), %rdi + ; CHECK: callq *(%rdi) + ; CHECK: movl $4, (%rax) + ; CHECK: popq + + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/trap.ll b/src/LLVM/test/CodeGen/X86/trap.ll new file mode 100644 index 0000000..03ae6bf --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/trap.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=x86 -mcpu=yonah | grep ud2 +define i32 @test() noreturn nounwind { +entry: + tail call void @llvm.trap( ) + unreachable +} + +declare void @llvm.trap() nounwind +
diff --git a/src/LLVM/test/CodeGen/X86/trunc-ext-ld-st.ll b/src/LLVM/test/CodeGen/X86/trunc-ext-ld-st.ll new file mode 100644 index 0000000..57d6e97 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/trunc-ext-ld-st.ll
@@ -0,0 +1,82 @@ +; RUN: llc < %s -march=x86-64 -mcpu=corei7 -promote-elements -mattr=+sse41 | FileCheck %s + +;CHECK: load_2_i8 +; A single 16-bit load +;CHECK: movzwl +;CHECK: pshufb +;CHECK: paddq +;CHECK: pshufb +; A single 16-bit store +;CHECK: movw +;CHECK: ret + +define void @load_2_i8(<2 x i8>* %A) { + %T = load <2 x i8>* %A + %G = add <2 x i8> %T, <i8 9, i8 7> + store <2 x i8> %G, <2 x i8>* %A + ret void +} + +;CHECK: load_2_i16 +; Read 32-bits +;CHECK: movd +;CHECK: pshufb +;CHECK: paddq +;CHECK: pshufb +;CHECK: movd +;CHECK: ret +define void @load_2_i16(<2 x i16>* %A) { + %T = load <2 x i16>* %A + %G = add <2 x i16> %T, <i16 9, i16 7> + store <2 x i16> %G, <2 x i16>* %A + ret void +} + +;CHECK: load_2_i32 +;CHECK: pshufd +;CHECK: paddq +;CHECK: pshufd +;CHECK: ret +define void @load_2_i32(<2 x i32>* %A) { + %T = load <2 x i32>* %A + %G = add <2 x i32> %T, <i32 9, i32 7> + store <2 x i32> %G, <2 x i32>* %A + ret void +} + +;CHECK: load_4_i8 +;CHECK: movd +;CHECK: pshufb +;CHECK: paddd +;CHECK: pshufb +;CHECK: ret +define void @load_4_i8(<4 x i8>* %A) { + %T = load <4 x i8>* %A + %G = add <4 x i8> %T, <i8 1, i8 4, i8 9, i8 7> + store <4 x i8> %G, <4 x i8>* %A + ret void +} + +;CHECK: load_4_i16 +;CHECK: punpcklwd +;CHECK: paddd +;CHECK: pshufb +;CHECK: ret +define void @load_4_i16(<4 x i16>* %A) { + %T = load <4 x i16>* %A + %G = add <4 x i16> %T, <i16 1, i16 4, i16 9, i16 7> + store <4 x i16> %G, <4 x i16>* %A + ret void +} + +;CHECK: load_8_i8 +;CHECK: punpcklbw +;CHECK: paddw +;CHECK: pshufb +;CHECK: ret +define void @load_8_i8(<8 x i8>* %A) { + %T = load <8 x i8>* %A + %G = add <8 x i8> %T, %T + store <8 x i8> %G, <8 x i8>* %A + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/trunc-to-bool.ll b/src/LLVM/test/CodeGen/X86/trunc-to-bool.ll new file mode 100644 index 0000000..1943fc7 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/trunc-to-bool.ll
@@ -0,0 +1,59 @@ +; An integer truncation to i1 should be done with an and instruction to make +; sure only the LSBit survives. Test that this is the case both for a returned +; value and as the operand of a branch. +; RUN: llc < %s -march=x86 | FileCheck %s + +define zeroext i1 @test1(i32 %X) nounwind { + %Y = trunc i32 %X to i1 + ret i1 %Y +} +; CHECK: test1: +; CHECK: andl $1, %eax + +define i1 @test2(i32 %val, i32 %mask) nounwind { +entry: + %shifted = ashr i32 %val, %mask + %anded = and i32 %shifted, 1 + %trunced = trunc i32 %anded to i1 + br i1 %trunced, label %ret_true, label %ret_false +ret_true: + ret i1 true +ret_false: + ret i1 false +} +; CHECK: test2: +; CHECK: btl %eax + +define i32 @test3(i8* %ptr) nounwind { + %val = load i8* %ptr + %tmp = trunc i8 %val to i1 + br i1 %tmp, label %cond_true, label %cond_false +cond_true: + ret i32 21 +cond_false: + ret i32 42 +} +; CHECK: test3: +; CHECK: testb $1, (%eax) + +define i32 @test4(i8* %ptr) nounwind { + %tmp = ptrtoint i8* %ptr to i1 + br i1 %tmp, label %cond_true, label %cond_false +cond_true: + ret i32 21 +cond_false: + ret i32 42 +} +; CHECK: test4: +; CHECK: testb $1, 4(%esp) + +define i32 @test5(double %d) nounwind { + %tmp = fptosi double %d to i1 + br i1 %tmp, label %cond_true, label %cond_false +cond_true: + ret i32 21 +cond_false: + ret i32 42 +} +; CHECK: test5: +; CHECK: testb $1
diff --git a/src/LLVM/test/CodeGen/X86/twoaddr-coalesce-2.ll b/src/LLVM/test/CodeGen/X86/twoaddr-coalesce-2.ll new file mode 100644 index 0000000..6f16a25 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/twoaddr-coalesce-2.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& \ +; RUN: grep {twoaddrinstr} | grep {Number of instructions aggressively commuted} +; rdar://6480363 + +target triple = "i386-apple-darwin9.6" + +define <2 x double> @t(<2 x double> %A, <2 x double> %B, <2 x double> %C) nounwind readnone { +entry: + %tmp.i3 = bitcast <2 x double> %B to <2 x i64> ; <<2 x i64>> [#uses=1] + %tmp2.i = or <2 x i64> %tmp.i3, <i64 4607632778762754458, i64 4607632778762754458> ; <<2 x i64>> [#uses=1] + %tmp3.i = bitcast <2 x i64> %tmp2.i to <2 x double> ; <<2 x double>> [#uses=1] + %tmp.i2 = fadd <2 x double> %tmp3.i, %A ; <<2 x double>> [#uses=1] + %tmp.i = fadd <2 x double> %tmp.i2, %C ; <<2 x double>> [#uses=1] + ret <2 x double> %tmp.i +}
diff --git a/src/LLVM/test/CodeGen/X86/twoaddr-coalesce.ll b/src/LLVM/test/CodeGen/X86/twoaddr-coalesce.ll new file mode 100644 index 0000000..6f6d6f2 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/twoaddr-coalesce.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -march=x86 | grep mov | count 4 +; rdar://6523745 + +@"\01LC" = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1] + +define i32 @foo() nounwind { +bb1.thread: + br label %bb1 + +bb1: ; preds = %bb1, %bb1.thread + %i.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %indvar.next, %bb1 ] ; <i32> [#uses=2] + %0 = trunc i32 %i.0.reg2mem.0 to i8 ; <i8> [#uses=1] + %1 = sdiv i8 %0, 2 ; <i8> [#uses=1] + %2 = sext i8 %1 to i32 ; <i32> [#uses=1] + %3 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @"\01LC", i32 0, i32 0), i32 %2) nounwind ; <i32> [#uses=0] + %indvar.next = add i32 %i.0.reg2mem.0, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %indvar.next, 258 ; <i1> [#uses=1] + br i1 %exitcond, label %bb2, label %bb1 + +bb2: ; preds = %bb1 + ret i32 0 +} + +declare i32 @printf(i8*, ...) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/twoaddr-lea.ll b/src/LLVM/test/CodeGen/X86/twoaddr-lea.ll new file mode 100644 index 0000000..a1d797f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/twoaddr-lea.ll
@@ -0,0 +1,47 @@ +;; X's live range extends beyond the shift, so the register allocator +;; cannot coalesce it with Y. Because of this, a copy needs to be +;; emitted before the shift to save the register value before it is +;; clobbered. However, this copy is not needed if the register +;; allocator turns the shift into an LEA. This also occurs for ADD. + +; Check that the shift gets turned into an LEA. +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s + +@G = external global i32 + +define i32 @test1(i32 %X) nounwind { +; CHECK: test1: +; CHECK-NOT: mov +; CHECK: leal 1(%rdi) + %Z = add i32 %X, 1 + volatile store i32 %Z, i32* @G + ret i32 %X +} + +; rdar://8977508 +; The second add should not be transformed to leal nor should it be +; commutted (which would require inserting a copy). +define i32 @test2(i32 inreg %a, i32 inreg %b, i32 %c, i32 %d) nounwind { +entry: +; CHECK: test2: +; CHECK: leal +; CHECK-NOT: leal +; CHECK-NOT: mov +; CHECK-NEXT: addl +; CHECK-NEXT: ret + %add = add i32 %b, %a + %add3 = add i32 %add, %c + %add5 = add i32 %add3, %d + ret i32 %add5 +} + +; rdar://9002648 +define i64 @test3(i64 %x) nounwind readnone ssp { +entry: +; CHECK: test3: +; CHECK: leaq (%rdi,%rdi), %rax +; CHECK-NOT: addq +; CHECK-NEXT: ret + %0 = shl i64 %x, 1 + ret i64 %0 +}
diff --git a/src/LLVM/test/CodeGen/X86/twoaddr-pass-sink.ll b/src/LLVM/test/CodeGen/X86/twoaddr-pass-sink.ll new file mode 100644 index 0000000..077fee0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/twoaddr-pass-sink.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& grep {Number of 3-address instructions sunk} + +define void @t2(<2 x i64>* %vDct, <2 x i64>* %vYp, i8* %skiplist, <2 x i64> %a1) nounwind { +entry: + %tmp25 = bitcast <2 x i64> %a1 to <8 x i16> ; <<8 x i16>> [#uses=1] + br label %bb +bb: ; preds = %bb, %entry + %skiplist_addr.0.rec = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=3] + %vYp_addr.0.rec = shl i32 %skiplist_addr.0.rec, 3 ; <i32> [#uses=3] + %vDct_addr.0 = getelementptr <2 x i64>* %vDct, i32 %vYp_addr.0.rec ; <<2 x i64>*> [#uses=1] + %vYp_addr.0 = getelementptr <2 x i64>* %vYp, i32 %vYp_addr.0.rec ; <<2 x i64>*> [#uses=1] + %skiplist_addr.0 = getelementptr i8* %skiplist, i32 %skiplist_addr.0.rec ; <i8*> [#uses=1] + %vDct_addr.0.sum43 = or i32 %vYp_addr.0.rec, 1 ; <i32> [#uses=1] + %tmp7 = getelementptr <2 x i64>* %vDct, i32 %vDct_addr.0.sum43 ; <<2 x i64>*> [#uses=1] + %tmp8 = load <2 x i64>* %tmp7, align 16 ; <<2 x i64>> [#uses=1] + %tmp11 = load <2 x i64>* %vDct_addr.0, align 16 ; <<2 x i64>> [#uses=1] + %tmp13 = bitcast <2 x i64> %tmp8 to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp15 = bitcast <2 x i64> %tmp11 to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp16 = shufflevector <8 x i16> %tmp15, <8 x i16> %tmp13, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 > ; <<8 x i16>> [#uses=1] + %tmp26 = mul <8 x i16> %tmp25, %tmp16 ; <<8 x i16>> [#uses=1] + %tmp27 = bitcast <8 x i16> %tmp26 to <2 x i64> ; <<2 x i64>> [#uses=1] + store <2 x i64> %tmp27, <2 x i64>* %vYp_addr.0, align 16 + %tmp37 = load i8* %skiplist_addr.0, align 1 ; <i8> [#uses=1] + %tmp38 = icmp eq i8 %tmp37, 0 ; <i1> [#uses=1] + %indvar.next = add i32 %skiplist_addr.0.rec, 1 ; <i32> [#uses=1] + br i1 %tmp38, label %return, label %bb +return: ; preds = %bb + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/twoaddr-sink-terminator.ll b/src/LLVM/test/CodeGen/X86/twoaddr-sink-terminator.ll new file mode 100644 index 0000000..209d474 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/twoaddr-sink-terminator.ll
@@ -0,0 +1,43 @@ +; RUN: llc < %s -verify-coalescing +; PR10998 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +target triple = "i386-unknown-freebsd8.2" + +define void @test(i32 %arg1) nounwind align 2 { +bb11: + %tmp13 = and i32 %arg1, 7 + %tmp14 = add i32 %tmp13, -5 + switch i32 %tmp13, label %bb18 [ + i32 0, label %bb21 + i32 4, label %bb22 + i32 3, label %bb21 + i32 2, label %bb19 + ] + +bb18: + %tmp202 = call i32 @f() nounwind + unreachable + +bb19: + %tmp20 = call i32 @f() nounwind + br label %bb24 + +bb21: + %tmp203 = call i32 @f() nounwind + br label %bb24 + +bb22: + %tmp23 = call i32 @f() nounwind + br label %bb24 + +bb24: + %tmp15 = icmp ult i32 %tmp14, 2 + %tmp55 = select i1 %tmp15, i32 45, i32 44 + %tmp56 = call i32 @f2(i32 %tmp55) + unreachable +} + +declare i32 @f() + +declare i32 @f2(i32)
diff --git a/src/LLVM/test/CodeGen/X86/uint64-to-float.ll b/src/LLVM/test/CodeGen/X86/uint64-to-float.ll new file mode 100644 index 0000000..1dbbdcf --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/uint64-to-float.ll
@@ -0,0 +1,46 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s +; Verify that we are using the efficient uitofp --> sitofp lowering illustrated +; by the compiler_rt implementation of __floatundisf. +; <rdar://problem/8493982> + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +; FIXME: This test could generate this code: +; +; ## BB#0: ## %entry +; testq %rdi, %rdi +; jns LBB0_2 +; ## BB#1: +; movq %rdi, %rax +; shrq %rax +; andq $1, %rdi +; orq %rax, %rdi +; cvtsi2ssq %rdi, %xmm0 +; addss %xmm0, %xmm0 +; ret +; LBB0_2: ## %entry +; cvtsi2ssq %rdi, %xmm0 +; ret +; +; The blocks come from lowering: +; +; %vreg7<def> = CMOV_FR32 %vreg6<kill>, %vreg5<kill>, 15, %EFLAGS<imp-use>; FR32:%vreg7,%vreg6,%vreg5 +; +; If the instruction had an EFLAGS<kill> flag, it wouldn't need to mark EFLAGS +; as live-in on the new blocks, and machine sinking would be able to sink +; everything below the test. + +; CHECK: shrq +; CHECK: andq +; CHECK-NEXT: orq +; CHECK: testq %rdi, %rdi +; CHECK-NEXT: jns LBB0_2 +; CHECK: cvtsi2ss +; CHECK: LBB0_2 +; CHECK-NEXT: cvtsi2ss +define float @test(i64 %a) { +entry: + %b = uitofp i64 %a to float + ret float %b +}
diff --git a/src/LLVM/test/CodeGen/X86/uint_to_fp-2.ll b/src/LLVM/test/CodeGen/X86/uint_to_fp-2.ll new file mode 100644 index 0000000..7536fb8 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/uint_to_fp-2.ll
@@ -0,0 +1,33 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s + +; rdar://6504833 +define float @test1(i32 %x) nounwind readnone { +; CHECK: test1 +; CHECK: movd +; CHECK: orpd +; CHECK: subsd +; CHECK: cvtsd2ss +; CHECK: movss +; CHECK: flds +; CHECK: ret +entry: + %0 = uitofp i32 %x to float + ret float %0 +} + +; PR10802 +define float @test2(<4 x i32> %x) nounwind readnone ssp { +; CHECK: test2 +; CHECK: xorps [[ZERO:%xmm[0-9]+]] +; CHECK: movss {{.*}}, [[ZERO]] +; CHECK: orps +; CHECK: subsd +; CHECK: cvtsd2ss +; CHECK: movss +; CHECK: flds +; CHECK: ret +entry: + %vecext = extractelement <4 x i32> %x, i32 0 + %conv = uitofp i32 %vecext to float + ret float %conv +}
diff --git a/src/LLVM/test/CodeGen/X86/uint_to_fp.ll b/src/LLVM/test/CodeGen/X86/uint_to_fp.ll new file mode 100644 index 0000000..41ee194 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/uint_to_fp.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=x86 -mcpu=yonah | not grep {sub.*esp} +; RUN: llc < %s -march=x86 -mcpu=yonah | grep cvtsi2ss +; rdar://6034396 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin8" + +define void @test(i32 %x, float* %y) nounwind { +entry: + lshr i32 %x, 23 ; <i32>:0 [#uses=1] + uitofp i32 %0 to float ; <float>:1 [#uses=1] + store float %1, float* %y + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/umul-with-carry.ll b/src/LLVM/test/CodeGen/X86/umul-with-carry.ll new file mode 100644 index 0000000..7416051 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/umul-with-carry.ll
@@ -0,0 +1,26 @@ +; RUN: llc < %s -march=x86 | grep {jc} | count 1 +; XFAIL: * + +; FIXME: umul-with-overflow not supported yet. + +@ok = internal constant [4 x i8] c"%d\0A\00" +@no = internal constant [4 x i8] c"no\0A\00" + +define i1 @func(i32 %v1, i32 %v2) nounwind { +entry: + %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2) + %sum = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + br i1 %obit, label %carry, label %normal + +normal: + %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind + ret i1 true + +carry: + %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind + ret i1 false +} + +declare i32 @printf(i8*, ...) nounwind +declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32)
diff --git a/src/LLVM/test/CodeGen/X86/umul-with-overflow.ll b/src/LLVM/test/CodeGen/X86/umul-with-overflow.ll new file mode 100644 index 0000000..e5858de --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/umul-with-overflow.ll
@@ -0,0 +1,38 @@ +; RUN: llc < %s -march=x86 | FileCheck %s + +declare {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b) +define zeroext i1 @a(i32 %x) nounwind { + %res = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %x, i32 3) + %obil = extractvalue {i32, i1} %res, 1 + ret i1 %obil + +; CHECK: a: +; CHECK: mull +; CHECK: seto %al +; CHECK: movzbl %al, %eax +; CHECK: ret +} + +define i32 @test2(i32 %a, i32 %b) nounwind readnone { +entry: + %tmp0 = add i32 %b, %a + %tmp1 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %tmp0, i32 2) + %tmp2 = extractvalue { i32, i1 } %tmp1, 0 + ret i32 %tmp2 +; CHECK: test2: +; CHECK: addl +; CHECK-NEXT: addl +; CHECK-NEXT: ret +} + +define i32 @test3(i32 %a, i32 %b) nounwind readnone { +entry: + %tmp0 = add i32 %b, %a + %tmp1 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %tmp0, i32 4) + %tmp2 = extractvalue { i32, i1 } %tmp1, 0 + ret i32 %tmp2 +; CHECK: test3: +; CHECK: addl +; CHECK: mull +; CHECK-NEXT: ret +}
diff --git a/src/LLVM/test/CodeGen/X86/unaligned-load.ll b/src/LLVM/test/CodeGen/X86/unaligned-load.ll new file mode 100644 index 0000000..d8fffbe --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/unaligned-load.ll
@@ -0,0 +1,37 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin10.0 -mcpu=core2 -relocation-model=dynamic-no-pic --asm-verbose=0 | FileCheck -check-prefix=I386 %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -mcpu=core2 -relocation-model=dynamic-no-pic --asm-verbose=0 | FileCheck -check-prefix=CORE2 %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -mcpu=corei7 -relocation-model=dynamic-no-pic --asm-verbose=0 | FileCheck -check-prefix=COREI7 %s + +@.str1 = internal constant [31 x i8] c"DHRYSTONE PROGRAM, SOME STRING\00", align 8 +@.str3 = internal constant [31 x i8] c"DHRYSTONE PROGRAM, 2'ND STRING\00", align 8 + +define void @func() nounwind ssp { +entry: + %String2Loc = alloca [31 x i8], align 1 + br label %bb + +bb: ; preds = %bb, %entry + %String2Loc9 = getelementptr inbounds [31 x i8]* %String2Loc, i64 0, i64 0 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %String2Loc9, i8* getelementptr inbounds ([31 x i8]* @.str3, i64 0, i64 0), i64 31, i32 1, i1 false) + br label %bb + +return: ; No predecessors! + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind + +; I386: calll {{_?}}memcpy + +; CORE2: movabsq +; CORE2: movabsq +; CORE2: movabsq + +; COREI7: movups _.str3 + +; CORE2: .section +; CORE2: .align 3 +; CORE2-NEXT: _.str1: +; CORE2-NEXT: .asciz "DHRYSTONE PROGRAM, SOME STRING" +; CORE2: .align 3 +; CORE2-NEXT: _.str3:
diff --git a/src/LLVM/test/CodeGen/X86/undef-label.ll b/src/LLVM/test/CodeGen/X86/undef-label.ll new file mode 100644 index 0000000..1afd935 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/undef-label.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s + +; This is a case where we would incorrectly conclude that LBB0_1 could only +; be reached via fall through and would therefore omit the label. + +; CHECK: jne .LBB0_1 +; CHECK-NEXT: jnp .LBB0_3 +; CHECK-NEXT: .LBB0_1: + +define void @xyz() { +entry: + br i1 fcmp oeq (double fsub (double undef, double undef), double 0.000000e+00), label %bar, label %foo + +foo: + br i1 fcmp ogt (double fdiv (double fsub (double fmul (double undef, double undef), double fsub (double undef, double undef)), double fmul (double undef, double undef)), double 1.0), label %foo, label %bar + +bar: + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/unknown-location.ll b/src/LLVM/test/CodeGen/X86/unknown-location.ll new file mode 100644 index 0000000..b89c473 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/unknown-location.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s -asm-verbose=false -mtriple=x86_64-apple-darwin10 -use-unknown-locations | FileCheck %s + +; The divide instruction does not have a debug location. CodeGen should +; represent this in the debug information. This is done by setting line +; and column to 0 + +; CHECK: leal +; CHECK-NEXT: .loc 1 0 0 +; CHECK: cltd +; CHECK-NEXT: idivl +; CHECK-NEXT: .loc 2 4 3 + +define i32 @foo(i32 %w, i32 %x, i32 %y, i32 %z) nounwind { +entry: + %a = add i32 %w, %x, !dbg !8 + %b = sdiv i32 %a, %y + %c = add i32 %b, %z, !dbg !8 + ret i32 %c, !dbg !8 +} + +!0 = metadata !{i32 524545, metadata !1, metadata !"x", metadata !2, i32 1, metadata !6} ; [ DW_TAG_arg_variable ] +!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 1, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 524329, metadata !"test.c", metadata !"/dir", metadata !3} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 524305, i32 0, i32 12, metadata !"test.c", metadata !".", metadata !"producer", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] +!5 = metadata !{metadata !6} +!6 = metadata !{i32 524324, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!7 = metadata !{i32 524299, metadata !1, i32 1, i32 30} ; [ DW_TAG_lexical_block ] +!8 = metadata !{i32 4, i32 3, metadata !7, null}
diff --git a/src/LLVM/test/CodeGen/X86/unreachable-loop-sinking.ll b/src/LLVM/test/CodeGen/X86/unreachable-loop-sinking.ll new file mode 100644 index 0000000..35f6917 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/unreachable-loop-sinking.ll
@@ -0,0 +1,30 @@ +; RUN: llc < %s +; PR6777 + +; MachineSink shouldn't try to sink code in unreachable blocks, as it's +; not worthwhile, and there are corner cases which it doesn't handle. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define double @fn1(i8* %arg, i64 %arg1) { +Entry: + br i1 undef, label %Body, label %Exit + +Exit: ; preds = %Brancher7, %Entry + ret double undef + +Body: ; preds = %Entry + br i1 false, label %Brancher7, label %Body3 + +Body3: ; preds = %Body6, %Body3, %Body + br label %Body3 + +Body6: ; preds = %Brancher7 + %tmp = fcmp oeq double 0xC04FBB2E40000000, undef ; <i1> [#uses=1] + br i1 %tmp, label %Body3, label %Brancher7 + +Brancher7: ; preds = %Body6, %Body + %tmp2 = icmp ult i32 undef, 10 ; <i1> [#uses=1] + br i1 %tmp2, label %Body6, label %Exit +}
diff --git a/src/LLVM/test/CodeGen/X86/unreachable-stack-protector.ll b/src/LLVM/test/CodeGen/X86/unreachable-stack-protector.ll new file mode 100644 index 0000000..eeebcee --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/unreachable-stack-protector.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readnone + +define void @test5() nounwind optsize noinline ssp { +entry: +; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip) + %buf = alloca [64 x i8], align 16 + %0 = call i64 @llvm.objectsize.i64(i8* undef, i1 false) + br i1 false, label %if.end, label %if.then + +if.then: ; preds = %entry + unreachable + +if.end: ; preds = %entry + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/urem-i8-constant.ll b/src/LLVM/test/CodeGen/X86/urem-i8-constant.ll new file mode 100644 index 0000000..e3cb69c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/urem-i8-constant.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=x86 | grep 111 + +define i8 @foo(i8 %tmp325) { + %t546 = urem i8 %tmp325, 37 + ret i8 %t546 +}
diff --git a/src/LLVM/test/CodeGen/X86/use-add-flags.ll b/src/LLVM/test/CodeGen/X86/use-add-flags.ll new file mode 100644 index 0000000..a0448ec --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/use-add-flags.ll
@@ -0,0 +1,57 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s + +; Reuse the flags value from the add instructions instead of emitting separate +; testl instructions. + +; Use the flags on the add. + +; CHECK: test1: +; CHECK: addl +; CHECK-NOT: test +; CHECK: cmovnsl +; CHECK: ret + +define i32 @test1(i32* %x, i32 %y, i32 %a, i32 %b) nounwind { + %tmp2 = load i32* %x, align 4 ; <i32> [#uses=1] + %tmp4 = add i32 %tmp2, %y ; <i32> [#uses=1] + %tmp5 = icmp slt i32 %tmp4, 0 ; <i1> [#uses=1] + %tmp.0 = select i1 %tmp5, i32 %a, i32 %b ; <i32> [#uses=1] + ret i32 %tmp.0 +} + +declare void @foo(i32) + +; Don't use the flags result of the and here, since the and has no +; other use. A simple test is better. + +; CHECK: test2: +; CHECK: testb $16, {{%dil|%cl}} + +define void @test2(i32 %x) nounwind { + %y = and i32 %x, 16 + %t = icmp eq i32 %y, 0 + br i1 %t, label %true, label %false +true: + call void @foo(i32 %x) + ret void +false: + ret void +} + +; Do use the flags result of the and here, since the and has another use. + +; CHECK: test3: +; CHECK: andl $16, %e +; CHECK-NEXT: jne + +define void @test3(i32 %x) nounwind { + %y = and i32 %x, 16 + %t = icmp eq i32 %y, 0 + br i1 %t, label %true, label %false +true: + call void @foo(i32 %y) + ret void +false: + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/v-binop-widen.ll b/src/LLVM/test/CodeGen/X86/v-binop-widen.ll new file mode 100644 index 0000000..3bee700 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/v-binop-widen.ll
@@ -0,0 +1,12 @@ +; RUN: llc -march=x86 -mattr=+sse < %s | FileCheck %s +; CHECK: divss +; CHECK: divps +; CHECK: divps + +%vec = type <9 x float> +define %vec @vecdiv( %vec %p1, %vec %p2) +{ + %result = fdiv %vec %p1, %p2 + ret %vec %result +} +
diff --git a/src/LLVM/test/CodeGen/X86/v-binop-widen2.ll b/src/LLVM/test/CodeGen/X86/v-binop-widen2.ll new file mode 100644 index 0000000..ae3f55a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/v-binop-widen2.ll
@@ -0,0 +1,40 @@ +; RUN: llc -march=x86 -mattr=+sse < %s | FileCheck %s + +%vec = type <6 x float> +; CHECK: divss +; CHECK: divss +; CHECK: divps +define %vec @vecdiv( %vec %p1, %vec %p2) +{ + %result = fdiv %vec %p1, %p2 + ret %vec %result +} + +@a = constant %vec < float 2.0, float 4.0, float 8.0, float 16.0, float 32.0, float 64.0 > +@b = constant %vec < float 2.0, float 2.0, float 2.0, float 2.0, float 2.0, float 2.0 > + +; Expected result: < 1.0, 2.0, 4.0, ..., 2.0^(n-1) > +; main() returns 0 if the result is expected and 1 otherwise +; to execute, use llvm-as < %s | lli +define i32 @main() nounwind { +entry: + %avec = load %vec* @a + %bvec = load %vec* @b + + %res = call %vec @vecdiv(%vec %avec, %vec %bvec) + br label %loop +loop: + %idx = phi i32 [0, %entry], [%nextInd, %looptail] + %expected = phi float [1.0, %entry], [%nextExpected, %looptail] + %elem = extractelement %vec %res, i32 %idx + %expcmp = fcmp oeq float %elem, %expected + br i1 %expcmp, label %looptail, label %return +looptail: + %nextExpected = fmul float %expected, 2.0 + %nextInd = add i32 %idx, 1 + %cmp = icmp slt i32 %nextInd, 6 + br i1 %cmp, label %loop, label %return +return: + %retval = phi i32 [0, %looptail], [1, %loop] + ret i32 %retval +}
diff --git a/src/LLVM/test/CodeGen/X86/v2f32.ll b/src/LLVM/test/CodeGen/X86/v2f32.ll new file mode 100644 index 0000000..ba54833 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/v2f32.ll
@@ -0,0 +1,115 @@ +; RUN: llc < %s -mtriple=x86_64-linux -mcpu=penryn -asm-verbose=0 -o - | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=penryn -asm-verbose=0 -o - | FileCheck %s -check-prefix=W64 +; RUN: llc < %s -mcpu=yonah -march=x86 -mtriple=i386-linux-gnu -asm-verbose=0 -o - | FileCheck %s -check-prefix=X32 + +; PR7518 +define void @test1(<2 x float> %Q, float *%P2) nounwind { + %a = extractelement <2 x float> %Q, i32 0 + %b = extractelement <2 x float> %Q, i32 1 + %c = fadd float %a, %b + + store float %c, float* %P2 + ret void +; X64: test1: +; X64-NEXT: pshufd $1, %xmm0, %xmm1 +; X64-NEXT: addss %xmm0, %xmm1 +; X64-NEXT: movss %xmm1, (%rdi) +; X64-NEXT: ret + +; W64: test1: +; W64-NEXT: movdqa (%rcx), %xmm0 +; W64-NEXT: pshufd $1, %xmm0, %xmm1 +; W64-NEXT: addss %xmm0, %xmm1 +; W64-NEXT: movss %xmm1, (%rdx) +; W64-NEXT: ret + +; X32: test1: +; X32-NEXT: pshufd $1, %xmm0, %xmm1 +; X32-NEXT: addss %xmm0, %xmm1 +; X32-NEXT: movl 4(%esp), %eax +; X32-NEXT: movss %xmm1, (%eax) +; X32-NEXT: ret +} + + +define <2 x float> @test2(<2 x float> %Q, <2 x float> %R, <2 x float> *%P) nounwind { + %Z = fadd <2 x float> %Q, %R + ret <2 x float> %Z + +; X64: test2: +; X64-NEXT: addps %xmm1, %xmm0 +; X64-NEXT: ret + +; W64: test2: +; W64-NEXT: movaps (%rcx), %xmm0 +; W64-NEXT: addps (%rdx), %xmm0 +; W64-NEXT: ret + +; X32: test2: +; X32: addps %xmm1, %xmm0 +} + + +define <2 x float> @test3(<4 x float> %A) nounwind { + %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1> + %C = fadd <2 x float> %B, %B + ret <2 x float> %C +; X64: test3: +; X64-NEXT: addps %xmm0, %xmm0 +; X64-NEXT: ret + +; W64: test3: +; W64-NEXT: movaps (%rcx), %xmm0 +; W64-NEXT: addps %xmm0, %xmm0 +; W64-NEXT: ret + +; X32: test3: +; X32-NEXT: addps %xmm0, %xmm0 +; X32-NEXT: ret +} + +define <2 x float> @test4(<2 x float> %A) nounwind { + %C = fadd <2 x float> %A, %A + ret <2 x float> %C +; X64: test4: +; X64-NEXT: addps %xmm0, %xmm0 +; X64-NEXT: ret 
+ +; W64: test4: +; W64-NEXT: movaps (%rcx), %xmm0 +; W64-NEXT: addps %xmm0, %xmm0 +; W64-NEXT: ret + +; X32: test4: +; X32-NEXT: addps %xmm0, %xmm0 +; X32-NEXT: ret +} + +define <4 x float> @test5(<4 x float> %A) nounwind { + %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1> + %C = fadd <2 x float> %B, %B + br label %BB + +BB: + %D = fadd <2 x float> %C, %C + %E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> + ret <4 x float> %E + +; X64: test5: +; X64-NEXT: addps %xmm0, %xmm0 +; X64-NEXT: addps %xmm0, %xmm0 +; X64-NEXT: ret + +; W64: test5: +; W64-NEXT: movaps (%rcx), %xmm0 +; W64-NEXT: addps %xmm0, %xmm0 +; W64-NEXT: addps %xmm0, %xmm0 +; W64-NEXT: ret + +; X32: test5: +; X32-NEXT: addps %xmm0, %xmm0 +; X32-NEXT: addps %xmm0, %xmm0 +; X32-NEXT: ret +} + +
diff --git a/src/LLVM/test/CodeGen/X86/v4f32-immediate.ll b/src/LLVM/test/CodeGen/X86/v4f32-immediate.ll new file mode 100644 index 0000000..b5ebaa7 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/v4f32-immediate.ll
@@ -0,0 +1,5 @@ +; RUN: llc < %s -march=x86 -mattr=+sse | grep movaps + +define <4 x float> @foo() { + ret <4 x float> <float 0x4009C9D0A0000000, float 0x4002666660000000, float 0x3FF3333340000000, float 0x3FB99999A0000000> +}
diff --git a/src/LLVM/test/CodeGen/X86/vararg_tailcall.ll b/src/LLVM/test/CodeGen/X86/vararg_tailcall.ll new file mode 100644 index 0000000..73d80eb --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vararg_tailcall.ll
@@ -0,0 +1,98 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=WIN64 + +@.str = private unnamed_addr constant [5 x i8] c"%ld\0A\00" +@sel = external global i8* +@sel3 = external global i8* +@sel4 = external global i8* +@sel5 = external global i8* +@sel6 = external global i8* +@sel7 = external global i8* + +; X64: @foo +; X64: jmp +; WIN64: @foo +; WIN64: callq +define void @foo(i64 %arg) nounwind optsize ssp noredzone { +entry: + %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i64 0, i64 0), i64 %arg) nounwind optsize noredzone + ret void +} + +declare i32 @printf(i8*, ...) optsize noredzone + +; X64: @bar +; X64: jmp +; WIN64: @bar +; WIN64: jmp +define void @bar(i64 %arg) nounwind optsize ssp noredzone { +entry: + tail call void @bar2(i8* getelementptr inbounds ([5 x i8]* @.str, i64 0, i64 0), i64 %arg) nounwind optsize noredzone + ret void +} + +declare void @bar2(i8*, i64) optsize noredzone + +; X64: @foo2 +; X64: jmp +; WIN64: @foo2 +; WIN64: callq +define i8* @foo2(i8* %arg) nounwind optsize ssp noredzone { +entry: + %tmp1 = load i8** @sel, align 8, !tbaa !0 + %call = tail call i8* (i8*, i8*, ...)* @x2(i8* %arg, i8* %tmp1) nounwind optsize noredzone + ret i8* %call +} + +declare i8* @x2(i8*, i8*, ...) optsize noredzone + +; X64: @foo6 +; X64: jmp +; WIN64: @foo6 +; WIN64: callq +define i8* @foo6(i8* %arg1, i8* %arg2) nounwind optsize ssp noredzone { +entry: + %tmp2 = load i8** @sel3, align 8, !tbaa !0 + %tmp3 = load i8** @sel4, align 8, !tbaa !0 + %tmp4 = load i8** @sel5, align 8, !tbaa !0 + %tmp5 = load i8** @sel6, align 8, !tbaa !0 + %call = tail call i8* (i8*, i8*, i8*, ...)* @x3(i8* %arg1, i8* %arg2, i8* %tmp2, i8* %tmp3, i8* %tmp4, i8* %tmp5) nounwind optsize noredzone + ret i8* %call +} + +declare i8* @x3(i8*, i8*, i8*, ...) 
optsize noredzone + +; X64: @foo7 +; X64: callq +; WIN64: @foo7 +; WIN64: callq +define i8* @foo7(i8* %arg1, i8* %arg2) nounwind optsize ssp noredzone { +entry: + %tmp2 = load i8** @sel3, align 8, !tbaa !0 + %tmp3 = load i8** @sel4, align 8, !tbaa !0 + %tmp4 = load i8** @sel5, align 8, !tbaa !0 + %tmp5 = load i8** @sel6, align 8, !tbaa !0 + %tmp6 = load i8** @sel7, align 8, !tbaa !0 + %call = tail call i8* (i8*, i8*, i8*, i8*, i8*, i8*, i8*, ...)* @x7(i8* %arg1, i8* %arg2, i8* %tmp2, i8* %tmp3, i8* %tmp4, i8* %tmp5, i8* %tmp6) nounwind optsize noredzone + ret i8* %call +} + +declare i8* @x7(i8*, i8*, i8*, i8*, i8*, i8*, i8*, ...) optsize noredzone + +; X64: @foo8 +; X64: callq +; WIN64: @foo8 +; WIN64: callq +define i8* @foo8(i8* %arg1, i8* %arg2) nounwind optsize ssp noredzone { +entry: + %tmp2 = load i8** @sel3, align 8, !tbaa !0 + %tmp3 = load i8** @sel4, align 8, !tbaa !0 + %tmp4 = load i8** @sel5, align 8, !tbaa !0 + %tmp5 = load i8** @sel6, align 8, !tbaa !0 + %call = tail call i8* (i8*, i8*, i8*, ...)* @x3(i8* %arg1, i8* %arg2, i8* %tmp2, i8* %tmp3, i8* %tmp4, i8* %tmp5, i32 48879, i32 48879) nounwind optsize noredzone + ret i8* %call +} + +!0 = metadata !{metadata !"any pointer", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/src/LLVM/test/CodeGen/X86/variable-sized-darwin-bzero.ll b/src/LLVM/test/CodeGen/X86/variable-sized-darwin-bzero.ll new file mode 100644 index 0000000..1e86d75 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/variable-sized-darwin-bzero.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin10 | grep __bzero + +define void @foo(i8* %p, i64 %n) { + call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 %n, i32 4, i1 false) + ret void +} + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/variadic-node-pic.ll b/src/LLVM/test/CodeGen/X86/variadic-node-pic.ll new file mode 100644 index 0000000..1182a30 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/variadic-node-pic.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -relocation-model=pic -code-model=large + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-apple-darwin8" + +declare void @xscanf(i64) nounwind + +define void @foo() nounwind { + call void (i64)* @xscanf( i64 0 ) nounwind + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/vec-sign.ll b/src/LLVM/test/CodeGen/X86/vec-sign.ll new file mode 100644 index 0000000..31b9c2e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec-sign.ll
@@ -0,0 +1,30 @@ +; RUN: llc < %s -march=x86 -mcpu=nehalem | FileCheck %s + +define <4 x i32> @signd(<4 x i32> %a, <4 x i32> %b) nounwind { +entry: +; CHECK: signd: +; CHECK: psignd +; CHECK-NOT: sub +; CHECK: ret + %b.lobit = ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31> + %sub = sub nsw <4 x i32> zeroinitializer, %a + %0 = xor <4 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1> + %1 = and <4 x i32> %a, %0 + %2 = and <4 x i32> %b.lobit, %sub + %cond = or <4 x i32> %1, %2 + ret <4 x i32> %cond +} + +define <4 x i32> @blendvb(<4 x i32> %b, <4 x i32> %a, <4 x i32> %c) nounwind { +entry: +; CHECK: blendvb: +; CHECK: pblendvb +; CHECK: ret + %b.lobit = ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31> + %sub = sub nsw <4 x i32> zeroinitializer, %a + %0 = xor <4 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1> + %1 = and <4 x i32> %c, %0 + %2 = and <4 x i32> %a, %b.lobit + %cond = or <4 x i32> %1, %2 + ret <4 x i32> %cond +}
diff --git a/src/LLVM/test/CodeGen/X86/vec-trunc-store.ll b/src/LLVM/test/CodeGen/X86/vec-trunc-store.ll new file mode 100644 index 0000000..4d665f1 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec-trunc-store.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=x86-64 + +define void @foo(<8 x i32>* %p) nounwind { + %t = load <8 x i32>* %p + %cti69 = trunc <8 x i32> %t to <8 x i16> ; <<8 x i16>> [#uses=1] + store <8 x i16> %cti69, <8 x i16>* undef + ret void +} + +define void @bar(<4 x i32>* %p) nounwind { + %t = load <4 x i32>* %p + %cti44 = trunc <4 x i32> %t to <4 x i16> ; <<4 x i16>> [#uses=1] + store <4 x i16> %cti44, <4 x i16>* undef + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_add.ll b/src/LLVM/test/CodeGen/X86/vec_add.ll new file mode 100644 index 0000000..1a55e52 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_add.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 + +define <2 x i64> @test(<2 x i64> %a, <2 x i64> %b) { +entry: + %tmp9 = add <2 x i64> %b, %a ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp9 +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_align.ll b/src/LLVM/test/CodeGen/X86/vec_align.ll new file mode 100644 index 0000000..e273115 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_align.ll
@@ -0,0 +1,35 @@ +; RUN: llc < %s -mcpu=yonah -relocation-model=static | grep movaps | count 2 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i686-apple-darwin8" + +%f4 = type <4 x float> + +@G = external global { float,float,float,float}, align 16 + +define %f4 @test1(float %W, float %X, float %Y, float %Z) nounwind { + %tmp = insertelement %f4 undef, float %W, i32 0 + %tmp2 = insertelement %f4 %tmp, float %X, i32 1 + %tmp4 = insertelement %f4 %tmp2, float %Y, i32 2 + %tmp6 = insertelement %f4 %tmp4, float %Z, i32 3 + ret %f4 %tmp6 +} + +define %f4 @test2() nounwind { + %Wp = getelementptr { float,float,float,float}* @G, i32 0, i32 0 + %Xp = getelementptr { float,float,float,float}* @G, i32 0, i32 1 + %Yp = getelementptr { float,float,float,float}* @G, i32 0, i32 2 + %Zp = getelementptr { float,float,float,float}* @G, i32 0, i32 3 + + %W = load float* %Wp + %X = load float* %Xp + %Y = load float* %Yp + %Z = load float* %Zp + + %tmp = insertelement %f4 undef, float %W, i32 0 + %tmp2 = insertelement %f4 %tmp, float %X, i32 1 + %tmp4 = insertelement %f4 %tmp2, float %Y, i32 2 + %tmp6 = insertelement %f4 %tmp4, float %Z, i32 3 + ret %f4 %tmp6 +} +
diff --git a/src/LLVM/test/CodeGen/X86/vec_anyext.ll b/src/LLVM/test/CodeGen/X86/vec_anyext.ll new file mode 100644 index 0000000..d2a4c7f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_anyext.ll
@@ -0,0 +1,77 @@ +; RUN: llc < %s -march=x86-64 +; PR 9267 + +define<4 x i16> @func_16_32() { + %F = load <4 x i32>* undef + %G = trunc <4 x i32> %F to <4 x i16> + %H = load <4 x i32>* undef + %Y = trunc <4 x i32> %H to <4 x i16> + %T = add <4 x i16> %Y, %G + store <4 x i16>%T , <4 x i16>* undef + ret <4 x i16> %T +} + +define<4 x i16> @func_16_64() { + %F = load <4 x i64>* undef + %G = trunc <4 x i64> %F to <4 x i16> + %H = load <4 x i64>* undef + %Y = trunc <4 x i64> %H to <4 x i16> + %T = xor <4 x i16> %Y, %G + store <4 x i16>%T , <4 x i16>* undef + ret <4 x i16> %T +} + +define<4 x i32> @func_32_64() { + %F = load <4 x i64>* undef + %G = trunc <4 x i64> %F to <4 x i32> + %H = load <4 x i64>* undef + %Y = trunc <4 x i64> %H to <4 x i32> + %T = or <4 x i32> %Y, %G + ret <4 x i32> %T +} + +define<4 x i8> @func_8_16() { + %F = load <4 x i16>* undef + %G = trunc <4 x i16> %F to <4 x i8> + %H = load <4 x i16>* undef + %Y = trunc <4 x i16> %H to <4 x i8> + %T = add <4 x i8> %Y, %G + ret <4 x i8> %T +} + +define<4 x i8> @func_8_32() { + %F = load <4 x i32>* undef + %G = trunc <4 x i32> %F to <4 x i8> + %H = load <4 x i32>* undef + %Y = trunc <4 x i32> %H to <4 x i8> + %T = sub <4 x i8> %Y, %G + ret <4 x i8> %T +} + +define<4 x i8> @func_8_64() { + %F = load <4 x i64>* undef + %G = trunc <4 x i64> %F to <4 x i8> + %H = load <4 x i64>* undef + %Y = trunc <4 x i64> %H to <4 x i8> + %T = add <4 x i8> %Y, %G + ret <4 x i8> %T +} + +define<4 x i16> @const_16_32() { + %G = trunc <4 x i32> <i32 0, i32 3, i32 8, i32 7> to <4 x i16> + ret <4 x i16> %G +} + +define<4 x i16> @const_16_64() { + %G = trunc <4 x i64> <i64 0, i64 3, i64 8, i64 7> to <4 x i16> + ret <4 x i16> %G +} + +define void @bugOnTruncBitwidthReduce() nounwind { +meh: + %0 = xor <4 x i64> zeroinitializer, zeroinitializer + %1 = trunc <4 x i64> %0 to <4 x i32> + %2 = lshr <4 x i32> %1, <i32 18, i32 18, i32 18, i32 18> + %3 = xor <4 x i32> %2, %1 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_call.ll b/src/LLVM/test/CodeGen/X86/vec_call.ll new file mode 100644 index 0000000..91d5ac0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_call.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \ +; RUN: grep {subl.*60} +; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \ +; RUN: grep {movaps.*32} + + +define void @test() { + tail call void @xx( i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, <2 x i64> bitcast (<4 x i32> < i32 4, i32 3, i32 2, i32 1 > to <2 x i64>), <2 x i64> bitcast (<4 x i32> < i32 8, i32 7, i32 6, i32 5 > to <2 x i64>), <2 x i64> bitcast (<4 x i32> < i32 6, i32 4, i32 2, i32 0 > to <2 x i64>), <2 x i64> bitcast (<4 x i32> < i32 8, i32 4, i32 2, i32 1 > to <2 x i64>), <2 x i64> bitcast (<4 x i32> < i32 0, i32 1, i32 3, i32 9 > to <2 x i64>) ) + ret void +} + +declare void @xx(i32, i32, i32, i32, i32, i32, i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) +
diff --git a/src/LLVM/test/CodeGen/X86/vec_cast.ll b/src/LLVM/test/CodeGen/X86/vec_cast.ll new file mode 100644 index 0000000..90d39d0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_cast.ll
@@ -0,0 +1,56 @@ +; RUN: llc < %s -mtriple=x86_64-linux -mcpu=core2 +; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=core2 + +define <8 x i32> @a(<8 x i16> %a) nounwind { + %c = sext <8 x i16> %a to <8 x i32> + ret <8 x i32> %c +} + +;define <3 x i32> @b(<3 x i16> %a) nounwind { +; %c = sext <3 x i16> %a to <3 x i32> +; ret <3 x i32> %c +;} + +define <1 x i32> @c(<1 x i16> %a) nounwind { + %c = sext <1 x i16> %a to <1 x i32> + ret <1 x i32> %c +} + +define <8 x i32> @d(<8 x i16> %a) nounwind { + %c = zext <8 x i16> %a to <8 x i32> + ret <8 x i32> %c +} + +;define <3 x i32> @e(<3 x i16> %a) nounwind { +; %c = zext <3 x i16> %a to <3 x i32> +; ret <3 x i32> %c +;} + +define <1 x i32> @f(<1 x i16> %a) nounwind { + %c = zext <1 x i16> %a to <1 x i32> + ret <1 x i32> %c +} + +define <8 x i16> @g(<8 x i32> %a) nounwind { + %c = trunc <8 x i32> %a to <8 x i16> + ret <8 x i16> %c +} + +define <3 x i16> @h(<3 x i32> %a) nounwind { + %c = trunc <3 x i32> %a to <3 x i16> + ret <3 x i16> %c +} + +define <1 x i16> @i(<1 x i32> %a) nounwind { + %c = trunc <1 x i32> %a to <1 x i16> + ret <1 x i16> %c +} + +; PR6438 +define void @__OpenCL_math_kernel4_kernel() nounwind { + %tmp12.i = and <4 x i32> zeroinitializer, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040> ; <<4 x i32>> [#uses=1] + %cmp13.i = icmp eq <4 x i32> %tmp12.i, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040> ; <<4 x i1>> [#uses=2] + %cmp.ext14.i = sext <4 x i1> %cmp13.i to <4 x i32> ; <<4 x i32>> [#uses=0] + %tmp2110.i = and <4 x i1> %cmp13.i, zeroinitializer ; <<4 x i1>> [#uses=0] + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_clear.ll b/src/LLVM/test/CodeGen/X86/vec_clear.ll new file mode 100644 index 0000000..909508c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_clear.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -o %t +; RUN: not grep and %t +; RUN: not grep psrldq %t +; RUN: grep xorps %t + +define <4 x float> @test(<4 x float>* %v1) nounwind { + %tmp = load <4 x float>* %v1 ; <<4 x float>> [#uses=1] + %tmp15 = bitcast <4 x float> %tmp to <2 x i64> ; <<2 x i64>> [#uses=1] + %tmp24 = and <2 x i64> %tmp15, bitcast (<4 x i32> < i32 0, i32 0, i32 -1, i32 -1 > to <2 x i64>) ; <<2 x i64>> [#uses=1] + %tmp31 = bitcast <2 x i64> %tmp24 to <4 x float> ; <<4 x float>> [#uses=1] + ret <4 x float> %tmp31 +} +
diff --git a/src/LLVM/test/CodeGen/X86/vec_compare-2.ll b/src/LLVM/test/CodeGen/X86/vec_compare-2.ll new file mode 100644 index 0000000..04bb725 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_compare-2.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s -march=x86 -mcpu=penryn | FileCheck %s + +declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone + +declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone + +declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone + +define void @blackDespeckle_wrapper(i8** %args_list, i64* %gtid, i64 %xend) { +entry: +; CHECK-NOT: set +; CHECK: pcmpgt +; CHECK: blendvps + %shr.i = ashr <4 x i32> zeroinitializer, <i32 3, i32 3, i32 3, i32 3> ; <<4 x i32>> [#uses=1] + %cmp318.i = sext <4 x i1> zeroinitializer to <4 x i32> ; <<4 x i32>> [#uses=1] + %sub322.i = sub <4 x i32> %shr.i, zeroinitializer ; <<4 x i32>> [#uses=1] + %cmp323.x = icmp slt <4 x i32> zeroinitializer, %sub322.i ; <<4 x i1>> [#uses=1] + %cmp323.i = sext <4 x i1> %cmp323.x to <4 x i32> ; <<4 x i32>> [#uses=1] + %or.i = or <4 x i32> %cmp318.i, %cmp323.i ; <<4 x i32>> [#uses=1] + %tmp10.i83.i = bitcast <4 x i32> %or.i to <4 x float> ; <<4 x float>> [#uses=1] + %0 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> undef, <4 x float> undef, <4 x float> %tmp10.i83.i) nounwind ; <<4 x float>> [#uses=1] + %conv.i.i15.i = bitcast <4 x float> %0 to <4 x i32> ; <<4 x i32>> [#uses=1] + %swz.i.i28.i = shufflevector <4 x i32> %conv.i.i15.i, <4 x i32> undef, <2 x i32> <i32 0, i32 1> ; <<2 x i32>> [#uses=1] + %tmp6.i29.i = bitcast <2 x i32> %swz.i.i28.i to <4 x i16> ; <<4 x i16>> [#uses=1] + %swz.i30.i = shufflevector <4 x i16> %tmp6.i29.i, <4 x i16> undef, <2 x i32> <i32 0, i32 1> ; <<2 x i16>> [#uses=1] + store <2 x i16> %swz.i30.i, <2 x i16>* undef + unreachable + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_compare-sse4.ll b/src/LLVM/test/CodeGen/X86/vec_compare-sse4.ll new file mode 100644 index 0000000..b4a4a4c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_compare-sse4.ll
@@ -0,0 +1,35 @@ +; RUN: llc < %s -march=x86 -mattr=-sse3,+sse2 | FileCheck %s -check-prefix=SSE2 +; RUN: llc < %s -march=x86 -mattr=-sse42,+sse41 | FileCheck %s -check-prefix=SSE41 +; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s -check-prefix=SSE42 + +define <2 x i64> @test1(<2 x i64> %A, <2 x i64> %B) nounwind { +; SSE42: test1: +; SSE42: pcmpgtq +; SSE42: ret +; SSE41: test1: +; SSE41-NOT: pcmpgtq +; SSE41: ret +; SSE2: test1: +; SSE2-NOT: pcmpgtq +; SSE2: ret + + %C = icmp sgt <2 x i64> %A, %B + %D = sext <2 x i1> %C to <2 x i64> + ret <2 x i64> %D +} + +define <2 x i64> @test2(<2 x i64> %A, <2 x i64> %B) nounwind { +; SSE42: test2: +; SSE42: pcmpeqq +; SSE42: ret +; SSE41: test2: +; SSE41: pcmpeqq +; SSE41: ret +; SSE2: test2: +; SSE2-NOT: pcmpeqq +; SSE2: ret + + %C = icmp eq <2 x i64> %A, %B + %D = sext <2 x i1> %C to <2 x i64> + ret <2 x i64> %D +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_compare.ll b/src/LLVM/test/CodeGen/X86/vec_compare.ll new file mode 100644 index 0000000..39c9b77 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_compare.ll
@@ -0,0 +1,43 @@ +; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s + + +define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind { +; CHECK: test1: +; CHECK: pcmpgtd +; CHECK: ret + + %C = icmp sgt <4 x i32> %A, %B + %D = sext <4 x i1> %C to <4 x i32> + ret <4 x i32> %D +} + +define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) nounwind { +; CHECK: test2: +; CHECK: pcmp +; CHECK: pcmp +; CHECK: pxor +; CHECK: ret + %C = icmp sge <4 x i32> %A, %B + %D = sext <4 x i1> %C to <4 x i32> + ret <4 x i32> %D +} + +define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) nounwind { +; CHECK: test3: +; CHECK: pcmpgtd +; CHECK: movdqa +; CHECK: ret + %C = icmp slt <4 x i32> %A, %B + %D = sext <4 x i1> %C to <4 x i32> + ret <4 x i32> %D +} + +define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind { +; CHECK: test4: +; CHECK: movdqa +; CHECK: pcmpgtd +; CHECK: ret + %C = icmp ugt <4 x i32> %A, %B + %D = sext <4 x i1> %C to <4 x i32> + ret <4 x i32> %D +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_ctbits.ll b/src/LLVM/test/CodeGen/X86/vec_ctbits.ll new file mode 100644 index 0000000..f0158d6 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_ctbits.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=x86-64 + +declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>) +declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>) +declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) + +define <2 x i64> @footz(<2 x i64> %a) nounwind { + %c = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a) + ret <2 x i64> %c +} +define <2 x i64> @foolz(<2 x i64> %a) nounwind { + %c = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a) + ret <2 x i64> %c +} +define <2 x i64> @foopop(<2 x i64> %a) nounwind { + %c = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a) + ret <2 x i64> %c +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_ext_inreg.ll b/src/LLVM/test/CodeGen/X86/vec_ext_inreg.ll new file mode 100644 index 0000000..02b16a7 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_ext_inreg.ll
@@ -0,0 +1,37 @@ +; RUN: llc < %s -march=x86-64 + +define <8 x i32> @a(<8 x i32> %a) nounwind { + %b = trunc <8 x i32> %a to <8 x i16> + %c = sext <8 x i16> %b to <8 x i32> + ret <8 x i32> %c +} + +define <3 x i32> @b(<3 x i32> %a) nounwind { + %b = trunc <3 x i32> %a to <3 x i16> + %c = sext <3 x i16> %b to <3 x i32> + ret <3 x i32> %c +} + +define <1 x i32> @c(<1 x i32> %a) nounwind { + %b = trunc <1 x i32> %a to <1 x i16> + %c = sext <1 x i16> %b to <1 x i32> + ret <1 x i32> %c +} + +define <8 x i32> @d(<8 x i32> %a) nounwind { + %b = trunc <8 x i32> %a to <8 x i16> + %c = zext <8 x i16> %b to <8 x i32> + ret <8 x i32> %c +} + +define <3 x i32> @e(<3 x i32> %a) nounwind { + %b = trunc <3 x i32> %a to <3 x i16> + %c = zext <3 x i16> %b to <3 x i32> + ret <3 x i32> %c +} + +define <1 x i32> @f(<1 x i32> %a) nounwind { + %b = trunc <1 x i32> %a to <1 x i16> + %c = zext <1 x i16> %b to <1 x i32> + ret <1 x i32> %c +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_extract-sse4.ll b/src/LLVM/test/CodeGen/X86/vec_extract-sse4.ll new file mode 100644 index 0000000..f487654 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_extract-sse4.ll
@@ -0,0 +1,31 @@ +; RUN: llc < %s -mcpu=corei7 -march=x86 -mattr=+sse41 -o %t +; RUN: not grep extractps %t +; RUN: not grep pextrd %t +; RUN: not grep pshufd %t +; RUN: grep movss %t | count 2 + +define void @t1(float* %R, <4 x float>* %P1) nounwind { + %X = load <4 x float>* %P1 + %tmp = extractelement <4 x float> %X, i32 3 + store float %tmp, float* %R + ret void +} + +define float @t2(<4 x float>* %P1) nounwind { + %X = load <4 x float>* %P1 + %tmp = extractelement <4 x float> %X, i32 2 + ret float %tmp +} + +define void @t3(i32* %R, <4 x i32>* %P1) nounwind { + %X = load <4 x i32>* %P1 + %tmp = extractelement <4 x i32> %X, i32 3 + store i32 %tmp, i32* %R + ret void +} + +define i32 @t4(<4 x i32>* %P1) nounwind { + %X = load <4 x i32>* %P1 + %tmp = extractelement <4 x i32> %X, i32 3 + ret i32 %tmp +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_extract.ll b/src/LLVM/test/CodeGen/X86/vec_extract.ll new file mode 100644 index 0000000..d8c4bc0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_extract.ll
@@ -0,0 +1,36 @@ +; RUN: llc < %s -mcpu=corei7 -march=x86 -mattr=+sse2,-sse41 -o %t +; RUN: grep movss %t | count 4 +; RUN: grep movhlps %t | count 1 +; RUN: not grep pshufd %t +; RUN: grep unpckhpd %t | count 1 + +define void @test1(<4 x float>* %F, float* %f) nounwind { + %tmp = load <4 x float>* %F ; <<4 x float>> [#uses=2] + %tmp7 = fadd <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1] + %tmp2 = extractelement <4 x float> %tmp7, i32 0 ; <float> [#uses=1] + store float %tmp2, float* %f + ret void +} + +define float @test2(<4 x float>* %F, float* %f) nounwind { + %tmp = load <4 x float>* %F ; <<4 x float>> [#uses=2] + %tmp7 = fadd <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1] + %tmp2 = extractelement <4 x float> %tmp7, i32 2 ; <float> [#uses=1] + ret float %tmp2 +} + +define void @test3(float* %R, <4 x float>* %P1) nounwind { + %X = load <4 x float>* %P1 ; <<4 x float>> [#uses=1] + %tmp = extractelement <4 x float> %X, i32 3 ; <float> [#uses=1] + store float %tmp, float* %R + ret void +} + +define double @test4(double %A) nounwind { + %tmp1 = call <2 x double> @foo( ) ; <<2 x double>> [#uses=1] + %tmp2 = extractelement <2 x double> %tmp1, i32 1 ; <double> [#uses=1] + %tmp3 = fadd double %tmp2, %A ; <double> [#uses=1] + ret double %tmp3 +} + +declare <2 x double> @foo()
diff --git a/src/LLVM/test/CodeGen/X86/vec_fneg.ll b/src/LLVM/test/CodeGen/X86/vec_fneg.ll new file mode 100644 index 0000000..f118cf5 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_fneg.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 + +define <4 x float> @t1(<4 x float> %Q) { + %tmp15 = fsub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %Q + ret <4 x float> %tmp15 +} + +define <4 x float> @t2(<4 x float> %Q) { + %tmp15 = fsub <4 x float> zeroinitializer, %Q + ret <4 x float> %tmp15 +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_i64.ll b/src/LLVM/test/CodeGen/X86/vec_i64.ll new file mode 100644 index 0000000..462e16e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_i64.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t +; RUN: grep movq %t | count 2 + +; Used movq to load i64 into a v2i64 when the top i64 is 0. + +define <2 x i64> @foo1(i64* %y) nounwind { +entry: + %tmp1 = load i64* %y, align 8 ; <i64> [#uses=1] + %s2v = insertelement <2 x i64> undef, i64 %tmp1, i32 0 + %loadl = shufflevector <2 x i64> zeroinitializer, <2 x i64> %s2v, <2 x i32> <i32 2, i32 1> + ret <2 x i64> %loadl +} + + +define <4 x float> @foo2(i64* %p) nounwind { +entry: + %load = load i64* %p + %s2v = insertelement <2 x i64> undef, i64 %load, i32 0 + %loadl = shufflevector <2 x i64> zeroinitializer, <2 x i64> %s2v, <2 x i32> <i32 2, i32 1> + %0 = bitcast <2 x i64> %loadl to <4 x float> + ret <4 x float> %0 +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_ins_extract-1.ll b/src/LLVM/test/CodeGen/X86/vec_ins_extract-1.ll new file mode 100644 index 0000000..2951193 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_ins_extract-1.ll
@@ -0,0 +1,25 @@ +; RUN: llc < %s -march=x86 -mcpu=yonah | grep {(%esp,%eax,4)} | count 4 + +; Inserts and extracts with variable indices must be lowered +; to memory accesses. + +define i32 @t0(i32 inreg %t7, <4 x i32> inreg %t8) nounwind { + %t13 = insertelement <4 x i32> %t8, i32 76, i32 %t7 + %t9 = extractelement <4 x i32> %t13, i32 0 + ret i32 %t9 +} +define i32 @t1(i32 inreg %t7, <4 x i32> inreg %t8) nounwind { + %t13 = insertelement <4 x i32> %t8, i32 76, i32 0 + %t9 = extractelement <4 x i32> %t13, i32 %t7 + ret i32 %t9 +} +define <4 x i32> @t2(i32 inreg %t7, <4 x i32> inreg %t8) nounwind { + %t9 = extractelement <4 x i32> %t8, i32 %t7 + %t13 = insertelement <4 x i32> %t8, i32 %t9, i32 0 + ret <4 x i32> %t13 +} +define <4 x i32> @t3(i32 inreg %t7, <4 x i32> inreg %t8) nounwind { + %t9 = extractelement <4 x i32> %t8, i32 0 + %t13 = insertelement <4 x i32> %t8, i32 %t9, i32 %t7 + ret <4 x i32> %t13 +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_ins_extract.ll b/src/LLVM/test/CodeGen/X86/vec_ins_extract.ll new file mode 100644 index 0000000..929eda6 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_ins_extract.ll
@@ -0,0 +1,52 @@ +; RUN: opt < %s -scalarrepl -instcombine | \ +; RUN: llc -march=x86 -mcpu=yonah | not grep sub.*esp + +; This checks that various insert/extract idiom work without going to the +; stack. +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" + +define void @test(<4 x float>* %F, float %f) { +entry: + %tmp = load <4 x float>* %F ; <<4 x float>> [#uses=2] + %tmp3 = fadd <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1] + %tmp10 = insertelement <4 x float> %tmp3, float %f, i32 0 ; <<4 x float>> [#uses=2] + %tmp6 = fadd <4 x float> %tmp10, %tmp10 ; <<4 x float>> [#uses=1] + store <4 x float> %tmp6, <4 x float>* %F + ret void +} + +define void @test2(<4 x float>* %F, float %f) { +entry: + %G = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3] + %tmp = load <4 x float>* %F ; <<4 x float>> [#uses=2] + %tmp3 = fadd <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1] + store <4 x float> %tmp3, <4 x float>* %G + %tmp.upgrd.1 = getelementptr <4 x float>* %G, i32 0, i32 2 ; <float*> [#uses=1] + store float %f, float* %tmp.upgrd.1 + %tmp4 = load <4 x float>* %G ; <<4 x float>> [#uses=2] + %tmp6 = fadd <4 x float> %tmp4, %tmp4 ; <<4 x float>> [#uses=1] + store <4 x float> %tmp6, <4 x float>* %F + ret void +} + +define void @test3(<4 x float>* %F, float* %f) { +entry: + %G = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2] + %tmp = load <4 x float>* %F ; <<4 x float>> [#uses=2] + %tmp3 = fadd <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1] + store <4 x float> %tmp3, <4 x float>* %G + %tmp.upgrd.2 = getelementptr <4 x float>* %G, i32 0, i32 2 ; <float*> [#uses=1] + %tmp.upgrd.3 = load float* %tmp.upgrd.2 ; <float> [#uses=1] + store float %tmp.upgrd.3, float* %f + ret void +} + +define void @test4(<4 x float>* %F, float* %f) { +entry: + %tmp = load <4 x float>* %F ; <<4 x float>> [#uses=2] + %tmp5.lhs = extractelement <4 x float> %tmp, i32 0 ; <float> [#uses=1] + 
%tmp5.rhs = extractelement <4 x float> %tmp, i32 0 ; <float> [#uses=1] + %tmp5 = fadd float %tmp5.lhs, %tmp5.rhs ; <float> [#uses=1] + store float %tmp5, float* %f + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_insert-2.ll b/src/LLVM/test/CodeGen/X86/vec_insert-2.ll new file mode 100644 index 0000000..dee91fd --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_insert-2.ll
@@ -0,0 +1,42 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | FileCheck --check-prefix=X32 %s +; RUN: llc < %s -march=x86-64 -mattr=+sse2,-sse41 | FileCheck --check-prefix=X64 %s + +define <4 x float> @t1(float %s, <4 x float> %tmp) nounwind { +; X32: t1: +; X32: shufps $36 +; X32: ret + + %tmp1 = insertelement <4 x float> %tmp, float %s, i32 3 + ret <4 x float> %tmp1 +} + +define <4 x i32> @t2(i32 %s, <4 x i32> %tmp) nounwind { +; X32: t2: +; X32: shufps $36 +; X32: ret + + %tmp1 = insertelement <4 x i32> %tmp, i32 %s, i32 3 + ret <4 x i32> %tmp1 +} + +define <2 x double> @t3(double %s, <2 x double> %tmp) nounwind { +; X32: t3: +; X32: movhpd +; X32: ret + +; X64: t3: +; X64: unpcklpd +; X64: ret + + %tmp1 = insertelement <2 x double> %tmp, double %s, i32 1 + ret <2 x double> %tmp1 +} + +define <8 x i16> @t4(i16 %s, <8 x i16> %tmp) nounwind { +; X32: t4: +; X32: pinsrw +; X32: ret + + %tmp1 = insertelement <8 x i16> %tmp, i16 %s, i32 5 + ret <8 x i16> %tmp1 +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_insert-3.ll b/src/LLVM/test/CodeGen/X86/vec_insert-3.ll new file mode 100644 index 0000000..a18cd86 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_insert-3.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=x86-64 -mattr=+sse2,-sse41 | grep punpcklqdq | count 1 + +define <2 x i64> @t1(i64 %s, <2 x i64> %tmp) nounwind { + %tmp1 = insertelement <2 x i64> %tmp, i64 %s, i32 1 + ret <2 x i64> %tmp1 +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_insert-4.ll b/src/LLVM/test/CodeGen/X86/vec_insert-4.ll new file mode 100644 index 0000000..2c31e56 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_insert-4.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=x86 -mcpu=yonah | grep 1084227584 | count 1 + +; ModuleID = '<stdin>' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i686-apple-darwin9.2.2" + +define <8 x float> @f(<8 x float> %a, i32 %b) nounwind { +entry: + %vecins = insertelement <8 x float> %a, float 5.000000e+00, i32 %b ; <<4 x float>> [#uses=1] + ret <8 x float> %vecins +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_insert-5.ll b/src/LLVM/test/CodeGen/X86/vec_insert-5.ll new file mode 100644 index 0000000..471cc16 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_insert-5.ll
@@ -0,0 +1,33 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 > %t +; RUN: grep shll %t | grep 12 +; RUN: grep pslldq %t | grep 12 +; RUN: grep psrldq %t | grep 8 +; RUN: grep psrldq %t | grep 12 +; There are no MMX operations in @t1 + +define void @t1(i32 %a, x86_mmx* %P) nounwind { + %tmp12 = shl i32 %a, 12 + %tmp21 = insertelement <2 x i32> undef, i32 %tmp12, i32 1 + %tmp22 = insertelement <2 x i32> %tmp21, i32 0, i32 0 + %tmp23 = bitcast <2 x i32> %tmp22 to x86_mmx + store x86_mmx %tmp23, x86_mmx* %P + ret void +} + +define <4 x float> @t2(<4 x float>* %P) nounwind { + %tmp1 = load <4 x float>* %P + %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 4, i32 4, i32 4, i32 0 > + ret <4 x float> %tmp2 +} + +define <4 x float> @t3(<4 x float>* %P) nounwind { + %tmp1 = load <4 x float>* %P + %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 4, i32 4 > + ret <4 x float> %tmp2 +} + +define <4 x float> @t4(<4 x float>* %P) nounwind { + %tmp1 = load <4 x float>* %P + %tmp2 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp1, <4 x i32> < i32 7, i32 0, i32 0, i32 0 > + ret <4 x float> %tmp2 +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_insert-6.ll b/src/LLVM/test/CodeGen/X86/vec_insert-6.ll new file mode 100644 index 0000000..de3b36f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_insert-6.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep pslldq +; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -o /dev/null -stats -info-output-file - | grep asm-printer | grep 6 + +define <4 x float> @t3(<4 x float>* %P) nounwind { + %tmp1 = load <4 x float>* %P + %tmp2 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp1, <4 x i32> < i32 4, i32 4, i32 4, i32 0 > + ret <4 x float> %tmp2 +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_insert-7.ll b/src/LLVM/test/CodeGen/X86/vec_insert-7.ll new file mode 100644 index 0000000..268b5c4 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_insert-7.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=x86 -mattr=+mmx,+sse42 -mtriple=i686-apple-darwin9 | FileCheck %s +; MMX insertelement is not available; these are promoted to XMM. +; (Without SSE they are split to two ints, and the code is much better.) + +define x86_mmx @mmx_movzl(x86_mmx %x) nounwind { +entry: +; CHECK: mmx_movzl +; CHECK: pinsrd +; CHECK: pinsrd + %tmp = bitcast x86_mmx %x to <2 x i32> + %tmp3 = insertelement <2 x i32> %tmp, i32 32, i32 0 ; <<2 x i32>> [#uses=1] + %tmp8 = insertelement <2 x i32> %tmp3, i32 0, i32 1 ; <<2 x i32>> [#uses=1] + %tmp9 = bitcast <2 x i32> %tmp8 to x86_mmx + ret x86_mmx %tmp9 +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_insert-8.ll b/src/LLVM/test/CodeGen/X86/vec_insert-8.ll new file mode 100644 index 0000000..650951c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_insert-8.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=x86 -mattr=+sse41 -o %t + +; tests variable insert and extract of a 4 x i32 + +define <4 x i32> @var_insert(<4 x i32> %x, i32 %val, i32 %idx) nounwind { +entry: + %tmp3 = insertelement <4 x i32> %x, i32 %val, i32 %idx ; <<4 x i32>> [#uses=1] + ret <4 x i32> %tmp3 +} + +define i32 @var_extract(<4 x i32> %x, i32 %idx) nounwind { +entry: + %tmp3 = extractelement <4 x i32> %x, i32 %idx ; <<i32>> [#uses=1] + ret i32 %tmp3 +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_insert-9.ll b/src/LLVM/test/CodeGen/X86/vec_insert-9.ll new file mode 100644 index 0000000..e5a7ccc --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_insert-9.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=x86 -mattr=+sse41 > %t +; RUN: grep pinsrd %t | count 1 + +define <4 x i32> @var_insert2(<4 x i32> %x, i32 %val, i32 %idx) nounwind { +entry: + %tmp3 = insertelement <4 x i32> undef, i32 %val, i32 0 ; <<4 x i32>> [#uses=1] + %tmp4 = insertelement <4 x i32> %tmp3, i32 %idx, i32 3 ; <<4 x i32>> [#uses=1] + ret <4 x i32> %tmp4 +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_insert.ll b/src/LLVM/test/CodeGen/X86/vec_insert.ll new file mode 100644 index 0000000..47a7ba1 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_insert.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep movss | count 1 +; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | not grep pinsrw + +define void @test(<4 x float>* %F, i32 %I) nounwind { + %tmp = load <4 x float>* %F ; <<4 x float>> [#uses=1] + %f = sitofp i32 %I to float ; <float> [#uses=1] + %tmp1 = insertelement <4 x float> %tmp, float %f, i32 0 ; <<4 x float>> [#uses=2] + %tmp18 = fadd <4 x float> %tmp1, %tmp1 ; <<4 x float>> [#uses=1] + store <4 x float> %tmp18, <4 x float>* %F + ret void +} + +define void @test2(<4 x float>* %F, i32 %I, float %g) nounwind { + %tmp = load <4 x float>* %F ; <<4 x float>> [#uses=1] + %f = sitofp i32 %I to float ; <float> [#uses=1] + %tmp1 = insertelement <4 x float> %tmp, float %f, i32 2 ; <<4 x float>> [#uses=1] + store <4 x float> %tmp1, <4 x float>* %F + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_loadsingles.ll b/src/LLVM/test/CodeGen/X86/vec_loadsingles.ll new file mode 100644 index 0000000..8812c4f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_loadsingles.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movq + +define <4 x float> @a(<4 x float> %a, float* nocapture %p) nounwind readonly { +entry: + %tmp1 = load float* %p + %vecins = insertelement <4 x float> undef, float %tmp1, i32 0 + %add.ptr = getelementptr float* %p, i32 1 + %tmp5 = load float* %add.ptr + %vecins7 = insertelement <4 x float> %vecins, float %tmp5, i32 1 + ret <4 x float> %vecins7 +} +
diff --git a/src/LLVM/test/CodeGen/X86/vec_logical.ll b/src/LLVM/test/CodeGen/X86/vec_logical.ll new file mode 100644 index 0000000..1dc0b16 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_logical.ll
@@ -0,0 +1,42 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 > %t +; RUN: grep xorps %t | count 2 +; RUN: grep andnps %t +; RUN: grep movaps %t | count 2 + +define void @t(<4 x float> %A) { + %tmp1277 = fsub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %A + store <4 x float> %tmp1277, <4 x float>* null + ret void +} + +define <4 x float> @t1(<4 x float> %a, <4 x float> %b) { +entry: + %tmp9 = bitcast <4 x float> %a to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp10 = bitcast <4 x float> %b to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp11 = xor <4 x i32> %tmp9, %tmp10 ; <<4 x i32>> [#uses=1] + %tmp13 = bitcast <4 x i32> %tmp11 to <4 x float> ; <<4 x float>> [#uses=1] + ret <4 x float> %tmp13 +} + +define <2 x double> @t2(<2 x double> %a, <2 x double> %b) { +entry: + %tmp9 = bitcast <2 x double> %a to <2 x i64> ; <<2 x i64>> [#uses=1] + %tmp10 = bitcast <2 x double> %b to <2 x i64> ; <<2 x i64>> [#uses=1] + %tmp11 = and <2 x i64> %tmp9, %tmp10 ; <<2 x i64>> [#uses=1] + %tmp13 = bitcast <2 x i64> %tmp11 to <2 x double> ; <<2 x double>> [#uses=1] + ret <2 x double> %tmp13 +} + +define void @t3(<4 x float> %a, <4 x float> %b, <4 x float>* %c, <4 x float>* %d) { +entry: + %tmp3 = load <4 x float>* %c ; <<4 x float>> [#uses=1] + %tmp11 = bitcast <4 x float> %a to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp12 = bitcast <4 x float> %b to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp13 = xor <4 x i32> %tmp11, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] + %tmp14 = and <4 x i32> %tmp12, %tmp13 ; <<4 x i32>> [#uses=1] + %tmp27 = bitcast <4 x float> %tmp3 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp28 = or <4 x i32> %tmp14, %tmp27 ; <<4 x i32>> [#uses=1] + %tmp30 = bitcast <4 x i32> %tmp28 to <4 x float> ; <<4 x float>> [#uses=1] + store <4 x float> %tmp30, <4 x float>* %d + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_return.ll b/src/LLVM/test/CodeGen/X86/vec_return.ll new file mode 100644 index 0000000..d7dba29 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_return.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 > %t +; RUN: grep pxor %t | count 1 +; RUN: grep movaps %t | count 1 +; RUN: not grep shuf %t + +define <2 x double> @test() { + ret <2 x double> zeroinitializer +} + +define <4 x i32> @test2() nounwind { + ret <4 x i32> < i32 0, i32 0, i32 1, i32 0 > +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_set-2.ll b/src/LLVM/test/CodeGen/X86/vec_set-2.ll new file mode 100644 index 0000000..de2c690 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_set-2.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movss | count 1 +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movd | count 1 + +define <4 x float> @test1(float %a) nounwind { + %tmp = insertelement <4 x float> zeroinitializer, float %a, i32 0 ; <<4 x float>> [#uses=1] + %tmp5 = insertelement <4 x float> %tmp, float 0.000000e+00, i32 1 ; <<4 x float>> [#uses=1] + %tmp6 = insertelement <4 x float> %tmp5, float 0.000000e+00, i32 2 ; <<4 x float>> [#uses=1] + %tmp7 = insertelement <4 x float> %tmp6, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=1] + ret <4 x float> %tmp7 +} + +define <2 x i64> @test(i32 %a) nounwind { + %tmp = insertelement <4 x i32> zeroinitializer, i32 %a, i32 0 ; <<8 x i16>> [#uses=1] + %tmp6 = insertelement <4 x i32> %tmp, i32 0, i32 1 ; <<8 x i32>> [#uses=1] + %tmp8 = insertelement <4 x i32> %tmp6, i32 0, i32 2 ; <<8 x i32>> [#uses=1] + %tmp10 = insertelement <4 x i32> %tmp8, i32 0, i32 3 ; <<8 x i32>> [#uses=1] + %tmp19 = bitcast <4 x i32> %tmp10 to <2 x i64> ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp19 +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_set-3.ll b/src/LLVM/test/CodeGen/X86/vec_set-3.ll new file mode 100644 index 0000000..7355415 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_set-3.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t +; RUN: grep pshufd %t | count 2 + +define <4 x float> @test(float %a) nounwind { + %tmp = insertelement <4 x float> zeroinitializer, float %a, i32 1 ; <<4 x float>> [#uses=1] + %tmp5 = insertelement <4 x float> %tmp, float 0.000000e+00, i32 2 ; <<4 x float>> [#uses=1] + %tmp6 = insertelement <4 x float> %tmp5, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=1] + ret <4 x float> %tmp6 +} + +define <2 x i64> @test2(i32 %a) nounwind { + %tmp7 = insertelement <4 x i32> zeroinitializer, i32 %a, i32 2 ; <<4 x i32>> [#uses=1] + %tmp9 = insertelement <4 x i32> %tmp7, i32 0, i32 3 ; <<4 x i32>> [#uses=1] + %tmp10 = bitcast <4 x i32> %tmp9 to <2 x i64> ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp10 +} +
diff --git a/src/LLVM/test/CodeGen/X86/vec_set-4.ll b/src/LLVM/test/CodeGen/X86/vec_set-4.ll new file mode 100644 index 0000000..d0c61e5 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_set-4.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep pinsrw | count 2 + +define <2 x i64> @test(i16 %a) nounwind { +entry: + %tmp10 = insertelement <8 x i16> zeroinitializer, i16 %a, i32 3 ; <<8 x i16>> [#uses=1] + %tmp12 = insertelement <8 x i16> %tmp10, i16 0, i32 4 ; <<8 x i16>> [#uses=1] + %tmp14 = insertelement <8 x i16> %tmp12, i16 0, i32 5 ; <<8 x i16>> [#uses=1] + %tmp16 = insertelement <8 x i16> %tmp14, i16 0, i32 6 ; <<8 x i16>> [#uses=1] + %tmp18 = insertelement <8 x i16> %tmp16, i16 0, i32 7 ; <<8 x i16>> [#uses=1] + %tmp19 = bitcast <8 x i16> %tmp18 to <2 x i64> ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp19 +} + +define <2 x i64> @test2(i8 %a) nounwind { +entry: + %tmp24 = insertelement <16 x i8> zeroinitializer, i8 %a, i32 10 ; <<16 x i8>> [#uses=1] + %tmp26 = insertelement <16 x i8> %tmp24, i8 0, i32 11 ; <<16 x i8>> [#uses=1] + %tmp28 = insertelement <16 x i8> %tmp26, i8 0, i32 12 ; <<16 x i8>> [#uses=1] + %tmp30 = insertelement <16 x i8> %tmp28, i8 0, i32 13 ; <<16 x i8>> [#uses=1] + %tmp32 = insertelement <16 x i8> %tmp30, i8 0, i32 14 ; <<16 x i8>> [#uses=1] + %tmp34 = insertelement <16 x i8> %tmp32, i8 0, i32 15 ; <<16 x i8>> [#uses=1] + %tmp35 = bitcast <16 x i8> %tmp34 to <2 x i64> ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp35 +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_set-5.ll b/src/LLVM/test/CodeGen/X86/vec_set-5.ll new file mode 100644 index 0000000..80d0a66 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_set-5.ll
@@ -0,0 +1,28 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t +; RUN: grep movlhps %t | count 1 +; RUN: grep movq %t | count 2 + +define <4 x float> @test1(float %a, float %b) nounwind { + %tmp = insertelement <4 x float> zeroinitializer, float %a, i32 0 ; <<4 x float>> [#uses=1] + %tmp6 = insertelement <4 x float> %tmp, float 0.000000e+00, i32 1 ; <<4 x float>> [#uses=1] + %tmp8 = insertelement <4 x float> %tmp6, float %b, i32 2 ; <<4 x float>> [#uses=1] + %tmp9 = insertelement <4 x float> %tmp8, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=1] + ret <4 x float> %tmp9 +} + +define <4 x float> @test2(float %a, float %b) nounwind { + %tmp = insertelement <4 x float> zeroinitializer, float %a, i32 0 ; <<4 x float>> [#uses=1] + %tmp7 = insertelement <4 x float> %tmp, float %b, i32 1 ; <<4 x float>> [#uses=1] + %tmp8 = insertelement <4 x float> %tmp7, float 0.000000e+00, i32 2 ; <<4 x float>> [#uses=1] + %tmp9 = insertelement <4 x float> %tmp8, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=1] + ret <4 x float> %tmp9 +} + +define <2 x i64> @test3(i32 %a, i32 %b) nounwind { + %tmp = insertelement <4 x i32> zeroinitializer, i32 %a, i32 0 ; <<4 x i32>> [#uses=1] + %tmp6 = insertelement <4 x i32> %tmp, i32 %b, i32 1 ; <<4 x i32>> [#uses=1] + %tmp8 = insertelement <4 x i32> %tmp6, i32 0, i32 2 ; <<4 x i32>> [#uses=1] + %tmp10 = insertelement <4 x i32> %tmp8, i32 0, i32 3 ; <<4 x i32>> [#uses=1] + %tmp11 = bitcast <4 x i32> %tmp10 to <2 x i64> ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp11 +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_set-6.ll b/src/LLVM/test/CodeGen/X86/vec_set-6.ll new file mode 100644 index 0000000..95cdde7 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_set-6.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t +; RUN: grep movss %t | count 1 +; RUN: grep movq %t | count 1 +; RUN: grep shufps %t | count 1 + +define <4 x float> @test(float %a, float %b, float %c) nounwind { + %tmp = insertelement <4 x float> zeroinitializer, float %a, i32 1 ; <<4 x float>> [#uses=1] + %tmp8 = insertelement <4 x float> %tmp, float %b, i32 2 ; <<4 x float>> [#uses=1] + %tmp10 = insertelement <4 x float> %tmp8, float %c, i32 3 ; <<4 x float>> [#uses=1] + ret <4 x float> %tmp10 +} +
diff --git a/src/LLVM/test/CodeGen/X86/vec_set-7.ll b/src/LLVM/test/CodeGen/X86/vec_set-7.ll new file mode 100644 index 0000000..e0fef11 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_set-7.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movsd | count 1 + +define <2 x i64> @test(<2 x i64>* %p) nounwind { + %tmp = bitcast <2 x i64>* %p to double* + %tmp.upgrd.1 = load double* %tmp + %tmp.upgrd.2 = insertelement <2 x double> undef, double %tmp.upgrd.1, i32 0 + %tmp5 = insertelement <2 x double> %tmp.upgrd.2, double 0.0, i32 1 + %tmp.upgrd.3 = bitcast <2 x double> %tmp5 to <2 x i64> + ret <2 x i64> %tmp.upgrd.3 +} +
diff --git a/src/LLVM/test/CodeGen/X86/vec_set-8.ll b/src/LLVM/test/CodeGen/X86/vec_set-8.ll new file mode 100644 index 0000000..66056d0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_set-8.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s +; CHECK-NOT: movsd +; CHECK: movd {{%rdi|%rcx}}, %xmm0 +; CHECK-NOT: movsd + +define <2 x i64> @test(i64 %i) nounwind { +entry: + %tmp10 = insertelement <2 x i64> undef, i64 %i, i32 0 + %tmp11 = insertelement <2 x i64> %tmp10, i64 0, i32 1 + ret <2 x i64> %tmp11 +} +
diff --git a/src/LLVM/test/CodeGen/X86/vec_set-9.ll b/src/LLVM/test/CodeGen/X86/vec_set-9.ll new file mode 100644 index 0000000..3656e5f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_set-9.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=x86-64 | grep movd | count 1 +; RUN: llc < %s -march=x86-64 | grep {movlhps.*%xmm0, %xmm0} + +define <2 x i64> @test3(i64 %A) nounwind { +entry: + %B = insertelement <2 x i64> undef, i64 %A, i32 1 + ret <2 x i64> %B +} +
diff --git a/src/LLVM/test/CodeGen/X86/vec_set-A.ll b/src/LLVM/test/CodeGen/X86/vec_set-A.ll new file mode 100644 index 0000000..92dda4c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_set-A.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s +; CHECK: movl $1, %{{.*}} +define <2 x i64> @test1() nounwind { +entry: + ret <2 x i64> < i64 1, i64 0 > +} +
diff --git a/src/LLVM/test/CodeGen/X86/vec_set-B.ll b/src/LLVM/test/CodeGen/X86/vec_set-B.ll new file mode 100644 index 0000000..f5b3e8b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_set-B.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep movaps +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep esp | count 2 + +; These should both generate something like this: +;_test3: +; movl $1234567, %eax +; andl 4(%esp), %eax +; movd %eax, %xmm0 +; ret + +define <2 x i64> @test3(i64 %arg) nounwind { +entry: + %A = and i64 %arg, 1234567 + %B = insertelement <2 x i64> zeroinitializer, i64 %A, i32 0 + ret <2 x i64> %B +} + +define <2 x i64> @test2(i64 %arg) nounwind { +entry: + %A = and i64 %arg, 1234567 + %B = insertelement <2 x i64> undef, i64 %A, i32 0 + ret <2 x i64> %B +} +
diff --git a/src/LLVM/test/CodeGen/X86/vec_set-C.ll b/src/LLVM/test/CodeGen/X86/vec_set-C.ll new file mode 100644 index 0000000..133f23b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_set-C.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mattr=+sse2 | grep movq +; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mattr=+sse2 | grep mov | count 1 +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-linux -mattr=+sse2 | grep movd + +define <2 x i64> @t1(i64 %x) nounwind { + %tmp8 = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0 + ret <2 x i64> %tmp8 +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_set-D.ll b/src/LLVM/test/CodeGen/X86/vec_set-D.ll new file mode 100644 index 0000000..3d6369e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_set-D.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movq + +define <4 x i32> @t(i32 %x, i32 %y) nounwind { + %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0 + %tmp2 = insertelement <4 x i32> %tmp1, i32 %y, i32 1 + ret <4 x i32> %tmp2 +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_set-E.ll b/src/LLVM/test/CodeGen/X86/vec_set-E.ll new file mode 100644 index 0000000..d78be66 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_set-E.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movq + +define <4 x float> @t(float %X) nounwind { + %tmp11 = insertelement <4 x float> undef, float %X, i32 0 + %tmp12 = insertelement <4 x float> %tmp11, float %X, i32 1 + %tmp27 = insertelement <4 x float> %tmp12, float 0.000000e+00, i32 2 + %tmp28 = insertelement <4 x float> %tmp27, float 0.000000e+00, i32 3 + ret <4 x float> %tmp28 +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_set-F.ll b/src/LLVM/test/CodeGen/X86/vec_set-F.ll new file mode 100644 index 0000000..6dd3cb0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_set-F.ll
@@ -0,0 +1,19 @@ +; RUN: llc < %s -mtriple=i686-linux -mattr=+sse2 | grep movq +; RUN: llc < %s -mtriple=i686-linux -mattr=+sse2 | grep movsd +; RUN: llc < %s -mtriple=i686-linux -mattr=+sse2 | grep mov | count 3 + +define <2 x i64> @t1(<2 x i64>* %ptr) nounwind { + %tmp45 = bitcast <2 x i64>* %ptr to <2 x i32>* + %tmp615 = load <2 x i32>* %tmp45 + %tmp7 = bitcast <2 x i32> %tmp615 to i64 + %tmp8 = insertelement <2 x i64> zeroinitializer, i64 %tmp7, i32 0 + ret <2 x i64> %tmp8 +} + +define <2 x i64> @t2(i64 %x) nounwind { + %tmp717 = bitcast i64 %x to double + %tmp8 = insertelement <2 x double> undef, double %tmp717, i32 0 + %tmp9 = insertelement <2 x double> %tmp8, double 0.000000e+00, i32 1 + %tmp11 = bitcast <2 x double> %tmp9 to <2 x i64> + ret <2 x i64> %tmp11 +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_set-G.ll b/src/LLVM/test/CodeGen/X86/vec_set-G.ll new file mode 100644 index 0000000..4a542fe --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_set-G.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movss + +define fastcc void @t(<4 x float> %A) nounwind { + %tmp41896 = extractelement <4 x float> %A, i32 0 ; <float> [#uses=1] + %tmp14082 = insertelement <4 x float> < float 0.000000e+00, float undef, float undef, float undef >, float %tmp41896, i32 1 ; <<4 x float>> [#uses=1] + %tmp14083 = insertelement <4 x float> %tmp14082, float 0.000000e+00, i32 2 ; <<4 x float>> [#uses=1] + store <4 x float> %tmp14083, <4 x float>* null, align 16 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_set-H.ll b/src/LLVM/test/CodeGen/X86/vec_set-H.ll new file mode 100644 index 0000000..5037e36 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_set-H.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep movz + +define <2 x i64> @doload64(i16 signext %x) nounwind { +entry: + %tmp36 = insertelement <8 x i16> undef, i16 %x, i32 0 ; <<8 x i16>> [#uses=1] + %tmp37 = insertelement <8 x i16> %tmp36, i16 %x, i32 1 ; <<8 x i16>> [#uses=1] + %tmp38 = insertelement <8 x i16> %tmp37, i16 %x, i32 2 ; <<8 x i16>> [#uses=1] + %tmp39 = insertelement <8 x i16> %tmp38, i16 %x, i32 3 ; <<8 x i16>> [#uses=1] + %tmp40 = insertelement <8 x i16> %tmp39, i16 %x, i32 4 ; <<8 x i16>> [#uses=1] + %tmp41 = insertelement <8 x i16> %tmp40, i16 %x, i32 5 ; <<8 x i16>> [#uses=1] + %tmp42 = insertelement <8 x i16> %tmp41, i16 %x, i32 6 ; <<8 x i16>> [#uses=1] + %tmp43 = insertelement <8 x i16> %tmp42, i16 %x, i32 7 ; <<8 x i16>> [#uses=1] + %tmp46 = bitcast <8 x i16> %tmp43 to <2 x i64> ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp46 +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_set-I.ll b/src/LLVM/test/CodeGen/X86/vec_set-I.ll new file mode 100644 index 0000000..64f36f9 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_set-I.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movd +; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep xorp + +define void @t1() nounwind { + %tmp298.i.i = load <4 x float>* null, align 16 + %tmp304.i.i = bitcast <4 x float> %tmp298.i.i to <4 x i32> + %tmp305.i.i = and <4 x i32> %tmp304.i.i, < i32 -1, i32 0, i32 0, i32 0 > + store <4 x i32> %tmp305.i.i, <4 x i32>* null, align 16 + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_set-J.ll b/src/LLVM/test/CodeGen/X86/vec_set-J.ll new file mode 100644 index 0000000..d90ab85 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_set-J.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movss +; PR2472 + +define <4 x i32> @a(<4 x i32> %a) nounwind { +entry: + %vecext = extractelement <4 x i32> %a, i32 0 + insertelement <4 x i32> zeroinitializer, i32 %vecext, i32 0 + %add = add <4 x i32> %a, %0 + ret <4 x i32> %add +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_set.ll b/src/LLVM/test/CodeGen/X86/vec_set.ll new file mode 100644 index 0000000..be5d01a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_set.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep punpckl | count 7 + +define void @test(<8 x i16>* %b, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind { + %tmp = insertelement <8 x i16> zeroinitializer, i16 %a0, i32 0 ; <<8 x i16>> [#uses=1] + %tmp2 = insertelement <8 x i16> %tmp, i16 %a1, i32 1 ; <<8 x i16>> [#uses=1] + %tmp4 = insertelement <8 x i16> %tmp2, i16 %a2, i32 2 ; <<8 x i16>> [#uses=1] + %tmp6 = insertelement <8 x i16> %tmp4, i16 %a3, i32 3 ; <<8 x i16>> [#uses=1] + %tmp8 = insertelement <8 x i16> %tmp6, i16 %a4, i32 4 ; <<8 x i16>> [#uses=1] + %tmp10 = insertelement <8 x i16> %tmp8, i16 %a5, i32 5 ; <<8 x i16>> [#uses=1] + %tmp12 = insertelement <8 x i16> %tmp10, i16 %a6, i32 6 ; <<8 x i16>> [#uses=1] + %tmp14 = insertelement <8 x i16> %tmp12, i16 %a7, i32 7 ; <<8 x i16>> [#uses=1] + store <8 x i16> %tmp14, <8 x i16>* %b + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/vec_sext.ll b/src/LLVM/test/CodeGen/X86/vec_sext.ll new file mode 100644 index 0000000..776ddec --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_sext.ll
@@ -0,0 +1,69 @@ +; RUN: llc < %s -march=x86-64 +; PR 9267 + +define<4 x i32> @func_16_32() { + %F = load <4 x i16>* undef + %G = sext <4 x i16> %F to <4 x i32> + %H = load <4 x i16>* undef + %Y = sext <4 x i16> %H to <4 x i32> + %T = add <4 x i32> %Y, %G + store <4 x i32>%T , <4 x i32>* undef + ret <4 x i32> %T +} + +define<4 x i64> @func_16_64() { + %F = load <4 x i16>* undef + %G = sext <4 x i16> %F to <4 x i64> + %H = load <4 x i16>* undef + %Y = sext <4 x i16> %H to <4 x i64> + %T = xor <4 x i64> %Y, %G + store <4 x i64>%T , <4 x i64>* undef + ret <4 x i64> %T +} + +define<4 x i64> @func_32_64() { + %F = load <4 x i32>* undef + %G = sext <4 x i32> %F to <4 x i64> + %H = load <4 x i32>* undef + %Y = sext <4 x i32> %H to <4 x i64> + %T = or <4 x i64> %Y, %G + ret <4 x i64> %T +} + +define<4 x i16> @func_8_16() { + %F = load <4 x i8>* undef + %G = sext <4 x i8> %F to <4 x i16> + %H = load <4 x i8>* undef + %Y = sext <4 x i8> %H to <4 x i16> + %T = add <4 x i16> %Y, %G + ret <4 x i16> %T +} + +define<4 x i32> @func_8_32() { + %F = load <4 x i8>* undef + %G = sext <4 x i8> %F to <4 x i32> + %H = load <4 x i8>* undef + %Y = sext <4 x i8> %H to <4 x i32> + %T = sub <4 x i32> %Y, %G + ret <4 x i32> %T +} + +define<4 x i64> @func_8_64() { + %F = load <4 x i8>* undef + %G = sext <4 x i8> %F to <4 x i64> + %H = load <4 x i8>* undef + %Y = sext <4 x i8> %H to <4 x i64> + %T = add <4 x i64> %Y, %G + ret <4 x i64> %T +} + +define<4 x i32> @const_16_32() { + %G = sext <4 x i16> <i16 0, i16 3, i16 8, i16 7> to <4 x i32> + ret <4 x i32> %G +} + +define<4 x i64> @const_16_64() { + %G = sext <4 x i16> <i16 0, i16 3, i16 8, i16 7> to <4 x i64> + ret <4 x i64> %G +} +
diff --git a/src/LLVM/test/CodeGen/X86/vec_shift.ll b/src/LLVM/test/CodeGen/X86/vec_shift.ll new file mode 100644 index 0000000..ddf0469 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_shift.ll
@@ -0,0 +1,34 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep psllw +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep psrlq +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep psraw + +define <2 x i64> @t1(<2 x i64> %b1, <2 x i64> %c) nounwind { +entry: + %tmp6 = bitcast <2 x i64> %c to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp8 = bitcast <2 x i64> %b1 to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp9 = tail call <8 x i16> @llvm.x86.sse2.psll.w( <8 x i16> %tmp8, <8 x i16> %tmp6 ) nounwind readnone ; <<8 x i16>> [#uses=1] + %tmp10 = bitcast <8 x i16> %tmp9 to <2 x i64> ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp10 +} + +define <2 x i64> @t3(<2 x i64> %b1, i32 %c) nounwind { +entry: + %tmp2 = bitcast <2 x i64> %b1 to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp4 = insertelement <4 x i32> undef, i32 %c, i32 0 ; <<4 x i32>> [#uses=1] + %tmp8 = bitcast <4 x i32> %tmp4 to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp9 = tail call <8 x i16> @llvm.x86.sse2.psra.w( <8 x i16> %tmp2, <8 x i16> %tmp8 ) ; <<8 x i16>> [#uses=1] + %tmp11 = bitcast <8 x i16> %tmp9 to <2 x i64> ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp11 +} + +declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone + +define <2 x i64> @t2(<2 x i64> %b1, <2 x i64> %c) nounwind { +entry: + %tmp9 = tail call <2 x i64> @llvm.x86.sse2.psrl.q( <2 x i64> %b1, <2 x i64> %c ) nounwind readnone ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp9 +} + +declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone + +declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/X86/vec_shift2.ll b/src/LLVM/test/CodeGen/X86/vec_shift2.ll new file mode 100644 index 0000000..c5f9dc4 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_shift2.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep CPI + +define <2 x i64> @t1(<2 x i64> %b1, <2 x i64> %c) nounwind { + %tmp1 = bitcast <2 x i64> %b1 to <8 x i16> + %tmp2 = tail call <8 x i16> @llvm.x86.sse2.psrl.w( <8 x i16> %tmp1, <8 x i16> bitcast (<4 x i32> < i32 14, i32 undef, i32 undef, i32 undef > to <8 x i16>) ) nounwind readnone + %tmp3 = bitcast <8 x i16> %tmp2 to <2 x i64> + ret <2 x i64> %tmp3 +} + +define <4 x i32> @t2(<2 x i64> %b1, <2 x i64> %c) nounwind { + %tmp1 = bitcast <2 x i64> %b1 to <4 x i32> + %tmp2 = tail call <4 x i32> @llvm.x86.sse2.psll.d( <4 x i32> %tmp1, <4 x i32> < i32 14, i32 undef, i32 undef, i32 undef > ) nounwind readnone + ret <4 x i32> %tmp2 +} + +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/X86/vec_shift3.ll b/src/LLVM/test/CodeGen/X86/vec_shift3.ll new file mode 100644 index 0000000..1ebf455 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_shift3.ll
@@ -0,0 +1,26 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep psllq +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep psraw +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movd | count 2 + +define <2 x i64> @t1(<2 x i64> %x1, i32 %bits) nounwind { +entry: + %tmp3 = tail call <2 x i64> @llvm.x86.sse2.pslli.q( <2 x i64> %x1, i32 %bits ) nounwind readnone ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp3 +} + +define <2 x i64> @t2(<2 x i64> %x1) nounwind { +entry: + %tmp3 = tail call <2 x i64> @llvm.x86.sse2.pslli.q( <2 x i64> %x1, i32 10 ) nounwind readnone ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp3 +} + +define <2 x i64> @t3(<2 x i64> %x1, i32 %bits) nounwind { +entry: + %tmp2 = bitcast <2 x i64> %x1 to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp4 = tail call <8 x i16> @llvm.x86.sse2.psrai.w( <8 x i16> %tmp2, i32 %bits ) nounwind readnone ; <<8 x i16>> [#uses=1] + %tmp5 = bitcast <8 x i16> %tmp4 to <2 x i64> ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp5 +} + +declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone +declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/X86/vec_shift4.ll b/src/LLVM/test/CodeGen/X86/vec_shift4.ll new file mode 100644 index 0000000..9ef7fbd --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_shift4.ll
@@ -0,0 +1,25 @@ +; RUN: llc < %s -march=x86 -mattr=+sse41 | FileCheck %s + +define <2 x i64> @shl1(<4 x i32> %r, <4 x i32> %a) nounwind readnone ssp { +entry: +; CHECK-NOT: shll +; CHECK: pslld +; CHECK: paddd +; CHECK: cvttps2dq +; CHECK: pmulld + + %shl = shl <4 x i32> %r, %a ; <<4 x i32>> [#uses=1] + %tmp2 = bitcast <4 x i32> %shl to <2 x i64> ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp2 +} + +define <2 x i64> @shl2(<16 x i8> %r, <16 x i8> %a) nounwind readnone ssp { +entry: +; CHECK-NOT: shlb +; CHECK: pblendvb +; CHECK: pblendvb +; CHECK: pblendvb + %shl = shl <16 x i8> %r, %a ; <<16 x i8>> [#uses=1] + %tmp2 = bitcast <16 x i8> %shl to <2 x i64> ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp2 +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_shuffle-11.ll b/src/LLVM/test/CodeGen/X86/vec_shuffle-11.ll new file mode 100644 index 0000000..2e8f85c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_shuffle-11.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 +; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin | not grep mov + +define <4 x i32> @test() nounwind { + %tmp131 = call <2 x i64> @llvm.x86.sse2.psrl.dq( <2 x i64> < i64 -1, i64 -1 >, i32 96 ) ; <<2 x i64>> [#uses=1] + %tmp137 = bitcast <2 x i64> %tmp131 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp138 = and <4 x i32> %tmp137, bitcast (<2 x i64> < i64 -1, i64 -1 > to <4 x i32>) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %tmp138 +} + +declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32)
diff --git a/src/LLVM/test/CodeGen/X86/vec_shuffle-14.ll b/src/LLVM/test/CodeGen/X86/vec_shuffle-14.ll new file mode 100644 index 0000000..f0cfc44 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_shuffle-14.ll
@@ -0,0 +1,42 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movd | count 1 +; RUN: llc < %s -march=x86-64 -mattr=+sse2 | grep movd | count 2 +; RUN: llc < %s -march=x86-64 -mattr=+sse2 | grep movq | count 3 +; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep xor + +define <4 x i32> @t1(i32 %a) nounwind { +entry: + %tmp = insertelement <4 x i32> undef, i32 %a, i32 0 + %tmp6 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %tmp, <4 x i32> < i32 4, i32 1, i32 2, i32 3 > ; <<4 x i32>> [#uses=1] + ret <4 x i32> %tmp6 +} + +define <2 x i64> @t2(i64 %a) nounwind { +entry: + %tmp = insertelement <2 x i64> undef, i64 %a, i32 0 + %tmp6 = shufflevector <2 x i64> zeroinitializer, <2 x i64> %tmp, <2 x i32> < i32 2, i32 1 > ; <<4 x i32>> [#uses=1] + ret <2 x i64> %tmp6 +} + +define <2 x i64> @t3(<2 x i64>* %a) nounwind { +entry: + %tmp4 = load <2 x i64>* %a, align 16 ; <<2 x i64>> [#uses=1] + %tmp6 = bitcast <2 x i64> %tmp4 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp7 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %tmp6, <4 x i32> < i32 4, i32 5, i32 2, i32 3 > ; <<4 x i32>> [#uses=1] + %tmp8 = bitcast <4 x i32> %tmp7 to <2 x i64> ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp8 +} + +define <2 x i64> @t4(<2 x i64> %a) nounwind { +entry: + %tmp5 = bitcast <2 x i64> %a to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp6 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %tmp5, <4 x i32> < i32 4, i32 5, i32 2, i32 3 > ; <<4 x i32>> [#uses=1] + %tmp7 = bitcast <4 x i32> %tmp6 to <2 x i64> ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp7 +} + +define <2 x i64> @t5(<2 x i64> %a) nounwind { +entry: + %tmp6 = shufflevector <2 x i64> zeroinitializer, <2 x i64> %a, <2 x i32> < i32 2, i32 1 > ; <<4 x i32>> [#uses=1] + ret <2 x i64> %tmp6 +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_shuffle-15.ll b/src/LLVM/test/CodeGen/X86/vec_shuffle-15.ll new file mode 100644 index 0000000..5a9b8fd --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_shuffle-15.ll
@@ -0,0 +1,81 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 + +define <2 x i64> @t00(<2 x i64> %a, <2 x i64> %b) nounwind { + %tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 0, i32 0 > + ret <2 x i64> %tmp +} + +define <2 x i64> @t01(<2 x i64> %a, <2 x i64> %b) nounwind { + %tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 0, i32 1 > + ret <2 x i64> %tmp +} + +define <2 x i64> @t02(<2 x i64> %a, <2 x i64> %b) nounwind { + %tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 0, i32 2 > + ret <2 x i64> %tmp +} + +define <2 x i64> @t03(<2 x i64> %a, <2 x i64> %b) nounwind { + %tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 0, i32 3 > + ret <2 x i64> %tmp +} + +define <2 x i64> @t10(<2 x i64> %a, <2 x i64> %b) nounwind { + %tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 1, i32 0 > + ret <2 x i64> %tmp +} + +define <2 x i64> @t11(<2 x i64> %a, <2 x i64> %b) nounwind { + %tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 1, i32 1 > + ret <2 x i64> %tmp +} + +define <2 x i64> @t12(<2 x i64> %a, <2 x i64> %b) nounwind { + %tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 1, i32 2 > + ret <2 x i64> %tmp +} + +define <2 x i64> @t13(<2 x i64> %a, <2 x i64> %b) nounwind { + %tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 1, i32 3 > + ret <2 x i64> %tmp +} + +define <2 x i64> @t20(<2 x i64> %a, <2 x i64> %b) nounwind { + %tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 2, i32 0 > + ret <2 x i64> %tmp +} + +define <2 x i64> @t21(<2 x i64> %a, <2 x i64> %b) nounwind { + %tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 2, i32 1 > + ret <2 x i64> %tmp +} + +define <2 x i64> @t22(<2 x i64> %a, <2 x i64> %b) nounwind { + %tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 2, i32 2 > + ret <2 x i64> %tmp +} + +define <2 x i64> @t23(<2 x i64> %a, <2 x i64> %b) nounwind { + %tmp = shufflevector <2 x i64> %a, <2 x i64> %b, 
<2 x i32> < i32 2, i32 3 > + ret <2 x i64> %tmp +} + +define <2 x i64> @t30(<2 x i64> %a, <2 x i64> %b) nounwind { + %tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 3, i32 0 > + ret <2 x i64> %tmp +} + +define <2 x i64> @t31(<2 x i64> %a, <2 x i64> %b) nounwind { + %tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 3, i32 1 > + ret <2 x i64> %tmp +} + +define <2 x i64> @t32(<2 x i64> %a, <2 x i64> %b) nounwind { + %tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 3, i32 2 > + ret <2 x i64> %tmp +} + +define <2 x i64> @t33(<2 x i64> %a, <2 x i64> %b) nounwind { + %tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 3, i32 3 > + ret <2 x i64> %tmp +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_shuffle-16.ll b/src/LLVM/test/CodeGen/X86/vec_shuffle-16.ll new file mode 100644 index 0000000..06f38ed --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_shuffle-16.ll
@@ -0,0 +1,43 @@ +; RUN: llc < %s -march=x86 -mattr=+sse,-sse2 -mtriple=i386-apple-darwin | FileCheck %s -check-prefix=sse +; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin | FileCheck %s -check-prefix=sse2 + +; sse: t1: +; sse2: t1: +define <4 x float> @t1(<4 x float> %a, <4 x float> %b) nounwind { +; sse: shufps +; sse2: pshufd +; sse2-NEXT: ret + %tmp1 = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer + ret <4 x float> %tmp1 +} + +; sse: t2: +; sse2: t2: +define <4 x float> @t2(<4 x float> %A, <4 x float> %B) nounwind { +; sse: shufps +; sse2: pshufd +; sse2-NEXT: ret + %tmp = shufflevector <4 x float> %A, <4 x float> %B, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > + ret <4 x float> %tmp +} + +; sse: t3: +; sse2: t3: +define <4 x float> @t3(<4 x float> %A, <4 x float> %B) nounwind { +; sse: shufps +; sse2: pshufd +; sse2-NEXT: ret + %tmp = shufflevector <4 x float> %A, <4 x float> %B, <4 x i32> < i32 4, i32 4, i32 4, i32 4 > + ret <4 x float> %tmp +} + +; sse: t4: +; sse2: t4: +define <4 x float> @t4(<4 x float> %A, <4 x float> %B) nounwind { + +; sse: shufps +; sse2: pshufd +; sse2-NEXT: ret + %tmp = shufflevector <4 x float> %A, <4 x float> %B, <4 x i32> < i32 1, i32 3, i32 2, i32 0 > + ret <4 x float> %tmp +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_shuffle-17.ll b/src/LLVM/test/CodeGen/X86/vec_shuffle-17.ll new file mode 100644 index 0000000..ebc8c5b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_shuffle-17.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s +; CHECK-NOT: xor +; CHECK: movd {{%rdi|%rcx}}, %xmm0 +; CHECK-NOT: xor +; PR2108 + +define <2 x i64> @doload64(i64 %x) nounwind { +entry: + %tmp717 = bitcast i64 %x to double ; <double> [#uses=1] + %tmp8 = insertelement <2 x double> undef, double %tmp717, i32 0 ; <<2 x double>> [#uses=1] + %tmp9 = insertelement <2 x double> %tmp8, double 0.000000e+00, i32 1 ; <<2 x double>> [#uses=1] + %tmp11 = bitcast <2 x double> %tmp9 to <2 x i64> ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp11 +} +
diff --git a/src/LLVM/test/CodeGen/X86/vec_shuffle-18.ll b/src/LLVM/test/CodeGen/X86/vec_shuffle-18.ll new file mode 100644 index 0000000..1104a4a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_shuffle-18.ll
@@ -0,0 +1,25 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8.8.0 | grep mov | count 7 + + %struct.vector4_t = type { <4 x float> } + +define void @swizzle(i8* %a, %struct.vector4_t* %b, %struct.vector4_t* %c) nounwind { +entry: + %tmp9 = getelementptr %struct.vector4_t* %b, i32 0, i32 0 ; <<4 x float>*> [#uses=2] + %tmp10 = load <4 x float>* %tmp9, align 16 ; <<4 x float>> [#uses=1] + %tmp14 = bitcast i8* %a to double* ; <double*> [#uses=1] + %tmp15 = load double* %tmp14 ; <double> [#uses=1] + %tmp16 = insertelement <2 x double> undef, double %tmp15, i32 0 ; <<2 x double>> [#uses=1] + %tmp18 = bitcast <2 x double> %tmp16 to <4 x float> ; <<4 x float>> [#uses=1] + %tmp19 = shufflevector <4 x float> %tmp10, <4 x float> %tmp18, <4 x i32> < i32 4, i32 5, i32 2, i32 3 > ; <<4 x float>> [#uses=1] + store <4 x float> %tmp19, <4 x float>* %tmp9, align 16 + %tmp28 = getelementptr %struct.vector4_t* %c, i32 0, i32 0 ; <<4 x float>*> [#uses=2] + %tmp29 = load <4 x float>* %tmp28, align 16 ; <<4 x float>> [#uses=1] + %tmp26 = getelementptr i8* %a, i32 8 ; <i8*> [#uses=1] + %tmp33 = bitcast i8* %tmp26 to double* ; <double*> [#uses=1] + %tmp34 = load double* %tmp33 ; <double> [#uses=1] + %tmp35 = insertelement <2 x double> undef, double %tmp34, i32 0 ; <<2 x double>> [#uses=1] + %tmp37 = bitcast <2 x double> %tmp35 to <4 x float> ; <<4 x float>> [#uses=1] + %tmp38 = shufflevector <4 x float> %tmp29, <4 x float> %tmp37, <4 x i32> < i32 4, i32 5, i32 2, i32 3 > ; <<4 x float>> [#uses=1] + store <4 x float> %tmp38, <4 x float>* %tmp28, align 16 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_shuffle-19.ll b/src/LLVM/test/CodeGen/X86/vec_shuffle-19.ll new file mode 100644 index 0000000..861a1cc --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_shuffle-19.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -o /dev/null -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 4 +; PR2485 + +define <4 x i32> @t(<4 x i32> %a, <4 x i32> %b) nounwind { +entry: + %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> < i32 4, i32 0, i32 0, i32 0 > ; <<4 x i32>> [#uses=1] + ret <4 x i32> %shuffle +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_shuffle-20.ll b/src/LLVM/test/CodeGen/X86/vec_shuffle-20.ll new file mode 100644 index 0000000..fc06b95 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_shuffle-20.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -o /dev/null -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 3 + +define <4 x float> @func(<4 x float> %fp0, <4 x float> %fp1) nounwind { +entry: + shufflevector <4 x float> %fp0, <4 x float> %fp1, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:0 [#uses=1] + ret <4 x float> %0 +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_shuffle-22.ll b/src/LLVM/test/CodeGen/X86/vec_shuffle-22.ll new file mode 100644 index 0000000..6807e4d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_shuffle-22.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=x86 -mcpu=pentium-m | FileCheck %s + +define <4 x float> @t1(<4 x float> %a) nounwind { +; CHECK: movlhps + %tmp1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> < i32 0, i32 1, i32 0, i32 1 > ; <<4 x float>> [#uses=1] + ret <4 x float> %tmp1 +} + +define <4 x i32> @t2(<4 x i32>* %a) nounwind { +; CHECK: pshufd +; CHECK: ret + %tmp1 = load <4 x i32>* %a + %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> < i32 0, i32 1, i32 0, i32 1 > ; <<4 x i32>> [#uses=1] + ret <4 x i32> %tmp2 +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_shuffle-23.ll b/src/LLVM/test/CodeGen/X86/vec_shuffle-23.ll new file mode 100644 index 0000000..05a3a1e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_shuffle-23.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep punpck +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep pshufd + +define i32 @t() nounwind { +entry: + %a = alloca <4 x i32> ; <<4 x i32>*> [#uses=2] + %b = alloca <4 x i32> ; <<4 x i32>*> [#uses=5] + volatile store <4 x i32> < i32 0, i32 1, i32 2, i32 3 >, <4 x i32>* %a + %tmp = load <4 x i32>* %a ; <<4 x i32>> [#uses=1] + store <4 x i32> %tmp, <4 x i32>* %b + %tmp1 = load <4 x i32>* %b ; <<4 x i32>> [#uses=1] + %tmp2 = load <4 x i32>* %b ; <<4 x i32>> [#uses=1] + %punpckldq = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x i32>> [#uses=1] + store <4 x i32> %punpckldq, <4 x i32>* %b + %tmp3 = load <4 x i32>* %b ; <<4 x i32>> [#uses=1] + %result = extractelement <4 x i32> %tmp3, i32 0 ; <i32> [#uses=1] + ret i32 %result +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_shuffle-24.ll b/src/LLVM/test/CodeGen/X86/vec_shuffle-24.ll new file mode 100644 index 0000000..1b104de --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_shuffle-24.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s + +define i32 @t() nounwind optsize { +entry: +; CHECK: punpckldq + %a = alloca <4 x i32> ; <<4 x i32>*> [#uses=2] + %b = alloca <4 x i32> ; <<4 x i32>*> [#uses=5] + volatile store <4 x i32> < i32 0, i32 1, i32 2, i32 3 >, <4 x i32>* %a + %tmp = load <4 x i32>* %a ; <<4 x i32>> [#uses=1] + store <4 x i32> %tmp, <4 x i32>* %b + %tmp1 = load <4 x i32>* %b ; <<4 x i32>> [#uses=1] + %tmp2 = load <4 x i32>* %b ; <<4 x i32>> [#uses=1] + %punpckldq = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x i32>> [#uses=1] + store <4 x i32> %punpckldq, <4 x i32>* %b + %tmp3 = load <4 x i32>* %b ; <<4 x i32>> [#uses=1] + %result = extractelement <4 x i32> %tmp3, i32 0 ; <i32> [#uses=1] + ret i32 %result +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_shuffle-25.ll b/src/LLVM/test/CodeGen/X86/vec_shuffle-25.ll new file mode 100644 index 0000000..d9b2388 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_shuffle-25.ll
@@ -0,0 +1,34 @@ +; RUN: llc < %s -march=x86 -mattr=sse41 -o %t +; RUN: grep unpcklps %t | count 3 +; RUN: grep unpckhps %t | count 1 + +; Transpose example using the more generic vector shuffle. We return +; float8 instead of float16 since x86 can return that in register. +; ModuleID = 'transpose2_opt.bc' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" +target triple = "i386-apple-cl.1.0" +@r0 = common global <4 x float> zeroinitializer, align 16 ; <<4 x float>*> [#uses=1] +@r1 = common global <4 x float> zeroinitializer, align 16 ; <<4 x float>*> [#uses=1] +@r2 = common global <4 x float> zeroinitializer, align 16 ; <<4 x float>*> [#uses=1] +@r3 = common global <4 x float> zeroinitializer, align 16 ; <<4 x float>*> [#uses=1] + +define <8 x float> @__transpose2(<4 x float> %p0, <4 x float> %p1, <4 x float> %p2, <4 x float> %p3) nounwind { +entry: + %unpcklps = shufflevector <4 x float> %p0, <4 x float> %p2, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=2] + %unpckhps = shufflevector <4 x float> %p0, <4 x float> %p2, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=2] + %unpcklps8 = shufflevector <4 x float> %p1, <4 x float> %p3, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=2] + %unpckhps11 = shufflevector <4 x float> %p1, <4 x float> %p3, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=2] + %unpcklps14 = shufflevector <4 x float> %unpcklps, <4 x float> %unpcklps8, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=1] + %unpcklps14a = shufflevector <4 x float> %unpcklps14, <4 x float> undef, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %unpckhps17 = shufflevector <4 x float> %unpcklps, <4 x float> %unpcklps8, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> 
[#uses=1] + %unpckhps17a = shufflevector <4 x float> %unpckhps17, <4 x float> undef, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %r1 = shufflevector <16 x float> %unpcklps14a, <16 x float> %unpckhps17a, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %unpcklps20 = shufflevector <4 x float> %unpckhps, <4 x float> %unpckhps11, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=1] + %unpcklps20a = shufflevector <4 x float> %unpcklps20, <4 x float> undef, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %r2 = shufflevector <16 x float> %r1, <16 x float> %unpcklps20a, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15> + %unpckhps23 = shufflevector <4 x float> %unpckhps, <4 x float> %unpckhps11, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=1] + %unpckhps23a = shufflevector <4 x float> %unpckhps23, <4 x float> undef, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %r3 = shufflevector <16 x float> %r2, <16 x float> %unpckhps23a, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19> + %r4 = shufflevector <16 x float> %r3, <16 x float> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + ret <8 x float> %r4 +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_shuffle-26.ll b/src/LLVM/test/CodeGen/X86/vec_shuffle-26.ll new file mode 100644 index 0000000..086af6b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_shuffle-26.ll
@@ -0,0 +1,29 @@ +; RUN: llc < %s -march=x86 -mattr=sse41 -o %t +; RUN: grep unpcklps %t | count 1 +; RUN: grep unpckhps %t | count 3 + +; Transpose example using the more generic vector shuffle. Return float8 +; instead of float16 +; ModuleID = 'transpose2_opt.bc' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" +target triple = "i386-apple-cl.1.0" +@r0 = common global <4 x float> zeroinitializer, align 16 ; <<4 x float>*> [#uses=1] +@r1 = common global <4 x float> zeroinitializer, align 16 ; <<4 x float>*> [#uses=1] +@r2 = common global <4 x float> zeroinitializer, align 16 ; <<4 x float>*> [#uses=1] +@r3 = common global <4 x float> zeroinitializer, align 16 ; <<4 x float>*> [#uses=1] + +define <8 x float> @__transpose2(<4 x float> %p0, <4 x float> %p1, <4 x float> %p2, <4 x float> %p3) nounwind { +entry: + %unpcklps = shufflevector <4 x float> %p0, <4 x float> %p2, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=2] + %unpckhps = shufflevector <4 x float> %p0, <4 x float> %p2, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=2] + %unpcklps8 = shufflevector <4 x float> %p1, <4 x float> %p3, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=2] + %unpckhps11 = shufflevector <4 x float> %p1, <4 x float> %p3, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=2] + %unpcklps14 = shufflevector <4 x float> %unpcklps, <4 x float> %unpcklps8, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=1] + %unpckhps17 = shufflevector <4 x float> %unpcklps, <4 x float> %unpcklps8, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=1] + %r1 = shufflevector <4 x float> %unpcklps14, <4 x float> %unpckhps17, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 > + %unpcklps20 = shufflevector <4 x float> %unpckhps, <4 x float> %unpckhps11, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>> 
[#uses=1] + %unpckhps23 = shufflevector <4 x float> %unpckhps, <4 x float> %unpckhps11, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=1] + %r2 = shufflevector <4 x float> %unpcklps20, <4 x float> %unpckhps23, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 > +; %r3 = shufflevector <8 x float> %r1, <8 x float> %r2, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15 >; + ret <8 x float> %r2 +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_shuffle-27.ll b/src/LLVM/test/CodeGen/X86/vec_shuffle-27.ll new file mode 100644 index 0000000..dec98c7 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_shuffle-27.ll
@@ -0,0 +1,38 @@ +; RUN: llc < %s -march=x86 -mattr=sse41 | FileCheck %s + +; ModuleID = 'vec_shuffle-27.bc' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" +target triple = "i686-apple-cl.1.0" + +define <8 x float> @my2filter4_1d(<4 x float> %a, <8 x float> %T0, <8 x float> %T1) nounwind readnone { +entry: +; CHECK: subps +; CHECK: mulps +; CHECK: addps +; CHECK: subps +; CHECK: mulps +; CHECK: addps + %tmp7 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3 > ; <<8 x float>> [#uses=1] + %sub = fsub <8 x float> %T1, %T0 ; <<8 x float>> [#uses=1] + %mul = fmul <8 x float> %sub, %tmp7 ; <<8 x float>> [#uses=1] + %add = fadd <8 x float> %mul, %T0 ; <<8 x float>> [#uses=1] + ret <8 x float> %add +} + +; Test case for r122206 +define void @test2(<4 x i64>* %ap, <4 x i64>* %bp) nounwind { +entry: +; CHECK: movdqa + %a = load <4 x i64> * %ap + %b = load <4 x i64> * %bp + %mulaa = mul <4 x i64> %a, %a + %mulbb = mul <4 x i64> %b, %b + %mulab = mul <4 x i64> %a, %b + %vect1271 = shufflevector <4 x i64> %mulaa, <4 x i64> %mulbb, <4 x i32> <i32 0, i32 4, i32 undef, i32 undef> + %vect1272 = shufflevector <4 x i64> %mulaa, <4 x i64> %mulbb, <4 x i32> <i32 1, i32 5, i32 undef, i32 undef> + %vect1487 = shufflevector <4 x i64> %vect1271, <4 x i64> %mulab, <4 x i32> <i32 0, i32 1, i32 2, i32 4> + %vect1488 = shufflevector <4 x i64> %vect1272, <4 x i64> %mulab, <4 x i32> <i32 0, i32 1, i32 2, i32 5> + store <4 x i64> %vect1487, <4 x i64>* %ap + store <4 x i64> %vect1488, <4 x i64>* %bp + ret void; +} \ No newline at end of file
diff --git a/src/LLVM/test/CodeGen/X86/vec_shuffle-28.ll b/src/LLVM/test/CodeGen/X86/vec_shuffle-28.ll new file mode 100644 index 0000000..343685b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_shuffle-28.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86 -mcpu=core2 -o %t +; RUN: grep pshufb %t | count 1 + +; FIXME: this test has a superfluous punpcklqdq pre-pshufb currently. +; Don't XFAIL it because it's still better than the previous code. + +; Pack various elements via shuffles. +define <8 x i16> @shuf1(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone { +entry: + %tmp7 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 1, i32 8, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef > + ret <8 x i16> %tmp7 +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_shuffle-30.ll b/src/LLVM/test/CodeGen/X86/vec_shuffle-30.ll new file mode 100644 index 0000000..1651c4c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_shuffle-30.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -march=x86 -mattr=sse41 -o %t +; RUN: grep pshufhw %t | grep -- -95 | count 1 +; RUN: grep shufps %t | count 1 +; RUN: not grep pslldq %t + +; Test case when creating pshufhw, we incorrectly set the higher order bit +; for an undef, +define void @test(<8 x i16>* %dest, <8 x i16> %in) nounwind { +entry: + %0 = load <8 x i16>* %dest + %1 = shufflevector <8 x i16> %0, <8 x i16> %in, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 13, i32 undef, i32 14, i32 14> + store <8 x i16> %1, <8 x i16>* %dest + ret void +} + +; A test case where we shouldn't generate a punpckldq but a pshufd and a pslldq +define void @test2(<4 x i32>* %dest, <4 x i32> %in) nounwind { +entry: + %0 = shufflevector <4 x i32> %in, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> < i32 undef, i32 5, i32 undef, i32 2> + store <4 x i32> %0, <4 x i32>* %dest + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_shuffle-31.ll b/src/LLVM/test/CodeGen/X86/vec_shuffle-31.ll new file mode 100644 index 0000000..bb06e15 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_shuffle-31.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=x86 -mcpu=core2 -o %t +; RUN: grep pshufb %t | count 1 + +define <8 x i16> @shuf3(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone { +entry: + %tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 0, i32 1, i32 undef, i32 undef, i32 3, i32 11, i32 undef , i32 undef > + ret <8 x i16> %tmp9 +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_shuffle-34.ll b/src/LLVM/test/CodeGen/X86/vec_shuffle-34.ll new file mode 100644 index 0000000..d057b3f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_shuffle-34.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=x86 -mcpu=core2 | grep pshufb | count 2 + +define <8 x i16> @shuf2(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone { +entry: + %tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef > + ret <8 x i16> %tmp8 +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_shuffle-35.ll b/src/LLVM/test/CodeGen/X86/vec_shuffle-35.ll new file mode 100644 index 0000000..7f0fcb5 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_shuffle-35.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -march=x86 -mcpu=yonah -stack-alignment=16 -o %t +; RUN: grep pextrw %t | count 13 +; RUN: grep pinsrw %t | count 14 +; RUN: grep rolw %t | count 13 +; RUN: not grep esp %t +; RUN: not grep ebp %t +; RUN: llc < %s -march=x86 -mcpu=core2 -stack-alignment=16 -o %t +; RUN: grep pshufb %t | count 3 + +define <16 x i8> @shuf1(<16 x i8> %T0) nounwind readnone { +entry: + %tmp8 = shufflevector <16 x i8> %T0, <16 x i8> undef, <16 x i32> < i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 12, i32 13, i32 15 , i32 14 > + ret <16 x i8> %tmp8 +} + +define <16 x i8> @shuf2(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone { +entry: + %tmp8 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> < i32 undef, i32 undef, i32 3, i32 2, i32 17, i32 16, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 12, i32 13, i32 15 , i32 14 > + ret <16 x i8> %tmp8 +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_shuffle-36.ll b/src/LLVM/test/CodeGen/X86/vec_shuffle-36.ll new file mode 100644 index 0000000..8090afc --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_shuffle-36.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=x86-64 -mattr=sse41 | FileCheck %s + +define <8 x i16> @shuf6(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone { +; CHECK: pshufb +; CHECK-NOT: pshufb +; CHECK: ret +entry: + %tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 3, i32 2, i32 0, i32 2, i32 1, i32 5, i32 6 , i32 undef > + ret <8 x i16> %tmp9 +} + +define <8 x i16> @shuf7(<8 x i16> %t0) { +; CHECK: pshufd + %tmp10 = shufflevector <8 x i16> %t0, <8 x i16> undef, <8 x i32> < i32 undef, i32 2, i32 2, i32 2, i32 2, i32 2, i32 undef, i32 undef > + ret <8 x i16> %tmp10 +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_shuffle-37.ll b/src/LLVM/test/CodeGen/X86/vec_shuffle-37.ll new file mode 100644 index 0000000..950040a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_shuffle-37.ll
@@ -0,0 +1,45 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s +; RUN: llc -O0 < %s -march=x86 -mcpu=core2 | FileCheck %s --check-prefix=CHECK_O0 + +define <4 x i32> @t00(<4 x i32>* %a0) nounwind ssp { +entry: +; CHECK: movaps ({{%rdi|%rcx}}), %xmm0 +; CHECK: movaps %xmm0, %xmm1 +; CHECK-NEXT: movss %xmm2, %xmm1 +; CHECK-NEXT: shufps $36, %xmm1, %xmm0 + %0 = load <4 x i32>* undef, align 16 + %1 = load <4 x i32>* %a0, align 16 + %2 = shufflevector <4 x i32> %1, <4 x i32> %0, <4 x i32> <i32 0, i32 1, i32 2, i32 4> + ret <4 x i32> %2 +} + +define void @t01(double* %a0) nounwind ssp { +entry: +; CHECK_O0: movsd (%eax), %xmm0 +; CHECK_O0: unpcklpd %xmm0, %xmm0 + %tmp93 = load double* %a0, align 8 + %vecinit94 = insertelement <2 x double> undef, double %tmp93, i32 1 + store <2 x double> %vecinit94, <2 x double>* undef + ret void +} + +define void @t02(<8 x i32>* %source, <2 x i32>* %dest) nounwind noinline { +entry: +; CHECK: movaps 32({{%rdi|%rcx}}), %xmm0 +; CHECK-NEXT: movaps 48({{%rdi|%rcx}}), %xmm1 +; CHECK-NEXT: movss %xmm1, %xmm0 +; CHECK-NEXT: movq %xmm0, ({{%rsi|%rdx}}) + %0 = bitcast <8 x i32>* %source to <4 x i32>* + %arrayidx = getelementptr inbounds <4 x i32>* %0, i64 3 + %tmp2 = load <4 x i32>* %arrayidx, align 16 + %tmp3 = extractelement <4 x i32> %tmp2, i32 0 + %tmp5 = insertelement <2 x i32> <i32 undef, i32 0>, i32 %tmp3, i32 0 + %arrayidx7 = getelementptr inbounds <8 x i32>* %source, i64 1 + %1 = bitcast <8 x i32>* %arrayidx7 to <4 x i32>* + %tmp8 = load <4 x i32>* %1, align 16 + %tmp9 = extractelement <4 x i32> %tmp8, i32 1 + %tmp11 = insertelement <2 x i32> %tmp5, i32 %tmp9, i32 1 + store <2 x i32> %tmp11, <2 x i32>* %dest, align 8 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_shuffle-38.ll b/src/LLVM/test/CodeGen/X86/vec_shuffle-38.ll new file mode 100644 index 0000000..69a2ede --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_shuffle-38.ll
@@ -0,0 +1,59 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s + +define <2 x double> @ld(<2 x double> %p) nounwind optsize ssp { +; CHECK: unpcklpd + %shuffle = shufflevector <2 x double> %p, <2 x double> undef, <2 x i32> zeroinitializer + ret <2 x double> %shuffle +} + +define <2 x double> @hd(<2 x double> %p) nounwind optsize ssp { +; CHECK: unpckhpd + %shuffle = shufflevector <2 x double> %p, <2 x double> undef, <2 x i32> <i32 1, i32 1> + ret <2 x double> %shuffle +} + +define <2 x i64> @ldi(<2 x i64> %p) nounwind optsize ssp { +; CHECK: punpcklqdq + %shuffle = shufflevector <2 x i64> %p, <2 x i64> undef, <2 x i32> zeroinitializer + ret <2 x i64> %shuffle +} + +define <2 x i64> @hdi(<2 x i64> %p) nounwind optsize ssp { +; CHECK: punpckhqdq + %shuffle = shufflevector <2 x i64> %p, <2 x i64> undef, <2 x i32> <i32 1, i32 1> + ret <2 x i64> %shuffle +} + +; rdar://10050549 +%struct.Float2 = type { float, float } + +define <4 x float> @loadhpi(%struct.Float2* %vPtr, <4 x float> %vecin1) nounwind readonly ssp { +entry: +; CHECK: loadhpi +; CHECK-NOT: movq +; CHECK: movhps ( + %tmp1 = bitcast %struct.Float2* %vPtr to <1 x i64>* + %addptr7 = getelementptr inbounds <1 x i64>* %tmp1, i64 0 + %tmp2 = bitcast <1 x i64>* %addptr7 to float* + %tmp3 = load float* %tmp2, align 4 + %vec = insertelement <4 x float> undef, float %tmp3, i32 0 + %addptr.i12 = getelementptr inbounds float* %tmp2, i64 1 + %tmp4 = load float* %addptr.i12, align 4 + %vecin2 = insertelement <4 x float> %vec, float %tmp4, i32 1 + %shuffle = shufflevector <4 x float> %vecin1, <4 x float> %vecin2, <4 x i32> <i32 0, i32 1, i32 4, i32 5> + ret <4 x float> %shuffle +} + +; rdar://10119696 +; CHECK: f +define <4 x float> @f(<4 x float> %x, double* nocapture %y) nounwind uwtable readonly ssp { +entry: + ; CHECK: movsd (% + ; CHECK-NEXT: movsd %xmm + %u110.i = load double* %y, align 1 + %tmp8.i = insertelement <2 x double> undef, double %u110.i, i32 0 + %tmp9.i = bitcast <2 x double> %tmp8.i to <4 x float> + 
%shuffle.i = shufflevector <4 x float> %x, <4 x float> %tmp9.i, <4 x i32> <i32 4, i32 5, i32 2, i32 3> + ret <4 x float> %shuffle.i +} +
diff --git a/src/LLVM/test/CodeGen/X86/vec_shuffle.ll b/src/LLVM/test/CodeGen/X86/vec_shuffle.ll new file mode 100644 index 0000000..a9a84d2 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_shuffle.ll
@@ -0,0 +1,45 @@ +; RUN: llc < %s -march=x86 -mcpu=core2 -o %t +; RUN: grep movq %t | count 1 +; RUN: grep pshufd %t | count 1 +; RUN: grep movupd %t | count 1 +; RUN: grep pshufhw %t | count 1 + +define void @test_v4sf(<4 x float>* %P, float %X, float %Y) nounwind { + %tmp = insertelement <4 x float> zeroinitializer, float %X, i32 0 ; <<4 x float>> [#uses=1] + %tmp2 = insertelement <4 x float> %tmp, float %X, i32 1 ; <<4 x float>> [#uses=1] + %tmp4 = insertelement <4 x float> %tmp2, float %Y, i32 2 ; <<4 x float>> [#uses=1] + %tmp6 = insertelement <4 x float> %tmp4, float %Y, i32 3 ; <<4 x float>> [#uses=1] + store <4 x float> %tmp6, <4 x float>* %P + ret void +} + +define void @test_v2sd(<2 x double>* %P, double %X, double %Y) nounwind { + %tmp = insertelement <2 x double> zeroinitializer, double %X, i32 0 ; <<2 x double>> [#uses=1] + %tmp2 = insertelement <2 x double> %tmp, double %Y, i32 1 ; <<2 x double>> [#uses=1] + store <2 x double> %tmp2, <2 x double>* %P + ret void +} + +define void @test_v8i16(<2 x i64>* %res, <2 x i64>* %A) nounwind { + %tmp = load <2 x i64>* %A ; <<2 x i64>> [#uses=1] + %tmp.upgrd.1 = bitcast <2 x i64> %tmp to <8 x i16> ; <<8 x i16>> [#uses=8] + %tmp.upgrd.2 = extractelement <8 x i16> %tmp.upgrd.1, i32 0 ; <i16> [#uses=1] + %tmp1 = extractelement <8 x i16> %tmp.upgrd.1, i32 1 ; <i16> [#uses=1] + %tmp2 = extractelement <8 x i16> %tmp.upgrd.1, i32 2 ; <i16> [#uses=1] + %tmp3 = extractelement <8 x i16> %tmp.upgrd.1, i32 3 ; <i16> [#uses=1] + %tmp4 = extractelement <8 x i16> %tmp.upgrd.1, i32 6 ; <i16> [#uses=1] + %tmp5 = extractelement <8 x i16> %tmp.upgrd.1, i32 5 ; <i16> [#uses=1] + %tmp6 = extractelement <8 x i16> %tmp.upgrd.1, i32 4 ; <i16> [#uses=1] + %tmp7 = extractelement <8 x i16> %tmp.upgrd.1, i32 7 ; <i16> [#uses=1] + %tmp8 = insertelement <8 x i16> undef, i16 %tmp.upgrd.2, i32 0 ; <<8 x i16>> [#uses=1] + %tmp9 = insertelement <8 x i16> %tmp8, i16 %tmp1, i32 1 ; <<8 x i16>> [#uses=1] + %tmp10 = insertelement <8 x i16> %tmp9, i16 
%tmp2, i32 2 ; <<8 x i16>> [#uses=1] + %tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 3 ; <<8 x i16>> [#uses=1] + %tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 4 ; <<8 x i16>> [#uses=1] + %tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 5 ; <<8 x i16>> [#uses=1] + %tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 6 ; <<8 x i16>> [#uses=1] + %tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 7 ; <<8 x i16>> [#uses=1] + %tmp15.upgrd.3 = bitcast <8 x i16> %tmp15 to <2 x i64> ; <<2 x i64>> [#uses=1] + store <2 x i64> %tmp15.upgrd.3, <2 x i64>* %res + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_splat-2.ll b/src/LLVM/test/CodeGen/X86/vec_splat-2.ll new file mode 100644 index 0000000..00d2a68 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_splat-2.ll
@@ -0,0 +1,26 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep pshufd | count 1 + +define void @test(<2 x i64>* %P, i8 %x) nounwind { + %tmp = insertelement <16 x i8> zeroinitializer, i8 %x, i32 0 ; <<16 x i8>> [#uses=1] + %tmp36 = insertelement <16 x i8> %tmp, i8 %x, i32 1 ; <<16 x i8>> [#uses=1] + %tmp38 = insertelement <16 x i8> %tmp36, i8 %x, i32 2 ; <<16 x i8>> [#uses=1] + %tmp40 = insertelement <16 x i8> %tmp38, i8 %x, i32 3 ; <<16 x i8>> [#uses=1] + %tmp42 = insertelement <16 x i8> %tmp40, i8 %x, i32 4 ; <<16 x i8>> [#uses=1] + %tmp44 = insertelement <16 x i8> %tmp42, i8 %x, i32 5 ; <<16 x i8>> [#uses=1] + %tmp46 = insertelement <16 x i8> %tmp44, i8 %x, i32 6 ; <<16 x i8>> [#uses=1] + %tmp48 = insertelement <16 x i8> %tmp46, i8 %x, i32 7 ; <<16 x i8>> [#uses=1] + %tmp50 = insertelement <16 x i8> %tmp48, i8 %x, i32 8 ; <<16 x i8>> [#uses=1] + %tmp52 = insertelement <16 x i8> %tmp50, i8 %x, i32 9 ; <<16 x i8>> [#uses=1] + %tmp54 = insertelement <16 x i8> %tmp52, i8 %x, i32 10 ; <<16 x i8>> [#uses=1] + %tmp56 = insertelement <16 x i8> %tmp54, i8 %x, i32 11 ; <<16 x i8>> [#uses=1] + %tmp58 = insertelement <16 x i8> %tmp56, i8 %x, i32 12 ; <<16 x i8>> [#uses=1] + %tmp60 = insertelement <16 x i8> %tmp58, i8 %x, i32 13 ; <<16 x i8>> [#uses=1] + %tmp62 = insertelement <16 x i8> %tmp60, i8 %x, i32 14 ; <<16 x i8>> [#uses=1] + %tmp64 = insertelement <16 x i8> %tmp62, i8 %x, i32 15 ; <<16 x i8>> [#uses=1] + %tmp68 = load <2 x i64>* %P ; <<2 x i64>> [#uses=1] + %tmp71 = bitcast <2 x i64> %tmp68 to <16 x i8> ; <<16 x i8>> [#uses=1] + %tmp73 = add <16 x i8> %tmp71, %tmp64 ; <<16 x i8>> [#uses=1] + %tmp73.upgrd.1 = bitcast <16 x i8> %tmp73 to <2 x i64> ; <<2 x i64>> [#uses=1] + store <2 x i64> %tmp73.upgrd.1, <2 x i64>* %P + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_splat-3.ll b/src/LLVM/test/CodeGen/X86/vec_splat-3.ll new file mode 100644 index 0000000..649b85c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_splat-3.ll
@@ -0,0 +1,55 @@ +; RUN: llc < %s -march=x86 -mattr=sse41 -o %t +; RUN: grep punpcklwd %t | count 4 +; RUN: grep punpckhwd %t | count 4 +; RUN: grep "pshufd" %t | count 8 + +; Splat test for v8i16 +; Should generate with pshufd with masks $0, $85, $170, $255 (each mask is used twice) +define <8 x i16> @shuf_8i16_0(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone { +entry: + %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 0, i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef , i32 undef > + ret <8 x i16> %tmp6 +} + +define <8 x i16> @shuf_8i16_1(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone { +entry: + %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef > + ret <8 x i16> %tmp6 +} + +define <8 x i16> @shuf_8i16_2(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone { +entry: + %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 2, i32 undef, i32 undef, i32 2, i32 undef, i32 2, i32 undef , i32 undef > + ret <8 x i16> %tmp6 +} + +define <8 x i16> @shuf_8i16_3(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone { +entry: + %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 3, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef > + ret <8 x i16> %tmp6 +} + +define <8 x i16> @shuf_8i16_4(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone { +entry: + %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 4, i32 undef, i32 undef, i32 undef, i32 4, i32 undef, i32 undef , i32 undef > + ret <8 x i16> %tmp6 +} + +define <8 x i16> @shuf_8i16_5(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone { +entry: + %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 5, i32 undef, i32 undef, i32 5, i32 undef, i32 undef, i32 undef , i32 undef > + ret <8 x i16> %tmp6 +} + +define <8 x i16> @shuf_8i16_6(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone { +entry: + %tmp6 = shufflevector <8 x i16> %T0, <8 x 
i16> %T1, <8 x i32> < i32 6, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef > + ret <8 x i16> %tmp6 +} + + +define <8 x i16> @shuf_8i16_7(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone { +entry: + %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 7, i32 undef, i32 undef, i32 7, i32 undef, i32 undef, i32 undef , i32 undef > + ret <8 x i16> %tmp6 +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_splat-4.ll b/src/LLVM/test/CodeGen/X86/vec_splat-4.ll new file mode 100644 index 0000000..d9941e6 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_splat-4.ll
@@ -0,0 +1,104 @@ +; RUN: llc < %s -march=x86 -mattr=sse41 -o %t +; RUN: grep punpcklbw %t | count 16 +; RUN: grep punpckhbw %t | count 16 +; RUN: grep "pshufd" %t | count 16 + +; Should generate with pshufd with masks $0, $85, $170, $255 (each mask is used 4 times) + +; Splat test for v16i8 +define <16 x i8 > @shuf_16i8_0(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { +entry: + %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 0, i32 undef, i32 undef, i32 0, i32 undef, i32 0, i32 0 , i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 > + ret <16 x i8 > %tmp6 +} + +define <16 x i8 > @shuf_16i8_1(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { +entry: + %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef, i32 undef, i32 undef, i32 undef > + ret <16 x i8 > %tmp6 +} + +define <16 x i8 > @shuf_16i8_2(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { +entry: + %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 2, i32 undef, i32 undef, i32 2, i32 undef, i32 2, i32 2 , i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2 > + ret <16 x i8 > %tmp6 +} + +define <16 x i8 > @shuf_16i8_3(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { +entry: + %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 3, i32 undef, i32 undef, i32 3, i32 undef, i32 3, i32 3 , i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3 > + ret <16 x i8 > %tmp6 +} + + +define <16 x i8 > @shuf_16i8_4(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { +entry: + %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 4, i32 undef, i32 undef, i32 undef, i32 4, i32 undef, i32 undef , i32 undef, i32 undef, i32 undef, i32 undef , i32 undef, i32 undef, i32 undef, i32 undef , i32 undef > + ret <16 x i8 > %tmp6 +} + +define <16 x i8 > @shuf_16i8_5(<16 x 
i8 > %T0, <16 x i8 > %T1) nounwind readnone { +entry: + %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 5, i32 undef, i32 undef, i32 5, i32 undef, i32 5, i32 5 , i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5 > + ret <16 x i8 > %tmp6 +} + +define <16 x i8 > @shuf_16i8_6(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { +entry: + %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 6, i32 undef, i32 undef, i32 6, i32 undef, i32 6, i32 6 , i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6 > + ret <16 x i8 > %tmp6 +} + +define <16 x i8 > @shuf_16i8_7(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { +entry: + %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 7, i32 undef, i32 undef, i32 7, i32 undef, i32 undef, i32 undef , i32 undef, i32 undef, i32 undef, i32 undef , i32 undef , i32 undef, i32 undef, i32 undef , i32 undef > + ret <16 x i8 > %tmp6 +} + +define <16 x i8 > @shuf_16i8_8(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { +entry: + %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 8, i32 undef, i32 undef, i32 8, i32 undef, i32 8, i32 8 , i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8 > + ret <16 x i8 > %tmp6 +} + +define <16 x i8 > @shuf_16i8_9(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { +entry: + %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 9, i32 undef, i32 undef, i32 9, i32 undef, i32 9, i32 9 , i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9 > + ret <16 x i8 > %tmp6 +} + +define <16 x i8 > @shuf_16i8_10(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { +entry: + %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 10, i32 undef, i32 undef, i32 10, i32 undef, i32 10, i32 10 , i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10 > + ret <16 x i8 > %tmp6 +} + +define <16 x i8 > @shuf_16i8_11(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { 
+entry: + %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 11, i32 undef, i32 undef, i32 11, i32 undef, i32 11, i32 11 , i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11 > + ret <16 x i8 > %tmp6 +} + +define <16 x i8 > @shuf_16i8_12(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { +entry: + %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 12, i32 undef, i32 undef, i32 12, i32 undef, i32 12, i32 12 , i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12 > + ret <16 x i8 > %tmp6 +} + +define <16 x i8 > @shuf_16i8_13(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { +entry: + %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 13, i32 undef, i32 undef, i32 13, i32 undef, i32 13, i32 13 , i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13 > + ret <16 x i8 > %tmp6 +} + +define <16 x i8 > @shuf_16i8_14(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { +entry: + %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 14, i32 undef, i32 undef, i32 14, i32 undef, i32 14, i32 14 , i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14 > + ret <16 x i8 > %tmp6 +} + +define <16 x i8 > @shuf_16i8_15(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { +entry: + %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 15, i32 undef, i32 undef, i32 15, i32 undef, i32 15, i32 15 , i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15 > + ret <16 x i8 > %tmp6 +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_splat.ll b/src/LLVM/test/CodeGen/X86/vec_splat.ll new file mode 100644 index 0000000..0fb01c1 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_splat.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep pshufd +; RUN: llc < %s -march=x86 -mattr=+sse3 | grep movddup + +define void @test_v4sf(<4 x float>* %P, <4 x float>* %Q, float %X) nounwind { + %tmp = insertelement <4 x float> zeroinitializer, float %X, i32 0 ; <<4 x float>> [#uses=1] + %tmp2 = insertelement <4 x float> %tmp, float %X, i32 1 ; <<4 x float>> [#uses=1] + %tmp4 = insertelement <4 x float> %tmp2, float %X, i32 2 ; <<4 x float>> [#uses=1] + %tmp6 = insertelement <4 x float> %tmp4, float %X, i32 3 ; <<4 x float>> [#uses=1] + %tmp8 = load <4 x float>* %Q ; <<4 x float>> [#uses=1] + %tmp10 = fmul <4 x float> %tmp8, %tmp6 ; <<4 x float>> [#uses=1] + store <4 x float> %tmp10, <4 x float>* %P + ret void +} + +define void @test_v2sd(<2 x double>* %P, <2 x double>* %Q, double %X) nounwind { + %tmp = insertelement <2 x double> zeroinitializer, double %X, i32 0 ; <<2 x double>> [#uses=1] + %tmp2 = insertelement <2 x double> %tmp, double %X, i32 1 ; <<2 x double>> [#uses=1] + %tmp4 = load <2 x double>* %Q ; <<2 x double>> [#uses=1] + %tmp6 = fmul <2 x double> %tmp4, %tmp2 ; <<2 x double>> [#uses=1] + store <2 x double> %tmp6, <2 x double>* %P + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_ss_load_fold.ll b/src/LLVM/test/CodeGen/X86/vec_ss_load_fold.ll new file mode 100644 index 0000000..44862c1 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_ss_load_fold.ll
@@ -0,0 +1,72 @@ +; RUN: llc < %s -march=x86 -mattr=+sse,+sse2,+sse41 | FileCheck %s + +target datalayout = "e-p:32:32" +target triple = "i686-apple-darwin8.7.2" + +define i16 @test1(float %f) nounwind { + %tmp = insertelement <4 x float> undef, float %f, i32 0 ; <<4 x float>> [#uses=1] + %tmp10 = insertelement <4 x float> %tmp, float 0.000000e+00, i32 1 ; <<4 x float>> [#uses=1] + %tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 2 ; <<4 x float>> [#uses=1] + %tmp12 = insertelement <4 x float> %tmp11, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=1] + %tmp28 = tail call <4 x float> @llvm.x86.sse.sub.ss( <4 x float> %tmp12, <4 x float> < float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > ) ; <<4 x float>> [#uses=1] + %tmp37 = tail call <4 x float> @llvm.x86.sse.mul.ss( <4 x float> %tmp28, <4 x float> < float 5.000000e-01, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > ) ; <<4 x float>> [#uses=1] + %tmp48 = tail call <4 x float> @llvm.x86.sse.min.ss( <4 x float> %tmp37, <4 x float> < float 6.553500e+04, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > ) ; <<4 x float>> [#uses=1] + %tmp59 = tail call <4 x float> @llvm.x86.sse.max.ss( <4 x float> %tmp48, <4 x float> zeroinitializer ) ; <<4 x float>> [#uses=1] + %tmp.upgrd.1 = tail call i32 @llvm.x86.sse.cvttss2si( <4 x float> %tmp59 ) ; <i32> [#uses=1] + %tmp69 = trunc i32 %tmp.upgrd.1 to i16 ; <i16> [#uses=1] + ret i16 %tmp69 +; CHECK: test1: +; CHECK: subss LCPI0_ +; CHECK: mulss LCPI0_ +; CHECK: minss LCPI0_ +} + +define i16 @test2(float %f) nounwind { + %tmp28 = fsub float %f, 1.000000e+00 ; <float> [#uses=1] + %tmp37 = fmul float %tmp28, 5.000000e-01 ; <float> [#uses=1] + %tmp375 = insertelement <4 x float> undef, float %tmp37, i32 0 ; <<4 x float>> [#uses=1] + %tmp48 = tail call <4 x float> @llvm.x86.sse.min.ss( <4 x float> %tmp375, <4 x float> < float 6.553500e+04, float undef, float undef, float undef > ) ; <<4 x float>> [#uses=1] + 
%tmp59 = tail call <4 x float> @llvm.x86.sse.max.ss( <4 x float> %tmp48, <4 x float> < float 0.000000e+00, float undef, float undef, float undef > ) ; <<4 x float>> [#uses=1] + %tmp = tail call i32 @llvm.x86.sse.cvttss2si( <4 x float> %tmp59 ) ; <i32> [#uses=1] + %tmp69 = trunc i32 %tmp to i16 ; <i16> [#uses=1] + ret i16 %tmp69 +; CHECK: test2: +; CHECK: addss LCPI1_ +; CHECK: mulss LCPI1_ +; CHECK: minss LCPI1_ +} + +declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) + +declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) + +declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) + +declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) + +declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) + + +declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) +declare <4 x float> @f() + +define <4 x float> @test3(<4 x float> %A, float *%b, i32 %C) nounwind { + %a = load float *%b + %B = insertelement <4 x float> undef, float %a, i32 0 + %X = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %A, <4 x float> %B, i32 4) + ret <4 x float> %X +; CHECK: test3: +; CHECK: roundss $4, (%eax), %xmm0 +} + +define <4 x float> @test4(<4 x float> %A, float *%b, i32 %C) nounwind { + %a = load float *%b + %B = insertelement <4 x float> undef, float %a, i32 0 + %q = call <4 x float> @f() + %X = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %q, <4 x float> %B, i32 4) + ret <4 x float> %X +; CHECK: test4: +; CHECK: movss (%eax), %xmm +; CHECK: call +; CHECK: roundss $4, %xmm{{.*}}, %xmm0 +}
diff --git a/src/LLVM/test/CodeGen/X86/vec_uint_to_fp.ll b/src/LLVM/test/CodeGen/X86/vec_uint_to_fp.ll new file mode 100644 index 0000000..fe7fa2f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_uint_to_fp.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=x86 -mcpu=corei7-avx | FileCheck %s + +; Test that we are not lowering uinttofp to scalars +define <4 x float> @test1(<4 x i32> %A) nounwind { +; CHECK: test1: +; CHECK-NOT: cvtsd2ss +; CHECK: ret + %C = uitofp <4 x i32> %A to <4 x float> + ret <4 x float> %C +} +
diff --git a/src/LLVM/test/CodeGen/X86/vec_zero-2.ll b/src/LLVM/test/CodeGen/X86/vec_zero-2.ll new file mode 100644 index 0000000..cdb030e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_zero-2.ll
@@ -0,0 +1,24 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 + +define i32 @t() { +entry: + br i1 true, label %bb4743, label %bb1656 +bb1656: ; preds = %entry + ret i32 0 +bb1664: ; preds = %entry + br i1 false, label %bb5310, label %bb4743 +bb4743: ; preds = %bb1664 + %tmp5256 = bitcast <2 x i64> zeroinitializer to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp5257 = sub <8 x i16> %tmp5256, zeroinitializer ; <<8 x i16>> [#uses=1] + %tmp5258 = bitcast <8 x i16> %tmp5257 to <2 x i64> ; <<2 x i64>> [#uses=1] + %tmp5265 = bitcast <2 x i64> %tmp5258 to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp5266 = call <16 x i8> @llvm.x86.sse2.packuswb.128( <8 x i16> %tmp5265, <8 x i16> zeroinitializer ) nounwind readnone ; <<8 x i16>> [#uses=1] + %tmp5267 = bitcast <16 x i8> %tmp5266 to <2 x i64> ; <<2 x i64>> [#uses=1] + %tmp5294 = and <2 x i64> zeroinitializer, %tmp5267 ; <<2 x i64>> [#uses=1] + br label %bb5310 +bb5310: ; preds = %bb4743, %bb1664 + %tmp5294.pn = phi <2 x i64> [ %tmp5294, %bb4743 ], [ zeroinitializer, %bb1664 ] ; <<2 x i64>> [#uses=0] + ret i32 0 +} + +declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/X86/vec_zero.ll b/src/LLVM/test/CodeGen/X86/vec_zero.ll new file mode 100644 index 0000000..95d93b8 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_zero.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s + +; CHECK: xorps +define void @foo(<4 x float>* %P) { + %T = load <4 x float>* %P ; <<4 x float>> [#uses=1] + %S = fadd <4 x float> zeroinitializer, %T ; <<4 x float>> [#uses=1] + store <4 x float> %S, <4 x float>* %P + ret void +} + +; CHECK: pxor +define void @bar(<4 x i32>* %P) { + %T = load <4 x i32>* %P ; <<4 x i32>> [#uses=1] + %S = add <4 x i32> zeroinitializer, %T ; <<4 x i32>> [#uses=1] + store <4 x i32> %S, <4 x i32>* %P + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/vec_zero_cse.ll b/src/LLVM/test/CodeGen/X86/vec_zero_cse.ll new file mode 100644 index 0000000..8aa5094 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_zero_cse.ll
@@ -0,0 +1,35 @@ +; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pxor | count 1 +; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pcmpeqd | count 1 +; 64-bit stores here do not use MMX. + +@M1 = external global <1 x i64> +@M2 = external global <2 x i32> + +@S1 = external global <2 x i64> +@S2 = external global <4 x i32> + +define void @test() { + store <1 x i64> zeroinitializer, <1 x i64>* @M1 + store <2 x i32> zeroinitializer, <2 x i32>* @M2 + ret void +} + +define void @test2() { + store <1 x i64> < i64 -1 >, <1 x i64>* @M1 + store <2 x i32> < i32 -1, i32 -1 >, <2 x i32>* @M2 + ret void +} + +define void @test3() { + store <2 x i64> zeroinitializer, <2 x i64>* @S1 + store <4 x i32> zeroinitializer, <4 x i32>* @S2 + ret void +} + +define void @test4() { + store <2 x i64> < i64 -1, i64 -1>, <2 x i64>* @S1 + store <4 x i32> < i32 -1, i32 -1, i32 -1, i32 -1 >, <4 x i32>* @S2 + ret void +} + +
diff --git a/src/LLVM/test/CodeGen/X86/vec_zext.ll b/src/LLVM/test/CodeGen/X86/vec_zext.ll new file mode 100644 index 0000000..615a50b --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vec_zext.ll
@@ -0,0 +1,69 @@ +; RUN: llc < %s -march=x86-64 +; PR 9267 + +define<4 x i32> @func_16_32() { + %F = load <4 x i16>* undef + %G = zext <4 x i16> %F to <4 x i32> + %H = load <4 x i16>* undef + %Y = zext <4 x i16> %H to <4 x i32> + %T = add <4 x i32> %Y, %G + store <4 x i32>%T , <4 x i32>* undef + ret <4 x i32> %T +} + +define<4 x i64> @func_16_64() { + %F = load <4 x i16>* undef + %G = zext <4 x i16> %F to <4 x i64> + %H = load <4 x i16>* undef + %Y = zext <4 x i16> %H to <4 x i64> + %T = xor <4 x i64> %Y, %G + store <4 x i64>%T , <4 x i64>* undef + ret <4 x i64> %T +} + +define<4 x i64> @func_32_64() { + %F = load <4 x i32>* undef + %G = zext <4 x i32> %F to <4 x i64> + %H = load <4 x i32>* undef + %Y = zext <4 x i32> %H to <4 x i64> + %T = or <4 x i64> %Y, %G + ret <4 x i64> %T +} + +define<4 x i16> @func_8_16() { + %F = load <4 x i8>* undef + %G = zext <4 x i8> %F to <4 x i16> + %H = load <4 x i8>* undef + %Y = zext <4 x i8> %H to <4 x i16> + %T = add <4 x i16> %Y, %G + ret <4 x i16> %T +} + +define<4 x i32> @func_8_32() { + %F = load <4 x i8>* undef + %G = zext <4 x i8> %F to <4 x i32> + %H = load <4 x i8>* undef + %Y = zext <4 x i8> %H to <4 x i32> + %T = sub <4 x i32> %Y, %G + ret <4 x i32> %T +} + +define<4 x i64> @func_8_64() { + %F = load <4 x i8>* undef + %G = zext <4 x i8> %F to <4 x i64> + %H = load <4 x i8>* undef + %Y = zext <4 x i8> %H to <4 x i64> + %T = add <4 x i64> %Y, %G + ret <4 x i64> %T +} + +define<4 x i32> @const_16_32() { + %G = zext <4 x i16> <i16 0, i16 3, i16 8, i16 7> to <4 x i32> + ret <4 x i32> %G +} + +define<4 x i64> @const_16_64() { + %G = zext <4 x i16> <i16 0, i16 3, i16 8, i16 7> to <4 x i64> + ret <4 x i64> %G +} +
diff --git a/src/LLVM/test/CodeGen/X86/vector-intrinsics.ll b/src/LLVM/test/CodeGen/X86/vector-intrinsics.ll new file mode 100644 index 0000000..cabacb5 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vector-intrinsics.ll
@@ -0,0 +1,52 @@ +; RUN: llc < %s -march=x86-64 | grep call | count 43 + +declare <4 x double> @llvm.sin.v4f64(<4 x double> %p) +declare <4 x double> @llvm.cos.v4f64(<4 x double> %p) +declare <4 x double> @llvm.pow.v4f64(<4 x double> %p, <4 x double> %q) +declare <4 x double> @llvm.powi.v4f64(<4 x double> %p, i32) + +define <4 x double> @foo(<4 x double> %p) +{ + %t = call <4 x double> @llvm.sin.v4f64(<4 x double> %p) + ret <4 x double> %t +} +define <4 x double> @goo(<4 x double> %p) +{ + %t = call <4 x double> @llvm.cos.v4f64(<4 x double> %p) + ret <4 x double> %t +} +define <4 x double> @moo(<4 x double> %p, <4 x double> %q) +{ + %t = call <4 x double> @llvm.pow.v4f64(<4 x double> %p, <4 x double> %q) + ret <4 x double> %t +} +define <4 x double> @zoo(<4 x double> %p, i32 %q) +{ + %t = call <4 x double> @llvm.powi.v4f64(<4 x double> %p, i32 %q) + ret <4 x double> %t +} + + +declare <9 x double> @llvm.exp.v9f64(<9 x double> %a) +declare <9 x double> @llvm.pow.v9f64(<9 x double> %a, <9 x double> %b) +declare <9 x double> @llvm.powi.v9f64(<9 x double> %a, i32) + +define void @a(<9 x double>* %p) nounwind { + %a = load <9 x double>* %p + %r = call <9 x double> @llvm.exp.v9f64(<9 x double> %a) + store <9 x double> %r, <9 x double>* %p + ret void +} +define void @b(<9 x double>* %p, <9 x double>* %q) nounwind { + %a = load <9 x double>* %p + %b = load <9 x double>* %q + %r = call <9 x double> @llvm.pow.v9f64(<9 x double> %a, <9 x double> %b) + store <9 x double> %r, <9 x double>* %p + ret void +} +define void @c(<9 x double>* %p, i32 %n) nounwind { + %a = load <9 x double>* %p + %r = call <9 x double> @llvm.powi.v9f64(<9 x double> %a, i32 %n) + store <9 x double> %r, <9 x double>* %p + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/vector-rem.ll b/src/LLVM/test/CodeGen/X86/vector-rem.ll new file mode 100644 index 0000000..51cd872 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vector-rem.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=x86-64 | grep div | count 8 +; RUN: llc < %s -march=x86-64 | grep fmodf | count 4 + +define <4 x i32> @foo(<4 x i32> %t, <4 x i32> %u) { + %m = srem <4 x i32> %t, %u + ret <4 x i32> %m +} +define <4 x i32> @bar(<4 x i32> %t, <4 x i32> %u) { + %m = urem <4 x i32> %t, %u + ret <4 x i32> %m +} +define <4 x float> @qux(<4 x float> %t, <4 x float> %u) { + %m = frem <4 x float> %t, %u + ret <4 x float> %m +}
diff --git a/src/LLVM/test/CodeGen/X86/vector-variable-idx.ll b/src/LLVM/test/CodeGen/X86/vector-variable-idx.ll new file mode 100644 index 0000000..2a4d18c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vector-variable-idx.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=x86-64 | grep movss | count 2 +; PR2676 + +define float @foo(<4 x float> %p, i32 %t) { + %z = extractelement <4 x float> %p, i32 %t + ret float %z +} +define <4 x float> @bar(<4 x float> %p, float %f, i32 %t) { + %z = insertelement <4 x float> %p, float %f, i32 %t + ret <4 x float> %z +}
diff --git a/src/LLVM/test/CodeGen/X86/vector.ll b/src/LLVM/test/CodeGen/X86/vector.ll new file mode 100644 index 0000000..0c07293 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vector.ll
@@ -0,0 +1,156 @@ +; Test that vectors are scalarized/lowered correctly. +; RUN: llc < %s -march=x86 -mcpu=i386 > %t +; RUN: llc < %s -march=x86 -mcpu=yonah >> %t + +%d8 = type <8 x double> +%f1 = type <1 x float> +%f2 = type <2 x float> +%f4 = type <4 x float> +%f8 = type <8 x float> +%i4 = type <4 x i32> + + +;;; TEST HANDLING OF VARIOUS VECTOR SIZES + +define void @test_f1(%f1* %P, %f1* %Q, %f1* %S) { + %p = load %f1* %P ; <%f1> [#uses=1] + %q = load %f1* %Q ; <%f1> [#uses=1] + %R = fadd %f1 %p, %q ; <%f1> [#uses=1] + store %f1 %R, %f1* %S + ret void +} + +define void @test_f2(%f2* %P, %f2* %Q, %f2* %S) { + %p = load %f2* %P ; <%f2> [#uses=1] + %q = load %f2* %Q ; <%f2> [#uses=1] + %R = fadd %f2 %p, %q ; <%f2> [#uses=1] + store %f2 %R, %f2* %S + ret void +} + +define void @test_f4(%f4* %P, %f4* %Q, %f4* %S) { + %p = load %f4* %P ; <%f4> [#uses=1] + %q = load %f4* %Q ; <%f4> [#uses=1] + %R = fadd %f4 %p, %q ; <%f4> [#uses=1] + store %f4 %R, %f4* %S + ret void +} + +define void @test_f8(%f8* %P, %f8* %Q, %f8* %S) { + %p = load %f8* %P ; <%f8> [#uses=1] + %q = load %f8* %Q ; <%f8> [#uses=1] + %R = fadd %f8 %p, %q ; <%f8> [#uses=1] + store %f8 %R, %f8* %S + ret void +} + +define void @test_fmul(%f8* %P, %f8* %Q, %f8* %S) { + %p = load %f8* %P ; <%f8> [#uses=1] + %q = load %f8* %Q ; <%f8> [#uses=1] + %R = fmul %f8 %p, %q ; <%f8> [#uses=1] + store %f8 %R, %f8* %S + ret void +} + +define void @test_div(%f8* %P, %f8* %Q, %f8* %S) { + %p = load %f8* %P ; <%f8> [#uses=1] + %q = load %f8* %Q ; <%f8> [#uses=1] + %R = fdiv %f8 %p, %q ; <%f8> [#uses=1] + store %f8 %R, %f8* %S + ret void +} + +;;; TEST VECTOR CONSTRUCTS + +define void @test_cst(%f4* %P, %f4* %S) { + %p = load %f4* %P ; <%f4> [#uses=1] + %R = fadd %f4 %p, < float 0x3FB99999A0000000, float 1.000000e+00, float 2.000000e+00, float 4.500000e+00 > ; <%f4> [#uses=1] + store %f4 %R, %f4* %S + ret void +} + +define void @test_zero(%f4* %P, %f4* %S) { + %p = load %f4* %P ; <%f4> [#uses=1] + %R = fadd %f4 %p, 
zeroinitializer ; <%f4> [#uses=1] + store %f4 %R, %f4* %S + ret void +} + +define void @test_undef(%f4* %P, %f4* %S) { + %p = load %f4* %P ; <%f4> [#uses=1] + %R = fadd %f4 %p, undef ; <%f4> [#uses=1] + store %f4 %R, %f4* %S + ret void +} + +define void @test_constant_insert(%f4* %S) { + %R = insertelement %f4 zeroinitializer, float 1.000000e+01, i32 0 ; <%f4> [#uses + store %f4 %R, %f4* %S + ret void +} + +define void @test_variable_buildvector(float %F, %f4* %S) { + %R = insertelement %f4 zeroinitializer, float %F, i32 0 ; <%f4> [#uses=1] + store %f4 %R, %f4* %S + ret void +} + +define void @test_scalar_to_vector(float %F, %f4* %S) { + %R = insertelement %f4 undef, float %F, i32 0 ; <%f4> [#uses=1] + store %f4 %R, %f4* %S + ret void +} + +define float @test_extract_elt(%f8* %P) { + %p = load %f8* %P ; <%f8> [#uses=1] + %R = extractelement %f8 %p, i32 3 ; <float> [#uses=1] + ret float %R +} + +define double @test_extract_elt2(%d8* %P) { + %p = load %d8* %P ; <%d8> [#uses=1] + %R = extractelement %d8 %p, i32 3 ; <double> [#uses=1] + ret double %R +} + +define void @test_cast_1(%f4* %b, %i4* %a) { + %tmp = load %f4* %b ; <%f4> [#uses=1] + %tmp2 = fadd %f4 %tmp, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > ; <%f4> [#uses=1] + %tmp3 = bitcast %f4 %tmp2 to %i4 ; <%i4> [#uses=1] + %tmp4 = add %i4 %tmp3, < i32 1, i32 2, i32 3, i32 4 > ; <%i4> [#uses=1] + store %i4 %tmp4, %i4* %a + ret void +} + +define void @test_cast_2(%f8* %a, <8 x i32>* %b) { + %T = load %f8* %a ; <%f8> [#uses=1] + %T2 = bitcast %f8 %T to <8 x i32> ; <<8 x i32>> [#uses=1] + store <8 x i32> %T2, <8 x i32>* %b + ret void +} + + +;;; TEST IMPORTANT IDIOMS + +define void @splat(%f4* %P, %f4* %Q, float %X) { + %tmp = insertelement %f4 undef, float %X, i32 0 ; <%f4> [#uses=1] + %tmp2 = insertelement %f4 %tmp, float %X, i32 1 ; <%f4> [#uses=1] + %tmp4 = insertelement %f4 %tmp2, float %X, i32 2 ; <%f4> [#uses=1] + %tmp6 = insertelement %f4 %tmp4, float %X, i32 3 ; <%f4> 
[#uses=1] + %q = load %f4* %Q ; <%f4> [#uses=1] + %R = fadd %f4 %q, %tmp6 ; <%f4> [#uses=1] + store %f4 %R, %f4* %P + ret void +} + +define void @splat_i4(%i4* %P, %i4* %Q, i32 %X) { + %tmp = insertelement %i4 undef, i32 %X, i32 0 ; <%i4> [#uses=1] + %tmp2 = insertelement %i4 %tmp, i32 %X, i32 1 ; <%i4> [#uses=1] + %tmp4 = insertelement %i4 %tmp2, i32 %X, i32 2 ; <%i4> [#uses=1] + %tmp6 = insertelement %i4 %tmp4, i32 %X, i32 3 ; <%i4> [#uses=1] + %q = load %i4* %Q ; <%i4> [#uses=1] + %R = add %i4 %q, %tmp6 ; <%i4> [#uses=1] + store %i4 %R, %i4* %P + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/vfcmp.ll b/src/LLVM/test/CodeGen/X86/vfcmp.ll new file mode 100644 index 0000000..f5f5293 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vfcmp.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 +; PR2620 + + +define void @t2(i32 %m_task_id, i32 %start_x, i32 %end_x) nounwind { + %A = fcmp olt <2 x double> zeroinitializer, zeroinitializer ; <<2 x i64>>:1 [#uses=1] + sext <2 x i1> %A to <2 x i64> + extractelement <2 x i64> %1, i32 1 ; <i64>:2 [#uses=1] + lshr i64 %2, 63 ; <i64>:3 [#uses=1] + trunc i64 %3 to i1 ; <i1>:4 [#uses=1] + zext i1 %4 to i8 ; <i8>:5 [#uses=1] + insertelement <2 x i8> zeroinitializer, i8 %5, i32 1 ; <<2 x i8>>:6 [#uses=1] + store <2 x i8> %6, <2 x i8>* null + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/visibility.ll b/src/LLVM/test/CodeGen/X86/visibility.ll new file mode 100644 index 0000000..580c3dc --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/visibility.ll
@@ -0,0 +1,14 @@ +; RUN: llc -mtriple=x86_64-unknown-linux-gnu %s -o - | FileCheck %s + +@zed = external hidden constant i32 + +define hidden void @foo() nounwind { +entry: + call void @bar(i32* @zed) + ret void +} + +declare hidden void @bar(i32*) + +;CHECK: .hidden zed +;CHECK: .hidden bar
diff --git a/src/LLVM/test/CodeGen/X86/visibility2.ll b/src/LLVM/test/CodeGen/X86/visibility2.ll new file mode 100644 index 0000000..72ea733 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/visibility2.ll
@@ -0,0 +1,18 @@ +; This test case ensures that when the visibility of a global declaration is +; emitted they are not treated as definitions. Test case for r132825. +; Fixes <rdar://problem/9429892>. +; +; RUN: llc -mtriple=x86_64-apple-darwin %s -o - | FileCheck %s + +@foo_private_extern_str = external hidden global i8* + +define void @foo1() nounwind ssp { +entry: + %tmp = load i8** @foo_private_extern_str, align 8 + call void @foo3(i8* %tmp) + ret void +} + +declare void @foo3(i8*) + +; CHECK-NOT: .private_extern
diff --git a/src/LLVM/test/CodeGen/X86/volatile.ll b/src/LLVM/test/CodeGen/X86/volatile.ll new file mode 100644 index 0000000..2e5742a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/volatile.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=x86 -mattr=sse2 | grep movsd | count 5 +; RUN: llc < %s -march=x86 -mattr=sse2 -O0 | grep -v esp | grep movsd | count 5 + +@x = external global double + +define void @foo() nounwind { + %a = volatile load double* @x + volatile store double 0.0, double* @x + volatile store double 0.0, double* @x + %b = volatile load double* @x + ret void +} + +define void @bar() nounwind { + %c = volatile load double* @x + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/vortex-bug.ll b/src/LLVM/test/CodeGen/X86/vortex-bug.ll new file mode 100644 index 0000000..40f1117 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vortex-bug.ll
@@ -0,0 +1,21 @@ +; RUN: llc < %s -march=x86-64 + + %struct.blktkntype = type { i32, i32 } + %struct.fieldstruc = type { [128 x i8], %struct.blktkntype*, i32, i32 } + +define fastcc i32 @Env_GetFieldStruc(i8* %FieldName, i32* %Status, %struct.fieldstruc* %FieldStruc) nounwind { +entry: + br label %bb137.i + +bb137.i: ; preds = %bb137.i, %entry + %FieldName_addr.0209.rec.i = phi i64 [ %tmp139.rec.i, %bb137.i ], [ 0, %entry ] ; <i64> [#uses=1] + %tmp147213.i = phi i32 [ %tmp147.i, %bb137.i ], [ 1, %entry ] ; <i32> [#uses=2] + %tmp139.rec.i = add i64 %FieldName_addr.0209.rec.i, 1 ; <i64> [#uses=2] + %tmp141142.i = sext i32 %tmp147213.i to i64 ; <i64> [#uses=0] + %tmp147.i = add i32 %tmp147213.i, 1 ; <i32> [#uses=1] + br i1 false, label %bb137.i, label %bb149.i.loopexit + +bb149.i.loopexit: ; preds = %bb137.i + %tmp139.i = getelementptr i8* %FieldName, i64 %tmp139.rec.i ; <i8*> [#uses=0] + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/vshift-1.ll b/src/LLVM/test/CodeGen/X86/vshift-1.ll new file mode 100644 index 0000000..4955156 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vshift-1.ll
@@ -0,0 +1,79 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s + +; test vector shifts converted to proper SSE2 vector shifts when the shift +; amounts are the same. + +define void @shift1a(<2 x i64> %val, <2 x i64>* %dst) nounwind { +entry: +; CHECK: shift1a: +; CHECK: psllq + %shl = shl <2 x i64> %val, < i64 32, i64 32 > + store <2 x i64> %shl, <2 x i64>* %dst + ret void +} + +define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind { +entry: +; CHECK: shift1b: +; CHECK: movd +; CHECK-NEXT: psllq + %0 = insertelement <2 x i64> undef, i64 %amt, i32 0 + %1 = insertelement <2 x i64> %0, i64 %amt, i32 1 + %shl = shl <2 x i64> %val, %1 + store <2 x i64> %shl, <2 x i64>* %dst + ret void +} + + +define void @shift2a(<4 x i32> %val, <4 x i32>* %dst) nounwind { +entry: +; CHECK: shift2a: +; CHECK: pslld + %shl = shl <4 x i32> %val, < i32 5, i32 5, i32 5, i32 5 > + store <4 x i32> %shl, <4 x i32>* %dst + ret void +} + +define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind { +entry: +; CHECK: shift2b: +; CHECK: movd +; CHECK-NEXT: pslld + %0 = insertelement <4 x i32> undef, i32 %amt, i32 0 + %1 = insertelement <4 x i32> %0, i32 %amt, i32 1 + %2 = insertelement <4 x i32> %1, i32 %amt, i32 2 + %3 = insertelement <4 x i32> %2, i32 %amt, i32 3 + %shl = shl <4 x i32> %val, %3 + store <4 x i32> %shl, <4 x i32>* %dst + ret void +} + +define void @shift3a(<8 x i16> %val, <8 x i16>* %dst) nounwind { +entry: +; CHECK: shift3a: +; CHECK: psllw + %shl = shl <8 x i16> %val, < i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5 > + store <8 x i16> %shl, <8 x i16>* %dst + ret void +} + +; Make sure the shift amount is properly zero extended. 
+define void @shift3b(<8 x i16> %val, <8 x i16>* %dst, i16 %amt) nounwind { +entry: +; CHECK: shift3b: +; CHECK: movzwl +; CHECK: movd +; CHECK-NEXT: psllw + %0 = insertelement <8 x i16> undef, i16 %amt, i32 0 + %1 = insertelement <8 x i16> %0, i16 %amt, i32 1 + %2 = insertelement <8 x i16> %0, i16 %amt, i32 2 + %3 = insertelement <8 x i16> %0, i16 %amt, i32 3 + %4 = insertelement <8 x i16> %0, i16 %amt, i32 4 + %5 = insertelement <8 x i16> %0, i16 %amt, i32 5 + %6 = insertelement <8 x i16> %0, i16 %amt, i32 6 + %7 = insertelement <8 x i16> %0, i16 %amt, i32 7 + %shl = shl <8 x i16> %val, %7 + store <8 x i16> %shl, <8 x i16>* %dst + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/vshift-2.ll b/src/LLVM/test/CodeGen/X86/vshift-2.ll new file mode 100644 index 0000000..9a9b419 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vshift-2.ll
@@ -0,0 +1,78 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s + +; test vector shifts converted to proper SSE2 vector shifts when the shift +; amounts are the same. + +define void @shift1a(<2 x i64> %val, <2 x i64>* %dst) nounwind { +entry: +; CHECK: shift1a: +; CHECK: psrlq + %lshr = lshr <2 x i64> %val, < i64 32, i64 32 > + store <2 x i64> %lshr, <2 x i64>* %dst + ret void +} + +define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind { +entry: +; CHECK: shift1b: +; CHECK: movd +; CHECK-NEXT: psrlq + %0 = insertelement <2 x i64> undef, i64 %amt, i32 0 + %1 = insertelement <2 x i64> %0, i64 %amt, i32 1 + %lshr = lshr <2 x i64> %val, %1 + store <2 x i64> %lshr, <2 x i64>* %dst + ret void +} + +define void @shift2a(<4 x i32> %val, <4 x i32>* %dst) nounwind { +entry: +; CHECK: shift2a: +; CHECK: psrld + %lshr = lshr <4 x i32> %val, < i32 17, i32 17, i32 17, i32 17 > + store <4 x i32> %lshr, <4 x i32>* %dst + ret void +} + +define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind { +entry: +; CHECK: shift2b: +; CHECK: movd +; CHECK-NEXT: psrld + %0 = insertelement <4 x i32> undef, i32 %amt, i32 0 + %1 = insertelement <4 x i32> %0, i32 %amt, i32 1 + %2 = insertelement <4 x i32> %1, i32 %amt, i32 2 + %3 = insertelement <4 x i32> %2, i32 %amt, i32 3 + %lshr = lshr <4 x i32> %val, %3 + store <4 x i32> %lshr, <4 x i32>* %dst + ret void +} + + +define void @shift3a(<8 x i16> %val, <8 x i16>* %dst) nounwind { +entry: +; CHECK: shift3a: +; CHECK: psrlw + %lshr = lshr <8 x i16> %val, < i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5 > + store <8 x i16> %lshr, <8 x i16>* %dst + ret void +} + +; properly zero extend the shift amount +define void @shift3b(<8 x i16> %val, <8 x i16>* %dst, i16 %amt) nounwind { +entry: +; CHECK: shift3b: +; CHECK: movzwl +; CHECK: movd +; CHECK-NEXT: psrlw + %0 = insertelement <8 x i16> undef, i16 %amt, i32 0 + %1 = insertelement <8 x i16> %0, i16 %amt, i32 1 + %2 = insertelement <8 x i16> %0, i16 
%amt, i32 2 + %3 = insertelement <8 x i16> %0, i16 %amt, i32 3 + %4 = insertelement <8 x i16> %0, i16 %amt, i32 4 + %5 = insertelement <8 x i16> %0, i16 %amt, i32 5 + %6 = insertelement <8 x i16> %0, i16 %amt, i32 6 + %7 = insertelement <8 x i16> %0, i16 %amt, i32 7 + %lshr = lshr <8 x i16> %val, %7 + store <8 x i16> %lshr, <8 x i16>* %dst + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/vshift-3.ll b/src/LLVM/test/CodeGen/X86/vshift-3.ll new file mode 100644 index 0000000..8e8a9aa --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vshift-3.ll
@@ -0,0 +1,67 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s + +; test vector shifts converted to proper SSE2 vector shifts when the shift +; amounts are the same. + +; Note that x86 does have ashr + +; shift1a can't use a packed shift +define void @shift1a(<2 x i64> %val, <2 x i64>* %dst) nounwind { +entry: +; CHECK: shift1a: +; CHECK: sarl + %ashr = ashr <2 x i64> %val, < i64 32, i64 32 > + store <2 x i64> %ashr, <2 x i64>* %dst + ret void +} + +define void @shift2a(<4 x i32> %val, <4 x i32>* %dst) nounwind { +entry: +; CHECK: shift2a: +; CHECK: psrad $5 + %ashr = ashr <4 x i32> %val, < i32 5, i32 5, i32 5, i32 5 > + store <4 x i32> %ashr, <4 x i32>* %dst + ret void +} + +define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind { +entry: +; CHECK: shift2b: +; CHECK: movd +; CHECK-NEXT: psrad + %0 = insertelement <4 x i32> undef, i32 %amt, i32 0 + %1 = insertelement <4 x i32> %0, i32 %amt, i32 1 + %2 = insertelement <4 x i32> %1, i32 %amt, i32 2 + %3 = insertelement <4 x i32> %2, i32 %amt, i32 3 + %ashr = ashr <4 x i32> %val, %3 + store <4 x i32> %ashr, <4 x i32>* %dst + ret void +} + +define void @shift3a(<8 x i16> %val, <8 x i16>* %dst) nounwind { +entry: +; CHECK: shift3a: +; CHECK: psraw $5 + %ashr = ashr <8 x i16> %val, < i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5 > + store <8 x i16> %ashr, <8 x i16>* %dst + ret void +} + +define void @shift3b(<8 x i16> %val, <8 x i16>* %dst, i16 %amt) nounwind { +entry: +; CHECK: shift3b: +; CHECK: movzwl +; CHECK: movd +; CHECK-NEXT: psraw + %0 = insertelement <8 x i16> undef, i16 %amt, i32 0 + %1 = insertelement <8 x i16> %0, i16 %amt, i32 1 + %2 = insertelement <8 x i16> %0, i16 %amt, i32 2 + %3 = insertelement <8 x i16> %0, i16 %amt, i32 3 + %4 = insertelement <8 x i16> %0, i16 %amt, i32 4 + %5 = insertelement <8 x i16> %0, i16 %amt, i32 5 + %6 = insertelement <8 x i16> %0, i16 %amt, i32 6 + %7 = insertelement <8 x i16> %0, i16 %amt, i32 7 + %ashr = ashr <8 x i16> %val, %7 + store 
<8 x i16> %ashr, <8 x i16>* %dst + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/vshift-4.ll b/src/LLVM/test/CodeGen/X86/vshift-4.ll new file mode 100644 index 0000000..8e24fda --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vshift-4.ll
@@ -0,0 +1,85 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s + +; test vector shifts converted to proper SSE2 vector shifts when the shift +; amounts are the same when using a shuffle splat. + +define void @shift1a(<2 x i64> %val, <2 x i64>* %dst, <2 x i64> %sh) nounwind { +entry: +; CHECK: shift1a: +; CHECK: psllq + %shamt = shufflevector <2 x i64> %sh, <2 x i64> undef, <2 x i32> <i32 0, i32 0> + %shl = shl <2 x i64> %val, %shamt + store <2 x i64> %shl, <2 x i64>* %dst + ret void +} + +; shift1b can't use a packed shift +define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, <2 x i64> %sh) nounwind { +entry: +; CHECK: shift1b: +; CHECK: shll + %shamt = shufflevector <2 x i64> %sh, <2 x i64> undef, <2 x i32> <i32 0, i32 1> + %shl = shl <2 x i64> %val, %shamt + store <2 x i64> %shl, <2 x i64>* %dst + ret void +} + +define void @shift2a(<4 x i32> %val, <4 x i32>* %dst, <2 x i32> %amt) nounwind { +entry: +; CHECK: shift2a: +; CHECK: pslld + %shamt = shufflevector <2 x i32> %amt, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + %shl = shl <4 x i32> %val, %shamt + store <4 x i32> %shl, <4 x i32>* %dst + ret void +} + +define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, <2 x i32> %amt) nounwind { +entry: +; CHECK: shift2b: +; CHECK: pslld + %shamt = shufflevector <2 x i32> %amt, <2 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 1, i32 1> + %shl = shl <4 x i32> %val, %shamt + store <4 x i32> %shl, <4 x i32>* %dst + ret void +} + +define void @shift2c(<4 x i32> %val, <4 x i32>* %dst, <2 x i32> %amt) nounwind { +entry: +; CHECK: shift2c: +; CHECK: pslld + %shamt = shufflevector <2 x i32> %amt, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + %shl = shl <4 x i32> %val, %shamt + store <4 x i32> %shl, <4 x i32>* %dst + ret void +} + +define void @shift3a(<8 x i16> %val, <8 x i16>* %dst, <8 x i16> %amt) nounwind { +entry: +; CHECK: shift3a: +; CHECK: movzwl +; CHECK: psllw + %shamt = shufflevector <8 x i16> %amt, <8 x i16> undef, <8 x i32> <i32 6, 
i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6> + %shl = shl <8 x i16> %val, %shamt + store <8 x i16> %shl, <8 x i16>* %dst + ret void +} + +define void @shift3b(<8 x i16> %val, <8 x i16>* %dst, i16 %amt) nounwind { +entry: +; CHECK: shift3b: +; CHECK: movzwl +; CHECK: psllw + %0 = insertelement <8 x i16> undef, i16 %amt, i32 0 + %1 = insertelement <8 x i16> %0, i16 %amt, i32 1 + %2 = insertelement <8 x i16> %0, i16 %amt, i32 2 + %3 = insertelement <8 x i16> %0, i16 %amt, i32 3 + %4 = insertelement <8 x i16> %0, i16 %amt, i32 4 + %5 = insertelement <8 x i16> %0, i16 %amt, i32 5 + %6 = insertelement <8 x i16> %0, i16 %amt, i32 6 + %7 = insertelement <8 x i16> %0, i16 %amt, i32 7 + %shl = shl <8 x i16> %val, %7 + store <8 x i16> %shl, <8 x i16>* %dst + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/vshift-5.ll b/src/LLVM/test/CodeGen/X86/vshift-5.ll new file mode 100644 index 0000000..cb254ae --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vshift-5.ll
@@ -0,0 +1,56 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s + +; When loading the shift amount from memory, avoid generating the splat. + +define void @shift5a(<4 x i32> %val, <4 x i32>* %dst, i32* %pamt) nounwind { +entry: +; CHECK: shift5a: +; CHECK: movd +; CHECK-NEXT: pslld + %amt = load i32* %pamt + %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0 + %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer + %shl = shl <4 x i32> %val, %shamt + store <4 x i32> %shl, <4 x i32>* %dst + ret void +} + + +define void @shift5b(<4 x i32> %val, <4 x i32>* %dst, i32* %pamt) nounwind { +entry: +; CHECK: shift5b: +; CHECK: movd +; CHECK-NEXT: psrad + %amt = load i32* %pamt + %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0 + %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer + %shr = ashr <4 x i32> %val, %shamt + store <4 x i32> %shr, <4 x i32>* %dst + ret void +} + + +define void @shift5c(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind { +entry: +; CHECK: shift5c: +; CHECK: movd +; CHECK-NEXT: pslld + %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0 + %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer + %shl = shl <4 x i32> %val, %shamt + store <4 x i32> %shl, <4 x i32>* %dst + ret void +} + + +define void @shift5d(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind { +entry: +; CHECK: shift5d: +; CHECK: movd +; CHECK-NEXT: psrad + %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0 + %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer + %shr = ashr <4 x i32> %val, %shamt + store <4 x i32> %shr, <4 x i32>* %dst + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/vshift_scalar.ll b/src/LLVM/test/CodeGen/X86/vshift_scalar.ll new file mode 100644 index 0000000..9dd8478 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vshift_scalar.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s + +; Legalization test that requires scalarizing a vector. + +define void @update(<1 x i32> %val, <1 x i32>* %dst) nounwind { +entry: + %shl = shl <1 x i32> %val, < i32 2> + %shr = ashr <1 x i32> %val, < i32 4> + store <1 x i32> %shr, <1 x i32>* %dst + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/vshift_split.ll b/src/LLVM/test/CodeGen/X86/vshift_split.ll new file mode 100644 index 0000000..359d36d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vshift_split.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 + +; Example that requires splitting and expanding a vector shift. +define <2 x i64> @update(<2 x i64> %val) nounwind readnone { +entry: + %shr = lshr <2 x i64> %val, < i64 2, i64 3 > + ret <2 x i64> %shr +}
diff --git a/src/LLVM/test/CodeGen/X86/vshift_split2.ll b/src/LLVM/test/CodeGen/X86/vshift_split2.ll new file mode 100644 index 0000000..0f8c2b8 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vshift_split2.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=x86 -mcpu=yonah + +; Legalization example that requires splitting a large vector into smaller pieces. + +define void @update(<8 x i32> %val, <8 x i32>* %dst) nounwind { +entry: + %shl = shl <8 x i32> %val, < i32 2, i32 2, i32 2, i32 2, i32 4, i32 4, i32 4, i32 4 > + %shr = ashr <8 x i32> %val, < i32 2, i32 2, i32 2, i32 2, i32 4, i32 4, i32 4, i32 4 > + store <8 x i32> %shr, <8 x i32>* %dst + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/vsplit-and.ll b/src/LLVM/test/CodeGen/X86/vsplit-and.ll new file mode 100644 index 0000000..97dacfd --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/vsplit-and.ll
@@ -0,0 +1,22 @@ +; RUN: llc < %s -march=x86 | FileCheck %s + + +define void @t(<2 x i64>* %dst, <2 x i64> %src1, <2 x i64> %src2) nounwind readonly { +; CHECK: andb + %cmp1 = icmp ne <2 x i64> %src1, zeroinitializer + %cmp2 = icmp ne <2 x i64> %src2, zeroinitializer + %t1 = and <2 x i1> %cmp1, %cmp2 + %t2 = sext <2 x i1> %t1 to <2 x i64> + store <2 x i64> %t2, <2 x i64>* %dst + ret void +} + +define void @t2(<3 x i64>* %dst, <3 x i64> %src1, <3 x i64> %src2) nounwind readonly { +; CHECK: andb + %cmp1 = icmp ne <3 x i64> %src1, zeroinitializer + %cmp2 = icmp ne <3 x i64> %src2, zeroinitializer + %t1 = and <3 x i1> %cmp1, %cmp2 + %t2 = sext <3 x i1> %t1 to <3 x i64> + store <3 x i64> %t2, <3 x i64>* %dst + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/weak.ll b/src/LLVM/test/CodeGen/X86/weak.ll new file mode 100644 index 0000000..e0608dd --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/weak.ll
@@ -0,0 +1,4 @@ +; RUN: llc < %s -march=x86 +@a = extern_weak global i32 ; <i32*> [#uses=1] +@b = global i32* @a ; <i32**> [#uses=0] +
diff --git a/src/LLVM/test/CodeGen/X86/wide-integer-fold.ll b/src/LLVM/test/CodeGen/X86/wide-integer-fold.ll new file mode 100644 index 0000000..b3b4d24 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/wide-integer-fold.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s +; CHECK: movq $-65535, %rax + +; DAGCombiner should fold this to a simple constant. + +define i64 @foo(i192 %a) nounwind { + %t = or i192 %a, -22300404916163702203072254898040925442801665 + %s = and i192 %t, -22300404916163702203072254898040929737768960 + %u = lshr i192 %s, 128 + %v = trunc i192 %u to i64 + ret i64 %v +}
diff --git a/src/LLVM/test/CodeGen/X86/widen_arith-1.ll b/src/LLVM/test/CodeGen/X86/widen_arith-1.ll new file mode 100644 index 0000000..4b8016d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/widen_arith-1.ll
@@ -0,0 +1,46 @@ +; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s + +; Widen a v3i8 to v16i8 to use a vector add + +define void @update(<3 x i8>* %dst, <3 x i8>* %src, i32 %n) nounwind { +entry: +; CHECK-NOT: pextrw +; CHECK: paddb +; CHECK: pextrb + %dst.addr = alloca <3 x i8>* ; <<3 x i8>**> [#uses=2] + %src.addr = alloca <3 x i8>* ; <<3 x i8>**> [#uses=2] + %n.addr = alloca i32 ; <i32*> [#uses=2] + %i = alloca i32, align 4 ; <i32*> [#uses=6] + store <3 x i8>* %dst, <3 x i8>** %dst.addr + store <3 x i8>* %src, <3 x i8>** %src.addr + store i32 %n, i32* %n.addr + store i32 0, i32* %i + br label %forcond + +forcond: ; preds = %forinc, %entry + %tmp = load i32* %i ; <i32> [#uses=1] + %tmp1 = load i32* %n.addr ; <i32> [#uses=1] + %cmp = icmp slt i32 %tmp, %tmp1 ; <i1> [#uses=1] + br i1 %cmp, label %forbody, label %afterfor + +forbody: ; preds = %forcond + %tmp2 = load i32* %i ; <i32> [#uses=1] + %tmp3 = load <3 x i8>** %dst.addr ; <<3 x i8>*> [#uses=1] + %arrayidx = getelementptr <3 x i8>* %tmp3, i32 %tmp2 ; <<3 x i8>*> [#uses=1] + %tmp4 = load i32* %i ; <i32> [#uses=1] + %tmp5 = load <3 x i8>** %src.addr ; <<3 x i8>*> [#uses=1] + %arrayidx6 = getelementptr <3 x i8>* %tmp5, i32 %tmp4 ; <<3 x i8>*> [#uses=1] + %tmp7 = load <3 x i8>* %arrayidx6 ; <<3 x i8>> [#uses=1] + %add = add <3 x i8> %tmp7, < i8 1, i8 1, i8 1 > ; <<3 x i8>> [#uses=1] + store <3 x i8> %add, <3 x i8>* %arrayidx + br label %forinc + +forinc: ; preds = %forbody + %tmp8 = load i32* %i ; <i32> [#uses=1] + %inc = add i32 %tmp8, 1 ; <i32> [#uses=1] + store i32 %inc, i32* %i + br label %forcond + +afterfor: ; preds = %forcond + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/widen_arith-2.ll b/src/LLVM/test/CodeGen/X86/widen_arith-2.ll new file mode 100644 index 0000000..03b3fea --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/widen_arith-2.ll
@@ -0,0 +1,59 @@ +; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s +; CHECK: paddb +; CHECK: pand + +; widen v8i8 to v16i8 (checks even power of 2 widening with add & and) + +define void @update(i64* %dst_i, i64* %src_i, i32 %n) nounwind { +entry: + %dst_i.addr = alloca i64* ; <i64**> [#uses=2] + %src_i.addr = alloca i64* ; <i64**> [#uses=2] + %n.addr = alloca i32 ; <i32*> [#uses=2] + %i = alloca i32, align 4 ; <i32*> [#uses=8] + %dst = alloca <8 x i8>*, align 4 ; <<8 x i8>**> [#uses=2] + %src = alloca <8 x i8>*, align 4 ; <<8 x i8>**> [#uses=2] + store i64* %dst_i, i64** %dst_i.addr + store i64* %src_i, i64** %src_i.addr + store i32 %n, i32* %n.addr + store i32 0, i32* %i + br label %forcond + +forcond: ; preds = %forinc, %entry + %tmp = load i32* %i ; <i32> [#uses=1] + %tmp1 = load i32* %n.addr ; <i32> [#uses=1] + %cmp = icmp slt i32 %tmp, %tmp1 ; <i1> [#uses=1] + br i1 %cmp, label %forbody, label %afterfor + +forbody: ; preds = %forcond + %tmp2 = load i32* %i ; <i32> [#uses=1] + %tmp3 = load i64** %dst_i.addr ; <i64*> [#uses=1] + %arrayidx = getelementptr i64* %tmp3, i32 %tmp2 ; <i64*> [#uses=1] + %conv = bitcast i64* %arrayidx to <8 x i8>* ; <<8 x i8>*> [#uses=1] + store <8 x i8>* %conv, <8 x i8>** %dst + %tmp4 = load i32* %i ; <i32> [#uses=1] + %tmp5 = load i64** %src_i.addr ; <i64*> [#uses=1] + %arrayidx6 = getelementptr i64* %tmp5, i32 %tmp4 ; <i64*> [#uses=1] + %conv7 = bitcast i64* %arrayidx6 to <8 x i8>* ; <<8 x i8>*> [#uses=1] + store <8 x i8>* %conv7, <8 x i8>** %src + %tmp8 = load i32* %i ; <i32> [#uses=1] + %tmp9 = load <8 x i8>** %dst ; <<8 x i8>*> [#uses=1] + %arrayidx10 = getelementptr <8 x i8>* %tmp9, i32 %tmp8 ; <<8 x i8>*> [#uses=1] + %tmp11 = load i32* %i ; <i32> [#uses=1] + %tmp12 = load <8 x i8>** %src ; <<8 x i8>*> [#uses=1] + %arrayidx13 = getelementptr <8 x i8>* %tmp12, i32 %tmp11 ; <<8 x i8>*> [#uses=1] + %tmp14 = load <8 x i8>* %arrayidx13 ; <<8 x i8>> [#uses=1] + %add = add <8 x i8> %tmp14, < i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, 
i8 1, i8 1 > ; <<8 x i8>> [#uses=1] + %and = and <8 x i8> %add, < i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4 > ; <<8 x i8>> [#uses=1] + store <8 x i8> %and, <8 x i8>* %arrayidx10 + br label %forinc + +forinc: ; preds = %forbody + %tmp15 = load i32* %i ; <i32> [#uses=1] + %inc = add i32 %tmp15, 1 ; <i32> [#uses=1] + store i32 %inc, i32* %i + br label %forcond + +afterfor: ; preds = %forcond + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/widen_arith-3.ll b/src/LLVM/test/CodeGen/X86/widen_arith-3.ll new file mode 100644 index 0000000..0574923 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/widen_arith-3.ll
@@ -0,0 +1,52 @@ +; RUN: llc < %s -march=x86 -mattr=+sse42 -post-RA-scheduler=true | FileCheck %s +; CHECK: paddw +; CHECK: pextrw +; CHECK: movd + +; Widen a v3i16 to v8i16 to do a vector add + +@.str = internal constant [4 x i8] c"%d \00" ; <[4 x i8]*> [#uses=1] +@.str1 = internal constant [2 x i8] c"\0A\00" ; <[2 x i8]*> [#uses=1] + +define void @update(<3 x i16>* %dst, <3 x i16>* %src, i32 %n) nounwind { +entry: + %dst.addr = alloca <3 x i16>* ; <<3 x i16>**> [#uses=2] + %src.addr = alloca <3 x i16>* ; <<3 x i16>**> [#uses=2] + %n.addr = alloca i32 ; <i32*> [#uses=2] + %v = alloca <3 x i16>, align 8 ; <<3 x i16>*> [#uses=1] + %i = alloca i32, align 4 ; <i32*> [#uses=6] + store <3 x i16>* %dst, <3 x i16>** %dst.addr + store <3 x i16>* %src, <3 x i16>** %src.addr + store i32 %n, i32* %n.addr + store <3 x i16> < i16 1, i16 1, i16 1 >, <3 x i16>* %v + store i32 0, i32* %i + br label %forcond + +forcond: ; preds = %forinc, %entry + %tmp = load i32* %i ; <i32> [#uses=1] + %tmp1 = load i32* %n.addr ; <i32> [#uses=1] + %cmp = icmp slt i32 %tmp, %tmp1 ; <i1> [#uses=1] + br i1 %cmp, label %forbody, label %afterfor + +forbody: ; preds = %forcond + %tmp2 = load i32* %i ; <i32> [#uses=1] + %tmp3 = load <3 x i16>** %dst.addr ; <<3 x i16>*> [#uses=1] + %arrayidx = getelementptr <3 x i16>* %tmp3, i32 %tmp2 ; <<3 x i16>*> [#uses=1] + %tmp4 = load i32* %i ; <i32> [#uses=1] + %tmp5 = load <3 x i16>** %src.addr ; <<3 x i16>*> [#uses=1] + %arrayidx6 = getelementptr <3 x i16>* %tmp5, i32 %tmp4 ; <<3 x i16>*> [#uses=1] + %tmp7 = load <3 x i16>* %arrayidx6 ; <<3 x i16>> [#uses=1] + %add = add <3 x i16> %tmp7, < i16 1, i16 1, i16 1 > ; <<3 x i16>> [#uses=1] + store <3 x i16> %add, <3 x i16>* %arrayidx + br label %forinc + +forinc: ; preds = %forbody + %tmp8 = load i32* %i ; <i32> [#uses=1] + %inc = add i32 %tmp8, 1 ; <i32> [#uses=1] + store i32 %inc, i32* %i + br label %forcond + +afterfor: ; preds = %forcond + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/widen_arith-4.ll b/src/LLVM/test/CodeGen/X86/widen_arith-4.ll new file mode 100644 index 0000000..5931d63 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/widen_arith-4.ll
@@ -0,0 +1,49 @@ +; RUN: llc < %s -march=x86-64 -mattr=+sse42 | FileCheck %s +; CHECK: psubw +; CHECK-NEXT: pmullw + +; Widen a v5i16 to v8i16 to do a vector sub and multiple + +define void @update(<5 x i16>* %dst, <5 x i16>* %src, i32 %n) nounwind { +entry: + %dst.addr = alloca <5 x i16>* ; <<5 x i16>**> [#uses=2] + %src.addr = alloca <5 x i16>* ; <<5 x i16>**> [#uses=2] + %n.addr = alloca i32 ; <i32*> [#uses=2] + %v = alloca <5 x i16>, align 16 ; <<5 x i16>*> [#uses=1] + %i = alloca i32, align 4 ; <i32*> [#uses=6] + store <5 x i16>* %dst, <5 x i16>** %dst.addr + store <5 x i16>* %src, <5 x i16>** %src.addr + store i32 %n, i32* %n.addr + store <5 x i16> < i16 1, i16 1, i16 1, i16 0, i16 0 >, <5 x i16>* %v + store i32 0, i32* %i + br label %forcond + +forcond: ; preds = %forinc, %entry + %tmp = load i32* %i ; <i32> [#uses=1] + %tmp1 = load i32* %n.addr ; <i32> [#uses=1] + %cmp = icmp slt i32 %tmp, %tmp1 ; <i1> [#uses=1] + br i1 %cmp, label %forbody, label %afterfor + +forbody: ; preds = %forcond + %tmp2 = load i32* %i ; <i32> [#uses=1] + %tmp3 = load <5 x i16>** %dst.addr ; <<5 x i16>*> [#uses=1] + %arrayidx = getelementptr <5 x i16>* %tmp3, i32 %tmp2 ; <<5 x i16>*> [#uses=1] + %tmp4 = load i32* %i ; <i32> [#uses=1] + %tmp5 = load <5 x i16>** %src.addr ; <<5 x i16>*> [#uses=1] + %arrayidx6 = getelementptr <5 x i16>* %tmp5, i32 %tmp4 ; <<5 x i16>*> [#uses=1] + %tmp7 = load <5 x i16>* %arrayidx6 ; <<5 x i16>> [#uses=1] + %sub = sub <5 x i16> %tmp7, < i16 271, i16 271, i16 271, i16 271, i16 271 > ; <<5 x i16>> [#uses=1] + %mul = mul <5 x i16> %sub, < i16 2, i16 2, i16 2, i16 2, i16 2 > ; <<5 x i16>> [#uses=1] + store <5 x i16> %mul, <5 x i16>* %arrayidx + br label %forinc + +forinc: ; preds = %forbody + %tmp8 = load i32* %i ; <i32> [#uses=1] + %inc = add i32 %tmp8, 1 ; <i32> [#uses=1] + store i32 %inc, i32* %i + br label %forcond + +afterfor: ; preds = %forcond + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/widen_arith-5.ll b/src/LLVM/test/CodeGen/X86/widen_arith-5.ll new file mode 100644 index 0000000..7f2eff0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/widen_arith-5.ll
@@ -0,0 +1,50 @@ +; RUN: llc < %s -march=x86-64 -mattr=+sse42 | FileCheck %s +; CHECK: movdqa +; CHECK: pmulld +; CHECK: psubd + +; widen a v3i32 to v4i32 to do a vector multiple and a subtraction + +define void @update(<3 x i32>* %dst, <3 x i32>* %src, i32 %n) nounwind { +entry: + %dst.addr = alloca <3 x i32>* ; <<3 x i32>**> [#uses=2] + %src.addr = alloca <3 x i32>* ; <<3 x i32>**> [#uses=2] + %n.addr = alloca i32 ; <i32*> [#uses=2] + %v = alloca <3 x i32>, align 16 ; <<3 x i32>*> [#uses=1] + %i = alloca i32, align 4 ; <i32*> [#uses=6] + store <3 x i32>* %dst, <3 x i32>** %dst.addr + store <3 x i32>* %src, <3 x i32>** %src.addr + store i32 %n, i32* %n.addr + store <3 x i32> < i32 1, i32 1, i32 1 >, <3 x i32>* %v + store i32 0, i32* %i + br label %forcond + +forcond: ; preds = %forinc, %entry + %tmp = load i32* %i ; <i32> [#uses=1] + %tmp1 = load i32* %n.addr ; <i32> [#uses=1] + %cmp = icmp slt i32 %tmp, %tmp1 ; <i1> [#uses=1] + br i1 %cmp, label %forbody, label %afterfor + +forbody: ; preds = %forcond + %tmp2 = load i32* %i ; <i32> [#uses=1] + %tmp3 = load <3 x i32>** %dst.addr ; <<3 x i32>*> [#uses=1] + %arrayidx = getelementptr <3 x i32>* %tmp3, i32 %tmp2 ; <<3 x i32>*> [#uses=1] + %tmp4 = load i32* %i ; <i32> [#uses=1] + %tmp5 = load <3 x i32>** %src.addr ; <<3 x i32>*> [#uses=1] + %arrayidx6 = getelementptr <3 x i32>* %tmp5, i32 %tmp4 ; <<3 x i32>*> [#uses=1] + %tmp7 = load <3 x i32>* %arrayidx6 ; <<3 x i32>> [#uses=1] + %mul = mul <3 x i32> %tmp7, < i32 4, i32 4, i32 4 > ; <<3 x i32>> [#uses=1] + %sub = sub <3 x i32> %mul, < i32 3, i32 3, i32 3 > ; <<3 x i32>> [#uses=1] + store <3 x i32> %sub, <3 x i32>* %arrayidx + br label %forinc + +forinc: ; preds = %forbody + %tmp8 = load i32* %i ; <i32> [#uses=1] + %inc = add i32 %tmp8, 1 ; <i32> [#uses=1] + store i32 %inc, i32* %i + br label %forcond + +afterfor: ; preds = %forcond + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/widen_arith-6.ll b/src/LLVM/test/CodeGen/X86/widen_arith-6.ll new file mode 100644 index 0000000..b983d141 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/widen_arith-6.ll
@@ -0,0 +1,49 @@ +; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s +; CHECK: mulps +; CHECK: addps + +; widen a v3f32 to vfi32 to do a vector multiple and an add + +define void @update(<3 x float>* %dst, <3 x float>* %src, i32 %n) nounwind { +entry: + %dst.addr = alloca <3 x float>* ; <<3 x float>**> [#uses=2] + %src.addr = alloca <3 x float>* ; <<3 x float>**> [#uses=2] + %n.addr = alloca i32 ; <i32*> [#uses=2] + %v = alloca <3 x float>, align 16 ; <<3 x float>*> [#uses=2] + %i = alloca i32, align 4 ; <i32*> [#uses=6] + store <3 x float>* %dst, <3 x float>** %dst.addr + store <3 x float>* %src, <3 x float>** %src.addr + store i32 %n, i32* %n.addr + store <3 x float> < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00 >, <3 x float>* %v + store i32 0, i32* %i + br label %forcond + +forcond: ; preds = %forinc, %entry + %tmp = load i32* %i ; <i32> [#uses=1] + %tmp1 = load i32* %n.addr ; <i32> [#uses=1] + %cmp = icmp slt i32 %tmp, %tmp1 ; <i1> [#uses=1] + br i1 %cmp, label %forbody, label %afterfor + +forbody: ; preds = %forcond + %tmp2 = load i32* %i ; <i32> [#uses=1] + %tmp3 = load <3 x float>** %dst.addr ; <<3 x float>*> [#uses=1] + %arrayidx = getelementptr <3 x float>* %tmp3, i32 %tmp2 ; <<3 x float>*> [#uses=1] + %tmp4 = load i32* %i ; <i32> [#uses=1] + %tmp5 = load <3 x float>** %src.addr ; <<3 x float>*> [#uses=1] + %arrayidx6 = getelementptr <3 x float>* %tmp5, i32 %tmp4 ; <<3 x float>*> [#uses=1] + %tmp7 = load <3 x float>* %arrayidx6 ; <<3 x float>> [#uses=1] + %tmp8 = load <3 x float>* %v ; <<3 x float>> [#uses=1] + %mul = fmul <3 x float> %tmp7, %tmp8 ; <<3 x float>> [#uses=1] + %add = fadd <3 x float> %mul, < float 0x409EE02900000000, float 0x409EE02900000000, float 0x409EE02900000000 > ; <<3 x float>> [#uses=1] + store <3 x float> %add, <3 x float>* %arrayidx + br label %forinc + +forinc: ; preds = %forbody + %tmp9 = load i32* %i ; <i32> [#uses=1] + %inc = add i32 %tmp9, 1 ; <i32> [#uses=1] + store i32 %inc, i32* %i + br label %forcond + 
+afterfor: ; preds = %forcond + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/widen_cast-1.ll b/src/LLVM/test/CodeGen/X86/widen_cast-1.ll new file mode 100644 index 0000000..1eace9e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/widen_cast-1.ll
@@ -0,0 +1,44 @@ +; RUN: llc -march=x86 -mattr=+sse42 < %s | FileCheck %s +; CHECK: paddw +; CHECK: pextrd +; CHECK: movd + +; bitcast a v4i16 to v2i32 + +define void @convert(<2 x i32>* %dst, <4 x i16>* %src) nounwind { +entry: + %dst.addr = alloca <2 x i32>* ; <<2 x i32>**> [#uses=2] + %src.addr = alloca <4 x i16>* ; <<4 x i16>**> [#uses=2] + %i = alloca i32, align 4 ; <i32*> [#uses=6] + store <2 x i32>* %dst, <2 x i32>** %dst.addr + store <4 x i16>* %src, <4 x i16>** %src.addr + store i32 0, i32* %i + br label %forcond + +forcond: ; preds = %forinc, %entry + %tmp = load i32* %i ; <i32> [#uses=1] + %cmp = icmp slt i32 %tmp, 4 ; <i1> [#uses=1] + br i1 %cmp, label %forbody, label %afterfor + +forbody: ; preds = %forcond + %tmp1 = load i32* %i ; <i32> [#uses=1] + %tmp2 = load <2 x i32>** %dst.addr ; <<2 x i32>*> [#uses=1] + %arrayidx = getelementptr <2 x i32>* %tmp2, i32 %tmp1 ; <<2 x i32>*> [#uses=1] + %tmp3 = load i32* %i ; <i32> [#uses=1] + %tmp4 = load <4 x i16>** %src.addr ; <<4 x i16>*> [#uses=1] + %arrayidx5 = getelementptr <4 x i16>* %tmp4, i32 %tmp3 ; <<4 x i16>*> [#uses=1] + %tmp6 = load <4 x i16>* %arrayidx5 ; <<4 x i16>> [#uses=1] + %add = add <4 x i16> %tmp6, < i16 1, i16 1, i16 1, i16 1 > ; <<4 x i16>> [#uses=1] + %conv = bitcast <4 x i16> %add to <2 x i32> ; <<2 x i32>> [#uses=1] + store <2 x i32> %conv, <2 x i32>* %arrayidx + br label %forinc + +forinc: ; preds = %forbody + %tmp7 = load i32* %i ; <i32> [#uses=1] + %inc = add i32 %tmp7, 1 ; <i32> [#uses=1] + store i32 %inc, i32* %i + br label %forcond + +afterfor: ; preds = %forcond + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/widen_cast-2.ll b/src/LLVM/test/CodeGen/X86/widen_cast-2.ll new file mode 100644 index 0000000..5c695ea --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/widen_cast-2.ll
@@ -0,0 +1,46 @@ +; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s +; CHECK: pextrd +; CHECK: pextrd +; CHECK: movd +; CHECK: movdqa + + +; bitcast v14i16 to v7i32 + +define void @convert(<7 x i32>* %dst, <14 x i16>* %src) nounwind { +entry: + %dst.addr = alloca <7 x i32>* ; <<7 x i32>**> [#uses=2] + %src.addr = alloca <14 x i16>* ; <<14 x i16>**> [#uses=2] + %i = alloca i32, align 4 ; <i32*> [#uses=6] + store <7 x i32>* %dst, <7 x i32>** %dst.addr + store <14 x i16>* %src, <14 x i16>** %src.addr + store i32 0, i32* %i + br label %forcond + +forcond: ; preds = %forinc, %entry + %tmp = load i32* %i ; <i32> [#uses=1] + %cmp = icmp slt i32 %tmp, 4 ; <i1> [#uses=1] + br i1 %cmp, label %forbody, label %afterfor + +forbody: ; preds = %forcond + %tmp1 = load i32* %i ; <i32> [#uses=1] + %tmp2 = load <7 x i32>** %dst.addr ; <<2 x i32>*> [#uses=1] + %arrayidx = getelementptr <7 x i32>* %tmp2, i32 %tmp1 ; <<7 x i32>*> [#uses=1] + %tmp3 = load i32* %i ; <i32> [#uses=1] + %tmp4 = load <14 x i16>** %src.addr ; <<4 x i16>*> [#uses=1] + %arrayidx5 = getelementptr <14 x i16>* %tmp4, i32 %tmp3 ; <<4 x i16>*> [#uses=1] + %tmp6 = load <14 x i16>* %arrayidx5 ; <<4 x i16>> [#uses=1] + %add = add <14 x i16> %tmp6, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1 > ; <<4 x i16>> [#uses=1] + %conv = bitcast <14 x i16> %add to <7 x i32> ; <<7 x i32>> [#uses=1] + store <7 x i32> %conv, <7 x i32>* %arrayidx + br label %forinc + +forinc: ; preds = %forbody + %tmp7 = load i32* %i ; <i32> [#uses=1] + %inc = add i32 %tmp7, 1 ; <i32> [#uses=1] + store i32 %inc, i32* %i + br label %forcond + +afterfor: ; preds = %forcond + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/widen_cast-3.ll b/src/LLVM/test/CodeGen/X86/widen_cast-3.ll new file mode 100644 index 0000000..87486d9 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/widen_cast-3.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s +; CHECK: paddd +; CHECK: pextrd +; CHECK: pextrd + +; bitcast v12i8 to v3i32 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i686-apple-darwin10.0.0d2" + +define void @convert(<12 x i8>* %dst.addr, <3 x i32> %src) nounwind { +entry: + %add = add <3 x i32> %src, < i32 1, i32 1, i32 1 > ; <<3 x i32>> [#uses=1] + %conv = bitcast <3 x i32> %add to <12 x i8> ; <<12 x i8>> [#uses=1] + store <12 x i8> %conv, <12 x i8>* %dst.addr + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/widen_cast-4.ll b/src/LLVM/test/CodeGen/X86/widen_cast-4.ll new file mode 100644 index 0000000..8e1adf5 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/widen_cast-4.ll
@@ -0,0 +1,67 @@ +; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s +; CHECK: sarb +; CHECK: sarb +; CHECK: sarb +; CHECK: sarb +; CHECK: sarb +; CHECK: sarb +; CHECK: sarb +; CHECK: sarb + +; v8i8 that is widen to v16i8 then split +; FIXME: This is widen to v16i8 and split to 16 and we then rebuild the vector. +; Unfortunately, we don't split the store so we don't get the code we want. + +define void @update(i64* %dst_i, i64* %src_i, i32 %n) nounwind { +entry: + %dst_i.addr = alloca i64* ; <i64**> [#uses=2] + %src_i.addr = alloca i64* ; <i64**> [#uses=2] + %n.addr = alloca i32 ; <i32*> [#uses=2] + %i = alloca i32, align 4 ; <i32*> [#uses=8] + %dst = alloca <8 x i8>*, align 4 ; <<8 x i8>**> [#uses=2] + %src = alloca <8 x i8>*, align 4 ; <<8 x i8>**> [#uses=2] + store i64* %dst_i, i64** %dst_i.addr + store i64* %src_i, i64** %src_i.addr + store i32 %n, i32* %n.addr + store i32 0, i32* %i + br label %forcond + +forcond: ; preds = %forinc, %entry + %tmp = load i32* %i ; <i32> [#uses=1] + %tmp1 = load i32* %n.addr ; <i32> [#uses=1] + %cmp = icmp slt i32 %tmp, %tmp1 ; <i1> [#uses=1] + br i1 %cmp, label %forbody, label %afterfor + +forbody: ; preds = %forcond + %tmp2 = load i32* %i ; <i32> [#uses=1] + %tmp3 = load i64** %dst_i.addr ; <i64*> [#uses=1] + %arrayidx = getelementptr i64* %tmp3, i32 %tmp2 ; <i64*> [#uses=1] + %conv = bitcast i64* %arrayidx to <8 x i8>* ; <<8 x i8>*> [#uses=1] + store <8 x i8>* %conv, <8 x i8>** %dst + %tmp4 = load i32* %i ; <i32> [#uses=1] + %tmp5 = load i64** %src_i.addr ; <i64*> [#uses=1] + %arrayidx6 = getelementptr i64* %tmp5, i32 %tmp4 ; <i64*> [#uses=1] + %conv7 = bitcast i64* %arrayidx6 to <8 x i8>* ; <<8 x i8>*> [#uses=1] + store <8 x i8>* %conv7, <8 x i8>** %src + %tmp8 = load i32* %i ; <i32> [#uses=1] + %tmp9 = load <8 x i8>** %dst ; <<8 x i8>*> [#uses=1] + %arrayidx10 = getelementptr <8 x i8>* %tmp9, i32 %tmp8 ; <<8 x i8>*> [#uses=1] + %tmp11 = load i32* %i ; <i32> [#uses=1] + %tmp12 = load <8 x i8>** %src ; <<8 x i8>*> 
[#uses=1] + %arrayidx13 = getelementptr <8 x i8>* %tmp12, i32 %tmp11 ; <<8 x i8>*> [#uses=1] + %tmp14 = load <8 x i8>* %arrayidx13 ; <<8 x i8>> [#uses=1] + %add = add <8 x i8> %tmp14, < i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1 > ; <<8 x i8>> [#uses=1] + %shr = ashr <8 x i8> %add, < i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2 > ; <<8 x i8>> [#uses=1] + store <8 x i8> %shr, <8 x i8>* %arrayidx10 + br label %forinc + +forinc: ; preds = %forbody + %tmp15 = load i32* %i ; <i32> [#uses=1] + %inc = add i32 %tmp15, 1 ; <i32> [#uses=1] + store i32 %inc, i32* %i + br label %forcond + +afterfor: ; preds = %forcond + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/widen_cast-5.ll b/src/LLVM/test/CodeGen/X86/widen_cast-5.ll new file mode 100644 index 0000000..136578d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/widen_cast-5.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s +; CHECK: movl +; CHECK: movd + +; bitcast a i64 to v2i32 + +define void @convert(<2 x i32>* %dst.addr, i64 %src) nounwind { +entry: + %conv = bitcast i64 %src to <2 x i32> + %xor = xor <2 x i32> %conv, < i32 255, i32 32767 > + store <2 x i32> %xor, <2 x i32>* %dst.addr + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/widen_cast-6.ll b/src/LLVM/test/CodeGen/X86/widen_cast-6.ll new file mode 100644 index 0000000..3903234 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/widen_cast-6.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=x86 -mattr=+sse41 | FileCheck %s +; CHECK: movd + +; Test bit convert that requires widening in the operand. + +define i32 @return_v2hi() nounwind { +entry: + %retval12 = bitcast <2 x i16> zeroinitializer to i32 ; <i32> [#uses=1] + ret i32 %retval12 +}
diff --git a/src/LLVM/test/CodeGen/X86/widen_conv-1.ll b/src/LLVM/test/CodeGen/X86/widen_conv-1.ll new file mode 100644 index 0000000..f6810cd --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/widen_conv-1.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s +; CHECK: pshufd +; CHECK: paddd + +; truncate v2i64 to v2i32 + +define void @convert(<2 x i32>* %dst.addr, <2 x i64> %src) nounwind { +entry: + %val = trunc <2 x i64> %src to <2 x i32> + %add = add <2 x i32> %val, < i32 1, i32 1 > + store <2 x i32> %add, <2 x i32>* %dst.addr + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/widen_conv-2.ll b/src/LLVM/test/CodeGen/X86/widen_conv-2.ll new file mode 100644 index 0000000..969cb51 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/widen_conv-2.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s +; CHECK: movswl +; CHECK: movswl + +; sign extension v2i32 to v2i16 + +define void @convert(<2 x i32>* %dst.addr, <2 x i16> %src) nounwind { +entry: + %signext = sext <2 x i16> %src to <2 x i32> ; <<12 x i8>> [#uses=1] + store <2 x i32> %signext, <2 x i32>* %dst.addr + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/widen_conv-3.ll b/src/LLVM/test/CodeGen/X86/widen_conv-3.ll new file mode 100644 index 0000000..a25fae9 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/widen_conv-3.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s +; CHECK: cvtsi2ss + +; sign to float v2i16 to v2f32 + +define void @convert(<2 x float>* %dst.addr, <2 x i16> %src) nounwind { +entry: + %val = sitofp <2 x i16> %src to <2 x float> + store <2 x float> %val, <2 x float>* %dst.addr + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/widen_conv-4.ll b/src/LLVM/test/CodeGen/X86/widen_conv-4.ll new file mode 100644 index 0000000..80f3a49 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/widen_conv-4.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s +; CHECK: cvtsi2ss + +; unsigned to float v7i16 to v7f32 + +define void @convert(<7 x float>* %dst.addr, <7 x i16> %src) nounwind { +entry: + %val = sitofp <7 x i16> %src to <7 x float> + store <7 x float> %val, <7 x float>* %dst.addr + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/widen_extract-1.ll b/src/LLVM/test/CodeGen/X86/widen_extract-1.ll new file mode 100644 index 0000000..4bcac58 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/widen_extract-1.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86-64 -mattr=+sse42 | FileCheck %s +; widen extract subvector + +define void @convert(<2 x double>* %dst.addr, <3 x double> %src) { +entry: +; CHECK: convert: +; CHECK: unpcklpd {{%xmm[0-7]}}, {{%xmm[0-7]}} +; CHECK-NEXT: movapd + %val = shufflevector <3 x double> %src, <3 x double> undef, <2 x i32> < i32 0, i32 1> + store <2 x double> %val, <2 x double>* %dst.addr + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/widen_load-0.ll b/src/LLVM/test/CodeGen/X86/widen_load-0.ll new file mode 100644 index 0000000..c91627c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/widen_load-0.ll
@@ -0,0 +1,27 @@ +; RUN: llc < %s -o - -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -o - -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64 +; PR4891 + +; Both loads should happen before either store. + +; CHECK: movl (%rdi), %[[R1:...]] +; CHECK: movl (%rsi), %[[R2:...]] +; CHECK: movl %[[R2]], (%rdi) +; CHECK: movl %[[R1]], (%rsi) + +; WIN64: movl (%rcx), %[[R1:...]] +; WIN64: movl (%rdx), %[[R2:...]] +; WIN64: movl %[[R2]], (%rcx) +; WIN64: movl %[[R1]], (%rdx) + +define void @short2_int_swap(<2 x i16>* nocapture %b, i32* nocapture %c) nounwind { +entry: + %0 = load <2 x i16>* %b, align 2 ; <<2 x i16>> [#uses=1] + %1 = load i32* %c, align 4 ; <i32> [#uses=1] + %tmp1 = bitcast i32 %1 to <2 x i16> ; <<2 x i16>> [#uses=1] + store <2 x i16> %tmp1, <2 x i16>* %b, align 2 + %tmp5 = bitcast <2 x i16> %0 to <1 x i32> ; <<1 x i32>> [#uses=1] + %tmp3 = extractelement <1 x i32> %tmp5, i32 0 ; <i32> [#uses=1] + store i32 %tmp3, i32* %c, align 4 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/widen_load-1.ll b/src/LLVM/test/CodeGen/X86/widen_load-1.ll new file mode 100644 index 0000000..639617f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/widen_load-1.ll
@@ -0,0 +1,45 @@ +; RUN: llc %s -o - -march=x86-64 -mtriple=x86_64-unknown-linux-gnu | FileCheck %s +; PR4891 + +; This load should be before the call, not after. + +; CHECK: movaps compl+128(%rip), %xmm0 +; CHECK: movaps %xmm0, (%rsp) +; CHECK: callq killcommon + +@compl = linkonce global [20 x i64] zeroinitializer, align 64 ; <[20 x i64]*> [#uses=1] + +declare void @killcommon(i32* noalias) + +define void @reset(<2 x float>* noalias %garbage1) { +"file complex.c, line 27, bb1": + %changed = alloca i32, align 4 ; <i32*> [#uses=3] + br label %"file complex.c, line 27, bb13" + +"file complex.c, line 27, bb13": ; preds = %"file complex.c, line 27, bb1" + store i32 0, i32* %changed, align 4 + %r2 = getelementptr float* bitcast ([20 x i64]* @compl to float*), i64 32 ; <float*> [#uses=1] + %r3 = bitcast float* %r2 to <2 x float>* ; <<2 x float>*> [#uses=1] + %r4 = load <2 x float>* %r3, align 4 ; <<2 x float>> [#uses=1] + call void @killcommon(i32* %changed) + br label %"file complex.c, line 34, bb4" + +"file complex.c, line 34, bb4": ; preds = %"file complex.c, line 27, bb13" + %r5 = load i32* %changed, align 4 ; <i32> [#uses=1] + %r6 = icmp eq i32 %r5, 0 ; <i1> [#uses=1] + %r7 = zext i1 %r6 to i32 ; <i32> [#uses=1] + %r8 = icmp ne i32 %r7, 0 ; <i1> [#uses=1] + br i1 %r8, label %"file complex.c, line 34, bb7", label %"file complex.c, line 27, bb5" + +"file complex.c, line 27, bb5": ; preds = %"file complex.c, line 34, bb4" + br label %"file complex.c, line 35, bb6" + +"file complex.c, line 35, bb6": ; preds = %"file complex.c, line 27, bb5" + %r11 = ptrtoint <2 x float>* %garbage1 to i64 ; <i64> [#uses=1] + %r12 = inttoptr i64 %r11 to <2 x float>* ; <<2 x float>*> [#uses=1] + store <2 x float> %r4, <2 x float>* %r12, align 4 + br label %"file complex.c, line 34, bb7" + +"file complex.c, line 34, bb7": ; preds = %"file complex.c, line 35, bb6", %"file complex.c, line 34, bb4" + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/widen_load-2.ll b/src/LLVM/test/CodeGen/X86/widen_load-2.ll new file mode 100644 index 0000000..6422063 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/widen_load-2.ll
@@ -0,0 +1,179 @@ +; RUN: llc < %s -o - -march=x86-64 -mattr=+sse42 | FileCheck %s + +; Test based on pr5626 to load/store +; + +%i32vec3 = type <3 x i32> +define void @add3i32(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) { +; CHECK: movdqa +; CHECK: paddd +; CHECK: pextrd +; CHECK: movq + %a = load %i32vec3* %ap, align 16 + %b = load %i32vec3* %bp, align 16 + %x = add %i32vec3 %a, %b + store %i32vec3 %x, %i32vec3* %ret, align 16 + ret void +} + +define void @add3i32_2(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) { +; CHECK: movq +; CHECK: pinsrd +; CHECK: movq +; CHECK: pinsrd +; CHECK: paddd +; CHECK: pextrd +; CHECK: movq + %a = load %i32vec3* %ap, align 8 + %b = load %i32vec3* %bp, align 8 + %x = add %i32vec3 %a, %b + store %i32vec3 %x, %i32vec3* %ret, align 8 + ret void +} + +%i32vec7 = type <7 x i32> +define void @add7i32(%i32vec7* sret %ret, %i32vec7* %ap, %i32vec7* %bp) { +; CHECK: movdqa +; CHECK: movdqa +; CHECK: paddd +; CHECK: paddd +; CHECK: pextrd +; CHECK: movq +; CHECK: movdqa + %a = load %i32vec7* %ap, align 16 + %b = load %i32vec7* %bp, align 16 + %x = add %i32vec7 %a, %b + store %i32vec7 %x, %i32vec7* %ret, align 16 + ret void +} + +%i32vec12 = type <12 x i32> +define void @add12i32(%i32vec12* sret %ret, %i32vec12* %ap, %i32vec12* %bp) { +; CHECK: movdqa +; CHECK: movdqa +; CHECK: movdqa +; CHECK: paddd +; CHECK: paddd +; CHECK: paddd +; CHECK: movdqa +; CHECK: movdqa +; CHECK: movdqa + %a = load %i32vec12* %ap, align 16 + %b = load %i32vec12* %bp, align 16 + %x = add %i32vec12 %a, %b + store %i32vec12 %x, %i32vec12* %ret, align 16 + ret void +} + + +%i16vec3 = type <3 x i16> +define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp) nounwind { +; CHECK: movdqa +; CHECK: paddw +; CHECK: movd +; CHECK: pextrw + %a = load %i16vec3* %ap, align 16 + %b = load %i16vec3* %bp, align 16 + %x = add %i16vec3 %a, %b + store %i16vec3 %x, %i16vec3* %ret, align 16 + ret void +} + +%i16vec4 = type <4 x i16> +define void 
@add4i16(%i16vec4* nocapture sret %ret, %i16vec4* %ap, %i16vec4* %bp) nounwind { +; CHECK: movdqa +; CHECK: paddw +; CHECK: movq + %a = load %i16vec4* %ap, align 16 + %b = load %i16vec4* %bp, align 16 + %x = add %i16vec4 %a, %b + store %i16vec4 %x, %i16vec4* %ret, align 16 + ret void +} + +%i16vec12 = type <12 x i16> +define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12* %bp) nounwind { +; CHECK: movdqa +; CHECK: movdqa +; CHECK: paddw +; CHECK: paddw +; CHECK: movq +; CHECK: movdqa + %a = load %i16vec12* %ap, align 16 + %b = load %i16vec12* %bp, align 16 + %x = add %i16vec12 %a, %b + store %i16vec12 %x, %i16vec12* %ret, align 16 + ret void +} + +%i16vec18 = type <18 x i16> +define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18* %bp) nounwind { +; CHECK: movdqa +; CHECK: movdqa +; CHECK: movdqa +; CHECK: paddw +; CHECK: paddw +; CHECK: paddw +; CHECK: movd +; CHECK: movdqa +; CHECK: movdqa + %a = load %i16vec18* %ap, align 16 + %b = load %i16vec18* %bp, align 16 + %x = add %i16vec18 %a, %b + store %i16vec18 %x, %i16vec18* %ret, align 16 + ret void +} + + +%i8vec3 = type <3 x i8> +define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) nounwind { +; CHECK: movdqa +; CHECK: paddb +; CHECK: pextrb +; CHECK: movb + %a = load %i8vec3* %ap, align 16 + %b = load %i8vec3* %bp, align 16 + %x = add %i8vec3 %a, %b + store %i8vec3 %x, %i8vec3* %ret, align 16 + ret void +} + +%i8vec31 = type <31 x i8> +define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp) nounwind { +; CHECK: movdqa +; CHECK: movdqa +; CHECK: paddb +; CHECK: paddb +; CHECK: movq +; CHECK: pextrb +; CHECK: pextrw + %a = load %i8vec31* %ap, align 16 + %b = load %i8vec31* %bp, align 16 + %x = add %i8vec31 %a, %b + store %i8vec31 %x, %i8vec31* %ret, align 16 + ret void +} + + +%i8vec3pack = type { <3 x i8>, i8 } +define %i8vec3pack @rot() nounwind { +; CHECK: shrb +entry: + %X = alloca %i8vec3pack, align 4 + %rot 
= alloca %i8vec3pack, align 4 + %result = alloca %i8vec3pack, align 4 + %storetmp = bitcast %i8vec3pack* %X to <3 x i8>* + store <3 x i8> <i8 -98, i8 -98, i8 -98>, <3 x i8>* %storetmp + %storetmp1 = bitcast %i8vec3pack* %rot to <3 x i8>* + store <3 x i8> <i8 1, i8 1, i8 1>, <3 x i8>* %storetmp1 + %tmp = load %i8vec3pack* %X + %extractVec = extractvalue %i8vec3pack %tmp, 0 + %tmp2 = load %i8vec3pack* %rot + %extractVec3 = extractvalue %i8vec3pack %tmp2, 0 + %shr = lshr <3 x i8> %extractVec, %extractVec3 + %storetmp4 = bitcast %i8vec3pack* %result to <3 x i8>* + store <3 x i8> %shr, <3 x i8>* %storetmp4 + %tmp5 = load %i8vec3pack* %result + ret %i8vec3pack %tmp5 +} +
diff --git a/src/LLVM/test/CodeGen/X86/widen_shuffle-1.ll b/src/LLVM/test/CodeGen/X86/widen_shuffle-1.ll new file mode 100644 index 0000000..8e951b7 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/widen_shuffle-1.ll
@@ -0,0 +1,56 @@ +; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s + +; widening shuffle v3float and then a add +define void @shuf(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind { +entry: +; CHECK: shuf: +; CHECK: extractps +; CHECK: extractps + %x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 1, i32 2> + %val = fadd <3 x float> %x, %src2 + store <3 x float> %val, <3 x float>* %dst.addr + ret void +} + + +; widening shuffle v3float with a different mask and then a add +define void @shuf2(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind { +entry: +; CHECK: shuf2: +; CHECK: extractps +; CHECK: extractps + %x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 4, i32 2> + %val = fadd <3 x float> %x, %src2 + store <3 x float> %val, <3 x float>* %dst.addr + ret void +} + +; Example of when widening a v3float operation causes the DAG to replace a node +; with the operation that we are currently widening, i.e. when replacing +; opA with opB, the DAG will produce new operations with opA. 
+define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) nounwind { +entry: +; CHECK: shuf3: +; CHECK: pshufd + %shuffle.i.i.i12 = shufflevector <4 x float> %tmp10, <4 x float> %vecinit15, <4 x i32> <i32 0, i32 1, i32 4, i32 5> + %tmp25.i.i = shufflevector <4 x float> %shuffle.i.i.i12, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> + %tmp1.i.i = shufflevector <3 x float> %tmp25.i.i, <3 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %tmp3.i13 = shufflevector <4 x float> %tmp1.i.i, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> ; <<3 x float>> + %tmp6.i14 = shufflevector <3 x float> %tmp3.i13, <3 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %tmp97.i = shufflevector <4 x float> %tmp6.i14, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> + %tmp2.i18 = shufflevector <3 x float> %tmp97.i, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 2> + %t5 = bitcast <4 x float> %tmp2.i18 to <4 x i32> + %shr.i.i19 = lshr <4 x i32> %t5, <i32 19, i32 19, i32 19, i32 19> + %and.i.i20 = and <4 x i32> %shr.i.i19, <i32 4080, i32 4080, i32 4080, i32 4080> + %shuffle.i.i.i21 = shufflevector <4 x float> %tmp2.i18, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3> + store <4 x float> %shuffle.i.i.i21, <4 x float>* %dst + ret void +} + +; PR10421: make sure we correctly handle extreme widening with CONCAT_VECTORS +define <8 x i8> @shuf4(<4 x i8> %a, <4 x i8> %b) nounwind readnone { +; CHECK: shuf4: +; CHECK: punpckldq + %vshuf = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + ret <8 x i8> %vshuf +}
diff --git a/src/LLVM/test/CodeGen/X86/win64_alloca_dynalloca.ll b/src/LLVM/test/CodeGen/X86/win64_alloca_dynalloca.ll new file mode 100644 index 0000000..e39d007 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/win64_alloca_dynalloca.ll
@@ -0,0 +1,77 @@ +; RUN: llc < %s -join-physregs -mtriple=x86_64-mingw32 | FileCheck %s -check-prefix=M64 +; RUN: llc < %s -join-physregs -mtriple=x86_64-win32 | FileCheck %s -check-prefix=W64 +; RUN: llc < %s -join-physregs -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI +; PR8777 +; PR8778 + +; Passing the same value in two registers creates a false interference that +; only -join-physregs resolves. It could also be handled by a parallel copy. + +define i64 @foo(i64 %n, i64 %x) nounwind { +entry: + + %buf0 = alloca i8, i64 4096, align 1 + +; ___chkstk must adjust %rsp. +; M64: movq %rsp, %rbp +; M64: $4096, %rax +; M64: callq ___chkstk +; M64-NOT: %rsp + +; __chkstk does not adjust %rsp. +; W64: movq %rsp, %rbp +; W64: $4096, %rax +; W64: callq __chkstk +; W64: subq $4096, %rsp + +; Freestanding +; EFI: movq %rsp, %rbp +; EFI: $[[B0OFS:4096|4104]], %rsp +; EFI-NOT: call + + %buf1 = alloca i8, i64 %n, align 1 + +; M64: leaq 15(%rcx), %rax +; M64: andq $-16, %rax +; M64: callq ___chkstk +; M64-NOT: %rsp +; M64: movq %rsp, %rax + +; W64: leaq 15(%rcx), %rax +; W64: andq $-16, %rax +; W64: callq __chkstk +; W64: subq %rax, %rsp +; W64: movq %rsp, %rax + +; EFI: leaq 15(%rcx), [[R1:%r.*]] +; EFI: andq $-16, [[R1]] +; EFI: movq %rsp, [[R64:%r.*]] +; EFI: subq [[R1]], [[R64]] +; EFI: movq [[R64]], %rsp + + %r = call i64 @bar(i64 %n, i64 %x, i64 %n, i8* %buf0, i8* %buf1) nounwind + +; M64: subq $48, %rsp +; M64: leaq -4096(%rbp), %r9 +; M64: movq %rax, 32(%rsp) +; M64: callq bar + +; W64: subq $48, %rsp +; W64: leaq -4096(%rbp), %r9 +; W64: movq %rax, 32(%rsp) +; W64: callq bar + +; EFI: subq $48, %rsp +; EFI: leaq -[[B0OFS]](%rbp), %r9 +; EFI: movq [[R64]], 32(%rsp) +; EFI: callq _bar + + ret i64 %r + +; M64: movq %rbp, %rsp + +; W64: movq %rbp, %rsp + +} + +declare i64 @bar(i64, i64, i64, i8* nocapture, i8* nocapture) nounwind
diff --git a/src/LLVM/test/CodeGen/X86/win64_params.ll b/src/LLVM/test/CodeGen/X86/win64_params.ll new file mode 100644 index 0000000..f9d4bf9 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/win64_params.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s + +; Verify that the 5th and 6th parameters are coming from the correct location +; on the stack. +define i32 @f6(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6) nounwind readnone optsize { +entry: +; CHECK: movl 48(%rsp), %eax +; CHECK: addl 40(%rsp), %eax + %add = add nsw i32 %p6, %p5 + ret i32 %add +}
diff --git a/src/LLVM/test/CodeGen/X86/win64_vararg.ll b/src/LLVM/test/CodeGen/X86/win64_vararg.ll new file mode 100644 index 0000000..efe8bca --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/win64_vararg.ll
@@ -0,0 +1,53 @@ +; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s + +; Verify that the var arg parameters which are passed in registers are stored +; in home stack slots allocated by the caller and that AP is correctly +; calculated. +define void @average_va(i32 %count, ...) nounwind { +entry: +; CHECK: pushq +; CHECK: movq %r9, 40(%rsp) +; CHECK: movq %r8, 32(%rsp) +; CHECK: movq %rdx, 24(%rsp) +; CHECK: leaq 24(%rsp), %rax + + %ap = alloca i8*, align 8 ; <i8**> [#uses=1] + %ap1 = bitcast i8** %ap to i8* ; <i8*> [#uses=1] + call void @llvm.va_start(i8* %ap1) + ret void +} + +declare void @llvm.va_start(i8*) nounwind + +; CHECK: f5: +; CHECK: pushq +; CHECK: leaq 56(%rsp), +define i8* @f5(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, ...) nounwind { +entry: + %ap = alloca i8*, align 8 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + ret i8* %ap1 +} + +; CHECK: f4: +; CHECK: pushq +; CHECK: leaq 48(%rsp), +define i8* @f4(i64 %a0, i64 %a1, i64 %a2, i64 %a3, ...) nounwind { +entry: + %ap = alloca i8*, align 8 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + ret i8* %ap1 +} + +; CHECK: f3: +; CHECK: pushq +; CHECK: leaq 40(%rsp), +define i8* @f3(i64 %a0, i64 %a1, i64 %a2, ...) nounwind { +entry: + %ap = alloca i8*, align 8 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + ret i8* %ap1 +}
diff --git a/src/LLVM/test/CodeGen/X86/win_chkstk.ll b/src/LLVM/test/CodeGen/X86/win_chkstk.ll new file mode 100644 index 0000000..e4e4483 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/win_chkstk.ll
@@ -0,0 +1,47 @@ +; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN_X32 +; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=WIN_X64 +; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X32 +; RUN: llc < %s -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X64 +; RUN: llc < %s -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX +; RUN: llc < %s -mtriple=x86_64-pc-win32-macho | FileCheck %s -check-prefix=LINUX + +; Windows and mingw require a prologue helper routine if more than 4096 bytes area +; allocated on the stack. Windows uses __chkstk and mingw uses __alloca. __alloca +; and the 32-bit version of __chkstk will probe the stack and adjust the stack pointer. +; The 64-bit version of __chkstk is only responsible for probing the stack. The 64-bit +; prologue is responsible for adjusting the stack pointer. + +; Stack allocation >= 4096 bytes will require call to __chkstk in the Windows ABI. +define i32 @main4k() nounwind { +entry: +; WIN_X32: calll __chkstk +; WIN_X64: callq __chkstk +; MINGW_X32: calll __alloca +; MINGW_X64: callq ___chkstk +; LINUX-NOT: call __chkstk + %array4096 = alloca [4096 x i8], align 16 ; <[4096 x i8]*> [#uses=0] + ret i32 0 +} + +; Make sure we don't call __chkstk or __alloca when we have less than a 4096 stack +; allocation. +define i32 @main128() nounwind { +entry: +; WIN_X32: # BB#0: +; WIN_X32-NOT: calll __chkstk +; WIN_X32: ret + +; WIN_X64: # BB#0: +; WIN_X64-NOT: callq __chkstk +; WIN_X64: ret + +; MINGW_X64: # BB#0: +; MINGW_X64-NOT: callq _alloca +; MINGW_X64: ret + +; LINUX: # BB#0: +; LINUX-NOT: call __chkstk +; LINUX: ret + %array128 = alloca [128 x i8], align 16 ; <[128 x i8]*> [#uses=0] + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/X86/x86-64-and-mask.ll b/src/LLVM/test/CodeGen/X86/x86-64-and-mask.ll new file mode 100644 index 0000000..07ccb23 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/x86-64-and-mask.ll
@@ -0,0 +1,49 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-apple-darwin8" + +; This should be a single mov, not a load of immediate + andq. +; CHECK: test: +; CHECK: movl %edi, %eax + +define i64 @test(i64 %x) nounwind { +entry: + %tmp123 = and i64 %x, 4294967295 ; <i64> [#uses=1] + ret i64 %tmp123 +} + +; This copy can't be coalesced away because it needs the implicit zero-extend. +; CHECK: bbb: +; CHECK: movl %edi, %edi + +define void @bbb(i64 %x) nounwind { + %t = and i64 %x, 4294967295 + call void @foo(i64 %t) + ret void +} + +; This should use a 32-bit and with implicit zero-extension, not a 64-bit and +; with a separate mov to materialize the mask. +; rdar://7527390 +; CHECK: ccc: +; CHECK: andl $-1048593, %edi + +declare void @foo(i64 %x) nounwind + +define void @ccc(i64 %x) nounwind { + %t = and i64 %x, 4293918703 + call void @foo(i64 %t) + ret void +} + +; This requires a mov and a 64-bit and. +; CHECK: ddd: +; CHECK: movabsq $4294967296, %r +; CHECK: andq %rax, %rdi + +define void @ddd(i64 %x) nounwind { + %t = and i64 %x, 4294967296 + call void @foo(i64 %t) + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/x86-64-arg.ll b/src/LLVM/test/CodeGen/X86/x86-64-arg.ll new file mode 100644 index 0000000..c53df6a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/x86-64-arg.ll
@@ -0,0 +1,15 @@ +; RUN: llc < %s | grep {movl %edi, %eax} +; The input value is already sign extended, don't re-extend it. +; This testcase corresponds to: +; int test(short X) { return (int)X; } + +target datalayout = "e-p:64:64" +target triple = "x86_64-apple-darwin8" + + +define i32 @test(i16 signext %X) { +entry: + %tmp12 = sext i16 %X to i32 ; <i32> [#uses=1] + ret i32 %tmp12 +} +
diff --git a/src/LLVM/test/CodeGen/X86/x86-64-asm.ll b/src/LLVM/test/CodeGen/X86/x86-64-asm.ll new file mode 100644 index 0000000..8e9769f --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/x86-64-asm.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s +; PR1029 + +target datalayout = "e-p:64:64" +target triple = "x86_64-unknown-linux-gnu" + +define void @frame_dummy() { +entry: + %tmp1 = tail call void (i8*)* (void (i8*)*)* asm "", "=r,0,~{dirflag},~{fpsr},~{flags}"( void (i8*)* null ) ; <void (i8*)*> [#uses=0] + ret void +} +
diff --git a/src/LLVM/test/CodeGen/X86/x86-64-dead-stack-adjust.ll b/src/LLVM/test/CodeGen/X86/x86-64-dead-stack-adjust.ll new file mode 100644 index 0000000..79316f2 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/x86-64-dead-stack-adjust.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s | not grep rsp +; RUN: llc < %s | grep cvttsd2siq + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-apple-darwin8" + +define double @a(double %b) nounwind { +entry: + %tmp12 = fptoui double %b to i32 ; <i32> [#uses=1] + %tmp123 = uitofp i32 %tmp12 to double ; <double> [#uses=1] + ret double %tmp123 +}
diff --git a/src/LLVM/test/CodeGen/X86/x86-64-disp.ll b/src/LLVM/test/CodeGen/X86/x86-64-disp.ll new file mode 100644 index 0000000..d8059eb --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/x86-64-disp.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -march=x86-64 | grep mov | count 2 + +; Fold an offset into an address even if it's not a 32-bit +; signed integer. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" +@call_used_regs = external global [53 x i8], align 32 + +define fastcc void @foo() nounwind { + %t = getelementptr [53 x i8]* @call_used_regs, i64 0, i64 4294967295 + store i8 1, i8* %t, align 1 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/x86-64-extend-shift.ll b/src/LLVM/test/CodeGen/X86/x86-64-extend-shift.ll new file mode 100644 index 0000000..6ebaeee --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/x86-64-extend-shift.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s +; Formerly there were two shifts. + +define i64 @baz(i32 %A) nounwind { +; CHECK: shlq $49, %r + %tmp1 = shl i32 %A, 17 + %tmp2 = zext i32 %tmp1 to i64 + %tmp3 = shl i64 %tmp2, 32 + ret i64 %tmp3 +}
diff --git a/src/LLVM/test/CodeGen/X86/x86-64-frameaddr.ll b/src/LLVM/test/CodeGen/X86/x86-64-frameaddr.ll new file mode 100644 index 0000000..57163d3 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/x86-64-frameaddr.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=x86-64 | grep movq | grep rbp + +define i64* @stack_end_address() nounwind { +entry: + tail call i8* @llvm.frameaddress( i32 0 ) + bitcast i8* %0 to i64* + ret i64* %1 +} + +declare i8* @llvm.frameaddress(i32) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/X86/x86-64-gv-offset.ll b/src/LLVM/test/CodeGen/X86/x86-64-gv-offset.ll new file mode 100644 index 0000000..365e4af --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/x86-64-gv-offset.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | not grep lea + + %struct.x = type { float, double } +@X = global %struct.x { float 1.000000e+00, double 2.000000e+00 }, align 16 ; <%struct.x*> [#uses=2] + +define i32 @main() nounwind { +entry: + %tmp2 = load float* getelementptr (%struct.x* @X, i32 0, i32 0), align 16 ; <float> [#uses=1] + %tmp4 = load double* getelementptr (%struct.x* @X, i32 0, i32 1), align 8 ; <double> [#uses=1] + tail call void @t( float %tmp2, double %tmp4 ) nounwind + ret i32 0 +} + +declare void @t(float, double)
diff --git a/src/LLVM/test/CodeGen/X86/x86-64-jumps.ll b/src/LLVM/test/CodeGen/X86/x86-64-jumps.ll new file mode 100644 index 0000000..11b40c8 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/x86-64-jumps.ll
@@ -0,0 +1,45 @@ +; RUN: llc < %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-apple-darwin10.0" + +define i8 @test1() nounwind ssp { +entry: + %0 = select i1 undef, i8* blockaddress(@test1, %bb), i8* blockaddress(@test1, %bb6) ; <i8*> [#uses=1] + indirectbr i8* %0, [label %bb, label %bb6] + +bb: ; preds = %entry + ret i8 1 + +bb6: ; preds = %entry + ret i8 2 +} + + +; PR5930 - Trunc of block address differences. +@test.array = internal constant [3 x i32] [i32 trunc (i64 sub (i64 ptrtoint (i8* blockaddress(@test2, %foo) to i64), i64 ptrtoint (i8* blockaddress(@test2, %foo) to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (i8* blockaddress(@test2, %bar) to i64), i64 ptrtoint (i8* blockaddress(@test2, %foo) to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (i8* blockaddress(@test2, %hack) to i64), i64 ptrtoint (i8* blockaddress(@test2, %foo) to i64)) to i32)] ; <[3 x i32]*> [#uses=1] + +define void @test2(i32 %i) nounwind ssp { +entry: + %i.addr = alloca i32 ; <i32*> [#uses=2] + store i32 %i, i32* %i.addr + %tmp = load i32* %i.addr ; <i32> [#uses=1] + %idxprom = sext i32 %tmp to i64 ; <i64> [#uses=1] + %arrayidx = getelementptr inbounds i32* getelementptr inbounds ([3 x i32]* @test.array, i32 0, i32 0), i64 %idxprom ; <i32*> [#uses=1] + %tmp1 = load i32* %arrayidx ; <i32> [#uses=1] + %idx.ext = sext i32 %tmp1 to i64 ; <i64> [#uses=1] + %add.ptr = getelementptr i8* blockaddress(@test2, %foo), i64 %idx.ext ; <i8*> [#uses=1] + br label %indirectgoto + +foo: ; preds = %indirectgoto, %indirectgoto, %indirectgoto, %indirectgoto, %indirectgoto + br label %bar + +bar: ; preds = %foo, %indirectgoto + br label %hack + +hack: ; preds = %bar, %indirectgoto + ret void + +indirectgoto: ; preds = %entry + %indirect.goto.dest = phi i8* [ %add.ptr, %entry ] ; <i8*> [#uses=1] + indirectbr i8* %indirect.goto.dest, [label %foo, label %foo, label %bar, 
label %foo, label %hack, label %foo, label %foo] +}
diff --git a/src/LLVM/test/CodeGen/X86/x86-64-mem.ll b/src/LLVM/test/CodeGen/X86/x86-64-mem.ll new file mode 100644 index 0000000..d2fc1bd --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/x86-64-mem.ll
@@ -0,0 +1,36 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -o %t1 +; RUN: grep GOTPCREL %t1 | count 4 +; RUN: grep %%rip %t1 | count 6 +; RUN: grep movq %t1 | count 6 +; RUN: grep leaq %t1 | count 1 +; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=static -o %t2 +; RUN: grep movl %t2 | count 2 +; RUN: grep movq %t2 | count 2 + +@ptr = external global i32* ; <i32**> [#uses=1] +@src = external global [0 x i32] ; <[0 x i32]*> [#uses=1] +@dst = external global [0 x i32] ; <[0 x i32]*> [#uses=1] +@lptr = internal global i32* null ; <i32**> [#uses=1] +@ldst = internal global [500 x i32] zeroinitializer, align 32 ; <[500 x i32]*> [#uses=1] +@lsrc = internal global [500 x i32] zeroinitializer, align 32 ; <[500 x i32]*> [#uses=0] +@bsrc = internal global [500000 x i32] zeroinitializer, align 32 ; <[500000 x i32]*> [#uses=0] +@bdst = internal global [500000 x i32] zeroinitializer, align 32 ; <[500000 x i32]*> [#uses=0] + +define void @test1() nounwind { + %tmp = load i32* getelementptr ([0 x i32]* @src, i32 0, i32 0) ; <i32> [#uses=1] + store i32 %tmp, i32* getelementptr ([0 x i32]* @dst, i32 0, i32 0) + ret void +} + +define void @test2() nounwind { + store i32* getelementptr ([0 x i32]* @dst, i32 0, i32 0), i32** @ptr + ret void +} + +define void @test3() nounwind { + store i32* getelementptr ([500 x i32]* @ldst, i32 0, i32 0), i32** @lptr + br label %return + +return: ; preds = %0 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/x86-64-pic-1.ll b/src/LLVM/test/CodeGen/X86/x86-64-pic-1.ll new file mode 100644 index 0000000..46f6d33 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/x86-64-pic-1.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 +; RUN: grep {callq f@PLT} %t1 + +define void @g() { +entry: + call void @f( ) + ret void +} + +declare void @f()
diff --git a/src/LLVM/test/CodeGen/X86/x86-64-pic-10.ll b/src/LLVM/test/CodeGen/X86/x86-64-pic-10.ll new file mode 100644 index 0000000..b6f82e2 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/x86-64-pic-10.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 +; RUN: grep {callq g@PLT} %t1 + +@g = alias weak i32 ()* @f + +define void @h() { +entry: + %tmp31 = call i32 @g() + ret void +} + +declare extern_weak i32 @f()
diff --git a/src/LLVM/test/CodeGen/X86/x86-64-pic-11.ll b/src/LLVM/test/CodeGen/X86/x86-64-pic-11.ll new file mode 100644 index 0000000..4db331c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/x86-64-pic-11.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 +; RUN: grep {callq __fixunsxfti@PLT} %t1 + +define i128 @f(x86_fp80 %a) nounwind { +entry: + %tmp78 = fptoui x86_fp80 %a to i128 + ret i128 %tmp78 +}
diff --git a/src/LLVM/test/CodeGen/X86/x86-64-pic-2.ll b/src/LLVM/test/CodeGen/X86/x86-64-pic-2.ll new file mode 100644 index 0000000..1ce2de7 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/x86-64-pic-2.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 +; RUN: grep {callq f} %t1 +; RUN: not grep {callq f@PLT} %t1 + +define void @g() { +entry: + call void @f( ) + ret void +} + +declare hidden void @f()
diff --git a/src/LLVM/test/CodeGen/X86/x86-64-pic-3.ll b/src/LLVM/test/CodeGen/X86/x86-64-pic-3.ll new file mode 100644 index 0000000..aa3c888 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/x86-64-pic-3.ll
@@ -0,0 +1,14 @@ +; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 +; RUN: grep {callq f} %t1 +; RUN: not grep {callq f@PLT} %t1 + +define void @g() { +entry: + call void @f( ) + ret void +} + +define internal void @f() { +entry: + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/x86-64-pic-4.ll b/src/LLVM/test/CodeGen/X86/x86-64-pic-4.ll new file mode 100644 index 0000000..90fc119 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/x86-64-pic-4.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 +; RUN: grep {movq a@GOTPCREL(%rip),} %t1 + +@a = global i32 0 + +define i32 @get_a() { +entry: + %tmp1 = load i32* @a, align 4 + ret i32 %tmp1 +}
diff --git a/src/LLVM/test/CodeGen/X86/x86-64-pic-5.ll b/src/LLVM/test/CodeGen/X86/x86-64-pic-5.ll new file mode 100644 index 0000000..6369bde --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/x86-64-pic-5.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 +; RUN: grep {movl a(%rip),} %t1 +; RUN: not grep GOTPCREL %t1 + +@a = hidden global i32 0 + +define i32 @get_a() { +entry: + %tmp1 = load i32* @a, align 4 + ret i32 %tmp1 +}
diff --git a/src/LLVM/test/CodeGen/X86/x86-64-pic-6.ll b/src/LLVM/test/CodeGen/X86/x86-64-pic-6.ll new file mode 100644 index 0000000..6e19ad3 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/x86-64-pic-6.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 +; RUN: grep {movl a(%rip),} %t1 +; RUN: not grep GOTPCREL %t1 + +@a = internal global i32 0 + +define i32 @get_a() nounwind { +entry: + %tmp1 = load i32* @a, align 4 + ret i32 %tmp1 +}
diff --git a/src/LLVM/test/CodeGen/X86/x86-64-pic-7.ll b/src/LLVM/test/CodeGen/X86/x86-64-pic-7.ll new file mode 100644 index 0000000..4d98ee6 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/x86-64-pic-7.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 +; RUN: grep {movq f@GOTPCREL(%rip),} %t1 + +define void ()* @g() nounwind { +entry: + ret void ()* @f +} + +declare void @f()
diff --git a/src/LLVM/test/CodeGen/X86/x86-64-pic-8.ll b/src/LLVM/test/CodeGen/X86/x86-64-pic-8.ll new file mode 100644 index 0000000..d3b567c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/x86-64-pic-8.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 +; RUN: grep {leaq f(%rip),} %t1 +; RUN: not grep GOTPCREL %t1 + +define void ()* @g() { +entry: + ret void ()* @f +} + +declare hidden void @f()
diff --git a/src/LLVM/test/CodeGen/X86/x86-64-pic-9.ll b/src/LLVM/test/CodeGen/X86/x86-64-pic-9.ll new file mode 100644 index 0000000..0761031 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/x86-64-pic-9.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 +; RUN: grep {leaq f(%rip),} %t1 +; RUN: not grep GOTPCREL %t1 + +define void ()* @g() nounwind { +entry: + ret void ()* @f +} + +define internal void @f() nounwind { +entry: + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/x86-64-ret0.ll b/src/LLVM/test/CodeGen/X86/x86-64-ret0.ll new file mode 100644 index 0000000..c74f6d8 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/x86-64-ret0.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=x86-64 | grep mov | count 1 + +define i32 @f() nounwind { + tail call void @t( i32 1 ) nounwind + ret i32 0 +} + +declare void @t(i32)
diff --git a/src/LLVM/test/CodeGen/X86/x86-64-shortint.ll b/src/LLVM/test/CodeGen/X86/x86-64-shortint.ll new file mode 100644 index 0000000..14e1eb6 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/x86-64-shortint.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s | grep movswl + +target datalayout = "e-p:64:64" +target triple = "x86_64-apple-darwin8" + + +define void @bar(i16 zeroext %A) { + tail call void @foo( i16 signext %A ) + ret void +} +declare void @foo(i16 signext ) +
diff --git a/src/LLVM/test/CodeGen/X86/x86-64-sret-return.ll b/src/LLVM/test/CodeGen/X86/x86-64-sret-return.ll new file mode 100644 index 0000000..7b5f189 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/x86-64-sret-return.ll
@@ -0,0 +1,63 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-apple-darwin8" + %struct.foo = type { [4 x i64] } + +; CHECK: bar: +; CHECK: movq %rdi, %rax +define void @bar(%struct.foo* noalias sret %agg.result, %struct.foo* %d) nounwind { +entry: + %d_addr = alloca %struct.foo* ; <%struct.foo**> [#uses=2] + %memtmp = alloca %struct.foo, align 8 ; <%struct.foo*> [#uses=1] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store %struct.foo* %d, %struct.foo** %d_addr + %tmp = load %struct.foo** %d_addr, align 8 ; <%struct.foo*> [#uses=1] + %tmp1 = getelementptr %struct.foo* %agg.result, i32 0, i32 0 ; <[4 x i64]*> [#uses=4] + %tmp2 = getelementptr %struct.foo* %tmp, i32 0, i32 0 ; <[4 x i64]*> [#uses=4] + %tmp3 = getelementptr [4 x i64]* %tmp1, i32 0, i32 0 ; <i64*> [#uses=1] + %tmp4 = getelementptr [4 x i64]* %tmp2, i32 0, i32 0 ; <i64*> [#uses=1] + %tmp5 = load i64* %tmp4, align 8 ; <i64> [#uses=1] + store i64 %tmp5, i64* %tmp3, align 8 + %tmp6 = getelementptr [4 x i64]* %tmp1, i32 0, i32 1 ; <i64*> [#uses=1] + %tmp7 = getelementptr [4 x i64]* %tmp2, i32 0, i32 1 ; <i64*> [#uses=1] + %tmp8 = load i64* %tmp7, align 8 ; <i64> [#uses=1] + store i64 %tmp8, i64* %tmp6, align 8 + %tmp9 = getelementptr [4 x i64]* %tmp1, i32 0, i32 2 ; <i64*> [#uses=1] + %tmp10 = getelementptr [4 x i64]* %tmp2, i32 0, i32 2 ; <i64*> [#uses=1] + %tmp11 = load i64* %tmp10, align 8 ; <i64> [#uses=1] + store i64 %tmp11, i64* %tmp9, align 8 + %tmp12 = getelementptr [4 x i64]* %tmp1, i32 0, i32 3 ; <i64*> [#uses=1] + %tmp13 = getelementptr [4 x i64]* %tmp2, i32 0, i32 3 ; <i64*> [#uses=1] + %tmp14 = load i64* %tmp13, align 8 ; <i64> [#uses=1] + store i64 %tmp14, i64* %tmp12, align 8 + %tmp15 = getelementptr %struct.foo* %memtmp, i32 0, i32 0 ; <[4 x i64]*> [#uses=4] + %tmp16 = getelementptr %struct.foo* %agg.result, i32 
0, i32 0 ; <[4 x i64]*> [#uses=4] + %tmp17 = getelementptr [4 x i64]* %tmp15, i32 0, i32 0 ; <i64*> [#uses=1] + %tmp18 = getelementptr [4 x i64]* %tmp16, i32 0, i32 0 ; <i64*> [#uses=1] + %tmp19 = load i64* %tmp18, align 8 ; <i64> [#uses=1] + store i64 %tmp19, i64* %tmp17, align 8 + %tmp20 = getelementptr [4 x i64]* %tmp15, i32 0, i32 1 ; <i64*> [#uses=1] + %tmp21 = getelementptr [4 x i64]* %tmp16, i32 0, i32 1 ; <i64*> [#uses=1] + %tmp22 = load i64* %tmp21, align 8 ; <i64> [#uses=1] + store i64 %tmp22, i64* %tmp20, align 8 + %tmp23 = getelementptr [4 x i64]* %tmp15, i32 0, i32 2 ; <i64*> [#uses=1] + %tmp24 = getelementptr [4 x i64]* %tmp16, i32 0, i32 2 ; <i64*> [#uses=1] + %tmp25 = load i64* %tmp24, align 8 ; <i64> [#uses=1] + store i64 %tmp25, i64* %tmp23, align 8 + %tmp26 = getelementptr [4 x i64]* %tmp15, i32 0, i32 3 ; <i64*> [#uses=1] + %tmp27 = getelementptr [4 x i64]* %tmp16, i32 0, i32 3 ; <i64*> [#uses=1] + %tmp28 = load i64* %tmp27, align 8 ; <i64> [#uses=1] + store i64 %tmp28, i64* %tmp26, align 8 + br label %return + +return: ; preds = %entry + ret void +} + +; CHECK: foo: +; CHECK: movq %rdi, %rax +define void @foo({ i64 }* noalias nocapture sret %agg.result) nounwind { + store { i64 } { i64 0 }, { i64 }* %agg.result + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/x86-64-tls-1.ll b/src/LLVM/test/CodeGen/X86/x86-64-tls-1.ll new file mode 100644 index 0000000..8d3b300 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/x86-64-tls-1.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s +@tm_nest_level = internal thread_local global i32 0 +define i64 @z() nounwind { +; CHECK: movabsq $tm_nest_level@TPOFF, %rcx + ret i64 and (i64 ptrtoint (i32* @tm_nest_level to i64), i64 100) +}
diff --git a/src/LLVM/test/CodeGen/X86/x86-64-varargs.ll b/src/LLVM/test/CodeGen/X86/x86-64-varargs.ll new file mode 100644 index 0000000..428f449 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/x86-64-varargs.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -code-model=large -relocation-model=static | grep call | not grep rax + +@.str = internal constant [26 x i8] c"%d, %f, %d, %lld, %d, %f\0A\00" ; <[26 x i8]*> [#uses=1] + +declare i32 @printf(i8*, ...) nounwind + +define i32 @main() nounwind { +entry: + %tmp10.i = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([26 x i8]* @.str, i32 0, i64 0), i32 12, double 0x3FF3EB8520000000, i32 120, i64 123456677890, i32 -10, double 4.500000e+15 ) nounwind ; <i32> [#uses=0] + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/X86/x86-frameaddr.ll b/src/LLVM/test/CodeGen/X86/x86-frameaddr.ll new file mode 100644 index 0000000..d595874 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/x86-frameaddr.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=x86 | grep mov | grep ebp + +define i8* @t() nounwind { +entry: + %0 = tail call i8* @llvm.frameaddress(i32 0) + ret i8* %0 +} + +declare i8* @llvm.frameaddress(i32) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/X86/x86-frameaddr2.ll b/src/LLVM/test/CodeGen/X86/x86-frameaddr2.ll new file mode 100644 index 0000000..c509115 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/x86-frameaddr2.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -march=x86 | grep mov | count 3 + +define i8* @t() nounwind { +entry: + %0 = tail call i8* @llvm.frameaddress(i32 2) + ret i8* %0 +} + +declare i8* @llvm.frameaddress(i32) nounwind readnone
diff --git a/src/LLVM/test/CodeGen/X86/x86-shifts.ll b/src/LLVM/test/CodeGen/X86/x86-shifts.ll new file mode 100644 index 0000000..fdf68f9 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/x86-shifts.ll
@@ -0,0 +1,142 @@ +; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s + +; Splat patterns below + + +define <4 x i32> @shl4(<4 x i32> %A) nounwind { +entry: +; CHECK: shl4 +; CHECK: pslld +; CHECK-NEXT: pslld + %B = shl <4 x i32> %A, < i32 2, i32 2, i32 2, i32 2> + %C = shl <4 x i32> %A, < i32 1, i32 1, i32 1, i32 1> + %K = xor <4 x i32> %B, %C + ret <4 x i32> %K +} + +define <4 x i32> @shr4(<4 x i32> %A) nounwind { +entry: +; CHECK: shr4 +; CHECK: psrld +; CHECK-NEXT: psrld + %B = lshr <4 x i32> %A, < i32 2, i32 2, i32 2, i32 2> + %C = lshr <4 x i32> %A, < i32 1, i32 1, i32 1, i32 1> + %K = xor <4 x i32> %B, %C + ret <4 x i32> %K +} + +define <4 x i32> @sra4(<4 x i32> %A) nounwind { +entry: +; CHECK: sra4 +; CHECK: psrad +; CHECK-NEXT: psrad + %B = ashr <4 x i32> %A, < i32 2, i32 2, i32 2, i32 2> + %C = ashr <4 x i32> %A, < i32 1, i32 1, i32 1, i32 1> + %K = xor <4 x i32> %B, %C + ret <4 x i32> %K +} + +define <2 x i64> @shl2(<2 x i64> %A) nounwind { +entry: +; CHECK: shl2 +; CHECK: psllq +; CHECK-NEXT: psllq + %B = shl <2 x i64> %A, < i64 2, i64 2> + %C = shl <2 x i64> %A, < i64 9, i64 9> + %K = xor <2 x i64> %B, %C + ret <2 x i64> %K +} + +define <2 x i64> @shr2(<2 x i64> %A) nounwind { +entry: +; CHECK: shr2 +; CHECK: psrlq +; CHECK-NEXT: psrlq + %B = lshr <2 x i64> %A, < i64 8, i64 8> + %C = lshr <2 x i64> %A, < i64 1, i64 1> + %K = xor <2 x i64> %B, %C + ret <2 x i64> %K +} + + +define <8 x i16> @shl8(<8 x i16> %A) nounwind { +entry: +; CHECK: shl8 +; CHECK: psllw +; CHECK-NEXT: psllw + %B = shl <8 x i16> %A, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2> + %C = shl <8 x i16> %A, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + %K = xor <8 x i16> %B, %C + ret <8 x i16> %K +} + +define <8 x i16> @shr8(<8 x i16> %A) nounwind { +entry: +; CHECK: shr8 +; CHECK: psrlw +; CHECK-NEXT: psrlw + %B = lshr <8 x i16> %A, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2> + %C = lshr <8 x i16> %A, < i16 1, i16 1, i16 1, i16 1, i16 1, 
i16 1, i16 1, i16 1> + %K = xor <8 x i16> %B, %C + ret <8 x i16> %K +} + +define <8 x i16> @sra8(<8 x i16> %A) nounwind { +entry: +; CHECK: sra8 +; CHECK: psraw +; CHECK-NEXT: psraw + %B = ashr <8 x i16> %A, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2> + %C = ashr <8 x i16> %A, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + %K = xor <8 x i16> %B, %C + ret <8 x i16> %K +} + +; non splat test + + +define <8 x i16> @sll8_nosplat(<8 x i16> %A) nounwind { +entry: +; CHECK: sll8_nosplat +; CHECK-NOT: psll +; CHECK-NOT: psll + %B = shl <8 x i16> %A, < i16 1, i16 2, i16 3, i16 6, i16 2, i16 2, i16 2, i16 2> + %C = shl <8 x i16> %A, < i16 9, i16 7, i16 5, i16 1, i16 4, i16 1, i16 1, i16 1> + %K = xor <8 x i16> %B, %C + ret <8 x i16> %K +} + + +define <2 x i64> @shr2_nosplat(<2 x i64> %A) nounwind { +entry: +; CHECK: shr2_nosplat +; CHECK-NOT: psrlq +; CHECK-NOT: psrlq + %B = lshr <2 x i64> %A, < i64 8, i64 1> + %C = lshr <2 x i64> %A, < i64 1, i64 0> + %K = xor <2 x i64> %B, %C + ret <2 x i64> %K +} + + +; Other shifts + +define <2 x i32> @shl2_other(<2 x i32> %A) nounwind { +entry: +; CHECK: shl2_other +; CHECK-not: psllq + %B = shl <2 x i32> %A, < i32 2, i32 2> + %C = shl <2 x i32> %A, < i32 9, i32 9> + %K = xor <2 x i32> %B, %C + ret <2 x i32> %K +} + +define <2 x i32> @shr2_other(<2 x i32> %A) nounwind { +entry: +; CHECK: shr2_other +; CHECK-NOT: psrlq + %B = lshr <2 x i32> %A, < i32 8, i32 8> + %C = lshr <2 x i32> %A, < i32 1, i32 1> + %K = xor <2 x i32> %B, %C + ret <2 x i32> %K +}
diff --git a/src/LLVM/test/CodeGen/X86/x86-store-gv-addr.ll b/src/LLVM/test/CodeGen/X86/x86-store-gv-addr.ll new file mode 100644 index 0000000..089517a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/x86-store-gv-addr.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -relocation-model=static | not grep lea +; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -relocation-model=static | not grep lea + +@v = external global i32, align 8 +@v_addr = external global i32*, align 8 + +define void @t() nounwind optsize { + store i32* @v, i32** @v_addr, align 8 + unreachable +}
diff --git a/src/LLVM/test/CodeGen/X86/x86_64-mul-by-const.ll b/src/LLVM/test/CodeGen/X86/x86_64-mul-by-const.ll new file mode 100644 index 0000000..df48a29 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/x86_64-mul-by-const.ll
@@ -0,0 +1,9 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s +; Formerly there were two shifts. rdar://8771012. + +define i32 @f9188_mul365384439_shift27(i32 %A) nounwind { +; CHECK: imulq $365384439, +; CHECK: shrq $59, %rax + %tmp1 = udiv i32 %A, 1577682821 ; <i32> [#uses=1] + ret i32 %tmp1 +}
diff --git a/src/LLVM/test/CodeGen/X86/xmm-r64.ll b/src/LLVM/test/CodeGen/X86/xmm-r64.ll new file mode 100644 index 0000000..e53fac0 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/xmm-r64.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86-64 + +define <4 x i32> @test() { + %tmp1039 = call <4 x i32> @llvm.x86.sse2.psll.d( <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <<4 x i32>> [#uses=1] + %tmp1040 = bitcast <4 x i32> %tmp1039 to <2 x i64> ; <<2 x i64>> [#uses=1] + %tmp1048 = add <2 x i64> %tmp1040, zeroinitializer ; <<2 x i64>> [#uses=1] + %tmp1048.upgrd.1 = bitcast <2 x i64> %tmp1048 to <4 x i32> ; <<4 x i32>> [#uses=1] + ret <4 x i32> %tmp1048.upgrd.1 +} + +declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) +
diff --git a/src/LLVM/test/CodeGen/X86/xor-icmp.ll b/src/LLVM/test/CodeGen/X86/xor-icmp.ll new file mode 100644 index 0000000..34875ed --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/xor-icmp.ll
@@ -0,0 +1,67 @@ +; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32 +; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=X64 +; rdar://7367229 + +define i32 @t(i32 %a, i32 %b) nounwind ssp { +entry: +; X32: t: +; X32: xorb +; X32-NOT: andb +; X32-NOT: shrb +; X32: testb $64 +; X32: jne + +; X64: t: +; X64-NOT: setne +; X64: xorl +; X64: testb $64 +; X64: jne + %0 = and i32 %a, 16384 + %1 = icmp ne i32 %0, 0 + %2 = and i32 %b, 16384 + %3 = icmp ne i32 %2, 0 + %4 = xor i1 %1, %3 + br i1 %4, label %bb1, label %bb + +bb: ; preds = %entry + %5 = tail call i32 (...)* @foo() nounwind ; <i32> [#uses=1] + ret i32 %5 + +bb1: ; preds = %entry + %6 = tail call i32 (...)* @bar() nounwind ; <i32> [#uses=1] + ret i32 %6 +} + +declare i32 @foo(...) + +declare i32 @bar(...) + +define i32 @t2(i32 %x, i32 %y) nounwind ssp { +; X32: t2: +; X32: cmpl +; X32: sete +; X32: cmpl +; X32: sete +; X32-NOT: xor +; X32: jne + +; X64: t2: +; X64: testl +; X64: sete +; X64: testl +; X64: sete +; X64-NOT: xor +; X64: jne +entry: + %0 = icmp eq i32 %x, 0 ; <i1> [#uses=1] + %1 = icmp eq i32 %y, 0 ; <i1> [#uses=1] + %2 = xor i1 %1, %0 ; <i1> [#uses=1] + br i1 %2, label %bb, label %return + +bb: ; preds = %entry + %3 = tail call i32 (...)* @foo() nounwind ; <i32> [#uses=0] + ret i32 undef + +return: ; preds = %entry + ret i32 undef +}
diff --git a/src/LLVM/test/CodeGen/X86/xor.ll b/src/LLVM/test/CodeGen/X86/xor.ll new file mode 100644 index 0000000..178c59d --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/xor.ll
@@ -0,0 +1,144 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s -check-prefix=X32 +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=X64 + +; Though it is undefined, we want xor undef,undef to produce zero. +define <4 x i32> @test1() nounwind { + %tmp = xor <4 x i32> undef, undef + ret <4 x i32> %tmp + +; X32: test1: +; X32: pxor %xmm0, %xmm0 +; X32: ret +} + +; Though it is undefined, we want xor undef,undef to produce zero. +define i32 @test2() nounwind{ + %tmp = xor i32 undef, undef + ret i32 %tmp +; X32: test2: +; X32: xorl %eax, %eax +; X32: ret +} + +define i32 @test3(i32 %a, i32 %b) nounwind { +entry: + %tmp1not = xor i32 %b, -2 + %tmp3 = and i32 %tmp1not, %a + %tmp4 = lshr i32 %tmp3, 1 + ret i32 %tmp4 + +; X64: test3: +; X64: notl +; X64: andl +; X64: shrl %eax +; X64: ret + +; X32: test3: +; X32: movl 8(%esp), %eax +; X32: notl %eax +; X32: andl 4(%esp), %eax +; X32: shrl %eax +; X32: ret +} + +define i32 @test4(i32 %a, i32 %b) nounwind { +entry: + br label %bb +bb: + %b_addr.0 = phi i32 [ %b, %entry ], [ %tmp8, %bb ] + %a_addr.0 = phi i32 [ %a, %entry ], [ %tmp3, %bb ] + %tmp3 = xor i32 %a_addr.0, %b_addr.0 + %tmp4not = xor i32 %tmp3, 2147483647 + %tmp6 = and i32 %tmp4not, %b_addr.0 + %tmp8 = shl i32 %tmp6, 1 + %tmp10 = icmp eq i32 %tmp8, 0 + br i1 %tmp10, label %bb12, label %bb +bb12: + ret i32 %tmp3 + +; X64: test4: +; X64: notl [[REG:%[a-z]+]] +; X64: andl {{.*}}[[REG]] +; X32: test4: +; X32: notl [[REG:%[a-z]+]] +; X32: andl {{.*}}[[REG]] +} + +define i16 @test5(i16 %a, i16 %b) nounwind { +entry: + br label %bb +bb: + %b_addr.0 = phi i16 [ %b, %entry ], [ %tmp8, %bb ] + %a_addr.0 = phi i16 [ %a, %entry ], [ %tmp3, %bb ] + %tmp3 = xor i16 %a_addr.0, %b_addr.0 + %tmp4not = xor i16 %tmp3, 32767 + %tmp6 = and i16 %tmp4not, %b_addr.0 + %tmp8 = shl i16 %tmp6, 1 + %tmp10 = icmp eq i16 %tmp8, 0 + br i1 %tmp10, label %bb12, label %bb +bb12: + ret i16 %tmp3 +; 
X64: test5: +; X64: notl [[REG:%[a-z]+]] +; X64: andl {{.*}}[[REG]] +; X32: test5: +; X32: notl [[REG:%[a-z]+]] +; X32: andl {{.*}}[[REG]] +} + +define i8 @test6(i8 %a, i8 %b) nounwind { +entry: + br label %bb +bb: + %b_addr.0 = phi i8 [ %b, %entry ], [ %tmp8, %bb ] + %a_addr.0 = phi i8 [ %a, %entry ], [ %tmp3, %bb ] + %tmp3 = xor i8 %a_addr.0, %b_addr.0 + %tmp4not = xor i8 %tmp3, 127 + %tmp6 = and i8 %tmp4not, %b_addr.0 + %tmp8 = shl i8 %tmp6, 1 + %tmp10 = icmp eq i8 %tmp8, 0 + br i1 %tmp10, label %bb12, label %bb +bb12: + ret i8 %tmp3 +; X64: test6: +; X64: notb [[REG:%[a-z]+]] +; X64: andb {{.*}}[[REG]] +; X32: test6: +; X32: notb [[REG:%[a-z]+]] +; X32: andb {{.*}}[[REG]] +} + +define i32 @test7(i32 %a, i32 %b) nounwind { +entry: + br label %bb +bb: + %b_addr.0 = phi i32 [ %b, %entry ], [ %tmp8, %bb ] + %a_addr.0 = phi i32 [ %a, %entry ], [ %tmp3, %bb ] + %tmp3 = xor i32 %a_addr.0, %b_addr.0 + %tmp4not = xor i32 %tmp3, 2147483646 + %tmp6 = and i32 %tmp4not, %b_addr.0 + %tmp8 = shl i32 %tmp6, 1 + %tmp10 = icmp eq i32 %tmp8, 0 + br i1 %tmp10, label %bb12, label %bb +bb12: + ret i32 %tmp3 +; X64: test7: +; X64: xorl $2147483646, [[REG:%[a-z]+]] +; X64: andl {{.*}}[[REG]] +; X32: test7: +; X32: xorl $2147483646, [[REG:%[a-z]+]] +; X32: andl {{.*}}[[REG]] +} + +define i32 @test8(i32 %a) nounwind { +; rdar://7553032 +entry: + %t1 = sub i32 0, %a + %t2 = add i32 %t1, -1 + ret i32 %t2 +; X64: test8: +; X64: notl {{%eax|%edi|%ecx}} +; X32: test8: +; X32: notl %eax +}
diff --git a/src/LLVM/test/CodeGen/X86/zero-remat.ll b/src/LLVM/test/CodeGen/X86/zero-remat.ll new file mode 100644 index 0000000..4470074 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/zero-remat.ll
@@ -0,0 +1,40 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s --check-prefix=CHECK-64 +; RUN: llc < %s -march=x86-64 -o /dev/null -stats -info-output-file - | grep asm-printer | grep 12 +; RUN: llc < %s -march=x86 | FileCheck %s --check-prefix=CHECK-32 + +declare void @bar(double %x) +declare void @barf(float %x) + +define double @foo() nounwind { + + call void @bar(double 0.0) + ret double 0.0 + +;CHECK-32: foo: +;CHECK-32: call +;CHECK-32: fldz +;CHECK-32: ret + +;CHECK-64: foo: +;CHECK-64: pxor +;CHECK-64: call +;CHECK-64: pxor +;CHECK-64: ret +} + + +define float @foof() nounwind { + call void @barf(float 0.0) + ret float 0.0 + +;CHECK-32: foof: +;CHECK-32: call +;CHECK-32: fldz +;CHECK-32: ret + +;CHECK-64: foof: +;CHECK-64: pxor +;CHECK-64: call +;CHECK-64: pxor +;CHECK-64: ret +}
diff --git a/src/LLVM/test/CodeGen/X86/zext-extract_subreg.ll b/src/LLVM/test/CodeGen/X86/zext-extract_subreg.ll new file mode 100644 index 0000000..4f1dde3 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/zext-extract_subreg.ll
@@ -0,0 +1,61 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s + +define void @t() nounwind ssp { +; CHECK: t: +entry: + br i1 undef, label %return, label %if.end.i + +if.end.i: ; preds = %entry + %tmp7.i = load i32* undef, align 4, !tbaa !0 + br i1 undef, label %return, label %if.end + +if.end: ; preds = %if.end.i +; CHECK: %if.end +; CHECK: movl (%{{.*}}), [[REG:%[a-z]+]] +; CHECK-NOT: movl [[REG]], [[REG]] +; CHECK-NEXT: testl [[REG]], [[REG]] +; CHECK-NEXT: xorb + %tmp138 = select i1 undef, i32 0, i32 %tmp7.i + %tmp867 = zext i32 %tmp138 to i64 + br label %while.cond + +while.cond: ; preds = %while.body, %if.end + %tmp869 = sub i64 %tmp867, 0 + %scale2.0 = trunc i64 %tmp869 to i32 + %cmp149 = icmp eq i32 %scale2.0, 0 + br i1 %cmp149, label %while.end, label %land.rhs + +land.rhs: ; preds = %while.cond + br i1 undef, label %while.body, label %while.end + +while.body: ; preds = %land.rhs + br label %while.cond + +while.end: ; preds = %land.rhs, %while.cond + br i1 undef, label %cond.false205, label %cond.true190 + +cond.true190: ; preds = %while.end + br i1 undef, label %cond.false242, label %cond.true225 + +cond.false205: ; preds = %while.end + unreachable + +cond.true225: ; preds = %cond.true190 + br i1 undef, label %cond.false280, label %cond.true271 + +cond.false242: ; preds = %cond.true190 + unreachable + +cond.true271: ; preds = %cond.true225 + unreachable + +cond.false280: ; preds = %cond.true225 + unreachable + +return: ; preds = %if.end.i, %entry + ret void +} + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/src/LLVM/test/CodeGen/X86/zext-fold.ll b/src/LLVM/test/CodeGen/X86/zext-fold.ll new file mode 100644 index 0000000..b3f5cdb --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/zext-fold.ll
@@ -0,0 +1,41 @@ +; RUN: llc < %s -march=x86 | FileCheck %s + +;; Simple case +define i32 @test1(i8 %x) nounwind readnone { + %A = and i8 %x, -32 + %B = zext i8 %A to i32 + ret i32 %B +} +; CHECK: test1 +; CHECK: movzbl +; CHECK-NEXT: andl {{.*}}224 + +;; Multiple uses of %x but easily extensible. +define i32 @test2(i8 %x) nounwind readnone { + %A = and i8 %x, -32 + %B = zext i8 %A to i32 + %C = or i8 %x, 63 + %D = zext i8 %C to i32 + %E = add i32 %B, %D + ret i32 %E +} +; CHECK: test2 +; CHECK: movzbl +; CHECK: orl $63 +; CHECK: andl $224 + +declare void @use(i32, i8) + +;; Multiple uses of %x where we shouldn't extend the load. +define void @test3(i8 %x) nounwind readnone { + %A = and i8 %x, -32 + %B = zext i8 %A to i32 + call void @use(i32 %B, i8 %x) + ret void +} +; CHECK: test3 +; CHECK: movzbl 16(%esp), [[REGISTER:%e[a-z]{2}]] +; CHECK-NEXT: movl [[REGISTER]], 4(%esp) +; CHECK-NEXT: andl $224, [[REGISTER]] +; CHECK-NEXT: movl [[REGISTER]], (%esp) +; CHECK-NEXT: call{{.*}}use
diff --git a/src/LLVM/test/CodeGen/X86/zext-inreg-0.ll b/src/LLVM/test/CodeGen/X86/zext-inreg-0.ll new file mode 100644 index 0000000..ae6221a --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/zext-inreg-0.ll
@@ -0,0 +1,66 @@ +; RUN: llc < %s -march=x86 | not grep and +; RUN: llc < %s -march=x86-64 > %t +; RUN: not grep and %t +; RUN: not grep movzbq %t +; RUN: not grep movzwq %t +; RUN: not grep movzlq %t + +; These should use movzbl instead of 'and 255'. +; This related to not having a ZERO_EXTEND_REG opcode. + +define i32 @a(i32 %d) nounwind { + %e = add i32 %d, 1 + %retval = and i32 %e, 255 + ret i32 %retval +} +define i32 @b(float %d) nounwind { + %tmp12 = fptoui float %d to i8 + %retval = zext i8 %tmp12 to i32 + ret i32 %retval +} +define i32 @c(i32 %d) nounwind { + %e = add i32 %d, 1 + %retval = and i32 %e, 65535 + ret i32 %retval +} +define i64 @d(i64 %d) nounwind { + %e = add i64 %d, 1 + %retval = and i64 %e, 255 + ret i64 %retval +} +define i64 @e(i64 %d) nounwind { + %e = add i64 %d, 1 + %retval = and i64 %e, 65535 + ret i64 %retval +} +define i64 @f(i64 %d) nounwind { + %e = add i64 %d, 1 + %retval = and i64 %e, 4294967295 + ret i64 %retval +} + +define i32 @g(i8 %d) nounwind { + %e = add i8 %d, 1 + %retval = zext i8 %e to i32 + ret i32 %retval +} +define i32 @h(i16 %d) nounwind { + %e = add i16 %d, 1 + %retval = zext i16 %e to i32 + ret i32 %retval +} +define i64 @i(i8 %d) nounwind { + %e = add i8 %d, 1 + %retval = zext i8 %e to i64 + ret i64 %retval +} +define i64 @j(i16 %d) nounwind { + %e = add i16 %d, 1 + %retval = zext i16 %e to i64 + ret i64 %retval +} +define i64 @k(i32 %d) nounwind { + %e = add i32 %d, 1 + %retval = zext i32 %e to i64 + ret i64 %retval +}
diff --git a/src/LLVM/test/CodeGen/X86/zext-inreg-1.ll b/src/LLVM/test/CodeGen/X86/zext-inreg-1.ll new file mode 100644 index 0000000..17fe374 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/zext-inreg-1.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=x86 | not grep and + +; These tests differ from the ones in zext-inreg-0.ll in that +; on x86-64 they do require and instructions. + +; These should use movzbl instead of 'and 255'. +; This related to not having ZERO_EXTEND_REG node. + +define i64 @l(i64 %d) nounwind { + %e = add i64 %d, 1 + %retval = and i64 %e, 1099511627775 + ret i64 %retval +} +define i64 @m(i64 %d) nounwind { + %e = add i64 %d, 1 + %retval = and i64 %e, 281474976710655 + ret i64 %retval +}
diff --git a/src/LLVM/test/CodeGen/X86/zext-sext.ll b/src/LLVM/test/CodeGen/X86/zext-sext.ll new file mode 100644 index 0000000..cea9e9c --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/zext-sext.ll
@@ -0,0 +1,54 @@ +; XFAIL: * +; RUN: llc < %s -march=x86-64 | FileCheck %s +; <rdar://problem/8006248> + +@llvm.used = appending global [1 x i8*] [i8* bitcast (void ([40 x i16]*, i32*, i16**, i64*)* @func to i8*)], section "llvm.metadata" + +define void @func([40 x i16]* %a, i32* %b, i16** %c, i64* %d) nounwind { +entry: + %tmp103 = getelementptr inbounds [40 x i16]* %a, i64 0, i64 4 + %tmp104 = load i16* %tmp103, align 2 + %tmp105 = sext i16 %tmp104 to i32 + %tmp106 = load i32* %b, align 4 + %tmp107 = sub nsw i32 4, %tmp106 + %tmp108 = load i16** %c, align 8 + %tmp109 = sext i32 %tmp107 to i64 + %tmp110 = getelementptr inbounds i16* %tmp108, i64 %tmp109 + %tmp111 = load i16* %tmp110, align 1 + %tmp112 = sext i16 %tmp111 to i32 + %tmp = mul i32 355244649, %tmp112 + %tmp1 = mul i32 %tmp, %tmp105 + %tmp2 = add i32 %tmp1, 2138875574 + %tmp3 = add i32 %tmp2, 1546991088 + %tmp4 = mul i32 %tmp3, 2122487257 + %tmp5 = icmp sge i32 %tmp4, 2138875574 + %tmp6 = icmp slt i32 %tmp4, -8608074 + %tmp7 = or i1 %tmp5, %tmp6 + %outSign = select i1 %tmp7, i32 1, i32 -1 + %tmp8 = icmp slt i32 %tmp4, 0 + %tmp9 = icmp eq i32 %outSign, 1 + %tmp10 = and i1 %tmp8, %tmp9 + %tmp11 = sext i32 %tmp4 to i64 + %tmp12 = add i64 %tmp11, 5089792279245435153 + +; CHECK: addl $2138875574, %e[[REGISTER_zext:[a-z]+]] +; CHECK-NEXT: movslq %e[[REGISTER_zext]], [[REGISTER_tmp:%[a-z]+]] +; CHECK: movq [[REGISTER_tmp]], [[REGISTER_sext:%[a-z]+]] +; CHECK-NEXT: subq %r[[REGISTER_zext]], [[REGISTER_sext]] + + %tmp13 = sub i64 %tmp12, 2138875574 + %tmp14 = zext i32 %tmp4 to i64 + %tmp15 = sub i64 %tmp11, %tmp14 + %tmp16 = select i1 %tmp10, i64 %tmp15, i64 0 + %tmp17 = sub i64 %tmp13, %tmp16 + %tmp18 = mul i64 %tmp17, 4540133155013554595 + %tmp19 = sub i64 %tmp18, 5386586244038704851 + %tmp20 = add i64 %tmp19, -1368057358110947217 + %tmp21 = mul i64 %tmp20, -422037402840850817 + %tmp115 = load i64* %d, align 8 + %alphaX = mul i64 468858157810230901, %tmp21 + %alphaXbetaY = add i64 %alphaX, %tmp115 + 
%transformed = add i64 %alphaXbetaY, 9040145182981852475 + store i64 %transformed, i64* %d, align 8 + ret void +}
diff --git a/src/LLVM/test/CodeGen/X86/zext-shl.ll b/src/LLVM/test/CodeGen/X86/zext-shl.ll new file mode 100644 index 0000000..928848e --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/zext-shl.ll
@@ -0,0 +1,25 @@ +; RUN: llc < %s -march=x86 | FileCheck %s + +define i32 @t1(i8 zeroext %x) nounwind readnone ssp { +entry: +; CHECK: t1: +; CHECK: shll +; CHECK-NOT: movzwl +; CHECK: ret + %0 = zext i8 %x to i16 + %1 = shl i16 %0, 5 + %2 = zext i16 %1 to i32 + ret i32 %2 +} + +define i32 @t2(i8 zeroext %x) nounwind readnone ssp { +entry: +; CHECK: t2: +; CHECK: shrl +; CHECK-NOT: movzwl +; CHECK: ret + %0 = zext i8 %x to i16 + %1 = lshr i16 %0, 3 + %2 = zext i16 %1 to i32 + ret i32 %2 +}
diff --git a/src/LLVM/test/CodeGen/X86/zext-trunc.ll b/src/LLVM/test/CodeGen/X86/zext-trunc.ll new file mode 100644 index 0000000..b9ffbe8 --- /dev/null +++ b/src/LLVM/test/CodeGen/X86/zext-trunc.ll
@@ -0,0 +1,13 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s +; rdar://7570931 + +define i64 @foo(i64 %a, i64 %b) nounwind { +; CHECK: foo: +; CHECK: leal +; CHECK-NOT: movl +; CHECK: ret + %c = add i64 %a, %b + %d = trunc i64 %c to i32 + %e = zext i32 %d to i64 + ret i64 %e +}
diff --git a/src/LLVM/test/CodeGen/XCore/2008-11-17-Shl64.ll b/src/LLVM/test/CodeGen/XCore/2008-11-17-Shl64.ll new file mode 100644 index 0000000..04b1b5a --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/2008-11-17-Shl64.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=xcore > %t1.s +; PR3080 +define i64 @test(i64 %a) { + %result = shl i64 %a, 1 + ret i64 %result +}
diff --git a/src/LLVM/test/CodeGen/XCore/2009-01-08-Crash.ll b/src/LLVM/test/CodeGen/XCore/2009-01-08-Crash.ll new file mode 100644 index 0000000..a31ea1e --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/2009-01-08-Crash.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=xcore > %t1.s +;; This caused a compilation failure since the +;; address arithmetic was folded into the LDWSP instruction, +;; resulting in a negative offset which eliminateFrameIndex was +;; unable to eliminate. +define i32 @test(i32 %bar) nounwind readnone { +entry: + %bar_addr = alloca i32 + %0 = getelementptr i32* %bar_addr, i32 -1 + %1 = load i32* %0, align 4 + ret i32 %1 +}
diff --git a/src/LLVM/test/CodeGen/XCore/2009-01-14-Remat-Crash.ll b/src/LLVM/test/CodeGen/XCore/2009-01-14-Remat-Crash.ll new file mode 100644 index 0000000..b2bbcb1 --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/2009-01-14-Remat-Crash.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=xcore > %t1.s +; PR3324 +define double @f1(double %a, double %b, double %c, double %d, double %e, double %f, double %g) nounwind { +entry: + br i1 false, label %bb113, label %bb129 + +bb113: ; preds = %entry + ret double 0.000000e+00 + +bb129: ; preds = %entry + %tmp134 = fsub double %b, %a ; <double> [#uses=1] + %tmp136 = fsub double %tmp134, %c ; <double> [#uses=1] + %tmp138 = fadd double %tmp136, %d ; <double> [#uses=1] + %tmp140 = fsub double %tmp138, %e ; <double> [#uses=1] + %tmp142 = fadd double %tmp140, %f ; <double> [#uses=1] + %tmp.0 = fmul double %tmp142, 0.000000e+00 ; <double> [#uses=1] + ret double %tmp.0 +}
diff --git a/src/LLVM/test/CodeGen/XCore/2009-03-27-v2f64-param.ll b/src/LLVM/test/CodeGen/XCore/2009-03-27-v2f64-param.ll new file mode 100644 index 0000000..e35a36a --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/2009-03-27-v2f64-param.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=xcore +; PR3898 + +define i32 @vector_param(<2 x double> %x) nounwind { + ret i32 1 +}
diff --git a/src/LLVM/test/CodeGen/XCore/2009-07-15-store192.ll b/src/LLVM/test/CodeGen/XCore/2009-07-15-store192.ll new file mode 100644 index 0000000..5278af8 --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/2009-07-15-store192.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=xcore > %t1.s +define void @store32(i8* %p) nounwind { +entry: + %0 = bitcast i8* %p to i192* + store i192 0, i192* %0, align 4 + ret void +}
diff --git a/src/LLVM/test/CodeGen/XCore/2010-02-25-LSR-Crash.ll b/src/LLVM/test/CodeGen/XCore/2010-02-25-LSR-Crash.ll new file mode 100644 index 0000000..6ad9a73 --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/2010-02-25-LSR-Crash.ll
@@ -0,0 +1,26 @@ +; RUN: llc < %s -march=xcore +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" +target triple = "xcore-xmos-elf" + +%0 = type { i32 } +%struct.dwarf_fde = type <{ i32, i32, [0 x i8] }> +%struct.object = type { i8*, i8*, i8*, %union.anon, %0, %struct.object* } +%union.anon = type { %struct.dwarf_fde* } + +define %struct.dwarf_fde* @search_object(%struct.object* %ob, i8* %pc) { +entry: + br i1 undef, label %bb3.i15.i.i, label %bb2 + +bb3.i15.i.i: ; preds = %bb3.i15.i.i, %entry + %indvar.i.i.i = phi i32 [ %indvar.next.i.i.i, %bb3.i15.i.i ], [ 0, %entry ] ; <i32> [#uses=2] + %tmp137 = sub i32 0, %indvar.i.i.i ; <i32> [#uses=1] + %scevgep13.i.i.i = getelementptr i32* undef, i32 %tmp137 ; <i32*> [#uses=2] + %scevgep1314.i.i.i = bitcast i32* %scevgep13.i.i.i to %struct.dwarf_fde** ; <%struct.dwarf_fde**> [#uses=1] + %0 = load %struct.dwarf_fde** %scevgep1314.i.i.i, align 4 ; <%struct.dwarf_fde*> [#uses=0] + store i32 undef, i32* %scevgep13.i.i.i + %indvar.next.i.i.i = add i32 %indvar.i.i.i, 1 ; <i32> [#uses=1] + br label %bb3.i15.i.i + +bb2: ; preds = %entry + ret %struct.dwarf_fde* undef +}
diff --git a/src/LLVM/test/CodeGen/XCore/2010-04-07-DbgValueOtherTargets.ll b/src/LLVM/test/CodeGen/XCore/2010-04-07-DbgValueOtherTargets.ll new file mode 100644 index 0000000..80cf3a6 --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/2010-04-07-DbgValueOtherTargets.ll
@@ -0,0 +1,28 @@ +; RUN: llc -O0 -march=xcore -asm-verbose < %s | FileCheck %s +; Check that DEBUG_VALUE comments come through on a variety of targets. + +define i32 @main() nounwind ssp { +entry: +; CHECK: DEBUG_VALUE + call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9 + ret i32 0, !dbg !10 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.sp = !{!0} + +!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5} +!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 0} +!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] +!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!9 = metadata !{i32 3, i32 11, metadata !8, null} +!10 = metadata !{i32 4, i32 2, metadata !8, null} +
diff --git a/src/LLVM/test/CodeGen/XCore/2011-01-31-DAGCombineBug.ll b/src/LLVM/test/CodeGen/XCore/2011-01-31-DAGCombineBug.ll new file mode 100644 index 0000000..f8fe0d2 --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/2011-01-31-DAGCombineBug.ll
@@ -0,0 +1,10 @@ +; RUN: llc < %s -march=xcore +%struct.st = type <{ i8, i32, i8, i32, i8, i32 }> + +@x = external global %struct.st, align 4 + +define i32 @test_entry() nounwind { +entry: + %0 = load i32* getelementptr inbounds (%struct.st* @x, i32 0, i32 3), align 2 + ret i32 %0 +}
diff --git a/src/LLVM/test/CodeGen/XCore/2011-08-01-DynamicAllocBug.ll b/src/LLVM/test/CodeGen/XCore/2011-08-01-DynamicAllocBug.ll new file mode 100644 index 0000000..7d6d7ba --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/2011-08-01-DynamicAllocBug.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -march=xcore | FileCheck %s + +declare void @g() +declare i8* @llvm.stacksave() nounwind +declare void @llvm.stackrestore(i8*) nounwind + +define void @f(i32** %p, i32 %size) { +allocas: + %0 = call i8* @llvm.stacksave() + %a = alloca i32, i32 %size + store i32* %a, i32** %p + call void @g() + call void @llvm.stackrestore(i8* %0) + ret void +} +; CHECK: f: +; CHECK: ldaw [[REGISTER:r[0-9]+]], {{r[0-9]+}}[-r1] +; CHECK: set sp, [[REGISTER]] +; CHECK extsp 1 +; CHECK bl g
diff --git a/src/LLVM/test/CodeGen/XCore/2011-08-01-VarargsBug.ll b/src/LLVM/test/CodeGen/XCore/2011-08-01-VarargsBug.ll new file mode 100644 index 0000000..2076057 --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/2011-08-01-VarargsBug.ll
@@ -0,0 +1,17 @@ +; RUN: llc < %s -march=xcore | FileCheck %s +define void @_Z1fz(...) { +entry: +; CHECK: _Z1fz: +; CHECK: extsp 3 +; CHECK: stw r[[REG:[0-3]{1,1}]] +; CHECK: , sp{{\[}}[[REG]]{{\]}} +; CHECK: stw r[[REG:[0-3]{1,1}]] +; CHECK: , sp{{\[}}[[REG]]{{\]}} +; CHECK: stw r[[REG:[0-3]{1,1}]] +; CHECK: , sp{{\[}}[[REG]]{{\]}} +; CHECK: stw r[[REG:[0-3]{1,1}]] +; CHECK: , sp{{\[}}[[REG]]{{\]}} +; CHECK: ldaw sp, sp[3] +; CHECK: retsp 0 + ret void +}
diff --git a/src/LLVM/test/CodeGen/XCore/addsub64.ll b/src/LLVM/test/CodeGen/XCore/addsub64.ll new file mode 100644 index 0000000..d062480 --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/addsub64.ll
@@ -0,0 +1,59 @@ +; RUN: llc < %s -march=xcore | FileCheck %s +define i64 @add64(i64 %a, i64 %b) { + %result = add i64 %a, %b + ret i64 %result +} +; CHECK: add64 +; CHECK: ldc r11, 0 +; CHECK-NEXT: ladd r2, r0, r0, r2, r11 +; CHECK-NEXT: ladd r2, r1, r1, r3, r2 +; CHECK-NEXT: retsp 0 + +define i64 @sub64(i64 %a, i64 %b) { + %result = sub i64 %a, %b + ret i64 %result +} +; CHECK: sub64 +; CHECK: ldc r11, 0 +; CHECK-NEXT: lsub r2, r0, r0, r2, r11 +; CHECK-NEXT: lsub r2, r1, r1, r3, r2 +; CHECK-NEXT: retsp 0 + +define i64 @maccu(i64 %a, i32 %b, i32 %c) { +entry: + %0 = zext i32 %b to i64 + %1 = zext i32 %c to i64 + %2 = mul i64 %1, %0 + %3 = add i64 %2, %a + ret i64 %3 +} +; CHECK: maccu: +; CHECK: maccu r1, r0, r3, r2 +; CHECK-NEXT: retsp 0 + +define i64 @maccs(i64 %a, i32 %b, i32 %c) { +entry: + %0 = sext i32 %b to i64 + %1 = sext i32 %c to i64 + %2 = mul i64 %1, %0 + %3 = add i64 %2, %a + ret i64 %3 +} +; CHECK: maccs: +; CHECK: maccs r1, r0, r3, r2 +; CHECK-NEXT: retsp 0 + +define i64 @lmul(i32 %a, i32 %b, i32 %c, i32 %d) { +entry: + %0 = zext i32 %a to i64 + %1 = zext i32 %b to i64 + %2 = zext i32 %c to i64 + %3 = zext i32 %d to i64 + %4 = mul i64 %1, %0 + %5 = add i64 %4, %2 + %6 = add i64 %5, %3 + ret i64 %6 +} +; CHECK: lmul: +; CHECK: lmul r1, r0, r1, r0, r2, r3 +; CHECK-NEXT: retsp 0
diff --git a/src/LLVM/test/CodeGen/XCore/ashr.ll b/src/LLVM/test/CodeGen/XCore/ashr.ll new file mode 100644 index 0000000..4514fdb --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/ashr.ll
@@ -0,0 +1,76 @@ +; RUN: llc < %s -march=xcore -asm-verbose=0 | FileCheck %s +define i32 @ashr(i32 %a, i32 %b) { + %1 = ashr i32 %a, %b + ret i32 %1 +} +; CHECK: ashr: +; CHECK-NEXT: ashr r0, r0, r1 + +define i32 @ashri1(i32 %a) { + %1 = ashr i32 %a, 24 + ret i32 %1 +} +; CHECK: ashri1: +; CHECK-NEXT: ashr r0, r0, 24 + +define i32 @ashri2(i32 %a) { + %1 = ashr i32 %a, 31 + ret i32 %1 +} +; CHECK: ashri2: +; CHECK-NEXT: ashr r0, r0, 32 + +define i32 @f1(i32 %a) { + %1 = icmp slt i32 %a, 0 + br i1 %1, label %less, label %not_less +less: + ret i32 10 +not_less: + ret i32 17 +} +; CHECK: f1: +; CHECK-NEXT: ashr r0, r0, 32 +; CHECK-NEXT: bf r0 + +define i32 @f2(i32 %a) { + %1 = icmp sge i32 %a, 0 + br i1 %1, label %greater, label %not_greater +greater: + ret i32 10 +not_greater: + ret i32 17 +} +; CHECK: f2: +; CHECK-NEXT: ashr r0, r0, 32 +; CHECK-NEXT: bt r0 + +define i32 @f3(i32 %a) { + %1 = icmp slt i32 %a, 0 + %2 = select i1 %1, i32 10, i32 17 + ret i32 %2 +} +; CHECK: f3: +; CHECK-NEXT: ashr r0, r0, 32 +; CHECK-NEXT: bf r0 +; CHECK-NEXT: ldc r0, 10 +; CHECK: ldc r0, 17 + +define i32 @f4(i32 %a) { + %1 = icmp sge i32 %a, 0 + %2 = select i1 %1, i32 10, i32 17 + ret i32 %2 +} +; CHECK: f4: +; CHECK-NEXT: ashr r0, r0, 32 +; CHECK-NEXT: bf r0 +; CHECK-NEXT: ldc r0, 17 +; CHECK: ldc r0, 10 + +define i32 @f5(i32 %a) { + %1 = icmp sge i32 %a, 0 + %2 = zext i1 %1 to i32 + ret i32 %2 +} +; CHECK: f5: +; CHECK-NEXT: ashr r0, r0, 32 +; CHECK-NEXT: eq r0, r0, 0
diff --git a/src/LLVM/test/CodeGen/XCore/basictest.ll b/src/LLVM/test/CodeGen/XCore/basictest.ll new file mode 100644 index 0000000..de5eaff --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/basictest.ll
@@ -0,0 +1,6 @@ +; RUN: llc < %s -march=xcore + +define i32 @test(i32 %X) { + %tmp.1 = add i32 %X, 1 + ret i32 %tmp.1 +}
diff --git a/src/LLVM/test/CodeGen/XCore/bigstructret.ll b/src/LLVM/test/CodeGen/XCore/bigstructret.ll new file mode 100644 index 0000000..56af930 --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/bigstructret.ll
@@ -0,0 +1,43 @@ +; RUN: llc < %s -march=xcore | FileCheck %s + +%0 = type { i32, i32, i32, i32 } +%1 = type { i32, i32, i32, i32, i32 } + +; Structs of 4 words can be returned in registers +define internal fastcc %0 @ReturnBigStruct() nounwind readnone { +entry: + %0 = insertvalue %0 zeroinitializer, i32 12, 0 + %1 = insertvalue %0 %0, i32 24, 1 + %2 = insertvalue %0 %1, i32 48, 2 + %3 = insertvalue %0 %2, i32 24601, 3 + ret %0 %3 +} +; CHECK: ReturnBigStruct: +; CHECK: ldc r0, 12 +; CHECK: ldc r1, 24 +; CHECK: ldc r2, 48 +; CHECK: ldc r3, 24601 +; CHECK: retsp 0 + +; Structs bigger than 4 words are returned via a hidden hidden sret-parameter +define internal fastcc %1 @ReturnBigStruct2() nounwind readnone { +entry: + %0 = insertvalue %1 zeroinitializer, i32 12, 0 + %1 = insertvalue %1 %0, i32 24, 1 + %2 = insertvalue %1 %1, i32 48, 2 + %3 = insertvalue %1 %2, i32 24601, 3 + %4 = insertvalue %1 %3, i32 4321, 4 + ret %1 %4 +} +; CHECK: ReturnBigStruct2: +; CHECK: ldc r1, 4321 +; CHECK: stw r1, r0[4] +; CHECK: ldc r1, 24601 +; CHECK: stw r1, r0[3] +; CHECK: ldc r1, 48 +; CHECK: stw r1, r0[2] +; CHECK: ldc r1, 24 +; CHECK: stw r1, r0[1] +; CHECK: ldc r1, 12 +; CHECK: stw r1, r0[0] +; CHECK: retsp 0
diff --git a/src/LLVM/test/CodeGen/XCore/constants.ll b/src/LLVM/test/CodeGen/XCore/constants.ll new file mode 100644 index 0000000..cad1a21 --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/constants.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=xcore -mcpu=xs1b-generic | FileCheck %s + +; CHECK: .section .cp.rodata.cst4,"aMc",@progbits,4 +; CHECK: .LCPI0_0: +; CHECK: .long 12345678 +; CHECK: f: +; CHECK: ldw r0, cp[.LCPI0_0] +define i32 @f() { +entry: + ret i32 12345678 +}
diff --git a/src/LLVM/test/CodeGen/XCore/cos.ll b/src/LLVM/test/CodeGen/XCore/cos.ll new file mode 100644 index 0000000..8211f85 --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/cos.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=xcore > %t1.s +; RUN: grep "bl cosf" %t1.s | count 1 +; RUN: grep "bl cos" %t1.s | count 2 +declare double @llvm.cos.f64(double) + +define double @test(double %F) { + %result = call double @llvm.cos.f64(double %F) + ret double %result +} + +declare float @llvm.cos.f32(float) + +define float @testf(float %F) { + %result = call float @llvm.cos.f32(float %F) + ret float %result +}
diff --git a/src/LLVM/test/CodeGen/XCore/dg.exp b/src/LLVM/test/CodeGen/XCore/dg.exp new file mode 100644 index 0000000..7110eab --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/dg.exp
@@ -0,0 +1,5 @@ +load_lib llvm.exp + +if { [llvm_supports_target XCore] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] +}
diff --git a/src/LLVM/test/CodeGen/XCore/events.ll b/src/LLVM/test/CodeGen/XCore/events.ll new file mode 100644 index 0000000..30a6ec3 --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/events.ll
@@ -0,0 +1,44 @@ +; RUN: llc < %s -march=xcore | FileCheck %s + +declare void @llvm.xcore.setv.p1i8(i8 addrspace(1)* %r, i8* %p) +declare i8* @llvm.xcore.waitevent() +declare i8* @llvm.xcore.checkevent(i8*) +declare void @llvm.xcore.clre() + +define i32 @f(i8 addrspace(1)* %r) nounwind { +; CHECK: f: +entry: +; CHECK: clre + call void @llvm.xcore.clre() + call void @llvm.xcore.setv.p1i8(i8 addrspace(1)* %r, i8* blockaddress(@f, %L1)) + call void @llvm.xcore.setv.p1i8(i8 addrspace(1)* %r, i8* blockaddress(@f, %L2)) + %goto_addr = call i8* @llvm.xcore.waitevent() +; CHECK: waiteu + indirectbr i8* %goto_addr, [label %L1, label %L2] +L1: + br label %ret +L2: + br label %ret +ret: + %retval = phi i32 [1, %L1], [2, %L2] + ret i32 %retval +} + +define i32 @g(i8 addrspace(1)* %r) nounwind { +; CHECK: g: +entry: +; CHECK: clre + call void @llvm.xcore.clre() + call void @llvm.xcore.setv.p1i8(i8 addrspace(1)* %r, i8* blockaddress(@f, %L1)) + %goto_addr = call i8* @llvm.xcore.checkevent(i8 *blockaddress(@f, %L2)) +; CHECK: setsr 1 +; CHECK: clrsr 1 + indirectbr i8* %goto_addr, [label %L1, label %L2] +L1: + br label %ret +L2: + br label %ret +ret: + %retval = phi i32 [1, %L1], [2, %L2] + ret i32 %retval +}
diff --git a/src/LLVM/test/CodeGen/XCore/exp.ll b/src/LLVM/test/CodeGen/XCore/exp.ll new file mode 100644 index 0000000..d23d484 --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/exp.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=xcore > %t1.s +; RUN: grep "bl expf" %t1.s | count 1 +; RUN: grep "bl exp" %t1.s | count 2 +declare double @llvm.exp.f64(double) + +define double @test(double %F) { + %result = call double @llvm.exp.f64(double %F) + ret double %result +} + +declare float @llvm.exp.f32(float) + +define float @testf(float %F) { + %result = call float @llvm.exp.f32(float %F) + ret float %result +}
diff --git a/src/LLVM/test/CodeGen/XCore/exp2.ll b/src/LLVM/test/CodeGen/XCore/exp2.ll new file mode 100644 index 0000000..4c4d17f --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/exp2.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=xcore > %t1.s +; RUN: grep "bl exp2f" %t1.s | count 1 +; RUN: grep "bl exp2" %t1.s | count 2 +declare double @llvm.exp2.f64(double) + +define double @test(double %F) { + %result = call double @llvm.exp2.f64(double %F) + ret double %result +} + +declare float @llvm.exp2.f32(float) + +define float @testf(float %F) { + %result = call float @llvm.exp2.f32(float %F) + ret float %result +}
diff --git a/src/LLVM/test/CodeGen/XCore/fneg.ll b/src/LLVM/test/CodeGen/XCore/fneg.ll new file mode 100644 index 0000000..e3dd3dd --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/fneg.ll
@@ -0,0 +1,7 @@ +; RUN: llc < %s -march=xcore | grep "xor" | count 1 +define i1 @test(double %F) nounwind { +entry: + %0 = fsub double -0.000000e+00, %F + %1 = fcmp olt double 0.000000e+00, %0 + ret i1 %1 +}
diff --git a/src/LLVM/test/CodeGen/XCore/getid.ll b/src/LLVM/test/CodeGen/XCore/getid.ll new file mode 100644 index 0000000..ecab65c --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/getid.ll
@@ -0,0 +1,8 @@ +; RUN: llc < %s -march=xcore > %t1.s +; RUN: grep "get r11, id" %t1.s | count 1 +declare i32 @llvm.xcore.getid() + +define i32 @test() { + %result = call i32 @llvm.xcore.getid() + ret i32 %result +}
diff --git a/src/LLVM/test/CodeGen/XCore/globals.ll b/src/LLVM/test/CodeGen/XCore/globals.ll new file mode 100644 index 0000000..7487561 --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/globals.ll
@@ -0,0 +1,92 @@ +; RUN: llc < %s -march=xcore -mcpu=xs1b-generic | FileCheck %s + +define i32 *@addr_G1() { +entry: +; CHECK: addr_G1: +; CHECK: ldaw r0, dp[G1] + ret i32* @G1 +} + +define i32 *@addr_G2() { +entry: +; CHECK: addr_G2: +; CHECK: ldaw r0, dp[G2] + ret i32* @G2 +} + +define i32 *@addr_G3() { +entry: +; CHECK: addr_G3: +; CHECK: ldaw r11, cp[G3] +; CHECK: mov r0, r11 + ret i32* @G3 +} + +define i32 **@addr_G4() { +entry: +; CHECK: addr_G4: +; CHECK: ldaw r0, dp[G4] + ret i32** @G4 +} + +define i32 **@addr_G5() { +entry: +; CHECK: addr_G5: +; CHECK: ldaw r11, cp[G5] +; CHECK: mov r0, r11 + ret i32** @G5 +} + +define i32 **@addr_G6() { +entry: +; CHECK: addr_G6: +; CHECK: ldaw r0, dp[G6] + ret i32** @G6 +} + +define i32 **@addr_G7() { +entry: +; CHECK: addr_G7: +; CHECK: ldaw r11, cp[G7] +; CHECK: mov r0, r11 + ret i32** @G7 +} + +define i32 *@addr_G8() { +entry: +; CHECK: addr_G8: +; CHECK: ldaw r0, dp[G8] + ret i32* @G8 +} + +@G1 = global i32 4712 +; CHECK: .section .dp.data,"awd",@progbits +; CHECK: G1: + +@G2 = global i32 0 +; CHECK: .section .dp.bss,"awd",@nobits +; CHECK: G2: + +@G3 = unnamed_addr constant i32 9401 +; CHECK: .section .cp.rodata.cst4,"aMc",@progbits,4 +; CHECK: G3: + +@G4 = global i32* @G1 +; CHECK: .section .dp.data,"awd",@progbits +; CHECK: G4: + +@G5 = unnamed_addr constant i32* @G1 +; CHECK: .section .cp.rodata,"ac",@progbits +; CHECK: G5: + +@G6 = global i32* @G8 +; CHECK: .section .dp.data,"awd",@progbits +; CHECK: G6: + +@G7 = unnamed_addr constant i32* @G8 +; CHECK: .section .cp.rodata,"ac",@progbits +; CHECK: G7: + +@G8 = internal global i32 9312 +; CHECK: .section .dp.data,"awd",@progbits +; CHECK: G8:
diff --git a/src/LLVM/test/CodeGen/XCore/indirectbr.ll b/src/LLVM/test/CodeGen/XCore/indirectbr.ll new file mode 100644 index 0000000..9269002 --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/indirectbr.ll
@@ -0,0 +1,45 @@ +; RUN: llc < %s -march=xcore | FileCheck %s + +@nextaddr = global i8* null ; <i8**> [#uses=2] +@C.0.2070 = private constant [5 x i8*] [i8* blockaddress(@foo, %L1), i8* blockaddress(@foo, %L2), i8* blockaddress(@foo, %L3), i8* blockaddress(@foo, %L4), i8* blockaddress(@foo, %L5)] ; <[5 x i8*]*> [#uses=1] + +define internal i32 @foo(i32 %i) nounwind { +; CHECK: foo: +entry: + %0 = load i8** @nextaddr, align 4 ; <i8*> [#uses=2] + %1 = icmp eq i8* %0, null ; <i1> [#uses=1] + br i1 %1, label %bb3, label %bb2 + +bb2: ; preds = %entry, %bb3 + %gotovar.4.0 = phi i8* [ %gotovar.4.0.pre, %bb3 ], [ %0, %entry ] ; <i8*> [#uses=1] +; CHECK: bau + indirectbr i8* %gotovar.4.0, [label %L5, label %L4, label %L3, label %L2, label %L1] + +bb3: ; preds = %entry + %2 = getelementptr inbounds [5 x i8*]* @C.0.2070, i32 0, i32 %i ; <i8**> [#uses=1] + %gotovar.4.0.pre = load i8** %2, align 4 ; <i8*> [#uses=1] + br label %bb2 + +L5: ; preds = %bb2 + br label %L4 + +L4: ; preds = %L5, %bb2 + %res.0 = phi i32 [ 385, %L5 ], [ 35, %bb2 ] ; <i32> [#uses=1] + br label %L3 + +L3: ; preds = %L4, %bb2 + %res.1 = phi i32 [ %res.0, %L4 ], [ 5, %bb2 ] ; <i32> [#uses=1] + br label %L2 + +L2: ; preds = %L3, %bb2 + %res.2 = phi i32 [ %res.1, %L3 ], [ 1, %bb2 ] ; <i32> [#uses=1] + %phitmp = mul i32 %res.2, 6 ; <i32> [#uses=1] + br label %L1 + +L1: ; preds = %L2, %bb2 + %res.3 = phi i32 [ %phitmp, %L2 ], [ 2, %bb2 ] ; <i32> [#uses=1] +; CHECK: ldap r11, .Ltmp0 +; CHECK: stw r11, dp[nextaddr] + store i8* blockaddress(@foo, %L5), i8** @nextaddr, align 4 + ret i32 %res.3 +}
diff --git a/src/LLVM/test/CodeGen/XCore/ladd_lsub_combine.ll b/src/LLVM/test/CodeGen/XCore/ladd_lsub_combine.ll new file mode 100644 index 0000000..a693ee2 --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/ladd_lsub_combine.ll
@@ -0,0 +1,67 @@ +; RUN: llvm-as < %s | llc -march=xcore | FileCheck %s + +; Only needs one ladd +define i64 @f1(i32 %x, i32 %y) nounwind { +entry: + %0 = zext i32 %x to i64 ; <i64> [#uses=1] + %1 = zext i32 %y to i64 ; <i64> [#uses=1] + %2 = add i64 %1, %0 ; <i64> [#uses=1] + ret i64 %2 +} +; CHECK: f1: +; CHECK: ldc r2, 0 +; CHECK-NEXT: ladd r1, r0, r1, r0, r2 +; CHECK-NEXT: retsp 0 + +; Only needs one lsub and one neg +define i64 @f2(i32 %x, i32 %y) nounwind { +entry: + %0 = zext i32 %x to i64 ; <i64> [#uses=1] + %1 = zext i32 %y to i64 ; <i64> [#uses=1] + %2 = sub i64 %1, %0 ; <i64> [#uses=1] + ret i64 %2 +} +; CHECK: f2: +; CHECK: ldc r2, 0 +; CHECK-NEXT: lsub r1, r0, r1, r0, r2 +; CHECK-NEXT: neg r1, r1 +; CHECK-NEXT: retsp 0 + +; Should compile to one ladd and one add +define i64 @f3(i64 %x, i32 %y) nounwind { +entry: + %0 = zext i32 %y to i64 ; <i64> [#uses=1] + %1 = add i64 %x, %0 ; <i64> [#uses=1] + ret i64 %1 +} +; CHECK: f3: +; CHECK: ldc r3, 0 +; CHECK-NEXT: ladd r2, r0, r0, r2, r3 +; CHECK-NEXT: add r1, r1, r2 +; CHECK-NEXT: retsp 0 + +; Should compile to one ladd and one add +define i64 @f4(i32 %x, i64 %y) nounwind { +entry: + %0 = zext i32 %x to i64 ; <i64> [#uses=1] + %1 = add i64 %0, %y ; <i64> [#uses=1] + ret i64 %1 +} +; CHECK: f4: +; CHECK: ldc r3, 0 +; CHECK-NEXT: ladd r1, r0, r0, r1, r3 +; CHECK-NEXT: add r1, r2, r1 +; CHECK-NEXT: retsp 0 + +; Should compile to one lsub and one sub +define i64 @f5(i64 %x, i32 %y) nounwind { +entry: + %0 = zext i32 %y to i64 ; <i64> [#uses=1] + %1 = sub i64 %x, %0 ; <i64> [#uses=1] + ret i64 %1 +} +; CHECK: f5: +; CHECK: ldc r3, 0 +; CHECK-NEXT: lsub r2, r0, r0, r2, r3 +; CHECK-NEXT: sub r1, r1, r2 +; CHECK-NEXT: retsp 0
diff --git a/src/LLVM/test/CodeGen/XCore/licm-ldwcp.ll b/src/LLVM/test/CodeGen/XCore/licm-ldwcp.ll new file mode 100644 index 0000000..4884f70 --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/licm-ldwcp.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=xcore -asm-verbose=0 | FileCheck %s + +; MachineLICM should hoist the LDWCP out of the loop. + +; CHECK: f: +; CHECK-NEXT: ldw [[REG:r[0-9]+]], cp[.LCPI0_0] +; CHECK-NEXT: .LBB0_1: +; CHECK-NEXT: stw [[REG]], r0[0] +; CHECK-NEXT: bu .LBB0_1 + +define void @f(i32* nocapture %p) noreturn nounwind { +entry: + br label %bb + +bb: ; preds = %bb, %entry + volatile store i32 525509670, i32* %p, align 4 + br label %bb +}
diff --git a/src/LLVM/test/CodeGen/XCore/load.ll b/src/LLVM/test/CodeGen/XCore/load.ll new file mode 100644 index 0000000..adfea21 --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/load.ll
@@ -0,0 +1,39 @@ +; RUN: llc < %s -march=xcore > %t1.s +; RUN: not grep add %t1.s +; RUN: not grep ldaw %t1.s +; RUN: not grep lda16 %t1.s +; RUN: not grep zext %t1.s +; RUN: not grep sext %t1.s +; RUN: grep "ldw" %t1.s | count 2 +; RUN: grep "ld16s" %t1.s | count 1 +; RUN: grep "ld8u" %t1.s | count 1 + +define i32 @load32(i32* %p, i32 %offset) nounwind { +entry: + %0 = getelementptr i32* %p, i32 %offset + %1 = load i32* %0, align 4 + ret i32 %1 +} + +define i32 @load32_imm(i32* %p) nounwind { +entry: + %0 = getelementptr i32* %p, i32 11 + %1 = load i32* %0, align 4 + ret i32 %1 +} + +define i32 @load16(i16* %p, i32 %offset) nounwind { +entry: + %0 = getelementptr i16* %p, i32 %offset + %1 = load i16* %0, align 2 + %2 = sext i16 %1 to i32 + ret i32 %2 +} + +define i32 @load8(i8* %p, i32 %offset) nounwind { +entry: + %0 = getelementptr i8* %p, i32 %offset + %1 = load i8* %0, align 1 + %2 = zext i8 %1 to i32 + ret i32 %2 +}
diff --git a/src/LLVM/test/CodeGen/XCore/log.ll b/src/LLVM/test/CodeGen/XCore/log.ll new file mode 100644 index 0000000..a08471f --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/log.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=xcore > %t1.s +; RUN: grep "bl logf" %t1.s | count 1 +; RUN: grep "bl log" %t1.s | count 2 +declare double @llvm.log.f64(double) + +define double @test(double %F) { + %result = call double @llvm.log.f64(double %F) + ret double %result +} + +declare float @llvm.log.f32(float) + +define float @testf(float %F) { + %result = call float @llvm.log.f32(float %F) + ret float %result +}
diff --git a/src/LLVM/test/CodeGen/XCore/log10.ll b/src/LLVM/test/CodeGen/XCore/log10.ll new file mode 100644 index 0000000..a72b8bf --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/log10.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=xcore > %t1.s +; RUN: grep "bl log10f" %t1.s | count 1 +; RUN: grep "bl log10" %t1.s | count 2 +declare double @llvm.log10.f64(double) + +define double @test(double %F) { + %result = call double @llvm.log10.f64(double %F) + ret double %result +} + +declare float @llvm.log10.f32(float) + +define float @testf(float %F) { + %result = call float @llvm.log10.f32(float %F) + ret float %result +}
diff --git a/src/LLVM/test/CodeGen/XCore/log2.ll b/src/LLVM/test/CodeGen/XCore/log2.ll new file mode 100644 index 0000000..d257433 --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/log2.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=xcore > %t1.s +; RUN: grep "bl log2f" %t1.s | count 1 +; RUN: grep "bl log2" %t1.s | count 2 +declare double @llvm.log2.f64(double) + +define double @test(double %F) { + %result = call double @llvm.log2.f64(double %F) + ret double %result +} + +declare float @llvm.log2.f32(float) + +define float @testf(float %F) { + %result = call float @llvm.log2.f32(float %F) + ret float %result +}
diff --git a/src/LLVM/test/CodeGen/XCore/misc-intrinsics.ll b/src/LLVM/test/CodeGen/XCore/misc-intrinsics.ll new file mode 100644 index 0000000..6d39d77 --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/misc-intrinsics.ll
@@ -0,0 +1,75 @@ +; RUN: llc < %s -march=xcore | FileCheck %s +%0 = type { i32, i32 } + +declare i32 @llvm.xcore.bitrev(i32) +declare i32 @llvm.xcore.crc32(i32, i32, i32) +declare %0 @llvm.xcore.crc8(i32, i32, i32) +declare i32 @llvm.xcore.zext(i32, i32) +declare i32 @llvm.xcore.sext(i32, i32) +declare i32 @llvm.xcore.geted() +declare i32 @llvm.xcore.getet() + +define i32 @bitrev(i32 %val) { +; CHECK: bitrev: +; CHECK: bitrev r0, r0 + %result = call i32 @llvm.xcore.bitrev(i32 %val) + ret i32 %result +} + +define i32 @crc32(i32 %crc, i32 %data, i32 %poly) { +; CHECK: crc32: +; CHECK: crc32 r0, r1, r2 + %result = call i32 @llvm.xcore.crc32(i32 %crc, i32 %data, i32 %poly) + ret i32 %result +} + +define %0 @crc8(i32 %crc, i32 %data, i32 %poly) { +; CHECK: crc8: +; CHECK: crc8 r0, r1, r1, r2 + %result = call %0 @llvm.xcore.crc8(i32 %crc, i32 %data, i32 %poly) + ret %0 %result +} + +define i32 @zext(i32 %a, i32 %b) { +; CHECK: zext: +; CHECK: zext r0, r1 + %result = call i32 @llvm.xcore.zext(i32 %a, i32 %b) + ret i32 %result +} + +define i32 @zexti(i32 %a) { +; CHECK: zexti: +; CHECK: zext r0, 4 + %result = call i32 @llvm.xcore.zext(i32 %a, i32 4) + ret i32 %result +} + +define i32 @sext(i32 %a, i32 %b) { +; CHECK: sext: +; CHECK: sext r0, r1 + %result = call i32 @llvm.xcore.sext(i32 %a, i32 %b) + ret i32 %result +} + +define i32 @sexti(i32 %a) { +; CHECK: sexti: +; CHECK: sext r0, 4 + %result = call i32 @llvm.xcore.sext(i32 %a, i32 4) + ret i32 %result +} + +define i32 @geted() { +; CHECK: geted: +; CHECK: get r11, ed +; CHECK-NEXT: mov r0, r11 + %result = call i32 @llvm.xcore.geted() + ret i32 %result +} + +define i32 @getet() { +; CHECK: getet: +; CHECK: get r11, et +; CHECK-NEXT: mov r0, r11 + %result = call i32 @llvm.xcore.getet() + ret i32 %result +}
diff --git a/src/LLVM/test/CodeGen/XCore/mul64.ll b/src/LLVM/test/CodeGen/XCore/mul64.ll new file mode 100644 index 0000000..3d373b1 --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/mul64.ll
@@ -0,0 +1,50 @@ +; RUN: llc < %s -march=xcore | FileCheck %s +; RUN: llc < %s -march=xcore -regalloc=basic | FileCheck %s +define i64 @umul_lohi(i32 %a, i32 %b) { +entry: + %0 = zext i32 %a to i64 + %1 = zext i32 %b to i64 + %2 = mul i64 %1, %0 + ret i64 %2 +} +; CHECK: umul_lohi: +; CHECK: ldc [[REG:r[0-9]+]], 0 +; CHECK-NEXT: lmul {{.*}}, [[REG]], [[REG]] +; CHECK-NEXT: retsp 0 + +define i64 @smul_lohi(i32 %a, i32 %b) { +entry: + %0 = sext i32 %a to i64 + %1 = sext i32 %b to i64 + %2 = mul i64 %1, %0 + ret i64 %2 +} +; CHECK: smul_lohi: +; CHECK: ldc +; CHECK-NEXT: mov +; CHECK-NEXT: maccs +; CHECK: retsp 0 + +define i64 @mul64(i64 %a, i64 %b) { +entry: + %0 = mul i64 %a, %b + ret i64 %0 +} +; CHECK: mul64: +; CHECK: ldc +; CHECK-NEXT: lmul +; CHECK-NEXT: mul +; CHECK-NEXT: lmul + +define i64 @mul64_2(i64 %a, i32 %b) { +entry: + %0 = zext i32 %b to i64 + %1 = mul i64 %a, %0 + ret i64 %1 +} +; CHECK: mul64_2: +; CHECK: ldc +; CHECK-NEXT: lmul +; CHECK-NEXT: mul +; CHECK-NEXT: add r1, +; CHECK: retsp 0
diff --git a/src/LLVM/test/CodeGen/XCore/pow.ll b/src/LLVM/test/CodeGen/XCore/pow.ll new file mode 100644 index 0000000..b461185 --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/pow.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=xcore > %t1.s +; RUN: grep "bl powf" %t1.s | count 1 +; RUN: grep "bl pow" %t1.s | count 2 +declare double @llvm.pow.f64(double, double) + +define double @test(double %F, double %power) { + %result = call double @llvm.pow.f64(double %F, double %power) + ret double %result +} + +declare float @llvm.pow.f32(float, float) + +define float @testf(float %F, float %power) { + %result = call float @llvm.pow.f32(float %F, float %power) + ret float %result +}
diff --git a/src/LLVM/test/CodeGen/XCore/powi.ll b/src/LLVM/test/CodeGen/XCore/powi.ll new file mode 100644 index 0000000..de31cbe --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/powi.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=xcore > %t1.s +; RUN: grep "bl __powidf2" %t1.s | count 1 +; RUN: grep "bl __powisf2" %t1.s | count 1 +declare double @llvm.powi.f64(double, i32) + +define double @test(double %F, i32 %power) { + %result = call double @llvm.powi.f64(double %F, i32 %power) + ret double %result +} + +declare float @llvm.powi.f32(float, i32) + +define float @testf(float %F, i32 %power) { + %result = call float @llvm.powi.f32(float %F, i32 %power) + ret float %result +}
diff --git a/src/LLVM/test/CodeGen/XCore/private.ll b/src/LLVM/test/CodeGen/XCore/private.ll new file mode 100644 index 0000000..537d63b --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/private.ll
@@ -0,0 +1,19 @@ +; Test to make sure that the 'private' is used correctly. +; +; RUN: llc < %s -march=xcore > %t +; RUN: grep .Lfoo: %t +; RUN: grep bl.*\.Lfoo %t +; RUN: grep .Lbaz: %t +; RUN: grep ldw.*\.Lbaz %t + +define private void @foo() { + ret void +} + +@baz = private global i32 4 + +define i32 @bar() { + call void @foo() + %1 = load i32* @baz, align 4 + ret i32 %1 +}
diff --git a/src/LLVM/test/CodeGen/XCore/ps-intrinsics.ll b/src/LLVM/test/CodeGen/XCore/ps-intrinsics.ll new file mode 100644 index 0000000..92b26c7 --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/ps-intrinsics.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=xcore | FileCheck %s +declare i32 @llvm.xcore.getps(i32) +declare void @llvm.xcore.setps(i32, i32) + +define i32 @getps(i32 %reg) nounwind { +; CHECK: getps: +; CHECK: get r0, ps[r0] + %result = call i32 @llvm.xcore.getps(i32 %reg) + ret i32 %result +} + + +define void @setps(i32 %reg, i32 %value) nounwind { +; CHECK: setps: +; CHECK: set ps[r0], r1 + call void @llvm.xcore.setps(i32 %reg, i32 %value) + ret void +}
diff --git a/src/LLVM/test/CodeGen/XCore/resources.ll b/src/LLVM/test/CodeGen/XCore/resources.ll new file mode 100644 index 0000000..8f00fed --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/resources.ll
@@ -0,0 +1,241 @@ +; RUN: llc -march=xcore < %s | FileCheck %s + +declare i8 addrspace(1)* @llvm.xcore.getr.p1i8(i32 %type) +declare void @llvm.xcore.freer.p1i8(i8 addrspace(1)* %r) +declare i32 @llvm.xcore.in.p1i8(i8 addrspace(1)* %r) +declare i32 @llvm.xcore.int.p1i8(i8 addrspace(1)* %r) +declare i32 @llvm.xcore.inct.p1i8(i8 addrspace(1)* %r) +declare void @llvm.xcore.out.p1i8(i8 addrspace(1)* %r, i32 %value) +declare void @llvm.xcore.outt.p1i8(i8 addrspace(1)* %r, i32 %value) +declare void @llvm.xcore.outct.p1i8(i8 addrspace(1)* %r, i32 %value) +declare void @llvm.xcore.chkct.p1i8(i8 addrspace(1)* %r, i32 %value) +declare i32 @llvm.xcore.testct.p1i8(i8 addrspace(1)* %r) +declare i32 @llvm.xcore.testwct.p1i8(i8 addrspace(1)* %r) +declare void @llvm.xcore.setd.p1i8(i8 addrspace(1)* %r, i32 %value) +declare void @llvm.xcore.setc.p1i8(i8 addrspace(1)* %r, i32 %value) +declare i32 @llvm.xcore.inshr.p1i8(i8 addrspace(1)* %r, i32 %value) +declare i32 @llvm.xcore.outshr.p1i8(i8 addrspace(1)* %r, i32 %value) +declare void @llvm.xcore.setpt.p1i8(i8 addrspace(1)* %r, i32 %value) +declare i32 @llvm.xcore.getts.p1i8(i8 addrspace(1)* %r) +declare void @llvm.xcore.syncr.p1i8(i8 addrspace(1)* %r) +declare void @llvm.xcore.settw.p1i8(i8 addrspace(1)* %r, i32 %value) +declare void @llvm.xcore.setv.p1i8(i8 addrspace(1)* %r, i8* %p) +declare void @llvm.xcore.setev.p1i8(i8 addrspace(1)* %r, i8* %p) +declare void @llvm.xcore.eeu.p1i8(i8 addrspace(1)* %r) +declare void @llvm.xcore.setclk.p1i8.p1i8(i8 addrspace(1)* %a, i8 addrspace(1)* %b) +declare void @llvm.xcore.setrdy.p1i8.p1i8(i8 addrspace(1)* %a, i8 addrspace(1)* %b) +declare void @llvm.xcore.setpsc.p1i8(i8 addrspace(1)* %r, i32 %value) +declare i32 @llvm.xcore.peek.p1i8(i8 addrspace(1)* %r) +declare i32 @llvm.xcore.endin.p1i8(i8 addrspace(1)* %r) + +define i8 addrspace(1)* @getr() { +; CHECK: getr: +; CHECK: getr r0, 5 + %result = call i8 addrspace(1)* @llvm.xcore.getr.p1i8(i32 5) + ret i8 addrspace(1)* %result +} + +define void 
@freer(i8 addrspace(1)* %r) { +; CHECK: freer: +; CHECK: freer res[r0] + call void @llvm.xcore.freer.p1i8(i8 addrspace(1)* %r) + ret void +} + +define i32 @in(i8 addrspace(1)* %r) { +; CHECK: in: +; CHECK: in r0, res[r0] + %result = call i32 @llvm.xcore.in.p1i8(i8 addrspace(1)* %r) + ret i32 %result +} + +define i32 @int(i8 addrspace(1)* %r) { +; CHECK: int: +; CHECK: int r0, res[r0] + %result = call i32 @llvm.xcore.int.p1i8(i8 addrspace(1)* %r) + ret i32 %result +} + +define i32 @inct(i8 addrspace(1)* %r) { +; CHECK: inct: +; CHECK: inct r0, res[r0] + %result = call i32 @llvm.xcore.inct.p1i8(i8 addrspace(1)* %r) + ret i32 %result +} + +define void @out(i8 addrspace(1)* %r, i32 %value) { +; CHECK: out: +; CHECK: out res[r0], r1 + call void @llvm.xcore.out.p1i8(i8 addrspace(1)* %r, i32 %value) + ret void +} + +define void @outt(i8 addrspace(1)* %r, i32 %value) { +; CHECK: outt: +; CHECK: outt res[r0], r1 + call void @llvm.xcore.outt.p1i8(i8 addrspace(1)* %r, i32 %value) + ret void +} + +define void @outct(i8 addrspace(1)* %r, i32 %value) { +; CHECK: outct: +; CHECK: outct res[r0], r1 + call void @llvm.xcore.outct.p1i8(i8 addrspace(1)* %r, i32 %value) + ret void +} + +define void @outcti(i8 addrspace(1)* %r) { +; CHECK: outcti: +; CHECK: outct res[r0], 11 + call void @llvm.xcore.outct.p1i8(i8 addrspace(1)* %r, i32 11) + ret void +} + +define void @chkct(i8 addrspace(1)* %r, i32 %value) { +; CHECK: chkct: +; CHECK: chkct res[r0], r1 + call void @llvm.xcore.chkct.p1i8(i8 addrspace(1)* %r, i32 %value) + ret void +} + +define void @chkcti(i8 addrspace(1)* %r) { +; CHECK: chkcti: +; CHECK: chkct res[r0], 11 + call void @llvm.xcore.chkct.p1i8(i8 addrspace(1)* %r, i32 11) + ret void +} + +define void @setd(i8 addrspace(1)* %r, i32 %value) { +; CHECK: setd: +; CHECK: setd res[r0], r1 + call void @llvm.xcore.setd.p1i8(i8 addrspace(1)* %r, i32 %value) + ret void +} + +define void @setc(i8 addrspace(1)* %r, i32 %value) { +; CHECK: setc: +; CHECK: setc res[r0], r1 + call void 
@llvm.xcore.setc.p1i8(i8 addrspace(1)* %r, i32 %value) + ret void +} + +define void @setci(i8 addrspace(1)* %r) { +; CHECK: setci: +; CHECK: setc res[r0], 2 + call void @llvm.xcore.setc.p1i8(i8 addrspace(1)* %r, i32 2) + ret void +} + +define i32 @inshr(i32 %value, i8 addrspace(1)* %r) { +; CHECK: inshr: +; CHECK: inshr r0, res[r1] + %result = call i32 @llvm.xcore.inshr.p1i8(i8 addrspace(1)* %r, i32 %value) + ret i32 %result +} + +define i32 @outshr(i32 %value, i8 addrspace(1)* %r) { +; CHECK: outshr: +; CHECK: outshr res[r1], r0 + %result = call i32 @llvm.xcore.outshr.p1i8(i8 addrspace(1)* %r, i32 %value) + ret i32 %result +} + +define void @setpt(i8 addrspace(1)* %r, i32 %value) { +; CHECK: setpt: +; CHECK: setpt res[r0], r1 + call void @llvm.xcore.setpt.p1i8(i8 addrspace(1)* %r, i32 %value) + ret void +} + +define i32 @getts(i8 addrspace(1)* %r) { +; CHECK: getts: +; CHECK: getts r0, res[r0] + %result = call i32 @llvm.xcore.getts.p1i8(i8 addrspace(1)* %r) + ret i32 %result +} + +define void @syncr(i8 addrspace(1)* %r) { +; CHECK: syncr: +; CHECK: syncr res[r0] + call void @llvm.xcore.syncr.p1i8(i8 addrspace(1)* %r) + ret void +} + +define void @settw(i8 addrspace(1)* %r, i32 %value) { +; CHECK: settw: +; CHECK: settw res[r0], r1 + call void @llvm.xcore.settw.p1i8(i8 addrspace(1)* %r, i32 %value) + ret void +} + +define void @setv(i8 addrspace(1)* %r, i8* %p) { +; CHECK: setv: +; CHECK: mov r11, r1 +; CHECK-NEXT: setv res[r0], r11 + call void @llvm.xcore.setv.p1i8(i8 addrspace(1)* %r, i8* %p) + ret void +} + +define void @setev(i8 addrspace(1)* %r, i8* %p) { +; CHECK: setev: +; CHECK: mov r11, r1 +; CHECK-NEXT: setev res[r0], r11 + call void @llvm.xcore.setev.p1i8(i8 addrspace(1)* %r, i8* %p) + ret void +} + +define void @eeu(i8 addrspace(1)* %r) { +; CHECK: eeu: +; CHECK: eeu res[r0] + call void @llvm.xcore.eeu.p1i8(i8 addrspace(1)* %r) + ret void +} + +define void @setclk(i8 addrspace(1)* %a, i8 addrspace(1)* %b) { +; CHECK: setclk +; CHECK: setclk res[r0], r1 
+ call void @llvm.xcore.setclk.p1i8.p1i8(i8 addrspace(1)* %a, i8 addrspace(1)* %b) + ret void +} + +define void @setrdy(i8 addrspace(1)* %a, i8 addrspace(1)* %b) { +; CHECK: setrdy +; CHECK: setrdy res[r0], r1 + call void @llvm.xcore.setrdy.p1i8.p1i8(i8 addrspace(1)* %a, i8 addrspace(1)* %b) + ret void +} + +define void @setpsc(i8 addrspace(1)* %r, i32 %value) { +; CHECK: setpsc +; CHECK: setpsc res[r0], r1 + call void @llvm.xcore.setpsc.p1i8(i8 addrspace(1)* %r, i32 %value) + ret void +} + +define i32 @peek(i8 addrspace(1)* %r) { +; CHECK: peek: +; CHECK: peek r0, res[r0] + %result = call i32 @llvm.xcore.peek.p1i8(i8 addrspace(1)* %r) + ret i32 %result +} + +define i32 @endin(i8 addrspace(1)* %r) { +; CHECK: endin: +; CHECK: endin r0, res[r0] + %result = call i32 @llvm.xcore.endin.p1i8(i8 addrspace(1)* %r) + ret i32 %result +} + +define i32 @testct(i8 addrspace(1)* %r) { +; CHECK: testct: +; CHECK: testct r0, res[r0] + %result = call i32 @llvm.xcore.testct.p1i8(i8 addrspace(1)* %r) + ret i32 %result +} + +define i32 @testwct(i8 addrspace(1)* %r) { +; CHECK: testwct: +; CHECK: testwct r0, res[r0] + %result = call i32 @llvm.xcore.testwct.p1i8(i8 addrspace(1)* %r) + ret i32 %result +}
diff --git a/src/LLVM/test/CodeGen/XCore/scavenging.ll b/src/LLVM/test/CodeGen/XCore/scavenging.ll new file mode 100644 index 0000000..3181e96 --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/scavenging.ll
@@ -0,0 +1,52 @@ +; RUN: llc < %s -march=xcore +@size = global i32 0 ; <i32*> [#uses=1] +@g0 = external global i32 ; <i32*> [#uses=2] +@g1 = external global i32 ; <i32*> [#uses=2] +@g2 = external global i32 ; <i32*> [#uses=2] +@g3 = external global i32 ; <i32*> [#uses=2] +@g4 = external global i32 ; <i32*> [#uses=2] +@g5 = external global i32 ; <i32*> [#uses=2] +@g6 = external global i32 ; <i32*> [#uses=2] +@g7 = external global i32 ; <i32*> [#uses=2] +@g8 = external global i32 ; <i32*> [#uses=2] +@g9 = external global i32 ; <i32*> [#uses=2] +@g10 = external global i32 ; <i32*> [#uses=2] +@g11 = external global i32 ; <i32*> [#uses=2] + +define void @f() nounwind { +entry: + %x = alloca [100 x i32], align 4 ; <[100 x i32]*> [#uses=2] + %0 = load i32* @size, align 4 ; <i32> [#uses=1] + %1 = alloca i32, i32 %0, align 4 ; <i32*> [#uses=1] + %2 = volatile load i32* @g0, align 4 ; <i32> [#uses=1] + %3 = volatile load i32* @g1, align 4 ; <i32> [#uses=1] + %4 = volatile load i32* @g2, align 4 ; <i32> [#uses=1] + %5 = volatile load i32* @g3, align 4 ; <i32> [#uses=1] + %6 = volatile load i32* @g4, align 4 ; <i32> [#uses=1] + %7 = volatile load i32* @g5, align 4 ; <i32> [#uses=1] + %8 = volatile load i32* @g6, align 4 ; <i32> [#uses=1] + %9 = volatile load i32* @g7, align 4 ; <i32> [#uses=1] + %10 = volatile load i32* @g8, align 4 ; <i32> [#uses=1] + %11 = volatile load i32* @g9, align 4 ; <i32> [#uses=1] + %12 = volatile load i32* @g10, align 4 ; <i32> [#uses=1] + %13 = volatile load i32* @g11, align 4 ; <i32> [#uses=2] + %14 = getelementptr [100 x i32]* %x, i32 0, i32 50 ; <i32*> [#uses=1] + store i32 %13, i32* %14, align 4 + volatile store i32 %13, i32* @g11, align 4 + volatile store i32 %12, i32* @g10, align 4 + volatile store i32 %11, i32* @g9, align 4 + volatile store i32 %10, i32* @g8, align 4 + volatile store i32 %9, i32* @g7, align 4 + volatile store i32 %8, i32* @g6, align 4 + volatile store i32 %7, i32* @g5, align 4 + volatile store i32 %6, i32* @g4, align 4 + 
volatile store i32 %5, i32* @g3, align 4 + volatile store i32 %4, i32* @g2, align 4 + volatile store i32 %3, i32* @g1, align 4 + volatile store i32 %2, i32* @g0, align 4 + %x1 = getelementptr [100 x i32]* %x, i32 0, i32 0 ; <i32*> [#uses=1] + call void @g(i32* %x1, i32* %1) nounwind + ret void +} + +declare void @g(i32*, i32*)
diff --git a/src/LLVM/test/CodeGen/XCore/sext.ll b/src/LLVM/test/CodeGen/XCore/sext.ll new file mode 100644 index 0000000..9cd4ad6 --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/sext.ll
@@ -0,0 +1,32 @@ +; RUN: llc < %s -march=xcore | FileCheck %s +define i32 @sext1(i32 %a) { + %1 = trunc i32 %a to i1 + %2 = sext i1 %1 to i32 + ret i32 %2 +} +; CHECK: sext1: +; CHECK: sext r0, 1 + +define i32 @sext2(i32 %a) { + %1 = trunc i32 %a to i2 + %2 = sext i2 %1 to i32 + ret i32 %2 +} +; CHECK: sext2: +; CHECK: sext r0, 2 + +define i32 @sext8(i32 %a) { + %1 = trunc i32 %a to i8 + %2 = sext i8 %1 to i32 + ret i32 %2 +} +; CHECK: sext8: +; CHECK: sext r0, 8 + +define i32 @sext16(i32 %a) { + %1 = trunc i32 %a to i16 + %2 = sext i16 %1 to i32 + ret i32 %2 +} +; CHECK: sext16: +; CHECK: sext r0, 16
diff --git a/src/LLVM/test/CodeGen/XCore/sin.ll b/src/LLVM/test/CodeGen/XCore/sin.ll new file mode 100644 index 0000000..ced026f --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/sin.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=xcore > %t1.s +; RUN: grep "bl sinf" %t1.s | count 1 +; RUN: grep "bl sin" %t1.s | count 2 +declare double @llvm.sin.f64(double) + +define double @test(double %F) { + %result = call double @llvm.sin.f64(double %F) + ret double %result +} + +declare float @llvm.sin.f32(float) + +define float @testf(float %F) { + %result = call float @llvm.sin.f32(float %F) + ret float %result +}
diff --git a/src/LLVM/test/CodeGen/XCore/sqrt.ll b/src/LLVM/test/CodeGen/XCore/sqrt.ll new file mode 100644 index 0000000..364d1a1 --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/sqrt.ll
@@ -0,0 +1,16 @@ +; RUN: llc < %s -march=xcore > %t1.s +; RUN: grep "bl sqrtf" %t1.s | count 1 +; RUN: grep "bl sqrt" %t1.s | count 2 +declare double @llvm.sqrt.f64(double) + +define double @test(double %F) { + %result = call double @llvm.sqrt.f64(double %F) + ret double %result +} + +declare float @llvm.sqrt.f32(float) + +define float @testf(float %F) { + %result = call float @llvm.sqrt.f32(float %F) + ret float %result +}
diff --git a/src/LLVM/test/CodeGen/XCore/sr-intrinsics.ll b/src/LLVM/test/CodeGen/XCore/sr-intrinsics.ll new file mode 100644 index 0000000..e12ed03 --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/sr-intrinsics.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=xcore | FileCheck %s +declare void @llvm.xcore.setsr(i32) +declare void @llvm.xcore.clrsr(i32) + +define void @setsr() nounwind { +; CHECK: setsr: +; CHECK: setsr 128 + call void @llvm.xcore.setsr(i32 128) + ret void +} + + +define void @clrsr() nounwind { +; CHECK: clrsr: +; CHECK: clrsr 128 + call void @llvm.xcore.clrsr(i32 128) + ret void +}
diff --git a/src/LLVM/test/CodeGen/XCore/store.ll b/src/LLVM/test/CodeGen/XCore/store.ll new file mode 100644 index 0000000..2213743 --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/store.ll
@@ -0,0 +1,35 @@ +; RUN: llc < %s -march=xcore > %t1.s +; RUN: not grep add %t1.s +; RUN: not grep ldaw %t1.s +; RUN: not grep lda16 %t1.s +; RUN: grep "stw" %t1.s | count 2 +; RUN: grep "st16" %t1.s | count 1 +; RUN: grep "st8" %t1.s | count 1 + +define void @store32(i32* %p, i32 %offset, i32 %val) nounwind { +entry: + %0 = getelementptr i32* %p, i32 %offset + store i32 %val, i32* %0, align 4 + ret void +} + +define void @store32_imm(i32* %p, i32 %val) nounwind { +entry: + %0 = getelementptr i32* %p, i32 11 + store i32 %val, i32* %0, align 4 + ret void +} + +define void @store16(i16* %p, i32 %offset, i16 %val) nounwind { +entry: + %0 = getelementptr i16* %p, i32 %offset + store i16 %val, i16* %0, align 2 + ret void +} + +define void @store8(i8* %p, i32 %offset, i8 %val) nounwind { +entry: + %0 = getelementptr i8* %p, i32 %offset + store i8 %val, i8* %0, align 1 + ret void +}
diff --git a/src/LLVM/test/CodeGen/XCore/switch.ll b/src/LLVM/test/CodeGen/XCore/switch.ll new file mode 100644 index 0000000..9cc27f2 --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/switch.ll
@@ -0,0 +1,24 @@ +; RUN: llc -march=xcore < %s | FileCheck %s + +define i32 @switch(i32 %i) { +entry: + switch i32 %i, label %default [ + i32 0, label %bb0 + i32 1, label %bb1 + i32 2, label %bb2 + i32 3, label %bb3 + ] +; CHECK-NOT: shl +; CHECK: bru +; CHECK: .jmptable +bb0: + ret i32 0 +bb1: + ret i32 1 +bb2: + ret i32 2 +bb3: + ret i32 3 +default: + ret i32 4 +}
diff --git a/src/LLVM/test/CodeGen/XCore/switch_long.ll b/src/LLVM/test/CodeGen/XCore/switch_long.ll new file mode 100644 index 0000000..30c9e3d --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/switch_long.ll
@@ -0,0 +1,132 @@ +; RUN: llc -march=xcore < %s | FileCheck %s + +define i32 @switch(i32 %i) { +entry: + switch i32 %i, label %default [ + i32 0, label %bb0 + i32 1, label %bb1 + i32 2, label %bb2 + i32 3, label %bb3 + i32 4, label %bb4 + i32 5, label %bb5 + i32 6, label %bb6 + i32 7, label %bb7 + i32 8, label %bb8 + i32 9, label %bb9 + i32 10, label %bb10 + i32 11, label %bb11 + i32 12, label %bb12 + i32 13, label %bb13 + i32 14, label %bb14 + i32 15, label %bb15 + i32 16, label %bb16 + i32 17, label %bb17 + i32 18, label %bb18 + i32 19, label %bb19 + i32 20, label %bb20 + i32 21, label %bb21 + i32 22, label %bb22 + i32 23, label %bb23 + i32 24, label %bb24 + i32 25, label %bb25 + i32 26, label %bb26 + i32 27, label %bb27 + i32 28, label %bb28 + i32 29, label %bb29 + i32 30, label %bb30 + i32 31, label %bb31 + i32 32, label %bb32 + i32 33, label %bb33 + i32 34, label %bb34 + i32 35, label %bb35 + i32 36, label %bb36 + i32 37, label %bb37 + i32 38, label %bb38 + i32 39, label %bb39 + ] +; CHECK: shl +; CHECK: bru +; CHECK: .jmptable +bb0: + ret i32 0 +bb1: + ret i32 1 +bb2: + ret i32 2 +bb3: + ret i32 3 +bb4: + ret i32 4 +bb5: + ret i32 5 +bb6: + ret i32 6 +bb7: + ret i32 7 +bb8: + ret i32 8 +bb9: + ret i32 9 +bb10: + ret i32 0 +bb11: + ret i32 1 +bb12: + ret i32 2 +bb13: + ret i32 3 +bb14: + ret i32 4 +bb15: + ret i32 5 +bb16: + ret i32 6 +bb17: + ret i32 7 +bb18: + ret i32 8 +bb19: + ret i32 9 +bb20: + ret i32 0 +bb21: + ret i32 1 +bb22: + ret i32 2 +bb23: + ret i32 3 +bb24: + ret i32 4 +bb25: + ret i32 5 +bb26: + ret i32 6 +bb27: + ret i32 7 +bb28: + ret i32 8 +bb29: + ret i32 9 +bb30: + ret i32 0 +bb31: + ret i32 1 +bb32: + ret i32 2 +bb33: + ret i32 3 +bb34: + ret i32 4 +bb35: + ret i32 5 +bb36: + ret i32 6 +bb37: + ret i32 7 +bb38: + ret i32 8 +bb39: + ret i32 9 +default: + ret i32 0 +}
diff --git a/src/LLVM/test/CodeGen/XCore/threads.ll b/src/LLVM/test/CodeGen/XCore/threads.ll new file mode 100644 index 0000000..a0558e3 --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/threads.ll
@@ -0,0 +1,67 @@ +; RUN: llc -march=xcore < %s | FileCheck %s + +declare i8 addrspace(1)* @llvm.xcore.getst.p1i8.p1i8(i8 addrspace(1)* %r) +declare void @llvm.xcore.msync.p1i8(i8 addrspace(1)* %r) +declare void @llvm.xcore.ssync() +declare void @llvm.xcore.mjoin.p1i8(i8 addrspace(1)* %r) +declare void @llvm.xcore.initsp.p1i8(i8 addrspace(1)* %r, i8* %value) +declare void @llvm.xcore.initpc.p1i8(i8 addrspace(1)* %r, i8* %value) +declare void @llvm.xcore.initlr.p1i8(i8 addrspace(1)* %r, i8* %value) +declare void @llvm.xcore.initcp.p1i8(i8 addrspace(1)* %r, i8* %value) +declare void @llvm.xcore.initdp.p1i8(i8 addrspace(1)* %r, i8* %value) + +define i8 addrspace(1)* @getst(i8 addrspace(1)* %r) { +; CHECK: getst: +; CHECK: getst r0, res[r0] + %result = call i8 addrspace(1)* @llvm.xcore.getst.p1i8.p1i8(i8 addrspace(1)* %r) + ret i8 addrspace(1)* %result +} + +define void @ssync() { +; CHECK: ssync: +; CHECK: ssync + call void @llvm.xcore.ssync() + ret void +} + +define void @mjoin(i8 addrspace(1)* %r) { +; CHECK: mjoin: +; CHECK: mjoin res[r0] + call void @llvm.xcore.mjoin.p1i8(i8 addrspace(1)* %r) + ret void +} + +define void @initsp(i8 addrspace(1)* %t, i8* %src) { +; CHECK: initsp: +; CHECK: init t[r0]:sp, r1 + call void @llvm.xcore.initsp.p1i8(i8 addrspace(1)* %t, i8* %src) + ret void +} + +define void @initpc(i8 addrspace(1)* %t, i8* %src) { +; CHECK: initpc: +; CHECK: init t[r0]:pc, r1 + call void @llvm.xcore.initpc.p1i8(i8 addrspace(1)* %t, i8* %src) + ret void +} + +define void @initlr(i8 addrspace(1)* %t, i8* %src) { +; CHECK: initlr: +; CHECK: init t[r0]:lr, r1 + call void @llvm.xcore.initlr.p1i8(i8 addrspace(1)* %t, i8* %src) + ret void +} + +define void @initcp(i8 addrspace(1)* %t, i8* %src) { +; CHECK: initcp: +; CHECK: init t[r0]:cp, r1 + call void @llvm.xcore.initcp.p1i8(i8 addrspace(1)* %t, i8* %src) + ret void +} + +define void @initdp(i8 addrspace(1)* %t, i8* %src) { +; CHECK: initdp: +; CHECK: init t[r0]:dp, r1 + call void @llvm.xcore.initdp.p1i8(i8 
addrspace(1)* %t, i8* %src) + ret void +}
diff --git a/src/LLVM/test/CodeGen/XCore/tls.ll b/src/LLVM/test/CodeGen/XCore/tls.ll new file mode 100644 index 0000000..ed41afa --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/tls.ll
@@ -0,0 +1,20 @@ +; RUN: llc < %s -march=xcore -mcpu=xs1b-generic | FileCheck %s + +define i32 *@addr_G() { +entry: +; CHECK: addr_G: +; CHECK: get r11, id + ret i32* @G +} + +@G = thread_local global i32 15 +; CHECK: .section .dp.data,"awd",@progbits +; CHECK: G: +; CHECK: .long 15 +; CHECK: .long 15 +; CHECK: .long 15 +; CHECK: .long 15 +; CHECK: .long 15 +; CHECK: .long 15 +; CHECK: .long 15 +; CHECK: .long 15
diff --git a/src/LLVM/test/CodeGen/XCore/trampoline.ll b/src/LLVM/test/CodeGen/XCore/trampoline.ll new file mode 100644 index 0000000..6b42134 --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/trampoline.ll
@@ -0,0 +1,39 @@ +; RUN: llc < %s -march=xcore | FileCheck %s + +%struct.FRAME.f = type { i32, i32 ()* } + +define void @f() nounwind { +entry: +; CHECK: f: +; CHECK: ldap r11, g.1101 +; CHECK: stw r11, sp[7] + %TRAMP.23 = alloca [20 x i8], align 2 + %FRAME.0 = alloca %struct.FRAME.f, align 4 + %TRAMP.23.sub = getelementptr inbounds [20 x i8]* %TRAMP.23, i32 0, i32 0 + %FRAME.02 = bitcast %struct.FRAME.f* %FRAME.0 to i8* + call void @llvm.init.trampoline(i8* %TRAMP.23.sub, i8* bitcast (i32 (%struct.FRAME.f*)* @g.1101 to i8*), i8* %FRAME.02) + %tramp = call i8* @llvm.adjust.trampoline(i8* %TRAMP.23.sub) + %0 = getelementptr inbounds %struct.FRAME.f* %FRAME.0, i32 0, i32 1 + %1 = bitcast i8* %tramp to i32 ()* + store i32 ()* %1, i32 ()** %0, align 4 + %2 = getelementptr inbounds %struct.FRAME.f* %FRAME.0, i32 0, i32 0 + store i32 1, i32* %2, align 4 + call void @h(i32 ()* %1) nounwind + ret void +} + +define internal i32 @g.1101(%struct.FRAME.f* nocapture nest %CHAIN.1) nounwind readonly { +entry: +; CHECK: g.1101: +; CHECK: ldw r11, sp[0] +; CHECK-NEXT: ldw r0, r11[0] +; CHECK-NEXT: retsp 0 + %0 = getelementptr inbounds %struct.FRAME.f* %CHAIN.1, i32 0, i32 0 + %1 = load i32* %0, align 4 + ret i32 %1 +} + +declare void @llvm.init.trampoline(i8*, i8*, i8*) nounwind +declare i8* @llvm.adjust.trampoline(i8*) nounwind + +declare void @h(i32 ()*)
diff --git a/src/LLVM/test/CodeGen/XCore/trap.ll b/src/LLVM/test/CodeGen/XCore/trap.ll new file mode 100644 index 0000000..45f886d --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/trap.ll
@@ -0,0 +1,11 @@ +; RUN: llc < %s -march=xcore > %t1.s +; RUN: grep "ecallf" %t1.s | count 1 +; RUN: grep "ldc" %t1.s | count 1 +define i32 @test() noreturn nounwind { +entry: + tail call void @llvm.trap( ) + unreachable +} + +declare void @llvm.trap() nounwind +
diff --git a/src/LLVM/test/CodeGen/XCore/unaligned_load.ll b/src/LLVM/test/CodeGen/XCore/unaligned_load.ll new file mode 100644 index 0000000..0ee8e1c --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/unaligned_load.ll
@@ -0,0 +1,31 @@ +; RUN: llc < %s -march=xcore > %t1.s +; RUN: grep "bl __misaligned_load" %t1.s | count 1 +; RUN: grep ld16s %t1.s | count 2 +; RUN: grep ldw %t1.s | count 2 +; RUN: grep shl %t1.s | count 2 +; RUN: grep shr %t1.s | count 1 +; RUN: grep zext %t1.s | count 1 +; RUN: grep "or " %t1.s | count 2 + +; Byte aligned load. Expands to call to __misaligned_load. +define i32 @align1(i32* %p) nounwind { +entry: + %0 = load i32* %p, align 1 ; <i32> [#uses=1] + ret i32 %0 +} + +; Half word aligned load. Expands to two 16bit loads. +define i32 @align2(i32* %p) nounwind { +entry: + %0 = load i32* %p, align 2 ; <i32> [#uses=1] + ret i32 %0 +} + +@a = global [5 x i8] zeroinitializer, align 4 + +; Constant offset from word aligned base. Expands to two 32bit loads. +define i32 @align3() nounwind { +entry: + %0 = load i32* bitcast (i8* getelementptr ([5 x i8]* @a, i32 0, i32 1) to i32*), align 1 + ret i32 %0 +}
diff --git a/src/LLVM/test/CodeGen/XCore/unaligned_store.ll b/src/LLVM/test/CodeGen/XCore/unaligned_store.ll new file mode 100644 index 0000000..62078e6 --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/unaligned_store.ll
@@ -0,0 +1,18 @@ +; RUN: llc < %s -march=xcore > %t1.s +; RUN: grep "bl __misaligned_store" %t1.s | count 1 +; RUN: grep st16 %t1.s | count 2 +; RUN: grep shr %t1.s | count 1 + +; Byte aligned store. Expands to call to __misaligned_store. +define void @align1(i32* %p, i32 %val) nounwind { +entry: + store i32 %val, i32* %p, align 1 + ret void +} + +; Half word aligned store. Expands to two 16bit stores. +define void @align2(i32* %p, i32 %val) nounwind { +entry: + store i32 %val, i32* %p, align 2 + ret void +}
diff --git a/src/LLVM/test/CodeGen/XCore/unaligned_store_combine.ll b/src/LLVM/test/CodeGen/XCore/unaligned_store_combine.ll new file mode 100644 index 0000000..493ca6a --- /dev/null +++ b/src/LLVM/test/CodeGen/XCore/unaligned_store_combine.ll
@@ -0,0 +1,12 @@ +; RUN: llc < %s -march=xcore > %t1.s +; RUN: grep "bl memmove" %t1.s | count 1 +; RUN: grep "ldc r., 8" %t1.s | count 1 + +; Unaligned load / store pair. Should be combined into a memmove +; of size 8 +define void @f(i64* %dst, i64* %src) nounwind { +entry: + %0 = load i64* %src, align 1 + store i64 %0, i64* %dst, align 1 + ret void +}