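Subzero. ARM32. Reverts CL 1687553002.

Calls whose target is a ConstantRelocatable are emitted as direct calls (bl)
again, instead of being forced into a register and called indirectly
(movw/movt/blx). The affected lit tests are updated accordingly, and
tests_lit/assembler/arm32/call.ll is removed.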

BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4076
R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/1745393002 .
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index e5abe21..c722438 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -3592,10 +3592,7 @@
 }
 
 void TargetARM32::lowerCall(const InstCall *Instr) {
-  // Note: Keep original call target. This allows us to call the correct
-  // postamble helper, even if the CallTarget gets modified during lowering.
-  Operand *OrigCallTarget = Instr->getCallTarget();
-  Operand *CallTarget = OrigCallTarget;
+  Operand *CallTarget = Instr->getCallTarget();
   if (Instr->isTargetHelperCall()) {
     auto TargetHelperPreamble = ARM32HelpersPreamble.find(CallTarget);
     if (TargetHelperPreamble != ARM32HelpersPreamble.end()) {
@@ -3720,9 +3717,12 @@
     }
   }
 
-  // Note: To allow far calls, even for constant relocatables, we force
-  // the call target into a register, and make an indirect call.
-  CallTarget = legalizeToReg(CallTarget);
+  // Allow ConstantRelocatable to be left alone as a direct call, but force
+  // other constants like ConstantInteger32 to be in a register and make it an
+  // indirect call.
+  if (!llvm::isa<ConstantRelocatable>(CallTarget)) {
+    CallTarget = legalize(CallTarget, Legal_Reg);
+  }
 
   // Copy arguments to be passed in registers to the appropriate registers.
   CfgVector<Variable *> RegArgs;
@@ -3777,7 +3777,7 @@
   }
 
   if (Instr->isTargetHelperCall()) {
-    auto TargetHelpersPostamble = ARM32HelpersPostamble.find(OrigCallTarget);
+    auto TargetHelpersPostamble = ARM32HelpersPostamble.find(CallTarget);
     if (TargetHelpersPostamble != ARM32HelpersPostamble.end()) {
       (this->*TargetHelpersPostamble->second)(Instr);
     }
diff --git a/tests_lit/assembler/arm32/call.ll b/tests_lit/assembler/arm32/call.ll
deleted file mode 100644
index f551170..0000000
--- a/tests_lit/assembler/arm32/call.ll
+++ /dev/null
@@ -1,45 +0,0 @@
-; Show that we convert direct calls, into indirect calls (to handle far
-;  branches).
-
-; NOTE: We use -O2 to get rid of memory stores.
-
-; REQUIRES: allow_dump
-
-; Compile using standalone assembler.
-; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -O2 -allow-extern \
-; RUN:   -reg-use r5 | FileCheck %s --check-prefix=ASM
-
-; Show bytes in assembled standalone code.
-; RUN: %p2i --filetype=asm -i %s --target=arm32 --assemble --disassemble \
-; RUN:   --args -O2 -allow-extern -reg-use r5 | FileCheck %s --check-prefix=DIS
-
-; Compile using integrated assembler.
-; RUN: %p2i --filetype=iasm -i %s --target=arm32 --args -O2 \
-; RUN:   -allow-extern -reg-use r5 | FileCheck %s --check-prefix=IASM
-
-; Show bytes in assembled integrated code.
-; RUN: %p2i --filetype=iasm -i %s --target=arm32 --assemble --disassemble \
-; RUN:   --args -O2 -allow-extern -reg-use r5 | FileCheck %s --check-prefix=DIS
-
-declare external void @doSomething()
-
-define internal void @callSomething() {
-; ASM-LABEL:callSomething:
-; DIS-LABEL:{{.+}} <callSomething>:
-; IASM-LABEL:callSomething:
-
-  call void @doSomething();
-
-; ASM:      movw        r5, #:lower16:doSomething
-; DIS:      {{.+}}:     e3005000
-; ASM-NOT:  movw
-
-; ASM-NEXT: movt        r5, #:upper16:doSomething
-; DIS-NEXT: {{.+}}:     e3405000
-; ASM-NOT:  movt
-
-; ASM-NEXT: blx r5
-; DIS-NEXT: {{.+}}:     e12fff35
-; ASM-NOT:  blx
-  ret void
-}
diff --git a/tests_lit/assembler/arm32/popmult.ll b/tests_lit/assembler/arm32/popmult.ll
index fda9e22..d7d1c45 100644
--- a/tests_lit/assembler/arm32/popmult.ll
+++ b/tests_lit/assembler/arm32/popmult.ll
@@ -40,9 +40,9 @@
 ; ASM-NEXT:     vpush   {s22, s23}
 ; ASM-NEXT:     push    {lr}
 
-; DIS:          {{.+}}:      ed2daa01
-; DIS-NEXT:     {{.+}}:      ed2dba02
-; DIS-NEXT:     {{.+}}:      e52de004
+; DIS:          0:      ed2daa01
+; DIS-NEXT:     4:      ed2dba02
+; DIS-NEXT:     8:      e52de004
 
 ; IASM-NOT:     vpush
 ; IASM-NOT:     push
@@ -59,9 +59,9 @@
 ; ASM-NEXT:     vpop    {s22, s23}
 ; ASM-NEXT:     vpop    {s20}
 
-; DIS:         {{.+}}:      e49de004
-; DIS-NEXT:    {{.+}}:      ecbdba02
-; DIS-NEXT:    {{.+}}:      ecbdaa01
+; DIS:         40:      e49de004
+; DIS-NEXT:    44:      ecbdba02
+; DIS-NEXT:    48:      ecbdaa01
 
 ; IASM-NOT: pop
 ; IASM-NOT: vpop
diff --git a/tests_lit/assembler/arm32/push-pop.ll b/tests_lit/assembler/arm32/push-pop.ll
index eef1569..7b4b749 100644
--- a/tests_lit/assembler/arm32/push-pop.ll
+++ b/tests_lit/assembler/arm32/push-pop.ll
@@ -7,64 +7,155 @@
 
 ; Compile using standalone assembler.
 ; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -O2 -allow-extern \
-; RUN:   -reg-use r0,r1,r2,r3,r4,r5 | FileCheck %s --check-prefix=ASM
+; RUN:   | FileCheck %s --check-prefix=ASM
 
 ; Show bytes in assembled standalone code.
 ; RUN: %p2i --filetype=asm -i %s --target=arm32 --assemble --disassemble \
-; RUN:   --args -O2 -allow-extern -reg-use r0,r1,r2,r3,r4,r5 \
-; RUN:   | FileCheck %s --check-prefix=DIS
+; RUN:   --args -O2 -allow-extern | FileCheck %s --check-prefix=DIS
 
 ; Compile using integrated assembler.
 ; RUN: %p2i --filetype=iasm -i %s --target=arm32 --args -O2 \
-; RUN:   -allow-extern -reg-use r0,r1,r2,r3,r4,r5 \
-; RUN:   | FileCheck %s --check-prefix=IASM
+; RUN:   -allow-extern | FileCheck %s --check-prefix=IASM
 
 ; Show bytes in assembled integrated code.
 ; RUN: %p2i --filetype=iasm -i %s --target=arm32 --assemble --disassemble \
-; RUN:   --args -O2 -allow-extern -reg-use r0,r1,r2,r3,r4,r5 \
-; RUN:   | FileCheck %s --check-prefix=DIS
+; RUN:   --args -O2 -allow-extern | FileCheck %s --check-prefix=DIS
 
 declare external void @DoSomething()
 
 define internal void @SinglePushPop() {
-; ASM-LABEL:SinglePushPop:
-; DIS-LABEL:{{.+}} <SinglePushPop>:
-; IASM-LABEL:SinglePushPop:
-
-; ASM:    push    {lr}
-; DIS:    {{.+}}  e52de004
-; IASM-NOT: push
-
   call void @DoSomething();
   ret void
-
-; ASM:    pop     {lr}
-; DIS:    {{.+}}  e49de004
-; IASM-NOT: pop
-
 }
 
+; ASM-LABEL:SinglePushPop:
+; ASM-NEXT:.LSinglePushPop$__0:
+; ASM-NEXT:    push    {lr}
+; ASM-NEXT:    sub     sp, sp, #12
+; ASM-NEXT:    bl      DoSomething
+; ASM-NEXT:    add     sp, sp, #12
+; ASM-NEXT:    pop     {lr}
+; ASM-NEXT:     # lr = def.pseudo
+; ASM-NEXT:    bx      lr
+
+; DIS-LABEL:00000000 <SinglePushPop>:
+; DIS-NEXT:   0:        e52de004
+; DIS-NEXT:   4:        e24dd00c
+; DIS-NEXT:   8:        ebfffffe
+; DIS-NEXT:   c:        e28dd00c
+; DIS-NEXT:  10:        e49de004
+; DIS-NEXT:  14:        e12fff1e
+
+; IASM-LABEL:SinglePushPop:
+; IASM-NEXT:.LSinglePushPop$__0:
+; IASM-NEXT:    .byte 0x4
+; IASM-NEXT:    .byte 0xe0
+; IASM-NEXT:    .byte 0x2d
+; IASM-NEXT:    .byte 0xe5
+
+; IASM-NEXT:    .byte 0xc
+; IASM-NEXT:    .byte 0xd0
+; IASM-NEXT:    .byte 0x4d
+; IASM-NEXT:    .byte 0xe2
+; IASM-NEXT:    bl      DoSomething     @ .word ebfffffe
+; IASM-NEXT:    .byte 0xc
+; IASM-NEXT:    .byte 0xd0
+; IASM-NEXT:    .byte 0x8d
+; IASM-NEXT:    .byte 0xe2
+
+; IASM-NEXT:    .byte 0x4
+; IASM-NEXT:    .byte 0xe0
+; IASM-NEXT:    .byte 0x9d
+; IASM-NEXT:    .byte 0xe4
+
+; IASM:         .byte 0x1e
+; IASM-NEXT:    .byte 0xff
+; IASM-NEXT:    .byte 0x2f
+; IASM-NEXT:    .byte 0xe1
+
 ; This test is based on taking advantage of the over-eager -O2
 ; register allocator that puts V1 and V2 into callee-save registers,
 ; since the call instruction kills the scratch registers. This
 ; requires the callee-save registers to be pushed/popped in the
 ; prolog/epilog.
 define internal i32 @MultPushPop(i32 %v1, i32 %v2) {
-; ASM-LABEL:MultPushPop:
-; DIS_LABEL: {{.+}} <MultPushPop>:
-; IASM-LABEL:MultPushPop:
-; ASM:    push    {r4, r5, lr}
-; DIS:    {{.+}}: e92d4030
-
-; IASM-NOT: push
-
-
   call void @DoSomething();
   %v3 = add i32 %v1, %v2
   ret i32 %v3
-
-; ASM:    pop     {r4, r5, lr}
-; DIS:    {{.+}}  e8bd4030
-; IASM-NOT: pop
-
 }
+
+; ASM-LABEL:MultPushPop:
+; ASM-NEXT:.LMultPushPop$__0:
+; ASM-NEXT:     push    {r4, r5, lr}
+; ASM-NEXT:     sub     sp, sp, #4
+; ASM-NEXT:     mov     r4, r0
+; ASM-NEXT:     mov     r5, r1
+; ASM-NEXT:     bl      DoSomething
+; ASM-NEXT:     add     r4, r4, r5
+; ASM-NEXT:     mov     r0, r4
+; ASM-NEXT:     add     sp, sp, #4
+; ASM-NEXT:     pop     {r4, r5, lr}
+; ASM-NEXT:     # r4 = def.pseudo
+; ASM-NEXT:     # r5 = def.pseudo
+; ASM-NEXT:     # lr = def.pseudo
+; ASM-NEXT:     bx      lr
+
+; DIS-LABEL:00000020 <MultPushPop>:
+; DIS-NEXT:  20:        e92d4030
+; DIS-NEXT:  24:        e24dd004
+; DIS-NEXT:  28:        e1a04000
+; DIS-NEXT:  2c:        e1a05001
+; DIS-NEXT:  30:        ebfffffe
+; DIS-NEXT:  34:        e0844005
+; DIS-NEXT:  38:        e1a00004
+; DIS-NEXT:  3c:        e28dd004
+; DIS-NEXT:  40:        e8bd4030
+; DIS-NEXT:  44:        e12fff1e
+
+; IASM-LABEL:MultPushPop:
+; IASM-NEXT:.LMultPushPop$__0:
+; IASM-NEXT:    .byte 0x30
+; IASM-NEXT:    .byte 0x40
+; IASM-NEXT:    .byte 0x2d
+; IASM-NEXT:    .byte 0xe9
+
+; IASM-NEXT:    .byte 0x4
+; IASM-NEXT:    .byte 0xd0
+; IASM-NEXT:    .byte 0x4d
+; IASM-NEXT:    .byte 0xe2
+
+; IASM-NEXT:    .byte 0x0
+; IASM-NEXT:    .byte 0x40
+; IASM-NEXT:    .byte 0xa0
+; IASM-NEXT:    .byte 0xe1
+
+; IASM-NEXT:    .byte 0x1
+; IASM-NEXT:    .byte 0x50
+; IASM-NEXT:    .byte 0xa0
+; IASM-NEXT:    .byte 0xe1
+
+; IASM-NEXT:    bl      DoSomething     @ .word ebfffffe
+; IASM-NEXT:    .byte 0x5
+; IASM-NEXT:    .byte 0x40
+; IASM-NEXT:    .byte 0x84
+; IASM-NEXT:    .byte 0xe0
+
+; IASM-NEXT:    .byte 0x4
+; IASM-NEXT:    .byte 0x0
+; IASM-NEXT:    .byte 0xa0
+; IASM-NEXT:    .byte 0xe1
+
+; IASM-NEXT:    .byte 0x4
+; IASM-NEXT:    .byte 0xd0
+; IASM-NEXT:    .byte 0x8d
+; IASM-NEXT:    .byte 0xe2
+
+; IASM-NEXT:    .byte 0x30
+; IASM-NEXT:    .byte 0x40
+; IASM-NEXT:    .byte 0xbd
+; IASM-NEXT:    .byte 0xe8
+
+; IASM:         .byte 0x1e
+; IASM-NEXT:    .byte 0xff
+; IASM-NEXT:    .byte 0x2f
+; IASM-NEXT:    .byte 0xe1
diff --git a/tests_lit/assembler/arm32/rem-vec.ll b/tests_lit/assembler/arm32/rem-vec.ll
index 0fd4be4..00190cf 100644
--- a/tests_lit/assembler/arm32/rem-vec.ll
+++ b/tests_lit/assembler/arm32/rem-vec.ll
@@ -17,24 +17,24 @@
   %v = urem <4 x i32> %a, %b
 
 ; ASM-LABEL:.LUrem4i32$local$__0:
-; ASM-NEXT:	udiv	r2, r0, r1
-; ASM-NEXT:	mls	r2, r2, r1, r0
-; ASM-NEXT:	vmov.32	d4[0], r2
+; ASM-NEXT: udiv r2, r0, r1
+; ASM-NEXT: mls r2, r2, r1, r0
+; ASM-NEXT: vmov.32 d4[0], r2
 
 ; ASM-LABEL:.LUrem4i32$local$__1:
-; ASM-NEXT:	udiv	r2, r0, r1
-; ASM-NEXT:	mls	r2, r2, r1, r0
-; ASM-NEXT:	vmov.32	d4[1], r2
+; ASM-NEXT: udiv r2, r0, r1
+; ASM-NEXT: mls r2, r2, r1, r0
+; ASM-NEXT: vmov.32 d4[1], r2
 
 ; ASM-LABEL:.LUrem4i32$local$__2:
-; ASM-NEXT:	udiv	r2, r0, r1
-; ASM-NEXT:	mls	r2, r2, r1, r0
-; ASM-NEXT:	vmov.32	d5[0], r2
+; ASM-NEXT: udiv r2, r0, r1
+; ASM-NEXT: mls r2, r2, r1, r0
+; ASM-NEXT: vmov.32 d5[0], r2
 
 ; ASM-LABEL:.LUrem4i32$local$__3:
-; ASM-NEXT:	udiv	r2, r0, r1
-; ASM-NEXT:	mls	r2, r2, r1, r0
-; ASM-NEXT:	vmov.32	d5[1], r2
+; ASM-NEXT: udiv r2, r0, r1
+; ASM-NEXT: mls r2, r2, r1, r0
+; ASM-NEXT: vmov.32 d5[1], r2
 
   ret <4 x i32> %v
 }
@@ -48,24 +48,24 @@
   %v = srem <4 x i32> %a, %b
 
 ; ASM-LABEL:.LSrem4i32$local$__0:
-; ASM-NEXT:	sdiv	r2, r0, r1
-; ASM-NEXT:	mls	r2, r2, r1, r0
-; ASM-NEXT:	vmov.32	d4[0], r2
+; ASM-NEXT: sdiv r2, r0, r1
+; ASM-NEXT: mls r2, r2, r1, r0
+; ASM-NEXT: vmov.32 d4[0], r2
 
 ; ASM-LABEL:.LSrem4i32$local$__1:
-; ASM-NEXT:	sdiv	r2, r0, r1
-; ASM-NEXT:	mls	r2, r2, r1, r0
-; ASM-NEXT:	vmov.32	d4[1], r2
+; ASM-NEXT: sdiv r2, r0, r1
+; ASM-NEXT: mls r2, r2, r1, r0
+; ASM-NEXT: vmov.32 d4[1], r2
 
 ; ASM-LABEL:.LSrem4i32$local$__2:
-; ASM-NEXT:	sdiv	r2, r0, r1
-; ASM-NEXT:	mls	r2, r2, r1, r0
-; ASM-NEXT:	vmov.32	d5[0], r2
+; ASM-NEXT: sdiv r2, r0, r1
+; ASM-NEXT: mls r2, r2, r1, r0
+; ASM-NEXT: vmov.32 d5[0], r2
 
 ; ASM-LABEL:.LSrem4i32$local$__3:
-; ASM-NEXT:	sdiv	r2, r0, r1
-; ASM-NEXT:	mls	r2, r2, r1, r0
-; ASM-NEXT:	vmov.32	d5[1], r2
+; ASM-NEXT: sdiv r2, r0, r1
+; ASM-NEXT: mls r2, r2, r1, r0
+; ASM-NEXT: vmov.32 d5[1], r2
 
   ret <4 x i32> %v
 }
@@ -78,29 +78,21 @@
 
   %v = frem <4 x float> %a, %b
 
-; ASM:	       	vmov.f32	s0, s16
-; ASM-NEXT:	vmov.f32	s1, s20
-; ASM-NEXT:	movw	r0, #:lower16:fmodf
-; ASM-NEXT:	movt	r0, #:upper16:fmodf
-; ASM-NEXT:	blx	r0
+; ASM:         vmov.f32 s0, s16
+; ASM-NEXT: vmov.f32 s1, s20
+; ASM-NEXT: bl fmodf
 
-; ASM:		vmov.f32	s0, s17
-; ASM-NEXT:	vmov.f32	s1, s21
-; ASM-NEXT:	movw	r0, #:lower16:fmodf
-; ASM-NEXT:	movt	r0, #:upper16:fmodf
-; ASM-NEXT:	blx	r0
+; ASM:  vmov.f32 s0, s17
+; ASM-NEXT: vmov.f32 s1, s21
+; ASM-NEXT: bl fmodf
 
-; ASM:		vmov.f32	s0, s18
-; ASM-NEXT:	vmov.f32	s1, s22
-; ASM-NEXT:	movw	r0, #:lower16:fmodf
-; ASM-NEXT:	movt	r0, #:upper16:fmodf
-; ASM-NEXT:	blx	r0
+; ASM:  vmov.f32 s0, s18
+; ASM-NEXT: vmov.f32 s1, s22
+; ASM-NEXT: bl fmodf
 
-; ASM:		vmov.f32	s16, s19
-; ASM-NEXT:	vmov.f32	s20, s23
-; ASM-NEXT:	movw	r0, #:lower16:fmodf
-; ASM-NEXT:	movt	r0, #:upper16:fmodf
-; ASM:	blx	r0
+; ASM:  vmov.f32 s16, s19
+; ASM-NEXT: vmov.f32 s20, s23
+; ASM: bl fmodf
 
   ret <4 x float> %v
 }
diff --git a/tests_lit/assembler/arm32/sandboxing.ll b/tests_lit/assembler/arm32/sandboxing.ll
index 44e39e3..3ff27cb 100644
--- a/tests_lit/assembler/arm32/sandboxing.ll
+++ b/tests_lit/assembler/arm32/sandboxing.ll
@@ -19,19 +19,12 @@
 entry:
   call void @call_target()
   ; bundle aigned.
-
-  call void @call_target()
   ret void
 }
 
 ; CHECK-LABEL:<test_direct_call>:
 ;             Search for bundle alignment of first call.
-; CHECK:      {{[0-9a-f]*}}c: {{.+}} blx
-; CHECK-NEXT: movw [[REG:r[0-9]]], {{.+}} call_target
-; CHECK-NEXT: movt [[REG]], {{.+}} call_target
-; CHECK-NEXT: bic [[REG]], [[REG]], {{.+}} ; 0xc000000f
-; CHECK-NEXT: blx [[REG]]
-; CHECK-NEXT: {{[0-9a-f]*}}0:
+; CHECK:      {{[0-9a-f]*}}c: {{.*}} bl {{.*}} call_target
 
 ; Same as above, but force bundle padding by adding three (branch) instruction
 ; before the tested call.
@@ -44,22 +37,16 @@
 next1:
   br label %next2 ; add 1 inst.
 next2:
-  br label %next3 ; add 1 inst.
-next3:
   call void @call_target()
   ret void
 }
 ; CHECK-LABEL:<test_direct_call_with_padding_1>:
 ;             Search for bundle alignment of first call.
-; CHECK:      {{[0-9a-f]*}}c: {{.+}} blx
+; CHECK:      {{[0-9a-f]*}}c: {{.+}} bl
 ; CHECK-NEXT: b
 ; CHECK-NEXT: b
-; CHECK-NEXT: b
-; CHECK-NEXT: movw [[REG:r[0-9]]], {{.+}} call_target
-; CHECK-NEXT: movt [[REG]], {{.+}} call_target
 ; CHECK-NEXT: nop
-; CHECK-NEXT: bic [[REG]], [[REG]], {{.+}} ; 0xc000000f
-; CHECK-NEXT: blx r0
+; CHECK-NEXT: bl {{.*}} call_target
 ; CHECK-NEXT: {{[0-9a-f]*}}0:
 
 ; Same as above, but force bundle padding by adding two (branch) instruction
@@ -71,23 +58,17 @@
 
   br label %next1 ; add 1 inst.
 next1:
-  br label %next2 ; add 1 inst.
-next2:
   call void @call_target()
   ret void
 }
 
 ; CHECK-LABEL:<test_direct_call_with_padding_2>:
 ;             Search for bundle alignment of first call.
-; CHECK:      {{[0-9a-f]*}}c: {{.+}} blx
+; CHECK:      {{[0-9a-f]*}}c: {{.+}} bl
 ; CHECK-NEXT: b
-; CHECK-NEXT: b
-; CHECK-NEXT: movw [[REG:r[0-9]]], {{.+}} call_target
-; CHECK-NEXT: movt [[REG]], {{.+}} call_target
 ; CHECK-NEXT: nop
 ; CHECK-NEXT: nop
-; CHECK-NEXT: bic [[REG]], [[REG]], {{.+}} ; 0xc000000f
-; CHECK-NEXT: blx r0
+; CHECK-NEXT: bl {{.*}} call_target
 ; CHECK-NEXT: {{[0-9a-f]*}}0:
 
 ; Same as above, but force bundle padding by adding single (branch) instruction
@@ -97,23 +78,17 @@
   call void @call_target()
   ; bundle aigned.
 
-  br label %next ; add 1 inst.
-next:
   call void @call_target()
   ret void
 }
 
 ; CHECK-LABEL:<test_direct_call_with_padding_3>:
 ;             Search for bundle alignment of first call.
-; CHECK:      {{[0-9a-f]*}}c: {{.+}} blx
-; CHECK-NEXT: b
-; CHECK-NEXT: movw [[REG:r[0-9]]], {{.+}} call_target
-; CHECK-NEXT: movt [[REG]], {{.+}} call_target
+; CHECK:      {{[0-9a-f]*}}c: {{.+}} bl
 ; CHECK-NEXT: nop
 ; CHECK-NEXT: nop
 ; CHECK-NEXT: nop
-; CHECK-NEXT: bic [[REG]], [[REG]], {{.+}} ; 0xc000000f
-; CHECK-NEXT: blx r0
+; CHECK-NEXT: bl {{.*}} call_target
 ; CHECK-NEXT: {{[0-9a-f]*}}0:
 
 ; An indirect call sequence uses the right mask and register-call sequence.
@@ -131,7 +106,7 @@
 
 ; CHECK-LABEL:<test_indirect_call>:
 ;             Search for bundle alignment of first call.
-; CHECK:      {{[0-9a-f]*}}c: {{.+}} blx
+; CHECK:      {{[0-9a-f]*}}c: {{.+}} bl
 ; CHECK-NEXT: b
 ; CHECK-NEXT: ldr
 ; CHECK-NEXT: bic [[REG:r[0-3]]], [[REG]], {{.*}} 0xc000000f
@@ -151,7 +126,7 @@
 
 ; CHECK-LABEL: <test_indirect_call_with_padding_1>:
 ;              Search for bundle alignment of first call.
-; CHECK:      {{[0-9a-f]*}}c: {{.+}} blx
+; CHECK:      {{[0-9a-f]*}}c: {{.+}} bl
 ; CHECK-NEXT: ldr
 ; CHECK-NEXT: nop
 ; CHECK-NEXT: bic [[REG:r[0-3]]], [[REG]], {{.*}} 0xc000000f
@@ -178,7 +153,7 @@
 
 ; CHECK-LABEL: <test_indirect_call_with_padding_2>:
 ;              Search for bundle alignment of first call.
-; CHECK:      {{[0-9a-f]*}}c: {{.+}} blx
+; CHECK:      {{[0-9a-f]*}}c: {{.+}} bl
 ; CHECK-NEXT: b
 ; CHECK-NEXT: b
 ; CHECK-NEXT: b
@@ -206,7 +181,7 @@
 }
 ; CHECK-LABEL: <test_indirect_call_with_padding_3>:
 ;              Search for bundle alignment of first call.
-; CHECK:      {{[0-9a-f]*}}c: {{.+}} blx
+; CHECK:      {{[0-9a-f]*}}c: {{.+}} bl
 ; CHECK-NEXT: b
 ; CHECK-NEXT: b
 ; CHECK-NEXT: ldr
@@ -228,7 +203,7 @@
 }
 ; CHECK-LABEL:<test_ret>:
 ;             Search for bundle alignment of first call.
-; CHECK:      {{[0-9a-f]*}}c: {{.+}} blx
+; CHECK:      {{[0-9a-f]*}}c: {{.+}} bl
 ; CHECK-NEXT: b
 ; CHECK-NEXT: add sp, sp
 ; CHECK-NEXT: bic sp, sp, {{.+}} ; 0xc0000000
@@ -245,7 +220,7 @@
 
 ; CHECK-LABEL:<test_ret_with_padding>:
 ;             Search for bundle alignment of first call.
-; CHECK:      {{[0-9a-f]*}}c: {{.+}} blx
+; CHECK:      {{[0-9a-f]*}}c: {{.+}} bl
 ; CHECK-NEXT: add sp, sp
 ; CHECK-NEXT: bic sp, sp, {{.+}} ; 0xc0000000
 ; CHECK-NEXT: pop {lr}
@@ -264,7 +239,7 @@
 
 ; CHECK-LABEL: test_store
 ;             Search for call at end of bundle.
-; CHECK:      {{[0-9a-f]*}}c: {{.+}} blx
+; CHECK:      {{[0-9a-f]*}}c: {{.+}} bl
 ; CHECK-NEXT: mov [[REG:r[0-9]]], #0
 ; CHECK-NEXT: mov
 ; CHECK-NEXT: bic [[REG]], [[REG]], {{.+}} ; 0xc0000000
@@ -283,7 +258,7 @@
 }
 ; CHECK-LABEL: test_store_with_padding
 ;             Search for call at end of bundle.
-; CHECK:      {{[0-9a-f]*}}c: {{.+}} blx
+; CHECK:      {{[0-9a-f]*}}c: {{.+}} bl
 ; CHECK-NEXT: b
 ; CHECK-NEXT: mov [[REG:r[0-9]]], #0
 ; CHECK-NEXT: mov
@@ -303,7 +278,7 @@
 
 ; CHECK-LABEL: test_load
 ;             Search for call at end of bundle.
-; CHECK:      {{[0-9a-f]*}}c: {{.+}} blx
+; CHECK:      {{[0-9a-f]*}}c: {{.+}} bl
 ; CHECK-NEXT: mov [[REG:r[0-9]]], #0
 ; CHECK-NEXT: bic [[REG]], [[REG]], {{.+}} ; 0xc0000000
 ; CHECK-NEXT: ldr r{{.+}}[[REG]]
@@ -323,7 +298,7 @@
 
 ; CHECK-LABEL: test_load_with_padding
 ;             Search for call at end of bundle.
-; CHECK:      {{[0-9a-f]*}}c: {{.+}} blx
+; CHECK:      {{[0-9a-f]*}}c: {{.+}} bl
 ; CHECK-NEXT: b
 ; CHECK-NEXT: b
 ; CHECK-NEXT: mov [[REG:r[0-9]]], #0
diff --git a/tests_lit/assembler/arm32/vpush.ll b/tests_lit/assembler/arm32/vpush.ll
index 449c412..1aa9c7a 100644
--- a/tests_lit/assembler/arm32/vpush.ll
+++ b/tests_lit/assembler/arm32/vpush.ll
@@ -26,10 +26,10 @@
 
 define internal double @testVpushVpop(double %v1, double %v2) {
 ; ASM-LABEL: testVpushVpop:
-; DIS-LABEL: {{.+}} <testVpushVpop>:
+; DIS-LABEL: 00000000 <testVpushVpop>:
 
 ; ASM:  vpush   {s18, s19, s20, s21}
-; DIS:  {{.+}}: ed2d9a04
+; DIS:    0:    ed2d9a04
 ; IASM-NOT: vpush
 
   call void @foo()
@@ -37,7 +37,7 @@
   ret double %res
 
 ; ASM:  vpop    {s18, s19, s20, s21}
-; DIS:  {{.+}}: ecbd9a04
+; DIS:   28:       ecbd9a04
 ; IASM-NOT: vpopd
 
 }
diff --git a/tests_lit/llvm2ice_tests/64bit.pnacl.ll b/tests_lit/llvm2ice_tests/64bit.pnacl.ll
index 4106d3c..a818b2a 100644
--- a/tests_lit/llvm2ice_tests/64bit.pnacl.ll
+++ b/tests_lit/llvm2ice_tests/64bit.pnacl.ll
@@ -10,15 +10,18 @@
 ; RUN:   --target x8632 -i %s --args -Om1 -allow-externally-defined-symbols \
 ; RUN:   | %if --need=target_X8632 --command FileCheck --check-prefix=OPTM1 %s
 
+; TODO(jvoung): Stop skipping unimplemented parts (via --skip-unimplemented)
+; once enough infrastructure is in. Also, switch to --filetype=obj
+; when possible.
 ; RUN: %if --need=target_ARM32 --need=allow_dump \
-; RUN:   --command %p2i --filetype=obj --assemble \
-; RUN:   --disassemble --target arm32 -i %s --args -O2 \
+; RUN:   --command %p2i --filetype=asm --assemble \
+; RUN:   --disassemble --target arm32 -i %s --args -O2 --skip-unimplemented \
 ; RUN:   -allow-externally-defined-symbols \
 ; RUN:   | %if --need=target_ARM32 --need=allow_dump \
 ; RUN:   --command FileCheck --check-prefix ARM32 --check-prefix ARM32-O2 %s
 ; RUN: %if --need=target_ARM32 --need=allow_dump \
-; RUN:   --command %p2i --filetype=obj --assemble --disassemble --target arm32 \
-; RUN:   -i %s --args -Om1 \
+; RUN:   --command %p2i --filetype=asm --assemble --disassemble --target arm32 \
+; RUN:   -i %s --args -Om1 --skip-unimplemented \
 ; RUN:   -allow-externally-defined-symbols \
 ; RUN:   | %if --need=target_ARM32 --need=allow_dump \
 ; RUN:   --command FileCheck --check-prefix ARM32 --check-prefix ARM32-OM1 %s
@@ -99,24 +102,18 @@
 
 ; ARM32-LABEL: pass64BitArg
 ; ARM32:      str     {{.*}}, [sp]
-; ARM32:      movw    [[CALL:r[0-9]]], {{.+}} ignore64BitArgNoInline
-; ARM32:      movt    [[CALL]], {{.+}} ignore64BitArgNoInline
 ; ARM32:      mov     r2, #123
-; ARM32:      blx     [[CALL]]
+; ARM32:      bl      {{.*}} ignore64BitArgNoInline
 ; ARM32:      str     {{.*}}, [sp]
-; ARM32:      movw    [[CALL:r[0-9]]], {{.+}} ignore64BitArgNoInline
-; ARM32:      movt    [[CALL]], {{.+}} ignore64BitArgNoInline
 ; ARM32:      {{mov|ldr}} r0
 ; ARM32:      {{mov|ldr}} r1
 ; ARM32:      mov     r2, #123
-; ARM32:      blx     [[CALL]]
+; ARM32:      bl      {{.*}} ignore64BitArgNoInline
 ; ARM32:      str     {{.*}}, [sp]
-; ARM32:      movw    [[CALL:r[0-9]]], {{.+}} ignore64BitArgNoInline
-; ARM32:      movt    [[CALL]], {{.+}} ignore64BitArgNoInline
 ; ARM32:      {{mov|ldr}} r0
 ; ARM32:      {{mov|ldr}} r1
 ; ARM32:      mov     r2, #123
-; ARM32:      blx      [[CALL]]
+; ARM32:      bl      {{.*}} ignore64BitArgNoInline
 
 ; MIPS32-LABEL: pass64BitArg
 
@@ -158,13 +155,10 @@
 ; ARM32:      movt    [[REG2]], {{.*}}     ; 0x1234
 ; ARM32:      str     [[REG1]], [sp, #4]
 ; ARM32:      str     [[REG2]], [sp]
-; ARM32:      movw    [[CALL:r[0-9]]], {{.+}} ignore64BitArgNoInline
-; ARM32:      movt    [[CALL]], {{.+}} ignore64BitArgNoInline
 ; ARM32:      {{mov|ldr}} r0
 ; ARM32:      {{mov|ldr}} r1
 ; ARM32:      mov     r2, #123
-; ARM32:      blx     [[CALL]]
-
+; ARM32:      bl      {{.*}} ignore64BitArgNoInline
 
 define internal i32 @pass64BitUndefArg() {
 entry:
@@ -185,10 +179,8 @@
 ; ARM32: sub sp
 ; ARM32: mov {{.*}}, #0
 ; ARM32: str
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} ignore64BitArgNoInline
-; ARM32: movt [[CALL]], {{.+}} ignore64BitArgNoInline
 ; ARM32: mov {{.*}}, #123
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} ignore64BitArgNoInline
 
 ; MIPS32-LABEL: pass64BitUndefArg
 ; MIPS32: jr  ra
@@ -414,9 +406,7 @@
 ; ARM32-LABEL: div64BitSigned
 ; ARM32: orrs {{r.*}}, {{r.*}}
 ; ARM32: bne
-; ARM32: movw	[[CALL:r[0-9]]], {{.+}} __divdi3
-; ARM32: movt	[[CALL]], {{.+}} __divdi3
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} __divdi3
 
 define internal i64 @div64BitSignedConst(i64 %a) {
 entry:
@@ -436,12 +426,10 @@
 ; ARM32-LABEL: div64BitSignedConst
 ; For a constant, we should be able to optimize-out the divide by zero check.
 ; ARM32-NOT: orrs
-; ARM32: movw	[[CALL:r[0-9]]], {{.+}} __divdi3
-; ARM32: movt	[[CALL]], {{.+}} __divdi3
 ; ARM32: movw {{.*}} ; 0x2ff2
 ; ARM32: movt {{.*}} ; 0x73ce
 ; ARM32: movw {{.*}} ; 0xb3a
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} __divdi3
 
 define internal i64 @div64BitUnsigned(i64 %a, i64 %b) {
 entry:
@@ -457,9 +445,7 @@
 ; ARM32-LABEL: div64BitUnsigned
 ; ARM32: orrs {{r.*}}, {{r.*}}
 ; ARM32: bne
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} __udivdi3
-; ARM32: movt [[CALL]], {{.+}} __udivdi3
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} __udivdi3
 
 define internal i64 @rem64BitSigned(i64 %a, i64 %b) {
 entry:
@@ -475,9 +461,7 @@
 ; ARM32-LABEL: rem64BitSigned
 ; ARM32: orrs {{r.*}}, {{r.*}}
 ; ARM32: bne
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} __moddi3
-; ARM32: movt [[CALL]], {{.+}} __moddi3
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} __moddi3
 
 define internal i64 @rem64BitUnsigned(i64 %a, i64 %b) {
 entry:
@@ -493,9 +477,7 @@
 ; ARM32-LABEL: rem64BitUnsigned
 ; ARM32: orrs {{r.*}}, {{r.*}}
 ; ARM32: bne
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} __umoddi3
-; ARM32: movt [[CALL]], {{.+}} __umoddi3
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} __umoddi3
 
 define internal i64 @shl64BitSigned(i64 %a, i64 %b) {
 entry:
@@ -1158,16 +1140,12 @@
 ; ARM32: cmpeq
 ; ARM32-OM1: tst
 ; ARM32: bne
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} func
-; ARM32: movt [[CALL]], {{.+}} func
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} <func>
 ; ARM32: cmp
 ; ARM32: cmpeq
 ; ARM32-OM1: tst
 ; ARM32: bne
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} func
-; ARM32: movt [[CALL]], {{.+}} func
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} <func>
 ; ARM32: bx
 
 declare void @func()
@@ -1232,9 +1210,7 @@
 ; ARM32-OM1: tst
 ; ARM32-OM1: bne
 ; ARM32-O2: beq
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} func
-; ARM32: movt [[CALL]], {{.+}} func
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} <func>
 ; ARM32: cmp
 ; ARM32: cmpeq
 ; ARM32-OM1: tst
diff --git a/tests_lit/llvm2ice_tests/alloc.ll b/tests_lit/llvm2ice_tests/alloc.ll
index 7dadda2..cf0926c 100644
--- a/tests_lit/llvm2ice_tests/alloc.ll
+++ b/tests_lit/llvm2ice_tests/alloc.ll
@@ -9,16 +9,19 @@
 ; RUN:   | %if --need=target_X8632 --command FileCheck \
 ; RUN:   --check-prefix CHECK-OPTM1 %s
 
+; TODO(jvoung): Stop skipping unimplemented parts (via --skip-unimplemented)
+; once enough infrastructure is in. Also, switch to --filetype=obj
+; when possible.
 ; RUN: %if --need=target_ARM32 --need=allow_dump \
-; RUN:   --command %p2i --filetype=obj --assemble \
-; RUN:   --disassemble --target arm32 -i %s --args -O2 \
+; RUN:   --command %p2i --filetype=asm --assemble \
+; RUN:   --disassemble --target arm32 -i %s --args -O2 --skip-unimplemented \
 ; RUN:   -allow-externally-defined-symbols \
 ; RUN:   | %if --need=target_ARM32 --need=allow_dump \
 ; RUN:   --command FileCheck --check-prefix ARM32 --check-prefix=ARM-OPT2 %s
 
 ; RUN: %if --need=target_ARM32 --need=allow_dump \
-; RUN:   --command %p2i --filetype=obj --assemble \
-; RUN:   --disassemble --target arm32 -i %s --args -Om1 \
+; RUN:   --command %p2i --filetype=asm --assemble \
+; RUN:   --disassemble --target arm32 -i %s --args -Om1 --skip-unimplemented \
 ; RUN:   -allow-externally-defined-symbols \
 ; RUN:   | %if --need=target_ARM32 --need=allow_dump \
 ; RUN:   --command FileCheck --check-prefix ARM32 --check-prefix=ARM-OPTM1 %s
@@ -45,9 +48,7 @@
 ; ARM32-LABEL: fixed_416_align_16
 ; ARM32-OPT2:  sub sp, sp, #428
 ; ARM32-OPTM1: sub sp, sp, #416
-; ARM32:       movw [[CALL:r[0-9]]], {{.+}} f1
-; ARM32:       movt [[CALL]], {{.+}} f1
-; ARM32:       blx [[CALL]]
+; ARM32:       bl {{.*}} R_{{.*}}    f1
 
 define internal void @fixed_416_align_32(i32 %n) {
 entry:
@@ -69,9 +70,7 @@
 ; ARM32-OPT2:  sub sp, sp, #424
 ; ARM32-OPTM1: sub sp, sp, #416
 ; ARM32:       bic sp, sp, #31
-; ARM32:       movw [[CALL:r[0-9]]], {{.+}} f1
-; ARM32:       movt [[CALL]], {{.+}} f1
-; ARM32:       blx [[CALL]]
+; ARM32:       bl {{.*}} R_{{.*}}    f1
 
 ; Show that the amount to allocate will be rounded up.
 define internal void @fixed_351_align_16(i32 %n) {
@@ -96,9 +95,7 @@
 ; ARM32-LABEL: fixed_351_align_16
 ; ARM32-OPT2:  sub sp, sp, #364
 ; ARM32-OPTM1: sub sp, sp, #352
-; ARM32:       movw [[CALL:r[0-9]]], {{.+}} f1
-; ARM32:       movt [[CALL]], {{.+}} f1
-; ARM32:       blx [[CALL]]
+; ARM32:       bl {{.*}} R_{{.*}}    f1
 
 define internal void @fixed_351_align_32(i32 %n) {
 entry:
@@ -120,9 +117,7 @@
 ; ARM32-OPT2:  sub sp, sp, #360
 ; ARM32-OPTM1: sub sp, sp, #352
 ; ARM32:       bic sp, sp, #31
-; ARM32:       movw [[CALL:r[0-9]]], {{.+}} f1
-; ARM32:       movt [[CALL]], {{.+}} f1
-; ARM32:       blx [[CALL]]
+; ARM32:       bl {{.*}} R_{{.*}}    f1
 
 declare void @f1(i32 %ignored)
 
@@ -149,9 +144,7 @@
 ; ARM32:      add r0, r0, #15
 ; ARM32:      bic r0, r0, #15
 ; ARM32:      sub sp, sp, r0
-; ARM32:      movw [[CALL:r[0-9]]], {{.+}} f2
-; ARM32:      movt [[CALL]], {{.+}} f2
-; ARM32:      blx [[CALL]]
+; ARM32:      bl {{.*}} R_{{.*}}    f2
 
 define internal void @variable_n_align_32(i32 %n) {
 entry:
@@ -183,9 +176,7 @@
 ; ARM32:      add r0, r0, #31
 ; ARM32:      bic r0, r0, #31
 ; ARM32:      sub sp, sp, r0
-; ARM32:      movw [[CALL:r[0-9]]], {{.+}} f2
-; ARM32:      movt [[CALL]], {{.+}} f2
-; ARM32:      blx [[CALL]]
+; ARM32:      bl {{.*}} R_{{.*}}    f2
 ; ARM32:      mov sp, fp
 ; ARM32:      pop {fp, lr}
 
diff --git a/tests_lit/llvm2ice_tests/arith.ll b/tests_lit/llvm2ice_tests/arith.ll
index d67bd17..ed34a60 100644
--- a/tests_lit/llvm2ice_tests/arith.ll
+++ b/tests_lit/llvm2ice_tests/arith.ll
@@ -4,28 +4,28 @@
 ; RUN:   --target x8632 -i %s --args -O2 \
 ; RUN:   | %if --need=target_X8632 --command FileCheck %s
 
+; TODO(jvoung): Stop skipping unimplemented parts (via --skip-unimplemented)
+; once enough infrastructure is in. Also, switch to --filetype=obj
+; when possible.
 ; RUN: %if --need=target_ARM32 --need=allow_dump \
-; RUN:   --command %p2i --filetype=obj --assemble --disassemble --target arm32 \
-; RUN:   -i %s --args -O2 \
+; RUN:   --command %p2i --filetype=asm --assemble --disassemble --target arm32 \
+; RUN:   -i %s --args -O2 --skip-unimplemented \
 ; RUN:   | %if --need=target_ARM32 --need=allow_dump \
 ; RUN:   --command FileCheck --check-prefix ARM32 --check-prefix ARM-OPT2 %s
 ; RUN: %if --need=target_ARM32 --need=allow_dump \
-; RUN:   --command %p2i --filetype=obj --assemble --disassemble --target arm32 \
-; RUN:   -i %s --args -O2 --mattr=hwdiv-arm \
+; RUN:   --command %p2i --filetype=asm --assemble --disassemble --target arm32 \
+; RUN:   -i %s --args -O2 --mattr=hwdiv-arm --skip-unimplemented \
 ; RUN:   | %if --need=target_ARM32 --need=allow_dump \
 ; RUN:   --command FileCheck --check-prefix ARM32HWDIV %s
 ; RUN: %if --need=target_ARM32 --need=allow_dump \
-; RUN:   --command %p2i --filetype=obj --assemble --disassemble --target arm32 \
-; RUN:   -i %s --args -Om1 \
+; RUN:   --command %p2i --filetype=asm --assemble --disassemble --target arm32 \
+; RUN:   -i %s --args -Om1 --skip-unimplemented \
 ; RUN:   | %if --need=target_ARM32 --need=allow_dump \
 ; RUN:   --command FileCheck --check-prefix ARM32 --check-prefix ARM32-OPTM1 %s
 ;
-; TODO(kschimpf): Stop skipping unimplemented parts (via --skip-unimplemented)
-; once enough infrastructure is in. Also, switch to --filetype=obj
-; when possible.
 ; RUN: %if --need=target_MIPS32 --need=allow_dump \
 ; RUN:   --command %p2i --filetype=asm --assemble --disassemble --target mips32\
-; RUN:   -i %s --args -O2 -skip-unimplemented \
+; RUN:   -i %s --args -O2 --skip-unimplemented \
 ; RUN:   | %if --need=target_MIPS32 --need=allow_dump \
 ; RUN:   --command FileCheck --check-prefix MIPS32 %s
 
@@ -164,9 +164,7 @@
 ; ARM32: bne
 ; The following instruction is ".word 0xe7fedef0 = udf #60896 ; 0xede0".
 ; ARM32: e7fedef0
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} __divsi3
-; ARM32: movt [[CALL]], {{.+}} __divsi3
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} __divsi3
 ; ARM32HWDIV-LABEL: Sdiv
 ; ARM32HWDIV: tst
 ; ARM32HWDIV: bne
@@ -185,9 +183,7 @@
 ;
 ; ARM32-LABEL: SdivConst
 ; ARM32-NOT: tst
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} __divsi3
-; ARM32: movt [[CALL]], {{.+}} __divsi3
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} __divsi3
 ; ARM32HWDIV-LABEL: SdivConst
 ; ARM32HWDIV-NOT: tst
 ; ARM32HWDIV: sdiv
@@ -206,9 +202,7 @@
 ; ARM32-LABEL: Srem
 ; ARM32: tst [[DENOM:r.*]], [[DENOM]]
 ; ARM32: bne
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} __modsi3
-; ARM32: movt [[CALL]], {{.+}} __modsi3
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} __modsi3
 ; ARM32HWDIV-LABEL: Srem
 ; ARM32HWDIV: tst
 ; ARM32HWDIV: bne
@@ -228,9 +222,7 @@
 ; ARM32-LABEL: Udiv
 ; ARM32: tst [[DENOM:r.*]], [[DENOM]]
 ; ARM32: bne
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} __udivsi3
-; ARM32: movt [[CALL]], {{.+}} __udivsi3
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} __udivsi3
 ; ARM32HWDIV-LABEL: Udiv
 ; ARM32HWDIV: tst
 ; ARM32HWDIV: bne
@@ -249,9 +241,7 @@
 ; ARM32-LABEL: Urem
 ; ARM32: tst [[DENOM:r.*]], [[DENOM]]
 ; ARM32: bne
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} __umodsi3
-; ARM32: movt [[CALL]], {{.+}} __umodsi3
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} __umodsi3
 ; ARM32HWDIV-LABEL: Urem
 ; ARM32HWDIV: tst
 ; ARM32HWDIV: bne
diff --git a/tests_lit/llvm2ice_tests/bool-folding.ll b/tests_lit/llvm2ice_tests/bool-folding.ll
index 0bb1cac..6f87128 100644
--- a/tests_lit/llvm2ice_tests/bool-folding.ll
+++ b/tests_lit/llvm2ice_tests/bool-folding.ll
@@ -55,9 +55,7 @@
 ; CHECK: jge
 ; ARM32-LABEL: fold_cmp_br_intervening_insts
 ; ARM32: push {{[{].*[}]}}
-; ARM32: movw [[CALL:r[0-9]]], #:lower16:use_value
-; ARM32: movt [[CALL]], #:upper16:use_value
-; ARM32: blx [[CALL]]
+; ARM32: bl use_value
 ; ARM32: cmp {{r[0-9]+}}, {{r[0-9]+}}
 ; ARM32: bge
 ; ARM32: mov r0, #1
@@ -192,9 +190,7 @@
 ; CHECK: cmp
 ; CHECK: cmovl
 ; ARM32-LABEL: fold_cmp_select_intervening_insts
-; ARM32: movw [[CALL:r[0-9]]], #:lower16:use_value
-; ARM32: movt [[CALL]], #:upper16:use_value
-; ARM32: blx [[CALL]]
+; ARM32: bl use_value
 ; ARM32: cmp r{{[0-9]+}}, r{{[0-9]+}}
 ; ARM32: movlt
 ; ARM32: bx lr
diff --git a/tests_lit/llvm2ice_tests/branch-opt.ll b/tests_lit/llvm2ice_tests/branch-opt.ll
index 965b502..3bfed0d 100644
--- a/tests_lit/llvm2ice_tests/branch-opt.ll
+++ b/tests_lit/llvm2ice_tests/branch-opt.ll
@@ -9,15 +9,18 @@
 ; RUN:   --target x8632 -i %s --args -Om1 -allow-externally-defined-symbols \
 ; RUN:   | %if --need=target_X8632 --command FileCheck --check-prefix=OM1 %s
 
+; TODO(jvoung): Stop skipping unimplemented parts (via --skip-unimplemented)
+; once enough infrastructure is in. Also, switch to --filetype=obj
+; when possible.
 ; RUN: %if --need=target_ARM32 --need=allow_dump \
-; RUN:   --command %p2i --filetype=obj --assemble \
+; RUN:   --command %p2i --filetype=asm --assemble \
 ; RUN:   --disassemble --target arm32 -i %s --args -O2 \
 ; RUN:   -allow-externally-defined-symbols \
 ; RUN:   | %if --need=target_ARM32 --need=allow_dump \
 ; RUN:   --command FileCheck --check-prefix ARM32O2 %s
 
 ; RUN: %if --need=target_ARM32 --need=allow_dump \
-; RUN:   --command %p2i --filetype=obj --assemble \
+; RUN:   --command %p2i --filetype=asm --assemble \
 ; RUN:   --disassemble --target arm32 -i %s --args -Om1 \
 ; RUN:   -allow-externally-defined-symbols \
 ; RUN:   | %if --need=target_ARM32 --need=allow_dump \
@@ -48,22 +51,13 @@
 ; OM1: call
 
 ; ARM32O2-LABEL: testUncondToNextBlock
-; ARM32O2:      movw {{.+}} dummy
-; ARM32O2-NEXT: movt
-; ARM32O2-NEXT: blx
-; ARM32O2-NEXT: movw {{.+}} dummy
-; ARM32O2-NEXT: movt
-; ARM32O2-NEXT: blx
+; ARM32O2: bl {{.*}} dummy
+; ARM32O2-NEXT: bl {{.*}} dummy
 
 ; ARM32OM1-LABEL: testUncondToNextBlock
-; ARM32OM1:      movw {{.+}} dummy
-; ARM32OM1-NEXT: movt
-; ARM32OM1-NEXT: blx
+; ARM32OM1: bl {{.*}} dummy
 ; ARM32OM1-NEXT: b
-; ARM32OM1-NEXT: movw {{.+}} dummy
-; ARM32OM1-NEXT: movt
-; ARM32OM1-NEXT: blx
-
+; ARM32OM1-NEXT: bl {{.*}} dummy
 
 ; For a conditional branch with a fallthrough to the next block, the
 ; fallthrough branch should be removed.
@@ -99,16 +93,12 @@
 ; OM1: ret
 
 ; ARM32O2-LABEL: testCondFallthroughToNextBlock
-; ARM32O2:      cmp {{.*}}, #123
+; ARM32O2: cmp {{.*}}, #123
 ; ARM32O2-NEXT: bge
-; ARM32O2-NEXT: movw {{.+}} dummy
-; ARM32O2-NEXT: movt
-; ARM32O2-NEXT: blx
-; ARM32O2:      bx lr
-; ARM32O2-NEXT: movw {{.+}} dummy
-; ARM32O2-NEXT: movt
-; ARM32O2-NEXT: blx
-; ARM32O2:      bx lr
+; ARM32O2-NEXT: bl
+; ARM32O2: bx lr
+; ARM32O2: bl
+; ARM32O2: bx lr
 
 ; ARM32OM1-LABEL: testCondFallthroughToNextBlock
 ; ARM32OM1: mov {{.*}}, #0
@@ -117,13 +107,9 @@
 ; ARM32OM1: tst {{.*}}, #1
 ; ARM32OM1: bne
 ; ARM32OM1: b
-; ARM32OM1: movw
-; ARM32OM1: movt
-; ARM32OM1: blx
+; ARM32OM1: bl
 ; ARM32OM1: bx lr
-; ARM32OM1: movw
-; ARM32OM1: movt
-; ARM32OM1: blx
+; ARM32OM1: bl
 ; ARM32OM1: bx lr
 
 ; For a conditional branch with the next block as the target and a
@@ -164,16 +150,12 @@
 ; Note that compare and branch folding isn't implemented yet
 ; (compared to x86-32).
 ; ARM32O2-LABEL: testCondTargetNextBlock
-; ARM32O2:      cmp {{.*}}, #123
+; ARM32O2: cmp {{.*}}, #123
 ; ARM32O2-NEXT: blt
-; ARM32O2-NEXT: movw
-; ARM32O2-NEXT: movt
-; ARM32O2-NEXT: blx
-; ARM32O2:      bx lr
-; ARM32O2-NEXT: movw
-; ARM32O2-NEXT: movt
-; ARM32O2-NEXT: blx
-; ARM32O2:      bx lr
+; ARM32O2-NEXT: bl
+; ARM32O2: bx lr
+; ARM32O2: bl
+; ARM32O2: bx lr
 
 ; ARM32OM1-LABEL: testCondTargetNextBlock
 ; ARM32OM1: cmp {{.*}}, #123
@@ -181,9 +163,9 @@
 ; ARM32OM1: tst {{.*}}, #1
 ; ARM32OM1: bne
 ; ARM32OM1: b
-; ARM32OM1: blx
+; ARM32OM1: bl
 ; ARM32OM1: bx lr
-; ARM32OM1: blx
+; ARM32OM1: bl
 ; ARM32OM1: bx lr
 
 ; Unconditional branches to the block after a contracted block should be
@@ -212,18 +194,10 @@
 ; OM1: call
 
 ; ARM32O2-LABEL: testUncondToBlockAfterContract
-; ARM32O2:      movw {{.+}} dummy
-; ARM32O2-NEXT: movt
-; ARM32O2-NEXT: blx
-; ARM32O2-NEXT: movw {{.+}} dummy
-; ARM32O2-NEXT: movt
-; ARM32O2-NEXT: blx
+; ARM32O2: bl {{.*}} dummy
+; ARM32O2-NEXT: bl {{.*}} dummy
 
 ; ARM32OM1-LABEL: testUncondToBlockAfterContract
-; ARM32OM1:      movw {{.+}} dummy
-; ARM32OM1-NEXT: movt
-; ARM32OM1-NEXT: blx
+; ARM32OM1: bl {{.*}} dummy
 ; ARM32OM1-NEXT: b
-; ARM32OM1-NEXT: movw {{.+}} dummy
-; ARM32OM1-NEXT: movt
-; ARM32OM1-NEXT: blx
+; ARM32OM1-NEXT: bl {{.*}} dummy
diff --git a/tests_lit/llvm2ice_tests/fp.arith.ll b/tests_lit/llvm2ice_tests/fp.arith.ll
index c9982ae..c57045a 100644
--- a/tests_lit/llvm2ice_tests/fp.arith.ll
+++ b/tests_lit/llvm2ice_tests/fp.arith.ll
@@ -117,9 +117,7 @@
 ; CHECK-LABEL: remFloat
 ; CHECK: call {{.*}} R_{{.*}} fmodf
 ; ARM32-LABEL: remFloat
-; ARM32: movw {{.+}} fmodf
-; ARM32: movt
-; ARM32: blx
+; ARM32: bl {{.*}} fmodf
 
 define internal double @remDouble(double %a, double %b) {
 entry:
@@ -129,6 +127,4 @@
 ; CHECK-LABEL: remDouble
 ; CHECK: call {{.*}} R_{{.*}} fmod
 ; ARM32-LABEL: remDouble
-; ARM32: movw {{.+}} fmod
-; ARM32: movt
-; ARM32: blx
+; ARM32: bl {{.*}} fmod
diff --git a/tests_lit/llvm2ice_tests/fp.arm.call.ll b/tests_lit/llvm2ice_tests/fp.arm.call.ll
index 10ae316..1e0c6e7 100644
--- a/tests_lit/llvm2ice_tests/fp.arm.call.ll
+++ b/tests_lit/llvm2ice_tests/fp.arm.call.ll
@@ -63,39 +63,29 @@
 ; CHECK-LABEL: floatHarness
   call void @float1(float 1.0)
 ; CHECK-DAG: vmov.f32 s0
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} float1
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} float1
   call void @float2(float 1.0, float 2.0)
 ; CHECK-DAG: vmov.f32 s0
 ; CHECK-DAG: vmov.f32 s1
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} float2
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} float2
   call void @float3(float 1.0, float 2.0, float 3.0)
 ; CHECK-DAG: vmov.f32 s0
 ; CHECK-DAG: vmov.f32 s1
 ; CHECK-DAG: vmov.f32 s2
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} float3
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} float3
   call void @float4(float 1.0, float 2.0, float 3.0, float 4.0)
 ; CHECK-DAG: vmov.f32 s0
 ; CHECK-DAG: vmov.f32 s1
 ; CHECK-DAG: vmov.f32 s2
 ; CHECK-DAG: vmov.f32 s3
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} float4
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} float4
   call void @float5(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0)
 ; CHECK-DAG: vmov.f32 s0
 ; CHECK-DAG: vmov.f32 s1
 ; CHECK-DAG: vmov.f32 s2
 ; CHECK-DAG: vmov.f32 s3
 ; CHECK-DAG: vmov.f32 s4
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} float5
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} float5
   call void @float6(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
                     float 6.0)
 ; CHECK-DAG: vmov.f32 s0
@@ -104,9 +94,7 @@
 ; CHECK-DAG: vmov.f32 s3
 ; CHECK-DAG: vmov.f32 s4
 ; CHECK-DAG: vmov.f32 s5
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} float6
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} float6
   call void @float7(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
                     float 6.0, float 7.0)
 ; CHECK-DAG: vmov.f32 s0
@@ -116,9 +104,7 @@
 ; CHECK-DAG: vmov.f32 s4
 ; CHECK-DAG: vmov.f32 s5
 ; CHECK-DAG: vmov.f32 s6
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} float7
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} float7
   call void @float8(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
                     float 6.0, float 7.0, float 8.0)
 ; CHECK-DAG: vmov.f32 s0
@@ -129,9 +115,7 @@
 ; CHECK-DAG: vmov.f32 s5
 ; CHECK-DAG: vmov.f32 s6
 ; CHECK-DAG: vmov.f32 s7
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} float8
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} float8
   call void @float9(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
                     float 6.0, float 7.0, float 8.0, float 9.0)
 ; CHECK-DAG: vmov.f32 s0
@@ -143,9 +127,7 @@
 ; CHECK-DAG: vmov.f32 s6
 ; CHECK-DAG: vmov.f32 s7
 ; CHECK-DAG: vmov.f32 s8
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} float9
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} float9
   call void @float10(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
                     float 6.0, float 7.0, float 8.0, float 9.0, float 10.0)
 ; CHECK-DAG: vmov.f32 s0
@@ -158,9 +140,7 @@
 ; CHECK-DAG: vmov.f32 s7
 ; CHECK-DAG: vmov.f32 s8
 ; CHECK-DAG: vmov.f32 s9
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} float10
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} float10
   call void @float11(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
                     float 6.0, float 7.0, float 8.0, float 9.0, float 10.0,
                     float 11.0)
@@ -175,9 +155,7 @@
 ; CHECK-DAG: vmov.f32 s8
 ; CHECK-DAG: vmov.f32 s9
 ; CHECK-DAG: vmov.f32 s10
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} float11
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} float11
   call void @float12(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
                     float 6.0, float 7.0, float 8.0, float 9.0, float 10.0,
                     float 11.0, float 12.0)
@@ -193,9 +171,7 @@
 ; CHECK-DAG: vmov.f32 s9
 ; CHECK-DAG: vmov.f32 s10
 ; CHECK-DAG: vmov.f32 s11
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} float12
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} float12
   call void @float13(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
                     float 6.0, float 7.0, float 8.0, float 9.0, float 10.0,
                     float 11.0, float 12.0, float 13.0)
@@ -212,9 +188,7 @@
 ; CHECK-DAG: vmov.f32 s10
 ; CHECK-DAG: vmov.f32 s11
 ; CHECK-DAG: vmov.f32 s12
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} float13
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} float13
   call void @float14(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
                     float 6.0, float 7.0, float 8.0, float 9.0, float 10.0,
                     float 11.0, float 12.0, float 13.0, float 14.0)
@@ -232,9 +206,7 @@
 ; CHECK-DAG: vmov.f32 s11
 ; CHECK-DAG: vmov.f32 s12
 ; CHECK-DAG: vmov.f32 s13
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} float14
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} float14
   call void @float15(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
                     float 6.0, float 7.0, float 8.0, float 9.0, float 10.0,
                     float 11.0, float 12.0, float 13.0, float 14.0,
@@ -254,9 +226,7 @@
 ; CHECK-DAG: vmov.f32 s12
 ; CHECK-DAG: vmov.f32 s13
 ; CHECK-DAG: vmov.f32 s14
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} float15
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} float15
   call void @float16(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
                     float 6.0, float 7.0, float 8.0, float 9.0, float 10.0,
                     float 11.0, float 12.0, float 13.0, float 14.0,
@@ -277,9 +247,7 @@
 ; CHECK-DAG: vmov.f32 s13
 ; CHECK-DAG: vmov.f32 s14
 ; CHECK-DAG: vmov.f32 s15
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} float16
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} float16
   call void @float17(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
                     float 6.0, float 7.0, float 8.0, float 9.0, float 10.0,
                     float 11.0, float 12.0, float 13.0, float 14.0,
@@ -301,9 +269,7 @@
 ; CHECK-DAG: vmov.f32 s14
 ; CHECK-DAG: vmov.f32 s15
 ; CHECK-DAG: vstr s{{.*}}, [sp]
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} float17
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} float17
   call void @float18(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
                     float 6.0, float 7.0, float 8.0, float 9.0, float 10.0,
                     float 11.0, float 12.0, float 13.0, float 14.0,
@@ -326,9 +292,7 @@
 ; CHECK-DAG: vmov.f32 s15
 ; CHECK-DAG: vstr s{{.*}}, [sp]
 ; CHECK-DAG: vstr s{{.*}}, [sp, #4]
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} float18
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} float18
   ret void
 }
 
@@ -354,30 +318,22 @@
 ; CHECK-LABEL: doubleHarness
   call void @double1(double 1.0)
 ; CHECK-DAG: vmov.f64 d0
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} double1
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} double1
   call void @double2(double 1.0, double 2.0)
 ; CHECK-DAG: vmov.f64 d0
 ; CHECK-DAG: vmov.f64 d1
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} double2
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} double2
   call void @double3(double 1.0, double 2.0, double 3.0)
 ; CHECK-DAG: vmov.f64 d0
 ; CHECK-DAG: vmov.f64 d1
 ; CHECK-DAG: vmov.f64 d2
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} double3
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} double3
   call void @double4(double 1.0, double 2.0, double 3.0, double 4.0)
 ; CHECK-DAG: vmov.f64 d0
 ; CHECK-DAG: vmov.f64 d1
 ; CHECK-DAG: vmov.f64 d2
 ; CHECK-DAG: vmov.f64 d3
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} double4
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} double4
   call void @double5(double 1.0, double 2.0, double 3.0, double 4.0,
                      double 5.0)
 ; CHECK-DAG: vmov.f64 d0
@@ -385,9 +341,7 @@
 ; CHECK-DAG: vmov.f64 d2
 ; CHECK-DAG: vmov.f64 d3
 ; CHECK-DAG: vmov.f64 d4
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} double5
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} double5
   call void @double6(double 1.0, double 2.0, double 3.0, double 4.0,
                      double 5.0, double 6.0)
 ; CHECK-DAG: vmov.f64 d0
@@ -396,9 +350,7 @@
 ; CHECK-DAG: vmov.f64 d3
 ; CHECK-DAG: vmov.f64 d4
 ; CHECK-DAG: vmov.f64 d5
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} double6
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} double6
   call void @double7(double 1.0, double 2.0, double 3.0, double 4.0,
                      double 5.0, double 6.0, double 7.0)
 ; CHECK-DAG: vmov.f64 d0
@@ -408,9 +360,7 @@
 ; CHECK-DAG: vmov.f64 d4
 ; CHECK-DAG: vmov.f64 d5
 ; CHECK-DAG: vmov.f64 d6
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} double7
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} double7
   call void @double8(double 1.0, double 2.0, double 3.0, double 4.0,
                      double 5.0, double 6.0, double 7.0, double 8.0)
 ; CHECK-DAG: vmov.f64 d0
@@ -421,9 +371,7 @@
 ; CHECK-DAG: vmov.f64 d5
 ; CHECK-DAG: vmov.f64 d6
 ; CHECK-DAG: vmov.f64 d7
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} double8
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} double8
   call void @double9(double 1.0, double 2.0, double 3.0, double 4.0,
                      double 5.0, double 6.0, double 7.0, double 8.0,
                      double 9.0)
@@ -435,10 +383,8 @@
 ; CHECK-DAG: vmov.f64 d5
 ; CHECK-DAG: vmov.f64 d6
 ; CHECK-DAG: vmov.f64 d7
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} double9
-; CHECK-DAG: movt [[CALL]]
 ; CHECK-DAG: vstr d{{.*}}, [sp]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} double9
   call void @double10(double 1.0, double 2.0, double 3.0, double 4.0,
                      double 5.0, double 6.0, double 7.0, double 8.0,
                      double 9.0, double 10.0)
@@ -452,9 +398,7 @@
 ; CHECK-DAG: vmov.f64 d7
 ; CHECK-DAG: vstr d{{.*}}, [sp]
 ; CHECK-DAG: vstr d{{.*}}, [sp, #8]
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} double10
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} double10
 
   ret void
 }
@@ -493,17 +437,13 @@
 ; CHECK-DAG: vmov.f32 s0
 ; CHECK-DAG: vmov.f64 d1
 ; CHECK-DAG: vmov.f32 s1
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} testFDF
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} testFDF
   call void @testFDDF(float 1.0, double 2.0, double 3.0, float 4.0)
 ; CHECK-DAG: vmov.f32 s0
 ; CHECK-DAG: vmov.f64 d1
 ; CHECK-DAG: vmov.f64 d2
 ; CHECK-DAG: vmov.f32 s1
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} testFDDF
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} testFDDF
   call void @testFDDDF(float 1.0, double 2.0, double 3.0, double 4.0,
                        float 5.0)
 ; CHECK-DAG: vmov.f32 s0
@@ -511,9 +451,7 @@
 ; CHECK-DAG: vmov.f64 d2
 ; CHECK-DAG: vmov.f64 d3
 ; CHECK-DAG: vmov.f32 s1
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} testFDDDF
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} testFDDDF
   call void @testFDDDDF(float 1.0, double 2.0, double 3.0, double 4.0,
                         double 5.0, float 6.0)
 ; CHECK-DAG: vmov.f32 s0
@@ -522,9 +460,7 @@
 ; CHECK-DAG: vmov.f64 d3
 ; CHECK-DAG: vmov.f64 d4
 ; CHECK-DAG: vmov.f32 s1
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} testFDDDDF
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} testFDDDDF
   call void @testFDDDDDF(float 1.0, double 2.0, double 3.0, double 4.0,
                          double 5.0, double 6.0, float 7.0)
 ; CHECK-DAG: vmov.f32 s0
@@ -534,9 +470,7 @@
 ; CHECK-DAG: vmov.f64 d4
 ; CHECK-DAG: vmov.f64 d5
 ; CHECK-DAG: vmov.f32 s1
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} testFDDDDDF
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} testFDDDDDF
   call void @testFDDDDDDF(float 1.0, double 2.0, double 3.0, double 4.0,
                           double 5.0, double 6.0, double 7.0, float 8.0)
 ; CHECK-DAG: vmov.f32 s0
@@ -547,9 +481,7 @@
 ; CHECK-DAG: vmov.f64 d5
 ; CHECK-DAG: vmov.f64 d6
 ; CHECK-DAG: vmov.f32 s1
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} testFDDDDDDF
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} testFDDDDDDF
   call void @testFDDDDDDDF(float 1.0, double 2.0, double 3.0, double 4.0,
                            double 5.0, double 6.0, double 7.0, double 8.0,
                            float 9.0)
@@ -562,9 +494,7 @@
 ; CHECK-DAG: vmov.f64 d6
 ; CHECK-DAG: vmov.f64 d7
 ; CHECK-DAG: vmov.f32 s1
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} testFDDDDDDDF
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} testFDDDDDDDF
   call void @testFDDDDDDDFD(float 1.0, double 2.0, double 3.0, double 4.0,
                             double 5.0, double 6.0, double 7.0, double 8.0,
                             float 9.0, double 10.0)
@@ -578,9 +508,7 @@
 ; CHECK-DAG: vmov.f64 d7
 ; CHECK-DAG: vstr d{{.*}}, [sp]
 ; CHECK-DAG: vmov.f32 s1
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} testFDDDDDDDFD
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} testFDDDDDDDFD
   call void @testFDDDDDDDDF(float 1.0, double 2.0, double 3.0, double 4.0,
                             double 5.0, double 6.0, double 7.0, double 8.0,
                             double 9.0, float 10.0)
@@ -594,9 +522,7 @@
 ; CHECK-DAG: vmov.f64 d7
 ; CHECK-DAG: vstr d{{.*}}, [sp]
 ; CHECK-DAG: vstr s{{.*}}, [sp, #8]
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} testFDDDDDDDDF
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} testFDDDDDDDDF
   call void @testFDDDDDDDDDF(float 1.0, double 2.0, double 3.0, double 4.0,
                              double 5.0, double 6.0, double 7.0, double 8.0,
                              double 9.0, double 10.0, float 11.0)
@@ -611,9 +537,7 @@
 ; CHECK-DAG: vstr d{{.*}}, [sp]
 ; CHECK-DAG: vstr d{{.*}}, [sp, #8]
 ; CHECK-DAG: vstr s{{.*}}, [sp, #16]
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} testFDDDDDDDDDF
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} testFDDDDDDDDDF
   call void @testFDDDDDDDDFD(float 1.0, double 2.0, double 3.0, double 4.0,
                              double 5.0, double 6.0, double 7.0, double 8.0,
                              double 9.0, float 10.0, double 11.0)
@@ -628,9 +552,7 @@
 ; CHECK-DAG: vstr d{{.*}}, [sp]
 ; CHECK-DAG: vstr s{{.*}}, [sp, #8]
 ; CHECK-DAG: vstr d{{.*}}, [sp, #16]
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} testFDDDDDDDDFD
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} testFDDDDDDDDFD
   call void @testFDDDDDDDDFDF(float 1.0, double 2.0, double 3.0, double 4.0,
                               double 5.0, double 6.0, double 7.0, double 8.0,
                               double 9.0, float 10.0, double 11.0, float 12.0)
@@ -646,9 +568,7 @@
 ; CHECK-DAG: vstr s{{.*}}, [sp, #8]
 ; CHECK-DAG: vstr d{{.*}}, [sp, #16]
 ; CHECK-DAG: vstr s{{.*}}, [sp, #24]
-; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} testFDDDDDDDDFD
-; CHECK-DAG: movt [[CALL]]
-; CHECK:     blx [[CALL]]
+; CHECK: bl {{.*}} testFDDDDDDDDFDF
 
   ret void
 }
diff --git a/tests_lit/llvm2ice_tests/fp.cmp.ll b/tests_lit/llvm2ice_tests/fp.cmp.ll
index abc8dc9..3a95cdd 100644
--- a/tests_lit/llvm2ice_tests/fp.cmp.ll
+++ b/tests_lit/llvm2ice_tests/fp.cmp.ll
@@ -61,9 +61,7 @@
 ; ARM32-OM1: mov [[R0:r[0-9]+]], #0
 ; ARM32-OM1: moveq [[R0]], #1
 ; ARM32-O2: bne
-; ARM32: movw [[CALL:r[0-9]]], #:lower16:func
-; ARM32: movt [[CALL:r[0-9]]], #:upper16:func
-; ARM32: blx [[CALL]]
+; ARM32: bl func
 ; ARM32: vcmp.f64
 ; ARM32: vmrs
 ; ARM32-OM1: mov [[R1:r[0-9]+]], #0
diff --git a/tests_lit/llvm2ice_tests/large_stack_offs.ll b/tests_lit/llvm2ice_tests/large_stack_offs.ll
index 5b00528..8449df7 100644
--- a/tests_lit/llvm2ice_tests/large_stack_offs.ll
+++ b/tests_lit/llvm2ice_tests/large_stack_offs.ll
@@ -7,7 +7,7 @@
 
 ; RUN: %if --need=target_ARM32 --need=allow_dump \
 ; RUN:   --command %p2i --filetype=asm --assemble --disassemble --target arm32 \
-; RUN:   -i %s --args -Om1 --test-stack-extra 4096 \
+; RUN:   -i %s --args -Om1 --skip-unimplemented --test-stack-extra 4096 \
 ; RUN:   -allow-externally-defined-symbols \
 ; RUN:   | %if --need=target_ARM32 --need=allow_dump \
 ; RUN:   --command FileCheck --check-prefix ARM32 %s
@@ -63,16 +63,12 @@
 ; Now skip ahead to where the call in br_1 begins, to check how %t2 is used.
 ; ARM32: movw ip, #4232
 ; ARM32-NEXT: add ip, sp, ip
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} dummy
-; ARM32: movt [[CALL]]
 ; ARM32: ldr r2, [ip, #-4]
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} dummy
 ; The call clobbers ip, so we need to re-create the base register.
 ; ARM32: movw ip, #4{{.*}}
 ; ARM32: b {{[a-f0-9]+}}
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} dummy
-; ARM32: movt [[CALL]]
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} dummy
 
 ; Similar, but test a function that uses FP as the base register (originally).
 define internal i64 @usesFrameReg(i32 %a, i32 %b, i32 %c, i32 %d) {
@@ -122,13 +118,9 @@
 ; Now skip ahead to where the call in br_1 begins, to check how %t2 is used.
 ; ARM32: movw ip, #4120
 ; ARM32-NEXT: sub ip, fp, ip
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} dummy
-; ARM32: movt [[CALL]]
 ; ARM32: ldr r2, [ip, #-4]
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} dummy
 ; The call clobbers ip, so we need to re-create the base register.
 ; ARM32: movw ip, #4{{.*}}
 ; ARM32: b {{[a-f0-9]+}}
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} dummy
-; ARM32: movt [[CALL]]
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} dummy
diff --git a/tests_lit/llvm2ice_tests/nacl-mem-intrinsics.ll b/tests_lit/llvm2ice_tests/nacl-mem-intrinsics.ll
index e2c2db1..7fb65f1 100644
--- a/tests_lit/llvm2ice_tests/nacl-mem-intrinsics.ll
+++ b/tests_lit/llvm2ice_tests/nacl-mem-intrinsics.ll
@@ -11,8 +11,8 @@
 ; RUN:   | %if --need=target_X8632 --command FileCheck --check-prefix OM1 %s
 
 ; RUN: %if --need=target_ARM32 --need=allow_dump \
-; RUN:   --command %p2i --filetype=obj --assemble --disassemble --target arm32 \
-; RUN:   -i %s --args -O2 \
+; RUN:   --command %p2i --filetype=asm --assemble --disassemble --target arm32 \
+; RUN:   -i %s --args -O2 --skip-unimplemented \
 ; RUN:   | %if --need=target_ARM32 --need=allow_dump \
 ; RUN:   --command FileCheck --check-prefix ARM32 %s
 
@@ -33,9 +33,7 @@
 ; OM1-LABEL: test_memcpy
 ; OM1: call  {{.*}} memcpy
 ; ARM32-LABEL: test_memcpy
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} memcpy
-; ARM32: movt [[CALL]]
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} memcpy
 
 define internal void @test_memcpy_long_const_len(i32 %iptr_dst, i32 %iptr_src) {
 entry:
@@ -50,9 +48,7 @@
 ; OM1-LABEL: test_memcpy_long_const_len
 ; OM1: call {{.*}} memcpy
 ; ARM32-LABEL: test_memcpy_long_const_len
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} memcpy
-; ARM32: movt [[CALL]]
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} memcpy
 
 define internal void @test_memcpy_very_small_const_len(i32 %iptr_dst,
                                                        i32 %iptr_src) {
@@ -70,9 +66,7 @@
 ; OM1-LABEL: test_memcpy_very_small_const_len
 ; OM1: call {{.*}} memcpy
 ; ARM32-LABEL: test_memcpy_very_small_const_len
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} memcpy
-; ARM32: movt [[CALL]]
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} memcpy
 
 define internal void @test_memcpy_const_len_3(i32 %iptr_dst, i32 %iptr_src) {
 entry:
@@ -91,9 +85,7 @@
 ; OM1-LABEL: test_memcpy_const_len_3
 ; OM1: call {{.*}} memcpy
 ; ARM32-LABEL: test_memcpy_const_len_3
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} memcpy
-; ARM32: movt [[CALL]]
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} memcpy
 
 define internal void @test_memcpy_mid_const_len(i32 %iptr_dst, i32 %iptr_src) {
 entry:
@@ -112,9 +104,7 @@
 ; OM1-LABEL: test_memcpy_mid_const_len
 ; OM1: call {{.*}} memcpy
 ; ARM32-LABEL: test_memcpy_mid_const_len
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} memcpy
-; ARM32: movt [[CALL]]
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} memcpy
 
 define internal void @test_memcpy_mid_const_len_overlap(i32 %iptr_dst,
                                                         i32 %iptr_src) {
@@ -134,9 +124,7 @@
 ; OM1-LABEL: test_memcpy_mid_const_len_overlap
 ; OM1: call {{.*}} memcpy
 ; ARM32-LABEL: test_memcpy_mid_const_len_overlap
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} memcpy
-; ARM32: movt [[CALL]]
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} memcpy
 
 define internal void @test_memcpy_big_const_len_overlap(i32 %iptr_dst,
                                                         i32 %iptr_src) {
@@ -156,9 +144,7 @@
 ; OM1-LABEL: test_memcpy_big_const_len_overlap
 ; OM1: call {{.*}} memcpy
 ; ARM32-LABEL: test_memcpy_big_const_len_overlap
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} memcpy
-; ARM32: movt [[CALL]]
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} memcpy
 
 define internal void @test_memcpy_large_const_len(i32 %iptr_dst,
                                                   i32 %iptr_src) {
@@ -180,9 +166,7 @@
 ; OM1-LABEL: test_memcpy_large_const_len
 ; OM1: call {{.*}} memcpy
 ; ARM32-LABEL: test_memcpy_large_const_len
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} memcpy
-; ARM32: movt [[CALL]]
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} memcpy
 
 define internal void @test_memmove(i32 %iptr_dst, i32 %iptr_src, i32 %len) {
 entry:
@@ -197,9 +181,7 @@
 ; OM1-LABEL: test_memmove
 ; OM1: call {{.*}} memmove
 ; ARM32-LABEL: test_memmove
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} memmove
-; ARM32: movt [[CALL]]
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} memmove
 
 define internal void @test_memmove_long_const_len(i32 %iptr_dst,
                                                   i32 %iptr_src) {
@@ -215,9 +197,7 @@
 ; OM1-LABEL: test_memmove_long_const_len
 ; OM1: call {{.*}} memmove
 ; ARM32-LABEL: test_memmove_long_const_len
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} memmove
-; ARM32: movt [[CALL]]
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} memmove
 
 define internal void @test_memmove_very_small_const_len(i32 %iptr_dst,
                                                         i32 %iptr_src) {
@@ -235,9 +215,7 @@
 ; OM1-LABEL: test_memmove_very_small_const_len
 ; OM1: call {{.*}} memmove
 ; ARM32-LABEL: test_memmove_very_small_const_len
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} memmove
-; ARM32: movt [[CALL]]
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} memmove
 
 define internal void @test_memmove_const_len_3(i32 %iptr_dst, i32 %iptr_src) {
 entry:
@@ -256,9 +234,7 @@
 ; OM1-LABEL: test_memmove_const_len_3
 ; OM1: call {{.*}} memmove
 ; ARM32-LABEL: test_memmove_const_len_3
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} memmove
-; ARM32: movt [[CALL]]
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} memmove
 
 define internal void @test_memmove_mid_const_len(i32 %iptr_dst, i32 %iptr_src) {
 entry:
@@ -277,9 +253,7 @@
 ; OM1-LABEL: test_memmove_mid_const_len
 ; OM1: call {{.*}} memmove
 ; ARM32-LABEL: test_memmove_mid_const_len
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} memmove
-; ARM32: movt [[CALL]]
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} memmove
 
 define internal void @test_memmove_mid_const_len_overlap(i32 %iptr_dst,
                                                          i32 %iptr_src) {
@@ -299,9 +273,7 @@
 ; OM1-LABEL: test_memmove_mid_const_len_overlap
 ; OM1: call {{.*}} memmove
 ; ARM32-LABEL: test_memmove_mid_const_len_overlap
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} memmove
-; ARM32: movt [[CALL]]
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} memmove
 
 define internal void @test_memmove_big_const_len_overlap(i32 %iptr_dst,
                                                          i32 %iptr_src) {
@@ -321,9 +293,7 @@
 ; OM1-LABEL: test_memmove_big_const_len_overlap
 ; OM1: call {{.*}} memmove
 ; ARM32-LABEL: test_memmove_big_const_len_overlap
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} memmove
-; ARM32: movt [[CALL]]
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} memmove
 
 define internal void @test_memmove_large_const_len(i32 %iptr_dst,
                                                    i32 %iptr_src) {
@@ -345,9 +315,7 @@
 ; OM1-LABEL: test_memmove_large_const_len
 ; OM1: call {{.*}} memmove
 ; ARM32-LABEL: test_memmove_large_const_len
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} memmove
-; ARM32: movt [[CALL]]
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} memmove
 
 define internal void @test_memset(i32 %iptr_dst, i32 %wide_val, i32 %len) {
 entry:
@@ -365,9 +333,7 @@
 ; OM1: call {{.*}} R_{{.*}} memset
 ; ARM32-LABEL: test_memset
 ; ARM32: uxtb
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} memset
-; ARM32: movt [[CALL]]
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} memset
 
 define internal void @test_memset_const_len_align(i32 %iptr_dst,
                                                   i32 %wide_val) {
@@ -386,9 +352,7 @@
 ; OM1: call {{.*}} R_{{.*}} memset
 ; ARM32-LABEL: test_memset_const_len_align
 ; ARM32: uxtb
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} memset
-; ARM32: movt [[CALL]]
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} memset
 
 define internal void @test_memset_long_const_len_zero_val_align(
     i32 %iptr_dst) {
@@ -404,9 +368,7 @@
 ; OM1: call {{.*}} R_{{.*}} memset
 ; ARM32-LABEL: test_memset_long_const_len_zero_val_align
 ; ARM32: uxtb
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} memset
-; ARM32: movt [[CALL]]
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} memset
 
 define internal void @test_memset_const_val(i32 %iptr_dst, i32 %len) {
 entry:
@@ -421,9 +383,7 @@
 ; OM1: call {{.*}} R_{{.*}} memset
 ; ARM32-LABEL: test_memset_const_val
 ; ARM32: uxtb
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} memset
-; ARM32: movt [[CALL]]
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} memset
 
 define internal void @test_memset_const_val_len_very_small(i32 %iptr_dst) {
 entry:
@@ -438,9 +398,7 @@
 ; OM1: call {{.*}} R_{{.*}} memset
 ; ARM32-LABEL: test_memset_const_val_len_very_small
 ; ARM32: uxtb
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} memset
-; ARM32: movt [[CALL]]
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} memset
 
 define internal void @test_memset_const_val_len_3(i32 %iptr_dst) {
 entry:
@@ -456,9 +414,7 @@
 ; OM1: call {{.*}} R_{{.*}} memset
 ; ARM32-LABEL: test_memset_const_val_len_3
 ; ARM32: uxtb
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} memset
-; ARM32: movt [[CALL]]
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} memset
 
 define internal void @test_memset_const_val_len_mid(i32 %iptr_dst) {
 entry:
@@ -475,9 +431,7 @@
 ; OM1: call {{.*}} R_{{.*}} memset
 ; ARM32-LABEL: test_memset_const_val_len_mid
 ; ARM32: uxtb
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} memset
-; ARM32: movt [[CALL]]
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} memset
 
 define internal void @test_memset_zero_const_len_small(i32 %iptr_dst) {
 entry:
@@ -494,9 +448,7 @@
 ; OM1: call {{.*}} R_{{.*}} memset
 ; ARM32-LABEL: test_memset_zero_const_len_small
 ; ARM32: uxtb
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} memset
-; ARM32: movt [[CALL]]
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} memset
 
 define internal void @test_memset_zero_const_len_small_overlap(i32 %iptr_dst) {
 entry:
@@ -513,9 +465,7 @@
 ; OM1: call {{.*}} R_{{.*}} memset
 ; ARM32-LABEL: test_memset_zero_const_len_small_overlap
 ; ARM32: uxtb
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} memset
-; ARM32: movt [[CALL]]
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} memset
 
 define internal void @test_memset_zero_const_len_big_overlap(i32 %iptr_dst) {
 entry:
@@ -532,9 +482,7 @@
 ; OM1: call {{.*}} R_{{.*}} memset
 ; ARM32-LABEL: test_memset_zero_const_len_big_overlap
 ; ARM32: uxtb
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} memset
-; ARM32: movt [[CALL]]
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} memset
 
 define internal void @test_memset_zero_const_len_large(i32 %iptr_dst) {
 entry:
@@ -552,6 +500,4 @@
 ; OM1: call {{.*}} R_{{.*}} memset
 ; ARM32-LABEL: test_memset_zero_const_len_large
 ; ARM32: uxtb
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} memset
-; ARM32: movt [[CALL]]
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} memset
diff --git a/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll b/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll
index 3ceabf9..371db88 100644
--- a/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll
+++ b/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll
@@ -129,12 +129,8 @@
 ; CHECKO2REM: call {{.*}} R_{{.*}} setjmp
 ; CHECKO2REM: call {{.*}} R_{{.*}} longjmp
 ; ARM32-LABEL: test_setjmplongjmp
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} setjmp
-; ARM32: movt [[CALL]]
-; ARM32: blx [[CALL]]
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} longjmp
-; ARM32: movt [[CALL]]
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} setjmp
+; ARM32: bl {{.*}} longjmp
 
 define internal i32 @test_setjmp_unused(i32 %iptr_env, i32 %i_other) {
 entry:
@@ -475,9 +471,7 @@
 ; CHECK-LABEL: test_popcount_32
 ; CHECK: call {{.*}} R_{{.*}} __popcountsi2
 ; ARM32-LABEL: test_popcount_32
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} __popcountsi2
-; ARM32: movt [[CALL]]
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} __popcountsi2
 
 define internal i64 @test_popcount_64(i64 %x) {
 entry:
@@ -490,9 +484,7 @@
 ; the return value just in case.
 ; CHECK: mov {{.*}},0x0
 ; ARM32-LABEL: test_popcount_64
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} __popcountdi2
-; ARM32: movt [[CALL]]
-; ARM32: blx [[CALL]]
+; ARM32: bl {{.*}} __popcountdi2
 ; ARM32: mov {{.*}}, #0
 
 define internal i32 @test_popcount_64_ret_i32(i64 %x) {
diff --git a/tests_lit/llvm2ice_tests/nonsfi.ll b/tests_lit/llvm2ice_tests/nonsfi.ll
index cb2a25b..7da6cd4 100644
--- a/tests_lit/llvm2ice_tests/nonsfi.ll
+++ b/tests_lit/llvm2ice_tests/nonsfi.ll
@@ -25,11 +25,7 @@
 ; DEFAULT-LABEL: testCallRegular
 
 ; ARM32-NONSFI-LABEL: testCallRegular
-; ARM32-NONSFI:      movw [[REG:r[0-9]+]], {{.*}} R_ARM_MOVW_PREL_NC GOTOFF{{.*}}testLoadBasic
-; ARM32-NONSFI-NEXT: movt [[REG:r[0-9]+]], {{.*}} R_ARM_MOVT_PREL GOTOFF{{.*}}testLoadBasic
-; ARM32-NONSFI-NEXT: ldr [[GOTOFF:r[0-9]+]], [pc, [[REG]]]
-; ARM32-NONSFI-NEXT: add [[CT:r[0-9]+]], {{.*}}, [[CT]]
-; ARM32-NONSFI:      blx [[CT]]
+; ARM32-NONSFI: bl {{.*}} R_ARM_CALL {{.*}}testLoadBasic
 
 define internal double @testCallBuiltin(double %val) {
 entry:
@@ -42,11 +38,7 @@
 ; DEFAULT-LABEL: testCallBuiltin
 
 ; ARM32-NONSFI-LABEL: testCallBuiltin
-; ARM32-NONSFI:      movw [[REG:r[0-9]+]], {{.*}} R_ARM_MOVW_PREL_NC GOTOFF{{.*}}fmod
-; ARM32-NONSFI-NEXT: movt [[REG:r[0-9]+]], {{.*}} R_ARM_MOVT_PREL GOTOFF{{.*}}fmod
-; ARM32-NONSFI-NEXT: ldr [[GOTOFF:r[0-9]+]], [pc, [[REG]]]
-; ARM32-NONSFI-NEXT: add [[CT:r[0-9]+]], {{.*}}, [[CT]]
-; ARM32-NONSFI:      blx [[CT]]
+; ARM32-NONSFI: bl {{.*}} R_ARM_CALL {{.*}}fmod
 
 define internal i32 @testLoadBasic() {
 entry:
diff --git a/tests_lit/llvm2ice_tests/select-opt.ll b/tests_lit/llvm2ice_tests/select-opt.ll
index b82c67b..aa2c4d9 100644
--- a/tests_lit/llvm2ice_tests/select-opt.ll
+++ b/tests_lit/llvm2ice_tests/select-opt.ll
@@ -51,23 +51,15 @@
 ; CHECK:      ret
 ; ARM32-LABEL: testSelect
 ; ARM32: cmp
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} useInt
-; ARM32: movt [[CALL]]
-; ARM32; blx [[CALL]]
+; ARM32: bl {{.*}} useInt
 ; ARM32-Om1: mov {{.*}}, #20
 ; ARM32-O2: mov [[REG:r[0-9]+]], #20
 ; ARM32: tst
 ; ARM32-Om1: movne {{.*}}, #10
 ; ARM32-O2: movne [[REG]], #10
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} useInt
-; ARM32: movt [[CALL]]
-; ARM32; blx [[CALL]]
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} useInt
-; ARM32: movt [[CALL]]
-; ARM32; blx [[CALL]]
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} useInt
-; ARM32: movt [[CALL]]
-; ARM32; blx [[CALL]]
+; ARM32: bl {{.*}} useInt
+; ARM32: bl {{.*}} useInt
+; ARM32: bl {{.*}} useInt
 ; ARM32: bx lr
 
 ; Check for valid addressing mode in the cmp instruction when the
diff --git a/tests_lit/llvm2ice_tests/unreachable.ll b/tests_lit/llvm2ice_tests/unreachable.ll
index 6cde0e1..71260cc 100644
--- a/tests_lit/llvm2ice_tests/unreachable.ll
+++ b/tests_lit/llvm2ice_tests/unreachable.ll
@@ -9,12 +9,12 @@
 
 ; RUN: %if --need=target_ARM32 --need=allow_dump \
 ; RUN:   --command %p2i --filetype=asm --assemble \
-; RUN:   --disassemble --target arm32 -i %s --args -O2 \
+; RUN:   --disassemble --target arm32 -i %s --args -O2 --skip-unimplemented \
 ; RUN:   | %if --need=target_ARM32 --need=allow_dump \
 ; RUN:   --command FileCheck --check-prefix ARM32 %s
 ; RUN: %if --need=target_ARM32 --need=allow_dump \
 ; RUN:   --command %p2i --filetype=asm --assemble \
-; RUN:   --disassemble --target arm32 -i %s --args -Om1 \
+; RUN:   --disassemble --target arm32 -i %s --args -Om1 --skip-unimplemented \
 ; RUN:   | %if --need=target_ARM32 --need=allow_dump \
 ; RUN:   --command FileCheck --check-prefix ARM32 %s
 
@@ -41,7 +41,5 @@
 ; ARM32-LABEL: divide
 ; ARM32: tst
 ; ARM32: .word 0xe7fedef0
-; ARM32: movw [[CALL:r[0-9]]], {{.+}} __divsi3
-; ARM32: movt [[CALL]]
-; ARM32; blx [[CALL]]
+; ARM32: bl {{.*}} __divsi3
 ; ARM32: bx lr