Add hybrid assembler concept to ARM assembler.

Adds the notion of a hybrid assembler. That is, if the integrated
assembler can lower an instruction to bytes, it does. Otherwise, it
falls back to the standalone (textual) assembler to generate the
instruction's text, which serves as a placeholder for the instruction.
The text is recorded as a textual fixup in the assembler buffer.

The advantage of the hybrid assembler is that one can incrementally
implement the integrated assembler and still test the generated
assembly.

BUG= https://code.google.com/p/nativeclient/issues/detail?id=4334
R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/1418523002 .
diff --git a/src/IceAssembler.cpp b/src/IceAssembler.cpp
index 7eb8b61..747ae58 100644
--- a/src/IceAssembler.cpp
+++ b/src/IceAssembler.cpp
@@ -35,15 +35,28 @@
   return Result;
 }
 
+void AssemblerBuffer::installFixup(AssemblerFixup *F) {
+  F->set_position(0);
+  if (!Assemblr.getPreliminary())
+    Fixups.push_back(F);
+}
+
 AssemblerFixup *AssemblerBuffer::createFixup(FixupKind Kind,
                                              const Constant *Value) {
   AssemblerFixup *F =
       new (Assemblr.allocate<AssemblerFixup>()) AssemblerFixup();
-  F->set_position(0);
   F->set_kind(Kind);
   F->set_value(Value);
-  if (!Assemblr.getPreliminary())
-    Fixups.push_back(F);
+  installFixup(F);
+  return F;
+}
+
+AssemblerTextFixup *AssemblerBuffer::createTextFixup(const std::string &Text,
+                                                     size_t BytesUsed) {
+  AssemblerTextFixup *F = new (Assemblr.allocate<AssemblerTextFixup>())
+      AssemblerTextFixup(Text, BytesUsed);
+  installFixup(F);
+  TextFixupNeeded = false;
   return F;
 }
 
@@ -72,12 +85,13 @@
 }
 
 AssemblerBuffer::AssemblerBuffer(Assembler &Asm) : Assemblr(Asm) {
-  const intptr_t OneKB = 1024;
-  static const intptr_t kInitialBufferCapacity = 4 * OneKB;
+  constexpr intptr_t OneKB = 1024;
+  static constexpr intptr_t kInitialBufferCapacity = 4 * OneKB;
   Contents = NewContents(Assemblr, kInitialBufferCapacity);
   Cursor = Contents;
   Limit = computeLimit(Contents, kInitialBufferCapacity);
   HasEnsuredCapacity = false;
+  TextFixupNeeded = false;
 
   // Verify internal state.
   assert(capacity() == kInitialBufferCapacity);
@@ -89,7 +103,7 @@
 void AssemblerBuffer::extendCapacity() {
   intptr_t old_size = size();
   intptr_t old_capacity = capacity();
-  const intptr_t OneMB = 1 << 20;
+  constexpr intptr_t OneMB = 1 << 20;
   intptr_t new_capacity = std::min(old_capacity * 2, old_capacity + OneMB);
   if (new_capacity < old_capacity) {
     llvm::report_fatal_error(
@@ -123,7 +137,6 @@
   Ostream &Str = Ctx->getStrEmit();
   intptr_t EndPosition = Buffer.size();
   intptr_t CurPosition = 0;
-  const intptr_t FixupSize = 4;
   for (const AssemblerFixup *NextFixup : fixups()) {
     intptr_t NextFixupLoc = NextFixup->position();
     for (intptr_t i = CurPosition; i < NextFixupLoc; ++i) {
@@ -131,16 +144,13 @@
       Str.write_hex(Buffer.load<uint8_t>(i));
       Str << "\n";
     }
-    Str << "\t.long ";
     // For PCRel fixups, we write the pc-offset from a symbol into the Buffer
     // (e.g., -4), but we don't represent that in the fixup's offset. Otherwise
     // the fixup holds the true offset, and so does the Buffer. Just load the
     // offset from the buffer.
-    NextFixup->emit(Ctx, Buffer.load<RelocOffsetT>(NextFixupLoc));
-    if (fixupIsPCRel(NextFixup->kind()))
-      Str << " - .";
-    Str << "\n";
-    CurPosition = NextFixupLoc + FixupSize;
+    CurPosition = NextFixupLoc +
+                  NextFixup->emit(Ctx, Buffer.load<RelocOffsetT>(NextFixupLoc),
+                                  fixupIsPCRel(NextFixup->kind()));
     assert(CurPosition <= EndPosition);
   }
   // Handle any bytes that are not prefixed by a fixup.
diff --git a/src/IceAssembler.h b/src/IceAssembler.h
index 2f5a505..5e7815d 100644
--- a/src/IceAssembler.h
+++ b/src/IceAssembler.h
@@ -174,6 +174,20 @@
   /// Create and track a fixup in the current function.
   AssemblerFixup *createFixup(FixupKind Kind, const Constant *Value);
 
+  /// Create and track a textual fixup in the current function.
+  AssemblerTextFixup *createTextFixup(const std::string &Text,
+                                      size_t BytesUsed);
+
+  /// Mark that an attempt was made to emit, but failed. Hence, in order to
+  /// continue, one must emit a text fixup.
+  void setNeedsTextFixup() { TextFixupNeeded = true; }
+
+  /// Returns true if last emit failed and needs a text fixup.
+  bool needsTextFixup() const { return TextFixupNeeded; }
+
+  /// Installs a created fixup, after it has been allocated.
+  void installFixup(AssemblerFixup *F);
+
   const FixupRefList &fixups() const { return Fixups; }
 
   void setSize(intptr_t NewSize) {
@@ -194,6 +208,9 @@
   Assembler &Assemblr;
   /// List of pool-allocated fixups relative to the current function.
   FixupRefList Fixups;
+  // True if a textual fixup is needed, because the assembler was unable to
+  // emit the last request.
+  bool TextFixupNeeded;
 
   uintptr_t cursor() const { return Cursor; }
   uintptr_t limit() const { return Limit; }
@@ -268,12 +285,24 @@
   // Return a view of all the bytes of code for the current function.
   llvm::StringRef getBufferView() const;
 
+  /// Emit a fixup at the current location.
+  void emitFixup(AssemblerFixup *Fixup) { Buffer.emitFixup(Fixup); }
+
   const FixupRefList &fixups() const { return Buffer.fixups(); }
 
   AssemblerFixup *createFixup(FixupKind Kind, const Constant *Value) {
     return Buffer.createFixup(Kind, Value);
   }
 
+  AssemblerTextFixup *createTextFixup(const std::string &Text,
+                                      size_t BytesUsed) {
+    return Buffer.createTextFixup(Text, BytesUsed);
+  }
+
+  void setNeedsTextFixup() { Buffer.setNeedsTextFixup(); }
+
+  bool needsTextFixup() const { return Buffer.needsTextFixup(); }
+
   void emitIASBytes() const;
   bool getInternal() const { return IsInternal; }
   void setInternal(bool Internal) { IsInternal = Internal; }
@@ -306,6 +335,9 @@
   /// fully committed (Preliminary=false).
   bool Preliminary = false;
 
+  /// Installs a created fixup, after it has been allocated.
+  void installFixup(AssemblerFixup *F) { Buffer.installFixup(F); }
+
 protected:
   GlobalContext *Ctx;
   // Buffer's constructor uses the Allocator, so it needs to appear after it.
diff --git a/src/IceAssemblerARM32.cpp b/src/IceAssemblerARM32.cpp
index fc60e0d..06ba9c4 100644
--- a/src/IceAssemblerARM32.cpp
+++ b/src/IceAssemblerARM32.cpp
@@ -70,6 +70,11 @@
 static constexpr uint32_t kImmed12Bits = 12;
 static constexpr uint32_t kImm12Shift = 0;
 
+// Type of instruction encoding (bits 25-27). See ARM section A5.1
+static constexpr uint32_t kInstTypeDataRegister = 0;  // i.e. 000
+static constexpr uint32_t kInstTypeDataImmediate = 1; // i.e. 001
+static constexpr uint32_t kInstTypeMemImmediate = 2;  // i.e. 010
+
 inline uint32_t encodeBool(bool b) { return b ? 1 : 0; }
 
 inline uint32_t encodeGPRRegister(RegARM32::GPRRegister Rn) {
@@ -231,6 +236,14 @@
   label->bindTo(bound);
 }
 
+void ARM32::AssemblerARM32::emitTextInst(const std::string &Text) {
+  static constexpr uint32_t Placeholder = 0;
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  AssemblerFixup *F = createTextFixup(Text, sizeof(Placeholder));
+  emitFixup(F);
+  emitInst(Placeholder);
+}
+
 void ARM32::AssemblerARM32::emitType01(CondARM32::Cond Cond, uint32_t Type,
                                        uint32_t Opcode, bool SetCc, uint32_t Rn,
                                        uint32_t Rd, uint32_t Imm12) {
@@ -261,54 +274,48 @@
 void ARM32::AssemblerARM32::add(const Operand *OpRd, const Operand *OpRn,
                                 const Operand *OpSrc1, bool SetFlags,
                                 CondARM32::Cond Cond) {
-  // Note: Loop is used so that we can short circuit using break;
-  do {
-    uint32_t Rd;
-    if (decodeOperand(OpRd, Rd) != DecodedAsRegister)
-      break;
-    uint32_t Rn;
-    if (decodeOperand(OpRn, Rn) != DecodedAsRegister)
-      break;
-    constexpr uint32_t Add = B2; // 0100
-    uint32_t Src1Value;
-    // TODO(kschimpf) Other possible decodings of add.
-    switch (decodeOperand(OpSrc1, Src1Value)) {
-    default:
-      break;
-    case DecodedAsRegister: {
-      // ADD (register) - ARM section A8.8.7, encoding A1:
-      //   add{s}<c> <Rd>, <Rn>, <Rm>{, <shiff>}
-      // ADD (Sp plus register) - ARM section A8.8.11, encoding A1:
-      //   add{s}<c> sp, <Rn>, <Rm>{, <shiff>}
-      //
-      // cccc0000100snnnnddddiiiiitt0mmmm where cccc=Cond, dddd=Rd, nnnn=Rn,
-      // mmmm=Rm, iiiii=Shift, tt=ShiftKind, and s=SetFlags
-      Src1Value = encodeShiftRotateImm5(Src1Value, OperandARM32::kNoShift, 0);
-      if (((Rd == RegARM32::Encoded_Reg_pc) && SetFlags))
-        // Conditions of rule violated.
-        break;
-      constexpr uint32_t InstTypeRegister = 0;
-      emitType01(Cond, InstTypeRegister, Add, SetFlags, Rn, Rd, Src1Value);
-      return;
-    }
-    case DecodedAsRotatedImm8: {
-      // ADD (Immediate) - ARM section A8.8.5, encoding A1:
-      //   add{s}<c> <Rd>, <Rn>, #<RotatedImm8>
-      // ADD (SP plus immediate) - ARM section A8.8.9, encoding A1.
-      //   add{s}<c> <Rd>, sp, #<RotatedImm8>
-      //
-      // cccc0010100snnnnddddiiiiiiiiiiii where cccc=Cond, dddd=Rd, nnnn=Rn,
-      // s=SetFlags and iiiiiiiiiiii=Src1Value=RotatedImm8.
-      if ((Rd == RegARM32::Encoded_Reg_pc && SetFlags))
-        // Conditions of rule violated.
-        break;
-      constexpr uint32_t InstTypeImmediate = 1;
-      emitType01(Cond, InstTypeImmediate, Add, SetFlags, Rn, Rd, Src1Value);
-      return;
-    }
-    }
-  } while (0);
-  UnimplementedError(Ctx->getFlags());
+  uint32_t Rd;
+  if (decodeOperand(OpRd, Rd) != DecodedAsRegister)
+    return setNeedsTextFixup();
+  uint32_t Rn;
+  if (decodeOperand(OpRn, Rn) != DecodedAsRegister)
+    return setNeedsTextFixup();
+  constexpr uint32_t Add = B2; // 0100
+  uint32_t Src1Value;
+  // TODO(kschimpf) Other possible decodings of add.
+  switch (decodeOperand(OpSrc1, Src1Value)) {
+  default:
+    return setNeedsTextFixup();
+  case DecodedAsRegister: {
+    // ADD (register) - ARM section A8.8.7, encoding A1:
+    //   add{s}<c> <Rd>, <Rn>, <Rm>{, <shift>}
+    // ADD (Sp plus register) - ARM section A8.8.11, encoding A1:
+    //   add{s}<c> sp, <Rn>, <Rm>{, <shift>}
+    //
+    // cccc0000100snnnnddddiiiiitt0mmmm where cccc=Cond, dddd=Rd, nnnn=Rn,
+    // mmmm=Rm, iiiii=Shift, tt=ShiftKind, and s=SetFlags
+    Src1Value = encodeShiftRotateImm5(Src1Value, OperandARM32::kNoShift, 0);
+    if (((Rd == RegARM32::Encoded_Reg_pc) && SetFlags))
+      // Conditions of rule violated.
+      return setNeedsTextFixup();
+    emitType01(Cond, kInstTypeDataRegister, Add, SetFlags, Rn, Rd, Src1Value);
+    return;
+  }
+  case DecodedAsRotatedImm8: {
+    // ADD (Immediate) - ARM section A8.8.5, encoding A1:
+    //   add{s}<c> <Rd>, <Rn>, #<RotatedImm8>
+    // ADD (SP plus immediate) - ARM section A8.8.9, encoding A1.
+    //   add{s}<c> <Rd>, sp, #<RotatedImm8>
+    //
+    // cccc0010100snnnnddddiiiiiiiiiiii where cccc=Cond, dddd=Rd, nnnn=Rn,
+    // s=SetFlags and iiiiiiiiiiii=Src1Value=RotatedImm8.
+    if ((Rd == RegARM32::Encoded_Reg_pc && SetFlags))
+      // Conditions of rule violated.
+      return setNeedsTextFixup();
+    emitType01(Cond, kInstTypeDataImmediate, Add, SetFlags, Rn, Rd, Src1Value);
+    return;
+  }
+  };
 }
 
 void ARM32::AssemblerARM32::bkpt(uint16_t Imm16) {
@@ -327,11 +334,8 @@
   //   bx<c> <Rm>
   //
   // cccc000100101111111111110001mmmm where mmmm=rm and cccc=Cond.
-  assert(isGPRRegisterDefined(Rm));
-  // TODO(kschimpf): Remove void cast when MINIMAL build allows.
-  (void)isGPRRegisterDefined(Rm);
-  assert(isConditionDefined(Cond));
-  (void)isConditionDefined(Cond);
+  if (!(isGPRRegisterDefined(Rm) && isConditionDefined(Cond)))
+    return setNeedsTextFixup();
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   const uint32_t Encoding = (encodeCondition(Cond) << kConditionShift) | B24 |
                             B21 | (0xfff << 8) | B4 |
@@ -341,169 +345,145 @@
 
 void ARM32::AssemblerARM32::ldr(const Operand *OpRt, const Operand *OpAddress,
                                 CondARM32::Cond Cond) {
-  // Note: Loop is used so that we can short ciruit using break;
-  do {
-    uint32_t Rt;
-    if (decodeOperand(OpRt, Rt) != DecodedAsRegister)
-      break;
-    uint32_t Address;
-    if (decodeAddress(OpAddress, Address) != DecodedAsImmRegOffset)
-      break;
-    // LDR (immediate) - ARM section A8.8.63, encoding A1:
-    //   ldr<c> <Rt>, [<Rn>{, #+/-<imm12>}]      ; p=1, w=0
-    //   ldr<c> <Rt>, [<Rn>], #+/-<imm12>        ; p=1, w=1
-    //   ldr<c> <Rt>, [<Rn>, #+/-<imm12>]!       ; p=0, w=1
-    // LDRB (immediate) - ARM section A8.8.68, encoding A1:
-    //   ldrb<c> <Rt>, [<Rn>{, #+/-<imm12>}]     ; p=1, w=0
-    //   ldrb<c> <Rt>, [<Rn>], #+/-<imm12>       ; p=1, w=1
-    //   ldrb<c> <Rt>, [<Rn>, #+/-<imm12>]!      ; p=0, w=1
-    //
-    // cccc010pubw1nnnnttttiiiiiiiiiiii where cccc=Cond, tttt=Rt, nnnn=Rn,
-    // iiiiiiiiiiii=imm12, b=1 if STRB, u=1 if +.
-    constexpr uint32_t InstType = B1; // 010
-    constexpr bool IsLoad = true;
-    const Type Ty = OpRt->getType();
-    if (!(Ty == IceType_i32 || Ty == IceType_i8)) // TODO(kschimpf) Expand?
-      break;
-    const bool IsByte = typeWidthInBytes(Ty) == 1;
-    if ((getGPRReg(kRnShift, Address) == RegARM32::Encoded_Reg_pc) ||
-        (!isBitSet(P, Address) && isBitSet(W, Address)) ||
-        (!IsByte &&
-         (getGPRReg(kRnShift, Address) == RegARM32::Encoded_Reg_sp) &&
-         !isBitSet(P, Address) &&
-         isBitSet(U, Address) & !isBitSet(W, Address) &&
-         (mask(Address, kImm12Shift, kImmed12Bits) == 0x8 /* 000000000100 */)))
-      break;
-    emitMemOp(Cond, InstType, IsLoad, IsByte, Rt, Address);
-    return;
-  } while (0);
-  UnimplementedError(Ctx->getFlags());
+  uint32_t Rt;
+  if (decodeOperand(OpRt, Rt) != DecodedAsRegister)
+    return setNeedsTextFixup();
+  uint32_t Address;
+  if (decodeAddress(OpAddress, Address) != DecodedAsImmRegOffset)
+    return setNeedsTextFixup();
+  // LDR (immediate) - ARM section A8.8.63, encoding A1:
+  //   ldr<c> <Rt>, [<Rn>{, #+/-<imm12>}]      ; p=1, w=0
+  //   ldr<c> <Rt>, [<Rn>], #+/-<imm12>        ; p=1, w=1
+  //   ldr<c> <Rt>, [<Rn>, #+/-<imm12>]!       ; p=0, w=1
+  // LDRB (immediate) - ARM section A8.8.68, encoding A1:
+  //   ldrb<c> <Rt>, [<Rn>{, #+/-<imm12>}]     ; p=1, w=0
+  //   ldrb<c> <Rt>, [<Rn>], #+/-<imm12>       ; p=1, w=1
+  //   ldrb<c> <Rt>, [<Rn>, #+/-<imm12>]!      ; p=0, w=1
+  //
+  // cccc010pubw1nnnnttttiiiiiiiiiiii where cccc=Cond, tttt=Rt, nnnn=Rn,
+  // iiiiiiiiiiii=imm12, b=1 if LDRB, u=1 if +.
+  constexpr bool IsLoad = true;
+  const Type Ty = OpRt->getType();
+  if (!(Ty == IceType_i32 || Ty == IceType_i8)) // TODO(kschimpf) Expand?
+    return setNeedsTextFixup();
+  const bool IsByte = typeWidthInBytes(Ty) == 1;
+  // Check conditions of rules violated.
+  if (getGPRReg(kRnShift, Address) == RegARM32::Encoded_Reg_pc)
+    return setNeedsTextFixup();
+  if (!isBitSet(P, Address) && isBitSet(W, Address))
+    return setNeedsTextFixup();
+  if (!IsByte && (getGPRReg(kRnShift, Address) == RegARM32::Encoded_Reg_sp) &&
+      !isBitSet(P, Address) && isBitSet(U, Address) & !isBitSet(W, Address) &&
+      (mask(Address, kImm12Shift, kImmed12Bits) == 0x8 /* 000000000100 */))
+    return setNeedsTextFixup();
+  emitMemOp(Cond, kInstTypeMemImmediate, IsLoad, IsByte, Rt, Address);
 }
 
 void ARM32::AssemblerARM32::mov(const Operand *OpRd, const Operand *OpSrc,
                                 CondARM32::Cond Cond) {
-  // Note: Loop is used so that we can short ciruit using break;
-  do {
-    uint32_t Rd;
-    if (decodeOperand(OpRd, Rd) != DecodedAsRegister)
-      break;
-    uint32_t Src;
-    // TODO(kschimpf) Handle other forms of mov.
-    if (decodeOperand(OpSrc, Src) == DecodedAsRotatedImm8) {
-      // MOV (immediate) - ARM section A8.8.102, encoding A1:
-      //   mov{S}<c> <Rd>, #<RotatedImm8>
-      //
-      // cccc0011101s0000ddddiiiiiiiiiiii where cccc=Cond, s=SetFlags, dddd=Rd,
-      // and iiiiiiiiiiii=RotatedImm8=Src.  Note: We don't use movs in this
-      // assembler.
-      constexpr bool SetFlags = false;
-      if ((Rd == RegARM32::Encoded_Reg_pc && SetFlags))
-        // Conditions of rule violated.
-        break;
-      constexpr uint32_t Rn = 0;
-      constexpr uint32_t Mov = B3 | B2 | B0; // 1101.
-      constexpr uint32_t InstType = 1;
-      emitType01(Cond, InstType, Mov, SetFlags, Rn, Rd, Src);
-      return;
-    }
-  } while (0);
-  UnimplementedError(Ctx->getFlags());
+  uint32_t Rd;
+  if (decodeOperand(OpRd, Rd) != DecodedAsRegister)
+    return setNeedsTextFixup();
+  uint32_t Src;
+  // TODO(kschimpf) Handle other forms of mov.
+  if (decodeOperand(OpSrc, Src) != DecodedAsRotatedImm8)
+    return setNeedsTextFixup();
+  // MOV (immediate) - ARM section A8.8.102, encoding A1:
+  //   mov{S}<c> <Rd>, #<RotatedImm8>
+  //
+  // cccc0011101s0000ddddiiiiiiiiiiii where cccc=Cond, s=SetFlags, dddd=Rd,
+  // and iiiiiiiiiiii=RotatedImm8=Src.  Note: We don't use movs in this
+  // assembler.
+  constexpr bool SetFlags = false;
+  if ((Rd == RegARM32::Encoded_Reg_pc && SetFlags))
+    // Conditions of rule violated.
+    return setNeedsTextFixup();
+  constexpr uint32_t Rn = 0;
+  constexpr uint32_t Mov = B3 | B2 | B0; // 1101.
+  emitType01(Cond, kInstTypeDataImmediate, Mov, SetFlags, Rn, Rd, Src);
 }
 
 void ARM32::AssemblerARM32::str(const Operand *OpRt, const Operand *OpAddress,
                                 CondARM32::Cond Cond) {
-  // Note: Loop is used so that we can short ciruit using break;
-  do {
-    uint32_t Rt;
-    if (decodeOperand(OpRt, Rt) != DecodedAsRegister)
-      break;
-    uint32_t Address;
-    if (decodeAddress(OpAddress, Address) != DecodedAsImmRegOffset)
-      break;
-    // STR (immediate) - ARM section A8.8.204, encoding A1:
-    //   str<c> <Rt>, [<Rn>{, #+/-<imm12>}]      ; p=1, w=0
-    //   str<c> <Rt>, [<Rn>], #+/-<imm12>        ; p=1, w=1
-    //   str<c> <Rt>, [<Rn>, #+/-<imm12>]!       ; p=0, w=1
-    // STRB (immediate) - ARM section A8.8.207, encoding A1:
-    //   strb<c> <Rt>, [<Rn>{, #+/-<imm12>}]     ; p=1, w=0
-    //   strb<c> <Rt>, [<Rn>], #+/-<imm12>       ; p=1, w=1
-    //   strb<c> <Rt>, [<Rn>, #+/-<imm12>]!      ; p=0, w=1
-    //
-    // cccc010pubw0nnnnttttiiiiiiiiiiii where cccc=Cond, tttt=Rt, nnnn=Rn,
-    // iiiiiiiiiiii=imm12, b=1 if STRB, u=1 if +.
-    constexpr uint32_t InstType = B1; // 010
-    constexpr bool IsLoad = false;
-    const Type Ty = OpRt->getType();
-    if (!(Ty == IceType_i32 || Ty == IceType_i8)) // TODO(kschimpf) Expand?
-      break;
-    const bool IsByte = typeWidthInBytes(Ty) == 1;
-    // Check for rule violations.
-    if ((getGPRReg(kRnShift, Address) == RegARM32::Encoded_Reg_pc) ||
-        (!isBitSet(P, Address) && isBitSet(W, Address)) ||
-        (!IsByte &&
-         (getGPRReg(kRnShift, Address) == RegARM32::Encoded_Reg_sp) &&
-         isBitSet(P, Address) && !isBitSet(U, Address) &&
-         isBitSet(W, Address) &&
-         (mask(Address, kImm12Shift, kImmed12Bits) == 0x8 /* 000000000100 */)))
-      // Conditions of rule violated.
-      break;
-    emitMemOp(Cond, InstType, IsLoad, IsByte, Rt, Address);
-    return;
-  } while (0);
-  UnimplementedError(Ctx->getFlags());
+  uint32_t Rt;
+  if (decodeOperand(OpRt, Rt) != DecodedAsRegister)
+    return setNeedsTextFixup();
+  uint32_t Address;
+  if (decodeAddress(OpAddress, Address) != DecodedAsImmRegOffset)
+    return setNeedsTextFixup();
+  // STR (immediate) - ARM section A8.8.204, encoding A1:
+  //   str<c> <Rt>, [<Rn>{, #+/-<imm12>}]      ; p=1, w=0
+  //   str<c> <Rt>, [<Rn>], #+/-<imm12>        ; p=1, w=1
+  //   str<c> <Rt>, [<Rn>, #+/-<imm12>]!       ; p=0, w=1
+  // STRB (immediate) - ARM section A8.8.207, encoding A1:
+  //   strb<c> <Rt>, [<Rn>{, #+/-<imm12>}]     ; p=1, w=0
+  //   strb<c> <Rt>, [<Rn>], #+/-<imm12>       ; p=1, w=1
+  //   strb<c> <Rt>, [<Rn>, #+/-<imm12>]!      ; p=0, w=1
+  //
+  // cccc010pubw0nnnnttttiiiiiiiiiiii where cccc=Cond, tttt=Rt, nnnn=Rn,
+  // iiiiiiiiiiii=imm12, b=1 if STRB, u=1 if +.
+  constexpr bool IsLoad = false;
+  const Type Ty = OpRt->getType();
+  if (!(Ty == IceType_i32 || Ty == IceType_i8)) // TODO(kschimpf) Expand?
+    return setNeedsTextFixup();
+  const bool IsByte = typeWidthInBytes(Ty) == 1;
+  // Check for rule violations.
+  if ((getGPRReg(kRnShift, Address) == RegARM32::Encoded_Reg_pc))
+    return setNeedsTextFixup();
+  if (!isBitSet(P, Address) && isBitSet(W, Address))
+    return setNeedsTextFixup();
+  if (!IsByte && (getGPRReg(kRnShift, Address) == RegARM32::Encoded_Reg_sp) &&
+      isBitSet(P, Address) && !isBitSet(U, Address) && isBitSet(W, Address) &&
+      (mask(Address, kImm12Shift, kImmed12Bits) == 0x8 /* 000000000100 */))
+    return setNeedsTextFixup();
+  emitMemOp(Cond, kInstTypeMemImmediate, IsLoad, IsByte, Rt, Address);
 }
 
 void ARM32::AssemblerARM32::sub(const Operand *OpRd, const Operand *OpRn,
                                 const Operand *OpSrc1, bool SetFlags,
                                 CondARM32::Cond Cond) {
-  // Note: Loop is used so that we can short circuit using break;
-  do {
-    uint32_t Rd;
-    if (decodeOperand(OpRd, Rd) != DecodedAsRegister)
-      break;
-    uint32_t Rn;
-    if (decodeOperand(OpRn, Rn) != DecodedAsRegister)
-      break;
-    constexpr uint32_t Sub = B1; // 0010
-    uint32_t Src1Value;
-    // TODO(kschimpf) Other possible decodings of sub.
-    switch (decodeOperand(OpSrc1, Src1Value)) {
-    default:
-      break;
-    case DecodedAsRegister: {
-      // SUB (register) - ARM section A8.8.223, encoding A1:
-      //   sub{s}<c> <Rd>, <Rn>, <Rm>{, <shift>}
-      // SUB (SP minus register): See ARM section 8.8.226, encoding A1:
-      //   sub{s}<c> <Rd>, sp, <Rm>{, <Shift>}
-      //
-      // cccc0000010snnnnddddiiiiitt0mmmm where cccc=Cond, dddd=Rd, nnnn=Rn,
-      // mmmm=Rm, iiiiii=shift, tt=ShiftKind, and s=SetFlags.
-      Src1Value = encodeShiftRotateImm5(Src1Value, OperandARM32::kNoShift, 0);
-      constexpr uint32_t InstType = 0; // i.e. register
-      if (((Rd == RegARM32::Encoded_Reg_pc) && SetFlags))
-        // Conditions of rule violated.
-        break;
-      emitType01(Cond, InstType, Sub, SetFlags, Rn, Rd, Src1Value);
-      return;
-    }
-    case DecodedAsRotatedImm8: {
-      // Sub (Immediate) - ARM section A8.8.222, encoding A1:
-      //    sub{s}<c> <Rd>, <Rn>, #<RotatedImm8>
-      // Sub (Sp minus immediate) - ARM section A8.*.225, encoding A1:
-      //    sub{s}<c> sp, <Rn>, #<RotatedImm8>
-      //
-      // cccc0010010snnnnddddiiiiiiiiiiii where cccc=Cond, dddd=Rd, nnnn=Rn,
-      // s=SetFlags and iiiiiiiiiiii=Src1Value=RotatedImm8
-      if (Rd == RegARM32::Encoded_Reg_pc)
-        // Conditions of rule violated.
-        break;
-      constexpr uint32_t InstType = 1;
-      emitType01(Cond, InstType, Sub, SetFlags, Rn, Rd, Src1Value);
-      return;
-    }
-    }
-  } while (0);
-  UnimplementedError(Ctx->getFlags());
+  uint32_t Rd;
+  if (decodeOperand(OpRd, Rd) != DecodedAsRegister)
+    return setNeedsTextFixup();
+  uint32_t Rn;
+  if (decodeOperand(OpRn, Rn) != DecodedAsRegister)
+    return setNeedsTextFixup();
+  constexpr uint32_t Sub = B1; // 0010
+  uint32_t Src1Value;
+  // TODO(kschimpf) Other possible decodings of sub.
+  switch (decodeOperand(OpSrc1, Src1Value)) {
+  default:
+    return setNeedsTextFixup();
+  case DecodedAsRegister: {
+    // SUB (register) - ARM section A8.8.223, encoding A1:
+    //   sub{s}<c> <Rd>, <Rn>, <Rm>{, <shift>}
+    // SUB (SP minus register): See ARM section A8.8.226, encoding A1:
+    //   sub{s}<c> <Rd>, sp, <Rm>{, <Shift>}
+    //
+    // cccc0000010snnnnddddiiiiitt0mmmm where cccc=Cond, dddd=Rd, nnnn=Rn,
+    // mmmm=Rm, iiiiii=shift, tt=ShiftKind, and s=SetFlags.
+    Src1Value = encodeShiftRotateImm5(Src1Value, OperandARM32::kNoShift, 0);
+    if (((Rd == RegARM32::Encoded_Reg_pc) && SetFlags))
+      // Conditions of rule violated.
+      return setNeedsTextFixup();
+    emitType01(Cond, kInstTypeDataRegister, Sub, SetFlags, Rn, Rd, Src1Value);
+    return;
+  }
+  case DecodedAsRotatedImm8: {
+    // Sub (Immediate) - ARM section A8.8.222, encoding A1:
+    //    sub{s}<c> <Rd>, <Rn>, #<RotatedImm8>
+    // Sub (Sp minus immediate) - ARM section A8.8.225, encoding A1:
+    //    sub{s}<c> sp, <Rn>, #<RotatedImm8>
+    //
+    // cccc0010010snnnnddddiiiiiiiiiiii where cccc=Cond, dddd=Rd, nnnn=Rn,
+    // s=SetFlags and iiiiiiiiiiii=Src1Value=RotatedImm8
+    if (Rd == RegARM32::Encoded_Reg_pc)
+      // Conditions of rule violated.
+      return setNeedsTextFixup();
+    emitType01(Cond, kInstTypeDataImmediate, Sub, SetFlags, Rn, Rd, Src1Value);
+    return;
+  }
+  }
 }
 
 } // end of namespace Ice
diff --git a/src/IceAssemblerARM32.h b/src/IceAssemblerARM32.h
index 4f2495c..20b4e67 100644
--- a/src/IceAssemblerARM32.h
+++ b/src/IceAssemblerARM32.h
@@ -53,11 +53,12 @@
   void alignFunction() override {
     const SizeT Align = 1 << getBundleAlignLog2Bytes();
     SizeT BytesNeeded = Utils::OffsetToAlignment(Buffer.getPosition(), Align);
+    constexpr uint32_t UndefinedInst = 0xe7fedef0; // udf #60896
     constexpr SizeT InstSize = sizeof(int32_t);
     assert(BytesNeeded % InstSize == 0);
     while (BytesNeeded > 0) {
-      // TODO(kschimpf) Should this be NOP or some other instruction?
-      bkpt(0);
+      AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+      emitInst(UndefinedInst);
       BytesNeeded -= InstSize;
     }
   }
@@ -91,7 +92,8 @@
 
   bool fixupIsPCRel(FixupKind Kind) const override {
     (void)Kind;
-    llvm_unreachable("Not yet implemented.");
+    // TODO(kschimpf) Decide if we need this.
+    return false;
   }
 
   void bind(Label *label);
@@ -118,6 +120,8 @@
     return Asm->getKind() == Asm_ARM32;
   }
 
+  void emitTextInst(const std::string &Text);
+
 private:
   // A vector of pool-allocated x86 labels for CFG nodes.
   using LabelVector = std::vector<Label *>;
diff --git a/src/IceAssemblerX86Base.h b/src/IceAssemblerX86Base.h
index 6b8f3c1..b1c70e0 100644
--- a/src/IceAssemblerX86Base.h
+++ b/src/IceAssemblerX86Base.h
@@ -885,7 +885,6 @@
   inline void emitRegisterOperand(int rm, int reg);
   template <typename RegType, typename RmType>
   inline void emitXmmRegisterOperand(RegType reg, RmType rm);
-  inline void emitFixup(AssemblerFixup *fixup);
   inline void emitOperandSizeOverride();
 
   void emitOperand(int rm, const typename Traits::Operand &operand);
@@ -1063,11 +1062,6 @@
 }
 
 template <class Machine>
-inline void AssemblerX86Base<Machine>::emitFixup(AssemblerFixup *fixup) {
-  Buffer.emitFixup(fixup);
-}
-
-template <class Machine>
 inline void AssemblerX86Base<Machine>::emitOperandSizeOverride() {
   emitUint8(0x66);
 }
diff --git a/src/IceClFlags.cpp b/src/IceClFlags.cpp
index 7ee5228..907e240 100644
--- a/src/IceClFlags.cpp
+++ b/src/IceClFlags.cpp
@@ -74,6 +74,11 @@
                         cl::desc("Define default global prefix for naming "
                                  "unnamed globals"),
                         cl::init("Global"));
+
+cl::opt<bool> DisableHybridAssembly(
+    "no-hybrid-asm", cl::desc("Disable hybrid assembly when -filetype=iasm"),
+    cl::init(false));
+
 cl::opt<bool> DisableInternal("externalize",
                               cl::desc("Externalize all symbols"));
 // Note: Modifiable only if ALLOW_DISABLE_IR_GEN.
@@ -380,6 +385,7 @@
   OutFlags.AllowUninitializedGlobals = false;
   OutFlags.DataSections = false;
   OutFlags.DecorateAsm = false;
+  OutFlags.DisableHybridAssembly = false;
   OutFlags.DisableInternal = false;
   OutFlags.DisableIRGeneration = false;
   OutFlags.DisableTranslation = false;
@@ -445,6 +451,7 @@
   OutFlags.setDecorateAsm(::DecorateAsm);
   OutFlags.setDefaultFunctionPrefix(::DefaultFunctionPrefix);
   OutFlags.setDefaultGlobalPrefix(::DefaultGlobalPrefix);
+  OutFlags.setDisableHybridAssembly(::DisableHybridAssembly);
   OutFlags.setDisableInternal(::DisableInternal);
   OutFlags.setDisableIRGeneration(::DisableIRGeneration);
   OutFlags.setDisableTranslation(::DisableTranslation);
diff --git a/src/IceClFlags.h b/src/IceClFlags.h
index d4d7737..1072bd0 100644
--- a/src/IceClFlags.h
+++ b/src/IceClFlags.h
@@ -62,6 +62,11 @@
   bool getDecorateAsm() const { return DecorateAsm; }
   void setDecorateAsm(bool NewValue) { DecorateAsm = NewValue; }
 
+  bool getDisableHybridAssembly() const { return DisableHybridAssembly; }
+  void setDisableHybridAssembly(bool NewValue) {
+    DisableHybridAssembly = NewValue;
+  }
+
   bool getDisableInternal() const { return DisableInternal; }
   void setDisableInternal(bool NewValue) { DisableInternal = NewValue; }
 
@@ -253,6 +258,7 @@
   bool AllowUninitializedGlobals;
   bool DataSections;
   bool DecorateAsm;
+  bool DisableHybridAssembly;
   bool DisableInternal;
   bool DisableIRGeneration;
   bool DisableTranslation;
diff --git a/src/IceFixups.cpp b/src/IceFixups.cpp
index 6f50f85..7b2025e 100644
--- a/src/IceFixups.cpp
+++ b/src/IceFixups.cpp
@@ -48,11 +48,13 @@
   return Str.str();
 }
 
-void AssemblerFixup::emit(GlobalContext *Ctx,
-                          RelocOffsetT OverrideOffset) const {
+size_t AssemblerFixup::emit(GlobalContext *Ctx, RelocOffsetT OverrideOffset,
+                            bool IsPCRel) const {
+  static constexpr const size_t FixupSize = 4;
   if (!BuildDefs::dump())
-    return;
+    return FixupSize;
   Ostream &Str = Ctx->getStrEmit();
+  Str << "\t.long ";
   if (isNullSymbol())
     Str << "__Sz_AbsoluteZero";
   else
@@ -60,6 +62,22 @@
   RelocOffsetT Offset = OverrideOffset;
   if (Offset)
     Str << " + " << Offset;
+  // For PCRel fixups, we write the pc-offset from a symbol into the Buffer
+  // (e.g., -4), but we don't represent that in the fixup's offset. Otherwise
+  // the fixup holds the true offset, and so does the Buffer. Just load the
+  // offset from the buffer.
+  if (IsPCRel)
+    Str << " - .";
+  Str << "\n";
+  return FixupSize;
+}
+
+size_t AssemblerTextFixup::emit(GlobalContext *Ctx, RelocOffsetT OverrideOffset,
+                                bool IsPCRel) const {
+  (void)OverrideOffset;
+  (void)IsPCRel;
+  Ctx->getStrEmit() << Message << "\n";
+  return NumBytes;
 }
 
 } // end of namespace Ice
diff --git a/src/IceFixups.h b/src/IceFixups.h
index 43284ac..303e592 100644
--- a/src/IceFixups.h
+++ b/src/IceFixups.h
@@ -45,7 +45,9 @@
 
   void set_value(const Constant *Value) { value_ = Value; }
 
-  void emit(GlobalContext *Ctx, RelocOffsetT OverrideOffset) const;
+  /// Emits fixup, then returns the number of bytes to skip.
+  virtual size_t emit(GlobalContext *Ctx, RelocOffsetT OverrideOffset,
+                      bool IsPCRel) const;
 
 private:
   intptr_t position_ = 0;
@@ -53,6 +55,26 @@
   const Constant *value_ = nullptr;
 };
 
+/// Extends a fixup to be textual. That is, it emits text instead of a sequence
+/// of bytes. This class is used as a fallback for unimplemented emitIAS
+/// methods, allowing them to generate compilable assembly code.
+class AssemblerTextFixup : public AssemblerFixup {
+  AssemblerTextFixup() = delete;
+  AssemblerTextFixup(const AssemblerTextFixup &) = delete;
+  AssemblerTextFixup &operator=(const AssemblerTextFixup &) = delete;
+
+public:
+  AssemblerTextFixup(const std::string &Message, size_t NumBytes)
+      : AssemblerFixup(), Message(Message), NumBytes(NumBytes) {}
+  ~AssemblerTextFixup() = default;
+  virtual size_t emit(GlobalContext *Ctx, RelocOffsetT OverrideOffset,
+                      bool isPcRel) const;
+
+private:
+  const std::string Message;
+  const size_t NumBytes;
+};
+
 using FixupList = std::vector<AssemblerFixup>;
 using FixupRefList = std::vector<AssemblerFixup *>;
 
diff --git a/src/IceGlobalContext.h b/src/IceGlobalContext.h
index baab9ca..e4b1440 100644
--- a/src/IceGlobalContext.h
+++ b/src/IceGlobalContext.h
@@ -175,6 +175,7 @@
   Ostream &getStrDump() { return *StrDump; }
   Ostream &getStrError() { return *StrError; }
   Ostream &getStrEmit() { return *StrEmit; }
+  void setStrEmit(Ostream &NewStrEmit) { StrEmit = &NewStrEmit; }
 
   LockedPtr<ErrorCode> getErrorStatus() {
     return LockedPtr<ErrorCode>(&ErrorStatus, &ErrorStatusLock);
diff --git a/src/IceInstARM32.cpp b/src/IceInstARM32.cpp
index d7136b3..c903249 100644
--- a/src/IceInstARM32.cpp
+++ b/src/IceInstARM32.cpp
@@ -84,6 +84,27 @@
   return InstARM32CondAttributes[Cond].Opposite;
 }
 
+void InstARM32::emitUsingTextFixup(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return; // No text fixup is produced when BuildDefs::dump() is false.
+  GlobalContext *Ctx = Func->getContext();
+  if (Ctx->getFlags().getDisableHybridAssembly()) {
+    UnimplementedError(Ctx->getFlags()); // Textual fallback is disabled.
+    return;
+  }
+  ARM32::AssemblerARM32 *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
+  std::string Buffer;
+  llvm::raw_string_ostream StrBuf(Buffer);
+  OstreamLocker L(Ctx);
+  Ostream &OldStr = Ctx->getStrEmit();
+  Ctx->setStrEmit(StrBuf); // Redirect emit() output into Buffer.
+  emit(Func);
+  Ctx->setStrEmit(OldStr); // Restore the original emit stream.
+  Asm->emitTextInst(StrBuf.str());
+}
+
+void InstARM32::emitIAS(const Cfg *Func) const { emitUsingTextFixup(Func); }
+
 void InstARM32Pred::emitUnaryopGPR(const char *Opcode,
                                    const InstARM32Pred *Inst, const Cfg *Func,
                                    bool NeedsWidthSuffix) {
@@ -320,20 +341,23 @@
 
 template <InstARM32::InstKindARM32 K>
 void InstARM32ThreeAddrGPR<K>::emitIAS(const Cfg *Func) const {
-  (void)Func;
-  UnimplementedError(Func->getContext()->getFlags());
+  emitUsingTextFixup(Func);
 }
 
 template <>
 void InstARM32ThreeAddrGPR<InstARM32::Add>::emitIAS(const Cfg *Func) const {
   ARM32::AssemblerARM32 *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
   Asm->add(getDest(), getSrc(0), getSrc(1), SetFlags, getPredicate());
+  if (Asm->needsTextFixup())
+    emitUsingTextFixup(Func);
 }
 
 template <>
 void InstARM32ThreeAddrGPR<InstARM32::Sub>::emitIAS(const Cfg *Func) const {
   ARM32::AssemblerARM32 *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
   Asm->sub(getDest(), getSrc(0), getSrc(1), SetFlags, getPredicate());
+  if (Asm->needsTextFixup())
+    emitUsingTextFixup(Func);
 }
 
 InstARM32Call::InstARM32Call(Cfg *Func, Variable *Dest, Operand *CallTarget)
@@ -614,35 +638,29 @@
   ARM32::AssemblerARM32 *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
   Variable *Dest = getDest();
   Operand *Src0 = getSrc(0);
-  // Note: Loop is used so that we can short circuit using break.
-  do {
-    if (Dest->hasReg()) {
-      const Type DestTy = Dest->getType();
-      const bool DestIsVector = isVectorType(DestTy);
-      const bool DestIsScalarFP = isScalarFloatingType(DestTy);
-      const bool CoreVFPMove = isMoveBetweenCoreAndVFPRegisters(Dest, Src0);
-      if (DestIsVector || DestIsScalarFP || CoreVFPMove)
-        break;
-      if (isMemoryAccess(Src0)) {
-        // TODO(kschimpf) Figure out how to do ldr on CoreVPFMove? (see
-        // emitSingleDestSingleSource, local variable LoadOpcode).
-        Asm->ldr(Dest, Src0, getPredicate());
-      } else {
-        Asm->mov(Dest, Src0, getPredicate());
-      }
-      return;
-    } else {
-      const Type Src0Type = Src0->getType();
-      const bool Src0IsVector = isVectorType(Src0Type);
-      const bool Src0IsScalarFP = isScalarFloatingType(Src0Type);
-      const bool CoreVFPMove = isMoveBetweenCoreAndVFPRegisters(Dest, Src0);
-      if (Src0IsVector || Src0IsScalarFP || CoreVFPMove)
-        break;
-      Asm->str(Src0, Dest, getPredicate());
-      return;
+  if (Dest->hasReg()) {
+    const Type DestTy = Dest->getType();
+    const bool DestIsVector = isVectorType(DestTy);
+    const bool DestIsScalarFP = isScalarFloatingType(DestTy);
+    const bool CoreVFPMove = isMoveBetweenCoreAndVFPRegisters(Dest, Src0);
+    if (DestIsVector || DestIsScalarFP || CoreVFPMove)
+      return Asm->setNeedsTextFixup();
+    if (isMemoryAccess(Src0)) {
+      // TODO(kschimpf) Figure out how to do ldr on CoreVFPMove? (see
+      // emitSingleDestSingleSource, local variable LoadOpcode).
+      return Asm->ldr(Dest, Src0, getPredicate());
     }
-  } while (0);
-  llvm_unreachable("not yet implemented");
+    return Asm->mov(Dest, Src0, getPredicate());
+  } else {
+    const Type Src0Type = Src0->getType();
+    const bool Src0IsVector = isVectorType(Src0Type);
+    const bool Src0IsScalarFP = isScalarFloatingType(Src0Type);
+    const bool CoreVFPMove = isMoveBetweenCoreAndVFPRegisters(Dest, Src0);
+    if (Src0IsVector || Src0IsScalarFP || CoreVFPMove)
+      return Asm->setNeedsTextFixup();
+    return Asm->str(Src0, Dest, getPredicate());
+  }
+  Asm->setNeedsTextFixup();
 }
 
 void InstARM32Mov::emit(const Cfg *Func) const {
@@ -666,12 +684,13 @@
   assert(getSrcSize() == 1);
   (void)Func;
   assert(!(isMultiDest() && isMultiSource()) && "Invalid vmov type.");
-  if (isMultiDest())
-    llvm_unreachable("Not yet implemented");
-  if (isMultiSource())
-    llvm_unreachable("Not yet implemented");
-  // Must be single source/dest.
-  emitIASSingleDestSingleSource(Func);
+  ARM32::AssemblerARM32 *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
+  if (!(isMultiDest() || isMultiSource())) {
+    // Must be single source/dest.
+    emitIASSingleDestSingleSource(Func);
+  }
+  if (Asm->needsTextFixup())
+    emitUsingTextFixup(Func);
 }
 
 void InstARM32Mov::dump(const Cfg *Func) const {
@@ -715,11 +734,6 @@
   }
 }
 
-void InstARM32Br::emitIAS(const Cfg *Func) const {
-  (void)Func;
-  llvm_unreachable("Not yet implemented");
-}
-
 void InstARM32Br::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
@@ -768,11 +782,6 @@
   Func->getTarget()->resetStackAdjustment();
 }
 
-void InstARM32Call::emitIAS(const Cfg *Func) const {
-  (void)Func;
-  llvm_unreachable("Not yet implemented");
-}
-
 void InstARM32Call::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
@@ -792,11 +801,6 @@
   Str << getName(Func) << ":";
 }
 
-void InstARM32Label::emitIAS(const Cfg *Func) const {
-  (void)Func;
-  llvm_unreachable("Not yet implemented");
-}
-
 void InstARM32Label::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
@@ -825,12 +829,6 @@
   getSrc(0)->emit(Func);
 }
 
-template <> void InstARM32Ldr::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 1);
-  (void)Func;
-  llvm_unreachable("Not yet implemented");
-}
-
 template <> void InstARM32Ldrex::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
@@ -847,10 +845,9 @@
   getSrc(0)->emit(Func);
 }
 
-template <> void InstARM32Ldrex::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 1);
-  (void)Func;
-  llvm_unreachable("Not yet implemented");
+template <InstARM32::InstKindARM32 K>
+void InstARM32TwoAddrGPR<K>::emitIAS(const Cfg *Func) const {
+  emitUsingTextFixup(Func);
 }
 
 template <> void InstARM32Movw::emit(const Cfg *Func) const {
@@ -926,11 +923,6 @@
   }
 }
 
-void InstARM32Pop::emitIAS(const Cfg *Func) const {
-  (void)Func;
-  llvm_unreachable("Not yet implemented");
-}
-
 void InstARM32Pop::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
@@ -960,12 +952,6 @@
   Func->getTarget()->updateStackAdjustment(Amount);
 }
 
-void InstARM32AdjustStack::emitIAS(const Cfg *Func) const {
-  (void)Func;
-  llvm_unreachable("Not yet implemented");
-  Func->getTarget()->updateStackAdjustment(Amount);
-}
-
 void InstARM32AdjustStack::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
@@ -1016,11 +1002,6 @@
   }
 }
 
-void InstARM32Push::emitIAS(const Cfg *Func) const {
-  (void)Func;
-  llvm_unreachable("Not yet implemented");
-}
-
 void InstARM32Push::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
@@ -1047,6 +1028,8 @@
 void InstARM32Ret::emitIAS(const Cfg *Func) const {
   ARM32::AssemblerARM32 *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
   Asm->bx(RegARM32::Encoded_Reg_lr);
+  if (Asm->needsTextFixup())
+    emitUsingTextFixup(Func);
 }
 
 void InstARM32Ret::dump(const Cfg *Func) const {
@@ -1075,12 +1058,6 @@
   getSrc(1)->emit(Func);
 }
 
-void InstARM32Str::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 2);
-  (void)Func;
-  llvm_unreachable("Not yet implemented");
-}
-
 void InstARM32Str::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
@@ -1109,12 +1086,6 @@
   emitSources(Func);
 }
 
-void InstARM32Strex::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 2);
-  (void)Func;
-  llvm_unreachable("Not yet implemented");
-}
-
 void InstARM32Strex::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
@@ -1144,12 +1115,6 @@
   }
 }
 
-void InstARM32Trap::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 0);
-  (void)Func;
-  llvm_unreachable("Not yet implemented");
-}
-
 void InstARM32Trap::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
@@ -1174,12 +1139,6 @@
   getSrc(1)->emit(Func);
 }
 
-void InstARM32Umull::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 2);
-  (void)Func;
-  llvm_unreachable("Not yet implemented");
-}
-
 void InstARM32Umull::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
@@ -1232,12 +1191,6 @@
   getSrc(0)->emit(Func);
 }
 
-void InstARM32Vcvt::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 1);
-  (void)Func;
-  llvm_unreachable("Not yet implemented");
-}
-
 void InstARM32Vcvt::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
@@ -1261,12 +1214,6 @@
   getSrc(1)->emit(Func);
 }
 
-void InstARM32Vcmp::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 2);
-  (void)Func;
-  llvm_unreachable("Not yet implemented");
-}
-
 void InstARM32Vcmp::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
@@ -1287,12 +1234,6 @@
                                      "FPSCR";
 }
 
-void InstARM32Vmrs::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 0);
-  (void)Func;
-  llvm_unreachable("Not yet implemented");
-}
-
 void InstARM32Vmrs::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
@@ -1314,12 +1255,6 @@
   getSrc(0)->emit(Func);
 }
 
-void InstARM32Vabs::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 1);
-  (void)Func;
-  llvm_unreachable("Not yet implemented");
-}
-
 void InstARM32Vabs::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
@@ -1339,12 +1274,6 @@
          "sy";
 }
 
-void InstARM32Dmb::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 1);
-  (void)Func;
-  llvm_unreachable("Not yet implemented");
-}
-
 void InstARM32Dmb::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
@@ -1486,9 +1415,12 @@
 template class InstARM32ThreeAddrGPR<InstARM32::Sdiv>;
 template class InstARM32ThreeAddrGPR<InstARM32::Sub>;
 template class InstARM32ThreeAddrGPR<InstARM32::Udiv>;
+
 template class InstARM32ThreeAddrFP<InstARM32::Vadd>;
 template class InstARM32ThreeAddrFP<InstARM32::Vdiv>;
 template class InstARM32ThreeAddrFP<InstARM32::Vmul>;
 template class InstARM32ThreeAddrFP<InstARM32::Vsub>;
 
+template class InstARM32TwoAddrGPR<InstARM32::Movt>;
+
 } // end of namespace Ice
diff --git a/src/IceInstARM32.h b/src/IceInstARM32.h
index 461fa48..898f6a4 100644
--- a/src/IceInstARM32.h
+++ b/src/IceInstARM32.h
@@ -344,6 +344,8 @@
 
   void dump(const Cfg *Func) const override;
 
+  void emitIAS(const Cfg *Func) const override;
+
 protected:
   InstARM32(Cfg *Func, InstKindARM32 Kind, SizeT Maxsrcs, Variable *Dest)
       : InstTarget(Func, static_cast<InstKind>(Kind), Maxsrcs, Dest) {}
@@ -351,6 +353,10 @@
   static bool isClassof(const Inst *Inst, InstKindARM32 MyKind) {
     return Inst->getKind() == static_cast<InstKind>(MyKind);
   }
+
+  /// Generates the assembly text of this instruction by calling emit(), then
+  /// adds that text to the assembler buffer as a textual fixup.
+  void emitUsingTextFixup(const Cfg *Func) const;
 };
 
 /// A predicable ARM instruction.
@@ -412,10 +418,6 @@
       return;
     emitUnaryopGPR(Opcode, this, Func, NeedsWidthSuffix);
   }
-  void emitIAS(const Cfg *Func) const override {
-    (void)Func;
-    llvm_unreachable("Not yet implemented");
-  }
   void dump(const Cfg *Func) const override {
     if (!BuildDefs::dump())
       return;
@@ -456,10 +458,6 @@
       return;
     emitUnaryopFP(Opcode, this, Func);
   }
-  void emitIAS(const Cfg *Func) const override {
-    (void)Func;
-    llvm::report_fatal_error("Not yet implemented");
-  }
   void dump(const Cfg *Func) const override {
     if (!BuildDefs::dump())
       return;
@@ -501,10 +499,7 @@
       return;
     emitTwoAddr(Opcode, this, Func);
   }
-  void emitIAS(const Cfg *Func) const override {
-    (void)Func;
-    llvm::report_fatal_error("Not yet implemented");
-  }
+  void emitIAS(const Cfg *Func) const override;
   void dump(const Cfg *Func) const override {
     if (!BuildDefs::dump())
       return;
@@ -542,7 +537,6 @@
         InstARM32LoadBase(Func, Dest, Source, Predicate);
   }
   void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
   void dump(const Cfg *Func) const override {
     if (!BuildDefs::dump())
       return;
@@ -636,10 +630,6 @@
       return;
     emitThreeAddrFP(Opcode, this, Func);
   }
-  void emitIAS(const Cfg *Func) const override {
-    (void)Func;
-    llvm::report_fatal_error("Not yet implemented");
-  }
   void dump(const Cfg *Func) const override {
     if (!BuildDefs::dump())
       return;
@@ -682,10 +672,6 @@
       return;
     emitFourAddr(Opcode, this, Func);
   }
-  void emitIAS(const Cfg *Func) const override {
-    (void)Func;
-    llvm::report_fatal_error("Not yet implemented");
-  }
   void dump(const Cfg *Func) const override {
     if (!BuildDefs::dump())
       return;
@@ -729,10 +715,6 @@
       return;
     emitCmpLike(Opcode, this, Func);
   }
-  void emitIAS(const Cfg *Func) const override {
-    (void)Func;
-    llvm_unreachable("Not yet implemented");
-  }
   void dump(const Cfg *Func) const override {
     if (!BuildDefs::dump())
       return;
@@ -810,7 +792,6 @@
   IceString getName(const Cfg *Func) const;
   SizeT getNumber() const { return Number; }
   void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
   void dump(const Cfg *Func) const override;
 
 private:
@@ -879,7 +860,6 @@
   }
   bool repointEdges(CfgNode *OldNode, CfgNode *NewNode) override;
   void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Inst) { return isClassof(Inst, Br); }
 
@@ -910,7 +890,6 @@
         InstARM32AdjustStack(Func, SP, Amount, SrcAmount);
   }
   void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Inst) { return isClassof(Inst, Adjuststack); }
   SizeT getAmount() const { return Amount; }
@@ -936,7 +915,6 @@
   }
   Operand *getCallTarget() const { return getSrc(0); }
   void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Inst) { return isClassof(Inst, Call); }
 
@@ -956,7 +934,6 @@
     return new (Func->allocate<InstARM32Pop>()) InstARM32Pop(Func, Dests);
   }
   void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Inst) { return isClassof(Inst, Pop); }
 
@@ -978,7 +955,6 @@
     return new (Func->allocate<InstARM32Push>()) InstARM32Push(Func, Srcs);
   }
   void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Inst) { return isClassof(Inst, Push); }
 
@@ -1029,7 +1005,6 @@
         InstARM32Str(Func, Value, Mem, Predicate);
   }
   void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Inst) { return isClassof(Inst, Str); }
 
@@ -1055,7 +1030,6 @@
         InstARM32Strex(Func, Dest, Value, Mem, Predicate);
   }
   void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Inst) { return isClassof(Inst, Strex); }
 
@@ -1074,7 +1048,6 @@
     return new (Func->allocate<InstARM32Trap>()) InstARM32Trap(Func);
   }
   void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Inst) { return isClassof(Inst, Trap); }
 
@@ -1097,7 +1070,6 @@
         InstARM32Umull(Func, DestLo, DestHi, Src0, Src1, Predicate);
   }
   void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Inst) { return isClassof(Inst, Umull); }
 
@@ -1122,7 +1094,6 @@
         InstARM32Vcvt(Func, Dest, Src, Variant, Predicate);
   }
   void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Inst) { return isClassof(Inst, Vcvt); }
 
@@ -1189,7 +1160,6 @@
         InstARM32Vcmp(Func, Src0, Src1, Predicate);
   }
   void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Inst) { return isClassof(Inst, Vcmp); }
 
@@ -1209,7 +1179,6 @@
     return new (Func->allocate<InstARM32Vmrs>()) InstARM32Vmrs(Func, Predicate);
   }
   void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Inst) { return isClassof(Inst, Vmrs); }
 
@@ -1229,7 +1198,6 @@
         InstARM32Vabs(Func, Dest, Src, Predicate);
   }
   void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Inst) { return isClassof(Inst, Vabs); }
 
@@ -1248,7 +1216,6 @@
     return new (Func->allocate<InstARM32Dmb>()) InstARM32Dmb(Func);
   }
   void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Inst) { return isClassof(Inst, Dmb); }
 
diff --git a/src/IceInstMIPS32.cpp b/src/IceInstMIPS32.cpp
index 7a4939a..d3c9c18 100644
--- a/src/IceInstMIPS32.cpp
+++ b/src/IceInstMIPS32.cpp
@@ -48,7 +48,6 @@
   return "TBD";
 }
 
-
 template <> const char *InstMIPS32Addiu::Opcode = "addiu";
 template <> const char *InstMIPS32Lui::Opcode = "lui";
 template <> const char *InstMIPS32La::Opcode = "la";
diff --git a/src/IceInstMIPS32.h b/src/IceInstMIPS32.h
index fe3e274..87ba99a 100644
--- a/src/IceInstMIPS32.h
+++ b/src/IceInstMIPS32.h
@@ -44,6 +44,7 @@
     if (BuildDefs::dump())
       Str << "<OperandMIPS32>";
   }
+
 protected:
   OperandMIPS32(OperandKindMIPS32 Kind, Type Ty)
       : Operand(static_cast<OperandKind>(Kind), Ty) {}
@@ -272,13 +273,12 @@
 
 private:
   InstMIPS32Imm16(Cfg *Func, Variable *Dest, Operand *Source, uint32_t Imm)
-      : InstMIPS32(Func, K, 1, Dest), Imm(Imm){
+      : InstMIPS32(Func, K, 1, Dest), Imm(Imm) {
     addSource(Source);
   }
 
   InstMIPS32Imm16(Cfg *Func, Variable *Dest, uint32_t Imm)
-      : InstMIPS32(Func, K, 0, Dest), Imm(Imm) {
-  }
+      : InstMIPS32(Func, K, 0, Dest), Imm(Imm) {}
 
   static const char *Opcode;
 
@@ -304,7 +304,7 @@
     return !isMultiDest() && !isMultiSource() &&
            checkForRedundantAssign(getDest(), getSrc(0));
   }
-  //bool isSimpleAssign() const override { return true; }
+  // bool isSimpleAssign() const override { return true; }
   void emit(const Cfg *Func) const override;
   void emitIAS(const Cfg *Func) const override;
   void dump(const Cfg *Func) const override;
diff --git a/tests_lit/assembler/arm32/add.ll b/tests_lit/assembler/arm32/add.ll
index 2322dfc..e8fab0f 100644
--- a/tests_lit/assembler/arm32/add.ll
+++ b/tests_lit/assembler/arm32/add.ll
@@ -4,11 +4,22 @@
 
 ; REQUIRES: allow_dump
 
+; Compile using standalone assembler.
 ; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -O2 \
 ; RUN:   | FileCheck %s --check-prefix=ASM
+
+; Show bytes in assembled standalone code.
+; RUN: %p2i --filetype=asm -i %s --target=arm32 --assemble --disassemble \
+; RUN:   --args -O2 | FileCheck %s --check-prefix=DIS
+
+; Compile using integrated assembler.
 ; RUN: %p2i --filetype=iasm -i %s --target=arm32 --args -O2 \
 ; RUN:   | FileCheck %s --check-prefix=IASM
 
+; Show bytes in assembled integrated code.
+; RUN: %p2i --filetype=iasm -i %s --target=arm32 --assemble --disassemble \
+; RUN:   --args -O2 | FileCheck %s --check-prefix=DIS
+
 define internal i32 @add1ToR0(i32 %p) {
   %v = add i32 %p, 1
   ret i32 %v
@@ -18,11 +29,21 @@
 ; ASM:       add     r0, r0, #1
 ; ASM-NEXT:  bx      lr
 
+; DIS-LABEL:00000000 <add1ToR0>:
+; DIS-NEXT:   0:        e2800001
+; DIS-NEXT:   4:        e12fff1e
+
 ; IASM-LABEL: add1ToR0:
-; IASM:      .byte 0x1
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0x80
-; IASM-NEXT: .byte 0xe2
+
+; IASM-NEXT:    .byte 0x1
+; IASM-NEXT:    .byte 0x0
+; IASM-NEXT:    .byte 0x80
+; IASM-NEXT:    .byte 0xe2
+
+; IASM-NEXT:    .byte 0x1e
+; IASM-NEXT:    .byte 0xff
+; IASM-NEXT:    .byte 0x2f
+; IASM-NEXT:    .byte 0xe1
 
 define internal i32 @Add2Regs(i32 %p1, i32 %p2) {
   %v = add i32 %p1, %p2
@@ -33,9 +54,18 @@
 ; ASM:       add r0, r0, r1
 ; ASM-NEXT:  bx lr
 
+; DIS-LABEL:00000010 <Add2Regs>:
+; DIS-NEXT:  10:        e0800001
+; DIS-NEXT:  14:        e12fff1e
+
 ; IASM-LABEL: Add2Regs:
 
-; IASM:      .byte 0x1
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0x80
-; IASM-NEXT: .byte 0xe0
+; IASM-NEXT:    .byte 0x1
+; IASM-NEXT:    .byte 0x0
+; IASM-NEXT:    .byte 0x80
+; IASM-NEXT:    .byte 0xe0
+
+; IASM-NEXT:    .byte 0x1e
+; IASM-NEXT:    .byte 0xff
+; IASM-NEXT:    .byte 0x2f
+; IASM-NEXT:    .byte 0xe1
diff --git a/tests_lit/assembler/arm32/global-load-store.ll b/tests_lit/assembler/arm32/global-load-store.ll
new file mode 100644
index 0000000..1f33085
--- /dev/null
+++ b/tests_lit/assembler/arm32/global-load-store.ll
@@ -0,0 +1,89 @@
+; TODO(kschimpf): Show that we can handle global variable loads/stores.
+
+; REQUIRES: allow_dump
+
+; Compile using standalone assembler.
+; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -O2 \
+; RUN:   | FileCheck %s --check-prefix=ASM
+
+; Show bytes in assembled standalone code.
+; RUN: %p2i --filetype=asm -i %s --target=arm32 --assemble --disassemble \
+; RUN:   --args -O2 | FileCheck %s --check-prefix=DIS
+
+; Compile using integrated assembler.
+; RUN: %p2i --filetype=iasm -i %s --target=arm32 --args -O2 \
+; RUN:   | FileCheck %s --check-prefix=IASM
+
+; Show bytes in assembled integrated code.
+; RUN: %p2i --filetype=iasm -i %s --target=arm32 --assemble --disassemble \
+; RUN:   --args -O2 | FileCheck %s --check-prefix=DIS
+
+@global1 = internal global [4 x i8] zeroinitializer, align 4
+
+; ASM-LABEL: global1:
+; ASM-NEXT:    .zero   4
+; ASM-NEXT:    .size   global1, 4
+; ASM-NEXT:    .text
+; ASM-NEXT:    .p2alignl 4,0xe7fedef0
+
+; IASM-LABEL:global1:
+; IASM-NEXT:    .zero   4
+; IASM-NEXT:    .size   global1, 4
+; IASM-NEXT:    .text
+; IASM-NEXT:    .p2alignl 4,0xe7fedef0
+
+define internal i32 @load() {
+  %addr = bitcast [4 x i8]* @global1 to i32*
+  %v = load i32, i32* %addr, align 1
+  ret i32 %v
+}
+
+; ASM-LABEL: load:
+; ASM-NEXT: .Lload$__0:
+; ASM-NEXT:    movw    r0, #:lower16:global1
+; ASM-NEXT:    movt    r0, #:upper16:global1
+; ASM-NEXT:    ldr     r0, [r0]
+; ASM-NEXT:    bx      lr
+
+; DIS-LABEL:00000000 <load>:
+; DIS-NEXT:   0:   e3000000
+; DIS-NEXT:   4:   e3400000
+; DIS-NEXT:   8:   e5900000
+; DIS-NEXT:   c:   e12fff1e
+
+; IASM-LABEL:load:
+; IASM-NEXT:    movw    r0, #:lower16:global1
+; IASM-NEXT:    movt    r0, #:upper16:global1
+; IASM-NEXT:    ldr     r0, [r0]
+; IASM-NEXT:    .byte 0x1e
+; IASM-NEXT:    .byte 0xff
+; IASM-NEXT:    .byte 0x2f
+; IASM-NEXT:    .byte 0xe1
+
+define internal void @store(i32 %v) {
+  %addr = bitcast [4 x i8]* @global1 to i32*
+  store i32 %v, i32* %addr, align 1
+  ret void
+}
+
+; ASM-LABEL:store:
+; ASM-NEXT: .Lstore$__0:
+; ASM-NEXT:     movw    r1, #:lower16:global1
+; ASM-NEXT:     movt    r1, #:upper16:global1
+; ASM-NEXT:     str     r0, [r1]
+; ASM-NEXT:     bx      lr
+
+; DIS-LABEL:00000010 <store>:
+; DIS-NEXT:  10:   e3001000
+; DIS-NEXT:  14:   e3401000
+; DIS-NEXT:  18:   e5810000
+; DIS-NEXT:  1c:   e12fff1e
+
+; IASM-LABEL:store:
+; IASM-NEXT:    movw    r1, #:lower16:global1
+; IASM-NEXT:    movt    r1, #:upper16:global1
+; IASM-NEXT:    str     r0, [r1]
+; IASM-NEXT:    .byte 0x1e
+; IASM-NEXT:    .byte 0xff
+; IASM-NEXT:    .byte 0x2f
+; IASM-NEXT:    .byte 0xe1
diff --git a/tests_lit/assembler/arm32/load-store.ll b/tests_lit/assembler/arm32/load-store.ll
index c398de9..2dcd886 100644
--- a/tests_lit/assembler/arm32/load-store.ll
+++ b/tests_lit/assembler/arm32/load-store.ll
@@ -2,11 +2,22 @@
 
 ; REQUIRES: allow_dump
 
+; Compile using standalone assembler.
 ; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -Om1 \
 ; RUN:   | FileCheck %s --check-prefix=ASM
+
+; Show bytes in assembled standalone code.
+; RUN: %p2i --filetype=asm -i %s --target=arm32 --assemble --disassemble \
+; RUN:   --args -Om1 | FileCheck %s --check-prefix=DIS
+
+; Compile using integrated assembler.
 ; RUN: %p2i --filetype=iasm -i %s --target=arm32 --args -Om1 \
 ; RUN:   | FileCheck %s --check-prefix=IASM
 
+; Show bytes in assembled integrated code.
+; RUN: %p2i --filetype=iasm -i %s --target=arm32 --assemble --disassemble \
+; RUN:   --args -Om1 | FileCheck %s --check-prefix=DIS
+
 define internal i32 @add1ToR0(i32 %p) {
   %v = add i32 %p, 1
   ret i32 %v
@@ -14,52 +25,61 @@
 
 ; ASM-LABEL: add1ToR0:
 ; IASM-LABEL: add1ToR0:
+; DIS-LABEL:00000000 <add1ToR0>:
 
 ; ASM:          sub     sp, sp, #8
+; DIS-NEXT:   0:        e24dd008
 ; IASM:         .byte 0x8
 ; IASM-NEXT:    .byte 0xd0
 ; IASM-NEXT:    .byte 0x4d
 ; IASM-NEXT:    .byte 0xe2
 
 ; ASM-NEXT:     str     r0, [sp, #4]
+; DIS-NEXT:   4:        e58d0004
 ; IASM-NEXT:    .byte 0x4
 ; IASM-NEXT:    .byte 0x0
 ; IASM-NEXT:    .byte 0x8d
 ; IASM-NEXT:    .byte 0xe5
 
 ; ASM-NEXT:     ldr     r0, [sp, #4]
+; DIS-NEXT:   8:        e59d0004
 ; IASM-NEXT:    .byte 0x4
 ; IASM-NEXT:    .byte 0x0
 ; IASM-NEXT:    .byte 0x9d
 ; IASM-NEXT:    .byte 0xe5
 
 ; ASM-NEXT:     add     r0, r0, #1
+; DIS-NEXT:   c:        e2800001
 ; IASM-NEXT:    .byte 0x1
 ; IASM-NEXT:    .byte 0x0
 ; IASM-NEXT:    .byte 0x80
 ; IASM-NEXT:    .byte 0xe2
 
 ; ASM-NEXT:     str     r0, [sp]
+; DIS-NEXT:  10:        e58d0000
 ; IASM-NEXT:    .byte 0x0
 ; IASM-NEXT:    .byte 0x0
 ; IASM-NEXT:    .byte 0x8d
 ; IASM-NEXT:    .byte 0xe5
 
 ; ASM-NEXT:     ldr     r0, [sp]
+; DIS-NEXT:  14:        e59d0000
 ; IASM-NEXT:    .byte 0x0
 ; IASM-NEXT:    .byte 0x0
 ; IASM-NEXT:    .byte 0x9d
 ; IASM-NEXT:    .byte 0xe5
 
 ; ASM-NEXT:     add     sp, sp, #8
+; DIS-NEXT:  18:        e28dd008
 ; IASM-NEXT:    .byte 0x8
 ; IASM-NEXT:    .byte 0xd0
 ; IASM-NEXT:    .byte 0x8d
 ; IASM-NEXT:    .byte 0xe2
 
 ; ASM-NEXT:     bx      lr
-; IASM-NEXT:	.byte 0x1e
-; IASM-NEXT:	.byte 0xff
-; IASM-NEXT:	.byte 0x2f
-; IASM-NEXT:	.byte 0xe1
+; DIS-NEXT:  1c:        e12fff1e
+; IASM-NEXT:    .byte 0x1e
+; IASM-NEXT:    .byte 0xff
+; IASM-NEXT:    .byte 0x2f
+; IASM-NEXT:    .byte 0xe1
 
diff --git a/tests_lit/assembler/arm32/mov-imm.ll b/tests_lit/assembler/arm32/mov-imm.ll
index df939f9..d7c97e3 100644
--- a/tests_lit/assembler/arm32/mov-imm.ll
+++ b/tests_lit/assembler/arm32/mov-imm.ll
@@ -2,22 +2,37 @@
 
 ; REQUIRES: allow_dump
 
-; RUN: %p2i --filetype=asm -i %s --target=arm32 \
+; Compile using standalone assembler.
+; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -O2 \
 ; RUN:   | FileCheck %s --check-prefix=ASM
-; RUN: %p2i --filetype=iasm -i %s --target=arm32 \
+
+; Show bytes in assembled standalone code.
+; RUN: %p2i --filetype=asm -i %s --target=arm32 --assemble --disassemble \
+; RUN:   --args -O2 | FileCheck %s --check-prefix=DIS
+
+; Compile using integrated assembler.
+; RUN: %p2i --filetype=iasm -i %s --target=arm32 --args -O2 \
 ; RUN:   | FileCheck %s --check-prefix=IASM
 
+; Show bytes in assembled integrated code.
+; RUN: %p2i --filetype=iasm -i %s --target=arm32 --assemble --disassemble \
+; RUN:   --args -O2 | FileCheck %s --check-prefix=DIS
+
 define internal i32 @Imm1() {
   ret i32 1
 }
 
 ; ASM-LABEL: Imm1:
-; ASM: mov	r0, #1
+; ASM: mov      r0, #1
+
+; DIS-LABEL:00000000 <Imm1>:
+; DIS-NEXT:   0:        e3a00001
+
 ; IASM-LABEL: Imm1:
-; IASM:	.byte 0x1
-; IASM:	.byte 0x0
-; IASM:	.byte 0xa0
-; IASM:	.byte 0xe3
+; IASM: .byte 0x1
+; IASM: .byte 0x0
+; IASM: .byte 0xa0
+; IASM: .byte 0xe3
 
 
 define internal i32 @rotateFImmAA() {
@@ -26,13 +41,16 @@
 }
 
 ; ASM-LABEL: rotateFImmAA:
-; ASM: mov	r0, #680
+; ASM: mov      r0, #680
+
+; DIS-LABEL:00000010 <rotateFImmAA>:
+; DIS-NEXT:  10:        e3a00faa
 
 ; IASM-LABEL: rotateFImmAA:
-; IASM:	.byte 0xaa
-; IASM:	.byte 0xf
-; IASM:	.byte 0xa0
-; IASM:	.byte 0xe3
+; IASM: .byte 0xaa
+; IASM: .byte 0xf
+; IASM: .byte 0xa0
+; IASM: .byte 0xe3
 
 define internal i32 @rotateEImmAA() {
  ; immediate = 0x00000aa0 = b 0000 0000 0000 0000 0000 1010 1010 0000
@@ -40,13 +58,16 @@
 }
 
 ; ASM-LABEL: rotateEImmAA:
-; ASM: mov	r0, #2720
+; ASM: mov      r0, #2720
+
+; DIS-LABEL:00000020 <rotateEImmAA>:
+; DIS-NEXT:  20:        e3a00eaa
 
 ; IASM-LABEL: rotateEImmAA:
-; IASM:	.byte 0xaa
-; IASM:	.byte 0xe
-; IASM:	.byte 0xa0
-; IASM:	.byte 0xe3
+; IASM: .byte 0xaa
+; IASM: .byte 0xe
+; IASM: .byte 0xa0
+; IASM: .byte 0xe3
 
 define internal i32 @rotateDImmAA() {
   ; immediate = 0x00002a80 = b 0000 0000 0000 0000 0010 1010 1000 0000
@@ -54,13 +75,16 @@
 }
 
 ; ASM-LABEL: rotateDImmAA:
-; ASM: mov	r0, #10880
+; ASM: mov      r0, #10880
+
+; DIS-LABEL:00000030 <rotateDImmAA>:
+; DIS-NEXT:  30:        e3a00daa
 
 ; IASM-LABEL: rotateDImmAA:
-; IASM:	.byte 0xaa
-; IASM:	.byte 0xd
-; IASM:	.byte 0xa0
-; IASM:	.byte 0xe3
+; IASM: .byte 0xaa
+; IASM: .byte 0xd
+; IASM: .byte 0xa0
+; IASM: .byte 0xe3
 
 define internal i32 @rotateCImmAA() {
   ; immediate = 0x0000aa00 = b 0000 0000 0000 0000 1010 1010 0000 0000
@@ -68,13 +92,16 @@
 }
 
 ; ASM-LABEL: rotateCImmAA:
-; ASM: mov	r0, #43520
+; ASM: mov      r0, #43520
+
+; DIS-LABEL:00000040 <rotateCImmAA>:
+; DIS-NEXT:  40:        e3a00caa
 
 ; IASM-LABEL: rotateCImmAA:
-; IASM:	.byte 0xaa
-; IASM:	.byte 0xc
-; IASM:	.byte 0xa0
-; IASM:	.byte 0xe3
+; IASM: .byte 0xaa
+; IASM: .byte 0xc
+; IASM: .byte 0xa0
+; IASM: .byte 0xe3
 
 define internal i32 @rotateBImmAA() {
   ; immediate = 0x0002a800 = b 0000 0000 0000 0010 1010 1000 0000 0000
@@ -82,13 +109,16 @@
 }
 
 ; ASM-LABEL: rotateBImmAA:
-; ASM: mov	r0, #174080
+; ASM: mov      r0, #174080
+
+; DIS-LABEL:00000050 <rotateBImmAA>:
+; DIS-NEXT:  50:        e3a00baa
 
 ; IASM-LABEL: rotateBImmAA:
-; IASM:	.byte 0xaa
-; IASM:	.byte 0xb
-; IASM:	.byte 0xa0
-; IASM:	.byte 0xe3
+; IASM: .byte 0xaa
+; IASM: .byte 0xb
+; IASM: .byte 0xa0
+; IASM: .byte 0xe3
 
 define internal i32 @rotateAImmAA() {
   ; immediate = 0x000aa000 = b 0000 0000 0000 1010 1010 0000 0000 0000
@@ -96,13 +126,16 @@
 }
 
 ; ASM-LABEL: rotateAImmAA:
-; ASM: mov	r0, #696320
+; ASM: mov      r0, #696320
+
+; DIS-LABEL:00000060 <rotateAImmAA>:
+; DIS-NEXT:  60:        e3a00aaa
 
 ; IASM-LABEL: rotateAImmAA:
-; IASM:	.byte 0xaa
-; IASM:	.byte 0xa
-; IASM:	.byte 0xa0
-; IASM:	.byte 0xe3
+; IASM: .byte 0xaa
+; IASM: .byte 0xa
+; IASM: .byte 0xa0
+; IASM: .byte 0xe3
 
 define internal i32 @rotate9ImmAA() {
   ; immediate = 0x002a8000 = b 0000 0000 0010 1010 1000 0000 0000 0000
@@ -110,13 +143,16 @@
 }
 
 ; ASM-LABEL: rotate9ImmAA:
-; ASM: mov	r0, #2785280
+; ASM: mov      r0, #2785280
+
+; DIS-LABEL:00000070 <rotate9ImmAA>:
+; DIS-NEXT:  70:        e3a009aa
 
 ; IASM-LABEL: rotate9ImmAA:
-; IASM:	.byte 0xaa
-; IASM:	.byte 0x9
-; IASM:	.byte 0xa0
-; IASM:	.byte 0xe3
+; IASM: .byte 0xaa
+; IASM: .byte 0x9
+; IASM: .byte 0xa0
+; IASM: .byte 0xe3
 
 define internal i32 @rotate8ImmAA() {
   ; immediate = 0x00aa0000 = b 0000 0000 1010 1010 0000 0000 0000 0000
@@ -124,13 +160,16 @@
 }
 
 ; ASM-LABEL: rotate8ImmAA:
-; ASM: mov	r0, #11141120
+; ASM: mov      r0, #11141120
+
+; DIS-LABEL:00000080 <rotate8ImmAA>:
+; DIS-NEXT:  80:        e3a008aa
 
 ; IASM-LABEL: rotate8ImmAA:
-; IASM:	.byte 0xaa
-; IASM:	.byte 0x8
-; IASM:	.byte 0xa0
-; IASM:	.byte 0xe3
+; IASM: .byte 0xaa
+; IASM: .byte 0x8
+; IASM: .byte 0xa0
+; IASM: .byte 0xe3
 
 define internal i32 @rotate7ImmAA() {
   ; immediate = 0x02a80000 = b 0000 0010 1010 1000 0000 0000 0000 0000
@@ -138,13 +177,16 @@
 }
 
 ; ASM-LABEL: rotate7ImmAA:
-; ASM: 	mov	r0, #44564480
+; ASM:  mov     r0, #44564480
+
+; DIS-LABEL:00000090 <rotate7ImmAA>:
+; DIS-NEXT:  90:        e3a007aa
 
 ; IASM-LABEL: rotate7ImmAA:
-; IASM:	.byte 0xaa
-; IASM:	.byte 0x7
-; IASM:	.byte 0xa0
-; IASM:	.byte 0xe3
+; IASM: .byte 0xaa
+; IASM: .byte 0x7
+; IASM: .byte 0xa0
+; IASM: .byte 0xe3
 
 define internal i32 @rotate6ImmAA() {
   ; immediate = 0x0aa00000 = b 0000 1010 1010 0000 0000 0000 0000 0000
@@ -152,13 +194,16 @@
 }
 
 ; ASM-LABEL: rotate6ImmAA:
-; ASM: 	mov	r0, #178257920
+; ASM:  mov     r0, #178257920
+
+; DIS-LABEL:000000a0 <rotate6ImmAA>:
+; DIS-NEXT:  a0:        e3a006aa
 
 ; IASM-LABEL: rotate6ImmAA:
-; IASM:	.byte 0xaa
-; IASM:	.byte 0x6
-; IASM:	.byte 0xa0
-; IASM:	.byte 0xe3
+; IASM: .byte 0xaa
+; IASM: .byte 0x6
+; IASM: .byte 0xa0
+; IASM: .byte 0xe3
 
 define internal i32 @rotate5ImmAA() {
   ; immediate = 0x2a800000 = b 0010 1010 1000 0000 0000 0000 0000 0000
@@ -166,13 +211,16 @@
 }
 
 ; ASM-LABEL: rotate5ImmAA:
-; ASM: 	mov	r0, #713031680
+; ASM:  mov     r0, #713031680
+
+; DIS-LABEL:000000b0 <rotate5ImmAA>:
+; DIS-NEXT:  b0:        e3a005aa
 
 ; IASM-LABEL: rotate5ImmAA:
-; IASM:	.byte 0xaa
-; IASM:	.byte 0x5
-; IASM:	.byte 0xa0
-; IASM:	.byte 0xe3
+; IASM: .byte 0xaa
+; IASM: .byte 0x5
+; IASM: .byte 0xa0
+; IASM: .byte 0xe3
 
 define internal i32 @rotate4ImmAA() {
   ; immediate = 0xaa000000 = b 1010 1010 0000 0000 0000 0000 0000 0000
@@ -180,13 +228,16 @@
 }
 
 ; ASM-LABEL: rotate4ImmAA:
-; ASM: mov	r0, #2852126720
+; ASM: mov      r0, #2852126720
+
+; DIS-LABEL:000000c0 <rotate4ImmAA>:
+; DIS-NEXT:  c0:        e3a004aa
 
 ; IASM-LABEL: rotate4ImmAA:
-; IASM:	.byte 0xaa
-; IASM:	.byte 0x4
-; IASM:	.byte 0xa0
-; IASM:	.byte 0xe3
+; IASM: .byte 0xaa
+; IASM: .byte 0x4
+; IASM: .byte 0xa0
+; IASM: .byte 0xe3
 
 define internal i32 @rotate3ImmAA() {
   ; immediate = 0xa8000002 = b 1010 1000 0000 0000 0000 0000 0000 0010
@@ -194,13 +245,16 @@
 }
 
 ; ASM-LABEL: rotate3ImmAA:
-; ASM: mov	r0, #2818572290
+; ASM: mov      r0, #2818572290
+
+; DIS-LABEL:000000d0 <rotate3ImmAA>:
+; DIS-NEXT:  d0:        e3a003aa
 
 ; IASM-LABEL: rotate3ImmAA:
-; IASM:	.byte 0xaa
-; IASM:	.byte 0x3
-; IASM:	.byte 0xa0
-; IASM:	.byte 0xe3
+; IASM: .byte 0xaa
+; IASM: .byte 0x3
+; IASM: .byte 0xa0
+; IASM: .byte 0xe3
 
 define internal i32 @rotate2ImmAA() {
   ; immediate = 0xa000000a = b 1010 0000 0000 0000 0000 0000 0000 1010
@@ -208,13 +262,16 @@
 }
 
 ; ASM-LABEL: rotate2ImmAA:
-; ASM: 	mov	r0, #2684354570
+; ASM:  mov     r0, #2684354570
+
+; DIS-LABEL:000000e0 <rotate2ImmAA>:
+; DIS-NEXT:  e0:        e3a002aa
 
 ; IASM-LABEL: rotate2ImmAA:
-; IASM:	.byte 0xaa
-; IASM:	.byte 0x2
-; IASM:	.byte 0xa0
-; IASM:	.byte 0xe3
+; IASM: .byte 0xaa
+; IASM: .byte 0x2
+; IASM: .byte 0xa0
+; IASM: .byte 0xe3
 
 define internal i32 @rotate1ImmAA() {
   ; immediate = 0x8000002a = b 1000 0000 0000 0000 0000 0000 0010 1010
@@ -222,13 +279,16 @@
 }
 
 ; ASM-LABEL: rotate1ImmAA:
-; ASM: mov	r0, #2147483690
+; ASM: mov      r0, #2147483690
+
+; DIS-LABEL:000000f0 <rotate1ImmAA>:
+; DIS-NEXT:  f0:        e3a001aa
 
 ; IASM-LABEL: rotate1ImmAA:
-; IASM:	.byte 0xaa
-; IASM:	.byte 0x1
-; IASM:	.byte 0xa0
-; IASM:	.byte 0xe3
+; IASM: .byte 0xaa
+; IASM: .byte 0x1
+; IASM: .byte 0xa0
+; IASM: .byte 0xe3
 
 define internal i32 @rotate0ImmAA() {
   ; immediate = 0x000000aa = b 0000 0000 0000 0000 0000 0000 1010 1010
@@ -236,10 +296,13 @@
 }
 
 ; ASM-LABEL: rotate0ImmAA:
-; ASM: mov	r0, #170
+; ASM: mov      r0, #170
+
+; DIS-LABEL:00000100 <rotate0ImmAA>:
+; DIS-NEXT: 100:        e3a000aa
 
 ; IASM-LABEL: rotate0ImmAA:
-; IASM:	.byte 0xaa
-; IASM:	.byte 0x0
-; IASM:	.byte 0xa0
-; IASM:	.byte 0xe3
+; IASM: .byte 0xaa
+; IASM: .byte 0x0
+; IASM: .byte 0xa0
+; IASM: .byte 0xe3
diff --git a/tests_lit/assembler/arm32/ret.ll b/tests_lit/assembler/arm32/ret.ll
index 95fce97..5df61d4 100644
--- a/tests_lit/assembler/arm32/ret.ll
+++ b/tests_lit/assembler/arm32/ret.ll
@@ -3,36 +3,63 @@
 
 ; REQUIRES: allow_dump
 
-; RUN: %p2i --filetype=asm -i %s --target=arm32 \
+; Compile using standalone assembler.
+; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -Om1 \
 ; RUN:   | FileCheck %s --check-prefix=ASM
-; RUN: %p2i --filetype=iasm -i %s --target=arm32 \
+
+; Show bytes in assembled standalone code.
+; RUN: %p2i --filetype=asm -i %s --target=arm32 --assemble --disassemble \
+; RUN:   --args -Om1 | FileCheck %s --check-prefix=DIS
+
+; Compile using integrated assembler.
+; RUN: %p2i --filetype=iasm -i %s --target=arm32 --args -Om1 \
 ; RUN:   | FileCheck %s --check-prefix=IASM
 
+; Show bytes in assembled integrated code.
+; RUN: %p2i --filetype=iasm -i %s --target=arm32 --assemble --disassemble \
+; RUN:   --args -Om1 | FileCheck %s --check-prefix=DIS
+
 define internal void @f() {
   ret void
 }
 
 ; ASM-LABEL:f:
 ; ASM-NEXT: .Lf$__0:
-; ASM-NEXT: 	bx	lr
+; ASM-NEXT:     bx      lr
 
+
+
+; DIS-LABEL:00000000 <f>:
 ; IASM-LABEL:f:
-; IASM-NEXT: 	.byte 0x1e
-; IASM-NEXT: 	.byte 0xff
-; IASM-NEXT: 	.byte 0x2f
-; IASM-NEXT: 	.byte 0xe1
 
-; IASM-NEXT: 	.byte 0x70
-; IASM-NEXT: 	.byte 0x0
-; IASM-NEXT: 	.byte 0x20
-; IASM-NEXT: 	.byte 0xe1
+; DIS-NEXT:   0:        e12fff1e
+; IASM-NEXT:    .byte 0x1e
+; IASM-NEXT:    .byte 0xff
+; IASM-NEXT:    .byte 0x2f
+; IASM-NEXT:    .byte 0xe1
 
-; IASM-NEXT: 	.byte 0x70
-; IASM-NEXT: 	.byte 0x0
-; IASM-NEXT: 	.byte 0x20
-; IASM-NEXT: 	.byte 0xe1
+; DIS-NEXT:   4:        e7fedef0
+; IASM-NEXT:    .byte 0xf0
+; IASM-NEXT:    .byte 0xde
+; IASM-NEXT:    .byte 0xfe
+; IASM-NEXT:    .byte 0xe7
 
-; IASM-NEXT: 	.byte 0x70
-; IASM-NEXT: 	.byte 0x0
-; IASM-NEXT: 	.byte 0x20
-; IASM-NEXT: 	.byte 0xe1
+; DIS-NEXT:   8:        e7fedef0
+; IASM-NEXT:    .byte 0xf0
+; IASM-NEXT:    .byte 0xde
+; IASM-NEXT:    .byte 0xfe
+; IASM-NEXT:    .byte 0xe7
+
+; DIS-NEXT:   c:        e7fedef0
+; IASM-NEXT:    .byte 0xf0
+; IASM-NEXT:    .byte 0xde
+; IASM-NEXT:    .byte 0xfe
+; IASM-NEXT:    .byte 0xe7
+
+define internal void @ignore() {
+  ret void
+}
+
+; ASM-LABEL:ignore:
+; DIS-LABEL:00000010 <ignore>:
+; IASM-LABEL:ignore:
diff --git a/tests_lit/assembler/arm32/sub.ll b/tests_lit/assembler/arm32/sub.ll
index 138f7bd..1ee0b33 100644
--- a/tests_lit/assembler/arm32/sub.ll
+++ b/tests_lit/assembler/arm32/sub.ll
@@ -1,14 +1,23 @@
 ; Show that we know how to translate instruction sub.
 
-; NOTE: We use -O2 to get rid of memory stores.
-
 ; REQUIRES: allow_dump
 
+; Compile using standalone assembler.
 ; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -O2 \
 ; RUN:   | FileCheck %s --check-prefix=ASM
+
+; Show bytes in assembled standalone code.
+; RUN: %p2i --filetype=asm -i %s --target=arm32 --assemble --disassemble \
+; RUN:   --args -O2 | FileCheck %s --check-prefix=DIS
+
+; Compile using integrated assembler.
 ; RUN: %p2i --filetype=iasm -i %s --target=arm32 --args -O2 \
 ; RUN:   | FileCheck %s --check-prefix=IASM
 
+; Show bytes in assembled integrated code.
+; RUN: %p2i --filetype=iasm -i %s --target=arm32 --assemble --disassemble \
+; RUN:   --args -O2 | FileCheck %s --check-prefix=DIS
+
 define internal i32 @sub1FromR0(i32 %p) {
   %v = sub i32 %p, 1
   ret i32 %v
@@ -18,6 +27,9 @@
 ; ASM:	sub	r0, r0, #1
 ; ASM:	bx	lr
 
+; DIS-LABEL:00000000 <sub1FromR0>:
+; DIS-NEXT:   0:	e2400001
+
 ; IASM-LABEL: sub1FromR0:
 ; IASM:	     .byte 0x1
 ; IASM-NEXT: .byte 0x0
@@ -34,9 +46,11 @@
 ; ASM:       sub r0, r0, r1
 ; ASM-NEXT:  bx lr
 
-; IASM-LABEL: Sub2Regs:
+; DIS-LABEL:00000010 <Sub2Regs>:
+; DIS-NEXT:  10:	e0400001
 
-; IASM:      .byte 0x1
+; IASM-LABEL: Sub2Regs:
+; IASM-NEXT: .byte 0x1
 ; IASM-NEXT: .byte 0x0
 ; IASM-NEXT: .byte 0x40
 ; IASM-NEXT: .byte 0xe0