Rearrange emit vs emitIAS. Wait till function is done before dumping text.

Eventually, I wanted to have a flag "UseELFWriter" like:
https://codereview.chromium.org/678533005/diff/120001/src/IceCfg.cpp

Where the emit OStream would not have text, and only have
binary. This refactor hopefully means fewer places to
check for a flag to disable the text version of IAS,
and be able to write binary. Otherwise, there are some
text labels for branches that are still being dumped out.

BUG=none
R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/700263003
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp
index a3b30ee..bd1894c 100644
--- a/src/IceCfg.cpp
+++ b/src/IceCfg.cpp
@@ -390,6 +390,22 @@
 
 // ======================== Dump routines ======================== //
 
+void Cfg::emitTextHeader(const IceString &MangledName) {
+  Ostream &Str = Ctx->getStrEmit();
+  Str << "\t.text\n";
+  if (Ctx->getFlags().FunctionSections)
+    Str << "\t.section\t.text." << MangledName << ",\"ax\",@progbits\n";
+  if (!getInternal() || Ctx->getFlags().DisableInternal) {
+    Str << "\t.globl\t" << MangledName << "\n";
+    Str << "\t.type\t" << MangledName << ",@function\n";
+  }
+  Str << "\t.p2align " << getTarget()->getBundleAlignLog2Bytes() << ",0x";
+  for (AsmCodeByte I : getTarget()->getNonExecBundlePadding())
+    Str.write_hex(I);
+  Str << "\n";
+  Str << MangledName << ":\n";
+}
+
 void Cfg::emit() {
   TimerMarker T(TimerStack::TT_emit, this);
   if (Ctx->getFlags().DecorateAsm) {
@@ -409,23 +425,23 @@
         << " -o=MyObj.o"
         << "\n\n";
   }
-  Str << "\t.text\n";
   IceString MangledName = getContext()->mangleName(getFunctionName());
-  if (Ctx->getFlags().FunctionSections)
-    Str << "\t.section\t.text." << MangledName << ",\"ax\",@progbits\n";
-  if (!getInternal() || Ctx->getFlags().DisableInternal) {
-    Str << "\t.globl\t" << MangledName << "\n";
-    Str << "\t.type\t" << MangledName << ",@function\n";
-  }
-  Str << "\t.p2align " << getTarget()->getBundleAlignLog2Bytes() << ",0x";
-  for (AsmCodeByte I : getTarget()->getNonExecBundlePadding())
-    Str.write_hex(I);
-  Str << "\n";
+  emitTextHeader(MangledName);
   for (CfgNode *Node : Nodes)
     Node->emit(this);
   Str << "\n";
 }
 
+void Cfg::emitIAS() {
+  TimerMarker T(TimerStack::TT_emit, this);
+  assert(!Ctx->getFlags().DecorateAsm);
+  IceString MangledName = getContext()->mangleName(getFunctionName());
+  emitTextHeader(MangledName);
+  for (CfgNode *Node : Nodes)
+    Node->emitIAS(this);
+  getAssembler<Assembler>()->emitIASBytes(Ctx);
+}
+
 // Dumps the IR with an optional introductory message.
 void Cfg::dump(const IceString &Message) {
   if (!Ctx->isVerbose())
diff --git a/src/IceCfg.h b/src/IceCfg.h
index fb8ebd9..2a1e602 100644
--- a/src/IceCfg.h
+++ b/src/IceCfg.h
@@ -94,9 +94,6 @@
   template <typename T> T *getAssembler() const {
     return static_cast<T *>(TargetAssembler.get());
   }
-  bool useIntegratedAssembler() const {
-    return getContext()->getFlags().UseIntegratedAssembler;
-  }
   bool hasComputedFrame() const;
   bool getFocusedTiming() const { return FocusedTiming; }
   void setFocusedTiming() { FocusedTiming = true; }
@@ -131,6 +128,8 @@
   const CfgNode *getCurrentNode() const { return CurrentNode; }
 
   void emit();
+  void emitIAS();
+  void emitTextHeader(const IceString &MangledName);
   void dump(const IceString &Message = "");
 
   // Allocate data of type T using the per-Cfg allocator.
diff --git a/src/IceCfgNode.cpp b/src/IceCfgNode.cpp
index 52789b9..543f4e9 100644
--- a/src/IceCfgNode.cpp
+++ b/src/IceCfgNode.cpp
@@ -864,6 +864,24 @@
   }
 }
 
+void updateStats(Cfg *Func, const Inst *I) {
+  // Update emitted instruction count, plus fill/spill count for
+  // Variable operands without a physical register.
+  if (uint32_t Count = I->getEmitInstCount()) {
+    Func->getContext()->statsUpdateEmitted(Count);
+    if (Variable *Dest = I->getDest()) {
+      if (!Dest->hasReg())
+        Func->getContext()->statsUpdateFills();
+    }
+    for (SizeT S = 0; S < I->getSrcSize(); ++S) {
+      if (Variable *Src = llvm::dyn_cast<Variable>(I->getSrc(S))) {
+        if (!Src->hasReg())
+          Func->getContext()->statsUpdateSpills();
+      }
+    }
+  }
+}
+
 } // end of anonymous namespace
 
 void CfgNode::emit(Cfg *Func) const {
@@ -871,14 +889,7 @@
   Ostream &Str = Func->getContext()->getStrEmit();
   Liveness *Liveness = Func->getLiveness();
   bool DecorateAsm = Liveness && Func->getContext()->getFlags().DecorateAsm;
-  if (Func->getEntryNode() == this) {
-    Str << Func->getContext()->mangleName(Func->getFunctionName()) << ":\n";
-  }
   Str << getAsmName() << ":\n";
-  if (Func->useIntegratedAssembler()) {
-    Assembler *Asm = Func->getAssembler<Assembler>();
-    Asm->BindCfgNodeLabel(getIndex());
-  }
   std::vector<SizeT> LiveRegCount(Func->getTarget()->getNumRegisters());
   if (DecorateAsm)
     emitRegisterUsage(Str, Func, this, true, LiveRegCount);
@@ -899,34 +910,37 @@
         ++LiveRegCount[Dest->getRegNum()];
       continue;
     }
-    if (Func->useIntegratedAssembler()) {
-      I->emitIAS(Func);
-    } else {
-      I->emit(Func);
-      if (DecorateAsm)
-        emitLiveRangesEnded(Str, Func, I, LiveRegCount);
-      Str << "\n";
-    }
-    // Update emitted instruction count, plus fill/spill count for
-    // Variable operands without a physical register.
-    if (uint32_t Count = I->getEmitInstCount()) {
-      Func->getContext()->statsUpdateEmitted(Count);
-      if (Variable *Dest = I->getDest()) {
-        if (!Dest->hasReg())
-          Func->getContext()->statsUpdateFills();
-      }
-      for (SizeT S = 0; S < I->getSrcSize(); ++S) {
-        if (Variable *Src = llvm::dyn_cast<Variable>(I->getSrc(S))) {
-          if (!Src->hasReg())
-            Func->getContext()->statsUpdateSpills();
-        }
-      }
-    }
+    I->emit(Func);
+    if (DecorateAsm)
+      emitLiveRangesEnded(Str, Func, I, LiveRegCount);
+    Str << "\n";
+    updateStats(Func, I);
   }
   if (DecorateAsm)
     emitRegisterUsage(Str, Func, this, false, LiveRegCount);
 }
 
+void CfgNode::emitIAS(Cfg *Func) const {
+  Func->setCurrentNode(this);
+  Assembler *Asm = Func->getAssembler<Assembler>();
+  Asm->BindCfgNodeLabel(getIndex());
+  for (InstPhi *Phi : Phis) {
+    if (Phi->isDeleted())
+      continue;
+    // Emitting a Phi instruction should cause an error.
+    Inst *Instr = Phi;
+    Instr->emitIAS(Func);
+  }
+  for (Inst *I : Insts) {
+    if (I->isDeleted())
+      continue;
+    if (I->isRedundantAssign())
+      continue;
+    I->emitIAS(Func);
+    updateStats(Func, I);
+  }
+}
+
 void CfgNode::dump(Cfg *Func) const {
   Func->setCurrentNode(this);
   Ostream &Str = Func->getContext()->getStrDump();
diff --git a/src/IceCfgNode.h b/src/IceCfgNode.h
index aa836c0..9cde8c1 100644
--- a/src/IceCfgNode.h
+++ b/src/IceCfgNode.h
@@ -82,6 +82,7 @@
   void contractIfEmpty();
   void doBranchOpt(const CfgNode *NextNode);
   void emit(Cfg *Func) const;
+  void emitIAS(Cfg *Func) const;
   void dump(Cfg *Func) const;
 
 private:
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index deed2bc..7ebf187 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp
@@ -342,84 +342,6 @@
 
 // ======================== Dump routines ======================== //
 
-namespace {
-
-void emitIASBytes(const Cfg *Func, const x86::AssemblerX86 *Asm,
-                  intptr_t StartPosition) {
-  GlobalContext *Ctx = Func->getContext();
-  Ostream &Str = Ctx->getStrEmit();
-  intptr_t EndPosition = Asm->GetPosition();
-  AssemblerFixup *LastFixup = Asm->GetLatestFixup(StartPosition);
-  if (!LastFixup) {
-    // The fixup doesn't apply to this current block.
-    for (intptr_t i = StartPosition; i < EndPosition; ++i) {
-      Str << "\t.byte 0x";
-      Str.write_hex(Asm->LoadBuffer<uint8_t>(i));
-      Str << "\n";
-    }
-    return;
-  }
-  intptr_t LastFixupLoc = LastFixup->position();
-  const intptr_t FixupSize = 4;
-  // The fixup does apply to this current block.
-  for (intptr_t i = StartPosition; i < LastFixupLoc; ++i) {
-    Str << "\t.byte 0x";
-    Str.write_hex(Asm->LoadBuffer<uint8_t>(i));
-    Str << "\n";
-  }
-  while (LastFixup) {
-    Str << "\t.long ";
-    const ConstantRelocatable *Reloc = LastFixup->value();
-    if (Reloc->getSuppressMangling())
-      Str << Reloc->getName();
-    else
-      Str << Ctx->mangleName(Reloc->getName());
-    if (LastFixup->value()->getOffset()) {
-      Str << " + " << LastFixup->value()->getOffset();
-    }
-    Str << "\n";
-    LastFixupLoc += FixupSize;
-    assert(LastFixupLoc <= EndPosition);
-    LastFixup = Asm->GetLatestFixup(LastFixupLoc);
-    // Assume multi-fixups are adjacent in the instruction encoding.
-    assert(!LastFixup || LastFixup->position() == LastFixupLoc);
-  }
-  for (intptr_t i = LastFixupLoc; i < EndPosition; ++i) {
-    Str << "\t.byte 0x";
-    Str.write_hex(Asm->LoadBuffer<uint8_t>(i));
-    Str << "\n";
-  }
-}
-
-void emitIASBytesBranch(const Cfg *Func, const x86::AssemblerX86 *Asm,
-                        intptr_t StartPosition, const x86::Label *Label,
-                        const IceString &LabelName, bool Near) {
-  // If this is a backward branch (label is bound), we're good and know
-  // the offset. If this is a forward branch, then we can't actually emit
-  // the thing as text in a streaming manner, because the fixup hasn't
-  // happened yet. Instead, emit .long ($BranchLabel) - (. + 4), in that
-  // case and let the external assembler take care of that fixup.
-  if (Label->IsBound()) {
-    emitIASBytes(Func, Asm, StartPosition);
-    return;
-  }
-  const intptr_t FwdBranchSize = Near ? 1 : 4;
-  const IceString FwdBranchDirective = Near ? ".byte" : ".long";
-  Ostream &Str = Func->getContext()->getStrEmit();
-  intptr_t EndPosition = Asm->GetPosition();
-  assert(EndPosition - StartPosition > FwdBranchSize);
-  for (intptr_t i = StartPosition; i < EndPosition - FwdBranchSize; ++i) {
-    Str << "\t.byte 0x";
-    Str.write_hex(Asm->LoadBuffer<uint8_t>(i));
-    Str << "\n";
-  }
-  Str << "\t" << FwdBranchDirective << " " << LabelName << " - (. + "
-      << FwdBranchSize << ")\n";
-  return;
-}
-
-} // end of anonymous namespace
-
 void InstX8632::dump(const Cfg *Func) const {
   Ostream &Str = Func->getContext()->getStrDump();
   Str << "[X8632] ";
@@ -434,10 +356,6 @@
 void InstX8632Label::emitIAS(const Cfg *Func) const {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
   Asm->BindLocalLabel(Number);
-  // TODO(jvoung): remove the the textual label once forward branch
-  // fixups are used (and text assembler is not used).
-  Ostream &Str = Func->getContext()->getStrEmit();
-  Str << getName(Func) << ":\n";
 }
 
 void InstX8632Label::dump(const Cfg *Func) const {
@@ -471,7 +389,6 @@
 
 void InstX8632Br::emitIAS(const Cfg *Func) const {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   if (Label) {
     x86::Label *L = Asm->GetOrCreateLocalLabel(Label->getNumber());
     // In all these cases, local Labels should only be used for Near.
@@ -481,7 +398,6 @@
     } else {
       Asm->j(Condition, L, Near);
     }
-    emitIASBytesBranch(Func, Asm, StartPosition, L, Label->getName(Func), Near);
   } else {
     // Pessimistically assume it's far. This only affects Labels that
     // are not Bound.
@@ -491,20 +407,13 @@
           Asm->GetOrCreateCfgNodeLabel(getTargetFalse()->getIndex());
       assert(!getTargetTrue());
       Asm->jmp(L, Near);
-      emitIASBytesBranch(Func, Asm, StartPosition, L,
-                         getTargetFalse()->getAsmName(), Near);
     } else {
       x86::Label *L = Asm->GetOrCreateCfgNodeLabel(getTargetTrue()->getIndex());
       Asm->j(Condition, L, Near);
-      emitIASBytesBranch(Func, Asm, StartPosition, L,
-                         getTargetTrue()->getAsmName(), Near);
-      StartPosition = Asm->GetPosition();
       if (getTargetFalse()) {
         x86::Label *L2 =
             Asm->GetOrCreateCfgNodeLabel(getTargetFalse()->getIndex());
         Asm->jmp(L2, Near);
-        emitIASBytesBranch(Func, Asm, StartPosition, L2,
-                           getTargetFalse()->getAsmName(), Near);
       }
     }
   }
@@ -549,9 +458,7 @@
 
 void InstX8632Call::emitIAS(const Cfg *Func) const {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   Operand *Target = getCallTarget();
-  bool NeedsFallback = false;
   if (const auto Var = llvm::dyn_cast<Variable>(Target)) {
     if (Var->hasReg()) {
       Asm->call(RegX8632::getEncodedGPR(Var->getRegNum()));
@@ -565,7 +472,6 @@
   } else if (const auto CR = llvm::dyn_cast<ConstantRelocatable>(Target)) {
     assert(CR->getOffset() == 0 && "We only support calling a function");
     Asm->call(CR);
-    NeedsFallback = true;
   } else if (const auto Imm = llvm::dyn_cast<ConstantInteger32>(Target)) {
     // NaCl trampoline calls refer to an address within the sandbox directly.
     // This is usually only needed for non-IRT builds and otherwise not
@@ -579,18 +485,6 @@
   } else {
     llvm_unreachable("Unexpected operand type");
   }
-  if (NeedsFallback) {
-    // TODO(jvoung): The ".long sym" hack doesn't work, since we need
-    // a pc-rel relocation and not an absolute relocation.
-    //
-    // Still, we have at least filled the assembler buffer so that the
-    // instruction sizes/positions are correct for jumps.
-    // For now, fall back to the regular .s emission, after filling the buffer.
-    emit(Func);
-    Func->getContext()->getStrEmit() << "\n";
-  } else {
-    emitIASBytes(Func, Asm, StartPosition);
-  }
   Func->getTarget()->resetStackAdjustment();
 }
 
@@ -628,7 +522,6 @@
 void emitIASOpTyGPR(const Cfg *Func, Type Ty, const Operand *Op,
                     const x86::AssemblerX86::GPREmitterOneOp &Emitter) {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   if (const auto Var = llvm::dyn_cast<Variable>(Op)) {
     if (Var->hasReg()) {
       // We cheat a little and use GPRRegister even for byte operations.
@@ -646,7 +539,6 @@
   } else {
     llvm_unreachable("Unexpected operand type");
   }
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 template <bool VarCanBeByte, bool SrcCanBeByte>
@@ -654,7 +546,6 @@
                        const Operand *Src,
                        const x86::AssemblerX86::GPREmitterRegOp &Emitter) {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   assert(Var->hasReg());
   // We cheat a little and use GPRRegister even for byte operations.
   RegX8632::GPRRegister VarReg =
@@ -686,14 +577,12 @@
   } else {
     llvm_unreachable("Unexpected operand type");
   }
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void emitIASAddrOpTyGPR(const Cfg *Func, Type Ty, const x86::Address &Addr,
                         const Operand *Src,
                         const x86::AssemblerX86::GPREmitterAddrOp &Emitter) {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   // Src can only be Reg or Immediate.
   if (const auto SrcVar = llvm::dyn_cast<Variable>(Src)) {
     assert(SrcVar->hasReg());
@@ -709,7 +598,6 @@
   } else {
     llvm_unreachable("Unexpected operand type");
   }
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void emitIASAsAddrOpTyGPR(const Cfg *Func, Type Ty, const Operand *Op0,
@@ -735,7 +623,6 @@
                      const Operand *Src,
                      const x86::AssemblerX86::GPREmitterShiftOp &Emitter) {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   // Technically, the Dest Var can be mem as well, but we only use Reg.
   // We can extend this to check Dest if we decide to use that form.
   assert(Var->hasReg());
@@ -754,14 +641,12 @@
   } else {
     llvm_unreachable("Unexpected operand type");
   }
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void emitIASGPRShiftDouble(const Cfg *Func, const Variable *Dest,
                            const Operand *Src1Op, const Operand *Src2Op,
                            const x86::AssemblerX86::GPREmitterShiftD &Emitter) {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   // Dest can be reg or mem, but we only use the reg variant.
   assert(Dest->hasReg());
   RegX8632::GPRRegister DestReg = RegX8632::getEncodedGPR(Dest->getRegNum());
@@ -778,14 +663,12 @@
     assert(llvm::cast<Variable>(Src2Op)->getRegNum() == RegX8632::Reg_ecx);
     (Asm->*(Emitter.GPRGPR))(Ty, DestReg, SrcReg);
   }
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void emitIASXmmShift(const Cfg *Func, Type Ty, const Variable *Var,
                      const Operand *Src,
                      const x86::AssemblerX86::XmmEmitterShiftOp &Emitter) {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   assert(Var->hasReg());
   RegX8632::XmmRegister VarReg = RegX8632::getEncodedXmm(Var->getRegNum());
   if (const auto SrcVar = llvm::dyn_cast<Variable>(Src)) {
@@ -806,14 +689,12 @@
   } else {
     llvm_unreachable("Unexpected operand type");
   }
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void emitIASRegOpTyXMM(const Cfg *Func, Type Ty, const Variable *Var,
                        const Operand *Src,
                        const x86::AssemblerX86::XmmEmitterRegOp &Emitter) {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   assert(Var->hasReg());
   RegX8632::XmmRegister VarReg = RegX8632::getEncodedXmm(Var->getRegNum());
   if (const auto SrcVar = llvm::dyn_cast<Variable>(Src)) {
@@ -835,7 +716,6 @@
   } else {
     llvm_unreachable("Unexpected operand type");
   }
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 template <typename DReg_t, typename SReg_t, DReg_t (*destEnc)(int32_t),
@@ -844,7 +724,6 @@
     const Cfg *Func, Type DispatchTy, const Variable *Dest, const Operand *Src,
     const x86::AssemblerX86::CastEmitterRegOp<DReg_t, SReg_t> Emitter) {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   assert(Dest->hasReg());
   DReg_t DestReg = destEnc(Dest->getRegNum());
   if (const auto SrcVar = llvm::dyn_cast<Variable>(Src)) {
@@ -862,7 +741,6 @@
   } else {
     llvm_unreachable("Unexpected operand type");
   }
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 template <typename DReg_t, typename SReg_t, DReg_t (*destEnc)(int32_t),
@@ -872,7 +750,6 @@
     const Operand *Src1,
     const x86::AssemblerX86::ThreeOpImmEmitter<DReg_t, SReg_t> Emitter) {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   // This only handles Dest being a register, and Src1 being an immediate.
   assert(Dest->hasReg());
   DReg_t DestReg = destEnc(Dest->getRegNum());
@@ -893,14 +770,12 @@
   } else {
     llvm_unreachable("Unexpected operand type");
   }
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void emitIASMovlikeXMM(const Cfg *Func, const Variable *Dest,
                        const Operand *Src,
                        const x86::AssemblerX86::XmmEmitterMovOps Emitter) {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   if (Dest->hasReg()) {
     RegX8632::XmmRegister DestReg = RegX8632::getEncodedXmm(Dest->getRegNum());
     if (const auto SrcVar = llvm::dyn_cast<Variable>(Src)) {
@@ -927,7 +802,6 @@
     (Asm->*(Emitter.AddrXmm))(StackAddr,
                               RegX8632::getEncodedXmm(SrcVar->getRegNum()));
   }
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 bool checkForRedundantAssign(const Variable *Dest, const Operand *Source) {
@@ -1432,7 +1306,6 @@
 
 template <> void InstX8632Cbwdq::emitIAS(const Cfg *Func) const {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   assert(getSrcSize() == 1);
   Operand *Src0 = getSrc(0);
   assert(llvm::isa<Variable>(Src0));
@@ -1454,7 +1327,6 @@
     Asm->cdq();
     break;
   }
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void InstX8632Mul::emit(const Cfg *Func) const {
@@ -1582,10 +1454,8 @@
   assert(SrcVar->hasReg());
   assert(SrcVar->getType() == IceType_i32);
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   Asm->cmov(Condition, RegX8632::getEncodedGPR(getDest()->getRegNum()),
             RegX8632::getEncodedGPR(SrcVar->getRegNum()));
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void InstX8632Cmov::dump(const Cfg *Func) const {
@@ -1611,7 +1481,6 @@
 
 void InstX8632Cmpps::emitIAS(const Cfg *Func) const {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   assert(getSrcSize() == 2);
   assert(Condition < CondX86::Cmpps_Invalid);
   // Assuming there isn't any load folding for cmpps, and vector constants
@@ -1627,7 +1496,6 @@
     Asm->cmpps(RegX8632::getEncodedXmm(getDest()->getRegNum()), SrcStackAddr,
                Condition);
   }
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void InstX8632Cmpps::dump(const Cfg *Func) const {
@@ -1654,7 +1522,6 @@
 void InstX8632Cmpxchg::emitIAS(const Cfg *Func) const {
   assert(getSrcSize() == 3);
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   Type Ty = getSrc(0)->getType();
   const auto Mem = llvm::cast<OperandX8632Mem>(getSrc(0));
   assert(Mem->getSegmentRegister() == OperandX8632Mem::DefaultSegment);
@@ -1668,7 +1535,6 @@
   } else {
     Asm->cmpxchg(Ty, Addr, Reg);
   }
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void InstX8632Cmpxchg::dump(const Cfg *Func) const {
@@ -1693,7 +1559,6 @@
 void InstX8632Cmpxchg8b::emitIAS(const Cfg *Func) const {
   assert(getSrcSize() == 5);
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   const auto Mem = llvm::cast<OperandX8632Mem>(getSrc(0));
   assert(Mem->getSegmentRegister() == OperandX8632Mem::DefaultSegment);
   const x86::Address Addr = Mem->toAsmAddress(Asm);
@@ -1701,7 +1566,6 @@
     Asm->lock();
   }
   Asm->cmpxchg8b(Addr);
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void InstX8632Cmpxchg8b::dump(const Cfg *Func) const {
@@ -1868,9 +1732,7 @@
 
 void InstX8632UD2::emitIAS(const Cfg *Func) const {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   Asm->ud2();
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void InstX8632UD2::dump(const Cfg *Func) const {
@@ -1922,9 +1784,7 @@
 
 void InstX8632Mfence::emitIAS(const Cfg *Func) const {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   Asm->mfence();
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void InstX8632Mfence::dump(const Cfg *Func) const {
@@ -1954,7 +1814,6 @@
     assert(SrcVar->hasReg());
     RegX8632::XmmRegister SrcReg = RegX8632::getEncodedXmm(SrcVar->getRegNum());
     x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-    intptr_t StartPosition = Asm->GetPosition();
     if (const auto DestVar = llvm::dyn_cast<Variable>(Dest)) {
       assert(!DestVar->hasReg());
       x86::Address StackAddr(static_cast<TargetX8632 *>(Func->getTarget())
@@ -1965,7 +1824,6 @@
       assert(DestMem->getSegmentRegister() == OperandX8632Mem::DefaultSegment);
       Asm->movss(DestTy, DestMem->toAsmAddress(Asm), SrcReg);
     }
-    emitIASBytes(Func, Asm, StartPosition);
     return;
   } else {
     assert(isScalarIntegerType(DestTy));
@@ -1994,7 +1852,6 @@
 
 void InstX8632StoreP::emitIAS(const Cfg *Func) const {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   assert(getSrcSize() == 2);
   const auto SrcVar = llvm::cast<Variable>(getSrc(0));
   const auto DestMem = llvm::cast<OperandX8632Mem>(getSrc(1));
@@ -2002,7 +1859,6 @@
   assert(SrcVar->hasReg());
   Asm->movups(DestMem->toAsmAddress(Asm),
               RegX8632::getEncodedXmm(SrcVar->getRegNum()));
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void InstX8632StoreP::dump(const Cfg *Func) const {
@@ -2026,7 +1882,6 @@
 
 void InstX8632StoreQ::emitIAS(const Cfg *Func) const {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   assert(getSrcSize() == 2);
   const auto SrcVar = llvm::cast<Variable>(getSrc(0));
   const auto DestMem = llvm::cast<OperandX8632Mem>(getSrc(1));
@@ -2034,7 +1889,6 @@
   assert(SrcVar->hasReg());
   Asm->movq(DestMem->toAsmAddress(Asm),
             RegX8632::getEncodedXmm(SrcVar->getRegNum()));
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void InstX8632StoreQ::dump(const Cfg *Func) const {
@@ -2145,10 +1999,8 @@
       const auto SrcVar = llvm::cast<Variable>(Src);
       assert(SrcVar->hasReg());
       x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-      intptr_t StartPosition = Asm->GetPosition();
       Asm->movss(SrcTy, StackAddr,
                  RegX8632::getEncodedXmm(SrcVar->getRegNum()));
-      emitIASBytes(Func, Asm, StartPosition);
       return;
     } else {
       // Src can be a register or immediate.
@@ -2162,7 +2014,6 @@
 
 template <> void InstX8632Movd::emitIAS(const Cfg *Func) const {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   assert(getSrcSize() == 1);
   const Variable *Dest = getDest();
   const auto SrcVar = llvm::cast<Variable>(getSrc(0));
@@ -2192,7 +2043,6 @@
       Asm->movd(StackAddr, SrcReg);
     }
   }
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 template <> void InstX8632Movp::emit(const Cfg *Func) const {
@@ -2252,10 +2102,8 @@
   const auto SrcVar = llvm::cast<Variable>(getSrc(1));
   assert(Dest->hasReg() && SrcVar->hasReg());
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   Asm->movss(IceType_f32, RegX8632::getEncodedXmm(Dest->getRegNum()),
              RegX8632::getEncodedXmm(SrcVar->getRegNum()));
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 template <> void InstX8632Movsx::emitIAS(const Cfg *Func) const {
@@ -2289,10 +2137,8 @@
 
 void InstX8632Nop::emitIAS(const Cfg *Func) const {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   // TODO: Emit the right code for the variant.
   Asm->nop();
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void InstX8632Nop::dump(const Cfg *Func) const {
@@ -2325,7 +2171,6 @@
 
 void InstX8632Fld::emitIAS(const Cfg *Func) const {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   assert(getSrcSize() == 1);
   const Operand *Src = getSrc(0);
   Type Ty = Src->getType();
@@ -2352,7 +2197,6 @@
   } else {
     llvm_unreachable("Unexpected operand type");
   }
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void InstX8632Fld::dump(const Cfg *Func) const {
@@ -2395,7 +2239,6 @@
 
 void InstX8632Fstp::emitIAS(const Cfg *Func) const {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   assert(getSrcSize() == 0);
   const Variable *Dest = getDest();
   // TODO(jvoung,stichnot): Utilize this by setting Dest to nullptr to
@@ -2404,7 +2247,6 @@
   // of popping the stack.
   if (!Dest) {
     Asm->fstp(RegX8632::getEncodedSTReg(0));
-    emitIASBytes(Func, Asm, StartPosition);
     return;
   }
   Type Ty = Dest->getType();
@@ -2424,7 +2266,6 @@
     Asm->movss(Ty, RegX8632::getEncodedXmm(Dest->getRegNum()), StackSlot);
     Asm->add(IceType_i32, RegX8632::Encoded_Reg_esp, Width);
   }
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void InstX8632Fstp::dump(const Cfg *Func) const {
@@ -2575,14 +2416,12 @@
 void InstX8632Pop::emitIAS(const Cfg *Func) const {
   assert(getSrcSize() == 0);
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   if (getDest()->hasReg()) {
     Asm->popl(RegX8632::getEncodedGPR(getDest()->getRegNum()));
   } else {
     Asm->popl(static_cast<TargetX8632 *>(Func->getTarget())
                   ->stackVarToAsmOperand(getDest()));
   }
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void InstX8632Pop::dump(const Cfg *Func) const {
@@ -2599,9 +2438,7 @@
 
 void InstX8632AdjustStack::emitIAS(const Cfg *Func) const {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   Asm->sub(IceType_i32, RegX8632::Encoded_Reg_esp, x86::Immediate(Amount));
-  emitIASBytes(Func, Asm, StartPosition);
   Func->getTarget()->updateStackAdjustment(Amount);
 }
 
@@ -2626,9 +2463,7 @@
   const auto Var = llvm::cast<Variable>(getSrc(0));
   assert(Var->hasReg());
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   Asm->pushl(RegX8632::getEncodedGPR(Var->getRegNum()));
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void InstX8632Push::dump(const Cfg *Func) const {
@@ -2666,9 +2501,7 @@
 
 void InstX8632Ret::emitIAS(const Cfg *Func) const {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   Asm->ret();
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void InstX8632Ret::dump(const Cfg *Func) const {
@@ -2692,7 +2525,6 @@
 void InstX8632Xadd::emitIAS(const Cfg *Func) const {
   assert(getSrcSize() == 2);
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   Type Ty = getSrc(0)->getType();
   const auto Mem = llvm::cast<OperandX8632Mem>(getSrc(0));
   assert(Mem->getSegmentRegister() == OperandX8632Mem::DefaultSegment);
@@ -2705,7 +2537,6 @@
     Asm->lock();
   }
   Asm->xadd(Ty, Addr, Reg);
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void InstX8632Xadd::dump(const Cfg *Func) const {
@@ -2729,7 +2560,6 @@
 void InstX8632Xchg::emitIAS(const Cfg *Func) const {
   assert(getSrcSize() == 2);
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   Type Ty = getSrc(0)->getType();
   const auto Mem = llvm::cast<OperandX8632Mem>(getSrc(0));
   assert(Mem->getSegmentRegister() == OperandX8632Mem::DefaultSegment);
@@ -2739,7 +2569,6 @@
   const RegX8632::GPRRegister Reg =
       RegX8632::getEncodedGPR(VarReg->getRegNum());
   Asm->xchg(Ty, Addr, Reg);
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void InstX8632Xchg::dump(const Cfg *Func) const {
diff --git a/src/IceTranslator.cpp b/src/IceTranslator.cpp
index 0391b6c..edaa74f 100644
--- a/src/IceTranslator.cpp
+++ b/src/IceTranslator.cpp
@@ -83,7 +83,11 @@
       ErrorStatus = true;
     }
 
-    Func->emit();
+    if (Ctx->getFlags().UseIntegratedAssembler) {
+      Func->emitIAS();
+    } else {
+      Func->emit();
+    }
     Ctx->dumpStats(Func->getFunctionName());
   }
 
diff --git a/src/assembler.cpp b/src/assembler.cpp
index 50d82b7..b0bd297 100644
--- a/src/assembler.cpp
+++ b/src/assembler.cpp
@@ -19,7 +19,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "assembler.h"
+#include "IceGlobalContext.h"
 #include "IceMemoryRegion.h"
+#include "IceOperand.h"
 
 namespace Ice {
 
@@ -74,18 +76,6 @@
 
 AssemblerBuffer::~AssemblerBuffer() {}
 
-// Returns the latest fixup at or after the given position, or nullptr if
-// there is none.  Assumes fixups were added in increasing order.
-AssemblerFixup *AssemblerBuffer::GetLatestFixup(intptr_t position) const {
-  AssemblerFixup *latest_fixup = nullptr;
-  for (auto I = fixups_.rbegin(), E = fixups_.rend(); I != E; ++I) {
-    if ((*I)->position() < position)
-      return latest_fixup;
-    latest_fixup = *I;
-  }
-  return latest_fixup;
-}
-
 void AssemblerBuffer::ProcessFixups(const MemoryRegion &region) {
   for (SizeT I = 0; I < fixups_.size(); ++I) {
     AssemblerFixup *fixup = fixups_[I];
@@ -133,4 +123,43 @@
   assert(Size() == old_size);
 }
 
+void Assembler::emitIASBytes(GlobalContext *Ctx) const {
+  Ostream &Str = Ctx->getStrEmit();
+  intptr_t EndPosition = buffer_.Size();
+  intptr_t CurPosition = 0;
+  const intptr_t FixupSize = 4;
+  for (AssemblerBuffer::FixupList::const_iterator
+           FixupI = buffer_.fixups_begin(),
+           FixupE = buffer_.fixups_end(); FixupI != FixupE; ++FixupI) {
+    AssemblerFixup *NextFixup = *FixupI;
+    intptr_t NextFixupLoc = NextFixup->position();
+    for (intptr_t i = CurPosition; i < NextFixupLoc; ++i) {
+      Str << "\t.byte 0x";
+      Str.write_hex(buffer_.Load<uint8_t>(i));
+      Str << "\n";
+    }
+    Str << "\t.long ";
+    const ConstantRelocatable *Reloc = NextFixup->value();
+    if (Reloc->getSuppressMangling())
+      Str << Reloc->getName();
+    else
+      Str << Ctx->mangleName(Reloc->getName());
+    if (Reloc->getOffset()) {
+      Str << " + " << Reloc->getOffset();
+    }
+    bool IsPCRel = NextFixup->kind() == FK_PcRel_4;
+    if (IsPCRel)
+      Str << " - (. + " << FixupSize << ")";
+    Str << "\n";
+    CurPosition = NextFixupLoc + FixupSize;
+    assert(CurPosition <= EndPosition);
+  }
+  // Handle any bytes that are not prefixed by a fixup.
+  for (intptr_t i = CurPosition; i < EndPosition; ++i) {
+    Str << "\t.byte 0x";
+    Str.write_hex(buffer_.Load<uint8_t>(i));
+    Str << "\n";
+  }
+}
+
 } // end of namespace Ice
diff --git a/src/assembler.h b/src/assembler.h
index dcb83ed..37963c4 100644
--- a/src/assembler.h
+++ b/src/assembler.h
@@ -162,8 +162,10 @@
   // Returns the position in the instruction stream.
   intptr_t GetPosition() const { return cursor_ - contents_; }
 
-  // For bringup only.
-  AssemblerFixup *GetLatestFixup(intptr_t position) const;
+  // List of pool-allocated fixups.
+  typedef std::vector<AssemblerFixup *> FixupList;
+  FixupList::const_iterator fixups_begin() const { return fixups_.begin(); }
+  FixupList::const_iterator fixups_end() const { return fixups_.end(); }
 
 private:
   // The limit is set to kMinimumGap bytes before the end of the data area.
@@ -175,7 +177,7 @@
   uintptr_t cursor_;
   uintptr_t limit_;
   Assembler &assembler_;
-  std::vector<AssemblerFixup *> fixups_;
+  FixupList fixups_;
 #ifndef NDEBUG
   bool fixups_processed_;
 #endif // !NDEBUG
@@ -206,7 +208,7 @@
   Assembler &operator=(const Assembler &) = delete;
 
 public:
-  Assembler() {}
+  Assembler() : buffer_(*this) {}
   virtual ~Assembler() {}
 
   // Allocate a chunk of bytes using the per-Assembler allocator.
@@ -226,8 +228,13 @@
 
   virtual void BindCfgNodeLabel(SizeT NodeNumber) = 0;
 
+  void emitIASBytes(GlobalContext *Ctx) const;
+
 private:
   llvm::BumpPtrAllocator Allocator;
+
+protected:
+  AssemblerBuffer buffer_;
 };
 
 } // end of namespace Ice
diff --git a/src/assembler_ia32.h b/src/assembler_ia32.h
index 77159c4..a7ffc9e 100644
--- a/src/assembler_ia32.h
+++ b/src/assembler_ia32.h
@@ -364,7 +364,7 @@
   AssemblerX86 &operator=(const AssemblerX86 &) = delete;
 
 public:
-  explicit AssemblerX86(bool use_far_branches = false) : buffer_(*this) {
+  explicit AssemblerX86(bool use_far_branches = false) : Assembler() {
     // This mode is only needed and implemented for MIPS and ARM.
     assert(!use_far_branches);
     (void)use_far_branches;
@@ -829,15 +829,6 @@
     buffer_.FinalizeInstructions(region);
   }
 
-  // Expose the buffer, for bringup...
-  intptr_t GetPosition() const { return buffer_.GetPosition(); }
-  template <typename T> T LoadBuffer(intptr_t position) const {
-    return buffer_.Load<T>(position);
-  }
-  AssemblerFixup *GetLatestFixup(intptr_t position) const {
-    return buffer_.GetLatestFixup(position);
-  }
-
 private:
   inline void EmitUint8(uint8_t value);
   inline void EmitInt16(int16_t value);
@@ -868,8 +859,6 @@
   LabelVector LocalLabels;
 
   Label *GetOrCreateLabel(SizeT Number, LabelVector &Labels);
-
-  AssemblerBuffer buffer_;
 };
 
 inline void AssemblerX86::EmitUint8(uint8_t value) {
diff --git a/tests_lit/llvm2ice_tests/64bit.pnacl.ll b/tests_lit/llvm2ice_tests/64bit.pnacl.ll
index ed67ce2..e2bde1c 100644
--- a/tests_lit/llvm2ice_tests/64bit.pnacl.ll
+++ b/tests_lit/llvm2ice_tests/64bit.pnacl.ll
@@ -44,7 +44,7 @@
 ; CHECK:      mov     dword ptr [esp + 16]
 ; CHECK:      mov     dword ptr [esp + 12]
 ; CHECK:      call    -4
-; CALLTARGETS: call ignore64BitArgNoInline
+; CALLTARGETS: .long ignore64BitArgNoInline
 ; CHECK:      sub     esp
 ; CHECK:      mov     dword ptr [esp + 4]
 ; CHECK:      mov     dword ptr [esp]
@@ -52,7 +52,7 @@
 ; CHECK:      mov     dword ptr [esp + 16]
 ; CHECK:      mov     dword ptr [esp + 12]
 ; CHECK:      call    -4
-; CALLTARGETS: call ignore64BitArgNoInline
+; CALLTARGETS: .long ignore64BitArgNoInline
 ; CHECK:      sub     esp
 ; CHECK:      mov     dword ptr [esp + 4]
 ; CHECK:      mov     dword ptr [esp]
@@ -60,7 +60,7 @@
 ; CHECK:      mov     dword ptr [esp + 16]
 ; CHECK:      mov     dword ptr [esp + 12]
 ; CHECK:      call    -4
-; CALLTARGETS: call ignore64BitArgNoInline
+; CALLTARGETS: .long ignore64BitArgNoInline
 ;
 ; OPTM1-LABEL: pass64BitArg
 ; OPTM1:      sub     esp
@@ -104,7 +104,7 @@
 ; Bundle padding will push the call down.
 ; CHECK-NOT:  mov
 ; CHECK:      call    -4
-; CALLTARGETS: call ignore64BitArgNoInline
+; CALLTARGETS: .long ignore64BitArgNoInline
 ;
 ; OPTM1-LABEL: pass64BitConstArg
 ; OPTM1:      sub     esp
@@ -239,7 +239,7 @@
 ; CHECK-LABEL: div64BitSigned
 ; CALLTARGETS-LABEL: div64BitSigned
 ; CHECK: call    -4
-; CALLTARGETS: call __divdi3
+; CALLTARGETS: .long __divdi3
 
 ; OPTM1-LABEL: div64BitSigned
 ; OPTM1: call    -4
@@ -254,7 +254,7 @@
 ; CHECK: mov     dword ptr [esp + 12], 2874
 ; CHECK: mov     dword ptr [esp + 8],  1942892530
 ; CHECK: call    -4
-; CALLTARGETS: call __divdi3
+; CALLTARGETS: .long __divdi3
 ;
 ; OPTM1-LABEL: div64BitSignedConst
 ; OPTM1: mov     dword ptr [esp + 12], 2874
@@ -269,7 +269,7 @@
 ; CHECK-LABEL: div64BitUnsigned
 ; CALLTARGETS-LABEL: div64BitUnsigned
 ; CHECK: call    -4
-; CALLTARGETS: call __udivdi3
+; CALLTARGETS: .long __udivdi3
 ;
 ; OPTM1-LABEL: div64BitUnsigned
 ; OPTM1: call    -4
@@ -282,7 +282,7 @@
 ; CHECK-LABEL: rem64BitSigned
 ; CALLTARGETS-LABEL: rem64BitSigned
 ; CHECK: call    -4
-; CALLTARGETS: call __moddi3
+; CALLTARGETS: .long __moddi3
 ;
 ; OPTM1-LABEL: rem64BitSigned
 ; OPTM1: call    -4
@@ -295,7 +295,7 @@
 ; CHECK-LABEL: rem64BitUnsigned
 ; CALLTARGETS-LABEL: rem64BitUnsigned
 ; CHECK: call    -4
-; CALLTARGETS: call __umoddi3
+; CALLTARGETS: .long __umoddi3
 ;
 ; OPTM1-LABEL: rem64BitUnsigned
 ; OPTM1: call    -4
diff --git a/tests_lit/llvm2ice_tests/arith-opt.ll b/tests_lit/llvm2ice_tests/arith-opt.ll
index 7176f00..a21388a 100644
--- a/tests_lit/llvm2ice_tests/arith-opt.ll
+++ b/tests_lit/llvm2ice_tests/arith-opt.ll
@@ -1,7 +1,7 @@
 ; This is a very early test that just checks the representation of i32
 ; arithmetic instructions.  No assembly tests are done.
 
-; RUN: %p2i -i %s --args --verbose inst | FileCheck %s
+; RUN: %p2i -i %s --args --verbose inst -ias=0 | FileCheck %s
 
 define i32 @Add(i32 %a, i32 %b) {
 ; CHECK: define i32 @Add
diff --git a/tests_lit/llvm2ice_tests/fp.pnacl.ll b/tests_lit/llvm2ice_tests/fp.pnacl.ll
index f8c316e..8507063 100644
--- a/tests_lit/llvm2ice_tests/fp.pnacl.ll
+++ b/tests_lit/llvm2ice_tests/fp.pnacl.ll
@@ -47,17 +47,18 @@
   ret i32 %add3
 }
 ; CHECK-LABEL: passFpArgs
+; CALLTARGETS-LABEL: passFpArgs
 ; CHECK: mov dword ptr [esp + 4], 123
-; CHECK: call ignoreFpArgsNoInline
+; CHECK: call -4
+; CALLTARGETS: .long ignoreFpArgsNoInline
 ; CHECK: mov dword ptr [esp + 4], 123
-; CHECK: call ignoreFpArgsNoInline
+; CHECK: call -4
+; CALLTARGETS: .long ignoreFpArgsNoInline
 ; CHECK: mov dword ptr [esp + 4], 123
-; CHECK: call ignoreFpArgsNoInline
+; CHECK: call -4
+; CALLTARGETS: .long ignoreFpArgsNoInline
 
-define i32 @ignoreFpArgsNoInline(float %x, i32 %y, double %z) {
-entry:
-  ret i32 %y
-}
+declare i32 @ignoreFpArgsNoInline(float %x, i32 %y, double %z)
 
 define internal i32 @passFpConstArg(float %a, double %b) {
 entry:
@@ -65,8 +66,10 @@
   ret i32 %call
 }
 ; CHECK-LABEL: passFpConstArg
+; CALLTARGETS-LABEL: passFpConstArg
 ; CHECK: mov dword ptr [esp + 4], 123
-; CHECK: call ignoreFpArgsNoInline
+; CHECK: call -4
+; CALLTARGETS: .long ignoreFpArgsNoInline
 
 define internal i32 @passFp32ConstArg(float %a) {
 entry:
@@ -191,7 +194,7 @@
 ; CHECK-LABEL: remFloat
 ; CALLTARGETS-LABEL: remFloat
 ; CHECK: call -4
-; CALLTARGETS: call fmodf
+; CALLTARGETS: .long fmodf
 
 define internal double @remDouble(double %a, double %b) {
 entry:
@@ -201,7 +204,7 @@
 ; CHECK-LABEL: remDouble
 ; CALLTARGETS-LABEL: remDouble
 ; CHECK: call -4
-; CALLTARGETS: call fmod
+; CALLTARGETS: .long fmod
 
 define internal float @fptrunc(double %a) {
 entry:
@@ -229,7 +232,7 @@
 ; CHECK-LABEL: doubleToSigned64
 ; CALLTARGETS-LABEL: doubleToSigned64
 ; CHECK: call -4
-; CALLTARGETS: call cvtdtosi64
+; CALLTARGETS: .long cvtdtosi64
 
 define internal i64 @floatToSigned64(float %a) {
 entry:
@@ -239,7 +242,7 @@
 ; CHECK-LABEL: floatToSigned64
 ; CALLTARGETS-LABEL: floatToSigned64
 ; CHECK: call -4
-; CALLTARGETS: call cvtftosi64
+; CALLTARGETS: .long cvtftosi64
 
 define internal i64 @doubleToUnsigned64(double %a) {
 entry:
@@ -249,7 +252,7 @@
 ; CHECK-LABEL: doubleToUnsigned64
 ; CALLTARGETS-LABEL: doubleToUnsigned64
 ; CHECK: call -4
-; CALLTARGETS: call cvtdtoui64
+; CALLTARGETS: .long cvtdtoui64
 
 define internal i64 @floatToUnsigned64(float %a) {
 entry:
@@ -259,7 +262,7 @@
 ; CHECK-LABEL: floatToUnsigned64
 ; CALLTARGETS-LABEL: floatToUnsigned64
 ; CHECK: call -4
-; CALLTARGETS: call cvtftoui64
+; CALLTARGETS: .long cvtftoui64
 
 define internal i32 @doubleToSigned32(double %a) {
 entry:
@@ -293,7 +296,7 @@
 ; CHECK-LABEL: doubleToUnsigned32
 ; CALLTARGETS-LABEL: doubleToUnsigned32
 ; CHECK: call -4
-; CALLTARGETS: call cvtdtoui32
+; CALLTARGETS: .long cvtdtoui32
 
 define internal i32 @floatToUnsigned32(float %a) {
 entry:
@@ -303,7 +306,7 @@
 ; CHECK-LABEL: floatToUnsigned32
 ; CALLTARGETS-LABEL: floatToUnsigned32
 ; CHECK: call -4
-; CALLTARGETS: call cvtftoui32
+; CALLTARGETS: .long cvtftoui32
 
 
 define internal i32 @doubleToSigned16(double %a) {
@@ -414,7 +417,7 @@
 ; CHECK-LABEL: signed64ToDouble
 ; CALLTARGETS-LABEL: signed64ToDouble
 ; CHECK: call -4
-; CALLTARGETS: call cvtsi64tod
+; CALLTARGETS: .long cvtsi64tod
 ; CHECK: fstp qword
 
 define internal float @signed64ToFloat(i64 %a) {
@@ -425,7 +428,7 @@
 ; CHECK-LABEL: signed64ToFloat
 ; CALLTARGETS-LABEL: signed64ToFloat
 ; CHECK: call -4
-; CALLTARGETS: call cvtsi64tof
+; CALLTARGETS: .long cvtsi64tof
 ; CHECK: fstp dword
 
 define internal double @unsigned64ToDouble(i64 %a) {
@@ -436,7 +439,7 @@
 ; CHECK-LABEL: unsigned64ToDouble
 ; CALLTARGETS-LABEL: unsigned64ToDouble
 ; CHECK: call -4
-; CALLTARGETS: call cvtui64tod
+; CALLTARGETS: .long cvtui64tod
 ; CHECK: fstp
 
 define internal float @unsigned64ToFloat(i64 %a) {
@@ -447,7 +450,7 @@
 ; CHECK-LABEL: unsigned64ToFloat
 ; CALLTARGETS-LABEL: unsigned64ToFloat
 ; CHECK: call -4
-; CALLTARGETS: call cvtui64tof
+; CALLTARGETS: .long cvtui64tof
 ; CHECK: fstp
 
 define internal double @unsigned64ToDoubleConst() {
@@ -460,7 +463,7 @@
 ; CHECK: mov dword ptr [esp + 4], 2874
 ; CHECK: mov dword ptr [esp], 1942892530
 ; CHECK: call -4
-; CALLTARGETS: call cvtui64tod
+; CALLTARGETS: .long cvtui64tod
 ; CHECK: fstp
 
 define internal double @signed32ToDouble(i32 %a) {
@@ -498,7 +501,7 @@
 ; CHECK-LABEL: unsigned32ToDouble
 ; CALLTARGETS-LABEL: unsigned32ToDouble
 ; CHECK: call -4
-; CALLTARGETS: call cvtui32tod
+; CALLTARGETS: .long cvtui32tod
 ; CHECK: fstp qword
 
 define internal float @unsigned32ToFloat(i32 %a) {
@@ -509,7 +512,7 @@
 ; CHECK-LABEL: unsigned32ToFloat
 ; CALLTARGETS-LABEL: unsigned32ToFloat
 ; CHECK: call -4
-; CALLTARGETS: call cvtui32tof
+; CALLTARGETS: .long cvtui32tof
 ; CHECK: fstp dword
 
 define internal double @signed16ToDouble(i32 %a) {
diff --git a/tests_lit/llvm2ice_tests/ias-multi-reloc.ll b/tests_lit/llvm2ice_tests/ias-multi-reloc.ll
index 4e25c66..2d3f6bf 100644
--- a/tests_lit/llvm2ice_tests/ias-multi-reloc.ll
+++ b/tests_lit/llvm2ice_tests/ias-multi-reloc.ll
@@ -12,7 +12,7 @@
 
 @global_char = internal global [1 x i8] zeroinitializer, align 1
 @p_global_char = internal global [4 x i8] zeroinitializer, align 4
-declare void @dummy(i32)
+declare void @dummy()
 
 define internal void @store_immediate_to_global() {
 entry:
@@ -48,8 +48,7 @@
   br i1 %cmp, label %if.then, label %if.end
 
 if.then:                                          ; preds = %entry
-  %dummy.bc = bitcast void (i32)* @dummy to void ()*
-  tail call void %dummy.bc()
+  tail call void @dummy()
   br label %if.end
 
 if.end:                                           ; preds = %if.then, %entry
@@ -58,4 +57,4 @@
 ; CHECK-LABEL: cmp_global_immediate
 ; CHECK: .long p_global_char
 ; CHECK: .long global_char
-; CHECK: call dummy
+; CHECK: .long dummy
diff --git a/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll b/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll
index 0cc3913..59eba0d 100644
--- a/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll
+++ b/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll
@@ -63,7 +63,7 @@
 ; CHECKO2UNSANDBOXEDREM-LABEL: test_nacl_read_tp
 ; CHECKO2UNSANDBOXEDREM: call -4
 ; CALLTARGETS-LABEL: test_nacl_read_tp
-; CALLTARGETS: call __nacl_read_tp
+; CALLTARGETS: .long __nacl_read_tp
 
 define i32 @test_nacl_read_tp_more_addressing() {
 entry:
@@ -92,8 +92,8 @@
 ; CHECKO2UNSANDBOXEDREM: call -4
 ; CHECKO2UNSANDBOXEDREM: call -4
 ; CALLTARGETS-LABEL: test_nacl_read_tp_more_addressing
-; CALLTARGETS: call __nacl_read_tp
-; CALLTARGETS: call __nacl_read_tp
+; CALLTARGETS: .long __nacl_read_tp
+; CALLTARGETS: .long __nacl_read_tp
 
 define i32 @test_nacl_read_tp_dead(i32 %a) {
 entry:
@@ -121,7 +121,7 @@
 ; CHECK-LABEL: test_memcpy
 ; CHECK: call -4
 ; CALLTARGETS-LABEL: test_memcpy
-; CALLTARGETS: call memcpy
+; CALLTARGETS: .long memcpy
 ; CHECKO2REM-LABEL: test_memcpy
 ; CHECKO2UNSANDBOXEDREM-LABEL: test_memcpy
 
@@ -138,7 +138,7 @@
 ; CHECK-LABEL: test_memcpy_const_len_align
 ; CHECK: call -4
 ; CALLTARGETS-LABEL: test_memcpy_const_len_align
-; CALLTARGETS: call memcpy
+; CALLTARGETS: .long memcpy
 
 define void @test_memmove(i32 %iptr_dst, i32 %iptr_src, i32 %len) {
 entry:
@@ -151,7 +151,7 @@
 ; CHECK-LABEL: test_memmove
 ; CHECK: call -4
 ; CALLTARGETS-LABEL: test_memmove
-; CALLTARGETS: call memmove
+; CALLTARGETS: .long memmove
 
 define void @test_memmove_const_len_align(i32 %iptr_dst, i32 %iptr_src) {
 entry:
@@ -164,7 +164,7 @@
 ; CHECK-LABEL: test_memmove_const_len_align
 ; CHECK: call -4
 ; CALLTARGETS-LABEL: test_memmove_const_len_align
-; CALLTARGETS: call memmove
+; CALLTARGETS: .long memmove
 
 define void @test_memset(i32 %iptr_dst, i32 %wide_val, i32 %len) {
 entry:
@@ -178,7 +178,7 @@
 ; CHECK: movzx
 ; CHECK: call -4
 ; CALLTARGETS-LABEL: test_memset
-; CALLTARGETS: call memset
+; CALLTARGETS: .long memset
 
 define void @test_memset_const_len_align(i32 %iptr_dst, i32 %wide_val) {
 entry:
@@ -192,7 +192,7 @@
 ; CHECK: movzx
 ; CHECK: call -4
 ; CALLTARGETS-LABEL: test_memset_const_len_align
-; CALLTARGETS: call memset
+; CALLTARGETS: .long memset
 
 define void @test_memset_const_val(i32 %iptr_dst, i32 %len) {
 entry:
@@ -205,7 +205,7 @@
 ; CHECK: movzx {{.*}}, {{[^0]}}
 ; CHECK: call -4
 ; CALLTARGETS-LABEL: test_memset_const_val
-; CALLTARGETS: call memset
+; CALLTARGETS: .long memset
 
 
 define i32 @test_setjmplongjmp(i32 %iptr_env) {
@@ -229,8 +229,8 @@
 ; CHECKO2REM: call -4
 ; CHECKO2REM: call -4
 ; CALLTARGETS-LABEL: test_setjmplongjmp
-; CALLTARGETS: call setjmp
-; CALLTARGETS: call longjmp
+; CALLTARGETS: .long setjmp
+; CALLTARGETS: .long longjmp
 
 define i32 @test_setjmp_unused(i32 %iptr_env, i32 %i_other) {
 entry:
@@ -243,7 +243,7 @@
 ; CHECKO2REM-LABEL: test_setjmp_unused
 ; CHECKO2REM: call -4
 ; CALLTARGETS-LABEL: test_setjmp_unused
-; CALLTARGETS: call setjmp
+; CALLTARGETS: .long setjmp
 
 define float @test_sqrt_float(float %x, i32 %iptr) {
 entry:
@@ -451,7 +451,7 @@
 ; CHECK-LABEL: test_popcount_32
 ; CHECK: call -4
 ; CALLTARGETS-LABEL: test_popcount_32
-; CALLTARGETS: call __popcountsi2
+; CALLTARGETS: .long __popcountsi2
 
 define i64 @test_popcount_64(i64 %x) {
 entry:
@@ -464,7 +464,7 @@
 ; the return value just in case.
 ; CHECK: mov {{.*}}, 0
 ; CALLTARGETS-LABEL: test_popcount_64
-; CALLTARGETS: call __popcountdi2
+; CALLTARGETS: .long __popcountdi2
 
 
 define i32 @test_popcount_64_ret_i32(i64 %x) {
@@ -478,7 +478,7 @@
 ; CHECKO2REM: call -4
 ; CHECKO2REM-NOT: mov {{.*}}, 0
 ; CALLTARGETS-LABEL: test_popcount_64_ret_i32
-; CALLTARGETS: call __popcountdi2
+; CALLTARGETS: .long __popcountdi2
 
 define void @test_stacksave_noalloca() {
 entry:
diff --git a/tests_lit/llvm2ice_tests/unreachable.ll b/tests_lit/llvm2ice_tests/unreachable.ll
index 3a8cf40..5fda09c 100644
--- a/tests_lit/llvm2ice_tests/unreachable.ll
+++ b/tests_lit/llvm2ice_tests/unreachable.ll
@@ -30,7 +30,7 @@
 ; CALLTARGETS-LABEL: divide
 ; CHECK: cmp
 ; CHECK: call -4
-; CALLTARGETS: call ice_unreachable
+; CALLTARGETS: .long ice_unreachable
 ; CHECK: cdq
 ; CHECK: idiv
 ; CHECK: ret
diff --git a/tests_lit/llvm2ice_tests/vector-bitcast.ll b/tests_lit/llvm2ice_tests/vector-bitcast.ll
index f85612b..0fd2628 100644
--- a/tests_lit/llvm2ice_tests/vector-bitcast.ll
+++ b/tests_lit/llvm2ice_tests/vector-bitcast.ll
@@ -167,7 +167,7 @@
 ; CHECK-LABEL: test_bitcast_v8i1_to_i8:
 ; CALLTARGETS-LABEL: test_bitcast_v8i1_to_i8:
 ; CHECK: call -4
-; CALLTARGETS: call Sz_bitcast_v8i1_to_i8
+; CALLTARGETS: .long Sz_bitcast_v8i1_to_i8
 
 ; OPTM1-LABEL: test_bitcast_v8i1_to_i8:
 ; OPMT1: call -4
@@ -181,7 +181,7 @@
 ; CHECK-LABEL: test_bitcast_v16i1_to_i16:
 ; CALLTARGETS-LABEL: test_bitcast_v16i1_to_i16:
 ; CHECK: call -4
-; CALLTARGETS: call Sz_bitcast_v16i1_to_i16
+; CALLTARGETS: .long Sz_bitcast_v16i1_to_i16
 
 ; OPTM1-LABEL: test_bitcast_v16i1_to_i16:
 ; OPMT1: call -4
@@ -196,7 +196,7 @@
 ; CHECK-LABEL: test_bitcast_i8_to_v8i1:
 ; CALLTARGETS-LABEL: test_bitcast_i8_to_v8i1
 ; CHECK: call -4
-; CALLTARGETS: call Sz_bitcast_i8_to_v8i1
+; CALLTARGETS: .long Sz_bitcast_i8_to_v8i1
 
 ; OPTM1-LABEL: test_bitcast_i8_to_v8i1:
 ; OPTM1: call -4
@@ -211,7 +211,7 @@
 ; CHECK-LABEL: test_bitcast_i16_to_v16i1:
 ; CALLTARGETS-LABEL: test_bitcast_i16_to_v16i1
 ; CHECK: call -4
-; CALLTARGETS: call Sz_bitcast_i16_to_v16i1
+; CALLTARGETS: .long Sz_bitcast_i16_to_v16i1
 
 ; OPTM1-LABEL: test_bitcast_i16_to_v16i1:
 ; OPTM1: call -4
diff --git a/tests_lit/llvm2ice_tests/vector-cast.ll b/tests_lit/llvm2ice_tests/vector-cast.ll
index 8f55c74..c95d787 100644
--- a/tests_lit/llvm2ice_tests/vector-cast.ll
+++ b/tests_lit/llvm2ice_tests/vector-cast.ll
@@ -145,7 +145,7 @@
 ; CHECK-LABEL: test_fptoui_v4f32_to_v4i32:
 ; CHECK: call -4
 ; CALLTARGETS-LABEL: test_fptoui_v4f32_to_v4i32
-; CALLTARGETS: call Sz_fptoui_v4f32
+; CALLTARGETS: .long Sz_fptoui_v4f32
 }
 
 ; [su]itofp operations
@@ -167,5 +167,5 @@
 ; CHECK-LABEL: test_uitofp_v4i32_to_v4f32:
 ; CHECK: call -4
 ; CALLTARGETS-LABEL: test_uitofp_v4i32_to_v4f32
-; CALLTARGETS: call Sz_uitofp_v4i32
+; CALLTARGETS: .long Sz_uitofp_v4i32
 }