Rearrange emit vs emitIAS. Wait until the function is done before dumping text.

Eventually, I want to have a flag "UseELFWriter", as in:
https://codereview.chromium.org/678533005/diff/120001/src/IceCfg.cpp

With that flag, the emit Ostream would contain no text, only
binary. This refactor should mean there are fewer places that
need to check a flag to disable the text version of IAS in
order to write binary. Without this change, some text labels
for branches would still be dumped out.

BUG=none
R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/700263003
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp
index a3b30ee..bd1894c 100644
--- a/src/IceCfg.cpp
+++ b/src/IceCfg.cpp
@@ -390,6 +390,22 @@
 
 // ======================== Dump routines ======================== //
 
+void Cfg::emitTextHeader(const IceString &MangledName) {
+  Ostream &Str = Ctx->getStrEmit();
+  Str << "\t.text\n";
+  if (Ctx->getFlags().FunctionSections)
+    Str << "\t.section\t.text." << MangledName << ",\"ax\",@progbits\n";
+  if (!getInternal() || Ctx->getFlags().DisableInternal) {
+    Str << "\t.globl\t" << MangledName << "\n";
+    Str << "\t.type\t" << MangledName << ",@function\n";
+  }
+  Str << "\t.p2align " << getTarget()->getBundleAlignLog2Bytes() << ",0x";
+  for (AsmCodeByte I : getTarget()->getNonExecBundlePadding())
+    Str.write_hex(I);
+  Str << "\n";
+  Str << MangledName << ":\n";
+}
+
 void Cfg::emit() {
   TimerMarker T(TimerStack::TT_emit, this);
   if (Ctx->getFlags().DecorateAsm) {
@@ -409,23 +425,23 @@
         << " -o=MyObj.o"
         << "\n\n";
   }
-  Str << "\t.text\n";
   IceString MangledName = getContext()->mangleName(getFunctionName());
-  if (Ctx->getFlags().FunctionSections)
-    Str << "\t.section\t.text." << MangledName << ",\"ax\",@progbits\n";
-  if (!getInternal() || Ctx->getFlags().DisableInternal) {
-    Str << "\t.globl\t" << MangledName << "\n";
-    Str << "\t.type\t" << MangledName << ",@function\n";
-  }
-  Str << "\t.p2align " << getTarget()->getBundleAlignLog2Bytes() << ",0x";
-  for (AsmCodeByte I : getTarget()->getNonExecBundlePadding())
-    Str.write_hex(I);
-  Str << "\n";
+  emitTextHeader(MangledName);
   for (CfgNode *Node : Nodes)
     Node->emit(this);
   Str << "\n";
 }
 
+void Cfg::emitIAS() {
+  TimerMarker T(TimerStack::TT_emit, this);
+  assert(!Ctx->getFlags().DecorateAsm);
+  IceString MangledName = getContext()->mangleName(getFunctionName());
+  emitTextHeader(MangledName);
+  for (CfgNode *Node : Nodes)
+    Node->emitIAS(this);
+  getAssembler<Assembler>()->emitIASBytes(Ctx);
+}
+
 // Dumps the IR with an optional introductory message.
 void Cfg::dump(const IceString &Message) {
   if (!Ctx->isVerbose())
diff --git a/src/IceCfg.h b/src/IceCfg.h
index fb8ebd9..2a1e602 100644
--- a/src/IceCfg.h
+++ b/src/IceCfg.h
@@ -94,9 +94,6 @@
   template <typename T> T *getAssembler() const {
     return static_cast<T *>(TargetAssembler.get());
   }
-  bool useIntegratedAssembler() const {
-    return getContext()->getFlags().UseIntegratedAssembler;
-  }
   bool hasComputedFrame() const;
   bool getFocusedTiming() const { return FocusedTiming; }
   void setFocusedTiming() { FocusedTiming = true; }
@@ -131,6 +128,8 @@
   const CfgNode *getCurrentNode() const { return CurrentNode; }
 
   void emit();
+  void emitIAS();
+  void emitTextHeader(const IceString &MangledName);
   void dump(const IceString &Message = "");
 
   // Allocate data of type T using the per-Cfg allocator.
diff --git a/src/IceCfgNode.cpp b/src/IceCfgNode.cpp
index 52789b9..543f4e9 100644
--- a/src/IceCfgNode.cpp
+++ b/src/IceCfgNode.cpp
@@ -864,6 +864,24 @@
   }
 }
 
+void updateStats(Cfg *Func, const Inst *I) {
+  // Update emitted instruction count, plus fill/spill count for
+  // Variable operands without a physical register.
+  if (uint32_t Count = I->getEmitInstCount()) {
+    Func->getContext()->statsUpdateEmitted(Count);
+    if (Variable *Dest = I->getDest()) {
+      if (!Dest->hasReg())
+        Func->getContext()->statsUpdateFills();
+    }
+    for (SizeT S = 0; S < I->getSrcSize(); ++S) {
+      if (Variable *Src = llvm::dyn_cast<Variable>(I->getSrc(S))) {
+        if (!Src->hasReg())
+          Func->getContext()->statsUpdateSpills();
+      }
+    }
+  }
+}
+
 } // end of anonymous namespace
 
 void CfgNode::emit(Cfg *Func) const {
@@ -871,14 +889,7 @@
   Ostream &Str = Func->getContext()->getStrEmit();
   Liveness *Liveness = Func->getLiveness();
   bool DecorateAsm = Liveness && Func->getContext()->getFlags().DecorateAsm;
-  if (Func->getEntryNode() == this) {
-    Str << Func->getContext()->mangleName(Func->getFunctionName()) << ":\n";
-  }
   Str << getAsmName() << ":\n";
-  if (Func->useIntegratedAssembler()) {
-    Assembler *Asm = Func->getAssembler<Assembler>();
-    Asm->BindCfgNodeLabel(getIndex());
-  }
   std::vector<SizeT> LiveRegCount(Func->getTarget()->getNumRegisters());
   if (DecorateAsm)
     emitRegisterUsage(Str, Func, this, true, LiveRegCount);
@@ -899,34 +910,37 @@
         ++LiveRegCount[Dest->getRegNum()];
       continue;
     }
-    if (Func->useIntegratedAssembler()) {
-      I->emitIAS(Func);
-    } else {
-      I->emit(Func);
-      if (DecorateAsm)
-        emitLiveRangesEnded(Str, Func, I, LiveRegCount);
-      Str << "\n";
-    }
-    // Update emitted instruction count, plus fill/spill count for
-    // Variable operands without a physical register.
-    if (uint32_t Count = I->getEmitInstCount()) {
-      Func->getContext()->statsUpdateEmitted(Count);
-      if (Variable *Dest = I->getDest()) {
-        if (!Dest->hasReg())
-          Func->getContext()->statsUpdateFills();
-      }
-      for (SizeT S = 0; S < I->getSrcSize(); ++S) {
-        if (Variable *Src = llvm::dyn_cast<Variable>(I->getSrc(S))) {
-          if (!Src->hasReg())
-            Func->getContext()->statsUpdateSpills();
-        }
-      }
-    }
+    I->emit(Func);
+    if (DecorateAsm)
+      emitLiveRangesEnded(Str, Func, I, LiveRegCount);
+    Str << "\n";
+    updateStats(Func, I);
   }
   if (DecorateAsm)
     emitRegisterUsage(Str, Func, this, false, LiveRegCount);
 }
 
+void CfgNode::emitIAS(Cfg *Func) const {
+  Func->setCurrentNode(this);
+  Assembler *Asm = Func->getAssembler<Assembler>();
+  Asm->BindCfgNodeLabel(getIndex());
+  for (InstPhi *Phi : Phis) {
+    if (Phi->isDeleted())
+      continue;
+    // Emitting a Phi instruction should cause an error.
+    Inst *Instr = Phi;
+    Instr->emitIAS(Func);
+  }
+  for (Inst *I : Insts) {
+    if (I->isDeleted())
+      continue;
+    if (I->isRedundantAssign())
+      continue;
+    I->emitIAS(Func);
+    updateStats(Func, I);
+  }
+}
+
 void CfgNode::dump(Cfg *Func) const {
   Func->setCurrentNode(this);
   Ostream &Str = Func->getContext()->getStrDump();
diff --git a/src/IceCfgNode.h b/src/IceCfgNode.h
index aa836c0..9cde8c1 100644
--- a/src/IceCfgNode.h
+++ b/src/IceCfgNode.h
@@ -82,6 +82,7 @@
   void contractIfEmpty();
   void doBranchOpt(const CfgNode *NextNode);
   void emit(Cfg *Func) const;
+  void emitIAS(Cfg *Func) const;
   void dump(Cfg *Func) const;
 
 private:
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index deed2bc..7ebf187 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp
@@ -342,84 +342,6 @@
 
 // ======================== Dump routines ======================== //
 
-namespace {
-
-void emitIASBytes(const Cfg *Func, const x86::AssemblerX86 *Asm,
-                  intptr_t StartPosition) {
-  GlobalContext *Ctx = Func->getContext();
-  Ostream &Str = Ctx->getStrEmit();
-  intptr_t EndPosition = Asm->GetPosition();
-  AssemblerFixup *LastFixup = Asm->GetLatestFixup(StartPosition);
-  if (!LastFixup) {
-    // The fixup doesn't apply to this current block.
-    for (intptr_t i = StartPosition; i < EndPosition; ++i) {
-      Str << "\t.byte 0x";
-      Str.write_hex(Asm->LoadBuffer<uint8_t>(i));
-      Str << "\n";
-    }
-    return;
-  }
-  intptr_t LastFixupLoc = LastFixup->position();
-  const intptr_t FixupSize = 4;
-  // The fixup does apply to this current block.
-  for (intptr_t i = StartPosition; i < LastFixupLoc; ++i) {
-    Str << "\t.byte 0x";
-    Str.write_hex(Asm->LoadBuffer<uint8_t>(i));
-    Str << "\n";
-  }
-  while (LastFixup) {
-    Str << "\t.long ";
-    const ConstantRelocatable *Reloc = LastFixup->value();
-    if (Reloc->getSuppressMangling())
-      Str << Reloc->getName();
-    else
-      Str << Ctx->mangleName(Reloc->getName());
-    if (LastFixup->value()->getOffset()) {
-      Str << " + " << LastFixup->value()->getOffset();
-    }
-    Str << "\n";
-    LastFixupLoc += FixupSize;
-    assert(LastFixupLoc <= EndPosition);
-    LastFixup = Asm->GetLatestFixup(LastFixupLoc);
-    // Assume multi-fixups are adjacent in the instruction encoding.
-    assert(!LastFixup || LastFixup->position() == LastFixupLoc);
-  }
-  for (intptr_t i = LastFixupLoc; i < EndPosition; ++i) {
-    Str << "\t.byte 0x";
-    Str.write_hex(Asm->LoadBuffer<uint8_t>(i));
-    Str << "\n";
-  }
-}
-
-void emitIASBytesBranch(const Cfg *Func, const x86::AssemblerX86 *Asm,
-                        intptr_t StartPosition, const x86::Label *Label,
-                        const IceString &LabelName, bool Near) {
-  // If this is a backward branch (label is bound), we're good and know
-  // the offset. If this is a forward branch, then we can't actually emit
-  // the thing as text in a streaming manner, because the fixup hasn't
-  // happened yet. Instead, emit .long ($BranchLabel) - (. + 4), in that
-  // case and let the external assembler take care of that fixup.
-  if (Label->IsBound()) {
-    emitIASBytes(Func, Asm, StartPosition);
-    return;
-  }
-  const intptr_t FwdBranchSize = Near ? 1 : 4;
-  const IceString FwdBranchDirective = Near ? ".byte" : ".long";
-  Ostream &Str = Func->getContext()->getStrEmit();
-  intptr_t EndPosition = Asm->GetPosition();
-  assert(EndPosition - StartPosition > FwdBranchSize);
-  for (intptr_t i = StartPosition; i < EndPosition - FwdBranchSize; ++i) {
-    Str << "\t.byte 0x";
-    Str.write_hex(Asm->LoadBuffer<uint8_t>(i));
-    Str << "\n";
-  }
-  Str << "\t" << FwdBranchDirective << " " << LabelName << " - (. + "
-      << FwdBranchSize << ")\n";
-  return;
-}
-
-} // end of anonymous namespace
-
 void InstX8632::dump(const Cfg *Func) const {
   Ostream &Str = Func->getContext()->getStrDump();
   Str << "[X8632] ";
@@ -434,10 +356,6 @@
 void InstX8632Label::emitIAS(const Cfg *Func) const {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
   Asm->BindLocalLabel(Number);
-  // TODO(jvoung): remove the the textual label once forward branch
-  // fixups are used (and text assembler is not used).
-  Ostream &Str = Func->getContext()->getStrEmit();
-  Str << getName(Func) << ":\n";
 }
 
 void InstX8632Label::dump(const Cfg *Func) const {
@@ -471,7 +389,6 @@
 
 void InstX8632Br::emitIAS(const Cfg *Func) const {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   if (Label) {
     x86::Label *L = Asm->GetOrCreateLocalLabel(Label->getNumber());
     // In all these cases, local Labels should only be used for Near.
@@ -481,7 +398,6 @@
     } else {
       Asm->j(Condition, L, Near);
     }
-    emitIASBytesBranch(Func, Asm, StartPosition, L, Label->getName(Func), Near);
   } else {
     // Pessimistically assume it's far. This only affects Labels that
     // are not Bound.
@@ -491,20 +407,13 @@
           Asm->GetOrCreateCfgNodeLabel(getTargetFalse()->getIndex());
       assert(!getTargetTrue());
       Asm->jmp(L, Near);
-      emitIASBytesBranch(Func, Asm, StartPosition, L,
-                         getTargetFalse()->getAsmName(), Near);
     } else {
       x86::Label *L = Asm->GetOrCreateCfgNodeLabel(getTargetTrue()->getIndex());
       Asm->j(Condition, L, Near);
-      emitIASBytesBranch(Func, Asm, StartPosition, L,
-                         getTargetTrue()->getAsmName(), Near);
-      StartPosition = Asm->GetPosition();
       if (getTargetFalse()) {
         x86::Label *L2 =
             Asm->GetOrCreateCfgNodeLabel(getTargetFalse()->getIndex());
         Asm->jmp(L2, Near);
-        emitIASBytesBranch(Func, Asm, StartPosition, L2,
-                           getTargetFalse()->getAsmName(), Near);
       }
     }
   }
@@ -549,9 +458,7 @@
 
 void InstX8632Call::emitIAS(const Cfg *Func) const {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   Operand *Target = getCallTarget();
-  bool NeedsFallback = false;
   if (const auto Var = llvm::dyn_cast<Variable>(Target)) {
     if (Var->hasReg()) {
       Asm->call(RegX8632::getEncodedGPR(Var->getRegNum()));
@@ -565,7 +472,6 @@
   } else if (const auto CR = llvm::dyn_cast<ConstantRelocatable>(Target)) {
     assert(CR->getOffset() == 0 && "We only support calling a function");
     Asm->call(CR);
-    NeedsFallback = true;
   } else if (const auto Imm = llvm::dyn_cast<ConstantInteger32>(Target)) {
     // NaCl trampoline calls refer to an address within the sandbox directly.
     // This is usually only needed for non-IRT builds and otherwise not
@@ -579,18 +485,6 @@
   } else {
     llvm_unreachable("Unexpected operand type");
   }
-  if (NeedsFallback) {
-    // TODO(jvoung): The ".long sym" hack doesn't work, since we need
-    // a pc-rel relocation and not an absolute relocation.
-    //
-    // Still, we have at least filled the assembler buffer so that the
-    // instruction sizes/positions are correct for jumps.
-    // For now, fall back to the regular .s emission, after filling the buffer.
-    emit(Func);
-    Func->getContext()->getStrEmit() << "\n";
-  } else {
-    emitIASBytes(Func, Asm, StartPosition);
-  }
   Func->getTarget()->resetStackAdjustment();
 }
 
@@ -628,7 +522,6 @@
 void emitIASOpTyGPR(const Cfg *Func, Type Ty, const Operand *Op,
                     const x86::AssemblerX86::GPREmitterOneOp &Emitter) {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   if (const auto Var = llvm::dyn_cast<Variable>(Op)) {
     if (Var->hasReg()) {
       // We cheat a little and use GPRRegister even for byte operations.
@@ -646,7 +539,6 @@
   } else {
     llvm_unreachable("Unexpected operand type");
   }
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 template <bool VarCanBeByte, bool SrcCanBeByte>
@@ -654,7 +546,6 @@
                        const Operand *Src,
                        const x86::AssemblerX86::GPREmitterRegOp &Emitter) {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   assert(Var->hasReg());
   // We cheat a little and use GPRRegister even for byte operations.
   RegX8632::GPRRegister VarReg =
@@ -686,14 +577,12 @@
   } else {
     llvm_unreachable("Unexpected operand type");
   }
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void emitIASAddrOpTyGPR(const Cfg *Func, Type Ty, const x86::Address &Addr,
                         const Operand *Src,
                         const x86::AssemblerX86::GPREmitterAddrOp &Emitter) {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   // Src can only be Reg or Immediate.
   if (const auto SrcVar = llvm::dyn_cast<Variable>(Src)) {
     assert(SrcVar->hasReg());
@@ -709,7 +598,6 @@
   } else {
     llvm_unreachable("Unexpected operand type");
   }
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void emitIASAsAddrOpTyGPR(const Cfg *Func, Type Ty, const Operand *Op0,
@@ -735,7 +623,6 @@
                      const Operand *Src,
                      const x86::AssemblerX86::GPREmitterShiftOp &Emitter) {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   // Technically, the Dest Var can be mem as well, but we only use Reg.
   // We can extend this to check Dest if we decide to use that form.
   assert(Var->hasReg());
@@ -754,14 +641,12 @@
   } else {
     llvm_unreachable("Unexpected operand type");
   }
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void emitIASGPRShiftDouble(const Cfg *Func, const Variable *Dest,
                            const Operand *Src1Op, const Operand *Src2Op,
                            const x86::AssemblerX86::GPREmitterShiftD &Emitter) {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   // Dest can be reg or mem, but we only use the reg variant.
   assert(Dest->hasReg());
   RegX8632::GPRRegister DestReg = RegX8632::getEncodedGPR(Dest->getRegNum());
@@ -778,14 +663,12 @@
     assert(llvm::cast<Variable>(Src2Op)->getRegNum() == RegX8632::Reg_ecx);
     (Asm->*(Emitter.GPRGPR))(Ty, DestReg, SrcReg);
   }
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void emitIASXmmShift(const Cfg *Func, Type Ty, const Variable *Var,
                      const Operand *Src,
                      const x86::AssemblerX86::XmmEmitterShiftOp &Emitter) {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   assert(Var->hasReg());
   RegX8632::XmmRegister VarReg = RegX8632::getEncodedXmm(Var->getRegNum());
   if (const auto SrcVar = llvm::dyn_cast<Variable>(Src)) {
@@ -806,14 +689,12 @@
   } else {
     llvm_unreachable("Unexpected operand type");
   }
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void emitIASRegOpTyXMM(const Cfg *Func, Type Ty, const Variable *Var,
                        const Operand *Src,
                        const x86::AssemblerX86::XmmEmitterRegOp &Emitter) {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   assert(Var->hasReg());
   RegX8632::XmmRegister VarReg = RegX8632::getEncodedXmm(Var->getRegNum());
   if (const auto SrcVar = llvm::dyn_cast<Variable>(Src)) {
@@ -835,7 +716,6 @@
   } else {
     llvm_unreachable("Unexpected operand type");
   }
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 template <typename DReg_t, typename SReg_t, DReg_t (*destEnc)(int32_t),
@@ -844,7 +724,6 @@
     const Cfg *Func, Type DispatchTy, const Variable *Dest, const Operand *Src,
     const x86::AssemblerX86::CastEmitterRegOp<DReg_t, SReg_t> Emitter) {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   assert(Dest->hasReg());
   DReg_t DestReg = destEnc(Dest->getRegNum());
   if (const auto SrcVar = llvm::dyn_cast<Variable>(Src)) {
@@ -862,7 +741,6 @@
   } else {
     llvm_unreachable("Unexpected operand type");
   }
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 template <typename DReg_t, typename SReg_t, DReg_t (*destEnc)(int32_t),
@@ -872,7 +750,6 @@
     const Operand *Src1,
     const x86::AssemblerX86::ThreeOpImmEmitter<DReg_t, SReg_t> Emitter) {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   // This only handles Dest being a register, and Src1 being an immediate.
   assert(Dest->hasReg());
   DReg_t DestReg = destEnc(Dest->getRegNum());
@@ -893,14 +770,12 @@
   } else {
     llvm_unreachable("Unexpected operand type");
   }
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void emitIASMovlikeXMM(const Cfg *Func, const Variable *Dest,
                        const Operand *Src,
                        const x86::AssemblerX86::XmmEmitterMovOps Emitter) {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   if (Dest->hasReg()) {
     RegX8632::XmmRegister DestReg = RegX8632::getEncodedXmm(Dest->getRegNum());
     if (const auto SrcVar = llvm::dyn_cast<Variable>(Src)) {
@@ -927,7 +802,6 @@
     (Asm->*(Emitter.AddrXmm))(StackAddr,
                               RegX8632::getEncodedXmm(SrcVar->getRegNum()));
   }
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 bool checkForRedundantAssign(const Variable *Dest, const Operand *Source) {
@@ -1432,7 +1306,6 @@
 
 template <> void InstX8632Cbwdq::emitIAS(const Cfg *Func) const {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   assert(getSrcSize() == 1);
   Operand *Src0 = getSrc(0);
   assert(llvm::isa<Variable>(Src0));
@@ -1454,7 +1327,6 @@
     Asm->cdq();
     break;
   }
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void InstX8632Mul::emit(const Cfg *Func) const {
@@ -1582,10 +1454,8 @@
   assert(SrcVar->hasReg());
   assert(SrcVar->getType() == IceType_i32);
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   Asm->cmov(Condition, RegX8632::getEncodedGPR(getDest()->getRegNum()),
             RegX8632::getEncodedGPR(SrcVar->getRegNum()));
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void InstX8632Cmov::dump(const Cfg *Func) const {
@@ -1611,7 +1481,6 @@
 
 void InstX8632Cmpps::emitIAS(const Cfg *Func) const {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   assert(getSrcSize() == 2);
   assert(Condition < CondX86::Cmpps_Invalid);
   // Assuming there isn't any load folding for cmpps, and vector constants
@@ -1627,7 +1496,6 @@
     Asm->cmpps(RegX8632::getEncodedXmm(getDest()->getRegNum()), SrcStackAddr,
                Condition);
   }
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void InstX8632Cmpps::dump(const Cfg *Func) const {
@@ -1654,7 +1522,6 @@
 void InstX8632Cmpxchg::emitIAS(const Cfg *Func) const {
   assert(getSrcSize() == 3);
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   Type Ty = getSrc(0)->getType();
   const auto Mem = llvm::cast<OperandX8632Mem>(getSrc(0));
   assert(Mem->getSegmentRegister() == OperandX8632Mem::DefaultSegment);
@@ -1668,7 +1535,6 @@
   } else {
     Asm->cmpxchg(Ty, Addr, Reg);
   }
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void InstX8632Cmpxchg::dump(const Cfg *Func) const {
@@ -1693,7 +1559,6 @@
 void InstX8632Cmpxchg8b::emitIAS(const Cfg *Func) const {
   assert(getSrcSize() == 5);
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   const auto Mem = llvm::cast<OperandX8632Mem>(getSrc(0));
   assert(Mem->getSegmentRegister() == OperandX8632Mem::DefaultSegment);
   const x86::Address Addr = Mem->toAsmAddress(Asm);
@@ -1701,7 +1566,6 @@
     Asm->lock();
   }
   Asm->cmpxchg8b(Addr);
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void InstX8632Cmpxchg8b::dump(const Cfg *Func) const {
@@ -1868,9 +1732,7 @@
 
 void InstX8632UD2::emitIAS(const Cfg *Func) const {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   Asm->ud2();
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void InstX8632UD2::dump(const Cfg *Func) const {
@@ -1922,9 +1784,7 @@
 
 void InstX8632Mfence::emitIAS(const Cfg *Func) const {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   Asm->mfence();
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void InstX8632Mfence::dump(const Cfg *Func) const {
@@ -1954,7 +1814,6 @@
     assert(SrcVar->hasReg());
     RegX8632::XmmRegister SrcReg = RegX8632::getEncodedXmm(SrcVar->getRegNum());
     x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-    intptr_t StartPosition = Asm->GetPosition();
     if (const auto DestVar = llvm::dyn_cast<Variable>(Dest)) {
       assert(!DestVar->hasReg());
       x86::Address StackAddr(static_cast<TargetX8632 *>(Func->getTarget())
@@ -1965,7 +1824,6 @@
       assert(DestMem->getSegmentRegister() == OperandX8632Mem::DefaultSegment);
       Asm->movss(DestTy, DestMem->toAsmAddress(Asm), SrcReg);
     }
-    emitIASBytes(Func, Asm, StartPosition);
     return;
   } else {
     assert(isScalarIntegerType(DestTy));
@@ -1994,7 +1852,6 @@
 
 void InstX8632StoreP::emitIAS(const Cfg *Func) const {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   assert(getSrcSize() == 2);
   const auto SrcVar = llvm::cast<Variable>(getSrc(0));
   const auto DestMem = llvm::cast<OperandX8632Mem>(getSrc(1));
@@ -2002,7 +1859,6 @@
   assert(SrcVar->hasReg());
   Asm->movups(DestMem->toAsmAddress(Asm),
               RegX8632::getEncodedXmm(SrcVar->getRegNum()));
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void InstX8632StoreP::dump(const Cfg *Func) const {
@@ -2026,7 +1882,6 @@
 
 void InstX8632StoreQ::emitIAS(const Cfg *Func) const {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   assert(getSrcSize() == 2);
   const auto SrcVar = llvm::cast<Variable>(getSrc(0));
   const auto DestMem = llvm::cast<OperandX8632Mem>(getSrc(1));
@@ -2034,7 +1889,6 @@
   assert(SrcVar->hasReg());
   Asm->movq(DestMem->toAsmAddress(Asm),
             RegX8632::getEncodedXmm(SrcVar->getRegNum()));
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void InstX8632StoreQ::dump(const Cfg *Func) const {
@@ -2145,10 +1999,8 @@
       const auto SrcVar = llvm::cast<Variable>(Src);
       assert(SrcVar->hasReg());
       x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-      intptr_t StartPosition = Asm->GetPosition();
       Asm->movss(SrcTy, StackAddr,
                  RegX8632::getEncodedXmm(SrcVar->getRegNum()));
-      emitIASBytes(Func, Asm, StartPosition);
       return;
     } else {
       // Src can be a register or immediate.
@@ -2162,7 +2014,6 @@
 
 template <> void InstX8632Movd::emitIAS(const Cfg *Func) const {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   assert(getSrcSize() == 1);
   const Variable *Dest = getDest();
   const auto SrcVar = llvm::cast<Variable>(getSrc(0));
@@ -2192,7 +2043,6 @@
       Asm->movd(StackAddr, SrcReg);
     }
   }
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 template <> void InstX8632Movp::emit(const Cfg *Func) const {
@@ -2252,10 +2102,8 @@
   const auto SrcVar = llvm::cast<Variable>(getSrc(1));
   assert(Dest->hasReg() && SrcVar->hasReg());
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   Asm->movss(IceType_f32, RegX8632::getEncodedXmm(Dest->getRegNum()),
              RegX8632::getEncodedXmm(SrcVar->getRegNum()));
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 template <> void InstX8632Movsx::emitIAS(const Cfg *Func) const {
@@ -2289,10 +2137,8 @@
 
 void InstX8632Nop::emitIAS(const Cfg *Func) const {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   // TODO: Emit the right code for the variant.
   Asm->nop();
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void InstX8632Nop::dump(const Cfg *Func) const {
@@ -2325,7 +2171,6 @@
 
 void InstX8632Fld::emitIAS(const Cfg *Func) const {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   assert(getSrcSize() == 1);
   const Operand *Src = getSrc(0);
   Type Ty = Src->getType();
@@ -2352,7 +2197,6 @@
   } else {
     llvm_unreachable("Unexpected operand type");
   }
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void InstX8632Fld::dump(const Cfg *Func) const {
@@ -2395,7 +2239,6 @@
 
 void InstX8632Fstp::emitIAS(const Cfg *Func) const {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   assert(getSrcSize() == 0);
   const Variable *Dest = getDest();
   // TODO(jvoung,stichnot): Utilize this by setting Dest to nullptr to
@@ -2404,7 +2247,6 @@
   // of popping the stack.
   if (!Dest) {
     Asm->fstp(RegX8632::getEncodedSTReg(0));
-    emitIASBytes(Func, Asm, StartPosition);
     return;
   }
   Type Ty = Dest->getType();
@@ -2424,7 +2266,6 @@
     Asm->movss(Ty, RegX8632::getEncodedXmm(Dest->getRegNum()), StackSlot);
     Asm->add(IceType_i32, RegX8632::Encoded_Reg_esp, Width);
   }
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void InstX8632Fstp::dump(const Cfg *Func) const {
@@ -2575,14 +2416,12 @@
 void InstX8632Pop::emitIAS(const Cfg *Func) const {
   assert(getSrcSize() == 0);
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   if (getDest()->hasReg()) {
     Asm->popl(RegX8632::getEncodedGPR(getDest()->getRegNum()));
   } else {
     Asm->popl(static_cast<TargetX8632 *>(Func->getTarget())
                   ->stackVarToAsmOperand(getDest()));
   }
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void InstX8632Pop::dump(const Cfg *Func) const {
@@ -2599,9 +2438,7 @@
 
 void InstX8632AdjustStack::emitIAS(const Cfg *Func) const {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   Asm->sub(IceType_i32, RegX8632::Encoded_Reg_esp, x86::Immediate(Amount));
-  emitIASBytes(Func, Asm, StartPosition);
   Func->getTarget()->updateStackAdjustment(Amount);
 }
 
@@ -2626,9 +2463,7 @@
   const auto Var = llvm::cast<Variable>(getSrc(0));
   assert(Var->hasReg());
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   Asm->pushl(RegX8632::getEncodedGPR(Var->getRegNum()));
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void InstX8632Push::dump(const Cfg *Func) const {
@@ -2666,9 +2501,7 @@
 
 void InstX8632Ret::emitIAS(const Cfg *Func) const {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   Asm->ret();
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void InstX8632Ret::dump(const Cfg *Func) const {
@@ -2692,7 +2525,6 @@
 void InstX8632Xadd::emitIAS(const Cfg *Func) const {
   assert(getSrcSize() == 2);
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   Type Ty = getSrc(0)->getType();
   const auto Mem = llvm::cast<OperandX8632Mem>(getSrc(0));
   assert(Mem->getSegmentRegister() == OperandX8632Mem::DefaultSegment);
@@ -2705,7 +2537,6 @@
     Asm->lock();
   }
   Asm->xadd(Ty, Addr, Reg);
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void InstX8632Xadd::dump(const Cfg *Func) const {
@@ -2729,7 +2560,6 @@
 void InstX8632Xchg::emitIAS(const Cfg *Func) const {
   assert(getSrcSize() == 2);
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-  intptr_t StartPosition = Asm->GetPosition();
   Type Ty = getSrc(0)->getType();
   const auto Mem = llvm::cast<OperandX8632Mem>(getSrc(0));
   assert(Mem->getSegmentRegister() == OperandX8632Mem::DefaultSegment);
@@ -2739,7 +2569,6 @@
   const RegX8632::GPRRegister Reg =
       RegX8632::getEncodedGPR(VarReg->getRegNum());
   Asm->xchg(Ty, Addr, Reg);
-  emitIASBytes(Func, Asm, StartPosition);
 }
 
 void InstX8632Xchg::dump(const Cfg *Func) const {
diff --git a/src/IceTranslator.cpp b/src/IceTranslator.cpp
index 0391b6c..edaa74f 100644
--- a/src/IceTranslator.cpp
+++ b/src/IceTranslator.cpp
@@ -83,7 +83,11 @@
       ErrorStatus = true;
     }
 
-    Func->emit();
+    if (Ctx->getFlags().UseIntegratedAssembler) {
+      Func->emitIAS();
+    } else {
+      Func->emit();
+    }
     Ctx->dumpStats(Func->getFunctionName());
   }
 
diff --git a/src/assembler.cpp b/src/assembler.cpp
index 50d82b7..b0bd297 100644
--- a/src/assembler.cpp
+++ b/src/assembler.cpp
@@ -19,7 +19,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "assembler.h"
+#include "IceGlobalContext.h"
 #include "IceMemoryRegion.h"
+#include "IceOperand.h"
 
 namespace Ice {
 
@@ -74,18 +76,6 @@
 
 AssemblerBuffer::~AssemblerBuffer() {}
 
-// Returns the latest fixup at or after the given position, or nullptr if
-// there is none.  Assumes fixups were added in increasing order.
-AssemblerFixup *AssemblerBuffer::GetLatestFixup(intptr_t position) const {
-  AssemblerFixup *latest_fixup = nullptr;
-  for (auto I = fixups_.rbegin(), E = fixups_.rend(); I != E; ++I) {
-    if ((*I)->position() < position)
-      return latest_fixup;
-    latest_fixup = *I;
-  }
-  return latest_fixup;
-}
-
 void AssemblerBuffer::ProcessFixups(const MemoryRegion &region) {
   for (SizeT I = 0; I < fixups_.size(); ++I) {
     AssemblerFixup *fixup = fixups_[I];
@@ -133,4 +123,43 @@
   assert(Size() == old_size);
 }
 
+void Assembler::emitIASBytes(GlobalContext *Ctx) const {
+  // Dump the assembled buffer as textual directives: plain bytes become
+  // ".byte", and each 4-byte fixup slot becomes a symbolic ".long".
+  Ostream &Str = Ctx->getStrEmit();
+  const intptr_t FixupSize = 4;
+  const intptr_t EndPosition = buffer_.Size();
+  intptr_t CurPosition = 0;
+  // Emits the raw bytes in [CurPosition, Limit), one directive per byte.
+  auto EmitBytesUpTo = [&](intptr_t Limit) {
+    for (intptr_t Pos = CurPosition; Pos < Limit; ++Pos) {
+      Str << "\t.byte 0x";
+      Str.write_hex(buffer_.Load<uint8_t>(Pos));
+      Str << "\n";
+    }
+  };
+  for (auto FixupI = buffer_.fixups_begin(), FixupE = buffer_.fixups_end();
+       FixupI != FixupE; ++FixupI) {
+    AssemblerFixup *Fixup = *FixupI;
+    const intptr_t FixupLoc = Fixup->position();
+    EmitBytesUpTo(FixupLoc);
+    const ConstantRelocatable *Reloc = Fixup->value();
+    Str << "\t.long ";
+    if (Reloc->getSuppressMangling())
+      Str << Reloc->getName();
+    else
+      Str << Ctx->mangleName(Reloc->getName());
+    if (Reloc->getOffset())
+      Str << " + " << Reloc->getOffset();
+    if (Fixup->kind() == FK_PcRel_4)
+      Str << " - (. + " << FixupSize << ")";
+    Str << "\n";
+    // Skip the slot itself; its contents are represented by the .long above.
+    CurPosition = FixupLoc + FixupSize;
+    assert(CurPosition <= EndPosition);
+  }
+  // Whatever follows the last fixup (or everything, if none) is raw bytes.
+  EmitBytesUpTo(EndPosition);
+}
+
 } // end of namespace Ice
diff --git a/src/assembler.h b/src/assembler.h
index dcb83ed..37963c4 100644
--- a/src/assembler.h
+++ b/src/assembler.h
@@ -162,8 +162,10 @@
   // Returns the position in the instruction stream.
   intptr_t GetPosition() const { return cursor_ - contents_; }
 
-  // For bringup only.
-  AssemblerFixup *GetLatestFixup(intptr_t position) const;
+  // Pool-allocated fixups, exposed read-only via fixups_begin()/fixups_end().
+  typedef std::vector<AssemblerFixup *> FixupList;
+  FixupList::const_iterator fixups_begin() const { return fixups_.begin(); }
+  FixupList::const_iterator fixups_end() const { return fixups_.end(); }
 
 private:
   // The limit is set to kMinimumGap bytes before the end of the data area.
@@ -175,7 +177,7 @@
   uintptr_t cursor_;
   uintptr_t limit_;
   Assembler &assembler_;
-  std::vector<AssemblerFixup *> fixups_;
+  FixupList fixups_;
 #ifndef NDEBUG
   bool fixups_processed_;
 #endif // !NDEBUG
@@ -206,7 +208,7 @@
   Assembler &operator=(const Assembler &) = delete;
 
 public:
-  Assembler() {}
+  Assembler() : buffer_(*this) {}
   virtual ~Assembler() {}
 
   // Allocate a chunk of bytes using the per-Assembler allocator.
@@ -226,8 +228,13 @@
 
   virtual void BindCfgNodeLabel(SizeT NodeNumber) = 0;
 
+  void emitIASBytes(GlobalContext *Ctx) const;
+
 private:
   llvm::BumpPtrAllocator Allocator;
+
+protected:
+  AssemblerBuffer buffer_;
 };
 
 } // end of namespace Ice
diff --git a/src/assembler_ia32.h b/src/assembler_ia32.h
index 77159c4..a7ffc9e 100644
--- a/src/assembler_ia32.h
+++ b/src/assembler_ia32.h
@@ -364,7 +364,7 @@
   AssemblerX86 &operator=(const AssemblerX86 &) = delete;
 
 public:
-  explicit AssemblerX86(bool use_far_branches = false) : buffer_(*this) {
+  explicit AssemblerX86(bool use_far_branches = false) : Assembler() {
     // This mode is only needed and implemented for MIPS and ARM.
     assert(!use_far_branches);
     (void)use_far_branches;
@@ -829,15 +829,6 @@
     buffer_.FinalizeInstructions(region);
   }
 
-  // Expose the buffer, for bringup...
-  intptr_t GetPosition() const { return buffer_.GetPosition(); }
-  template <typename T> T LoadBuffer(intptr_t position) const {
-    return buffer_.Load<T>(position);
-  }
-  AssemblerFixup *GetLatestFixup(intptr_t position) const {
-    return buffer_.GetLatestFixup(position);
-  }
-
 private:
   inline void EmitUint8(uint8_t value);
   inline void EmitInt16(int16_t value);
@@ -868,8 +859,6 @@
   LabelVector LocalLabels;
 
   Label *GetOrCreateLabel(SizeT Number, LabelVector &Labels);
-
-  AssemblerBuffer buffer_;
 };
 
 inline void AssemblerX86::EmitUint8(uint8_t value) {