X8632 Templatization completed.

This CL introduces the X86Inst templates. The previous implementation relied on template specialization, which did not play nicely with the new design. This required a lot of other boilerplate code (i.e., tons of new named constructors, one for each X86Inst).

This CL also moves X8632 code out of the X86Base{Impl}?.h files so that they are **almost** target agnostic. As we add other X86 targets, more methods will be moved to the target-specific trait class (e.g., call/ret/argument lowering).

BUG= https://code.google.com/p/nativeclient/issues/detail?id=4077
R=jvoung@chromium.org

Review URL: https://codereview.chromium.org/1216933015.
diff --git a/src/IceAssemblerX8632.h b/src/IceAssemblerX8632.h
index 6c22a24..fb9eedf 100644
--- a/src/IceAssemblerX8632.h
+++ b/src/IceAssemblerX8632.h
@@ -37,6 +37,9 @@
 
 namespace X8632 {
 
+using Immediate = ::Ice::X86Internal::Immediate;
+using Label = ::Ice::X86Internal::Label;
+
 class AssemblerX8632 : public X86Internal::AssemblerX86Base<TargetX8632> {
   AssemblerX8632(const AssemblerX8632 &) = delete;
   AssemblerX8632 &operator=(const AssemblerX8632 &) = delete;
diff --git a/src/IceAssemblerX86Base.h b/src/IceAssemblerX86Base.h
index e6516f6..8a556f0 100644
--- a/src/IceAssemblerX86Base.h
+++ b/src/IceAssemblerX86Base.h
@@ -948,10 +948,6 @@
 
 } // end of namespace X86Internal
 
-namespace X8632 {
-using Immediate = ::Ice::X86Internal::Immediate;
-using Label = ::Ice::X86Internal::Label;
-} // end of namespace X8632
 } // end of namespace Ice
 
 #include "IceAssemblerX86BaseImpl.h"
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index fd33c55..c6d6abf 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp
@@ -8,11 +8,13 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file implements the InstX8632 and OperandX8632 classes,
-/// primarily the constructors and the dump()/emit() methods.
+/// This file defines X8632 specific data related to X8632 Instructions and
+/// Instruction traits. These are declared in the IceTargetLoweringX8632Traits.h
+/// header file.
+///
+/// This file also defines X8632 operand specific methods (dump and emit.)
 ///
 //===----------------------------------------------------------------------===//
-
 #include "IceInstX8632.h"
 
 #include "IceAssemblerX8632.h"
@@ -26,75 +28,57 @@
 
 namespace Ice {
 
-namespace {
+namespace X86Internal {
 
-const struct InstX8632BrAttributes_ {
-  X8632::Traits::Cond::BrCond Opposite;
-  const char *DisplayString;
-  const char *EmitString;
-} InstX8632BrAttributes[] = {
+const MachineTraits<TargetX8632>::InstBrAttributesType
+    MachineTraits<TargetX8632>::InstBrAttributes[] = {
 #define X(tag, encode, opp, dump, emit)                                        \
   { X8632::Traits::Cond::opp, dump, emit }                                     \
   ,
-    ICEINSTX8632BR_TABLE
+        ICEINSTX8632BR_TABLE
 #undef X
 };
 
-const struct InstX8632CmppsAttributes_ {
-  const char *EmitString;
-} InstX8632CmppsAttributes[] = {
+const MachineTraits<TargetX8632>::InstCmppsAttributesType
+    MachineTraits<TargetX8632>::InstCmppsAttributes[] = {
 #define X(tag, emit)                                                           \
   { emit }                                                                     \
   ,
-    ICEINSTX8632CMPPS_TABLE
+        ICEINSTX8632CMPPS_TABLE
 #undef X
 };
 
-const struct TypeX8632Attributes_ {
-  const char *CvtString;   // i (integer), s (single FP), d (double FP)
-  const char *SdSsString;  // ss, sd, or <blank>
-  const char *PackString;  // b, w, d, or <blank>
-  const char *WidthString; // b, w, l, q, or <blank>
-  const char *FldString;   // s, l, or <blank>
-} TypeX8632Attributes[] = {
+const MachineTraits<TargetX8632>::TypeAttributesType
+    MachineTraits<TargetX8632>::TypeAttributes[] = {
 #define X(tag, elementty, cvt, sdss, pack, width, fld)                         \
   { cvt, sdss, pack, width, fld }                                              \
   ,
-    ICETYPEX8632_TABLE
+        ICETYPEX8632_TABLE
 #undef X
 };
 
-const char *InstX8632SegmentRegNames[] = {
+const char *MachineTraits<TargetX8632>::InstSegmentRegNames[] = {
 #define X(val, name, prefix) name,
     SEG_REGX8632_TABLE
 #undef X
 };
 
-uint8_t InstX8632SegmentPrefixes[] = {
+uint8_t MachineTraits<TargetX8632>::InstSegmentPrefixes[] = {
 #define X(val, name, prefix) prefix,
     SEG_REGX8632_TABLE
 #undef X
 };
 
-} // end of anonymous namespace
-
-const char *InstX8632::getWidthString(Type Ty) {
-  return TypeX8632Attributes[Ty].WidthString;
+void MachineTraits<TargetX8632>::X86Operand::dump(const Cfg *,
+                                                  Ostream &Str) const {
+  if (BuildDefs::dump())
+    Str << "<OperandX8632>";
 }
 
-const char *InstX8632::getFldString(Type Ty) {
-  return TypeX8632Attributes[Ty].FldString;
-}
-
-X8632::Traits::Cond::BrCond
-InstX8632::getOppositeCondition(X8632::Traits::Cond::BrCond Cond) {
-  return InstX8632BrAttributes[Cond].Opposite;
-}
-
-OperandX8632Mem::OperandX8632Mem(Cfg *Func, Type Ty, Variable *Base,
-                                 Constant *Offset, Variable *Index,
-                                 uint16_t Shift, SegmentRegisters SegmentReg)
-    : OperandX8632(kMem, Ty), Base(Base), Offset(Offset), Index(Index),
+MachineTraits<TargetX8632>::X86OperandMem::X86OperandMem(
+    Cfg *Func, Type Ty, Variable *Base, Constant *Offset, Variable *Index,
+    uint16_t Shift, SegmentRegisters SegmentReg)
+    : X86Operand(kMem, Ty), Base(Base), Offset(Offset), Index(Index),
       Shift(Shift), SegmentReg(SegmentReg), Randomized(false) {
   assert(Shift <= 3);
   Vars = nullptr;
@@ -114,2806 +98,13 @@
   }
 }
 
-InstX8632FakeRMW::InstX8632FakeRMW(Cfg *Func, Operand *Data, Operand *Addr,
-                                   InstArithmetic::OpKind Op, Variable *Beacon)
-    : InstX8632(Func, InstX8632::FakeRMW, 3, nullptr), Op(Op) {
-  addSource(Data);
-  addSource(Addr);
-  addSource(Beacon);
-}
-
-InstX8632AdjustStack::InstX8632AdjustStack(Cfg *Func, SizeT Amount,
-                                           Variable *Esp)
-    : InstX8632(Func, InstX8632::Adjuststack, 1, Esp), Amount(Amount) {
-  addSource(Esp);
-}
-
-InstX8632Mul::InstX8632Mul(Cfg *Func, Variable *Dest, Variable *Source1,
-                           Operand *Source2)
-    : InstX8632(Func, InstX8632::Mul, 2, Dest) {
-  addSource(Source1);
-  addSource(Source2);
-}
-
-InstX8632Shld::InstX8632Shld(Cfg *Func, Variable *Dest, Variable *Source1,
-                             Variable *Source2)
-    : InstX8632(Func, InstX8632::Shld, 3, Dest) {
-  addSource(Dest);
-  addSource(Source1);
-  addSource(Source2);
-}
-
-InstX8632Shrd::InstX8632Shrd(Cfg *Func, Variable *Dest, Variable *Source1,
-                             Variable *Source2)
-    : InstX8632(Func, InstX8632::Shrd, 3, Dest) {
-  addSource(Dest);
-  addSource(Source1);
-  addSource(Source2);
-}
-
-InstX8632Label::InstX8632Label(Cfg *Func, TargetX8632 *Target)
-    : InstX8632(Func, InstX8632::Label, 0, nullptr),
-      Number(Target->makeNextLabelNumber()) {}
-
-IceString InstX8632Label::getName(const Cfg *Func) const {
-  return ".L" + Func->getFunctionName() + "$local$__" + std::to_string(Number);
-}
-
-InstX8632Br::InstX8632Br(Cfg *Func, const CfgNode *TargetTrue,
-                         const CfgNode *TargetFalse,
-                         const InstX8632Label *Label,
-                         X8632::Traits::Cond::BrCond Condition)
-    : InstX8632(Func, InstX8632::Br, 0, nullptr), Condition(Condition),
-      TargetTrue(TargetTrue), TargetFalse(TargetFalse), Label(Label) {}
-
-bool InstX8632Br::optimizeBranch(const CfgNode *NextNode) {
-  // If there is no next block, then there can be no fallthrough to
-  // optimize.
-  if (NextNode == nullptr)
-    return false;
-  // Intra-block conditional branches can't be optimized.
-  if (Label)
-    return false;
-  // If there is no fallthrough node, such as a non-default case label
-  // for a switch instruction, then there is no opportunity to
-  // optimize.
-  if (getTargetFalse() == nullptr)
-    return false;
-
-  // Unconditional branch to the next node can be removed.
-  if (Condition == X8632::Traits::Cond::Br_None &&
-      getTargetFalse() == NextNode) {
-    assert(getTargetTrue() == nullptr);
-    setDeleted();
-    return true;
-  }
-  // If the fallthrough is to the next node, set fallthrough to nullptr
-  // to indicate.
-  if (getTargetFalse() == NextNode) {
-    TargetFalse = nullptr;
-    return true;
-  }
-  // If TargetTrue is the next node, and TargetFalse is not nullptr
-  // (which was already tested above), then invert the branch
-  // condition, swap the targets, and set new fallthrough to nullptr.
-  if (getTargetTrue() == NextNode) {
-    assert(Condition != X8632::Traits::Cond::Br_None);
-    Condition = getOppositeCondition(Condition);
-    TargetTrue = getTargetFalse();
-    TargetFalse = nullptr;
-    return true;
-  }
-  return false;
-}
-
-bool InstX8632Br::repointEdge(CfgNode *OldNode, CfgNode *NewNode) {
-  if (TargetFalse == OldNode) {
-    TargetFalse = NewNode;
-    return true;
-  } else if (TargetTrue == OldNode) {
-    TargetTrue = NewNode;
-    return true;
-  }
-  return false;
-}
-
-InstX8632Jmp::InstX8632Jmp(Cfg *Func, Operand *Target)
-    : InstX8632(Func, InstX8632::Jmp, 1, nullptr) {
-  addSource(Target);
-}
-
-InstX8632Call::InstX8632Call(Cfg *Func, Variable *Dest, Operand *CallTarget)
-    : InstX8632(Func, InstX8632::Call, 1, Dest) {
-  HasSideEffects = true;
-  addSource(CallTarget);
-}
-
-InstX8632Cmov::InstX8632Cmov(Cfg *Func, Variable *Dest, Operand *Source,
-                             X8632::Traits::Cond::BrCond Condition)
-    : InstX8632(Func, InstX8632::Cmov, 2, Dest), Condition(Condition) {
-  // The final result is either the original Dest, or Source, so mark
-  // both as sources.
-  addSource(Dest);
-  addSource(Source);
-}
-
-InstX8632Cmpps::InstX8632Cmpps(Cfg *Func, Variable *Dest, Operand *Source,
-                               X8632::Traits::Cond::CmppsCond Condition)
-    : InstX8632(Func, InstX8632::Cmpps, 2, Dest), Condition(Condition) {
-  addSource(Dest);
-  addSource(Source);
-}
-
-InstX8632Cmpxchg::InstX8632Cmpxchg(Cfg *Func, Operand *DestOrAddr,
-                                   Variable *Eax, Variable *Desired,
-                                   bool Locked)
-    : InstX8632Lockable(Func, InstX8632::Cmpxchg, 3,
-                        llvm::dyn_cast<Variable>(DestOrAddr), Locked) {
-  assert(Eax->getRegNum() == RegX8632::Reg_eax);
-  addSource(DestOrAddr);
-  addSource(Eax);
-  addSource(Desired);
-}
-
-InstX8632Cmpxchg8b::InstX8632Cmpxchg8b(Cfg *Func, OperandX8632Mem *Addr,
-                                       Variable *Edx, Variable *Eax,
-                                       Variable *Ecx, Variable *Ebx,
-                                       bool Locked)
-    : InstX8632Lockable(Func, InstX8632::Cmpxchg, 5, nullptr, Locked) {
-  assert(Edx->getRegNum() == RegX8632::Reg_edx);
-  assert(Eax->getRegNum() == RegX8632::Reg_eax);
-  assert(Ecx->getRegNum() == RegX8632::Reg_ecx);
-  assert(Ebx->getRegNum() == RegX8632::Reg_ebx);
-  addSource(Addr);
-  addSource(Edx);
-  addSource(Eax);
-  addSource(Ecx);
-  addSource(Ebx);
-}
-
-InstX8632Cvt::InstX8632Cvt(Cfg *Func, Variable *Dest, Operand *Source,
-                           CvtVariant Variant)
-    : InstX8632(Func, InstX8632::Cvt, 1, Dest), Variant(Variant) {
-  addSource(Source);
-}
-
-InstX8632Icmp::InstX8632Icmp(Cfg *Func, Operand *Src0, Operand *Src1)
-    : InstX8632(Func, InstX8632::Icmp, 2, nullptr) {
-  addSource(Src0);
-  addSource(Src1);
-}
-
-InstX8632Ucomiss::InstX8632Ucomiss(Cfg *Func, Operand *Src0, Operand *Src1)
-    : InstX8632(Func, InstX8632::Ucomiss, 2, nullptr) {
-  addSource(Src0);
-  addSource(Src1);
-}
-
-InstX8632UD2::InstX8632UD2(Cfg *Func)
-    : InstX8632(Func, InstX8632::UD2, 0, nullptr) {}
-
-InstX8632Test::InstX8632Test(Cfg *Func, Operand *Src1, Operand *Src2)
-    : InstX8632(Func, InstX8632::Test, 2, nullptr) {
-  addSource(Src1);
-  addSource(Src2);
-}
-
-InstX8632Mfence::InstX8632Mfence(Cfg *Func)
-    : InstX8632(Func, InstX8632::Mfence, 0, nullptr) {
-  HasSideEffects = true;
-}
-
-InstX8632Store::InstX8632Store(Cfg *Func, Operand *Value, OperandX8632 *Mem)
-    : InstX8632(Func, InstX8632::Store, 2, nullptr) {
-  addSource(Value);
-  addSource(Mem);
-}
-
-InstX8632StoreP::InstX8632StoreP(Cfg *Func, Variable *Value,
-                                 OperandX8632Mem *Mem)
-    : InstX8632(Func, InstX8632::StoreP, 2, nullptr) {
-  addSource(Value);
-  addSource(Mem);
-}
-
-InstX8632StoreQ::InstX8632StoreQ(Cfg *Func, Variable *Value,
-                                 OperandX8632Mem *Mem)
-    : InstX8632(Func, InstX8632::StoreQ, 2, nullptr) {
-  addSource(Value);
-  addSource(Mem);
-}
-
-InstX8632Nop::InstX8632Nop(Cfg *Func, InstX8632Nop::NopVariant Variant)
-    : InstX8632(Func, InstX8632::Nop, 0, nullptr), Variant(Variant) {}
-
-InstX8632Fld::InstX8632Fld(Cfg *Func, Operand *Src)
-    : InstX8632(Func, InstX8632::Fld, 1, nullptr) {
-  addSource(Src);
-}
-
-InstX8632Fstp::InstX8632Fstp(Cfg *Func, Variable *Dest)
-    : InstX8632(Func, InstX8632::Fstp, 0, Dest) {}
-
-InstX8632Pop::InstX8632Pop(Cfg *Func, Variable *Dest)
-    : InstX8632(Func, InstX8632::Pop, 0, Dest) {
-  // A pop instruction affects the stack pointer and so it should not
-  // be allowed to be automatically dead-code eliminated.  (The
-  // corresponding push instruction doesn't need this treatment
-  // because it has no dest variable and therefore won't be dead-code
-  // eliminated.)  This is needed for late-stage liveness analysis
-  // (e.g. asm-verbose mode).
-  HasSideEffects = true;
-}
-
-InstX8632Push::InstX8632Push(Cfg *Func, Variable *Source)
-    : InstX8632(Func, InstX8632::Push, 1, nullptr) {
-  addSource(Source);
-}
-
-InstX8632Ret::InstX8632Ret(Cfg *Func, Variable *Source)
-    : InstX8632(Func, InstX8632::Ret, Source ? 1 : 0, nullptr) {
-  if (Source)
-    addSource(Source);
-}
-
-InstX8632Setcc::InstX8632Setcc(Cfg *Func, Variable *Dest,
-                               X8632::Traits::Cond::BrCond Cond)
-    : InstX8632(Func, InstX8632::Setcc, 0, Dest), Condition(Cond) {}
-
-InstX8632Xadd::InstX8632Xadd(Cfg *Func, Operand *Dest, Variable *Source,
-                             bool Locked)
-    : InstX8632Lockable(Func, InstX8632::Xadd, 2,
-                        llvm::dyn_cast<Variable>(Dest), Locked) {
-  addSource(Dest);
-  addSource(Source);
-}
-
-InstX8632Xchg::InstX8632Xchg(Cfg *Func, Operand *Dest, Variable *Source)
-    : InstX8632(Func, InstX8632::Xchg, 2, llvm::dyn_cast<Variable>(Dest)) {
-  addSource(Dest);
-  addSource(Source);
-}
-
-// ======================== Dump routines ======================== //
-
-void InstX8632::dump(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrDump();
-  Str << "[X8632] ";
-  Inst::dump(Func);
-}
-
-void InstX8632FakeRMW::dump(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrDump();
-  Type Ty = getData()->getType();
-  Str << "rmw " << InstArithmetic::getOpName(getOp()) << " " << Ty << " *";
-  getAddr()->dump(Func);
-  Str << ", ";
-  getData()->dump(Func);
-  Str << ", beacon=";
-  getBeacon()->dump(Func);
-}
-
-void InstX8632Label::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  Str << getName(Func) << ":";
-}
-
-void InstX8632Label::emitIAS(const Cfg *Func) const {
-  X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
-  Asm->BindLocalLabel(Number);
-}
-
-void InstX8632Label::dump(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrDump();
-  Str << getName(Func) << ":";
-}
-
-void InstX8632Br::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  Str << "\t";
-
-  if (Condition == X8632::Traits::Cond::Br_None) {
-    Str << "jmp";
-  } else {
-    Str << InstX8632BrAttributes[Condition].EmitString;
-  }
-
-  if (Label) {
-    Str << "\t" << Label->getName(Func);
-  } else {
-    if (Condition == X8632::Traits::Cond::Br_None) {
-      Str << "\t" << getTargetFalse()->getAsmName();
-    } else {
-      Str << "\t" << getTargetTrue()->getAsmName();
-      if (getTargetFalse()) {
-        Str << "\n\tjmp\t" << getTargetFalse()->getAsmName();
-      }
-    }
-  }
-}
-
-void InstX8632Br::emitIAS(const Cfg *Func) const {
-  X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
-  if (Label) {
-    X8632::Label *L = Asm->GetOrCreateLocalLabel(Label->getNumber());
-    // In all these cases, local Labels should only be used for Near.
-    const bool Near = true;
-    if (Condition == X8632::Traits::Cond::Br_None) {
-      Asm->jmp(L, Near);
-    } else {
-      Asm->j(Condition, L, Near);
-    }
-  } else {
-    // Pessimistically assume it's far. This only affects Labels that
-    // are not Bound.
-    const bool Near = false;
-    if (Condition == X8632::Traits::Cond::Br_None) {
-      X8632::Label *L =
-          Asm->GetOrCreateCfgNodeLabel(getTargetFalse()->getIndex());
-      assert(!getTargetTrue());
-      Asm->jmp(L, Near);
-    } else {
-      X8632::Label *L =
-          Asm->GetOrCreateCfgNodeLabel(getTargetTrue()->getIndex());
-      Asm->j(Condition, L, Near);
-      if (getTargetFalse()) {
-        X8632::Label *L2 =
-            Asm->GetOrCreateCfgNodeLabel(getTargetFalse()->getIndex());
-        Asm->jmp(L2, Near);
-      }
-    }
-  }
-}
-
-void InstX8632Br::dump(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrDump();
-  Str << "br ";
-
-  if (Condition == X8632::Traits::Cond::Br_None) {
-    Str << "label %"
-        << (Label ? Label->getName(Func) : getTargetFalse()->getName());
-    return;
-  }
-
-  Str << InstX8632BrAttributes[Condition].DisplayString;
-  if (Label) {
-    Str << ", label %" << Label->getName(Func);
-  } else {
-    Str << ", label %" << getTargetTrue()->getName();
-    if (getTargetFalse()) {
-      Str << ", label %" << getTargetFalse()->getName();
-    }
-  }
-}
-
-void InstX8632Jmp::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  assert(getSrcSize() == 1);
-  Str << "\tjmp\t*";
-  getJmpTarget()->emit(Func);
-}
-
-void InstX8632Jmp::emitIAS(const Cfg *Func) const {
-  // Note: Adapted (mostly copied) from InstX8632Call::emitIAS().
-  X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
-  Operand *Target = getJmpTarget();
-  if (const auto Var = llvm::dyn_cast<Variable>(Target)) {
-    if (Var->hasReg()) {
-      Asm->jmp(RegX8632::getEncodedGPR(Var->getRegNum()));
-    } else {
-      // The jmp instruction with a memory operand should be possible
-      // to encode, but it isn't a valid sandboxed instruction, and
-      // there shouldn't be a register allocation issue to jump
-      // through a scratch register, so we don't really need to bother
-      // implementing it.
-      llvm::report_fatal_error("Assembler can't jmp to memory operand");
-    }
-  } else if (const auto Mem = llvm::dyn_cast<OperandX8632Mem>(Target)) {
-    (void)Mem;
-    assert(Mem->getSegmentRegister() == OperandX8632Mem::DefaultSegment);
-    llvm::report_fatal_error("Assembler can't jmp to memory operand");
-  } else if (const auto CR = llvm::dyn_cast<ConstantRelocatable>(Target)) {
-    assert(CR->getOffset() == 0 && "We only support jumping to a function");
-    Asm->jmp(CR);
-  } else if (const auto Imm = llvm::dyn_cast<ConstantInteger32>(Target)) {
-    // NaCl trampoline calls refer to an address within the sandbox directly.
-    // This is usually only needed for non-IRT builds and otherwise not
-    // very portable or stable. Usually this is only done for "calls"
-    // and not jumps.
-    // TODO(jvoung): Support this when there is a lowering that
-    // actually triggers this case.
-    (void)Imm;
-    llvm::report_fatal_error("Unexpected jmp to absolute address");
-  } else {
-    llvm::report_fatal_error("Unexpected operand type");
-  }
-}
-
-void InstX8632Jmp::dump(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrDump();
-  Str << "jmp ";
-  getJmpTarget()->dump(Func);
-}
-
-void InstX8632Call::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  assert(getSrcSize() == 1);
-  Str << "\tcall\t";
-  if (const auto CI = llvm::dyn_cast<ConstantInteger32>(getCallTarget())) {
-    // Emit without a leading '$'.
-    Str << CI->getValue();
-  } else if (const auto CallTarget =
-                 llvm::dyn_cast<ConstantRelocatable>(getCallTarget())) {
-    CallTarget->emitWithoutPrefix(Func->getTarget());
-  } else {
-    Str << "*";
-    getCallTarget()->emit(Func);
-  }
-  Func->getTarget()->resetStackAdjustment();
-}
-
-void InstX8632Call::emitIAS(const Cfg *Func) const {
-  X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
-  Operand *Target = getCallTarget();
-  if (const auto Var = llvm::dyn_cast<Variable>(Target)) {
-    if (Var->hasReg()) {
-      Asm->call(RegX8632::getEncodedGPR(Var->getRegNum()));
-    } else {
-      Asm->call(static_cast<TargetX8632 *>(Func->getTarget())
-                    ->stackVarToAsmOperand(Var));
-    }
-  } else if (const auto Mem = llvm::dyn_cast<OperandX8632Mem>(Target)) {
-    assert(Mem->getSegmentRegister() == OperandX8632Mem::DefaultSegment);
-    Asm->call(Mem->toAsmAddress(Asm));
-  } else if (const auto CR = llvm::dyn_cast<ConstantRelocatable>(Target)) {
-    assert(CR->getOffset() == 0 && "We only support calling a function");
-    Asm->call(CR);
-  } else if (const auto Imm = llvm::dyn_cast<ConstantInteger32>(Target)) {
-    Asm->call(X8632::Immediate(Imm->getValue()));
-  } else {
-    llvm_unreachable("Unexpected operand type");
-  }
-  Func->getTarget()->resetStackAdjustment();
-}
-
-void InstX8632Call::dump(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrDump();
-  if (getDest()) {
-    dumpDest(Func);
-    Str << " = ";
-  }
-  Str << "call ";
-  getCallTarget()->dump(Func);
-}
-
-// The ShiftHack parameter is used to emit "cl" instead of "ecx" for
-// shift instructions, in order to be syntactically valid.  The
-// Opcode parameter needs to be char* and not IceString because of
-// template issues.
-void InstX8632::emitTwoAddress(const char *Opcode, const Inst *Inst,
-                               const Cfg *Func, bool ShiftHack) {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  assert(Inst->getSrcSize() == 2);
-  Operand *Dest = Inst->getDest();
-  if (Dest == nullptr)
-    Dest = Inst->getSrc(0);
-  assert(Dest == Inst->getSrc(0));
-  Operand *Src1 = Inst->getSrc(1);
-  Str << "\t" << Opcode << InstX8632::getWidthString(Dest->getType()) << "\t";
-  const auto ShiftReg = llvm::dyn_cast<Variable>(Src1);
-  if (ShiftHack && ShiftReg && ShiftReg->getRegNum() == RegX8632::Reg_ecx)
-    Str << "%cl";
-  else
-    Src1->emit(Func);
-  Str << ", ";
-  Dest->emit(Func);
-}
-
-void emitIASOpTyGPR(const Cfg *Func, Type Ty, const Operand *Op,
-                    const X8632::AssemblerX8632::GPREmitterOneOp &Emitter) {
-  X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
-  if (const auto Var = llvm::dyn_cast<Variable>(Op)) {
-    if (Var->hasReg()) {
-      // We cheat a little and use GPRRegister even for byte operations.
-      RegX8632::GPRRegister VarReg =
-          RegX8632::getEncodedByteRegOrGPR(Ty, Var->getRegNum());
-      (Asm->*(Emitter.Reg))(Ty, VarReg);
-    } else {
-      X8632::Traits::Address StackAddr(
-          static_cast<TargetX8632 *>(Func->getTarget())
-              ->stackVarToAsmOperand(Var));
-      (Asm->*(Emitter.Addr))(Ty, StackAddr);
-    }
-  } else if (const auto Mem = llvm::dyn_cast<OperandX8632Mem>(Op)) {
-    Mem->emitSegmentOverride(Asm);
-    (Asm->*(Emitter.Addr))(Ty, Mem->toAsmAddress(Asm));
-  } else {
-    llvm_unreachable("Unexpected operand type");
-  }
-}
-
-template <bool VarCanBeByte, bool SrcCanBeByte>
-void emitIASRegOpTyGPR(const Cfg *Func, Type Ty, const Variable *Var,
-                       const Operand *Src,
-                       const X8632::AssemblerX8632::GPREmitterRegOp &Emitter) {
-  X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
-  assert(Var->hasReg());
-  // We cheat a little and use GPRRegister even for byte operations.
-  RegX8632::GPRRegister VarReg =
-      VarCanBeByte ? RegX8632::getEncodedByteRegOrGPR(Ty, Var->getRegNum())
-                   : RegX8632::getEncodedGPR(Var->getRegNum());
-  if (const auto SrcVar = llvm::dyn_cast<Variable>(Src)) {
-    if (SrcVar->hasReg()) {
-      RegX8632::GPRRegister SrcReg =
-          SrcCanBeByte
-              ? RegX8632::getEncodedByteRegOrGPR(Ty, SrcVar->getRegNum())
-              : RegX8632::getEncodedGPR(SrcVar->getRegNum());
-      (Asm->*(Emitter.GPRGPR))(Ty, VarReg, SrcReg);
-    } else {
-      X8632::Traits::Address SrcStackAddr =
-          static_cast<TargetX8632 *>(Func->getTarget())
-              ->stackVarToAsmOperand(SrcVar);
-      (Asm->*(Emitter.GPRAddr))(Ty, VarReg, SrcStackAddr);
-    }
-  } else if (const auto Mem = llvm::dyn_cast<OperandX8632Mem>(Src)) {
-    Mem->emitSegmentOverride(Asm);
-    (Asm->*(Emitter.GPRAddr))(Ty, VarReg, Mem->toAsmAddress(Asm));
-  } else if (const auto Imm = llvm::dyn_cast<ConstantInteger32>(Src)) {
-    (Asm->*(Emitter.GPRImm))(Ty, VarReg, X8632::Immediate(Imm->getValue()));
-  } else if (const auto Reloc = llvm::dyn_cast<ConstantRelocatable>(Src)) {
-    AssemblerFixup *Fixup = Asm->createFixup(llvm::ELF::R_386_32, Reloc);
-    (Asm->*(Emitter.GPRImm))(Ty, VarReg,
-                             X8632::Immediate(Reloc->getOffset(), Fixup));
-  } else if (const auto Split = llvm::dyn_cast<VariableSplit>(Src)) {
-    (Asm->*(Emitter.GPRAddr))(Ty, VarReg, Split->toAsmAddress(Func));
-  } else {
-    llvm_unreachable("Unexpected operand type");
-  }
-}
-
-void emitIASAddrOpTyGPR(
-    const Cfg *Func, Type Ty, const X8632::Traits::Address &Addr,
-    const Operand *Src,
-    const X8632::AssemblerX8632::GPREmitterAddrOp &Emitter) {
-  X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
-  // Src can only be Reg or Immediate.
-  if (const auto SrcVar = llvm::dyn_cast<Variable>(Src)) {
-    assert(SrcVar->hasReg());
-    RegX8632::GPRRegister SrcReg =
-        RegX8632::getEncodedByteRegOrGPR(Ty, SrcVar->getRegNum());
-    (Asm->*(Emitter.AddrGPR))(Ty, Addr, SrcReg);
-  } else if (const auto Imm = llvm::dyn_cast<ConstantInteger32>(Src)) {
-    (Asm->*(Emitter.AddrImm))(Ty, Addr, X8632::Immediate(Imm->getValue()));
-  } else if (const auto Reloc = llvm::dyn_cast<ConstantRelocatable>(Src)) {
-    AssemblerFixup *Fixup = Asm->createFixup(llvm::ELF::R_386_32, Reloc);
-    (Asm->*(Emitter.AddrImm))(Ty, Addr,
-                              X8632::Immediate(Reloc->getOffset(), Fixup));
-  } else {
-    llvm_unreachable("Unexpected operand type");
-  }
-}
-
-void emitIASAsAddrOpTyGPR(
-    const Cfg *Func, Type Ty, const Operand *Op0, const Operand *Op1,
-    const X8632::AssemblerX8632::GPREmitterAddrOp &Emitter) {
-  if (const auto Op0Var = llvm::dyn_cast<Variable>(Op0)) {
-    assert(!Op0Var->hasReg());
-    X8632::Traits::Address StackAddr(
-        static_cast<TargetX8632 *>(Func->getTarget())
-            ->stackVarToAsmOperand(Op0Var));
-    emitIASAddrOpTyGPR(Func, Ty, StackAddr, Op1, Emitter);
-  } else if (const auto Op0Mem = llvm::dyn_cast<OperandX8632Mem>(Op0)) {
-    X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
-    Op0Mem->emitSegmentOverride(Asm);
-    emitIASAddrOpTyGPR(Func, Ty, Op0Mem->toAsmAddress(Asm), Op1, Emitter);
-  } else if (const auto Split = llvm::dyn_cast<VariableSplit>(Op0)) {
-    emitIASAddrOpTyGPR(Func, Ty, Split->toAsmAddress(Func), Op1, Emitter);
-  } else {
-    llvm_unreachable("Unexpected operand type");
-  }
-}
-
-void InstX8632::emitIASGPRShift(
-    const Cfg *Func, Type Ty, const Variable *Var, const Operand *Src,
-    const X8632::AssemblerX8632::GPREmitterShiftOp &Emitter) {
-  X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
-  // Technically, the Dest Var can be mem as well, but we only use Reg.
-  // We can extend this to check Dest if we decide to use that form.
-  assert(Var->hasReg());
-  // We cheat a little and use GPRRegister even for byte operations.
-  RegX8632::GPRRegister VarReg =
-      RegX8632::getEncodedByteRegOrGPR(Ty, Var->getRegNum());
-  // Src must be reg == ECX or an Imm8.
-  // This is asserted by the assembler.
-  if (const auto SrcVar = llvm::dyn_cast<Variable>(Src)) {
-    assert(SrcVar->hasReg());
-    RegX8632::GPRRegister SrcReg =
-        RegX8632::getEncodedByteRegOrGPR(Ty, SrcVar->getRegNum());
-    (Asm->*(Emitter.GPRGPR))(Ty, VarReg, SrcReg);
-  } else if (const auto Imm = llvm::dyn_cast<ConstantInteger32>(Src)) {
-    (Asm->*(Emitter.GPRImm))(Ty, VarReg, X8632::Immediate(Imm->getValue()));
-  } else {
-    llvm_unreachable("Unexpected operand type");
-  }
-}
-
-void emitIASGPRShiftDouble(
-    const Cfg *Func, const Variable *Dest, const Operand *Src1Op,
-    const Operand *Src2Op,
-    const X8632::AssemblerX8632::GPREmitterShiftD &Emitter) {
-  X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
-  // Dest can be reg or mem, but we only use the reg variant.
-  assert(Dest->hasReg());
-  RegX8632::GPRRegister DestReg = RegX8632::getEncodedGPR(Dest->getRegNum());
-  // SrcVar1 must be reg.
-  const auto SrcVar1 = llvm::cast<Variable>(Src1Op);
-  assert(SrcVar1->hasReg());
-  RegX8632::GPRRegister SrcReg = RegX8632::getEncodedGPR(SrcVar1->getRegNum());
-  Type Ty = SrcVar1->getType();
-  // Src2 can be the implicit CL register or an immediate.
-  if (const auto Imm = llvm::dyn_cast<ConstantInteger32>(Src2Op)) {
-    (Asm->*(Emitter.GPRGPRImm))(Ty, DestReg, SrcReg,
-                                X8632::Immediate(Imm->getValue()));
-  } else {
-    assert(llvm::cast<Variable>(Src2Op)->getRegNum() == RegX8632::Reg_ecx);
-    (Asm->*(Emitter.GPRGPR))(Ty, DestReg, SrcReg);
-  }
-}
-
-void emitIASXmmShift(const Cfg *Func, Type Ty, const Variable *Var,
-                     const Operand *Src,
-                     const X8632::AssemblerX8632::XmmEmitterShiftOp &Emitter) {
-  X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
-  assert(Var->hasReg());
-  RegX8632::XmmRegister VarReg = RegX8632::getEncodedXmm(Var->getRegNum());
-  if (const auto SrcVar = llvm::dyn_cast<Variable>(Src)) {
-    if (SrcVar->hasReg()) {
-      RegX8632::XmmRegister SrcReg =
-          RegX8632::getEncodedXmm(SrcVar->getRegNum());
-      (Asm->*(Emitter.XmmXmm))(Ty, VarReg, SrcReg);
-    } else {
-      X8632::Traits::Address SrcStackAddr =
-          static_cast<TargetX8632 *>(Func->getTarget())
-              ->stackVarToAsmOperand(SrcVar);
-      (Asm->*(Emitter.XmmAddr))(Ty, VarReg, SrcStackAddr);
-    }
-  } else if (const auto Mem = llvm::dyn_cast<OperandX8632Mem>(Src)) {
-    assert(Mem->getSegmentRegister() == OperandX8632Mem::DefaultSegment);
-    (Asm->*(Emitter.XmmAddr))(Ty, VarReg, Mem->toAsmAddress(Asm));
-  } else if (const auto Imm = llvm::dyn_cast<ConstantInteger32>(Src)) {
-    (Asm->*(Emitter.XmmImm))(Ty, VarReg, X8632::Immediate(Imm->getValue()));
-  } else {
-    llvm_unreachable("Unexpected operand type");
-  }
-}
-
-void emitIASRegOpTyXMM(const Cfg *Func, Type Ty, const Variable *Var,
-                       const Operand *Src,
-                       const X8632::AssemblerX8632::XmmEmitterRegOp &Emitter) {
-  X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
-  assert(Var->hasReg());
-  RegX8632::XmmRegister VarReg = RegX8632::getEncodedXmm(Var->getRegNum());
-  if (const auto SrcVar = llvm::dyn_cast<Variable>(Src)) {
-    if (SrcVar->hasReg()) {
-      RegX8632::XmmRegister SrcReg =
-          RegX8632::getEncodedXmm(SrcVar->getRegNum());
-      (Asm->*(Emitter.XmmXmm))(Ty, VarReg, SrcReg);
-    } else {
-      X8632::Traits::Address SrcStackAddr =
-          static_cast<TargetX8632 *>(Func->getTarget())
-              ->stackVarToAsmOperand(SrcVar);
-      (Asm->*(Emitter.XmmAddr))(Ty, VarReg, SrcStackAddr);
-    }
-  } else if (const auto Mem = llvm::dyn_cast<OperandX8632Mem>(Src)) {
-    assert(Mem->getSegmentRegister() == OperandX8632Mem::DefaultSegment);
-    (Asm->*(Emitter.XmmAddr))(Ty, VarReg, Mem->toAsmAddress(Asm));
-  } else if (const auto Imm = llvm::dyn_cast<Constant>(Src)) {
-    (Asm->*(Emitter.XmmAddr))(Ty, VarReg,
-                              X8632::Traits::Address::ofConstPool(Asm, Imm));
-  } else {
-    llvm_unreachable("Unexpected operand type");
-  }
-}
-
-template <typename DReg_t, typename SReg_t, DReg_t (*destEnc)(int32_t),
-          SReg_t (*srcEnc)(int32_t)>
-void emitIASCastRegOp(
-    const Cfg *Func, Type DispatchTy, const Variable *Dest, const Operand *Src,
-    const X8632::AssemblerX8632::CastEmitterRegOp<DReg_t, SReg_t> Emitter) {
-  X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
-  assert(Dest->hasReg());
-  DReg_t DestReg = destEnc(Dest->getRegNum());
-  if (const auto SrcVar = llvm::dyn_cast<Variable>(Src)) {
-    if (SrcVar->hasReg()) {
-      SReg_t SrcReg = srcEnc(SrcVar->getRegNum());
-      (Asm->*(Emitter.RegReg))(DispatchTy, DestReg, SrcReg);
-    } else {
-      X8632::Traits::Address SrcStackAddr =
-          static_cast<TargetX8632 *>(Func->getTarget())
-              ->stackVarToAsmOperand(SrcVar);
-      (Asm->*(Emitter.RegAddr))(DispatchTy, DestReg, SrcStackAddr);
-    }
-  } else if (const auto Mem = llvm::dyn_cast<OperandX8632Mem>(Src)) {
-    Mem->emitSegmentOverride(Asm);
-    (Asm->*(Emitter.RegAddr))(DispatchTy, DestReg, Mem->toAsmAddress(Asm));
-  } else {
-    llvm_unreachable("Unexpected operand type");
-  }
-}
-
-template <typename DReg_t, typename SReg_t, DReg_t (*destEnc)(int32_t),
-          SReg_t (*srcEnc)(int32_t)>
-void emitIASThreeOpImmOps(
-    const Cfg *Func, Type DispatchTy, const Variable *Dest, const Operand *Src0,
-    const Operand *Src1,
-    const X8632::AssemblerX8632::ThreeOpImmEmitter<DReg_t, SReg_t> Emitter) {
-  X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
-  // This only handles Dest being a register, and Src1 being an immediate.
-  assert(Dest->hasReg());
-  DReg_t DestReg = destEnc(Dest->getRegNum());
-  X8632::Immediate Imm(llvm::cast<ConstantInteger32>(Src1)->getValue());
-  if (const auto SrcVar = llvm::dyn_cast<Variable>(Src0)) {
-    if (SrcVar->hasReg()) {
-      SReg_t SrcReg = srcEnc(SrcVar->getRegNum());
-      (Asm->*(Emitter.RegRegImm))(DispatchTy, DestReg, SrcReg, Imm);
-    } else {
-      X8632::Traits::Address SrcStackAddr =
-          static_cast<TargetX8632 *>(Func->getTarget())
-              ->stackVarToAsmOperand(SrcVar);
-      (Asm->*(Emitter.RegAddrImm))(DispatchTy, DestReg, SrcStackAddr, Imm);
-    }
-  } else if (const auto Mem = llvm::dyn_cast<OperandX8632Mem>(Src0)) {
-    Mem->emitSegmentOverride(Asm);
-    (Asm->*(Emitter.RegAddrImm))(DispatchTy, DestReg, Mem->toAsmAddress(Asm),
-                                 Imm);
-  } else {
-    llvm_unreachable("Unexpected operand type");
-  }
-}
-
-void emitIASMovlikeXMM(const Cfg *Func, const Variable *Dest,
-                       const Operand *Src,
-                       const X8632::AssemblerX8632::XmmEmitterMovOps Emitter) {
-  X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
-  if (Dest->hasReg()) {
-    RegX8632::XmmRegister DestReg = RegX8632::getEncodedXmm(Dest->getRegNum());
-    if (const auto SrcVar = llvm::dyn_cast<Variable>(Src)) {
-      if (SrcVar->hasReg()) {
-        (Asm->*(Emitter.XmmXmm))(DestReg,
-                                 RegX8632::getEncodedXmm(SrcVar->getRegNum()));
-      } else {
-        X8632::Traits::Address StackAddr(
-            static_cast<TargetX8632 *>(Func->getTarget())
-                ->stackVarToAsmOperand(SrcVar));
-        (Asm->*(Emitter.XmmAddr))(DestReg, StackAddr);
-      }
-    } else if (const auto SrcMem = llvm::dyn_cast<OperandX8632Mem>(Src)) {
-      assert(SrcMem->getSegmentRegister() == OperandX8632Mem::DefaultSegment);
-      (Asm->*(Emitter.XmmAddr))(DestReg, SrcMem->toAsmAddress(Asm));
-    } else {
-      llvm_unreachable("Unexpected operand type");
-    }
-  } else {
-    X8632::Traits::Address StackAddr(
-        static_cast<TargetX8632 *>(Func->getTarget())
-            ->stackVarToAsmOperand(Dest));
-    // Src must be a register in this case.
-    const auto SrcVar = llvm::cast<Variable>(Src);
-    assert(SrcVar->hasReg());
-    (Asm->*(Emitter.AddrXmm))(StackAddr,
-                              RegX8632::getEncodedXmm(SrcVar->getRegNum()));
-  }
-}
-
-// In-place ops
-template <> const char *InstX8632Bswap::Opcode = "bswap";
-template <> const char *InstX8632Neg::Opcode = "neg";
-// Unary ops
-template <> const char *InstX8632Bsf::Opcode = "bsf";
-template <> const char *InstX8632Bsr::Opcode = "bsr";
-template <> const char *InstX8632Lea::Opcode = "lea";
-template <> const char *InstX8632Movd::Opcode = "movd";
-template <> const char *InstX8632Movsx::Opcode = "movs";
-template <> const char *InstX8632Movzx::Opcode = "movz";
-template <> const char *InstX8632Sqrtss::Opcode = "sqrtss";
-template <> const char *InstX8632Cbwdq::Opcode = "cbw/cwd/cdq";
-// Mov-like ops
-template <> const char *InstX8632Mov::Opcode = "mov";
-template <> const char *InstX8632Movp::Opcode = "movups";
-template <> const char *InstX8632Movq::Opcode = "movq";
-// Binary ops
-template <> const char *InstX8632Add::Opcode = "add";
-template <> const char *InstX8632AddRMW::Opcode = "add";
-template <> const char *InstX8632Addps::Opcode = "addps";
-template <> const char *InstX8632Adc::Opcode = "adc";
-template <> const char *InstX8632AdcRMW::Opcode = "adc";
-template <> const char *InstX8632Addss::Opcode = "addss";
-template <> const char *InstX8632Padd::Opcode = "padd";
-template <> const char *InstX8632Sub::Opcode = "sub";
-template <> const char *InstX8632SubRMW::Opcode = "sub";
-template <> const char *InstX8632Subps::Opcode = "subps";
-template <> const char *InstX8632Subss::Opcode = "subss";
-template <> const char *InstX8632Sbb::Opcode = "sbb";
-template <> const char *InstX8632SbbRMW::Opcode = "sbb";
-template <> const char *InstX8632Psub::Opcode = "psub";
-template <> const char *InstX8632And::Opcode = "and";
-template <> const char *InstX8632AndRMW::Opcode = "and";
-template <> const char *InstX8632Pand::Opcode = "pand";
-template <> const char *InstX8632Pandn::Opcode = "pandn";
-template <> const char *InstX8632Or::Opcode = "or";
-template <> const char *InstX8632OrRMW::Opcode = "or";
-template <> const char *InstX8632Por::Opcode = "por";
-template <> const char *InstX8632Xor::Opcode = "xor";
-template <> const char *InstX8632XorRMW::Opcode = "xor";
-template <> const char *InstX8632Pxor::Opcode = "pxor";
-template <> const char *InstX8632Imul::Opcode = "imul";
-template <> const char *InstX8632Mulps::Opcode = "mulps";
-template <> const char *InstX8632Mulss::Opcode = "mulss";
-template <> const char *InstX8632Pmull::Opcode = "pmull";
-template <> const char *InstX8632Pmuludq::Opcode = "pmuludq";
-template <> const char *InstX8632Div::Opcode = "div";
-template <> const char *InstX8632Divps::Opcode = "divps";
-template <> const char *InstX8632Idiv::Opcode = "idiv";
-template <> const char *InstX8632Divss::Opcode = "divss";
-template <> const char *InstX8632Rol::Opcode = "rol";
-template <> const char *InstX8632Shl::Opcode = "shl";
-template <> const char *InstX8632Psll::Opcode = "psll";
-template <> const char *InstX8632Shr::Opcode = "shr";
-template <> const char *InstX8632Sar::Opcode = "sar";
-template <> const char *InstX8632Psra::Opcode = "psra";
-template <> const char *InstX8632Psrl::Opcode = "psrl";
-template <> const char *InstX8632Pcmpeq::Opcode = "pcmpeq";
-template <> const char *InstX8632Pcmpgt::Opcode = "pcmpgt";
-template <> const char *InstX8632MovssRegs::Opcode = "movss";
-// Ternary ops
-template <> const char *InstX8632Insertps::Opcode = "insertps";
-template <> const char *InstX8632Shufps::Opcode = "shufps";
-template <> const char *InstX8632Pinsr::Opcode = "pinsr";
-template <> const char *InstX8632Blendvps::Opcode = "blendvps";
-template <> const char *InstX8632Pblendvb::Opcode = "pblendvb";
-// Three address ops
-template <> const char *InstX8632Pextr::Opcode = "pextr";
-template <> const char *InstX8632Pshufd::Opcode = "pshufd";
-
-// Inplace GPR ops
-template <>
-const X8632::AssemblerX8632::GPREmitterOneOp InstX8632Bswap::Emitter = {
-    &X8632::AssemblerX8632::bswap, nullptr /* only a reg form exists */
-};
-template <>
-const X8632::AssemblerX8632::GPREmitterOneOp InstX8632Neg::Emitter = {
-    &X8632::AssemblerX8632::neg, &X8632::AssemblerX8632::neg};
-
-// Unary GPR ops
-template <>
-const X8632::AssemblerX8632::GPREmitterRegOp InstX8632Bsf::Emitter = {
-    &X8632::AssemblerX8632::bsf, &X8632::AssemblerX8632::bsf, nullptr};
-template <>
-const X8632::AssemblerX8632::GPREmitterRegOp InstX8632Bsr::Emitter = {
-    &X8632::AssemblerX8632::bsr, &X8632::AssemblerX8632::bsr, nullptr};
-template <>
-const X8632::AssemblerX8632::GPREmitterRegOp InstX8632Lea::Emitter = {
-    /* reg/reg and reg/imm are illegal */ nullptr, &X8632::AssemblerX8632::lea,
-    nullptr};
-template <>
-const X8632::AssemblerX8632::GPREmitterRegOp InstX8632Movsx::Emitter = {
-    &X8632::AssemblerX8632::movsx, &X8632::AssemblerX8632::movsx, nullptr};
-template <>
-const X8632::AssemblerX8632::GPREmitterRegOp InstX8632Movzx::Emitter = {
-    &X8632::AssemblerX8632::movzx, &X8632::AssemblerX8632::movzx, nullptr};
-
-// Unary XMM ops
-template <>
-const X8632::AssemblerX8632::XmmEmitterRegOp InstX8632Sqrtss::Emitter = {
-    &X8632::AssemblerX8632::sqrtss, &X8632::AssemblerX8632::sqrtss};
-
-// Binary GPR ops
-template <>
-const X8632::AssemblerX8632::GPREmitterRegOp InstX8632Add::Emitter = {
-    &X8632::AssemblerX8632::add, &X8632::AssemblerX8632::add,
-    &X8632::AssemblerX8632::add};
-template <>
-const X8632::AssemblerX8632::GPREmitterAddrOp InstX8632AddRMW::Emitter = {
-    &X8632::AssemblerX8632::add, &X8632::AssemblerX8632::add};
-template <>
-const X8632::AssemblerX8632::GPREmitterRegOp InstX8632Adc::Emitter = {
-    &X8632::AssemblerX8632::adc, &X8632::AssemblerX8632::adc,
-    &X8632::AssemblerX8632::adc};
-template <>
-const X8632::AssemblerX8632::GPREmitterAddrOp InstX8632AdcRMW::Emitter = {
-    &X8632::AssemblerX8632::adc, &X8632::AssemblerX8632::adc};
-template <>
-const X8632::AssemblerX8632::GPREmitterRegOp InstX8632And::Emitter = {
-    &X8632::AssemblerX8632::And, &X8632::AssemblerX8632::And,
-    &X8632::AssemblerX8632::And};
-template <>
-const X8632::AssemblerX8632::GPREmitterAddrOp InstX8632AndRMW::Emitter = {
-    &X8632::AssemblerX8632::And, &X8632::AssemblerX8632::And};
-template <>
-const X8632::AssemblerX8632::GPREmitterRegOp InstX8632Or::Emitter = {
-    &X8632::AssemblerX8632::Or, &X8632::AssemblerX8632::Or,
-    &X8632::AssemblerX8632::Or};
-template <>
-const X8632::AssemblerX8632::GPREmitterAddrOp InstX8632OrRMW::Emitter = {
-    &X8632::AssemblerX8632::Or, &X8632::AssemblerX8632::Or};
-template <>
-const X8632::AssemblerX8632::GPREmitterRegOp InstX8632Sbb::Emitter = {
-    &X8632::AssemblerX8632::sbb, &X8632::AssemblerX8632::sbb,
-    &X8632::AssemblerX8632::sbb};
-template <>
-const X8632::AssemblerX8632::GPREmitterAddrOp InstX8632SbbRMW::Emitter = {
-    &X8632::AssemblerX8632::sbb, &X8632::AssemblerX8632::sbb};
-template <>
-const X8632::AssemblerX8632::GPREmitterRegOp InstX8632Sub::Emitter = {
-    &X8632::AssemblerX8632::sub, &X8632::AssemblerX8632::sub,
-    &X8632::AssemblerX8632::sub};
-template <>
-const X8632::AssemblerX8632::GPREmitterAddrOp InstX8632SubRMW::Emitter = {
-    &X8632::AssemblerX8632::sub, &X8632::AssemblerX8632::sub};
-template <>
-const X8632::AssemblerX8632::GPREmitterRegOp InstX8632Xor::Emitter = {
-    &X8632::AssemblerX8632::Xor, &X8632::AssemblerX8632::Xor,
-    &X8632::AssemblerX8632::Xor};
-template <>
-const X8632::AssemblerX8632::GPREmitterAddrOp InstX8632XorRMW::Emitter = {
-    &X8632::AssemblerX8632::Xor, &X8632::AssemblerX8632::Xor};
-
-// Binary Shift GPR ops
-template <>
-const X8632::AssemblerX8632::GPREmitterShiftOp InstX8632Rol::Emitter = {
-    &X8632::AssemblerX8632::rol, &X8632::AssemblerX8632::rol};
-template <>
-const X8632::AssemblerX8632::GPREmitterShiftOp InstX8632Sar::Emitter = {
-    &X8632::AssemblerX8632::sar, &X8632::AssemblerX8632::sar};
-template <>
-const X8632::AssemblerX8632::GPREmitterShiftOp InstX8632Shl::Emitter = {
-    &X8632::AssemblerX8632::shl, &X8632::AssemblerX8632::shl};
-template <>
-const X8632::AssemblerX8632::GPREmitterShiftOp InstX8632Shr::Emitter = {
-    &X8632::AssemblerX8632::shr, &X8632::AssemblerX8632::shr};
-
-// Binary XMM ops
-template <>
-const X8632::AssemblerX8632::XmmEmitterRegOp InstX8632Addss::Emitter = {
-    &X8632::AssemblerX8632::addss, &X8632::AssemblerX8632::addss};
-template <>
-const X8632::AssemblerX8632::XmmEmitterRegOp InstX8632Addps::Emitter = {
-    &X8632::AssemblerX8632::addps, &X8632::AssemblerX8632::addps};
-template <>
-const X8632::AssemblerX8632::XmmEmitterRegOp InstX8632Divss::Emitter = {
-    &X8632::AssemblerX8632::divss, &X8632::AssemblerX8632::divss};
-template <>
-const X8632::AssemblerX8632::XmmEmitterRegOp InstX8632Divps::Emitter = {
-    &X8632::AssemblerX8632::divps, &X8632::AssemblerX8632::divps};
-template <>
-const X8632::AssemblerX8632::XmmEmitterRegOp InstX8632Mulss::Emitter = {
-    &X8632::AssemblerX8632::mulss, &X8632::AssemblerX8632::mulss};
-template <>
-const X8632::AssemblerX8632::XmmEmitterRegOp InstX8632Mulps::Emitter = {
-    &X8632::AssemblerX8632::mulps, &X8632::AssemblerX8632::mulps};
-template <>
-const X8632::AssemblerX8632::XmmEmitterRegOp InstX8632Padd::Emitter = {
-    &X8632::AssemblerX8632::padd, &X8632::AssemblerX8632::padd};
-template <>
-const X8632::AssemblerX8632::XmmEmitterRegOp InstX8632Pand::Emitter = {
-    &X8632::AssemblerX8632::pand, &X8632::AssemblerX8632::pand};
-template <>
-const X8632::AssemblerX8632::XmmEmitterRegOp InstX8632Pandn::Emitter = {
-    &X8632::AssemblerX8632::pandn, &X8632::AssemblerX8632::pandn};
-template <>
-const X8632::AssemblerX8632::XmmEmitterRegOp InstX8632Pcmpeq::Emitter = {
-    &X8632::AssemblerX8632::pcmpeq, &X8632::AssemblerX8632::pcmpeq};
-template <>
-const X8632::AssemblerX8632::XmmEmitterRegOp InstX8632Pcmpgt::Emitter = {
-    &X8632::AssemblerX8632::pcmpgt, &X8632::AssemblerX8632::pcmpgt};
-template <>
-const X8632::AssemblerX8632::XmmEmitterRegOp InstX8632Pmull::Emitter = {
-    &X8632::AssemblerX8632::pmull, &X8632::AssemblerX8632::pmull};
-template <>
-const X8632::AssemblerX8632::XmmEmitterRegOp InstX8632Pmuludq::Emitter = {
-    &X8632::AssemblerX8632::pmuludq, &X8632::AssemblerX8632::pmuludq};
-template <>
-const X8632::AssemblerX8632::XmmEmitterRegOp InstX8632Por::Emitter = {
-    &X8632::AssemblerX8632::por, &X8632::AssemblerX8632::por};
-template <>
-const X8632::AssemblerX8632::XmmEmitterRegOp InstX8632Psub::Emitter = {
-    &X8632::AssemblerX8632::psub, &X8632::AssemblerX8632::psub};
-template <>
-const X8632::AssemblerX8632::XmmEmitterRegOp InstX8632Pxor::Emitter = {
-    &X8632::AssemblerX8632::pxor, &X8632::AssemblerX8632::pxor};
-template <>
-const X8632::AssemblerX8632::XmmEmitterRegOp InstX8632Subss::Emitter = {
-    &X8632::AssemblerX8632::subss, &X8632::AssemblerX8632::subss};
-template <>
-const X8632::AssemblerX8632::XmmEmitterRegOp InstX8632Subps::Emitter = {
-    &X8632::AssemblerX8632::subps, &X8632::AssemblerX8632::subps};
-
-// Binary XMM Shift ops
-template <>
-const X8632::AssemblerX8632::XmmEmitterShiftOp InstX8632Psll::Emitter = {
-    &X8632::AssemblerX8632::psll, &X8632::AssemblerX8632::psll,
-    &X8632::AssemblerX8632::psll};
-template <>
-const X8632::AssemblerX8632::XmmEmitterShiftOp InstX8632Psra::Emitter = {
-    &X8632::AssemblerX8632::psra, &X8632::AssemblerX8632::psra,
-    &X8632::AssemblerX8632::psra};
-template <>
-const X8632::AssemblerX8632::XmmEmitterShiftOp InstX8632Psrl::Emitter = {
-    &X8632::AssemblerX8632::psrl, &X8632::AssemblerX8632::psrl,
-    &X8632::AssemblerX8632::psrl};
-
-template <> void InstX8632Sqrtss::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  assert(getSrcSize() == 1);
-  Type Ty = getSrc(0)->getType();
-  assert(isScalarFloatingType(Ty));
-  Str << "\tsqrt" << TypeX8632Attributes[Ty].SdSsString << "\t";
-  getSrc(0)->emit(Func);
-  Str << ", ";
-  getDest()->emit(Func);
-}
-
-template <> void InstX8632Addss::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  char buf[30];
-  snprintf(buf, llvm::array_lengthof(buf), "add%s",
-           TypeX8632Attributes[getDest()->getType()].SdSsString);
-  emitTwoAddress(buf, this, Func);
-}
-
-template <> void InstX8632Padd::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  char buf[30];
-  snprintf(buf, llvm::array_lengthof(buf), "padd%s",
-           TypeX8632Attributes[getDest()->getType()].PackString);
-  emitTwoAddress(buf, this, Func);
-}
-
-template <> void InstX8632Pmull::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  char buf[30];
-  bool TypesAreValid = getDest()->getType() == IceType_v4i32 ||
-                       getDest()->getType() == IceType_v8i16;
-  bool InstructionSetIsValid =
-      getDest()->getType() == IceType_v8i16 ||
-      static_cast<TargetX8632 *>(Func->getTarget())->getInstructionSet() >=
-          X8632::Traits::SSE4_1;
-  (void)TypesAreValid;
-  (void)InstructionSetIsValid;
-  assert(TypesAreValid);
-  assert(InstructionSetIsValid);
-  snprintf(buf, llvm::array_lengthof(buf), "pmull%s",
-           TypeX8632Attributes[getDest()->getType()].PackString);
-  emitTwoAddress(buf, this, Func);
-}
-
-template <> void InstX8632Pmull::emitIAS(const Cfg *Func) const {
-  Type Ty = getDest()->getType();
-  bool TypesAreValid = Ty == IceType_v4i32 || Ty == IceType_v8i16;
-  bool InstructionSetIsValid =
-      Ty == IceType_v8i16 ||
-      static_cast<TargetX8632 *>(Func->getTarget())->getInstructionSet() >=
-          X8632::Traits::SSE4_1;
-  (void)TypesAreValid;
-  (void)InstructionSetIsValid;
-  assert(TypesAreValid);
-  assert(InstructionSetIsValid);
-  assert(getSrcSize() == 2);
-  Type ElementTy = typeElementType(Ty);
-  emitIASRegOpTyXMM(Func, ElementTy, getDest(), getSrc(1), Emitter);
-}
-
-template <> void InstX8632Subss::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  char buf[30];
-  snprintf(buf, llvm::array_lengthof(buf), "sub%s",
-           TypeX8632Attributes[getDest()->getType()].SdSsString);
-  emitTwoAddress(buf, this, Func);
-}
-
-template <> void InstX8632Psub::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  char buf[30];
-  snprintf(buf, llvm::array_lengthof(buf), "psub%s",
-           TypeX8632Attributes[getDest()->getType()].PackString);
-  emitTwoAddress(buf, this, Func);
-}
-
-template <> void InstX8632Mulss::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  char buf[30];
-  snprintf(buf, llvm::array_lengthof(buf), "mul%s",
-           TypeX8632Attributes[getDest()->getType()].SdSsString);
-  emitTwoAddress(buf, this, Func);
-}
-
-template <> void InstX8632Pmuludq::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  assert(getSrc(0)->getType() == IceType_v4i32 &&
-         getSrc(1)->getType() == IceType_v4i32);
-  emitTwoAddress(Opcode, this, Func);
-}
-
-template <> void InstX8632Divss::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  char buf[30];
-  snprintf(buf, llvm::array_lengthof(buf), "div%s",
-           TypeX8632Attributes[getDest()->getType()].SdSsString);
-  emitTwoAddress(buf, this, Func);
-}
-
-template <> void InstX8632Div::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  assert(getSrcSize() == 3);
-  Operand *Src1 = getSrc(1);
-  Str << "\t" << Opcode << getWidthString(Src1->getType()) << "\t";
-  Src1->emit(Func);
-}
-
-template <> void InstX8632Div::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 3);
-  const Operand *Src = getSrc(1);
-  Type Ty = Src->getType();
-  const static X8632::AssemblerX8632::GPREmitterOneOp Emitter = {
-      &X8632::AssemblerX8632::div, &X8632::AssemblerX8632::div};
-  emitIASOpTyGPR(Func, Ty, Src, Emitter);
-}
-
-template <> void InstX8632Idiv::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  assert(getSrcSize() == 3);
-  Operand *Src1 = getSrc(1);
-  Str << "\t" << Opcode << getWidthString(Src1->getType()) << "\t";
-  Src1->emit(Func);
-}
-
-template <> void InstX8632Idiv::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 3);
-  const Operand *Src = getSrc(1);
-  Type Ty = Src->getType();
-  const static X8632::AssemblerX8632::GPREmitterOneOp Emitter = {
-      &X8632::AssemblerX8632::idiv, &X8632::AssemblerX8632::idiv};
-  emitIASOpTyGPR(Func, Ty, Src, Emitter);
-}
-
-namespace {
-
-// pblendvb and blendvps take xmm0 as a final implicit argument.
-void emitVariableBlendInst(const char *Opcode, const Inst *Inst,
-                           const Cfg *Func) {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  assert(Inst->getSrcSize() == 3);
-  assert(llvm::cast<Variable>(Inst->getSrc(2))->getRegNum() ==
-         RegX8632::Reg_xmm0);
-  Str << "\t" << Opcode << "\t";
-  Inst->getSrc(1)->emit(Func);
-  Str << ", ";
-  Inst->getDest()->emit(Func);
-}
-
-void emitIASVariableBlendInst(
-    const Inst *Inst, const Cfg *Func,
-    const X8632::AssemblerX8632::XmmEmitterRegOp &Emitter) {
-  assert(Inst->getSrcSize() == 3);
-  assert(llvm::cast<Variable>(Inst->getSrc(2))->getRegNum() ==
-         RegX8632::Reg_xmm0);
-  const Variable *Dest = Inst->getDest();
-  const Operand *Src = Inst->getSrc(1);
-  emitIASRegOpTyXMM(Func, Dest->getType(), Dest, Src, Emitter);
-}
-
-} // end anonymous namespace
-
-template <> void InstX8632Blendvps::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  assert(static_cast<TargetX8632 *>(Func->getTarget())->getInstructionSet() >=
-         X8632::Traits::SSE4_1);
-  emitVariableBlendInst(Opcode, this, Func);
-}
-
-template <> void InstX8632Blendvps::emitIAS(const Cfg *Func) const {
-  assert(static_cast<TargetX8632 *>(Func->getTarget())->getInstructionSet() >=
-         X8632::Traits::SSE4_1);
-  static const X8632::AssemblerX8632::XmmEmitterRegOp Emitter = {
-      &X8632::AssemblerX8632::blendvps, &X8632::AssemblerX8632::blendvps};
-  emitIASVariableBlendInst(this, Func, Emitter);
-}
-
-template <> void InstX8632Pblendvb::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  assert(static_cast<TargetX8632 *>(Func->getTarget())->getInstructionSet() >=
-         X8632::Traits::SSE4_1);
-  emitVariableBlendInst(Opcode, this, Func);
-}
-
-template <> void InstX8632Pblendvb::emitIAS(const Cfg *Func) const {
-  assert(static_cast<TargetX8632 *>(Func->getTarget())->getInstructionSet() >=
-         X8632::Traits::SSE4_1);
-  static const X8632::AssemblerX8632::XmmEmitterRegOp Emitter = {
-      &X8632::AssemblerX8632::pblendvb, &X8632::AssemblerX8632::pblendvb};
-  emitIASVariableBlendInst(this, Func, Emitter);
-}
-
-template <> void InstX8632Imul::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  assert(getSrcSize() == 2);
-  Variable *Dest = getDest();
-  if (isByteSizedArithType(Dest->getType())) {
-    // The 8-bit version of imul only allows the form "imul r/m8".
-    const auto Src0Var = llvm::dyn_cast<Variable>(getSrc(0));
-    (void)Src0Var;
-    assert(Src0Var && Src0Var->getRegNum() == RegX8632::Reg_eax);
-    Str << "\timulb\t";
-    getSrc(1)->emit(Func);
-  } else if (llvm::isa<Constant>(getSrc(1))) {
-    Str << "\timul" << getWidthString(Dest->getType()) << "\t";
-    getSrc(1)->emit(Func);
-    Str << ", ";
-    getSrc(0)->emit(Func);
-    Str << ", ";
-    Dest->emit(Func);
-  } else {
-    emitTwoAddress("imul", this, Func);
-  }
-}
-
-template <> void InstX8632Imul::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 2);
-  const Variable *Var = getDest();
-  Type Ty = Var->getType();
-  const Operand *Src = getSrc(1);
-  if (isByteSizedArithType(Ty)) {
-    // The 8-bit version of imul only allows the form "imul r/m8".
-    const auto Src0Var = llvm::dyn_cast<Variable>(getSrc(0));
-    (void)Src0Var;
-    assert(Src0Var && Src0Var->getRegNum() == RegX8632::Reg_eax);
-    const X8632::AssemblerX8632::GPREmitterOneOp Emitter = {
-        &X8632::AssemblerX8632::imul, &X8632::AssemblerX8632::imul};
-    emitIASOpTyGPR(Func, Ty, getSrc(1), Emitter);
-  } else {
-    // We only use imul as a two-address instruction even though
-    // there is a 3 operand version when one of the operands is a constant.
-    assert(Var == getSrc(0));
-    const X8632::AssemblerX8632::GPREmitterRegOp Emitter = {
-        &X8632::AssemblerX8632::imul, &X8632::AssemblerX8632::imul,
-        &X8632::AssemblerX8632::imul};
-    emitIASRegOpTyGPR(Func, Ty, Var, Src, Emitter);
-  }
-}
-
-template <> void InstX8632Insertps::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 3);
-  assert(static_cast<TargetX8632 *>(Func->getTarget())->getInstructionSet() >=
-         X8632::Traits::SSE4_1);
-  const Variable *Dest = getDest();
-  assert(Dest == getSrc(0));
-  Type Ty = Dest->getType();
-  static const X8632::AssemblerX8632::ThreeOpImmEmitter<
-      RegX8632::XmmRegister, RegX8632::XmmRegister> Emitter = {
-      &X8632::AssemblerX8632::insertps, &X8632::AssemblerX8632::insertps};
-  emitIASThreeOpImmOps<RegX8632::XmmRegister, RegX8632::XmmRegister,
-                       RegX8632::getEncodedXmm, RegX8632::getEncodedXmm>(
-      Func, Ty, Dest, getSrc(1), getSrc(2), Emitter);
-}
-
-template <> void InstX8632Cbwdq::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  assert(getSrcSize() == 1);
-  Operand *Src0 = getSrc(0);
-  assert(llvm::isa<Variable>(Src0));
-  assert(llvm::cast<Variable>(Src0)->getRegNum() == RegX8632::Reg_eax);
-  switch (Src0->getType()) {
-  default:
-    llvm_unreachable("unexpected source type!");
-    break;
-  case IceType_i8:
-    assert(getDest()->getRegNum() == RegX8632::Reg_eax);
-    Str << "\tcbtw";
-    break;
-  case IceType_i16:
-    assert(getDest()->getRegNum() == RegX8632::Reg_edx);
-    Str << "\tcwtd";
-    break;
-  case IceType_i32:
-    assert(getDest()->getRegNum() == RegX8632::Reg_edx);
-    Str << "\tcltd";
-    break;
-  }
-}
-
-template <> void InstX8632Cbwdq::emitIAS(const Cfg *Func) const {
-  X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
-  assert(getSrcSize() == 1);
-  Operand *Src0 = getSrc(0);
-  assert(llvm::isa<Variable>(Src0));
-  assert(llvm::cast<Variable>(Src0)->getRegNum() == RegX8632::Reg_eax);
-  switch (Src0->getType()) {
-  default:
-    llvm_unreachable("unexpected source type!");
-    break;
-  case IceType_i8:
-    assert(getDest()->getRegNum() == RegX8632::Reg_eax);
-    Asm->cbw();
-    break;
-  case IceType_i16:
-    assert(getDest()->getRegNum() == RegX8632::Reg_edx);
-    Asm->cwd();
-    break;
-  case IceType_i32:
-    assert(getDest()->getRegNum() == RegX8632::Reg_edx);
-    Asm->cdq();
-    break;
-  }
-}
-
-void InstX8632Mul::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  assert(getSrcSize() == 2);
-  assert(llvm::isa<Variable>(getSrc(0)));
-  assert(llvm::cast<Variable>(getSrc(0))->getRegNum() == RegX8632::Reg_eax);
-  assert(getDest()->getRegNum() == RegX8632::Reg_eax); // TODO: allow edx?
-  Str << "\tmul" << getWidthString(getDest()->getType()) << "\t";
-  getSrc(1)->emit(Func);
-}
-
-void InstX8632Mul::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 2);
-  assert(llvm::isa<Variable>(getSrc(0)));
-  assert(llvm::cast<Variable>(getSrc(0))->getRegNum() == RegX8632::Reg_eax);
-  assert(getDest()->getRegNum() == RegX8632::Reg_eax); // TODO: allow edx?
-  const Operand *Src = getSrc(1);
-  Type Ty = Src->getType();
-  const static X8632::AssemblerX8632::GPREmitterOneOp Emitter = {
-      &X8632::AssemblerX8632::mul, &X8632::AssemblerX8632::mul};
-  emitIASOpTyGPR(Func, Ty, Src, Emitter);
-}
-
-void InstX8632Mul::dump(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrDump();
-  dumpDest(Func);
-  Str << " = mul." << getDest()->getType() << " ";
-  dumpSources(Func);
-}
-
-void InstX8632Shld::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  Variable *Dest = getDest();
-  assert(getSrcSize() == 3);
-  assert(Dest == getSrc(0));
-  Str << "\tshld" << getWidthString(Dest->getType()) << "\t";
-  if (const auto ShiftReg = llvm::dyn_cast<Variable>(getSrc(2))) {
-    (void)ShiftReg;
-    assert(ShiftReg->getRegNum() == RegX8632::Reg_ecx);
-    Str << "%cl";
-  } else {
-    getSrc(2)->emit(Func);
-  }
-  Str << ", ";
-  getSrc(1)->emit(Func);
-  Str << ", ";
-  Dest->emit(Func);
-}
-
-void InstX8632Shld::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 3);
-  assert(getDest() == getSrc(0));
-  const Variable *Dest = getDest();
-  const Operand *Src1 = getSrc(1);
-  const Operand *Src2 = getSrc(2);
-  static const X8632::AssemblerX8632::GPREmitterShiftD Emitter = {
-      &X8632::AssemblerX8632::shld, &X8632::AssemblerX8632::shld};
-  emitIASGPRShiftDouble(Func, Dest, Src1, Src2, Emitter);
-}
-
-void InstX8632Shld::dump(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrDump();
-  dumpDest(Func);
-  Str << " = shld." << getDest()->getType() << " ";
-  dumpSources(Func);
-}
-
-void InstX8632Shrd::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  Variable *Dest = getDest();
-  assert(getSrcSize() == 3);
-  assert(Dest == getSrc(0));
-  Str << "\tshrd" << getWidthString(Dest->getType()) << "\t";
-  if (const auto ShiftReg = llvm::dyn_cast<Variable>(getSrc(2))) {
-    (void)ShiftReg;
-    assert(ShiftReg->getRegNum() == RegX8632::Reg_ecx);
-    Str << "%cl";
-  } else {
-    getSrc(2)->emit(Func);
-  }
-  Str << ", ";
-  getSrc(1)->emit(Func);
-  Str << ", ";
-  Dest->emit(Func);
-}
-
-void InstX8632Shrd::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 3);
-  assert(getDest() == getSrc(0));
-  const Variable *Dest = getDest();
-  const Operand *Src1 = getSrc(1);
-  const Operand *Src2 = getSrc(2);
-  static const X8632::AssemblerX8632::GPREmitterShiftD Emitter = {
-      &X8632::AssemblerX8632::shrd, &X8632::AssemblerX8632::shrd};
-  emitIASGPRShiftDouble(Func, Dest, Src1, Src2, Emitter);
-}
-
-void InstX8632Shrd::dump(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrDump();
-  dumpDest(Func);
-  Str << " = shrd." << getDest()->getType() << " ";
-  dumpSources(Func);
-}
-
-void InstX8632Cmov::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  Variable *Dest = getDest();
-  Str << "\t";
-  assert(Condition != X8632::Traits::Cond::Br_None);
-  assert(getDest()->hasReg());
-  Str << "cmov" << InstX8632BrAttributes[Condition].DisplayString
-      << getWidthString(Dest->getType()) << "\t";
-  getSrc(1)->emit(Func);
-  Str << ", ";
-  Dest->emit(Func);
-}
-
-void InstX8632Cmov::emitIAS(const Cfg *Func) const {
-  assert(Condition != X8632::Traits::Cond::Br_None);
-  assert(getDest()->hasReg());
-  assert(getSrcSize() == 2);
-  Operand *Src = getSrc(1);
-  Type SrcTy = Src->getType();
-  assert(SrcTy == IceType_i16 || SrcTy == IceType_i32);
-  X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
-  if (const auto *SrcVar = llvm::dyn_cast<Variable>(Src)) {
-    if (SrcVar->hasReg()) {
-      Asm->cmov(SrcTy, Condition,
-                RegX8632::getEncodedGPR(getDest()->getRegNum()),
-                RegX8632::getEncodedGPR(SrcVar->getRegNum()));
-    } else {
-      Asm->cmov(SrcTy, Condition,
-                RegX8632::getEncodedGPR(getDest()->getRegNum()),
-                static_cast<TargetX8632 *>(Func->getTarget())
-                    ->stackVarToAsmOperand(SrcVar));
-    }
-  } else if (const auto Mem = llvm::dyn_cast<OperandX8632Mem>(Src)) {
-    assert(Mem->getSegmentRegister() == OperandX8632Mem::DefaultSegment);
-    Asm->cmov(SrcTy, Condition, RegX8632::getEncodedGPR(getDest()->getRegNum()),
-              Mem->toAsmAddress(Asm));
-  } else {
-    llvm_unreachable("Unexpected operand type");
-  }
-}
-
-void InstX8632Cmov::dump(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrDump();
-  Str << "cmov" << InstX8632BrAttributes[Condition].DisplayString << ".";
-  Str << getDest()->getType() << " ";
-  dumpDest(Func);
-  Str << ", ";
-  dumpSources(Func);
-}
-
-void InstX8632Cmpps::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  assert(getSrcSize() == 2);
-  assert(Condition < X8632::Traits::Cond::Cmpps_Invalid);
-  Str << "\t";
-  Str << "cmp" << InstX8632CmppsAttributes[Condition].EmitString << "ps"
-      << "\t";
-  getSrc(1)->emit(Func);
-  Str << ", ";
-  getDest()->emit(Func);
-}
-
-void InstX8632Cmpps::emitIAS(const Cfg *Func) const {
-  X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
-  assert(getSrcSize() == 2);
-  assert(Condition < X8632::Traits::Cond::Cmpps_Invalid);
-  // Assuming there isn't any load folding for cmpps, and vector constants
-  // are not allowed in PNaCl.
-  assert(llvm::isa<Variable>(getSrc(1)));
-  const auto SrcVar = llvm::cast<Variable>(getSrc(1));
-  if (SrcVar->hasReg()) {
-    Asm->cmpps(RegX8632::getEncodedXmm(getDest()->getRegNum()),
-               RegX8632::getEncodedXmm(SrcVar->getRegNum()), Condition);
-  } else {
-    X8632::Traits::Address SrcStackAddr =
-        static_cast<TargetX8632 *>(Func->getTarget())
-            ->stackVarToAsmOperand(SrcVar);
-    Asm->cmpps(RegX8632::getEncodedXmm(getDest()->getRegNum()), SrcStackAddr,
-               Condition);
-  }
-}
-
-void InstX8632Cmpps::dump(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrDump();
-  assert(Condition < X8632::Traits::Cond::Cmpps_Invalid);
-  dumpDest(Func);
-  Str << " = cmp" << InstX8632CmppsAttributes[Condition].EmitString << "ps"
-      << "\t";
-  dumpSources(Func);
-}
-
-void InstX8632Cmpxchg::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  assert(getSrcSize() == 3);
-  if (Locked) {
-    Str << "\tlock";
-  }
-  Str << "\tcmpxchg" << getWidthString(getSrc(0)->getType()) << "\t";
-  getSrc(2)->emit(Func);
-  Str << ", ";
-  getSrc(0)->emit(Func);
-}
-
-void InstX8632Cmpxchg::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 3);
-  X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
-  Type Ty = getSrc(0)->getType();
-  const auto Mem = llvm::cast<OperandX8632Mem>(getSrc(0));
-  assert(Mem->getSegmentRegister() == OperandX8632Mem::DefaultSegment);
-  const X8632::Traits::Address Addr = Mem->toAsmAddress(Asm);
-  const auto VarReg = llvm::cast<Variable>(getSrc(2));
-  assert(VarReg->hasReg());
-  const RegX8632::GPRRegister Reg =
-      RegX8632::getEncodedGPR(VarReg->getRegNum());
-  Asm->cmpxchg(Ty, Addr, Reg, Locked);
-}
-
-void InstX8632Cmpxchg::dump(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrDump();
-  if (Locked) {
-    Str << "lock ";
-  }
-  Str << "cmpxchg." << getSrc(0)->getType() << " ";
-  dumpSources(Func);
-}
-
-void InstX8632Cmpxchg8b::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  assert(getSrcSize() == 5);
-  if (Locked) {
-    Str << "\tlock";
-  }
-  Str << "\tcmpxchg8b\t";
-  getSrc(0)->emit(Func);
-}
-
-void InstX8632Cmpxchg8b::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 5);
-  X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
-  const auto Mem = llvm::cast<OperandX8632Mem>(getSrc(0));
-  assert(Mem->getSegmentRegister() == OperandX8632Mem::DefaultSegment);
-  const X8632::Traits::Address Addr = Mem->toAsmAddress(Asm);
-  Asm->cmpxchg8b(Addr, Locked);
-}
-
-void InstX8632Cmpxchg8b::dump(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrDump();
-  if (Locked) {
-    Str << "lock ";
-  }
-  Str << "cmpxchg8b ";
-  dumpSources(Func);
-}
-
-void InstX8632Cvt::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  assert(getSrcSize() == 1);
-  Str << "\tcvt";
-  if (isTruncating())
-    Str << "t";
-  Str << TypeX8632Attributes[getSrc(0)->getType()].CvtString << "2"
-      << TypeX8632Attributes[getDest()->getType()].CvtString << "\t";
-  getSrc(0)->emit(Func);
-  Str << ", ";
-  getDest()->emit(Func);
-}
-
-void InstX8632Cvt::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 1);
-  const Variable *Dest = getDest();
-  const Operand *Src = getSrc(0);
-  Type DestTy = Dest->getType();
-  Type SrcTy = Src->getType();
-  switch (Variant) {
-  case Si2ss: {
-    assert(isScalarIntegerType(SrcTy));
-    assert(typeWidthInBytes(SrcTy) <= 4);
-    assert(isScalarFloatingType(DestTy));
-    static const X8632::AssemblerX8632::CastEmitterRegOp<
-        RegX8632::XmmRegister, RegX8632::GPRRegister> Emitter = {
-        &X8632::AssemblerX8632::cvtsi2ss, &X8632::AssemblerX8632::cvtsi2ss};
-    emitIASCastRegOp<RegX8632::XmmRegister, RegX8632::GPRRegister,
-                     RegX8632::getEncodedXmm, RegX8632::getEncodedGPR>(
-        Func, DestTy, Dest, Src, Emitter);
-    return;
-  }
-  case Tss2si: {
-    assert(isScalarFloatingType(SrcTy));
-    assert(isScalarIntegerType(DestTy));
-    assert(typeWidthInBytes(DestTy) <= 4);
-    static const X8632::AssemblerX8632::CastEmitterRegOp<
-        RegX8632::GPRRegister, RegX8632::XmmRegister> Emitter = {
-        &X8632::AssemblerX8632::cvttss2si, &X8632::AssemblerX8632::cvttss2si};
-    emitIASCastRegOp<RegX8632::GPRRegister, RegX8632::XmmRegister,
-                     RegX8632::getEncodedGPR, RegX8632::getEncodedXmm>(
-        Func, SrcTy, Dest, Src, Emitter);
-    return;
-  }
-  case Float2float: {
-    assert(isScalarFloatingType(SrcTy));
-    assert(isScalarFloatingType(DestTy));
-    assert(DestTy != SrcTy);
-    static const X8632::AssemblerX8632::XmmEmitterRegOp Emitter = {
-        &X8632::AssemblerX8632::cvtfloat2float,
-        &X8632::AssemblerX8632::cvtfloat2float};
-    emitIASRegOpTyXMM(Func, SrcTy, Dest, Src, Emitter);
-    return;
-  }
-  case Dq2ps: {
-    assert(isVectorIntegerType(SrcTy));
-    assert(isVectorFloatingType(DestTy));
-    static const X8632::AssemblerX8632::XmmEmitterRegOp Emitter = {
-        &X8632::AssemblerX8632::cvtdq2ps, &X8632::AssemblerX8632::cvtdq2ps};
-    emitIASRegOpTyXMM(Func, DestTy, Dest, Src, Emitter);
-    return;
-  }
-  case Tps2dq: {
-    assert(isVectorFloatingType(SrcTy));
-    assert(isVectorIntegerType(DestTy));
-    static const X8632::AssemblerX8632::XmmEmitterRegOp Emitter = {
-        &X8632::AssemblerX8632::cvttps2dq, &X8632::AssemblerX8632::cvttps2dq};
-    emitIASRegOpTyXMM(Func, DestTy, Dest, Src, Emitter);
-    return;
-  }
-  }
-}
-
-void InstX8632Cvt::dump(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrDump();
-  dumpDest(Func);
-  Str << " = cvt";
-  if (isTruncating())
-    Str << "t";
-  Str << TypeX8632Attributes[getSrc(0)->getType()].CvtString << "2"
-      << TypeX8632Attributes[getDest()->getType()].CvtString << " ";
-  dumpSources(Func);
-}
-
-void InstX8632Icmp::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  assert(getSrcSize() == 2);
-  Str << "\tcmp" << getWidthString(getSrc(0)->getType()) << "\t";
-  getSrc(1)->emit(Func);
-  Str << ", ";
-  getSrc(0)->emit(Func);
-}
-
-void InstX8632Icmp::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 2);
-  const Operand *Src0 = getSrc(0);
-  const Operand *Src1 = getSrc(1);
-  Type Ty = Src0->getType();
-  static const X8632::AssemblerX8632::GPREmitterRegOp RegEmitter = {
-      &X8632::AssemblerX8632::cmp, &X8632::AssemblerX8632::cmp,
-      &X8632::AssemblerX8632::cmp};
-  static const X8632::AssemblerX8632::GPREmitterAddrOp AddrEmitter = {
-      &X8632::AssemblerX8632::cmp, &X8632::AssemblerX8632::cmp};
-  if (const auto SrcVar0 = llvm::dyn_cast<Variable>(Src0)) {
-    if (SrcVar0->hasReg()) {
-      emitIASRegOpTyGPR(Func, Ty, SrcVar0, Src1, RegEmitter);
-      return;
-    }
-  }
-  emitIASAsAddrOpTyGPR(Func, Ty, Src0, Src1, AddrEmitter);
-}
-
-void InstX8632Icmp::dump(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrDump();
-  Str << "cmp." << getSrc(0)->getType() << " ";
-  dumpSources(Func);
-}
-
-void InstX8632Ucomiss::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  assert(getSrcSize() == 2);
-  Str << "\tucomi" << TypeX8632Attributes[getSrc(0)->getType()].SdSsString
-      << "\t";
-  getSrc(1)->emit(Func);
-  Str << ", ";
-  getSrc(0)->emit(Func);
-}
-
-void InstX8632Ucomiss::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 2);
-  // Currently src0 is always a variable by convention, to avoid having
-  // two memory operands.
-  assert(llvm::isa<Variable>(getSrc(0)));
-  const auto Src0Var = llvm::cast<Variable>(getSrc(0));
-  Type Ty = Src0Var->getType();
-  const static X8632::AssemblerX8632::XmmEmitterRegOp Emitter = {
-      &X8632::AssemblerX8632::ucomiss, &X8632::AssemblerX8632::ucomiss};
-  emitIASRegOpTyXMM(Func, Ty, Src0Var, getSrc(1), Emitter);
-}
-
-void InstX8632Ucomiss::dump(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrDump();
-  Str << "ucomiss." << getSrc(0)->getType() << " ";
-  dumpSources(Func);
-}
-
-void InstX8632UD2::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  assert(getSrcSize() == 0);
-  Str << "\tud2";
-}
-
-void InstX8632UD2::emitIAS(const Cfg *Func) const {
-  X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
-  Asm->ud2();
-}
-
-void InstX8632UD2::dump(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrDump();
-  Str << "ud2";
-}
-
-void InstX8632Test::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  assert(getSrcSize() == 2);
-  Str << "\ttest" << getWidthString(getSrc(0)->getType()) << "\t";
-  getSrc(1)->emit(Func);
-  Str << ", ";
-  getSrc(0)->emit(Func);
-}
-
-void InstX8632Test::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 2);
-  const Operand *Src0 = getSrc(0);
-  const Operand *Src1 = getSrc(1);
-  Type Ty = Src0->getType();
-  // The Reg/Addr form of test is not encodeable.
-  static const X8632::AssemblerX8632::GPREmitterRegOp RegEmitter = {
-      &X8632::AssemblerX8632::test, nullptr, &X8632::AssemblerX8632::test};
-  static const X8632::AssemblerX8632::GPREmitterAddrOp AddrEmitter = {
-      &X8632::AssemblerX8632::test, &X8632::AssemblerX8632::test};
-  if (const auto SrcVar0 = llvm::dyn_cast<Variable>(Src0)) {
-    if (SrcVar0->hasReg()) {
-      emitIASRegOpTyGPR(Func, Ty, SrcVar0, Src1, RegEmitter);
-      return;
-    }
-  }
-  llvm_unreachable("Nothing actually generates this so it's untested");
-  emitIASAsAddrOpTyGPR(Func, Ty, Src0, Src1, AddrEmitter);
-}
-
-void InstX8632Test::dump(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrDump();
-  Str << "test." << getSrc(0)->getType() << " ";
-  dumpSources(Func);
-}
-
-void InstX8632Mfence::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  assert(getSrcSize() == 0);
-  Str << "\tmfence";
-}
-
-void InstX8632Mfence::emitIAS(const Cfg *Func) const {
-  X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
-  Asm->mfence();
-}
-
-void InstX8632Mfence::dump(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrDump();
-  Str << "mfence";
-}
-
-void InstX8632Store::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  assert(getSrcSize() == 2);
-  Type Ty = getSrc(0)->getType();
-  Str << "\tmov" << getWidthString(Ty) << TypeX8632Attributes[Ty].SdSsString
-      << "\t";
-  getSrc(0)->emit(Func);
-  Str << ", ";
-  getSrc(1)->emit(Func);
-}
-
-void InstX8632Store::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 2);
-  const Operand *Dest = getSrc(1);
-  const Operand *Src = getSrc(0);
-  Type DestTy = Dest->getType();
-  if (isScalarFloatingType(DestTy)) {
-    // Src must be a register, since Dest is a Mem operand of some kind.
-    const auto SrcVar = llvm::cast<Variable>(Src);
-    assert(SrcVar->hasReg());
-    RegX8632::XmmRegister SrcReg = RegX8632::getEncodedXmm(SrcVar->getRegNum());
-    X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
-    if (const auto DestVar = llvm::dyn_cast<Variable>(Dest)) {
-      assert(!DestVar->hasReg());
-      X8632::Traits::Address StackAddr(
-          static_cast<TargetX8632 *>(Func->getTarget())
-              ->stackVarToAsmOperand(DestVar));
-      Asm->movss(DestTy, StackAddr, SrcReg);
-    } else {
-      const auto DestMem = llvm::cast<OperandX8632Mem>(Dest);
-      assert(DestMem->getSegmentRegister() == OperandX8632Mem::DefaultSegment);
-      Asm->movss(DestTy, DestMem->toAsmAddress(Asm), SrcReg);
-    }
-    return;
-  } else {
-    assert(isScalarIntegerType(DestTy));
-    static const X8632::AssemblerX8632::GPREmitterAddrOp GPRAddrEmitter = {
-        &X8632::AssemblerX8632::mov, &X8632::AssemblerX8632::mov};
-    emitIASAsAddrOpTyGPR(Func, DestTy, Dest, Src, GPRAddrEmitter);
-  }
-}
-
-void InstX8632Store::dump(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrDump();
-  Str << "mov." << getSrc(0)->getType() << " ";
-  getSrc(1)->dump(Func);
-  Str << ", ";
-  getSrc(0)->dump(Func);
-}
-
-void InstX8632StoreP::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  assert(getSrcSize() == 2);
-  Str << "\tmovups\t";
-  getSrc(0)->emit(Func);
-  Str << ", ";
-  getSrc(1)->emit(Func);
-}
-
-void InstX8632StoreP::emitIAS(const Cfg *Func) const {
-  X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
-  assert(getSrcSize() == 2);
-  const auto SrcVar = llvm::cast<Variable>(getSrc(0));
-  const auto DestMem = llvm::cast<OperandX8632Mem>(getSrc(1));
-  assert(DestMem->getSegmentRegister() == OperandX8632Mem::DefaultSegment);
-  assert(SrcVar->hasReg());
-  Asm->movups(DestMem->toAsmAddress(Asm),
-              RegX8632::getEncodedXmm(SrcVar->getRegNum()));
-}
-
-void InstX8632StoreP::dump(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrDump();
-  Str << "storep." << getSrc(0)->getType() << " ";
-  getSrc(1)->dump(Func);
-  Str << ", ";
-  getSrc(0)->dump(Func);
-}
-
-void InstX8632StoreQ::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  assert(getSrcSize() == 2);
-  assert(getSrc(1)->getType() == IceType_i64 ||
-         getSrc(1)->getType() == IceType_f64);
-  Str << "\tmovq\t";
-  getSrc(0)->emit(Func);
-  Str << ", ";
-  getSrc(1)->emit(Func);
-}
-
-void InstX8632StoreQ::emitIAS(const Cfg *Func) const {
-  X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
-  assert(getSrcSize() == 2);
-  const auto SrcVar = llvm::cast<Variable>(getSrc(0));
-  const auto DestMem = llvm::cast<OperandX8632Mem>(getSrc(1));
-  assert(DestMem->getSegmentRegister() == OperandX8632Mem::DefaultSegment);
-  assert(SrcVar->hasReg());
-  Asm->movq(DestMem->toAsmAddress(Asm),
-            RegX8632::getEncodedXmm(SrcVar->getRegNum()));
-}
-
-void InstX8632StoreQ::dump(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrDump();
-  Str << "storeq." << getSrc(0)->getType() << " ";
-  getSrc(1)->dump(Func);
-  Str << ", ";
-  getSrc(0)->dump(Func);
-}
-
-template <> void InstX8632Lea::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  assert(getSrcSize() == 1);
-  assert(getDest()->hasReg());
-  Str << "\tleal\t";
-  Operand *Src0 = getSrc(0);
-  if (const auto Src0Var = llvm::dyn_cast<Variable>(Src0)) {
-    Type Ty = Src0Var->getType();
-    // lea on x86-32 doesn't accept mem128 operands, so cast VSrc0 to an
-    // acceptable type.
-    Src0Var->asType(isVectorType(Ty) ? IceType_i32 : Ty)->emit(Func);
-  } else {
-    Src0->emit(Func);
-  }
-  Str << ", ";
-  getDest()->emit(Func);
-}
-
-template <> void InstX8632Mov::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  assert(getSrcSize() == 1);
-  Operand *Src = getSrc(0);
-  Type SrcTy = Src->getType();
-  Type DestTy = getDest()->getType();
-  Str << "\tmov" << (!isScalarFloatingType(DestTy)
-                         ? getWidthString(SrcTy)
-                         : TypeX8632Attributes[DestTy].SdSsString) << "\t";
-  // For an integer truncation operation, src is wider than dest.
-  // Ideally, we use a mov instruction whose data width matches the
-  // narrower dest.  This is a problem if e.g. src is a register like
-  // esi or si where there is no 8-bit version of the register.  To be
-  // safe, we instead widen the dest to match src.  This works even
-  // for stack-allocated dest variables because typeWidthOnStack()
-  // pads to a 4-byte boundary even if only a lower portion is used.
-  // TODO: This assert disallows usages such as copying a floating point
-  // value between a vector and a scalar (which movss is used for).
-  // Clean this up.
-  assert(Func->getTarget()->typeWidthInBytesOnStack(DestTy) ==
-         Func->getTarget()->typeWidthInBytesOnStack(SrcTy));
-  Src->emit(Func);
-  Str << ", ";
-  getDest()->asType(SrcTy)->emit(Func);
-}
-
-template <> void InstX8632Mov::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 1);
-  const Variable *Dest = getDest();
-  const Operand *Src = getSrc(0);
-  Type DestTy = Dest->getType();
-  Type SrcTy = Src->getType();
-  // Mov can be used for GPRs or XMM registers. Also, the type does not
-  // necessarily match (Mov can be used for bitcasts). However, when
-  // the type does not match, one of the operands must be a register.
-  // Thus, the strategy is to find out if Src or Dest are a register,
-  // then use that register's type to decide on which emitter set to use.
-  // The emitter set will include reg-reg movs, but that case should
-  // be unused when the types don't match.
-  static const X8632::AssemblerX8632::XmmEmitterRegOp XmmRegEmitter = {
-      &X8632::AssemblerX8632::movss, &X8632::AssemblerX8632::movss};
-  static const X8632::AssemblerX8632::GPREmitterRegOp GPRRegEmitter = {
-      &X8632::AssemblerX8632::mov, &X8632::AssemblerX8632::mov,
-      &X8632::AssemblerX8632::mov};
-  static const X8632::AssemblerX8632::GPREmitterAddrOp GPRAddrEmitter = {
-      &X8632::AssemblerX8632::mov, &X8632::AssemblerX8632::mov};
-  // For an integer truncation operation, src is wider than dest.
-  // Ideally, we use a mov instruction whose data width matches the
-  // narrower dest.  This is a problem if e.g. src is a register like
-  // esi or si where there is no 8-bit version of the register.  To be
-  // safe, we instead widen the dest to match src.  This works even
-  // for stack-allocated dest variables because typeWidthOnStack()
-  // pads to a 4-byte boundary even if only a lower portion is used.
-  // TODO: This assert disallows usages such as copying a floating point
-  // value between a vector and a scalar (which movss is used for).
-  // Clean this up.
-  assert(Func->getTarget()->typeWidthInBytesOnStack(getDest()->getType()) ==
-         Func->getTarget()->typeWidthInBytesOnStack(Src->getType()));
-  if (Dest->hasReg()) {
-    if (isScalarFloatingType(DestTy)) {
-      emitIASRegOpTyXMM(Func, DestTy, Dest, Src, XmmRegEmitter);
-      return;
-    } else {
-      assert(isScalarIntegerType(DestTy));
-      // Widen DestTy for truncation (see above note). We should only do this
-      // when both Src and Dest are integer types.
-      if (isScalarIntegerType(SrcTy)) {
-        DestTy = SrcTy;
-      }
-      emitIASRegOpTyGPR(Func, DestTy, Dest, Src, GPRRegEmitter);
-      return;
-    }
-  } else {
-    // Dest must be Stack and Src *could* be a register. Use Src's type
-    // to decide on the emitters.
-    X8632::Traits::Address StackAddr(
-        static_cast<TargetX8632 *>(Func->getTarget())
-            ->stackVarToAsmOperand(Dest));
-    if (isScalarFloatingType(SrcTy)) {
-      // Src must be a register.
-      const auto SrcVar = llvm::cast<Variable>(Src);
-      assert(SrcVar->hasReg());
-      X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
-      Asm->movss(SrcTy, StackAddr,
-                 RegX8632::getEncodedXmm(SrcVar->getRegNum()));
-      return;
-    } else {
-      // Src can be a register or immediate.
-      assert(isScalarIntegerType(SrcTy));
-      emitIASAddrOpTyGPR(Func, SrcTy, StackAddr, Src, GPRAddrEmitter);
-      return;
-    }
-    return;
-  }
-}
-
-template <> void InstX8632Movd::emitIAS(const Cfg *Func) const {
-  X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
-  assert(getSrcSize() == 1);
-  const Variable *Dest = getDest();
-  const auto SrcVar = llvm::cast<Variable>(getSrc(0));
-  // For insert/extract element (one of Src/Dest is an Xmm vector and
-  // the other is an int type).
-  if (SrcVar->getType() == IceType_i32) {
-    assert(isVectorType(Dest->getType()));
-    assert(Dest->hasReg());
-    RegX8632::XmmRegister DestReg = RegX8632::getEncodedXmm(Dest->getRegNum());
-    if (SrcVar->hasReg()) {
-      Asm->movd(DestReg, RegX8632::getEncodedGPR(SrcVar->getRegNum()));
-    } else {
-      X8632::Traits::Address StackAddr(
-          static_cast<TargetX8632 *>(Func->getTarget())
-              ->stackVarToAsmOperand(SrcVar));
-      Asm->movd(DestReg, StackAddr);
-    }
-  } else {
-    assert(isVectorType(SrcVar->getType()));
-    assert(SrcVar->hasReg());
-    assert(Dest->getType() == IceType_i32);
-    RegX8632::XmmRegister SrcReg = RegX8632::getEncodedXmm(SrcVar->getRegNum());
-    if (Dest->hasReg()) {
-      Asm->movd(RegX8632::getEncodedGPR(Dest->getRegNum()), SrcReg);
-    } else {
-      X8632::Traits::Address StackAddr(
-          static_cast<TargetX8632 *>(Func->getTarget())
-              ->stackVarToAsmOperand(Dest));
-      Asm->movd(StackAddr, SrcReg);
-    }
-  }
-}
-
-template <> void InstX8632Movp::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  // TODO(wala,stichnot): movups works with all vector operands, but
-  // there exist other instructions (movaps, movdqa, movdqu) that may
-  // perform better, depending on the data type and alignment of the
-  // operands.
-  Ostream &Str = Func->getContext()->getStrEmit();
-  assert(getSrcSize() == 1);
-  Str << "\tmovups\t";
-  getSrc(0)->emit(Func);
-  Str << ", ";
-  getDest()->emit(Func);
-}
-
-template <> void InstX8632Movp::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 1);
-  assert(isVectorType(getDest()->getType()));
-  const Variable *Dest = getDest();
-  const Operand *Src = getSrc(0);
-  const static X8632::AssemblerX8632::XmmEmitterMovOps Emitter = {
-      &X8632::AssemblerX8632::movups, &X8632::AssemblerX8632::movups,
-      &X8632::AssemblerX8632::movups};
-  emitIASMovlikeXMM(Func, Dest, Src, Emitter);
-}
-
-template <> void InstX8632Movq::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  assert(getSrcSize() == 1);
-  assert(getDest()->getType() == IceType_i64 ||
-         getDest()->getType() == IceType_f64);
-  Str << "\tmovq\t";
-  getSrc(0)->emit(Func);
-  Str << ", ";
-  getDest()->emit(Func);
-}
-
-template <> void InstX8632Movq::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 1);
-  assert(getDest()->getType() == IceType_i64 ||
-         getDest()->getType() == IceType_f64);
-  const Variable *Dest = getDest();
-  const Operand *Src = getSrc(0);
-  const static X8632::AssemblerX8632::XmmEmitterMovOps Emitter = {
-      &X8632::AssemblerX8632::movq, &X8632::AssemblerX8632::movq,
-      &X8632::AssemblerX8632::movq};
-  emitIASMovlikeXMM(Func, Dest, Src, Emitter);
-}
-
-template <> void InstX8632MovssRegs::emitIAS(const Cfg *Func) const {
-  // This is Binop variant is only intended to be used for reg-reg moves
-  // where part of the Dest register is untouched.
-  assert(getSrcSize() == 2);
-  const Variable *Dest = getDest();
-  assert(Dest == getSrc(0));
-  const auto SrcVar = llvm::cast<Variable>(getSrc(1));
-  assert(Dest->hasReg() && SrcVar->hasReg());
-  X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
-  Asm->movss(IceType_f32, RegX8632::getEncodedXmm(Dest->getRegNum()),
-             RegX8632::getEncodedXmm(SrcVar->getRegNum()));
-}
-
-template <> void InstX8632Movsx::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 1);
-  const Variable *Dest = getDest();
-  const Operand *Src = getSrc(0);
-  // Dest must be a > 8-bit register, but Src can be 8-bit. In practice
-  // we just use the full register for Dest to avoid having an
-  // OperandSizeOverride prefix. It also allows us to only dispatch on SrcTy.
-  Type SrcTy = Src->getType();
-  assert(typeWidthInBytes(Dest->getType()) > 1);
-  assert(typeWidthInBytes(Dest->getType()) > typeWidthInBytes(SrcTy));
-  emitIASRegOpTyGPR<false, true>(Func, SrcTy, Dest, Src, Emitter);
-}
-
-template <> void InstX8632Movzx::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 1);
-  const Variable *Dest = getDest();
-  const Operand *Src = getSrc(0);
-  Type SrcTy = Src->getType();
-  assert(typeWidthInBytes(Dest->getType()) > 1);
-  assert(typeWidthInBytes(Dest->getType()) > typeWidthInBytes(SrcTy));
-  emitIASRegOpTyGPR<false, true>(Func, SrcTy, Dest, Src, Emitter);
-}
-
-void InstX8632Nop::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  // TODO: Emit the right code for each variant.
-  Str << "\tnop\t# variant = " << Variant;
-}
-
-void InstX8632Nop::emitIAS(const Cfg *Func) const {
-  X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
-  // TODO: Emit the right code for the variant.
-  Asm->nop();
-}
-
-void InstX8632Nop::dump(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrDump();
-  Str << "nop (variant = " << Variant << ")";
-}
-
-void InstX8632Fld::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  assert(getSrcSize() == 1);
-  Type Ty = getSrc(0)->getType();
-  SizeT Width = typeWidthInBytes(Ty);
-  const auto Var = llvm::dyn_cast<Variable>(getSrc(0));
-  if (Var && Var->hasReg()) {
-    // This is a physical xmm register, so we need to spill it to a
-    // temporary stack slot.
-    Str << "\tsubl\t$" << Width << ", %esp"
-        << "\n";
-    Str << "\tmov" << TypeX8632Attributes[Ty].SdSsString << "\t";
-    Var->emit(Func);
-    Str << ", (%esp)\n";
-    Str << "\tfld" << getFldString(Ty) << "\t"
-        << "(%esp)\n";
-    Str << "\taddl\t$" << Width << ", %esp";
-    return;
-  }
-  Str << "\tfld" << getFldString(Ty) << "\t";
-  getSrc(0)->emit(Func);
-}
-
-void InstX8632Fld::emitIAS(const Cfg *Func) const {
-  X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
-  assert(getSrcSize() == 1);
-  const Operand *Src = getSrc(0);
-  Type Ty = Src->getType();
-  if (const auto Var = llvm::dyn_cast<Variable>(Src)) {
-    if (Var->hasReg()) {
-      // This is a physical xmm register, so we need to spill it to a
-      // temporary stack slot.
-      X8632::Immediate Width(typeWidthInBytes(Ty));
-      Asm->sub(IceType_i32, RegX8632::Encoded_Reg_esp, Width);
-      X8632::Traits::Address StackSlot =
-          X8632::Traits::Address(RegX8632::Encoded_Reg_esp, 0);
-      Asm->movss(Ty, StackSlot, RegX8632::getEncodedXmm(Var->getRegNum()));
-      Asm->fld(Ty, StackSlot);
-      Asm->add(IceType_i32, RegX8632::Encoded_Reg_esp, Width);
-    } else {
-      X8632::Traits::Address StackAddr(
-          static_cast<TargetX8632 *>(Func->getTarget())
-              ->stackVarToAsmOperand(Var));
-      Asm->fld(Ty, StackAddr);
-    }
-  } else if (const auto Mem = llvm::dyn_cast<OperandX8632Mem>(Src)) {
-    assert(Mem->getSegmentRegister() == OperandX8632Mem::DefaultSegment);
-    Asm->fld(Ty, Mem->toAsmAddress(Asm));
-  } else if (const auto Imm = llvm::dyn_cast<Constant>(Src)) {
-    Asm->fld(Ty, X8632::Traits::Address::ofConstPool(Asm, Imm));
-  } else {
-    llvm_unreachable("Unexpected operand type");
-  }
-}
-
-void InstX8632Fld::dump(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrDump();
-  Str << "fld." << getSrc(0)->getType() << " ";
-  dumpSources(Func);
-}
-
-void InstX8632Fstp::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  assert(getSrcSize() == 0);
-  // TODO(jvoung,stichnot): Utilize this by setting Dest to nullptr to
-  // "partially" delete the fstp if the Dest is unused.
-  // Even if Dest is unused, the fstp should be kept for the SideEffects
-  // of popping the stack.
-  if (!getDest()) {
-    Str << "\tfstp\tst(0)";
-    return;
-  }
-  Type Ty = getDest()->getType();
-  size_t Width = typeWidthInBytes(Ty);
-  if (!getDest()->hasReg()) {
-    Str << "\tfstp" << getFldString(Ty) << "\t";
-    getDest()->emit(Func);
-    return;
-  }
-  // Dest is a physical (xmm) register, so st(0) needs to go through
-  // memory.  Hack this by creating a temporary stack slot, spilling
-  // st(0) there, loading it into the xmm register, and deallocating
-  // the stack slot.
-  Str << "\tsubl\t$" << Width << ", %esp\n";
-  Str << "\tfstp" << getFldString(Ty) << "\t"
-      << "(%esp)\n";
-  Str << "\tmov" << TypeX8632Attributes[Ty].SdSsString << "\t"
-      << "(%esp), ";
-  getDest()->emit(Func);
-  Str << "\n";
-  Str << "\taddl\t$" << Width << ", %esp";
-}
-
-void InstX8632Fstp::emitIAS(const Cfg *Func) const {
-  X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
-  assert(getSrcSize() == 0);
-  const Variable *Dest = getDest();
-  // TODO(jvoung,stichnot): Utilize this by setting Dest to nullptr to
-  // "partially" delete the fstp if the Dest is unused.
-  // Even if Dest is unused, the fstp should be kept for the SideEffects
-  // of popping the stack.
-  if (!Dest) {
-    Asm->fstp(RegX8632::getEncodedSTReg(0));
-    return;
-  }
-  Type Ty = Dest->getType();
-  if (!Dest->hasReg()) {
-    X8632::Traits::Address StackAddr(
-        static_cast<TargetX8632 *>(Func->getTarget())
-            ->stackVarToAsmOperand(Dest));
-    Asm->fstp(Ty, StackAddr);
-  } else {
-    // Dest is a physical (xmm) register, so st(0) needs to go through
-    // memory.  Hack this by creating a temporary stack slot, spilling
-    // st(0) there, loading it into the xmm register, and deallocating
-    // the stack slot.
-    X8632::Immediate Width(typeWidthInBytes(Ty));
-    Asm->sub(IceType_i32, RegX8632::Encoded_Reg_esp, Width);
-    X8632::Traits::Address StackSlot =
-        X8632::Traits::Address(RegX8632::Encoded_Reg_esp, 0);
-    Asm->fstp(Ty, StackSlot);
-    Asm->movss(Ty, RegX8632::getEncodedXmm(Dest->getRegNum()), StackSlot);
-    Asm->add(IceType_i32, RegX8632::Encoded_Reg_esp, Width);
-  }
-}
-
-void InstX8632Fstp::dump(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrDump();
-  dumpDest(Func);
-  Str << " = fstp." << getDest()->getType() << ", st(0)";
-}
-
-template <> void InstX8632Pcmpeq::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  char buf[30];
-  snprintf(buf, llvm::array_lengthof(buf), "pcmpeq%s",
-           TypeX8632Attributes[getDest()->getType()].PackString);
-  emitTwoAddress(buf, this, Func);
-}
-
-template <> void InstX8632Pcmpgt::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  char buf[30];
-  snprintf(buf, llvm::array_lengthof(buf), "pcmpgt%s",
-           TypeX8632Attributes[getDest()->getType()].PackString);
-  emitTwoAddress(buf, this, Func);
-}
-
-template <> void InstX8632Pextr::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  assert(getSrcSize() == 2);
-  // pextrb and pextrd are SSE4.1 instructions.
-  assert(getSrc(0)->getType() == IceType_v8i16 ||
-         getSrc(0)->getType() == IceType_v8i1 ||
-         static_cast<TargetX8632 *>(Func->getTarget())->getInstructionSet() >=
-             X8632::Traits::SSE4_1);
-  Str << "\t" << Opcode << TypeX8632Attributes[getSrc(0)->getType()].PackString
-      << "\t";
-  getSrc(1)->emit(Func);
-  Str << ", ";
-  getSrc(0)->emit(Func);
-  Str << ", ";
-  Variable *Dest = getDest();
-  // pextrw must take a register dest. There is an SSE4.1 version that takes
-  // a memory dest, but we aren't using it. For uniformity, just restrict
-  // them all to have a register dest for now.
-  assert(Dest->hasReg());
-  Dest->asType(IceType_i32)->emit(Func);
-}
-
-template <> void InstX8632Pextr::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 2);
-  // pextrb and pextrd are SSE4.1 instructions.
-  const Variable *Dest = getDest();
-  Type DispatchTy = Dest->getType();
-  assert(DispatchTy == IceType_i16 ||
-         static_cast<TargetX8632 *>(Func->getTarget())->getInstructionSet() >=
-             X8632::Traits::SSE4_1);
-  // pextrw must take a register dest. There is an SSE4.1 version that takes
-  // a memory dest, but we aren't using it. For uniformity, just restrict
-  // them all to have a register dest for now.
-  assert(Dest->hasReg());
-  // pextrw's Src(0) must be a register (both SSE4.1 and SSE2).
-  assert(llvm::cast<Variable>(getSrc(0))->hasReg());
-  static const X8632::AssemblerX8632::ThreeOpImmEmitter<
-      RegX8632::GPRRegister, RegX8632::XmmRegister> Emitter = {
-      &X8632::AssemblerX8632::pextr, nullptr};
-  emitIASThreeOpImmOps<RegX8632::GPRRegister, RegX8632::XmmRegister,
-                       RegX8632::getEncodedGPR, RegX8632::getEncodedXmm>(
-      Func, DispatchTy, Dest, getSrc(0), getSrc(1), Emitter);
-}
-
-template <> void InstX8632Pinsr::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  assert(getSrcSize() == 3);
-  // pinsrb and pinsrd are SSE4.1 instructions.
-  assert(getDest()->getType() == IceType_v8i16 ||
-         getDest()->getType() == IceType_v8i1 ||
-         static_cast<TargetX8632 *>(Func->getTarget())->getInstructionSet() >=
-             X8632::Traits::SSE4_1);
-  Str << "\t" << Opcode << TypeX8632Attributes[getDest()->getType()].PackString
-      << "\t";
-  getSrc(2)->emit(Func);
-  Str << ", ";
-  Operand *Src1 = getSrc(1);
-  if (const auto Src1Var = llvm::dyn_cast<Variable>(Src1)) {
-    // If src1 is a register, it should always be r32.
-    if (Src1Var->hasReg()) {
-      Src1Var->asType(IceType_i32)->emit(Func);
-    } else {
-      Src1Var->emit(Func);
-    }
-  } else {
-    Src1->emit(Func);
-  }
-  Str << ", ";
-  getDest()->emit(Func);
-}
-
-template <> void InstX8632Pinsr::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 3);
-  assert(getDest() == getSrc(0));
-  // pinsrb and pinsrd are SSE4.1 instructions.
-  const Operand *Src0 = getSrc(1);
-  Type DispatchTy = Src0->getType();
-  assert(DispatchTy == IceType_i16 ||
-         static_cast<TargetX8632 *>(Func->getTarget())->getInstructionSet() >=
-             X8632::Traits::SSE4_1);
-  // If src1 is a register, it should always be r32 (this should fall out
-  // from the encodings for ByteRegs overlapping the encodings for r32),
-  // but we have to trust the regalloc to not choose "ah", where it
-  // doesn't overlap.
-  static const X8632::AssemblerX8632::ThreeOpImmEmitter<
-      RegX8632::XmmRegister, RegX8632::GPRRegister> Emitter = {
-      &X8632::AssemblerX8632::pinsr, &X8632::AssemblerX8632::pinsr};
-  emitIASThreeOpImmOps<RegX8632::XmmRegister, RegX8632::GPRRegister,
-                       RegX8632::getEncodedXmm, RegX8632::getEncodedGPR>(
-      Func, DispatchTy, getDest(), Src0, getSrc(2), Emitter);
-}
-
-template <> void InstX8632Pshufd::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 2);
-  const Variable *Dest = getDest();
-  Type Ty = Dest->getType();
-  static const X8632::AssemblerX8632::ThreeOpImmEmitter<
-      RegX8632::XmmRegister, RegX8632::XmmRegister> Emitter = {
-      &X8632::AssemblerX8632::pshufd, &X8632::AssemblerX8632::pshufd};
-  emitIASThreeOpImmOps<RegX8632::XmmRegister, RegX8632::XmmRegister,
-                       RegX8632::getEncodedXmm, RegX8632::getEncodedXmm>(
-      Func, Ty, Dest, getSrc(0), getSrc(1), Emitter);
-}
-
-template <> void InstX8632Shufps::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 3);
-  const Variable *Dest = getDest();
-  assert(Dest == getSrc(0));
-  Type Ty = Dest->getType();
-  static const X8632::AssemblerX8632::ThreeOpImmEmitter<
-      RegX8632::XmmRegister, RegX8632::XmmRegister> Emitter = {
-      &X8632::AssemblerX8632::shufps, &X8632::AssemblerX8632::shufps};
-  emitIASThreeOpImmOps<RegX8632::XmmRegister, RegX8632::XmmRegister,
-                       RegX8632::getEncodedXmm, RegX8632::getEncodedXmm>(
-      Func, Ty, Dest, getSrc(1), getSrc(2), Emitter);
-}
-
-void InstX8632Pop::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  assert(getSrcSize() == 0);
-  Str << "\tpop\t";
-  getDest()->emit(Func);
-}
-
-void InstX8632Pop::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 0);
-  X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
-  if (getDest()->hasReg()) {
-    Asm->popl(RegX8632::getEncodedGPR(getDest()->getRegNum()));
-  } else {
-    Asm->popl(static_cast<TargetX8632 *>(Func->getTarget())
-                  ->stackVarToAsmOperand(getDest()));
-  }
-}
-
-void InstX8632Pop::dump(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrDump();
-  dumpDest(Func);
-  Str << " = pop." << getDest()->getType() << " ";
-}
-
-void InstX8632AdjustStack::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  Str << "\tsubl\t$" << Amount << ", %esp";
-  Func->getTarget()->updateStackAdjustment(Amount);
-}
-
-void InstX8632AdjustStack::emitIAS(const Cfg *Func) const {
-  X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
-  Asm->sub(IceType_i32, RegX8632::Encoded_Reg_esp, X8632::Immediate(Amount));
-  Func->getTarget()->updateStackAdjustment(Amount);
-}
-
-void InstX8632AdjustStack::dump(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrDump();
-  Str << "esp = sub.i32 esp, " << Amount;
-}
-
-void InstX8632Push::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  assert(getSrcSize() == 1);
-  // Push is currently only used for saving GPRs.
-  const auto Var = llvm::cast<Variable>(getSrc(0));
-  assert(Var->hasReg());
-  Str << "\tpush\t";
-  Var->emit(Func);
-}
-
-void InstX8632Push::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 1);
-  // Push is currently only used for saving GPRs.
-  const auto Var = llvm::cast<Variable>(getSrc(0));
-  assert(Var->hasReg());
-  X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
-  Asm->pushl(RegX8632::getEncodedGPR(Var->getRegNum()));
-}
-
-void InstX8632Push::dump(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrDump();
-  Str << "push." << getSrc(0)->getType() << " ";
-  dumpSources(Func);
-}
-
-template <> void InstX8632Psll::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  assert(getDest()->getType() == IceType_v8i16 ||
-         getDest()->getType() == IceType_v8i1 ||
-         getDest()->getType() == IceType_v4i32 ||
-         getDest()->getType() == IceType_v4i1);
-  char buf[30];
-  snprintf(buf, llvm::array_lengthof(buf), "psll%s",
-           TypeX8632Attributes[getDest()->getType()].PackString);
-  emitTwoAddress(buf, this, Func);
-}
-
-template <> void InstX8632Psra::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  assert(getDest()->getType() == IceType_v8i16 ||
-         getDest()->getType() == IceType_v8i1 ||
-         getDest()->getType() == IceType_v4i32 ||
-         getDest()->getType() == IceType_v4i1);
-  char buf[30];
-  snprintf(buf, llvm::array_lengthof(buf), "psra%s",
-           TypeX8632Attributes[getDest()->getType()].PackString);
-  emitTwoAddress(buf, this, Func);
-}
-
-template <> void InstX8632Psrl::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  char buf[30];
-  snprintf(buf, llvm::array_lengthof(buf), "psrl%s",
-           TypeX8632Attributes[getDest()->getType()].PackString);
-  emitTwoAddress(buf, this, Func);
-}
-
-void InstX8632Ret::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  Str << "\tret";
-}
-
-void InstX8632Ret::emitIAS(const Cfg *Func) const {
-  X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
-  Asm->ret();
-}
-
-void InstX8632Ret::dump(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrDump();
-  Type Ty = (getSrcSize() == 0 ? IceType_void : getSrc(0)->getType());
-  Str << "ret." << Ty << " ";
-  dumpSources(Func);
-}
-
-void InstX8632Setcc::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  Str << "\tset" << InstX8632BrAttributes[Condition].DisplayString << "\t";
-  Dest->emit(Func);
-}
-
-void InstX8632Setcc::emitIAS(const Cfg *Func) const {
-  assert(Condition != X8632::Traits::Cond::Br_None);
-  assert(getDest()->getType() == IceType_i1);
-  assert(getSrcSize() == 0);
-  X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
-  if (getDest()->hasReg())
-    Asm->setcc(Condition, RegX8632::getEncodedByteReg(getDest()->getRegNum()));
-  else
-    Asm->setcc(Condition, static_cast<TargetX8632 *>(Func->getTarget())
-                              ->stackVarToAsmOperand(getDest()));
-  return;
-}
-
-void InstX8632Setcc::dump(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrDump();
-  Str << "setcc." << InstX8632BrAttributes[Condition].DisplayString << " ";
-  dumpDest(Func);
-}
-
-void InstX8632Xadd::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  if (Locked) {
-    Str << "\tlock";
-  }
-  Str << "\txadd" << getWidthString(getSrc(0)->getType()) << "\t";
-  getSrc(1)->emit(Func);
-  Str << ", ";
-  getSrc(0)->emit(Func);
-}
-
-void InstX8632Xadd::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 2);
-  X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
-  Type Ty = getSrc(0)->getType();
-  const auto Mem = llvm::cast<OperandX8632Mem>(getSrc(0));
-  assert(Mem->getSegmentRegister() == OperandX8632Mem::DefaultSegment);
-  const X8632::Traits::Address Addr = Mem->toAsmAddress(Asm);
-  const auto VarReg = llvm::cast<Variable>(getSrc(1));
-  assert(VarReg->hasReg());
-  const RegX8632::GPRRegister Reg =
-      RegX8632::getEncodedGPR(VarReg->getRegNum());
-  Asm->xadd(Ty, Addr, Reg, Locked);
-}
-
-void InstX8632Xadd::dump(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrDump();
-  if (Locked) {
-    Str << "lock ";
-  }
-  Type Ty = getSrc(0)->getType();
-  Str << "xadd." << Ty << " ";
-  dumpSources(Func);
-}
-
-void InstX8632Xchg::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  Str << "\txchg" << getWidthString(getSrc(0)->getType()) << "\t";
-  getSrc(1)->emit(Func);
-  Str << ", ";
-  getSrc(0)->emit(Func);
-}
-
-void InstX8632Xchg::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 2);
-  X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
-  Type Ty = getSrc(0)->getType();
-  const auto Mem = llvm::cast<OperandX8632Mem>(getSrc(0));
-  assert(Mem->getSegmentRegister() == OperandX8632Mem::DefaultSegment);
-  const X8632::Traits::Address Addr = Mem->toAsmAddress(Asm);
-  const auto VarReg = llvm::cast<Variable>(getSrc(1));
-  assert(VarReg->hasReg());
-  const RegX8632::GPRRegister Reg =
-      RegX8632::getEncodedGPR(VarReg->getRegNum());
-  Asm->xchg(Ty, Addr, Reg);
-}
-
-void InstX8632Xchg::dump(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrDump();
-  Type Ty = getSrc(0)->getType();
-  Str << "xchg." << Ty << " ";
-  dumpSources(Func);
-}
-
-void OperandX8632Mem::emit(const Cfg *Func) const {
+void MachineTraits<TargetX8632>::X86OperandMem::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
   if (SegmentReg != DefaultSegment) {
     assert(SegmentReg >= 0 && SegmentReg < SegReg_NUM);
-    Str << "%" << InstX8632SegmentRegNames[SegmentReg] << ":";
+    Str << "%" << X8632::Traits::InstSegmentRegNames[SegmentReg] << ":";
   }
   // Emit as Offset(Base,Index,1<<Shift).
   // Offset is emitted without the leading '$'.
@@ -2943,12 +134,13 @@
   }
 }
 
-void OperandX8632Mem::dump(const Cfg *Func, Ostream &Str) const {
+void MachineTraits<TargetX8632>::X86OperandMem::dump(const Cfg *Func,
+                                                     Ostream &Str) const {
   if (!BuildDefs::dump())
     return;
   if (SegmentReg != DefaultSegment) {
     assert(SegmentReg >= 0 && SegmentReg < SegReg_NUM);
-    Str << InstX8632SegmentRegNames[SegmentReg] << ":";
+    Str << X8632::Traits::InstSegmentRegNames[SegmentReg] << ":";
   }
   bool Dumped = false;
   Str << "[";
@@ -2994,14 +186,17 @@
   Str << "]";
 }
 
-void OperandX8632Mem::emitSegmentOverride(X8632::AssemblerX8632 *Asm) const {
+void MachineTraits<TargetX8632>::X86OperandMem::emitSegmentOverride(
+    MachineTraits<TargetX8632>::Assembler *Asm) const {
   if (SegmentReg != DefaultSegment) {
     assert(SegmentReg >= 0 && SegmentReg < SegReg_NUM);
-    Asm->emitSegmentOverride(InstX8632SegmentPrefixes[SegmentReg]);
+    Asm->emitSegmentOverride(X8632::Traits::InstSegmentPrefixes[SegmentReg]);
   }
 }
 
-X8632::Traits::Address OperandX8632Mem::toAsmAddress(Assembler *Asm) const {
+MachineTraits<TargetX8632>::Address
+MachineTraits<TargetX8632>::X86OperandMem::toAsmAddress(
+    MachineTraits<TargetX8632>::Assembler *Asm) const {
   int32_t Disp = 0;
   AssemblerFixup *Fixup = nullptr;
   // Determine the offset (is it relocatable?)
@@ -3037,22 +232,23 @@
   }
 }
 
-X8632::Traits::Address VariableSplit::toAsmAddress(const Cfg *Func) const {
+MachineTraits<TargetX8632>::Address
+MachineTraits<TargetX8632>::VariableSplit::toAsmAddress(const Cfg *Func) const {
   assert(!Var->hasReg());
-  const TargetLowering *Target = Func->getTarget();
+  const ::Ice::TargetLowering *Target = Func->getTarget();
   int32_t Offset =
       Var->getStackOffset() + Target->getStackAdjustment() + getOffset();
   return X8632::Traits::Address(
       RegX8632::getEncodedGPR(Target->getFrameOrStackReg()), Offset);
 }
 
-void VariableSplit::emit(const Cfg *Func) const {
+void MachineTraits<TargetX8632>::VariableSplit::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
   assert(!Var->hasReg());
   // The following is copied/adapted from TargetX8632::emitVariable().
-  const TargetLowering *Target = Func->getTarget();
+  const ::Ice::TargetLowering *Target = Func->getTarget();
   const Type Ty = IceType_i32;
   int32_t Offset =
       Var->getStackOffset() + Target->getStackAdjustment() + getOffset();
@@ -3061,7 +257,8 @@
   Str << "(%" << Target->getRegName(Target->getFrameOrStackReg(), Ty) << ")";
 }
 
-void VariableSplit::dump(const Cfg *Func, Ostream &Str) const {
+void MachineTraits<TargetX8632>::VariableSplit::dump(const Cfg *Func,
+                                                     Ostream &Str) const {
   if (!BuildDefs::dump())
     return;
   switch (Part) {
@@ -3080,4 +277,7 @@
   Str << ")";
 }
 
+} // namespace X86Internal
 } // end of namespace Ice
+
+X86INSTS_DEFINE_STATIC_DATA(TargetX8632);
diff --git a/src/IceInstX8632.h b/src/IceInstX8632.h
index 03a5205..44939dd 100644
--- a/src/IceInstX8632.h
+++ b/src/IceInstX8632.h
@@ -8,1784 +8,30 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file declares the InstX8632 and OperandX8632 classes and
-/// their subclasses.  This represents the machine instructions and
-/// operands used for x86-32 code selection.
+/// This file used to house all the X8632 instructions. Subzero has been
+/// modified to use templates for X86 instructions, so all those definitions are
+/// in IceInstX86Base.h.
+///
+/// When interacting with the X8632 target (which should only happen in the
+/// X8632 TargetLowering) clients should use the Ice::X8632::Traits::Insts
+/// traits, which hides all the template verboseness behind a type alias.
+///
+/// For example, to create an X8632 MOV Instruction, clients should do
+///
+/// ::Ice::X8632::Traits::Insts::Mov::create
+///
+/// In the future, this file might be used to declare X8632 specific
+/// instructions (e.g., FLD, and FSTP.)
 ///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICEINSTX8632_H
 #define SUBZERO_SRC_ICEINSTX8632_H
 
-#include "IceAssemblerX8632.h"
-#include "IceConditionCodesX8632.h"
 #include "IceDefs.h"
 #include "IceInst.h"
-#include "IceInstX8632.def"
+#include "IceInstX86Base.h"
 #include "IceOperand.h"
 #include "IceTargetLoweringX8632Traits.h"
 
-namespace Ice {
-
-class TargetX8632;
-
-/// OperandX8632 extends the Operand hierarchy.  Its subclasses are
-/// OperandX8632Mem and VariableSplit.
-class OperandX8632 : public Operand {
-  OperandX8632() = delete;
-  OperandX8632(const OperandX8632 &) = delete;
-  OperandX8632 &operator=(const OperandX8632 &) = delete;
-
-public:
-  enum OperandKindX8632 { k__Start = Operand::kTarget, kMem, kSplit };
-  using Operand::dump;
-  void dump(const Cfg *, Ostream &Str) const override {
-    if (BuildDefs::dump())
-      Str << "<OperandX8632>";
-  }
-
-protected:
-  OperandX8632(OperandKindX8632 Kind, Type Ty)
-      : Operand(static_cast<OperandKind>(Kind), Ty) {}
-};
-
-/// OperandX8632Mem represents the m32 addressing mode, with optional
-/// base and index registers, a constant offset, and a fixed shift
-/// value for the index register.
-class OperandX8632Mem : public OperandX8632 {
-  OperandX8632Mem() = delete;
-  OperandX8632Mem(const OperandX8632Mem &) = delete;
-  OperandX8632Mem &operator=(const OperandX8632Mem &) = delete;
-
-public:
-  enum SegmentRegisters {
-    DefaultSegment = -1,
-#define X(val, name, prefix) val,
-    SEG_REGX8632_TABLE
-#undef X
-        SegReg_NUM
-  };
-  static OperandX8632Mem *create(Cfg *Func, Type Ty, Variable *Base,
-                                 Constant *Offset, Variable *Index = nullptr,
-                                 uint16_t Shift = 0,
-                                 SegmentRegisters SegmentReg = DefaultSegment) {
-    return new (Func->allocate<OperandX8632Mem>())
-        OperandX8632Mem(Func, Ty, Base, Offset, Index, Shift, SegmentReg);
-  }
-  Variable *getBase() const { return Base; }
-  Constant *getOffset() const { return Offset; }
-  Variable *getIndex() const { return Index; }
-  uint16_t getShift() const { return Shift; }
-  SegmentRegisters getSegmentRegister() const { return SegmentReg; }
-  void emitSegmentOverride(X8632::AssemblerX8632 *Asm) const;
-  X8632::Traits::Address toAsmAddress(Assembler *Asm) const;
-  void emit(const Cfg *Func) const override;
-  using OperandX8632::dump;
-  void dump(const Cfg *Func, Ostream &Str) const override;
-
-  static bool classof(const Operand *Operand) {
-    return Operand->getKind() == static_cast<OperandKind>(kMem);
-  }
-
-  void setRandomized(bool R) { Randomized = R; }
-
-  bool getRandomized() const { return Randomized; }
-
-private:
-  OperandX8632Mem(Cfg *Func, Type Ty, Variable *Base, Constant *Offset,
-                  Variable *Index, uint16_t Shift, SegmentRegisters SegmentReg);
-
-  Variable *Base;
-  Constant *Offset;
-  Variable *Index;
-  uint16_t Shift;
-  SegmentRegisters SegmentReg : 16;
-  /// A flag to show if this memory operand is a randomized one.
-  /// Randomized memory operands are generated in
-  /// TargetX8632::randomizeOrPoolImmediate()
-  bool Randomized;
-};
-
-/// VariableSplit is a way to treat an f64 memory location as a pair
-/// of i32 locations (Low and High).  This is needed for some cases
-/// of the Bitcast instruction.  Since it's not possible for integer
-/// registers to access the XMM registers and vice versa, the
-/// lowering forces the f64 to be spilled to the stack and then
-/// accesses through the VariableSplit.
-class VariableSplit : public OperandX8632 {
-  VariableSplit() = delete;
-  VariableSplit(const VariableSplit &) = delete;
-  VariableSplit &operator=(const VariableSplit &) = delete;
-
-public:
-  enum Portion { Low, High };
-  static VariableSplit *create(Cfg *Func, Variable *Var, Portion Part) {
-    return new (Func->allocate<VariableSplit>()) VariableSplit(Func, Var, Part);
-  }
-  int32_t getOffset() const { return Part == High ? 4 : 0; }
-
-  X8632::Traits::Address toAsmAddress(const Cfg *Func) const;
-  void emit(const Cfg *Func) const override;
-  using OperandX8632::dump;
-  void dump(const Cfg *Func, Ostream &Str) const override;
-
-  static bool classof(const Operand *Operand) {
-    return Operand->getKind() == static_cast<OperandKind>(kSplit);
-  }
-
-private:
-  VariableSplit(Cfg *Func, Variable *Var, Portion Part)
-      : OperandX8632(kSplit, IceType_i32), Var(Var), Part(Part) {
-    assert(Var->getType() == IceType_f64);
-    Vars = Func->allocateArrayOf<Variable *>(1);
-    Vars[0] = Var;
-    NumVars = 1;
-  }
-
-  Variable *Var;
-  Portion Part;
-};
-
-/// SpillVariable decorates a Variable by linking it to another
-/// Variable.  When stack frame offsets are computed, the SpillVariable
-/// is given a distinct stack slot only if its linked Variable has a
-/// register.  If the linked Variable has a stack slot, then the
-/// Variable and SpillVariable share that slot.
-class SpillVariable : public Variable {
-  SpillVariable() = delete;
-  SpillVariable(const SpillVariable &) = delete;
-  SpillVariable &operator=(const SpillVariable &) = delete;
-
-public:
-  static SpillVariable *create(Cfg *Func, Type Ty, SizeT Index) {
-    return new (Func->allocate<SpillVariable>()) SpillVariable(Ty, Index);
-  }
-  const static OperandKind SpillVariableKind =
-      static_cast<OperandKind>(kVariable_Target);
-  static bool classof(const Operand *Operand) {
-    return Operand->getKind() == SpillVariableKind;
-  }
-  void setLinkedTo(Variable *Var) { LinkedTo = Var; }
-  Variable *getLinkedTo() const { return LinkedTo; }
-  // Inherit dump() and emit() from Variable.
-private:
-  SpillVariable(Type Ty, SizeT Index)
-      : Variable(SpillVariableKind, Ty, Index), LinkedTo(nullptr) {}
-  Variable *LinkedTo;
-};
-
-class InstX8632 : public InstTarget {
-  InstX8632() = delete;
-  InstX8632(const InstX8632 &) = delete;
-  InstX8632 &operator=(const InstX8632 &) = delete;
-
-public:
-  enum InstKindX8632 {
-    k__Start = Inst::Target,
-    Adc,
-    AdcRMW,
-    Add,
-    AddRMW,
-    Addps,
-    Addss,
-    Adjuststack,
-    And,
-    AndRMW,
-    Blendvps,
-    Br,
-    Bsf,
-    Bsr,
-    Bswap,
-    Call,
-    Cbwdq,
-    Cmov,
-    Cmpps,
-    Cmpxchg,
-    Cmpxchg8b,
-    Cvt,
-    Div,
-    Divps,
-    Divss,
-    FakeRMW,
-    Fld,
-    Fstp,
-    Icmp,
-    Idiv,
-    Imul,
-    Insertps,
-    Jmp,
-    Label,
-    Lea,
-    Load,
-    Mfence,
-    Mov,
-    Movd,
-    Movp,
-    Movq,
-    MovssRegs,
-    Movsx,
-    Movzx,
-    Mul,
-    Mulps,
-    Mulss,
-    Neg,
-    Nop,
-    Or,
-    OrRMW,
-    Padd,
-    Pand,
-    Pandn,
-    Pblendvb,
-    Pcmpeq,
-    Pcmpgt,
-    Pextr,
-    Pinsr,
-    Pmull,
-    Pmuludq,
-    Pop,
-    Por,
-    Pshufd,
-    Psll,
-    Psra,
-    Psrl,
-    Psub,
-    Push,
-    Pxor,
-    Ret,
-    Rol,
-    Sar,
-    Sbb,
-    SbbRMW,
-    Setcc,
-    Shl,
-    Shld,
-    Shr,
-    Shrd,
-    Shufps,
-    Sqrtss,
-    Store,
-    StoreP,
-    StoreQ,
-    Sub,
-    SubRMW,
-    Subps,
-    Subss,
-    Test,
-    Ucomiss,
-    UD2,
-    Xadd,
-    Xchg,
-    Xor,
-    XorRMW
-  };
-
-  static const char *getWidthString(Type Ty);
-  static const char *getFldString(Type Ty);
-  static X8632::Traits::Cond::BrCond
-  getOppositeCondition(X8632::Traits::Cond::BrCond Cond);
-  void dump(const Cfg *Func) const override;
-
-  /// Shared emit routines for common forms of instructions.
-  /// See the definition of emitTwoAddress() for a description of
-  /// ShiftHack.
-  static void emitTwoAddress(const char *Opcode, const Inst *Inst,
-                             const Cfg *Func, bool ShiftHack = false);
-
-  static void
-  emitIASGPRShift(const Cfg *Func, Type Ty, const Variable *Var,
-                  const Operand *Src,
-                  const X8632::AssemblerX8632::GPREmitterShiftOp &Emitter);
-
-protected:
-  InstX8632(Cfg *Func, InstKindX8632 Kind, SizeT Maxsrcs, Variable *Dest)
-      : InstTarget(Func, static_cast<InstKind>(Kind), Maxsrcs, Dest) {}
-
-  static bool isClassof(const Inst *Inst, InstKindX8632 MyKind) {
-    return Inst->getKind() == static_cast<InstKind>(MyKind);
-  }
-  /// Most instructions that operate on vector arguments require vector
-  /// memory operands to be fully aligned (16-byte alignment for PNaCl
-  /// vector types).  The stack frame layout and call ABI ensure proper
-  /// alignment for stack operands, but memory operands (originating
-  /// from load/store bitcode instructions) only have element-size
-  /// alignment guarantees.  This function validates that none of the
-  /// operands is a memory operand of vector type, calling
-  /// report_fatal_error() if one is found.  This function should be
-  /// called during emission, and maybe also in the ctor (as long as
-  /// that fits the lowering style).
-  void validateVectorAddrMode() const {
-    if (getDest())
-      validateVectorAddrModeOpnd(getDest());
-    for (SizeT i = 0; i < getSrcSize(); ++i) {
-      validateVectorAddrModeOpnd(getSrc(i));
-    }
-  }
-
-private:
-  static void validateVectorAddrModeOpnd(const Operand *Opnd) {
-    if (llvm::isa<OperandX8632Mem>(Opnd) && isVectorType(Opnd->getType())) {
-      llvm::report_fatal_error("Possible misaligned vector memory operation");
-    }
-  }
-};
-
-/// InstX8632FakeRMW represents a non-atomic read-modify-write operation on a
-/// memory location.  An InstX8632FakeRMW is a "fake" instruction in that it
-/// still needs to be lowered to some actual RMW instruction.
-///
-/// If A is some memory address, D is some data value to apply, and OP is an
-/// arithmetic operator, the instruction operates as: (*A) = (*A) OP D
-class InstX8632FakeRMW : public InstX8632 {
-  InstX8632FakeRMW() = delete;
-  InstX8632FakeRMW(const InstX8632FakeRMW &) = delete;
-  InstX8632FakeRMW &operator=(const InstX8632FakeRMW &) = delete;
-
-public:
-  static InstX8632FakeRMW *create(Cfg *Func, Operand *Data, Operand *Addr,
-                                  Variable *Beacon, InstArithmetic::OpKind Op,
-                                  uint32_t Align = 1) {
-    // TODO(stichnot): Stop ignoring alignment specification.
-    (void)Align;
-    return new (Func->allocate<InstX8632FakeRMW>())
-        InstX8632FakeRMW(Func, Data, Addr, Op, Beacon);
-  }
-  Operand *getAddr() const { return getSrc(1); }
-  Operand *getData() const { return getSrc(0); }
-  InstArithmetic::OpKind getOp() const { return Op; }
-  Variable *getBeacon() const { return llvm::cast<Variable>(getSrc(2)); }
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) { return isClassof(Inst, FakeRMW); }
-
-private:
-  InstArithmetic::OpKind Op;
-  InstX8632FakeRMW(Cfg *Func, Operand *Data, Operand *Addr,
-                   InstArithmetic::OpKind Op, Variable *Beacon);
-};
-
-/// InstX8632Label represents an intra-block label that is the target
-/// of an intra-block branch.  The offset between the label and the
-/// branch must be fit into one byte (considered "near").  These are
-/// used for lowering i1 calculations, Select instructions, and 64-bit
-/// compares on a 32-bit architecture, without basic block splitting.
-/// Basic block splitting is not so desirable for several reasons, one
-/// of which is the impact on decisions based on whether a variable's
-/// live range spans multiple basic blocks.
-///
-/// Intra-block control flow must be used with caution.  Consider the
-/// sequence for "c = (a >= b ? x : y)".
-///     cmp a, b
-///     br lt, L1
-///     mov c, x
-///     jmp L2
-///   L1:
-///     mov c, y
-///   L2:
-///
-/// Labels L1 and L2 are intra-block labels.  Without knowledge of the
-/// intra-block control flow, liveness analysis will determine the "mov
-/// c, x" instruction to be dead.  One way to prevent this is to insert
-/// a "FakeUse(c)" instruction anywhere between the two "mov c, ..."
-/// instructions, e.g.:
-///
-///     cmp a, b
-///     br lt, L1
-///     mov c, x
-///     jmp L2
-///     FakeUse(c)
-///   L1:
-///     mov c, y
-///   L2:
-///
-/// The down-side is that "mov c, x" can never be dead-code eliminated
-/// even if there are no uses of c.  As unlikely as this situation is,
-/// it may be prevented by running dead code elimination before
-/// lowering.
-class InstX8632Label : public InstX8632 {
-  InstX8632Label() = delete;
-  InstX8632Label(const InstX8632Label &) = delete;
-  InstX8632Label &operator=(const InstX8632Label &) = delete;
-
-public:
-  static InstX8632Label *create(Cfg *Func, TargetX8632 *Target) {
-    return new (Func->allocate<InstX8632Label>()) InstX8632Label(Func, Target);
-  }
-  uint32_t getEmitInstCount() const override { return 0; }
-  IceString getName(const Cfg *Func) const;
-  SizeT getNumber() const { return Number; }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-
-private:
-  InstX8632Label(Cfg *Func, TargetX8632 *Target);
-
-  SizeT Number; /// used for unique label generation.
-};
-
-/// Conditional and unconditional branch instruction.
-class InstX8632Br : public InstX8632 {
-  InstX8632Br() = delete;
-  InstX8632Br(const InstX8632Br &) = delete;
-  InstX8632Br &operator=(const InstX8632Br &) = delete;
-
-public:
-  /// Create a conditional branch to a node.
-  static InstX8632Br *create(Cfg *Func, CfgNode *TargetTrue,
-                             CfgNode *TargetFalse,
-                             X8632::Traits::Cond::BrCond Condition) {
-    assert(Condition != X8632::Traits::Cond::Br_None);
-    const InstX8632Label *NoLabel = nullptr;
-    return new (Func->allocate<InstX8632Br>())
-        InstX8632Br(Func, TargetTrue, TargetFalse, NoLabel, Condition);
-  }
-  /// Create an unconditional branch to a node.
-  static InstX8632Br *create(Cfg *Func, CfgNode *Target) {
-    const CfgNode *NoCondTarget = nullptr;
-    const InstX8632Label *NoLabel = nullptr;
-    return new (Func->allocate<InstX8632Br>()) InstX8632Br(
-        Func, NoCondTarget, Target, NoLabel, X8632::Traits::Cond::Br_None);
-  }
-  /// Create a non-terminator conditional branch to a node, with a
-  /// fallthrough to the next instruction in the current node.  This is
-  /// used for switch lowering.
-  static InstX8632Br *create(Cfg *Func, CfgNode *Target,
-                             X8632::Traits::Cond::BrCond Condition) {
-    assert(Condition != X8632::Traits::Cond::Br_None);
-    const CfgNode *NoUncondTarget = nullptr;
-    const InstX8632Label *NoLabel = nullptr;
-    return new (Func->allocate<InstX8632Br>())
-        InstX8632Br(Func, Target, NoUncondTarget, NoLabel, Condition);
-  }
-  /// Create a conditional intra-block branch (or unconditional, if
-  /// Condition==Br_None) to a label in the current block.
-  static InstX8632Br *create(Cfg *Func, InstX8632Label *Label,
-                             X8632::Traits::Cond::BrCond Condition) {
-    const CfgNode *NoCondTarget = nullptr;
-    const CfgNode *NoUncondTarget = nullptr;
-    return new (Func->allocate<InstX8632Br>())
-        InstX8632Br(Func, NoCondTarget, NoUncondTarget, Label, Condition);
-  }
-  const CfgNode *getTargetTrue() const { return TargetTrue; }
-  const CfgNode *getTargetFalse() const { return TargetFalse; }
-  bool optimizeBranch(const CfgNode *NextNode);
-  uint32_t getEmitInstCount() const override {
-    uint32_t Sum = 0;
-    if (Label)
-      ++Sum;
-    if (getTargetTrue())
-      ++Sum;
-    if (getTargetFalse())
-      ++Sum;
-    return Sum;
-  }
-  bool isUnconditionalBranch() const override {
-    return !Label && Condition == X8632::Traits::Cond::Br_None;
-  }
-  bool repointEdge(CfgNode *OldNode, CfgNode *NewNode) override;
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) { return isClassof(Inst, Br); }
-
-private:
-  InstX8632Br(Cfg *Func, const CfgNode *TargetTrue, const CfgNode *TargetFalse,
-              const InstX8632Label *Label,
-              X8632::Traits::Cond::BrCond Condition);
-
-  X8632::Traits::Cond::BrCond Condition;
-  const CfgNode *TargetTrue;
-  const CfgNode *TargetFalse;
-  const InstX8632Label *Label; /// Intra-block branch target
-};
-
-/// Jump to a target outside this function, such as tailcall, nacljump,
-/// naclret, unreachable.  This is different from a Branch instruction
-/// in that there is no intra-function control flow to represent.
-class InstX8632Jmp : public InstX8632 {
-  InstX8632Jmp() = delete;
-  InstX8632Jmp(const InstX8632Jmp &) = delete;
-  InstX8632Jmp &operator=(const InstX8632Jmp &) = delete;
-
-public:
-  static InstX8632Jmp *create(Cfg *Func, Operand *Target) {
-    return new (Func->allocate<InstX8632Jmp>()) InstX8632Jmp(Func, Target);
-  }
-  Operand *getJmpTarget() const { return getSrc(0); }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) { return isClassof(Inst, Jmp); }
-
-private:
-  InstX8632Jmp(Cfg *Func, Operand *Target);
-};
-
-/// AdjustStack instruction - subtracts esp by the given amount and
-/// updates the stack offset during code emission.
-class InstX8632AdjustStack : public InstX8632 {
-  InstX8632AdjustStack() = delete;
-  InstX8632AdjustStack(const InstX8632AdjustStack &) = delete;
-  InstX8632AdjustStack &operator=(const InstX8632AdjustStack &) = delete;
-
-public:
-  static InstX8632AdjustStack *create(Cfg *Func, SizeT Amount, Variable *Esp) {
-    return new (Func->allocate<InstX8632AdjustStack>())
-        InstX8632AdjustStack(Func, Amount, Esp);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) { return isClassof(Inst, Adjuststack); }
-
-private:
-  InstX8632AdjustStack(Cfg *Func, SizeT Amount, Variable *Esp);
-  SizeT Amount;
-};
-
-/// Call instruction.  Arguments should have already been pushed.
-class InstX8632Call : public InstX8632 {
-  InstX8632Call() = delete;
-  InstX8632Call(const InstX8632Call &) = delete;
-  InstX8632Call &operator=(const InstX8632Call &) = delete;
-
-public:
-  static InstX8632Call *create(Cfg *Func, Variable *Dest, Operand *CallTarget) {
-    return new (Func->allocate<InstX8632Call>())
-        InstX8632Call(Func, Dest, CallTarget);
-  }
-  Operand *getCallTarget() const { return getSrc(0); }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) { return isClassof(Inst, Call); }
-
-private:
-  InstX8632Call(Cfg *Func, Variable *Dest, Operand *CallTarget);
-};
-
-/// Emit a one-operand (GPR) instruction.
-void emitIASOpTyGPR(const Cfg *Func, Type Ty, const Operand *Var,
-                    const X8632::AssemblerX8632::GPREmitterOneOp &Emitter);
-void emitIASAsAddrOpTyGPR(
-    const Cfg *Func, Type Ty, const Operand *Op0, const Operand *Op1,
-    const X8632::AssemblerX8632::GPREmitterAddrOp &Emitter);
-
-/// Instructions of the form x := op(x).
-template <InstX8632::InstKindX8632 K>
-class InstX8632InplaceopGPR : public InstX8632 {
-  InstX8632InplaceopGPR() = delete;
-  InstX8632InplaceopGPR(const InstX8632InplaceopGPR &) = delete;
-  InstX8632InplaceopGPR &operator=(const InstX8632InplaceopGPR &) = delete;
-
-public:
-  static InstX8632InplaceopGPR *create(Cfg *Func, Operand *SrcDest) {
-    return new (Func->allocate<InstX8632InplaceopGPR>())
-        InstX8632InplaceopGPR(Func, SrcDest);
-  }
-  void emit(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    Ostream &Str = Func->getContext()->getStrEmit();
-    assert(getSrcSize() == 1);
-    Str << "\t" << Opcode << "\t";
-    getSrc(0)->emit(Func);
-  }
-  void emitIAS(const Cfg *Func) const override {
-    assert(getSrcSize() == 1);
-    const Variable *Var = getDest();
-    Type Ty = Var->getType();
-    emitIASOpTyGPR(Func, Ty, Var, Emitter);
-  }
-  void dump(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    Ostream &Str = Func->getContext()->getStrDump();
-    dumpDest(Func);
-    Str << " = " << Opcode << "." << getDest()->getType() << " ";
-    dumpSources(Func);
-  }
-  static bool classof(const Inst *Inst) { return isClassof(Inst, K); }
-
-private:
-  InstX8632InplaceopGPR(Cfg *Func, Operand *SrcDest)
-      : InstX8632(Func, K, 1, llvm::dyn_cast<Variable>(SrcDest)) {
-    addSource(SrcDest);
-  }
-
-  static const char *Opcode;
-  static const X8632::AssemblerX8632::GPREmitterOneOp Emitter;
-};
-
-/// Emit a two-operand (GPR) instruction, where the dest operand is a
-/// Variable that's guaranteed to be a register.
-template <bool VarCanBeByte = true, bool SrcCanBeByte = true>
-void emitIASRegOpTyGPR(const Cfg *Func, Type Ty, const Variable *Dst,
-                       const Operand *Src,
-                       const X8632::AssemblerX8632::GPREmitterRegOp &Emitter);
-
-/// Instructions of the form x := op(y).
-template <InstX8632::InstKindX8632 K>
-class InstX8632UnaryopGPR : public InstX8632 {
-  InstX8632UnaryopGPR() = delete;
-  InstX8632UnaryopGPR(const InstX8632UnaryopGPR &) = delete;
-  InstX8632UnaryopGPR &operator=(const InstX8632UnaryopGPR &) = delete;
-
-public:
-  static InstX8632UnaryopGPR *create(Cfg *Func, Variable *Dest, Operand *Src) {
-    return new (Func->allocate<InstX8632UnaryopGPR>())
-        InstX8632UnaryopGPR(Func, Dest, Src);
-  }
-  void emit(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    Ostream &Str = Func->getContext()->getStrEmit();
-    assert(getSrcSize() == 1);
-    Type SrcTy = getSrc(0)->getType();
-    Type DestTy = getDest()->getType();
-    Str << "\t" << Opcode << getWidthString(SrcTy);
-    // Movsx and movzx need both the source and dest type width letter
-    // to define the operation.  The other unary operations have the
-    // same source and dest type and as a result need only one letter.
-    if (SrcTy != DestTy)
-      Str << getWidthString(DestTy);
-    Str << "\t";
-    getSrc(0)->emit(Func);
-    Str << ", ";
-    getDest()->emit(Func);
-  }
-  void emitIAS(const Cfg *Func) const override {
-    assert(getSrcSize() == 1);
-    const Variable *Var = getDest();
-    Type Ty = Var->getType();
-    const Operand *Src = getSrc(0);
-    emitIASRegOpTyGPR(Func, Ty, Var, Src, Emitter);
-  }
-  void dump(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    Ostream &Str = Func->getContext()->getStrDump();
-    dumpDest(Func);
-    Str << " = " << Opcode << "." << getSrc(0)->getType() << " ";
-    dumpSources(Func);
-  }
-  static bool classof(const Inst *Inst) { return isClassof(Inst, K); }
-
-private:
-  InstX8632UnaryopGPR(Cfg *Func, Variable *Dest, Operand *Src)
-      : InstX8632(Func, K, 1, Dest) {
-    addSource(Src);
-  }
-
-  static const char *Opcode;
-  static const X8632::AssemblerX8632::GPREmitterRegOp Emitter;
-};
-
-void emitIASRegOpTyXMM(const Cfg *Func, Type Ty, const Variable *Var,
-                       const Operand *Src,
-                       const X8632::AssemblerX8632::XmmEmitterRegOp &Emitter);
-
-template <InstX8632::InstKindX8632 K>
-class InstX8632UnaryopXmm : public InstX8632 {
-  InstX8632UnaryopXmm() = delete;
-  InstX8632UnaryopXmm(const InstX8632UnaryopXmm &) = delete;
-  InstX8632UnaryopXmm &operator=(const InstX8632UnaryopXmm &) = delete;
-
-public:
-  static InstX8632UnaryopXmm *create(Cfg *Func, Variable *Dest, Operand *Src) {
-    return new (Func->allocate<InstX8632UnaryopXmm>())
-        InstX8632UnaryopXmm(Func, Dest, Src);
-  }
-  void emit(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    Ostream &Str = Func->getContext()->getStrEmit();
-    assert(getSrcSize() == 1);
-    Str << "\t" << Opcode << "\t";
-    getSrc(0)->emit(Func);
-    Str << ", ";
-    getDest()->emit(Func);
-  }
-  void emitIAS(const Cfg *Func) const override {
-    Type Ty = getDest()->getType();
-    assert(getSrcSize() == 1);
-    emitIASRegOpTyXMM(Func, Ty, getDest(), getSrc(0), Emitter);
-  }
-  void dump(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    Ostream &Str = Func->getContext()->getStrDump();
-    dumpDest(Func);
-    Str << " = " << Opcode << "." << getDest()->getType() << " ";
-    dumpSources(Func);
-  }
-  static bool classof(const Inst *Inst) { return isClassof(Inst, K); }
-
-private:
-  InstX8632UnaryopXmm(Cfg *Func, Variable *Dest, Operand *Src)
-      : InstX8632(Func, K, 1, Dest) {
-    addSource(Src);
-  }
-
-  static const char *Opcode;
-  static const X8632::AssemblerX8632::XmmEmitterRegOp Emitter;
-};
-
-template <InstX8632::InstKindX8632 K>
-class InstX8632BinopGPRShift : public InstX8632 {
-  InstX8632BinopGPRShift() = delete;
-  InstX8632BinopGPRShift(const InstX8632BinopGPRShift &) = delete;
-  InstX8632BinopGPRShift &operator=(const InstX8632BinopGPRShift &) = delete;
-
-public:
-  /// Create a binary-op GPR shift instruction.
-  static InstX8632BinopGPRShift *create(Cfg *Func, Variable *Dest,
-                                        Operand *Source) {
-    return new (Func->allocate<InstX8632BinopGPRShift>())
-        InstX8632BinopGPRShift(Func, Dest, Source);
-  }
-  void emit(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    const bool ShiftHack = true;
-    emitTwoAddress(Opcode, this, Func, ShiftHack);
-  }
-  void emitIAS(const Cfg *Func) const override {
-    Type Ty = getDest()->getType();
-    assert(getSrcSize() == 2);
-    emitIASGPRShift(Func, Ty, getDest(), getSrc(1), Emitter);
-  }
-  void dump(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    Ostream &Str = Func->getContext()->getStrDump();
-    dumpDest(Func);
-    Str << " = " << Opcode << "." << getDest()->getType() << " ";
-    dumpSources(Func);
-  }
-  static bool classof(const Inst *Inst) { return isClassof(Inst, K); }
-
-private:
-  InstX8632BinopGPRShift(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX8632(Func, K, 2, Dest) {
-    addSource(Dest);
-    addSource(Source);
-  }
-
-  static const char *Opcode;
-  static const X8632::AssemblerX8632::GPREmitterShiftOp Emitter;
-};
-
-template <InstX8632::InstKindX8632 K>
-class InstX8632BinopGPR : public InstX8632 {
-  InstX8632BinopGPR() = delete;
-  InstX8632BinopGPR(const InstX8632BinopGPR &) = delete;
-  InstX8632BinopGPR &operator=(const InstX8632BinopGPR &) = delete;
-
-public:
-  /// Create an ordinary binary-op instruction like add or sub.
-  static InstX8632BinopGPR *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX8632BinopGPR>())
-        InstX8632BinopGPR(Func, Dest, Source);
-  }
-  void emit(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    const bool ShiftHack = false;
-    emitTwoAddress(Opcode, this, Func, ShiftHack);
-  }
-  void emitIAS(const Cfg *Func) const override {
-    Type Ty = getDest()->getType();
-    assert(getSrcSize() == 2);
-    emitIASRegOpTyGPR(Func, Ty, getDest(), getSrc(1), Emitter);
-  }
-  void dump(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    Ostream &Str = Func->getContext()->getStrDump();
-    dumpDest(Func);
-    Str << " = " << Opcode << "." << getDest()->getType() << " ";
-    dumpSources(Func);
-  }
-  static bool classof(const Inst *Inst) { return isClassof(Inst, K); }
-
-private:
-  InstX8632BinopGPR(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX8632(Func, K, 2, Dest) {
-    addSource(Dest);
-    addSource(Source);
-  }
-
-  static const char *Opcode;
-  static const X8632::AssemblerX8632::GPREmitterRegOp Emitter;
-};
-
-template <InstX8632::InstKindX8632 K>
-class InstX8632BinopRMW : public InstX8632 {
-  InstX8632BinopRMW() = delete;
-  InstX8632BinopRMW(const InstX8632BinopRMW &) = delete;
-  InstX8632BinopRMW &operator=(const InstX8632BinopRMW &) = delete;
-
-public:
-  /// Create an ordinary binary-op instruction like add or sub.
-  static InstX8632BinopRMW *create(Cfg *Func, OperandX8632Mem *DestSrc0,
-                                   Operand *Src1) {
-    return new (Func->allocate<InstX8632BinopRMW>())
-        InstX8632BinopRMW(Func, DestSrc0, Src1);
-  }
-  void emit(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    const bool ShiftHack = false;
-    emitTwoAddress(Opcode, this, Func, ShiftHack);
-  }
-  void emitIAS(const Cfg *Func) const override {
-    Type Ty = getSrc(0)->getType();
-    assert(getSrcSize() == 2);
-    emitIASAsAddrOpTyGPR(Func, Ty, getSrc(0), getSrc(1), Emitter);
-  }
-  void dump(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    Ostream &Str = Func->getContext()->getStrDump();
-    Str << Opcode << "." << getSrc(0)->getType() << " ";
-    dumpSources(Func);
-  }
-  static bool classof(const Inst *Inst) { return isClassof(Inst, K); }
-
-private:
-  InstX8632BinopRMW(Cfg *Func, OperandX8632Mem *DestSrc0, Operand *Src1)
-      : InstX8632(Func, K, 2, nullptr) {
-    addSource(DestSrc0);
-    addSource(Src1);
-  }
-  static const char *Opcode;
-  static const X8632::AssemblerX8632::GPREmitterAddrOp Emitter;
-};
-
-template <InstX8632::InstKindX8632 K, bool NeedsElementType>
-class InstX8632BinopXmm : public InstX8632 {
-  InstX8632BinopXmm() = delete;
-  InstX8632BinopXmm(const InstX8632BinopXmm &) = delete;
-  InstX8632BinopXmm &operator=(const InstX8632BinopXmm &) = delete;
-
-public:
-  /// Create an XMM binary-op instruction like addss or addps.
-  static InstX8632BinopXmm *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX8632BinopXmm>())
-        InstX8632BinopXmm(Func, Dest, Source);
-  }
-  void emit(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    validateVectorAddrMode();
-    const bool ShiftHack = false;
-    emitTwoAddress(Opcode, this, Func, ShiftHack);
-  }
-  void emitIAS(const Cfg *Func) const override {
-    validateVectorAddrMode();
-    Type Ty = getDest()->getType();
-    if (NeedsElementType)
-      Ty = typeElementType(Ty);
-    assert(getSrcSize() == 2);
-    emitIASRegOpTyXMM(Func, Ty, getDest(), getSrc(1), Emitter);
-  }
-  void dump(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    Ostream &Str = Func->getContext()->getStrDump();
-    dumpDest(Func);
-    Str << " = " << Opcode << "." << getDest()->getType() << " ";
-    dumpSources(Func);
-  }
-  static bool classof(const Inst *Inst) { return isClassof(Inst, K); }
-
-private:
-  InstX8632BinopXmm(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX8632(Func, K, 2, Dest) {
-    addSource(Dest);
-    addSource(Source);
-  }
-
-  static const char *Opcode;
-  static const X8632::AssemblerX8632::XmmEmitterRegOp Emitter;
-};
-
-void emitIASXmmShift(const Cfg *Func, Type Ty, const Variable *Var,
-                     const Operand *Src,
-                     const X8632::AssemblerX8632::XmmEmitterShiftOp &Emitter);
-
-template <InstX8632::InstKindX8632 K, bool AllowAllTypes = false>
-class InstX8632BinopXmmShift : public InstX8632 {
-  InstX8632BinopXmmShift() = delete;
-  InstX8632BinopXmmShift(const InstX8632BinopXmmShift &) = delete;
-  InstX8632BinopXmmShift &operator=(const InstX8632BinopXmmShift &) = delete;
-
-public:
-  /// Create an XMM binary-op shift operation.
-  static InstX8632BinopXmmShift *create(Cfg *Func, Variable *Dest,
-                                        Operand *Source) {
-    return new (Func->allocate<InstX8632BinopXmmShift>())
-        InstX8632BinopXmmShift(Func, Dest, Source);
-  }
-  void emit(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    validateVectorAddrMode();
-    const bool ShiftHack = false;
-    emitTwoAddress(Opcode, this, Func, ShiftHack);
-  }
-  void emitIAS(const Cfg *Func) const override {
-    validateVectorAddrMode();
-    Type Ty = getDest()->getType();
-    assert(AllowAllTypes || isVectorType(Ty));
-    Type ElementTy = typeElementType(Ty);
-    assert(getSrcSize() == 2);
-    emitIASXmmShift(Func, ElementTy, getDest(), getSrc(1), Emitter);
-  }
-  void dump(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    Ostream &Str = Func->getContext()->getStrDump();
-    dumpDest(Func);
-    Str << " = " << Opcode << "." << getDest()->getType() << " ";
-    dumpSources(Func);
-  }
-  static bool classof(const Inst *Inst) { return isClassof(Inst, K); }
-
-private:
-  InstX8632BinopXmmShift(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX8632(Func, K, 2, Dest) {
-    addSource(Dest);
-    addSource(Source);
-  }
-
-  static const char *Opcode;
-  static const X8632::AssemblerX8632::XmmEmitterShiftOp Emitter;
-};
-
-template <InstX8632::InstKindX8632 K> class InstX8632Ternop : public InstX8632 {
-  InstX8632Ternop() = delete;
-  InstX8632Ternop(const InstX8632Ternop &) = delete;
-  InstX8632Ternop &operator=(const InstX8632Ternop &) = delete;
-
-public:
-  /// Create a ternary-op instruction like div or idiv.
-  static InstX8632Ternop *create(Cfg *Func, Variable *Dest, Operand *Source1,
-                                 Operand *Source2) {
-    return new (Func->allocate<InstX8632Ternop>())
-        InstX8632Ternop(Func, Dest, Source1, Source2);
-  }
-  void emit(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    Ostream &Str = Func->getContext()->getStrEmit();
-    assert(getSrcSize() == 3);
-    Str << "\t" << Opcode << "\t";
-    getSrc(2)->emit(Func);
-    Str << ", ";
-    getSrc(1)->emit(Func);
-    Str << ", ";
-    getDest()->emit(Func);
-  }
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    Ostream &Str = Func->getContext()->getStrDump();
-    dumpDest(Func);
-    Str << " = " << Opcode << "." << getDest()->getType() << " ";
-    dumpSources(Func);
-  }
-  static bool classof(const Inst *Inst) { return isClassof(Inst, K); }
-
-private:
-  InstX8632Ternop(Cfg *Func, Variable *Dest, Operand *Source1, Operand *Source2)
-      : InstX8632(Func, K, 3, Dest) {
-    addSource(Dest);
-    addSource(Source1);
-    addSource(Source2);
-  }
-
-  static const char *Opcode;
-};
-
-/// Instructions of the form x := y op z
-template <InstX8632::InstKindX8632 K>
-class InstX8632ThreeAddressop : public InstX8632 {
-  InstX8632ThreeAddressop() = delete;
-  InstX8632ThreeAddressop(const InstX8632ThreeAddressop &) = delete;
-  InstX8632ThreeAddressop &operator=(const InstX8632ThreeAddressop &) = delete;
-
-public:
-  static InstX8632ThreeAddressop *create(Cfg *Func, Variable *Dest,
-                                         Operand *Source0, Operand *Source1) {
-    return new (Func->allocate<InstX8632ThreeAddressop>())
-        InstX8632ThreeAddressop(Func, Dest, Source0, Source1);
-  }
-  void emit(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    Ostream &Str = Func->getContext()->getStrEmit();
-    assert(getSrcSize() == 2);
-    Str << "\t" << Opcode << "\t";
-    getSrc(1)->emit(Func);
-    Str << ", ";
-    getSrc(0)->emit(Func);
-    Str << ", ";
-    getDest()->emit(Func);
-  }
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    Ostream &Str = Func->getContext()->getStrDump();
-    dumpDest(Func);
-    Str << " = " << Opcode << "." << getDest()->getType() << " ";
-    dumpSources(Func);
-  }
-  static bool classof(const Inst *Inst) { return isClassof(Inst, K); }
-
-private:
-  InstX8632ThreeAddressop(Cfg *Func, Variable *Dest, Operand *Source0,
-                          Operand *Source1)
-      : InstX8632(Func, K, 2, Dest) {
-    addSource(Source0);
-    addSource(Source1);
-  }
-
-  static const char *Opcode;
-};
-
-/// Base class for assignment instructions
-template <InstX8632::InstKindX8632 K>
-class InstX8632Movlike : public InstX8632 {
-  InstX8632Movlike() = delete;
-  InstX8632Movlike(const InstX8632Movlike &) = delete;
-  InstX8632Movlike &operator=(const InstX8632Movlike &) = delete;
-
-public:
-  static InstX8632Movlike *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX8632Movlike>())
-        InstX8632Movlike(Func, Dest, Source);
-  }
-  bool isRedundantAssign() const override {
-    return checkForRedundantAssign(getDest(), getSrc(0));
-  }
-  bool isSimpleAssign() const override { return true; }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    Ostream &Str = Func->getContext()->getStrDump();
-    Str << Opcode << "." << getDest()->getType() << " ";
-    dumpDest(Func);
-    Str << ", ";
-    dumpSources(Func);
-  }
-  static bool classof(const Inst *Inst) { return isClassof(Inst, K); }
-
-private:
-  InstX8632Movlike(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX8632(Func, K, 1, Dest) {
-    addSource(Source);
-  }
-
-  static const char *Opcode;
-};
-
-typedef InstX8632InplaceopGPR<InstX8632::Bswap> InstX8632Bswap;
-typedef InstX8632InplaceopGPR<InstX8632::Neg> InstX8632Neg;
-typedef InstX8632UnaryopGPR<InstX8632::Bsf> InstX8632Bsf;
-typedef InstX8632UnaryopGPR<InstX8632::Bsr> InstX8632Bsr;
-typedef InstX8632UnaryopGPR<InstX8632::Lea> InstX8632Lea;
-/// Cbwdq instruction - wrapper for cbw, cwd, and cdq
-typedef InstX8632UnaryopGPR<InstX8632::Cbwdq> InstX8632Cbwdq;
-typedef InstX8632UnaryopGPR<InstX8632::Movsx> InstX8632Movsx;
-typedef InstX8632UnaryopGPR<InstX8632::Movzx> InstX8632Movzx;
-typedef InstX8632UnaryopXmm<InstX8632::Movd> InstX8632Movd;
-typedef InstX8632UnaryopXmm<InstX8632::Sqrtss> InstX8632Sqrtss;
-/// Move/assignment instruction - wrapper for mov/movss/movsd.
-typedef InstX8632Movlike<InstX8632::Mov> InstX8632Mov;
-/// Move packed - copy 128 bit values between XMM registers, or mem128
-/// and XMM registers.
-typedef InstX8632Movlike<InstX8632::Movp> InstX8632Movp;
-/// Movq - copy between XMM registers, or mem64 and XMM registers.
-typedef InstX8632Movlike<InstX8632::Movq> InstX8632Movq;
-typedef InstX8632BinopGPR<InstX8632::Add> InstX8632Add;
-typedef InstX8632BinopRMW<InstX8632::AddRMW> InstX8632AddRMW;
-typedef InstX8632BinopXmm<InstX8632::Addps, true> InstX8632Addps;
-typedef InstX8632BinopGPR<InstX8632::Adc> InstX8632Adc;
-typedef InstX8632BinopRMW<InstX8632::AdcRMW> InstX8632AdcRMW;
-typedef InstX8632BinopXmm<InstX8632::Addss, false> InstX8632Addss;
-typedef InstX8632BinopXmm<InstX8632::Padd, true> InstX8632Padd;
-typedef InstX8632BinopGPR<InstX8632::Sub> InstX8632Sub;
-typedef InstX8632BinopRMW<InstX8632::SubRMW> InstX8632SubRMW;
-typedef InstX8632BinopXmm<InstX8632::Subps, true> InstX8632Subps;
-typedef InstX8632BinopXmm<InstX8632::Subss, false> InstX8632Subss;
-typedef InstX8632BinopGPR<InstX8632::Sbb> InstX8632Sbb;
-typedef InstX8632BinopRMW<InstX8632::SbbRMW> InstX8632SbbRMW;
-typedef InstX8632BinopXmm<InstX8632::Psub, true> InstX8632Psub;
-typedef InstX8632BinopGPR<InstX8632::And> InstX8632And;
-typedef InstX8632BinopRMW<InstX8632::AndRMW> InstX8632AndRMW;
-typedef InstX8632BinopXmm<InstX8632::Pand, false> InstX8632Pand;
-typedef InstX8632BinopXmm<InstX8632::Pandn, false> InstX8632Pandn;
-typedef InstX8632BinopGPR<InstX8632::Or> InstX8632Or;
-typedef InstX8632BinopRMW<InstX8632::OrRMW> InstX8632OrRMW;
-typedef InstX8632BinopXmm<InstX8632::Por, false> InstX8632Por;
-typedef InstX8632BinopGPR<InstX8632::Xor> InstX8632Xor;
-typedef InstX8632BinopRMW<InstX8632::XorRMW> InstX8632XorRMW;
-typedef InstX8632BinopXmm<InstX8632::Pxor, false> InstX8632Pxor;
-typedef InstX8632BinopGPR<InstX8632::Imul> InstX8632Imul;
-typedef InstX8632BinopXmm<InstX8632::Mulps, true> InstX8632Mulps;
-typedef InstX8632BinopXmm<InstX8632::Mulss, false> InstX8632Mulss;
-typedef InstX8632BinopXmm<InstX8632::Pmull, true> InstX8632Pmull;
-typedef InstX8632BinopXmm<InstX8632::Pmuludq, false> InstX8632Pmuludq;
-typedef InstX8632BinopXmm<InstX8632::Divps, true> InstX8632Divps;
-typedef InstX8632BinopXmm<InstX8632::Divss, false> InstX8632Divss;
-typedef InstX8632BinopGPRShift<InstX8632::Rol> InstX8632Rol;
-typedef InstX8632BinopGPRShift<InstX8632::Shl> InstX8632Shl;
-typedef InstX8632BinopXmmShift<InstX8632::Psll> InstX8632Psll;
-typedef InstX8632BinopXmmShift<InstX8632::Psrl, true> InstX8632Psrl;
-typedef InstX8632BinopGPRShift<InstX8632::Shr> InstX8632Shr;
-typedef InstX8632BinopGPRShift<InstX8632::Sar> InstX8632Sar;
-typedef InstX8632BinopXmmShift<InstX8632::Psra> InstX8632Psra;
-typedef InstX8632BinopXmm<InstX8632::Pcmpeq, true> InstX8632Pcmpeq;
-typedef InstX8632BinopXmm<InstX8632::Pcmpgt, true> InstX8632Pcmpgt;
-/// movss is only a binary operation when the source and dest
-/// operands are both registers (the high bits of dest are left untouched).
-/// In other cases, it behaves like a copy (mov-like) operation (and the
-/// high bits of dest are cleared).
-/// InstX8632Movss will assert that both its source and dest operands are
-/// registers, so the lowering code should use _mov instead of _movss
-/// in cases where a copy operation is intended.
-typedef InstX8632BinopXmm<InstX8632::MovssRegs, false> InstX8632MovssRegs;
-typedef InstX8632Ternop<InstX8632::Idiv> InstX8632Idiv;
-typedef InstX8632Ternop<InstX8632::Div> InstX8632Div;
-typedef InstX8632Ternop<InstX8632::Insertps> InstX8632Insertps;
-typedef InstX8632Ternop<InstX8632::Pinsr> InstX8632Pinsr;
-typedef InstX8632Ternop<InstX8632::Shufps> InstX8632Shufps;
-typedef InstX8632Ternop<InstX8632::Blendvps> InstX8632Blendvps;
-typedef InstX8632Ternop<InstX8632::Pblendvb> InstX8632Pblendvb;
-typedef InstX8632ThreeAddressop<InstX8632::Pextr> InstX8632Pextr;
-typedef InstX8632ThreeAddressop<InstX8632::Pshufd> InstX8632Pshufd;
-
-/// Base class for a lockable x86-32 instruction (emits a locked prefix).
-class InstX8632Lockable : public InstX8632 {
-  InstX8632Lockable() = delete;
-  InstX8632Lockable(const InstX8632Lockable &) = delete;
-  InstX8632Lockable &operator=(const InstX8632Lockable &) = delete;
-
-protected:
-  bool Locked;
-
-  InstX8632Lockable(Cfg *Func, InstKindX8632 Kind, SizeT Maxsrcs,
-                    Variable *Dest, bool Locked)
-      : InstX8632(Func, Kind, Maxsrcs, Dest), Locked(Locked) {
-    // Assume that such instructions are used for Atomics and be careful
-    // with optimizations.
-    HasSideEffects = Locked;
-  }
-};
-
-/// Mul instruction - unsigned multiply.
-class InstX8632Mul : public InstX8632 {
-  InstX8632Mul() = delete;
-  InstX8632Mul(const InstX8632Mul &) = delete;
-  InstX8632Mul &operator=(const InstX8632Mul &) = delete;
-
-public:
-  static InstX8632Mul *create(Cfg *Func, Variable *Dest, Variable *Source1,
-                              Operand *Source2) {
-    return new (Func->allocate<InstX8632Mul>())
-        InstX8632Mul(Func, Dest, Source1, Source2);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) { return isClassof(Inst, Mul); }
-
-private:
-  InstX8632Mul(Cfg *Func, Variable *Dest, Variable *Source1, Operand *Source2);
-};
-
-/// Shld instruction - shift across a pair of operands.
-class InstX8632Shld : public InstX8632 {
-  InstX8632Shld() = delete;
-  InstX8632Shld(const InstX8632Shld &) = delete;
-  InstX8632Shld &operator=(const InstX8632Shld &) = delete;
-
-public:
-  static InstX8632Shld *create(Cfg *Func, Variable *Dest, Variable *Source1,
-                               Variable *Source2) {
-    return new (Func->allocate<InstX8632Shld>())
-        InstX8632Shld(Func, Dest, Source1, Source2);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) { return isClassof(Inst, Shld); }
-
-private:
-  InstX8632Shld(Cfg *Func, Variable *Dest, Variable *Source1,
-                Variable *Source2);
-};
-
-/// Shrd instruction - shift across a pair of operands.
-class InstX8632Shrd : public InstX8632 {
-  InstX8632Shrd() = delete;
-  InstX8632Shrd(const InstX8632Shrd &) = delete;
-  InstX8632Shrd &operator=(const InstX8632Shrd &) = delete;
-
-public:
-  static InstX8632Shrd *create(Cfg *Func, Variable *Dest, Variable *Source1,
-                               Variable *Source2) {
-    return new (Func->allocate<InstX8632Shrd>())
-        InstX8632Shrd(Func, Dest, Source1, Source2);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) { return isClassof(Inst, Shrd); }
-
-private:
-  InstX8632Shrd(Cfg *Func, Variable *Dest, Variable *Source1,
-                Variable *Source2);
-};
-
-/// Conditional move instruction.
-class InstX8632Cmov : public InstX8632 {
-  InstX8632Cmov() = delete;
-  InstX8632Cmov(const InstX8632Cmov &) = delete;
-  InstX8632Cmov &operator=(const InstX8632Cmov &) = delete;
-
-public:
-  static InstX8632Cmov *create(Cfg *Func, Variable *Dest, Operand *Source,
-                               X8632::Traits::Cond::BrCond Cond) {
-    return new (Func->allocate<InstX8632Cmov>())
-        InstX8632Cmov(Func, Dest, Source, Cond);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) { return isClassof(Inst, Cmov); }
-
-private:
-  InstX8632Cmov(Cfg *Func, Variable *Dest, Operand *Source,
-                X8632::Traits::Cond::BrCond Cond);
-
-  X8632::Traits::Cond::BrCond Condition;
-};
-
-/// Cmpps instruction - compare packed singled-precision floating point
-/// values
-class InstX8632Cmpps : public InstX8632 {
-  InstX8632Cmpps() = delete;
-  InstX8632Cmpps(const InstX8632Cmpps &) = delete;
-  InstX8632Cmpps &operator=(const InstX8632Cmpps &) = delete;
-
-public:
-  static InstX8632Cmpps *create(Cfg *Func, Variable *Dest, Operand *Source,
-                                X8632::Traits::Cond::CmppsCond Condition) {
-    return new (Func->allocate<InstX8632Cmpps>())
-        InstX8632Cmpps(Func, Dest, Source, Condition);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) { return isClassof(Inst, Cmpps); }
-
-private:
-  InstX8632Cmpps(Cfg *Func, Variable *Dest, Operand *Source,
-                 X8632::Traits::Cond::CmppsCond Cond);
-
-  X8632::Traits::Cond::CmppsCond Condition;
-};
-
-/// Cmpxchg instruction - cmpxchg <dest>, <desired> will compare if <dest>
-/// equals eax. If so, the ZF is set and <desired> is stored in <dest>.
-/// If not, ZF is cleared and <dest> is copied to eax (or subregister).
-/// <dest> can be a register or memory, while <desired> must be a register.
-/// It is the user's responsiblity to mark eax with a FakeDef.
-class InstX8632Cmpxchg : public InstX8632Lockable {
-  InstX8632Cmpxchg() = delete;
-  InstX8632Cmpxchg(const InstX8632Cmpxchg &) = delete;
-  InstX8632Cmpxchg &operator=(const InstX8632Cmpxchg &) = delete;
-
-public:
-  static InstX8632Cmpxchg *create(Cfg *Func, Operand *DestOrAddr, Variable *Eax,
-                                  Variable *Desired, bool Locked) {
-    return new (Func->allocate<InstX8632Cmpxchg>())
-        InstX8632Cmpxchg(Func, DestOrAddr, Eax, Desired, Locked);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) { return isClassof(Inst, Cmpxchg); }
-
-private:
-  InstX8632Cmpxchg(Cfg *Func, Operand *DestOrAddr, Variable *Eax,
-                   Variable *Desired, bool Locked);
-};
-
-/// Cmpxchg8b instruction - cmpxchg8b <m64> will compare if <m64>
-/// equals edx:eax. If so, the ZF is set and ecx:ebx is stored in <m64>.
-/// If not, ZF is cleared and <m64> is copied to edx:eax.
-/// The caller is responsible for inserting FakeDefs to mark edx
-/// and eax as modified.
-/// <m64> must be a memory operand.
-class InstX8632Cmpxchg8b : public InstX8632Lockable {
-  InstX8632Cmpxchg8b() = delete;
-  InstX8632Cmpxchg8b(const InstX8632Cmpxchg8b &) = delete;
-  InstX8632Cmpxchg8b &operator=(const InstX8632Cmpxchg8b &) = delete;
-
-public:
-  static InstX8632Cmpxchg8b *create(Cfg *Func, OperandX8632Mem *Dest,
-                                    Variable *Edx, Variable *Eax, Variable *Ecx,
-                                    Variable *Ebx, bool Locked) {
-    return new (Func->allocate<InstX8632Cmpxchg8b>())
-        InstX8632Cmpxchg8b(Func, Dest, Edx, Eax, Ecx, Ebx, Locked);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) { return isClassof(Inst, Cmpxchg8b); }
-
-private:
-  InstX8632Cmpxchg8b(Cfg *Func, OperandX8632Mem *Dest, Variable *Edx,
-                     Variable *Eax, Variable *Ecx, Variable *Ebx, bool Locked);
-};
-
-/// Cvt instruction - wrapper for cvtsX2sY where X and Y are in {s,d,i}
-/// as appropriate.  s=float, d=double, i=int.  X and Y are determined
-/// from dest/src types.  Sign and zero extension on the integer
-/// operand needs to be done separately.
-class InstX8632Cvt : public InstX8632 {
-  InstX8632Cvt() = delete;
-  InstX8632Cvt(const InstX8632Cvt &) = delete;
-  InstX8632Cvt &operator=(const InstX8632Cvt &) = delete;
-
-public:
-  enum CvtVariant { Si2ss, Tss2si, Float2float, Dq2ps, Tps2dq };
-  static InstX8632Cvt *create(Cfg *Func, Variable *Dest, Operand *Source,
-                              CvtVariant Variant) {
-    return new (Func->allocate<InstX8632Cvt>())
-        InstX8632Cvt(Func, Dest, Source, Variant);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) { return isClassof(Inst, Cvt); }
-  bool isTruncating() const { return Variant == Tss2si || Variant == Tps2dq; }
-
-private:
-  CvtVariant Variant;
-  InstX8632Cvt(Cfg *Func, Variable *Dest, Operand *Source, CvtVariant Variant);
-};
-
-/// cmp - Integer compare instruction.
-class InstX8632Icmp : public InstX8632 {
-  InstX8632Icmp() = delete;
-  InstX8632Icmp(const InstX8632Icmp &) = delete;
-  InstX8632Icmp &operator=(const InstX8632Icmp &) = delete;
-
-public:
-  static InstX8632Icmp *create(Cfg *Func, Operand *Src1, Operand *Src2) {
-    return new (Func->allocate<InstX8632Icmp>())
-        InstX8632Icmp(Func, Src1, Src2);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) { return isClassof(Inst, Icmp); }
-
-private:
-  InstX8632Icmp(Cfg *Func, Operand *Src1, Operand *Src2);
-};
-
-/// ucomiss/ucomisd - floating-point compare instruction.
-class InstX8632Ucomiss : public InstX8632 {
-  InstX8632Ucomiss() = delete;
-  InstX8632Ucomiss(const InstX8632Ucomiss &) = delete;
-  InstX8632Ucomiss &operator=(const InstX8632Ucomiss &) = delete;
-
-public:
-  static InstX8632Ucomiss *create(Cfg *Func, Operand *Src1, Operand *Src2) {
-    return new (Func->allocate<InstX8632Ucomiss>())
-        InstX8632Ucomiss(Func, Src1, Src2);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) { return isClassof(Inst, Ucomiss); }
-
-private:
-  InstX8632Ucomiss(Cfg *Func, Operand *Src1, Operand *Src2);
-};
-
-/// UD2 instruction.
-class InstX8632UD2 : public InstX8632 {
-  InstX8632UD2() = delete;
-  InstX8632UD2(const InstX8632UD2 &) = delete;
-  InstX8632UD2 &operator=(const InstX8632UD2 &) = delete;
-
-public:
-  static InstX8632UD2 *create(Cfg *Func) {
-    return new (Func->allocate<InstX8632UD2>()) InstX8632UD2(Func);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) { return isClassof(Inst, UD2); }
-
-private:
-  explicit InstX8632UD2(Cfg *Func);
-};
-
-/// Test instruction.
-class InstX8632Test : public InstX8632 {
-  InstX8632Test() = delete;
-  InstX8632Test(const InstX8632Test &) = delete;
-  InstX8632Test &operator=(const InstX8632Test &) = delete;
-
-public:
-  static InstX8632Test *create(Cfg *Func, Operand *Source1, Operand *Source2) {
-    return new (Func->allocate<InstX8632Test>())
-        InstX8632Test(Func, Source1, Source2);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) { return isClassof(Inst, Test); }
-
-private:
-  InstX8632Test(Cfg *Func, Operand *Source1, Operand *Source2);
-};
-
-/// Mfence instruction.
-class InstX8632Mfence : public InstX8632 {
-  InstX8632Mfence() = delete;
-  InstX8632Mfence(const InstX8632Mfence &) = delete;
-  InstX8632Mfence &operator=(const InstX8632Mfence &) = delete;
-
-public:
-  static InstX8632Mfence *create(Cfg *Func) {
-    return new (Func->allocate<InstX8632Mfence>()) InstX8632Mfence(Func);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) { return isClassof(Inst, Mfence); }
-
-private:
-  explicit InstX8632Mfence(Cfg *Func);
-};
-
-/// This is essentially a "mov" instruction with an OperandX8632Mem
-/// operand instead of Variable as the destination.  It's important
-/// for liveness that there is no Dest operand.
-class InstX8632Store : public InstX8632 {
-  InstX8632Store() = delete;
-  InstX8632Store(const InstX8632Store &) = delete;
-  InstX8632Store &operator=(const InstX8632Store &) = delete;
-
-public:
-  static InstX8632Store *create(Cfg *Func, Operand *Value, OperandX8632 *Mem) {
-    return new (Func->allocate<InstX8632Store>())
-        InstX8632Store(Func, Value, Mem);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) { return isClassof(Inst, Store); }
-
-private:
-  InstX8632Store(Cfg *Func, Operand *Value, OperandX8632 *Mem);
-};
-
-/// This is essentially a vector "mov" instruction with an OperandX8632Mem
-/// operand instead of Variable as the destination.  It's important
-/// for liveness that there is no Dest operand. The source must be an
-/// Xmm register, since Dest is mem.
-class InstX8632StoreP : public InstX8632 {
-  InstX8632StoreP() = delete;
-  InstX8632StoreP(const InstX8632StoreP &) = delete;
-  InstX8632StoreP &operator=(const InstX8632StoreP &) = delete;
-
-public:
-  static InstX8632StoreP *create(Cfg *Func, Variable *Value,
-                                 OperandX8632Mem *Mem) {
-    return new (Func->allocate<InstX8632StoreP>())
-        InstX8632StoreP(Func, Value, Mem);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) { return isClassof(Inst, StoreP); }
-
-private:
-  InstX8632StoreP(Cfg *Func, Variable *Value, OperandX8632Mem *Mem);
-};
-
-class InstX8632StoreQ : public InstX8632 {
-  InstX8632StoreQ() = delete;
-  InstX8632StoreQ(const InstX8632StoreQ &) = delete;
-  InstX8632StoreQ &operator=(const InstX8632StoreQ &) = delete;
-
-public:
-  static InstX8632StoreQ *create(Cfg *Func, Variable *Value,
-                                 OperandX8632Mem *Mem) {
-    return new (Func->allocate<InstX8632StoreQ>())
-        InstX8632StoreQ(Func, Value, Mem);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) { return isClassof(Inst, StoreQ); }
-
-private:
-  InstX8632StoreQ(Cfg *Func, Variable *Value, OperandX8632Mem *Mem);
-};
-
-/// Nop instructions of varying length
-class InstX8632Nop : public InstX8632 {
-  InstX8632Nop() = delete;
-  InstX8632Nop(const InstX8632Nop &) = delete;
-  InstX8632Nop &operator=(const InstX8632Nop &) = delete;
-
-public:
-  // TODO: Replace with enum.
-  typedef unsigned NopVariant;
-
-  static InstX8632Nop *create(Cfg *Func, NopVariant Variant) {
-    return new (Func->allocate<InstX8632Nop>()) InstX8632Nop(Func, Variant);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) { return isClassof(Inst, Nop); }
-
-private:
-  InstX8632Nop(Cfg *Func, SizeT Length);
-
-  NopVariant Variant;
-};
-
-/// Fld - load a value onto the x87 FP stack.
-class InstX8632Fld : public InstX8632 {
-  InstX8632Fld() = delete;
-  InstX8632Fld(const InstX8632Fld &) = delete;
-  InstX8632Fld &operator=(const InstX8632Fld &) = delete;
-
-public:
-  static InstX8632Fld *create(Cfg *Func, Operand *Src) {
-    return new (Func->allocate<InstX8632Fld>()) InstX8632Fld(Func, Src);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) { return isClassof(Inst, Fld); }
-
-private:
-  InstX8632Fld(Cfg *Func, Operand *Src);
-};
-
-/// Fstp - store x87 st(0) into memory and pop st(0).
-class InstX8632Fstp : public InstX8632 {
-  InstX8632Fstp() = delete;
-  InstX8632Fstp(const InstX8632Fstp &) = delete;
-  InstX8632Fstp &operator=(const InstX8632Fstp &) = delete;
-
-public:
-  static InstX8632Fstp *create(Cfg *Func, Variable *Dest) {
-    return new (Func->allocate<InstX8632Fstp>()) InstX8632Fstp(Func, Dest);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) { return isClassof(Inst, Fstp); }
-
-private:
-  InstX8632Fstp(Cfg *Func, Variable *Dest);
-};
-
-class InstX8632Pop : public InstX8632 {
-  InstX8632Pop() = delete;
-  InstX8632Pop(const InstX8632Pop &) = delete;
-  InstX8632Pop &operator=(const InstX8632Pop &) = delete;
-
-public:
-  static InstX8632Pop *create(Cfg *Func, Variable *Dest) {
-    return new (Func->allocate<InstX8632Pop>()) InstX8632Pop(Func, Dest);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) { return isClassof(Inst, Pop); }
-
-private:
-  InstX8632Pop(Cfg *Func, Variable *Dest);
-};
-
-class InstX8632Push : public InstX8632 {
-  InstX8632Push() = delete;
-  InstX8632Push(const InstX8632Push &) = delete;
-  InstX8632Push &operator=(const InstX8632Push &) = delete;
-
-public:
-  static InstX8632Push *create(Cfg *Func, Variable *Source) {
-    return new (Func->allocate<InstX8632Push>()) InstX8632Push(Func, Source);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) { return isClassof(Inst, Push); }
-
-private:
-  InstX8632Push(Cfg *Func, Variable *Source);
-};
-
-/// Ret instruction.  Currently only supports the "ret" version that
-/// does not pop arguments.  This instruction takes a Source operand
-/// (for non-void returning functions) for liveness analysis, though
-/// a FakeUse before the ret would do just as well.
-class InstX8632Ret : public InstX8632 {
-  InstX8632Ret() = delete;
-  InstX8632Ret(const InstX8632Ret &) = delete;
-  InstX8632Ret &operator=(const InstX8632Ret &) = delete;
-
-public:
-  static InstX8632Ret *create(Cfg *Func, Variable *Source = nullptr) {
-    return new (Func->allocate<InstX8632Ret>()) InstX8632Ret(Func, Source);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) { return isClassof(Inst, Ret); }
-
-private:
-  InstX8632Ret(Cfg *Func, Variable *Source);
-};
-
-/// Conditional set-byte instruction.
-class InstX8632Setcc : public InstX8632 {
-  InstX8632Setcc() = delete;
-  InstX8632Setcc(const InstX8632Cmov &) = delete;
-  InstX8632Setcc &operator=(const InstX8632Setcc &) = delete;
-
-public:
-  static InstX8632Setcc *create(Cfg *Func, Variable *Dest,
-                                X8632::Traits::Cond::BrCond Cond) {
-    return new (Func->allocate<InstX8632Setcc>())
-        InstX8632Setcc(Func, Dest, Cond);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) { return isClassof(Inst, Setcc); }
-
-private:
-  InstX8632Setcc(Cfg *Func, Variable *Dest, X8632::Traits::Cond::BrCond Cond);
-
-  const X8632::Traits::Cond::BrCond Condition;
-};
-
-/// Exchanging Add instruction.  Exchanges the first operand (destination
-/// operand) with the second operand (source operand), then loads the sum
-/// of the two values into the destination operand. The destination may be
-/// a register or memory, while the source must be a register.
-///
-/// Both the dest and source are updated. The caller should then insert a
-/// FakeDef to reflect the second udpate.
-class InstX8632Xadd : public InstX8632Lockable {
-  InstX8632Xadd() = delete;
-  InstX8632Xadd(const InstX8632Xadd &) = delete;
-  InstX8632Xadd &operator=(const InstX8632Xadd &) = delete;
-
-public:
-  static InstX8632Xadd *create(Cfg *Func, Operand *Dest, Variable *Source,
-                               bool Locked) {
-    return new (Func->allocate<InstX8632Xadd>())
-        InstX8632Xadd(Func, Dest, Source, Locked);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) { return isClassof(Inst, Xadd); }
-
-private:
-  InstX8632Xadd(Cfg *Func, Operand *Dest, Variable *Source, bool Locked);
-};
-
-/// Exchange instruction.  Exchanges the first operand (destination
-/// operand) with the second operand (source operand). At least one of
-/// the operands must be a register (and the other can be reg or mem).
-/// Both the Dest and Source are updated. If there is a memory operand,
-/// then the instruction is automatically "locked" without the need for
-/// a lock prefix.
-class InstX8632Xchg : public InstX8632 {
-  InstX8632Xchg() = delete;
-  InstX8632Xchg(const InstX8632Xchg &) = delete;
-  InstX8632Xchg &operator=(const InstX8632Xchg &) = delete;
-
-public:
-  static InstX8632Xchg *create(Cfg *Func, Operand *Dest, Variable *Source) {
-    return new (Func->allocate<InstX8632Xchg>())
-        InstX8632Xchg(Func, Dest, Source);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) { return isClassof(Inst, Xchg); }
-
-private:
-  InstX8632Xchg(Cfg *Func, Operand *Dest, Variable *Source);
-};
-
-/// Declare partial template specializations of emit() methods that
-/// already have default implementations.  Without this, there is the
-/// possibility of ODR violations and link errors.
-template <> void InstX8632Addss::emit(const Cfg *Func) const;
-template <> void InstX8632Blendvps::emit(const Cfg *Func) const;
-template <> void InstX8632Cbwdq::emit(const Cfg *Func) const;
-template <> void InstX8632Div::emit(const Cfg *Func) const;
-template <> void InstX8632Divss::emit(const Cfg *Func) const;
-template <> void InstX8632Idiv::emit(const Cfg *Func) const;
-template <> void InstX8632Imul::emit(const Cfg *Func) const;
-template <> void InstX8632Lea::emit(const Cfg *Func) const;
-template <> void InstX8632Mulss::emit(const Cfg *Func) const;
-template <> void InstX8632Padd::emit(const Cfg *Func) const;
-template <> void InstX8632Pblendvb::emit(const Cfg *Func) const;
-template <> void InstX8632Pcmpeq::emit(const Cfg *Func) const;
-template <> void InstX8632Pcmpgt::emit(const Cfg *Func) const;
-template <> void InstX8632Pextr::emit(const Cfg *Func) const;
-template <> void InstX8632Pinsr::emit(const Cfg *Func) const;
-template <> void InstX8632Pmull::emit(const Cfg *Func) const;
-template <> void InstX8632Pmuludq::emit(const Cfg *Func) const;
-template <> void InstX8632Psll::emit(const Cfg *Func) const;
-template <> void InstX8632Psra::emit(const Cfg *Func) const;
-template <> void InstX8632Psrl::emit(const Cfg *Func) const;
-template <> void InstX8632Psub::emit(const Cfg *Func) const;
-template <> void InstX8632Sqrtss::emit(const Cfg *Func) const;
-template <> void InstX8632Subss::emit(const Cfg *Func) const;
-
-template <> void InstX8632Blendvps::emitIAS(const Cfg *Func) const;
-template <> void InstX8632Cbwdq::emitIAS(const Cfg *Func) const;
-template <> void InstX8632Div::emitIAS(const Cfg *Func) const;
-template <> void InstX8632Idiv::emitIAS(const Cfg *Func) const;
-template <> void InstX8632Imul::emitIAS(const Cfg *Func) const;
-template <> void InstX8632Insertps::emitIAS(const Cfg *Func) const;
-template <> void InstX8632Movd::emitIAS(const Cfg *Func) const;
-template <> void InstX8632MovssRegs::emitIAS(const Cfg *Func) const;
-template <> void InstX8632Pblendvb::emitIAS(const Cfg *Func) const;
-template <> void InstX8632Pextr::emitIAS(const Cfg *Func) const;
-template <> void InstX8632Pinsr::emitIAS(const Cfg *Func) const;
-template <> void InstX8632Movsx::emitIAS(const Cfg *Func) const;
-template <> void InstX8632Movzx::emitIAS(const Cfg *Func) const;
-template <> void InstX8632Pmull::emitIAS(const Cfg *Func) const;
-template <> void InstX8632Pshufd::emitIAS(const Cfg *Func) const;
-template <> void InstX8632Shufps::emitIAS(const Cfg *Func) const;
-
-} // end of namespace Ice
-
 #endif // SUBZERO_SRC_ICEINSTX8632_H
diff --git a/src/IceInstX86Base.h b/src/IceInstX86Base.h
new file mode 100644
index 0000000..7275e44
--- /dev/null
+++ b/src/IceInstX86Base.h
@@ -0,0 +1,3148 @@
+//===- subzero/src/IceInstX86Base.h - Generic x86 instructions -*- C++ -*--===//
+//
+//                        The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines the InstX86Base template class, as well as the generic X86
+/// Instruction class hierarchy. Only X86 instructions common across all/most
+/// X86 targets should be defined here, with target-specific instructions
+/// declared in the target's traits.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef SUBZERO_SRC_ICEINSTX86BASE_H
+#define SUBZERO_SRC_ICEINSTX86BASE_H
+
+#include "IceDefs.h"
+#include "IceInst.h"
+#include "IceOperand.h"
+
+namespace Ice {
+
+namespace X86Internal {
+
+template <class Machine> struct MachineTraits;
+
+template <class Machine> class InstX86Base : public InstTarget {
+  InstX86Base<Machine>() = delete;
+  InstX86Base<Machine>(const InstX86Base &) = delete;
+  InstX86Base &operator=(const InstX86Base &) = delete;
+
+public:
+  using Traits = MachineTraits<Machine>;
+
+  enum InstKindX86 {
+    k__Start = Inst::Target,
+    Adc,
+    AdcRMW,
+    Add,
+    AddRMW,
+    Addps,
+    Addss,
+    Adjuststack,
+    And,
+    AndRMW,
+    Blendvps,
+    Br,
+    Bsf,
+    Bsr,
+    Bswap,
+    Call,
+    Cbwdq,
+    Cmov,
+    Cmpps,
+    Cmpxchg,
+    Cmpxchg8b,
+    Cvt,
+    Div,
+    Divps,
+    Divss,
+    FakeRMW,
+    Fld,
+    Fstp,
+    Icmp,
+    Idiv,
+    Imul,
+    Insertps,
+    Jmp,
+    Label,
+    Lea,
+    Load,
+    Mfence,
+    Mov,
+    Movd,
+    Movp,
+    Movq,
+    MovssRegs,
+    Movsx,
+    Movzx,
+    Mul,
+    Mulps,
+    Mulss,
+    Neg,
+    Nop,
+    Or,
+    OrRMW,
+    Padd,
+    Pand,
+    Pandn,
+    Pblendvb,
+    Pcmpeq,
+    Pcmpgt,
+    Pextr,
+    Pinsr,
+    Pmull,
+    Pmuludq,
+    Pop,
+    Por,
+    Pshufd,
+    Psll,
+    Psra,
+    Psrl,
+    Psub,
+    Push,
+    Pxor,
+    Ret,
+    Rol,
+    Sar,
+    Sbb,
+    SbbRMW,
+    Setcc,
+    Shl,
+    Shld,
+    Shr,
+    Shrd,
+    Shufps,
+    Sqrtss,
+    Store,
+    StoreP,
+    StoreQ,
+    Sub,
+    SubRMW,
+    Subps,
+    Subss,
+    Test,
+    Ucomiss,
+    UD2,
+    Xadd,
+    Xchg,
+    Xor,
+    XorRMW
+  };
+
+  static const char *getWidthString(Type Ty);
+  static const char *getFldString(Type Ty);
+  static typename Traits::Cond::BrCond
+  getOppositeCondition(typename Traits::Cond::BrCond Cond);
+  void dump(const Cfg *Func) const override;
+
+  // Shared emit routines for common forms of instructions.
+  // See the definition of emitTwoAddress() for a description of
+  // ShiftHack.
+  static void emitTwoAddress(const char *Opcode, const Inst *Inst,
+                             const Cfg *Func, bool ShiftHack = false);
+
+  static void
+  emitIASGPRShift(const Cfg *Func, Type Ty, const Variable *Var,
+                  const Operand *Src,
+                  const typename Traits::Assembler::GPREmitterShiftOp &Emitter);
+
+protected:
+  InstX86Base<Machine>(Cfg *Func, InstKindX86 Kind, SizeT Maxsrcs,
+                       Variable *Dest)
+      : InstTarget(Func, static_cast<InstKind>(Kind), Maxsrcs, Dest) {}
+
+  static bool isClassof(const Inst *Inst, InstKindX86 MyKind) {
+    return Inst->getKind() == static_cast<InstKind>(MyKind);
+  }
+  // Most instructions that operate on vector arguments require vector
+  // memory operands to be fully aligned (16-byte alignment for PNaCl
+  // vector types).  The stack frame layout and call ABI ensure proper
+  // alignment for stack operands, but memory operands (originating
+  // from load/store bitcode instructions) only have element-size
+  // alignment guarantees.  This function validates that none of the
+  // operands is a memory operand of vector type, calling
+  // report_fatal_error() if one is found.  This function should be
+  // called during emission, and maybe also in the ctor (as long as
+  // that fits the lowering style).
+  void validateVectorAddrMode() const {
+    if (this->getDest())
+      this->validateVectorAddrModeOpnd(this->getDest());
+    for (SizeT i = 0; i < this->getSrcSize(); ++i) {
+      this->validateVectorAddrModeOpnd(this->getSrc(i));
+    }
+  }
+
+private:
+  static void validateVectorAddrModeOpnd(const Operand *Opnd) {
+    if (llvm::isa<typename InstX86Base<Machine>::Traits::X86OperandMem>(Opnd) &&
+        isVectorType(Opnd->getType())) {
+      llvm::report_fatal_error("Possible misaligned vector memory operation");
+    }
+  }
+};
+
+/// InstX86FakeRMW represents a non-atomic read-modify-write operation on a
+/// memory location.  An InstX86FakeRMW is a "fake" instruction in that it
+/// still needs to be lowered to some actual RMW instruction.
+///
+/// If A is some memory address, D is some data value to apply, and OP is an
+/// arithmetic operator, the instruction operates as: (*A) = (*A) OP D
+template <class Machine>
+class InstX86FakeRMW final : public InstX86Base<Machine> {
+  InstX86FakeRMW() = delete;
+  InstX86FakeRMW(const InstX86FakeRMW &) = delete;
+  InstX86FakeRMW &operator=(const InstX86FakeRMW &) = delete;
+
+public:
+  static InstX86FakeRMW *create(Cfg *Func, Operand *Data, Operand *Addr,
+                                Variable *Beacon, InstArithmetic::OpKind Op,
+                                uint32_t Align = 1) {
+    // TODO(stichnot): Stop ignoring alignment specification.
+    (void)Align;
+    return new (Func->allocate<InstX86FakeRMW>())
+        InstX86FakeRMW(Func, Data, Addr, Op, Beacon);
+  }
+  Operand *getAddr() const { return this->getSrc(1); }
+  Operand *getData() const { return this->getSrc(0); }
+  InstArithmetic::OpKind getOp() const { return Op; }
+  Variable *getBeacon() const { return llvm::cast<Variable>(this->getSrc(2)); }
+  void dump(const Cfg *Func) const override;
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::FakeRMW);
+  }
+
+private:
+  InstArithmetic::OpKind Op;
+  InstX86FakeRMW(Cfg *Func, Operand *Data, Operand *Addr,
+                 InstArithmetic::OpKind Op, Variable *Beacon);
+};
+
+/// InstX86Label represents an intra-block label that is the target
+/// of an intra-block branch.  The offset between the label and the
+/// branch must fit into one byte (considered "near").  These are
+/// used for lowering i1 calculations, Select instructions, and 64-bit
+/// compares on a 32-bit architecture, without basic block splitting.
+/// Basic block splitting is not so desirable for several reasons, one
+/// of which is the impact on decisions based on whether a variable's
+/// live range spans multiple basic blocks.
+///
+/// Intra-block control flow must be used with caution.  Consider the
+/// sequence for "c = (a >= b ? x : y)".
+///     cmp a, b
+///     br lt, L1
+///     mov c, x
+///     jmp L2
+///   L1:
+///     mov c, y
+///   L2:
+///
+/// Labels L1 and L2 are intra-block labels.  Without knowledge of the
+/// intra-block control flow, liveness analysis will determine the "mov
+/// c, x" instruction to be dead.  One way to prevent this is to insert
+/// a "FakeUse(c)" instruction anywhere between the two "mov c, ..."
+/// instructions, e.g.:
+///
+///     cmp a, b
+///     br lt, L1
+///     mov c, x
+///     jmp L2
+///     FakeUse(c)
+///   L1:
+///     mov c, y
+///   L2:
+///
+/// The down-side is that "mov c, x" can never be dead-code eliminated
+/// even if there are no uses of c.  As unlikely as this situation is,
+/// it may be prevented by running dead code elimination before
+/// lowering.
+template <class Machine>
+class InstX86Label final : public InstX86Base<Machine> {
+  InstX86Label() = delete;
+  InstX86Label(const InstX86Label &) = delete;
+  InstX86Label &operator=(const InstX86Label &) = delete;
+
+public:
+  static InstX86Label *
+  create(Cfg *Func,
+         typename InstX86Base<Machine>::Traits::TargetLowering *Target) {
+    return new (Func->allocate<InstX86Label>()) InstX86Label(Func, Target);
+  }
+  uint32_t getEmitInstCount() const override { return 0; }
+  IceString getName(const Cfg *Func) const;
+  SizeT getNumber() const { return Number; }
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+  void dump(const Cfg *Func) const override;
+
+private:
+  InstX86Label(Cfg *Func,
+               typename InstX86Base<Machine>::Traits::TargetLowering *Target);
+
+  SizeT Number; // used for unique label generation.
+};
+
+/// Branch instruction: conditional or unconditional, to CFG nodes or a label.
+template <class Machine> class InstX86Br final : public InstX86Base<Machine> {
+  InstX86Br() = delete;
+  InstX86Br(const InstX86Br &) = delete;
+  InstX86Br &operator=(const InstX86Br &) = delete;
+
+public:
+  /// Create a conditional branch to a node.
+  static InstX86Br *
+  create(Cfg *Func, CfgNode *TargetTrue, CfgNode *TargetFalse,
+         typename InstX86Base<Machine>::Traits::Cond::BrCond Condition) {
+    assert(Condition != InstX86Base<Machine>::Traits::Cond::Br_None); // Br_None is what the unconditional overload passes.
+    const InstX86Label<Machine> *NoLabel = nullptr; // Node-targeted branches carry no label.
+    return new (Func->allocate<InstX86Br>())
+        InstX86Br(Func, TargetTrue, TargetFalse, NoLabel, Condition);
+  }
+  /// Create an unconditional branch to a node.
+  static InstX86Br *create(Cfg *Func, CfgNode *Target) {
+    const CfgNode *NoCondTarget = nullptr; // Target goes in the TargetFalse position below.
+    const InstX86Label<Machine> *NoLabel = nullptr;
+    return new (Func->allocate<InstX86Br>())
+        InstX86Br(Func, NoCondTarget, Target, NoLabel,
+                  InstX86Base<Machine>::Traits::Cond::Br_None);
+  }
+  /// Create a non-terminator conditional branch to a node, with a
+  /// fallthrough to the next instruction in the current node.  This is
+  /// used for switch lowering.
+  static InstX86Br *
+  create(Cfg *Func, CfgNode *Target,
+         typename InstX86Base<Machine>::Traits::Cond::BrCond Condition) {
+    assert(Condition != InstX86Base<Machine>::Traits::Cond::Br_None);
+    const CfgNode *NoUncondTarget = nullptr; // Target goes in the TargetTrue position below.
+    const InstX86Label<Machine> *NoLabel = nullptr;
+    return new (Func->allocate<InstX86Br>())
+        InstX86Br(Func, Target, NoUncondTarget, NoLabel, Condition);
+  }
+  /// Create a conditional intra-block branch (or unconditional, if
+  /// Condition==Br_None) to a label in the current block.
+  static InstX86Br *
+  create(Cfg *Func, InstX86Label<Machine> *Label,
+         typename InstX86Base<Machine>::Traits::Cond::BrCond Condition) {
+    const CfgNode *NoCondTarget = nullptr; // Label branches have no node targets.
+    const CfgNode *NoUncondTarget = nullptr;
+    return new (Func->allocate<InstX86Br>())
+        InstX86Br(Func, NoCondTarget, NoUncondTarget, Label, Condition);
+  }
+  const CfgNode *getTargetTrue() const { return TargetTrue; }
+  const CfgNode *getTargetFalse() const { return TargetFalse; }
+  bool optimizeBranch(const CfgNode *NextNode);
+  uint32_t getEmitInstCount() const override { // One per non-null target: Label, TargetTrue, TargetFalse.
+    uint32_t Sum = 0;
+    if (Label)
+      ++Sum;
+    if (getTargetTrue())
+      ++Sum;
+    if (getTargetFalse())
+      ++Sum;
+    return Sum;
+  }
+  bool isUnconditionalBranch() const override { // False for any label branch, even with Br_None.
+    return !Label && Condition == InstX86Base<Machine>::Traits::Cond::Br_None;
+  }
+  bool repointEdge(CfgNode *OldNode, CfgNode *NewNode) override;
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+  void dump(const Cfg *Func) const override;
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Br);
+  }
+
+private:
+  InstX86Br(Cfg *Func, const CfgNode *TargetTrue, const CfgNode *TargetFalse,
+            const InstX86Label<Machine> *Label,
+            typename InstX86Base<Machine>::Traits::Cond::BrCond Condition);
+
+  typename InstX86Base<Machine>::Traits::Cond::BrCond Condition;
+  const CfgNode *TargetTrue;
+  const CfgNode *TargetFalse;
+  const InstX86Label<Machine> *Label; // Intra-block branch target; null for node branches.
+};
+
+/// Jump to a target outside this function, such as tailcall, nacljump,
+/// naclret, unreachable.  This is different from a Branch instruction
+/// in that there is no intra-function control flow to represent.
+template <class Machine> class InstX86Jmp final : public InstX86Base<Machine> {
+  InstX86Jmp() = delete;
+  InstX86Jmp(const InstX86Jmp &) = delete;
+  InstX86Jmp &operator=(const InstX86Jmp &) = delete;
+
+public:
+  static InstX86Jmp *create(Cfg *Func, Operand *Target) {
+    return new (Func->allocate<InstX86Jmp>()) InstX86Jmp(Func, Target); // Built in storage from Func->allocate().
+  }
+  Operand *getJmpTarget() const { return this->getSrc(0); } // The jump target is source operand 0.
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+  void dump(const Cfg *Func) const override;
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Jmp);
+  }
+
+private:
+  InstX86Jmp(Cfg *Func, Operand *Target);
+};
+
+/// AdjustStack instruction - subtracts the given amount from esp and
+/// updates the stack offset during code emission.
+template <class Machine>
+class InstX86AdjustStack final : public InstX86Base<Machine> {
+  InstX86AdjustStack() = delete;
+  InstX86AdjustStack(const InstX86AdjustStack &) = delete;
+  InstX86AdjustStack &operator=(const InstX86AdjustStack &) = delete;
+
+public:
+  static InstX86AdjustStack *create(Cfg *Func, SizeT Amount, Variable *Esp) {
+    return new (Func->allocate<InstX86AdjustStack>())
+        InstX86AdjustStack(Func, Amount, Esp);
+  }
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+  void dump(const Cfg *Func) const override;
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst,
+                                           InstX86Base<Machine>::Adjuststack);
+  }
+
+private:
+  InstX86AdjustStack(Cfg *Func, SizeT Amount, Variable *Esp);
+  SizeT Amount; // NOTE(review): presumably the Amount given to create(); ctor body is not in this header chunk.
+};
+
+/// Call instruction.  Arguments should have already been pushed.
+template <class Machine> class InstX86Call final : public InstX86Base<Machine> {
+  InstX86Call() = delete;
+  InstX86Call(const InstX86Call &) = delete;
+  InstX86Call &operator=(const InstX86Call &) = delete;
+
+public:
+  static InstX86Call *create(Cfg *Func, Variable *Dest, Operand *CallTarget) {
+    return new (Func->allocate<InstX86Call>())
+        InstX86Call(Func, Dest, CallTarget);
+  }
+  Operand *getCallTarget() const { return this->getSrc(0); } // The call target is source operand 0.
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+  void dump(const Cfg *Func) const override;
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Call);
+  }
+
+private:
+  InstX86Call(Cfg *Func, Variable *Dest, Operand *CallTarget);
+};
+
+/// Emit a one-operand (GPR) instruction.
+template <class Machine>
+void emitIASOpTyGPR(const Cfg *Func, Type Ty, const Operand *Var,
+                    const typename InstX86Base<
+                        Machine>::Traits::Assembler::GPREmitterOneOp &Emitter);
+
+template <class Machine> // InstX86BaseBinopRMW::emitIAS calls this with its X86OperandMem source as Op0.
+void emitIASAsAddrOpTyGPR(
+    const Cfg *Func, Type Ty, const Operand *Op0, const Operand *Op1,
+    const typename InstX86Base<Machine>::Traits::Assembler::GPREmitterAddrOp
+        &Emitter);
+
+/// Instructions of the form x := op(x).
+template <class Machine, typename InstX86Base<Machine>::InstKindX86 K>
+class InstX86BaseInplaceopGPR : public InstX86Base<Machine> {
+  InstX86BaseInplaceopGPR() = delete;
+  InstX86BaseInplaceopGPR(const InstX86BaseInplaceopGPR &) = delete;
+  InstX86BaseInplaceopGPR &operator=(const InstX86BaseInplaceopGPR &) = delete;
+
+public:
+  using Base = InstX86BaseInplaceopGPR<Machine, K>;
+
+  void emit(const Cfg *Func) const override {
+    if (!BuildDefs::dump()) // No output unless BuildDefs::dump() is enabled.
+      return;
+    Ostream &Str = Func->getContext()->getStrEmit();
+    assert(this->getSrcSize() == 1);
+    Str << "\t" << Opcode << "\t";
+    this->getSrc(0)->emit(Func); // Only the single source operand is printed.
+  }
+  void emitIAS(const Cfg *Func) const override {
+    assert(this->getSrcSize() == 1);
+    const Variable *Var = this->getDest(); // Same operand as source 0 when SrcDest was a Variable (see ctor).
+    Type Ty = Var->getType();
+    emitIASOpTyGPR<Machine>(Func, Ty, Var, Emitter);
+  }
+  void dump(const Cfg *Func) const override {
+    if (!BuildDefs::dump())
+      return;
+    Ostream &Str = Func->getContext()->getStrDump();
+    this->dumpDest(Func);
+    Str << " = " << Opcode << "." << this->getDest()->getType() << " ";
+    this->dumpSources(Func);
+  }
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::K);
+  }
+
+protected:
+  InstX86BaseInplaceopGPR(Cfg *Func, Operand *SrcDest) // NOTE(review): Dest is null if SrcDest is not a
+      : InstX86Base<Machine>(Func, K, 1, llvm::dyn_cast<Variable>(SrcDest)) { // Variable, yet emitIAS()/dump()
+    this->addSource(SrcDest); // dereference getDest() unchecked; confirm callers pass Variables.
+  }
+
+private:
+  static const char *Opcode; // Mnemonic text written by emit() and dump().
+  static const typename InstX86Base<Machine>::Traits::Assembler::GPREmitterOneOp
+      Emitter; // Handed to emitIASOpTyGPR() by emitIAS().
+};
+
+/// Emit a two-operand (GPR) instruction, where the dest operand is a
+/// Variable that's guaranteed to be a register.
+template <class Machine, bool VarCanBeByte = true, bool SrcCanBeByte = true>
+void emitIASRegOpTyGPR( // Called from the unary-op and binop GPR emitIAS() with Dst = getDest().
+    const Cfg *Func, Type Ty, const Variable *Dst, const Operand *Src,
+    const typename InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp
+        &Emitter);
+
+/// Instructions of the form x := op(y).
+template <class Machine, typename InstX86Base<Machine>::InstKindX86 K>
+class InstX86BaseUnaryopGPR : public InstX86Base<Machine> {
+  InstX86BaseUnaryopGPR() = delete;
+  InstX86BaseUnaryopGPR(const InstX86BaseUnaryopGPR &) = delete;
+  InstX86BaseUnaryopGPR &operator=(const InstX86BaseUnaryopGPR &) = delete;
+
+public:
+  using Base = InstX86BaseUnaryopGPR<Machine, K>;
+
+  void emit(const Cfg *Func) const override {
+    if (!BuildDefs::dump()) // No output unless BuildDefs::dump() is enabled.
+      return;
+    Ostream &Str = Func->getContext()->getStrEmit();
+    assert(this->getSrcSize() == 1);
+    Type SrcTy = this->getSrc(0)->getType();
+    Type DestTy = this->getDest()->getType();
+    Str << "\t" << Opcode << this->getWidthString(SrcTy); // Opcode followed by the source width letter.
+    // Movsx and movzx need both the source and dest type width letter
+    // to define the operation.  The other unary operations have the
+    // same source and dest type and as a result need only one letter.
+    if (SrcTy != DestTy)
+      Str << this->getWidthString(DestTy);
+    Str << "\t";
+    this->getSrc(0)->emit(Func); // Operand order in the text: source, then dest.
+    Str << ", ";
+    this->getDest()->emit(Func);
+  }
+  void emitIAS(const Cfg *Func) const override {
+    assert(this->getSrcSize() == 1);
+    const Variable *Var = this->getDest();
+    Type Ty = Var->getType(); // The emitter is keyed on the dest type, not the source type.
+    const Operand *Src = this->getSrc(0);
+    emitIASRegOpTyGPR<Machine>(Func, Ty, Var, Src, Emitter);
+  }
+  void dump(const Cfg *Func) const override {
+    if (!BuildDefs::dump())
+      return;
+    Ostream &Str = Func->getContext()->getStrDump();
+    this->dumpDest(Func);
+    Str << " = " << Opcode << "." << this->getSrc(0)->getType() << " "; // Tagged with the source type here.
+    this->dumpSources(Func);
+  }
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::K);
+  }
+
+protected:
+  InstX86BaseUnaryopGPR(Cfg *Func, Variable *Dest, Operand *Src)
+      : InstX86Base<Machine>(Func, K, 1, Dest) {
+    this->addSource(Src); // Dest is not a source: this is x := op(y), not an in-place op.
+  }
+
+  static const char *Opcode; // Mnemonic text written by emit() and dump().
+  static const typename InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp
+      Emitter; // Handed to emitIASRegOpTyGPR() by emitIAS().
+};
+
+template <class Machine> // Called from the unary-op and binop XMM emitIAS() with Var = getDest().
+void emitIASRegOpTyXMM(
+    const Cfg *Func, Type Ty, const Variable *Var, const Operand *Src,
+    const typename InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp
+        &Emitter);
+
+template <class Machine, typename InstX86Base<Machine>::InstKindX86 K>
+class InstX86BaseUnaryopXmm : public InstX86Base<Machine> { // Unary op x := op(y) emitted via an XmmEmitterRegOp.
+  InstX86BaseUnaryopXmm() = delete;
+  InstX86BaseUnaryopXmm(const InstX86BaseUnaryopXmm &) = delete;
+  InstX86BaseUnaryopXmm &operator=(const InstX86BaseUnaryopXmm &) = delete;
+
+public:
+  using Base = InstX86BaseUnaryopXmm<Machine, K>;
+
+  void emit(const Cfg *Func) const override {
+    if (!BuildDefs::dump()) // No output unless BuildDefs::dump() is enabled.
+      return;
+    Ostream &Str = Func->getContext()->getStrEmit();
+    assert(this->getSrcSize() == 1);
+    Str << "\t" << Opcode << "\t";
+    this->getSrc(0)->emit(Func); // Operand order in the text: source, then dest.
+    Str << ", ";
+    this->getDest()->emit(Func);
+  }
+  void emitIAS(const Cfg *Func) const override {
+    Type Ty = this->getDest()->getType(); // The emitter is keyed on the dest type.
+    assert(this->getSrcSize() == 1);
+    emitIASRegOpTyXMM<Machine>(Func, Ty, this->getDest(), this->getSrc(0),
+                               Emitter);
+  }
+  void dump(const Cfg *Func) const override {
+    if (!BuildDefs::dump())
+      return;
+    Ostream &Str = Func->getContext()->getStrDump();
+    this->dumpDest(Func);
+    Str << " = " << Opcode << "." << this->getDest()->getType() << " ";
+    this->dumpSources(Func);
+  }
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::K);
+  }
+
+protected:
+  InstX86BaseUnaryopXmm(Cfg *Func, Variable *Dest, Operand *Src)
+      : InstX86Base<Machine>(Func, K, 1, Dest) {
+    this->addSource(Src); // Single source; Dest is not added as a source.
+  }
+
+  static const char *Opcode; // Mnemonic text written by emit() and dump().
+  static const typename InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp
+      Emitter; // Handed to emitIASRegOpTyXMM() by emitIAS().
+};
+
+template <class Machine, typename InstX86Base<Machine>::InstKindX86 K>
+class InstX86BaseBinopGPRShift : public InstX86Base<Machine> { // Two-address GPR shift; Dest doubles as source 0.
+  InstX86BaseBinopGPRShift() = delete;
+  InstX86BaseBinopGPRShift(const InstX86BaseBinopGPRShift &) = delete;
+  InstX86BaseBinopGPRShift &
+  operator=(const InstX86BaseBinopGPRShift &) = delete;
+
+public:
+  using Base = InstX86BaseBinopGPRShift<Machine, K>;
+
+  void emit(const Cfg *Func) const override {
+    if (!BuildDefs::dump()) // No output unless BuildDefs::dump() is enabled.
+      return;
+    const bool ShiftHack = true; // The other two-address bases in this file pass false.
+    this->emitTwoAddress(Opcode, this, Func, ShiftHack);
+  }
+  void emitIAS(const Cfg *Func) const override {
+    Type Ty = this->getDest()->getType();
+    assert(this->getSrcSize() == 2);
+    this->emitIASGPRShift(Func, Ty, this->getDest(), this->getSrc(1), Emitter); // Source 0 is Dest, so only source 1 is passed.
+  }
+  void dump(const Cfg *Func) const override {
+    if (!BuildDefs::dump())
+      return;
+    Ostream &Str = Func->getContext()->getStrDump();
+    this->dumpDest(Func);
+    Str << " = " << Opcode << "." << this->getDest()->getType() << " ";
+    this->dumpSources(Func);
+  }
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::K);
+  }
+
+protected:
+  InstX86BaseBinopGPRShift(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86Base<Machine>(Func, K, 2, Dest) {
+    this->addSource(Dest); // Source 0: the value being shifted is the dest itself.
+    this->addSource(Source); // Source 1.
+  }
+
+  static const char *Opcode; // Mnemonic text passed to emitTwoAddress() and written by dump().
+  static const typename InstX86Base<
+      Machine>::Traits::Assembler::GPREmitterShiftOp Emitter; // Handed to emitIASGPRShift() by emitIAS().
+};
+
+template <class Machine, typename InstX86Base<Machine>::InstKindX86 K>
+class InstX86BaseBinopGPR : public InstX86Base<Machine> { // Two-address GPR binop; Dest doubles as source 0.
+  InstX86BaseBinopGPR() = delete;
+  InstX86BaseBinopGPR(const InstX86BaseBinopGPR &) = delete;
+  InstX86BaseBinopGPR &operator=(const InstX86BaseBinopGPR &) = delete;
+
+public:
+  using Base = InstX86BaseBinopGPR<Machine, K>;
+
+  void emit(const Cfg *Func) const override {
+    if (!BuildDefs::dump()) // No output unless BuildDefs::dump() is enabled.
+      return;
+    const bool ShiftHack = false;
+    this->emitTwoAddress(Opcode, this, Func, ShiftHack);
+  }
+  void emitIAS(const Cfg *Func) const override {
+    Type Ty = this->getDest()->getType();
+    assert(this->getSrcSize() == 2);
+    emitIASRegOpTyGPR<Machine>(Func, Ty, this->getDest(), this->getSrc(1), // Source 0 is Dest, so only source 1 is passed.
+                               Emitter);
+  }
+  void dump(const Cfg *Func) const override {
+    if (!BuildDefs::dump())
+      return;
+    Ostream &Str = Func->getContext()->getStrDump();
+    this->dumpDest(Func);
+    Str << " = " << Opcode << "." << this->getDest()->getType() << " ";
+    this->dumpSources(Func);
+  }
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::K);
+  }
+
+protected:
+  InstX86BaseBinopGPR(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86Base<Machine>(Func, K, 2, Dest) {
+    this->addSource(Dest); // Source 0 is the dest itself.
+    this->addSource(Source); // Source 1.
+  }
+
+  static const char *Opcode; // Mnemonic text passed to emitTwoAddress() and written by dump().
+  static const typename InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp
+      Emitter; // Handed to emitIASRegOpTyGPR() by emitIAS().
+};
+
+template <class Machine, typename InstX86Base<Machine>::InstKindX86 K>
+class InstX86BaseBinopRMW : public InstX86Base<Machine> { // Binop with no Dest; the memory operand is source 0.
+  InstX86BaseBinopRMW() = delete;
+  InstX86BaseBinopRMW(const InstX86BaseBinopRMW &) = delete;
+  InstX86BaseBinopRMW &operator=(const InstX86BaseBinopRMW &) = delete;
+
+public:
+  using Base = InstX86BaseBinopRMW<Machine, K>;
+
+  void emit(const Cfg *Func) const override {
+    if (!BuildDefs::dump()) // No output unless BuildDefs::dump() is enabled.
+      return;
+    const bool ShiftHack = false;
+    this->emitTwoAddress(Opcode, this, Func, ShiftHack);
+  }
+  void emitIAS(const Cfg *Func) const override {
+    Type Ty = this->getSrc(0)->getType(); // Type comes from source 0 because Dest is null here.
+    assert(this->getSrcSize() == 2);
+    emitIASAsAddrOpTyGPR<Machine>(Func, Ty, this->getSrc(0), this->getSrc(1),
+                                  Emitter);
+  }
+  void dump(const Cfg *Func) const override {
+    if (!BuildDefs::dump())
+      return;
+    Ostream &Str = Func->getContext()->getStrDump();
+    Str << Opcode << "." << this->getSrc(0)->getType() << " "; // No "dest =" prefix: the ctor passes a null Dest.
+    this->dumpSources(Func);
+  }
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::K);
+  }
+
+protected:
+  InstX86BaseBinopRMW(
+      Cfg *Func, typename InstX86Base<Machine>::Traits::X86OperandMem *DestSrc0,
+      Operand *Src1)
+      : InstX86Base<Machine>(Func, K, 2, nullptr) { // nullptr: this instruction has no Dest variable.
+    this->addSource(DestSrc0); // Source 0: the memory operand.
+    this->addSource(Src1); // Source 1.
+  }
+
+  static const char *Opcode; // Mnemonic text passed to emitTwoAddress() and written by dump().
+  static const typename InstX86Base<
+      Machine>::Traits::Assembler::GPREmitterAddrOp Emitter; // Handed to emitIASAsAddrOpTyGPR() by emitIAS().
+};
+
+template <class Machine, typename InstX86Base<Machine>::InstKindX86 K,
+          bool NeedsElementType> // When true, emitIAS() passes typeElementType(dest type) to the emitter.
+class InstX86BaseBinopXmm : public InstX86Base<Machine> {
+  InstX86BaseBinopXmm() = delete;
+  InstX86BaseBinopXmm(const InstX86BaseBinopXmm &) = delete;
+  InstX86BaseBinopXmm &operator=(const InstX86BaseBinopXmm &) = delete;
+
+public:
+  using Base = InstX86BaseBinopXmm<Machine, K, NeedsElementType>;
+
+  void emit(const Cfg *Func) const override {
+    if (!BuildDefs::dump()) // No output unless BuildDefs::dump() is enabled.
+      return;
+    this->validateVectorAddrMode();
+    const bool ShiftHack = false;
+    this->emitTwoAddress(Opcode, this, Func, ShiftHack);
+  }
+  void emitIAS(const Cfg *Func) const override {
+    this->validateVectorAddrMode();
+    Type Ty = this->getDest()->getType();
+    if (NeedsElementType) // Otherwise the dest's own type is passed through unchanged.
+      Ty = typeElementType(Ty);
+    assert(this->getSrcSize() == 2);
+    emitIASRegOpTyXMM<Machine>(Func, Ty, this->getDest(), this->getSrc(1), // Source 0 is Dest, so only source 1 is passed.
+                               Emitter);
+  }
+  void dump(const Cfg *Func) const override {
+    if (!BuildDefs::dump())
+      return;
+    Ostream &Str = Func->getContext()->getStrDump();
+    this->dumpDest(Func);
+    Str << " = " << Opcode << "." << this->getDest()->getType() << " ";
+    this->dumpSources(Func);
+  }
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::K);
+  }
+
+protected:
+  InstX86BaseBinopXmm(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86Base<Machine>(Func, K, 2, Dest) {
+    this->addSource(Dest); // Source 0 is the dest itself.
+    this->addSource(Source); // Source 1.
+  }
+
+  static const char *Opcode; // Mnemonic text passed to emitTwoAddress() and written by dump().
+  static const typename InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp
+      Emitter; // Handed to emitIASRegOpTyXMM() by emitIAS().
+};
+
+template <class Machine> // InstX86BaseBinopXmmShift::emitIAS calls this with the dest's element type as Ty.
+void emitIASXmmShift(
+    const Cfg *Func, Type Ty, const Variable *Var, const Operand *Src,
+    const typename InstX86Base<Machine>::Traits::Assembler::XmmEmitterShiftOp
+        &Emitter);
+
+template <class Machine, typename InstX86Base<Machine>::InstKindX86 K,
+          bool AllowAllTypes = false> // When false, emitIAS() asserts that the dest type is a vector type.
+class InstX86BaseBinopXmmShift : public InstX86Base<Machine> {
+  InstX86BaseBinopXmmShift() = delete;
+  InstX86BaseBinopXmmShift(const InstX86BaseBinopXmmShift &) = delete;
+  InstX86BaseBinopXmmShift &
+  operator=(const InstX86BaseBinopXmmShift &) = delete;
+
+public:
+  using Base = InstX86BaseBinopXmmShift<Machine, K, AllowAllTypes>;
+
+  void emit(const Cfg *Func) const override {
+    if (!BuildDefs::dump()) // No output unless BuildDefs::dump() is enabled.
+      return;
+    this->validateVectorAddrMode();
+    const bool ShiftHack = false;
+    this->emitTwoAddress(Opcode, this, Func, ShiftHack);
+  }
+  void emitIAS(const Cfg *Func) const override {
+    this->validateVectorAddrMode();
+    Type Ty = this->getDest()->getType();
+    assert(AllowAllTypes || isVectorType(Ty));
+    Type ElementTy = typeElementType(Ty); // The emitter always receives the element type.
+    assert(this->getSrcSize() == 2);
+    emitIASXmmShift<Machine>(Func, ElementTy, this->getDest(), this->getSrc(1),
+                             Emitter);
+  }
+  void dump(const Cfg *Func) const override {
+    if (!BuildDefs::dump())
+      return;
+    Ostream &Str = Func->getContext()->getStrDump();
+    this->dumpDest(Func);
+    Str << " = " << Opcode << "." << this->getDest()->getType() << " ";
+    this->dumpSources(Func);
+  }
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::K);
+  }
+
+protected:
+  InstX86BaseBinopXmmShift(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86Base<Machine>(Func, K, 2, Dest) {
+    this->addSource(Dest); // Source 0 is the dest itself.
+    this->addSource(Source); // Source 1.
+  }
+
+  static const char *Opcode; // Mnemonic text passed to emitTwoAddress() and written by dump().
+  static const typename InstX86Base<
+      Machine>::Traits::Assembler::XmmEmitterShiftOp Emitter; // Handed to emitIASXmmShift() by emitIAS().
+};
+
+template <class Machine, typename InstX86Base<Machine>::InstKindX86 K>
+class InstX86BaseTernop : public InstX86Base<Machine> { // Three sources, the first being Dest; no emitIAS() here.
+  InstX86BaseTernop() = delete;
+  InstX86BaseTernop(const InstX86BaseTernop &) = delete;
+  InstX86BaseTernop &operator=(const InstX86BaseTernop &) = delete;
+
+public:
+  using Base = InstX86BaseTernop<Machine, K>;
+
+  void emit(const Cfg *Func) const override {
+    if (!BuildDefs::dump()) // No output unless BuildDefs::dump() is enabled.
+      return;
+    Ostream &Str = Func->getContext()->getStrEmit();
+    assert(this->getSrcSize() == 3);
+    Str << "\t" << Opcode << "\t";
+    this->getSrc(2)->emit(Func); // Printed order: source 2, source 1, then dest.
+    Str << ", ";
+    this->getSrc(1)->emit(Func);
+    Str << ", ";
+    this->getDest()->emit(Func);
+  }
+  void dump(const Cfg *Func) const override {
+    if (!BuildDefs::dump())
+      return;
+    Ostream &Str = Func->getContext()->getStrDump();
+    this->dumpDest(Func);
+    Str << " = " << Opcode << "." << this->getDest()->getType() << " ";
+    this->dumpSources(Func);
+  }
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::K);
+  }
+
+protected:
+  InstX86BaseTernop(Cfg *Func, Variable *Dest, Operand *Source1,
+                    Operand *Source2)
+      : InstX86Base<Machine>(Func, K, 3, Dest) {
+    this->addSource(Dest); // Source 0 is the dest itself.
+    this->addSource(Source1);
+    this->addSource(Source2);
+  }
+
+  static const char *Opcode; // Mnemonic text written by emit() and dump().
+};
+
+/// Instructions of the form x := y op z.
+template <class Machine, typename InstX86Base<Machine>::InstKindX86 K>
+class InstX86BaseThreeAddressop : public InstX86Base<Machine> {
+  InstX86BaseThreeAddressop() = delete;
+  InstX86BaseThreeAddressop(const InstX86BaseThreeAddressop &) = delete;
+  InstX86BaseThreeAddressop &
+  operator=(const InstX86BaseThreeAddressop &) = delete;
+
+public:
+  using Base = InstX86BaseThreeAddressop<Machine, K>;
+
+  void emit(const Cfg *Func) const override {
+    if (!BuildDefs::dump()) // No output unless BuildDefs::dump() is enabled.
+      return;
+    Ostream &Str = Func->getContext()->getStrEmit();
+    assert(this->getSrcSize() == 2);
+    Str << "\t" << Opcode << "\t";
+    this->getSrc(1)->emit(Func); // Printed order: source 1, source 0, then dest.
+    Str << ", ";
+    this->getSrc(0)->emit(Func);
+    Str << ", ";
+    this->getDest()->emit(Func);
+  }
+  void dump(const Cfg *Func) const override {
+    if (!BuildDefs::dump())
+      return;
+    Ostream &Str = Func->getContext()->getStrDump();
+    this->dumpDest(Func);
+    Str << " = " << Opcode << "." << this->getDest()->getType() << " ";
+    this->dumpSources(Func);
+  }
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::K);
+  }
+
+protected:
+  InstX86BaseThreeAddressop(Cfg *Func, Variable *Dest, Operand *Source0,
+                            Operand *Source1)
+      : InstX86Base<Machine>(Func, K, 2, Dest) {
+    this->addSource(Source0); // Unlike the two-address bases, Dest is not added as a source.
+    this->addSource(Source1);
+  }
+
+  static const char *Opcode; // Mnemonic text written by emit() and dump().
+};
+
+/// Base class for assignment instructions (used by Mov, Movp and Movq).
+template <class Machine, typename InstX86Base<Machine>::InstKindX86 K>
+class InstX86BaseMovlike : public InstX86Base<Machine> {
+  InstX86BaseMovlike() = delete;
+  InstX86BaseMovlike(const InstX86BaseMovlike &) = delete;
+  InstX86BaseMovlike &operator=(const InstX86BaseMovlike &) = delete;
+
+public:
+  using Base = InstX86BaseMovlike<Machine, K>;
+
+  bool isRedundantAssign() const override {
+    return checkForRedundantAssign(this->getDest(), this->getSrc(0)); // Decided from Dest and source 0 only.
+  }
+  bool isSimpleAssign() const override { return true; }
+  void dump(const Cfg *Func) const override {
+    if (!BuildDefs::dump()) // No output unless BuildDefs::dump() is enabled.
+      return;
+    Ostream &Str = Func->getContext()->getStrDump();
+    Str << Opcode << "." << this->getDest()->getType() << " "; // Opcode first, then "dest, sources".
+    this->dumpDest(Func);
+    Str << ", ";
+    this->dumpSources(Func);
+  }
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::K);
+  }
+
+protected:
+  InstX86BaseMovlike(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86Base<Machine>(Func, K, 1, Dest) {
+    this->addSource(Source); // Single source; emit()/emitIAS() are not defined in this base.
+  }
+
+  static const char *Opcode; // Mnemonic text written by dump().
+};
+
+template <class Machine>
+class InstX86Bswap // In-place GPR op (x := op(x)) of kind Bswap.
+    : public InstX86BaseInplaceopGPR<Machine, InstX86Base<Machine>::Bswap> {
+public:
+  static InstX86Bswap *create(Cfg *Func, Operand *SrcDest) {
+    return new (Func->allocate<InstX86Bswap>()) InstX86Bswap(Func, SrcDest);
+  }
+
+private:
+  InstX86Bswap(Cfg *Func, Operand *SrcDest) // Forwards to the base; adds no state of its own.
+      : InstX86BaseInplaceopGPR<Machine, InstX86Base<Machine>::Bswap>(Func,
+                                                                      SrcDest) {
+  }
+};
+
+template <class Machine>
+class InstX86Neg // In-place GPR op (x := op(x)) of kind Neg.
+    : public InstX86BaseInplaceopGPR<Machine, InstX86Base<Machine>::Neg> {
+public:
+  static InstX86Neg *create(Cfg *Func, Operand *SrcDest) {
+    return new (Func->allocate<InstX86Neg>()) InstX86Neg(Func, SrcDest);
+  }
+
+private:
+  InstX86Neg(Cfg *Func, Operand *SrcDest) // Forwards to the base; adds no state of its own.
+      : InstX86BaseInplaceopGPR<Machine, InstX86Base<Machine>::Neg>(Func,
+                                                                    SrcDest) {}
+};
+
+template <class Machine>
+class InstX86Bsf // GPR unary op (x := op(y)) of kind Bsf.
+    : public InstX86BaseUnaryopGPR<Machine, InstX86Base<Machine>::Bsf> {
+public:
+  static InstX86Bsf *create(Cfg *Func, Variable *Dest, Operand *Src) {
+    return new (Func->allocate<InstX86Bsf>()) InstX86Bsf(Func, Dest, Src);
+  }
+
+private:
+  InstX86Bsf(Cfg *Func, Variable *Dest, Operand *Src) // Forwards to the base; adds no state of its own.
+      : InstX86BaseUnaryopGPR<Machine, InstX86Base<Machine>::Bsf>(Func, Dest,
+                                                                  Src) {}
+};
+
+template <class Machine>
+class InstX86Bsr // GPR unary op (x := op(y)) of kind Bsr.
+    : public InstX86BaseUnaryopGPR<Machine, InstX86Base<Machine>::Bsr> {
+public:
+  static InstX86Bsr *create(Cfg *Func, Variable *Dest, Operand *Src) {
+    return new (Func->allocate<InstX86Bsr>()) InstX86Bsr(Func, Dest, Src);
+  }
+
+private:
+  InstX86Bsr(Cfg *Func, Variable *Dest, Operand *Src) // Forwards to the base; adds no state of its own.
+      : InstX86BaseUnaryopGPR<Machine, InstX86Base<Machine>::Bsr>(Func, Dest,
+                                                                  Src) {}
+};
+
+template <class Machine>
+class InstX86Lea // GPR unary op (x := op(y)) of kind Lea.
+    : public InstX86BaseUnaryopGPR<Machine, InstX86Base<Machine>::Lea> {
+public:
+  static InstX86Lea *create(Cfg *Func, Variable *Dest, Operand *Src) {
+    return new (Func->allocate<InstX86Lea>()) InstX86Lea(Func, Dest, Src);
+  }
+
+  void emit(const Cfg *Func) const override; // Replaces the base emit(); emitIAS() and dump() are inherited.
+
+private:
+  InstX86Lea(Cfg *Func, Variable *Dest, Operand *Src)
+      : InstX86BaseUnaryopGPR<Machine, InstX86Base<Machine>::Lea>(Func, Dest,
+                                                                  Src) {}
+};
+
+/// Cbwdq instruction - wrapper for cbw, cwd, and cdq.
+template <class Machine>
+class InstX86Cbwdq
+    : public InstX86BaseUnaryopGPR<Machine, InstX86Base<Machine>::Cbwdq> {
+public:
+  static InstX86Cbwdq *create(Cfg *Func, Variable *Dest, Operand *Src) {
+    return new (Func->allocate<InstX86Cbwdq>()) InstX86Cbwdq(Func, Dest, Src);
+  }
+
+  void emit(const Cfg *Func) const override; // Both emitters replace the base versions; dump() is inherited.
+  void emitIAS(const Cfg *Func) const override;
+
+private:
+  InstX86Cbwdq(Cfg *Func, Variable *Dest, Operand *Src)
+      : InstX86BaseUnaryopGPR<Machine, InstX86Base<Machine>::Cbwdq>(Func, Dest,
+                                                                    Src) {}
+};
+
+template <class Machine>
+class InstX86Movsx // GPR unary op (x := op(y)) of kind Movsx.
+    : public InstX86BaseUnaryopGPR<Machine, InstX86Base<Machine>::Movsx> {
+public:
+  static InstX86Movsx *create(Cfg *Func, Variable *Dest, Operand *Src) {
+    return new (Func->allocate<InstX86Movsx>()) InstX86Movsx(Func, Dest, Src);
+  }
+
+  void emitIAS(const Cfg *Func) const override; // Only emitIAS() is replaced; emit() and dump() are inherited.
+
+private:
+  InstX86Movsx(Cfg *Func, Variable *Dest, Operand *Src)
+      : InstX86BaseUnaryopGPR<Machine, InstX86Base<Machine>::Movsx>(Func, Dest,
+                                                                    Src) {}
+};
+
+template <class Machine>
+class InstX86Movzx // GPR unary op (x := op(y)) of kind Movzx.
+    : public InstX86BaseUnaryopGPR<Machine, InstX86Base<Machine>::Movzx> {
+public:
+  static InstX86Movzx *create(Cfg *Func, Variable *Dest, Operand *Src) {
+    return new (Func->allocate<InstX86Movzx>()) InstX86Movzx(Func, Dest, Src);
+  }
+
+  void emitIAS(const Cfg *Func) const override; // Only emitIAS() is replaced; emit() and dump() are inherited.
+
+private:
+  InstX86Movzx(Cfg *Func, Variable *Dest, Operand *Src)
+      : InstX86BaseUnaryopGPR<Machine, InstX86Base<Machine>::Movzx>(Func, Dest,
+                                                                    Src) {}
+};
+
+template <class Machine>
+class InstX86Movd // XMM unary op (x := op(y)) of kind Movd.
+    : public InstX86BaseUnaryopXmm<Machine, InstX86Base<Machine>::Movd> {
+public:
+  static InstX86Movd *create(Cfg *Func, Variable *Dest, Operand *Src) {
+    return new (Func->allocate<InstX86Movd>()) InstX86Movd(Func, Dest, Src);
+  }
+
+  void emitIAS(const Cfg *Func) const override; // Only emitIAS() is replaced; emit() and dump() are inherited.
+
+private:
+  InstX86Movd(Cfg *Func, Variable *Dest, Operand *Src)
+      : InstX86BaseUnaryopXmm<Machine, InstX86Base<Machine>::Movd>(Func, Dest,
+                                                                   Src) {}
+};
+
+template <class Machine>
+class InstX86Sqrtss // XMM unary op (x := op(y)) of kind Sqrtss.
+    : public InstX86BaseUnaryopXmm<Machine, InstX86Base<Machine>::Sqrtss> {
+public:
+  static InstX86Sqrtss *create(Cfg *Func, Variable *Dest, Operand *Src) {
+    return new (Func->allocate<InstX86Sqrtss>()) InstX86Sqrtss(Func, Dest, Src);
+  }
+
+  void emit(const Cfg *Func) const override; // Replaces the base emit(); emitIAS() and dump() are inherited.
+
+private:
+  InstX86Sqrtss(Cfg *Func, Variable *Dest, Operand *Src)
+      : InstX86BaseUnaryopXmm<Machine, InstX86Base<Machine>::Sqrtss>(Func, Dest,
+                                                                     Src) {}
+};
+
+/// Move/assignment instruction - wrapper for mov/movss/movsd.
+template <class Machine>
+class InstX86Mov
+    : public InstX86BaseMovlike<Machine, InstX86Base<Machine>::Mov> {
+public:
+  static InstX86Mov *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    return new (Func->allocate<InstX86Mov>()) InstX86Mov(Func, Dest, Source);
+  }
+
+  void emit(const Cfg *Func) const override; // The Movlike base defines neither emitter, so both are declared here.
+  void emitIAS(const Cfg *Func) const override;
+
+private:
+  InstX86Mov(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86BaseMovlike<Machine, InstX86Base<Machine>::Mov>(Func, Dest,
+                                                               Source) {}
+};
+
+/// Move packed - copy 128 bit values between XMM registers, or mem128
+/// and XMM registers.
+template <class Machine>
+class InstX86Movp
+    : public InstX86BaseMovlike<Machine, InstX86Base<Machine>::Movp> {
+public:
+  static InstX86Movp *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    return new (Func->allocate<InstX86Movp>()) InstX86Movp(Func, Dest, Source);
+  }
+
+  void emit(const Cfg *Func) const override; // The Movlike base defines neither emitter, so both are declared here.
+  void emitIAS(const Cfg *Func) const override;
+
+private:
+  InstX86Movp(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86BaseMovlike<Machine, InstX86Base<Machine>::Movp>(Func, Dest,
+                                                                Source) {}
+};
+
+/// Movq - copy between XMM registers, or mem64 and XMM registers.
+template <class Machine>
+class InstX86Movq
+    : public InstX86BaseMovlike<Machine, InstX86Base<Machine>::Movq> {
+public:
+  static InstX86Movq *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    return new (Func->allocate<InstX86Movq>()) InstX86Movq(Func, Dest, Source);
+  }
+
+  void emit(const Cfg *Func) const override; // The Movlike base defines neither emitter, so both are declared here.
+  void emitIAS(const Cfg *Func) const override;
+
+private:
+  InstX86Movq(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86BaseMovlike<Machine, InstX86Base<Machine>::Movq>(Func, Dest,
+                                                                Source) {}
+};
+/// Add instruction: (Dest, Source) binop built on InstX86BaseBinopGPR.
+template <class Machine>
+class InstX86Add
+    : public InstX86BaseBinopGPR<Machine, InstX86Base<Machine>::Add> {
+public:
+  static InstX86Add *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    return new (Func->allocate<InstX86Add>()) InstX86Add(Func, Dest, Source);
+  }
+
+private:
+  InstX86Add(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86BaseBinopGPR<Machine, InstX86Base<Machine>::Add>(Func, Dest,
+                                                                Source) {}
+};
+/// AddRMW instruction: (X86OperandMem DestSrc0, Src1) on InstX86BaseBinopRMW.
+template <class Machine>
+class InstX86AddRMW
+    : public InstX86BaseBinopRMW<Machine, InstX86Base<Machine>::AddRMW> {
+public:
+  static InstX86AddRMW *
+  create(Cfg *Func,
+         typename InstX86Base<Machine>::Traits::X86OperandMem *DestSrc0,
+         Operand *Src1) {
+    return new (Func->allocate<InstX86AddRMW>())
+        InstX86AddRMW(Func, DestSrc0, Src1);
+  }
+
+private:
+  InstX86AddRMW(Cfg *Func,
+                typename InstX86Base<Machine>::Traits::X86OperandMem *DestSrc0,
+                Operand *Src1)
+      : InstX86BaseBinopRMW<Machine, InstX86Base<Machine>::AddRMW>(
+            Func, DestSrc0, Src1) {}
+};
+/// Addps instruction: InstX86BaseBinopXmm<..., true> binop on (Dest, Source).
+template <class Machine>
+class InstX86Addps
+    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Addps, true> {
+public:
+  static InstX86Addps *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    return new (Func->allocate<InstX86Addps>())
+        InstX86Addps(Func, Dest, Source);
+  }
+
+private:
+  InstX86Addps(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Addps, true>(
+            Func, Dest, Source) {}
+};
+/// Adc instruction: (Dest, Source) binop built on InstX86BaseBinopGPR.
+template <class Machine>
+class InstX86Adc
+    : public InstX86BaseBinopGPR<Machine, InstX86Base<Machine>::Adc> {
+public:
+  static InstX86Adc *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    return new (Func->allocate<InstX86Adc>()) InstX86Adc(Func, Dest, Source);
+  }
+
+private:
+  InstX86Adc(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86BaseBinopGPR<Machine, InstX86Base<Machine>::Adc>(Func, Dest,
+                                                                Source) {}
+};
+/// AdcRMW instruction: (X86OperandMem DestSrc0, Src1) on InstX86BaseBinopRMW.
+template <class Machine>
+class InstX86AdcRMW
+    : public InstX86BaseBinopRMW<Machine, InstX86Base<Machine>::AdcRMW> {
+public:
+  static InstX86AdcRMW *
+  create(Cfg *Func,
+         typename InstX86Base<Machine>::Traits::X86OperandMem *DestSrc0,
+         Operand *Src1) {
+    return new (Func->allocate<InstX86AdcRMW>())
+        InstX86AdcRMW(Func, DestSrc0, Src1);
+  }
+
+private:
+  InstX86AdcRMW(Cfg *Func,
+                typename InstX86Base<Machine>::Traits::X86OperandMem *DestSrc0,
+                Operand *Src1)
+      : InstX86BaseBinopRMW<Machine, InstX86Base<Machine>::AdcRMW>(
+            Func, DestSrc0, Src1) {}
+};
+/// Addss instruction: InstX86BaseBinopXmm<..., false>; overrides emit().
+template <class Machine>
+class InstX86Addss
+    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Addss, false> {
+public:
+  static InstX86Addss *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    return new (Func->allocate<InstX86Addss>())
+        InstX86Addss(Func, Dest, Source);
+  }
+
+  void emit(const Cfg *Func) const override;
+
+private:
+  InstX86Addss(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Addss, false>(
+            Func, Dest, Source) {}
+};
+/// Padd instruction: InstX86BaseBinopXmm<..., true>; overrides emit().
+template <class Machine>
+class InstX86Padd
+    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Padd, true> {
+public:
+  static InstX86Padd *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    return new (Func->allocate<InstX86Padd>()) InstX86Padd(Func, Dest, Source);
+  }
+
+  void emit(const Cfg *Func) const override;
+
+private:
+  InstX86Padd(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Padd, true>(
+            Func, Dest, Source) {}
+};
+/// Sub instruction: (Dest, Source) binop built on InstX86BaseBinopGPR.
+template <class Machine>
+class InstX86Sub
+    : public InstX86BaseBinopGPR<Machine, InstX86Base<Machine>::Sub> {
+public:
+  static InstX86Sub *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    return new (Func->allocate<InstX86Sub>()) InstX86Sub(Func, Dest, Source);
+  }
+
+private:
+  InstX86Sub(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86BaseBinopGPR<Machine, InstX86Base<Machine>::Sub>(Func, Dest,
+                                                                Source) {}
+};
+/// SubRMW instruction: (X86OperandMem DestSrc0, Src1) on InstX86BaseBinopRMW.
+template <class Machine>
+class InstX86SubRMW
+    : public InstX86BaseBinopRMW<Machine, InstX86Base<Machine>::SubRMW> {
+public:
+  static InstX86SubRMW *
+  create(Cfg *Func,
+         typename InstX86Base<Machine>::Traits::X86OperandMem *DestSrc0,
+         Operand *Src1) {
+    return new (Func->allocate<InstX86SubRMW>())
+        InstX86SubRMW(Func, DestSrc0, Src1);
+  }
+
+private:
+  InstX86SubRMW(Cfg *Func,
+                typename InstX86Base<Machine>::Traits::X86OperandMem *DestSrc0,
+                Operand *Src1)
+      : InstX86BaseBinopRMW<Machine, InstX86Base<Machine>::SubRMW>(
+            Func, DestSrc0, Src1) {}
+};
+/// Subps instruction: InstX86BaseBinopXmm<..., true> binop on (Dest, Source).
+template <class Machine>
+class InstX86Subps
+    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Subps, true> {
+public:
+  static InstX86Subps *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    return new (Func->allocate<InstX86Subps>())
+        InstX86Subps(Func, Dest, Source);
+  }
+
+private:
+  InstX86Subps(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Subps, true>(
+            Func, Dest, Source) {}
+};
+/// Subss instruction: InstX86BaseBinopXmm<..., false>; overrides emit().
+template <class Machine>
+class InstX86Subss
+    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Subss, false> {
+public:
+  static InstX86Subss *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    return new (Func->allocate<InstX86Subss>())
+        InstX86Subss(Func, Dest, Source);
+  }
+
+  void emit(const Cfg *Func) const override;
+
+private:
+  InstX86Subss(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Subss, false>(
+            Func, Dest, Source) {}
+};
+/// Sbb instruction: (Dest, Source) binop built on InstX86BaseBinopGPR.
+template <class Machine>
+class InstX86Sbb
+    : public InstX86BaseBinopGPR<Machine, InstX86Base<Machine>::Sbb> {
+public:
+  static InstX86Sbb *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    return new (Func->allocate<InstX86Sbb>()) InstX86Sbb(Func, Dest, Source);
+  }
+
+private:
+  InstX86Sbb(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86BaseBinopGPR<Machine, InstX86Base<Machine>::Sbb>(Func, Dest,
+                                                                Source) {}
+};
+/// SbbRMW instruction: (X86OperandMem DestSrc0, Src1) on InstX86BaseBinopRMW.
+template <class Machine>
+class InstX86SbbRMW
+    : public InstX86BaseBinopRMW<Machine, InstX86Base<Machine>::SbbRMW> {
+public:
+  static InstX86SbbRMW *
+  create(Cfg *Func,
+         typename InstX86Base<Machine>::Traits::X86OperandMem *DestSrc0,
+         Operand *Src1) {
+    return new (Func->allocate<InstX86SbbRMW>())
+        InstX86SbbRMW(Func, DestSrc0, Src1);
+  }
+
+private:
+  InstX86SbbRMW(Cfg *Func,
+                typename InstX86Base<Machine>::Traits::X86OperandMem *DestSrc0,
+                Operand *Src1)
+      : InstX86BaseBinopRMW<Machine, InstX86Base<Machine>::SbbRMW>(
+            Func, DestSrc0, Src1) {}
+};
+/// Psub instruction: InstX86BaseBinopXmm<..., true>; overrides emit().
+template <class Machine>
+class InstX86Psub
+    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Psub, true> {
+public:
+  static InstX86Psub *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    return new (Func->allocate<InstX86Psub>()) InstX86Psub(Func, Dest, Source);
+  }
+
+  void emit(const Cfg *Func) const override;
+
+private:
+  InstX86Psub(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Psub, true>(
+            Func, Dest, Source) {}
+};
+/// And instruction: (Dest, Source) binop built on InstX86BaseBinopGPR.
+template <class Machine>
+class InstX86And
+    : public InstX86BaseBinopGPR<Machine, InstX86Base<Machine>::And> {
+public:
+  static InstX86And *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    return new (Func->allocate<InstX86And>()) InstX86And(Func, Dest, Source);
+  }
+
+private:
+  InstX86And(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86BaseBinopGPR<Machine, InstX86Base<Machine>::And>(Func, Dest,
+                                                                Source) {}
+};
+/// AndRMW instruction: (X86OperandMem DestSrc0, Src1) on InstX86BaseBinopRMW.
+template <class Machine>
+class InstX86AndRMW
+    : public InstX86BaseBinopRMW<Machine, InstX86Base<Machine>::AndRMW> {
+public:
+  static InstX86AndRMW *
+  create(Cfg *Func,
+         typename InstX86Base<Machine>::Traits::X86OperandMem *DestSrc0,
+         Operand *Src1) {
+    return new (Func->allocate<InstX86AndRMW>())
+        InstX86AndRMW(Func, DestSrc0, Src1);
+  }
+
+private:
+  InstX86AndRMW(Cfg *Func,
+                typename InstX86Base<Machine>::Traits::X86OperandMem *DestSrc0,
+                Operand *Src1)
+      : InstX86BaseBinopRMW<Machine, InstX86Base<Machine>::AndRMW>(
+            Func, DestSrc0, Src1) {}
+};
+/// Pand instruction: InstX86BaseBinopXmm<..., false> binop on (Dest, Source).
+template <class Machine>
+class InstX86Pand
+    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Pand, false> {
+public:
+  static InstX86Pand *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    return new (Func->allocate<InstX86Pand>()) InstX86Pand(Func, Dest, Source);
+  }
+
+private:
+  InstX86Pand(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Pand, false>(
+            Func, Dest, Source) {}
+};
+/// Pandn instruction: InstX86BaseBinopXmm<..., false> binop on (Dest, Source).
+template <class Machine>
+class InstX86Pandn
+    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Pandn, false> {
+public:
+  static InstX86Pandn *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    return new (Func->allocate<InstX86Pandn>())
+        InstX86Pandn(Func, Dest, Source);
+  }
+
+private:
+  InstX86Pandn(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Pandn, false>(
+            Func, Dest, Source) {}
+};
+/// Or instruction: (Dest, Source) binop built on InstX86BaseBinopGPR.
+template <class Machine>
+class InstX86Or
+    : public InstX86BaseBinopGPR<Machine, InstX86Base<Machine>::Or> {
+public:
+  static InstX86Or *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    return new (Func->allocate<InstX86Or>()) InstX86Or(Func, Dest, Source);
+  }
+
+private:
+  InstX86Or(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86BaseBinopGPR<Machine, InstX86Base<Machine>::Or>(Func, Dest,
+                                                               Source) {}
+};
+/// OrRMW instruction: (X86OperandMem DestSrc0, Src1) on InstX86BaseBinopRMW.
+template <class Machine>
+class InstX86OrRMW
+    : public InstX86BaseBinopRMW<Machine, InstX86Base<Machine>::OrRMW> {
+public:
+  static InstX86OrRMW *
+  create(Cfg *Func,
+         typename InstX86Base<Machine>::Traits::X86OperandMem *DestSrc0,
+         Operand *Src1) {
+    return new (Func->allocate<InstX86OrRMW>())
+        InstX86OrRMW(Func, DestSrc0, Src1);
+  }
+
+private:
+  InstX86OrRMW(Cfg *Func,
+               typename InstX86Base<Machine>::Traits::X86OperandMem *DestSrc0,
+               Operand *Src1)
+      : InstX86BaseBinopRMW<Machine, InstX86Base<Machine>::OrRMW>(
+            Func, DestSrc0, Src1) {}
+};
+/// Por instruction: InstX86BaseBinopXmm<..., false> binop on (Dest, Source).
+template <class Machine>
+class InstX86Por
+    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Por, false> {
+public:
+  static InstX86Por *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    return new (Func->allocate<InstX86Por>()) InstX86Por(Func, Dest, Source);
+  }
+
+private:
+  InstX86Por(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Por, false>(
+            Func, Dest, Source) {}
+};
+/// Xor instruction: (Dest, Source) binop built on InstX86BaseBinopGPR.
+template <class Machine>
+class InstX86Xor
+    : public InstX86BaseBinopGPR<Machine, InstX86Base<Machine>::Xor> {
+public:
+  static InstX86Xor *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    return new (Func->allocate<InstX86Xor>()) InstX86Xor(Func, Dest, Source);
+  }
+
+private:
+  InstX86Xor(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86BaseBinopGPR<Machine, InstX86Base<Machine>::Xor>(Func, Dest,
+                                                                Source) {}
+};
+/// XorRMW instruction: (X86OperandMem DestSrc0, Src1) on InstX86BaseBinopRMW.
+template <class Machine>
+class InstX86XorRMW
+    : public InstX86BaseBinopRMW<Machine, InstX86Base<Machine>::XorRMW> {
+public:
+  static InstX86XorRMW *
+  create(Cfg *Func,
+         typename InstX86Base<Machine>::Traits::X86OperandMem *DestSrc0,
+         Operand *Src1) {
+    return new (Func->allocate<InstX86XorRMW>())
+        InstX86XorRMW(Func, DestSrc0, Src1);
+  }
+
+private:
+  InstX86XorRMW(Cfg *Func,
+                typename InstX86Base<Machine>::Traits::X86OperandMem *DestSrc0,
+                Operand *Src1)
+      : InstX86BaseBinopRMW<Machine, InstX86Base<Machine>::XorRMW>(
+            Func, DestSrc0, Src1) {}
+};
+/// Pxor instruction: InstX86BaseBinopXmm<..., false> binop on (Dest, Source).
+template <class Machine>
+class InstX86Pxor
+    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Pxor, false> {
+public:
+  static InstX86Pxor *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    return new (Func->allocate<InstX86Pxor>()) InstX86Pxor(Func, Dest, Source);
+  }
+
+private:
+  InstX86Pxor(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Pxor, false>(
+            Func, Dest, Source) {}
+};
+/// Imul instruction: InstX86BaseBinopGPR; overrides emit() and emitIAS().
+template <class Machine>
+class InstX86Imul
+    : public InstX86BaseBinopGPR<Machine, InstX86Base<Machine>::Imul> {
+public:
+  static InstX86Imul *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    return new (Func->allocate<InstX86Imul>()) InstX86Imul(Func, Dest, Source);
+  }
+
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+
+private:
+  InstX86Imul(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86BaseBinopGPR<Machine, InstX86Base<Machine>::Imul>(Func, Dest,
+                                                                 Source) {}
+};
+/// Mulps instruction: InstX86BaseBinopXmm<..., true> binop on (Dest, Source).
+template <class Machine>
+class InstX86Mulps
+    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Mulps, true> {
+public:
+  static InstX86Mulps *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    return new (Func->allocate<InstX86Mulps>())
+        InstX86Mulps(Func, Dest, Source);
+  }
+
+private:
+  InstX86Mulps(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Mulps, true>(
+            Func, Dest, Source) {}
+};
+/// Mulss instruction: InstX86BaseBinopXmm<..., false>; overrides emit().
+template <class Machine>
+class InstX86Mulss
+    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Mulss, false> {
+public:
+  static InstX86Mulss *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    return new (Func->allocate<InstX86Mulss>())
+        InstX86Mulss(Func, Dest, Source);
+  }
+
+  void emit(const Cfg *Func) const override;
+
+private:
+  InstX86Mulss(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Mulss, false>(
+            Func, Dest, Source) {}
+};
+/// Pmull instruction: InstX86BaseBinopXmm<..., true>; overrides both emitters.
+template <class Machine>
+class InstX86Pmull
+    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Pmull, true> {
+public:
+  static InstX86Pmull *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    return new (Func->allocate<InstX86Pmull>())
+        InstX86Pmull(Func, Dest, Source);
+  }
+
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+
+private:
+  InstX86Pmull(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Pmull, true>(
+            Func, Dest, Source) {}
+};
+/// Pmuludq instruction: InstX86BaseBinopXmm<..., false>; overrides emit().
+template <class Machine>
+class InstX86Pmuludq
+    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Pmuludq,
+                                 false> {
+public:
+  static InstX86Pmuludq *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    return new (Func->allocate<InstX86Pmuludq>())
+        InstX86Pmuludq(Func, Dest, Source);
+  }
+
+  void emit(const Cfg *Func) const override;
+
+private:
+  InstX86Pmuludq(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Pmuludq, false>(
+            Func, Dest, Source) {}
+};
+/// Divps instruction: InstX86BaseBinopXmm<..., true> binop on (Dest, Source).
+template <class Machine>
+class InstX86Divps
+    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Divps, true> {
+public:
+  static InstX86Divps *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    return new (Func->allocate<InstX86Divps>())
+        InstX86Divps(Func, Dest, Source);
+  }
+
+private:
+  InstX86Divps(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Divps, true>(
+            Func, Dest, Source) {}
+};
+/// Divss instruction: InstX86BaseBinopXmm<..., false>; overrides emit().
+template <class Machine>
+class InstX86Divss
+    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Divss, false> {
+public:
+  static InstX86Divss *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    return new (Func->allocate<InstX86Divss>())
+        InstX86Divss(Func, Dest, Source);
+  }
+
+  void emit(const Cfg *Func) const override;
+
+private:
+  InstX86Divss(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Divss, false>(
+            Func, Dest, Source) {}
+};
+/// Rol instruction: (Dest, Source) form built on InstX86BaseBinopGPRShift.
+template <class Machine>
+class InstX86Rol
+    : public InstX86BaseBinopGPRShift<Machine, InstX86Base<Machine>::Rol> {
+public:
+  static InstX86Rol *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    return new (Func->allocate<InstX86Rol>()) InstX86Rol(Func, Dest, Source);
+  }
+
+private:
+  InstX86Rol(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86BaseBinopGPRShift<Machine, InstX86Base<Machine>::Rol>(Func, Dest,
+                                                                     Source) {}
+};
+/// Shl instruction: (Dest, Source) form built on InstX86BaseBinopGPRShift.
+template <class Machine>
+class InstX86Shl
+    : public InstX86BaseBinopGPRShift<Machine, InstX86Base<Machine>::Shl> {
+public:
+  static InstX86Shl *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    return new (Func->allocate<InstX86Shl>()) InstX86Shl(Func, Dest, Source);
+  }
+
+private:
+  InstX86Shl(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86BaseBinopGPRShift<Machine, InstX86Base<Machine>::Shl>(Func, Dest,
+                                                                     Source) {}
+};
+/// Psll instruction: InstX86BaseBinopXmmShift binop; overrides emit().
+template <class Machine>
+class InstX86Psll
+    : public InstX86BaseBinopXmmShift<Machine, InstX86Base<Machine>::Psll> {
+public:
+  static InstX86Psll *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    return new (Func->allocate<InstX86Psll>()) InstX86Psll(Func, Dest, Source);
+  }
+
+  void emit(const Cfg *Func) const override;
+
+private:
+  InstX86Psll(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86BaseBinopXmmShift<Machine, InstX86Base<Machine>::Psll>(
+            Func, Dest, Source) {}
+};
+/// Psrl instruction: InstX86BaseBinopXmmShift<..., true>; overrides emit().
+template <class Machine>
+class InstX86Psrl
+    : public InstX86BaseBinopXmmShift<Machine, InstX86Base<Machine>::Psrl,
+                                      true> { // Psll/Psra omit this argument.
+public:
+  static InstX86Psrl *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    return new (Func->allocate<InstX86Psrl>()) InstX86Psrl(Func, Dest, Source);
+  }
+
+  void emit(const Cfg *Func) const override;
+
+private:
+  InstX86Psrl(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86BaseBinopXmmShift<Machine, InstX86Base<Machine>::Psrl, true>(
+            Func, Dest, Source) {}
+};
+/// Shr instruction: (Dest, Source) form built on InstX86BaseBinopGPRShift.
+template <class Machine>
+class InstX86Shr
+    : public InstX86BaseBinopGPRShift<Machine, InstX86Base<Machine>::Shr> {
+public:
+  static InstX86Shr *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    return new (Func->allocate<InstX86Shr>()) InstX86Shr(Func, Dest, Source);
+  }
+
+private:
+  InstX86Shr(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86BaseBinopGPRShift<Machine, InstX86Base<Machine>::Shr>(Func, Dest,
+                                                                     Source) {}
+};
+/// Sar instruction: (Dest, Source) form built on InstX86BaseBinopGPRShift.
+template <class Machine>
+class InstX86Sar
+    : public InstX86BaseBinopGPRShift<Machine, InstX86Base<Machine>::Sar> {
+public:
+  static InstX86Sar *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    return new (Func->allocate<InstX86Sar>()) InstX86Sar(Func, Dest, Source);
+  }
+
+private:
+  InstX86Sar(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86BaseBinopGPRShift<Machine, InstX86Base<Machine>::Sar>(Func, Dest,
+                                                                     Source) {}
+};
+/// Psra instruction: InstX86BaseBinopXmmShift binop; overrides emit().
+template <class Machine>
+class InstX86Psra
+    : public InstX86BaseBinopXmmShift<Machine, InstX86Base<Machine>::Psra> {
+public:
+  static InstX86Psra *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    return new (Func->allocate<InstX86Psra>()) InstX86Psra(Func, Dest, Source);
+  }
+
+  void emit(const Cfg *Func) const override;
+
+private:
+  InstX86Psra(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86BaseBinopXmmShift<Machine, InstX86Base<Machine>::Psra>(
+            Func, Dest, Source) {}
+};
+/// Pcmpeq instruction: InstX86BaseBinopXmm<..., true>; overrides emit().
+template <class Machine>
+class InstX86Pcmpeq
+    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Pcmpeq, true> {
+public:
+  static InstX86Pcmpeq *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    return new (Func->allocate<InstX86Pcmpeq>())
+        InstX86Pcmpeq(Func, Dest, Source);
+  }
+
+  void emit(const Cfg *Func) const override;
+
+private:
+  InstX86Pcmpeq(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Pcmpeq, true>(
+            Func, Dest, Source) {}
+};
+/// Pcmpgt instruction: InstX86BaseBinopXmm<..., true>; overrides emit().
+template <class Machine>
+class InstX86Pcmpgt
+    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Pcmpgt, true> {
+public:
+  static InstX86Pcmpgt *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    return new (Func->allocate<InstX86Pcmpgt>())
+        InstX86Pcmpgt(Func, Dest, Source);
+  }
+
+  void emit(const Cfg *Func) const override;
+
+private:
+  InstX86Pcmpgt(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Pcmpgt, true>(
+            Func, Dest, Source) {}
+};
+
+/// movss is only a binary operation when the source and dest
+/// operands are both registers (the high bits of dest are left untouched).
+/// In other cases, it behaves like a copy (mov-like) operation (and the
+/// high bits of dest are cleared).
+/// InstX86MovssRegs will assert that both its source and dest operands are
+/// registers, so the lowering code should use _mov instead of _movss
+/// in cases where a copy operation is intended.
+template <class Machine>
+class InstX86MovssRegs
+    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::MovssRegs,
+                                 false> {
+public:
+  static InstX86MovssRegs *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    return new (Func->allocate<InstX86MovssRegs>())
+        InstX86MovssRegs(Func, Dest, Source);
+  }
+
+  void emitIAS(const Cfg *Func) const override;
+
+private:
+  InstX86MovssRegs(Cfg *Func, Variable *Dest, Operand *Source)
+      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::MovssRegs, false>(
+            Func, Dest, Source) {}
+};
+/// Idiv instruction: InstX86BaseTernop; overrides emit() and emitIAS().
+template <class Machine>
+class InstX86Idiv
+    : public InstX86BaseTernop<Machine, InstX86Base<Machine>::Idiv> {
+public:
+  static InstX86Idiv *create(Cfg *Func, Variable *Dest, Operand *Source1,
+                             Operand *Source2) {
+    return new (Func->allocate<InstX86Idiv>())
+        InstX86Idiv(Func, Dest, Source1, Source2);
+  }
+
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+
+private:
+  InstX86Idiv(Cfg *Func, Variable *Dest, Operand *Source1, Operand *Source2)
+      : InstX86BaseTernop<Machine, InstX86Base<Machine>::Idiv>(
+            Func, Dest, Source1, Source2) {}
+};
+/// Div instruction: InstX86BaseTernop; overrides emit() and emitIAS().
+template <class Machine>
+class InstX86Div
+    : public InstX86BaseTernop<Machine, InstX86Base<Machine>::Div> {
+public:
+  static InstX86Div *create(Cfg *Func, Variable *Dest, Operand *Source1,
+                            Operand *Source2) {
+    return new (Func->allocate<InstX86Div>())
+        InstX86Div(Func, Dest, Source1, Source2);
+  }
+
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+
+private:
+  InstX86Div(Cfg *Func, Variable *Dest, Operand *Source1, Operand *Source2)
+      : InstX86BaseTernop<Machine, InstX86Base<Machine>::Div>(
+            Func, Dest, Source1, Source2) {}
+};
+/// Insertps instruction: InstX86BaseTernop; overrides emitIAS() only.
+template <class Machine>
+class InstX86Insertps
+    : public InstX86BaseTernop<Machine, InstX86Base<Machine>::Insertps> {
+public:
+  static InstX86Insertps *create(Cfg *Func, Variable *Dest, Operand *Source1,
+                                 Operand *Source2) {
+    return new (Func->allocate<InstX86Insertps>())
+        InstX86Insertps(Func, Dest, Source1, Source2);
+  }
+
+  void emitIAS(const Cfg *Func) const override;
+
+private:
+  InstX86Insertps(Cfg *Func, Variable *Dest, Operand *Source1, Operand *Source2)
+      : InstX86BaseTernop<Machine, InstX86Base<Machine>::Insertps>(
+            Func, Dest, Source1, Source2) {}
+};
+/// Pinsr instruction: InstX86BaseTernop; overrides emit() and emitIAS().
+template <class Machine>
+class InstX86Pinsr
+    : public InstX86BaseTernop<Machine, InstX86Base<Machine>::Pinsr> {
+public:
+  static InstX86Pinsr *create(Cfg *Func, Variable *Dest, Operand *Source1,
+                              Operand *Source2) {
+    return new (Func->allocate<InstX86Pinsr>())
+        InstX86Pinsr(Func, Dest, Source1, Source2);
+  }
+
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+
+private:
+  InstX86Pinsr(Cfg *Func, Variable *Dest, Operand *Source1, Operand *Source2)
+      : InstX86BaseTernop<Machine, InstX86Base<Machine>::Pinsr>(
+            Func, Dest, Source1, Source2) {}
+};
+/// Shufps instruction: InstX86BaseTernop; overrides emitIAS() only.
+template <class Machine>
+class InstX86Shufps
+    : public InstX86BaseTernop<Machine, InstX86Base<Machine>::Shufps> {
+public:
+  static InstX86Shufps *create(Cfg *Func, Variable *Dest, Operand *Source1,
+                               Operand *Source2) {
+    return new (Func->allocate<InstX86Shufps>())
+        InstX86Shufps(Func, Dest, Source1, Source2);
+  }
+
+  void emitIAS(const Cfg *Func) const override;
+
+private:
+  InstX86Shufps(Cfg *Func, Variable *Dest, Operand *Source1, Operand *Source2)
+      : InstX86BaseTernop<Machine, InstX86Base<Machine>::Shufps>(
+            Func, Dest, Source1, Source2) {}
+};
+/// Blendvps instruction: InstX86BaseTernop; overrides emit() and emitIAS().
+template <class Machine>
+class InstX86Blendvps
+    : public InstX86BaseTernop<Machine, InstX86Base<Machine>::Blendvps> {
+public:
+  static InstX86Blendvps *create(Cfg *Func, Variable *Dest, Operand *Source1,
+                                 Operand *Source2) {
+    return new (Func->allocate<InstX86Blendvps>())
+        InstX86Blendvps(Func, Dest, Source1, Source2);
+  }
+
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+
+private:
+  InstX86Blendvps(Cfg *Func, Variable *Dest, Operand *Source1, Operand *Source2)
+      : InstX86BaseTernop<Machine, InstX86Base<Machine>::Blendvps>(
+            Func, Dest, Source1, Source2) {}
+};
+/// Pblendvb instruction: InstX86BaseTernop; overrides emit() and emitIAS().
+template <class Machine>
+class InstX86Pblendvb
+    : public InstX86BaseTernop<Machine, InstX86Base<Machine>::Pblendvb> {
+public:
+  static InstX86Pblendvb *create(Cfg *Func, Variable *Dest, Operand *Source1,
+                                 Operand *Source2) {
+    return new (Func->allocate<InstX86Pblendvb>())
+        InstX86Pblendvb(Func, Dest, Source1, Source2);
+  }
+
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+
+private:
+  InstX86Pblendvb(Cfg *Func, Variable *Dest, Operand *Source1, Operand *Source2)
+      : InstX86BaseTernop<Machine, InstX86Base<Machine>::Pblendvb>(
+            Func, Dest, Source1, Source2) {}
+};
+/// Pextr instruction: InstX86BaseThreeAddressop; overrides both emitters.
+template <class Machine>
+class InstX86Pextr
+    : public InstX86BaseThreeAddressop<Machine, InstX86Base<Machine>::Pextr> {
+public:
+  static InstX86Pextr *create(Cfg *Func, Variable *Dest, Operand *Source0,
+                              Operand *Source1) {
+    return new (Func->allocate<InstX86Pextr>())
+        InstX86Pextr(Func, Dest, Source0, Source1);
+  }
+
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+
+private:
+  InstX86Pextr(Cfg *Func, Variable *Dest, Operand *Source0, Operand *Source1)
+      : InstX86BaseThreeAddressop<Machine, InstX86Base<Machine>::Pextr>(
+            Func, Dest, Source0, Source1) {}
+};
+/// Pshufd instruction: InstX86BaseThreeAddressop; overrides emitIAS() only.
+template <class Machine>
+class InstX86Pshufd
+    : public InstX86BaseThreeAddressop<Machine, InstX86Base<Machine>::Pshufd> {
+public:
+  static InstX86Pshufd *create(Cfg *Func, Variable *Dest, Operand *Source0,
+                               Operand *Source1) {
+    return new (Func->allocate<InstX86Pshufd>())
+        InstX86Pshufd(Func, Dest, Source0, Source1);
+  }
+
+  void emitIAS(const Cfg *Func) const override;
+
+private:
+  InstX86Pshufd(Cfg *Func, Variable *Dest, Operand *Source0, Operand *Source1)
+      : InstX86BaseThreeAddressop<Machine, InstX86Base<Machine>::Pshufd>(
+            Func, Dest, Source0, Source1) {}
+};
+
+/// Base class for a lockable x86 instruction (one that can take a lock prefix).
+template <class Machine>
+class InstX86BaseLockable : public InstX86Base<Machine> {
+  InstX86BaseLockable() = delete;
+  InstX86BaseLockable(const InstX86BaseLockable &) = delete;
+  InstX86BaseLockable &operator=(const InstX86BaseLockable &) = delete;
+
+protected:
+  bool Locked; // Constructor argument; also copied into HasSideEffects.
+
+  InstX86BaseLockable(Cfg *Func,
+                      typename InstX86Base<Machine>::InstKindX86 Kind,
+                      SizeT Maxsrcs, Variable *Dest, bool Locked)
+      : InstX86Base<Machine>(Func, Kind, Maxsrcs, Dest), Locked(Locked) {
+    // Assume that such instructions are used for Atomics and be careful
+    // with optimizations.
+    this->HasSideEffects = Locked;
+  }
+};
+/// Mul instruction - unsigned multiply. create() takes a Variable Source1
+/// and an Operand Source2; the constructor has no inline body.
+template <class Machine> class InstX86Mul final : public InstX86Base<Machine> {
+  InstX86Mul() = delete;
+  InstX86Mul(const InstX86Mul &) = delete;
+  InstX86Mul &operator=(const InstX86Mul &) = delete;
+
+public:
+  static InstX86Mul *create(Cfg *Func, Variable *Dest, Variable *Source1,
+                            Operand *Source2) {
+    return new (Func->allocate<InstX86Mul>())
+        InstX86Mul(Func, Dest, Source1, Source2);
+  }
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+  void dump(const Cfg *Func) const override;
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Mul);
+  }
+
+private:
+  InstX86Mul(Cfg *Func, Variable *Dest, Variable *Source1, Operand *Source2);
+};
+/// Shld instruction - shift across a pair of operands. Both sources are
+/// Variables; emit(), emitIAS() and dump() are declared as overrides.
+template <class Machine> class InstX86Shld final : public InstX86Base<Machine> {
+  InstX86Shld() = delete;
+  InstX86Shld(const InstX86Shld &) = delete;
+  InstX86Shld &operator=(const InstX86Shld &) = delete;
+
+public:
+  static InstX86Shld *create(Cfg *Func, Variable *Dest, Variable *Source1,
+                             Variable *Source2) {
+    return new (Func->allocate<InstX86Shld>())
+        InstX86Shld(Func, Dest, Source1, Source2);
+  }
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+  void dump(const Cfg *Func) const override;
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Shld);
+  }
+
+private:
+  InstX86Shld(Cfg *Func, Variable *Dest, Variable *Source1, Variable *Source2);
+};
+/// Shrd instruction - shift across a pair of operands. Both sources are
+/// Variables; emit(), emitIAS() and dump() are declared as overrides.
+template <class Machine> class InstX86Shrd final : public InstX86Base<Machine> {
+  InstX86Shrd() = delete;
+  InstX86Shrd(const InstX86Shrd &) = delete;
+  InstX86Shrd &operator=(const InstX86Shrd &) = delete;
+
+public:
+  static InstX86Shrd *create(Cfg *Func, Variable *Dest, Variable *Source1,
+                             Variable *Source2) {
+    return new (Func->allocate<InstX86Shrd>())
+        InstX86Shrd(Func, Dest, Source1, Source2);
+  }
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+  void dump(const Cfg *Func) const override;
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Shrd);
+  }
+
+private:
+  InstX86Shrd(Cfg *Func, Variable *Dest, Variable *Source1, Variable *Source2);
+};
+
+/// Conditional move instruction; carries a Traits::Cond::BrCond condition.
+template <class Machine> class InstX86Cmov final : public InstX86Base<Machine> {
+  InstX86Cmov() = delete;
+  InstX86Cmov(const InstX86Cmov &) = delete;
+  InstX86Cmov &operator=(const InstX86Cmov &) = delete;
+
+public:
+  static InstX86Cmov *
+  create(Cfg *Func, Variable *Dest, Operand *Source,
+         typename InstX86Base<Machine>::Traits::Cond::BrCond Cond) {
+    return new (Func->allocate<InstX86Cmov>())
+        InstX86Cmov(Func, Dest, Source, Cond);
+  }
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+  void dump(const Cfg *Func) const override;
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Cmov);
+  }
+
+private: // the constructor is reachable only through create()
+  InstX86Cmov(Cfg *Func, Variable *Dest, Operand *Source,
+              typename InstX86Base<Machine>::Traits::Cond::BrCond Cond);
+
+  typename InstX86Base<Machine>::Traits::Cond::BrCond Condition;
+};
+
+/// Cmpps instruction - compare packed single-precision floating point
+/// values; the comparison predicate is a Traits::Cond::CmppsCond.
+template <class Machine>
+class InstX86Cmpps final : public InstX86Base<Machine> {
+  InstX86Cmpps() = delete;
+  InstX86Cmpps(const InstX86Cmpps &) = delete;
+  InstX86Cmpps &operator=(const InstX86Cmpps &) = delete;
+
+public:
+  static InstX86Cmpps *
+  create(Cfg *Func, Variable *Dest, Operand *Source,
+         typename InstX86Base<Machine>::Traits::Cond::CmppsCond Condition) {
+    return new (Func->allocate<InstX86Cmpps>())
+        InstX86Cmpps(Func, Dest, Source, Condition);
+  }
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+  void dump(const Cfg *Func) const override;
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Cmpps);
+  }
+
+private: // the constructor is reachable only through create()
+  InstX86Cmpps(Cfg *Func, Variable *Dest, Operand *Source,
+               typename InstX86Base<Machine>::Traits::Cond::CmppsCond Cond);
+
+  typename InstX86Base<Machine>::Traits::Cond::CmppsCond Condition;
+};
+
+/// Cmpxchg instruction - cmpxchg <dest>, <desired> will compare if <dest>
+/// equals eax. If so, the ZF is set and <desired> is stored in <dest>.
+/// If not, ZF is cleared and <dest> is copied to eax (or subregister).
+/// <dest> can be a register or memory, while <desired> must be a register.
+/// It is the user's responsibility to mark eax with a FakeDef.
+template <class Machine>
+class InstX86Cmpxchg final : public InstX86BaseLockable<Machine> {
+  InstX86Cmpxchg() = delete;
+  InstX86Cmpxchg(const InstX86Cmpxchg &) = delete;
+  InstX86Cmpxchg &operator=(const InstX86Cmpxchg &) = delete;
+
+public:
+  static InstX86Cmpxchg *create(Cfg *Func, Operand *DestOrAddr, Variable *Eax,
+                                Variable *Desired, bool Locked) {
+    return new (Func->allocate<InstX86Cmpxchg>())
+        InstX86Cmpxchg(Func, DestOrAddr, Eax, Desired, Locked);
+  }
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+  void dump(const Cfg *Func) const override;
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Cmpxchg);
+  }
+
+private: // the constructor is reachable only through create()
+  InstX86Cmpxchg(Cfg *Func, Operand *DestOrAddr, Variable *Eax,
+                 Variable *Desired, bool Locked);
+};
+
+/// Cmpxchg8b instruction - cmpxchg8b <m64> will compare if <m64>
+/// equals edx:eax. If so, the ZF is set and ecx:ebx is stored in <m64>.
+/// If not, ZF is cleared and <m64> is copied to edx:eax.
+/// The caller is responsible for inserting FakeDefs to mark edx
+/// and eax as modified.
+/// <m64> must be a memory operand (Traits::X86OperandMem).
+template <class Machine>
+class InstX86Cmpxchg8b final : public InstX86BaseLockable<Machine> {
+  InstX86Cmpxchg8b() = delete;
+  InstX86Cmpxchg8b(const InstX86Cmpxchg8b &) = delete;
+  InstX86Cmpxchg8b &operator=(const InstX86Cmpxchg8b &) = delete;
+
+public:
+  static InstX86Cmpxchg8b *
+  create(Cfg *Func, typename InstX86Base<Machine>::Traits::X86OperandMem *Dest,
+         Variable *Edx, Variable *Eax, Variable *Ecx, Variable *Ebx,
+         bool Locked) {
+    return new (Func->allocate<InstX86Cmpxchg8b>())
+        InstX86Cmpxchg8b(Func, Dest, Edx, Eax, Ecx, Ebx, Locked);
+  }
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+  void dump(const Cfg *Func) const override;
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst,
+                                           InstX86Base<Machine>::Cmpxchg8b);
+  }
+
+private: // the constructor is reachable only through create()
+  InstX86Cmpxchg8b(Cfg *Func,
+                   typename InstX86Base<Machine>::Traits::X86OperandMem *Dest,
+                   Variable *Edx, Variable *Eax, Variable *Ecx, Variable *Ebx,
+                   bool Locked);
+};
+
+/// Cvt instruction - wrapper for cvtsX2sY where X and Y are in {s,d,i}
+/// as appropriate.  s=float, d=double, i=int.  X and Y are determined
+/// from dest/src types.  Sign and zero extension on the integer
+/// operand need to be done separately.
+template <class Machine> class InstX86Cvt final : public InstX86Base<Machine> {
+  InstX86Cvt() = delete;
+  InstX86Cvt(const InstX86Cvt &) = delete;
+  InstX86Cvt &operator=(const InstX86Cvt &) = delete;
+
+public:
+  enum CvtVariant { Si2ss, Tss2si, Float2float, Dq2ps, Tps2dq };
+  static InstX86Cvt *create(Cfg *Func, Variable *Dest, Operand *Source,
+                            CvtVariant Variant) {
+    return new (Func->allocate<InstX86Cvt>())
+        InstX86Cvt(Func, Dest, Source, Variant);
+  }
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+  void dump(const Cfg *Func) const override;
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Cvt);
+  }
+  bool isTruncating() const { return Variant == Tss2si || Variant == Tps2dq; }
+
+private: // the constructor is reachable only through create()
+  CvtVariant Variant;
+  InstX86Cvt(Cfg *Func, Variable *Dest, Operand *Source, CvtVariant Variant);
+};
+
+/// Icmp - integer compare instruction (cmp); two sources, no Dest parameter.
+template <class Machine> class InstX86Icmp final : public InstX86Base<Machine> {
+  InstX86Icmp() = delete;
+  InstX86Icmp(const InstX86Icmp &) = delete;
+  InstX86Icmp &operator=(const InstX86Icmp &) = delete;
+
+public:
+  static InstX86Icmp *create(Cfg *Func, Operand *Src1, Operand *Src2) {
+    return new (Func->allocate<InstX86Icmp>()) InstX86Icmp(Func, Src1, Src2);
+  }
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+  void dump(const Cfg *Func) const override;
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Icmp);
+  }
+
+private: // the constructor is reachable only through create()
+  InstX86Icmp(Cfg *Func, Operand *Src1, Operand *Src2);
+};
+
+/// ucomiss/ucomisd - floating-point compare instruction; two sources only.
+template <class Machine>
+class InstX86Ucomiss final : public InstX86Base<Machine> {
+  InstX86Ucomiss() = delete;
+  InstX86Ucomiss(const InstX86Ucomiss &) = delete;
+  InstX86Ucomiss &operator=(const InstX86Ucomiss &) = delete;
+
+public:
+  static InstX86Ucomiss *create(Cfg *Func, Operand *Src1, Operand *Src2) {
+    return new (Func->allocate<InstX86Ucomiss>())
+        InstX86Ucomiss(Func, Src1, Src2);
+  }
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+  void dump(const Cfg *Func) const override;
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Ucomiss);
+  }
+
+private: // the constructor is reachable only through create()
+  InstX86Ucomiss(Cfg *Func, Operand *Src1, Operand *Src2);
+};
+
+/// UD2 instruction; takes no operands (create() needs only the Cfg).
+template <class Machine> class InstX86UD2 final : public InstX86Base<Machine> {
+  InstX86UD2() = delete;
+  InstX86UD2(const InstX86UD2 &) = delete;
+  InstX86UD2 &operator=(const InstX86UD2 &) = delete;
+
+public:
+  static InstX86UD2 *create(Cfg *Func) {
+    return new (Func->allocate<InstX86UD2>()) InstX86UD2(Func);
+  }
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+  void dump(const Cfg *Func) const override;
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::UD2);
+  }
+
+private: // the constructor is reachable only through create()
+  explicit InstX86UD2(Cfg *Func);
+};
+
+/// Test instruction; takes two source operands and has no Dest parameter.
+template <class Machine> class InstX86Test final : public InstX86Base<Machine> {
+  InstX86Test() = delete;
+  InstX86Test(const InstX86Test &) = delete;
+  InstX86Test &operator=(const InstX86Test &) = delete;
+
+public:
+  static InstX86Test *create(Cfg *Func, Operand *Source1, Operand *Source2) {
+    return new (Func->allocate<InstX86Test>())
+        InstX86Test(Func, Source1, Source2);
+  }
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+  void dump(const Cfg *Func) const override;
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Test);
+  }
+
+private: // the constructor is reachable only through create()
+  InstX86Test(Cfg *Func, Operand *Source1, Operand *Source2);
+};
+
+/// Mfence instruction; takes no operands (create() needs only the Cfg).
+template <class Machine>
+class InstX86Mfence final : public InstX86Base<Machine> {
+  InstX86Mfence() = delete;
+  InstX86Mfence(const InstX86Mfence &) = delete;
+  InstX86Mfence &operator=(const InstX86Mfence &) = delete;
+
+public:
+  static InstX86Mfence *create(Cfg *Func) {
+    return new (Func->allocate<InstX86Mfence>()) InstX86Mfence(Func);
+  }
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+  void dump(const Cfg *Func) const override;
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Mfence);
+  }
+
+private: // the constructor is reachable only through create()
+  explicit InstX86Mfence(Cfg *Func);
+};
+
+/// Store - essentially a "mov" whose destination is a memory operand rather
+/// than a Variable; it is important for liveness that there is no Dest
+/// operand. NOTE(review): Mem is typed Traits::X86Operand here, whereas
+/// StoreP/StoreQ take Traits::X86OperandMem - confirm this is intended.
+template <class Machine>
+class InstX86Store final : public InstX86Base<Machine> {
+  InstX86Store() = delete;
+  InstX86Store(const InstX86Store &) = delete;
+  InstX86Store &operator=(const InstX86Store &) = delete;
+
+public:
+  static InstX86Store *
+  create(Cfg *Func, Operand *Value,
+         typename InstX86Base<Machine>::Traits::X86Operand *Mem) {
+    return new (Func->allocate<InstX86Store>()) InstX86Store(Func, Value, Mem);
+  }
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+  void dump(const Cfg *Func) const override;
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Store);
+  }
+
+private: // the constructor is reachable only through create()
+  InstX86Store(Cfg *Func, Operand *Value,
+               typename InstX86Base<Machine>::Traits::X86Operand *Mem);
+};
+
+/// This is essentially a vector "mov" instruction with an
+/// InstX86Base<Machine>::Traits::X86OperandMem
+/// operand instead of Variable as the destination.  It's important
+/// for liveness that there is no Dest operand. The source must be an
+/// Xmm register, since Dest is mem.
+template <class Machine>
+class InstX86StoreP final : public InstX86Base<Machine> {
+  InstX86StoreP() = delete;
+  InstX86StoreP(const InstX86StoreP &) = delete;
+  InstX86StoreP &operator=(const InstX86StoreP &) = delete;
+
+public:
+  static InstX86StoreP *
+  create(Cfg *Func, Variable *Value,
+         typename InstX86Base<Machine>::Traits::X86OperandMem *Mem) {
+    return new (Func->allocate<InstX86StoreP>())
+        InstX86StoreP(Func, Value, Mem);
+  }
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+  void dump(const Cfg *Func) const override;
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::StoreP);
+  }
+
+private: // the constructor is reachable only through create()
+  InstX86StoreP(Cfg *Func, Variable *Value,
+                typename InstX86Base<Machine>::Traits::X86OperandMem *Mem);
+};
+
+template <class Machine> // presumably a quadword StoreP variant (confirm)
+class InstX86StoreQ final : public InstX86Base<Machine> {
+  InstX86StoreQ() = delete; // objects are only made via create()
+  InstX86StoreQ(const InstX86StoreQ &) = delete;
+  InstX86StoreQ &operator=(const InstX86StoreQ &) = delete;
+
+public:
+  static InstX86StoreQ *
+  create(Cfg *Func, Variable *Value,
+         typename InstX86Base<Machine>::Traits::X86OperandMem *Mem) {
+    return new (Func->allocate<InstX86StoreQ>())
+        InstX86StoreQ(Func, Value, Mem);
+  }
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+  void dump(const Cfg *Func) const override;
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::StoreQ);
+  }
+
+private: // the constructor is reachable only through create()
+  InstX86StoreQ(Cfg *Func, Variable *Value,
+                typename InstX86Base<Machine>::Traits::X86OperandMem *Mem);
+};
+
+/// Nop instructions of varying length, identified by a NopVariant value.
+template <class Machine> class InstX86Nop final : public InstX86Base<Machine> {
+  InstX86Nop() = delete;
+  InstX86Nop(const InstX86Nop &) = delete;
+  InstX86Nop &operator=(const InstX86Nop &) = delete;
+
+public:
+  // TODO: Replace with enum.
+  using NopVariant = unsigned;
+
+  static InstX86Nop *create(Cfg *Func, NopVariant Variant) {
+    return new (Func->allocate<InstX86Nop>()) InstX86Nop(Func, Variant);
+  }
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+  void dump(const Cfg *Func) const override;
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Nop);
+  }
+
+private: // the constructor is reachable only through create()
+  InstX86Nop(Cfg *Func, SizeT Length); // Length receives create()'s Variant
+
+  NopVariant Variant;
+};
+
+/// Fld - load a value onto the x87 FP stack; takes one source Operand.
+template <class Machine> class InstX86Fld final : public InstX86Base<Machine> {
+  InstX86Fld() = delete;
+  InstX86Fld(const InstX86Fld &) = delete;
+  InstX86Fld &operator=(const InstX86Fld &) = delete;
+
+public:
+  static InstX86Fld *create(Cfg *Func, Operand *Src) {
+    return new (Func->allocate<InstX86Fld>()) InstX86Fld(Func, Src);
+  }
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+  void dump(const Cfg *Func) const override;
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Fld);
+  }
+
+private: // the constructor is reachable only through create()
+  InstX86Fld(Cfg *Func, Operand *Src);
+};
+
+/// Fstp - store x87 st(0) into memory and pop st(0); Dest is a Variable.
+template <class Machine> class InstX86Fstp final : public InstX86Base<Machine> {
+  InstX86Fstp() = delete;
+  InstX86Fstp(const InstX86Fstp &) = delete;
+  InstX86Fstp &operator=(const InstX86Fstp &) = delete;
+
+public:
+  static InstX86Fstp *create(Cfg *Func, Variable *Dest) {
+    return new (Func->allocate<InstX86Fstp>()) InstX86Fstp(Func, Dest);
+  }
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+  void dump(const Cfg *Func) const override;
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Fstp);
+  }
+
+private: // the constructor is reachable only through create()
+  InstX86Fstp(Cfg *Func, Variable *Dest);
+};
+
+template <class Machine> class InstX86Pop final : public InstX86Base<Machine> {
+  InstX86Pop() = delete; // Pop: created only via create(Func, Dest)
+  InstX86Pop(const InstX86Pop &) = delete;
+  InstX86Pop &operator=(const InstX86Pop &) = delete;
+
+public:
+  static InstX86Pop *create(Cfg *Func, Variable *Dest) {
+    return new (Func->allocate<InstX86Pop>()) InstX86Pop(Func, Dest);
+  }
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+  void dump(const Cfg *Func) const override;
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Pop);
+  }
+
+private: // the constructor is reachable only through create()
+  InstX86Pop(Cfg *Func, Variable *Dest);
+};
+
+template <class Machine> class InstX86Push final : public InstX86Base<Machine> {
+  InstX86Push() = delete; // Push: created only via create(Func, Source)
+  InstX86Push(const InstX86Push &) = delete;
+  InstX86Push &operator=(const InstX86Push &) = delete;
+
+public:
+  static InstX86Push *create(Cfg *Func, Variable *Source) {
+    return new (Func->allocate<InstX86Push>()) InstX86Push(Func, Source);
+  }
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+  void dump(const Cfg *Func) const override;
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Push);
+  }
+
+private: // the constructor is reachable only through create()
+  InstX86Push(Cfg *Func, Variable *Source);
+};
+
+/// Ret instruction.  Currently only supports the "ret" version that
+/// does not pop arguments.  This instruction takes a Source operand
+/// (for non-void returning functions; defaults to nullptr) for liveness
+/// analysis, though a FakeUse before the ret would do just as well.
+template <class Machine> class InstX86Ret final : public InstX86Base<Machine> {
+  InstX86Ret() = delete;
+  InstX86Ret(const InstX86Ret &) = delete;
+  InstX86Ret &operator=(const InstX86Ret &) = delete;
+
+public:
+  static InstX86Ret *create(Cfg *Func, Variable *Source = nullptr) {
+    return new (Func->allocate<InstX86Ret>()) InstX86Ret(Func, Source);
+  }
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+  void dump(const Cfg *Func) const override;
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Ret);
+  }
+
+private: // the constructor is reachable only through create()
+  InstX86Ret(Cfg *Func, Variable *Source);
+};
+
+/// Setcc - conditional set-byte instruction, parameterized by a BrCond.
+template <class Machine>
+class InstX86Setcc final : public InstX86Base<Machine> {
+  InstX86Setcc() = delete;
+  InstX86Setcc(const InstX86Setcc &) = delete;
+  InstX86Setcc &operator=(const InstX86Setcc &) = delete;
+
+public:
+  static InstX86Setcc *
+  create(Cfg *Func, Variable *Dest,
+         typename InstX86Base<Machine>::Traits::Cond::BrCond Cond) {
+    return new (Func->allocate<InstX86Setcc>()) InstX86Setcc(Func, Dest, Cond);
+  }
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+  void dump(const Cfg *Func) const override;
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Setcc);
+  }
+
+private: // the constructor is reachable only through create()
+  InstX86Setcc(Cfg *Func, Variable *Dest,
+               typename InstX86Base<Machine>::Traits::Cond::BrCond Cond);
+
+  const typename InstX86Base<Machine>::Traits::Cond::BrCond Condition;
+};
+
+/// Exchanging Add instruction.  Exchanges the first operand (destination
+/// operand) with the second operand (source operand), then loads the sum
+/// of the two values into the destination operand. The destination may be
+/// a register or memory, while the source must be a register.
+///
+/// Both the dest and source are updated. The caller should then insert a
+/// FakeDef to reflect the second update.
+template <class Machine>
+class InstX86Xadd final : public InstX86BaseLockable<Machine> {
+  InstX86Xadd() = delete;
+  InstX86Xadd(const InstX86Xadd &) = delete;
+  InstX86Xadd &operator=(const InstX86Xadd &) = delete;
+
+public:
+  static InstX86Xadd *create(Cfg *Func, Operand *Dest, Variable *Source,
+                             bool Locked) {
+    return new (Func->allocate<InstX86Xadd>())
+        InstX86Xadd(Func, Dest, Source, Locked);
+  }
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+  void dump(const Cfg *Func) const override;
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Xadd);
+  }
+
+private: // the constructor is reachable only through create()
+  InstX86Xadd(Cfg *Func, Operand *Dest, Variable *Source, bool Locked);
+};
+
+/// Exchange instruction.  Exchanges the first operand (destination
+/// operand) with the second operand (source operand). At least one of
+/// the operands must be a register (and the other can be reg or mem).
+/// Both the Dest and Source are updated. If there is a memory operand,
+/// then the instruction is automatically "locked" without the need for
+/// a lock prefix; accordingly create() has no Locked parameter.
+template <class Machine> class InstX86Xchg final : public InstX86Base<Machine> {
+  InstX86Xchg() = delete;
+  InstX86Xchg(const InstX86Xchg &) = delete;
+  InstX86Xchg &operator=(const InstX86Xchg &) = delete;
+
+public:
+  static InstX86Xchg *create(Cfg *Func, Operand *Dest, Variable *Source) {
+    return new (Func->allocate<InstX86Xchg>()) InstX86Xchg(Func, Dest, Source);
+  }
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+  void dump(const Cfg *Func) const override;
+  static bool classof(const Inst *Inst) {
+    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Xchg);
+  }
+
+private: // the constructor is reachable only through create()
+  InstX86Xchg(Cfg *Func, Operand *Dest, Variable *Source);
+};
+
+/// struct Insts is a template that gives a target short names for all the
+/// X86 instruction classes through a single alias declaration, e.g.:
+///
+/// using Insts = ::Ice::X86Internal::Insts<Target>;
+template <class Machine> struct Insts {
+  using FakeRMW = InstX86FakeRMW<Machine>;
+  using Label = InstX86Label<Machine>;
+
+  using AdjustStack = InstX86AdjustStack<Machine>;
+  using Call = InstX86Call<Machine>;
+
+  using Br = InstX86Br<Machine>;
+  using Jmp = InstX86Jmp<Machine>;
+  using Bswap = InstX86Bswap<Machine>;
+  using Neg = InstX86Neg<Machine>;
+  using Bsf = InstX86Bsf<Machine>;
+  using Bsr = InstX86Bsr<Machine>;
+  using Lea = InstX86Lea<Machine>;
+  using Cbwdq = InstX86Cbwdq<Machine>;
+  using Movsx = InstX86Movsx<Machine>;
+  using Movzx = InstX86Movzx<Machine>;
+  using Movd = InstX86Movd<Machine>;
+  using Sqrtss = InstX86Sqrtss<Machine>;
+  using Mov = InstX86Mov<Machine>;
+  using Movp = InstX86Movp<Machine>;
+  using Movq = InstX86Movq<Machine>;
+  using Add = InstX86Add<Machine>;
+  using AddRMW = InstX86AddRMW<Machine>;
+  using Addps = InstX86Addps<Machine>;
+  using Adc = InstX86Adc<Machine>;
+  using AdcRMW = InstX86AdcRMW<Machine>;
+  using Addss = InstX86Addss<Machine>;
+  using Padd = InstX86Padd<Machine>;
+  using Sub = InstX86Sub<Machine>;
+  using SubRMW = InstX86SubRMW<Machine>;
+  using Subps = InstX86Subps<Machine>;
+  using Subss = InstX86Subss<Machine>;
+  using Sbb = InstX86Sbb<Machine>;
+  using SbbRMW = InstX86SbbRMW<Machine>;
+  using Psub = InstX86Psub<Machine>;
+  using And = InstX86And<Machine>;
+  using AndRMW = InstX86AndRMW<Machine>;
+  using Pand = InstX86Pand<Machine>;
+  using Pandn = InstX86Pandn<Machine>;
+  using Or = InstX86Or<Machine>;
+  using OrRMW = InstX86OrRMW<Machine>;
+  using Por = InstX86Por<Machine>;
+  using Xor = InstX86Xor<Machine>;
+  using XorRMW = InstX86XorRMW<Machine>;
+  using Pxor = InstX86Pxor<Machine>;
+  using Imul = InstX86Imul<Machine>;
+  using Mulps = InstX86Mulps<Machine>;
+  using Mulss = InstX86Mulss<Machine>;
+  using Pmull = InstX86Pmull<Machine>;
+  using Pmuludq = InstX86Pmuludq<Machine>;
+  using Divps = InstX86Divps<Machine>;
+  using Divss = InstX86Divss<Machine>;
+  using Rol = InstX86Rol<Machine>;
+  using Shl = InstX86Shl<Machine>;
+  using Psll = InstX86Psll<Machine>;
+  using Psrl = InstX86Psrl<Machine>;
+  using Shr = InstX86Shr<Machine>;
+  using Sar = InstX86Sar<Machine>;
+  using Psra = InstX86Psra<Machine>;
+  using Pcmpeq = InstX86Pcmpeq<Machine>;
+  using Pcmpgt = InstX86Pcmpgt<Machine>;
+  using MovssRegs = InstX86MovssRegs<Machine>;
+  using Idiv = InstX86Idiv<Machine>;
+  using Div = InstX86Div<Machine>;
+  using Insertps = InstX86Insertps<Machine>;
+  using Pinsr = InstX86Pinsr<Machine>;
+  using Shufps = InstX86Shufps<Machine>;
+  using Blendvps = InstX86Blendvps<Machine>;
+  using Pblendvb = InstX86Pblendvb<Machine>;
+  using Pextr = InstX86Pextr<Machine>;
+  using Pshufd = InstX86Pshufd<Machine>;
+  using Lockable = InstX86BaseLockable<Machine>; // base of Cmpxchg*, Xadd
+  using Mul = InstX86Mul<Machine>;
+  using Shld = InstX86Shld<Machine>;
+  using Shrd = InstX86Shrd<Machine>;
+  using Cmov = InstX86Cmov<Machine>;
+  using Cmpps = InstX86Cmpps<Machine>;
+  using Cmpxchg = InstX86Cmpxchg<Machine>;
+  using Cmpxchg8b = InstX86Cmpxchg8b<Machine>;
+  using Cvt = InstX86Cvt<Machine>;
+  using Icmp = InstX86Icmp<Machine>;
+  using Ucomiss = InstX86Ucomiss<Machine>;
+  using UD2 = InstX86UD2<Machine>;
+  using Test = InstX86Test<Machine>;
+  using Mfence = InstX86Mfence<Machine>;
+  using Store = InstX86Store<Machine>;
+  using StoreP = InstX86StoreP<Machine>;
+  using StoreQ = InstX86StoreQ<Machine>;
+  using Nop = InstX86Nop<Machine>;
+  using Fld = InstX86Fld<Machine>;
+  using Fstp = InstX86Fstp<Machine>;
+  using Pop = InstX86Pop<Machine>;
+  using Push = InstX86Push<Machine>;
+  using Ret = InstX86Ret<Machine>;
+  using Setcc = InstX86Setcc<Machine>;
+  using Xadd = InstX86Xadd<Machine>;
+  using Xchg = InstX86Xchg<Machine>;
+};
+
+/// X86 Instructions have static data (particularly, opcodes and instruction
+/// emitters). Each X86 target needs to define all of these, so this macro is
+/// provided so that, if something changes, then all X86 targets will be updated
+/// automatically.
+#define X86INSTS_DEFINE_STATIC_DATA(Machine)                                   \
+  namespace Ice {                                                              \
+  namespace X86Internal {                                                      \
+  /* In-place ops */                                                           \
+  template <> const char *InstX86Bswap<Machine>::Base::Opcode = "bswap";       \
+  template <> const char *InstX86Neg<Machine>::Base::Opcode = "neg";           \
+  /* Unary ops */                                                              \
+  template <> const char *InstX86Bsf<Machine>::Base::Opcode = "bsf";           \
+  template <> const char *InstX86Bsr<Machine>::Base::Opcode = "bsr";           \
+  template <> const char *InstX86Lea<Machine>::Base::Opcode = "lea";           \
+  template <> const char *InstX86Movd<Machine>::Base::Opcode = "movd";         \
+  template <> const char *InstX86Movsx<Machine>::Base::Opcode = "movs";        \
+  template <> const char *InstX86Movzx<Machine>::Base::Opcode = "movz";        \
+  template <> const char *InstX86Sqrtss<Machine>::Base::Opcode = "sqrtss";     \
+  template <> const char *InstX86Cbwdq<Machine>::Base::Opcode = "cbw/cwd/cdq"; \
+  /* Mov-like ops */                                                           \
+  template <> const char *InstX86Mov<Machine>::Base::Opcode = "mov";           \
+  template <> const char *InstX86Movp<Machine>::Base::Opcode = "movups";       \
+  template <> const char *InstX86Movq<Machine>::Base::Opcode = "movq";         \
+  /* Binary ops */                                                             \
+  template <> const char *InstX86Add<Machine>::Base::Opcode = "add";           \
+  template <> const char *InstX86AddRMW<Machine>::Base::Opcode = "add";        \
+  template <> const char *InstX86Addps<Machine>::Base::Opcode = "addps";       \
+  template <> const char *InstX86Adc<Machine>::Base::Opcode = "adc";           \
+  template <> const char *InstX86AdcRMW<Machine>::Base::Opcode = "adc";        \
+  template <> const char *InstX86Addss<Machine>::Base::Opcode = "addss";       \
+  template <> const char *InstX86Padd<Machine>::Base::Opcode = "padd";         \
+  template <> const char *InstX86Sub<Machine>::Base::Opcode = "sub";           \
+  template <> const char *InstX86SubRMW<Machine>::Base::Opcode = "sub";        \
+  template <> const char *InstX86Subps<Machine>::Base::Opcode = "subps";       \
+  template <> const char *InstX86Subss<Machine>::Base::Opcode = "subss";       \
+  template <> const char *InstX86Sbb<Machine>::Base::Opcode = "sbb";           \
+  template <> const char *InstX86SbbRMW<Machine>::Base::Opcode = "sbb";        \
+  template <> const char *InstX86Psub<Machine>::Base::Opcode = "psub";         \
+  template <> const char *InstX86And<Machine>::Base::Opcode = "and";           \
+  template <> const char *InstX86AndRMW<Machine>::Base::Opcode = "and";        \
+  template <> const char *InstX86Pand<Machine>::Base::Opcode = "pand";         \
+  template <> const char *InstX86Pandn<Machine>::Base::Opcode = "pandn";       \
+  template <> const char *InstX86Or<Machine>::Base::Opcode = "or";             \
+  template <> const char *InstX86OrRMW<Machine>::Base::Opcode = "or";          \
+  template <> const char *InstX86Por<Machine>::Base::Opcode = "por";           \
+  template <> const char *InstX86Xor<Machine>::Base::Opcode = "xor";           \
+  template <> const char *InstX86XorRMW<Machine>::Base::Opcode = "xor";        \
+  template <> const char *InstX86Pxor<Machine>::Base::Opcode = "pxor";         \
+  template <> const char *InstX86Imul<Machine>::Base::Opcode = "imul";         \
+  template <> const char *InstX86Mulps<Machine>::Base::Opcode = "mulps";       \
+  template <> const char *InstX86Mulss<Machine>::Base::Opcode = "mulss";       \
+  template <> const char *InstX86Pmull<Machine>::Base::Opcode = "pmull";       \
+  template <> const char *InstX86Pmuludq<Machine>::Base::Opcode = "pmuludq";   \
+  template <> const char *InstX86Div<Machine>::Base::Opcode = "div";           \
+  template <> const char *InstX86Divps<Machine>::Base::Opcode = "divps";       \
+  template <> const char *InstX86Idiv<Machine>::Base::Opcode = "idiv";         \
+  template <> const char *InstX86Divss<Machine>::Base::Opcode = "divss";       \
+  template <> const char *InstX86Rol<Machine>::Base::Opcode = "rol";           \
+  template <> const char *InstX86Shl<Machine>::Base::Opcode = "shl";           \
+  template <> const char *InstX86Psll<Machine>::Base::Opcode = "psll";         \
+  template <> const char *InstX86Shr<Machine>::Base::Opcode = "shr";           \
+  template <> const char *InstX86Sar<Machine>::Base::Opcode = "sar";           \
+  template <> const char *InstX86Psra<Machine>::Base::Opcode = "psra";         \
+  template <> const char *InstX86Psrl<Machine>::Base::Opcode = "psrl";         \
+  template <> const char *InstX86Pcmpeq<Machine>::Base::Opcode = "pcmpeq";     \
+  template <> const char *InstX86Pcmpgt<Machine>::Base::Opcode = "pcmpgt";     \
+  template <> const char *InstX86MovssRegs<Machine>::Base::Opcode = "movss";   \
+  /* Ternary ops */                                                            \
+  template <> const char *InstX86Insertps<Machine>::Base::Opcode = "insertps"; \
+  template <> const char *InstX86Shufps<Machine>::Base::Opcode = "shufps";     \
+  template <> const char *InstX86Pinsr<Machine>::Base::Opcode = "pinsr";       \
+  template <> const char *InstX86Blendvps<Machine>::Base::Opcode = "blendvps"; \
+  template <> const char *InstX86Pblendvb<Machine>::Base::Opcode = "pblendvb"; \
+  /* Three address ops */                                                      \
+  template <> const char *InstX86Pextr<Machine>::Base::Opcode = "pextr";       \
+  template <> const char *InstX86Pshufd<Machine>::Base::Opcode = "pshufd";     \
+  /* Inplace GPR ops */                                                        \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::GPREmitterOneOp               \
+      InstX86Bswap<Machine>::Base::Emitter = {                                 \
+          &InstX86Base<Machine>::Traits::Assembler::bswap,                     \
+          nullptr /* only a reg form exists */                                 \
+  };                                                                           \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::GPREmitterOneOp               \
+      InstX86Neg<Machine>::Base::Emitter = {                                   \
+          &InstX86Base<Machine>::Traits::Assembler::neg,                       \
+          &InstX86Base<Machine>::Traits::Assembler::neg};                      \
+                                                                               \
+  /* Unary GPR ops */                                                          \
+  template <> /* uses specialized emitter. */                                  \
+  const InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp               \
+      InstX86Cbwdq<Machine>::Base::Emitter = {nullptr, nullptr, nullptr};      \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp               \
+      InstX86Bsf<Machine>::Base::Emitter = {                                   \
+          &InstX86Base<Machine>::Traits::Assembler::bsf,                       \
+          &InstX86Base<Machine>::Traits::Assembler::bsf, nullptr};             \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp               \
+      InstX86Bsr<Machine>::Base::Emitter = {                                   \
+          &InstX86Base<Machine>::Traits::Assembler::bsr,                       \
+          &InstX86Base<Machine>::Traits::Assembler::bsr, nullptr};             \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp               \
+      InstX86Lea<Machine>::Base::Emitter = {                                   \
+          /* reg/reg and reg/imm are illegal */ nullptr,                       \
+          &InstX86Base<Machine>::Traits::Assembler::lea, nullptr};             \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp               \
+      InstX86Movsx<Machine>::Base::Emitter = {                                 \
+          &InstX86Base<Machine>::Traits::Assembler::movsx,                     \
+          &InstX86Base<Machine>::Traits::Assembler::movsx, nullptr};           \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp               \
+      InstX86Movzx<Machine>::Base::Emitter = {                                 \
+          &InstX86Base<Machine>::Traits::Assembler::movzx,                     \
+          &InstX86Base<Machine>::Traits::Assembler::movzx, nullptr};           \
+                                                                               \
+  /* Unary XMM ops */                                                          \
+  template <> /* uses specialized emitter. */                                  \
+  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
+      InstX86Movd<Machine>::Base::Emitter = {nullptr, nullptr};                \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
+      InstX86Sqrtss<Machine>::Base::Emitter = {                                \
+          &InstX86Base<Machine>::Traits::Assembler::sqrtss,                    \
+          &InstX86Base<Machine>::Traits::Assembler::sqrtss};                   \
+                                                                               \
+  /* Binary GPR ops */                                                         \
+  template <> /* uses specialized emitter. */                                  \
+  const InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp               \
+      InstX86Imul<Machine>::Base::Emitter = {nullptr, nullptr, nullptr};       \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp               \
+      InstX86Add<Machine>::Base::Emitter = {                                   \
+          &InstX86Base<Machine>::Traits::Assembler::add,                       \
+          &InstX86Base<Machine>::Traits::Assembler::add,                       \
+          &InstX86Base<Machine>::Traits::Assembler::add};                      \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::GPREmitterAddrOp              \
+      InstX86AddRMW<Machine>::Base::Emitter = {                                \
+          &InstX86Base<Machine>::Traits::Assembler::add,                       \
+          &InstX86Base<Machine>::Traits::Assembler::add};                      \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp               \
+      InstX86Adc<Machine>::Base::Emitter = {                                   \
+          &InstX86Base<Machine>::Traits::Assembler::adc,                       \
+          &InstX86Base<Machine>::Traits::Assembler::adc,                       \
+          &InstX86Base<Machine>::Traits::Assembler::adc};                      \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::GPREmitterAddrOp              \
+      InstX86AdcRMW<Machine>::Base::Emitter = {                                \
+          &InstX86Base<Machine>::Traits::Assembler::adc,                       \
+          &InstX86Base<Machine>::Traits::Assembler::adc};                      \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp               \
+      InstX86And<Machine>::Base::Emitter = {                                   \
+          &InstX86Base<Machine>::Traits::Assembler::And,                       \
+          &InstX86Base<Machine>::Traits::Assembler::And,                       \
+          &InstX86Base<Machine>::Traits::Assembler::And};                      \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::GPREmitterAddrOp              \
+      InstX86AndRMW<Machine>::Base::Emitter = {                                \
+          &InstX86Base<Machine>::Traits::Assembler::And,                       \
+          &InstX86Base<Machine>::Traits::Assembler::And};                      \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp               \
+      InstX86Or<Machine>::Base::Emitter = {                                    \
+          &InstX86Base<Machine>::Traits::Assembler::Or,                        \
+          &InstX86Base<Machine>::Traits::Assembler::Or,                        \
+          &InstX86Base<Machine>::Traits::Assembler::Or};                       \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::GPREmitterAddrOp              \
+      InstX86OrRMW<Machine>::Base::Emitter = {                                 \
+          &InstX86Base<Machine>::Traits::Assembler::Or,                        \
+          &InstX86Base<Machine>::Traits::Assembler::Or};                       \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp               \
+      InstX86Sbb<Machine>::Base::Emitter = {                                   \
+          &InstX86Base<Machine>::Traits::Assembler::sbb,                       \
+          &InstX86Base<Machine>::Traits::Assembler::sbb,                       \
+          &InstX86Base<Machine>::Traits::Assembler::sbb};                      \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::GPREmitterAddrOp              \
+      InstX86SbbRMW<Machine>::Base::Emitter = {                                \
+          &InstX86Base<Machine>::Traits::Assembler::sbb,                       \
+          &InstX86Base<Machine>::Traits::Assembler::sbb};                      \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp               \
+      InstX86Sub<Machine>::Base::Emitter = {                                   \
+          &InstX86Base<Machine>::Traits::Assembler::sub,                       \
+          &InstX86Base<Machine>::Traits::Assembler::sub,                       \
+          &InstX86Base<Machine>::Traits::Assembler::sub};                      \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::GPREmitterAddrOp              \
+      InstX86SubRMW<Machine>::Base::Emitter = {                                \
+          &InstX86Base<Machine>::Traits::Assembler::sub,                       \
+          &InstX86Base<Machine>::Traits::Assembler::sub};                      \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp               \
+      InstX86Xor<Machine>::Base::Emitter = {                                   \
+          &InstX86Base<Machine>::Traits::Assembler::Xor,                       \
+          &InstX86Base<Machine>::Traits::Assembler::Xor,                       \
+          &InstX86Base<Machine>::Traits::Assembler::Xor};                      \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::GPREmitterAddrOp              \
+      InstX86XorRMW<Machine>::Base::Emitter = {                                \
+          &InstX86Base<Machine>::Traits::Assembler::Xor,                       \
+          &InstX86Base<Machine>::Traits::Assembler::Xor};                      \
+                                                                               \
+  /* Binary Shift GPR ops */                                                   \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::GPREmitterShiftOp             \
+      InstX86Rol<Machine>::Base::Emitter = {                                   \
+          &InstX86Base<Machine>::Traits::Assembler::rol,                       \
+          &InstX86Base<Machine>::Traits::Assembler::rol};                      \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::GPREmitterShiftOp             \
+      InstX86Sar<Machine>::Base::Emitter = {                                   \
+          &InstX86Base<Machine>::Traits::Assembler::sar,                       \
+          &InstX86Base<Machine>::Traits::Assembler::sar};                      \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::GPREmitterShiftOp             \
+      InstX86Shl<Machine>::Base::Emitter = {                                   \
+          &InstX86Base<Machine>::Traits::Assembler::shl,                       \
+          &InstX86Base<Machine>::Traits::Assembler::shl};                      \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::GPREmitterShiftOp             \
+      InstX86Shr<Machine>::Base::Emitter = {                                   \
+          &InstX86Base<Machine>::Traits::Assembler::shr,                       \
+          &InstX86Base<Machine>::Traits::Assembler::shr};                      \
+                                                                               \
+  /* Binary XMM ops */                                                         \
+  template <> /* uses specialized emitter. */                                  \
+  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
+      InstX86MovssRegs<Machine>::Base::Emitter = {nullptr, nullptr};           \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
+      InstX86Addss<Machine>::Base::Emitter = {                                 \
+          &InstX86Base<Machine>::Traits::Assembler::addss,                     \
+          &InstX86Base<Machine>::Traits::Assembler::addss};                    \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
+      InstX86Addps<Machine>::Base::Emitter = {                                 \
+          &InstX86Base<Machine>::Traits::Assembler::addps,                     \
+          &InstX86Base<Machine>::Traits::Assembler::addps};                    \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
+      InstX86Divss<Machine>::Base::Emitter = {                                 \
+          &InstX86Base<Machine>::Traits::Assembler::divss,                     \
+          &InstX86Base<Machine>::Traits::Assembler::divss};                    \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
+      InstX86Divps<Machine>::Base::Emitter = {                                 \
+          &InstX86Base<Machine>::Traits::Assembler::divps,                     \
+          &InstX86Base<Machine>::Traits::Assembler::divps};                    \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
+      InstX86Mulss<Machine>::Base::Emitter = {                                 \
+          &InstX86Base<Machine>::Traits::Assembler::mulss,                     \
+          &InstX86Base<Machine>::Traits::Assembler::mulss};                    \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
+      InstX86Mulps<Machine>::Base::Emitter = {                                 \
+          &InstX86Base<Machine>::Traits::Assembler::mulps,                     \
+          &InstX86Base<Machine>::Traits::Assembler::mulps};                    \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
+      InstX86Padd<Machine>::Base::Emitter = {                                  \
+          &InstX86Base<Machine>::Traits::Assembler::padd,                      \
+          &InstX86Base<Machine>::Traits::Assembler::padd};                     \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
+      InstX86Pand<Machine>::Base::Emitter = {                                  \
+          &InstX86Base<Machine>::Traits::Assembler::pand,                      \
+          &InstX86Base<Machine>::Traits::Assembler::pand};                     \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
+      InstX86Pandn<Machine>::Base::Emitter = {                                 \
+          &InstX86Base<Machine>::Traits::Assembler::pandn,                     \
+          &InstX86Base<Machine>::Traits::Assembler::pandn};                    \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
+      InstX86Pcmpeq<Machine>::Base::Emitter = {                                \
+          &InstX86Base<Machine>::Traits::Assembler::pcmpeq,                    \
+          &InstX86Base<Machine>::Traits::Assembler::pcmpeq};                   \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
+      InstX86Pcmpgt<Machine>::Base::Emitter = {                                \
+          &InstX86Base<Machine>::Traits::Assembler::pcmpgt,                    \
+          &InstX86Base<Machine>::Traits::Assembler::pcmpgt};                   \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
+      InstX86Pmull<Machine>::Base::Emitter = {                                 \
+          &InstX86Base<Machine>::Traits::Assembler::pmull,                     \
+          &InstX86Base<Machine>::Traits::Assembler::pmull};                    \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
+      InstX86Pmuludq<Machine>::Base::Emitter = {                               \
+          &InstX86Base<Machine>::Traits::Assembler::pmuludq,                   \
+          &InstX86Base<Machine>::Traits::Assembler::pmuludq};                  \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
+      InstX86Por<Machine>::Base::Emitter = {                                   \
+          &InstX86Base<Machine>::Traits::Assembler::por,                       \
+          &InstX86Base<Machine>::Traits::Assembler::por};                      \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
+      InstX86Psub<Machine>::Base::Emitter = {                                  \
+          &InstX86Base<Machine>::Traits::Assembler::psub,                      \
+          &InstX86Base<Machine>::Traits::Assembler::psub};                     \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
+      InstX86Pxor<Machine>::Base::Emitter = {                                  \
+          &InstX86Base<Machine>::Traits::Assembler::pxor,                      \
+          &InstX86Base<Machine>::Traits::Assembler::pxor};                     \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
+      InstX86Subss<Machine>::Base::Emitter = {                                 \
+          &InstX86Base<Machine>::Traits::Assembler::subss,                     \
+          &InstX86Base<Machine>::Traits::Assembler::subss};                    \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
+      InstX86Subps<Machine>::Base::Emitter = {                                 \
+          &InstX86Base<Machine>::Traits::Assembler::subps,                     \
+          &InstX86Base<Machine>::Traits::Assembler::subps};                    \
+                                                                               \
+  /* Binary XMM Shift ops */                                                   \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterShiftOp             \
+      InstX86Psll<Machine>::Base::Emitter = {                                  \
+          &InstX86Base<Machine>::Traits::Assembler::psll,                      \
+          &InstX86Base<Machine>::Traits::Assembler::psll,                      \
+          &InstX86Base<Machine>::Traits::Assembler::psll};                     \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterShiftOp             \
+      InstX86Psra<Machine>::Base::Emitter = {                                  \
+          &InstX86Base<Machine>::Traits::Assembler::psra,                      \
+          &InstX86Base<Machine>::Traits::Assembler::psra,                      \
+          &InstX86Base<Machine>::Traits::Assembler::psra};                     \
+  template <>                                                                  \
+  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterShiftOp             \
+      InstX86Psrl<Machine>::Base::Emitter = {                                  \
+          &InstX86Base<Machine>::Traits::Assembler::psrl,                      \
+          &InstX86Base<Machine>::Traits::Assembler::psrl,                      \
+          &InstX86Base<Machine>::Traits::Assembler::psrl};                     \
+  }                                                                            \
+  }
+
+} // end of namespace X86Internal
+} // end of namespace Ice
+
+#include "IceInstX86BaseImpl.h"
+
+#endif // SUBZERO_SRC_ICEINSTX86BASE_H
diff --git a/src/IceInstX86BaseImpl.h b/src/IceInstX86BaseImpl.h
new file mode 100644
index 0000000..2d9ee0f
--- /dev/null
+++ b/src/IceInstX86BaseImpl.h
@@ -0,0 +1,3162 @@
+//===- subzero/src/IceInstX86BaseImpl.h - Generic X86 instructions -*- C++ -*=//
+//
+//                        The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements the InstX86Base class and its descendants.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef SUBZERO_SRC_ICEINSTX86BASEIMPL_H
+#define SUBZERO_SRC_ICEINSTX86BASEIMPL_H
+
+#include "IceInstX86Base.h"
+
+#include "IceAssemblerX86Base.h"
+#include "IceCfg.h"
+#include "IceCfgNode.h"
+#include "IceDefs.h"
+#include "IceInst.h"
+#include "IceOperand.h"
+#include "IceTargetLowering.h"
+
+namespace Ice {
+
+namespace X86Internal {
+/// Returns the WidthString entry of Traits::TypeAttributes for type \p Ty.
+template <class Machine>
+const char *InstX86Base<Machine>::getWidthString(Type Ty) {
+  return Traits::TypeAttributes[Ty].WidthString;
+}
+/// Returns the FldString entry of Traits::TypeAttributes for type \p Ty.
+template <class Machine>
+const char *InstX86Base<Machine>::getFldString(Type Ty) {
+  return Traits::TypeAttributes[Ty].FldString;
+}
+/// Looks up the Opposite of \p Cond in Traits::InstBrAttributes.
+template <class Machine>
+typename InstX86Base<Machine>::Traits::Cond::BrCond
+InstX86Base<Machine>::getOppositeCondition(typename Traits::Cond::BrCond Cond) {
+  return Traits::InstBrAttributes[Cond].Opposite;
+}
+/// Builds a FakeRMW with no Dest; sources are Data, Addr, Beacon, in order.
+template <class Machine>
+InstX86FakeRMW<Machine>::InstX86FakeRMW(Cfg *Func, Operand *Data, Operand *Addr,
+                                        InstArithmetic::OpKind Op,
+                                        Variable *Beacon)
+    : InstX86Base<Machine>(Func, InstX86Base<Machine>::FakeRMW, 3, nullptr),
+      Op(Op) { // The arithmetic kind is only stored here.
+  this->addSource(Data);
+  this->addSource(Addr);
+  this->addSource(Beacon);
+}
+/// Builds an Adjuststack storing \p Amount; \p Esp is Dest and sole source.
+template <class Machine>
+InstX86AdjustStack<Machine>::InstX86AdjustStack(Cfg *Func, SizeT Amount,
+                                                Variable *Esp)
+    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Adjuststack, 1, Esp),
+      Amount(Amount) {
+  this->addSource(Esp); // Esp is passed as Dest above and is also a source.
+}
+/// Builds a Mul that writes \p Dest; sources are Source1 then Source2.
+template <class Machine>
+InstX86Mul<Machine>::InstX86Mul(Cfg *Func, Variable *Dest, Variable *Source1,
+                                Operand *Source2)
+    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Mul, 2, Dest) {
+  this->addSource(Source1);
+  this->addSource(Source2);
+}
+/// Builds a Shld; \p Dest is the destination and also the first source.
+template <class Machine>
+InstX86Shld<Machine>::InstX86Shld(Cfg *Func, Variable *Dest, Variable *Source1,
+                                  Variable *Source2)
+    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Shld, 3, Dest) {
+  this->addSource(Dest); // Dest is registered as a source too.
+  this->addSource(Source1);
+  this->addSource(Source2);
+}
+/// Builds a Shrd; \p Dest is the destination and also the first source.
+template <class Machine>
+InstX86Shrd<Machine>::InstX86Shrd(Cfg *Func, Variable *Dest, Variable *Source1,
+                                  Variable *Source2)
+    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Shrd, 3, Dest) {
+  this->addSource(Dest); // Dest is registered as a source too.
+  this->addSource(Source1);
+  this->addSource(Source2);
+}
+/// Builds a Label and numbers it via Target->makeNextLabelNumber().
+template <class Machine>
+InstX86Label<Machine>::InstX86Label(
+    Cfg *Func, typename InstX86Base<Machine>::Traits::TargetLowering *Target)
+    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Label, 0, nullptr),
+      Number(Target->makeNextLabelNumber()) {}
+/// Returns ".L" + function name + "$local$__" + this label's Number.
+template <class Machine>
+IceString InstX86Label<Machine>::getName(const Cfg *Func) const {
+  return ".L" + Func->getFunctionName() + "$local$__" + std::to_string(Number);
+}
+/// Builds a Br with no sources, storing the condition, targets, and Label.
+template <class Machine>
+InstX86Br<Machine>::InstX86Br(
+    Cfg *Func, const CfgNode *TargetTrue, const CfgNode *TargetFalse,
+    const InstX86Label<Machine> *Label,
+    typename InstX86Base<Machine>::Traits::Cond::BrCond Condition)
+    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Br, 0, nullptr),
+      Condition(Condition), TargetTrue(TargetTrue), TargetFalse(TargetFalse),
+      Label(Label) {}
+/// Simplifies this branch when control can fall through into \p NextNode.
+///
+/// \param NextNode the next block, or nullptr if there is none.
+/// \returns true if the branch was deleted, inverted, or had its explicit
+/// false target dropped; false if it was left untouched.
+template <class Machine>
+bool InstX86Br<Machine>::optimizeBranch(const CfgNode *NextNode) {
+  // Bail out when there is no next block to fall through to, when this is
+  // an intra-block branch (it targets a Label), or when there is no
+  // fallthrough node at all, such as for a non-default case label of a
+  // switch instruction.
+  if (NextNode == nullptr || Label || getTargetFalse() == nullptr)
+    return false;
+
+  if (getTargetFalse() == NextNode) {
+    if (Condition == InstX86Base<Machine>::Traits::Cond::Br_None) {
+      // An unconditional branch whose only target is the next node is
+      // redundant, so remove it.
+      assert(getTargetTrue() == nullptr);
+      this->setDeleted();
+    } else {
+      // The fallthrough is to the next node; a null TargetFalse indicates
+      // that.
+      TargetFalse = nullptr;
+    }
+    return true;
+  }
+
+  if (getTargetTrue() != NextNode)
+    return false;
+
+  // TargetTrue is the next node and TargetFalse is known to be non-null
+  // from the guard at the top. Invert the branch condition so that the
+  // old false target becomes the taken target, then clear TargetFalse so
+  // the next node is reached by fallthrough.
+  assert(Condition != InstX86Base<Machine>::Traits::Cond::Br_None);
+  Condition = this->getOppositeCondition(Condition);
+  TargetTrue = getTargetFalse();
+  TargetFalse = nullptr;
+  return true;
+}
+/// Repoints at most one out-edge from OldNode to NewNode (false edge first).
+template <class Machine>
+bool InstX86Br<Machine>::repointEdge(CfgNode *OldNode, CfgNode *NewNode) {
+  const bool FalseMatches = TargetFalse == OldNode;
+  if (!FalseMatches && TargetTrue != OldNode)
+    return false; // Neither edge leads to OldNode.
+  if (FalseMatches)
+    TargetFalse = NewNode;
+  else
+    TargetTrue = NewNode;
+  return true;
+}
+/// Builds a Jmp with no Dest whose single source is \p Target.
+template <class Machine>
+InstX86Jmp<Machine>::InstX86Jmp(Cfg *Func, Operand *Target)
+    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Jmp, 1, nullptr) {
+  this->addSource(Target);
+}
+/// Builds a Call to \p CallTarget writing \p Dest; sets HasSideEffects.
+template <class Machine>
+InstX86Call<Machine>::InstX86Call(Cfg *Func, Variable *Dest,
+                                  Operand *CallTarget)
+    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Call, 1, Dest) {
+  this->HasSideEffects = true;
+  this->addSource(CallTarget);
+}
+/// Builds a Cmov into \p Dest from \p Source, storing \p Condition.
+template <class Machine>
+InstX86Cmov<Machine>::InstX86Cmov(
+    Cfg *Func, Variable *Dest, Operand *Source,
+    typename InstX86Base<Machine>::Traits::Cond::BrCond Condition)
+    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Cmov, 2, Dest),
+      Condition(Condition) {
+  // The final result is either the original Dest, or Source, so mark
+  // both as sources.
+  this->addSource(Dest);
+  this->addSource(Source);
+}
+/// Builds a Cmpps on \p Dest and \p Source, storing \p Condition.
+template <class Machine>
+InstX86Cmpps<Machine>::InstX86Cmpps(
+    Cfg *Func, Variable *Dest, Operand *Source,
+    typename InstX86Base<Machine>::Traits::Cond::CmppsCond Condition)
+    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Cmpps, 2, Dest),
+      Condition(Condition) {
+  this->addSource(Dest); // Dest is registered as a source too.
+  this->addSource(Source);
+}
+/// Builds a Cmpxchg; Dest is \p DestOrAddr only if that is a Variable.
+template <class Machine>
+InstX86Cmpxchg<Machine>::InstX86Cmpxchg(Cfg *Func, Operand *DestOrAddr,
+                                        Variable *Eax, Variable *Desired,
+                                        bool Locked)
+    : InstX86BaseLockable<Machine>(Func, InstX86Base<Machine>::Cmpxchg, 3,
+                                   llvm::dyn_cast<Variable>(DestOrAddr),
+                                   Locked) {
+  assert(Eax->getRegNum() ==
+         InstX86Base<Machine>::Traits::RegisterSet::Reg_eax);
+  this->addSource(DestOrAddr); // Always a source, whether or not it is Dest.
+  this->addSource(Eax);
+  this->addSource(Desired);
+}
+/// Builds a Cmpxchg8b with no Dest; sources are Addr, Edx, Eax, Ecx, Ebx.
+template <class Machine>
+InstX86Cmpxchg8b<Machine>::InstX86Cmpxchg8b(
+    Cfg *Func, typename InstX86Base<Machine>::Traits::X86OperandMem *Addr,
+    Variable *Edx, Variable *Eax, Variable *Ecx, Variable *Ebx, bool Locked)
+    : InstX86BaseLockable<Machine>(Func, InstX86Base<Machine>::Cmpxchg8b, 5,
+                                   nullptr, Locked) { // not Cmpxchg's kind
+  assert(Edx->getRegNum() ==
+         InstX86Base<Machine>::Traits::RegisterSet::Reg_edx);
+  assert(Eax->getRegNum() ==
+         InstX86Base<Machine>::Traits::RegisterSet::Reg_eax);
+  assert(Ecx->getRegNum() ==
+         InstX86Base<Machine>::Traits::RegisterSet::Reg_ecx);
+  assert(Ebx->getRegNum() ==
+         InstX86Base<Machine>::Traits::RegisterSet::Reg_ebx);
+  this->addSource(Addr);
+  this->addSource(Edx);
+  this->addSource(Eax);
+  this->addSource(Ecx);
+  this->addSource(Ebx);
+}
+/// Builds a Cvt from \p Source into \p Dest, storing the \p Variant.
+template <class Machine>
+InstX86Cvt<Machine>::InstX86Cvt(Cfg *Func, Variable *Dest, Operand *Source,
+                                CvtVariant Variant)
+    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Cvt, 1, Dest),
+      Variant(Variant) {
+  this->addSource(Source);
+}
+/// Builds an Icmp with no Dest; sources are \p Src0 then \p Src1.
+template <class Machine>
+InstX86Icmp<Machine>::InstX86Icmp(Cfg *Func, Operand *Src0, Operand *Src1)
+    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Icmp, 2, nullptr) {
+  this->addSource(Src0);
+  this->addSource(Src1);
+}
+/// Builds a Ucomiss with no Dest; sources are \p Src0 then \p Src1.
+template <class Machine>
+InstX86Ucomiss<Machine>::InstX86Ucomiss(Cfg *Func, Operand *Src0, Operand *Src1)
+    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Ucomiss, 2, nullptr) {
+  this->addSource(Src0);
+  this->addSource(Src1);
+}
+/// Builds a UD2, which has neither a Dest nor any sources.
+template <class Machine>
+InstX86UD2<Machine>::InstX86UD2(Cfg *Func)
+    : InstX86Base<Machine>(Func, InstX86Base<Machine>::UD2, 0, nullptr) {}
+/// Builds a Test with no Dest; sources are \p Src1 then \p Src2.
+template <class Machine>
+InstX86Test<Machine>::InstX86Test(Cfg *Func, Operand *Src1, Operand *Src2)
+    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Test, 2, nullptr) {
+  this->addSource(Src1);
+  this->addSource(Src2);
+}
+/// Builds an Mfence (no Dest, no sources) and sets HasSideEffects.
+template <class Machine>
+InstX86Mfence<Machine>::InstX86Mfence(Cfg *Func)
+    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Mfence, 0, nullptr) {
+  this->HasSideEffects = true;
+}
+/// Builds a Store with no Dest; sources are \p Value then \p Mem.
+template <class Machine>
+InstX86Store<Machine>::InstX86Store(
+    Cfg *Func, Operand *Value,
+    typename InstX86Base<Machine>::Traits::X86Operand *Mem)
+    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Store, 2, nullptr) {
+  this->addSource(Value);
+  this->addSource(Mem);
+}
+/// Builds a StoreP with no Dest; sources are \p Value then \p Mem.
+template <class Machine>
+InstX86StoreP<Machine>::InstX86StoreP(
+    Cfg *Func, Variable *Value,
+    typename InstX86Base<Machine>::Traits::X86OperandMem *Mem)
+    : InstX86Base<Machine>(Func, InstX86Base<Machine>::StoreP, 2, nullptr) {
+  this->addSource(Value);
+  this->addSource(Mem);
+}
+
+template <class Machine> // StoreQ constructor: sources are a Variable value and an X86OperandMem; there is no Dest variable.
+InstX86StoreQ<Machine>::InstX86StoreQ(
+    Cfg *Func, Variable *Value,
+    typename InstX86Base<Machine>::Traits::X86OperandMem *Mem)
+    : InstX86Base<Machine>(Func, InstX86Base<Machine>::StoreQ, 2, nullptr) { // 2 matches the two addSource() calls below
+  this->addSource(Value); // registered first
+  this->addSource(Mem); // registered second
+}
+
+template <class Machine> // Nop constructor: no sources, no Dest variable; only the requested NopVariant is stored.
+InstX86Nop<Machine>::InstX86Nop(Cfg *Func, InstX86Nop::NopVariant Variant)
+    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Nop, 0, nullptr), // 0 sources, nullptr Dest
+      Variant(Variant) {}
+
+template <class Machine> // Fld constructor: one source operand and no Dest variable.
+InstX86Fld<Machine>::InstX86Fld(Cfg *Func, Operand *Src)
+    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Fld, 1, nullptr) { // 1 matches the single addSource() call below
+  this->addSource(Src);
+}
+
+template <class Machine> // Fstp constructor: only a Dest variable, no source operands.
+InstX86Fstp<Machine>::InstX86Fstp(Cfg *Func, Variable *Dest)
+    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Fstp, 0, Dest) {} // 0 sources
+
+template <class Machine> // Pop constructor: only a Dest variable, no source operands; flagged as side-effecting.
+InstX86Pop<Machine>::InstX86Pop(Cfg *Func, Variable *Dest)
+    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Pop, 0, Dest) { // 0 sources
+  // A pop instruction affects the stack pointer and so it should not
+  // be allowed to be automatically dead-code eliminated.  (The
+  // corresponding push instruction doesn't need this treatment
+  // because it has no dest variable and therefore won't be dead-code
+  // eliminated.)  This is needed for late-stage liveness analysis
+  // (e.g. asm-verbose mode).
+  this->HasSideEffects = true;
+}
+
+template <class Machine> // Push constructor: one Variable source and no Dest variable.
+InstX86Push<Machine>::InstX86Push(Cfg *Func, Variable *Source)
+    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Push, 1, nullptr) { // 1 matches the single addSource() call below
+  this->addSource(Source);
+}
+
+/// Return instruction. Source is optional: when it is non-null it is
+/// registered as the instruction's only source operand, otherwise the
+/// instruction is created with zero sources. There is never a Dest variable.
+template <class Machine>
+InstX86Ret<Machine>::InstX86Ret(Cfg *Func, Variable *Source)
+    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Ret,
+                           Source != nullptr ? 1 : 0, nullptr) {
+  if (Source != nullptr) {
+    this->addSource(Source);
+  }
+}
+
+template <class Machine> // Setcc constructor: only a Dest variable; the branch condition is stored in Condition.
+InstX86Setcc<Machine>::InstX86Setcc(
+    Cfg *Func, Variable *Dest,
+    typename InstX86Base<Machine>::Traits::Cond::BrCond Cond)
+    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Setcc, 0, Dest), // 0 sources
+      Condition(Cond) {}
+
+template <class Machine> // Xadd constructor: Dest is both a source and (when it is a Variable) the Dest variable; Locked is forwarded to the lockable base.
+InstX86Xadd<Machine>::InstX86Xadd(Cfg *Func, Operand *Dest, Variable *Source,
+                                  bool Locked)
+    : InstX86BaseLockable<Machine>(Func, InstX86Base<Machine>::Xadd, 2,
+                                   llvm::dyn_cast<Variable>(Dest), Locked) { // dyn_cast yields nullptr when Dest is not a Variable
+  this->addSource(Dest); // registered first
+  this->addSource(Source); // registered second
+}
+
+template <class Machine> // Xchg constructor: Dest is both a source and (when it is a Variable) the Dest variable.
+InstX86Xchg<Machine>::InstX86Xchg(Cfg *Func, Operand *Dest, Variable *Source)
+    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Xchg, 2,
+                           llvm::dyn_cast<Variable>(Dest)) { // dyn_cast yields nullptr when Dest is not a Variable
+  this->addSource(Dest); // registered first
+  this->addSource(Source); // registered second
+}
+
+// ======================== Dump routines ======================== //
+
+/// Writes a "[<Traits::TargetName>] " tag to the dump stream and then lets
+/// the generic Inst::dump() print the instruction itself. Produces no output
+/// when BuildDefs::dump() is false.
+template <class Machine>
+void InstX86Base<Machine>::dump(const Cfg *Func) const {
+  if (BuildDefs::dump()) {
+    Ostream &Out = Func->getContext()->getStrDump();
+    Out << "[" << Traits::TargetName << "] ";
+    Inst::dump(Func);
+  }
+}
+
+/// Dumps the pseudo read-modify-write instruction in the form
+///   rmw <op> <type> *<addr>, <data>, beacon=<beacon>
+/// where <type> is the type of the data operand. Produces no output when
+/// BuildDefs::dump() is false.
+template <class Machine>
+void InstX86FakeRMW<Machine>::dump(const Cfg *Func) const {
+  if (BuildDefs::dump()) {
+    Ostream &Out = Func->getContext()->getStrDump();
+    Type DataTy = getData()->getType();
+    Out << "rmw " << InstArithmetic::getOpName(getOp()) << " " << DataTy
+        << " *";
+    getAddr()->dump(Func);
+    Out << ", ";
+    getData()->dump(Func);
+    Out << ", beacon=";
+    getBeacon()->dump(Func);
+  }
+}
+
+/// Textual emission: writes the label's name followed by ':' to the emit
+/// stream. Produces no output when BuildDefs::dump() is false.
+template <class Machine>
+void InstX86Label<Machine>::emit(const Cfg *Func) const {
+  if (BuildDefs::dump()) {
+    Ostream &Out = Func->getContext()->getStrEmit();
+    Out << getName(Func) << ":";
+  }
+}
+
+/// Integrated-assembler emission: binds the assembler's local label
+/// identified by this label's Number.
+template <class Machine>
+void InstX86Label<Machine>::emitIAS(const Cfg *Func) const {
+  using AssemblerTy = typename InstX86Base<Machine>::Traits::Assembler;
+  AssemblerTy *Asm = Func->getAssembler<AssemblerTy>();
+  Asm->BindLocalLabel(Number);
+}
+
+/// Debug dump: writes the label's name followed by ':' to the dump stream.
+/// Produces no output when BuildDefs::dump() is false.
+template <class Machine>
+void InstX86Label<Machine>::dump(const Cfg *Func) const {
+  if (BuildDefs::dump()) {
+    Ostream &Out = Func->getContext()->getStrDump();
+    Out << getName(Func) << ":";
+  }
+}
+
+/// Textual emission of a branch. The mnemonic is "jmp" for an unconditional
+/// branch (Condition == Br_None), otherwise the EmitString registered for the
+/// condition. The target is the intra-block Label when one is set; otherwise
+/// it is a CfgNode: the false target for an unconditional branch, or the true
+/// target followed by an extra "jmp" to the false target when there is one.
+template <class Machine> void InstX86Br<Machine>::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  using Traits = typename InstX86Base<Machine>::Traits;
+  Ostream &Out = Func->getContext()->getStrEmit();
+  const bool Unconditional = Condition == Traits::Cond::Br_None;
+
+  Out << "\t";
+  if (Unconditional)
+    Out << "jmp";
+  else
+    Out << Traits::InstBrAttributes[Condition].EmitString;
+
+  if (Label) {
+    Out << "\t" << Label->getName(Func);
+    return;
+  }
+  if (Unconditional) {
+    Out << "\t" << getTargetFalse()->getAsmName();
+    return;
+  }
+  Out << "\t" << getTargetTrue()->getAsmName();
+  if (getTargetFalse())
+    Out << "\n\tjmp\t" << getTargetFalse()->getAsmName();
+}
+
+/// Integrated-assembler emission of a branch. Branches to an intra-block
+/// Label are emitted as near jumps to an assembler-local label; branches to
+/// CfgNodes are emitted as far jumps to the node's label, with a trailing
+/// unconditional jump to the false target when a conditional branch has one.
+template <class Machine>
+void InstX86Br<Machine>::emitIAS(const Cfg *Func) const {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  using AssemblerTy = typename Traits::Assembler;
+  AssemblerTy *Asm = Func->getAssembler<AssemblerTy>();
+  const bool Unconditional = Condition == Traits::Cond::Br_None;
+
+  if (Label) {
+    class Label *Local = Asm->GetOrCreateLocalLabel(Label->getNumber());
+    // In all these cases, local Labels should only be used for Near.
+    const bool Near = true;
+    if (Unconditional)
+      Asm->jmp(Local, Near);
+    else
+      Asm->j(Condition, Local, Near);
+    return;
+  }
+
+  // Pessimistically assume it's far. This only affects Labels that
+  // are not Bound.
+  const bool Near = false;
+  if (Unconditional) {
+    class Label *Target =
+        Asm->GetOrCreateCfgNodeLabel(getTargetFalse()->getIndex());
+    assert(!getTargetTrue());
+    Asm->jmp(Target, Near);
+    return;
+  }
+
+  class Label *TrueLabel =
+      Asm->GetOrCreateCfgNodeLabel(getTargetTrue()->getIndex());
+  Asm->j(Condition, TrueLabel, Near);
+  if (getTargetFalse()) {
+    class Label *FalseLabel =
+        Asm->GetOrCreateCfgNodeLabel(getTargetFalse()->getIndex());
+    Asm->jmp(FalseLabel, Near);
+  }
+}
+
+/// Debug dump of a branch. An unconditional branch prints
+/// "br label %<target>"; a conditional one prints the condition's
+/// DisplayString followed by the label(s). When an intra-block Label is set
+/// it is printed instead of the CfgNode targets.
+template <class Machine> void InstX86Br<Machine>::dump(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  using Traits = typename InstX86Base<Machine>::Traits;
+  Ostream &Out = Func->getContext()->getStrDump();
+  Out << "br ";
+
+  if (Condition == Traits::Cond::Br_None) {
+    Out << "label %";
+    if (Label)
+      Out << Label->getName(Func);
+    else
+      Out << getTargetFalse()->getName();
+  } else {
+    Out << Traits::InstBrAttributes[Condition].DisplayString;
+    if (Label) {
+      Out << ", label %" << Label->getName(Func);
+    } else {
+      Out << ", label %" << getTargetTrue()->getName();
+      if (getTargetFalse())
+        Out << ", label %" << getTargetFalse()->getName();
+    }
+  }
+}
+
+/// Textual emission of an indirect jump: "\tjmp\t*" followed by the emitted
+/// jump target. Produces no output when BuildDefs::dump() is false.
+template <class Machine> void InstX86Jmp<Machine>::emit(const Cfg *Func) const {
+  if (BuildDefs::dump()) {
+    Ostream &Out = Func->getContext()->getStrEmit();
+    assert(this->getSrcSize() == 1);
+    Out << "\tjmp\t*";
+    getJmpTarget()->emit(Func);
+  }
+}
+
+/// Integrated-assembler emission of a jump. Only two target kinds are
+/// actually encoded: a Variable that lives in a register, and a
+/// ConstantRelocatable with zero offset. Every other target kind ends in
+/// llvm::report_fatal_error.
+template <class Machine>
+void InstX86Jmp<Machine>::emitIAS(const Cfg *Func) const {
+  // Note: Adapted (mostly copied) from InstX86Call<Machine>::emitIAS().
+  using Traits = typename InstX86Base<Machine>::Traits;
+  using AssemblerTy = typename Traits::Assembler;
+  using MemOperandTy = typename Traits::X86OperandMem;
+  AssemblerTy *Asm = Func->getAssembler<AssemblerTy>();
+  Operand *Target = getJmpTarget();
+  if (const auto Var = llvm::dyn_cast<Variable>(Target)) {
+    if (!Var->hasReg()) {
+      // The jmp instruction with a memory operand should be possible
+      // to encode, but it isn't a valid sandboxed instruction, and
+      // there shouldn't be a register allocation issue to jump
+      // through a scratch register, so we don't really need to bother
+      // implementing it.
+      llvm::report_fatal_error("Assembler can't jmp to memory operand");
+    } else {
+      Asm->jmp(Traits::RegisterSet::getEncodedGPR(Var->getRegNum()));
+    }
+  } else if (const auto Mem = llvm::dyn_cast<MemOperandTy>(Target)) {
+    // Mem is only inspected by the assert; the cast to void keeps builds
+    // without assertions free of unused-variable warnings.
+    (void)Mem;
+    assert(Mem->getSegmentRegister() == MemOperandTy::DefaultSegment);
+    llvm::report_fatal_error("Assembler can't jmp to memory operand");
+  } else if (const auto CR = llvm::dyn_cast<ConstantRelocatable>(Target)) {
+    assert(CR->getOffset() == 0 && "We only support jumping to a function");
+    Asm->jmp(CR);
+  } else if (const auto Imm = llvm::dyn_cast<ConstantInteger32>(Target)) {
+    // NaCl trampoline calls refer to an address within the sandbox directly.
+    // This is usually only needed for non-IRT builds and otherwise not
+    // very portable or stable. Usually this is only done for "calls"
+    // and not jumps.
+    // TODO(jvoung): Support this when there is a lowering that
+    // actually triggers this case.
+    (void)Imm;
+    llvm::report_fatal_error("Unexpected jmp to absolute address");
+  } else {
+    llvm::report_fatal_error("Unexpected operand type");
+  }
+}
+
+/// Debug dump: "jmp " followed by the dumped jump target. Produces no output
+/// when BuildDefs::dump() is false.
+template <class Machine> void InstX86Jmp<Machine>::dump(const Cfg *Func) const {
+  if (BuildDefs::dump()) {
+    Ostream &Out = Func->getContext()->getStrDump();
+    Out << "jmp ";
+    getJmpTarget()->dump(Func);
+  }
+}
+
+/// Textual emission of a call. A ConstantInteger32 target is printed as a
+/// bare value, a ConstantRelocatable is printed without its prefix, and any
+/// other target is printed as an indirect call ("*" + operand). After
+/// emitting, the target lowering's stack adjustment is reset. Nothing is done
+/// (including the reset) when BuildDefs::dump() is false.
+template <class Machine>
+void InstX86Call<Machine>::emit(const Cfg *Func) const {
+  if (BuildDefs::dump()) {
+    Ostream &Out = Func->getContext()->getStrEmit();
+    assert(this->getSrcSize() == 1);
+    Out << "\tcall\t";
+    if (const auto Absolute =
+            llvm::dyn_cast<ConstantInteger32>(getCallTarget())) {
+      // Emit without a leading '$'.
+      Out << Absolute->getValue();
+    } else if (const auto Symbol =
+                   llvm::dyn_cast<ConstantRelocatable>(getCallTarget())) {
+      Symbol->emitWithoutPrefix(Func->getTarget());
+    } else {
+      Out << "*";
+      getCallTarget()->emit(Func);
+    }
+    Func->getTarget()->resetStackAdjustment();
+  }
+}
+
+/// Integrated-assembler emission of a call. Supported targets are a Variable
+/// (in a register or addressed through stackVarToAsmOperand), an
+/// X86OperandMem using the default segment, a ConstantRelocatable with zero
+/// offset, and a ConstantInteger32. The target lowering's stack adjustment is
+/// reset afterwards.
+template <class Machine>
+void InstX86Call<Machine>::emitIAS(const Cfg *Func) const {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  using AssemblerTy = typename Traits::Assembler;
+  using LoweringTy = typename Traits::TargetLowering;
+  using MemOperandTy = typename Traits::X86OperandMem;
+  AssemblerTy *Asm = Func->getAssembler<AssemblerTy>();
+  Operand *Target = getCallTarget();
+  if (const auto Var = llvm::dyn_cast<Variable>(Target)) {
+    if (!Var->hasReg()) {
+      // No register assigned: call through the variable's stack operand.
+      Asm->call(static_cast<LoweringTy *>(Func->getTarget())
+                    ->stackVarToAsmOperand(Var));
+    } else {
+      Asm->call(Traits::RegisterSet::getEncodedGPR(Var->getRegNum()));
+    }
+  } else if (const auto Mem = llvm::dyn_cast<MemOperandTy>(Target)) {
+    assert(Mem->getSegmentRegister() == MemOperandTy::DefaultSegment);
+    Asm->call(Mem->toAsmAddress(Asm));
+  } else if (const auto CR = llvm::dyn_cast<ConstantRelocatable>(Target)) {
+    assert(CR->getOffset() == 0 && "We only support calling a function");
+    Asm->call(CR);
+  } else if (const auto Imm = llvm::dyn_cast<ConstantInteger32>(Target)) {
+    Asm->call(Immediate(Imm->getValue()));
+  } else {
+    llvm_unreachable("Unexpected operand type");
+  }
+  Func->getTarget()->resetStackAdjustment();
+}
+
+/// Debug dump: "<dest> = call <target>", with the "<dest> = " part printed
+/// only when the call has a Dest variable. Produces no output when
+/// BuildDefs::dump() is false.
+template <class Machine>
+void InstX86Call<Machine>::dump(const Cfg *Func) const {
+  if (BuildDefs::dump()) {
+    Ostream &Out = Func->getContext()->getStrDump();
+    if (this->getDest()) {
+      this->dumpDest(Func);
+      Out << " = ";
+    }
+    Out << "call ";
+    getCallTarget()->dump(Func);
+  }
+}
+
+// Textual emission helper for two-address instructions. Prints
+// "\t<Opcode><width suffix>\t<Src1>, <Dest>", where Dest falls back to
+// source 0 when the instruction has no Dest variable.
+// The ShiftHack parameter is used to emit "cl" instead of "ecx" for
+// shift instructions, in order to be syntactically valid.  The
+// Opcode parameter needs to be char* and not IceString because of
+// template issues.
+template <class Machine>
+void InstX86Base<Machine>::emitTwoAddress(const char *Opcode, const Inst *Inst,
+                                          const Cfg *Func, bool ShiftHack) {
+  if (BuildDefs::dump()) {
+    Ostream &Out = Func->getContext()->getStrEmit();
+    assert(Inst->getSrcSize() == 2);
+    Operand *Dest = Inst->getDest();
+    if (!Dest)
+      Dest = Inst->getSrc(0);
+    assert(Dest == Inst->getSrc(0));
+    Operand *Src1 = Inst->getSrc(1);
+    Out << "\t" << Opcode
+        << InstX86Base<Machine>::getWidthString(Dest->getType()) << "\t";
+    const auto ShiftReg = llvm::dyn_cast<Variable>(Src1);
+    const bool EmitCl =
+        ShiftHack && ShiftReg != nullptr &&
+        ShiftReg->getRegNum() == Traits::RegisterSet::Reg_ecx;
+    if (EmitCl) {
+      Out << "%cl";
+    } else {
+      Src1->emit(Func);
+    }
+    Out << ", ";
+    Dest->emit(Func);
+  }
+}
+
+/// Emits a one-operand GPR instruction through the integrated assembler.
+/// Op may be a Variable (with a register, or addressed through
+/// stackVarToAsmOperand when it has none) or an X86OperandMem; any other
+/// operand kind reaches llvm_unreachable.
+template <class Machine>
+void emitIASOpTyGPR(const Cfg *Func, Type Ty, const Operand *Op,
+                    const typename InstX86Base<
+                        Machine>::Traits::Assembler::GPREmitterOneOp &Emitter) {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  using AssemblerTy = typename Traits::Assembler;
+  using LoweringTy = typename Traits::TargetLowering;
+  AssemblerTy *Asm = Func->getAssembler<AssemblerTy>();
+
+  if (const auto Var = llvm::dyn_cast<Variable>(Op)) {
+    if (!Var->hasReg()) {
+      typename Traits::Address StackAddr(
+          static_cast<LoweringTy *>(Func->getTarget())
+              ->stackVarToAsmOperand(Var));
+      (Asm->*(Emitter.Addr))(Ty, StackAddr);
+      return;
+    }
+    // We cheat a little and use GPRRegister even for byte operations.
+    typename Traits::RegisterSet::GPRRegister VarReg =
+        Traits::RegisterSet::getEncodedByteRegOrGPR(Ty, Var->getRegNum());
+    (Asm->*(Emitter.Reg))(Ty, VarReg);
+    return;
+  }
+  if (const auto Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Op)) {
+    Mem->emitSegmentOverride(Asm);
+    (Asm->*(Emitter.Addr))(Ty, Mem->toAsmAddress(Asm));
+    return;
+  }
+  llvm_unreachable("Unexpected operand type");
+}
+
+/// Emits a "register, operand" GPR instruction through the integrated
+/// assembler. Var must have a register. Src may be a Variable (register or
+/// stack), an X86OperandMem, a ConstantInteger32, a ConstantRelocatable
+/// (emitted as an immediate carrying an R_386_32 fixup) or a VariableSplit.
+/// VarCanBeByte / SrcCanBeByte choose, per operand, between the byte-aware
+/// register encoding and the plain GPR encoding.
+template <class Machine, bool VarCanBeByte, bool SrcCanBeByte>
+void emitIASRegOpTyGPR(
+    const Cfg *Func, Type Ty, const Variable *Var, const Operand *Src,
+    const typename InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp
+        &Emitter) {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  using AssemblerTy = typename Traits::Assembler;
+  using LoweringTy = typename Traits::TargetLowering;
+  using RegSet = typename Traits::RegisterSet;
+  using GPRRegisterTy = typename RegSet::GPRRegister;
+  AssemblerTy *Asm = Func->getAssembler<AssemblerTy>();
+  assert(Var->hasReg());
+  // We cheat a little and use GPRRegister even for byte operations.
+  GPRRegisterTy VarReg =
+      VarCanBeByte ? RegSet::getEncodedByteRegOrGPR(Ty, Var->getRegNum())
+                   : RegSet::getEncodedGPR(Var->getRegNum());
+
+  if (const auto SrcVar = llvm::dyn_cast<Variable>(Src)) {
+    if (SrcVar->hasReg()) {
+      GPRRegisterTy SrcReg =
+          SrcCanBeByte
+              ? RegSet::getEncodedByteRegOrGPR(Ty, SrcVar->getRegNum())
+              : RegSet::getEncodedGPR(SrcVar->getRegNum());
+      (Asm->*(Emitter.GPRGPR))(Ty, VarReg, SrcReg);
+      return;
+    }
+    typename Traits::Address SrcStackAddr =
+        static_cast<LoweringTy *>(Func->getTarget())
+            ->stackVarToAsmOperand(SrcVar);
+    (Asm->*(Emitter.GPRAddr))(Ty, VarReg, SrcStackAddr);
+    return;
+  }
+  if (const auto Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Src)) {
+    Mem->emitSegmentOverride(Asm);
+    (Asm->*(Emitter.GPRAddr))(Ty, VarReg, Mem->toAsmAddress(Asm));
+    return;
+  }
+  if (const auto Imm = llvm::dyn_cast<ConstantInteger32>(Src)) {
+    (Asm->*(Emitter.GPRImm))(Ty, VarReg, Immediate(Imm->getValue()));
+    return;
+  }
+  if (const auto Reloc = llvm::dyn_cast<ConstantRelocatable>(Src)) {
+    AssemblerFixup *Fixup = Asm->createFixup(llvm::ELF::R_386_32, Reloc);
+    (Asm->*(Emitter.GPRImm))(Ty, VarReg, Immediate(Reloc->getOffset(), Fixup));
+    return;
+  }
+  if (const auto Split =
+          llvm::dyn_cast<typename Traits::VariableSplit>(Src)) {
+    (Asm->*(Emitter.GPRAddr))(Ty, VarReg, Split->toAsmAddress(Func));
+    return;
+  }
+  llvm_unreachable("Unexpected operand type");
+}
+
+/// Emits an "address, operand" GPR instruction through the integrated
+/// assembler. Src must be a register-allocated Variable, a ConstantInteger32
+/// or a ConstantRelocatable (emitted as an immediate carrying an R_386_32
+/// fixup); any other operand kind reaches llvm_unreachable.
+template <class Machine>
+void emitIASAddrOpTyGPR(
+    const Cfg *Func, Type Ty,
+    const typename InstX86Base<Machine>::Traits::Address &Addr,
+    const Operand *Src,
+    const typename InstX86Base<Machine>::Traits::Assembler::GPREmitterAddrOp
+        &Emitter) {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  using AssemblerTy = typename Traits::Assembler;
+  using RegSet = typename Traits::RegisterSet;
+  AssemblerTy *Asm = Func->getAssembler<AssemblerTy>();
+
+  // Src can only be Reg or Immediate.
+  if (const auto SrcVar = llvm::dyn_cast<Variable>(Src)) {
+    assert(SrcVar->hasReg());
+    typename RegSet::GPRRegister SrcReg =
+        RegSet::getEncodedByteRegOrGPR(Ty, SrcVar->getRegNum());
+    (Asm->*(Emitter.AddrGPR))(Ty, Addr, SrcReg);
+    return;
+  }
+  if (const auto Imm = llvm::dyn_cast<ConstantInteger32>(Src)) {
+    (Asm->*(Emitter.AddrImm))(Ty, Addr, Immediate(Imm->getValue()));
+    return;
+  }
+  if (const auto Reloc = llvm::dyn_cast<ConstantRelocatable>(Src)) {
+    AssemblerFixup *Fixup = Asm->createFixup(llvm::ELF::R_386_32, Reloc);
+    (Asm->*(Emitter.AddrImm))(Ty, Addr, Immediate(Reloc->getOffset(), Fixup));
+    return;
+  }
+  llvm_unreachable("Unexpected operand type");
+}
+
+/// Converts Op0 into an assembler address and forwards to
+/// emitIASAddrOpTyGPR(). Op0 may be a Variable without a register (asserted),
+/// an X86OperandMem (its segment override is emitted first) or a
+/// VariableSplit; any other operand kind reaches llvm_unreachable.
+template <class Machine>
+void emitIASAsAddrOpTyGPR(
+    const Cfg *Func, Type Ty, const Operand *Op0, const Operand *Op1,
+    const typename InstX86Base<Machine>::Traits::Assembler::GPREmitterAddrOp
+        &Emitter) {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  using AssemblerTy = typename Traits::Assembler;
+  using LoweringTy = typename Traits::TargetLowering;
+
+  if (const auto Op0Var = llvm::dyn_cast<Variable>(Op0)) {
+    assert(!Op0Var->hasReg());
+    typename Traits::Address StackAddr(
+        static_cast<LoweringTy *>(Func->getTarget())
+            ->stackVarToAsmOperand(Op0Var));
+    emitIASAddrOpTyGPR<Machine>(Func, Ty, StackAddr, Op1, Emitter);
+    return;
+  }
+  if (const auto Op0Mem =
+          llvm::dyn_cast<typename Traits::X86OperandMem>(Op0)) {
+    AssemblerTy *Asm = Func->getAssembler<AssemblerTy>();
+    Op0Mem->emitSegmentOverride(Asm);
+    emitIASAddrOpTyGPR<Machine>(Func, Ty, Op0Mem->toAsmAddress(Asm), Op1,
+                                Emitter);
+    return;
+  }
+  if (const auto Split =
+          llvm::dyn_cast<typename Traits::VariableSplit>(Op0)) {
+    emitIASAddrOpTyGPR<Machine>(Func, Ty, Split->toAsmAddress(Func), Op1,
+                                Emitter);
+    return;
+  }
+  llvm_unreachable("Unexpected operand type");
+}
+
+/// Emits a GPR shift through the integrated assembler. Var must have a
+/// register. Src is either a register-allocated Variable or a
+/// ConstantInteger32; any other operand kind reaches llvm_unreachable.
+template <class Machine>
+void InstX86Base<Machine>::emitIASGPRShift(
+    const Cfg *Func, Type Ty, const Variable *Var, const Operand *Src,
+    const typename InstX86Base<Machine>::Traits::Assembler::GPREmitterShiftOp
+        &Emitter) {
+  using AssemblerTy = typename Traits::Assembler;
+  using GPRRegisterTy = typename Traits::RegisterSet::GPRRegister;
+  AssemblerTy *Asm = Func->getAssembler<AssemblerTy>();
+  // Technically, the Dest Var can be mem as well, but we only use Reg.
+  // We can extend this to check Dest if we decide to use that form.
+  assert(Var->hasReg());
+  // We cheat a little and use GPRRegister even for byte operations.
+  GPRRegisterTy VarReg =
+      Traits::RegisterSet::getEncodedByteRegOrGPR(Ty, Var->getRegNum());
+
+  // Src must be reg == ECX or an Imm8.
+  // This is asserted by the assembler.
+  if (const auto SrcVar = llvm::dyn_cast<Variable>(Src)) {
+    assert(SrcVar->hasReg());
+    GPRRegisterTy SrcReg =
+        Traits::RegisterSet::getEncodedByteRegOrGPR(Ty, SrcVar->getRegNum());
+    (Asm->*(Emitter.GPRGPR))(Ty, VarReg, SrcReg);
+    return;
+  }
+  if (const auto Imm = llvm::dyn_cast<ConstantInteger32>(Src)) {
+    (Asm->*(Emitter.GPRImm))(Ty, VarReg, Immediate(Imm->getValue()));
+    return;
+  }
+  llvm_unreachable("Unexpected operand type");
+}
+
+/// Emits a double-precision GPR shift through the integrated assembler.
+/// Dest and Src1Op must both be register-allocated variables; the operation
+/// type is taken from Src1Op. Src2Op is either a ConstantInteger32 (immediate
+/// form) or a Variable asserted to be in ecx (implicit-CL form).
+template <class Machine>
+void emitIASGPRShiftDouble(
+    const Cfg *Func, const Variable *Dest, const Operand *Src1Op,
+    const Operand *Src2Op,
+    const typename InstX86Base<Machine>::Traits::Assembler::GPREmitterShiftD
+        &Emitter) {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  using AssemblerTy = typename Traits::Assembler;
+  using RegSet = typename Traits::RegisterSet;
+  using GPRRegisterTy = typename RegSet::GPRRegister;
+  AssemblerTy *Asm = Func->getAssembler<AssemblerTy>();
+
+  // Dest can be reg or mem, but we only use the reg variant.
+  assert(Dest->hasReg());
+  GPRRegisterTy DestReg = RegSet::getEncodedGPR(Dest->getRegNum());
+
+  // SrcVar1 must be reg.
+  const auto SrcVar1 = llvm::cast<Variable>(Src1Op);
+  assert(SrcVar1->hasReg());
+  GPRRegisterTy SrcReg = RegSet::getEncodedGPR(SrcVar1->getRegNum());
+  Type Ty = SrcVar1->getType();
+
+  // Src2 can be the implicit CL register or an immediate.
+  if (const auto Imm = llvm::dyn_cast<ConstantInteger32>(Src2Op)) {
+    (Asm->*(Emitter.GPRGPRImm))(Ty, DestReg, SrcReg,
+                                Immediate(Imm->getValue()));
+    return;
+  }
+  assert(llvm::cast<Variable>(Src2Op)->getRegNum() == RegSet::Reg_ecx);
+  (Asm->*(Emitter.GPRGPR))(Ty, DestReg, SrcReg);
+}
+
+/// Emits an XMM shift through the integrated assembler. Var must have a
+/// register. Src may be a Variable (register or stack), an X86OperandMem
+/// using the default segment, or a ConstantInteger32; any other operand kind
+/// reaches llvm_unreachable.
+template <class Machine>
+void emitIASXmmShift(
+    const Cfg *Func, Type Ty, const Variable *Var, const Operand *Src,
+    const typename InstX86Base<Machine>::Traits::Assembler::XmmEmitterShiftOp
+        &Emitter) {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  using AssemblerTy = typename Traits::Assembler;
+  using LoweringTy = typename Traits::TargetLowering;
+  using MemOperandTy = typename Traits::X86OperandMem;
+  using RegSet = typename Traits::RegisterSet;
+  using XmmRegisterTy = typename RegSet::XmmRegister;
+  AssemblerTy *Asm = Func->getAssembler<AssemblerTy>();
+  assert(Var->hasReg());
+  XmmRegisterTy VarReg = RegSet::getEncodedXmm(Var->getRegNum());
+
+  if (const auto SrcVar = llvm::dyn_cast<Variable>(Src)) {
+    if (SrcVar->hasReg()) {
+      XmmRegisterTy SrcReg = RegSet::getEncodedXmm(SrcVar->getRegNum());
+      (Asm->*(Emitter.XmmXmm))(Ty, VarReg, SrcReg);
+      return;
+    }
+    typename Traits::Address SrcStackAddr =
+        static_cast<LoweringTy *>(Func->getTarget())
+            ->stackVarToAsmOperand(SrcVar);
+    (Asm->*(Emitter.XmmAddr))(Ty, VarReg, SrcStackAddr);
+    return;
+  }
+  if (const auto Mem = llvm::dyn_cast<MemOperandTy>(Src)) {
+    assert(Mem->getSegmentRegister() == MemOperandTy::DefaultSegment);
+    (Asm->*(Emitter.XmmAddr))(Ty, VarReg, Mem->toAsmAddress(Asm));
+    return;
+  }
+  if (const auto Imm = llvm::dyn_cast<ConstantInteger32>(Src)) {
+    (Asm->*(Emitter.XmmImm))(Ty, VarReg, Immediate(Imm->getValue()));
+    return;
+  }
+  llvm_unreachable("Unexpected operand type");
+}
+
+/// Emits a "register, operand" XMM instruction through the integrated
+/// assembler. Var must have a register. Src may be a Variable (register or
+/// stack), an X86OperandMem using the default segment, or any other Constant,
+/// which is addressed through Address::ofConstPool(); any other operand kind
+/// reaches llvm_unreachable.
+template <class Machine>
+void emitIASRegOpTyXMM(
+    const Cfg *Func, Type Ty, const Variable *Var, const Operand *Src,
+    const typename InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp
+        &Emitter) {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  using AssemblerTy = typename Traits::Assembler;
+  using LoweringTy = typename Traits::TargetLowering;
+  using MemOperandTy = typename Traits::X86OperandMem;
+  using RegSet = typename Traits::RegisterSet;
+  using XmmRegisterTy = typename RegSet::XmmRegister;
+  AssemblerTy *Asm = Func->getAssembler<AssemblerTy>();
+  assert(Var->hasReg());
+  XmmRegisterTy VarReg = RegSet::getEncodedXmm(Var->getRegNum());
+
+  if (const auto SrcVar = llvm::dyn_cast<Variable>(Src)) {
+    if (SrcVar->hasReg()) {
+      XmmRegisterTy SrcReg = RegSet::getEncodedXmm(SrcVar->getRegNum());
+      (Asm->*(Emitter.XmmXmm))(Ty, VarReg, SrcReg);
+      return;
+    }
+    typename Traits::Address SrcStackAddr =
+        static_cast<LoweringTy *>(Func->getTarget())
+            ->stackVarToAsmOperand(SrcVar);
+    (Asm->*(Emitter.XmmAddr))(Ty, VarReg, SrcStackAddr);
+    return;
+  }
+  if (const auto Mem = llvm::dyn_cast<MemOperandTy>(Src)) {
+    assert(Mem->getSegmentRegister() == MemOperandTy::DefaultSegment);
+    (Asm->*(Emitter.XmmAddr))(Ty, VarReg, Mem->toAsmAddress(Asm));
+    return;
+  }
+  if (const auto Imm = llvm::dyn_cast<Constant>(Src)) {
+    (Asm->*(Emitter.XmmAddr))(Ty, VarReg,
+                              Traits::Address::ofConstPool(Asm, Imm));
+    return;
+  }
+  llvm_unreachable("Unexpected operand type");
+}
+
+/// Emits a cast-style "register, operand" instruction whose destination and
+/// source register classes may differ. destEnc / srcEnc turn a register
+/// number into the encoded DReg_t / SReg_t. Dest must have a register; Src
+/// may be a Variable (register or stack) or an X86OperandMem. Any other
+/// operand kind reaches llvm_unreachable.
+template <class Machine, typename DReg_t, typename SReg_t,
+          DReg_t (*destEnc)(int32_t), SReg_t (*srcEnc)(int32_t)>
+void emitIASCastRegOp(const Cfg *Func, Type DispatchTy, const Variable *Dest,
+                      const Operand *Src,
+                      const typename InstX86Base<Machine>::Traits::Assembler::
+                          template CastEmitterRegOp<DReg_t, SReg_t> &Emitter) {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  using AssemblerTy = typename Traits::Assembler;
+  using LoweringTy = typename Traits::TargetLowering;
+  AssemblerTy *Asm = Func->getAssembler<AssemblerTy>();
+  assert(Dest->hasReg());
+  DReg_t DestReg = destEnc(Dest->getRegNum());
+
+  if (const auto SrcVar = llvm::dyn_cast<Variable>(Src)) {
+    if (SrcVar->hasReg()) {
+      SReg_t SrcReg = srcEnc(SrcVar->getRegNum());
+      (Asm->*(Emitter.RegReg))(DispatchTy, DestReg, SrcReg);
+      return;
+    }
+    typename Traits::Address SrcStackAddr =
+        static_cast<LoweringTy *>(Func->getTarget())
+            ->stackVarToAsmOperand(SrcVar);
+    (Asm->*(Emitter.RegAddr))(DispatchTy, DestReg, SrcStackAddr);
+    return;
+  }
+  if (const auto Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Src)) {
+    Mem->emitSegmentOverride(Asm);
+    (Asm->*(Emitter.RegAddr))(DispatchTy, DestReg, Mem->toAsmAddress(Asm));
+    return;
+  }
+  llvm_unreachable("Unexpected operand type");
+}
+
+/// Emits a three-operand instruction of the form "Dest, Src0, immediate".
+/// Dest must have a register and Src1 must be a ConstantInteger32 (enforced
+/// by llvm::cast). Src0 may be a Variable (register or stack) or an
+/// X86OperandMem; any other operand kind reaches llvm_unreachable.
+/// NOTE(review): Emitter is taken by value here while sibling helpers take a
+/// const reference - confirm against the declaration before changing it.
+template <class Machine, typename DReg_t, typename SReg_t,
+          DReg_t (*destEnc)(int32_t), SReg_t (*srcEnc)(int32_t)>
+void emitIASThreeOpImmOps(
+    const Cfg *Func, Type DispatchTy, const Variable *Dest, const Operand *Src0,
+    const Operand *Src1,
+    const typename InstX86Base<Machine>::Traits::Assembler::
+        template ThreeOpImmEmitter<DReg_t, SReg_t> Emitter) {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  using AssemblerTy = typename Traits::Assembler;
+  using LoweringTy = typename Traits::TargetLowering;
+  AssemblerTy *Asm = Func->getAssembler<AssemblerTy>();
+  // This only handles Dest being a register, and Src1 being an immediate.
+  assert(Dest->hasReg());
+  DReg_t DestReg = destEnc(Dest->getRegNum());
+  Immediate Imm(llvm::cast<ConstantInteger32>(Src1)->getValue());
+
+  if (const auto SrcVar = llvm::dyn_cast<Variable>(Src0)) {
+    if (SrcVar->hasReg()) {
+      SReg_t SrcReg = srcEnc(SrcVar->getRegNum());
+      (Asm->*(Emitter.RegRegImm))(DispatchTy, DestReg, SrcReg, Imm);
+      return;
+    }
+    typename Traits::Address SrcStackAddr =
+        static_cast<LoweringTy *>(Func->getTarget())
+            ->stackVarToAsmOperand(SrcVar);
+    (Asm->*(Emitter.RegAddrImm))(DispatchTy, DestReg, SrcStackAddr, Imm);
+    return;
+  }
+  if (const auto Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Src0)) {
+    Mem->emitSegmentOverride(Asm);
+    (Asm->*(Emitter.RegAddrImm))(DispatchTy, DestReg, Mem->toAsmAddress(Asm),
+                                 Imm);
+    return;
+  }
+  llvm_unreachable("Unexpected operand type");
+}
+
+/// Emits a mov-like XMM instruction through the integrated assembler.
+/// When Dest has no register, it is addressed through stackVarToAsmOperand
+/// and Src must be a register-allocated Variable. When Dest has a register,
+/// Src may be a Variable (register or stack) or an X86OperandMem using the
+/// default segment; any other operand kind reaches llvm_unreachable.
+/// NOTE(review): Emitter is taken by value here while sibling helpers take a
+/// const reference - confirm against the declaration before changing it.
+template <class Machine>
+void emitIASMovlikeXMM(
+    const Cfg *Func, const Variable *Dest, const Operand *Src,
+    const typename InstX86Base<Machine>::Traits::Assembler::XmmEmitterMovOps
+        Emitter) {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  using AssemblerTy = typename Traits::Assembler;
+  using LoweringTy = typename Traits::TargetLowering;
+  using MemOperandTy = typename Traits::X86OperandMem;
+  using RegSet = typename Traits::RegisterSet;
+  AssemblerTy *Asm = Func->getAssembler<AssemblerTy>();
+
+  if (!Dest->hasReg()) {
+    typename Traits::Address StackAddr(
+        static_cast<LoweringTy *>(Func->getTarget())
+            ->stackVarToAsmOperand(Dest));
+    // Src must be a register in this case.
+    const auto SrcVar = llvm::cast<Variable>(Src);
+    assert(SrcVar->hasReg());
+    (Asm->*(Emitter.AddrXmm))(StackAddr,
+                              RegSet::getEncodedXmm(SrcVar->getRegNum()));
+    return;
+  }
+
+  typename RegSet::XmmRegister DestReg =
+      RegSet::getEncodedXmm(Dest->getRegNum());
+  if (const auto SrcVar = llvm::dyn_cast<Variable>(Src)) {
+    if (SrcVar->hasReg()) {
+      (Asm->*(Emitter.XmmXmm))(DestReg,
+                               RegSet::getEncodedXmm(SrcVar->getRegNum()));
+      return;
+    }
+    typename Traits::Address StackAddr(
+        static_cast<LoweringTy *>(Func->getTarget())
+            ->stackVarToAsmOperand(SrcVar));
+    (Asm->*(Emitter.XmmAddr))(DestReg, StackAddr);
+    return;
+  }
+  if (const auto SrcMem = llvm::dyn_cast<MemOperandTy>(Src)) {
+    assert(SrcMem->getSegmentRegister() == MemOperandTy::DefaultSegment);
+    (Asm->*(Emitter.XmmAddr))(DestReg, SrcMem->toAsmAddress(Asm));
+    return;
+  }
+  llvm_unreachable("Unexpected operand type");
+}
+
+/// Textual emission: "\tsqrt<suffix>\t<src>, <dest>", where <suffix> is the
+/// SdSsString registered for the source's type. The source type is asserted
+/// to be a scalar floating-point type. Produces no output when
+/// BuildDefs::dump() is false.
+template <class Machine>
+void InstX86Sqrtss<Machine>::emit(const Cfg *Func) const {
+  if (BuildDefs::dump()) {
+    using Traits = typename InstX86Base<Machine>::Traits;
+    Ostream &Out = Func->getContext()->getStrEmit();
+    assert(this->getSrcSize() == 1);
+    Type SrcTy = this->getSrc(0)->getType();
+    assert(isScalarFloatingType(SrcTy));
+    Out << "\tsqrt" << Traits::TypeAttributes[SrcTy].SdSsString << "\t";
+    this->getSrc(0)->emit(Func);
+    Out << ", ";
+    this->getDest()->emit(Func);
+  }
+}
+
+/// Textual emission: builds the opcode "add" + the SdSsString registered for
+/// the Dest type and hands it to emitTwoAddress(). Does nothing when
+/// BuildDefs::dump() is false.
+template <class Machine>
+void InstX86Addss<Machine>::emit(const Cfg *Func) const {
+  if (BuildDefs::dump()) {
+    using Traits = typename InstX86Base<Machine>::Traits;
+    const Type DestTy = this->getDest()->getType();
+    char Opcode[30];
+    snprintf(Opcode, llvm::array_lengthof(Opcode), "add%s",
+             Traits::TypeAttributes[DestTy].SdSsString);
+    this->emitTwoAddress(Opcode, this, Func);
+  }
+}
+
+/// Textual emission: builds the opcode "padd" + the PackString registered for
+/// the Dest type and hands it to emitTwoAddress(). Does nothing when
+/// BuildDefs::dump() is false.
+template <class Machine>
+void InstX86Padd<Machine>::emit(const Cfg *Func) const {
+  if (BuildDefs::dump()) {
+    using Traits = typename InstX86Base<Machine>::Traits;
+    const Type DestTy = this->getDest()->getType();
+    char Opcode[30];
+    snprintf(Opcode, llvm::array_lengthof(Opcode), "padd%s",
+             Traits::TypeAttributes[DestTy].PackString);
+    this->emitTwoAddress(Opcode, this, Func);
+  }
+}
+
+/// Builds the mnemonic "pmull" + PackString (looked up by the destination
+/// type) and hands it to emitTwoAddress(). Asserts that the destination type
+/// is v4i32 or v8i16, and that any type other than v8i16 is only used when the
+/// target's instruction set is at least SSE4_1. Does nothing when
+/// BuildDefs::dump() is false.
+template <class Machine>
+void InstX86Pmull<Machine>::emit(const Cfg *Func) const {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  if (!BuildDefs::dump())
+    return;
+  const Type DestTy = this->getDest()->getType();
+  const bool IsV8i16 = DestTy == IceType_v8i16;
+  const bool TypesAreValid = DestTy == IceType_v4i32 || IsV8i16;
+  const bool InstructionSetIsValid =
+      IsV8i16 ||
+      static_cast<typename Traits::TargetLowering *>(Func->getTarget())
+              ->getInstructionSet() >= Traits::SSE4_1;
+  // The flags are only consumed by the asserts; silence unused warnings.
+  (void)TypesAreValid;
+  (void)InstructionSetIsValid;
+  assert(TypesAreValid);
+  assert(InstructionSetIsValid);
+  char Mnemonic[30];
+  snprintf(Mnemonic, llvm::array_lengthof(Mnemonic), "pmull%s",
+           Traits::TypeAttributes[DestTy].PackString);
+  this->emitTwoAddress(Mnemonic, this, Func);
+}
+
+/// Integrated-assembler emission: after the same type / instruction-set
+/// checks as emit(), forwards the destination, source operand 1 and the
+/// instruction's Emitter to emitIASRegOpTyXMM() using the vector's element
+/// type.
+template <class Machine>
+void InstX86Pmull<Machine>::emitIAS(const Cfg *Func) const {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  const Type DestTy = this->getDest()->getType();
+  const bool IsV8i16 = DestTy == IceType_v8i16;
+  const bool TypesAreValid = DestTy == IceType_v4i32 || IsV8i16;
+  const bool InstructionSetIsValid =
+      IsV8i16 ||
+      static_cast<typename Traits::TargetLowering *>(Func->getTarget())
+              ->getInstructionSet() >= Traits::SSE4_1;
+  // The flags are only consumed by the asserts; silence unused warnings.
+  (void)TypesAreValid;
+  (void)InstructionSetIsValid;
+  assert(TypesAreValid);
+  assert(InstructionSetIsValid);
+  assert(this->getSrcSize() == 2);
+  emitIASRegOpTyXMM<Machine>(Func, typeElementType(DestTy), this->getDest(),
+                             this->getSrc(1), this->Emitter);
+}
+
+/// Builds the mnemonic "sub" + SdSsString (looked up by the destination type)
+/// and hands it to emitTwoAddress(). Does nothing when BuildDefs::dump() is
+/// false.
+template <class Machine>
+void InstX86Subss<Machine>::emit(const Cfg *Func) const {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  if (!BuildDefs::dump())
+    return;
+  const Type DestTy = this->getDest()->getType();
+  char Mnemonic[30];
+  snprintf(Mnemonic, llvm::array_lengthof(Mnemonic), "sub%s",
+           Traits::TypeAttributes[DestTy].SdSsString);
+  this->emitTwoAddress(Mnemonic, this, Func);
+}
+
+/// Builds the mnemonic "psub" + PackString (looked up by the destination type)
+/// and hands it to emitTwoAddress(). Does nothing when BuildDefs::dump() is
+/// false.
+template <class Machine>
+void InstX86Psub<Machine>::emit(const Cfg *Func) const {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  if (!BuildDefs::dump())
+    return;
+  const Type DestTy = this->getDest()->getType();
+  char Mnemonic[30];
+  snprintf(Mnemonic, llvm::array_lengthof(Mnemonic), "psub%s",
+           Traits::TypeAttributes[DestTy].PackString);
+  this->emitTwoAddress(Mnemonic, this, Func);
+}
+
+/// Builds the mnemonic "mul" + SdSsString (looked up by the destination type)
+/// and hands it to emitTwoAddress(). Does nothing when BuildDefs::dump() is
+/// false.
+template <class Machine>
+void InstX86Mulss<Machine>::emit(const Cfg *Func) const {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  if (!BuildDefs::dump())
+    return;
+  const Type DestTy = this->getDest()->getType();
+  char Mnemonic[30];
+  snprintf(Mnemonic, llvm::array_lengthof(Mnemonic), "mul%s",
+           Traits::TypeAttributes[DestTy].SdSsString);
+  this->emitTwoAddress(Mnemonic, this, Func);
+}
+
+/// Emits the instruction in two-address form using this->Opcode, after
+/// asserting that source operands 0 and 1 are both of type v4i32. Does nothing
+/// when BuildDefs::dump() is false.
+template <class Machine>
+void InstX86Pmuludq<Machine>::emit(const Cfg *Func) const {
+  if (BuildDefs::dump()) {
+    assert(this->getSrc(0)->getType() == IceType_v4i32 &&
+           this->getSrc(1)->getType() == IceType_v4i32);
+    this->emitTwoAddress(this->Opcode, this, Func);
+  }
+}
+
+/// Builds the mnemonic "div" + SdSsString (looked up by the destination type)
+/// and hands it to emitTwoAddress(). Does nothing when BuildDefs::dump() is
+/// false.
+template <class Machine>
+void InstX86Divss<Machine>::emit(const Cfg *Func) const {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  if (!BuildDefs::dump())
+    return;
+  const Type DestTy = this->getDest()->getType();
+  char Mnemonic[30];
+  snprintf(Mnemonic, llvm::array_lengthof(Mnemonic), "div%s",
+           Traits::TypeAttributes[DestTy].SdSsString);
+  this->emitTwoAddress(Mnemonic, this, Func);
+}
+
+/// Writes "\t<Opcode><width>\t<src1>" to the emit stream; the width string is
+/// derived from the type of source operand 1, which is the only operand
+/// printed. Does nothing when BuildDefs::dump() is false.
+template <class Machine> void InstX86Div<Machine>::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Out = Func->getContext()->getStrEmit();
+  assert(this->getSrcSize() == 3);
+  Operand *Printed = this->getSrc(1);
+  Out << "\t" << this->Opcode;
+  Out << this->getWidthString(Printed->getType()) << "\t";
+  Printed->emit(Func);
+}
+
+/// Integrated-assembler emission: passes source operand 1 (and its type) to
+/// emitIASOpTyGPR() together with a one-operand emitter table whose entries
+/// both name the assembler's div overloads.
+template <class Machine>
+void InstX86Div<Machine>::emitIAS(const Cfg *Func) const {
+  using AsmTy = typename InstX86Base<Machine>::Traits::Assembler;
+  assert(this->getSrcSize() == 3);
+  static const typename AsmTy::GPREmitterOneOp DivEmitter = {&AsmTy::div,
+                                                             &AsmTy::div};
+  const Operand *Src1 = this->getSrc(1);
+  emitIASOpTyGPR<Machine>(Func, Src1->getType(), Src1, DivEmitter);
+}
+
+/// Writes "\t<Opcode><width>\t<src1>" to the emit stream; the width string is
+/// derived from the type of source operand 1, which is the only operand
+/// printed. Does nothing when BuildDefs::dump() is false.
+template <class Machine>
+void InstX86Idiv<Machine>::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Out = Func->getContext()->getStrEmit();
+  assert(this->getSrcSize() == 3);
+  Operand *Printed = this->getSrc(1);
+  Out << "\t" << this->Opcode;
+  Out << this->getWidthString(Printed->getType()) << "\t";
+  Printed->emit(Func);
+}
+
+/// Integrated-assembler emission: passes source operand 1 (and its type) to
+/// emitIASOpTyGPR() together with a one-operand emitter table whose entries
+/// both name the assembler's idiv overloads.
+template <class Machine>
+void InstX86Idiv<Machine>::emitIAS(const Cfg *Func) const {
+  using AsmTy = typename InstX86Base<Machine>::Traits::Assembler;
+  assert(this->getSrcSize() == 3);
+  static const typename AsmTy::GPREmitterOneOp IdivEmitter = {&AsmTy::idiv,
+                                                              &AsmTy::idiv};
+  const Operand *Src1 = this->getSrc(1);
+  emitIASOpTyGPR<Machine>(Func, Src1->getType(), Src1, IdivEmitter);
+}
+
+/// Shared text emitter for the variable blend instructions. pblendvb and
+/// blendvps take xmm0 as a final implicit argument, so source operand 2 is
+/// asserted to live in xmm0 and is not printed; the output is
+/// "\t<Opcode>\t<src1>, <dest>". Does nothing when BuildDefs::dump() is false.
+template <class Machine>
+void emitVariableBlendInst(const char *Opcode, const Inst *Inst,
+                           const Cfg *Func) {
+  if (BuildDefs::dump()) {
+    Ostream &Out = Func->getContext()->getStrEmit();
+    assert(Inst->getSrcSize() == 3);
+    assert(llvm::cast<Variable>(Inst->getSrc(2))->getRegNum() ==
+           InstX86Base<Machine>::Traits::RegisterSet::Reg_xmm0);
+    Out << "\t" << Opcode << "\t";
+    Inst->getSrc(1)->emit(Func);
+    Out << ", ";
+    Inst->getDest()->emit(Func);
+  }
+}
+
+/// Shared integrated-assembler emitter for the variable blend instructions.
+/// Asserts that there are three sources and that source operand 2 is in xmm0,
+/// then forwards the destination (and its type) plus source operand 1 to
+/// emitIASRegOpTyXMM() with the supplied \p Emitter.
+template <class Machine>
+void emitIASVariableBlendInst(
+    const Inst *Inst, const Cfg *Func,
+    const typename InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp
+        &Emitter) {
+  assert(Inst->getSrcSize() == 3);
+  assert(llvm::cast<Variable>(Inst->getSrc(2))->getRegNum() ==
+         InstX86Base<Machine>::Traits::RegisterSet::Reg_xmm0);
+  const Variable *Target = Inst->getDest();
+  emitIASRegOpTyXMM<Machine>(Func, Target->getType(), Target, Inst->getSrc(1),
+                             Emitter);
+}
+
+/// Text emission for blendvps: asserts that the target's instruction set is at
+/// least SSE4_1, then delegates to emitVariableBlendInst() with this->Opcode.
+/// Does nothing when BuildDefs::dump() is false.
+template <class Machine>
+void InstX86Blendvps<Machine>::emit(const Cfg *Func) const {
+  if (BuildDefs::dump()) {
+    assert(
+        static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
+            Func->getTarget())
+            ->getInstructionSet() >= InstX86Base<Machine>::Traits::SSE4_1);
+    emitVariableBlendInst<Machine>(this->Opcode, this, Func);
+  }
+}
+
+/// Integrated-assembler emission for blendvps: asserts that the target's
+/// instruction set is at least SSE4_1, then delegates to
+/// emitIASVariableBlendInst() with an emitter table whose entries both name
+/// the assembler's blendvps overloads.
+template <class Machine>
+void InstX86Blendvps<Machine>::emitIAS(const Cfg *Func) const {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  using AsmTy = typename Traits::Assembler;
+  assert(static_cast<typename Traits::TargetLowering *>(Func->getTarget())
+             ->getInstructionSet() >= Traits::SSE4_1);
+  static const typename AsmTy::XmmEmitterRegOp BlendEmitter = {
+      &AsmTy::blendvps, &AsmTy::blendvps};
+  emitIASVariableBlendInst<Machine>(this, Func, BlendEmitter);
+}
+
+/// Text emission for pblendvb: asserts that the target's instruction set is at
+/// least SSE4_1, then delegates to emitVariableBlendInst() with this->Opcode.
+/// Does nothing when BuildDefs::dump() is false.
+template <class Machine>
+void InstX86Pblendvb<Machine>::emit(const Cfg *Func) const {
+  if (BuildDefs::dump()) {
+    assert(
+        static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
+            Func->getTarget())
+            ->getInstructionSet() >= InstX86Base<Machine>::Traits::SSE4_1);
+    emitVariableBlendInst<Machine>(this->Opcode, this, Func);
+  }
+}
+
+/// Integrated-assembler emission for pblendvb: asserts that the target's
+/// instruction set is at least SSE4_1, then delegates to
+/// emitIASVariableBlendInst() with an emitter table whose entries both name
+/// the assembler's pblendvb overloads.
+template <class Machine>
+void InstX86Pblendvb<Machine>::emitIAS(const Cfg *Func) const {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  using AsmTy = typename Traits::Assembler;
+  assert(static_cast<typename Traits::TargetLowering *>(Func->getTarget())
+             ->getInstructionSet() >= Traits::SSE4_1);
+  static const typename AsmTy::XmmEmitterRegOp BlendEmitter = {
+      &AsmTy::pblendvb, &AsmTy::pblendvb};
+  emitIASVariableBlendInst<Machine>(this, Func, BlendEmitter);
+}
+
+/// Text emission for imul. Three shapes are produced:
+///  - byte-sized destination: "\timulb\t<src1>", with source operand 0
+///    asserted to be a Variable in eax;
+///  - source operand 1 is a Constant: "\timul<width>\t<src1>, <src0>, <dest>";
+///  - otherwise: the generic two-address form with mnemonic "imul".
+/// Does nothing when BuildDefs::dump() is false.
+template <class Machine>
+void InstX86Imul<Machine>::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Out = Func->getContext()->getStrEmit();
+  assert(this->getSrcSize() == 2);
+  Variable *Dest = this->getDest();
+
+  if (isByteSizedArithType(Dest->getType())) {
+    // The 8-bit version of imul only allows the form "imul r/m8".
+    const auto Src0Var = llvm::dyn_cast<Variable>(this->getSrc(0));
+    (void)Src0Var;
+    assert(Src0Var &&
+           Src0Var->getRegNum() ==
+               InstX86Base<Machine>::Traits::RegisterSet::Reg_eax);
+    Out << "\timulb\t";
+    this->getSrc(1)->emit(Func);
+    return;
+  }
+
+  if (!llvm::isa<Constant>(this->getSrc(1))) {
+    this->emitTwoAddress("imul", this, Func);
+    return;
+  }
+
+  // Constant second source: print all three operands explicitly.
+  Out << "\timul" << this->getWidthString(Dest->getType()) << "\t";
+  this->getSrc(1)->emit(Func);
+  Out << ", ";
+  this->getSrc(0)->emit(Func);
+  Out << ", ";
+  Dest->emit(Func);
+}
+
+/// Integrated-assembler emission for imul. For a byte-sized destination type,
+/// source operand 0 is asserted to be a Variable in eax and source operand 1
+/// is emitted through the one-operand GPR emitter; otherwise the destination
+/// is asserted to be source operand 0 and the reg/op GPR emitter is used.
+template <class Machine>
+void InstX86Imul<Machine>::emitIAS(const Cfg *Func) const {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  using AsmTy = typename Traits::Assembler;
+  assert(this->getSrcSize() == 2);
+  const Variable *Dest = this->getDest();
+  const Type DestTy = Dest->getType();
+  const Operand *Src1 = this->getSrc(1);
+
+  if (!isByteSizedArithType(DestTy)) {
+    // We only use imul as a two-address instruction even though
+    // there is a 3 operand version when one of the operands is a constant.
+    assert(Dest == this->getSrc(0));
+    static const typename AsmTy::GPREmitterRegOp RegOpEmitter = {
+        &AsmTy::imul, &AsmTy::imul, &AsmTy::imul};
+    emitIASRegOpTyGPR<Machine>(Func, DestTy, Dest, Src1, RegOpEmitter);
+    return;
+  }
+
+  // The 8-bit version of imul only allows the form "imul r/m8".
+  const auto Src0Var = llvm::dyn_cast<Variable>(this->getSrc(0));
+  (void)Src0Var;
+  assert(Src0Var && Src0Var->getRegNum() == Traits::RegisterSet::Reg_eax);
+  static const typename AsmTy::GPREmitterOneOp OneOpEmitter = {&AsmTy::imul,
+                                                               &AsmTy::imul};
+  emitIASOpTyGPR<Machine>(Func, DestTy, this->getSrc(1), OneOpEmitter);
+}
+
+/// Integrated-assembler emission for insertps. Asserts three sources, an
+/// instruction set of at least SSE4_1, and that the destination is also
+/// source operand 0; then forwards the destination and source operands 1 and
+/// 2 to emitIASThreeOpImmOps() with XMM register encoders on both sides.
+template <class Machine>
+void InstX86Insertps<Machine>::emitIAS(const Cfg *Func) const {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  using AsmTy = typename Traits::Assembler;
+  using XmmReg = typename Traits::RegisterSet::XmmRegister;
+  assert(this->getSrcSize() == 3);
+  assert(static_cast<typename Traits::TargetLowering *>(Func->getTarget())
+             ->getInstructionSet() >= Traits::SSE4_1);
+  const Variable *Dest = this->getDest();
+  assert(Dest == this->getSrc(0));
+  static const typename AsmTy::template ThreeOpImmEmitter<XmmReg, XmmReg>
+      InsertEmitter = {&AsmTy::insertps, &AsmTy::insertps};
+  emitIASThreeOpImmOps<Machine, XmmReg, XmmReg,
+                       Traits::RegisterSet::getEncodedXmm,
+                       Traits::RegisterSet::getEncodedXmm>(
+      Func, Dest->getType(), Dest, this->getSrc(1), this->getSrc(2),
+      InsertEmitter);
+}
+
+/// Text emission for the sign-extension instruction. The source must be a
+/// Variable in eax; the mnemonic is chosen by the source type:
+///   i8  -> "\tcbtw" (destination asserted to be eax)
+///   i16 -> "\tcwtd" (destination asserted to be edx)
+///   i32 -> "\tcltd" (destination asserted to be edx)
+/// Any other source type is unreachable. Does nothing when BuildDefs::dump()
+/// is false.
+template <class Machine>
+void InstX86Cbwdq<Machine>::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Out = Func->getContext()->getStrEmit();
+  assert(this->getSrcSize() == 1);
+  Operand *Source = this->getSrc(0);
+  assert(llvm::isa<Variable>(Source));
+  assert(llvm::cast<Variable>(Source)->getRegNum() ==
+         InstX86Base<Machine>::Traits::RegisterSet::Reg_eax);
+  switch (Source->getType()) {
+  case IceType_i8:
+    assert(this->getDest()->getRegNum() ==
+           InstX86Base<Machine>::Traits::RegisterSet::Reg_eax);
+    Out << "\tcbtw";
+    break;
+  case IceType_i16:
+    assert(this->getDest()->getRegNum() ==
+           InstX86Base<Machine>::Traits::RegisterSet::Reg_edx);
+    Out << "\tcwtd";
+    break;
+  case IceType_i32:
+    assert(this->getDest()->getRegNum() ==
+           InstX86Base<Machine>::Traits::RegisterSet::Reg_edx);
+    Out << "\tcltd";
+    break;
+  default:
+    llvm_unreachable("unexpected source type!");
+  }
+}
+
+/// Integrated-assembler emission for the sign-extension instruction. The
+/// source must be a Variable in eax; the assembler call is chosen by the
+/// source type:
+///   i8  -> cbw() (destination asserted to be eax)
+///   i16 -> cwd() (destination asserted to be edx)
+///   i32 -> cdq() (destination asserted to be edx)
+/// Any other source type is unreachable.
+template <class Machine>
+void InstX86Cbwdq<Machine>::emitIAS(const Cfg *Func) const {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  typename Traits::Assembler *Asm =
+      Func->getAssembler<typename Traits::Assembler>();
+  assert(this->getSrcSize() == 1);
+  Operand *Source = this->getSrc(0);
+  assert(llvm::isa<Variable>(Source));
+  assert(llvm::cast<Variable>(Source)->getRegNum() ==
+         Traits::RegisterSet::Reg_eax);
+  switch (Source->getType()) {
+  case IceType_i8:
+    assert(this->getDest()->getRegNum() == Traits::RegisterSet::Reg_eax);
+    Asm->cbw();
+    break;
+  case IceType_i16:
+    assert(this->getDest()->getRegNum() == Traits::RegisterSet::Reg_edx);
+    Asm->cwd();
+    break;
+  case IceType_i32:
+    assert(this->getDest()->getRegNum() == Traits::RegisterSet::Reg_edx);
+    Asm->cdq();
+    break;
+  default:
+    llvm_unreachable("unexpected source type!");
+  }
+}
+
+/// Text emission for mul: writes "\tmul<width>\t<src1>", with the width string
+/// taken from the destination type. Source operand 0 is asserted to be a
+/// Variable in eax and the destination is asserted to be in eax. Does nothing
+/// when BuildDefs::dump() is false.
+template <class Machine> void InstX86Mul<Machine>::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Out = Func->getContext()->getStrEmit();
+  assert(this->getSrcSize() == 2);
+  assert(llvm::isa<Variable>(this->getSrc(0)));
+  assert(llvm::cast<Variable>(this->getSrc(0))->getRegNum() ==
+         InstX86Base<Machine>::Traits::RegisterSet::Reg_eax);
+  // TODO: allow edx?
+  assert(this->getDest()->getRegNum() ==
+         InstX86Base<Machine>::Traits::RegisterSet::Reg_eax);
+  Out << "\tmul";
+  Out << this->getWidthString(this->getDest()->getType()) << "\t";
+  this->getSrc(1)->emit(Func);
+}
+
+/// Integrated-assembler emission for mul: after the same register checks as
+/// emit(), passes source operand 1 (and its type) to emitIASOpTyGPR() with a
+/// one-operand emitter table whose entries both name the assembler's mul
+/// overloads.
+template <class Machine>
+void InstX86Mul<Machine>::emitIAS(const Cfg *Func) const {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  using AsmTy = typename Traits::Assembler;
+  assert(this->getSrcSize() == 2);
+  assert(llvm::isa<Variable>(this->getSrc(0)));
+  assert(llvm::cast<Variable>(this->getSrc(0))->getRegNum() ==
+         Traits::RegisterSet::Reg_eax);
+  // TODO: allow edx?
+  assert(this->getDest()->getRegNum() == Traits::RegisterSet::Reg_eax);
+  static const typename AsmTy::GPREmitterOneOp MulEmitter = {&AsmTy::mul,
+                                                             &AsmTy::mul};
+  const Operand *Src1 = this->getSrc(1);
+  emitIASOpTyGPR<Machine>(Func, Src1->getType(), Src1, MulEmitter);
+}
+
+/// Dumps the instruction as "<dest> = mul.<dest type> <sources>" on the dump
+/// stream. Does nothing when BuildDefs::dump() is false.
+template <class Machine> void InstX86Mul<Machine>::dump(const Cfg *Func) const {
+  if (BuildDefs::dump()) {
+    Ostream &Out = Func->getContext()->getStrDump();
+    this->dumpDest(Func);
+    Out << " = mul.";
+    Out << this->getDest()->getType() << " ";
+    this->dumpSources(Func);
+  }
+}
+
+/// Text emission for shld: writes "\tshld<width>\t<count>, <src1>, <dest>".
+/// When source operand 2 is a Variable it is asserted to be in ecx and
+/// printed as "%cl"; otherwise the operand prints itself. The destination is
+/// asserted to be source operand 0. Does nothing when BuildDefs::dump() is
+/// false.
+template <class Machine>
+void InstX86Shld<Machine>::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Out = Func->getContext()->getStrEmit();
+  Variable *Dest = this->getDest();
+  assert(this->getSrcSize() == 3);
+  assert(Dest == this->getSrc(0));
+  Out << "\tshld" << this->getWidthString(Dest->getType()) << "\t";
+  Operand *Count = this->getSrc(2);
+  if (const auto CountVar = llvm::dyn_cast<Variable>(Count)) {
+    (void)CountVar;
+    assert(CountVar->getRegNum() ==
+           InstX86Base<Machine>::Traits::RegisterSet::Reg_ecx);
+    Out << "%cl";
+  } else {
+    Count->emit(Func);
+  }
+  Out << ", ";
+  this->getSrc(1)->emit(Func);
+  Out << ", ";
+  Dest->emit(Func);
+}
+
+/// Integrated-assembler emission for shld: asserts three sources with the
+/// destination equal to source operand 0, then forwards the destination and
+/// source operands 1 and 2 to emitIASGPRShiftDouble() with an emitter table
+/// whose entries both name the assembler's shld overloads.
+template <class Machine>
+void InstX86Shld<Machine>::emitIAS(const Cfg *Func) const {
+  using AsmTy = typename InstX86Base<Machine>::Traits::Assembler;
+  assert(this->getSrcSize() == 3);
+  assert(this->getDest() == this->getSrc(0));
+  static const typename AsmTy::GPREmitterShiftD ShiftEmitter = {&AsmTy::shld,
+                                                                &AsmTy::shld};
+  const Variable *Dest = this->getDest();
+  emitIASGPRShiftDouble<Machine>(Func, Dest, this->getSrc(1), this->getSrc(2),
+                                 ShiftEmitter);
+}
+
+/// Dumps the instruction as "<dest> = shld.<dest type> <sources>" on the dump
+/// stream. Does nothing when BuildDefs::dump() is false.
+template <class Machine>
+void InstX86Shld<Machine>::dump(const Cfg *Func) const {
+  if (BuildDefs::dump()) {
+    Ostream &Out = Func->getContext()->getStrDump();
+    this->dumpDest(Func);
+    Out << " = shld.";
+    Out << this->getDest()->getType() << " ";
+    this->dumpSources(Func);
+  }
+}
+
+/// Text emission for shrd: writes "\tshrd<width>\t<count>, <src1>, <dest>".
+/// When source operand 2 is a Variable it is asserted to be in ecx and
+/// printed as "%cl"; otherwise the operand prints itself. The destination is
+/// asserted to be source operand 0. Does nothing when BuildDefs::dump() is
+/// false.
+template <class Machine>
+void InstX86Shrd<Machine>::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Out = Func->getContext()->getStrEmit();
+  Variable *Dest = this->getDest();
+  assert(this->getSrcSize() == 3);
+  assert(Dest == this->getSrc(0));
+  Out << "\tshrd" << this->getWidthString(Dest->getType()) << "\t";
+  Operand *Count = this->getSrc(2);
+  if (const auto CountVar = llvm::dyn_cast<Variable>(Count)) {
+    (void)CountVar;
+    assert(CountVar->getRegNum() ==
+           InstX86Base<Machine>::Traits::RegisterSet::Reg_ecx);
+    Out << "%cl";
+  } else {
+    Count->emit(Func);
+  }
+  Out << ", ";
+  this->getSrc(1)->emit(Func);
+  Out << ", ";
+  Dest->emit(Func);
+}
+
+/// Integrated-assembler emission for shrd: asserts three sources with the
+/// destination equal to source operand 0, then forwards the destination and
+/// source operands 1 and 2 to emitIASGPRShiftDouble() with an emitter table
+/// whose entries both name the assembler's shrd overloads.
+template <class Machine>
+void InstX86Shrd<Machine>::emitIAS(const Cfg *Func) const {
+  using AsmTy = typename InstX86Base<Machine>::Traits::Assembler;
+  assert(this->getSrcSize() == 3);
+  assert(this->getDest() == this->getSrc(0));
+  static const typename AsmTy::GPREmitterShiftD ShiftEmitter = {&AsmTy::shrd,
+                                                                &AsmTy::shrd};
+  const Variable *Dest = this->getDest();
+  emitIASGPRShiftDouble<Machine>(Func, Dest, this->getSrc(1), this->getSrc(2),
+                                 ShiftEmitter);
+}
+
+/// Dumps the instruction as "<dest> = shrd.<dest type> <sources>" on the dump
+/// stream. Does nothing when BuildDefs::dump() is false.
+template <class Machine>
+void InstX86Shrd<Machine>::dump(const Cfg *Func) const {
+  if (BuildDefs::dump()) {
+    Ostream &Out = Func->getContext()->getStrDump();
+    this->dumpDest(Func);
+    Out << " = shrd.";
+    Out << this->getDest()->getType() << " ";
+    this->dumpSources(Func);
+  }
+}
+
+/// Text emission for cmov: writes "\tcmov<cond><width>\t<src1>, <dest>", where
+/// <cond> is the DisplayString of the InstBrAttributes entry for Condition and
+/// <width> comes from the destination type. Does nothing when
+/// BuildDefs::dump() is false.
+///
+/// All preconditions are checked before anything is written to the stream:
+/// exactly two sources (source operand 1 is read below), a condition other
+/// than Br_None, and a destination that has a register.
+template <class Machine>
+void InstX86Cmov<Machine>::emit(const Cfg *Func) const {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Str = Func->getContext()->getStrEmit();
+  Variable *Dest = this->getDest();
+  // getSrc(1) is read below, so check the operand count up front; emitIAS()
+  // asserts the same count.
+  assert(this->getSrcSize() == 2);
+  assert(Condition != Traits::Cond::Br_None);
+  assert(Dest->hasReg());
+  Str << "\t";
+  Str << "cmov" << Traits::InstBrAttributes[Condition].DisplayString
+      << this->getWidthString(Dest->getType()) << "\t";
+  this->getSrc(1)->emit(Func);
+  Str << ", ";
+  Dest->emit(Func);
+}
+
+/// Integrated-assembler emission for cmov. The destination must have a
+/// register and the source type must be i16 or i32. The second cmov operand
+/// is chosen from source operand 1:
+///  - a Variable with a register: its encoded GPR;
+///  - a Variable without a register: the address returned by the target's
+///    stackVarToAsmOperand();
+///  - an X86OperandMem (default segment asserted): its toAsmAddress().
+/// Any other operand kind is unreachable.
+template <class Machine>
+void InstX86Cmov<Machine>::emitIAS(const Cfg *Func) const {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  assert(Condition != Traits::Cond::Br_None);
+  assert(this->getDest()->hasReg());
+  assert(this->getSrcSize() == 2);
+  Operand *Src = this->getSrc(1);
+  const Type SrcTy = Src->getType();
+  assert(SrcTy == IceType_i16 || SrcTy == IceType_i32);
+  typename Traits::Assembler *Asm =
+      Func->getAssembler<typename Traits::Assembler>();
+  // Every form below writes to the same destination register.
+  const auto DestReg =
+      Traits::RegisterSet::getEncodedGPR(this->getDest()->getRegNum());
+
+  if (const auto *SrcVar = llvm::dyn_cast<Variable>(Src)) {
+    if (!SrcVar->hasReg()) {
+      Asm->cmov(SrcTy, Condition, DestReg,
+                static_cast<typename Traits::TargetLowering *>(
+                    Func->getTarget())
+                    ->stackVarToAsmOperand(SrcVar));
+      return;
+    }
+    Asm->cmov(SrcTy, Condition, DestReg,
+              Traits::RegisterSet::getEncodedGPR(SrcVar->getRegNum()));
+    return;
+  }
+
+  if (const auto Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Src)) {
+    assert(Mem->getSegmentRegister() == Traits::X86OperandMem::DefaultSegment);
+    Asm->cmov(SrcTy, Condition, DestReg, Mem->toAsmAddress(Asm));
+    return;
+  }
+
+  llvm_unreachable("Unexpected operand type");
+}
+
+/// Dumps the instruction as "cmov<cond>.<dest type> <dest>, <sources>" on the
+/// dump stream, where <cond> is the DisplayString of the InstBrAttributes
+/// entry for Condition. Does nothing when BuildDefs::dump() is false.
+template <class Machine>
+void InstX86Cmov<Machine>::dump(const Cfg *Func) const {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Out = Func->getContext()->getStrDump();
+  Out << "cmov" << Traits::InstBrAttributes[Condition].DisplayString << ".";
+  Out << this->getDest()->getType() << " ";
+  this->dumpDest(Func);
+  Out << ", ";
+  this->dumpSources(Func);
+}
+
+/// Text emission for cmpps: writes "\tcmp<pred>ps\t<src1>, <dest>", where
+/// <pred> is the EmitString of the InstCmppsAttributes entry for Condition.
+/// Condition is asserted to be below Cmpps_Invalid. Does nothing when
+/// BuildDefs::dump() is false.
+template <class Machine>
+void InstX86Cmpps<Machine>::emit(const Cfg *Func) const {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Out = Func->getContext()->getStrEmit();
+  assert(this->getSrcSize() == 2);
+  assert(Condition < Traits::Cond::Cmpps_Invalid);
+  Out << "\t";
+  Out << "cmp";
+  Out << Traits::InstCmppsAttributes[Condition].EmitString;
+  Out << "ps"
+      << "\t";
+  this->getSrc(1)->emit(Func);
+  Out << ", ";
+  this->getDest()->emit(Func);
+}
+
+/// Integrated-assembler emission for cmpps. Source operand 1 must be a
+/// Variable; when it has a register the register/register form is emitted,
+/// otherwise the address returned by the target's stackVarToAsmOperand() is
+/// used as the second operand.
+template <class Machine>
+void InstX86Cmpps<Machine>::emitIAS(const Cfg *Func) const {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  typename Traits::Assembler *Asm =
+      Func->getAssembler<typename Traits::Assembler>();
+  assert(this->getSrcSize() == 2);
+  assert(Condition < Traits::Cond::Cmpps_Invalid);
+  // Assuming there isn't any load folding for cmpps, and vector constants
+  // are not allowed in PNaCl.
+  assert(llvm::isa<Variable>(this->getSrc(1)));
+  const auto SrcVar = llvm::cast<Variable>(this->getSrc(1));
+  // Both forms write to the same destination XMM register.
+  const auto DestReg =
+      Traits::RegisterSet::getEncodedXmm(this->getDest()->getRegNum());
+  if (!SrcVar->hasReg()) {
+    typename Traits::Address SrcStackAddr =
+        static_cast<typename Traits::TargetLowering *>(Func->getTarget())
+            ->stackVarToAsmOperand(SrcVar);
+    Asm->cmpps(DestReg, SrcStackAddr, Condition);
+    return;
+  }
+  Asm->cmpps(DestReg, Traits::RegisterSet::getEncodedXmm(SrcVar->getRegNum()),
+             Condition);
+}
+
+/// Dumps the instruction as "<dest> = cmp<pred>ps\t<sources>" on the dump
+/// stream, where <pred> is the EmitString of the InstCmppsAttributes entry for
+/// Condition. Does nothing when BuildDefs::dump() is false.
+template <class Machine>
+void InstX86Cmpps<Machine>::dump(const Cfg *Func) const {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Out = Func->getContext()->getStrDump();
+  assert(Condition < Traits::Cond::Cmpps_Invalid);
+  this->dumpDest(Func);
+  Out << " = cmp";
+  Out << Traits::InstCmppsAttributes[Condition].EmitString;
+  Out << "ps"
+      << "\t";
+  this->dumpSources(Func);
+}
+
+/// Text emission for cmpxchg: writes an optional "\tlock" (when Locked is
+/// set) followed by "\tcmpxchg<width>\t<src2>, <src0>", with the width string
+/// taken from the type of source operand 0. Does nothing when
+/// BuildDefs::dump() is false.
+template <class Machine>
+void InstX86Cmpxchg<Machine>::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Out = Func->getContext()->getStrEmit();
+  assert(this->getSrcSize() == 3);
+  if (this->Locked)
+    Out << "\tlock";
+  Out << "\tcmpxchg";
+  Out << this->getWidthString(this->getSrc(0)->getType()) << "\t";
+  this->getSrc(2)->emit(Func);
+  Out << ", ";
+  this->getSrc(0)->emit(Func);
+}
+
+/// Integrated-assembler emission for cmpxchg. Source operand 0 must be an
+/// X86OperandMem using the default segment and source operand 2 must be a
+/// Variable with a register; the address, the encoded register and the Locked
+/// flag are passed to the assembler's cmpxchg() with the type of source
+/// operand 0.
+template <class Machine>
+void InstX86Cmpxchg<Machine>::emitIAS(const Cfg *Func) const {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  assert(this->getSrcSize() == 3);
+  typename Traits::Assembler *Asm =
+      Func->getAssembler<typename Traits::Assembler>();
+  const Type Ty = this->getSrc(0)->getType();
+  const auto MemOperand =
+      llvm::cast<typename Traits::X86OperandMem>(this->getSrc(0));
+  assert(MemOperand->getSegmentRegister() ==
+         Traits::X86OperandMem::DefaultSegment);
+  const typename Traits::Address Addr = MemOperand->toAsmAddress(Asm);
+  const auto RegVar = llvm::cast<Variable>(this->getSrc(2));
+  assert(RegVar->hasReg());
+  const typename Traits::RegisterSet::GPRRegister Reg =
+      Traits::RegisterSet::getEncodedGPR(RegVar->getRegNum());
+  Asm->cmpxchg(Ty, Addr, Reg, this->Locked);
+}
+
+/// Dumps the instruction as "[lock ]cmpxchg.<src0 type> <sources>" on the dump
+/// stream; the "lock " prefix appears only when Locked is set. Does nothing
+/// when BuildDefs::dump() is false.
+template <class Machine>
+void InstX86Cmpxchg<Machine>::dump(const Cfg *Func) const {
+  if (BuildDefs::dump()) {
+    Ostream &Out = Func->getContext()->getStrDump();
+    if (this->Locked)
+      Out << "lock ";
+    Out << "cmpxchg.";
+    Out << this->getSrc(0)->getType() << " ";
+    this->dumpSources(Func);
+  }
+}
+
+/// Text emission for cmpxchg8b: writes an optional "\tlock" (when Locked is
+/// set) followed by "\tcmpxchg8b\t<src0>". Only source operand 0 is printed,
+/// although five sources are asserted. Does nothing when BuildDefs::dump() is
+/// false.
+template <class Machine>
+void InstX86Cmpxchg8b<Machine>::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Out = Func->getContext()->getStrEmit();
+  assert(this->getSrcSize() == 5);
+  if (this->Locked)
+    Out << "\tlock";
+  Out << "\tcmpxchg8b\t";
+  this->getSrc(0)->emit(Func);
+}
+
+/// Integrated-assembler emission for cmpxchg8b. Source operand 0 must be an
+/// X86OperandMem using the default segment; its address and the Locked flag
+/// are passed to the assembler's cmpxchg8b().
+template <class Machine>
+void InstX86Cmpxchg8b<Machine>::emitIAS(const Cfg *Func) const {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  assert(this->getSrcSize() == 5);
+  typename Traits::Assembler *Asm =
+      Func->getAssembler<typename Traits::Assembler>();
+  const auto MemOperand =
+      llvm::cast<typename Traits::X86OperandMem>(this->getSrc(0));
+  assert(MemOperand->getSegmentRegister() ==
+         Traits::X86OperandMem::DefaultSegment);
+  const typename Traits::Address Addr = MemOperand->toAsmAddress(Asm);
+  Asm->cmpxchg8b(Addr, this->Locked);
+}
+
+/// Dumps the instruction as "[lock ]cmpxchg8b <sources>" on the dump stream;
+/// the "lock " prefix appears only when Locked is set. Does nothing when
+/// BuildDefs::dump() is false.
+template <class Machine>
+void InstX86Cmpxchg8b<Machine>::dump(const Cfg *Func) const {
+  if (BuildDefs::dump()) {
+    Ostream &Out = Func->getContext()->getStrDump();
+    if (this->Locked)
+      Out << "lock ";
+    Out << "cmpxchg8b ";
+    this->dumpSources(Func);
+  }
+}
+
+/// Text emission for cvt: writes "\tcvt[t]<from>2<to>\t<src0>, <dest>". The
+/// "t" is added when isTruncating() is true; <from> and <to> are the
+/// CvtString table entries for the source and destination types. Does nothing
+/// when BuildDefs::dump() is false.
+template <class Machine> void InstX86Cvt<Machine>::emit(const Cfg *Func) const {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Out = Func->getContext()->getStrEmit();
+  assert(this->getSrcSize() == 1);
+  Out << "\tcvt";
+  if (isTruncating())
+    Out << "t";
+  Out << Traits::TypeAttributes[this->getSrc(0)->getType()].CvtString;
+  Out << "2";
+  Out << Traits::TypeAttributes[this->getDest()->getType()].CvtString;
+  Out << "\t";
+  this->getSrc(0)->emit(Func);
+  Out << ", ";
+  this->getDest()->emit(Func);
+}
+
+/// Integrated-assembler emission for cvt, dispatched on Variant:
+///  - Si2ss:       scalar integer (at most 4 bytes) to scalar float, via
+///                 emitIASCastRegOp() with an XMM destination and GPR source;
+///                 the destination type is passed.
+///  - Tss2si:      scalar float to scalar integer (at most 4 bytes), via
+///                 emitIASCastRegOp() with a GPR destination and XMM source;
+///                 the source type is passed.
+///  - Float2float: between two different scalar float types, via
+///                 emitIASRegOpTyXMM() with the source type.
+///  - Dq2ps:       vector integer to vector float, via emitIASRegOpTyXMM()
+///                 with the destination type.
+///  - Tps2dq:      vector float to vector integer, via emitIASRegOpTyXMM()
+///                 with the destination type.
+template <class Machine>
+void InstX86Cvt<Machine>::emitIAS(const Cfg *Func) const {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  using AsmTy = typename Traits::Assembler;
+  using XmmReg = typename Traits::RegisterSet::XmmRegister;
+  using GprReg = typename Traits::RegisterSet::GPRRegister;
+  assert(this->getSrcSize() == 1);
+  const Variable *Dest = this->getDest();
+  const Operand *Src = this->getSrc(0);
+  const Type DestTy = Dest->getType();
+  const Type SrcTy = Src->getType();
+  switch (Variant) {
+  case Si2ss: {
+    assert(isScalarIntegerType(SrcTy));
+    assert(typeWidthInBytes(SrcTy) <= 4);
+    assert(isScalarFloatingType(DestTy));
+    static const typename AsmTy::template CastEmitterRegOp<XmmReg, GprReg>
+        CastEmitter = {&AsmTy::cvtsi2ss, &AsmTy::cvtsi2ss};
+    emitIASCastRegOp<Machine, XmmReg, GprReg,
+                     Traits::RegisterSet::getEncodedXmm,
+                     Traits::RegisterSet::getEncodedGPR>(Func, DestTy, Dest,
+                                                         Src, CastEmitter);
+    break;
+  }
+  case Tss2si: {
+    assert(isScalarFloatingType(SrcTy));
+    assert(isScalarIntegerType(DestTy));
+    assert(typeWidthInBytes(DestTy) <= 4);
+    static const typename AsmTy::template CastEmitterRegOp<GprReg, XmmReg>
+        CastEmitter = {&AsmTy::cvttss2si, &AsmTy::cvttss2si};
+    emitIASCastRegOp<Machine, GprReg, XmmReg,
+                     Traits::RegisterSet::getEncodedGPR,
+                     Traits::RegisterSet::getEncodedXmm>(Func, SrcTy, Dest,
+                                                         Src, CastEmitter);
+    break;
+  }
+  case Float2float: {
+    assert(isScalarFloatingType(SrcTy));
+    assert(isScalarFloatingType(DestTy));
+    assert(DestTy != SrcTy);
+    static const typename AsmTy::XmmEmitterRegOp XmmEmitter = {
+        &AsmTy::cvtfloat2float, &AsmTy::cvtfloat2float};
+    emitIASRegOpTyXMM<Machine>(Func, SrcTy, Dest, Src, XmmEmitter);
+    break;
+  }
+  case Dq2ps: {
+    assert(isVectorIntegerType(SrcTy));
+    assert(isVectorFloatingType(DestTy));
+    static const typename AsmTy::XmmEmitterRegOp XmmEmitter = {
+        &AsmTy::cvtdq2ps, &AsmTy::cvtdq2ps};
+    emitIASRegOpTyXMM<Machine>(Func, DestTy, Dest, Src, XmmEmitter);
+    break;
+  }
+  case Tps2dq: {
+    assert(isVectorFloatingType(SrcTy));
+    assert(isVectorIntegerType(DestTy));
+    static const typename AsmTy::XmmEmitterRegOp XmmEmitter = {
+        &AsmTy::cvttps2dq, &AsmTy::cvttps2dq};
+    emitIASRegOpTyXMM<Machine>(Func, DestTy, Dest, Src, XmmEmitter);
+    break;
+  }
+  }
+}
+
+/// Dumps the instruction as "<dest> = cvt[t]<from>2<to> <sources>" on the dump
+/// stream. The "t" is added when isTruncating() is true; <from> and <to> are
+/// the CvtString table entries for the source and destination types. Does
+/// nothing when BuildDefs::dump() is false.
+template <class Machine> void InstX86Cvt<Machine>::dump(const Cfg *Func) const {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Out = Func->getContext()->getStrDump();
+  this->dumpDest(Func);
+  Out << " = cvt";
+  if (isTruncating())
+    Out << "t";
+  Out << Traits::TypeAttributes[this->getSrc(0)->getType()].CvtString;
+  Out << "2";
+  Out << Traits::TypeAttributes[this->getDest()->getType()].CvtString;
+  Out << " ";
+  this->dumpSources(Func);
+}
+
+/// Text emission for the integer compare: writes
+/// "\tcmp<width>\t<src1>, <src0>", with the width string taken from the type
+/// of source operand 0. Does nothing when BuildDefs::dump() is false.
+template <class Machine>
+void InstX86Icmp<Machine>::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Out = Func->getContext()->getStrEmit();
+  assert(this->getSrcSize() == 2);
+  Out << "\tcmp";
+  Out << this->getWidthString(this->getSrc(0)->getType()) << "\t";
+  this->getSrc(1)->emit(Func);
+  Out << ", ";
+  this->getSrc(0)->emit(Func);
+}
+
+/// Integrated-assembler emission for the integer compare. When source
+/// operand 0 is a Variable with a register, the reg/op emitter is used;
+/// in every other case source operand 0 is handed to emitIASAsAddrOpTyGPR()
+/// with the addr/op emitter. The type of source operand 0 is used for both.
+template <class Machine>
+void InstX86Icmp<Machine>::emitIAS(const Cfg *Func) const {
+  using AsmTy = typename InstX86Base<Machine>::Traits::Assembler;
+  assert(this->getSrcSize() == 2);
+  const Operand *Lhs = this->getSrc(0);
+  const Operand *Rhs = this->getSrc(1);
+  const Type Ty = Lhs->getType();
+  static const typename AsmTy::GPREmitterRegOp RegEmitter = {
+      &AsmTy::cmp, &AsmTy::cmp, &AsmTy::cmp};
+  static const typename AsmTy::GPREmitterAddrOp AddrEmitter = {&AsmTy::cmp,
+                                                               &AsmTy::cmp};
+  const auto LhsVar = llvm::dyn_cast<Variable>(Lhs);
+  if (LhsVar && LhsVar->hasReg()) {
+    emitIASRegOpTyGPR<Machine>(Func, Ty, LhsVar, Rhs, RegEmitter);
+  } else {
+    emitIASAsAddrOpTyGPR<Machine>(Func, Ty, Lhs, Rhs, AddrEmitter);
+  }
+}
+
+/// Writes the dump form "cmp.<type of Src(0)> <sources>".
+template <class Machine>
+void InstX86Icmp<Machine>::dump(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Out = Func->getContext()->getStrDump();
+  const Type Ty = this->getSrc(0)->getType();
+  Out << "cmp." << Ty << " ";
+  this->dumpSources(Func);
+}
+
+/// Emits the textual assembly "ucomi<suffix> <src1>, <src0>", where the
+/// suffix is the SdSsString attribute of Src(0)'s type.
+template <class Machine>
+void InstX86Ucomiss<Machine>::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  using MachineTraits = typename InstX86Base<Machine>::Traits;
+  Ostream &Out = Func->getContext()->getStrEmit();
+  assert(this->getSrcSize() == 2);
+  const Type Ty = this->getSrc(0)->getType();
+  Out << "\tucomi" << MachineTraits::TypeAttributes[Ty].SdSsString << "\t";
+  // Operands are written in reverse order: Src(1) first, then Src(0).
+  this->getSrc(1)->emit(Func);
+  Out << ", ";
+  this->getSrc(0)->emit(Func);
+}
+
+/// Encodes ucomiss through the integrated assembler, dispatching on the
+/// type of Src(0).
+template <class Machine>
+void InstX86Ucomiss<Machine>::emitIAS(const Cfg *Func) const {
+  using AsmTy = typename InstX86Base<Machine>::Traits::Assembler;
+  assert(this->getSrcSize() == 2);
+  // By convention src0 is currently always a variable, which avoids ending
+  // up with two memory operands.
+  Operand *First = this->getSrc(0);
+  assert(llvm::isa<Variable>(First));
+  const auto FirstVar = llvm::cast<Variable>(First);
+  static const typename AsmTy::XmmEmitterRegOp Emitter = {&AsmTy::ucomiss,
+                                                          &AsmTy::ucomiss};
+  emitIASRegOpTyXMM<Machine>(Func, FirstVar->getType(), FirstVar,
+                             this->getSrc(1), Emitter);
+}
+
+/// Writes the dump form "ucomiss.<type of Src(0)> <sources>".
+template <class Machine>
+void InstX86Ucomiss<Machine>::dump(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Out = Func->getContext()->getStrDump();
+  const Type Ty = this->getSrc(0)->getType();
+  Out << "ucomiss." << Ty << " ";
+  this->dumpSources(Func);
+}
+
+/// Emits the textual assembly for ud2, which takes no source operands.
+template <class Machine> void InstX86UD2<Machine>::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  assert(this->getSrcSize() == 0);
+  Func->getContext()->getStrEmit() << "\tud2";
+}
+
+/// Encodes ud2 by calling the function's assembler directly.
+template <class Machine>
+void InstX86UD2<Machine>::emitIAS(const Cfg *Func) const {
+  using AsmTy = typename InstX86Base<Machine>::Traits::Assembler;
+  AsmTy *Asm = Func->getAssembler<AsmTy>();
+  Asm->ud2();
+}
+
+/// Writes the dump form "ud2".
+template <class Machine> void InstX86UD2<Machine>::dump(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  Func->getContext()->getStrDump() << "ud2";
+}
+
+/// Emits the textual assembly "test<width> <src1>, <src0>".
+template <class Machine>
+void InstX86Test<Machine>::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Out = Func->getContext()->getStrEmit();
+  assert(this->getSrcSize() == 2);
+  Operand *First = this->getSrc(0);
+  Operand *Second = this->getSrc(1);
+  // The width suffix is taken from the type of Src(0).
+  Out << "\ttest" << this->getWidthString(First->getType()) << "\t";
+  // Operands are written in reverse order: Src(1) first, then Src(0).
+  Second->emit(Func);
+  Out << ", ";
+  First->emit(Func);
+}
+
+/// Encodes test through the integrated assembler. Only the case where
+/// Src(0) is a Variable with a register is expected; every other case hits
+/// llvm_unreachable below.
+template <class Machine>
+void InstX86Test<Machine>::emitIAS(const Cfg *Func) const {
+  assert(this->getSrcSize() == 2);
+  const Operand *Src0 = this->getSrc(0);
+  const Operand *Src1 = this->getSrc(1);
+  Type Ty = Src0->getType();
+  // The Reg/Addr form of test is not encodeable.
+  // That is why the middle entry of RegEmitter is nullptr.
+  static const typename InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp
+      RegEmitter = {&InstX86Base<Machine>::Traits::Assembler::test, nullptr,
+                    &InstX86Base<Machine>::Traits::Assembler::test};
+  static const typename InstX86Base<
+      Machine>::Traits::Assembler::GPREmitterAddrOp AddrEmitter = {
+      &InstX86Base<Machine>::Traits::Assembler::test,
+      &InstX86Base<Machine>::Traits::Assembler::test};
+  if (const auto SrcVar0 = llvm::dyn_cast<Variable>(Src0)) {
+    if (SrcVar0->hasReg()) {
+      emitIASRegOpTyGPR<Machine>(Func, Ty, SrcVar0, Src1, RegEmitter);
+      return;
+    }
+  }
+  // The address-operand path is marked unreachable because nothing is known
+  // to generate it. The call after the marker is kept in the source as the
+  // intended (untested) implementation.
+  llvm_unreachable("Nothing actually generates this so it's untested");
+  emitIASAsAddrOpTyGPR<Machine>(Func, Ty, Src0, Src1, AddrEmitter);
+}
+
+/// Writes the dump form "test.<type of Src(0)> <sources>".
+template <class Machine>
+void InstX86Test<Machine>::dump(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Out = Func->getContext()->getStrDump();
+  const Type Ty = this->getSrc(0)->getType();
+  Out << "test." << Ty << " ";
+  this->dumpSources(Func);
+}
+
+/// Emits the textual assembly for mfence, which takes no source operands.
+template <class Machine>
+void InstX86Mfence<Machine>::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  assert(this->getSrcSize() == 0);
+  Func->getContext()->getStrEmit() << "\tmfence";
+}
+
+/// Encodes mfence by calling the function's assembler directly.
+template <class Machine>
+void InstX86Mfence<Machine>::emitIAS(const Cfg *Func) const {
+  using AsmTy = typename InstX86Base<Machine>::Traits::Assembler;
+  AsmTy *Asm = Func->getAssembler<AsmTy>();
+  Asm->mfence();
+}
+
+/// Writes the dump form "mfence".
+template <class Machine>
+void InstX86Mfence<Machine>::dump(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  Func->getContext()->getStrDump() << "mfence";
+}
+
+/// Emits the textual assembly for a store: "mov<width><sd/ss> <src0>, <src1>",
+/// with both suffixes derived from the type of Src(0).
+template <class Machine>
+void InstX86Store<Machine>::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  using MachineTraits = typename InstX86Base<Machine>::Traits;
+  Ostream &Out = Func->getContext()->getStrEmit();
+  assert(this->getSrcSize() == 2);
+  Operand *Value = this->getSrc(0);
+  const Type Ty = Value->getType();
+  Out << "\tmov" << this->getWidthString(Ty);
+  Out << MachineTraits::TypeAttributes[Ty].SdSsString << "\t";
+  Value->emit(Func);
+  Out << ", ";
+  this->getSrc(1)->emit(Func);
+}
+
+/// Encodes a store through the integrated assembler. Src(0) is the value
+/// and Src(1) is the destination. Scalar floating-point destinations use
+/// movss from an xmm register; everything else must be a scalar integer
+/// type and goes through the GPR address-operand emitter with mov.
+template <class Machine>
+void InstX86Store<Machine>::emitIAS(const Cfg *Func) const {
+  using MachineTraits = typename InstX86Base<Machine>::Traits;
+  using AsmTy = typename MachineTraits::Assembler;
+  using MemTy = typename MachineTraits::X86OperandMem;
+  assert(this->getSrcSize() == 2);
+  const Operand *Value = this->getSrc(0);
+  const Operand *Target = this->getSrc(1);
+  const Type TargetTy = Target->getType();
+
+  if (!isScalarFloatingType(TargetTy)) {
+    assert(isScalarIntegerType(TargetTy));
+    static const typename AsmTy::GPREmitterAddrOp GPRAddrEmitter = {
+        &AsmTy::mov, &AsmTy::mov};
+    emitIASAsAddrOpTyGPR<Machine>(Func, TargetTy, Target, Value,
+                                  GPRAddrEmitter);
+    return;
+  }
+
+  // The value has to live in a register, because the destination is a
+  // memory operand of some kind.
+  const auto ValueVar = llvm::cast<Variable>(Value);
+  assert(ValueVar->hasReg());
+  typename MachineTraits::RegisterSet::XmmRegister ValueReg =
+      MachineTraits::RegisterSet::getEncodedXmm(ValueVar->getRegNum());
+  AsmTy *Asm = Func->getAssembler<AsmTy>();
+
+  if (const auto TargetVar = llvm::dyn_cast<Variable>(Target)) {
+    // A Variable destination must not have a register; it is addressed
+    // via the target lowering's stackVarToAsmOperand().
+    assert(!TargetVar->hasReg());
+    typename MachineTraits::Address StackAddr(
+        static_cast<typename MachineTraits::TargetLowering *>(
+            Func->getTarget())
+            ->stackVarToAsmOperand(TargetVar));
+    Asm->movss(TargetTy, StackAddr, ValueReg);
+    return;
+  }
+
+  const auto TargetMem = llvm::cast<MemTy>(Target);
+  assert(TargetMem->getSegmentRegister() == MemTy::DefaultSegment);
+  Asm->movss(TargetTy, TargetMem->toAsmAddress(Asm), ValueReg);
+}
+
+/// Writes the dump form "mov.<type of Src(0)> <src1>, <src0>".
+template <class Machine>
+void InstX86Store<Machine>::dump(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Out = Func->getContext()->getStrDump();
+  Operand *Value = this->getSrc(0);
+  Out << "mov." << Value->getType() << " ";
+  // The destination (Src(1)) is dumped before the value (Src(0)).
+  this->getSrc(1)->dump(Func);
+  Out << ", ";
+  Value->dump(Func);
+}
+
+/// Emits the textual assembly "movups <src0>, <src1>".
+template <class Machine>
+void InstX86StoreP<Machine>::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Out = Func->getContext()->getStrEmit();
+  assert(this->getSrcSize() == 2);
+  Operand *Value = this->getSrc(0);
+  Operand *Target = this->getSrc(1);
+  Out << "\tmovups\t";
+  Value->emit(Func);
+  Out << ", ";
+  Target->emit(Func);
+}
+
+/// Encodes movups from the xmm register holding Src(0) to the memory
+/// operand Src(1).
+template <class Machine>
+void InstX86StoreP<Machine>::emitIAS(const Cfg *Func) const {
+  using MachineTraits = typename InstX86Base<Machine>::Traits;
+  using AsmTy = typename MachineTraits::Assembler;
+  using MemTy = typename MachineTraits::X86OperandMem;
+  AsmTy *Asm = Func->getAssembler<AsmTy>();
+  assert(this->getSrcSize() == 2);
+  const auto ValueVar = llvm::cast<Variable>(this->getSrc(0));
+  const auto TargetMem = llvm::cast<MemTy>(this->getSrc(1));
+  // Only the default segment is accepted, and the value must be in a
+  // register.
+  assert(TargetMem->getSegmentRegister() == MemTy::DefaultSegment);
+  assert(ValueVar->hasReg());
+  Asm->movups(
+      TargetMem->toAsmAddress(Asm),
+      MachineTraits::RegisterSet::getEncodedXmm(ValueVar->getRegNum()));
+}
+
+/// Writes the dump form "storep.<type of Src(0)> <src1>, <src0>".
+template <class Machine>
+void InstX86StoreP<Machine>::dump(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Out = Func->getContext()->getStrDump();
+  Operand *Value = this->getSrc(0);
+  Out << "storep." << Value->getType() << " ";
+  // The destination (Src(1)) is dumped before the value (Src(0)).
+  this->getSrc(1)->dump(Func);
+  Out << ", ";
+  Value->dump(Func);
+}
+
+/// Emits the textual assembly "movq <src0>, <src1>". Src(1) must have type
+/// i64 or f64.
+template <class Machine>
+void InstX86StoreQ<Machine>::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Out = Func->getContext()->getStrEmit();
+  assert(this->getSrcSize() == 2);
+  Operand *Value = this->getSrc(0);
+  Operand *Target = this->getSrc(1);
+  assert(Target->getType() == IceType_i64 ||
+         Target->getType() == IceType_f64);
+  Out << "\tmovq\t";
+  Value->emit(Func);
+  Out << ", ";
+  Target->emit(Func);
+}
+
+/// Encodes movq from the xmm register holding Src(0) to the memory operand
+/// Src(1).
+template <class Machine>
+void InstX86StoreQ<Machine>::emitIAS(const Cfg *Func) const {
+  using MachineTraits = typename InstX86Base<Machine>::Traits;
+  using AsmTy = typename MachineTraits::Assembler;
+  using MemTy = typename MachineTraits::X86OperandMem;
+  AsmTy *Asm = Func->getAssembler<AsmTy>();
+  assert(this->getSrcSize() == 2);
+  const auto ValueVar = llvm::cast<Variable>(this->getSrc(0));
+  const auto TargetMem = llvm::cast<MemTy>(this->getSrc(1));
+  // Only the default segment is accepted, and the value must be in a
+  // register.
+  assert(TargetMem->getSegmentRegister() == MemTy::DefaultSegment);
+  assert(ValueVar->hasReg());
+  Asm->movq(TargetMem->toAsmAddress(Asm),
+            MachineTraits::RegisterSet::getEncodedXmm(ValueVar->getRegNum()));
+}
+
+/// Writes the dump form "storeq.<type of Src(0)> <src1>, <src0>".
+template <class Machine>
+void InstX86StoreQ<Machine>::dump(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Out = Func->getContext()->getStrDump();
+  Operand *Value = this->getSrc(0);
+  Out << "storeq." << Value->getType() << " ";
+  // The destination (Src(1)) is dumped before the value (Src(0)).
+  this->getSrc(1)->dump(Func);
+  Out << ", ";
+  Value->dump(Func);
+}
+
+/// Emits the textual assembly "leal <src0>, <dest>". The destination must
+/// have a register.
+template <class Machine> void InstX86Lea<Machine>::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Out = Func->getContext()->getStrEmit();
+  assert(this->getSrcSize() == 1);
+  assert(this->getDest()->hasReg());
+  Out << "\tleal\t";
+  Operand *Source = this->getSrc(0);
+  const auto SourceVar = llvm::dyn_cast<Variable>(Source);
+  if (SourceVar == nullptr) {
+    Source->emit(Func);
+  } else {
+    // lea on x86-32 doesn't accept mem128 operands, so a vector-typed
+    // source variable is emitted as i32 instead.
+    const Type Ty = SourceVar->getType();
+    const Type EmitTy = isVectorType(Ty) ? IceType_i32 : Ty;
+    SourceVar->asType(EmitTy)->emit(Func);
+  }
+  Out << ", ";
+  this->getDest()->emit(Func);
+}
+
+/// Emits the textual assembly "mov<suffix> <src>, <dest>". The suffix is
+/// the SdSsString attribute of the dest type when dest is a scalar float,
+/// and the width string of the source type otherwise.
+template <class Machine> void InstX86Mov<Machine>::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  using MachineTraits = typename InstX86Base<Machine>::Traits;
+  Ostream &Out = Func->getContext()->getStrEmit();
+  assert(this->getSrcSize() == 1);
+  Operand *Source = this->getSrc(0);
+  const Type SrcTy = Source->getType();
+  const Type DestTy = this->getDest()->getType();
+  Out << "\tmov"
+      << (isScalarFloatingType(DestTy)
+              ? MachineTraits::TypeAttributes[DestTy].SdSsString
+              : this->getWidthString(SrcTy))
+      << "\t";
+  // In an integer truncation the source is wider than the dest. A mov
+  // whose width matches the narrower dest would be ideal, but that breaks
+  // when the source is a register such as esi or si that has no 8-bit
+  // version. To stay safe the dest is widened to the source type instead.
+  // That is fine for stack-allocated dests too, since typeWidthOnStack()
+  // pads to a 4-byte boundary even when only a lower portion is used.
+  // TODO: This assert disallows usages such as copying a floating point
+  // value between a vector and a scalar (which movss is used for).
+  // Clean this up.
+  assert(Func->getTarget()->typeWidthInBytesOnStack(DestTy) ==
+         Func->getTarget()->typeWidthInBytesOnStack(SrcTy));
+  Source->emit(Func);
+  Out << ", ";
+  this->getDest()->asType(SrcTy)->emit(Func);
+}
+
+/// Encodes a mov through the integrated assembler.
+///
+/// Mov serves both GPRs and XMM registers, and the source and dest types
+/// need not match (Mov is also used for bitcasts). When they differ, one
+/// of the operands must be a register, so the register operand's type
+/// selects the emitter set. The emitter sets contain reg-reg movs, but
+/// that case should go unused when the types differ.
+template <class Machine>
+void InstX86Mov<Machine>::emitIAS(const Cfg *Func) const {
+  using MachineTraits = typename InstX86Base<Machine>::Traits;
+  using AsmTy = typename MachineTraits::Assembler;
+  assert(this->getSrcSize() == 1);
+  const Variable *Dest = this->getDest();
+  const Operand *Src = this->getSrc(0);
+  const Type DestTy = Dest->getType();
+  const Type SrcTy = Src->getType();
+  static const typename AsmTy::XmmEmitterRegOp XmmRegEmitter = {
+      &AsmTy::movss, &AsmTy::movss};
+  static const typename AsmTy::GPREmitterRegOp GPRRegEmitter = {
+      &AsmTy::mov, &AsmTy::mov, &AsmTy::mov};
+  static const typename AsmTy::GPREmitterAddrOp GPRAddrEmitter = {
+      &AsmTy::mov, &AsmTy::mov};
+  // In an integer truncation the source is wider than the dest. A mov
+  // whose width matches the narrower dest would be ideal, but that breaks
+  // when the source is a register such as esi or si that has no 8-bit
+  // version. To stay safe the dest is widened to the source type instead.
+  // That is fine for stack-allocated dests too, since typeWidthOnStack()
+  // pads to a 4-byte boundary even when only a lower portion is used.
+  // TODO: This assert disallows usages such as copying a floating point
+  // value between a vector and a scalar (which movss is used for).
+  // Clean this up.
+  assert(Func->getTarget()->typeWidthInBytesOnStack(DestTy) ==
+         Func->getTarget()->typeWidthInBytesOnStack(SrcTy));
+
+  if (!Dest->hasReg()) {
+    // Dest is on the stack and Src *could* be a register, so Src's type
+    // picks the emitter.
+    typename MachineTraits::Address StackAddr(
+        static_cast<typename MachineTraits::TargetLowering *>(
+            Func->getTarget())
+            ->stackVarToAsmOperand(Dest));
+    if (!isScalarFloatingType(SrcTy)) {
+      // Src may be a register or an immediate.
+      assert(isScalarIntegerType(SrcTy));
+      emitIASAddrOpTyGPR<Machine>(Func, SrcTy, StackAddr, Src, GPRAddrEmitter);
+      return;
+    }
+    // A floating-point Src has to be in a register.
+    const auto SrcVar = llvm::cast<Variable>(Src);
+    assert(SrcVar->hasReg());
+    AsmTy *Asm = Func->getAssembler<AsmTy>();
+    Asm->movss(SrcTy, StackAddr,
+               MachineTraits::RegisterSet::getEncodedXmm(SrcVar->getRegNum()));
+    return;
+  }
+
+  if (isScalarFloatingType(DestTy)) {
+    emitIASRegOpTyXMM<Machine>(Func, DestTy, Dest, Src, XmmRegEmitter);
+    return;
+  }
+
+  assert(isScalarIntegerType(DestTy));
+  // Widen the dispatch type for truncation (see the note above), but only
+  // when both Src and Dest are integer types.
+  const Type DispatchTy = isScalarIntegerType(SrcTy) ? SrcTy : DestTy;
+  emitIASRegOpTyGPR<Machine>(Func, DispatchTy, Dest, Src, GPRRegEmitter);
+}
+
+/// Encodes movd through the integrated assembler. Used for insert/extract
+/// element, where one of Src/Dest is an Xmm vector and the other is an i32.
+template <class Machine>
+void InstX86Movd<Machine>::emitIAS(const Cfg *Func) const {
+  using MachineTraits = typename InstX86Base<Machine>::Traits;
+  using AsmTy = typename MachineTraits::Assembler;
+  using RegSet = typename MachineTraits::RegisterSet;
+  using LoweringTy = typename MachineTraits::TargetLowering;
+  AsmTy *Asm = Func->getAssembler<AsmTy>();
+  assert(this->getSrcSize() == 1);
+  const Variable *Dest = this->getDest();
+  const auto SrcVar = llvm::cast<Variable>(this->getSrc(0));
+
+  if (SrcVar->getType() != IceType_i32) {
+    // Vector source in an xmm register, i32 dest in a GPR or on the stack.
+    assert(isVectorType(SrcVar->getType()));
+    assert(SrcVar->hasReg());
+    assert(Dest->getType() == IceType_i32);
+    typename RegSet::XmmRegister SrcReg =
+        RegSet::getEncodedXmm(SrcVar->getRegNum());
+    if (Dest->hasReg()) {
+      Asm->movd(RegSet::getEncodedGPR(Dest->getRegNum()), SrcReg);
+      return;
+    }
+    typename MachineTraits::Address StackAddr(
+        static_cast<LoweringTy *>(Func->getTarget())
+            ->stackVarToAsmOperand(Dest));
+    Asm->movd(StackAddr, SrcReg);
+    return;
+  }
+
+  // i32 source (GPR or stack), vector dest in an xmm register.
+  assert(isVectorType(Dest->getType()));
+  assert(Dest->hasReg());
+  typename RegSet::XmmRegister DestReg =
+      RegSet::getEncodedXmm(Dest->getRegNum());
+  if (SrcVar->hasReg()) {
+    Asm->movd(DestReg, RegSet::getEncodedGPR(SrcVar->getRegNum()));
+    return;
+  }
+  typename MachineTraits::Address StackAddr(
+      static_cast<LoweringTy *>(Func->getTarget())
+          ->stackVarToAsmOperand(SrcVar));
+  Asm->movd(DestReg, StackAddr);
+}
+
+/// Emits the textual assembly "movups <src0>, <dest>".
+template <class Machine>
+void InstX86Movp<Machine>::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  // TODO(wala,stichnot): movups is valid for every vector operand, but
+  // other instructions (movaps, movdqa, movdqu) may perform better
+  // depending on the data type and alignment of the operands.
+  Ostream &Out = Func->getContext()->getStrEmit();
+  assert(this->getSrcSize() == 1);
+  Out << "\tmovups\t";
+  Operand *Source = this->getSrc(0);
+  Source->emit(Func);
+  Out << ", ";
+  Variable *Target = this->getDest();
+  Target->emit(Func);
+}
+
+/// Encodes a vector move via emitIASMovlikeXMM, using movups for every
+/// entry of the emitter table. The dest must have a vector type.
+template <class Machine>
+void InstX86Movp<Machine>::emitIAS(const Cfg *Func) const {
+  using AsmTy = typename InstX86Base<Machine>::Traits::Assembler;
+  assert(this->getSrcSize() == 1);
+  assert(isVectorType(this->getDest()->getType()));
+  static const typename AsmTy::XmmEmitterMovOps Emitter = {
+      &AsmTy::movups, &AsmTy::movups, &AsmTy::movups};
+  const Variable *Target = this->getDest();
+  const Operand *Source = this->getSrc(0);
+  emitIASMovlikeXMM<Machine>(Func, Target, Source, Emitter);
+}
+
+/// Emits the textual assembly "movq <src0>, <dest>". The dest must have
+/// type i64 or f64.
+template <class Machine>
+void InstX86Movq<Machine>::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Out = Func->getContext()->getStrEmit();
+  assert(this->getSrcSize() == 1);
+  Variable *Target = this->getDest();
+  assert(Target->getType() == IceType_i64 ||
+         Target->getType() == IceType_f64);
+  Out << "\tmovq\t";
+  this->getSrc(0)->emit(Func);
+  Out << ", ";
+  Target->emit(Func);
+}
+
+/// Encodes a 64-bit move via emitIASMovlikeXMM, using movq for every entry
+/// of the emitter table. The dest must have type i64 or f64.
+template <class Machine>
+void InstX86Movq<Machine>::emitIAS(const Cfg *Func) const {
+  using AsmTy = typename InstX86Base<Machine>::Traits::Assembler;
+  assert(this->getSrcSize() == 1);
+  assert(this->getDest()->getType() == IceType_i64 ||
+         this->getDest()->getType() == IceType_f64);
+  static const typename AsmTy::XmmEmitterMovOps Emitter = {
+      &AsmTy::movq, &AsmTy::movq, &AsmTy::movq};
+  const Variable *Target = this->getDest();
+  const Operand *Source = this->getSrc(0);
+  emitIASMovlikeXMM<Machine>(Func, Target, Source, Emitter);
+}
+
+/// Encodes a register-to-register movss with type f32.
+template <class Machine>
+void InstX86MovssRegs<Machine>::emitIAS(const Cfg *Func) const {
+  using MachineTraits = typename InstX86Base<Machine>::Traits;
+  using AsmTy = typename MachineTraits::Assembler;
+  // This Binop variant is only intended for reg-reg moves in which part of
+  // the Dest register is left untouched.
+  assert(this->getSrcSize() == 2);
+  const Variable *Target = this->getDest();
+  assert(Target == this->getSrc(0));
+  const auto Source = llvm::cast<Variable>(this->getSrc(1));
+  assert(Target->hasReg() && Source->hasReg());
+  AsmTy *Asm = Func->getAssembler<AsmTy>();
+  Asm->movss(IceType_f32,
+             MachineTraits::RegisterSet::getEncodedXmm(Target->getRegNum()),
+             MachineTraits::RegisterSet::getEncodedXmm(Source->getRegNum()));
+}
+
+/// Encodes movsx through the GPR register-operand emitter, dispatching on
+/// the source type.
+template <class Machine>
+void InstX86Movsx<Machine>::emitIAS(const Cfg *Func) const {
+  assert(this->getSrcSize() == 1);
+  const Variable *Target = this->getDest();
+  const Operand *Source = this->getSrc(0);
+  // Dest has to be wider than 8 bits, while Src may be 8-bit. In practice
+  // the full register is used for Dest, which avoids an
+  // OperandSizeOverride prefix and lets the dispatch depend on the source
+  // type alone.
+  const Type SourceTy = Source->getType();
+  assert(typeWidthInBytes(Target->getType()) > 1);
+  assert(typeWidthInBytes(Target->getType()) > typeWidthInBytes(SourceTy));
+  emitIASRegOpTyGPR<Machine, false, true>(Func, SourceTy, Target, Source,
+                                          this->Emitter);
+}
+
+/// Encodes movzx through the GPR register-operand emitter, dispatching on
+/// the source type. The dest must be wider than one byte and wider than
+/// the source.
+template <class Machine>
+void InstX86Movzx<Machine>::emitIAS(const Cfg *Func) const {
+  assert(this->getSrcSize() == 1);
+  const Variable *Target = this->getDest();
+  const Operand *Source = this->getSrc(0);
+  const Type SourceTy = Source->getType();
+  assert(typeWidthInBytes(Target->getType()) > 1);
+  assert(typeWidthInBytes(Target->getType()) > typeWidthInBytes(SourceTy));
+  emitIASRegOpTyGPR<Machine, false, true>(Func, SourceTy, Target, Source,
+                                          this->Emitter);
+}
+
+/// Emits a textual "nop" followed by a comment naming the variant.
+template <class Machine> void InstX86Nop<Machine>::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  // TODO: Emit the right code for each variant.
+  Ostream &Out = Func->getContext()->getStrEmit();
+  Out << "\tnop\t# variant = ";
+  Out << Variant;
+}
+
+/// Encodes a nop by calling the function's assembler; the variant is not
+/// consulted.
+template <class Machine>
+void InstX86Nop<Machine>::emitIAS(const Cfg *Func) const {
+  using AsmTy = typename InstX86Base<Machine>::Traits::Assembler;
+  AsmTy *Asm = Func->getAssembler<AsmTy>();
+  // TODO: Emit the right code for the variant.
+  Asm->nop();
+}
+
+/// Writes the dump form "nop (variant = <Variant>)".
+template <class Machine> void InstX86Nop<Machine>::dump(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Out = Func->getContext()->getStrDump();
+  Out << "nop (variant = ";
+  Out << Variant << ")";
+}
+
+/// Emits the textual assembly for fld. A source that is a Variable with a
+/// register is first spilled to a temporary stack slot and loaded from
+/// there; any other source is emitted directly as the fld operand.
+template <class Machine> void InstX86Fld<Machine>::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  using MachineTraits = typename InstX86Base<Machine>::Traits;
+  Ostream &Out = Func->getContext()->getStrEmit();
+  assert(this->getSrcSize() == 1);
+  Operand *Source = this->getSrc(0);
+  const Type Ty = Source->getType();
+  const SizeT Width = typeWidthInBytes(Ty);
+  const auto SourceVar = llvm::dyn_cast<Variable>(Source);
+  const bool InRegister = SourceVar != nullptr && SourceVar->hasReg();
+  if (!InRegister) {
+    Out << "\tfld" << this->getFldString(Ty) << "\t";
+    Source->emit(Func);
+    return;
+  }
+  // The source is a physical xmm register: reserve a stack slot, store the
+  // register there, fld from the slot, then release the slot.
+  Out << "\tsubl\t$" << Width << ", %esp\n";
+  Out << "\tmov" << MachineTraits::TypeAttributes[Ty].SdSsString << "\t";
+  SourceVar->emit(Func);
+  Out << ", (%esp)\n";
+  Out << "\tfld" << this->getFldString(Ty) << "\t(%esp)\n";
+  Out << "\taddl\t$" << Width << ", %esp";
+}
+
+/// Encodes fld through the integrated assembler. The source may be a
+/// Variable (with a register or on the stack), an X86OperandMem, or a
+/// Constant; anything else is unreachable.
+template <class Machine>
+void InstX86Fld<Machine>::emitIAS(const Cfg *Func) const {
+  using MachineTraits = typename InstX86Base<Machine>::Traits;
+  using AsmTy = typename MachineTraits::Assembler;
+  using AddrTy = typename MachineTraits::Address;
+  using MemTy = typename MachineTraits::X86OperandMem;
+  AsmTy *Asm = Func->getAssembler<AsmTy>();
+  assert(this->getSrcSize() == 1);
+  const Operand *Source = this->getSrc(0);
+  const Type Ty = Source->getType();
+
+  if (const auto SourceVar = llvm::dyn_cast<Variable>(Source)) {
+    if (!SourceVar->hasReg()) {
+      AddrTy StackAddr(
+          static_cast<typename MachineTraits::TargetLowering *>(
+              Func->getTarget())
+              ->stackVarToAsmOperand(SourceVar));
+      Asm->fld(Ty, StackAddr);
+      return;
+    }
+    // The source is a physical xmm register: reserve a stack slot, store
+    // the register there, fld from the slot, then release the slot.
+    Immediate Width(typeWidthInBytes(Ty));
+    Asm->sub(IceType_i32, MachineTraits::RegisterSet::Encoded_Reg_esp, Width);
+    AddrTy StackSlot = AddrTy(MachineTraits::RegisterSet::Encoded_Reg_esp, 0);
+    Asm->movss(
+        Ty, StackSlot,
+        MachineTraits::RegisterSet::getEncodedXmm(SourceVar->getRegNum()));
+    Asm->fld(Ty, StackSlot);
+    Asm->add(IceType_i32, MachineTraits::RegisterSet::Encoded_Reg_esp, Width);
+    return;
+  }
+
+  if (const auto Mem = llvm::dyn_cast<MemTy>(Source)) {
+    assert(Mem->getSegmentRegister() == MemTy::DefaultSegment);
+    Asm->fld(Ty, Mem->toAsmAddress(Asm));
+    return;
+  }
+
+  if (const auto Imm = llvm::dyn_cast<Constant>(Source)) {
+    Asm->fld(Ty, AddrTy::ofConstPool(Asm, Imm));
+    return;
+  }
+
+  llvm_unreachable("Unexpected operand type");
+}
+
+/// Writes the dump form "fld.<type of Src(0)> <sources>".
+template <class Machine> void InstX86Fld<Machine>::dump(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Out = Func->getContext()->getStrDump();
+  const Type Ty = this->getSrc(0)->getType();
+  Out << "fld." << Ty << " ";
+  this->dumpSources(Func);
+}
+
+/// Emits the textual assembly for fstp. With no dest, st(0) is simply
+/// popped; a dest without a register is stored to directly; a dest with a
+/// register is filled by way of a temporary stack slot.
+template <class Machine>
+void InstX86Fstp<Machine>::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  using MachineTraits = typename InstX86Base<Machine>::Traits;
+  Ostream &Out = Func->getContext()->getStrEmit();
+  assert(this->getSrcSize() == 0);
+  Variable *Target = this->getDest();
+  // TODO(jvoung,stichnot): Utilize this by setting Dest to nullptr to
+  // "partially" delete the fstp if the Dest is unused.
+  // Even if Dest is unused, the fstp should be kept for the SideEffects
+  // of popping the stack.
+  if (Target == nullptr) {
+    Out << "\tfstp\tst(0)";
+    return;
+  }
+  const Type Ty = Target->getType();
+  if (!Target->hasReg()) {
+    Out << "\tfstp" << this->getFldString(Ty) << "\t";
+    Target->emit(Func);
+    return;
+  }
+  // Dest is a physical (xmm) register, so st(0) has to travel through
+  // memory: reserve a temporary stack slot, spill st(0) into it, load the
+  // xmm register from it, and release the slot again.
+  const size_t Width = typeWidthInBytes(Ty);
+  Out << "\tsubl\t$" << Width << ", %esp\n";
+  Out << "\tfstp" << this->getFldString(Ty) << "\t(%esp)\n";
+  Out << "\tmov" << MachineTraits::TypeAttributes[Ty].SdSsString
+      << "\t(%esp), ";
+  Target->emit(Func);
+  Out << "\n";
+  Out << "\taddl\t$" << Width << ", %esp";
+}
+
+/// Encodes fstp through the integrated assembler. With no dest, st(0) is
+/// simply popped; a dest without a register is stored to directly; a dest
+/// with a register is filled by way of a temporary stack slot.
+template <class Machine>
+void InstX86Fstp<Machine>::emitIAS(const Cfg *Func) const {
+  using MachineTraits = typename InstX86Base<Machine>::Traits;
+  using AsmTy = typename MachineTraits::Assembler;
+  using AddrTy = typename MachineTraits::Address;
+  AsmTy *Asm = Func->getAssembler<AsmTy>();
+  assert(this->getSrcSize() == 0);
+  const Variable *Target = this->getDest();
+  // TODO(jvoung,stichnot): Utilize this by setting Dest to nullptr to
+  // "partially" delete the fstp if the Dest is unused.
+  // Even if Dest is unused, the fstp should be kept for the SideEffects
+  // of popping the stack.
+  if (Target == nullptr) {
+    Asm->fstp(MachineTraits::RegisterSet::getEncodedSTReg(0));
+    return;
+  }
+  const Type Ty = Target->getType();
+  if (!Target->hasReg()) {
+    AddrTy StackAddr(
+        static_cast<typename MachineTraits::TargetLowering *>(
+            Func->getTarget())
+            ->stackVarToAsmOperand(Target));
+    Asm->fstp(Ty, StackAddr);
+    return;
+  }
+  // Dest is a physical (xmm) register, so st(0) has to travel through
+  // memory: reserve a temporary stack slot, spill st(0) into it, load the
+  // xmm register from it, and release the slot again.
+  Immediate Width(typeWidthInBytes(Ty));
+  Asm->sub(IceType_i32, MachineTraits::RegisterSet::Encoded_Reg_esp, Width);
+  AddrTy StackSlot = AddrTy(MachineTraits::RegisterSet::Encoded_Reg_esp, 0);
+  Asm->fstp(Ty, StackSlot);
+  Asm->movss(Ty,
+             MachineTraits::RegisterSet::getEncodedXmm(Target->getRegNum()),
+             StackSlot);
+  Asm->add(IceType_i32, MachineTraits::RegisterSet::Encoded_Reg_esp, Width);
+}
+
+/// Writes the dump form "<dest> = fstp.<dest type>, st(0)".
+///
+/// emit() and emitIAS() both accept an fstp whose Dest is null (the value
+/// is popped and discarded), so dump() must not dereference Dest
+/// unconditionally. For a null Dest this writes "fstp st(0)" instead.
+template <class Machine>
+void InstX86Fstp<Machine>::dump(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Str = Func->getContext()->getStrDump();
+  const Variable *Dest = this->getDest();
+  if (Dest == nullptr) {
+    // NOTE(review): the dest-less spelling is a choice made here, mirroring
+    // the operand emit() prints for this case -- confirm the preferred form.
+    Str << "fstp st(0)";
+    return;
+  }
+  this->dumpDest(Func);
+  Str << " = fstp." << Dest->getType() << ", st(0)";
+}
+
+/// Emits pcmpeq in two-address form. The opcode is "pcmpeq" followed by
+/// the PackString attribute of the dest type.
+template <class Machine>
+void InstX86Pcmpeq<Machine>::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  using MachineTraits = typename InstX86Base<Machine>::Traits;
+  const Type DestTy = this->getDest()->getType();
+  char Mnemonic[30];
+  snprintf(Mnemonic, llvm::array_lengthof(Mnemonic), "pcmpeq%s",
+           MachineTraits::TypeAttributes[DestTy].PackString);
+  this->emitTwoAddress(Mnemonic, this, Func);
+}
+
+/// Emits pcmpgt in two-address form. The opcode is "pcmpgt" followed by
+/// the PackString attribute of the dest type.
+template <class Machine>
+void InstX86Pcmpgt<Machine>::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  using MachineTraits = typename InstX86Base<Machine>::Traits;
+  const Type DestTy = this->getDest()->getType();
+  char Mnemonic[30];
+  snprintf(Mnemonic, llvm::array_lengthof(Mnemonic), "pcmpgt%s",
+           MachineTraits::TypeAttributes[DestTy].PackString);
+  this->emitTwoAddress(Mnemonic, this, Func);
+}
+
+/// Emits the textual assembly "<opcode><pack-string> <src1>, <src0>,
+/// <dest>", with the dest printed as an i32 register.
+template <class Machine>
+void InstX86Pextr<Machine>::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  using MachineTraits = typename InstX86Base<Machine>::Traits;
+  Ostream &Out = Func->getContext()->getStrEmit();
+  assert(this->getSrcSize() == 2);
+  Operand *Vector = this->getSrc(0);
+  // pextrb and pextrd are SSE4.1 instructions.
+  assert(Vector->getType() == IceType_v8i16 ||
+         Vector->getType() == IceType_v8i1 ||
+         static_cast<typename MachineTraits::TargetLowering *>(
+             Func->getTarget())
+                 ->getInstructionSet() >= MachineTraits::SSE4_1);
+  Out << "\t" << this->Opcode
+      << MachineTraits::TypeAttributes[Vector->getType()].PackString << "\t";
+  this->getSrc(1)->emit(Func);
+  Out << ", ";
+  Vector->emit(Func);
+  Out << ", ";
+  Variable *Target = this->getDest();
+  // pextrw requires a register dest. SSE4.1 has a form with a memory dest,
+  // but it is not used here; for uniformity every variant is restricted to
+  // a register dest for now.
+  assert(Target->hasReg());
+  Target->asType(IceType_i32)->emit(Func);
+}
+
+/// Encodes pextr via the assembler, dispatching on the destination type; Dest
+/// and Src(0) must be in registers, and Src(1) is passed as the last operand.
+template <class Machine>
+void InstX86Pextr<Machine>::emitIAS(const Cfg *Func) const {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  using GPRReg = typename Traits::RegisterSet::GPRRegister;
+  using XmmReg = typename Traits::RegisterSet::XmmRegister;
+  assert(this->getSrcSize() == 2);
+  const Variable *Dest = this->getDest();
+  const Type DispatchTy = Dest->getType();
+  // pextrb and pextrd are SSE4.1 instructions.
+  assert(DispatchTy == IceType_i16 ||
+         static_cast<typename Traits::TargetLowering *>(Func->getTarget())
+                 ->getInstructionSet() >= Traits::SSE4_1);
+  // pextrw must take a register dest. There is an SSE4.1 version that takes
+  // a memory dest, but we aren't using it. For uniformity, just restrict
+  // them all to have a register dest for now.
+  assert(Dest->hasReg());
+  // pextrw's Src(0) must be a register (both SSE4.1 and SSE2).
+  assert(llvm::cast<Variable>(this->getSrc(0))->hasReg());
+  // Only the first emitter slot is populated; the second is left null.
+  static const typename Traits::Assembler::template ThreeOpImmEmitter<
+      GPRReg, XmmReg> Emitter = {&Traits::Assembler::pextr, nullptr};
+  emitIASThreeOpImmOps<Machine, GPRReg, XmmReg,
+                       Traits::RegisterSet::getEncodedGPR,
+                       Traits::RegisterSet::getEncodedXmm>(
+      Func, DispatchTy, Dest, this->getSrc(0), this->getSrc(1), Emitter);
+}
+
+/// Emits pinsr in textual form: the opcode with the destination type's
+/// PackString, followed by Src(2), Src(1) and the destination. Returns
+/// without emitting anything when BuildDefs::dump() is false.
+template <class Machine>
+void InstX86Pinsr<Machine>::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  using Traits = typename InstX86Base<Machine>::Traits;
+  Ostream &Out = Func->getContext()->getStrEmit();
+  assert(this->getSrcSize() == 3);
+  const Type DestTy = this->getDest()->getType();
+  // pinsrb and pinsrd are SSE4.1 instructions.
+  assert(DestTy == IceType_v8i16 || DestTy == IceType_v8i1 ||
+         static_cast<typename Traits::TargetLowering *>(Func->getTarget())
+                 ->getInstructionSet() >= Traits::SSE4_1);
+  Out << "\t" << this->Opcode << Traits::TypeAttributes[DestTy].PackString
+      << "\t";
+  this->getSrc(2)->emit(Func);
+  Out << ", ";
+  Operand *Src1 = this->getSrc(1);
+  if (const auto Src1Var = llvm::dyn_cast<Variable>(Src1)) {
+    // If src1 is a register, it should always be r32.
+    if (Src1Var->hasReg())
+      Src1Var->asType(IceType_i32)->emit(Func);
+    else
+      Src1Var->emit(Func);
+  } else {
+    Src1->emit(Func);
+  }
+  Out << ", ";
+  this->getDest()->emit(Func);
+}
+
+/// Encodes pinsr via the assembler. Dest must equal Src(0); Src(1) and Src(2)
+/// are forwarded as the remaining operands.
+template <class Machine>
+void InstX86Pinsr<Machine>::emitIAS(const Cfg *Func) const {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  using XmmReg = typename Traits::RegisterSet::XmmRegister;
+  using GPRReg = typename Traits::RegisterSet::GPRRegister;
+  assert(this->getSrcSize() == 3);
+  assert(this->getDest() == this->getSrc(0));
+  // The type of Src(1) is the dispatch type handed to the emitter.
+  const Operand *Src1 = this->getSrc(1);
+  const Type DispatchTy = Src1->getType();
+  // pinsrb and pinsrd are SSE4.1 instructions.
+  assert(DispatchTy == IceType_i16 ||
+         static_cast<typename Traits::TargetLowering *>(Func->getTarget())
+                 ->getInstructionSet() >= Traits::SSE4_1);
+  // If src1 is a register, it should always be r32 (this should fall out
+  // from the encodings for ByteRegs overlapping the encodings for r32),
+  // but we have to trust the regalloc to not choose "ah", where it
+  // doesn't overlap.
+  static const typename Traits::Assembler::template ThreeOpImmEmitter<
+      XmmReg, GPRReg> Emitter = {&Traits::Assembler::pinsr,
+                                 &Traits::Assembler::pinsr};
+  emitIASThreeOpImmOps<Machine, XmmReg, GPRReg,
+                       Traits::RegisterSet::getEncodedXmm,
+                       Traits::RegisterSet::getEncodedGPR>(
+      Func, DispatchTy, this->getDest(), Src1, this->getSrc(2), Emitter);
+}
+
+/// Encodes pshufd via the assembler with Dest, Src(0) and Src(1) as operands,
+/// dispatching on the destination type.
+template <class Machine>
+void InstX86Pshufd<Machine>::emitIAS(const Cfg *Func) const {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  using XmmReg = typename Traits::RegisterSet::XmmRegister;
+  assert(this->getSrcSize() == 2);
+  const Variable *Dest = this->getDest();
+  const Type Ty = Dest->getType();
+  // Both emitter slots are filled from the Assembler's pshufd.
+  static const typename Traits::Assembler::template ThreeOpImmEmitter<
+      XmmReg, XmmReg> Emitter = {&Traits::Assembler::pshufd,
+                                 &Traits::Assembler::pshufd};
+  emitIASThreeOpImmOps<Machine, XmmReg, XmmReg,
+                       Traits::RegisterSet::getEncodedXmm,
+                       Traits::RegisterSet::getEncodedXmm>(
+      Func, Ty, Dest, this->getSrc(0), this->getSrc(1), Emitter);
+}
+
+/// Encodes shufps via the assembler. Dest must equal Src(0); Src(1) and Src(2)
+/// are forwarded as the remaining operands.
+template <class Machine>
+void InstX86Shufps<Machine>::emitIAS(const Cfg *Func) const {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  using XmmReg = typename Traits::RegisterSet::XmmRegister;
+  assert(this->getSrcSize() == 3);
+  const Variable *Dest = this->getDest();
+  assert(Dest == this->getSrc(0));
+  const Type Ty = Dest->getType();
+  // Both emitter slots are filled from the Assembler's shufps.
+  static const typename Traits::Assembler::template ThreeOpImmEmitter<
+      XmmReg, XmmReg> Emitter = {&Traits::Assembler::shufps,
+                                 &Traits::Assembler::shufps};
+  emitIASThreeOpImmOps<Machine, XmmReg, XmmReg,
+                       Traits::RegisterSet::getEncodedXmm,
+                       Traits::RegisterSet::getEncodedXmm>(
+      Func, Ty, Dest, this->getSrc(1), this->getSrc(2), Emitter);
+}
+
+template <class Machine> void InstX86Pop<Machine>::emit(const Cfg *Func) const {
+  if (BuildDefs::dump()) { // Otherwise there is nothing to emit.
+    assert(this->getSrcSize() == 0);
+    // Print the mnemonic, then let the destination print itself.
+    Func->getContext()->getStrEmit() << "\tpop\t";
+    this->getDest()->emit(Func);
+  }
+}
+
+template <class Machine>
+void InstX86Pop<Machine>::emitIAS(const Cfg *Func) const {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  assert(this->getSrcSize() == 0);
+  typename Traits::Assembler *Asm =
+      Func->getAssembler<typename Traits::Assembler>();
+  auto *Dest = this->getDest();
+  if (!Dest->hasReg()) {
+    // No register assigned: pop into the stackVarToAsmOperand() operand.
+    Asm->popl(static_cast<typename Traits::TargetLowering *>(Func->getTarget())
+                  ->stackVarToAsmOperand(Dest));
+    return;
+  }
+  Asm->popl(Traits::RegisterSet::getEncodedGPR(Dest->getRegNum()));
+}
+
+template <class Machine> void InstX86Pop<Machine>::dump(const Cfg *Func) const {
+  if (BuildDefs::dump()) { // Otherwise nothing is dumped.
+    Ostream &Out = Func->getContext()->getStrDump();
+    this->dumpDest(Func);
+    Out << " = pop." << this->getDest()->getType() << " ";
+  }
+}
+
+template <class Machine>
+void InstX86AdjustStack<Machine>::emit(const Cfg *Func) const {
+  if (BuildDefs::dump()) {
+    Func->getContext()->getStrEmit() << "\tsubl\t$" << Amount << ", %esp";
+    // Report the same amount to the target via updateStackAdjustment().
+    Func->getTarget()->updateStackAdjustment(Amount);
+  }
+}
+
+template <class Machine>
+void InstX86AdjustStack<Machine>::emitIAS(const Cfg *Func) const {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  // Encode an i32 subtraction of Amount from esp, then report the same
+  // amount to the target via updateStackAdjustment().
+  Func->getAssembler<typename Traits::Assembler>()->sub(
+      IceType_i32, Traits::RegisterSet::Encoded_Reg_esp, Immediate(Amount));
+  Func->getTarget()->updateStackAdjustment(Amount);
+}
+
+template <class Machine>
+void InstX86AdjustStack<Machine>::dump(const Cfg *Func) const {
+  if (BuildDefs::dump()) {
+    // Rendered as an i32 subtraction from esp.
+    Func->getContext()->getStrDump() << "esp = sub.i32 esp, " << Amount;
+  }
+}
+
+template <class Machine>
+void InstX86Push<Machine>::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  assert(this->getSrcSize() == 1);
+  // Push is currently only used for saving GPRs.
+  const auto SavedReg = llvm::cast<Variable>(this->getSrc(0));
+  assert(SavedReg->hasReg());
+  Ostream &Out = Func->getContext()->getStrEmit();
+  Out << "\tpush\t";
+  SavedReg->emit(Func);
+}
+
+template <class Machine>
+void InstX86Push<Machine>::emitIAS(const Cfg *Func) const {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  assert(this->getSrcSize() == 1);
+  // Push is currently only used for saving GPRs.
+  const auto SavedReg = llvm::cast<Variable>(this->getSrc(0));
+  assert(SavedReg->hasReg());
+  const auto Encoded =
+      Traits::RegisterSet::getEncodedGPR(SavedReg->getRegNum());
+  Func->getAssembler<typename Traits::Assembler>()->pushl(Encoded);
+}
+
+template <class Machine>
+void InstX86Push<Machine>::dump(const Cfg *Func) const {
+  if (BuildDefs::dump()) { // "push.<type> " and then the source operands.
+    Ostream &Out = Func->getContext()->getStrDump();
+    Out << "push." << this->getSrc(0)->getType() << " ";
+    this->dumpSources(Func);
+  }
+}
+
+template <class Machine>
+void InstX86Psll<Machine>::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  using Traits = typename InstX86Base<Machine>::Traits;
+  const Type DestTy = this->getDest()->getType();
+  // Only v8i16, v8i1, v4i32 and v4i1 destinations are accepted.
+  assert(DestTy == IceType_v8i16 || DestTy == IceType_v8i1 ||
+         DestTy == IceType_v4i32 || DestTy == IceType_v4i1);
+  // The mnemonic is "psll" plus the PackString of the destination type.
+  char Mnemonic[30];
+  snprintf(Mnemonic, llvm::array_lengthof(Mnemonic), "psll%s",
+           Traits::TypeAttributes[DestTy].PackString);
+  this->emitTwoAddress(Mnemonic, this, Func);
+}
+
+template <class Machine>
+void InstX86Psra<Machine>::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  using Traits = typename InstX86Base<Machine>::Traits;
+  const Type DestTy = this->getDest()->getType();
+  // Only v8i16, v8i1, v4i32 and v4i1 destinations are accepted.
+  assert(DestTy == IceType_v8i16 || DestTy == IceType_v8i1 ||
+         DestTy == IceType_v4i32 || DestTy == IceType_v4i1);
+  // The mnemonic is "psra" plus the PackString of the destination type.
+  char Mnemonic[30];
+  snprintf(Mnemonic, llvm::array_lengthof(Mnemonic), "psra%s",
+           Traits::TypeAttributes[DestTy].PackString);
+  this->emitTwoAddress(Mnemonic, this, Func);
+}
+
+template <class Machine>
+void InstX86Psrl<Machine>::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return; // Nothing is emitted when BuildDefs::dump() is false.
+  using Traits = typename InstX86Base<Machine>::Traits;
+  // The mnemonic is "psrl" plus the PackString of the destination type.
+  char Mnemonic[30];
+  snprintf(Mnemonic, llvm::array_lengthof(Mnemonic), "psrl%s",
+           Traits::TypeAttributes[this->getDest()->getType()].PackString);
+  this->emitTwoAddress(Mnemonic, this, Func);
+}
+
+template <class Machine> void InstX86Ret<Machine>::emit(const Cfg *Func) const {
+  if (BuildDefs::dump()) {
+    // Only the bare mnemonic is printed; no operands follow it.
+    Func->getContext()->getStrEmit() << "\tret";
+  }
+}
+
+template <class Machine>
+void InstX86Ret<Machine>::emitIAS(const Cfg *Func) const {
+  using AsmTy = typename InstX86Base<Machine>::Traits::Assembler;
+  // Sources (if any) are not encoded; only a plain ret is emitted.
+  Func->getAssembler<AsmTy>()->ret();
+}
+
+template <class Machine> void InstX86Ret<Machine>::dump(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  Type RetTy = IceType_void; // Used when the instruction has no sources.
+  if (this->getSrcSize() != 0)
+    RetTy = this->getSrc(0)->getType();
+  Func->getContext()->getStrDump() << "ret." << RetTy << " ";
+  this->dumpSources(Func);
+}
+
+template <class Machine>
+void InstX86Setcc<Machine>::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  using Traits = typename InstX86Base<Machine>::Traits;
+  // The mnemonic is "set" plus the condition's DisplayString.
+  Func->getContext()->getStrEmit()
+      << "\tset" << Traits::InstBrAttributes[Condition].DisplayString << "\t";
+  this->Dest->emit(Func);
+}
+
+/// Encodes setcc into Dest's byte register, or into its stack operand if none.
+template <class Machine>
+void InstX86Setcc<Machine>::emitIAS(const Cfg *Func) const {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  assert(Condition != Traits::Cond::Br_None);
+  assert(this->getDest()->getType() == IceType_i1);
+  assert(this->getSrcSize() == 0);
+  typename Traits::Assembler *Asm =
+      Func->getAssembler<typename Traits::Assembler>();
+  auto *Dest = this->getDest();
+  if (Dest->hasReg()) {
+    Asm->setcc(Condition,
+               Traits::RegisterSet::getEncodedByteReg(Dest->getRegNum()));
+    return;
+  }
+  Asm->setcc(Condition,
+             static_cast<typename Traits::TargetLowering *>(Func->getTarget())
+                 ->stackVarToAsmOperand(Dest));
+}
+
+template <class Machine>
+void InstX86Setcc<Machine>::dump(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  using Traits = typename InstX86Base<Machine>::Traits;
+  // Format: "setcc.<condition> " followed by the destination.
+  Func->getContext()->getStrDump()
+      << "setcc." << Traits::InstBrAttributes[Condition].DisplayString << " ";
+  this->dumpDest(Func);
+}
+
+template <class Machine>
+void InstX86Xadd<Machine>::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Out = Func->getContext()->getStrEmit();
+  if (this->Locked)
+    Out << "\tlock";
+  Out << "\txadd" << this->getWidthString(this->getSrc(0)->getType()) << "\t";
+  // Operands are printed as Src(1) first, then Src(0).
+  this->getSrc(1)->emit(Func);
+  Out << ", ";
+  this->getSrc(0)->emit(Func);
+}
+
+/// Encodes xadd with Src(0) as the memory operand and Src(1) as the register
+/// operand, forwarding the Locked flag to the assembler.
+template <class Machine>
+void InstX86Xadd<Machine>::emitIAS(const Cfg *Func) const {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  using GPRReg = typename Traits::RegisterSet::GPRRegister;
+  assert(this->getSrcSize() == 2);
+  typename Traits::Assembler *Asm =
+      Func->getAssembler<typename Traits::Assembler>();
+  const Type Ty = this->getSrc(0)->getType();
+  // Src(0) must be a memory operand that uses the default segment.
+  const auto Mem = llvm::cast<typename Traits::X86OperandMem>(this->getSrc(0));
+  assert(Mem->getSegmentRegister() == Traits::X86OperandMem::DefaultSegment);
+  const typename Traits::Address Addr = Mem->toAsmAddress(Asm);
+  // Src(1) must be a variable that has been assigned a register.
+  const auto VarReg = llvm::cast<Variable>(this->getSrc(1));
+  assert(VarReg->hasReg());
+  const GPRReg Reg = Traits::RegisterSet::getEncodedGPR(VarReg->getRegNum());
+  Asm->xadd(Ty, Addr, Reg, this->Locked);
+}
+
+template <class Machine>
+void InstX86Xadd<Machine>::dump(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Out = Func->getContext()->getStrDump();
+  // Locked forms get a "lock " prefix before "xadd.<type> " and the sources.
+  if (this->Locked)
+    Out << "lock ";
+  const Type Ty = this->getSrc(0)->getType();
+  Out << "xadd." << Ty << " ";
+  this->dumpSources(Func);
+}
+
+template <class Machine>
+void InstX86Xchg<Machine>::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return; // Nothing is emitted when BuildDefs::dump() is false.
+  Ostream &Out = Func->getContext()->getStrEmit();
+  Out << "\txchg" << this->getWidthString(this->getSrc(0)->getType()) << "\t";
+  this->getSrc(1)->emit(Func); // Src(1) is printed first,
+  Out << ", ";
+  this->getSrc(0)->emit(Func); // then Src(0).
+}
+
+/// Encodes xchg with Src(0) as the memory operand and Src(1) as the register
+/// operand.
+template <class Machine>
+void InstX86Xchg<Machine>::emitIAS(const Cfg *Func) const {
+  using Traits = typename InstX86Base<Machine>::Traits;
+  using GPRReg = typename Traits::RegisterSet::GPRRegister;
+  assert(this->getSrcSize() == 2);
+  typename Traits::Assembler *Asm =
+      Func->getAssembler<typename Traits::Assembler>();
+  const Type Ty = this->getSrc(0)->getType();
+  // Src(0) must be a memory operand that uses the default segment.
+  const auto Mem = llvm::cast<typename Traits::X86OperandMem>(this->getSrc(0));
+  assert(Mem->getSegmentRegister() == Traits::X86OperandMem::DefaultSegment);
+  const typename Traits::Address Addr = Mem->toAsmAddress(Asm);
+  // Src(1) must be a variable that has been assigned a register.
+  const auto VarReg = llvm::cast<Variable>(this->getSrc(1));
+  assert(VarReg->hasReg());
+  const GPRReg Reg = Traits::RegisterSet::getEncodedGPR(VarReg->getRegNum());
+  Asm->xchg(Ty, Addr, Reg);
+}
+
+template <class Machine>
+void InstX86Xchg<Machine>::dump(const Cfg *Func) const {
+  if (BuildDefs::dump()) {
+    // Format: "xchg.<type of Src(0)> " followed by the source operands.
+    Ostream &Out = Func->getContext()->getStrDump();
+    Out << "xchg." << this->getSrc(0)->getType() << " ";
+    this->dumpSources(Func);
+  }
+}
+
+} // end of namespace X86Internal
+
+} // end of namespace Ice
+
+#endif // SUBZERO_SRC_ICEINSTX86BASEIMPL_H
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index c6a72d6..0d9572a 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -77,6 +77,7 @@
     llvm::array_lengthof(TableTypeX8632Attributes);
 
 const uint32_t MachineTraits<TargetX8632>::X86_STACK_ALIGNMENT_BYTES = 16;
+const char *MachineTraits<TargetX8632>::TargetName = "X8632";
 
 } // end of namespace X86Internal
 
diff --git a/src/IceTargetLoweringX8632Traits.h b/src/IceTargetLoweringX8632Traits.h
index 4cd22fa..ae9abe1 100644
--- a/src/IceTargetLoweringX8632Traits.h
+++ b/src/IceTargetLoweringX8632Traits.h
@@ -6,9 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file defines the X8632 Target Lowering Traits.
-//
+///
+/// \file
+/// This file declares the X8632 Target Lowering Traits.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICETARGETLOWERINGX8632TRAITS_H
@@ -19,15 +20,22 @@
 #include "IceDefs.h"
 #include "IceInst.h"
 #include "IceInstX8632.def"
+#include "IceOperand.h"
 #include "IceRegistersX8632.h"
 #include "IceTargetLoweringX8632.def"
+#include "IceTargetLowering.h"
 
 namespace Ice {
 
 class TargetX8632;
 
+namespace X8632 {
+class AssemblerX8632;
+} // end of namespace X8632
+
 namespace X86Internal {
 
+template <class Machine> struct Insts;
 template <class Machine> struct MachineTraits;
 
 template <> struct MachineTraits<TargetX8632> {
@@ -56,7 +64,7 @@
   class Operand {
   public:
     Operand(const Operand &other)
-        : length_(other.length_), fixup_(other.fixup_) {
+        : fixup_(other.fixup_), length_(other.length_) {
       memmove(&encoding_[0], &other.encoding_[0], other.length_);
     }
 
@@ -98,7 +106,7 @@
     AssemblerFixup *fixup() const { return fixup_; }
 
   protected:
-    Operand() : length_(0), fixup_(nullptr) {} // Needed by subclass Address.
+    Operand() : fixup_(nullptr), length_(0) {} // Needed by subclass Address.
 
     void SetModRM(int mod, GPRRegister rm) {
       assert((mod & ~3) == 0);
@@ -128,20 +136,20 @@
     void SetFixup(AssemblerFixup *fixup) { fixup_ = fixup; }
 
   private:
-    uint8_t length_;
-    uint8_t encoding_[6];
     AssemblerFixup *fixup_;
+    uint8_t encoding_[6];
+    uint8_t length_;
 
     explicit Operand(GPRRegister reg) : fixup_(nullptr) { SetModRM(3, reg); }
 
-    // Get the operand encoding byte at the given index.
+    /// Get the operand encoding byte at the given index.
     uint8_t encoding_at(intptr_t index) const {
       assert(index >= 0 && index < length_);
       return encoding_[index];
     }
 
-    // Returns whether or not this operand is really the given register in
-    // disguise. Used from the assembler to generate better encodings.
+    /// Returns whether or not this operand is really the given register in
+    /// disguise. Used from the assembler to generate better encodings.
     bool IsRegister(GPRRegister reg) const {
       return ((encoding_[0] & 0xF8) ==
               0xC0) // Addressing mode is register only.
@@ -205,8 +213,8 @@
       }
     }
 
-    // AbsoluteTag is a special tag used by clients to create an absolute
-    // Address.
+    /// AbsoluteTag is a special tag used by clients to create an absolute
+    /// Address.
     enum AbsoluteTag { ABSOLUTE };
 
     Address(AbsoluteTag, const uintptr_t Addr) {
@@ -255,27 +263,182 @@
     End
   };
 
-  // The maximum number of arguments to pass in XMM registers
+  static const char *TargetName;
+
+  /// Returns the name of register RegNum appropriate for a value of type Ty.
+  static IceString getRegName(SizeT RegNum, Type Ty) {
+    assert(RegNum < RegisterSet::Reg_NUM);
+    static const char *const RegNames8[] = {
+#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
+          frameptr, isI8, isInt, isFP)                                         \
+  name8,
+        REGX8632_TABLE
+#undef X
+    };
+
+    static const char *const RegNames16[] = {
+#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
+          frameptr, isI8, isInt, isFP)                                         \
+  name16,
+        REGX8632_TABLE
+#undef X
+    };
+
+    static const char *const RegNames[] = {
+#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
+          frameptr, isI8, isInt, isFP)                                         \
+  name,
+        REGX8632_TABLE
+#undef X
+    };
+
+    // i1 and i8 use the name8 column, i16 the name16 column, and every other
+    // type the name column.
+    const char *const *Names = RegNames;
+    if (Ty == IceType_i1 || Ty == IceType_i8)
+      Names = RegNames8;
+    else if (Ty == IceType_i16)
+      Names = RegNames16;
+    return Names[RegNum];
+  }
+
+  static void initRegisterSet(llvm::SmallBitVector *IntegerRegisters,
+                              llvm::SmallBitVector *IntegerRegistersI8,
+                              llvm::SmallBitVector *FloatRegisters,
+                              llvm::SmallBitVector *VectorRegisters,
+                              llvm::SmallBitVector *ScratchRegs) {
+#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
+          frameptr, isI8, isInt, isFP)                                         \
+  (*IntegerRegisters)[RegisterSet::val] = isInt;                               \
+  (*IntegerRegistersI8)[RegisterSet::val] = isI8;                              \
+  (*FloatRegisters)[RegisterSet::val] = isFP;                                  \
+  (*VectorRegisters)[RegisterSet::val] = isFP; /* also keyed on isFP */        \
+  (*ScratchRegs)[RegisterSet::val] = scratch;
+    REGX8632_TABLE; // Applies X to every row of the register table.
+#undef X
+  }
+
+  /// Returns a bit vector, indexed by register number, with a bit set for
+  /// every register that belongs to a class (caller-save, callee-save, stack
+  /// pointer, frame pointer) requested in Include. Registers belonging to a
+  /// class named in Exclude are cleared afterwards, so Exclude wins.
+  static llvm::SmallBitVector
+  getRegisterSet(TargetLowering::RegSetMask Include,
+                 TargetLowering::RegSetMask Exclude) {
+    llvm::SmallBitVector Registers(RegisterSet::Reg_NUM);
+
+#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
+          frameptr, isI8, isInt, isFP)                                         \
+  /* Include: select the register if any of its classes is requested. */       \
+  if ((scratch && (Include & ::Ice::TargetLowering::RegSet_CallerSave)) ||     \
+      (preserved && (Include & ::Ice::TargetLowering::RegSet_CalleeSave)) ||   \
+      (stackptr && (Include & ::Ice::TargetLowering::RegSet_StackPointer)) ||  \
+      (frameptr && (Include & ::Ice::TargetLowering::RegSet_FramePointer)))    \
+    Registers[RegisterSet::val] = true;                                        \
+  /* Exclude: drop the register if any of its classes is excluded. */          \
+  if ((scratch && (Exclude & ::Ice::TargetLowering::RegSet_CallerSave)) ||     \
+      (preserved && (Exclude & ::Ice::TargetLowering::RegSet_CalleeSave)) ||   \
+      (stackptr && (Exclude & ::Ice::TargetLowering::RegSet_StackPointer)) ||  \
+      (frameptr && (Exclude & ::Ice::TargetLowering::RegSet_FramePointer)))    \
+    Registers[RegisterSet::val] = false;
+
+    REGX8632_TABLE
+
+#undef X
+
+    return Registers;
+  }
+
+  /// Fills Permutation with a random mapping of register numbers that only
+  /// swaps registers sharing the same properties; excluded ones map to self.
+  static void
+  makeRandomRegisterPermutation(GlobalContext *Ctx, Cfg *Func,
+                                llvm::SmallVectorImpl<int32_t> &Permutation,
+                                const llvm::SmallBitVector &ExcludeRegisters) {
+    // TODO(stichnot): Declaring Permutation this way loses type/size
+    // information.  Fix this in conjunction with the caller-side TODO.
+    assert(Permutation.size() >= RegisterSet::Reg_NUM);
+    // Expected upper bound on the number of registers in a single equivalence
+    // class.  For x86-32, this would comprise the 8 XMM registers.  This is for
+    // performance, not correctness.
+    static const unsigned MaxEquivalenceClassSize = 8;
+    using RegisterList = llvm::SmallVector<int32_t, MaxEquivalenceClassSize>;
+    using EquivalenceClassMap = std::map<uint32_t, RegisterList>;
+    EquivalenceClassMap EquivalenceClasses;
+    SizeT NumShuffled = 0, NumPreserved = 0;
+
+// Build up the equivalence classes of registers by looking at the register
+// properties as well as whether the registers should be explicitly excluded
+// from shuffling.
+#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
+          frameptr, isI8, isInt, isFP)                                         \
+  if (ExcludeRegisters[RegisterSet::val]) {                                    \
+    /* val stays the same in the resulting permutation. */                     \
+    Permutation[RegisterSet::val] = RegisterSet::val;                          \
+    ++NumPreserved;                                                            \
+  } else {                                                                     \
+    const uint32_t Index = (scratch << 0) | (preserved << 1) | (isI8 << 2) |   \
+                           (isInt << 3) | (isFP << 4);                         \
+    /* val is assigned to an equivalence class based on its properties. */     \
+    EquivalenceClasses[Index].push_back(RegisterSet::val);                     \
+  }
+    REGX8632_TABLE
+#undef X
+
+    RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
+
+    // Shuffle the resulting equivalence classes.
+    for (const auto &I : EquivalenceClasses) { // By reference: no list copies.
+      const RegisterList &List = I.second;
+      RegisterList Shuffled(List);
+      RandomShuffle(Shuffled.begin(), Shuffled.end(), RNG);
+      for (size_t SI = 0, SE = Shuffled.size(); SI < SE; ++SI) {
+        Permutation[List[SI]] = Shuffled[SI];
+        ++NumShuffled;
+      }
+    }
+
+    assert(NumShuffled + NumPreserved == RegisterSet::Reg_NUM);
+
+    if (Func->isVerbose(IceV_Random)) {
+      OstreamLocker L(Func->getContext());
+      Ostream &Str = Func->getContext()->getStrDump();
+      Str << "Register equivalence classes:\n";
+      for (const auto &I : EquivalenceClasses) {
+        Str << "{";
+        const RegisterList &List = I.second;
+        const char *Separator = ""; // Becomes " " after the first name.
+        for (int32_t Register : List) {
+          Str << Separator << getRegName(Register, IceType_i32);
+          Separator = " ";
+        }
+        Str << "}\n";
+      }
+    }
+  }
+
+  /// The maximum number of arguments to pass in XMM registers
   static const uint32_t X86_MAX_XMM_ARGS = 4;
-  // The number of bits in a byte
+  /// The number of bits in a byte
   static const uint32_t X86_CHAR_BIT = 8;
-  // Stack alignment. This is defined in IceTargetLoweringX8632.cpp because it
-  // is used as an argument to std::max(), and the default std::less<T> has an
-  // operator(T const&, T const&) which requires this member to have an address.
+  /// Stack alignment. This is defined in IceTargetLoweringX8632.cpp because it
+  /// is used as an argument to std::max(), and the default std::less<T> has an
+  /// operator(T const&, T const&) which requires this member to have an
+  /// address.
   static const uint32_t X86_STACK_ALIGNMENT_BYTES;
-  // Size of the return address on the stack
+  /// Size of the return address on the stack
   static const uint32_t X86_RET_IP_SIZE_BYTES = 4;
-  // The number of different NOP instructions
+  /// The number of different NOP instructions
   static const uint32_t X86_NUM_NOP_VARIANTS = 5;
 
-  // Value is in bytes. Return Value adjusted to the next highest multiple
-  // of the stack alignment.
+  /// Value is in bytes. Return Value adjusted to the next highest multiple
+  /// of the stack alignment.
   static uint32_t applyStackAlignment(uint32_t Value) {
     return Utils::applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES);
   }
 
-  // Return the type which the elements of the vector have in the X86
-  // representation of the vector.
+  /// Return the type which the elements of the vector have in the X86
+  /// representation of the vector.
   static Type getInVectorElementType(Type Ty) {
     assert(isVectorType(Ty));
     size_t Index = static_cast<size_t>(Ty);
@@ -287,51 +450,54 @@
   // Note: The following data structures are defined in
   // IceTargetLoweringX8632.cpp.
 
-  // The following table summarizes the logic for lowering the fcmp
-  // instruction.  There is one table entry for each of the 16 conditions.
-  //
-  // The first four columns describe the case when the operands are
-  // floating point scalar values.  A comment in lowerFcmp() describes the
-  // lowering template.  In the most general case, there is a compare
-  // followed by two conditional branches, because some fcmp conditions
-  // don't map to a single x86 conditional branch.  However, in many cases
-  // it is possible to swap the operands in the comparison and have a
-  // single conditional branch.  Since it's quite tedious to validate the
-  // table by hand, good execution tests are helpful.
-  //
-  // The last two columns describe the case when the operands are vectors
-  // of floating point values.  For most fcmp conditions, there is a clear
-  // mapping to a single x86 cmpps instruction variant.  Some fcmp
-  // conditions require special code to handle and these are marked in the
-  // table with a Cmpps_Invalid predicate.
+  /// The following table summarizes the logic for lowering the fcmp
+  /// instruction. There is one table entry for each of the 16 conditions.
+  ///
+  /// The first four columns describe the case when the operands are floating
+  /// point scalar values.  A comment in lowerFcmp() describes the lowering
+  /// template.  In the most general case, there is a compare followed by two
+  /// conditional branches, because some fcmp conditions don't map to a single
+  /// x86 conditional branch.  However, in many cases it is possible to swap the
+  /// operands in the comparison and have a single conditional branch.  Since
+  /// it's quite tedious to validate the table by hand, good execution tests are
+  /// helpful.
+  ///
+  /// The last two columns describe the case when the operands are vectors of
+  /// floating point values.  For most fcmp conditions, there is a clear mapping
+  /// to a single x86 cmpps instruction variant.  Some fcmp conditions require
+  /// special code to handle and these are marked in the table with a
+  /// Cmpps_Invalid predicate.
+  /// @{
   static const struct TableFcmpType {
     uint32_t Default;
     bool SwapScalarOperands;
-    CondX86::BrCond C1, C2;
+    Cond::BrCond C1, C2;
     bool SwapVectorOperands;
-    CondX86::CmppsCond Predicate;
+    Cond::CmppsCond Predicate;
   } TableFcmp[];
   static const size_t TableFcmpSize;
+  /// @}
 
-  // The following table summarizes the logic for lowering the icmp instruction
-  // for i32 and narrower types.  Each icmp condition has a clear mapping to an
-  // x86 conditional branch instruction.
-
-  static const struct TableIcmp32Type {
-    CondX86::BrCond Mapping;
-  } TableIcmp32[];
+  /// The following table summarizes the logic for lowering the icmp instruction
+  /// for i32 and narrower types.  Each icmp condition has a clear mapping to an
+  /// x86 conditional branch instruction.
+  /// @{
+  static const struct TableIcmp32Type { Cond::BrCond Mapping; } TableIcmp32[];
   static const size_t TableIcmp32Size;
+  /// @}
 
-  // The following table summarizes the logic for lowering the icmp instruction
-  // for the i64 type.  For Eq and Ne, two separate 32-bit comparisons and
-  // conditional branches are needed.  For the other conditions, three separate
-  // conditional branches are needed.
+  /// The following table summarizes the logic for lowering the icmp instruction
+  /// for the i64 type.  For Eq and Ne, two separate 32-bit comparisons and
+  /// conditional branches are needed.  For the other conditions, three separate
+  /// conditional branches are needed.
+  /// @{
   static const struct TableIcmp64Type {
-    CondX86::BrCond C1, C2, C3;
+    Cond::BrCond C1, C2, C3;
   } TableIcmp64[];
   static const size_t TableIcmp64Size;
+  /// @}
 
-  static CondX86::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
+  static Cond::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
     size_t Index = static_cast<size_t>(Cond);
     assert(Index < TableIcmp32Size);
     return TableIcmp32[Index].Mapping;
@@ -341,6 +507,190 @@
     Type InVectorElementType;
   } TableTypeX8632Attributes[];
   static const size_t TableTypeX8632AttributesSize;
+
+  //----------------------------------------------------------------------------
+  //      __  __   __  ______  ______
+  //    /\ \/\ "-.\ \/\  ___\/\__  _\
+  //    \ \ \ \ \-.  \ \___  \/_/\ \/
+  //     \ \_\ \_\\"\_\/\_____\ \ \_\
+  //      \/_/\/_/ \/_/\/_____/  \/_/
+  //
+  //----------------------------------------------------------------------------
+  using Insts = ::Ice::X86Internal::Insts<TargetX8632>;
+
+  using TargetLowering = TargetX8632;
+  using Assembler = X8632::AssemblerX8632;
+
+  /// X86Operand extends the Operand hierarchy; its kinds are numbered from
+  /// Operand::kTarget.  Its subclasses are X86OperandMem and VariableSplit.
+  class X86Operand : public ::Ice::Operand {
+    X86Operand() = delete;
+    X86Operand(const X86Operand &) = delete;
+    X86Operand &operator=(const X86Operand &) = delete;
+
+  public:
+    enum OperandKindX8632 { k__Start = ::Ice::Operand::kTarget, kMem, kSplit };
+    using ::Ice::Operand::dump; // keep the inherited dump() overloads visible
+
+    void dump(const Cfg *, Ostream &Str) const override;
+
+  protected: // constructed only through subclasses
+    X86Operand(OperandKindX8632 Kind, Type Ty)
+        : Operand(static_cast<::Ice::Operand::OperandKind>(Kind), Ty) {}
+  };
+
+  /// X86OperandMem represents the m32 addressing mode, with optional base and
+  /// index registers, a constant offset, and a fixed shift value for the index
+  /// register.
+  class X86OperandMem : public X86Operand {
+    X86OperandMem() = delete;
+    X86OperandMem(const X86OperandMem &) = delete;
+    X86OperandMem &operator=(const X86OperandMem &) = delete;
+
+  public:
+    enum SegmentRegisters {
+      DefaultSegment = -1, // sentinel ahead of the table-generated entries
+#define X(val, name, prefix) val,
+      SEG_REGX8632_TABLE
+#undef X
+          SegReg_NUM
+    };
+    static X86OperandMem *create(Cfg *Func, Type Ty, Variable *Base,
+                                 Constant *Offset, Variable *Index = nullptr,
+                                 uint16_t Shift = 0,
+                                 SegmentRegisters SegmentReg = DefaultSegment) {
+      return new (Func->allocate<X86OperandMem>()) // built in Func's storage
+          X86OperandMem(Func, Ty, Base, Offset, Index, Shift, SegmentReg);
+    }
+    Variable *getBase() const { return Base; }
+    Constant *getOffset() const { return Offset; }
+    Variable *getIndex() const { return Index; }
+    uint16_t getShift() const { return Shift; }
+    SegmentRegisters getSegmentRegister() const { return SegmentReg; }
+    void emitSegmentOverride(Assembler *Asm) const;
+    Address toAsmAddress(Assembler *Asm) const;
+
+    void emit(const Cfg *Func) const override;
+    using X86Operand::dump;
+    void dump(const Cfg *Func, Ostream &Str) const override;
+
+    static bool classof(const Operand *Operand) {
+      return Operand->getKind() == static_cast<OperandKind>(kMem);
+    }
+
+    void setRandomized(bool R) { Randomized = R; }
+
+    bool getRandomized() const { return Randomized; }
+
+  private:
+    X86OperandMem(Cfg *Func, Type Ty, Variable *Base, Constant *Offset,
+                  Variable *Index, uint16_t Shift, SegmentRegisters SegmentReg);
+
+    Variable *Base;
+    Constant *Offset;
+    Variable *Index;
+    uint16_t Shift;
+    SegmentRegisters SegmentReg : 16;
+    /// A flag to show if this memory operand is a randomized one. Randomized
+    /// memory operands are generated in
+    /// TargetX86Base::randomizeOrPoolImmediate()
+    bool Randomized; // NOTE(review): no default here; confirm the ctor sets it
+  };
+
+  /// VariableSplit is a way to treat an f64 memory location as a pair of i32
+  /// locations (Low and High).  This is needed for some cases of the Bitcast
+  /// instruction.  Since it's not possible for integer registers to access the
+  /// XMM registers and vice versa, the lowering forces the f64 to be spilled to
+  /// the stack and then accesses through the VariableSplit.
+  // TODO(jpp): remove references to VariableSplit from IceInstX86Base as 64bit
+  // targets can natively handle these.
+  class VariableSplit : public X86Operand {
+    VariableSplit() = delete;
+    VariableSplit(const VariableSplit &) = delete;
+    VariableSplit &operator=(const VariableSplit &) = delete;
+
+  public:
+    enum Portion { Low, High }; // Low maps to offset 0, High to offset 4
+    static VariableSplit *create(Cfg *Func, Variable *Var, Portion Part) {
+      return new (Func->allocate<VariableSplit>())
+          VariableSplit(Func, Var, Part);
+    }
+    int32_t getOffset() const { return Part == High ? 4 : 0; }
+
+    Address toAsmAddress(const Cfg *Func) const;
+    void emit(const Cfg *Func) const override;
+    using X86Operand::dump;
+    void dump(const Cfg *Func, Ostream &Str) const override;
+
+    static bool classof(const Operand *Operand) {
+      return Operand->getKind() == static_cast<OperandKind>(kSplit);
+    }
+
+  private: // the operand itself is typed i32; the wrapped Var must be f64
+    VariableSplit(Cfg *Func, Variable *Var, Portion Part)
+        : X86Operand(kSplit, IceType_i32), Var(Var), Part(Part) {
+      assert(Var->getType() == IceType_f64);
+      Vars = Func->allocateArrayOf<Variable *>(1); // one-element Vars array
+      Vars[0] = Var;
+      NumVars = 1;
+    }
+
+    Variable *Var;
+    Portion Part;
+  };
+
+  /// SpillVariable decorates a Variable by linking it to another Variable.
+  /// When stack frame offsets are computed, the SpillVariable is given a
+  /// distinct stack slot only if its linked Variable has a register.  If the
+  /// linked Variable has a stack slot, then the Variable and SpillVariable
+  /// share that slot.
+  class SpillVariable : public Variable {
+    SpillVariable() = delete;
+    SpillVariable(const SpillVariable &) = delete;
+    SpillVariable &operator=(const SpillVariable &) = delete;
+
+  public:
+    static SpillVariable *create(Cfg *Func, Type Ty, SizeT Index) {
+      return new (Func->allocate<SpillVariable>()) SpillVariable(Ty, Index);
+    }
+    const static OperandKind SpillVariableKind =
+        static_cast<OperandKind>(kVariable_Target); // kind tested by classof()
+    static bool classof(const Operand *Operand) {
+      return Operand->getKind() == SpillVariableKind;
+    }
+    void setLinkedTo(Variable *Var) { LinkedTo = Var; }
+    Variable *getLinkedTo() const { return LinkedTo; }
+    // Inherit dump() and emit() from Variable.
+
+  private:
+    SpillVariable(Type Ty, SizeT Index)
+        : Variable(SpillVariableKind, Ty, Index), LinkedTo(nullptr) {}
+    Variable *LinkedTo; // nullptr until setLinkedTo() is called
+  };
+
+  // Note: The following data structures are defined in IceInstX8632.cpp.
+
+  static const struct InstBrAttributesType {
+    Cond::BrCond Opposite;
+    const char *DisplayString;
+    const char *EmitString;
+  } InstBrAttributes[];
+
+  static const struct InstCmppsAttributesType {
+    const char *EmitString;
+  } InstCmppsAttributes[];
+
+  static const struct TypeAttributesType {
+    const char *CvtString;   // i (integer), s (single FP), d (double FP)
+    const char *SdSsString;  // ss, sd, or <blank>
+    const char *PackString;  // b, w, d, or <blank>
+    const char *WidthString; // b, w, l, q, or <blank>
+    const char *FldString;   // s, l, or <blank>
+  } TypeAttributes[];
+
+  static const char *InstSegmentRegNames[];
+
+  static uint8_t InstSegmentPrefixes[];
 };
 
 } // end of namespace X86Internal
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index 37a1107..32d7d6b 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -183,7 +183,7 @@
   void lowerSwitch(const InstSwitch *Inst) override;
   void lowerUnreachable(const InstUnreachable *Inst) override;
   void lowerOther(const Inst *Instr) override;
-  void lowerRMW(const InstX8632FakeRMW *RMW);
+  void lowerRMW(const typename Traits::Insts::FakeRMW *RMW);
   void prelowerPhis() override;
   void lowerPhiAssignments(CfgNode *Node,
                            const AssignList &Assignments) override;
@@ -234,8 +234,8 @@
   /// Turn a pointer operand into a memory operand that can be
   /// used by a real load/store operation. Legalizes the operand as well.
   /// This is a nop if the operand is already a legal memory operand.
-  OperandX8632Mem *formMemoryOperand(Operand *Ptr, Type Ty,
-                                     bool DoLegalize = true);
+  typename Traits::X86OperandMem *formMemoryOperand(Operand *Ptr, Type Ty,
+                                                    bool DoLegalize = true);
 
   Variable *makeReg(Type Ty, int32_t RegNum = Variable::NoRegister);
   static Type stackSlotType();
@@ -253,99 +253,99 @@
                                  int32_t RegNum = Variable::NoRegister);
 
   /// Return a memory operand corresponding to a stack allocated Variable.
-  OperandX8632Mem *getMemoryOperandForStackSlot(Type Ty, Variable *Slot,
-                                                uint32_t Offset = 0);
+  typename Traits::X86OperandMem *
+  getMemoryOperandForStackSlot(Type Ty, Variable *Slot, uint32_t Offset = 0);
 
   void makeRandomRegisterPermutation(
       llvm::SmallVectorImpl<int32_t> &Permutation,
       const llvm::SmallBitVector &ExcludeRegisters) const override;
 
-  // TODO(jpp): move the helper methods below to the MachineTraits.
   /// The following are helpers that insert lowered x86 instructions
   /// with minimal syntactic overhead, so that the lowering code can
   /// look as close to assembly as practical.
   void _adc(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Adc::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Adc::create(Func, Dest, Src0));
   }
-  void _adc_rmw(OperandX8632Mem *DestSrc0, Operand *Src1) {
-    Context.insert(InstX8632AdcRMW::create(Func, DestSrc0, Src1));
+  void _adc_rmw(typename Traits::X86OperandMem *DestSrc0, Operand *Src1) {
+    Context.insert(Traits::Insts::AdcRMW::create(Func, DestSrc0, Src1));
   }
   void _add(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Add::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Add::create(Func, Dest, Src0));
   }
-  void _add_rmw(OperandX8632Mem *DestSrc0, Operand *Src1) {
-    Context.insert(InstX8632AddRMW::create(Func, DestSrc0, Src1));
+  void _add_rmw(typename Traits::X86OperandMem *DestSrc0, Operand *Src1) {
+    Context.insert(Traits::Insts::AddRMW::create(Func, DestSrc0, Src1));
   }
   void _adjust_stack(int32_t Amount) {
-    Context.insert(InstX8632AdjustStack::create(
+    Context.insert(Traits::Insts::AdjustStack::create(
         Func, Amount, getPhysicalRegister(Traits::RegisterSet::Reg_esp)));
   }
   void _addps(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Addps::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Addps::create(Func, Dest, Src0));
   }
   void _addss(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Addss::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Addss::create(Func, Dest, Src0));
   }
   void _and(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632And::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::And::create(Func, Dest, Src0));
   }
-  void _and_rmw(OperandX8632Mem *DestSrc0, Operand *Src1) {
-    Context.insert(InstX8632AndRMW::create(Func, DestSrc0, Src1));
+  void _and_rmw(typename Traits::X86OperandMem *DestSrc0, Operand *Src1) {
+    Context.insert(Traits::Insts::AndRMW::create(Func, DestSrc0, Src1));
   }
   void _blendvps(Variable *Dest, Operand *Src0, Operand *Src1) {
-    Context.insert(InstX8632Blendvps::create(Func, Dest, Src0, Src1));
+    Context.insert(Traits::Insts::Blendvps::create(Func, Dest, Src0, Src1));
   }
   void _br(typename Traits::Cond::BrCond Condition, CfgNode *TargetTrue,
            CfgNode *TargetFalse) {
     Context.insert(
-        InstX8632Br::create(Func, TargetTrue, TargetFalse, Condition));
+        Traits::Insts::Br::create(Func, TargetTrue, TargetFalse, Condition));
   }
   void _br(CfgNode *Target) {
-    Context.insert(InstX8632Br::create(Func, Target));
+    Context.insert(Traits::Insts::Br::create(Func, Target));
   }
   void _br(typename Traits::Cond::BrCond Condition, CfgNode *Target) {
-    Context.insert(InstX8632Br::create(Func, Target, Condition));
+    Context.insert(Traits::Insts::Br::create(Func, Target, Condition));
   }
-  void _br(typename Traits::Cond::BrCond Condition, InstX8632Label *Label) {
-    Context.insert(InstX8632Br::create(Func, Label, Condition));
+  void _br(typename Traits::Cond::BrCond Condition,
+           typename Traits::Insts::Label *Label) {
+    Context.insert(Traits::Insts::Br::create(Func, Label, Condition));
   }
   void _bsf(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Bsf::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Bsf::create(Func, Dest, Src0));
   }
   void _bsr(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Bsr::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Bsr::create(Func, Dest, Src0));
   }
   void _bswap(Variable *SrcDest) {
-    Context.insert(InstX8632Bswap::create(Func, SrcDest));
+    Context.insert(Traits::Insts::Bswap::create(Func, SrcDest));
   }
   void _cbwdq(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Cbwdq::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Cbwdq::create(Func, Dest, Src0));
   }
   void _cmov(Variable *Dest, Operand *Src0,
              typename Traits::Cond::BrCond Condition) {
-    Context.insert(InstX8632Cmov::create(Func, Dest, Src0, Condition));
+    Context.insert(Traits::Insts::Cmov::create(Func, Dest, Src0, Condition));
   }
   void _cmp(Operand *Src0, Operand *Src1) {
-    Context.insert(InstX8632Icmp::create(Func, Src0, Src1));
+    Context.insert(Traits::Insts::Icmp::create(Func, Src0, Src1));
   }
   void _cmpps(Variable *Dest, Operand *Src0,
               typename Traits::Cond::CmppsCond Condition) {
-    Context.insert(InstX8632Cmpps::create(Func, Dest, Src0, Condition));
+    Context.insert(Traits::Insts::Cmpps::create(Func, Dest, Src0, Condition));
   }
   void _cmpxchg(Operand *DestOrAddr, Variable *Eax, Variable *Desired,
                 bool Locked) {
     Context.insert(
-        InstX8632Cmpxchg::create(Func, DestOrAddr, Eax, Desired, Locked));
+        Traits::Insts::Cmpxchg::create(Func, DestOrAddr, Eax, Desired, Locked));
     // Mark eax as possibly modified by cmpxchg.
     Context.insert(
         InstFakeDef::create(Func, Eax, llvm::dyn_cast<Variable>(DestOrAddr)));
     _set_dest_nonkillable();
     Context.insert(InstFakeUse::create(Func, Eax));
   }
-  void _cmpxchg8b(OperandX8632Mem *Addr, Variable *Edx, Variable *Eax,
-                  Variable *Ecx, Variable *Ebx, bool Locked) {
-    Context.insert(
-        InstX8632Cmpxchg8b::create(Func, Addr, Edx, Eax, Ecx, Ebx, Locked));
+  void _cmpxchg8b(typename Traits::X86OperandMem *Addr, Variable *Edx,
+                  Variable *Eax, Variable *Ecx, Variable *Ebx, bool Locked) {
+    Context.insert(Traits::Insts::Cmpxchg8b::create(Func, Addr, Edx, Eax, Ecx,
+                                                    Ebx, Locked));
     // Mark edx, and eax as possibly modified by cmpxchg8b.
     Context.insert(InstFakeDef::create(Func, Edx));
     _set_dest_nonkillable();
@@ -354,38 +354,41 @@
     _set_dest_nonkillable();
     Context.insert(InstFakeUse::create(Func, Eax));
   }
-  void _cvt(Variable *Dest, Operand *Src0, InstX8632Cvt::CvtVariant Variant) {
-    Context.insert(InstX8632Cvt::create(Func, Dest, Src0, Variant));
+  void _cvt(Variable *Dest, Operand *Src0,
+            typename Traits::Insts::Cvt::CvtVariant Variant) {
+    Context.insert(Traits::Insts::Cvt::create(Func, Dest, Src0, Variant));
   }
   void _div(Variable *Dest, Operand *Src0, Operand *Src1) {
-    Context.insert(InstX8632Div::create(Func, Dest, Src0, Src1));
+    Context.insert(Traits::Insts::Div::create(Func, Dest, Src0, Src1));
   }
   void _divps(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Divps::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Divps::create(Func, Dest, Src0));
   }
   void _divss(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Divss::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Divss::create(Func, Dest, Src0));
   }
-  void _fld(Operand *Src0) { Context.insert(InstX8632Fld::create(Func, Src0)); }
+  void _fld(Operand *Src0) {
+    Context.insert(Traits::Insts::Fld::create(Func, Src0));
+  }
   void _fstp(Variable *Dest) {
-    Context.insert(InstX8632Fstp::create(Func, Dest));
+    Context.insert(Traits::Insts::Fstp::create(Func, Dest));
   }
   void _idiv(Variable *Dest, Operand *Src0, Operand *Src1) {
-    Context.insert(InstX8632Idiv::create(Func, Dest, Src0, Src1));
+    Context.insert(Traits::Insts::Idiv::create(Func, Dest, Src0, Src1));
   }
   void _imul(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Imul::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Imul::create(Func, Dest, Src0));
   }
   void _insertps(Variable *Dest, Operand *Src0, Operand *Src1) {
-    Context.insert(InstX8632Insertps::create(Func, Dest, Src0, Src1));
+    Context.insert(Traits::Insts::Insertps::create(Func, Dest, Src0, Src1));
   }
   void _jmp(Operand *Target) {
-    Context.insert(InstX8632Jmp::create(Func, Target));
+    Context.insert(Traits::Insts::Jmp::create(Func, Target));
   }
   void _lea(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Lea::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Lea::create(Func, Dest, Src0));
   }
-  void _mfence() { Context.insert(InstX8632Mfence::create(Func)); }
+  void _mfence() { Context.insert(Traits::Insts::Mfence::create(Func)); }
   /// If Dest=nullptr is passed in, then a new variable is created,
   /// marked as infinite register allocation weight, and returned
   /// through the in/out Dest argument.
@@ -393,175 +396,175 @@
             int32_t RegNum = Variable::NoRegister) {
     if (Dest == nullptr)
       Dest = makeReg(Src0->getType(), RegNum);
-    Context.insert(InstX8632Mov::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Mov::create(Func, Dest, Src0));
   }
   void _mov_nonkillable(Variable *Dest, Operand *Src0) {
-    Inst *NewInst = InstX8632Mov::create(Func, Dest, Src0);
+    Inst *NewInst = Traits::Insts::Mov::create(Func, Dest, Src0);
     NewInst->setDestNonKillable();
     Context.insert(NewInst);
   }
   void _movd(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Movd::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Movd::create(Func, Dest, Src0));
   }
   void _movp(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Movp::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Movp::create(Func, Dest, Src0));
   }
   void _movq(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Movq::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Movq::create(Func, Dest, Src0));
   }
   void _movss(Variable *Dest, Variable *Src0) {
-    Context.insert(InstX8632MovssRegs::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::MovssRegs::create(Func, Dest, Src0));
   }
   void _movsx(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Movsx::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Movsx::create(Func, Dest, Src0));
   }
   void _movzx(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Movzx::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Movzx::create(Func, Dest, Src0));
   }
   void _mul(Variable *Dest, Variable *Src0, Operand *Src1) {
-    Context.insert(InstX8632Mul::create(Func, Dest, Src0, Src1));
+    Context.insert(Traits::Insts::Mul::create(Func, Dest, Src0, Src1));
   }
   void _mulps(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Mulps::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Mulps::create(Func, Dest, Src0));
   }
   void _mulss(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Mulss::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Mulss::create(Func, Dest, Src0));
   }
   void _neg(Variable *SrcDest) {
-    Context.insert(InstX8632Neg::create(Func, SrcDest));
+    Context.insert(Traits::Insts::Neg::create(Func, SrcDest));
   }
   void _nop(SizeT Variant) {
-    Context.insert(InstX8632Nop::create(Func, Variant));
+    Context.insert(Traits::Insts::Nop::create(Func, Variant));
   }
   void _or(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Or::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Or::create(Func, Dest, Src0));
   }
-  void _or_rmw(OperandX8632Mem *DestSrc0, Operand *Src1) {
-    Context.insert(InstX8632OrRMW::create(Func, DestSrc0, Src1));
+  void _or_rmw(typename Traits::X86OperandMem *DestSrc0, Operand *Src1) {
+    Context.insert(Traits::Insts::OrRMW::create(Func, DestSrc0, Src1));
   }
   void _padd(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Padd::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Padd::create(Func, Dest, Src0));
   }
   void _pand(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Pand::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Pand::create(Func, Dest, Src0));
   }
   void _pandn(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Pandn::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Pandn::create(Func, Dest, Src0));
   }
   void _pblendvb(Variable *Dest, Operand *Src0, Operand *Src1) {
-    Context.insert(InstX8632Pblendvb::create(Func, Dest, Src0, Src1));
+    Context.insert(Traits::Insts::Pblendvb::create(Func, Dest, Src0, Src1));
   }
   void _pcmpeq(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Pcmpeq::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Pcmpeq::create(Func, Dest, Src0));
   }
   void _pcmpgt(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Pcmpgt::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Pcmpgt::create(Func, Dest, Src0));
   }
   void _pextr(Variable *Dest, Operand *Src0, Operand *Src1) {
-    Context.insert(InstX8632Pextr::create(Func, Dest, Src0, Src1));
+    Context.insert(Traits::Insts::Pextr::create(Func, Dest, Src0, Src1));
   }
   void _pinsr(Variable *Dest, Operand *Src0, Operand *Src1) {
-    Context.insert(InstX8632Pinsr::create(Func, Dest, Src0, Src1));
+    Context.insert(Traits::Insts::Pinsr::create(Func, Dest, Src0, Src1));
   }
   void _pmull(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Pmull::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Pmull::create(Func, Dest, Src0));
   }
   void _pmuludq(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Pmuludq::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Pmuludq::create(Func, Dest, Src0));
   }
   void _pop(Variable *Dest) {
-    Context.insert(InstX8632Pop::create(Func, Dest));
+    Context.insert(Traits::Insts::Pop::create(Func, Dest));
   }
   void _por(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Por::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Por::create(Func, Dest, Src0));
   }
   void _pshufd(Variable *Dest, Operand *Src0, Operand *Src1) {
-    Context.insert(InstX8632Pshufd::create(Func, Dest, Src0, Src1));
+    Context.insert(Traits::Insts::Pshufd::create(Func, Dest, Src0, Src1));
   }
   void _psll(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Psll::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Psll::create(Func, Dest, Src0));
   }
   void _psra(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Psra::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Psra::create(Func, Dest, Src0));
   }
   void _psrl(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Psrl::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Psrl::create(Func, Dest, Src0));
   }
   void _psub(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Psub::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Psub::create(Func, Dest, Src0));
   }
   void _push(Variable *Src0) {
-    Context.insert(InstX8632Push::create(Func, Src0));
+    Context.insert(Traits::Insts::Push::create(Func, Src0));
   }
   void _pxor(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Pxor::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Pxor::create(Func, Dest, Src0));
   }
   void _ret(Variable *Src0 = nullptr) {
-    Context.insert(InstX8632Ret::create(Func, Src0));
+    Context.insert(Traits::Insts::Ret::create(Func, Src0));
   }
   void _rol(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Rol::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Rol::create(Func, Dest, Src0));
   }
   void _sar(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Sar::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Sar::create(Func, Dest, Src0));
   }
   void _sbb(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Sbb::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Sbb::create(Func, Dest, Src0));
   }
-  void _sbb_rmw(OperandX8632Mem *DestSrc0, Operand *Src1) {
-    Context.insert(InstX8632SbbRMW::create(Func, DestSrc0, Src1));
+  void _sbb_rmw(typename Traits::X86OperandMem *DestSrc0, Operand *Src1) {
+    Context.insert(Traits::Insts::SbbRMW::create(Func, DestSrc0, Src1));
   }
   void _setcc(Variable *Dest, typename Traits::Cond::BrCond Condition) {
-    Context.insert(InstX8632Setcc::create(Func, Dest, Condition));
+    Context.insert(Traits::Insts::Setcc::create(Func, Dest, Condition));
   }
   void _shl(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Shl::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Shl::create(Func, Dest, Src0));
   }
   void _shld(Variable *Dest, Variable *Src0, Variable *Src1) {
-    Context.insert(InstX8632Shld::create(Func, Dest, Src0, Src1));
+    Context.insert(Traits::Insts::Shld::create(Func, Dest, Src0, Src1));
   }
   void _shr(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Shr::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Shr::create(Func, Dest, Src0));
   }
   void _shrd(Variable *Dest, Variable *Src0, Variable *Src1) {
-    Context.insert(InstX8632Shrd::create(Func, Dest, Src0, Src1));
+    Context.insert(Traits::Insts::Shrd::create(Func, Dest, Src0, Src1));
   }
   void _shufps(Variable *Dest, Operand *Src0, Operand *Src1) {
-    Context.insert(InstX8632Shufps::create(Func, Dest, Src0, Src1));
+    Context.insert(Traits::Insts::Shufps::create(Func, Dest, Src0, Src1));
   }
   void _sqrtss(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Sqrtss::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Sqrtss::create(Func, Dest, Src0));
   }
-  void _store(Operand *Value, OperandX8632 *Mem) {
-    Context.insert(InstX8632Store::create(Func, Value, Mem));
+  void _store(Operand *Value, typename Traits::X86Operand *Mem) {
+    Context.insert(Traits::Insts::Store::create(Func, Value, Mem));
   }
-  void _storep(Variable *Value, OperandX8632Mem *Mem) {
-    Context.insert(InstX8632StoreP::create(Func, Value, Mem));
+  void _storep(Variable *Value, typename Traits::X86OperandMem *Mem) {
+    Context.insert(Traits::Insts::StoreP::create(Func, Value, Mem));
   }
-  void _storeq(Variable *Value, OperandX8632Mem *Mem) {
-    Context.insert(InstX8632StoreQ::create(Func, Value, Mem));
+  void _storeq(Variable *Value, typename Traits::X86OperandMem *Mem) {
+    Context.insert(Traits::Insts::StoreQ::create(Func, Value, Mem));
   }
   void _sub(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Sub::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Sub::create(Func, Dest, Src0));
   }
-  void _sub_rmw(OperandX8632Mem *DestSrc0, Operand *Src1) {
-    Context.insert(InstX8632SubRMW::create(Func, DestSrc0, Src1));
+  void _sub_rmw(typename Traits::X86OperandMem *DestSrc0, Operand *Src1) {
+    Context.insert(Traits::Insts::SubRMW::create(Func, DestSrc0, Src1));
   }
   void _subps(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Subps::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Subps::create(Func, Dest, Src0));
   }
   void _subss(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Subss::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Subss::create(Func, Dest, Src0));
   }
   void _test(Operand *Src0, Operand *Src1) {
-    Context.insert(InstX8632Test::create(Func, Src0, Src1));
+    Context.insert(Traits::Insts::Test::create(Func, Src0, Src1));
   }
   void _ucomiss(Operand *Src0, Operand *Src1) {
-    Context.insert(InstX8632Ucomiss::create(Func, Src0, Src1));
+    Context.insert(Traits::Insts::Ucomiss::create(Func, Src0, Src1));
   }
-  void _ud2() { Context.insert(InstX8632UD2::create(Func)); }
+  void _ud2() { Context.insert(Traits::Insts::UD2::create(Func)); }
   void _xadd(Operand *Dest, Variable *Src, bool Locked) {
-    Context.insert(InstX8632Xadd::create(Func, Dest, Src, Locked));
+    Context.insert(Traits::Insts::Xadd::create(Func, Dest, Src, Locked));
     // The xadd exchanges Dest and Src (modifying Src).
     // Model that update with a FakeDef followed by a FakeUse.
     Context.insert(
@@ -570,7 +573,7 @@
     Context.insert(InstFakeUse::create(Func, Src));
   }
   void _xchg(Operand *Dest, Variable *Src) {
-    Context.insert(InstX8632Xchg::create(Func, Dest, Src));
+    Context.insert(Traits::Insts::Xchg::create(Func, Dest, Src));
     // The xchg modifies Dest and Src -- model that update with a
     // FakeDef/FakeUse.
     Context.insert(
@@ -579,10 +582,10 @@
     Context.insert(InstFakeUse::create(Func, Src));
   }
   void _xor(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Xor::create(Func, Dest, Src0));
+    Context.insert(Traits::Insts::Xor::create(Func, Dest, Src0));
   }
-  void _xor_rmw(OperandX8632Mem *DestSrc0, Operand *Src1) {
-    Context.insert(InstX8632XorRMW::create(Func, DestSrc0, Src1));
+  void _xor_rmw(typename Traits::X86OperandMem *DestSrc0, Operand *Src1) {
+    Context.insert(Traits::Insts::XorRMW::create(Func, DestSrc0, Src1));
   }
   void _set_dest_nonkillable() {
     Context.getLastInserted()->setDestNonKillable();
@@ -600,13 +603,12 @@
   llvm::SmallBitVector ScratchRegs;
   llvm::SmallBitVector RegsUsed;
   VarList PhysicalRegisters[IceType_NUM];
-  static IceString RegNames[];
 
   /// Randomize a given immediate operand
   Operand *randomizeOrPoolImmediate(Constant *Immediate,
                                     int32_t RegNum = Variable::NoRegister);
-  OperandX8632Mem *
-  randomizeOrPoolImmediate(OperandX8632Mem *MemOperand,
+  typename Traits::X86OperandMem *
+  randomizeOrPoolImmediate(typename Traits::X86OperandMem *MemOperand,
                            int32_t RegNum = Variable::NoRegister);
   bool RandomizationPoolingPaused = false;
 
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index 6a0f3b3..68cbf94 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -23,20 +23,16 @@
 #include "IceDefs.h"
 #include "IceELFObjectWriter.h"
 #include "IceGlobalInits.h"
-#include "IceInstX8632.h"
 #include "IceLiveness.h"
 #include "IceOperand.h"
-#include "IceRegistersX8632.h"
-#include "IceTargetLoweringX8632.def"
-#include "IceTargetLoweringX8632.h"
 #include "IceUtils.h"
 #include "llvm/Support/MathExtras.h"
 
 namespace Ice {
 namespace X86Internal {
 
-/// A helper class to ease the settings of RandomizationPoolingPause
-/// to disable constant blinding or pooling for some translation phases.
+/// A helper class to ease the settings of RandomizationPoolingPause to disable
+/// constant blinding or pooling for some translation phases.
 class BoolFlagSaver {
   BoolFlagSaver() = delete;
   BoolFlagSaver(const BoolFlagSaver &) = delete;
@@ -85,8 +81,7 @@
   };
 
   /// Currently the actual enum values are not used (other than CK_None), but we
-  /// go
-  /// ahead and produce them anyway for symmetry with the
+  /// go ahead and produce them anyway for symmetry with the
   /// BoolFoldingProducerKind.
   enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext };
 
@@ -163,12 +158,11 @@
   return CK_None;
 }
 
-/// Returns true if the producing instruction has a "complex" lowering
-/// sequence.  This generally means that its lowering sequence requires
-/// more than one conditional branch, namely 64-bit integer compares
-/// and some floating-point compares.  When this is true, and there is
-/// more than one consumer, we prefer to disable the folding
-/// optimization because it minimizes branches.
+/// Returns true if the producing instruction has a "complex" lowering sequence.
+/// This generally means that its lowering sequence requires more than one
+/// conditional branch, namely 64-bit integer compares and some floating-point
+/// compares.  When this is true, and there is more than one consumer, we prefer
+/// to disable the folding optimization because it minimizes branches.
 template <class MachineTraits>
 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) {
   switch (getProducerKind(Instr)) {
@@ -226,10 +220,10 @@
       setInvalid(I.first);
       continue;
     }
-    // Mark as "dead" rather than outright deleting.  This is so that
-    // other peephole style optimizations during or before lowering
-    // have access to this instruction in undeleted form.  See for
-    // example tryOptimizedCmpxchgCmpBr().
+    // Mark as "dead" rather than outright deleting.  This is so that other
+    // peephole style optimizations during or before lowering have access to
+    // this instruction in undeleted form.  See for example
+    // tryOptimizedCmpxchgCmpBr().
     I.second.Instr->setDead();
   }
 }
@@ -283,24 +277,18 @@
          TargetInstructionSet::X86InstructionSet_Begin) +
         Traits::InstructionSet::Begin);
   }
-  // TODO: Don't initialize IntegerRegisters and friends every time.
-  // Instead, initialize in some sort of static initializer for the
-  // class.
+  // TODO: Don't initialize IntegerRegisters and friends every time. Instead,
+  // initialize in some sort of static initializer for the class.
   llvm::SmallBitVector IntegerRegisters(Traits::RegisterSet::Reg_NUM);
   llvm::SmallBitVector IntegerRegistersI8(Traits::RegisterSet::Reg_NUM);
   llvm::SmallBitVector FloatRegisters(Traits::RegisterSet::Reg_NUM);
   llvm::SmallBitVector VectorRegisters(Traits::RegisterSet::Reg_NUM);
   llvm::SmallBitVector InvalidRegisters(Traits::RegisterSet::Reg_NUM);
   ScratchRegs.resize(Traits::RegisterSet::Reg_NUM);
-#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
-          frameptr, isI8, isInt, isFP)                                         \
-  IntegerRegisters[Traits::RegisterSet::val] = isInt;                          \
-  IntegerRegistersI8[Traits::RegisterSet::val] = isI8;                         \
-  FloatRegisters[Traits::RegisterSet::val] = isFP;                             \
-  VectorRegisters[Traits::RegisterSet::val] = isFP;                            \
-  ScratchRegs[Traits::RegisterSet::val] = scratch;
-  REGX8632_TABLE;
-#undef X
+
+  Traits::initRegisterSet(&IntegerRegisters, &IntegerRegistersI8,
+                          &FloatRegisters, &VectorRegisters, &ScratchRegs);
+
   TypeToRegisterSet[IceType_void] = InvalidRegisters;
   TypeToRegisterSet[IceType_i1] = IntegerRegistersI8;
   TypeToRegisterSet[IceType_i8] = IntegerRegistersI8;
@@ -348,19 +336,18 @@
   // Argument lowering
   Func->doArgLowering();
 
-  // Target lowering.  This requires liveness analysis for some parts
-  // of the lowering decisions, such as compare/branch fusing.  If
-  // non-lightweight liveness analysis is used, the instructions need
-  // to be renumbered first.  TODO: This renumbering should only be
-  // necessary if we're actually calculating live intervals, which we
-  // only do for register allocation.
+  // Target lowering.  This requires liveness analysis for some parts of the
+  // lowering decisions, such as compare/branch fusing.  If non-lightweight
+  // liveness analysis is used, the instructions need to be renumbered first.
+  // TODO: This renumbering should only be necessary if we're actually
+  // calculating live intervals, which we only do for register allocation.
   Func->renumberInstructions();
   if (Func->hasError())
     return;
 
-  // TODO: It should be sufficient to use the fastest liveness
-  // calculation, i.e. livenessLightweight().  However, for some
-  // reason that slows down the rest of the translation.  Investigate.
+  // TODO: It should be sufficient to use the fastest liveness calculation, i.e.
+  // livenessLightweight().  However, for some reason that slows down the rest
+  // of the translation.  Investigate.
   Func->liveness(Liveness_Basic);
   if (Func->hasError())
     return;
@@ -376,19 +363,19 @@
     return;
   Func->dump("After x86 codegen");
 
-  // Register allocation.  This requires instruction renumbering and
-  // full liveness analysis.
+  // Register allocation.  This requires instruction renumbering and full
+  // liveness analysis.
   Func->renumberInstructions();
   if (Func->hasError())
     return;
   Func->liveness(Liveness_Intervals);
   if (Func->hasError())
     return;
-  // Validate the live range computations.  The expensive validation
-  // call is deliberately only made when assertions are enabled.
+  // Validate the live range computations.  The expensive validation call is
+  // deliberately only made when assertions are enabled.
   assert(Func->validateLiveness());
-  // The post-codegen dump is done here, after liveness analysis and
-  // associated cleanup, to make the dump cleaner and more useful.
+  // The post-codegen dump is done here, after liveness analysis and associated
+  // cleanup, to make the dump cleaner and more useful.
   Func->dump("After initial x8632 codegen");
   Func->getVMetadata()->init(VMK_All);
   regAlloc(RAK_Global);
@@ -397,9 +384,9 @@
   Func->dump("After linear scan regalloc");
 
   if (Ctx->getFlags().getPhiEdgeSplit()) {
-    // We need to pause constant blinding or pooling during advanced
-    // phi lowering, unless the lowering assignment has a physical
-    // register for the dest Variable.
+    // We need to pause constant blinding or pooling during advanced phi
+    // lowering, unless the lowering assignment has a physical register for the
+    // dest Variable.
     {
       BoolFlagSaver B(RandomizationPoolingPaused, true);
       Func->advancedPhiLowering();
@@ -416,11 +403,10 @@
   Func->contractEmptyNodes();
   Func->reorderNodes();
 
-  // Branch optimization.  This needs to be done just before code
-  // emission.  In particular, no transformations that insert or
-  // reorder CfgNodes should be done after branch optimization.  We go
-  // ahead and do it before nop insertion to reduce the amount of work
-  // needed for searching for opportunities.
+  // Branch optimization.  This needs to be done just before code emission.  In
+  // particular, no transformations that insert or reorder CfgNodes should be
+  // done after branch optimization.  We go ahead and do it before nop insertion
+  // to reduce the amount of work needed for searching for opportunities.
   Func->doBranchOpt();
   Func->dump("After branch optimization");
 
@@ -468,8 +454,7 @@
 
 bool canRMW(const InstArithmetic *Arith) {
   Type Ty = Arith->getDest()->getType();
-  // X86 vector instructions write to a register and have no RMW
-  // option.
+  // X86 vector instructions write to a register and have no RMW option.
   if (isVectorType(Ty))
     return false;
   bool isI64 = Ty == IceType_i64;
@@ -496,11 +481,14 @@
   }
 }
 
+template <class Machine>
 bool isSameMemAddressOperand(const Operand *A, const Operand *B) {
   if (A == B)
     return true;
-  if (auto *MemA = llvm::dyn_cast<OperandX8632Mem>(A)) {
-    if (auto *MemB = llvm::dyn_cast<OperandX8632Mem>(B)) {
+  if (auto *MemA = llvm::dyn_cast<
+          typename TargetX86Base<Machine>::Traits::X86OperandMem>(A)) {
+    if (auto *MemB = llvm::dyn_cast<
+            typename TargetX86Base<Machine>::Traits::X86OperandMem>(B)) {
       return MemA->getBase() == MemB->getBase() &&
              MemA->getOffset() == MemB->getOffset() &&
              MemA->getIndex() == MemB->getIndex() &&
@@ -565,8 +553,8 @@
             // still trigger, resulting in two loads and one store, which is
             // worse than the original one load and one store.  However, this is
             // probably rare, and caching probably keeps it just as fast.
-            if (!isSameMemAddressOperand(Load->getSourceAddress(),
-                                         Store->getAddr()))
+            if (!isSameMemAddressOperand<Machine>(Load->getSourceAddress(),
+                                                  Store->getAddr()))
               continue;
             Operand *ArithSrcFromLoad = Arith->getSrc(0);
             Operand *ArithSrcOther = Arith->getSrc(1);
@@ -593,7 +581,7 @@
             Store->setRmwBeacon(Beacon);
             InstFakeDef *BeaconDef = InstFakeDef::create(Func, Beacon);
             Node->getInsts().insert(I3, BeaconDef);
-            InstX8632FakeRMW *RMW = InstX8632FakeRMW::create(
+            auto *RMW = Traits::Insts::FakeRMW::create(
                 Func, ArithSrcOther, Store->getAddr(), Beacon, Arith->getOp());
             Node->getInsts().insert(I3, RMW);
           }
@@ -721,22 +709,13 @@
 
 template <class Machine>
 bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) {
-  if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) {
+  if (auto *Br = llvm::dyn_cast<typename Traits::Insts::Br>(I)) {
     return Br->optimizeBranch(NextNode);
   }
   return false;
 }
 
 template <class Machine>
-IceString TargetX86Base<Machine>::RegNames[] = {
-#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
-          frameptr, isI8, isInt, isFP)                                         \
-  name,
-    REGX8632_TABLE
-#undef X
-};
-
-template <class Machine>
 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) {
   if (Ty == IceType_void)
     Ty = IceType_i32;
@@ -760,30 +739,7 @@
 
 template <class Machine>
 IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type Ty) const {
-  assert(RegNum < Traits::RegisterSet::Reg_NUM);
-  static IceString RegNames8[] = {
-#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
-          frameptr, isI8, isInt, isFP)                                         \
-  name8,
-      REGX8632_TABLE
-#undef X
-  };
-  static IceString RegNames16[] = {
-#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
-          frameptr, isI8, isInt, isFP)                                         \
-  name16,
-      REGX8632_TABLE
-#undef X
-  };
-  switch (Ty) {
-  case IceType_i1:
-  case IceType_i8:
-    return RegNames8[RegNum];
-  case IceType_i16:
-    return RegNames16[RegNum];
-  default:
-    return RegNames[RegNum];
-  }
+  return Traits::getRegName(RegNum, Ty);
 }
 
 template <class Machine>
@@ -884,17 +840,16 @@
   InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
   if (Arg->hasReg()) {
     assert(Ty != IceType_i64);
-    OperandX8632Mem *Mem = OperandX8632Mem::create(
+    typename Traits::X86OperandMem *Mem = Traits::X86OperandMem::create(
         Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset()));
     if (isVectorType(Arg->getType())) {
       _movp(Arg, Mem);
     } else {
       _mov(Arg, Mem);
     }
-    // This argument-copying instruction uses an explicit
-    // OperandX8632Mem operand instead of a Variable, so its
-    // fill-from-stack operation has to be tracked separately for
-    // statistics.
+    // This argument-copying instruction uses an explicit Traits::X86OperandMem
+    // operand instead of a Variable, so its fill-from-stack operation has to be
+    // tracked separately for statistics.
     Ctx->statsUpdateFills();
   }
 }
@@ -965,7 +920,8 @@
   // that stack slot.
   std::function<bool(Variable *)> TargetVarHook =
       [&VariablesLinkedToSpillSlots](Variable *Var) {
-        if (SpillVariable *SpillVar = llvm::dyn_cast<SpillVariable>(Var)) {
+        if (auto *SpillVar =
+                llvm::dyn_cast<typename Traits::SpillVariable>(Var)) {
           assert(Var->getWeight().isZero());
           if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) {
             VariablesLinkedToSpillSlots.push_back(Var);
@@ -1069,7 +1025,8 @@
   // Assign stack offsets to variables that have been linked to spilled
   // variables.
   for (Variable *Var : VariablesLinkedToSpillSlots) {
-    Variable *Linked = (llvm::cast<SpillVariable>(Var))->getLinkedTo();
+    Variable *Linked =
+        (llvm::cast<typename Traits::SpillVariable>(Var))->getLinkedTo();
     Var->setStackOffset(Linked->getStackOffset());
   }
   this->HasComputedFrame = true;
@@ -1106,7 +1063,7 @@
   InstList &Insts = Node->getInsts();
   InstList::reverse_iterator RI, E;
   for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
-    if (llvm::isa<InstX8632Ret>(*RI))
+    if (llvm::isa<typename Traits::Insts::Ret>(*RI))
       break;
   }
   if (RI == E)
@@ -1216,8 +1173,8 @@
         Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue())));
     return legalize(ConstInt);
   }
-  if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
-    OperandX8632Mem *MemOperand = OperandX8632Mem::create(
+  if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) {
+    auto *MemOperand = Traits::X86OperandMem::create(
         Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(),
         Mem->getShift(), Mem->getSegmentRegister());
     // Test if we should randomize or pool the offset, if so randomize it or
@@ -1245,7 +1202,7 @@
     // check if we need to blind/pool the constant
     return legalize(ConstInt);
   }
-  if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
+  if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) {
     Constant *Offset = Mem->getOffset();
     if (Offset == nullptr) {
       Offset = Ctx->getConstantInt32(4);
@@ -1259,7 +1216,7 @@
           Ctx->getConstantSym(4 + SymOffset->getOffset(), SymOffset->getName(),
                               SymOffset->getSuppressMangling());
     }
-    OperandX8632Mem *MemOperand = OperandX8632Mem::create(
+    auto *MemOperand = Traits::X86OperandMem::create(
         Func, IceType_i32, Mem->getBase(), Offset, Mem->getIndex(),
         Mem->getShift(), Mem->getSegmentRegister());
     // Test if the Offset is an eligible i32 constants for randomization and
@@ -1275,32 +1232,7 @@
 llvm::SmallBitVector
 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include,
                                        RegSetMask Exclude) const {
-  llvm::SmallBitVector Registers(Traits::RegisterSet::Reg_NUM);
-
-#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
-          frameptr, isI8, isInt, isFP)                                         \
-  if (scratch && (Include & RegSet_CallerSave))                                \
-    Registers[Traits::RegisterSet::val] = true;                                \
-  if (preserved && (Include & RegSet_CalleeSave))                              \
-    Registers[Traits::RegisterSet::val] = true;                                \
-  if (stackptr && (Include & RegSet_StackPointer))                             \
-    Registers[Traits::RegisterSet::val] = true;                                \
-  if (frameptr && (Include & RegSet_FramePointer))                             \
-    Registers[Traits::RegisterSet::val] = true;                                \
-  if (scratch && (Exclude & RegSet_CallerSave))                                \
-    Registers[Traits::RegisterSet::val] = false;                               \
-  if (preserved && (Exclude & RegSet_CalleeSave))                              \
-    Registers[Traits::RegisterSet::val] = false;                               \
-  if (stackptr && (Exclude & RegSet_StackPointer))                             \
-    Registers[Traits::RegisterSet::val] = false;                               \
-  if (frameptr && (Exclude & RegSet_FramePointer))                             \
-    Registers[Traits::RegisterSet::val] = false;
-
-  REGX8632_TABLE
-
-#undef X
-
-  return Registers;
+  return Traits::getRegisterSet(Include, Exclude);
 }
 
 template <class Machine>
@@ -1423,17 +1355,20 @@
   Constant *Zero = Ctx->getConstantZero(IceType_i32);
   for (uint32_t i = 0; i < Count9; ++i) {
     const uint16_t Shift = 3; // log2(9-1)
-    _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift));
+    _lea(T,
+         Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
     _set_dest_nonkillable();
   }
   for (uint32_t i = 0; i < Count5; ++i) {
     const uint16_t Shift = 2; // log2(5-1)
-    _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift));
+    _lea(T,
+         Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
     _set_dest_nonkillable();
   }
   for (uint32_t i = 0; i < Count3; ++i) {
     const uint16_t Shift = 1; // log2(3-1)
-    _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift));
+    _lea(T,
+         Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
     _set_dest_nonkillable();
   }
   if (Count2) {
@@ -1601,7 +1536,8 @@
       Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
       Constant *BitTest = Ctx->getConstantInt32(0x20);
       Constant *Zero = Ctx->getConstantZero(IceType_i32);
-      InstX8632Label *Label = InstX8632Label::create(Func, this);
+      typename Traits::Insts::Label *Label =
+          Traits::Insts::Label::create(Func, this);
       _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
       _mov(T_2, Src0Lo);
       _mov(T_3, Src0Hi);
@@ -1636,7 +1572,8 @@
       Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
       Constant *BitTest = Ctx->getConstantInt32(0x20);
       Constant *Zero = Ctx->getConstantZero(IceType_i32);
-      InstX8632Label *Label = InstX8632Label::create(Func, this);
+      typename Traits::Insts::Label *Label =
+          Traits::Insts::Label::create(Func, this);
       _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
       _mov(T_2, Src0Lo);
       _mov(T_3, Src0Hi);
@@ -1671,7 +1608,8 @@
       Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
       Constant *BitTest = Ctx->getConstantInt32(0x20);
       Constant *SignExtend = Ctx->getConstantInt32(0x1f);
-      InstX8632Label *Label = InstX8632Label::create(Func, this);
+      typename Traits::Insts::Label *Label =
+          Traits::Insts::Label::create(Func, this);
       _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
       _mov(T_2, Src0Lo);
       _mov(T_3, Src0Hi);
@@ -1709,7 +1647,7 @@
   if (isVectorType(Dest->getType())) {
     // TODO: Trap on integer divide and integer modulo by zero.
     // See: https://code.google.com/p/nativeclient/issues/detail?id=3899
-    if (llvm::isa<OperandX8632Mem>(Src1))
+    if (llvm::isa<typename Traits::X86OperandMem>(Src1))
       Src1 = legalizeToVar(Src1);
     switch (Inst->getOp()) {
     case InstArithmetic::_num:
@@ -2208,7 +2146,8 @@
       Variable *esp =
           Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
       Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
-      StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc));
+      StackArgLocations.push_back(
+          Traits::X86OperandMem::create(Func, Ty, esp, Loc));
       ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
     }
   }
@@ -2305,7 +2244,7 @@
       CallTarget = CallTargetVar;
     }
   }
-  Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget);
+  Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget);
   Context.insert(NewCall);
   if (NeedSandboxing)
     _bundle_unlock();
@@ -2532,7 +2471,7 @@
     Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
     // t1 = cvt Src0RM; Dest = t1
     Variable *T = makeReg(Dest->getType());
-    _cvt(T, Src0RM, InstX8632Cvt::Float2float);
+    _cvt(T, Src0RM, Traits::Insts::Cvt::Float2float);
     _mov(Dest, T);
     break;
   }
@@ -2541,10 +2480,10 @@
       assert(Dest->getType() == IceType_v4i32 &&
              Inst->getSrc(0)->getType() == IceType_v4f32);
       Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
-      if (llvm::isa<OperandX8632Mem>(Src0RM))
+      if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
         Src0RM = legalizeToVar(Src0RM);
       Variable *T = makeReg(Dest->getType());
-      _cvt(T, Src0RM, InstX8632Cvt::Tps2dq);
+      _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq);
       _movp(Dest, T);
     } else if (Dest->getType() == IceType_i64) {
       // Use a helper for converting floating-point values to 64-bit
@@ -2567,7 +2506,7 @@
       // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
       Variable *T_1 = makeReg(IceType_i32);
       Variable *T_2 = makeReg(Dest->getType());
-      _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si);
+      _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
       _mov(T_2, T_1); // T_1 and T_2 may have different integer types
       if (Dest->getType() == IceType_i1)
         _and(T_2, Ctx->getConstantInt1(1));
@@ -2606,7 +2545,7 @@
       // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
       Variable *T_1 = makeReg(IceType_i32);
       Variable *T_2 = makeReg(Dest->getType());
-      _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si);
+      _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
       _mov(T_2, T_1); // T_1 and T_2 may have different integer types
       if (Dest->getType() == IceType_i1)
         _and(T_2, Ctx->getConstantInt1(1));
@@ -2618,10 +2557,10 @@
       assert(Dest->getType() == IceType_v4f32 &&
              Inst->getSrc(0)->getType() == IceType_v4i32);
       Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
-      if (llvm::isa<OperandX8632Mem>(Src0RM))
+      if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
         Src0RM = legalizeToVar(Src0RM);
       Variable *T = makeReg(Dest->getType());
-      _cvt(T, Src0RM, InstX8632Cvt::Dq2ps);
+      _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps);
       _movp(Dest, T);
     } else if (Inst->getSrc(0)->getType() == IceType_i64) {
       // Use a helper for x86-32.
@@ -2645,7 +2584,7 @@
         _mov(T_1, Src0RM);
       else
         _movsx(T_1, Src0RM);
-      _cvt(T_2, T_1, InstX8632Cvt::Si2ss);
+      _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);
       _mov(Dest, T_2);
     }
     break;
@@ -2686,7 +2625,7 @@
         _mov(T_1, Src0RM);
       else
         _movzx(T_1, Src0RM);
-      _cvt(T_2, T_1, InstX8632Cvt::Si2ss);
+      _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);
       _mov(Dest, T_2);
     }
     break;
@@ -2728,8 +2667,8 @@
       Variable *T = nullptr;
       // TODO: Should be able to force a spill setup by calling legalize() with
       // Legal_Mem and not Legal_Reg or Legal_Imm.
-      SpillVariable *SpillVar =
-          Func->template makeVariable<SpillVariable>(SrcType);
+      typename Traits::SpillVariable *SpillVar =
+          Func->template makeVariable<typename Traits::SpillVariable>(SrcType);
       SpillVar->setLinkedTo(Dest);
       Variable *Spill = SpillVar;
       Spill->setWeight(RegWeight::Zero);
@@ -2748,14 +2687,17 @@
       //   a_hi.i32 = t_hi.i32
       Operand *SpillLo, *SpillHi;
       if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) {
-        SpillVariable *SpillVar =
-            Func->template makeVariable<SpillVariable>(IceType_f64);
+        typename Traits::SpillVariable *SpillVar =
+            Func->template makeVariable<typename Traits::SpillVariable>(
+                IceType_f64);
         SpillVar->setLinkedTo(Src0Var);
         Variable *Spill = SpillVar;
         Spill->setWeight(RegWeight::Zero);
         _movq(Spill, Src0RM);
-        SpillLo = VariableSplit::create(Func, Spill, VariableSplit::Low);
-        SpillHi = VariableSplit::create(Func, Spill, VariableSplit::High);
+        SpillLo = Traits::VariableSplit::create(Func, Spill,
+                                                Traits::VariableSplit::Low);
+        SpillHi = Traits::VariableSplit::create(Func, Spill,
+                                                Traits::VariableSplit::High);
       } else {
         SpillLo = loOperand(Src0RM);
         SpillHi = hiOperand(Src0RM);
@@ -2774,7 +2716,7 @@
     case IceType_f64: {
       Src0 = legalize(Src0);
       assert(Src0->getType() == IceType_i64);
-      if (llvm::isa<OperandX8632Mem>(Src0)) {
+      if (llvm::isa<typename Traits::X86OperandMem>(Src0)) {
         Variable *T = Func->template makeVariable(Dest->getType());
         _movq(T, Src0);
         _movq(Dest, T);
@@ -2787,17 +2729,18 @@
       //   t_hi.i32 = b_hi.i32
       //   hi(s.f64) = t_hi.i32
       //   a.f64 = s.f64
-      SpillVariable *SpillVar =
-          Func->template makeVariable<SpillVariable>(IceType_f64);
+      typename Traits::SpillVariable *SpillVar =
+          Func->template makeVariable<typename Traits::SpillVariable>(
+              IceType_f64);
       SpillVar->setLinkedTo(Dest);
       Variable *Spill = SpillVar;
       Spill->setWeight(RegWeight::Zero);
 
       Variable *T_Lo = nullptr, *T_Hi = nullptr;
-      VariableSplit *SpillLo =
-          VariableSplit::create(Func, Spill, VariableSplit::Low);
-      VariableSplit *SpillHi =
-          VariableSplit::create(Func, Spill, VariableSplit::High);
+      typename Traits::VariableSplit *SpillLo = Traits::VariableSplit::create(
+          Func, Spill, Traits::VariableSplit::Low);
+      typename Traits::VariableSplit *SpillHi = Traits::VariableSplit::create(
+          Func, Spill, Traits::VariableSplit::High);
       _mov(T_Lo, loOperand(Src0));
       // Technically, the Spill is defined after the _store happens, but
       // SpillLo is considered a "use" of Spill so define Spill before it
@@ -2897,7 +2840,7 @@
 
     // Compute the location of the element in memory.
     unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
-    OperandX8632Mem *Loc =
+    typename Traits::X86OperandMem *Loc =
         getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
     _mov(ExtractedElementR, Loc);
   }
@@ -2943,7 +2886,7 @@
     } else {
       Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
       Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
-      if (llvm::isa<OperandX8632Mem>(Src1RM))
+      if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
         Src1RM = legalizeToVar(Src1RM);
 
       switch (Condition) {
@@ -3018,7 +2961,8 @@
   Constant *Default = Ctx->getConstantInt32(Traits::TableFcmp[Index].Default);
   _mov(Dest, Default);
   if (HasC1) {
-    InstX8632Label *Label = InstX8632Label::create(Func, this);
+    typename Traits::Insts::Label *Label =
+        Traits::Insts::Label::create(Func, this);
     _br(Traits::TableFcmp[Index].C1, Label);
     if (HasC2) {
       _br(Traits::TableFcmp[Index].C2, Label);
@@ -3091,13 +3035,13 @@
       llvm_unreachable("unexpected condition");
       break;
     case InstIcmp::Eq: {
-      if (llvm::isa<OperandX8632Mem>(Src1RM))
+      if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
         Src1RM = legalizeToVar(Src1RM);
       _movp(T, Src0RM);
       _pcmpeq(T, Src1RM);
     } break;
     case InstIcmp::Ne: {
-      if (llvm::isa<OperandX8632Mem>(Src1RM))
+      if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
         Src1RM = legalizeToVar(Src1RM);
       _movp(T, Src0RM);
       _pcmpeq(T, Src1RM);
@@ -3106,7 +3050,7 @@
     } break;
     case InstIcmp::Ugt:
     case InstIcmp::Sgt: {
-      if (llvm::isa<OperandX8632Mem>(Src1RM))
+      if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
         Src1RM = legalizeToVar(Src1RM);
       _movp(T, Src0RM);
       _pcmpgt(T, Src1RM);
@@ -3114,7 +3058,7 @@
     case InstIcmp::Uge:
     case InstIcmp::Sge: {
       // !(Src1RM > Src0RM)
-      if (llvm::isa<OperandX8632Mem>(Src0RM))
+      if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
         Src0RM = legalizeToVar(Src0RM);
       _movp(T, Src1RM);
       _pcmpgt(T, Src0RM);
@@ -3123,7 +3067,7 @@
     } break;
     case InstIcmp::Ult:
     case InstIcmp::Slt: {
-      if (llvm::isa<OperandX8632Mem>(Src0RM))
+      if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
         Src0RM = legalizeToVar(Src0RM);
       _movp(T, Src1RM);
       _pcmpgt(T, Src0RM);
@@ -3131,7 +3075,7 @@
     case InstIcmp::Ule:
     case InstIcmp::Sle: {
       // !(Src0RM > Src1RM)
-      if (llvm::isa<OperandX8632Mem>(Src1RM))
+      if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
         Src1RM = legalizeToVar(Src1RM);
       _movp(T, Src0RM);
       _pcmpgt(T, Src1RM);
@@ -3156,8 +3100,10 @@
     Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
     Constant *Zero = Ctx->getConstantZero(IceType_i32);
     Constant *One = Ctx->getConstantInt32(1);
-    InstX8632Label *LabelFalse = InstX8632Label::create(Func, this);
-    InstX8632Label *LabelTrue = InstX8632Label::create(Func, this);
+    typename Traits::Insts::Label *LabelFalse =
+        Traits::Insts::Label::create(Func, this);
+    typename Traits::Insts::Label *LabelTrue =
+        Traits::Insts::Label::create(Func, this);
     _mov(Dest, One);
     _cmp(Src0HiRM, Src1HiRI);
     if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None)
@@ -3293,7 +3239,7 @@
 
     // Compute the location of the position to insert in memory.
     unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
-    OperandX8632Mem *Loc =
+    typename Traits::X86OperandMem *Loc =
         getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
     _store(legalizeToVar(ElementToInsertNotLegalized), Loc);
 
@@ -3383,7 +3329,8 @@
       // can't happen anyway, since this is x86-32 and integer arithmetic only
       // happens on 32-bit quantities.
       Variable *T = makeReg(IceType_f64);
-      OperandX8632Mem *Addr = formMemoryOperand(Instr->getArg(0), IceType_f64);
+      typename Traits::X86OperandMem *Addr =
+          formMemoryOperand(Instr->getArg(0), IceType_f64);
       _movq(T, Addr);
       // Then cast the bits back out of the XMM register to the i64 Dest.
       InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T);
@@ -3433,7 +3380,8 @@
       InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value);
       lowerCast(Cast);
       // Then store XMM w/ a movq.
-      OperandX8632Mem *Addr = formMemoryOperand(Ptr, IceType_f64);
+      typename Traits::X86OperandMem *Addr =
+          formMemoryOperand(Ptr, IceType_f64);
       _storeq(T, Addr);
       _mfence();
       return;
@@ -3535,7 +3483,7 @@
     // The pand instruction operates on an m128 memory operand, so if
     // Src is an f32 or f64, we need to make sure it's in a register.
     if (isVectorType(Ty)) {
-      if (llvm::isa<OperandX8632Mem>(Src))
+      if (llvm::isa<typename Traits::X86OperandMem>(Src))
         Src = legalizeToVar(Src);
     } else {
       Src = legalizeToVar(Src);
@@ -3590,9 +3538,9 @@
   case Intrinsics::NaClReadTP: {
     if (Ctx->getFlags().getUseSandboxing()) {
       Constant *Zero = Ctx->getConstantZero(IceType_i32);
-      Operand *Src =
-          OperandX8632Mem::create(Func, IceType_i32, nullptr, Zero, nullptr, 0,
-                                  OperandX8632Mem::SegReg_GS);
+      Operand *Src = Traits::X86OperandMem::create(
+          Func, IceType_i32, nullptr, Zero, nullptr, 0,
+          Traits::X86OperandMem::SegReg_GS);
       Variable *Dest = Instr->getDest();
       Variable *T = nullptr;
       _mov(T, Src);
@@ -3655,7 +3603,8 @@
     _mov(T_edx, hiOperand(Expected));
     _mov(T_ebx, loOperand(Desired));
     _mov(T_ecx, hiOperand(Desired));
-    OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType());
+    typename Traits::X86OperandMem *Addr =
+        formMemoryOperand(Ptr, Expected->getType());
     const bool Locked = true;
     _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
     Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
@@ -3666,7 +3615,8 @@
   }
   Variable *T_eax = makeReg(Expected->getType(), Traits::RegisterSet::Reg_eax);
   _mov(T_eax, Expected);
-  OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType());
+  typename Traits::X86OperandMem *Addr =
+      formMemoryOperand(Ptr, Expected->getType());
   Variable *DesiredReg = legalizeToVar(Desired);
   const bool Locked = true;
   _cmpxchg(Addr, T_eax, DesiredReg, Locked);
@@ -3768,7 +3718,8 @@
       Op_Hi = &TargetX86Base<Machine>::_adc;
       break;
     }
-    OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType());
+    typename Traits::X86OperandMem *Addr =
+        formMemoryOperand(Ptr, Dest->getType());
     const bool Locked = true;
     Variable *T = nullptr;
     _mov(T, Val);
@@ -3783,7 +3734,8 @@
       Op_Hi = &TargetX86Base<Machine>::_sbb;
       break;
     }
-    OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType());
+    typename Traits::X86OperandMem *Addr =
+        formMemoryOperand(Ptr, Dest->getType());
     const bool Locked = true;
     Variable *T = nullptr;
     _mov(T, Val);
@@ -3821,7 +3773,8 @@
       Op_Hi = nullptr;
       break;
     }
-    OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType());
+    typename Traits::X86OperandMem *Addr =
+        formMemoryOperand(Ptr, Dest->getType());
     Variable *T = nullptr;
     _mov(T, Val);
     _xchg(Addr, T);
@@ -3869,12 +3822,13 @@
   if (Ty == IceType_i64) {
     Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
     Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
-    OperandX8632Mem *Addr = formMemoryOperand(Ptr, Ty);
+    typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
     _mov(T_eax, loOperand(Addr));
     _mov(T_edx, hiOperand(Addr));
     Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
     Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);
-    InstX8632Label *Label = InstX8632Label::create(Func, this);
+    typename Traits::Insts::Label *Label =
+        Traits::Insts::Label::create(Func, this);
     const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr;
     if (!IsXchg8b) {
       Context.insert(Label);
@@ -3916,10 +3870,11 @@
     _mov(DestHi, T_edx);
     return;
   }
-  OperandX8632Mem *Addr = formMemoryOperand(Ptr, Ty);
+  typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
   Variable *T_eax = makeReg(Ty, Traits::RegisterSet::Reg_eax);
   _mov(T_eax, Addr);
-  InstX8632Label *Label = InstX8632Label::create(Func, this);
+  typename Traits::Insts::Label *Label =
+      Traits::Insts::Label::create(Func, this);
   Context.insert(Label);
   // We want to pick a different register for T than Eax, so don't use
   // _mov(T == nullptr, T_eax).
@@ -4260,11 +4215,11 @@
 
 template <class Machine>
 void TargetX86Base<Machine>::lowerLoad(const InstLoad *Load) {
-  // A Load instruction can be treated the same as an Assign
-  // instruction, after the source operand is transformed into an
-  // OperandX8632Mem operand.  Note that the address mode
-  // optimization already creates an OperandX8632Mem operand, so it
-  // doesn't need another level of transformation.
+  // A Load instruction can be treated the same as an Assign instruction, after
+  // the source operand is transformed into a Traits::X86OperandMem operand.
+  // Note that the address mode optimization already creates a
+  // Traits::X86OperandMem operand, so it doesn't need another level of
+  // transformation.
   Variable *DestLoad = Load->getDest();
   Type Ty = DestLoad->getType();
   Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
@@ -4279,19 +4234,19 @@
   Variable *Index = nullptr;
   uint16_t Shift = 0;
   int32_t Offset = 0; // TODO: make Constant
-  // Vanilla ICE load instructions should not use the segment registers,
-  // and computeAddressOpt only works at the level of Variables and Constants,
-  // not other OperandX8632Mem, so there should be no mention of segment
+  // Vanilla ICE load instructions should not use the segment registers, and
+  // computeAddressOpt only works at the level of Variables and Constants, not
+  // other Traits::X86OperandMem, so there should be no mention of segment
   // registers there either.
-  const OperandX8632Mem::SegmentRegisters SegmentReg =
-      OperandX8632Mem::DefaultSegment;
+  const typename Traits::X86OperandMem::SegmentRegisters SegmentReg =
+      Traits::X86OperandMem::DefaultSegment;
   Variable *Base = llvm::dyn_cast<Variable>(Addr);
   computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
   if (Base && Addr != Base) {
     Inst->setDeleted();
     Constant *OffsetOp = Ctx->getConstantInt32(Offset);
-    Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index,
-                                   Shift, SegmentReg);
+    Addr = Traits::X86OperandMem::create(Func, Dest->getType(), Base, OffsetOp,
+                                         Index, Shift, SegmentReg);
     Context.insert(InstLoad::create(Func, Dest, Addr));
   }
 }
@@ -4438,7 +4393,8 @@
     // The cmov instruction doesn't allow 8-bit or FP operands, so
     // we need explicit control flow.
     // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1:
-    InstX8632Label *Label = InstX8632Label::create(Func, this);
+    typename Traits::Insts::Label *Label =
+        Traits::Insts::Label::create(Func, this);
     SrcT = legalize(SrcT, Legal_Reg | Legal_Imm);
     _mov(Dest, SrcT);
     _br(Cond, Label);
@@ -4453,7 +4409,7 @@
   // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t
   if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) {
     std::swap(SrcT, SrcF);
-    Cond = InstX8632::getOppositeCondition(Cond);
+    Cond = InstX86Base<Machine>::getOppositeCondition(Cond);
   }
   if (DestTy == IceType_i64) {
     // Set the low portion.
@@ -4488,15 +4444,18 @@
 void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) {
   Operand *Value = Inst->getData();
   Operand *Addr = Inst->getAddr();
-  OperandX8632Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
+  typename Traits::X86OperandMem *NewAddr =
+      formMemoryOperand(Addr, Value->getType());
   Type Ty = NewAddr->getType();
 
   if (Ty == IceType_i64) {
     Value = legalize(Value);
     Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm);
     Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm);
-    _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr)));
-    _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr)));
+    _store(ValueHi,
+           llvm::cast<typename Traits::X86OperandMem>(hiOperand(NewAddr)));
+    _store(ValueLo,
+           llvm::cast<typename Traits::X86OperandMem>(loOperand(NewAddr)));
   } else if (isVectorType(Ty)) {
     _storep(legalizeToVar(Value), NewAddr);
   } else {
@@ -4513,18 +4472,18 @@
   uint16_t Shift = 0;
   int32_t Offset = 0; // TODO: make Constant
   Variable *Base = llvm::dyn_cast<Variable>(Addr);
-  // Vanilla ICE store instructions should not use the segment registers,
-  // and computeAddressOpt only works at the level of Variables and Constants,
-  // not other OperandX8632Mem, so there should be no mention of segment
+  // Vanilla ICE store instructions should not use the segment registers, and
+  // computeAddressOpt only works at the level of Variables and Constants, not
+  // other Traits::X86OperandMem, so there should be no mention of segment
   // registers there either.
-  const OperandX8632Mem::SegmentRegisters SegmentReg =
-      OperandX8632Mem::DefaultSegment;
+  const typename Traits::X86OperandMem::SegmentRegisters SegmentReg =
+      Traits::X86OperandMem::DefaultSegment;
   computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
   if (Base && Addr != Base) {
     Inst->setDeleted();
     Constant *OffsetOp = Ctx->getConstantInt32(Offset);
-    Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index,
-                                   Shift, SegmentReg);
+    Addr = Traits::X86OperandMem::create(Func, Data->getType(), Base, OffsetOp,
+                                         Index, Shift, SegmentReg);
     InstStore *NewStore = InstStore::create(Func, Data, Addr);
     if (Inst->getDest())
       NewStore->setRmwBeacon(Inst->getRmwBeacon());
@@ -4552,7 +4511,8 @@
     for (SizeT I = 0; I < NumCases; ++I) {
       Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I));
       Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32);
-      InstX8632Label *Label = InstX8632Label::create(Func, this);
+      typename Traits::Insts::Label *Label =
+          Traits::Insts::Label::create(Func, this);
       _cmp(Src0Lo, ValueLo);
       _br(Traits::Cond::Br_ne, Label);
       _cmp(Src0Hi, ValueHi);
@@ -4639,7 +4599,8 @@
 }
 
 template <class Machine>
-void TargetX86Base<Machine>::lowerRMW(const InstX8632FakeRMW *RMW) {
+void TargetX86Base<Machine>::lowerRMW(
+    const typename Traits::Insts::FakeRMW *RMW) {
   // If the beacon variable's live range does not end in this
   // instruction, then it must end in the modified Store instruction
   // that follows.  This means that the original Store instruction is
@@ -4651,12 +4612,14 @@
     return;
   Operand *Src = RMW->getData();
   Type Ty = Src->getType();
-  OperandX8632Mem *Addr = formMemoryOperand(RMW->getAddr(), Ty);
+  typename Traits::X86OperandMem *Addr = formMemoryOperand(RMW->getAddr(), Ty);
   if (Ty == IceType_i64) {
     Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm);
     Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm);
-    OperandX8632Mem *AddrLo = llvm::cast<OperandX8632Mem>(loOperand(Addr));
-    OperandX8632Mem *AddrHi = llvm::cast<OperandX8632Mem>(hiOperand(Addr));
+    typename Traits::X86OperandMem *AddrLo =
+        llvm::cast<typename Traits::X86OperandMem>(loOperand(Addr));
+    typename Traits::X86OperandMem *AddrHi =
+        llvm::cast<typename Traits::X86OperandMem>(hiOperand(Addr));
     switch (RMW->getOp()) {
     default:
       // TODO(stichnot): Implement other arithmetic operators.
@@ -4715,7 +4678,8 @@
 
 template <class Machine>
 void TargetX86Base<Machine>::lowerOther(const Inst *Instr) {
-  if (const auto *RMW = llvm::dyn_cast<InstX8632FakeRMW>(Instr)) {
+  if (const auto *RMW =
+          llvm::dyn_cast<typename Traits::Insts::FakeRMW>(Instr)) {
     lowerRMW(RMW);
   } else {
     TargetLowering::lowerOther(Instr);
@@ -4991,7 +4955,7 @@
 }
 
 template <class Machine>
-OperandX8632Mem *
+typename TargetX86Base<Machine>::Traits::X86OperandMem *
 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot,
                                                      uint32_t Offset) {
   // Ensure that Loc is a stack slot.
@@ -5005,7 +4969,7 @@
   Variable *Loc = makeReg(PointerType);
   _lea(Loc, Slot);
   Constant *ConstantOffset = Ctx->getConstantInt32(Offset);
-  return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset);
+  return Traits::X86OperandMem::create(Func, Ty, Loc, ConstantOffset);
 }
 
 /// Helper for legalize() to emit the right code to lower an operand to a
@@ -5037,7 +5001,7 @@
   // or in ecx.)
   assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg);
 
-  if (auto Mem = llvm::dyn_cast<OperandX8632Mem>(From)) {
+  if (auto Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) {
     // Before doing anything with a Mem operand, we need to ensure
     // that the Base and Index components are in physical registers.
     Variable *Base = Mem->getBase();
@@ -5051,9 +5015,9 @@
       RegIndex = legalizeToVar(Index);
     }
     if (Base != RegBase || Index != RegIndex) {
-      Mem =
-          OperandX8632Mem::create(Func, Ty, RegBase, Mem->getOffset(), RegIndex,
-                                  Mem->getShift(), Mem->getSegmentRegister());
+      Mem = Traits::X86OperandMem::create(Func, Ty, RegBase, Mem->getOffset(),
+                                          RegIndex, Mem->getShift(),
+                                          Mem->getSegmentRegister());
     }
 
     // For all Memory Operands, we do randomization/pooling here
@@ -5103,7 +5067,7 @@
       llvm::cast<Constant>(From)->emitPoolLabel(StrBuf);
       llvm::cast<Constant>(From)->setShouldBePooled(true);
       Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true);
-      From = OperandX8632Mem::create(Func, Ty, Base, Offset);
+      From = Traits::X86OperandMem::create(Func, Ty, Base, Offset);
     }
     bool NeedsReg = false;
     if (!(Allowed & Legal_Imm) && !isScalarFloatingType(Ty))
@@ -5162,13 +5126,13 @@
 }
 
 template <class Machine>
-OperandX8632Mem *TargetX86Base<Machine>::formMemoryOperand(Operand *Opnd,
-                                                           Type Ty,
-                                                           bool DoLegalize) {
-  OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Opnd);
-  // It may be the case that address mode optimization already creates
-  // an OperandX8632Mem, so in that case it wouldn't need another level
-  // of transformation.
+typename TargetX86Base<Machine>::Traits::X86OperandMem *
+TargetX86Base<Machine>::formMemoryOperand(Operand *Opnd, Type Ty,
+                                          bool DoLegalize) {
+  auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Opnd);
+  // It may be the case that address mode optimization already creates a
+  // Traits::X86OperandMem, so in that case it wouldn't need another level of
+  // transformation.
   if (!Mem) {
     Variable *Base = llvm::dyn_cast<Variable>(Opnd);
     Constant *Offset = llvm::dyn_cast<Constant>(Opnd);
@@ -5188,11 +5152,11 @@
       assert(llvm::isa<ConstantInteger32>(Offset) ||
              llvm::isa<ConstantRelocatable>(Offset));
     }
-    Mem = OperandX8632Mem::create(Func, Ty, Base, Offset);
+    Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset);
   }
   // Do legalization, which contains randomization/pooling
   // or do randomization/pooling.
-  return llvm::cast<OperandX8632Mem>(
+  return llvm::cast<typename Traits::X86OperandMem>(
       DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem));
 }
 
@@ -5218,68 +5182,8 @@
 void TargetX86Base<Machine>::makeRandomRegisterPermutation(
     llvm::SmallVectorImpl<int32_t> &Permutation,
     const llvm::SmallBitVector &ExcludeRegisters) const {
-  // TODO(stichnot): Declaring Permutation this way loses type/size
-  // information.  Fix this in conjunction with the caller-side TODO.
-  assert(Permutation.size() >= Traits::RegisterSet::Reg_NUM);
-  // Expected upper bound on the number of registers in a single
-  // equivalence class.  For x86-32, this would comprise the 8 XMM
-  // registers.  This is for performance, not correctness.
-  static const unsigned MaxEquivalenceClassSize = 8;
-  typedef llvm::SmallVector<int32_t, MaxEquivalenceClassSize> RegisterList;
-  typedef std::map<uint32_t, RegisterList> EquivalenceClassMap;
-  EquivalenceClassMap EquivalenceClasses;
-  SizeT NumShuffled = 0, NumPreserved = 0;
-
-// Build up the equivalence classes of registers by looking at the
-// register properties as well as whether the registers should be
-// explicitly excluded from shuffling.
-#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
-          frameptr, isI8, isInt, isFP)                                         \
-  if (ExcludeRegisters[Traits::RegisterSet::val]) {                            \
-    /* val stays the same in the resulting permutation. */                     \
-    Permutation[Traits::RegisterSet::val] = Traits::RegisterSet::val;          \
-    ++NumPreserved;                                                            \
-  } else {                                                                     \
-    const uint32_t Index = (scratch << 0) | (preserved << 1) | (isI8 << 2) |   \
-                           (isInt << 3) | (isFP << 4);                         \
-    /* val is assigned to an equivalence class based on its properties. */     \
-    EquivalenceClasses[Index].push_back(Traits::RegisterSet::val);             \
-  }
-  REGX8632_TABLE
-#undef X
-
-  RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
-
-  // Shuffle the resulting equivalence classes.
-  for (auto I : EquivalenceClasses) {
-    const RegisterList &List = I.second;
-    RegisterList Shuffled(List);
-    RandomShuffle(Shuffled.begin(), Shuffled.end(), RNG);
-    for (size_t SI = 0, SE = Shuffled.size(); SI < SE; ++SI) {
-      Permutation[List[SI]] = Shuffled[SI];
-      ++NumShuffled;
-    }
-  }
-
-  assert(NumShuffled + NumPreserved == Traits::RegisterSet::Reg_NUM);
-
-  if (Func->isVerbose(IceV_Random)) {
-    OstreamLocker L(Func->getContext());
-    Ostream &Str = Func->getContext()->getStrDump();
-    Str << "Register equivalence classes:\n";
-    for (auto I : EquivalenceClasses) {
-      Str << "{";
-      const RegisterList &List = I.second;
-      bool First = true;
-      for (int32_t Register : List) {
-        if (!First)
-          Str << " ";
-        First = false;
-        Str << getRegName(Register, IceType_i32);
-      }
-      Str << "}\n";
-    }
-  }
+  Traits::makeRandomRegisterPermutation(Ctx, Func, Permutation,
+                                        ExcludeRegisters);
 }
 
 template <class Machine>
@@ -5350,8 +5254,8 @@
       uint32_t Cookie = Ctx->getRandomizationCookie();
       _mov(Reg, Ctx->getConstantInt(IceType_i32, Cookie + Value));
       Constant *Offset = Ctx->getConstantInt(IceType_i32, 0 - Cookie);
-      _lea(Reg,
-           OperandX8632Mem::create(Func, IceType_i32, Reg, Offset, nullptr, 0));
+      _lea(Reg, Traits::X86OperandMem::create(Func, IceType_i32, Reg, Offset,
+                                              nullptr, 0));
       // make sure liveness analysis won't kill this variable, otherwise a
       // liveness
       // assertion will be triggered.
@@ -5384,8 +5288,9 @@
       const bool SuppressMangling = true;
       Constant *Symbol =
           Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling);
-      OperandX8632Mem *MemOperand =
-          OperandX8632Mem::create(Func, Immediate->getType(), nullptr, Symbol);
+      typename Traits::X86OperandMem *MemOperand =
+          Traits::X86OperandMem::create(Func, Immediate->getType(), nullptr,
+                                        Symbol);
       _mov(Reg, MemOperand);
       return Reg;
     }
@@ -5396,9 +5301,9 @@
 }
 
 template <class Machine>
-OperandX8632Mem *
-TargetX86Base<Machine>::randomizeOrPoolImmediate(OperandX8632Mem *MemOperand,
-                                                 int32_t RegNum) {
+typename TargetX86Base<Machine>::Traits::X86OperandMem *
+TargetX86Base<Machine>::randomizeOrPoolImmediate(
+    typename Traits::X86OperandMem *MemOperand, int32_t RegNum) {
   assert(MemOperand);
   if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||
       RandomizationPoolingPaused == true) {
@@ -5432,8 +5337,9 @@
         Constant *Mask2 =
             Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie);
 
-        OperandX8632Mem *TempMemOperand = OperandX8632Mem::create(
-            Func, MemOperand->getType(), MemOperand->getBase(), Mask1);
+        typename Traits::X86OperandMem *TempMemOperand =
+            Traits::X86OperandMem::create(Func, MemOperand->getType(),
+                                          MemOperand->getBase(), Mask1);
         // If we have already assigned a physical register, we must come from
         // advancedPhiLowering()=>lowerAssign(). In this case we should reuse
         // the assigned register as this assignment is that start of its use-def
@@ -5447,9 +5353,11 @@
         if (RegNum != Variable::NoRegister)
           _set_dest_nonkillable();
 
-        OperandX8632Mem *NewMemOperand = OperandX8632Mem::create(
-            Func, MemOperand->getType(), RegTemp, Mask2, MemOperand->getIndex(),
-            MemOperand->getShift(), MemOperand->getSegmentRegister());
+        typename Traits::X86OperandMem *NewMemOperand =
+            Traits::X86OperandMem::create(Func, MemOperand->getType(), RegTemp,
+                                          Mask2, MemOperand->getIndex(),
+                                          MemOperand->getShift(),
+                                          MemOperand->getSegmentRegister());
 
         // Label this memory operand as randomize, so we won't randomize it
         // again in case we call legalize() mutiple times on this memory
@@ -5484,23 +5392,26 @@
         bool SuppressMangling = true;
         Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(),
                                                SuppressMangling);
-        OperandX8632Mem *SymbolOperand = OperandX8632Mem::create(
-            Func, MemOperand->getOffset()->getType(), nullptr, Symbol);
+        typename Traits::X86OperandMem *SymbolOperand =
+            Traits::X86OperandMem::create(
+                Func, MemOperand->getOffset()->getType(), nullptr, Symbol);
         _mov(RegTemp, SymbolOperand);
         // If we have a base variable here, we should add the lea instruction
         // to add the value of the base variable to RegTemp. If there is no
         // base variable, we won't need this lea instruction.
         if (MemOperand->getBase()) {
-          OperandX8632Mem *CalculateOperand = OperandX8632Mem::create(
-              Func, MemOperand->getType(), MemOperand->getBase(), nullptr,
-              RegTemp, 0, MemOperand->getSegmentRegister());
+          typename Traits::X86OperandMem *CalculateOperand =
+              Traits::X86OperandMem::create(
+                  Func, MemOperand->getType(), MemOperand->getBase(), nullptr,
+                  RegTemp, 0, MemOperand->getSegmentRegister());
           _lea(RegTemp, CalculateOperand);
           _set_dest_nonkillable();
         }
-        OperandX8632Mem *NewMemOperand = OperandX8632Mem::create(
-            Func, MemOperand->getType(), RegTemp, nullptr,
-            MemOperand->getIndex(), MemOperand->getShift(),
-            MemOperand->getSegmentRegister());
+        typename Traits::X86OperandMem *NewMemOperand =
+            Traits::X86OperandMem::create(Func, MemOperand->getType(), RegTemp,
+                                          nullptr, MemOperand->getIndex(),
+                                          MemOperand->getShift(),
+                                          MemOperand->getSegmentRegister());
         return NewMemOperand;
       }
       assert("Unsupported -randomize-pool-immediates option" && false);